|
5 | 5 |
|
6 | 6 | LLVM.@function_pass "jl-inst-simplify" JLInstSimplifyPass |
7 | 7 |
|
| 8 | +const RunAttributor = Ref(true) |
| 9 | + |
8 | 10 | function enzyme_attributor_pass!(mod::LLVM.Module) |
9 | 11 | ccall( |
10 | 12 | (:RunAttributorOnModule, API.libEnzyme), |
|
17 | 19 |
|
18 | 20 | EnzymeAttributorPass() = NewPMModulePass("enzyme_attributor", enzyme_attributor_pass!) |
19 | 21 |
|
20 | | - |
21 | | -struct PipelineConfig |
22 | | - Speedup::Cint |
23 | | - Size::Cint |
24 | | - lower_intrinsics::Cint |
25 | | - dump_native::Cint |
26 | | - external_use::Cint |
27 | | - llvm_only::Cint |
28 | | - always_inline::Cint |
29 | | - enable_early_simplifications::Cint |
30 | | - enable_early_optimizations::Cint |
31 | | - enable_scalar_optimizations::Cint |
32 | | - enable_loop_optimizations::Cint |
33 | | - enable_vector_pipeline::Cint |
34 | | - remove_ni::Cint |
35 | | - cleanup::Cint |
36 | | -end |
37 | | - |
38 | | -const RunAttributor = Ref(true) |
39 | | - |
40 | | -function pipeline_options(; |
41 | | - lower_intrinsics::Bool = true, |
42 | | - dump_native::Bool = false, |
43 | | - external_use::Bool = false, |
44 | | - llvm_only::Bool = false, |
45 | | - always_inline::Bool = true, |
46 | | - enable_early_simplifications::Bool = true, |
47 | | - enable_early_optimizations::Bool = true, |
48 | | - enable_scalar_optimizations::Bool = true, |
49 | | - enable_loop_optimizations::Bool = true, |
50 | | - enable_vector_pipeline::Bool = true, |
51 | | - remove_ni::Bool = true, |
52 | | - cleanup::Bool = true, |
53 | | - Size::Cint = Cint(0), |
54 | | - Speedup::Cint = Cint(3), |
55 | | -) |
56 | | - return PipelineConfig( |
57 | | - Speedup, |
58 | | - Size, |
59 | | - lower_intrinsics, |
60 | | - dump_native, |
61 | | - external_use, |
62 | | - llvm_only, |
63 | | - always_inline, |
64 | | - enable_early_simplifications, |
65 | | - enable_early_optimizations, |
66 | | - enable_scalar_optimizations, |
67 | | - enable_loop_optimizations, |
68 | | - enable_vector_pipeline, |
69 | | - remove_ni, |
70 | | - cleanup, |
71 | | - ) |
72 | | -end |
73 | | - |
74 | | -function run_jl_pipeline(pm::ModulePassManager, tm::LLVM.TargetMachine; kwargs...) |
75 | | - config = Ref(pipeline_options(; kwargs...)) |
76 | | - function jl_pipeline(m) |
77 | | - @dispose pb = NewPMPassBuilder() begin |
78 | | - add!(pb, NewPMModulePassManager()) do mpm |
79 | | - @ccall jl_build_newpm_pipeline( |
80 | | - mpm.ref::Ptr{Cvoid}, |
81 | | - pb.ref::Ptr{Cvoid}, |
82 | | - config::Ptr{PipelineConfig}, |
83 | | - )::Cvoid |
84 | | - end |
85 | | - LLVM.run!(mpm, m, tm) |
86 | | - end |
87 | | - return true |
88 | | - end |
89 | | - add!(pm, ModulePass("JLPipeline", jl_pipeline)) |
90 | | -end |
91 | | - |
92 | | -function julia_pipeline(pb, mpm; kwargs...) |
93 | | - config = Ref(pipeline_options(; kwargs...)) |
94 | | - @ccall jl_build_newpm_pipeline( |
95 | | - mpm.ref::Ptr{Cvoid}, |
96 | | - pb.ref::Ptr{Cvoid}, |
97 | | - config::Ptr{PipelineConfig}, |
98 | | - )::Cvoid |
99 | | -end |
100 | | - |
101 | 22 | @static if VERSION < v"1.11.0-DEV.428" |
102 | 23 | else |
103 | 24 | barrier_noop!(pm) = nothing |
@@ -233,86 +154,32 @@ else |
233 | 154 | end |
234 | 155 | end |
235 | 156 |
|
236 | | - |
237 | 157 | function loop_optimizations_tm!(pm::LLVM.ModulePassManager, tm::LLVM.TargetMachine) |
238 | | - @static if true || VERSION < v"1.11-" |
239 | | - lower_simdloop_tm!(pm, tm) |
240 | | - licm!(pm) |
241 | | - if LLVM.version() >= v"15" |
242 | | - simple_loop_unswitch_legacy!(pm) |
243 | | - else |
244 | | - loop_unswitch!(pm) |
245 | | - end |
| 158 | + lower_simdloop_tm!(pm, tm) |
| 159 | + licm!(pm) |
| 160 | + if LLVM.version() >= v"15" |
| 161 | + simple_loop_unswitch_legacy!(pm) |
246 | 162 | else |
247 | | - run_jl_pipeline( |
248 | | - pm, |
249 | | - tm; |
250 | | - lower_intrinsics = false, |
251 | | - dump_native = false, |
252 | | - external_use = false, |
253 | | - llvm_only = false, |
254 | | - always_inline = false, |
255 | | - enable_early_simplifications = false, |
256 | | - enable_early_optimizations = false, |
257 | | - enable_scalar_optimizations = false, |
258 | | - enable_loop_optimizations = true, |
259 | | - enable_vector_pipeline = false, |
260 | | - remove_ni = false, |
261 | | - cleanup = false, |
262 | | - ) |
| 163 | + loop_unswitch!(pm) |
263 | 164 | end |
264 | 165 | end |
265 | 166 |
|
266 | | - |
267 | 167 | function more_loop_optimizations_tm!(pm::LLVM.ModulePassManager, tm::LLVM.TargetMachine) |
268 | | - @static if true || VERSION < v"1.11-" |
269 | | - loop_rotate!(pm) |
270 | | - # moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1) |
271 | | - loop_idiom!(pm) |
272 | | - |
273 | | - # LoopRotate strips metadata from terminator, so run LowerSIMD afterwards |
274 | | - lower_simdloop_tm!(pm, tm) # Annotate loop marked with "loopinfo" as LLVM parallel loop |
275 | | - licm!(pm) |
276 | | - julia_licm_tm!(pm, tm) |
277 | | - # Subsequent passes not stripping metadata from terminator |
278 | | - instruction_combining!(pm) # TODO: createInstSimplifyLegacy |
279 | | - jl_inst_simplify!(pm) |
| 168 | + loop_rotate!(pm) |
| 169 | + # moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1) |
| 170 | + loop_idiom!(pm) |
| 171 | + |
| 172 | + # LoopRotate strips metadata from terminator, so run LowerSIMD afterwards |
| 173 | + lower_simdloop_tm!(pm, tm) # Annotate loop marked with "loopinfo" as LLVM parallel loop |
| 174 | + licm!(pm) |
| 175 | + julia_licm_tm!(pm, tm) |
| 176 | + # Subsequent passes not stripping metadata from terminator |
| 177 | + instruction_combining!(pm) # TODO: createInstSimplifyLegacy |
| 178 | + jl_inst_simplify!(pm) |
280 | 179 |
|
281 | | - ind_var_simplify!(pm) |
282 | | - loop_deletion!(pm) |
283 | | - loop_unroll!(pm) # TODO: in Julia createSimpleLoopUnroll |
284 | | - else |
285 | | - # LowerSIMDLoopPass |
286 | | - # LoopRotatePass [opt >= 2] |
287 | | - # LICMPass |
288 | | - # JuliaLICMPass |
289 | | - # SimpleLoopUnswitchPass |
290 | | - # LICMPass |
291 | | - # JuliaLICMPass |
292 | | - # IRCEPass |
293 | | - # LoopInstSimplifyPass |
294 | | - # - in ours this is instcombine with jlinstsimplify |
295 | | - # LoopIdiomRecognizePass |
296 | | - # IndVarSimplifyPass |
297 | | - # LoopDeletionPass |
298 | | - # LoopFullUnrollPass |
299 | | - run_jl_pipeline( |
300 | | - pm, |
301 | | - tm; |
302 | | - lower_intrinsics = false, |
303 | | - dump_native = false, |
304 | | - external_use = false, |
305 | | - llvm_only = false, |
306 | | - always_inline = false, |
307 | | - enable_early_simplifications = false, |
308 | | - enable_early_optimizations = false, |
309 | | - enable_scalar_optimizations = false, |
310 | | - enable_loop_optimizations = true, |
311 | | - enable_vector_pipeline = false, |
312 | | - remove_ni = false, |
313 | | - cleanup = false, |
314 | | - ) |
315 | | - end |
| 180 | + ind_var_simplify!(pm) |
| 181 | + loop_deletion!(pm) |
| 182 | + loop_unroll!(pm) # TODO: in Julia createSimpleLoopUnroll |
316 | 183 | end |
317 | 184 |
|
318 | 185 | @static if VERSION < v"1.11-" |
@@ -860,47 +727,15 @@ function post_optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine, machine::Bool |
860 | 727 | register!(pb, ReinsertGCMarkerPass()) |
861 | 728 | add!(pb, NewPMModulePassManager()) do mpm |
862 | 729 | # TODO(NewPM) |
863 | | - # addTargetPasses!(mpm, tm, LLVM.triple(mod)) |
864 | 730 | # addOptimizationPasses!(mpm, tm) |
865 | | - end |
866 | | - if machine |
867 | | - add!(pb, NewPMModulePassManager()) do mpm |
| 731 | + if machine |
868 | 732 | addJuliaLegalizationPasses_newPM!(mpm, true) |
869 | 733 | addMachinePasses_newPM!(mpm) |
870 | 734 | end |
871 | 735 | end |
872 | 736 | run!(pb, mod, tm) |
873 | 737 | end |
874 | 738 | end |
875 | | - # Wanted to use this but julia_pipeline is not ready for prime time |
876 | | - # @dispose pb = NewPMPassBuilder() begin |
877 | | - # registerEnzymeAndPassPipeline!(pb) |
878 | | - # register!(pb, ReinsertGCMarkerPass()) |
879 | | - |
880 | | - # add!(pb, NewPMModulePassManager()) do mpm |
881 | | - # if machine |
882 | | - # add!(mpm, NewPMFunctionPassManager()) do fpm |
883 | | - # add!(fpm, ReinsertGCMarkerPass()) |
884 | | - # end |
885 | | - # end |
886 | | - |
887 | | - # julia_pipeline(pb, mpm; |
888 | | - # lower_intrinsics = machine, |
889 | | - # dump_native = false, |
890 | | - # external_use = false, |
891 | | - # llvm_only = false, |
892 | | - # always_inline = true, |
893 | | - # enable_early_simplifications = true, |
894 | | - # enable_early_optimizations = true, |
895 | | - # enable_scalar_optimizations = true, |
896 | | - # enable_loop_optimizations = true, |
897 | | - # enable_vector_pipeline = true, |
898 | | - # remove_ni = true, |
899 | | - # cleanup = true, |
900 | | - # ) |
901 | | - # end |
902 | | - # run!(pb, mod, tm) |
903 | | - # end |
904 | 739 | for f in functions(mod) |
905 | 740 | if isempty(blocks(f)) |
906 | 741 | continue |
|
0 commit comments