|
35 | 35 | #include <llvm/Passes/PassPlugin.h>
|
36 | 36 |
|
37 | 37 | // NewPM needs to manually include all the pass headers
|
| 38 | +#include <llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h> |
38 | 39 | #include <llvm/Transforms/IPO/AlwaysInliner.h>
|
39 | 40 | #include <llvm/Transforms/IPO/Annotation2Metadata.h>
|
40 | 41 | #include <llvm/Transforms/IPO/ConstantMerge.h>
|
|
46 | 47 | #include <llvm/Transforms/Instrumentation/ThreadSanitizer.h>
|
47 | 48 | #include <llvm/Transforms/Scalar/ADCE.h>
|
48 | 49 | #include <llvm/Transforms/Scalar/AnnotationRemarks.h>
|
| 50 | +#include <llvm/Transforms/Scalar/BDCE.h> |
49 | 51 | #include <llvm/Transforms/Scalar/CorrelatedValuePropagation.h>
|
50 | 52 | #include <llvm/Transforms/Scalar/DCE.h>
|
51 | 53 | #include <llvm/Transforms/Scalar/DeadStoreElimination.h>
|
|
75 | 77 | #include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
|
76 | 78 | #include <llvm/Transforms/Scalar/SimplifyCFG.h>
|
77 | 79 | #include <llvm/Transforms/Scalar/WarnMissedTransforms.h>
|
| 80 | +#include <llvm/Transforms/Utils/LibCallsShrinkWrap.h> |
78 | 81 | #include <llvm/Transforms/Utils/InjectTLIMappings.h>
|
| 82 | +#include <llvm/Transforms/Utils/RelLookupTableConverter.h> |
79 | 83 | #include <llvm/Transforms/Vectorize/LoopVectorize.h>
|
80 | 84 | #include <llvm/Transforms/Vectorize/SLPVectorizer.h>
|
81 | 85 | #include <llvm/Transforms/Vectorize/VectorCombine.h>
|
@@ -209,10 +213,10 @@ namespace {
|
209 | 213 | .convertSwitchRangeToICmp(true)
|
210 | 214 | .convertSwitchToLookupTable(true)
|
211 | 215 | .forwardSwitchCondToPhi(true)
|
| 216 | + .needCanonicalLoops(false) |
212 | 217 | //These mess with loop rotation, so only do them after that
|
213 | 218 | .hoistCommonInsts(true)
|
214 |
| - // Causes an SRET assertion error in late-gc-lowering |
215 |
| - // .sinkCommonInsts(true) |
| 219 | + .sinkCommonInsts(true) |
216 | 220 | ;
|
217 | 221 | }
|
218 | 222 |
|
@@ -354,8 +358,7 @@ static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder
|
354 | 358 | FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions()));
|
355 | 359 | if (O.getSpeedupLevel() >= 1) {
|
356 | 360 | #if JL_LLVM_VERSION >= 160000
|
357 |
| - // TODO check the LLVM 15 default. |
358 |
| - FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); |
| 361 | + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); |
359 | 362 | #else
|
360 | 363 | FPM.addPass(SROAPass());
|
361 | 364 | #endif
|
@@ -396,27 +399,27 @@ static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB,
|
396 | 399 | FunctionPassManager FPM;
|
397 | 400 | if (O.getSpeedupLevel() >= 2) {
|
398 | 401 | #if JL_LLVM_VERSION >= 160000
|
399 |
| - // TODO check the LLVM 15 default. |
400 |
| - FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); |
| 402 | + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); |
401 | 403 | #else
|
402 | 404 | FPM.addPass(SROAPass());
|
403 | 405 | #endif
|
404 |
| - // SROA can duplicate PHI nodes which can block LowerSIMD |
405 |
| - FPM.addPass(InstCombinePass()); |
406 |
| - FPM.addPass(JumpThreadingPass()); |
407 |
| - FPM.addPass(CorrelatedValuePropagationPass()); |
408 |
| - FPM.addPass(ReassociatePass()); |
409 |
| - FPM.addPass(EarlyCSEPass()); |
410 |
| - JULIA_PASS(FPM.addPass(AllocOptPass())); |
411 |
| - } else { // if (O.getSpeedupLevel() >= 1) (exactly) |
412 |
| - FPM.addPass(InstCombinePass()); |
413 |
| - FPM.addPass(EarlyCSEPass()); |
414 |
| - } |
415 |
| - invokePeepholeEPCallbacks(FPM, PB, O); |
416 |
| - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); |
417 |
| - } |
418 |
| - MPM.addPass(GlobalDCEPass()); |
| 406 | + // SROA can duplicate PHI nodes which can block LowerSIMD |
| 407 | + FPM.addPass(EarlyCSEPass()); |
| 408 | + FPM.addPass(JumpThreadingPass()); |
| 409 | + FPM.addPass(CorrelatedValuePropagationPass()); |
| 410 | + FPM.addPass(InstCombinePass()); |
| 411 | + FPM.addPass(AggressiveInstCombinePass()); |
| 412 | + FPM.addPass(LibCallsShrinkWrapPass()); |
| 413 | + FPM.addPass(ReassociatePass()); |
| 414 | + JULIA_PASS(FPM.addPass(AllocOptPass())); |
| 415 | + } else { // if (O.getSpeedupLevel() >= 1) (exactly) |
| 416 | + FPM.addPass(EarlyCSEPass()); |
| 417 | + FPM.addPass(InstCombinePass()); |
| 418 | + } |
| 419 | + invokePeepholeEPCallbacks(FPM, PB, O); |
| 420 | + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); |
419 | 421 | }
|
| 422 | + MPM.addPass(GlobalDCEPass()); |
420 | 423 | MPM.addPass(AfterEarlyOptimizationMarkerPass());
|
421 | 424 | }
|
422 | 425 |
|
@@ -471,64 +474,71 @@ static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB
|
471 | 474 |
|
472 | 475 | static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
|
473 | 476 | FPM.addPass(BeforeScalarOptimizationMarkerPass());
|
474 |
| - if (options.enable_scalar_optimizations) { |
475 |
| - if (O.getSpeedupLevel() >= 2) { |
476 |
| - JULIA_PASS(FPM.addPass(AllocOptPass())); |
477 |
| - #if JL_LLVM_VERSION >= 160000 |
478 |
| - // TODO check the LLVM 15 default. |
479 |
| - FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); |
480 |
| - #else |
481 |
| - FPM.addPass(SROAPass()); |
482 |
| - #endif |
483 |
| - FPM.addPass(InstSimplifyPass()); |
484 |
| - FPM.addPass(GVNPass()); |
485 |
| - FPM.addPass(MemCpyOptPass()); |
486 |
| - FPM.addPass(SCCPPass()); |
487 |
| - FPM.addPass(CorrelatedValuePropagationPass()); |
488 |
| - FPM.addPass(DCEPass()); |
489 |
| - FPM.addPass(IRCEPass()); |
490 |
| - FPM.addPass(InstCombinePass()); |
491 |
| - FPM.addPass(JumpThreadingPass()); |
492 |
| - } |
493 |
| - if (O.getSpeedupLevel() >= 3) { |
494 |
| - FPM.addPass(GVNPass()); |
495 |
| - } |
496 |
| - if (O.getSpeedupLevel() >= 2) { |
497 |
| - FPM.addPass(DSEPass()); |
498 |
| - invokePeepholeEPCallbacks(FPM, PB, O); |
499 |
| - FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); |
500 |
| - JULIA_PASS(FPM.addPass(AllocOptPass())); |
501 |
| - { |
502 |
| - LoopPassManager LPM; |
503 |
| - LPM.addPass(LoopDeletionPass()); |
504 |
| - LPM.addPass(LoopInstSimplifyPass()); |
505 |
| - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); |
506 |
| - } |
507 |
| - FPM.addPass(LoopDistributePass()); |
| 477 | + if (O.getSpeedupLevel() >= 2) { |
| 478 | + JULIA_PASS(FPM.addPass(AllocOptPass())); |
| 479 | +#if JL_LLVM_VERSION >= 160000 |
| 480 | + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); |
| 481 | +#else |
| 482 | + FPM.addPass(SROAPass()); |
| 483 | +#endif |
| 484 | + FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true)); |
| 485 | + FPM.addPass(InstSimplifyPass()); |
| 486 | + FPM.addPass(GVNPass()); |
| 487 | + FPM.addPass(MemCpyOptPass()); |
| 488 | + FPM.addPass(SCCPPass()); |
| 489 | + FPM.addPass(BDCEPass()); |
| 490 | + FPM.addPass(CorrelatedValuePropagationPass()); |
| 491 | + FPM.addPass(ADCEPass()); |
| 492 | + FPM.addPass(IRCEPass()); |
| 493 | + FPM.addPass(InstCombinePass()); |
| 494 | + FPM.addPass(JumpThreadingPass()); |
| 495 | + } |
| 496 | + if (O.getSpeedupLevel() >= 3) { |
| 497 | + FPM.addPass(GVNPass()); |
| 498 | + } |
| 499 | + if (O.getSpeedupLevel() >= 2) { |
| 500 | + FPM.addPass(DSEPass()); |
| 501 | + invokePeepholeEPCallbacks(FPM, PB, O); |
| 502 | + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); |
| 503 | + JULIA_PASS(FPM.addPass(AllocOptPass())); |
| 504 | + { |
| 505 | + LoopPassManager LPM; |
| 506 | + LPM.addPass(LICMPass(LICMOptions())); |
| 507 | + LPM.addPass(JuliaLICMPass()); |
| 508 | + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */true)); |
508 | 509 | }
|
509 |
| - invokeScalarOptimizerCallbacks(FPM, PB, O); |
| 510 | + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); |
| 511 | + FPM.addPass(InstCombinePass()); |
510 | 512 | }
|
| 513 | + invokeScalarOptimizerCallbacks(FPM, PB, O); |
511 | 514 | FPM.addPass(AfterScalarOptimizationMarkerPass());
|
512 | 515 | }
|
513 | 516 |
|
514 | 517 | static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
|
515 | 518 | FPM.addPass(BeforeVectorizationMarkerPass());
|
516 |
| - if (options.enable_vector_pipeline) { |
517 |
| - //TODO look into loop vectorize options |
518 |
| - FPM.addPass(InjectTLIMappings()); |
519 |
| - FPM.addPass(LoopVectorizePass()); |
520 |
| - FPM.addPass(LoopLoadEliminationPass()); |
521 |
| - FPM.addPass(InstCombinePass()); |
522 |
| - FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); |
523 |
| - FPM.addPass(SLPVectorizerPass()); |
524 |
| - invokeVectorizerCallbacks(FPM, PB, O); |
525 |
| - FPM.addPass(VectorCombinePass()); |
526 |
| - FPM.addPass(ADCEPass()); |
527 |
| - //TODO add BDCEPass here? |
528 |
| - // This unroll will unroll vectorized loops |
529 |
| - // as well as loops that we tried but failed to vectorize |
530 |
| - FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false))); |
531 |
| - } |
| 519 | + //TODO look into loop vectorize options |
| 520 | + // Rerotate loops that might have been unrotated in the simplification |
| 521 | + LoopPassManager LPM; |
| 522 | + LPM.addPass(LoopRotatePass()); |
| 523 | + LPM.addPass(LoopDeletionPass()); |
| 524 | + FPM.addPass(createFunctionToLoopPassAdaptor( |
| 525 | + std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); |
| 526 | + FPM.addPass(LoopDistributePass()); |
| 527 | + FPM.addPass(InjectTLIMappings()); |
| 528 | + FPM.addPass(LoopVectorizePass()); |
| 529 | + FPM.addPass(LoopLoadEliminationPass()); |
| 530 | + FPM.addPass(InstCombinePass()); |
| 531 | + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); |
| 532 | + FPM.addPass(SLPVectorizerPass()); |
| 533 | + invokeVectorizerCallbacks(FPM, PB, O); |
| 534 | + FPM.addPass(VectorCombinePass()); |
| 535 | + FPM.addPass(InstCombinePass()); |
| 536 | + //TODO add BDCEPass here? |
| 537 | + // This unroll will unroll vectorized loops |
| 538 | + // as well as loops that we tried but failed to vectorize |
| 539 | + FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false))); |
| 540 | + FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); |
| 541 | + FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(LICMOptions()), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); |
532 | 542 | FPM.addPass(AfterVectorizationMarkerPass());
|
533 | 543 | }
|
534 | 544 |
|
|
0 commit comments