Skip to content

Commit 1d22e85

Browse files
committed
Try further tuning
1 parent 4b1bbeb commit 1d22e85

File tree

1 file changed

+82
-72
lines changed

1 file changed

+82
-72
lines changed

src/pipeline.cpp

Lines changed: 82 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
#include <llvm/Passes/PassPlugin.h>
3636

3737
// NewPM needs to manually include all the pass headers
38+
#include <llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h>
3839
#include <llvm/Transforms/IPO/AlwaysInliner.h>
3940
#include <llvm/Transforms/IPO/Annotation2Metadata.h>
4041
#include <llvm/Transforms/IPO/ConstantMerge.h>
@@ -46,6 +47,7 @@
4647
#include <llvm/Transforms/Instrumentation/ThreadSanitizer.h>
4748
#include <llvm/Transforms/Scalar/ADCE.h>
4849
#include <llvm/Transforms/Scalar/AnnotationRemarks.h>
50+
#include <llvm/Transforms/Scalar/BDCE.h>
4951
#include <llvm/Transforms/Scalar/CorrelatedValuePropagation.h>
5052
#include <llvm/Transforms/Scalar/DCE.h>
5153
#include <llvm/Transforms/Scalar/DeadStoreElimination.h>
@@ -75,7 +77,9 @@
7577
#include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
7678
#include <llvm/Transforms/Scalar/SimplifyCFG.h>
7779
#include <llvm/Transforms/Scalar/WarnMissedTransforms.h>
80+
#include <llvm/Transforms/Utils/LibCallsShrinkWrap.h>
7881
#include <llvm/Transforms/Utils/InjectTLIMappings.h>
82+
#include <llvm/Transforms/Utils/RelLookupTableConverter.h>
7983
#include <llvm/Transforms/Vectorize/LoopVectorize.h>
8084
#include <llvm/Transforms/Vectorize/SLPVectorizer.h>
8185
#include <llvm/Transforms/Vectorize/VectorCombine.h>
@@ -209,10 +213,10 @@ namespace {
209213
.convertSwitchRangeToICmp(true)
210214
.convertSwitchToLookupTable(true)
211215
.forwardSwitchCondToPhi(true)
216+
.needCanonicalLoops(false)
212217
//These mess with loop rotation, so only do them after that
213218
.hoistCommonInsts(true)
214-
// Causes an SRET assertion error in late-gc-lowering
215-
// .sinkCommonInsts(true)
219+
.sinkCommonInsts(true)
216220
;
217221
}
218222

@@ -354,8 +358,7 @@ static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder
354358
FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions()));
355359
if (O.getSpeedupLevel() >= 1) {
356360
#if JL_LLVM_VERSION >= 160000
357-
// TODO check the LLVM 15 default.
358-
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
361+
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
359362
#else
360363
FPM.addPass(SROAPass());
361364
#endif
@@ -396,27 +399,27 @@ static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB,
396399
FunctionPassManager FPM;
397400
if (O.getSpeedupLevel() >= 2) {
398401
#if JL_LLVM_VERSION >= 160000
399-
// TODO check the LLVM 15 default.
400-
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
402+
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
401403
#else
402404
FPM.addPass(SROAPass());
403405
#endif
404-
// SROA can duplicate PHI nodes which can block LowerSIMD
405-
FPM.addPass(InstCombinePass());
406-
FPM.addPass(JumpThreadingPass());
407-
FPM.addPass(CorrelatedValuePropagationPass());
408-
FPM.addPass(ReassociatePass());
409-
FPM.addPass(EarlyCSEPass());
410-
JULIA_PASS(FPM.addPass(AllocOptPass()));
411-
} else { // if (O.getSpeedupLevel() >= 1) (exactly)
412-
FPM.addPass(InstCombinePass());
413-
FPM.addPass(EarlyCSEPass());
414-
}
415-
invokePeepholeEPCallbacks(FPM, PB, O);
416-
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
417-
}
418-
MPM.addPass(GlobalDCEPass());
406+
// SROA can duplicate PHI nodes which can block LowerSIMD
407+
FPM.addPass(EarlyCSEPass());
408+
FPM.addPass(JumpThreadingPass());
409+
FPM.addPass(CorrelatedValuePropagationPass());
410+
FPM.addPass(InstCombinePass());
411+
FPM.addPass(AggressiveInstCombinePass());
412+
FPM.addPass(LibCallsShrinkWrapPass());
413+
FPM.addPass(ReassociatePass());
414+
JULIA_PASS(FPM.addPass(AllocOptPass()));
415+
} else { // if (O.getSpeedupLevel() >= 1) (exactly)
416+
FPM.addPass(EarlyCSEPass());
417+
FPM.addPass(InstCombinePass());
418+
}
419+
invokePeepholeEPCallbacks(FPM, PB, O);
420+
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
419421
}
422+
MPM.addPass(GlobalDCEPass());
420423
MPM.addPass(AfterEarlyOptimizationMarkerPass());
421424
}
422425

@@ -471,64 +474,71 @@ static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB
471474

472475
// Appends the scalar-optimization stage to FPM. The stage is bracketed by
// BeforeScalarOptimizationMarkerPass and AfterScalarOptimizationMarkerPass.
// In both versions shown, passes are only added when O.getSpeedupLevel() >= 2,
// with one extra GVNPass at >= 3.
// NOTE(review): the '+' lines no longer test options.enable_scalar_optimizations,
// so `options` is not read anywhere in the new body, and
// invokeScalarOptimizerCallbacks is now reached unconditionally instead of
// inside that test -- confirm this is intended.
static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
473476
FPM.addPass(BeforeScalarOptimizationMarkerPass());
474-
if (options.enable_scalar_optimizations) {
475-
if (O.getSpeedupLevel() >= 2) {
476-
JULIA_PASS(FPM.addPass(AllocOptPass()));
477-
#if JL_LLVM_VERSION >= 160000
478-
// TODO check the LLVM 15 default.
479-
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
480-
#else
481-
FPM.addPass(SROAPass());
482-
#endif
483-
FPM.addPass(InstSimplifyPass());
484-
FPM.addPass(GVNPass());
485-
FPM.addPass(MemCpyOptPass());
486-
FPM.addPass(SCCPPass());
487-
FPM.addPass(CorrelatedValuePropagationPass());
488-
FPM.addPass(DCEPass());
489-
FPM.addPass(IRCEPass());
490-
FPM.addPass(InstCombinePass());
491-
FPM.addPass(JumpThreadingPass());
492-
}
493-
if (O.getSpeedupLevel() >= 3) {
494-
FPM.addPass(GVNPass());
495-
}
496-
if (O.getSpeedupLevel() >= 2) {
497-
FPM.addPass(DSEPass());
498-
invokePeepholeEPCallbacks(FPM, PB, O);
499-
FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
500-
JULIA_PASS(FPM.addPass(AllocOptPass()));
501-
{
502-
LoopPassManager LPM;
503-
LPM.addPass(LoopDeletionPass());
504-
LPM.addPass(LoopInstSimplifyPass());
505-
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM)));
506-
}
507-
FPM.addPass(LoopDistributePass());
// ---- Added ('+') version of the body starts here. ----
477+
if (O.getSpeedupLevel() >= 2) {
478+
JULIA_PASS(FPM.addPass(AllocOptPass()));
479+
#if JL_LLVM_VERSION >= 160000
// Changed from SROAOptions::PreserveCFG (removed lines) to SROAOptions::ModifyCFG.
// The #else branch still default-constructs SROAPass.
480+
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
481+
#else
482+
FPM.addPass(SROAPass());
483+
#endif
// Newly added pass.
// NOTE(review): unlike the SROAOptions use above, this bool constructor
// argument is not version-guarded -- confirm it is accepted by every
// supported LLVM version.
484+
FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
485+
FPM.addPass(InstSimplifyPass());
486+
FPM.addPass(GVNPass());
487+
FPM.addPass(MemCpyOptPass());
488+
FPM.addPass(SCCPPass());
// BDCEPass is new in this position.
489+
FPM.addPass(BDCEPass());
490+
FPM.addPass(CorrelatedValuePropagationPass());
// ADCEPass takes the slot that DCEPass occupied in the removed lines.
491+
FPM.addPass(ADCEPass());
492+
FPM.addPass(IRCEPass());
493+
FPM.addPass(InstCombinePass());
494+
FPM.addPass(JumpThreadingPass());
495+
}
496+
if (O.getSpeedupLevel() >= 3) {
497+
FPM.addPass(GVNPass());
498+
}
499+
if (O.getSpeedupLevel() >= 2) {
500+
FPM.addPass(DSEPass());
501+
invokePeepholeEPCallbacks(FPM, PB, O);
502+
FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
503+
JULIA_PASS(FPM.addPass(AllocOptPass()));
504+
{
505+
LoopPassManager LPM;
// The loop pass manager now holds LICMPass + JuliaLICMPass and the adaptor is
// created with UseMemorySSA = true. The removed version held
// LoopDeletionPass + LoopInstSimplifyPass and passed no extra adaptor arguments.
506+
LPM.addPass(LICMPass(LICMOptions()));
507+
LPM.addPass(JuliaLICMPass());
508+
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */true));
508509
}
509-
invokeScalarOptimizerCallbacks(FPM, PB, O);
// The removed LoopDistributePass() call is replaced here by a second
// aggressive SimplifyCFG followed by InstCombine.
510+
FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
511+
FPM.addPass(InstCombinePass());
510512
}
// In the '+' version this call sits after the closing brace of the last
// speedup-level check, directly in the function body.
513+
invokeScalarOptimizerCallbacks(FPM, PB, O);
511514
FPM.addPass(AfterScalarOptimizationMarkerPass());
512515
}
513516

514517
// Appends the vectorization stage to FPM. The stage is bracketed by
// BeforeVectorizationMarkerPass and AfterVectorizationMarkerPass.
// NOTE(review): the '+' lines drop the options.enable_vector_pipeline test, so
// every pass in the new body is added unconditionally and `options` is no
// longer read -- confirm this is intended.
static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
515518
FPM.addPass(BeforeVectorizationMarkerPass());
516-
if (options.enable_vector_pipeline) {
517-
//TODO look into loop vectorize options
518-
FPM.addPass(InjectTLIMappings());
519-
FPM.addPass(LoopVectorizePass());
520-
FPM.addPass(LoopLoadEliminationPass());
521-
FPM.addPass(InstCombinePass());
522-
FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
523-
FPM.addPass(SLPVectorizerPass());
524-
invokeVectorizerCallbacks(FPM, PB, O);
525-
FPM.addPass(VectorCombinePass());
526-
FPM.addPass(ADCEPass());
527-
//TODO add BDCEPass here?
528-
// This unroll will unroll vectorized loops
529-
// as well as loops that we tried but failed to vectorize
530-
FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false)));
531-
}
// ---- Added ('+') version of the body starts here. ----
519+
//TODO look into loop vectorize options
520+
// Rerotate loops that might have been unrotated in the simplification
521+
LoopPassManager LPM;
522+
LPM.addPass(LoopRotatePass());
523+
LPM.addPass(LoopDeletionPass());
524+
FPM.addPass(createFunctionToLoopPassAdaptor(
525+
std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
// LoopDistributePass is new in this function.
526+
FPM.addPass(LoopDistributePass());
527+
FPM.addPass(InjectTLIMappings());
528+
FPM.addPass(LoopVectorizePass());
529+
FPM.addPass(LoopLoadEliminationPass());
530+
FPM.addPass(InstCombinePass());
531+
FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
532+
FPM.addPass(SLPVectorizerPass());
533+
invokeVectorizerCallbacks(FPM, PB, O);
534+
FPM.addPass(VectorCombinePass());
// InstCombinePass takes the slot that ADCEPass occupied in the removed lines.
535+
FPM.addPass(InstCombinePass());
536+
//TODO add BDCEPass here?
537+
// This unroll will unroll vectorized loops
538+
// as well as loops that we tried but failed to vectorize
539+
FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false)));
// NOTE(review): every other SROAPass(SROAOptions::...) call in this diff is
// wrapped in `#if JL_LLVM_VERSION >= 160000` with a plain SROAPass() fallback.
// This one is not -- confirm it still builds against older LLVM, or add the
// same guard.
540+
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
// Trailing LICM run: LICMPass is wrapped directly in a loop adaptor with
// UseMemorySSA=true and UseBlockFrequencyInfo=false.
541+
FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(LICMOptions()), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
532542
FPM.addPass(AfterVectorizationMarkerPass());
533543
}
534544

0 commit comments

Comments
 (0)