Skip to content

Commit 30ed1f0

Browse files
committed
Try further tuning
1 parent 73cdfd8 commit 30ed1f0

File tree

1 file changed

+33
-14
lines changed

1 file changed

+33
-14
lines changed

src/pipeline.cpp

+33 -14
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
#include <llvm/Passes/PassPlugin.h>
3636

3737
// NewPM needs to manually include all the pass headers
38+
#include <llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h>
3839
#include <llvm/Transforms/IPO/AlwaysInliner.h>
3940
#include <llvm/Transforms/IPO/Annotation2Metadata.h>
4041
#include <llvm/Transforms/IPO/ConstantMerge.h>
@@ -46,6 +47,7 @@
4647
#include <llvm/Transforms/Instrumentation/ThreadSanitizer.h>
4748
#include <llvm/Transforms/Scalar/ADCE.h>
4849
#include <llvm/Transforms/Scalar/AnnotationRemarks.h>
50+
#include <llvm/Transforms/Scalar/BDCE.h>
4951
#include <llvm/Transforms/Scalar/CorrelatedValuePropagation.h>
5052
#include <llvm/Transforms/Scalar/DCE.h>
5153
#include <llvm/Transforms/Scalar/DeadStoreElimination.h>
@@ -75,7 +77,9 @@
7577
#include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
7678
#include <llvm/Transforms/Scalar/SimplifyCFG.h>
7779
#include <llvm/Transforms/Scalar/WarnMissedTransforms.h>
80+
#include <llvm/Transforms/Utils/LibCallsShrinkWrap.h>
7881
#include <llvm/Transforms/Utils/InjectTLIMappings.h>
82+
#include <llvm/Transforms/Utils/RelLookupTableConverter.h>
7983
#include <llvm/Transforms/Vectorize/LoopVectorize.h>
8084
#include <llvm/Transforms/Vectorize/SLPVectorizer.h>
8185
#include <llvm/Transforms/Vectorize/VectorCombine.h>
@@ -209,10 +213,10 @@ namespace {
209213
.convertSwitchRangeToICmp(true)
210214
.convertSwitchToLookupTable(true)
211215
.forwardSwitchCondToPhi(true)
216+
.needCanonicalLoops(false)
212217
//These mess with loop rotation, so only do them after that
213218
.hoistCommonInsts(true)
214-
// Causes an SRET assertion error in late-gc-lowering
215-
// .sinkCommonInsts(true)
219+
.sinkCommonInsts(true)
216220
;
217221
}
218222
#if JL_LLVM_VERSION < 150000
@@ -357,7 +361,7 @@ static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder
357361
if (O.getSpeedupLevel() >= 1) {
358362
#if JL_LLVM_VERSION >= 160000
359363
// TODO check the LLVM 15 default.
360-
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
364+
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
361365
#else
362366
FPM.addPass(SROAPass());
363367
#endif
@@ -397,20 +401,23 @@ static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB,
397401
if (O.getSpeedupLevel() >= 2) {
398402
#if JL_LLVM_VERSION >= 160000
399403
// TODO check the LLVM 15 default.
400-
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
404+
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
401405
#else
402406
FPM.addPass(SROAPass());
403407
#endif
404408
// SROA can duplicate PHI nodes which can block LowerSIMD
405-
FPM.addPass(InstCombinePass());
409+
FPM.addPass(EarlyCSEPass());
406410
FPM.addPass(JumpThreadingPass());
407411
FPM.addPass(CorrelatedValuePropagationPass());
412+
FPM.addPass(InstCombinePass());
413+
FPM.addPass(AggressiveInstCombinePass());
414+
FPM.addPass(LibCallsShrinkWrapPass());
415+
408416
FPM.addPass(ReassociatePass());
409-
FPM.addPass(EarlyCSEPass());
410417
JULIA_PASS(FPM.addPass(AllocOptPass()));
411418
} else { // if (O.getSpeedupLevel() >= 1) (exactly)
412-
FPM.addPass(InstCombinePass());
413419
FPM.addPass(EarlyCSEPass());
420+
FPM.addPass(InstCombinePass());
414421
}
415422
invokePeepholeEPCallbacks(FPM, PB, O);
416423
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
@@ -472,16 +479,18 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *
472479
JULIA_PASS(FPM.addPass(AllocOptPass()));
473480
#if JL_LLVM_VERSION >= 160000
474481
// TODO check the LLVM 15 default.
475-
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
482+
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
476483
#else
477484
FPM.addPass(SROAPass());
478485
#endif
486+
FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
479487
FPM.addPass(InstSimplifyPass());
480488
FPM.addPass(GVNPass());
481489
FPM.addPass(MemCpyOptPass());
482490
FPM.addPass(SCCPPass());
491+
FPM.addPass(BDCEPass());
483492
FPM.addPass(CorrelatedValuePropagationPass());
484-
FPM.addPass(DCEPass());
493+
FPM.addPass(ADCEPass());
485494
FPM.addPass(IRCEPass());
486495
FPM.addPass(InstCombinePass());
487496
FPM.addPass(JumpThreadingPass());
@@ -496,11 +505,12 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *
496505
JULIA_PASS(FPM.addPass(AllocOptPass()));
497506
{
498507
LoopPassManager LPM;
499-
LPM.addPass(LoopDeletionPass());
500-
LPM.addPass(LoopInstSimplifyPass());
501-
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM)));
508+
LPM.addPass(LICMPass(LICMOptions()));
509+
LPM.addPass(JuliaLICMPass());
510+
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */true));
502511
}
503-
FPM.addPass(LoopDistributePass());
512+
FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
513+
FPM.addPass(InstCombinePass());
504514
}
505515
invokeScalarOptimizerCallbacks(FPM, PB, O);
506516
FPM.addPass(AfterScalarOptimizationMarkerPass());
@@ -509,6 +519,13 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *
509519
static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
510520
FPM.addPass(BeforeVectorizationMarkerPass());
511521
//TODO look into loop vectorize options
522+
// Rerotate loops that might have been unrotated in the simplification
523+
LoopPassManager LPM;
524+
LPM.addPass(LoopRotatePass());
525+
LPM.addPass(LoopDeletionPass());
526+
FPM.addPass(createFunctionToLoopPassAdaptor(
527+
std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
528+
FPM.addPass(LoopDistributePass());
512529
FPM.addPass(InjectTLIMappings());
513530
FPM.addPass(LoopVectorizePass());
514531
FPM.addPass(LoopLoadEliminationPass());
@@ -517,11 +534,13 @@ static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, Optim
517534
FPM.addPass(SLPVectorizerPass());
518535
invokeVectorizerCallbacks(FPM, PB, O);
519536
FPM.addPass(VectorCombinePass());
520-
FPM.addPass(ADCEPass());
537+
FPM.addPass(InstCombinePass());
521538
//TODO add BDCEPass here?
522539
// This unroll will unroll vectorized loops
523540
// as well as loops that we tried but failed to vectorize
524541
FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false)));
542+
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
543+
FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(LICMOptions()), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
525544
FPM.addPass(AfterVectorizationMarkerPass());
526545
}
527546

0 commit comments

Comments
 (0)