diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index b0360f1903c0e..97c3ff869edf4 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -114,13 +114,16 @@ #include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/CFGuard.h" +#include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar/ConstantHoisting.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Scalar/LoopStrengthReduce.h" +#include "llvm/Transforms/Scalar/LoopTermFold.h" #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" #include "llvm/Transforms/Scalar/MergeICmps.h" #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" #include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h" +#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h" #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Transforms/Utils/LowerInvoke.h" #include @@ -754,7 +757,12 @@ void CodeGenPassBuilder::addIRPasses( // Run loop strength reduction before anything else. if (getOptLevel() != CodeGenOptLevel::None && !Opt.DisableLSR) { - addPass(createFunctionToLoopPassAdaptor(LoopStrengthReducePass(), + LoopPassManager LPM; + LPM.addPass(CanonicalizeFreezeInLoopsPass()); + LPM.addPass(LoopStrengthReducePass()); + if (Opt.EnableLoopTermFold) + LPM.addPass(LoopTermFoldPass()); + addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true)); } @@ -799,7 +807,8 @@ void CodeGenPassBuilder::addIRPasses( addPass(ScalarizeMaskedMemIntrinPass()); // Expand reduction intrinsics into shuffle sequences if the target wants to. - addPass(ExpandReductionsPass()); + if (!Opt.DisableExpandReductions) + addPass(ExpandReductionsPass()); // Convert conditional moves to conditional jumps when profitable. if (getOptLevel() != CodeGenOptLevel::None && !Opt.DisableSelectOptimize) @@ -877,6 +886,9 @@ void CodeGenPassBuilder::addISelPrepare( if (Opt.RequiresCodeGenSCCOrder) addPass.requireCGSCCOrder(); + if (getOptLevel() != CodeGenOptLevel::None) + addPass(ObjCARCContractPass()); + addPass(CallBrPreparePass()); // Add both the safe stack and the stack protection passes: each of them will // only protect functions that have corresponding attributes. diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h index f29cbe78a1853..8d0a7e61970fa 100644 --- a/llvm/include/llvm/Target/CGPassBuilderOption.h +++ b/llvm/include/llvm/Target/CGPassBuilderOption.h @@ -52,6 +52,8 @@ struct CGPassBuilderOption { bool EnableMachineFunctionSplitter = false; bool EnableSinkAndFold = false; bool EnableTailMerge = true; + /// Enable LoopTermFold immediately after LSR. + bool EnableLoopTermFold = false; bool MISchedPostRA = false; bool EarlyLiveIntervals = false; bool GCEmptyBlocks = false; diff --git a/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h b/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h index f68067d935458..f50511c9c0972 100644 --- a/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h +++ b/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h @@ -23,6 +23,7 @@ struct StructurizeCFGPass : PassInfoMixin { function_ref MapClassName2PassName); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/LoopSimplify.h b/llvm/include/llvm/Transforms/Utils/LoopSimplify.h index 8f3fa1f2b18ef..d179002fd6a27 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopSimplify.h +++ b/llvm/include/llvm/Transforms/Utils/LoopSimplify.h @@ -54,6 +54,7 @@ class ScalarEvolution; class LoopSimplifyPass : public PassInfoMixin { public: LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// Simplify each loop in a loop nest recursively. diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 007b481f84960..10507aab9132e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -90,6 +90,7 @@ class SILowerI1CopiesPass : public PassInfoMixin { SILowerI1CopiesPass() = default; PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + static bool isRequired() { return true; } }; void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &); @@ -368,6 +369,7 @@ class SIModeRegisterPass : public PassInfoMixin { public: SIModeRegisterPass() {} PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; class SIMemoryLegalizerPass : public PassInfoMixin { @@ -480,6 +482,7 @@ class SIAnnotateControlFlowPass public: SIAnnotateControlFlowPass(const AMDGPUTargetMachine &TM) : TM(TM) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; void initializeSIAnnotateControlFlowLegacyPass(PassRegistry &); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index 6123d75d7b616..38fde6ee2f4a5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -304,6 +304,7 @@ class AMDGPUISelDAGToDAGPass : public SelectionDAGISelPass { PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + static bool isRequired() { return true; } }; class AMDGPUDAGToDAGISelLegacy : public SelectionDAGISelLegacy { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index c865082a1dcea..8e7bde554bfd9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -104,7 +104,9 @@ #include "llvm/Transforms/Scalar/FlattenCFG.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/InferAddressSpaces.h" +#include "llvm/Transforms/Scalar/LICM.h" #include "llvm/Transforms/Scalar/LoopDataPrefetch.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Scalar/NaryReassociate.h" #include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h" #include "llvm/Transforms/Scalar/Sink.h" @@ -2062,7 +2064,12 @@ void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const { // TODO: May want to move later or split into an early and late one. addPass(AMDGPUCodeGenPreparePass(TM)); - // TODO: LICM + // Try to hoist loop invariant parts of divisions AMDGPUCodeGenPrepare may + // have expanded. + if (TM.getOptLevel() > CodeGenOptLevel::Less) { + addPass(createFunctionToLoopPassAdaptor(LICMPass(LICMOptions()), + /*UseMemorySSA=*/true)); + } } Base::addIRPasses(addPass); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.h b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.h index 2fd98a2ee1a93..d6fb0e53e1169 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.h @@ -29,6 +29,7 @@ class AMDGPUUnifyDivergentExitNodesPass : public PassInfoMixin { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.h b/llvm/lib/Target/AMDGPU/GCNNSAReassign.h index 97a72e7ddbb24..4f2abe0dd0086 100644 --- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.h +++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.h @@ -16,6 +16,7 @@ class GCNNSAReassignPass : public PassInfoMixin { public: PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.h b/llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.h index 4cd7dea83a061..4c4ac344cb206 100644 --- a/llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.h +++ b/llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.h @@ -17,6 +17,7 @@ class GCNPreRALongBranchRegPass public: PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h index b2c3190b5c6ba..4e97128bdc2d5 100644 --- a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h +++ b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h @@ -17,6 +17,7 @@ class GCNRewritePartialRegUsesPass public: PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.h b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.h index d7551a45887b9..12b87d756e664 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.h +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.h @@ -18,6 +18,7 @@ class SIFixSGPRCopiesPass : public PassInfoMixin { SIFixSGPRCopiesPass() = default; PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.h b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.h index 7b098b71597ff..0637b5d1750f9 100644 --- a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.h +++ b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.h @@ -16,6 +16,7 @@ class SIFixVGPRCopiesPass : public PassInfoMixin { public: PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.h b/llvm/lib/Target/AMDGPU/SILowerControlFlow.h index 23803c679c246..478558dfbf97f 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.h +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.h @@ -16,6 +16,7 @@ class SILowerControlFlowPass : public PassInfoMixin { public: PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.h b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.h index 64ba3029f1c55..cfc25bba764e9 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.h +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.h @@ -21,6 +21,7 @@ class SILowerSGPRSpillsPass : public PassInfoMixin { // SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs. return MachineFunctionProperties().setIsSSA().setNoVRegs(); } + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SILowerWWMCopies.h b/llvm/lib/Target/AMDGPU/SILowerWWMCopies.h index cfc8100901760..5c17a479d953c 100644 --- a/llvm/lib/Target/AMDGPU/SILowerWWMCopies.h +++ b/llvm/lib/Target/AMDGPU/SILowerWWMCopies.h @@ -16,6 +16,7 @@ class SILowerWWMCopiesPass : public PassInfoMixin { public: PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h index 9964817649168..6eae4756d59b2 100644 --- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h +++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h @@ -18,6 +18,7 @@ class SIPreAllocateWWMRegsPass public: PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h index 87350b810bba7..b49a0fc3f6a4d 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h @@ -20,6 +20,7 @@ class SIWholeQuadModePass : public PassInfoMixin { MachineFunctionProperties getClearedProperties() const { return MachineFunctionProperties().setIsSSA(); } + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 4f81d3599d1de..ceed41f3ed7c5 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -10,9 +10,9 @@ ; GCN-O0: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) -; GCN-O2: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,require,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) +; GCN-O2: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,require,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) -; GCN-O3: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,require,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) +; GCN-O3: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,require,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) define void @empty() { ret void diff --git a/llvm/test/Feature/optnone-opt.ll b/llvm/test/Feature/optnone-opt.ll index 83ff946320f53..795c9bd544463 100644 --- a/llvm/test/Feature/optnone-opt.ll +++ b/llvm/test/Feature/optnone-opt.ll @@ -54,7 +54,6 @@ attributes #0 = { optnone noinline } ; Loop IR passes that opt doesn't turn on by default. ; LoopPassManager should not be skipped over an optnone function ; NPM-LOOP-NOT: Skipping pass: PassManager -; NPM-LOOP-DAG: Skipping pass: LoopSimplifyPass on foo ; NPM-LOOP-DAG: Skipping pass: LCSSAPass ; NPM-LOOP-DAG: Skipping pass: IndVarSimplifyPass ; NPM-LOOP-DAG: Skipping pass: SimpleLoopUnswitchPass