Skip to content

Commit 96c4f97

Browse files
authored
[AMDGPU][NewPM] Port SIOptimizeExecMasking to NPM (#123572)
1 parent 0f8297a commit 96c4f97

8 files changed

+71
-12
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -216,8 +216,8 @@ extern char &SIPreEmitPeepholeID;
216216
void initializeSILateBranchLoweringPass(PassRegistry &);
217217
extern char &SILateBranchLoweringPassID;
218218

219-
void initializeSIOptimizeExecMaskingPass(PassRegistry &);
220-
extern char &SIOptimizeExecMaskingID;
219+
void initializeSIOptimizeExecMaskingLegacyPass(PassRegistry &);
220+
extern char &SIOptimizeExecMaskingLegacyID;
221221

222222
void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &);
223223
extern char &SIPreAllocateWWMRegsLegacyID;

llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def

+1
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
105105
MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
106106
MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
107107
MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
108+
MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
108109
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
109110
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
110111
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

+10-2
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "SILowerSGPRSpills.h"
4343
#include "SIMachineFunctionInfo.h"
4444
#include "SIMachineScheduler.h"
45+
#include "SIOptimizeExecMasking.h"
4546
#include "SIOptimizeVGPRLiveRange.h"
4647
#include "SIPeepholeSDWA.h"
4748
#include "SIPreAllocateWWMRegs.h"
@@ -528,7 +529,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
528529
initializeSIPreEmitPeepholePass(*PR);
529530
initializeSILateBranchLoweringPass(*PR);
530531
initializeSIMemoryLegalizerPass(*PR);
531-
initializeSIOptimizeExecMaskingPass(*PR);
532+
initializeSIOptimizeExecMaskingLegacyPass(*PR);
532533
initializeSIPreAllocateWWMRegsLegacyPass(*PR);
533534
initializeSIFormMemoryClausesPass(*PR);
534535
initializeSIPostRABundlerPass(*PR);
@@ -1634,7 +1635,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
16341635
void GCNPassConfig::addPostRegAlloc() {
16351636
addPass(&SIFixVGPRCopiesID);
16361637
if (getOptLevel() > CodeGenOptLevel::None)
1637-
addPass(&SIOptimizeExecMaskingID);
1638+
addPass(&SIOptimizeExecMaskingLegacyID);
16381639
TargetPassConfig::addPostRegAlloc();
16391640
}
16401641

@@ -2105,6 +2106,13 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
21052106
addPass(SIShrinkInstructionsPass());
21062107
}
21072108

2109+
void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
2110+
// FIXME: addPass(SIFixVGPRCopiesID) is still disabled here; the legacy pipeline runs it first.
2111+
if (TM.getOptLevel() > CodeGenOptLevel::None)
2112+
addPass(SIOptimizeExecMaskingPass());
2113+
Base::addPostRegAlloc(addPass);
2114+
}
2115+
21082116
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
21092117
CodeGenOptLevel Level) const {
21102118
if (Opt.getNumOccurrences())

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h

+1
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ class AMDGPUCodeGenPassBuilder
176176
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
177177
Error addInstSelector(AddMachinePass &) const;
178178
void addMachineSSAOptimization(AddMachinePass &) const;
179+
void addPostRegAlloc(AddMachinePass &) const;
179180

180181
/// Check if a pass is enabled given \p Opt option. The option always
181182
/// overrides defaults if explicitly used. Otherwise its default will be used

llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp

+31-8
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include "SIOptimizeExecMasking.h"
910
#include "AMDGPU.h"
1011
#include "GCNSubtarget.h"
1112
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -23,7 +24,7 @@ using namespace llvm;
2324

2425
namespace {
2526

26-
class SIOptimizeExecMasking : public MachineFunctionPass {
27+
class SIOptimizeExecMasking {
2728
MachineFunction *MF = nullptr;
2829
const GCNSubtarget *ST = nullptr;
2930
const SIRegisterInfo *TRI = nullptr;
@@ -61,11 +62,16 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
6162
void tryRecordOrSaveexecXorSequence(MachineInstr &MI);
6263
bool optimizeOrSaveexecXorSequences();
6364

65+
public:
66+
bool run(MachineFunction &MF);
67+
};
68+
69+
class SIOptimizeExecMaskingLegacy : public MachineFunctionPass {
6470
public:
6571
static char ID;
6672

67-
SIOptimizeExecMasking() : MachineFunctionPass(ID) {
68-
initializeSIOptimizeExecMaskingPass(*PassRegistry::getPassRegistry());
73+
SIOptimizeExecMaskingLegacy() : MachineFunctionPass(ID) {
74+
initializeSIOptimizeExecMaskingLegacyPass(*PassRegistry::getPassRegistry());
6975
}
7076

7177
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -82,15 +88,28 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
8288

8389
} // End anonymous namespace.
8490

85-
INITIALIZE_PASS_BEGIN(SIOptimizeExecMasking, DEBUG_TYPE,
91+
PreservedAnalyses
92+
SIOptimizeExecMaskingPass::run(MachineFunction &MF,
93+
MachineFunctionAnalysisManager &) {
94+
SIOptimizeExecMasking Impl;
95+
96+
if (!Impl.run(MF))
97+
return PreservedAnalyses::all();
98+
99+
auto PA = getMachineFunctionPassPreservedAnalyses();
100+
PA.preserveSet<CFGAnalyses>();
101+
return PA;
102+
}
103+
104+
INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingLegacy, DEBUG_TYPE,
86105
"SI optimize exec mask operations", false, false)
87106
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
88-
INITIALIZE_PASS_END(SIOptimizeExecMasking, DEBUG_TYPE,
107+
INITIALIZE_PASS_END(SIOptimizeExecMaskingLegacy, DEBUG_TYPE,
89108
"SI optimize exec mask operations", false, false)
90109

91-
char SIOptimizeExecMasking::ID = 0;
110+
char SIOptimizeExecMaskingLegacy::ID = 0;
92111

93-
char &llvm::SIOptimizeExecMaskingID = SIOptimizeExecMasking::ID;
112+
char &llvm::SIOptimizeExecMaskingLegacyID = SIOptimizeExecMaskingLegacy::ID;
94113

95114
/// If \p MI is a copy from exec, return the register copied to.
96115
Register SIOptimizeExecMasking::isCopyFromExec(const MachineInstr &MI) const {
@@ -786,10 +805,14 @@ bool SIOptimizeExecMasking::optimizeOrSaveexecXorSequences() {
786805
return Changed;
787806
}
788807

789-
bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
808+
bool SIOptimizeExecMaskingLegacy::runOnMachineFunction(MachineFunction &MF) {
790809
if (skipFunction(MF.getFunction()))
791810
return false;
792811

812+
return SIOptimizeExecMasking().run(MF);
813+
}
814+
815+
bool SIOptimizeExecMasking::run(MachineFunction &MF) {
793816
this->MF = &MF;
794817
ST = &MF.getSubtarget<GCNSubtarget>();
795818
TRI = ST->getRegisterInfo();
llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.h

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===- SIOptimizeExecMasking.h ----------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKING_H
10+
#define LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKING_H
11+
12+
#include "llvm/CodeGen/MachinePassManager.h"
13+
14+
namespace llvm {
15+
class SIOptimizeExecMaskingPass
16+
: public PassInfoMixin<SIOptimizeExecMaskingPass> {
17+
public:
18+
PreservedAnalyses run(MachineFunction &MF,
19+
MachineFunctionAnalysisManager &MFAM);
20+
};
21+
} // namespace llvm
22+
23+
#endif // LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKING_H

llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck %s
33
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck %s
44

5+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck %s
6+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck %s
57
---
68
name: lower_term_opcodes
79
tracksRegLiveness: false

llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-optimize-exec-masking -o - %s | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -passes=si-optimize-exec-masking -o - %s | FileCheck %s
34

45
# Make sure we can still optimize writes to exec when there are
56
# additional terminators after the exec write. This can happen with

0 commit comments

Comments
 (0)