Skip to content

Commit f5b20e4

Browse files
committed
[CodeGen][InlineSpiller] Enable subreg spill-restore during RA
Register tuple spilling during RA is suboptimal when the entire tuple is restored before an instruction that uses only a subreg of the original tuple. This leads to unnecessary register pressure and, for tests that deal with large value types, to unsuccessful allocation. This patch allows targets to enable subreg reloads by constructing a super class matching the subreg(s) involved in the MI in question, which improves allocation by avoiding the reload of unneeded subreg components in the first place.
1 parent 0d7728c commit f5b20e4

File tree

107 files changed

+3870
-4464
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

107 files changed

+3870
-4464
lines changed

llvm/include/llvm/CodeGen/LiveRangeEdit.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,10 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate {
154154

155155
ArrayRef<Register> regs() const { return ArrayRef(NewRegs).slice(FirstNew); }
156156

157-
/// createFrom - Create a new virtual register based on OldReg.
158-
Register createFrom(Register OldReg);
157+
/// createFrom - Create a new virtual register based on OldReg. If \p RC is
158+
/// non-null, create the new virtual register from it instead. Subreg spills
159+
/// will pass in a sub-register class derived from the regclass of OldReg.
160+
Register createFrom(Register OldReg, const TargetRegisterClass *RC = nullptr);
159161

160162
/// create - Create a new register with the same class and original slot as
161163
/// parent.

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1215,6 +1215,7 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
12151215
virtual void loadRegFromStackSlot(
12161216
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
12171217
int FrameIndex, const TargetRegisterClass *RC, Register VReg,
1218+
unsigned SubReg = 0,
12181219
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const {
12191220
llvm_unreachable("Target didn't implement "
12201221
"TargetInstrInfo::loadRegFromStackSlot!");

llvm/include/llvm/CodeGen/TargetRegisterInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,8 @@ class LLVM_ABI TargetRegisterInfo : public MCRegisterInfo {
438438
LaneBitmask LaneMask,
439439
SmallVectorImpl<unsigned> &Indexes) const;
440440

441+
unsigned getSubRegIdxFromLaneMask(LaneBitmask LaneMask) const;
442+
441443
/// The lane masks returned by getSubRegIndexLaneMask() above can only be
442444
/// used to determine if sub-registers overlap - they can't be used to
443445
/// determine if a set of sub-registers completely cover another
@@ -1225,6 +1227,11 @@ class LLVM_ABI TargetRegisterInfo : public MCRegisterInfo {
12251227
return true;
12261228
}
12271229

1230+
/// Return true to enable the spill-restore of sub-registers during RA. This
1231+
/// can improve the register allocation for functions that involve
1232+
/// subreg uses of register tuples.
1233+
virtual bool shouldEnableSubRegSpillRestore() const { return false; }
1234+
12281235
/// When prioritizing live ranges in register allocation, if this hook returns
12291236
/// true then the AllocationPriority of the register class will be treated as
12301237
/// more important than whether the range is local to a basic block or global.

llvm/lib/CodeGen/InlineSpiller.cpp

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,8 @@ class InlineSpiller : public Spiller {
217217
bool coalesceStackAccess(MachineInstr *MI, Register Reg);
218218
bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>>,
219219
MachineInstr *LoadMI = nullptr);
220-
void insertReload(Register VReg, SlotIndex, MachineBasicBlock::iterator MI);
220+
void insertReload(Register VReg, unsigned SubReg, SlotIndex,
221+
MachineBasicBlock::iterator MI);
221222
void insertSpill(Register VReg, bool isKill, MachineBasicBlock::iterator MI);
222223

223224
void spillAroundUses(Register Reg);
@@ -1112,14 +1113,14 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
11121113
return true;
11131114
}
11141115

1115-
void InlineSpiller::insertReload(Register NewVReg,
1116+
void InlineSpiller::insertReload(Register NewVReg, unsigned SubReg,
11161117
SlotIndex Idx,
11171118
MachineBasicBlock::iterator MI) {
11181119
MachineBasicBlock &MBB = *MI->getParent();
11191120

11201121
MachineInstrSpan MIS(MI, &MBB);
11211122
TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot,
1122-
MRI.getRegClass(NewVReg), Register());
1123+
MRI.getRegClass(NewVReg), Register(), SubReg);
11231124

11241125
LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI);
11251126

@@ -1248,18 +1249,50 @@ void InlineSpiller::spillAroundUses(Register Reg) {
12481249

12491250
// Create a new virtual register for spill/fill.
12501251
// FIXME: Infer regclass from instruction alone.
1251-
Register NewVReg = Edit->createFrom(Reg);
1252+
1253+
unsigned SubReg = 0;
1254+
LaneBitmask CoveringLanes = LaneBitmask::getNone();
1255+
// Identify the subreg use(s). Skip if the instruction defines the register.
1256+
// For copy bundles, get the covering lane masks.
1257+
if (TRI.shouldEnableSubRegSpillRestore() && !RI.Writes) {
1258+
for (auto [MI, OpIdx] : Ops) {
1259+
const MachineOperand &MO = MI->getOperand(OpIdx);
1260+
assert(MO.isReg() && MO.getReg() == Reg);
1261+
if (MO.isUse()) {
1262+
SubReg = MO.getSubReg();
1263+
CoveringLanes |= TRI.getSubRegIndexLaneMask(SubReg);
1264+
}
1265+
}
1266+
}
1267+
1268+
const TargetRegisterClass *OrigRC = MRI.getRegClass(Reg);
1269+
if (MI.isBundled() && CoveringLanes.any()) {
1270+
CoveringLanes = LaneBitmask(bit_ceil(CoveringLanes.getAsInteger()) - 1);
1271+
// Get the covering subreg index including the missing indices in the
1272+
// identified small range. Even if this is suboptimal, it is advantageous
1273+
// when the higher subreg components are not really involved in the bundle
1274+
// copy as we emit the subreg reload rather than the one for the entire
1275+
// tuple.
1276+
SubReg = TRI.getSubRegIdxFromLaneMask(CoveringLanes);
1277+
}
1278+
1279+
const TargetRegisterClass *NewRC =
1280+
SubReg ? TRI.getSubRegisterClass(OrigRC, SubReg) : nullptr;
1281+
Register NewVReg = Edit->createFrom(Reg, NewRC);
12521282

12531283
if (RI.Reads)
1254-
insertReload(NewVReg, Idx, &MI);
1284+
insertReload(NewVReg, SubReg, Idx, &MI);
12551285

12561286
// Rewrite instruction operands.
12571287
bool hasLiveDef = false;
12581288
for (const auto &OpPair : Ops) {
12591289
MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
12601290
MO.setReg(NewVReg);
12611291
if (MO.isUse()) {
1262-
if (!OpPair.first->isRegTiedToDefOperand(OpPair.second))
1292+
if (SubReg && !MI.isBundled())
1293+
MO.setSubReg(0);
1294+
if (!OpPair.first->isRegTiedToDefOperand(OpPair.second) ||
1295+
(SubReg && !MI.isBundled()))
12631296
MO.setIsKill();
12641297
} else {
12651298
if (!MO.isDead())

llvm/lib/CodeGen/LiveRangeEdit.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,10 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(Register OldReg,
5252
return LI;
5353
}
5454

55-
Register LiveRangeEdit::createFrom(Register OldReg) {
56-
Register VReg = MRI.cloneVirtualRegister(OldReg);
55+
Register LiveRangeEdit::createFrom(Register OldReg,
56+
const TargetRegisterClass *RC) {
57+
Register VReg =
58+
RC ? MRI.createVirtualRegister(RC) : MRI.cloneVirtualRegister(OldReg);
5759
if (VRM) {
5860
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
5961
}

llvm/lib/CodeGen/TargetRegisterInfo.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,16 @@ TargetRegisterInfo::getRegSizeInBits(Register Reg,
546546
return getRegSizeInBits(*RC);
547547
}
548548

549+
/// Return the first sub-register index whose lane mask, as reported by
/// getSubRegIndexLaneMask(), is exactly equal to \p LaneMask. Indices are
/// scanned in increasing order starting at 1; 0 is returned when no index
/// has a matching mask.
unsigned
550+
TargetRegisterInfo::getSubRegIdxFromLaneMask(LaneBitmask LaneMask) const {
551+
for (unsigned Idx = 1, E = getNumSubRegIndices(); Idx < E; ++Idx) {
552+
if (getSubRegIndexLaneMask(Idx) == LaneMask)
553+
return Idx;
554+
}
555+
556+
return 0 /*NoSubRegister*/;
557+
}
558+
549559
bool TargetRegisterInfo::getCoveringSubRegIndexes(
550560
const TargetRegisterClass *RC, LaneBitmask LaneMask,
551561
SmallVectorImpl<unsigned> &NeededIndexes) const {

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6035,7 +6035,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
60356035
MachineBasicBlock::iterator MBBI,
60366036
Register DestReg, int FI,
60376037
const TargetRegisterClass *RC,
6038-
Register VReg,
6038+
Register VReg, unsigned SubReg,
60396039
MachineInstr::MIFlag Flags) const {
60406040
MachineFunction &MF = *MBB.getParent();
60416041
MachineFrameInfo &MFI = MF.getFrameInfo();

llvm/lib/Target/AArch64/AArch64InstrInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
368368
void loadRegFromStackSlot(
369369
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
370370
Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
371-
Register VReg,
371+
Register VReg, unsigned SubReg = 0,
372372
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
373373

374374
// This tells target independent code that it is okay to pass instructions

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1907,14 +1907,30 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
19071907
MachineBasicBlock::iterator MI,
19081908
Register DestReg, int FrameIndex,
19091909
const TargetRegisterClass *RC,
1910-
Register VReg,
1910+
Register VReg, unsigned SubReg,
19111911
MachineInstr::MIFlag Flags) const {
19121912
MachineFunction *MF = MBB.getParent();
19131913
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
19141914
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
19151915
const DebugLoc &DL = MBB.findDebugLoc(MI);
19161916
unsigned SpillSize = RI.getSpillSize(*RC);
19171917

1918+
assert(SubReg != AMDGPU::lo16 && SubReg != AMDGPU::hi16 &&
1919+
"unhandled 16-bit subregister spilling");
1920+
1921+
// For subreg reload, identify the start offset.
1922+
unsigned SubRegIdx =
1923+
SubReg
1924+
? llvm::countr_zero(RI.getSubRegIndexLaneMask(SubReg).getAsInteger())
1925+
: 0;
1926+
// Each subreg consists of two bits in the RegMask. The SubRegIdx should be
1927+
// either zero or an even number. This assert is to ensure we will not have
1928+
// any 16-bit subreg access at this point.
1929+
assert(SubRegIdx % 2 == 0 && "expected an even number for the subreg index");
1930+
1931+
// Now get the actual subreg index.
1932+
SubRegIdx /= 2;
1933+
19181934
MachinePointerInfo PtrInfo
19191935
= MachinePointerInfo::getFixedStack(*MF, FrameIndex);
19201936

@@ -1939,19 +1955,23 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
19391955
if (RI.spillSGPRToVGPR())
19401956
FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
19411957
BuildMI(MBB, MI, DL, OpDesc, DestReg)
1942-
.addFrameIndex(FrameIndex) // addr
1943-
.addMemOperand(MMO)
1944-
.addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
1958+
.addFrameIndex(FrameIndex) // addr
1959+
.addImm(SubRegIdx) // offset
1960+
.addMemOperand(MMO)
1961+
.addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
19451962

19461963
return;
19471964
}
19481965

1966+
// Convert the subreg index to stack offset.
1967+
SubRegIdx *= 4;
1968+
19491969
unsigned Opcode = getVectorRegSpillRestoreOpcode(VReg ? VReg : DestReg, RC,
19501970
SpillSize, *MFI);
19511971
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
19521972
.addFrameIndex(FrameIndex) // vaddr
19531973
.addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
1954-
.addImm(0) // offset
1974+
.addImm(SubRegIdx) // offset
19551975
.addMemOperand(MMO);
19561976
}
19571977

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
331331
void loadRegFromStackSlot(
332332
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
333333
int FrameIndex, const TargetRegisterClass *RC, Register VReg,
334+
unsigned SubReg = 0,
334335
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
335336

336337
bool expandPostRAPseudo(MachineInstr &MI) const override;

0 commit comments

Comments
 (0)