Skip to content

Commit 9d83e93

Browse files
committed
[CodeGen][InlineSpiller] Enable subreg spill-restore during RA
Register tuple spilling during RA is suboptimal when the entire tuple is restored before an instruction that uses only a subreg of the original tuple. This leads to unwanted register pressure and, in some cases, to unsuccessful allocation when dealing with tests that involve large value types. This patch allows targets to enable subreg reloads by constructing a super class matching the subreg(s) involved in the MI in question, which eventually improves the allocation by avoiding the unwanted subreg reload components in the first place.
1 parent d606bb1 commit 9d83e93

File tree

107 files changed

+3881
-4284
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

107 files changed

+3881
-4284
lines changed

llvm/include/llvm/CodeGen/LiveRangeEdit.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,10 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate {
154154

155155
ArrayRef<Register> regs() const { return ArrayRef(NewRegs).slice(FirstNew); }
156156

157-
/// createFrom - Create a new virtual register based on OldReg.
158-
Register createFrom(Register OldReg);
157+
/// createFrom - Create a new virtual register based on OldReg. If \p RC is
158+
/// non-null, create the new virtual register from it instead. Subreg spills
159+
/// will feed-in a subregclass derived from the regclass of OldReg.
160+
Register createFrom(Register OldReg, const TargetRegisterClass *RC = nullptr);
159161

160162
/// create - Create a new register with the same class and original slot as
161163
/// parent.

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1210,7 +1210,7 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
12101210
virtual void loadRegFromStackSlot(
12111211
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
12121212
int FrameIndex, const TargetRegisterClass *RC,
1213-
const TargetRegisterInfo *TRI, Register VReg,
1213+
const TargetRegisterInfo *TRI, Register VReg, unsigned SubReg = 0,
12141214
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const {
12151215
llvm_unreachable("Target didn't implement "
12161216
"TargetInstrInfo::loadRegFromStackSlot!");

llvm/include/llvm/CodeGen/TargetRegisterInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,8 @@ class LLVM_ABI TargetRegisterInfo : public MCRegisterInfo {
438438
LaneBitmask LaneMask,
439439
SmallVectorImpl<unsigned> &Indexes) const;
440440

441+
unsigned getSubRegIdxFromLaneMask(LaneBitmask LaneMask) const;
442+
441443
/// The lane masks returned by getSubRegIndexLaneMask() above can only be
442444
/// used to determine if sub-registers overlap - they can't be used to
443445
/// determine if a set of sub-registers completely cover another
@@ -1225,6 +1227,11 @@ class LLVM_ABI TargetRegisterInfo : public MCRegisterInfo {
12251227
return true;
12261228
}
12271229

1230+
/// To enable the spill-restore of sub registers during RA. This would
1231+
/// eventually improve the register allocation for the functions that involve
1232+
/// subreg uses of register tuples.
1233+
virtual bool shouldEnableSubRegSpillRestore() const { return false; }
1234+
12281235
/// When prioritizing live ranges in register allocation, if this hook returns
12291236
/// true then the AllocationPriority of the register class will be treated as
12301237
/// more important than whether the range is local to a basic block or global.

llvm/lib/CodeGen/InlineSpiller.cpp

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,8 @@ class InlineSpiller : public Spiller {
217217
bool coalesceStackAccess(MachineInstr *MI, Register Reg);
218218
bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>>,
219219
MachineInstr *LoadMI = nullptr);
220-
void insertReload(Register VReg, SlotIndex, MachineBasicBlock::iterator MI);
220+
void insertReload(Register VReg, unsigned SubReg, SlotIndex,
221+
MachineBasicBlock::iterator MI);
221222
void insertSpill(Register VReg, bool isKill, MachineBasicBlock::iterator MI);
222223

223224
void spillAroundUses(Register Reg);
@@ -1112,14 +1113,14 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
11121113
return true;
11131114
}
11141115

1115-
void InlineSpiller::insertReload(Register NewVReg,
1116+
void InlineSpiller::insertReload(Register NewVReg, unsigned SubReg,
11161117
SlotIndex Idx,
11171118
MachineBasicBlock::iterator MI) {
11181119
MachineBasicBlock &MBB = *MI->getParent();
11191120

11201121
MachineInstrSpan MIS(MI, &MBB);
11211122
TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot,
1122-
MRI.getRegClass(NewVReg), &TRI, Register());
1123+
MRI.getRegClass(NewVReg), &TRI, Register(), SubReg);
11231124

11241125
LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI);
11251126

@@ -1248,18 +1249,50 @@ void InlineSpiller::spillAroundUses(Register Reg) {
12481249

12491250
// Create a new virtual register for spill/fill.
12501251
// FIXME: Infer regclass from instruction alone.
1251-
Register NewVReg = Edit->createFrom(Reg);
1252+
1253+
unsigned SubReg = 0;
1254+
LaneBitmask CoveringLanes = LaneBitmask::getNone();
1255+
// Identify the subreg use(s). Skip if the instruction defines the register.
1256+
// For copy bundles, get the covering lane masks.
1257+
if (TRI.shouldEnableSubRegSpillRestore() && !RI.Writes) {
1258+
for (auto [MI, OpIdx] : Ops) {
1259+
const MachineOperand &MO = MI->getOperand(OpIdx);
1260+
assert(MO.isReg() && MO.getReg() == Reg);
1261+
if (MO.isUse()) {
1262+
SubReg = MO.getSubReg();
1263+
CoveringLanes |= TRI.getSubRegIndexLaneMask(SubReg);
1264+
}
1265+
}
1266+
}
1267+
1268+
const TargetRegisterClass *OrigRC = MRI.getRegClass(Reg);
1269+
if (MI.isBundled() && CoveringLanes.any()) {
1270+
CoveringLanes = LaneBitmask(bit_ceil(CoveringLanes.getAsInteger()) - 1);
1271+
// Get the covering subreg index including the missing indices in the
1272+
// identified small range. Even if this is suboptimal, it is advantageous
1273+
// when the higher subreg components are not really involved in the bundle
1274+
// copy as we emit the subreg reload rather than the one for the entire
1275+
// tuple.
1276+
SubReg = TRI.getSubRegIdxFromLaneMask(CoveringLanes);
1277+
}
1278+
1279+
const TargetRegisterClass *NewRC =
1280+
SubReg ? TRI.getSubRegisterClass(OrigRC, SubReg) : nullptr;
1281+
Register NewVReg = Edit->createFrom(Reg, NewRC);
12521282

12531283
if (RI.Reads)
1254-
insertReload(NewVReg, Idx, &MI);
1284+
insertReload(NewVReg, SubReg, Idx, &MI);
12551285

12561286
// Rewrite instruction operands.
12571287
bool hasLiveDef = false;
12581288
for (const auto &OpPair : Ops) {
12591289
MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
12601290
MO.setReg(NewVReg);
12611291
if (MO.isUse()) {
1262-
if (!OpPair.first->isRegTiedToDefOperand(OpPair.second))
1292+
if (SubReg && !MI.isBundled())
1293+
MO.setSubReg(0);
1294+
if (!OpPair.first->isRegTiedToDefOperand(OpPair.second) ||
1295+
(SubReg && !MI.isBundled()))
12631296
MO.setIsKill();
12641297
} else {
12651298
if (!MO.isDead())

llvm/lib/CodeGen/LiveRangeEdit.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,10 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(Register OldReg,
5252
return LI;
5353
}
5454

55-
Register LiveRangeEdit::createFrom(Register OldReg) {
56-
Register VReg = MRI.cloneVirtualRegister(OldReg);
55+
Register LiveRangeEdit::createFrom(Register OldReg,
56+
const TargetRegisterClass *RC) {
57+
Register VReg =
58+
RC ? MRI.createVirtualRegister(RC) : MRI.cloneVirtualRegister(OldReg);
5759
if (VRM) {
5860
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
5961
}

llvm/lib/CodeGen/TargetRegisterInfo.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,16 @@ TargetRegisterInfo::getRegSizeInBits(Register Reg,
545545
return getRegSizeInBits(*RC);
546546
}
547547

548+
unsigned
549+
TargetRegisterInfo::getSubRegIdxFromLaneMask(LaneBitmask LaneMask) const {
550+
for (unsigned Idx = 1, E = getNumSubRegIndices(); Idx < E; ++Idx) {
551+
if (getSubRegIndexLaneMask(Idx) == LaneMask)
552+
return Idx;
553+
}
554+
555+
return 0 /*NoSubRegister*/;
556+
}
557+
548558
bool TargetRegisterInfo::getCoveringSubRegIndexes(
549559
const TargetRegisterClass *RC, LaneBitmask LaneMask,
550560
SmallVectorImpl<unsigned> &NeededIndexes) const {

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5798,7 +5798,7 @@ static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
57985798
void AArch64InstrInfo::loadRegFromStackSlot(
57995799
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
58005800
int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
5801-
Register VReg, MachineInstr::MIFlag Flags) const {
5801+
Register VReg, unsigned SubReg, MachineInstr::MIFlag Flags) const {
58025802
MachineFunction &MF = *MBB.getParent();
58035803
MachineFrameInfo &MFI = MF.getFrameInfo();
58045804
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);

llvm/lib/Target/AArch64/AArch64InstrInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
360360
void loadRegFromStackSlot(
361361
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
362362
Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
363-
const TargetRegisterInfo *TRI, Register VReg,
363+
const TargetRegisterInfo *TRI, Register VReg, unsigned SubReg = 0,
364364
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
365365

366366
// This tells target independent code that it is okay to pass instructions

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1907,14 +1907,22 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
19071907
Register DestReg, int FrameIndex,
19081908
const TargetRegisterClass *RC,
19091909
const TargetRegisterInfo *TRI,
1910-
Register VReg,
1910+
Register VReg, unsigned SubReg,
19111911
MachineInstr::MIFlag Flags) const {
19121912
MachineFunction *MF = MBB.getParent();
19131913
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
19141914
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
19151915
const DebugLoc &DL = MBB.findDebugLoc(MI);
19161916
unsigned SpillSize = TRI->getSpillSize(*RC);
19171917

1918+
assert(SubReg != AMDGPU::lo16 && SubReg != AMDGPU::hi16 &&
1919+
"unhandled 16-bit subregister spilling");
1920+
// For subreg reload, identify the start offset.
1921+
unsigned Offset =
1922+
SubReg ? llvm::countr_zero(
1923+
RI.getSubRegIndexLaneMask(SubReg).getAsInteger()) /
1924+
2
1925+
: 0;
19181926
MachinePointerInfo PtrInfo
19191927
= MachinePointerInfo::getFixedStack(*MF, FrameIndex);
19201928

@@ -1939,19 +1947,23 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
19391947
if (RI.spillSGPRToVGPR())
19401948
FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
19411949
BuildMI(MBB, MI, DL, OpDesc, DestReg)
1942-
.addFrameIndex(FrameIndex) // addr
1943-
.addMemOperand(MMO)
1944-
.addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
1950+
.addFrameIndex(FrameIndex) // addr
1951+
.addImm(Offset) // offset
1952+
.addMemOperand(MMO)
1953+
.addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
19451954

19461955
return;
19471956
}
19481957

1958+
// Convert from word-offset to byte-offset.
1959+
Offset *= 4;
1960+
19491961
unsigned Opcode = getVectorRegSpillRestoreOpcode(VReg ? VReg : DestReg, RC,
19501962
SpillSize, *MFI);
19511963
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
19521964
.addFrameIndex(FrameIndex) // vaddr
19531965
.addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
1954-
.addImm(0) // offset
1966+
.addImm(Offset) // offset
19551967
.addMemOperand(MMO);
19561968
}
19571969

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
330330
void loadRegFromStackSlot(
331331
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
332332
int FrameIndex, const TargetRegisterClass *RC,
333-
const TargetRegisterInfo *TRI, Register VReg,
333+
const TargetRegisterInfo *TRI, Register VReg, unsigned SubReg = 0,
334334
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
335335

336336
bool expandPostRAPseudo(MachineInstr &MI) const override;

0 commit comments

Comments
 (0)