Skip to content

Commit 34167f9

Browse files
authored
RegAlloc: Fix verifier error after failed allocation (llvm#119690)
In some cases, the machine verifier would fail after an allocation failure had already been reported. On failure, the allocator picks the first allocatable register and assigns it, but it did not update the liveness appropriately. When VirtRegRewriter relied on that liveness to set kill flags, it would incorrectly add kill flags if there was another overlapping kill of the virtual register. We can't properly assign the register to an overlapping range, so break the liveness of the failing register (and of any other interfering registers) instead. Give the virtual register dummy liveness by effectively deleting all of its uses, i.e. by setting them to undef. The edge case not tested here, and which I'm worried about, is a read of the register that is a def of a subregister. I've been unable to come up with a test where this occurs. https://reviews.llvm.org/D122616
1 parent d578dbf commit 34167f9

13 files changed

+152
-24
lines changed

llvm/lib/CodeGen/RegAllocBase.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis,
6565
Matrix = &mat;
6666
MRI->freezeReservedRegs();
6767
RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
68+
FailedVRegs.clear();
6869
}
6970

7071
// Visit all the live registers. If they are already assigned to a physical
@@ -128,6 +129,7 @@ void RegAllocBase::allocatePhysRegs() {
128129

129130
// Keep going after reporting the error.
130131
VRM->assignVirt2Phys(VirtReg->reg(), AvailablePhysReg);
132+
FailedVRegs.insert(VirtReg->reg());
131133
} else if (AvailablePhysReg)
132134
Matrix->assign(*VirtReg, AvailablePhysReg);
133135

@@ -161,6 +163,40 @@ void RegAllocBase::postOptimization() {
161163
DeadRemats.clear();
162164
}
163165

166+
// Patch up liveness for registers recorded in FailedVRegs.
//
// Phase 1 builds the "junk" set: every register in FailedVRegs, plus every
// virtual register that the LiveRegMatrix query reports as interfering with
// the failed register's live interval on any register unit of the physical
// register VRM maps the failed register to.
//
// Phase 2 walks the junk set: each operand that reads a junk register is
// marked undef, and LIS->shrinkToUses() is then called on that register's
// live interval.
void RegAllocBase::cleanupFailedVRegs() {
167+
SmallSet<Register, 8> JunkRegs;
168+
169+
for (Register FailedReg : FailedVRegs) {
170+
JunkRegs.insert(FailedReg);
171+
172+
MCRegister PhysReg = VRM->getPhys(FailedReg);
173+
LiveInterval &FailedInterval = LIS->getInterval(FailedReg);
174+
175+
// The liveness information for the failed register and anything interfering
176+
// with the physical register we arbitrarily chose is junk and needs to be
177+
// deleted.
178+
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
179+
LiveIntervalUnion::Query &Q = Matrix->query(FailedInterval, *Units);
180+
for (const LiveInterval *InterferingReg : Q.interferingVRegs())
181+
JunkRegs.insert(InterferingReg->reg());
182+
}
183+
}
184+
185+
// TODO: Probably need to set undef on any physreg uses not associated with
186+
// a virtual register.
187+
for (Register JunkReg : JunkRegs) {
188+
// We still should produce valid IR. Kill all the uses and reduce the live
189+
// ranges so that we don't think it's possible to introduce kill flags
190+
// later which will fail the verifier.
191+
for (MachineOperand &MO : MRI->reg_operands(JunkReg)) {
192+
// NOTE(review): the commit description flags one untested edge case here:
// a "read" that is really a def of a subregister. Confirm that marking
// such an operand undef is acceptable.
if (MO.readsReg())
193+
MO.setIsUndef(true);
194+
}
195+
196+
// Recompute the interval only after all reading operands are marked undef.
LIS->shrinkToUses(&LIS->getInterval(JunkReg));
197+
}
198+
}
199+
164200
void RegAllocBase::enqueue(const LiveInterval *LI) {
165201
const Register Reg = LI->reg();
166202

llvm/lib/CodeGen/RegAllocBase.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#define LLVM_LIB_CODEGEN_REGALLOCBASE_H
3838

3939
#include "llvm/ADT/SmallPtrSet.h"
40+
#include "llvm/ADT/SmallSet.h"
4041
#include "llvm/CodeGen/MachineRegisterInfo.h"
4142
#include "llvm/CodeGen/RegAllocCommon.h"
4243
#include "llvm/CodeGen/RegisterClassInfo.h"
@@ -81,6 +82,7 @@ class RegAllocBase {
8182
/// always available for the remat of all the siblings of the original reg.
8283
SmallPtrSet<MachineInstr *, 32> DeadRemats;
8384

85+
/// Virtual registers that allocatePhysRegs() assigned a physical register to
/// after reporting an allocation error. Cleared in init() and walked by
/// cleanupFailedVRegs().
SmallSet<Register, 2> FailedVRegs;
8486
RegAllocBase(const RegAllocFilterFunc F = nullptr)
8587
: shouldAllocateRegisterImpl(F) {}
8688

@@ -104,6 +106,10 @@ class RegAllocBase {
104106
// rematerialization.
105107
virtual void postOptimization();
106108

109+
/// Perform cleanups on registers that failed to allocate. This hacks on the
110+
/// liveness in order to avoid spurious verifier errors in later passes.
111+
void cleanupFailedVRegs();
112+
107113
// Get a temporary reference to a Spiller instance.
108114
virtual Spiller &spiller() = 0;
109115

llvm/lib/CodeGen/RegAllocBasic.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
329329

330330
allocatePhysRegs();
331331
postOptimization();
332+
cleanupFailedVRegs();
332333

333334
// Diagnostic output before rewriting
334335
LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");

llvm/lib/CodeGen/RegAllocGreedy.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2795,6 +2795,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
27952795
if (VerifyEnabled)
27962796
MF->verify(this, "Before post optimization", &errs());
27972797
postOptimization();
2798+
cleanupFailedVRegs();
27982799
reportStats();
27992800

28002801
releaseMemory();

llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717

1818
...
1919

20-
# CHECK: S_NOP 0, implicit-def renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3
21-
# CHECK: S_NOP 0, implicit killed renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3
20+
# CHECK: S_NOP 0, implicit-def renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def dead renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def dead renamable $vgpr0_vgpr1_vgpr2_vgpr3
21+
# CHECK: S_NOP 0, implicit killed renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3
2222

2323
---
2424
name: foo

llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@
2727
# CHECK-LABEL: name: inflated_reg_class_copy_use_after_free
2828
# CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
2929
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
30-
# CHECK-NEXT: [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
31-
# CHECK-NEXT: early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
32-
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[MFMA0]].sub2_sub3 {
33-
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[MFMA0]].sub0
30+
# CHECK-NEXT: dead [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
31+
# CHECK-NEXT: dead early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, undef [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
32+
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef [[MFMA0]].sub2_sub3 {
33+
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef [[MFMA0]].sub0
3434
# CHECK-NEXT: }
3535
# CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
3636
# CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0
@@ -118,10 +118,10 @@ body: |
118118
# CHECK-LABEL: name: inflated_reg_class_copy_use_after_free_lane_subset
119119
# CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
120120
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
121-
# CHECK-NEXT: [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
122-
# CHECK-NEXT: S_NOP 0, implicit-def early-clobber [[REG1:%[0-9]+]], implicit [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit [[RESTORE_0]].sub4_sub5_sub6_sub7
123-
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[REG1]].sub2_sub3 {
124-
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[REG1]].sub0
121+
# CHECK-NEXT: dead [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
122+
# CHECK-NEXT: S_NOP 0, implicit-def dead early-clobber [[REG1:%[0-9]+]], implicit undef [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit undef [[RESTORE_0]].sub4_sub5_sub6_sub7
123+
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef [[REG1]].sub2_sub3 {
124+
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef [[REG1]].sub0
125125
# CHECK-NEXT: }
126126
# CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
127127
# CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0

llvm/test/CodeGen/AMDGPU/issue48473.mir

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@
4343
# %25 to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
4444

4545
# CHECK-LABEL: name: issue48473
46-
# CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
46+
# CHECK: S_NOP 0, implicit undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
47+
4748

4849
---
4950
name: issue48473

llvm/test/CodeGen/AMDGPU/ran-out-of-registers-error-all-regs-reserved.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=greedy -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
2-
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=basic -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
3-
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=fast -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
4-
5-
; FIXME: Should pass verifier after failure.
1+
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=greedy -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
2+
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=basic -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
3+
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=fast -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
64

75
declare <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32, i32, <32 x i32>, i32 immarg, i32 immarg, i32 immarg)
86

llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
1-
# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs=0 -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
2-
# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefixes=ERR,VERIFIER %s
3-
4-
# FIXME: We should not produce a verifier error after erroring
1+
# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -filetype=null 2>&1 | FileCheck -check-prefix=ERR %s
52

63
# ERR: error: inline assembly requires more registers than available
7-
# VERIFIER: *** Bad machine code: Using an undefined physical register ***
4+
# ERR-NOT: Bad machine code
85

96
# This testcase cannot be compiled with the enforced register
107
# budget. Previously, tryLastChanceRecoloring would assert here. It
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -vgpr-regalloc=basic -sgpr-regalloc=basic -start-before=regallocbasic,0 -stop-after=virtregrewriter,2 -verify-regalloc -o - %s 2> %t.basic.err | FileCheck -check-prefix=BASIC %s
2+
# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-regalloc -o - %s 2> %t.greedy.err | FileCheck -check-prefix=GREEDY %s
3+
4+
# RUN: FileCheck -check-prefix=ERR -implicit-check-not=error %s < %t.basic.err
5+
# RUN: FileCheck -check-prefix=ERR -implicit-check-not=error %s < %t.greedy.err
6+
7+
# This testcase must fail register allocation. It should also not
8+
# produce a verifier error after doing so. Previously, it would not
9+
# properly update the liveness for the dummy selected register. As a
10+
# result, VirtRegRewriter would incorrectly add kill flags which
11+
# combined with other uses of the physical register produced a
12+
# verifier error.
13+
14+
# ERR: error: <unknown>:0:0: ran out of registers during register allocation
15+
16+
# GREEDY: SI_SPILL_V256_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
17+
# GREEDY-NEXT: SI_SPILL_V512_SAVE undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
18+
# GREEDY-NEXT: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3
19+
20+
# GREEDY: dead renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19 = SI_SPILL_V512_RESTORE
21+
# GREEDY: dead renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE
22+
# GREEDY: S_NOP 0, implicit undef renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3
23+
# GREEDY: S_NOP 0, implicit killed renamable $vgpr20_vgpr21
24+
25+
26+
# BASIC: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3
27+
# BASIC: SI_SPILL_V256_SAVE killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
28+
# BASIC: SI_SPILL_V512_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
29+
# BASIC: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.{{[0-9]+}}, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.{{[0-9]+}}, align 4, addrspace 5)
30+
# BASIC: dead renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE
31+
# BASIC: renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = SI_SPILL_V256_RESTORE
32+
# BASIC: dead renamable $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE
33+
# BASIC: S_NOP 0, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit killed renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3
34+
# BASIC: renamable $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE
35+
36+
--- |
37+
define void @killed_reg_after_regalloc_failure() #0 {
38+
ret void
39+
}
40+
41+
attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
42+
43+
...
44+
---
45+
name: killed_reg_after_regalloc_failure
46+
tracksRegLiveness: true
47+
machineFunctionInfo:
48+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
49+
frameOffsetReg: '$sgpr33'
50+
stackPtrOffsetReg: '$sgpr32'
51+
body: |
52+
bb.0:
53+
S_NOP 0, implicit-def %0:vreg_512, implicit-def %1:vreg_256, implicit-def %2:vreg_128
54+
S_NOP 0, implicit-def %3:vreg_64
55+
S_NOP 0, implicit %0, implicit %1, implicit %2
56+
S_NOP 0, implicit %3
57+
S_ENDPGM 0
58+
59+
...
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR -implicit-check-not=error %s
2+
3+
; ERR: error: inline assembly requires more registers than available
4+
; ERR-NOT: ERROR
5+
; ERR-NOT: Bad machine code
6+
7+
; This test requires respecting undef on the spill source operand when
8+
; expanding the pseudos to avoid all verifier errors
9+
10+
%asm.output = type { <16 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, <3 x i32> }
11+
12+
define void @foo(<32 x i32> addrspace(1)* %arg) #0 {
13+
%agpr0 = call i32 asm sideeffect "; def $0","=${a0}"()
14+
%asm = call %asm.output asm sideeffect "; def $0 $1 $2 $3 $4","=v,=v,=v,=v,=v"()
15+
%vgpr0 = extractvalue %asm.output %asm, 0
16+
%vgpr1 = extractvalue %asm.output %asm, 1
17+
%vgpr2 = extractvalue %asm.output %asm, 2
18+
%vgpr3 = extractvalue %asm.output %asm, 3
19+
%vgpr4 = extractvalue %asm.output %asm, 4
20+
call void asm sideeffect "; clobber", "~{a[0:31]},~{v[0:31]}"()
21+
call void asm sideeffect "; use $0","v"(<16 x i32> %vgpr0)
22+
call void asm sideeffect "; use $0","v"(<8 x i32> %vgpr1)
23+
call void asm sideeffect "; use $0","v"(<4 x i32> %vgpr2)
24+
call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr3)
25+
call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr4)
26+
call void asm sideeffect "; use $0","{a1}"(i32 %agpr0)
27+
ret void
28+
}
29+
30+
attributes #0 = { "amdgpu-waves-per-eu"="8,8" }

llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s 2>&1 | FileCheck %s
1+
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
22

33
; This testcase fails register allocation at the same time it performs
44
; virtual register splitting (by introducing VGPR to AGPR copies). We
@@ -11,7 +11,6 @@
1111
; it takes the first available register.
1212

1313
; CHECK: error: <unknown>:0:0: ran out of registers during register allocation
14-
; CHECK: Bad machine code: Using an undefined physical register
1514
define amdgpu_kernel void @alloc_failure_with_split_vregs(float %v0, float %v1) #0 {
1615
%agpr0 = call float asm sideeffect "; def $0", "=${a0}"()
1716
%agpr.vec = insertelement <16 x float> undef, float %agpr0, i32 0

llvm/test/CodeGen/X86/inline-asm-assertion.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; RUN: not llc -verify-machineinstrs -O0 < %s 2>&1 | FileCheck %s
2-
; RUN: not --crash llc -verify-machineinstrs -O2 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-O2
2+
; RUN: not llc -verify-machineinstrs -O2 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-O2
33
; CHECK: error: inline assembly requires more registers than available
44
; CHECK: .size main, .Lfunc_end0-main
55
; CHECK-O2: error: inline assembly requires more registers than available

0 commit comments

Comments
 (0)