Skip to content

Commit 0f0d3fb

Browse files
authored
[AMDGPU] Do not allow M0 as v_readlane_b32 dst (#128867)
See #128851 - this is the same patch, but for v_readlane_b32. This instruction is used much less often so there were less changes required.
1 parent aace6a2 commit 0f0d3fb

12 files changed

+26
-20
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -4997,7 +4997,8 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
49974997
Register NewActiveBitsReg = MRI.createVirtualRegister(WaveMaskRegClass);
49984998

49994999
Register FF1Reg = MRI.createVirtualRegister(DstRegClass);
5000-
Register LaneValueReg = MRI.createVirtualRegister(DstRegClass);
5000+
Register LaneValueReg =
5001+
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
50015002

50025003
bool IsWave32 = ST.isWave32();
50035004
unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -2179,6 +2179,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
21792179

21802180
case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
21812181
MI.setDesc(get(AMDGPU::V_READLANE_B32));
2182+
MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(),
2183+
&AMDGPU::SReg_32_XM0RegClass);
21822184
break;
21832185

21842186
case AMDGPU::V_MOV_B64_PSEUDO: {

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -2202,6 +2202,8 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
22022202
// Don't need to write VGPR out.
22032203
}
22042204

2205+
MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
2206+
22052207
// Restore clobbered registers in the specified restore block.
22062208
MI = RestoreMBB.end();
22072209
SB.setMI(&RestoreMBB, MI);
@@ -2216,6 +2218,7 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
22162218
SB.NumSubRegs == 1
22172219
? SB.SuperReg
22182220
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
2221+
MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass);
22192222
bool LastSubReg = (i + 1 == e);
22202223
auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
22212224
SubReg)

llvm/lib/Target/AMDGPU/VOP2Instructions.td

+1-1
Original file line numberDiff line numberDiff line change
@@ -763,7 +763,7 @@ def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
763763
}
764764

765765
def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> {
766-
let Outs32 = (outs SReg_32:$vdst);
766+
let Outs32 = (outs SReg_32_XM0:$vdst);
767767
let Outs64 = Outs32;
768768
let Ins32 = (ins VRegOrLdsSrc_32:$src0, SCSrc_b32:$src1);
769769
let Ins64 = Ins32;

llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
103103
; GFX908-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
104104
; GFX908-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
105105
; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
106-
; GFX908-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
106+
; GFX908-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
107107
; GFX908-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]]
108108
; GFX908-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY17]], implicit $exec
109109
; GFX908-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
@@ -175,7 +175,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
175175
; GFX90A-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
176176
; GFX90A-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
177177
; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
178-
; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
178+
; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
179179
; GFX90A-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]]
180180
; GFX90A-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY17]], implicit $exec
181181
; GFX90A-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
@@ -247,7 +247,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
247247
; GFX942-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
248248
; GFX942-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
249249
; GFX942-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
250-
; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
250+
; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
251251
; GFX942-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]]
252252
; GFX942-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY17]], implicit $exec
253253
; GFX942-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]

llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
9797
; GFX90A-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
9898
; GFX90A-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec
9999
; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
100-
; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
100+
; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
101101
; GFX90A-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]]
102102
; GFX90A-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY18]], implicit $exec
103103
; GFX90A-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
@@ -188,7 +188,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
188188
; GFX942-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
189189
; GFX942-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec
190190
; GFX942-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
191-
; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
191+
; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]]
192192
; GFX942-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]]
193193
; GFX942-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY18]], implicit $exec
194194
; GFX942-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
@@ -274,11 +274,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
274274
; GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
275275
; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
276276
; GFX11-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 15
277-
; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_3]]
277+
; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_3]]
278278
; GFX11-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 16
279279
; GFX11-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READLANE_B32_]], [[S_MOV_B32_4]], [[V_MOV_B32_dpp5]]
280280
; GFX11-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 31
281-
; GFX11-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_5]]
281+
; GFX11-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_5]]
282282
; GFX11-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_1]]
283283
; GFX11-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY14]], implicit $exec
284284
; GFX11-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]

llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
7474
; GFX908-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
7575
; GFX908-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
7676
; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63
77-
; GFX908-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
77+
; GFX908-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
7878
; GFX908-NEXT: early-clobber %1:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec
7979
; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec
8080
; GFX908-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
@@ -136,7 +136,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
136136
; GFX90A_GFX942-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
137137
; GFX90A_GFX942-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
138138
; GFX90A_GFX942-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63
139-
; GFX90A_GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
139+
; GFX90A_GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
140140
; GFX90A_GFX942-NEXT: early-clobber %1:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec
141141
; GFX90A_GFX942-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec
142142
; GFX90A_GFX942-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec

llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
7777
; GFX90A-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
7878
; GFX90A-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec
7979
; GFX90A-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63
80-
; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
80+
; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
8181
; GFX90A-NEXT: early-clobber %2:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec
8282
; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec
8383
; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vreg_1 = COPY [[V_CMP_EQ_U32_e64_]]
@@ -159,7 +159,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
159159
; GFX942-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
160160
; GFX942-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec
161161
; GFX942-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63
162-
; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
162+
; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]]
163163
; GFX942-NEXT: early-clobber %2:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec
164164
; GFX942-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec
165165
; GFX942-NEXT: [[COPY8:%[0-9]+]]:vreg_1 = COPY [[V_CMP_EQ_U32_e64_]]
@@ -239,11 +239,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
239239
; GFX11-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
240240
; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
241241
; GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 15
242-
; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], killed [[S_MOV_B32_2]]
242+
; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_4]], killed [[S_MOV_B32_2]]
243243
; GFX11-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 16
244244
; GFX11-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed [[V_READLANE_B32_]], killed [[S_MOV_B32_3]], [[V_MOV_B32_dpp5]]
245245
; GFX11-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 31
246-
; GFX11-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], killed [[S_MOV_B32_4]]
246+
; GFX11-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[V_ADD_F32_e64_4]], killed [[S_MOV_B32_4]]
247247
; GFX11-NEXT: early-clobber %2:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_1]], implicit $exec
248248
; GFX11-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_LO_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec
249249
; GFX11-NEXT: [[COPY6:%[0-9]+]]:vreg_1 = COPY [[V_CMP_EQ_U32_e64_]]

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umax.mir

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ body: |
5858
; GCN-NEXT: [[PHI:%[0-9]+]]:sgpr_32 = PHI [[S_MOV_B32_]], %bb.0, %4, %bb.2
5959
; GCN-NEXT: [[PHI1:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_]], %bb.0, %11, %bb.2
6060
; GCN-NEXT: [[S_FF1_I32_B64_:%[0-9]+]]:sgpr_32 = S_FF1_I32_B64 [[PHI1]]
61-
; GCN-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sgpr_32 = V_READLANE_B32 [[COPY1]], [[S_FF1_I32_B64_]]
61+
; GCN-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY1]], [[S_FF1_I32_B64_]]
6262
; GCN-NEXT: [[S_MAX_U32_:%[0-9]+]]:sgpr_32 = S_MAX_U32 [[PHI]], [[V_READLANE_B32_]], implicit-def $scc
6363
; GCN-NEXT: [[S_BITSET0_B64_:%[0-9]+]]:sreg_64_xexec = S_BITSET0_B64 [[S_FF1_I32_B64_]], [[PHI1]]
6464
; GCN-NEXT: S_CMP_LG_U64 [[S_BITSET0_B64_]], 0, implicit-def $scc

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umin.mir

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ body: |
5858
; GCN-NEXT: [[PHI:%[0-9]+]]:sgpr_32 = PHI [[S_MOV_B32_]], %bb.0, %4, %bb.2
5959
; GCN-NEXT: [[PHI1:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_]], %bb.0, %11, %bb.2
6060
; GCN-NEXT: [[S_FF1_I32_B64_:%[0-9]+]]:sgpr_32 = S_FF1_I32_B64 [[PHI1]]
61-
; GCN-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sgpr_32 = V_READLANE_B32 [[COPY1]], [[S_FF1_I32_B64_]]
61+
; GCN-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY1]], [[S_FF1_I32_B64_]]
6262
; GCN-NEXT: [[S_MIN_U32_:%[0-9]+]]:sgpr_32 = S_MIN_U32 [[PHI]], [[V_READLANE_B32_]], implicit-def $scc
6363
; GCN-NEXT: [[S_BITSET0_B64_:%[0-9]+]]:sreg_64_xexec = S_BITSET0_B64 [[S_FF1_I32_B64_]], [[PHI1]]
6464
; GCN-NEXT: S_CMP_LG_U64 [[S_BITSET0_B64_]], 0, implicit-def $scc

llvm/test/CodeGen/AMDGPU/lower-control-flow-live-intervals.mir

+2-2
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ body: |
242242
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]]
243243
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]]
244244
; CHECK-NEXT: [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[COPY8]]
245-
; CHECK-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY]], [[S_FF1_I32_B32_]]
245+
; CHECK-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY]], [[S_FF1_I32_B32_]]
246246
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY7]], [[V_READLANE_B32_]], implicit-def dead $scc
247247
; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 1, [[S_FF1_I32_B32_]], implicit-def dead $scc
248248
; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[COPY8]], [[S_LSHL_B32_]], implicit-def dead $scc
@@ -311,7 +311,7 @@ body: |
311311
%2:sreg_32 = COPY killed %25
312312
%3:sreg_32 = COPY killed %26
313313
%14:sreg_32 = S_FF1_I32_B32 %3
314-
%15:sreg_32 = V_READLANE_B32 %8, %14
314+
%15:sreg_32_xm0 = V_READLANE_B32 %8, %14
315315
%4:sreg_32 = S_ADD_I32 killed %2, killed %15, implicit-def dead $scc
316316
%17:sreg_32 = S_LSHL_B32 1, killed %14, implicit-def dead $scc
317317
%5:sreg_32 = S_ANDN2_B32 killed %3, killed %17, implicit-def dead $scc

llvm/test/CodeGen/AMDGPU/wqm.mir

+1-1
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ body: |
213213
%9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
214214
%10:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %8.sub0:sreg_64, 0, implicit $exec
215215
%11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
216-
%12:sreg_32 = V_READLANE_B32 %11:vgpr_32, 63
216+
%12:sreg_32_xm0 = V_READLANE_B32 %11:vgpr_32, 63
217217
early-clobber %13:vgpr_32 = STRICT_WWM %9:vgpr_32, implicit $exec
218218
219219
BUFFER_STORE_DWORD_OFFSET_exact killed %13, %4, %5, 4, 0, 0, implicit $exec

0 commit comments

Comments
 (0)