Skip to content

Commit 3bf2052

Browse files
AMDGPU/GlobalISel: add RegBankLegalize rules for bitfield extract
A divergent S32 instruction is available; divergent S64 needs to be lowered to S32 operations. Uniform instructions are available for both S32 and S64, but the bitfield offset and the bitfield size need to be packed into a single S32 operand. The uniform instruction is selected directly, since there is no isel pattern available for it.
1 parent ac09b78 commit 3bf2052

8 files changed

+215
-68
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

+119
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,13 @@
1515
#include "AMDGPUGlobalISelUtils.h"
1616
#include "AMDGPUInstrInfo.h"
1717
#include "AMDGPURegisterBankInfo.h"
18+
#include "GCNSubtarget.h"
1819
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20+
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
1921
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
2022
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
23+
#include "llvm/IR/IntrinsicsAMDGPU.h"
24+
#include "llvm/Support/ErrorHandling.h"
2125

2226
#define DEBUG_TYPE "amdgpu-regbanklegalize"
2327

@@ -127,6 +131,117 @@ void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy,
127131
MI.eraseFromParent();
128132
}
129133

134+
/// Returns true if \p MI is a signed bitfield extract (G_SBFX or the
/// amdgcn.sbfe intrinsic) and false if it is the unsigned form (G_UBFX or the
/// amdgcn.ubfe intrinsic). Any other instruction is a programming error.
///
/// Intrinsic IDs and machine opcodes are unrelated number spaces, so they are
/// checked in separate switches; a single switch over both could misclassify
/// an unrelated instruction whose opcode happens to equal an intrinsic ID (or
/// vice versa).
static bool isSignedBFE(MachineInstr &MI) {
  if (const auto *GI = dyn_cast<GIntrinsic>(&MI)) {
    switch (GI->getIntrinsicID()) {
    case Intrinsic::amdgcn_sbfe:
      return true;
    case Intrinsic::amdgcn_ubfe:
      return false;
    default:
      llvm_unreachable("Opcode not supported");
    }
  }

  switch (MI.getOpcode()) {
  case AMDGPU::G_SBFX:
    return true;
  case AMDGPU::G_UBFX:
    return false;
  default:
    llvm_unreachable("Opcode not supported");
  }
}
149+
150+
/// Lower a divergent 64-bit bitfield extract (G_SBFX/G_UBFX or the matching
/// amdgcn.sbfe/amdgcn.ubfe intrinsic) into VGPR operations, then erase \p MI.
///
/// The source is first shifted right by the field offset. If the width is not
/// a known constant, the field is isolated with a 64-bit shl/shr pair;
/// otherwise a single 32-bit bitfield extract is applied to the half that
/// contains the top of the field.
void RegBankLegalizeHelper::lowerDiv_BFE(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  assert(MRI.getType(Dst) == LLT::scalar(64));
  const bool IsSigned = isSignedBFE(MI);

  // For intrinsics operand 1 is the intrinsic ID, so the value operands start
  // one slot later than for the generic opcodes.
  const unsigned SrcIdx = isa<GIntrinsic>(MI) ? 2 : 1;
  // Offset is the least-significant bit of the field to extract and Width is
  // the number of bits in the field.
  Register Src = MI.getOperand(SrcIdx).getReg();
  Register Offset = MI.getOperand(SrcIdx + 1).getReg();
  Register Width = MI.getOperand(SrcIdx + 2).getReg();

  // The bit diagrams below describe the signed case; the unsigned case is
  // analogous. x is the sign bit of the source, s is the sign of the field, l
  // is its LSB and y are the remaining field bits.

  // Src >> Offset  Hi|Lo: x?????syyyyyyl??? -> xxxx?????syyyyyyl
  unsigned ShiftRightOpc;
  if (IsSigned)
    ShiftRightOpc = AMDGPU::G_ASHR;
  else
    ShiftRightOpc = AMDGPU::G_LSHR;
  auto Shifted = B.buildInstr(ShiftRightOpc, {{VgprRB, S64}}, {Src, Offset});

  auto MaybeConstWidth = getIConstantVRegValWithLookThrough(Width, MRI);

  if (!MaybeConstWidth) {
    // Width is only known at run time:
    // expand to Src >> Offset << (64 - Width) >> (64 - Width).
    // << (64 - Width): Hi|Lo: xxxx?????syyyyyyl -> syyyyyyl000000000
    // >> (64 - Width): Hi|Lo: syyyyyyl000000000 -> ssssssssssyyyyyyl
    auto SixtyFour = B.buildConstant(SgprRB_S32, 64);
    auto ShiftAmt = B.buildSub(VgprRB_S32, SixtyFour, Width);
    auto FieldAtTop = B.buildShl({VgprRB, S64}, Shifted, ShiftAmt);
    B.buildInstr(ShiftRightOpc, {Dst}, {FieldAtTop, ShiftAmt});
    MI.eraseFromParent();
    return;
  }

  // Width is a known constant: work on the 32-bit halves of the shifted value.
  const uint64_t ConstWidthVal = MaybeConstWidth->Value.getZExtValue();
  auto Halves = B.buildUnmerge(VgprRB_S32, Shifted);
  Register ShiftedLo = Halves.getReg(0);
  Register ShiftedHi = Halves.getReg(1);
  auto Zero = B.buildConstant({VgprRB, S32}, 0);
  const unsigned BFXOpc = IsSigned ? AMDGPU::G_SBFX : AMDGPU::G_UBFX;

  if (ConstWidthVal > 32) {
    // The field reaches into the high half; the low half is used as is and
    // the remaining (Width - 32) bits are extracted from the high half.
    auto HiWidth = B.buildConstant(VgprRB_S32, ConstWidthVal - 32);
    // Shifted Hi|Lo: ??????sy|yyyyyyyl -> sssssssy|yyyyyyyl
    auto Hi = B.buildInstr(BFXOpc, {VgprRB_S32}, {ShiftedHi, Zero, HiWidth});
    B.buildMergeLikeInstr(Dst, {ShiftedLo, Hi});
  } else {
    // The whole field lives in the low half.
    // Shifted Hi|Lo: ????????|???syyyl -> ????????|ssssyyyl
    auto Lo = B.buildInstr(BFXOpc, {VgprRB_S32}, {ShiftedLo, Zero, Width});
    // Unsigned: the high half is simply zero.
    // Shifted Hi|Lo: ????????|000syyyl -> 00000000|000syyyl
    MachineInstrBuilder Hi = Zero;
    if (IsSigned) {
      // Signed: replicate the sign of the extracted low half.
      // Shifted Hi|Lo: ????????|ssssyyyl -> ssssssss|ssssyyyl
      Hi = B.buildAShr(VgprRB_S32, Lo, B.buildConstant(VgprRB_S32, 31));
    }
    B.buildMergeLikeInstr(Dst, {Lo, Hi});
  }

  MI.eraseFromParent();
}
209+
210+
/// Lower a uniform bitfield extract (G_SBFX/G_UBFX or the matching
/// amdgcn.sbfe/amdgcn.ubfe intrinsic) by directly emitting the scalar
/// S_BFE_{I,U}{32,64} machine instruction, then erase \p MI.
///
/// The scalar instructions take the field offset and the field size packed
/// into one 32-bit operand: offset in the low 16 bits, size in the high 16.
void RegBankLegalizeHelper::lowerUni_BFE(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const bool IsSigned = isSignedBFE(MI);

  // For intrinsics operand 1 is the intrinsic ID, so the value operands start
  // one slot later than for the generic opcodes.
  const unsigned SrcIdx = isa<GIntrinsic>(MI) ? 2 : 1;
  Register Src = MI.getOperand(SrcIdx).getReg();
  Register Offset = MI.getOperand(SrcIdx + 1).getReg();
  Register Width = MI.getOperand(SrcIdx + 2).getReg();

  // Packed operand, Hi16|Lo16 = Size|FieldOffset. The offset is masked to its
  // low 6 bits before being combined with the shifted size.
  auto OffsetMask = B.buildConstant(SgprRB_S32, maskTrailingOnes<unsigned>(6));
  auto MaskedOffset = B.buildAnd(SgprRB_S32, Offset, OffsetMask);
  auto Sixteen = B.buildConstant(SgprRB_S32, 16);
  auto ShiftedSize = B.buildShl(SgprRB_S32, Width, Sixteen);
  auto Packed = B.buildOr(SgprRB_S32, MaskedOffset, ShiftedSize);

  // Pick the scalar opcode from the result width and signedness.
  unsigned Opc;
  if (DstTy == S32)
    Opc = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
  else
    Opc = IsSigned ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;

  // A machine instruction is emitted here, so its operands get constrained to
  // register classes. Copies on the inputs and on the result keep the
  // surrounding generic code on register banks.
  auto SrcCopy = B.buildCopy(DstTy, Src);
  auto PackedCopy = B.buildCopy(S32, Packed);
  auto BFE = B.buildInstr(Opc, {{SgprRB, DstTy}}, {SrcCopy, PackedCopy});

  const GCNSubtarget &ST = B.getMF().getSubtarget<GCNSubtarget>();
  const bool Constrained = constrainSelectedInstRegOperands(
      *BFE, *ST.getInstrInfo(), *ST.getRegisterInfo(), RBI);
  if (!Constrained)
    llvm_unreachable("failed to constrain BFE");

  B.buildCopy(DstReg, BFE->getOperand(0).getReg());
  MI.eraseFromParent();
}
244+
130245
void RegBankLegalizeHelper::lower(MachineInstr &MI,
131246
const RegBankLLTMapping &Mapping,
132247
SmallSet<Register, 4> &WaterfallSgprs) {
@@ -225,6 +340,10 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
225340
MI.eraseFromParent();
226341
break;
227342
}
343+
case Div_BFE:
344+
return lowerDiv_BFE(MI);
345+
case Uni_BFE:
346+
return lowerUni_BFE(MI);
228347
case SplitLoad: {
229348
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
230349
unsigned Size = DstTy.getSizeInBits();

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h

+3
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,9 @@ class RegBankLegalizeHelper {
108108

109109
void lower(MachineInstr &MI, const RegBankLLTMapping &Mapping,
110110
SmallSet<Register, 4> &SgprWaterfallOperandRegs);
111+
112+
void lowerDiv_BFE(MachineInstr &MI);
113+
void lowerUni_BFE(MachineInstr &MI);
111114
};
112115

113116
} // end namespace AMDGPU

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

+12-1
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
450450
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
451451
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
452452

453+
addRulesForGOpcs({G_LSHR}, Standard).Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}});
454+
455+
addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
456+
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, Uni_BFE})
457+
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
458+
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, Uni_BFE})
459+
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, Div_BFE});
460+
453461
// Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
454462
// and G_FREEZE here, rest is trivially regbankselected earlier
455463
addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
@@ -628,6 +636,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
628636
.Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
629637

630638
addRulesForIOpcs({amdgcn_readfirstlane})
631-
.Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}});
639+
.Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
640+
// this should not exist in the first place, it is from call lowering
641+
// readfirstlaning just in case register is not in sgpr.
642+
.Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
632643

633644
} // end initialize rules

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h

+2
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,8 @@ enum LoweringMethodID {
168168
DoNotLower,
169169
VccExtToSel,
170170
UniExtToSel,
171+
Uni_BFE,
172+
Div_BFE,
171173
VgprToVccCopy,
172174
SplitTo32,
173175
Ext32To64,

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir

+36-30
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
3-
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -verify-machineinstrs -o - %s | FileCheck %s
43

54
...
65

@@ -96,12 +95,11 @@ body: |
9695
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
9796
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
9897
; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32)
99-
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
100-
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64
98+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 64
10199
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]]
102100
; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32)
103101
; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32)
104-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64)
102+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR1]](s64)
105103
%0:_(s64) = COPY $vgpr0_vgpr1
106104
%1:_(s32) = COPY $vgpr2
107105
%2:_(s32) = COPY $vgpr3
@@ -124,12 +122,11 @@ body: |
124122
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
125123
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
126124
; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32)
127-
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
128-
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64
125+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 64
129126
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]]
130127
; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32)
131128
; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32)
132-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64)
129+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR1]](s64)
133130
%0:_(s64) = COPY $vgpr0_vgpr1
134131
%1:_(s32) = COPY $vgpr0
135132
%2:_(s32) = COPY $vgpr1
@@ -216,12 +213,11 @@ body: |
216213
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
217214
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
218215
; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY3]], [[COPY1]](s32)
219-
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
220-
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64
216+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 64
221217
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]]
222218
; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32)
223219
; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32)
224-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64)
220+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR1]](s64)
225221
%0:_(s64) = COPY $sgpr0_sgpr1
226222
%1:_(s32) = COPY $vgpr0
227223
%2:_(s32) = COPY $vgpr1
@@ -266,16 +262,19 @@ body: |
266262
; CHECK-LABEL: name: test_sbfx_s32_sss
267263
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr3
268264
; CHECK-NEXT: {{ $}}
269-
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0
265+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
270266
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
271267
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
272268
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
273269
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]]
274270
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
275271
; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32)
276-
; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]]
277-
; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY]](s32), [[OR]](s32), implicit-def $scc
278-
; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_I32_]](s32)
272+
; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[AND]], [[SHL]]
273+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s32) = COPY [[COPY]](s32)
274+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s32) = COPY [[OR]](s32)
275+
; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY3]](s32), [[COPY4]](s32), implicit-def $scc
276+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY [[S_BFE_I32_]](s32)
277+
; CHECK-NEXT: $sgpr0 = COPY [[COPY5]](s32)
279278
%0:_(s32) = COPY $sgpr0
280279
%1:_(s32) = COPY $sgpr1
281280
%2:_(s32) = COPY $sgpr2
@@ -294,16 +293,18 @@ body: |
294293
; CHECK-LABEL: name: test_sbfx_s32_sii
295294
; CHECK: liveins: $sgpr0
296295
; CHECK-NEXT: {{ $}}
297-
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0
296+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
298297
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
299298
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10
300299
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
301-
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]]
302300
; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
303-
; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32)
304-
; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]]
305-
; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY]](s32), [[OR]](s32), implicit-def $scc
306-
; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_I32_]](s32)
301+
; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655360
302+
; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655361
303+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[COPY]](s32)
304+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32(s32) = COPY [[C5]](s32)
305+
; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY1]](s32), [[COPY2]](s32), implicit-def $scc
306+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[S_BFE_I32_]](s32)
307+
; CHECK-NEXT: $sgpr0 = COPY [[COPY3]](s32)
307308
%0:_(s32) = COPY $sgpr0
308309
%1:_(s32) = G_CONSTANT i32 1
309310
%2:_(s32) = G_CONSTANT i32 10
@@ -324,16 +325,19 @@ body: |
324325
; CHECK-LABEL: name: test_sbfx_s64_sss
325326
; CHECK: liveins: $sgpr0_sgpr1, $sgpr0, $sgpr1
326327
; CHECK-NEXT: {{ $}}
327-
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1
328+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
328329
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
329330
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
330331
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
331332
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]]
332333
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
333334
; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32)
334-
; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]]
335-
; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY]](s64), [[OR]](s32), implicit-def $scc
336-
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]](s64)
335+
; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[AND]], [[SHL]]
336+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64(s64) = COPY [[COPY]](s64)
337+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s32) = COPY [[OR]](s32)
338+
; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY3]](s64), [[COPY4]](s32), implicit-def $scc
339+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s64) = COPY [[S_BFE_I64_]](s64)
340+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[COPY5]](s64)
337341
%0:_(s64) = COPY $sgpr0_sgpr1
338342
%1:_(s32) = COPY $sgpr0
339343
%2:_(s32) = COPY $sgpr1
@@ -352,15 +356,17 @@ body: |
352356
; CHECK-LABEL: name: test_sbfx_s64_sii
353357
; CHECK: liveins: $sgpr0_sgpr1
354358
; CHECK-NEXT: {{ $}}
355-
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1
359+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
356360
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
357361
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10
358362
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
359-
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]]
360363
; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
361-
; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32)
362-
; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]]
363-
; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY]](s64), [[OR]](s32), implicit-def $scc
364+
; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655360
365+
; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655361
366+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64(s64) = COPY [[COPY]](s64)
367+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32(s32) = COPY [[C5]](s32)
368+
; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY1]](s64), [[COPY2]](s32), implicit-def $scc
369+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s64) = COPY [[S_BFE_I64_]](s64)
364370
%0:_(s64) = COPY $sgpr0_sgpr1
365371
%1:_(s32) = G_CONSTANT i32 1
366372
%2:_(s32) = G_CONSTANT i32 10

0 commit comments

Comments
 (0)