diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 3383175fc1bdb..e2f664396c16d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -15,9 +15,13 @@
 #include "AMDGPUGlobalISelUtils.h"
 #include "AMDGPUInstrInfo.h"
 #include "AMDGPURegisterBankInfo.h"
+#include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineUniformityAnalysis.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/Support/ErrorHandling.h"
 
 #define DEBUG_TYPE "amdgpu-regbanklegalize"
 
@@ -127,6 +131,133 @@ void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy,
   MI.eraseFromParent();
 }
 
+/// Returns true when \p MI is a signed bitfield extract (G_SBFX or the
+/// amdgcn_sbfe intrinsic) and false for the unsigned forms (G_UBFX or
+/// amdgcn_ubfe). Any other instruction hits llvm_unreachable.
+///
+/// Intrinsic IDs and generic opcodes live in unrelated number spaces, so they
+/// are matched in separate switches rather than mixed as labels of one switch.
+static bool isSignedBFE(MachineInstr &MI) {
+  if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
+    switch (GI->getIntrinsicID()) {
+    case Intrinsic::amdgcn_sbfe:
+      return true;
+    case Intrinsic::amdgcn_ubfe:
+      return false;
+    default:
+      llvm_unreachable("Opcode not supported");
+    }
+  }
+
+  switch (MI.getOpcode()) {
+  case AMDGPU::G_SBFX:
+    return true;
+  case AMDGPU::G_UBFX:
+    return false;
+  default:
+    llvm_unreachable("Opcode not supported");
+  }
+}
+
+/// Lower a divergent 64-bit bitfield extract to 64-bit shifts and, when the
+/// width is a known constant, a 32-bit bitfield extract on one half.
+void RegBankLegalizeHelper::lowerDiv_BFE(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  assert(MRI.getType(Dst) == LLT::scalar(64));
+  bool Signed = isSignedBFE(MI);
+  // Intrinsics carry the intrinsic ID as operand 1, so their sources start one
+  // operand later than those of G_SBFX/G_UBFX.
+  unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
+  // Extract bitfield from Src, LSBit is the least-significant bit for the
+  // extraction (field offset) and Width is size of bitfield.
+  Register Src = MI.getOperand(FirstOpnd).getReg();
+  Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
+  Register Width = MI.getOperand(FirstOpnd + 2).getReg();
+  // Comments are for signed bitfield extract, similar for unsigned. x is sign
+  // bit. s is sign, l is LSB and y are remaining bits of bitfield to extract.
+
+  // Src >> LSBit Hi|Lo: x?????syyyyyyl??? -> xxxx?????syyyyyyl
+  unsigned SHROpc = Signed ? AMDGPU::G_ASHR : AMDGPU::G_LSHR;
+  auto SHRSrc = B.buildInstr(SHROpc, {{VgprRB, S64}}, {Src, LSBit});
+
+  auto ConstWidth = getIConstantVRegValWithLookThrough(Width, MRI);
+
+  // Expand to Src >> LSBit << (64 - Width) >> (64 - Width)
+  // << (64 - Width): Hi|Lo: xxxx?????syyyyyyl -> syyyyyyl000000000
+  // >> (64 - Width): Hi|Lo: syyyyyyl000000000 -> ssssssssssyyyyyyl
+  if (!ConstWidth) {
+    auto Amt = B.buildSub(VgprRB_S32, B.buildConstant(SgprRB_S32, 64), Width);
+    auto SignBit = B.buildShl({VgprRB, S64}, SHRSrc, Amt);
+    B.buildInstr(SHROpc, {Dst}, {SignBit, Amt});
+    MI.eraseFromParent();
+    return;
+  }
+
+  auto WidthImm = ConstWidth->Value.getZExtValue();
+  auto UnmergeSHRSrc = B.buildUnmerge(VgprRB_S32, SHRSrc);
+  Register SHRSrcLo = UnmergeSHRSrc.getReg(0);
+  Register SHRSrcHi = UnmergeSHRSrc.getReg(1);
+  auto Zero = B.buildConstant(VgprRB_S32, 0);
+  unsigned BFXOpc = Signed ? AMDGPU::G_SBFX : AMDGPU::G_UBFX;
+
+  if (WidthImm <= 32) {
+    // SHRSrc Hi|Lo: ????????|???syyyl -> ????????|ssssyyyl
+    auto Lo = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcLo, Zero, Width});
+    MachineInstrBuilder Hi;
+    if (Signed) {
+      // SHRSrc Hi|Lo: ????????|ssssyyyl -> ssssssss|ssssyyyl
+      Hi = B.buildAShr(VgprRB_S32, Lo, B.buildConstant(VgprRB_S32, 31));
+    } else {
+      // SHRSrc Hi|Lo: ????????|000syyyl -> 00000000|000syyyl
+      Hi = Zero;
+    }
+    B.buildMergeLikeInstr(Dst, {Lo, Hi});
+  } else {
+    auto Amt = B.buildConstant(VgprRB_S32, WidthImm - 32);
+    // SHRSrc Hi|Lo: ??????sy|yyyyyyyl -> sssssssy|yyyyyyyl
+    auto Hi = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcHi, Zero, Amt});
+    B.buildMergeLikeInstr(Dst, {SHRSrcLo, Hi});
+  }
+
+  MI.eraseFromParent();
+}
+
+/// Lower a uniform 32-bit or 64-bit bitfield extract to S_BFE_{I,U}{32,64},
+/// packing the field offset and width into the single S32 source operand.
+void RegBankLegalizeHelper::lowerUni_BFE(MachineInstr &MI) {
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(DstReg);
+  bool Signed = isSignedBFE(MI);
+  unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
+  Register Src = MI.getOperand(FirstOpnd).getReg();
+  Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
+  Register Width = MI.getOperand(FirstOpnd + 2).getReg();
+  // For uniform bit field extract there are 4 available instructions, but
+  // LSBit(field offset) and Width(size of bitfield) need to be packed in S32,
+  // field offset in low and size in high 16 bits.
+
+  // Src1 Hi16|Lo16 = Size|FieldOffset
+  auto Mask = B.buildConstant(SgprRB_S32, maskTrailingOnes<unsigned>(6));
+  auto FieldOffset = B.buildAnd(SgprRB_S32, LSBit, Mask);
+  auto Size = B.buildShl(SgprRB_S32, Width, B.buildConstant(SgprRB_S32, 16));
+  auto Src1 = B.buildOr(SgprRB_S32, FieldOffset, Size);
+  unsigned Opc32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
+  unsigned Opc64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
+  unsigned Opc = Ty == S32 ? Opc32 : Opc64;
+
+  // Select machine instruction, because of reg class constraining, insert
+  // copies from reg class to reg bank.
+  auto S_BFE = B.buildInstr(Opc, {{SgprRB, Ty}},
+                            {B.buildCopy(Ty, Src), B.buildCopy(S32, Src1)});
+  const GCNSubtarget &ST = B.getMF().getSubtarget<GCNSubtarget>();
+  if (!constrainSelectedInstRegOperands(*S_BFE, *ST.getInstrInfo(),
+                                        *ST.getRegisterInfo(), RBI))
+    llvm_unreachable("failed to constrain BFE");
+
+  B.buildCopy(DstReg, S_BFE->getOperand(0).getReg());
+  MI.eraseFromParent();
+}
+
 void RegBankLegalizeHelper::lower(MachineInstr &MI,
                                   const RegBankLLTMapping &Mapping,
                                   SmallSet &WaterfallSgprs) {
@@ -225,6 +356,10 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
     MI.eraseFromParent();
     break;
   }
+  case Div_BFE:
+    return lowerDiv_BFE(MI);
+  case Uni_BFE:
+    return lowerUni_BFE(MI);
   case SplitLoad: {
     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
     unsigned Size = DstTy.getSizeInBits();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
index ae3ab86449dd5..f91a1e2fa4771 100644
--- 
a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h @@ -108,6 +108,9 @@ class RegBankLegalizeHelper { void lower(MachineInstr &MI, const RegBankLLTMapping &Mapping, SmallSet &SgprWaterfallOperandRegs); + + void lowerDiv_BFE(MachineInstr &MI); + void lowerUni_BFE(MachineInstr &MI); }; } // end namespace AMDGPU diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 6ee15709d2fa6..7959bf30ca27d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -450,6 +450,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}}) .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}}); + addRulesForGOpcs({G_LSHR}, Standard).Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}); + + addRulesForGOpcs({G_UBFX, G_SBFX}, Standard) + .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, Uni_BFE}) + .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}}) + .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, Uni_BFE}) + .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, Div_BFE}); + // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT // and G_FREEZE here, rest is trivially regbankselected earlier addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}}); @@ -628,6 +636,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}}); addRulesForIOpcs({amdgcn_readfirstlane}) - .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}}); + .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}}) + // this should not exist in the first place, it is from call lowering + // readfirstlaning just in case register is not in sgpr. 
+ .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}}); } // end initialize rules diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h index 6bde7f2cd676d..5edc88aaff73e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h @@ -168,6 +168,8 @@ enum LoweringMethodID { DoNotLower, VccExtToSel, UniExtToSel, + Uni_BFE, + Div_BFE, VgprToVccCopy, SplitTo32, Ext32To64, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir index 97c006a1a7216..572f1ea2516f1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -verify-machineinstrs -o - %s | FileCheck %s ... 
@@ -96,12 +95,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 64 ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32) ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR1]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s32) = COPY $vgpr3 @@ -124,12 +122,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 64 ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32) ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR1]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 @@ -216,12 +213,11 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY3]], [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; CHECK-NEXT: 
[[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 64 ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32) ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR1]](s64) %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 @@ -266,16 +262,19 @@ body: | ; CHECK-LABEL: name: test_sbfx_s32_sss ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY]](s32), [[OR]](s32), implicit-def $scc - ; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_I32_]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s32) = COPY [[OR]](s32) + ; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY3]](s32), [[COPY4]](s32), implicit-def $scc + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY [[S_BFE_I32_]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[COPY5]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -294,16 +293,18 @@ body: | ; CHECK-LABEL: name: test_sbfx_s32_sii ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(s32) = 
COPY $sgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY]](s32), [[OR]](s32), implicit-def $scc - ; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_I32_]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655360 + ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655361 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32(s32) = COPY [[C5]](s32) + ; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY1]](s32), [[COPY2]](s32), implicit-def $scc + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[S_BFE_I32_]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[COPY3]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_CONSTANT i32 10 @@ -324,16 +325,19 @@ body: | ; CHECK-LABEL: name: test_sbfx_s64_sss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = 
S_BFE_I64 [[COPY]](s64), [[OR]](s32), implicit-def $scc - ; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s32) = COPY [[OR]](s32) + ; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY3]](s64), [[COPY4]](s32), implicit-def $scc + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s64) = COPY [[S_BFE_I64_]](s64) + ; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[COPY5]](s64) %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -352,15 +356,17 @@ body: | ; CHECK-LABEL: name: test_sbfx_s64_sii ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY]](s64), [[OR]](s32), implicit-def $scc + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655360 + ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655361 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32(s32) = COPY [[C5]](s32) + ; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY1]](s64), [[COPY2]](s32), implicit-def $scc + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s64) = COPY [[S_BFE_I64_]](s64) %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_CONSTANT i32 10 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir index 20d280680a09d..267960ad74eff 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -verify-machineinstrs -o - %s | FileCheck %s ... @@ -96,12 +95,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 64 ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[LSHR]], [[SUB]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:vgpr(s64) = G_LSHR [[SHL]], [[SUB]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[LSHR1]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s32) = COPY $vgpr3 @@ -124,12 +122,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 + ; 
CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 64 ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[LSHR]], [[SUB]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:vgpr(s64) = G_LSHR [[SHL]], [[SUB]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[LSHR1]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 @@ -214,12 +211,11 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY3]], [[COPY1]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 64 ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[LSHR]], [[SUB]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:vgpr(s64) = G_LSHR [[SHL]], [[SUB]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[LSHR1]](s64) %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 @@ -264,16 +260,19 @@ body: | ; CHECK-LABEL: name: test_ubfx_s32_sss ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: 
[[S_BFE_U32_:%[0-9]+]]:sreg_32(s32) = S_BFE_U32 [[COPY]](s32), [[OR]](s32), implicit-def $scc - ; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_U32_]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s32) = COPY [[OR]](s32) + ; CHECK-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32(s32) = S_BFE_U32 [[COPY3]](s32), [[COPY4]](s32), implicit-def $scc + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY [[S_BFE_U32_]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[COPY5]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -292,16 +291,18 @@ body: | ; CHECK-LABEL: name: test_ubfx_s32_sii ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32(s32) = S_BFE_U32 [[COPY]](s32), [[OR]](s32), implicit-def $scc - ; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_U32_]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655360 + ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655361 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32(s32) = COPY [[C5]](s32) + ; CHECK-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32(s32) = S_BFE_U32 [[COPY1]](s32), [[COPY2]](s32), implicit-def $scc + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[S_BFE_U32_]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[COPY3]](s32) %0:_(s32) = COPY 
$sgpr0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_CONSTANT i32 10 @@ -322,16 +323,19 @@ body: | ; CHECK-LABEL: name: test_ubfx_s64_sss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64(s64) = S_BFE_U64 [[COPY]](s64), [[OR]](s32), implicit-def $scc - ; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s32) = COPY [[OR]](s32) + ; CHECK-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64(s64) = S_BFE_U64 [[COPY3]](s64), [[COPY4]](s32), implicit-def $scc + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s64) = COPY [[S_BFE_U64_]](s64) + ; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[COPY5]](s64) %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = COPY $sgpr3 @@ -350,16 +354,18 @@ body: | ; CHECK-LABEL: name: test_ubfx_s64_sii ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 - ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; 
CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64(s64) = S_BFE_U64 [[COPY]](s64), [[OR]](s32), implicit-def $scc - ; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]](s64) + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655360 + ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655361 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32(s32) = COPY [[C5]](s32) + ; CHECK-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64(s64) = S_BFE_U64 [[COPY1]](s64), [[COPY2]](s32), implicit-def $scc + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s64) = COPY [[S_BFE_U64_]](s64) + ; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[COPY3]](s64) %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_CONSTANT i32 10 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sbfx.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sbfx.ll index b1a36c7eaeedc..4f25f16069276 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sbfx.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sbfx.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - < %s | FileCheck --check-prefixes=GCN %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - < %s | FileCheck --check-prefixes=GCN %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - < %s | FileCheck --check-prefixes=GCN %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - < %s | FileCheck --check-prefixes=GCN %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - < %s | FileCheck --check-prefixes=GCN %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - < %s | FileCheck --check-prefixes=GCN %s ; Test vector signed bitfield extract. 
define signext i8 @v_ashr_i8_i32(i32 %value) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll index 9fb359b768a5e..1fa58d41300ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - < %s | FileCheck --check-prefixes=GCN %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - < %s | FileCheck --check-prefixes=GCN %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - < %s | FileCheck --check-prefixes=GCN %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - < %s | FileCheck --check-prefixes=GCN %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - < %s | FileCheck --check-prefixes=GCN %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - < %s | FileCheck --check-prefixes=GCN %s ; Test vector bitfield extract. define i32 @v_srl_mask_i32(i32 %value) {