diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index e2f664396c16d..57ad43cfb8fa9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -242,6 +242,22 @@ void RegBankLegalizeHelper::lowerUni_BFE(MachineInstr &MI) { return; } +void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst) == V4S16 ? V2S16 : S32; + auto Op1 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(1).getReg()); + auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg()); + unsigned Opc = MI.getOpcode(); + auto Flags = MI.getFlags(); + auto Lo = + B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(0), Op2.getReg(0)}, Flags); + auto Hi = + B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(1), Op2.getReg(1)}, Flags); + B.buildMergeLikeInstr(MI.getOperand(0).getReg(), {Lo, Hi}); + MI.eraseFromParent(); + return; +} + void RegBankLegalizeHelper::lower(MachineInstr &MI, const RegBankLLTMapping &Mapping, SmallSet &WaterfallSgprs) { @@ -330,20 +346,12 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI, MI.eraseFromParent(); return; } - case SplitTo32: { - auto Op1 = B.buildUnmerge(VgprRB_S32, MI.getOperand(1).getReg()); - auto Op2 = B.buildUnmerge(VgprRB_S32, MI.getOperand(2).getReg()); - unsigned Opc = MI.getOpcode(); - auto Lo = B.buildInstr(Opc, {VgprRB_S32}, {Op1.getReg(0), Op2.getReg(0)}); - auto Hi = B.buildInstr(Opc, {VgprRB_S32}, {Op1.getReg(1), Op2.getReg(1)}); - B.buildMergeLikeInstr(MI.getOperand(0).getReg(), {Lo, Hi}); - MI.eraseFromParent(); - break; - } case Div_BFE: return lowerDiv_BFE(MI); case Uni_BFE: return lowerUni_BFE(MI); + case SplitTo32: + return lowerSplitTo32(MI); case SplitLoad: { LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); unsigned Size = DstTy.getSizeInBits(); @@ -403,6 +411,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) { case UniInVcc: return LLT::scalar(1); case Sgpr16: + case Vgpr16: return LLT::scalar(16); case Sgpr32: case Sgpr32Trunc: @@ -522,6 +531,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) { case Sgpr32AExtBoolInReg: case Sgpr32SExt: return SgprRB; + case Vgpr16: case Vgpr32: case Vgpr64: case VgprP0: @@ -565,6 +575,7 @@ void RegBankLegalizeHelper::applyMappingDst( case SgprP4: case SgprP5: case SgprV4S32: + case Vgpr16: case Vgpr32: case Vgpr64: case VgprP0: @@ -696,6 +707,7 @@ void RegBankLegalizeHelper::applyMappingSrc( break; } // vgpr scalars, pointers and vectors + case Vgpr16: case Vgpr32: case Vgpr64: case VgprP0: diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h index f91a1e2fa4771..a518c708a07b3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h @@ -111,6 +111,7 @@ class RegBankLegalizeHelper { void lowerDiv_BFE(MachineInstr &MI); void lowerUni_BFE(MachineInstr &MI); + void lowerSplitTo32(MachineInstr &MI); }; } // end namespace AMDGPU diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 7959bf30ca27d..96bc969dd1f40 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -106,6 +106,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg); case DivS1: return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg); + case DivS16: + return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg); case DivS32: return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg); case DivS64: @@ -441,6 +443,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB) .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}}) .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}}) + .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}}) + .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}}) + .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}}) .Div(B32, {{VgprB32}, {VgprB32, VgprB32}}) .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}}) .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32}); @@ -483,11 +488,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}}) .Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}}); - addRulesForGOpcs({G_ANYEXT}).Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}); + addRulesForGOpcs({G_ANYEXT}) + .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away + .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}); // In global-isel G_TRUNC in-reg is treated as no-op, inst selected into COPY. // It is up to user to deal with truncated bits. addRulesForGOpcs({G_TRUNC}) + .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}) // This is non-trivial. VgprToVccCopy is done using compare instruction. .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}}); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h index 5edc88aaff73e..685cc0acc0cc4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h @@ -46,6 +46,7 @@ enum UniformityLLTOpPredicateID { UniS64, DivS1, + DivS16, DivS32, DivS64, @@ -125,6 +126,7 @@ enum RegBankLLTMappingApplyID { SgprB512, // vgpr scalars, pointers, vectors and B-types + Vgpr16, Vgpr32, Vgpr64, VgprP0, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir index ae818e036747a..212de32c6840f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s --- name: and_s32_ss @@ -106,7 +105,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] @@ -128,8 +128,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) @@ -258,7 +259,8 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] @@ -287,7 +289,8 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] @@ -382,12 +385,14 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](s64) ; CHECK-NEXT: [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[AND2:%[0-9]+]]:vgpr(s32) = G_AND [[UV4]], [[UV6]] ; CHECK-NEXT: [[AND3:%[0-9]+]]:vgpr(s32) = G_AND [[UV5]], [[UV7]] @@ -433,7 +438,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s32>) = COPY [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] @@ -458,8 +464,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s32>) = COPY [[COPY1]](<2 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) @@ -524,7 +531,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x s16>), [[UV1:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV]], [[UV2]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV1]], [[UV3]] @@ -546,8 +554,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(<2 x s16>), [[UV3:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV]], [[UV2]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV1]], [[UV3]] ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[AND]](<2 x s16>), [[AND1]](<2 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir index da3b24c404efe..ae03b781db28d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s --- name: or_s32_ss @@ -90,21 +89,17 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[OR]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_CONSTANT i32 0 %4:_(s1) = G_ICMP intpred(ne), %0, %2 %5:_(s1) = G_ICMP intpred(ne), %1, %2 %6:_(s1) = G_OR %4, %5 - S_NOP 0, implicit %6 + %7:_(s32) = G_ANYEXT %6 + S_NOP 0, implicit %7 ... --- @@ -149,11 +144,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[COPY3]], [[ICMP1]] + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[ICMP]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[AMDGPU_COPY_VCC_SCC]], [[ICMP1]] ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -175,19 +169,15 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[OR]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_TRUNC %0 %3:_(s1) = G_TRUNC %1 %4:_(s1) = G_OR %2, %3 - S_NOP 0, implicit %4 + %5:_(s32) = G_ANYEXT %4 + S_NOP 0, implicit %5 ... @@ -202,20 +192,16 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[OR]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1) + ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[COPY]], [[ICMP]] + ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_TRUNC %0 %3:_(s1) = G_ICMP intpred(ne), %0, %1 %4:_(s1) = G_OR %2, %3 - S_NOP 0, implicit %4 + %5:_(s32) = G_ANYEXT %4 + S_NOP 0, implicit %5 ... --- @@ -229,11 +215,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[COPY3]], [[ICMP]] + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[AMDGPU_COPY_VCC_SCC]], [[ICMP]] ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -273,7 +258,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] @@ -295,8 +281,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) @@ -341,9 +328,9 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: %3:vgpr(s32) = disjoint G_OR [[UV]], [[UV2]] - ; CHECK-NEXT: %4:vgpr(s32) = disjoint G_OR [[UV1]], [[UV3]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES %3(s32), %4(s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = disjoint G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = disjoint G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = disjoint G_OR %0, %1 @@ -447,7 +434,8 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] @@ -476,7 +464,8 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] @@ -571,12 +560,14 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](s64) ; CHECK-NEXT: [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[OR2:%[0-9]+]]:vgpr(s32) = G_OR [[UV4]], [[UV6]] ; CHECK-NEXT: [[OR3:%[0-9]+]]:vgpr(s32) = G_OR [[UV5]], [[UV7]] @@ -622,7 +613,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s32>) = COPY [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] @@ -647,8 +639,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s32>) = COPY [[COPY1]](<2 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) @@ -713,7 +706,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x s16>), [[UV1:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[UV]], [[UV2]] ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[UV1]], [[UV3]] @@ -735,8 +729,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(<2 x s16>), [[UV3:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[UV]], [[UV2]] ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[UV1]], [[UV3]] ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[OR]](<2 x s16>), [[OR1]](<2 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir index 874a8a064b481..0ebb1344d407d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s --- name: xor_s32_ss @@ -90,21 +89,17 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_CONSTANT i32 0 %4:_(s1) = G_ICMP intpred(ne), %0, %2 %5:_(s1) = G_ICMP intpred(ne), %1, %2 %6:_(s1) = G_XOR %4, %5 - S_NOP 0, implicit %6 + %7:_(s32) = G_ANYEXT %6 + S_NOP 0, implicit %7 ... --- @@ -149,11 +144,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[COPY3]], [[ICMP1]] + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[ICMP]](s32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[AMDGPU_COPY_VCC_SCC]], [[ICMP1]] ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -175,19 +169,15 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_TRUNC %0 %3:_(s1) = G_TRUNC %1 %4:_(s1) = G_XOR %2, %3 - S_NOP 0, implicit %4 + %5:_(s32) = G_ANYEXT %4 + S_NOP 0, implicit %5 ... @@ -202,20 +192,16 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[COPY]], [[ICMP]] + ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_TRUNC %0 %3:_(s1) = G_ICMP intpred(ne), %0, %1 %4:_(s1) = G_XOR %2, %3 - S_NOP 0, implicit %4 + %5:_(s32) = G_ANYEXT %4 + S_NOP 0, implicit %5 ... --- @@ -229,11 +215,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[COPY3]], [[ICMP]] + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY]](s32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[AMDGPU_COPY_VCC_SCC]], [[ICMP]] ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -273,7 +258,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] @@ -295,8 +281,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) @@ -425,7 +412,8 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] @@ -454,7 +442,8 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] @@ -549,12 +538,14 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](s64) ; CHECK-NEXT: [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[XOR2:%[0-9]+]]:vgpr(s32) = G_XOR [[UV4]], [[UV6]] ; CHECK-NEXT: [[XOR3:%[0-9]+]]:vgpr(s32) = G_XOR [[UV5]], [[UV7]] @@ -600,7 +591,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s32>) = COPY [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] @@ -625,8 +617,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s32>) = COPY [[COPY1]](<2 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32) @@ -691,7 +684,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x s16>), [[UV1:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[UV1]], [[UV3]] @@ -713,8 +707,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(<2 x s16>), [[UV3:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[XOR]](<2 x s16>), [[XOR1]](<2 x s16>) @@ -834,9 +829,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[COPY2]] + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[C1]](s32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[AMDGPU_COPY_VCC_SCC]] ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s1) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 0