
AMDGPU/GlobalISel: add RegBankLegalize rules for AND, OR and XOR #132382

Open
wants to merge 1 commit into base: users/petar-avramovic/bfe

Conversation

petar-avramovic (Collaborator) commented Mar 21, 2025

Uniform S1 is lowered to S32.
Divergent S1 is selected as VCC(S1); instruction select will select an SALU instruction based on wavesize (S32 or S64).
S16 is selected as-is. There are register classes for vgpr S16, and since some isel patterns check for sgpr S16, we don't lower it to S32.
For 32- and 64-bit types we use the B32/B64 rules, which cover scalar, vector and pointer types. SALU B32 and B64 and VALU B32 instructions are available; divergent B64 is lowered to B32.
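
For example, a uniform s1 G_OR whose inputs are truncated from s32 is legalized to a single sgpr s32 G_OR, as the updated checks in regbankselect-or.mir below show (a minimal sketch; register numbers are illustrative):

  %0:sgpr(s32) = COPY $sgpr0
  %1:sgpr(s32) = COPY $sgpr1
  %2:sgpr(s32) = G_OR %0, %1
  S_NOP 0, implicit %2(s32)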

@llvmbot (Member) commented Mar 21, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Petar Avramovic (petar-avramovic)

Changes

Uniform S1 is lowered to S32.
Divergent S1 is selected as VCC(S1); instruction select will select an SALU instruction based on wavesize (S32 or S64).
S16 is selected as-is. There are register classes for vgpr S16, and since some isel patterns check for sgpr S16, we don't lower it to S32.
For 32- and 64-bit types we use the B32/B64 rules, which cover scalar, vector and pointer types. SALU B32 and B64 and VALU B32 instructions are available; divergent B64 is lowered to B32.
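
To illustrate the last point, the SplitTo32 lowering unmerges a divergent 64-bit operation into two 32-bit halves and remerges the result, as the updated checks in regbankselect-and.mir below show (a minimal sketch for a divergent s64 G_AND; register numbers are illustrative):

  %2:vgpr(s32), %3:vgpr(s32) = G_UNMERGE_VALUES %0(s64)
  %4:vgpr(s32), %5:vgpr(s32) = G_UNMERGE_VALUES %1(s64)
  %6:vgpr(s32) = G_AND %2, %4
  %7:vgpr(s32) = G_AND %3, %5
  %8:vgpr(s64) = G_MERGE_VALUES %6(s32), %7(s32)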


Patch is 41.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132382.diff

6 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp (+13-4)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp (+9-1)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h (+2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir (+21-12)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir (+40-45)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir (+39-45)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index e4eaa01951a7f..5dbaa9488d668 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -219,11 +219,16 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
     return;
   }
   case SplitTo32: {
-    auto Op1 = B.buildUnmerge(VgprRB_S32, MI.getOperand(1).getReg());
-    auto Op2 = B.buildUnmerge(VgprRB_S32, MI.getOperand(2).getReg());
+    Register Dst = MI.getOperand(0).getReg();
+    LLT Ty = MRI.getType(Dst) == V4S16 ? V2S16 : S32;
+    auto Op1 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(1).getReg());
+    auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
     unsigned Opc = MI.getOpcode();
-    auto Lo = B.buildInstr(Opc, {VgprRB_S32}, {Op1.getReg(0), Op2.getReg(0)});
-    auto Hi = B.buildInstr(Opc, {VgprRB_S32}, {Op1.getReg(1), Op2.getReg(1)});
+    auto Flags = MI.getFlags();
+    auto Lo = B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(0), Op2.getReg(0)},
+                           Flags);
+    auto Hi = B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(1), Op2.getReg(1)},
+                           Flags);
     B.buildMergeLikeInstr(MI.getOperand(0).getReg(), {Lo, Hi});
     MI.eraseFromParent();
     break;
@@ -384,6 +389,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
   case UniInVcc:
     return LLT::scalar(1);
   case Sgpr16:
+  case Vgpr16:
     return LLT::scalar(16);
   case Sgpr32:
   case Sgpr32Trunc:
@@ -503,6 +509,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
   case Sgpr32AExtBoolInReg:
   case Sgpr32SExt:
     return SgprRB;
+  case Vgpr16:
   case Vgpr32:
   case Vgpr64:
   case VgprP0:
@@ -546,6 +553,7 @@ void RegBankLegalizeHelper::applyMappingDst(
     case SgprP4:
     case SgprP5:
     case SgprV4S32:
+    case Vgpr16:
     case Vgpr32:
     case Vgpr64:
     case VgprP0:
@@ -677,6 +685,7 @@ void RegBankLegalizeHelper::applyMappingSrc(
       break;
     }
     // vgpr scalars, pointers and vectors
+    case Vgpr16:
     case Vgpr32:
     case Vgpr64:
     case VgprP0:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 7959bf30ca27d..96bc969dd1f40 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -106,6 +106,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
     return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
   case DivS1:
     return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
+  case DivS16:
+    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
   case DivS32:
     return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
   case DivS64:
@@ -441,6 +443,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
   addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
       .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
       .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
+      .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
+      .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
+      .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
       .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
       .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
       .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
@@ -483,11 +488,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
       .Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}});
 
-  addRulesForGOpcs({G_ANYEXT}).Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}});
+  addRulesForGOpcs({G_ANYEXT})
+      .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
+      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}});
 
   // In global-isel G_TRUNC in-reg is treated as no-op, inst selected into COPY.
   // It is up to user to deal with truncated bits.
   addRulesForGOpcs({G_TRUNC})
+      .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
       .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
       // This is non-trivial. VgprToVccCopy is done using compare instruction.
       .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}});
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 5edc88aaff73e..685cc0acc0cc4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -46,6 +46,7 @@ enum UniformityLLTOpPredicateID {
   UniS64,
 
   DivS1,
+  DivS16,
   DivS32,
   DivS64,
 
@@ -125,6 +126,7 @@ enum RegBankLLTMappingApplyID {
   SgprB512,
 
   // vgpr scalars, pointers, vectors and B-types
+  Vgpr16,
   Vgpr32,
   Vgpr64,
   VgprP0,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir
index ae818e036747a..212de32c6840f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir
@@ -1,6 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s
 
 ---
 name: and_s32_ss
@@ -106,7 +105,8 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
     ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
     ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
     ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
@@ -128,8 +128,9 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
     ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
     ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
     ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
     ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
@@ -258,7 +259,8 @@ body: |
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
     ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY2]](s32)
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](s64)
     ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
     ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
     ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
@@ -287,7 +289,8 @@ body: |
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
     ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](s64)
     ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
     ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
     ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
@@ -382,12 +385,14 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64)
     ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
     ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
     ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
     ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
-    ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](s64)
     ; CHECK-NEXT: [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
     ; CHECK-NEXT: [[AND2:%[0-9]+]]:vgpr(s32) = G_AND [[UV4]], [[UV6]]
     ; CHECK-NEXT: [[AND3:%[0-9]+]]:vgpr(s32) = G_AND [[UV5]], [[UV7]]
@@ -433,7 +438,8 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s32>) = COPY [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
     ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
     ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
     ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
@@ -458,8 +464,9 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s32>) = COPY [[COPY1]](<2 x s32>)
     ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
     ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
     ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
     ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32)
@@ -524,7 +531,8 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x s16>), [[UV1:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>)
     ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
     ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV]], [[UV2]]
     ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV1]], [[UV3]]
@@ -546,8 +554,9 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
     ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
-    ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(<2 x s16>), [[UV3:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>)
     ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV]], [[UV2]]
     ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV1]], [[UV3]]
     ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[AND]](<2 x s16>), [[AND1]](<2 x s16>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir
index da3b24c404efe..ae03b781db28d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir
@@ -1,6 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s
 
 ---
 name: or_s32_ss
@@ -90,21 +89,17 @@ body: |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
     ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
     ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]]
-    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32)
-    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1)
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[ANYEXT]], [[ANYEXT1]]
-    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[OR]](s32)
-    ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[ICMP]], [[ICMP1]]
+    ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s32)
       %0:_(s32) = COPY $sgpr0
       %1:_(s32) = COPY $sgpr1
       %2:_(s32) = G_CONSTANT i32 0
       %4:_(s1) = G_ICMP intpred(ne), %0, %2
       %5:_(s1) = G_ICMP intpred(ne), %1, %2
       %6:_(s1) = G_OR %4, %5
-      S_NOP 0, implicit %6
+      %7:_(s32) = G_ANYEXT %6
+      S_NOP 0, implicit %7
 ...
 
 ---
@@ -149,11 +144,10 @@ body: |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
     ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]]
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[COPY3]], [[ICMP1]]
+    ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[ICMP]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[AMDGPU_COPY_VCC_SCC]], [[ICMP1]]
     ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s1)
       %0:_(s32) = COPY $sgpr0
       %1:_(s32) = COPY $vgpr0
@@ -175,19 +169,15 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
-    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1)
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[ANYEXT]], [[ANYEXT1]]
-    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[OR]](s32)
-    ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s32)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s1) = G_TRUNC %0
     %3:_(s1) = G_TRUNC %1
     %4:_(s1) = G_OR %2, %3
-    S_NOP 0, implicit %4
+    %5:_(s32) = G_ANYEXT %4
+    S_NOP 0, implicit %5
 
 ...
 
@@ -202,20 +192,16 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
-    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
-    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1)
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[ANYEXT]], [[ANYEXT1]]
-    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[OR]](s32)
-    ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[COPY]], [[ICMP]]
+    ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s32)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s1) = G_TRUNC %0
     %3:_(s1) = G_ICMP intpred(ne), %0, %1
     %4:_(s1) = G_OR %2, %3
-    S_NOP 0, implicit %4
+    %5:_(s32) = G_ANYEXT %4
+    S_NOP 0, implicit %5
 ...
 
 ---
@@ -229,11 +215,10 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY1]]
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[COPY3]], [[ICMP]]
+    ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[AMDGPU_COPY_VCC_SCC]], [[ICMP]]
     ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $vgpr0
@@ -273,7 +258,8 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
     ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
     ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]]
     ; CHECK-NEXT: [[OR1:%[0-9]+]]:vgpr(s32) = G_OR [[UV1]], [[UV3]]
@@ -295,8 +281,9 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
     ; CHEC...
[truncated]

@llvmbot (Member) commented Mar 21, 2025

@llvm/pr-subscribers-llvm-globalisel

