Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AMDGPU/GlobalISel: add RegBankLegalize rules for AND OR and XOR #132382

Open
wants to merge 1 commit into
base: users/petar-avramovic/bfe
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,11 +219,16 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
return;
}
case SplitTo32: {
auto Op1 = B.buildUnmerge(VgprRB_S32, MI.getOperand(1).getReg());
auto Op2 = B.buildUnmerge(VgprRB_S32, MI.getOperand(2).getReg());
Register Dst = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst) == V4S16 ? V2S16 : S32;
auto Op1 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(1).getReg());
auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
unsigned Opc = MI.getOpcode();
auto Lo = B.buildInstr(Opc, {VgprRB_S32}, {Op1.getReg(0), Op2.getReg(0)});
auto Hi = B.buildInstr(Opc, {VgprRB_S32}, {Op1.getReg(1), Op2.getReg(1)});
auto Flags = MI.getFlags();
auto Lo = B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(0), Op2.getReg(0)},
Flags);
auto Hi = B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(1), Op2.getReg(1)},
Flags);
B.buildMergeLikeInstr(MI.getOperand(0).getReg(), {Lo, Hi});
MI.eraseFromParent();
break;
Expand Down Expand Up @@ -384,6 +389,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
case UniInVcc:
return LLT::scalar(1);
case Sgpr16:
case Vgpr16:
return LLT::scalar(16);
case Sgpr32:
case Sgpr32Trunc:
Expand Down Expand Up @@ -503,6 +509,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
case Sgpr32AExtBoolInReg:
case Sgpr32SExt:
return SgprRB;
case Vgpr16:
case Vgpr32:
case Vgpr64:
case VgprP0:
Expand Down Expand Up @@ -546,6 +553,7 @@ void RegBankLegalizeHelper::applyMappingDst(
case SgprP4:
case SgprP5:
case SgprV4S32:
case Vgpr16:
case Vgpr32:
case Vgpr64:
case VgprP0:
Expand Down Expand Up @@ -677,6 +685,7 @@ void RegBankLegalizeHelper::applyMappingSrc(
break;
}
// vgpr scalars, pointers and vectors
case Vgpr16:
case Vgpr32:
case Vgpr64:
case VgprP0:
Expand Down
10 changes: 9 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
case DivS1:
return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
case DivS16:
return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
case DivS32:
return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
case DivS64:
Expand Down Expand Up @@ -441,6 +443,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
.Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
.Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
.Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
.Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
.Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
.Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
.Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
.Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
Expand Down Expand Up @@ -483,11 +488,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
.Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}});

addRulesForGOpcs({G_ANYEXT}).Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}});
addRulesForGOpcs({G_ANYEXT})
.Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
.Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}});

// In global-isel G_TRUNC in-reg is treated as no-op, inst selected into COPY.
// It is up to user to deal with truncated bits.
addRulesForGOpcs({G_TRUNC})
.Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
.Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
// This is non-trivial. VgprToVccCopy is done using compare instruction.
.Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}});
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ enum UniformityLLTOpPredicateID {
UniS64,

DivS1,
DivS16,
DivS32,
DivS64,

Expand Down Expand Up @@ -125,6 +126,7 @@ enum RegBankLLTMappingApplyID {
SgprB512,

// vgpr scalars, pointers, vectors and B-types
Vgpr16,
Vgpr32,
Vgpr64,
VgprP0,
Expand Down
33 changes: 21 additions & 12 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s

---
name: and_s32_ss
Expand Down Expand Up @@ -106,7 +105,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
Expand All @@ -128,8 +128,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
Expand Down Expand Up @@ -258,7 +259,8 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](s64)
; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
Expand Down Expand Up @@ -287,7 +289,8 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](s64)
; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
Expand Down Expand Up @@ -382,12 +385,14 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64)
; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](s64)
; CHECK-NEXT: [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
; CHECK-NEXT: [[AND2:%[0-9]+]]:vgpr(s32) = G_AND [[UV4]], [[UV6]]
; CHECK-NEXT: [[AND3:%[0-9]+]]:vgpr(s32) = G_AND [[UV5]], [[UV7]]
Expand Down Expand Up @@ -433,7 +438,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s32>) = COPY [[COPY]](<2 x s32>)
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
Expand All @@ -458,8 +464,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s32>) = COPY [[COPY1]](<2 x s32>)
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32)
Expand Down Expand Up @@ -524,7 +531,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x s16>), [[UV1:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV]], [[UV2]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV1]], [[UV3]]
Expand All @@ -546,8 +554,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(<2 x s16>), [[UV3:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>)
; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV]], [[UV2]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[UV1]], [[UV3]]
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[AND]](<2 x s16>), [[AND1]](<2 x s16>)
Expand Down
Loading
Loading