Skip to content

Commit 6560c53

Browse files
AMDGPU/GlobalISel: add RegBankLegalize rules for select
A uniform S1 condition is any-extended to S32, and its high bits are cleared by ANDing with 1. A divergent S1 condition uses VCC. B32/B64 rules are used to cover scalar, vector, and pointer types. A divergent B64 select is split into two 32-bit selects.
1 parent 2ac46e4 commit 6560c53

File tree

4 files changed

+649
-1280
lines changed

4 files changed

+649
-1280
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

+18-1
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,22 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
243243
MI.eraseFromParent();
244244
break;
245245
}
246+
case SplitTo32Sel: {
247+
Register Dst = MI.getOperand(0).getReg();
248+
LLT Ty = MRI.getType(Dst) == V4S16 ? V2S16 : S32;
249+
auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
250+
auto Op3 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(3).getReg());
251+
Register Cond = MI.getOperand(1).getReg();
252+
auto Flags = MI.getFlags();
253+
auto ResLo =
254+
B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(0), Op3.getReg(0), Flags);
255+
auto ResHi =
256+
B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(1), Op3.getReg(1), Flags);
257+
258+
B.buildMergeLikeInstr(Dst, {ResLo, ResHi});
259+
MI.eraseFromParent();
260+
break;
261+
}
246262
case Div_BFE: {
247263
Register Dst = MI.getOperand(0).getReg();
248264
assert(MRI.getType(Dst) == LLT::scalar(64));
@@ -453,7 +469,8 @@ LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) {
453469
case UniInVgprB64:
454470
if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
455471
Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(0, 64) ||
456-
Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64))
472+
Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64) ||
473+
Ty == LLT::pointer(999, 64))
457474
return Ty;
458475
return LLT();
459476
case SgprB96:

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
198198
return B32;
199199
if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
200200
Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(1, 64) ||
201-
Ty == LLT::pointer(4, 64))
201+
Ty == LLT::pointer(4, 64) || Ty == LLT::pointer(999, 64))
202202
return B64;
203203
if (Ty == LLT::fixed_vector(3, 32))
204204
return B96;
@@ -485,8 +485,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
485485
addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});
486486

487487
addRulesForGOpcs({G_SELECT}, StandardB)
488+
.Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
489+
.Any({{UniS16}, {{Sgpr16}, {Sgpr32AExtBoolInReg, Sgpr16, Sgpr16}}})
488490
.Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
489-
.Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}});
491+
.Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}})
492+
.Div(B64, {{VgprB64}, {Vcc, VgprB64, VgprB64}, SplitTo32Sel})
493+
.Uni(B64, {{SgprB64}, {Sgpr32AExtBoolInReg, SgprB64, SgprB64}});
490494

491495
addRulesForGOpcs({G_ANYEXT})
492496
.Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h

+1
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ enum LoweringMethodID {
177177
Div_BFE,
178178
VgprToVccCopy,
179179
SplitTo32,
180+
SplitTo32Sel,
180181
Ext32To64,
181182
UniCstExt,
182183
SplitLoad,

0 commit comments

Comments
 (0)