Skip to content

Commit c7a043e

Browse files
AMDGPU/GlobalISel: add RegBankLegalize rules for bit shifts and sext-inreg
Uniform S16 shifts have to be extended to S32, using the appropriate extend, before lowering to an S32 instruction. Uniform packed V2S16 shifts are lowered to SGPR S32 instructions; the other option is to use VALU packed V2S16 and ReadAnyLane. For uniform S32 and S64, and for divergent S16, S32, S64 and V2S16, there are instructions available.
1 parent 6d31707 commit c7a043e

13 files changed

+313
-151
lines changed

Diff for: llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
312312
// Opcodes that support pretty much all combinations of reg banks and LLTs
313313
// (except S1). There is no point in writing rules for them.
314314
if (Opc == AMDGPU::G_BUILD_VECTOR || Opc == AMDGPU::G_UNMERGE_VALUES ||
315-
Opc == AMDGPU::G_MERGE_VALUES) {
315+
Opc == AMDGPU::G_MERGE_VALUES || Opc == G_BITCAST) {
316316
RBLHelper.applyMappingTrivial(*MI);
317317
continue;
318318
}

Diff for: llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

+109
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414
#include "AMDGPURegBankLegalizeHelper.h"
1515
#include "AMDGPUGlobalISelUtils.h"
1616
#include "AMDGPUInstrInfo.h"
17+
#include "AMDGPURegBankLegalizeRules.h"
1718
#include "AMDGPURegisterBankInfo.h"
1819
#include "GCNSubtarget.h"
1920
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
2021
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
2122
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
23+
#include "llvm/CodeGen/MachineInstr.h"
2224
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
2325
#include "llvm/IR/IntrinsicsAMDGPU.h"
2426
#include "llvm/Support/ErrorHandling.h"
@@ -166,6 +168,60 @@ void RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr &MI) {
166168
return;
167169
}
168170

171+
// Splits Reg into two SgprRB_S32 values with zero-extended halves.
// Reg is first bitcast to SgprRB_S32; the first returned register holds the
// low 16 bits (upper bits cleared by masking), the second holds the high 16
// bits moved down by a logical shift right of 16.
// NOTE(review): presumably Reg is a uniform V2S16 value - confirm with callers.
std::pair<Register, Register> RegBankLegalizeHelper::unpackZExt(Register Reg) {
  auto Packed = B.buildBitcast(SgprRB_S32, Reg);

  // Low element: keep bits [15:0], zero the rest.
  auto LowHalfMask = B.buildConstant(SgprRB_S32, 0x0000ffff);
  Register LoElt = B.buildAnd(SgprRB_S32, Packed, LowHalfMask).getReg(0);

  // High element: logical shift fills the vacated upper bits with zeros.
  auto ShiftAmt = B.buildConstant(SgprRB_S32, 16);
  Register HiElt = B.buildLShr(SgprRB_S32, Packed, ShiftAmt).getReg(0);

  return {LoElt, HiElt};
}
178+
179+
// Splits Reg into two SgprRB_S32 values with sign-extended halves.
// Reg is first bitcast to SgprRB_S32; the first returned register is that
// value sign-extended in-register from its low 16 bits, the second is the
// value arithmetically shifted right by 16.
// NOTE(review): presumably Reg is a uniform V2S16 value - confirm with callers.
std::pair<Register, Register> RegBankLegalizeHelper::unpackSExt(Register Reg) {
  auto Packed = B.buildBitcast(SgprRB_S32, Reg);

  // Low element: replicate bit 15 into bits [31:16].
  Register LoElt = B.buildSExtInReg(SgprRB_S32, Packed, 16).getReg(0);

  // High element: arithmetic shift keeps the sign of the upper half.
  auto ShiftAmt = B.buildConstant(SgprRB_S32, 16);
  Register HiElt = B.buildAShr(SgprRB_S32, Packed, ShiftAmt).getReg(0);

  return {LoElt, HiElt};
}
185+
186+
// Splits Reg into two SgprRB_S32 values without caring about the bits above
// the low 16 of the first result.
// Reg is first bitcast to SgprRB_S32; the first returned register is that
// bitcast result unchanged (so its upper 16 bits still hold the other half),
// the second is the value logically shifted right by 16.
// NOTE(review): presumably Reg is a uniform V2S16 value - confirm with callers.
std::pair<Register, Register> RegBankLegalizeHelper::unpackAExt(Register Reg) {
  auto Packed = B.buildBitcast(SgprRB_S32, Reg);

  // Low element: the packed word itself; no masking is emitted.
  Register LoElt = Packed.getReg(0);

  // High element: move bits [31:16] down to [15:0].
  auto ShiftAmt = B.buildConstant(SgprRB_S32, 16);
  Register HiElt = B.buildLShr(SgprRB_S32, Packed, ShiftAmt).getReg(0);

  return {LoElt, HiElt};
}
192+
193+
// Lowers a G_SHL / G_LSHR / G_ASHR instruction by unpacking both source
// operands into two SgprRB_S32 values each, performing the shift separately on
// the low and high elements, and re-packing the two results into the original
// destination with a truncating build-vector. MI is erased afterwards.
//
// The shifted value is extended according to the opcode:
//   G_SHL  - any-extend  (bits above the element do not reach the low 16 bits
//                         that survive the final truncation)
//   G_LSHR - zero-extend (zeros must be shifted in from above)
//   G_ASHR - sign-extend (sign bits must be shifted in from above)
//
// The shift amount is always zero-extended, independent of the opcode. An
// any-extended low amount would still carry the high element's amount in bits
// [31:16], turning an in-range 16-bit amount into an out-of-range 32-bit one.
// Zero-extension also matches the scalar S16 shift rules, which map the amount
// operand as Sgpr32ZExt for all three opcodes.
//
// Any other opcode hits llvm_unreachable.
void RegBankLegalizeHelper::lowerUnpack(MachineInstr &MI) {
  const unsigned Opc = MI.getOpcode();
  const Register ValReg = MI.getOperand(1).getReg();

  // Unpack the value being shifted with the extension the opcode requires.
  std::pair<Register, Register> Val;
  switch (Opc) {
  case AMDGPU::G_SHL:
    Val = unpackAExt(ValReg);
    break;
  case AMDGPU::G_LSHR:
    Val = unpackZExt(ValReg);
    break;
  case AMDGPU::G_ASHR:
    Val = unpackSExt(ValReg);
    break;
  default:
    llvm_unreachable("Unpack lowering not implemented");
  }

  // Shift amounts are unsigned quantities: keep only each element's 16 bits.
  auto [Amt0, Amt1] = unpackZExt(MI.getOperand(2).getReg());

  Register Lo = B.buildInstr(Opc, {SgprRB_S32}, {Val.first, Amt0}).getReg(0);
  Register Hi = B.buildInstr(Opc, {SgprRB_S32}, {Val.second, Amt1}).getReg(0);

  B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
  MI.eraseFromParent();
}
224+
169225
bool isSignedBFE(MachineInstr &MI) {
170226
unsigned Opc =
171227
isa<GIntrinsic>(MI) ? MI.getOperand(1).getIntrinsicID() : MI.getOpcode();
@@ -310,6 +366,34 @@ void RegBankLegalizeHelper::lowerSplitTo32Sel(MachineInstr &MI) {
310366
return;
311367
}
312368

369+
// Lowers MI (operand 1 = source, operand 2 = immediate width to sign-extend
// from) into operations on two VgprRB_S32 halves, then merges the halves into
// operand 0 and erases MI.
//
// Diagram legend, Hi|Lo: s = sign bit, ? = bits overwritten by the
// sign-extend, x = bits left untouched.
void RegBankLegalizeHelper::lowerSplitTo32SExtInReg(MachineInstr &MI) {
  auto SrcHalves = B.buildUnmerge(VgprRB_S32, MI.getOperand(1).getReg());
  int Width = MI.getOperand(2).getImm();
  Register Lo, Hi;

  if (Width > 32) {
    // Sign bit lives in the high half; the low half passes through as is.
    // Hi|Lo: ?????sxx|xxxxxxxx -> ssssssxx|xxxxxxxx
    Lo = SrcHalves.getReg(0);
    Hi = B.buildSExtInReg(VgprRB_S32, SrcHalves.getReg(1), Width - 32)
             .getReg(0);
  } else {
    // Sign bit lives in the low half; the high half becomes all sign bits.
    // The low half is frozen before it is used to derive both results.
    auto FrozenLo = B.buildFreeze(VgprRB_S32, SrcHalves.getReg(0));

    // Width == 32: Hi|Lo: ????????|sxxxxxxx -> ssssssss|sxxxxxxx
    // Width <  32: Hi|Lo: ????????|???sxxxx -> ssssssss|ssssxxxx
    Lo = Width == 32
             ? FrozenLo.getReg(0)
             : B.buildSExtInReg(VgprRB_S32, FrozenLo, Width).getReg(0);

    // Arithmetic shift by 31 broadcasts Lo's top bit across the high half.
    auto BroadcastSignAmt = B.buildConstant(SgprRB_S32, 31);
    Hi = B.buildAShr(VgprRB_S32, Lo, BroadcastSignAmt).getReg(0);
  }

  B.buildMergeLikeInstr(MI.getOperand(0).getReg(), {Lo, Hi});
  MI.eraseFromParent();
}
396+
313397
void RegBankLegalizeHelper::lower(MachineInstr &MI,
314398
const RegBankLLTMapping &Mapping,
315399
SmallSet<Register, 4> &WaterfallSgprs) {
@@ -332,6 +416,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
332416
MI.eraseFromParent();
333417
return;
334418
}
419+
case Unpack:
420+
return lowerUnpack(MI);
335421
case Ext32To64: {
336422
const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
337423
MachineInstrBuilder Hi;
@@ -398,6 +484,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
398484
return lowerSplitTo32(MI);
399485
case SplitTo32Sel:
400486
return lowerSplitTo32Sel(MI);
487+
case SplitTo32SExtInReg:
488+
return lowerSplitTo32SExtInReg(MI);
401489
case SplitLoad: {
402490
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
403491
unsigned Size = DstTy.getSizeInBits();
@@ -487,6 +575,13 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
487575
case SgprP5:
488576
case VgprP5:
489577
return LLT::pointer(5, 32);
578+
case SgprV2S16:
579+
case VgprV2S16:
580+
case UniInVgprV2S16:
581+
return LLT::fixed_vector(2, 16);
582+
case SgprV2S32:
583+
case VgprV2S32:
584+
return LLT::fixed_vector(2, 32);
490585
case SgprV4S32:
491586
case VgprV4S32:
492587
case UniInVgprV4S32:
@@ -560,6 +655,8 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
560655
case SgprP3:
561656
case SgprP4:
562657
case SgprP5:
658+
case SgprV2S16:
659+
case SgprV2S32:
563660
case SgprV4S32:
564661
case SgprB32:
565662
case SgprB64:
@@ -569,6 +666,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
569666
case SgprB512:
570667
case UniInVcc:
571668
case UniInVgprS32:
669+
case UniInVgprV2S16:
572670
case UniInVgprV4S32:
573671
case UniInVgprB32:
574672
case UniInVgprB64:
@@ -590,6 +688,8 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
590688
case VgprP3:
591689
case VgprP4:
592690
case VgprP5:
691+
case VgprV2S16:
692+
case VgprV2S32:
593693
case VgprV4S32:
594694
case VgprB32:
595695
case VgprB64:
@@ -627,6 +727,8 @@ void RegBankLegalizeHelper::applyMappingDst(
627727
case SgprP3:
628728
case SgprP4:
629729
case SgprP5:
730+
case SgprV2S16:
731+
case SgprV2S32:
630732
case SgprV4S32:
631733
case Vgpr16:
632734
case Vgpr32:
@@ -636,6 +738,8 @@ void RegBankLegalizeHelper::applyMappingDst(
636738
case VgprP3:
637739
case VgprP4:
638740
case VgprP5:
741+
case VgprV2S16:
742+
case VgprV2S32:
639743
case VgprV4S32: {
640744
assert(Ty == getTyFromID(MethodIDs[OpIdx]));
641745
assert(RB == getRegBankFromID(MethodIDs[OpIdx]));
@@ -670,6 +774,7 @@ void RegBankLegalizeHelper::applyMappingDst(
670774
break;
671775
}
672776
case UniInVgprS32:
777+
case UniInVgprV2S16:
673778
case UniInVgprV4S32: {
674779
assert(Ty == getTyFromID(MethodIDs[OpIdx]));
675780
assert(RB == SgprRB);
@@ -743,6 +848,8 @@ void RegBankLegalizeHelper::applyMappingSrc(
743848
case SgprP3:
744849
case SgprP4:
745850
case SgprP5:
851+
case SgprV2S16:
852+
case SgprV2S32:
746853
case SgprV4S32: {
747854
assert(Ty == getTyFromID(MethodIDs[i]));
748855
assert(RB == getRegBankFromID(MethodIDs[i]));
@@ -768,6 +875,8 @@ void RegBankLegalizeHelper::applyMappingSrc(
768875
case VgprP3:
769876
case VgprP4:
770877
case VgprP5:
878+
case VgprV2S16:
879+
case VgprV2S32:
771880
case VgprV4S32: {
772881
assert(Ty == getTyFromID(MethodIDs[i]));
773882
if (RB != VgprRB) {

Diff for: llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h

+5
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,15 @@ class RegBankLegalizeHelper {
110110
SmallSet<Register, 4> &SgprWaterfallOperandRegs);
111111

112112
void lowerVccExtToSel(MachineInstr &MI);
113+
std::pair<Register, Register> unpackZExt(Register Reg);
114+
std::pair<Register, Register> unpackSExt(Register Reg);
115+
std::pair<Register, Register> unpackAExt(Register Reg);
116+
void lowerUnpack(MachineInstr &MI);
113117
void lowerDiv_BFE(MachineInstr &MI);
114118
void lowerUni_BFE(MachineInstr &MI);
115119
void lowerSplitTo32(MachineInstr &MI);
116120
void lowerSplitTo32Sel(MachineInstr &MI);
121+
void lowerSplitTo32SExtInReg(MachineInstr &MI);
117122
};
118123

119124
} // end namespace AMDGPU

Diff for: llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

+41-2
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
6060
return MRI.getType(Reg) == LLT::pointer(4, 64);
6161
case P5:
6262
return MRI.getType(Reg) == LLT::pointer(5, 32);
63+
case V2S32:
64+
return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
6365
case V4S32:
6466
return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
6567
case B32:
@@ -92,6 +94,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
9294
return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
9395
case UniP5:
9496
return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
97+
case UniV2S16:
98+
return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
9599
case UniB32:
96100
return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
97101
case UniB64:
@@ -122,6 +126,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
122126
return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
123127
case DivP5:
124128
return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
129+
case DivV2S16:
130+
return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
125131
case DivB32:
126132
return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
127133
case DivB64:
@@ -434,7 +440,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
434440
MachineRegisterInfo &_MRI)
435441
: ST(&_ST), MRI(&_MRI) {
436442

437-
addRulesForGOpcs({G_ADD}, Standard)
443+
addRulesForGOpcs({G_ADD, G_SUB}, Standard)
438444
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
439445
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
440446

@@ -451,11 +457,36 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
451457
.Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
452458

453459
addRulesForGOpcs({G_SHL}, Standard)
460+
.Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
461+
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
462+
.Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, Unpack})
463+
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
464+
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
465+
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
454466
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
467+
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
468+
469+
addRulesForGOpcs({G_LSHR}, Standard)
470+
.Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
471+
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
472+
.Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, Unpack})
473+
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
474+
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
455475
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
476+
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
456477
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
457478

458-
addRulesForGOpcs({G_LSHR}, Standard).Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}});
479+
addRulesForGOpcs({G_ASHR}, Standard)
480+
.Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
481+
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
482+
.Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, Unpack})
483+
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
484+
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
485+
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
486+
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
487+
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
488+
489+
addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});
459490

460491
addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
461492
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, Uni_BFE})
@@ -514,6 +545,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
514545
.Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
515546
.Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
516547
.Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
548+
.Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
549+
.Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
517550
// This is non-trivial. VgprToVccCopy is done using compare instruction.
518551
.Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}})
519552
.Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
@@ -549,6 +582,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
549582
.Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
550583
.Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});
551584

585+
addRulesForGOpcs({G_SEXT_INREG})
586+
.Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
587+
.Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
588+
.Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
589+
.Any({{DivS64, S64}, {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}});
590+
552591
bool hasUnalignedLoads = ST->getGeneration() >= AMDGPUSubtarget::GFX12;
553592
bool hasSMRDSmall = ST->hasScalarSubwordLoads();
554593

Diff for: llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h

+11
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ enum UniformityLLTOpPredicateID {
7575
V3S32,
7676
V4S32,
7777

78+
UniV2S16,
79+
80+
DivV2S16,
81+
7882
// B types
7983
B32,
8084
B64,
@@ -117,7 +121,9 @@ enum RegBankLLTMappingApplyID {
117121
SgprP3,
118122
SgprP4,
119123
SgprP5,
124+
SgprV2S16,
120125
SgprV4S32,
126+
SgprV2S32,
121127
SgprB32,
122128
SgprB64,
123129
SgprB96,
@@ -134,6 +140,8 @@ enum RegBankLLTMappingApplyID {
134140
VgprP3,
135141
VgprP4,
136142
VgprP5,
143+
VgprV2S16,
144+
VgprV2S32,
137145
VgprB32,
138146
VgprB64,
139147
VgprB96,
@@ -145,6 +153,7 @@ enum RegBankLLTMappingApplyID {
145153
// Dst only modifiers: read-any-lane and truncs
146154
UniInVcc,
147155
UniInVgprS32,
156+
UniInVgprV2S16,
148157
UniInVgprV4S32,
149158
UniInVgprB32,
150159
UniInVgprB64,
@@ -173,11 +182,13 @@ enum LoweringMethodID {
173182
DoNotLower,
174183
VccExtToSel,
175184
UniExtToSel,
185+
Unpack,
176186
Uni_BFE,
177187
Div_BFE,
178188
VgprToVccCopy,
179189
SplitTo32,
180190
SplitTo32Sel,
191+
SplitTo32SExtInReg,
181192
Ext32To64,
182193
UniCstExt,
183194
SplitLoad,

Diff for: llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
3-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
2+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
3+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
77

88
define i8 @v_ashr_i8(i8 %value, i8 %amount) {
99
; GFX6-LABEL: v_ashr_i8:

0 commit comments

Comments
 (0)