16
16
#include " AMDGPURegBankLegalizeRules.h"
17
17
#include " AMDGPUInstrInfo.h"
18
18
#include " GCNSubtarget.h"
19
+ #include " MCTargetDesc/AMDGPUMCTargetDesc.h"
19
20
#include " llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
21
+ #include " llvm/CodeGen/MachineInstr.h"
20
22
#include " llvm/CodeGen/MachineUniformityAnalysis.h"
21
23
#include " llvm/IR/IntrinsicsAMDGPU.h"
22
24
#include " llvm/Support/AMDGPUAddrSpace.h"
@@ -60,6 +62,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
60
62
return MRI.getType (Reg) == LLT::pointer (4 , 64 );
61
63
case P5:
62
64
return MRI.getType (Reg) == LLT::pointer (5 , 32 );
65
+ case V2S32:
66
+ return MRI.getType (Reg) == LLT::fixed_vector (2 , 32 );
63
67
case V4S32:
64
68
return MRI.getType (Reg) == LLT::fixed_vector (4 , 32 );
65
69
case B32:
@@ -92,6 +96,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
92
96
return MRI.getType (Reg) == LLT::pointer (4 , 64 ) && MUI.isUniform (Reg);
93
97
case UniP5:
94
98
return MRI.getType (Reg) == LLT::pointer (5 , 32 ) && MUI.isUniform (Reg);
99
+ case UniV2S16:
100
+ return MRI.getType (Reg) == LLT::fixed_vector (2 , 16 ) && MUI.isUniform (Reg);
95
101
case UniB32:
96
102
return MRI.getType (Reg).getSizeInBits () == 32 && MUI.isUniform (Reg);
97
103
case UniB64:
@@ -122,6 +128,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
122
128
return MRI.getType (Reg) == LLT::pointer (4 , 64 ) && MUI.isDivergent (Reg);
123
129
case DivP5:
124
130
return MRI.getType (Reg) == LLT::pointer (5 , 32 ) && MUI.isDivergent (Reg);
131
+ case DivV2S16:
132
+ return MRI.getType (Reg) == LLT::fixed_vector (2 , 16 ) && MUI.isDivergent (Reg);
125
133
case DivB32:
126
134
return MRI.getType (Reg).getSizeInBits () == 32 && MUI.isDivergent (Reg);
127
135
case DivB64:
@@ -434,7 +442,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
434
442
MachineRegisterInfo &_MRI)
435
443
: ST(&_ST), MRI(&_MRI) {
436
444
437
- addRulesForGOpcs ({G_ADD}, Standard)
445
+ addRulesForGOpcs ({G_ADD, G_SUB }, Standard)
438
446
.Uni (S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
439
447
.Div (S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
440
448
@@ -451,11 +459,36 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
451
459
.Div (B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
452
460
453
461
addRulesForGOpcs ({G_SHL}, Standard)
462
+ .Uni (S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
463
+ .Div (S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
464
+ .Uni (V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, Unpack})
465
+ .Div (V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
466
+ .Uni (S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
467
+ .Uni (S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
454
468
.Div (S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
469
+ .Div (S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
470
+
471
+ addRulesForGOpcs ({G_LSHR}, Standard)
472
+ .Uni (S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
473
+ .Div (S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
474
+ .Uni (V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, Unpack})
475
+ .Div (V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
476
+ .Uni (S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
455
477
.Uni (S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
478
+ .Div (S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
456
479
.Div (S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
457
480
458
- addRulesForGOpcs ({G_LSHR}, Standard).Uni (S32, {{Sgpr32}, {Sgpr32, Sgpr32}});
481
+ addRulesForGOpcs ({G_ASHR}, Standard)
482
+ .Uni (S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
483
+ .Div (S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
484
+ .Uni (V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, Unpack})
485
+ .Div (V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
486
+ .Uni (S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
487
+ .Uni (S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
488
+ .Div (S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
489
+ .Div (S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
490
+
491
+ addRulesForGOpcs ({G_FRAME_INDEX}).Any ({{UniP5, _}, {{SgprP5}, {None}}});
459
492
460
493
addRulesForGOpcs ({G_UBFX, G_SBFX}, Standard)
461
494
.Uni (S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, Uni_BFE})
@@ -514,6 +547,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
514
547
.Any ({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
515
548
.Any ({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
516
549
.Any ({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
550
+ .Any ({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
551
+ .Any ({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
517
552
// This is non-trivial. VgprToVccCopy is done using a compare instruction.
518
553
.Any ({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}})
519
554
.Any ({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
@@ -549,6 +584,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
549
584
.Any ({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
550
585
.Any ({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});
551
586
587
+ addRulesForGOpcs ({G_SEXT_INREG})
588
+ .Any ({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
589
+ .Any ({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
590
+ .Any ({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
591
+ .Any ({{DivS64, S64}, {{Vgpr64}, {Vgpr64}, SExtInRegSplitTo32}});
592
+
552
593
bool hasUnalignedLoads = ST->getGeneration () >= AMDGPUSubtarget::GFX12;
553
594
bool hasSMRDSmall = ST->hasScalarSubwordLoads ();
554
595
0 commit comments