AMDGPU: Correctly handle folding immediates into subregister use operands (llvm#129664)

This fixes a miscompile where a 64-bit materialize incorrectly folds
into a sub1 use operand.

We currently do not see many subregister use operands. Incidentally,
there are also SIFoldOperands bugs that prevent this fold from
appearing here. Fix the folding of 32-bit subregister uses of 64-bit
materializes ahead of time, in preparation for future patches.
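
The crux of the fix: an immediate folded into a subregister use has to be narrowed to the slice that subregister selects, rather than being reused whole. Below is a minimal standalone sketch of that idea, assuming sub0 selects bits [31:0] and sub1 bits [63:32] of a 64-bit value; the enum and function names are illustrative stand-ins, while the patch itself calls the existing helper SIInstrInfo::extractSubregFromImm.

```cpp
#include <cstdint>
#include <optional>

// Illustrative stand-ins for subregister indexes; not the LLVM definitions.
enum class SubRegIdx { NoSubRegister, Sub0, Sub1 };

// Narrow a 64-bit immediate to the slice a subregister use would read.
std::optional<int64_t> extractSubregImm(int64_t Imm, SubRegIdx SubIdx) {
  switch (SubIdx) {
  case SubRegIdx::NoSubRegister:
    return Imm; // no subregister: the full 64-bit value is used
  case SubRegIdx::Sub0:
    return static_cast<uint32_t>(Imm); // low 32 bits
  case SubRegIdx::Sub1:
    return static_cast<uint32_t>(static_cast<uint64_t>(Imm) >> 32); // high 32 bits
  }
  return std::nullopt; // any other subregister index is not handled here
}
```

With a 64-bit materialize of 8, a sub0 use reads 8 but a sub1 use reads 0, which is exactly the difference the updated MIR tests below check.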

The existing APIs are awkward, since they expect a fully formed
instruction whose operands can be referenced, rather than a new operand
value that still needs to be created.
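
The new code addresses that by letting a fold candidate carry a raw immediate by value, so the caller never has to materialize a MachineOperand (or an instruction owning one) just to record a pending fold. A simplified, self-contained model of that shape is sketched below; the type and function names are illustrative, not the LLVM definitions (the real ones are the new FoldCandidate constructor and appendFoldCandidate overload added in the diff).

```cpp
#include <cstdint>
#include <vector>

struct Instr; // stand-in for MachineInstr

// A pending fold described by value: it records which use operand to rewrite
// and the immediate to write into it.
struct PendingFold {
  Instr *UseMI;
  unsigned UseOpNo;
  int64_t ImmToFold;

  PendingFold(Instr *MI, unsigned OpNo, int64_t Imm)
      : UseMI(MI), UseOpNo(OpNo), ImmToFold(Imm) {}
};

// Mirrors the de-duplication in appendFoldCandidate: at most one pending fold
// per use operand of an instruction.
void appendFold(std::vector<PendingFold> &List, Instr *MI, unsigned OpNo,
                int64_t Imm) {
  for (const PendingFold &F : List)
    if (F.UseMI == MI && F.UseOpNo == OpNo)
      return;
  List.emplace_back(MI, OpNo, Imm);
}
```
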
arsenm authored Mar 4, 2025
1 parent 0f869cc commit c8f4c35
Showing 2 changed files with 57 additions and 16 deletions.
65 changes: 53 additions & 12 deletions llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -50,6 +50,11 @@ struct FoldCandidate {
    }
  }

  FoldCandidate(MachineInstr *MI, unsigned OpNo, int64_t FoldImm,
                bool Commuted_ = false, int ShrinkOp = -1)
      : UseMI(MI), ImmToFold(FoldImm), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
        Kind(MachineOperand::MO_Immediate), Commuted(Commuted_) {}

  bool isFI() const {
    return Kind == MachineOperand::MO_FrameIndex;
  }
@@ -578,16 +583,29 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
}

static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
                                MachineInstr *MI, unsigned OpNo,
                                MachineOperand *FoldOp, bool Commuted = false,
                                int ShrinkOp = -1) {
                                FoldCandidate &&Entry) {
  // Skip additional folding on the same operand.
  for (FoldCandidate &Fold : FoldList)
    if (Fold.UseMI == MI && Fold.UseOpNo == OpNo)
    if (Fold.UseMI == Entry.UseMI && Fold.UseOpNo == Entry.UseOpNo)
      return;
  LLVM_DEBUG(dbgs() << "Append " << (Commuted ? "commuted" : "normal")
                    << " operand " << OpNo << "\n " << *MI);
  FoldList.emplace_back(MI, OpNo, FoldOp, Commuted, ShrinkOp);
  LLVM_DEBUG(dbgs() << "Append " << (Entry.Commuted ? "commuted" : "normal")
                    << " operand " << Entry.UseOpNo << "\n " << *Entry.UseMI);
  FoldList.push_back(Entry);
}

static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
                                MachineInstr *MI, unsigned OpNo,
                                MachineOperand *FoldOp, bool Commuted = false,
                                int ShrinkOp = -1) {
  appendFoldCandidate(FoldList,
                      FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));
}

static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
                                MachineInstr *MI, unsigned OpNo, int64_t ImmVal,
                                bool Commuted = false, int ShrinkOp = -1) {
  appendFoldCandidate(FoldList,
                      FoldCandidate(MI, OpNo, ImmVal, Commuted, ShrinkOp));
}

bool SIFoldOperandsImpl::tryAddToFoldList(
@@ -847,11 +865,35 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
    return false;

  uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;
  if (OpToFold.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
    appendFoldCandidate(FoldList, UseMI, UseOpIdx, &OpToFold);
    return true;
  MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
  if (OpToFold.isImm()) {
    if (unsigned UseSubReg = UseOp.getSubReg()) {
      std::optional<int64_t> SubImm =
          SIInstrInfo::extractSubregFromImm(OpToFold.getImm(), UseSubReg);
      if (!SubImm)
        return false;

      // TODO: Avoid the temporary MachineOperand
      MachineOperand TmpOp = MachineOperand::CreateImm(*SubImm);
      if (TII->isOperandLegal(*UseMI, UseOpIdx, &TmpOp)) {
        appendFoldCandidate(FoldList, UseMI, UseOpIdx, *SubImm);
        return true;
      }

      return false;
    }

    if (TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
      appendFoldCandidate(FoldList, UseMI, UseOpIdx, &OpToFold);
      return true;
    }
  }

  // TODO: Verify the following code handles subregisters correctly.
  // TODO: Handle extract of global reference
  if (UseOp.getSubReg())
    return false;

  if (!OpToFold.isReg())
    return false;

@@ -861,8 +903,7 @@

  // Maybe it is just a COPY of an immediate itself.
  MachineInstr *Def = MRI->getVRegDef(UseReg);
  MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
  if (!UseOp.getSubReg() && Def && TII->isFoldableCopy(*Def)) {
  if (Def && TII->isFoldableCopy(*Def)) {
    MachineOperand &DefOp = Def->getOperand(1);
    if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
      appendFoldCandidate(FoldList, UseMI, UseOpIdx, &DefOp);
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir
@@ -17,7 +17,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[COPY]].sub1, %subreg.sub1
; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[REG_SEQUENCE]].sub0, 8, implicit-def $scc
; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[REG_SEQUENCE]].sub1, 8, implicit-def $scc, implicit $scc
; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[REG_SEQUENCE]].sub1, 0, implicit-def $scc, implicit $scc
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
%0:sgpr_64 = COPY $sgpr8_sgpr9
@@ -42,8 +42,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[COPY]].sub1, %subreg.sub1
; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[REG_SEQUENCE]].sub0, 30064771075, 0, implicit $exec
; CHECK-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[REG_SEQUENCE]].sub1, 30064771075, [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[REG_SEQUENCE]].sub0, 3, 0, implicit $exec
; CHECK-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[REG_SEQUENCE]].sub1, 7, [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
%0:vreg_64 = COPY $vgpr8_vgpr9
@@ -116,7 +116,7 @@ body: |
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 8
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B64_]].sub1, %subreg.sub0, [[S_MOV_B64_]].sub0, %subreg.sub1
; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, 8, implicit-def $scc
; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]].sub1, 8, implicit-def $scc, implicit $scc
; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]].sub1, 0, implicit-def $scc, implicit $scc
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADDC_U32_]]
%0:sgpr_64 = COPY $sgpr8_sgpr9
%1:sreg_64 = S_MOV_B64 8
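
The updated CHECK lines above follow directly from splitting each 64-bit immediate at bit 32: 8 splits into sub0 = 8 and sub1 = 0 (hence the S_ADDC_U32 operand changing from 8 to 0), and 30064771075 = 0x700000003 splits into sub0 = 3 and sub1 = 7 for the V_ADD_CO_U32/V_ADDC_U32 pair. A quick standalone check of that arithmetic:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // 64-bit materialize of 8: low half 8, high half 0.
  const uint64_t A = 8;
  assert(static_cast<uint32_t>(A) == 8);
  assert(static_cast<uint32_t>(A >> 32) == 0);

  // 64-bit materialize of 30064771075 (0x700000003): low half 3, high half 7.
  const uint64_t B = 30064771075ULL;
  assert(static_cast<uint32_t>(B) == 3);
  assert(static_cast<uint32_t>(B >> 32) == 7);
  return 0;
}
```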
