Skip to content

Commit

Permalink
AMDGPU: Form v2f16 minimum3/maximum3 on gfx950 (llvm#128123)
Browse files Browse the repository at this point in the history
  • Loading branch information
arsenm authored Feb 21, 2025
1 parent e729dc7 commit cc46d00
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 245 deletions.
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13515,7 +13515,8 @@ static bool supportsMin3Max3(const GCNSubtarget &Subtarget, unsigned Opc,
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
return (VT == MVT::f32 && Subtarget.hasMinimum3Maximum3F32()) ||
- (VT == MVT::f16 && Subtarget.hasMinimum3Maximum3F16());
+ (VT == MVT::f16 && Subtarget.hasMinimum3Maximum3F16()) ||
+ (VT == MVT::v2f16 && Subtarget.hasMinimum3Maximum3PKF16());
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ def : VOP3PSatPat<ssubsat, V_PK_SUB_I16>;
} // End SubtargetPredicate = HasVOP3PInsts

let SubtargetPredicate = HasMinimum3Maximum3PKF16, FPDPRounding = 1 in {
- defm V_PK_MINIMUM3_F16 : VOP3PInst<"v_pk_minimum3_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
- defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
+ defm V_PK_MINIMUM3_F16 : VOP3PInst<"v_pk_minimum3_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, AMDGPUfminimum3>;
+ defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, AMDGPUfmaximum3>;
}

// TODO: Make sure we're doing the right thing with denormals. Note
Expand Down
Loading

0 comments on commit cc46d00

Please sign in to comment.