Skip to content

Commit 40a92ac

Browse files
committed
X86: add patterns for X86ISD::VSHLV and X86ISD::VSRLV
Replace VSELECT instructions that zero the result when the SHL/SRL shift amount exceeds the legal range.
1 parent 1efaf6b commit 40a92ac

File tree

1 file changed

+34
-2
lines changed

1 file changed

+34
-2
lines changed

lib/Target/X86/X86ISelLowering.cpp

+34-2
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
196196
// Integer absolute.
197197
if (Subtarget.hasCMov()) {
198198
setOperationAction(ISD::ABS , MVT::i16 , Custom);
199-
setOperationAction(ISD::ABS , MVT::i32 , Custom);
199+
setOperationAction(ISD::ABS , MVT::i32 , Custom);
200200
}
201201
setOperationAction(ISD::ABS , MVT::i64 , Custom);
202202

@@ -34667,7 +34667,7 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
3466734667
}
3466834668

3466934669
// TODO: This switch could include FNEG and the x86-specific FP logic ops
34670-
// (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
34670+
// (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
3467134671
// missed load folding and fma+fneg combining.
3467234672
switch (Vec.getOpcode()) {
3467334673
case ISD::FMA: // Begin 3 operands
@@ -35354,6 +35354,38 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
3535435354
}
3535535355
}
3535635356

35357+
// Detect patterns for AVX2+ variable shifts (shl, lshr) with infinite-precision semantics.
35358+
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
35359+
SupportedVectorVarShift(VT.getSimpleVT(), Subtarget, ISD::SHL)) {
35360+
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
35361+
35362+
// Check if one of the arms of the VSELECT is a zero vector. If it's on the
35363+
// left side invert the predicate to simplify logic below.
35364+
SDValue Other;
35365+
if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
35366+
Other = RHS;
35367+
CC = ISD::getSetCCInverse(CC, true);
35368+
} else if (ISD::isBuildVectorAllZeros(RHS.getNode())) {
35369+
Other = LHS;
35370+
}
35371+
35372+
// Look for the following patterns (>> becomes vsrlv):
35373+
// y < 32 ? x << y : 0 --> vshlv(x, y)
35374+
// y <= 31 ? x << y : 0 --> vshlv(x, y)
35375+
APInt CondRHS;
35376+
if (Other && Other.getNumOperands() == 2 &&
35377+
DAG.isEqualTo(Other.getOperand(1), Cond.getOperand(0)) &&
35378+
(Other.getOpcode() == ISD::SHL || Other.getOpcode() == ISD::SRL) &&
35379+
ISD::isConstantSplatVector(Cond.getOperand(1).getNode(), CondRHS)) {
35380+
35381+
// Replace ISD::SHL or ISD::SRL with the appropriate AVX2 vector-vector shift.
35382+
unsigned op = Other.getOpcode() == ISD::SHL ? X86ISD::VSHLV : X86ISD::VSRLV;
35383+
if ((CC == ISD::SETULT && CondRHS == VT.getScalarSizeInBits()) ||
35384+
(CC == ISD::SETULE && CondRHS == VT.getScalarSizeInBits() - 1))
35385+
return DAG.getNode(op, DL, VT, Other.getOperand(0), Other.getOperand(1));
35386+
}
35387+
}
35388+
3535735389
// Match VSELECTs into subs with unsigned saturation.
3535835390
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
3535935391
// psubus is available in SSE2 for i8 and i16 vectors.

0 commit comments

Comments
 (0)