@@ -196,7 +196,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
196
196
// Integer absolute.
197
197
if (Subtarget.hasCMov()) {
198
198
setOperationAction(ISD::ABS , MVT::i16 , Custom);
199
- setOperationAction(ISD::ABS , MVT::i32 , Custom);
199
+ setOperationAction(ISD::ABS , MVT::i32 , Custom);
200
200
}
201
201
setOperationAction(ISD::ABS , MVT::i64 , Custom);
202
202
@@ -34667,7 +34667,7 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
34667
34667
}
34668
34668
34669
34669
// TODO: This switch could include FNEG and the x86-specific FP logic ops
34670
- // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
34670
+ // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
34671
34671
// missed load folding and fma+fneg combining.
34672
34672
switch (Vec.getOpcode()) {
34673
34673
case ISD::FMA: // Begin 3 operands
@@ -35354,6 +35354,38 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
35354
35354
}
35355
35355
}
35356
35356
35357
+ // Detect select-guarded variable shifts (shl, lshr) where an out-of-range shift amount yields zero, and map them to AVX2+ variable shifts.
35358
+ if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
35359
+ SupportedVectorVarShift(VT.getSimpleVT(), Subtarget, ISD::SHL)) {
35360
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
35361
+
35362
+ // Check if one of the arms of the VSELECT is a zero vector. If it's on the
35363
+ // left side invert the predicate to simplify logic below.
35364
+ SDValue Other;
35365
+ if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
35366
+ Other = RHS;
35367
+ CC = ISD::getSetCCInverse(CC, true);
35368
+ } else if (ISD::isBuildVectorAllZeros(RHS.getNode())) {
35369
+ Other = LHS;
35370
+ }
35371
+
35372
+ // Look for the following patterns, shown here for 32-bit elements (>> becomes vsrlv):
35373
+ // y < 32 ? x << y : 0 --> vshlv(x, y)
35374
+ // y <= 31 ? x << y : 0 --> vshlv(x, y)
35375
+ APInt CondRHS;
35376
+ if (Other && Other.getNumOperands() == 2 &&
35377
+ DAG.isEqualTo(Other.getOperand(1), Cond.getOperand(0)) &&
35378
+ (Other.getOpcode() == ISD::SHL || Other.getOpcode() == ISD::SRL) &&
35379
+ ISD::isConstantSplatVector(Cond.getOperand(1).getNode(), CondRHS)) {
35380
+
35381
+ // Replace ISD::SHL or ISD::SRL with the appropriate AVX2 vector-vector shift.
35382
+ unsigned op = Other.getOpcode() == ISD::SHL ? X86ISD::VSHLV : X86ISD::VSRLV;
35383
+ if ((CC == ISD::SETULT && CondRHS == VT.getScalarSizeInBits()) ||
35384
+ (CC == ISD::SETULE && CondRHS == VT.getScalarSizeInBits() - 1))
35385
+ return DAG.getNode(op, DL, VT, Other.getOperand(0), Other.getOperand(1));
35386
+ }
35387
+ }
35388
+
35357
35389
// Match VSELECTs into subs with unsigned saturation.
35358
35390
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
35359
35391
// psubus is available in SSE2 for i8 and i16 vectors.
0 commit comments