Skip to content

Commit 50bfa85

Browse files
authored
[DAGCombiner] Fix scalarizeExtractedBinOp for some SETCC cases (#123071)
PR #118823 added a DAG combine for extracting elements of a vector returned from SETCC; however, it does not correctly handle the case where the vector element type is not i1. In this case we have to take into account the boolean contents, which are represented differently between vectors and scalars. The code now explicitly performs an in-register sign- or zero-extend of the scalar SETCC result, depending on the vector boolean contents, in order to get the same result as the vector version. Fixes #121372.
1 parent 84fa175 commit 50bfa85

File tree

2 files changed

+44
-10
lines changed

2 files changed

+44
-10
lines changed

Diff for: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+19-6
Original file line numberDiff line numberDiff line change
@@ -22807,15 +22807,15 @@ static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
2280722807
Vec->getNumValues() != 1)
2280822808
return SDValue();
2280922809

22810+
// Targets may want to avoid this to prevent an expensive register transfer.
22811+
if (!TLI.shouldScalarizeBinop(Vec))
22812+
return SDValue();
22813+
2281022814
EVT ResVT = ExtElt->getValueType(0);
2281122815
if (Opc == ISD::SETCC &&
2281222816
(ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
2281322817
return SDValue();
2281422818

22815-
// Targets may want to avoid this to prevent an expensive register transfer.
22816-
if (!TLI.shouldScalarizeBinop(Vec))
22817-
return SDValue();
22818-
2281922819
// Extracting an element of a vector constant is constant-folded, so this
2282022820
// transform is just replacing a vector op with a scalar op while moving the
2282122821
// extract.
@@ -22834,8 +22834,21 @@ static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
2283422834
EVT OpVT = Op0.getValueType().getVectorElementType();
2283522835
Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
2283622836
Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
22837-
return DAG.getSetCC(DL, ResVT, Op0, Op1,
22838-
cast<CondCodeSDNode>(Vec->getOperand(2))->get());
22837+
SDValue NewVal = DAG.getSetCC(
22838+
DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
22839+
// We may need to sign- or zero-extend the result to match the same
22840+
// behaviour as the vector version of SETCC.
22841+
unsigned VecBoolContents = TLI.getBooleanContents(Vec.getValueType());
22842+
if (ResVT != MVT::i1 &&
22843+
VecBoolContents != TargetLowering::UndefinedBooleanContent &&
22844+
VecBoolContents != TLI.getBooleanContents(ResVT)) {
22845+
if (VecBoolContents == TargetLowering::ZeroOrNegativeOneBooleanContent)
22846+
NewVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ResVT, NewVal,
22847+
DAG.getValueType(MVT::i1));
22848+
else
22849+
NewVal = DAG.getZeroExtendInReg(NewVal, DL, MVT::i1);
22850+
}
22851+
return NewVal;
2283922852
}
2284022853
Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
2284122854
Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);

Diff for: llvm/test/CodeGen/AArch64/extract-vector-cmp.ll

+25-4
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,11 @@ define i128 @extract_icmp_v1i128(ptr %p) {
5858
; CHECK-LABEL: extract_icmp_v1i128:
5959
; CHECK: // %bb.0:
6060
; CHECK-NEXT: ldp x9, x8, [x0]
61-
; CHECK-NEXT: mov x1, xzr
6261
; CHECK-NEXT: orr x8, x9, x8
6362
; CHECK-NEXT: cmp x8, #0
64-
; CHECK-NEXT: cset w0, eq
63+
; CHECK-NEXT: cset w8, eq
64+
; CHECK-NEXT: sbfx x0, x8, #0, #1
65+
; CHECK-NEXT: mov x1, x0
6566
; CHECK-NEXT: ret
6667
%load = load <1 x i128>, ptr %p, align 16
6768
%cmp = icmp eq <1 x i128> %load, zeroinitializer
@@ -141,6 +142,26 @@ for.cond.cleanup:
141142
}
142143

143144

145+
; TODO: Combine the sbfx(cset) into a csetm
146+
define i32 @issue_121372(<4 x i32> %v) {
147+
; CHECK-LABEL: issue_121372:
148+
; CHECK: // %bb.0:
149+
; CHECK-NEXT: fmov w8, s0
150+
; CHECK-NEXT: cmp w8, #0
151+
; CHECK-NEXT: cset w8, eq
152+
; CHECK-NEXT: sbfx w8, w8, #0, #1
153+
; CHECK-NEXT: cmp w8, #1
154+
; CHECK-NEXT: csetm w0, lt
155+
; CHECK-NEXT: ret
156+
%cmp_ule = icmp ule <4 x i32> %v, zeroinitializer
157+
%sext_v4i1 = sext <4 x i1> %cmp_ule to <4 x i32>
158+
%cmp_sge = icmp sge <4 x i32> zeroinitializer, %sext_v4i1
159+
%ext = extractelement <4 x i1> %cmp_sge, i32 0
160+
%res = sext i1 %ext to i32
161+
ret i32 %res
162+
}
163+
164+
144165
; Negative tests
145166

146167
define i1 @extract_icmp_v4i32_splat_rhs(<4 x i32> %a, i32 %b) {
@@ -163,9 +184,9 @@ define i1 @extract_icmp_v4i32_splat_rhs_mul_use(<4 x i32> %a, ptr %p) {
163184
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_mul_use:
164185
; CHECK: // %bb.0:
165186
; CHECK-NEXT: movi v1.4s, #235
166-
; CHECK-NEXT: adrp x9, .LCPI7_0
187+
; CHECK-NEXT: adrp x9, .LCPI8_0
167188
; CHECK-NEXT: mov x8, x0
168-
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI7_0]
189+
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI8_0]
169190
; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
170191
; CHECK-NEXT: xtn v1.4h, v0.4s
171192
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b

0 commit comments

Comments
 (0)