Skip to content

Commit 0751418

Browse files
authored
[X86] Extend combinei64TruncSrlAdd to handle patterns with or and xor (#128435)
As discussed in #126448, the fold implemented by #126448 / #128353 can be extended to operations other than `add`. This patch extends the fold performed by `combinei64TruncSrlAdd` to include `or` and `xor` (proof: https://alive2.llvm.org/ce/z/AXuaQu). There's no need to extend it to `sub` and `and`, as similar folds are already being performed for those operations. CC: @phoebewang @RKSimon
1 parent 8f4d2e0 commit 0751418

File tree

2 files changed

+136
-30
lines changed

2 files changed

+136
-30
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+37-28
Original file line number · Diff line number · Diff line change
@@ -53788,36 +53788,35 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
5378853788
return DAG.getNode(X86ISD::CVTP2SI, DL, VT, Src);
5378953789
}
5379053790

53791-
// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
53792-
// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able
53793-
// to avoid generating code with MOVABS and large constants in certain cases.
53794-
static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
53795-
const SDLoc &DL) {
53796-
using namespace llvm::SDPatternMatch;
53791+
// Attempt to fold some (truncate (srl (add/or/xor X, C1), C2)) patterns to
53792+
// (add/or/xor (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we
53793+
// are able to avoid generating code with MOVABS and large constants in certain
53794+
// cases.
53795+
static SDValue combinei64TruncSrlConstant(SDValue N, EVT VT, SelectionDAG &DAG,
53796+
const SDLoc &DL) {
5379753797

53798-
SDValue AddLhs;
53799-
APInt AddConst, SrlConst;
53800-
if (VT != MVT::i32 ||
53801-
!sd_match(N, m_AllOf(m_SpecificVT(MVT::i64),
53802-
m_Srl(m_OneUse(m_Add(m_Value(AddLhs),
53803-
m_ConstInt(AddConst))),
53804-
m_ConstInt(SrlConst)))))
53805-
return SDValue();
53798+
SDValue Op = N.getOperand(0);
53799+
APInt OpConst = Op.getConstantOperandAPInt(1);
53800+
APInt SrlConst = N.getConstantOperandAPInt(1);
53801+
uint64_t SrlConstVal = SrlConst.getZExtValue();
53802+
unsigned Opcode = Op.getOpcode();
5380653803

53807-
if (SrlConst.ule(32) || AddConst.countr_zero() < SrlConst.getZExtValue())
53804+
if (SrlConst.ule(32) ||
53805+
(Opcode == ISD::ADD && OpConst.countr_zero() < SrlConstVal))
5380853806
return SDValue();
5380953807

53810-
SDValue AddLHSSrl =
53811-
DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
53812-
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
53808+
SDValue OpLhsSrl =
53809+
DAG.getNode(ISD::SRL, DL, MVT::i64, Op.getOperand(0), N.getOperand(1));
53810+
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, OpLhsSrl);
5381353811

53814-
APInt NewAddConstVal = AddConst.lshr(SrlConst).trunc(VT.getSizeInBits());
53815-
SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
53816-
SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
53812+
APInt NewOpConstVal = OpConst.lshr(SrlConst).trunc(VT.getSizeInBits());
53813+
SDValue NewOpConst = DAG.getConstant(NewOpConstVal, DL, VT);
53814+
SDValue NewOpNode = DAG.getNode(Opcode, DL, VT, Trunc, NewOpConst);
53815+
EVT CleanUpVT = EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConstVal);
5381753816

53818-
EVT CleanUpVT =
53819-
EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue());
53820-
return DAG.getZeroExtendInReg(NewAddNode, DL, CleanUpVT);
53817+
if (Opcode == ISD::ADD)
53818+
return DAG.getZeroExtendInReg(NewOpNode, DL, CleanUpVT);
53819+
return NewOpNode;
5382153820
}
5382253821

5382353822
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
@@ -53865,11 +53864,21 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
5386553864
if (!Src.hasOneUse())
5386653865
return SDValue();
5386753866

53868-
if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL))
53869-
return R;
53867+
if (VT == MVT::i32 && SrcVT == MVT::i64 && SrcOpcode == ISD::SRL &&
53868+
isa<ConstantSDNode>(Src.getOperand(1))) {
53869+
53870+
unsigned SrcOpOpcode = Src.getOperand(0).getOpcode();
53871+
if ((SrcOpOpcode != ISD::ADD && SrcOpOpcode != ISD::OR &&
53872+
SrcOpOpcode != ISD::XOR) ||
53873+
!isa<ConstantSDNode>(Src.getOperand(0).getOperand(1)))
53874+
return SDValue();
53875+
53876+
if (SDValue R = combinei64TruncSrlConstant(Src, VT, DAG, DL))
53877+
return R;
53878+
53879+
return SDValue();
53880+
}
5387053881

53871-
// Only support vector truncation for now.
53872-
// TODO: i64 scalar math would benefit as well.
5387353882
if (!VT.isVector())
5387453883
return SDValue();
5387553884

llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll

+99-2
Original file line number · Diff line number · Diff line change
@@ -128,6 +128,103 @@ define i32 @test_trunc_add(i64 %x) {
128128
ret i32 %conv
129129
}
130130

131+
define i32 @test_trunc_sub(i64 %x) {
132+
; X64-LABEL: test_trunc_sub:
133+
; X64: # %bb.0:
134+
; X64-NEXT: shrq $49, %rdi
135+
; X64-NEXT: leal 32762(%rdi), %eax
136+
; X64-NEXT: andl $32767, %eax # imm = 0x7FFF
137+
; X64-NEXT: retq
138+
%sub = sub i64 %x, 3377699720527872
139+
%shr = lshr i64 %sub, 49
140+
%conv = trunc i64 %shr to i32
141+
ret i32 %conv
142+
}
143+
144+
define i32 @test_trunc_and_1(i64 %x) {
145+
; X64-LABEL: test_trunc_and_1:
146+
; X64: # %bb.0:
147+
; X64-NEXT: movq %rdi, %rax
148+
; X64-NEXT: shrq $50, %rax
149+
; X64-NEXT: andl $3, %eax
150+
; X64-NEXT: # kill: def $eax killed $eax killed $rax
151+
; X64-NEXT: retq
152+
%and = and i64 %x, 3940649673949184
153+
%shr = lshr i64 %and, 50
154+
%conv = trunc i64 %shr to i32
155+
ret i32 %conv
156+
}
157+
158+
define i32 @test_trunc_or_1(i64 %x) {
159+
; X64-LABEL: test_trunc_or_1:
160+
; X64: # %bb.0:
161+
; X64-NEXT: movq %rdi, %rax
162+
; X64-NEXT: shrq $50, %rax
163+
; X64-NEXT: orl $3, %eax
164+
; X64-NEXT: # kill: def $eax killed $eax killed $rax
165+
; X64-NEXT: retq
166+
%or = or i64 %x, 3940649673949184
167+
%shr = lshr i64 %or, 50
168+
%conv = trunc i64 %shr to i32
169+
ret i32 %conv
170+
}
171+
172+
define i32 @test_trunc_xor_1(i64 %x) {
173+
; X64-LABEL: test_trunc_xor_1:
174+
; X64: # %bb.0:
175+
; X64-NEXT: movq %rdi, %rax
176+
; X64-NEXT: shrq $50, %rax
177+
; X64-NEXT: xorl $3, %eax
178+
; X64-NEXT: # kill: def $eax killed $eax killed $rax
179+
; X64-NEXT: retq
180+
%xor = xor i64 %x, 3940649673949184
181+
%shr = lshr i64 %xor, 50
182+
%conv = trunc i64 %shr to i32
183+
ret i32 %conv
184+
}
185+
186+
define i32 @test_trunc_and_2(i64 %x) {
187+
; X64-LABEL: test_trunc_and_2:
188+
; X64: # %bb.0:
189+
; X64-NEXT: movq %rdi, %rax
190+
; X64-NEXT: shrq $45, %rax
191+
; X64-NEXT: andl $111, %eax
192+
; X64-NEXT: # kill: def $eax killed $eax killed $rax
193+
; X64-NEXT: retq
194+
%and = and i64 %x, 3940649673949183
195+
%shr = lshr i64 %and, 45
196+
%conv = trunc i64 %shr to i32
197+
ret i32 %conv
198+
}
199+
200+
define i32 @test_trunc_or_2(i64 %x) {
201+
; X64-LABEL: test_trunc_or_2:
202+
; X64: # %bb.0:
203+
; X64-NEXT: movq %rdi, %rax
204+
; X64-NEXT: shrq $45, %rax
205+
; X64-NEXT: orl $111, %eax
206+
; X64-NEXT: # kill: def $eax killed $eax killed $rax
207+
; X64-NEXT: retq
208+
%or = or i64 %x, 3940649673949183
209+
%shr = lshr i64 %or, 45
210+
%conv = trunc i64 %shr to i32
211+
ret i32 %conv
212+
}
213+
214+
define i32 @test_trunc_xor_2(i64 %x) {
215+
; X64-LABEL: test_trunc_xor_2:
216+
; X64: # %bb.0:
217+
; X64-NEXT: movq %rdi, %rax
218+
; X64-NEXT: shrq $45, %rax
219+
; X64-NEXT: xorl $111, %eax
220+
; X64-NEXT: # kill: def $eax killed $eax killed $rax
221+
; X64-NEXT: retq
222+
%xor = xor i64 %x, 3940649673949183
223+
%shr = lshr i64 %xor, 45
224+
%conv = trunc i64 %shr to i32
225+
ret i32 %conv
226+
}
227+
131228
; Make sure we don't crash on this test case.
132229

133230
define i32 @pr128158(i64 %x) {
@@ -137,10 +234,10 @@ define i32 @pr128158(i64 %x) {
137234
; X64-NEXT: addq %rdi, %rax
138235
; X64-NEXT: shrq $32, %rax
139236
; X64-NEXT: .p2align 4
140-
; X64-NEXT: .LBB9_1: # %for.body
237+
; X64-NEXT: .LBB16_1: # %for.body
141238
; X64-NEXT: # =>This Inner Loop Header: Depth=1
142239
; X64-NEXT: cmpl $9, %eax
143-
; X64-NEXT: jb .LBB9_1
240+
; X64-NEXT: jb .LBB16_1
144241
; X64-NEXT: # %bb.2: # %exit
145242
; X64-NEXT: xorl %eax, %eax
146243
; X64-NEXT: retq

0 commit comments

Comments
 (0)