Skip to content

Commit 481e1eb

Browse files
authored
[NFC] Add a pre-commit test case for patch llvm#127121, which hoists xxspltib out of the loop (llvm#127701)
This is a pre-commit test case for patch llvm#127121, which hoists xxspltib out of the loop.
1 parent 3ce2a7d commit 481e1eb

File tree

1 file changed

+184
-0
lines changed

1 file changed

+184
-0
lines changed
+184
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
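;; Pre-commit test for hoisting `xxspltib` out of the loop; the expectations below record the current codegen, in which `xxspltib` is still emitted inside the loop body.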
2+
3+
; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff --mcpu=pwr10 \
4+
; RUN: %s -o - 2>&1 | FileCheck --check-prefix=AIX64 %s
5+
6+
; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff --mcpu=pwr10 \
7+
; RUN: %s -o - 2>&1 | FileCheck --check-prefix=AIX32 %s
8+
9+
; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu --mcpu=pwr10 \
10+
; RUN: %s -o - 2>&1 | FileCheck --check-prefix=LINUX64LE %s
11+
12+
; Test kernel: for every index i in [0, *%n), load the float In_a[i], AND its
; bytes with a constant mask, and store the resulting float to _a[i].
; The IR is already unrolled by two (%for.body) with a single-element
; epilogue (%for.body.epil) for an odd trip count.
; The mask's first four bytes are i8 6 and all remaining lanes are poison;
; this appears to be the constant that `xxspltib 1, 6` supplies in the
; expected assembly further down.
define void @_Z3fooPfS_Pi(ptr noalias nocapture noundef %_a, ptr noalias nocapture %In_a, ptr noalias nocapture %n) {
13+
; Read the trip count from %n and return immediately when it is not positive.
entry:
14+
%0 = load i32, ptr %n, align 4
15+
%cmp9 = icmp sgt i32 %0, 0
16+
br i1 %cmp9, label %for.body.preheader, label %for.cond.cleanup
17+
18+
; %xtraiter is the trip count modulo 2 (the epilogue count); a trip count of
; exactly 1 bypasses the unrolled loop entirely.
for.body.preheader:
19+
%wide.trip.count = zext nneg i32 %0 to i64
20+
%xtraiter = and i64 %wide.trip.count, 1
21+
%1 = icmp eq i32 %0, 1
22+
br i1 %1, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body.preheader.new
23+
24+
; %unroll_iter is the trip count with bit 0 cleared (2147483646 = 0x7FFFFFFE),
; i.e. the number of elements handled by the unrolled loop.
for.body.preheader.new:
25+
%unroll_iter = and i64 %wide.trip.count, 2147483646
26+
br label %for.body
27+
28+
; Join point after (or instead of) the unrolled loop. %indvars.iv.unr is the
; index of the first element not yet processed; the epilogue runs only when
; %xtraiter is non-zero.
for.cond.cleanup.loopexit.unr-lcssa:
29+
%indvars.iv.unr = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next.1, %for.body ]
30+
%lcmp.mod.not = icmp eq i64 %xtraiter, 0
31+
br i1 %lcmp.mod.not, label %for.cond.cleanup, label %for.body.epil
32+
33+
; Epilogue: process the single leftover element at index %indvars.iv.unr
; using the same load / mask / store sequence as the loop body.
for.body.epil:
34+
%arrayidx.epil = getelementptr inbounds nuw float, ptr %In_a, i64 %indvars.iv.unr
35+
%2 = load float, ptr %arrayidx.epil, align 4
36+
%vecins.i.epil = insertelement <4 x float> poison, float %2, i64 0
37+
%3 = bitcast <4 x float> %vecins.i.epil to <16 x i8>
38+
%and1.i.epil = and <16 x i8> %3, <i8 6, i8 6, i8 6, i8 6, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>
39+
%4 = bitcast <16 x i8> %and1.i.epil to <4 x float>
40+
%vecext.i.epil = extractelement <4 x float> %4, i64 0
41+
%arrayidx5.epil = getelementptr inbounds nuw float, ptr %_a, i64 %indvars.iv.unr
42+
store float %vecext.i.epil, ptr %arrayidx5.epil, align 4
43+
br label %for.cond.cleanup
44+
45+
for.cond.cleanup:
46+
ret void
47+
48+
; Unrolled loop body: each iteration handles the elements at %indvars.iv and
; %indvars.iv | 1. %niter counts processed elements in steps of two.
for.body:
49+
%indvars.iv = phi i64 [ 0, %for.body.preheader.new ], [ %indvars.iv.next.1, %for.body ]
50+
%niter = phi i64 [ 0, %for.body.preheader.new ], [ %niter.next.1, %for.body ]
51+
; First element: place the loaded float in lane 0, mask it as bytes, then
; extract lane 0 again and store it.
%arrayidx = getelementptr inbounds nuw float, ptr %In_a, i64 %indvars.iv
52+
%5 = load float, ptr %arrayidx, align 4
53+
%vecins.i = insertelement <4 x float> poison, float %5, i64 0
54+
%6 = bitcast <4 x float> %vecins.i to <16 x i8>
55+
%and1.i = and <16 x i8> %6, <i8 6, i8 6, i8 6, i8 6, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>
56+
%7 = bitcast <16 x i8> %and1.i to <4 x float>
57+
%vecext.i = extractelement <4 x float> %7, i64 0
58+
%arrayidx5 = getelementptr inbounds nuw float, ptr %_a, i64 %indvars.iv
59+
store float %vecext.i, ptr %arrayidx5, align 4
60+
; Second element: %indvars.iv starts at 0 and advances by 2, so it is always
; even and `or disjoint ... 1` yields the next index.
%indvars.iv.next = or disjoint i64 %indvars.iv, 1
61+
%arrayidx.1 = getelementptr inbounds nuw float, ptr %In_a, i64 %indvars.iv.next
62+
%8 = load float, ptr %arrayidx.1, align 4
63+
%vecins.i.1 = insertelement <4 x float> poison, float %8, i64 0
64+
%9 = bitcast <4 x float> %vecins.i.1 to <16 x i8>
65+
%and1.i.1 = and <16 x i8> %9, <i8 6, i8 6, i8 6, i8 6, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>
66+
%10 = bitcast <16 x i8> %and1.i.1 to <4 x float>
67+
%vecext.i.1 = extractelement <4 x float> %10, i64 0
68+
%arrayidx5.1 = getelementptr inbounds nuw float, ptr %_a, i64 %indvars.iv.next
69+
store float %vecext.i.1, ptr %arrayidx5.1, align 4
70+
; Advance both counters by two and leave the loop once %niter reaches
; %unroll_iter.
%indvars.iv.next.1 = add nuw nsw i64 %indvars.iv, 2
71+
%niter.next.1 = add i64 %niter, 2
72+
%niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
73+
br i1 %niter.ncmp.1, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body
74+
}
75+
76+
; AIX32: ._Z3fooPfS_Pi:
77+
; AIX32-NEXT: # %bb.0: # %entry
78+
; AIX32-NEXT: lwz 5, 0(5)
79+
; AIX32-NEXT: cmpwi 5, 1
80+
; AIX32-NEXT: bltlr 0
81+
; AIX32-NEXT: # %bb.1: # %for.body.preheader
82+
; AIX32-NEXT: li 6, 0
83+
; AIX32-NEXT: beq 0, L..BB0_4
84+
; AIX32-NEXT: # %bb.2: # %for.body.preheader.new
85+
; AIX32-NEXT: addi 12, 4, -8
86+
; AIX32-NEXT: addi 9, 3, -8
87+
; AIX32-NEXT: rlwinm 7, 5, 0, 1, 30
88+
; AIX32-NEXT: li 8, 0
89+
; AIX32-NEXT: li 10, 8
90+
; AIX32-NEXT: li 11, 12
91+
; AIX32-NEXT: .align 4
92+
; AIX32-NEXT: L..BB0_3: # %for.body
93+
; AIX32-NEXT: # =>This Inner Loop Header: Depth=1
94+
; AIX32-NEXT: lxvwsx 0, 12, 10
95+
; AIX32-NEXT: xxspltib 1, 6
96+
; AIX32-NEXT: lxvwsx 2, 12, 11
97+
; AIX32-NEXT: addic 6, 6, 2
98+
; AIX32-NEXT: addi 12, 12, 8
99+
; AIX32-NEXT: addze 8, 8
100+
; AIX32-NEXT: xor 0, 6, 7
101+
; AIX32-NEXT: or. 0, 0, 8
102+
; AIX32-NEXT: xxland 0, 0, 1
103+
; AIX32-NEXT: xxland 1, 2, 1
104+
; AIX32-NEXT: xscvspdpn 0, 0
105+
; AIX32-NEXT: stfsu 0, 8(9)
106+
; AIX32-NEXT: xscvspdpn 0, 1
107+
; AIX32-NEXT: stfs 0, 4(9)
108+
; AIX32-NEXT: bne 0, L..BB0_3
109+
110+
; AIX64: ._Z3fooPfS_Pi:
111+
; AIX64-NEXT: # %bb.0: # %entry
112+
; AIX64-NEXT: lwz 5, 0(5)
113+
; AIX64-NEXT: cmpwi 5, 1
114+
; AIX64-NEXT: bltlr 0
115+
; AIX64-NEXT: # %bb.1: # %for.body.preheader
116+
; AIX64-NEXT: li 6, 0
117+
; AIX64-NEXT: cmplwi 5, 1
118+
; AIX64-NEXT: beq 0, L..BB0_4
119+
; AIX64-NEXT: # %bb.2: # %for.body.preheader.new
120+
; AIX64-NEXT: rlwinm 6, 5, 0, 1, 30
121+
; AIX64-NEXT: addi 10, 4, -8
122+
; AIX64-NEXT: addi 7, 3, -8
123+
; AIX64-NEXT: li 8, 8
124+
; AIX64-NEXT: li 9, 12
125+
; AIX64-NEXT: li 11, 4
126+
; AIX64-NEXT: addi 6, 6, -2
127+
; AIX64-NEXT: rldicl 6, 6, 63, 1
128+
; AIX64-NEXT: addi 6, 6, 1
129+
; AIX64-NEXT: mtctr 6
130+
; AIX64-NEXT: li 6, 0
131+
; AIX64-NEXT: .align 4
132+
; AIX64-NEXT: L..BB0_3: # %for.body
133+
; AIX64-NEXT: # =>This Inner Loop Header: Depth=1
134+
; AIX64-NEXT: lxvwsx 0, 10, 8
135+
; AIX64-NEXT: xxspltib 1, 6
136+
; AIX64-NEXT: addi 6, 6, 2
137+
; AIX64-NEXT: xxland 0, 0, 1
138+
; AIX64-NEXT: xscvspdpn 0, 0
139+
; AIX64-NEXT: stfsu 0, 8(7)
140+
; AIX64-NEXT: lxvwsx 0, 10, 9
141+
; AIX64-NEXT: addi 10, 10, 8
142+
; AIX64-NEXT: xxland 0, 0, 1
143+
; AIX64-NEXT: xxsldwi 0, 0, 0, 3
144+
; AIX64-NEXT: stfiwx 0, 7, 11
145+
; AIX64-NEXT: bdnz L..BB0_3
146+
147+
; LINUX64LE: _Z3fooPfS_Pi: # @_Z3fooPfS_Pi
148+
; LINUX64LE-NEXT: .Lfunc_begin0:
149+
; LINUX64LE-NEXT: .cfi_startproc
150+
; LINUX64LE-NEXT: # %bb.0: # %entry
151+
; LINUX64LE-NEXT: lwz 5, 0(5)
152+
; LINUX64LE-NEXT: cmpwi 5, 1
153+
; LINUX64LE-NEXT: bltlr 0
154+
; LINUX64LE-NEXT: # %bb.1: # %for.body.preheader
155+
; LINUX64LE-NEXT: li 6, 0
156+
; LINUX64LE-NEXT: cmplwi 5, 1
157+
; LINUX64LE-NEXT: beq 0, .LBB0_4
158+
; LINUX64LE-NEXT: # %bb.2: # %for.body.preheader.new
159+
; LINUX64LE-NEXT: rlwinm 6, 5, 0, 1, 30
160+
; LINUX64LE-NEXT: addi 8, 4, -8
161+
; LINUX64LE-NEXT: addi 7, 3, -8
162+
; LINUX64LE-NEXT: li 9, 8
163+
; LINUX64LE-NEXT: li 10, 12
164+
; LINUX64LE-NEXT: li 11, 4
165+
; LINUX64LE-NEXT: addi 6, 6, -2
166+
; LINUX64LE-NEXT: rldicl 6, 6, 63, 1
167+
; LINUX64LE-NEXT: addi 6, 6, 1
168+
; LINUX64LE-NEXT: mtctr 6
169+
; LINUX64LE-NEXT: li 6, 0
170+
; LINUX64LE-NEXT: .p2align 4
171+
; LINUX64LE-NEXT: .LBB0_3: # %for.body
172+
; LINUX64LE-NEXT: # =>This Inner Loop Header: Depth=1
173+
; LINUX64LE-NEXT: lxvwsx 0, 8, 9
174+
; LINUX64LE-NEXT: xxspltib 1, 6
175+
; LINUX64LE-NEXT: addi 6, 6, 2
176+
; LINUX64LE-NEXT: xxland 0, 0, 1
177+
; LINUX64LE-NEXT: xxsldwi 0, 0, 0, 3
178+
; LINUX64LE-NEXT: xscvspdpn 0, 0
179+
; LINUX64LE-NEXT: stfsu 0, 8(7)
180+
; LINUX64LE-NEXT: lxvwsx 0, 8, 10
181+
; LINUX64LE-NEXT: addi 8, 8, 8
182+
; LINUX64LE-NEXT: xxland 0, 0, 1
183+
; LINUX64LE-NEXT: stxvrwx 0, 7, 11
184+
; LINUX64LE-NEXT: bdnz .LBB0_3

0 commit comments

Comments
 (0)