1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mcpu=pwr9 -O3 -verify-machineinstrs -ppc-vsr-nums-as-vr \
3 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu \
4 ; RUN: < %s | FileCheck %s --check-prefixes=P9LE
6 ; RUN: llc -mcpu=pwr9 -O3 -verify-machineinstrs -ppc-vsr-nums-as-vr \
7 ; RUN: -mtriple=powerpc64-unknown-linux-gnu \
8 ; RUN: < %s | FileCheck %s --check-prefixes=P9BE
10 ; RUN: llc -mcpu=pwr9 -O3 -verify-machineinstrs -ppc-vsr-nums-as-vr \
11 ; RUN: -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi \
12 ; RUN: < %s | FileCheck %s --check-prefixes=P9BE-AIX
14 ; RUN: llc -mcpu=pwr9 -O3 -verify-machineinstrs -ppc-vsr-nums-as-vr \
15 ; RUN: -mtriple=powerpc-ibm-aix-xcoff -vec-extabi \
16 ; RUN: < %s | FileCheck %s --check-prefixes=P9BE-AIX32
; Function test64
; Reads two <4 x i16> vectors (8 bytes each, align 1): one at
; %pix2 + sext(%i_pix2) and one 4 bytes past that address, so the two loads
; overlap by 4 bytes. Each vector has its lanes reversed, is zero-extended to
; <4 x i32> and negated; the second one is additionally shifted left by 16
; before the two are added. A shuffled form of the sum is stored through an
; undef pointer.
; The per-target assembly expectations that follow are produced by
; utils/update_llc_test_checks.py (see the note at the top of this file).
18 define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
20 ; P9LE: # %bb.0: # %entry
21 ; P9LE-NEXT: add 5, 3, 4
22 ; P9LE-NEXT: lfdx 0, 3, 4
23 ; P9LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
24 ; P9LE-NEXT: xxlxor 2, 2, 2
25 ; P9LE-NEXT: vspltisw 4, 8
26 ; P9LE-NEXT: lxsd 3, 4(5)
27 ; P9LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
28 ; P9LE-NEXT: vadduwm 4, 4, 4
29 ; P9LE-NEXT: lxv 1, 0(3)
30 ; P9LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
31 ; P9LE-NEXT: addi 3, 3, .LCPI0_1@toc@l
32 ; P9LE-NEXT: xxperm 2, 0, 1
33 ; P9LE-NEXT: lxv 0, 0(3)
34 ; P9LE-NEXT: xxperm 3, 3, 0
35 ; P9LE-NEXT: vnegw 3, 3
36 ; P9LE-NEXT: vslw 3, 3, 4
37 ; P9LE-NEXT: vsubuwm 2, 3, 2
38 ; P9LE-NEXT: xxswapd 0, 2
39 ; P9LE-NEXT: stxv 0, 0(3)
43 ; P9BE: # %bb.0: # %entry
44 ; P9BE-NEXT: add 5, 3, 4
45 ; P9BE-NEXT: lxsdx 2, 3, 4
46 ; P9BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
47 ; P9BE-NEXT: xxlxor 1, 1, 1
48 ; P9BE-NEXT: vspltisw 4, 8
49 ; P9BE-NEXT: lxsd 3, 4(5)
50 ; P9BE-NEXT: addi 3, 3, .LCPI0_0@toc@l
51 ; P9BE-NEXT: vadduwm 4, 4, 4
52 ; P9BE-NEXT: lxv 0, 0(3)
53 ; P9BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
54 ; P9BE-NEXT: addi 3, 3, .LCPI0_1@toc@l
55 ; P9BE-NEXT: xxperm 2, 1, 0
56 ; P9BE-NEXT: lxv 0, 0(3)
57 ; P9BE-NEXT: xxperm 3, 3, 0
58 ; P9BE-NEXT: vnegw 3, 3
59 ; P9BE-NEXT: vslw 3, 3, 4
60 ; P9BE-NEXT: vsubuwm 2, 3, 2
61 ; P9BE-NEXT: xxswapd 0, 2
62 ; P9BE-NEXT: stxv 0, 0(3)
65 ; P9BE-AIX-LABEL: test64:
66 ; P9BE-AIX: # %bb.0: # %entry
67 ; P9BE-AIX-NEXT: add 5, 3, 4
68 ; P9BE-AIX-NEXT: lxsdx 2, 3, 4
69 ; P9BE-AIX-NEXT: ld 3, L..C0(2) # %const.0
70 ; P9BE-AIX-NEXT: xxlxor 1, 1, 1
71 ; P9BE-AIX-NEXT: vspltisw 4, 8
72 ; P9BE-AIX-NEXT: lxsd 3, 4(5)
73 ; P9BE-AIX-NEXT: lxv 0, 0(3)
74 ; P9BE-AIX-NEXT: ld 3, L..C1(2) # %const.1
75 ; P9BE-AIX-NEXT: vadduwm 4, 4, 4
76 ; P9BE-AIX-NEXT: xxperm 2, 1, 0
77 ; P9BE-AIX-NEXT: lxv 0, 0(3)
78 ; P9BE-AIX-NEXT: xxperm 3, 3, 0
79 ; P9BE-AIX-NEXT: vnegw 3, 3
80 ; P9BE-AIX-NEXT: vslw 3, 3, 4
81 ; P9BE-AIX-NEXT: vsubuwm 2, 3, 2
82 ; P9BE-AIX-NEXT: xxswapd 0, 2
83 ; P9BE-AIX-NEXT: stxv 0, 0(3)
86 ; P9BE-AIX32-LABEL: test64:
87 ; P9BE-AIX32: # %bb.0: # %entry
88 ; P9BE-AIX32-NEXT: lwzux 4, 3, 4
89 ; P9BE-AIX32-NEXT: xxlxor 2, 2, 2
90 ; P9BE-AIX32-NEXT: vspltisw 4, 8
91 ; P9BE-AIX32-NEXT: stw 4, -48(1)
92 ; P9BE-AIX32-NEXT: vadduwm 4, 4, 4
93 ; P9BE-AIX32-NEXT: lwz 4, 4(3)
94 ; P9BE-AIX32-NEXT: lxv 0, -48(1)
95 ; P9BE-AIX32-NEXT: stw 4, -32(1)
96 ; P9BE-AIX32-NEXT: lwz 4, L..C0(2) # %const.0
97 ; P9BE-AIX32-NEXT: lxv 1, -32(1)
98 ; P9BE-AIX32-NEXT: lwz 3, 8(3)
99 ; P9BE-AIX32-NEXT: stw 3, -16(1)
100 ; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.1
101 ; P9BE-AIX32-NEXT: xxmrghw 2, 0, 1
102 ; P9BE-AIX32-NEXT: lxv 0, 0(4)
103 ; P9BE-AIX32-NEXT: xxperm 2, 2, 0
104 ; P9BE-AIX32-NEXT: lxv 0, -16(1)
105 ; P9BE-AIX32-NEXT: xxmrghw 3, 1, 0
106 ; P9BE-AIX32-NEXT: lxv 0, 0(3)
107 ; P9BE-AIX32-NEXT: xxperm 3, 3, 0
108 ; P9BE-AIX32-NEXT: vnegw 3, 3
109 ; P9BE-AIX32-NEXT: vslw 3, 3, 4
110 ; P9BE-AIX32-NEXT: vsubuwm 2, 3, 2
111 ; P9BE-AIX32-NEXT: xxswapd 0, 2
112 ; P9BE-AIX32-NEXT: stxv 0, 0(3)
113 ; P9BE-AIX32-NEXT: blr
; Address computation: base = %pix2 + sext(%i_pix2); the second load uses base + 4 bytes.
115 %idx.ext63 = sext i32 %i_pix2 to i64
116 %add.ptr64 = getelementptr inbounds i8, ptr %pix2, i64 %idx.ext63
117 %arrayidx5.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 4
; First vector: load from base, reverse the four lanes, widen to i32, negate.
118 %0 = load <4 x i16>, ptr %add.ptr64, align 1
119 %reorder_shuffle117 = shufflevector <4 x i16> %0, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
120 %1 = zext <4 x i16> %reorder_shuffle117 to <4 x i32>
121 %2 = sub nsw <4 x i32> zeroinitializer, %1
; Second vector: load from base + 4, same reverse/widen/negate, then shift each lane left by 16.
122 %3 = load <4 x i16>, ptr %arrayidx5.1, align 1
123 %reorder_shuffle115 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
124 %4 = zext <4 x i16> %reorder_shuffle115 to <4 x i32>
125 %5 = sub nsw <4 x i32> zeroinitializer, %4
126 %6 = shl nsw <4 x i32> %5, <i32 16, i32 16, i32 16, i32 16>
; Sum of the shifted second vector and the negated first vector.
127 %7 = add nsw <4 x i32> %6, %2
; Subtracting zeroinitializer leaves the value of %7 unchanged.
128 %8 = sub nsw <4 x i32> %7, zeroinitializer
; Mask <2, 7, 0, 5>: result lanes 1 and 3 are lanes 3 and 1 of %8; result lanes 0 and 2 come from the undef operand.
129 %9 = shufflevector <4 x i32> undef, <4 x i32> %8, <4 x i32> <i32 2, i32 7, i32 0, i32 5>
; Adding zeroinitializer leaves the value of %9 unchanged.
130 %10 = add nsw <4 x i32> zeroinitializer, %9
; Mask <0, 1, 6, 7>: keeps lanes 0-1 of %10; lanes 2-3 come from the undef operand.
131 %11 = shufflevector <4 x i32> %10, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; The destination pointer is undef; the store only keeps the computation live.
132 store <4 x i32> %11, ptr undef, align 16
; Function test32
; Same computation shape as the <4 x i16> variant, but the two source vectors
; are <4 x i8> (4 bytes each, align 1): one at %pix2 + sext(%i_pix2) and one
; 4 bytes past it, so the loads are adjacent rather than overlapping. Each is
; lane-reversed, zero-extended to <4 x i32> and negated; the second is shifted
; left by 16 before the add. A shuffled form of the sum is stored through an
; undef pointer.
; The per-target assembly expectations that follow are produced by
; utils/update_llc_test_checks.py (see the note at the top of this file).
136 define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
137 ; P9LE-LABEL: test32:
138 ; P9LE: # %bb.0: # %entry
139 ; P9LE-NEXT: add 5, 3, 4
140 ; P9LE-NEXT: lxsiwzx 2, 3, 4
141 ; P9LE-NEXT: addis 3, 2, .LCPI1_0@toc@ha
142 ; P9LE-NEXT: xxlxor 0, 0, 0
143 ; P9LE-NEXT: vspltisw 4, 8
144 ; P9LE-NEXT: addi 3, 3, .LCPI1_0@toc@l
145 ; P9LE-NEXT: lxv 1, 0(3)
147 ; P9LE-NEXT: vadduwm 4, 4, 4
148 ; P9LE-NEXT: lxsiwzx 3, 5, 3
149 ; P9LE-NEXT: xxperm 2, 0, 1
150 ; P9LE-NEXT: xxperm 3, 0, 1
151 ; P9LE-NEXT: vnegw 3, 3
152 ; P9LE-NEXT: vslw 3, 3, 4
153 ; P9LE-NEXT: vsubuwm 2, 3, 2
154 ; P9LE-NEXT: xxswapd 0, 2
155 ; P9LE-NEXT: stxv 0, 0(3)
158 ; P9BE-LABEL: test32:
159 ; P9BE: # %bb.0: # %entry
160 ; P9BE-NEXT: add 5, 3, 4
161 ; P9BE-NEXT: lxsiwzx 2, 3, 4
162 ; P9BE-NEXT: addis 3, 2, .LCPI1_0@toc@ha
163 ; P9BE-NEXT: xxlxor 0, 0, 0
164 ; P9BE-NEXT: vspltisw 4, 8
165 ; P9BE-NEXT: addi 3, 3, .LCPI1_0@toc@l
166 ; P9BE-NEXT: lxv 1, 0(3)
168 ; P9BE-NEXT: vadduwm 4, 4, 4
169 ; P9BE-NEXT: lxsiwzx 3, 5, 3
170 ; P9BE-NEXT: xxperm 2, 0, 1
171 ; P9BE-NEXT: xxperm 3, 0, 1
172 ; P9BE-NEXT: vnegw 3, 3
173 ; P9BE-NEXT: vslw 3, 3, 4
174 ; P9BE-NEXT: vsubuwm 2, 3, 2
175 ; P9BE-NEXT: xxswapd 0, 2
176 ; P9BE-NEXT: stxv 0, 0(3)
179 ; P9BE-AIX-LABEL: test32:
180 ; P9BE-AIX: # %bb.0: # %entry
181 ; P9BE-AIX-NEXT: add 5, 3, 4
182 ; P9BE-AIX-NEXT: lxsiwzx 2, 3, 4
183 ; P9BE-AIX-NEXT: ld 3, L..C2(2) # %const.0
184 ; P9BE-AIX-NEXT: xxlxor 0, 0, 0
185 ; P9BE-AIX-NEXT: vspltisw 4, 8
186 ; P9BE-AIX-NEXT: lxv 1, 0(3)
187 ; P9BE-AIX-NEXT: li 3, 4
188 ; P9BE-AIX-NEXT: vadduwm 4, 4, 4
189 ; P9BE-AIX-NEXT: lxsiwzx 3, 5, 3
190 ; P9BE-AIX-NEXT: xxperm 2, 0, 1
191 ; P9BE-AIX-NEXT: xxperm 3, 0, 1
192 ; P9BE-AIX-NEXT: vnegw 3, 3
193 ; P9BE-AIX-NEXT: vslw 3, 3, 4
194 ; P9BE-AIX-NEXT: vsubuwm 2, 3, 2
195 ; P9BE-AIX-NEXT: xxswapd 0, 2
196 ; P9BE-AIX-NEXT: stxv 0, 0(3)
199 ; P9BE-AIX32-LABEL: test32:
200 ; P9BE-AIX32: # %bb.0: # %entry
201 ; P9BE-AIX32-NEXT: add 5, 3, 4
202 ; P9BE-AIX32-NEXT: lxsiwzx 2, 3, 4
203 ; P9BE-AIX32-NEXT: lwz 3, L..C2(2) # %const.0
204 ; P9BE-AIX32-NEXT: xxlxor 0, 0, 0
205 ; P9BE-AIX32-NEXT: vspltisw 4, 8
206 ; P9BE-AIX32-NEXT: lxv 1, 0(3)
207 ; P9BE-AIX32-NEXT: li 3, 4
208 ; P9BE-AIX32-NEXT: vadduwm 4, 4, 4
209 ; P9BE-AIX32-NEXT: lxsiwzx 3, 5, 3
210 ; P9BE-AIX32-NEXT: xxperm 2, 0, 1
211 ; P9BE-AIX32-NEXT: xxperm 3, 0, 1
212 ; P9BE-AIX32-NEXT: vnegw 3, 3
213 ; P9BE-AIX32-NEXT: vslw 3, 3, 4
214 ; P9BE-AIX32-NEXT: vsubuwm 2, 3, 2
215 ; P9BE-AIX32-NEXT: xxswapd 0, 2
216 ; P9BE-AIX32-NEXT: stxv 0, 0(3)
217 ; P9BE-AIX32-NEXT: blr
; Address computation: base = %pix2 + sext(%i_pix2); the second load uses base + 4 bytes.
219 %idx.ext63 = sext i32 %i_pix2 to i64
220 %add.ptr64 = getelementptr inbounds i8, ptr %pix2, i64 %idx.ext63
221 %arrayidx5.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 4
; First vector: load four bytes from base, reverse the lanes, widen to i32, negate.
222 %0 = load <4 x i8>, ptr %add.ptr64, align 1
223 %reorder_shuffle117 = shufflevector <4 x i8> %0, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
224 %1 = zext <4 x i8> %reorder_shuffle117 to <4 x i32>
225 %2 = sub nsw <4 x i32> zeroinitializer, %1
; Second vector: load four bytes from base + 4, same reverse/widen/negate, then shift each lane left by 16.
226 %3 = load <4 x i8>, ptr %arrayidx5.1, align 1
227 %reorder_shuffle115 = shufflevector <4 x i8> %3, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
228 %4 = zext <4 x i8> %reorder_shuffle115 to <4 x i32>
229 %5 = sub nsw <4 x i32> zeroinitializer, %4
230 %6 = shl nsw <4 x i32> %5, <i32 16, i32 16, i32 16, i32 16>
; Sum of the shifted second vector and the negated first vector.
231 %7 = add nsw <4 x i32> %6, %2
; Subtracting zeroinitializer leaves the value of %7 unchanged.
232 %8 = sub nsw <4 x i32> %7, zeroinitializer
; Mask <2, 7, 0, 5>: result lanes 1 and 3 are lanes 3 and 1 of %8; result lanes 0 and 2 come from the undef operand.
233 %9 = shufflevector <4 x i32> undef, <4 x i32> %8, <4 x i32> <i32 2, i32 7, i32 0, i32 5>
; Adding zeroinitializer leaves the value of %9 unchanged.
234 %10 = add nsw <4 x i32> zeroinitializer, %9
; Mask <0, 1, 6, 7>: keeps lanes 0-1 of %10; lanes 2-3 come from the undef operand.
235 %11 = shufflevector <4 x i32> %10, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; The destination pointer is undef; the store only keeps the computation live.
236 store <4 x i32> %11, ptr undef, align 16
; Function test16
; Loads two i16 scalars, %sums[%delta] and %sums[%delta + 8] (indices are
; sign-extended to i64), inserts them into lanes 2 and 3 of an otherwise
; undef <4 x i16>, zero-extends to <4 x i32>, and runs a negate / select /
; pairwise-add sequence. Lane 0 of the final vector is extracted and compared
; (signed less-than) against %thresh to pick between the if.then and if.end
; blocks.
; The per-target assembly expectations that follow are produced by
; utils/update_llc_test_checks.py (see the note at the top of this file).
240 define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) {
241 ; P9LE-LABEL: test16:
242 ; P9LE: # %bb.0: # %entry
243 ; P9LE-NEXT: sldi 4, 4, 1
244 ; P9LE-NEXT: li 7, 16
245 ; P9LE-NEXT: add 6, 3, 4
246 ; P9LE-NEXT: lxsihzx 4, 3, 4
247 ; P9LE-NEXT: addis 3, 2, .LCPI2_0@toc@ha
248 ; P9LE-NEXT: lxsihzx 2, 6, 7
250 ; P9LE-NEXT: addi 3, 3, .LCPI2_0@toc@l
251 ; P9LE-NEXT: mtvsrd 3, 6
252 ; P9LE-NEXT: lxv 0, 0(3)
254 ; P9LE-NEXT: vmrghh 4, 3, 4
255 ; P9LE-NEXT: vmrghh 2, 3, 2
256 ; P9LE-NEXT: vsplth 3, 3, 3
257 ; P9LE-NEXT: xxmrglw 3, 4, 3
258 ; P9LE-NEXT: xxperm 3, 2, 0
259 ; P9LE-NEXT: xxspltw 2, 3, 2
260 ; P9LE-NEXT: vadduwm 2, 3, 2
261 ; P9LE-NEXT: vextuwrx 3, 3, 2
262 ; P9LE-NEXT: cmpw 3, 5
263 ; P9LE-NEXT: bgelr+ 0
264 ; P9LE-NEXT: # %bb.1: # %if.then
266 ; P9BE-LABEL: test16:
267 ; P9BE: # %bb.0: # %entry
268 ; P9BE-NEXT: sldi 4, 4, 1
269 ; P9BE-NEXT: li 7, 16
270 ; P9BE-NEXT: add 6, 3, 4
271 ; P9BE-NEXT: lxsihzx 1, 3, 4
272 ; P9BE-NEXT: addis 3, 2, .LCPI2_1@toc@ha
273 ; P9BE-NEXT: lxsihzx 2, 6, 7
274 ; P9BE-NEXT: addis 6, 2, .LCPI2_0@toc@ha
275 ; P9BE-NEXT: addi 3, 3, .LCPI2_1@toc@l
276 ; P9BE-NEXT: addi 6, 6, .LCPI2_0@toc@l
277 ; P9BE-NEXT: lxv 0, 0(6)
279 ; P9BE-NEXT: mtvsrwz 3, 6
280 ; P9BE-NEXT: xxperm 2, 3, 0
281 ; P9BE-NEXT: xxperm 1, 3, 0
282 ; P9BE-NEXT: vsplth 3, 3, 3
283 ; P9BE-NEXT: lxv 0, 0(3)
285 ; P9BE-NEXT: xxmrghw 3, 3, 1
286 ; P9BE-NEXT: xxperm 2, 3, 0
287 ; P9BE-NEXT: xxspltw 3, 2, 1
288 ; P9BE-NEXT: vadduwm 2, 2, 3
289 ; P9BE-NEXT: vextuwlx 3, 3, 2
290 ; P9BE-NEXT: cmpw 3, 5
291 ; P9BE-NEXT: bgelr+ 0
292 ; P9BE-NEXT: # %bb.1: # %if.then
294 ; P9BE-AIX-LABEL: test16:
295 ; P9BE-AIX: # %bb.0: # %entry
296 ; P9BE-AIX-NEXT: sldi 4, 4, 1
297 ; P9BE-AIX-NEXT: li 7, 16
298 ; P9BE-AIX-NEXT: add 6, 3, 4
299 ; P9BE-AIX-NEXT: lxsihzx 1, 3, 4
300 ; P9BE-AIX-NEXT: ld 3, L..C3(2) # %const.1
301 ; P9BE-AIX-NEXT: lxsihzx 2, 6, 7
302 ; P9BE-AIX-NEXT: ld 6, L..C4(2) # %const.0
303 ; P9BE-AIX-NEXT: lxv 0, 0(6)
304 ; P9BE-AIX-NEXT: li 6, 0
305 ; P9BE-AIX-NEXT: mtvsrwz 3, 6
306 ; P9BE-AIX-NEXT: xxperm 2, 3, 0
307 ; P9BE-AIX-NEXT: xxperm 1, 3, 0
308 ; P9BE-AIX-NEXT: vsplth 3, 3, 3
309 ; P9BE-AIX-NEXT: lxv 0, 0(3)
310 ; P9BE-AIX-NEXT: li 3, 0
311 ; P9BE-AIX-NEXT: xxmrghw 3, 3, 1
312 ; P9BE-AIX-NEXT: xxperm 2, 3, 0
313 ; P9BE-AIX-NEXT: xxspltw 3, 2, 1
314 ; P9BE-AIX-NEXT: vadduwm 2, 2, 3
315 ; P9BE-AIX-NEXT: vextuwlx 3, 3, 2
316 ; P9BE-AIX-NEXT: cmpw 3, 5
317 ; P9BE-AIX-NEXT: bgelr+ 0
318 ; P9BE-AIX-NEXT: # %bb.1: # %if.then
320 ; P9BE-AIX32-LABEL: test16:
321 ; P9BE-AIX32: # %bb.0: # %entry
322 ; P9BE-AIX32-NEXT: slwi 4, 4, 1
323 ; P9BE-AIX32-NEXT: li 6, 0
324 ; P9BE-AIX32-NEXT: lhzux 4, 3, 4
325 ; P9BE-AIX32-NEXT: lhz 3, 16(3)
326 ; P9BE-AIX32-NEXT: sth 6, -64(1)
327 ; P9BE-AIX32-NEXT: lxv 2, -64(1)
328 ; P9BE-AIX32-NEXT: sth 4, -48(1)
329 ; P9BE-AIX32-NEXT: lxv 4, -48(1)
330 ; P9BE-AIX32-NEXT: sth 3, -32(1)
331 ; P9BE-AIX32-NEXT: lwz 3, L..C3(2) # %const.0
332 ; P9BE-AIX32-NEXT: lxv 3, -32(1)
333 ; P9BE-AIX32-NEXT: vmrghh 4, 2, 4
334 ; P9BE-AIX32-NEXT: lxv 0, 0(3)
335 ; P9BE-AIX32-NEXT: vmrghh 3, 2, 3
336 ; P9BE-AIX32-NEXT: vsplth 2, 2, 0
337 ; P9BE-AIX32-NEXT: xxmrghw 2, 2, 4
338 ; P9BE-AIX32-NEXT: xxperm 3, 2, 0
339 ; P9BE-AIX32-NEXT: xxspltw 2, 3, 1
340 ; P9BE-AIX32-NEXT: vadduwm 2, 3, 2
341 ; P9BE-AIX32-NEXT: stxv 2, -16(1)
342 ; P9BE-AIX32-NEXT: lwz 3, -16(1)
343 ; P9BE-AIX32-NEXT: cmpw 3, 5
344 ; P9BE-AIX32-NEXT: bgelr+ 0
345 ; P9BE-AIX32-NEXT: # %bb.1: # %if.then
; Element indices: %delta and %delta + 8, both sign-extended to i64.
347 %idxprom = sext i32 %delta to i64
348 %add14 = add nsw i32 %delta, 8
349 %idxprom15 = sext i32 %add14 to i64
352 for.body: ; preds = %entry
; Two scalar i16 loads from %sums at the indices computed above.
353 %arrayidx8 = getelementptr inbounds i16, ptr %sums, i64 %idxprom
354 %0 = load i16, ptr %arrayidx8, align 2
355 %arrayidx16 = getelementptr inbounds i16, ptr %sums, i64 %idxprom15
356 %1 = load i16, ptr %arrayidx16, align 2
; Build a <4 x i16> with the loaded values in lanes 2 and 3; lanes 0 and 1 stay undef.
357 %2 = insertelement <4 x i16> undef, i16 %0, i32 2
358 %3 = insertelement <4 x i16> %2, i16 %1, i32 3
359 %4 = zext <4 x i16> %3 to <4 x i32>
; %5 is the negation of %4 and %6 is the negation of %5; the select mask is undef.
360 %5 = sub nsw <4 x i32> zeroinitializer, %4
361 %6 = sub nsw <4 x i32> zeroinitializer, %5
362 %7 = select <4 x i1> undef, <4 x i32> %6, <4 x i32> %5
; Adding zeroinitializer leaves the value of %7 unchanged.
363 %bin.rdx = add <4 x i32> %7, zeroinitializer
; Move lane 1 into lane 0 and add, so lane 0 of %bin.rdx55 is lane 0 + lane 1 of %bin.rdx.
364 %rdx.shuf54 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
365 %bin.rdx55 = add <4 x i32> %bin.rdx, %rdx.shuf54
366 %8 = extractelement <4 x i32> %bin.rdx55, i32 0
; Adding 0 leaves the extracted value unchanged.
367 %op.extra = add nuw i32 %8, 0
; Signed comparison against %thresh selects the successor block.
368 %cmp25 = icmp slt i32 %op.extra, %thresh
369 br i1 %cmp25, label %if.then, label %if.end
371 if.then: ; preds = %for.body
374 if.end: ; preds = %for.body
; Function test8
; Byte-sized counterpart of the i16 scalar-load test: loads two i8 scalars,
; %sums[%delta] and %sums[%delta + 8] (indices are sign-extended to i64),
; inserts them into lanes 2 and 3 of an otherwise undef <4 x i8>,
; zero-extends to <4 x i32>, and runs a negate / select / pairwise-add
; sequence. Lane 0 of the final vector is extracted and compared (signed
; less-than) against %thresh to pick between the if.then and if.end blocks.
; The per-target assembly expectations that follow are produced by
; utils/update_llc_test_checks.py (see the note at the top of this file).
378 define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) {
380 ; P9LE: # %bb.0: # %entry
381 ; P9LE-NEXT: add 6, 3, 4
382 ; P9LE-NEXT: lxsibzx 2, 3, 4
384 ; P9LE-NEXT: mtvsrd 3, 3
386 ; P9LE-NEXT: lxsibzx 5, 6, 3
387 ; P9LE-NEXT: vspltb 4, 3, 7
388 ; P9LE-NEXT: addis 3, 2, .LCPI3_0@toc@ha
389 ; P9LE-NEXT: vmrghb 2, 3, 2
390 ; P9LE-NEXT: addi 3, 3, .LCPI3_0@toc@l
391 ; P9LE-NEXT: vmrglh 2, 2, 4
392 ; P9LE-NEXT: lxv 0, 0(3)
394 ; P9LE-NEXT: vmrghb 3, 3, 5
395 ; P9LE-NEXT: xxmrglw 2, 2, 4
396 ; P9LE-NEXT: vmrglh 3, 3, 4
397 ; P9LE-NEXT: xxmrglw 3, 4, 3
398 ; P9LE-NEXT: xxperm 2, 3, 0
399 ; P9LE-NEXT: xxspltw 3, 2, 2
400 ; P9LE-NEXT: vadduwm 2, 2, 3
401 ; P9LE-NEXT: vextuwrx 3, 3, 2
402 ; P9LE-NEXT: cmpw 3, 5
403 ; P9LE-NEXT: bgelr+ 0
404 ; P9LE-NEXT: # %bb.1: # %if.then
407 ; P9BE: # %bb.0: # %entry
408 ; P9BE-NEXT: add 6, 3, 4
410 ; P9BE-NEXT: lxsibzx 3, 3, 4
411 ; P9BE-NEXT: addis 3, 2, .LCPI3_1@toc@ha
412 ; P9BE-NEXT: lxsibzx 0, 6, 7
413 ; P9BE-NEXT: addis 6, 2, .LCPI3_0@toc@ha
414 ; P9BE-NEXT: addi 3, 3, .LCPI3_1@toc@l
415 ; P9BE-NEXT: addi 6, 6, .LCPI3_0@toc@l
416 ; P9BE-NEXT: lxv 1, 0(6)
418 ; P9BE-NEXT: mtvsrwz 2, 6
419 ; P9BE-NEXT: xxperm 0, 2, 1
420 ; P9BE-NEXT: xxperm 3, 2, 1
421 ; P9BE-NEXT: vspltb 2, 2, 7
422 ; P9BE-NEXT: vmrghh 3, 3, 2
423 ; P9BE-NEXT: xxspltw 1, 2, 0
424 ; P9BE-NEXT: xxmrghw 3, 3, 0
425 ; P9BE-NEXT: lxv 0, 0(3)
427 ; P9BE-NEXT: xxperm 3, 1, 0
428 ; P9BE-NEXT: xxspltw 2, 3, 1
429 ; P9BE-NEXT: vadduwm 2, 3, 2
430 ; P9BE-NEXT: vextuwlx 3, 3, 2
431 ; P9BE-NEXT: cmpw 3, 5
432 ; P9BE-NEXT: bgelr+ 0
433 ; P9BE-NEXT: # %bb.1: # %if.then
435 ; P9BE-AIX-LABEL: test8:
436 ; P9BE-AIX: # %bb.0: # %entry
437 ; P9BE-AIX-NEXT: add 6, 3, 4
438 ; P9BE-AIX-NEXT: li 7, 8
439 ; P9BE-AIX-NEXT: lxsibzx 3, 3, 4
440 ; P9BE-AIX-NEXT: ld 3, L..C5(2) # %const.1
441 ; P9BE-AIX-NEXT: lxsibzx 0, 6, 7
442 ; P9BE-AIX-NEXT: ld 6, L..C6(2) # %const.0
443 ; P9BE-AIX-NEXT: lxv 1, 0(6)
444 ; P9BE-AIX-NEXT: li 6, 0
445 ; P9BE-AIX-NEXT: mtvsrwz 2, 6
446 ; P9BE-AIX-NEXT: xxperm 0, 2, 1
447 ; P9BE-AIX-NEXT: xxperm 3, 2, 1
448 ; P9BE-AIX-NEXT: vspltb 2, 2, 7
449 ; P9BE-AIX-NEXT: vmrghh 3, 3, 2
450 ; P9BE-AIX-NEXT: xxspltw 1, 2, 0
451 ; P9BE-AIX-NEXT: xxmrghw 3, 3, 0
452 ; P9BE-AIX-NEXT: lxv 0, 0(3)
453 ; P9BE-AIX-NEXT: li 3, 0
454 ; P9BE-AIX-NEXT: xxperm 3, 1, 0
455 ; P9BE-AIX-NEXT: xxspltw 2, 3, 1
456 ; P9BE-AIX-NEXT: vadduwm 2, 3, 2
457 ; P9BE-AIX-NEXT: vextuwlx 3, 3, 2
458 ; P9BE-AIX-NEXT: cmpw 3, 5
459 ; P9BE-AIX-NEXT: bgelr+ 0
460 ; P9BE-AIX-NEXT: # %bb.1: # %if.then
462 ; P9BE-AIX32-LABEL: test8:
463 ; P9BE-AIX32: # %bb.0: # %entry
464 ; P9BE-AIX32-NEXT: add 6, 3, 4
465 ; P9BE-AIX32-NEXT: li 7, 8
466 ; P9BE-AIX32-NEXT: lxsibzx 3, 3, 4
467 ; P9BE-AIX32-NEXT: lwz 3, L..C4(2) # %const.1
468 ; P9BE-AIX32-NEXT: lxsibzx 0, 6, 7
469 ; P9BE-AIX32-NEXT: lwz 6, L..C5(2) # %const.0
470 ; P9BE-AIX32-NEXT: lxv 1, 0(6)
471 ; P9BE-AIX32-NEXT: li 6, 0
472 ; P9BE-AIX32-NEXT: mtvsrwz 2, 6
473 ; P9BE-AIX32-NEXT: xxperm 0, 2, 1
474 ; P9BE-AIX32-NEXT: xxperm 3, 2, 1
475 ; P9BE-AIX32-NEXT: vspltb 2, 2, 7
476 ; P9BE-AIX32-NEXT: vmrghh 3, 3, 2
477 ; P9BE-AIX32-NEXT: xxspltw 1, 2, 0
478 ; P9BE-AIX32-NEXT: xxmrghw 3, 3, 0
479 ; P9BE-AIX32-NEXT: lxv 0, 0(3)
480 ; P9BE-AIX32-NEXT: xxperm 3, 1, 0
481 ; P9BE-AIX32-NEXT: xxspltw 2, 3, 1
482 ; P9BE-AIX32-NEXT: vadduwm 2, 3, 2
483 ; P9BE-AIX32-NEXT: stxv 2, -16(1)
484 ; P9BE-AIX32-NEXT: lwz 3, -16(1)
485 ; P9BE-AIX32-NEXT: cmpw 3, 5
486 ; P9BE-AIX32-NEXT: bgelr+ 0
487 ; P9BE-AIX32-NEXT: # %bb.1: # %if.then
; Element indices: %delta and %delta + 8, both sign-extended to i64.
489 %idxprom = sext i32 %delta to i64
490 %add14 = add nsw i32 %delta, 8
491 %idxprom15 = sext i32 %add14 to i64
494 for.body: ; preds = %entry
; Two scalar i8 loads from %sums at the indices computed above (both declared align 2).
495 %arrayidx8 = getelementptr inbounds i8, ptr %sums, i64 %idxprom
496 %0 = load i8, ptr %arrayidx8, align 2
497 %arrayidx16 = getelementptr inbounds i8, ptr %sums, i64 %idxprom15
498 %1 = load i8, ptr %arrayidx16, align 2
; Build a <4 x i8> with the loaded values in lanes 2 and 3; lanes 0 and 1 stay undef.
499 %2 = insertelement <4 x i8> undef, i8 %0, i32 2
500 %3 = insertelement <4 x i8> %2, i8 %1, i32 3
501 %4 = zext <4 x i8> %3 to <4 x i32>
; %5 is the negation of %4 and %6 is the negation of %5; the select mask is undef.
502 %5 = sub nsw <4 x i32> zeroinitializer, %4
503 %6 = sub nsw <4 x i32> zeroinitializer, %5
504 %7 = select <4 x i1> undef, <4 x i32> %6, <4 x i32> %5
; Adding zeroinitializer leaves the value of %7 unchanged.
505 %bin.rdx = add <4 x i32> %7, zeroinitializer
; Move lane 1 into lane 0 and add, so lane 0 of %bin.rdx55 is lane 0 + lane 1 of %bin.rdx.
506 %rdx.shuf54 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
507 %bin.rdx55 = add <4 x i32> %bin.rdx, %rdx.shuf54
508 %8 = extractelement <4 x i32> %bin.rdx55, i32 0
; Adding 0 leaves the extracted value unchanged.
509 %op.extra = add nuw i32 %8, 0
; Signed comparison against %thresh selects the successor block.
510 %cmp25 = icmp slt i32 %op.extra, %thresh
511 br i1 %cmp25, label %if.then, label %if.end
513 if.then: ; preds = %for.body
516 if.end: ; preds = %for.body