1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mcpu=pwr9 -O3 -verify-machineinstrs -ppc-vsr-nums-as-vr \
3 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu \
4 ; RUN: < %s | FileCheck %s --check-prefixes=P9LE
6 ; RUN: llc -mcpu=pwr9 -O3 -verify-machineinstrs -ppc-vsr-nums-as-vr \
7 ; RUN: -mtriple=powerpc64-unknown-linux-gnu \
8 ; RUN: < %s | FileCheck %s --check-prefixes=P9BE
10 ; RUN: llc -mcpu=pwr9 -O3 -verify-machineinstrs -ppc-vsr-nums-as-vr \
11 ; RUN: -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi \
12 ; RUN: < %s | FileCheck %s --check-prefixes=P9BE-AIX
14 ; RUN: llc -mcpu=pwr9 -O3 -verify-machineinstrs -ppc-vsr-nums-as-vr \
15 ; RUN: -mtriple=powerpc-ibm-aix-xcoff -vec-extabi \
16 ; RUN: < %s | FileCheck %s --check-prefixes=P9BE-AIX32
; test64 - combine two reversed, negated <4 x i16> vectors.
; Both vectors are loaded with align 1 relative to %pix2 + sext(%i_pix2): the
; first at offset 0 and the second at offset 4, so the two 8-byte loads overlap
; by 4 bytes. Each vector is lane-reversed (mask <3,2,1,0>), zero-extended to
; <4 x i32> and negated; the second is additionally shifted left by 16 before
; the two are added. The sum goes through two more shuffles and is stored
; through an undef pointer.
; The assertions below expect the 64-bit runs to do each load with one scalar
; doubleword load (lfdx/lxsdx/lxsd) followed by xxperm, and the 32-bit AIX run
; to build each vector from lxvwsx loads merged with xxmrghw before xxperm.
18 define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
20 ; P9LE: # %bb.0: # %entry
21 ; P9LE-NEXT: add 5, 3, 4
22 ; P9LE-NEXT: lfdx 0, 3, 4
23 ; P9LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
24 ; P9LE-NEXT: xxlxor 2, 2, 2
25 ; P9LE-NEXT: vspltisw 4, 8
26 ; P9LE-NEXT: lxsd 3, 4(5)
27 ; P9LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
28 ; P9LE-NEXT: vadduwm 4, 4, 4
29 ; P9LE-NEXT: lxv 1, 0(3)
30 ; P9LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
31 ; P9LE-NEXT: addi 3, 3, .LCPI0_1@toc@l
32 ; P9LE-NEXT: xxperm 2, 0, 1
33 ; P9LE-NEXT: lxv 0, 0(3)
34 ; P9LE-NEXT: xxperm 3, 3, 0
35 ; P9LE-NEXT: vnegw 3, 3
36 ; P9LE-NEXT: vslw 3, 3, 4
37 ; P9LE-NEXT: vsubuwm 2, 3, 2
38 ; P9LE-NEXT: xxswapd 0, 2
39 ; P9LE-NEXT: stxv 0, 0(3)
43 ; P9BE: # %bb.0: # %entry
44 ; P9BE-NEXT: add 5, 3, 4
45 ; P9BE-NEXT: lxsdx 2, 3, 4
46 ; P9BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
47 ; P9BE-NEXT: xxlxor 1, 1, 1
48 ; P9BE-NEXT: vspltisw 4, 8
49 ; P9BE-NEXT: lxsd 3, 4(5)
50 ; P9BE-NEXT: addi 3, 3, .LCPI0_0@toc@l
51 ; P9BE-NEXT: vadduwm 4, 4, 4
52 ; P9BE-NEXT: lxv 0, 0(3)
53 ; P9BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
54 ; P9BE-NEXT: addi 3, 3, .LCPI0_1@toc@l
55 ; P9BE-NEXT: xxperm 2, 1, 0
56 ; P9BE-NEXT: lxv 0, 0(3)
57 ; P9BE-NEXT: xxperm 3, 3, 0
58 ; P9BE-NEXT: vnegw 3, 3
59 ; P9BE-NEXT: vslw 3, 3, 4
60 ; P9BE-NEXT: vsubuwm 2, 3, 2
61 ; P9BE-NEXT: xxswapd 0, 2
62 ; P9BE-NEXT: stxv 0, 0(3)
65 ; P9BE-AIX-LABEL: test64:
66 ; P9BE-AIX: # %bb.0: # %entry
67 ; P9BE-AIX-NEXT: add 5, 3, 4
68 ; P9BE-AIX-NEXT: lxsdx 2, 3, 4
69 ; P9BE-AIX-NEXT: ld 3, L..C0(2) # %const.0
70 ; P9BE-AIX-NEXT: xxlxor 1, 1, 1
71 ; P9BE-AIX-NEXT: vspltisw 4, 8
72 ; P9BE-AIX-NEXT: lxsd 3, 4(5)
73 ; P9BE-AIX-NEXT: lxv 0, 0(3)
74 ; P9BE-AIX-NEXT: ld 3, L..C1(2) # %const.1
75 ; P9BE-AIX-NEXT: vadduwm 4, 4, 4
76 ; P9BE-AIX-NEXT: xxperm 2, 1, 0
77 ; P9BE-AIX-NEXT: lxv 0, 0(3)
78 ; P9BE-AIX-NEXT: xxperm 3, 3, 0
79 ; P9BE-AIX-NEXT: vnegw 3, 3
80 ; P9BE-AIX-NEXT: vslw 3, 3, 4
81 ; P9BE-AIX-NEXT: vsubuwm 2, 3, 2
82 ; P9BE-AIX-NEXT: xxswapd 0, 2
83 ; P9BE-AIX-NEXT: stxv 0, 0(3)
86 ; P9BE-AIX32-LABEL: test64:
87 ; P9BE-AIX32: # %bb.0: # %entry
88 ; P9BE-AIX32-NEXT: add 5, 3, 4
89 ; P9BE-AIX32-NEXT: lxvwsx 0, 3, 4
90 ; P9BE-AIX32-NEXT: li 3, 4
91 ; P9BE-AIX32-NEXT: xxlxor 2, 2, 2
92 ; P9BE-AIX32-NEXT: vspltisw 4, 8
93 ; P9BE-AIX32-NEXT: lxvwsx 1, 5, 3
94 ; P9BE-AIX32-NEXT: lwz 3, L..C0(2) # %const.0
95 ; P9BE-AIX32-NEXT: vadduwm 4, 4, 4
96 ; P9BE-AIX32-NEXT: xxmrghw 2, 0, 1
97 ; P9BE-AIX32-NEXT: lxv 0, 0(3)
98 ; P9BE-AIX32-NEXT: li 3, 8
99 ; P9BE-AIX32-NEXT: xxperm 2, 2, 0
100 ; P9BE-AIX32-NEXT: lxvwsx 0, 5, 3
101 ; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.1
102 ; P9BE-AIX32-NEXT: xxmrghw 3, 1, 0
103 ; P9BE-AIX32-NEXT: lxv 0, 0(3)
104 ; P9BE-AIX32-NEXT: xxperm 3, 3, 0
105 ; P9BE-AIX32-NEXT: vnegw 3, 3
106 ; P9BE-AIX32-NEXT: vslw 3, 3, 4
107 ; P9BE-AIX32-NEXT: vsubuwm 2, 3, 2
108 ; P9BE-AIX32-NEXT: xxswapd 0, 2
109 ; P9BE-AIX32-NEXT: stxv 0, 0(3)
110 ; P9BE-AIX32-NEXT: blr
; Address of the first vector, and of the second one 4 bytes further on.
112 %idx.ext63 = sext i32 %i_pix2 to i64
113 %add.ptr64 = getelementptr inbounds i8, ptr %pix2, i64 %idx.ext63
114 %arrayidx5.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 4
; First vector: load, reverse the lanes, widen to i32, negate.
115 %0 = load <4 x i16>, ptr %add.ptr64, align 1
116 %reorder_shuffle117 = shufflevector <4 x i16> %0, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
117 %1 = zext <4 x i16> %reorder_shuffle117 to <4 x i32>
118 %2 = sub nsw <4 x i32> zeroinitializer, %1
; Second vector: same steps, then shift the negated lanes left by 16.
119 %3 = load <4 x i16>, ptr %arrayidx5.1, align 1
120 %reorder_shuffle115 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
121 %4 = zext <4 x i16> %reorder_shuffle115 to <4 x i32>
122 %5 = sub nsw <4 x i32> zeroinitializer, %4
123 %6 = shl nsw <4 x i32> %5, <i32 16, i32 16, i32 16, i32 16>
; Add the two halves, permute the sum twice and store it through undef.
124 %7 = add nsw <4 x i32> %6, %2
125 %8 = sub nsw <4 x i32> %7, zeroinitializer
126 %9 = shufflevector <4 x i32> undef, <4 x i32> %8, <4 x i32> <i32 2, i32 7, i32 0, i32 5>
127 %10 = add nsw <4 x i32> zeroinitializer, %9
128 %11 = shufflevector <4 x i32> %10, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
129 store <4 x i32> %11, ptr undef, align 16
; test32 - combine two reversed, negated <4 x i8> vectors.
; Same computation as a <4 x i16> variant would perform, but on byte lanes:
; two 4-byte vectors are loaded with align 1 at offsets 0 and 4 from
; %pix2 + sext(%i_pix2), lane-reversed (mask <3,2,1,0>), zero-extended to
; <4 x i32> and negated. The second is shifted left by 16, the two are added,
; the sum is shuffled twice and stored through an undef pointer.
; The assertions below expect every run to load each vector with lxsiwzx and
; to permute it against a register zeroed by xxlxor, using one permute control
; vector fetched with lxv.
133 define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
134 ; P9LE-LABEL: test32:
135 ; P9LE: # %bb.0: # %entry
136 ; P9LE-NEXT: add 5, 3, 4
137 ; P9LE-NEXT: lxsiwzx 2, 3, 4
138 ; P9LE-NEXT: addis 3, 2, .LCPI1_0@toc@ha
139 ; P9LE-NEXT: xxlxor 0, 0, 0
140 ; P9LE-NEXT: vspltisw 4, 8
141 ; P9LE-NEXT: addi 3, 3, .LCPI1_0@toc@l
142 ; P9LE-NEXT: lxv 1, 0(3)
144 ; P9LE-NEXT: vadduwm 4, 4, 4
145 ; P9LE-NEXT: lxsiwzx 3, 5, 3
146 ; P9LE-NEXT: xxperm 2, 0, 1
147 ; P9LE-NEXT: xxperm 3, 0, 1
148 ; P9LE-NEXT: vnegw 3, 3
149 ; P9LE-NEXT: vslw 3, 3, 4
150 ; P9LE-NEXT: vsubuwm 2, 3, 2
151 ; P9LE-NEXT: xxswapd 0, 2
152 ; P9LE-NEXT: stxv 0, 0(3)
155 ; P9BE-LABEL: test32:
156 ; P9BE: # %bb.0: # %entry
157 ; P9BE-NEXT: add 5, 3, 4
158 ; P9BE-NEXT: lxsiwzx 2, 3, 4
159 ; P9BE-NEXT: addis 3, 2, .LCPI1_0@toc@ha
160 ; P9BE-NEXT: xxlxor 0, 0, 0
161 ; P9BE-NEXT: vspltisw 4, 8
162 ; P9BE-NEXT: addi 3, 3, .LCPI1_0@toc@l
163 ; P9BE-NEXT: lxv 1, 0(3)
165 ; P9BE-NEXT: vadduwm 4, 4, 4
166 ; P9BE-NEXT: lxsiwzx 3, 5, 3
167 ; P9BE-NEXT: xxperm 2, 0, 1
168 ; P9BE-NEXT: xxperm 3, 0, 1
169 ; P9BE-NEXT: vnegw 3, 3
170 ; P9BE-NEXT: vslw 3, 3, 4
171 ; P9BE-NEXT: vsubuwm 2, 3, 2
172 ; P9BE-NEXT: xxswapd 0, 2
173 ; P9BE-NEXT: stxv 0, 0(3)
176 ; P9BE-AIX-LABEL: test32:
177 ; P9BE-AIX: # %bb.0: # %entry
178 ; P9BE-AIX-NEXT: add 5, 3, 4
179 ; P9BE-AIX-NEXT: lxsiwzx 2, 3, 4
180 ; P9BE-AIX-NEXT: ld 3, L..C2(2) # %const.0
181 ; P9BE-AIX-NEXT: xxlxor 0, 0, 0
182 ; P9BE-AIX-NEXT: vspltisw 4, 8
183 ; P9BE-AIX-NEXT: lxv 1, 0(3)
184 ; P9BE-AIX-NEXT: li 3, 4
185 ; P9BE-AIX-NEXT: vadduwm 4, 4, 4
186 ; P9BE-AIX-NEXT: lxsiwzx 3, 5, 3
187 ; P9BE-AIX-NEXT: xxperm 2, 0, 1
188 ; P9BE-AIX-NEXT: xxperm 3, 0, 1
189 ; P9BE-AIX-NEXT: vnegw 3, 3
190 ; P9BE-AIX-NEXT: vslw 3, 3, 4
191 ; P9BE-AIX-NEXT: vsubuwm 2, 3, 2
192 ; P9BE-AIX-NEXT: xxswapd 0, 2
193 ; P9BE-AIX-NEXT: stxv 0, 0(3)
196 ; P9BE-AIX32-LABEL: test32:
197 ; P9BE-AIX32: # %bb.0: # %entry
198 ; P9BE-AIX32-NEXT: add 5, 3, 4
199 ; P9BE-AIX32-NEXT: lxsiwzx 2, 3, 4
200 ; P9BE-AIX32-NEXT: lwz 3, L..C2(2) # %const.0
201 ; P9BE-AIX32-NEXT: xxlxor 0, 0, 0
202 ; P9BE-AIX32-NEXT: vspltisw 4, 8
203 ; P9BE-AIX32-NEXT: lxv 1, 0(3)
204 ; P9BE-AIX32-NEXT: li 3, 4
205 ; P9BE-AIX32-NEXT: vadduwm 4, 4, 4
206 ; P9BE-AIX32-NEXT: lxsiwzx 3, 5, 3
207 ; P9BE-AIX32-NEXT: xxperm 2, 0, 1
208 ; P9BE-AIX32-NEXT: xxperm 3, 0, 1
209 ; P9BE-AIX32-NEXT: vnegw 3, 3
210 ; P9BE-AIX32-NEXT: vslw 3, 3, 4
211 ; P9BE-AIX32-NEXT: vsubuwm 2, 3, 2
212 ; P9BE-AIX32-NEXT: xxswapd 0, 2
213 ; P9BE-AIX32-NEXT: stxv 0, 0(3)
214 ; P9BE-AIX32-NEXT: blr
; Address of the first vector, and of the second one 4 bytes further on.
216 %idx.ext63 = sext i32 %i_pix2 to i64
217 %add.ptr64 = getelementptr inbounds i8, ptr %pix2, i64 %idx.ext63
218 %arrayidx5.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 4
; First vector: load, reverse the lanes, widen to i32, negate.
219 %0 = load <4 x i8>, ptr %add.ptr64, align 1
220 %reorder_shuffle117 = shufflevector <4 x i8> %0, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
221 %1 = zext <4 x i8> %reorder_shuffle117 to <4 x i32>
222 %2 = sub nsw <4 x i32> zeroinitializer, %1
; Second vector: same steps, then shift the negated lanes left by 16.
223 %3 = load <4 x i8>, ptr %arrayidx5.1, align 1
224 %reorder_shuffle115 = shufflevector <4 x i8> %3, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
225 %4 = zext <4 x i8> %reorder_shuffle115 to <4 x i32>
226 %5 = sub nsw <4 x i32> zeroinitializer, %4
227 %6 = shl nsw <4 x i32> %5, <i32 16, i32 16, i32 16, i32 16>
; Add the two halves, permute the sum twice and store it through undef.
228 %7 = add nsw <4 x i32> %6, %2
229 %8 = sub nsw <4 x i32> %7, zeroinitializer
230 %9 = shufflevector <4 x i32> undef, <4 x i32> %8, <4 x i32> <i32 2, i32 7, i32 0, i32 5>
231 %10 = add nsw <4 x i32> zeroinitializer, %9
232 %11 = shufflevector <4 x i32> %10, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
233 store <4 x i32> %11, ptr undef, align 16
; test16 - two scalar i16 loads inserted into lanes 2 and 3 of a <4 x i16>.
; The halfwords come from %sums indexed (in i16 units) by %delta and by
; %delta + 8. The vector is zero-extended to <4 x i32> and negated; a select
; with an undef condition then picks between that value and its re-negation.
; Lane 1 is added into lane 0, lane 0 is extracted, and a signed less-than
; compare against %thresh chooses between the if.then and if.end blocks.
; The assertions below expect the 64-bit runs to load with lxsihzx and to
; extract the result with vextuwrx/vextuwlx, while the 32-bit AIX run loads
; with lhzux/lhz, passes the values through r1-relative slots (sth + lxv) and
; reads the result back with stxv + lwz.
237 define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) {
238 ; P9LE-LABEL: test16:
239 ; P9LE: # %bb.0: # %entry
240 ; P9LE-NEXT: sldi 4, 4, 1
241 ; P9LE-NEXT: li 7, 16
242 ; P9LE-NEXT: add 6, 3, 4
243 ; P9LE-NEXT: lxsihzx 4, 3, 4
244 ; P9LE-NEXT: addis 3, 2, .LCPI2_0@toc@ha
245 ; P9LE-NEXT: lxsihzx 2, 6, 7
247 ; P9LE-NEXT: addi 3, 3, .LCPI2_0@toc@l
248 ; P9LE-NEXT: mtvsrd 3, 6
249 ; P9LE-NEXT: lxv 0, 0(3)
251 ; P9LE-NEXT: vmrghh 4, 3, 4
252 ; P9LE-NEXT: vmrghh 2, 3, 2
253 ; P9LE-NEXT: vsplth 3, 3, 3
254 ; P9LE-NEXT: xxmrglw 3, 4, 3
255 ; P9LE-NEXT: xxperm 3, 2, 0
256 ; P9LE-NEXT: xxspltw 2, 3, 2
257 ; P9LE-NEXT: vadduwm 2, 3, 2
258 ; P9LE-NEXT: vextuwrx 3, 3, 2
259 ; P9LE-NEXT: cmpw 3, 5
260 ; P9LE-NEXT: bgelr+ 0
261 ; P9LE-NEXT: # %bb.1: # %if.then
263 ; P9BE-LABEL: test16:
264 ; P9BE: # %bb.0: # %entry
265 ; P9BE-NEXT: sldi 4, 4, 1
266 ; P9BE-NEXT: li 7, 16
267 ; P9BE-NEXT: add 6, 3, 4
268 ; P9BE-NEXT: lxsihzx 1, 3, 4
269 ; P9BE-NEXT: addis 3, 2, .LCPI2_1@toc@ha
270 ; P9BE-NEXT: lxsihzx 2, 6, 7
271 ; P9BE-NEXT: addis 6, 2, .LCPI2_0@toc@ha
272 ; P9BE-NEXT: addi 3, 3, .LCPI2_1@toc@l
273 ; P9BE-NEXT: addi 6, 6, .LCPI2_0@toc@l
274 ; P9BE-NEXT: lxv 0, 0(6)
276 ; P9BE-NEXT: mtvsrwz 3, 6
277 ; P9BE-NEXT: xxperm 2, 3, 0
278 ; P9BE-NEXT: xxperm 1, 3, 0
279 ; P9BE-NEXT: vsplth 3, 3, 3
280 ; P9BE-NEXT: lxv 0, 0(3)
282 ; P9BE-NEXT: xxmrghw 3, 3, 1
283 ; P9BE-NEXT: xxperm 2, 3, 0
284 ; P9BE-NEXT: xxspltw 3, 2, 1
285 ; P9BE-NEXT: vadduwm 2, 2, 3
286 ; P9BE-NEXT: vextuwlx 3, 3, 2
287 ; P9BE-NEXT: cmpw 3, 5
288 ; P9BE-NEXT: bgelr+ 0
289 ; P9BE-NEXT: # %bb.1: # %if.then
291 ; P9BE-AIX-LABEL: test16:
292 ; P9BE-AIX: # %bb.0: # %entry
293 ; P9BE-AIX-NEXT: sldi 4, 4, 1
294 ; P9BE-AIX-NEXT: li 7, 16
295 ; P9BE-AIX-NEXT: add 6, 3, 4
296 ; P9BE-AIX-NEXT: lxsihzx 1, 3, 4
297 ; P9BE-AIX-NEXT: ld 3, L..C3(2) # %const.1
298 ; P9BE-AIX-NEXT: lxsihzx 2, 6, 7
299 ; P9BE-AIX-NEXT: ld 6, L..C4(2) # %const.0
300 ; P9BE-AIX-NEXT: lxv 0, 0(6)
301 ; P9BE-AIX-NEXT: li 6, 0
302 ; P9BE-AIX-NEXT: mtvsrwz 3, 6
303 ; P9BE-AIX-NEXT: xxperm 2, 3, 0
304 ; P9BE-AIX-NEXT: xxperm 1, 3, 0
305 ; P9BE-AIX-NEXT: vsplth 3, 3, 3
306 ; P9BE-AIX-NEXT: lxv 0, 0(3)
307 ; P9BE-AIX-NEXT: li 3, 0
308 ; P9BE-AIX-NEXT: xxmrghw 3, 3, 1
309 ; P9BE-AIX-NEXT: xxperm 2, 3, 0
310 ; P9BE-AIX-NEXT: xxspltw 3, 2, 1
311 ; P9BE-AIX-NEXT: vadduwm 2, 2, 3
312 ; P9BE-AIX-NEXT: vextuwlx 3, 3, 2
313 ; P9BE-AIX-NEXT: cmpw 3, 5
314 ; P9BE-AIX-NEXT: bgelr+ 0
315 ; P9BE-AIX-NEXT: # %bb.1: # %if.then
317 ; P9BE-AIX32-LABEL: test16:
318 ; P9BE-AIX32: # %bb.0: # %entry
319 ; P9BE-AIX32-NEXT: slwi 4, 4, 1
320 ; P9BE-AIX32-NEXT: li 6, 0
321 ; P9BE-AIX32-NEXT: lhzux 4, 3, 4
322 ; P9BE-AIX32-NEXT: lhz 3, 16(3)
323 ; P9BE-AIX32-NEXT: sth 6, -64(1)
324 ; P9BE-AIX32-NEXT: lxv 2, -64(1)
325 ; P9BE-AIX32-NEXT: sth 4, -48(1)
326 ; P9BE-AIX32-NEXT: lxv 4, -48(1)
327 ; P9BE-AIX32-NEXT: sth 3, -32(1)
328 ; P9BE-AIX32-NEXT: lwz 3, L..C3(2) # %const.0
329 ; P9BE-AIX32-NEXT: lxv 3, -32(1)
330 ; P9BE-AIX32-NEXT: vmrghh 4, 2, 4
331 ; P9BE-AIX32-NEXT: lxv 0, 0(3)
332 ; P9BE-AIX32-NEXT: vmrghh 3, 2, 3
333 ; P9BE-AIX32-NEXT: vsplth 2, 2, 0
334 ; P9BE-AIX32-NEXT: xxmrghw 2, 2, 4
335 ; P9BE-AIX32-NEXT: xxperm 3, 2, 0
336 ; P9BE-AIX32-NEXT: xxspltw 2, 3, 1
337 ; P9BE-AIX32-NEXT: vadduwm 2, 3, 2
338 ; P9BE-AIX32-NEXT: stxv 2, -16(1)
339 ; P9BE-AIX32-NEXT: lwz 3, -16(1)
340 ; P9BE-AIX32-NEXT: cmpw 3, 5
341 ; P9BE-AIX32-NEXT: bgelr+ 0
342 ; P9BE-AIX32-NEXT: # %bb.1: # %if.then
; Element indices of the two halfwords: %delta and %delta + 8.
344 %idxprom = sext i32 %delta to i64
345 %add14 = add nsw i32 %delta, 8
346 %idxprom15 = sext i32 %add14 to i64
349 for.body: ; preds = %entry
; Load both halfwords and place them in lanes 2 and 3; lanes 0 and 1 stay undef.
350 %arrayidx8 = getelementptr inbounds i16, ptr %sums, i64 %idxprom
351 %0 = load i16, ptr %arrayidx8, align 2
352 %arrayidx16 = getelementptr inbounds i16, ptr %sums, i64 %idxprom15
353 %1 = load i16, ptr %arrayidx16, align 2
354 %2 = insertelement <4 x i16> undef, i16 %0, i32 2
355 %3 = insertelement <4 x i16> %2, i16 %1, i32 3
; Widen, negate, and select (undef condition) between -x and -(-x).
356 %4 = zext <4 x i16> %3 to <4 x i32>
357 %5 = sub nsw <4 x i32> zeroinitializer, %4
358 %6 = sub nsw <4 x i32> zeroinitializer, %5
359 %7 = select <4 x i1> undef, <4 x i32> %6, <4 x i32> %5
; Fold lane 1 into lane 0, extract lane 0 and branch on (result < %thresh).
360 %bin.rdx = add <4 x i32> %7, zeroinitializer
361 %rdx.shuf54 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
362 %bin.rdx55 = add <4 x i32> %bin.rdx, %rdx.shuf54
363 %8 = extractelement <4 x i32> %bin.rdx55, i32 0
364 %op.extra = add nuw i32 %8, 0
365 %cmp25 = icmp slt i32 %op.extra, %thresh
366 br i1 %cmp25, label %if.then, label %if.end
368 if.then: ; preds = %for.body
371 if.end: ; preds = %for.body
; test8 - two scalar i8 loads inserted into lanes 2 and 3 of a <4 x i8>.
; The bytes come from %sums indexed (in i8 units) by %delta and by %delta + 8;
; both loads are marked align 2. The vector is zero-extended to <4 x i32> and
; negated; a select with an undef condition then picks between that value and
; its re-negation. Lane 1 is added into lane 0, lane 0 is extracted, and a
; signed less-than compare against %thresh chooses between the if.then and
; if.end blocks.
; The assertions below expect every run to load the bytes with lxsibzx; the
; 64-bit runs extract the result with vextuwrx/vextuwlx and the 32-bit AIX run
; reads it back through an r1-relative slot (stxv + lwz).
375 define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) {
377 ; P9LE: # %bb.0: # %entry
378 ; P9LE-NEXT: add 6, 3, 4
379 ; P9LE-NEXT: lxsibzx 2, 3, 4
381 ; P9LE-NEXT: mtvsrd 3, 3
383 ; P9LE-NEXT: lxsibzx 5, 6, 3
384 ; P9LE-NEXT: vspltb 4, 3, 7
385 ; P9LE-NEXT: addis 3, 2, .LCPI3_0@toc@ha
386 ; P9LE-NEXT: vmrghb 2, 3, 2
387 ; P9LE-NEXT: addi 3, 3, .LCPI3_0@toc@l
388 ; P9LE-NEXT: vmrglh 2, 2, 4
389 ; P9LE-NEXT: lxv 0, 0(3)
391 ; P9LE-NEXT: vmrghb 3, 3, 5
392 ; P9LE-NEXT: xxmrglw 2, 2, 4
393 ; P9LE-NEXT: vmrglh 3, 3, 4
394 ; P9LE-NEXT: xxmrglw 3, 4, 3
395 ; P9LE-NEXT: xxperm 2, 3, 0
396 ; P9LE-NEXT: xxspltw 3, 2, 2
397 ; P9LE-NEXT: vadduwm 2, 2, 3
398 ; P9LE-NEXT: vextuwrx 3, 3, 2
399 ; P9LE-NEXT: cmpw 3, 5
400 ; P9LE-NEXT: bgelr+ 0
401 ; P9LE-NEXT: # %bb.1: # %if.then
404 ; P9BE: # %bb.0: # %entry
405 ; P9BE-NEXT: add 6, 3, 4
407 ; P9BE-NEXT: lxsibzx 3, 3, 4
408 ; P9BE-NEXT: addis 3, 2, .LCPI3_1@toc@ha
409 ; P9BE-NEXT: lxsibzx 0, 6, 7
410 ; P9BE-NEXT: addis 6, 2, .LCPI3_0@toc@ha
411 ; P9BE-NEXT: addi 3, 3, .LCPI3_1@toc@l
412 ; P9BE-NEXT: addi 6, 6, .LCPI3_0@toc@l
413 ; P9BE-NEXT: lxv 1, 0(6)
415 ; P9BE-NEXT: mtvsrwz 2, 6
416 ; P9BE-NEXT: xxperm 0, 2, 1
417 ; P9BE-NEXT: xxperm 3, 2, 1
418 ; P9BE-NEXT: vspltb 2, 2, 7
419 ; P9BE-NEXT: vmrghh 3, 3, 2
420 ; P9BE-NEXT: xxspltw 1, 2, 0
421 ; P9BE-NEXT: xxmrghw 3, 3, 0
422 ; P9BE-NEXT: lxv 0, 0(3)
424 ; P9BE-NEXT: xxperm 3, 1, 0
425 ; P9BE-NEXT: xxspltw 2, 3, 1
426 ; P9BE-NEXT: vadduwm 2, 3, 2
427 ; P9BE-NEXT: vextuwlx 3, 3, 2
428 ; P9BE-NEXT: cmpw 3, 5
429 ; P9BE-NEXT: bgelr+ 0
430 ; P9BE-NEXT: # %bb.1: # %if.then
432 ; P9BE-AIX-LABEL: test8:
433 ; P9BE-AIX: # %bb.0: # %entry
434 ; P9BE-AIX-NEXT: add 6, 3, 4
435 ; P9BE-AIX-NEXT: li 7, 8
436 ; P9BE-AIX-NEXT: lxsibzx 3, 3, 4
437 ; P9BE-AIX-NEXT: ld 3, L..C5(2) # %const.1
438 ; P9BE-AIX-NEXT: lxsibzx 0, 6, 7
439 ; P9BE-AIX-NEXT: ld 6, L..C6(2) # %const.0
440 ; P9BE-AIX-NEXT: lxv 1, 0(6)
441 ; P9BE-AIX-NEXT: li 6, 0
442 ; P9BE-AIX-NEXT: mtvsrwz 2, 6
443 ; P9BE-AIX-NEXT: xxperm 0, 2, 1
444 ; P9BE-AIX-NEXT: xxperm 3, 2, 1
445 ; P9BE-AIX-NEXT: vspltb 2, 2, 7
446 ; P9BE-AIX-NEXT: vmrghh 3, 3, 2
447 ; P9BE-AIX-NEXT: xxspltw 1, 2, 0
448 ; P9BE-AIX-NEXT: xxmrghw 3, 3, 0
449 ; P9BE-AIX-NEXT: lxv 0, 0(3)
450 ; P9BE-AIX-NEXT: li 3, 0
451 ; P9BE-AIX-NEXT: xxperm 3, 1, 0
452 ; P9BE-AIX-NEXT: xxspltw 2, 3, 1
453 ; P9BE-AIX-NEXT: vadduwm 2, 3, 2
454 ; P9BE-AIX-NEXT: vextuwlx 3, 3, 2
455 ; P9BE-AIX-NEXT: cmpw 3, 5
456 ; P9BE-AIX-NEXT: bgelr+ 0
457 ; P9BE-AIX-NEXT: # %bb.1: # %if.then
459 ; P9BE-AIX32-LABEL: test8:
460 ; P9BE-AIX32: # %bb.0: # %entry
461 ; P9BE-AIX32-NEXT: add 6, 3, 4
462 ; P9BE-AIX32-NEXT: li 7, 8
463 ; P9BE-AIX32-NEXT: lxsibzx 3, 3, 4
464 ; P9BE-AIX32-NEXT: lwz 3, L..C4(2) # %const.1
465 ; P9BE-AIX32-NEXT: lxsibzx 0, 6, 7
466 ; P9BE-AIX32-NEXT: lwz 6, L..C5(2) # %const.0
467 ; P9BE-AIX32-NEXT: lxv 1, 0(6)
468 ; P9BE-AIX32-NEXT: li 6, 0
469 ; P9BE-AIX32-NEXT: mtvsrwz 2, 6
470 ; P9BE-AIX32-NEXT: xxperm 0, 2, 1
471 ; P9BE-AIX32-NEXT: xxperm 3, 2, 1
472 ; P9BE-AIX32-NEXT: vspltb 2, 2, 7
473 ; P9BE-AIX32-NEXT: vmrghh 3, 3, 2
474 ; P9BE-AIX32-NEXT: xxspltw 1, 2, 0
475 ; P9BE-AIX32-NEXT: xxmrghw 3, 3, 0
476 ; P9BE-AIX32-NEXT: lxv 0, 0(3)
477 ; P9BE-AIX32-NEXT: xxperm 3, 1, 0
478 ; P9BE-AIX32-NEXT: xxspltw 2, 3, 1
479 ; P9BE-AIX32-NEXT: vadduwm 2, 3, 2
480 ; P9BE-AIX32-NEXT: stxv 2, -16(1)
481 ; P9BE-AIX32-NEXT: lwz 3, -16(1)
482 ; P9BE-AIX32-NEXT: cmpw 3, 5
483 ; P9BE-AIX32-NEXT: bgelr+ 0
484 ; P9BE-AIX32-NEXT: # %bb.1: # %if.then
; Element indices of the two bytes: %delta and %delta + 8.
486 %idxprom = sext i32 %delta to i64
487 %add14 = add nsw i32 %delta, 8
488 %idxprom15 = sext i32 %add14 to i64
491 for.body: ; preds = %entry
; Load both bytes and place them in lanes 2 and 3; lanes 0 and 1 stay undef.
492 %arrayidx8 = getelementptr inbounds i8, ptr %sums, i64 %idxprom
493 %0 = load i8, ptr %arrayidx8, align 2
494 %arrayidx16 = getelementptr inbounds i8, ptr %sums, i64 %idxprom15
495 %1 = load i8, ptr %arrayidx16, align 2
496 %2 = insertelement <4 x i8> undef, i8 %0, i32 2
497 %3 = insertelement <4 x i8> %2, i8 %1, i32 3
; Widen, negate, and select (undef condition) between -x and -(-x).
498 %4 = zext <4 x i8> %3 to <4 x i32>
499 %5 = sub nsw <4 x i32> zeroinitializer, %4
500 %6 = sub nsw <4 x i32> zeroinitializer, %5
501 %7 = select <4 x i1> undef, <4 x i32> %6, <4 x i32> %5
; Fold lane 1 into lane 0, extract lane 0 and branch on (result < %thresh).
502 %bin.rdx = add <4 x i32> %7, zeroinitializer
503 %rdx.shuf54 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
504 %bin.rdx55 = add <4 x i32> %bin.rdx, %rdx.shuf54
505 %8 = extractelement <4 x i32> %bin.rdx55, i32 0
506 %op.extra = add nuw i32 %8, 0
507 %cmp25 = icmp slt i32 %op.extra, %thresh
508 br i1 %cmp25, label %if.then, label %if.end
510 if.then: ; preds = %for.body
513 if.end: ; preds = %for.body