1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
; OR-reduction over a sign-extended compare mask, 2 x double lanes.
; Lanes where %a0 > %a1 (ordered) become all-ones i64; lane 1 is ORed into
; lane 0 through a shufflevector and element 0 is extracted as the scalar.
7 define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
8 ; SSE-LABEL: test_v2f64_sext:
10 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
11 ; SSE-NEXT: movmskpd %xmm1, %eax
13 ; SSE-NEXT: sbbq %rax, %rax
16 ; AVX-LABEL: test_v2f64_sext:
18 ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
19 ; AVX-NEXT: vmovmskpd %xmm0, %eax
21 ; AVX-NEXT: sbbq %rax, %rax
24 ; AVX512-LABEL: test_v2f64_sext:
26 ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
27 ; AVX512-NEXT: vmovmskpd %xmm0, %eax
28 ; AVX512-NEXT: negl %eax
29 ; AVX512-NEXT: sbbq %rax, %rax
31 %c = fcmp ogt <2 x double> %a0, %a1
32 %s = sext <2 x i1> %c to <2 x i64>
33 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
34 %2 = or <2 x i64> %s, %1
35 %3 = extractelement <2 x i64> %2, i32 0
; OR-reduction over a sign-extended compare mask, 4 x double lanes.
; The fcmp ogt mask is widened to <4 x i64>, then folded in two shuffle+or
; steps (lanes 2,3 onto 0,1; then lane 1 onto 0) before extracting element 0.
39 define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
40 ; SSE-LABEL: test_v4f64_sext:
42 ; SSE-NEXT: cmpltpd %xmm1, %xmm3
43 ; SSE-NEXT: cmpltpd %xmm0, %xmm2
44 ; SSE-NEXT: orpd %xmm3, %xmm2
45 ; SSE-NEXT: movmskpd %xmm2, %eax
47 ; SSE-NEXT: sbbq %rax, %rax
50 ; AVX-LABEL: test_v4f64_sext:
52 ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
53 ; AVX-NEXT: vmovmskpd %ymm0, %eax
55 ; AVX-NEXT: sbbq %rax, %rax
56 ; AVX-NEXT: vzeroupper
59 ; AVX512-LABEL: test_v4f64_sext:
61 ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
62 ; AVX512-NEXT: vmovmskpd %ymm0, %eax
63 ; AVX512-NEXT: negl %eax
64 ; AVX512-NEXT: sbbq %rax, %rax
65 ; AVX512-NEXT: vzeroupper
67 %c = fcmp ogt <4 x double> %a0, %a1
68 %s = sext <4 x i1> %c to <4 x i64>
69 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
70 %2 = or <4 x i64> %s, %1
71 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
72 %4 = or <4 x i64> %2, %3
73 %5 = extractelement <4 x i64> %4, i64 0
; Variant of the 4 x double OR-reduction where the fcmp ogt mask is
; sign-extended to <4 x i32> (narrower than the compared elements).
; The mask is folded in two shuffle+or steps, element 0 is extracted as i32,
; and that scalar is then sign-extended to i64 (%6).
77 define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
78 ; SSE-LABEL: test_v4f64_legal_sext:
80 ; SSE-NEXT: cmpltpd %xmm1, %xmm3
81 ; SSE-NEXT: cmpltpd %xmm0, %xmm2
82 ; SSE-NEXT: packssdw %xmm3, %xmm2
83 ; SSE-NEXT: movmskps %xmm2, %eax
85 ; SSE-NEXT: sbbq %rax, %rax
88 ; AVX-LABEL: test_v4f64_legal_sext:
90 ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
91 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
92 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
93 ; AVX-NEXT: vmovmskps %xmm0, %eax
95 ; AVX-NEXT: sbbq %rax, %rax
96 ; AVX-NEXT: vzeroupper
99 ; AVX512-LABEL: test_v4f64_legal_sext:
101 ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
102 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
103 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
104 ; AVX512-NEXT: vmovmskps %xmm0, %eax
105 ; AVX512-NEXT: negl %eax
106 ; AVX512-NEXT: sbbq %rax, %rax
107 ; AVX512-NEXT: vzeroupper
109 %c = fcmp ogt <4 x double> %a0, %a1
110 %s = sext <4 x i1> %c to <4 x i32>
111 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
112 %2 = or <4 x i32> %s, %1
113 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
114 %4 = or <4 x i32> %2, %3
115 %5 = extractelement <4 x i32> %4, i64 0
116 %6 = sext i32 %5 to i64
; OR-reduction over a sign-extended compare mask, 4 x float lanes.
; The fcmp ogt mask is widened to <4 x i32>, folded in two shuffle+or steps
; (lanes 2,3 onto 0,1; then lane 1 onto 0), and element 0 is extracted.
120 define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
121 ; SSE-LABEL: test_v4f32_sext:
123 ; SSE-NEXT: cmpltps %xmm0, %xmm1
124 ; SSE-NEXT: movmskps %xmm1, %eax
125 ; SSE-NEXT: negl %eax
126 ; SSE-NEXT: sbbl %eax, %eax
129 ; AVX-LABEL: test_v4f32_sext:
131 ; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
132 ; AVX-NEXT: vmovmskps %xmm0, %eax
133 ; AVX-NEXT: negl %eax
134 ; AVX-NEXT: sbbl %eax, %eax
137 ; AVX512-LABEL: test_v4f32_sext:
139 ; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
140 ; AVX512-NEXT: vmovmskps %xmm0, %eax
141 ; AVX512-NEXT: negl %eax
142 ; AVX512-NEXT: sbbl %eax, %eax
144 %c = fcmp ogt <4 x float> %a0, %a1
145 %s = sext <4 x i1> %c to <4 x i32>
146 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
147 %2 = or <4 x i32> %s, %1
148 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
149 %4 = or <4 x i32> %2, %3
150 %5 = extractelement <4 x i32> %4, i32 0
; OR-reduction over a sign-extended compare mask, 8 x float lanes.
; The fcmp ogt mask is widened to <8 x i32> and folded in three shuffle+or
; steps (upper 4 lanes, then lanes 2,3, then lane 1) before extracting
; element 0.
154 define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
155 ; SSE-LABEL: test_v8f32_sext:
157 ; SSE-NEXT: cmpltps %xmm1, %xmm3
158 ; SSE-NEXT: cmpltps %xmm0, %xmm2
159 ; SSE-NEXT: orps %xmm3, %xmm2
160 ; SSE-NEXT: movmskps %xmm2, %eax
161 ; SSE-NEXT: negl %eax
162 ; SSE-NEXT: sbbl %eax, %eax
165 ; AVX-LABEL: test_v8f32_sext:
167 ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
168 ; AVX-NEXT: vmovmskps %ymm0, %eax
169 ; AVX-NEXT: negl %eax
170 ; AVX-NEXT: sbbl %eax, %eax
171 ; AVX-NEXT: vzeroupper
174 ; AVX512-LABEL: test_v8f32_sext:
176 ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
177 ; AVX512-NEXT: vmovmskps %ymm0, %eax
178 ; AVX512-NEXT: negl %eax
179 ; AVX512-NEXT: sbbl %eax, %eax
180 ; AVX512-NEXT: vzeroupper
182 %c = fcmp ogt <8 x float> %a0, %a1
183 %s = sext <8 x i1> %c to <8 x i32>
184 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
185 %2 = or <8 x i32> %s, %1
186 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
187 %4 = or <8 x i32> %2, %3
188 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
189 %6 = or <8 x i32> %4, %5
190 %7 = extractelement <8 x i32> %6, i32 0
; Variant of the 8 x float OR-reduction where the fcmp ogt mask is
; sign-extended to <8 x i16> (narrower than the compared elements).
; After three shuffle+or folding steps, element 0 is extracted as i16 and
; sign-extended to i32 (%8).
194 define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
195 ; SSE-LABEL: test_v8f32_legal_sext:
197 ; SSE-NEXT: cmpltps %xmm1, %xmm3
198 ; SSE-NEXT: cmpltps %xmm0, %xmm2
199 ; SSE-NEXT: packssdw %xmm3, %xmm2
200 ; SSE-NEXT: pmovmskb %xmm2, %eax
201 ; SSE-NEXT: negl %eax
202 ; SSE-NEXT: sbbl %eax, %eax
205 ; AVX-LABEL: test_v8f32_legal_sext:
207 ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
208 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
209 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
210 ; AVX-NEXT: vpmovmskb %xmm0, %eax
211 ; AVX-NEXT: negl %eax
212 ; AVX-NEXT: sbbl %eax, %eax
213 ; AVX-NEXT: vzeroupper
216 ; AVX512-LABEL: test_v8f32_legal_sext:
218 ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0
219 ; AVX512-NEXT: vpmovm2w %k0, %xmm0
220 ; AVX512-NEXT: vpmovmskb %xmm0, %eax
221 ; AVX512-NEXT: negl %eax
222 ; AVX512-NEXT: sbbl %eax, %eax
223 ; AVX512-NEXT: vzeroupper
225 %c = fcmp ogt <8 x float> %a0, %a1
226 %s = sext <8 x i1> %c to <8 x i16>
227 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
228 %2 = or <8 x i16> %s, %1
229 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
230 %4 = or <8 x i16> %2, %3
231 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
232 %6 = or <8 x i16> %4, %5
233 %7 = extractelement <8 x i16> %6, i32 0
234 %8 = sext i16 %7 to i32
; OR-reduction over a sign-extended compare mask, 2 x i64 lanes.
; Lanes where %a0 > %a1 (signed) become all-ones i64; lane 1 is ORed into
; lane 0 through a shufflevector and element 0 is extracted.
238 define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
239 ; SSE-LABEL: test_v2i64_sext:
241 ; SSE-NEXT: pcmpgtq %xmm1, %xmm0
242 ; SSE-NEXT: movmskpd %xmm0, %eax
243 ; SSE-NEXT: negl %eax
244 ; SSE-NEXT: sbbq %rax, %rax
247 ; AVX-LABEL: test_v2i64_sext:
249 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
250 ; AVX-NEXT: vmovmskpd %xmm0, %eax
251 ; AVX-NEXT: negl %eax
252 ; AVX-NEXT: sbbq %rax, %rax
255 ; AVX512-LABEL: test_v2i64_sext:
257 ; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
258 ; AVX512-NEXT: vmovmskpd %xmm0, %eax
259 ; AVX512-NEXT: negl %eax
260 ; AVX512-NEXT: sbbq %rax, %rax
262 %c = icmp sgt <2 x i64> %a0, %a1
263 %s = sext <2 x i1> %c to <2 x i64>
264 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
265 %2 = or <2 x i64> %s, %1
266 %3 = extractelement <2 x i64> %2, i32 0
; OR-reduction over a sign-extended compare mask, 4 x i64 lanes.
; The icmp sgt mask is widened to <4 x i64> and folded in two shuffle+or
; steps (lanes 2,3 onto 0,1; then lane 1 onto 0) before extracting element 0.
; The AVX1 and AVX2 run lines have separate expectations for this function.
270 define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
271 ; SSE-LABEL: test_v4i64_sext:
273 ; SSE-NEXT: pcmpgtq %xmm3, %xmm1
274 ; SSE-NEXT: pcmpgtq %xmm2, %xmm0
275 ; SSE-NEXT: por %xmm1, %xmm0
276 ; SSE-NEXT: movmskpd %xmm0, %eax
277 ; SSE-NEXT: negl %eax
278 ; SSE-NEXT: sbbq %rax, %rax
281 ; AVX1-LABEL: test_v4i64_sext:
283 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
284 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
285 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
286 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
287 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
288 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
289 ; AVX1-NEXT: negl %eax
290 ; AVX1-NEXT: sbbq %rax, %rax
291 ; AVX1-NEXT: vzeroupper
294 ; AVX2-LABEL: test_v4i64_sext:
296 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
297 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
298 ; AVX2-NEXT: negl %eax
299 ; AVX2-NEXT: sbbq %rax, %rax
300 ; AVX2-NEXT: vzeroupper
303 ; AVX512-LABEL: test_v4i64_sext:
305 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
306 ; AVX512-NEXT: vmovmskpd %ymm0, %eax
307 ; AVX512-NEXT: negl %eax
308 ; AVX512-NEXT: sbbq %rax, %rax
309 ; AVX512-NEXT: vzeroupper
311 %c = icmp sgt <4 x i64> %a0, %a1
312 %s = sext <4 x i1> %c to <4 x i64>
313 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
314 %2 = or <4 x i64> %s, %1
315 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
316 %4 = or <4 x i64> %2, %3
317 %5 = extractelement <4 x i64> %4, i64 0
; Variant of the 4 x i64 OR-reduction where the icmp sgt mask is
; sign-extended to <4 x i32> (narrower than the compared elements).
; After two shuffle+or folding steps, element 0 is extracted as i32 and
; sign-extended to i64 (%6).
321 define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
322 ; SSE-LABEL: test_v4i64_legal_sext:
324 ; SSE-NEXT: pcmpgtq %xmm3, %xmm1
325 ; SSE-NEXT: pcmpgtq %xmm2, %xmm0
326 ; SSE-NEXT: packssdw %xmm1, %xmm0
327 ; SSE-NEXT: movmskps %xmm0, %eax
328 ; SSE-NEXT: negl %eax
329 ; SSE-NEXT: sbbq %rax, %rax
332 ; AVX1-LABEL: test_v4i64_legal_sext:
334 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
335 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
336 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
337 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
338 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
339 ; AVX1-NEXT: vmovmskps %xmm0, %eax
340 ; AVX1-NEXT: negl %eax
341 ; AVX1-NEXT: sbbq %rax, %rax
342 ; AVX1-NEXT: vzeroupper
345 ; AVX2-LABEL: test_v4i64_legal_sext:
347 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
348 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
349 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
350 ; AVX2-NEXT: vmovmskps %xmm0, %eax
351 ; AVX2-NEXT: negl %eax
352 ; AVX2-NEXT: sbbq %rax, %rax
353 ; AVX2-NEXT: vzeroupper
356 ; AVX512-LABEL: test_v4i64_legal_sext:
358 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
359 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
360 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
361 ; AVX512-NEXT: vmovmskps %xmm0, %eax
362 ; AVX512-NEXT: negl %eax
363 ; AVX512-NEXT: sbbq %rax, %rax
364 ; AVX512-NEXT: vzeroupper
366 %c = icmp sgt <4 x i64> %a0, %a1
367 %s = sext <4 x i1> %c to <4 x i32>
368 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
369 %2 = or <4 x i32> %s, %1
370 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
371 %4 = or <4 x i32> %2, %3
372 %5 = extractelement <4 x i32> %4, i64 0
373 %6 = sext i32 %5 to i64
; OR-reduction over a sign-extended compare mask, 4 x i32 lanes.
; The icmp sgt mask is widened to <4 x i32>, folded in two shuffle+or steps
; (lanes 2,3 onto 0,1; then lane 1 onto 0), and element 0 is extracted.
377 define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
378 ; SSE-LABEL: test_v4i32_sext:
380 ; SSE-NEXT: pcmpgtd %xmm1, %xmm0
381 ; SSE-NEXT: movmskps %xmm0, %eax
382 ; SSE-NEXT: negl %eax
383 ; SSE-NEXT: sbbl %eax, %eax
386 ; AVX-LABEL: test_v4i32_sext:
388 ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
389 ; AVX-NEXT: vmovmskps %xmm0, %eax
390 ; AVX-NEXT: negl %eax
391 ; AVX-NEXT: sbbl %eax, %eax
394 ; AVX512-LABEL: test_v4i32_sext:
396 ; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
397 ; AVX512-NEXT: vmovmskps %xmm0, %eax
398 ; AVX512-NEXT: negl %eax
399 ; AVX512-NEXT: sbbl %eax, %eax
401 %c = icmp sgt <4 x i32> %a0, %a1
402 %s = sext <4 x i1> %c to <4 x i32>
403 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
404 %2 = or <4 x i32> %s, %1
405 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
406 %4 = or <4 x i32> %2, %3
407 %5 = extractelement <4 x i32> %4, i32 0
; OR-reduction over a sign-extended compare mask, 8 x i32 lanes.
; The icmp sgt mask is widened to <8 x i32> and folded in three shuffle+or
; steps (upper 4 lanes, then lanes 2,3, then lane 1) before extracting
; element 0. AVX1 and AVX2 have separate expectations for this function.
411 define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
412 ; SSE-LABEL: test_v8i32_sext:
414 ; SSE-NEXT: pcmpgtd %xmm3, %xmm1
415 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0
416 ; SSE-NEXT: por %xmm1, %xmm0
417 ; SSE-NEXT: movmskps %xmm0, %eax
418 ; SSE-NEXT: negl %eax
419 ; SSE-NEXT: sbbl %eax, %eax
422 ; AVX1-LABEL: test_v8i32_sext:
424 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
425 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
426 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
427 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
428 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
429 ; AVX1-NEXT: vmovmskps %ymm0, %eax
430 ; AVX1-NEXT: negl %eax
431 ; AVX1-NEXT: sbbl %eax, %eax
432 ; AVX1-NEXT: vzeroupper
435 ; AVX2-LABEL: test_v8i32_sext:
437 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
438 ; AVX2-NEXT: vmovmskps %ymm0, %eax
439 ; AVX2-NEXT: negl %eax
440 ; AVX2-NEXT: sbbl %eax, %eax
441 ; AVX2-NEXT: vzeroupper
444 ; AVX512-LABEL: test_v8i32_sext:
446 ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
447 ; AVX512-NEXT: vmovmskps %ymm0, %eax
448 ; AVX512-NEXT: negl %eax
449 ; AVX512-NEXT: sbbl %eax, %eax
450 ; AVX512-NEXT: vzeroupper
452 %c = icmp sgt <8 x i32> %a0, %a1
453 %s = sext <8 x i1> %c to <8 x i32>
454 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
455 %2 = or <8 x i32> %s, %1
456 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
457 %4 = or <8 x i32> %2, %3
458 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
459 %6 = or <8 x i32> %4, %5
460 %7 = extractelement <8 x i32> %6, i32 0
; Variant of the 8 x i32 OR-reduction where the icmp sgt mask is
; sign-extended to <8 x i16> (narrower than the compared elements).
; After three shuffle+or folding steps, element 0 is extracted as i16 and
; sign-extended to i32 (%8).
464 define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
465 ; SSE-LABEL: test_v8i32_legal_sext:
467 ; SSE-NEXT: pcmpgtd %xmm3, %xmm1
468 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0
469 ; SSE-NEXT: packssdw %xmm1, %xmm0
470 ; SSE-NEXT: pmovmskb %xmm0, %eax
471 ; SSE-NEXT: negl %eax
472 ; SSE-NEXT: sbbl %eax, %eax
475 ; AVX1-LABEL: test_v8i32_legal_sext:
477 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
478 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
479 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
480 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
481 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
482 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
483 ; AVX1-NEXT: negl %eax
484 ; AVX1-NEXT: sbbl %eax, %eax
485 ; AVX1-NEXT: vzeroupper
488 ; AVX2-LABEL: test_v8i32_legal_sext:
490 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
491 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
492 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
493 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
494 ; AVX2-NEXT: negl %eax
495 ; AVX2-NEXT: sbbl %eax, %eax
496 ; AVX2-NEXT: vzeroupper
499 ; AVX512-LABEL: test_v8i32_legal_sext:
501 ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
502 ; AVX512-NEXT: vpmovm2w %k0, %xmm0
503 ; AVX512-NEXT: vpmovmskb %xmm0, %eax
504 ; AVX512-NEXT: negl %eax
505 ; AVX512-NEXT: sbbl %eax, %eax
506 ; AVX512-NEXT: vzeroupper
508 %c = icmp sgt <8 x i32> %a0, %a1
509 %s = sext <8 x i1> %c to <8 x i16>
510 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
511 %2 = or <8 x i16> %s, %1
512 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
513 %4 = or <8 x i16> %2, %3
514 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
515 %6 = or <8 x i16> %4, %5
516 %7 = extractelement <8 x i16> %6, i32 0
517 %8 = sext i16 %7 to i32
; OR-reduction over a sign-extended compare mask, 8 x i16 lanes.
; The icmp sgt mask is widened to <8 x i16> and folded in three shuffle+or
; steps (upper 4 lanes, then lanes 2,3, then lane 1) before extracting
; element 0.
521 define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
522 ; SSE-LABEL: test_v8i16_sext:
524 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0
525 ; SSE-NEXT: pmovmskb %xmm0, %eax
526 ; SSE-NEXT: negl %eax
527 ; SSE-NEXT: sbbl %eax, %eax
528 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
531 ; AVX-LABEL: test_v8i16_sext:
533 ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
534 ; AVX-NEXT: vpmovmskb %xmm0, %eax
535 ; AVX-NEXT: negl %eax
536 ; AVX-NEXT: sbbl %eax, %eax
537 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
540 ; AVX512-LABEL: test_v8i16_sext:
542 ; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
543 ; AVX512-NEXT: vpmovmskb %xmm0, %eax
544 ; AVX512-NEXT: negl %eax
545 ; AVX512-NEXT: sbbl %eax, %eax
546 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
548 %c = icmp sgt <8 x i16> %a0, %a1
549 %s = sext <8 x i1> %c to <8 x i16>
550 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
551 %2 = or <8 x i16> %s, %1
552 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
553 %4 = or <8 x i16> %2, %3
554 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
555 %6 = or <8 x i16> %4, %5
556 %7 = extractelement <8 x i16> %6, i32 0
; OR-reduction over a sign-extended compare mask, 16 x i16 lanes.
; The icmp sgt mask is widened to <16 x i16> and folded in four shuffle+or
; steps (upper 8 lanes, then 4, then 2, then 1) before extracting element 0.
; AVX1 and AVX2 have separate expectations for this function.
560 define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
561 ; SSE-LABEL: test_v16i16_sext:
563 ; SSE-NEXT: pcmpgtw %xmm3, %xmm1
564 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0
565 ; SSE-NEXT: por %xmm1, %xmm0
566 ; SSE-NEXT: pmovmskb %xmm0, %eax
567 ; SSE-NEXT: negl %eax
568 ; SSE-NEXT: sbbl %eax, %eax
569 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
572 ; AVX1-LABEL: test_v16i16_sext:
574 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
575 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
576 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
577 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
578 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
579 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
580 ; AVX1-NEXT: negl %eax
581 ; AVX1-NEXT: sbbl %eax, %eax
582 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
583 ; AVX1-NEXT: vzeroupper
586 ; AVX2-LABEL: test_v16i16_sext:
588 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
589 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
590 ; AVX2-NEXT: negl %eax
591 ; AVX2-NEXT: sbbl %eax, %eax
592 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
593 ; AVX2-NEXT: vzeroupper
596 ; AVX512-LABEL: test_v16i16_sext:
598 ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
599 ; AVX512-NEXT: vpmovmskb %ymm0, %eax
600 ; AVX512-NEXT: negl %eax
601 ; AVX512-NEXT: sbbl %eax, %eax
602 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
603 ; AVX512-NEXT: vzeroupper
605 %c = icmp sgt <16 x i16> %a0, %a1
606 %s = sext <16 x i1> %c to <16 x i16>
607 %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
608 %2 = or <16 x i16> %s, %1
609 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
610 %4 = or <16 x i16> %2, %3
611 %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
612 %6 = or <16 x i16> %4, %5
613 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
614 %8 = or <16 x i16> %6, %7
615 %9 = extractelement <16 x i16> %8, i32 0
; Variant of the 16 x i16 OR-reduction where the icmp sgt mask is
; sign-extended to <16 x i8> (narrower than the compared elements).
; After four shuffle+or folding steps, element 0 is extracted as i8 and
; sign-extended to i16 (%10).
619 define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
620 ; SSE-LABEL: test_v16i16_legal_sext:
622 ; SSE-NEXT: pcmpgtw %xmm3, %xmm1
623 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0
624 ; SSE-NEXT: packsswb %xmm1, %xmm0
625 ; SSE-NEXT: pmovmskb %xmm0, %eax
626 ; SSE-NEXT: negl %eax
627 ; SSE-NEXT: sbbl %eax, %eax
628 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
631 ; AVX1-LABEL: test_v16i16_legal_sext:
633 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
634 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
635 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
636 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
637 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
638 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
639 ; AVX1-NEXT: negl %eax
640 ; AVX1-NEXT: sbbl %eax, %eax
641 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
642 ; AVX1-NEXT: vzeroupper
645 ; AVX2-LABEL: test_v16i16_legal_sext:
647 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
648 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
649 ; AVX2-NEXT: negl %eax
650 ; AVX2-NEXT: sbbl %eax, %eax
651 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
652 ; AVX2-NEXT: vzeroupper
655 ; AVX512-LABEL: test_v16i16_legal_sext:
657 ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
658 ; AVX512-NEXT: vpmovm2b %k0, %xmm0
659 ; AVX512-NEXT: vpmovmskb %xmm0, %eax
660 ; AVX512-NEXT: negl %eax
661 ; AVX512-NEXT: sbbl %eax, %eax
662 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
663 ; AVX512-NEXT: vzeroupper
665 %c = icmp sgt <16 x i16> %a0, %a1
666 %s = sext <16 x i1> %c to <16 x i8>
667 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
668 %2 = or <16 x i8> %s, %1
669 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
670 %4 = or <16 x i8> %2, %3
671 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
672 %6 = or <16 x i8> %4, %5
673 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
674 %8 = or <16 x i8> %6, %7
675 %9 = extractelement <16 x i8> %8, i32 0
676 %10 = sext i8 %9 to i16
; OR-reduction over a sign-extended compare mask, 16 x i8 lanes.
; The icmp sgt mask is widened to <16 x i8> and folded in four shuffle+or
; steps (upper 8 lanes, then 4, then 2, then 1) before extracting element 0.
680 define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
681 ; SSE-LABEL: test_v16i8_sext:
683 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0
684 ; SSE-NEXT: pmovmskb %xmm0, %eax
685 ; SSE-NEXT: negl %eax
686 ; SSE-NEXT: sbbl %eax, %eax
687 ; SSE-NEXT: # kill: def $al killed $al killed $eax
690 ; AVX-LABEL: test_v16i8_sext:
692 ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
693 ; AVX-NEXT: vpmovmskb %xmm0, %eax
694 ; AVX-NEXT: negl %eax
695 ; AVX-NEXT: sbbl %eax, %eax
696 ; AVX-NEXT: # kill: def $al killed $al killed $eax
699 ; AVX512-LABEL: test_v16i8_sext:
701 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
702 ; AVX512-NEXT: vpmovmskb %xmm0, %eax
703 ; AVX512-NEXT: negl %eax
704 ; AVX512-NEXT: sbbl %eax, %eax
705 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
707 %c = icmp sgt <16 x i8> %a0, %a1
708 %s = sext <16 x i1> %c to <16 x i8>
709 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
710 %2 = or <16 x i8> %s, %1
711 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
712 %4 = or <16 x i8> %2, %3
713 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
714 %6 = or <16 x i8> %4, %5
715 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
716 %8 = or <16 x i8> %6, %7
717 %9 = extractelement <16 x i8> %8, i32 0
721 define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
722 ; SSE-LABEL: test_v32i8_sext:
724 ; SSE-NEXT: pcmpgtb %xmm3, %xmm1
725 ; SSE-NEXT: pcmpgtb %xmm2, %xmm0
726 ; SSE-NEXT: por %xmm1, %xmm0
727 ; SSE-NEXT: pmovmskb %xmm0, %eax
728 ; SSE-NEXT: negl %eax
729 ; SSE-NEXT: sbbl %eax, %eax
730 ; SSE-NEXT: # kill: def $al killed $al killed $eax
733 ; AVX1-LABEL: test_v32i8_sext:
735 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
736 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
737 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
738 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
739 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
740 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
741 ; AVX1-NEXT: negl %eax
742 ; AVX1-NEXT: sbbl %eax, %eax
743 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
744 ; AVX1-NEXT: vzeroupper
747 ; AVX2-LABEL: test_v32i8_sext:
749 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
750 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
751 ; AVX2-NEXT: negl %eax
752 ; AVX2-NEXT: sbbl %eax, %eax
753 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
754 ; AVX2-NEXT: vzeroupper
757 ; AVX512-LABEL: test_v32i8_sext:
759 ; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
760 ; AVX512-NEXT: vpmovmskb %ymm0, %eax
761 ; AVX512-NEXT: negl %eax
762 ; AVX512-NEXT: sbbl %eax, %eax
763 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
764 ; AVX512-NEXT: vzeroupper
766 %c = icmp sgt <32 x i8> %a0, %a1
767 %s = sext <32 x i1> %c to <32 x i8>
768 %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
769 %2 = or <32 x i8> %s, %1
770 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
771 %4 = or <32 x i8> %2, %3
772 %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
773 %6 = or <32 x i8> %4, %5
774 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
775 %8 = or <32 x i8> %6, %7
776 %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
777 %10 = or <32 x i8> %8, %9
778 %11 = extractelement <32 x i8> %10, i32 0
; OR-reduction performed directly on the <2 x i1> compare result (no sext).
; Lanes are set where %x > %y (ordered); lane 1 is ORed into lane 0 through
; a shufflevector and element 0 is extracted as the i1 result.
782 define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) {
783 ; SSE-LABEL: bool_reduction_v2f64:
785 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
786 ; SSE-NEXT: movmskpd %xmm1, %eax
787 ; SSE-NEXT: testl %eax, %eax
788 ; SSE-NEXT: setne %al
791 ; AVX-LABEL: bool_reduction_v2f64:
793 ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
794 ; AVX-NEXT: vmovmskpd %xmm0, %eax
795 ; AVX-NEXT: testl %eax, %eax
796 ; AVX-NEXT: setne %al
799 ; AVX512-LABEL: bool_reduction_v2f64:
801 ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0
802 ; AVX512-NEXT: kmovd %k0, %eax
803 ; AVX512-NEXT: testb %al, %al
804 ; AVX512-NEXT: setne %al
806 %a = fcmp ogt <2 x double> %x, %y
807 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
808 %c = or <2 x i1> %a, %b
809 %d = extractelement <2 x i1> %c, i32 0
; OR-reduction performed directly on the <4 x i1> result of fcmp oeq.
; The mask is folded in two shuffle+or steps (lanes 2,3 onto 0,1; then
; lane 1 onto 0) and element 0 is extracted as the i1 result.
813 define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) {
814 ; SSE-LABEL: bool_reduction_v4f32:
816 ; SSE-NEXT: cmpeqps %xmm1, %xmm0
817 ; SSE-NEXT: movmskps %xmm0, %eax
818 ; SSE-NEXT: testl %eax, %eax
819 ; SSE-NEXT: setne %al
822 ; AVX-LABEL: bool_reduction_v4f32:
824 ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
825 ; AVX-NEXT: vmovmskps %xmm0, %eax
826 ; AVX-NEXT: testl %eax, %eax
827 ; AVX-NEXT: setne %al
830 ; AVX512-LABEL: bool_reduction_v4f32:
832 ; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0
833 ; AVX512-NEXT: kmovd %k0, %eax
834 ; AVX512-NEXT: testb %al, %al
835 ; AVX512-NEXT: setne %al
837 %a = fcmp oeq <4 x float> %x, %y
838 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
839 %b = or <4 x i1> %s1, %a
840 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
841 %c = or <4 x i1> %s2, %b
842 %d = extractelement <4 x i1> %c, i32 0
; OR-reduction performed directly on the <4 x i1> result of fcmp oge on
; 4 x double. The mask is folded in two shuffle+or steps (lanes 2,3 onto
; 0,1; then lane 1 onto 0) and element 0 is extracted as the i1 result.
846 define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) {
847 ; SSE-LABEL: bool_reduction_v4f64:
849 ; SSE-NEXT: cmplepd %xmm1, %xmm3
850 ; SSE-NEXT: cmplepd %xmm0, %xmm2
851 ; SSE-NEXT: packssdw %xmm3, %xmm2
852 ; SSE-NEXT: movmskps %xmm2, %eax
853 ; SSE-NEXT: testl %eax, %eax
854 ; SSE-NEXT: setne %al
857 ; AVX-LABEL: bool_reduction_v4f64:
859 ; AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
860 ; AVX-NEXT: vmovmskpd %ymm0, %eax
861 ; AVX-NEXT: testl %eax, %eax
862 ; AVX-NEXT: setne %al
863 ; AVX-NEXT: vzeroupper
866 ; AVX512-LABEL: bool_reduction_v4f64:
868 ; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k0
869 ; AVX512-NEXT: kmovd %k0, %eax
870 ; AVX512-NEXT: testb %al, %al
871 ; AVX512-NEXT: setne %al
872 ; AVX512-NEXT: vzeroupper
874 %a = fcmp oge <4 x double> %x, %y
875 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
876 %b = or <4 x i1> %s1, %a
877 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
878 %c = or <4 x i1> %s2, %b
879 %d = extractelement <4 x i1> %c, i32 0
; OR-reduction performed directly on the <8 x i1> result of fcmp une on
; 8 x float. The mask is folded in three shuffle+or steps (upper 4 lanes,
; then lanes 2,3, then lane 1) and element 0 is extracted as the i1 result.
883 define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) {
884 ; SSE-LABEL: bool_reduction_v8f32:
886 ; SSE-NEXT: cmpneqps %xmm3, %xmm1
887 ; SSE-NEXT: cmpneqps %xmm2, %xmm0
888 ; SSE-NEXT: packssdw %xmm1, %xmm0
889 ; SSE-NEXT: pmovmskb %xmm0, %eax
890 ; SSE-NEXT: testl %eax, %eax
891 ; SSE-NEXT: setne %al
894 ; AVX-LABEL: bool_reduction_v8f32:
896 ; AVX-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0
897 ; AVX-NEXT: vmovmskps %ymm0, %eax
898 ; AVX-NEXT: testl %eax, %eax
899 ; AVX-NEXT: setne %al
900 ; AVX-NEXT: vzeroupper
903 ; AVX512-LABEL: bool_reduction_v8f32:
905 ; AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k0
906 ; AVX512-NEXT: kmovd %k0, %eax
907 ; AVX512-NEXT: testb %al, %al
908 ; AVX512-NEXT: setne %al
909 ; AVX512-NEXT: vzeroupper
911 %a = fcmp une <8 x float> %x, %y
912 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
913 %b = or <8 x i1> %s1, %a
914 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
915 %c = or <8 x i1> %s2, %b
916 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
917 %d = or <8 x i1> %s3, %c
918 %e = extractelement <8 x i1> %d, i32 0
; Any-of reduction over an unsigned > compare of two <2 x i64> vectors.
; The <2 x i1> compare result is reduced with a single shuffle/or step and
; lane 0 is extracted.
; On the +sse4.2 and +avx runs the expected code XORs both operands with
; 9223372036854775808 (the i64 sign bit) and then uses the signed pcmpgtq;
; the +avx512 run expects the unsigned mask compare vpcmpnleuq instead.
922 define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
923 ; SSE-LABEL: bool_reduction_v2i64:
925 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
926 ; SSE-NEXT: pxor %xmm2, %xmm1
927 ; SSE-NEXT: pxor %xmm2, %xmm0
928 ; SSE-NEXT: pcmpgtq %xmm1, %xmm0
929 ; SSE-NEXT: movmskpd %xmm0, %eax
930 ; SSE-NEXT: testl %eax, %eax
931 ; SSE-NEXT: setne %al
934 ; AVX-LABEL: bool_reduction_v2i64:
936 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
937 ; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
938 ; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
939 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
940 ; AVX-NEXT: vmovmskpd %xmm0, %eax
941 ; AVX-NEXT: testl %eax, %eax
942 ; AVX-NEXT: setne %al
945 ; AVX512-LABEL: bool_reduction_v2i64:
947 ; AVX512-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0
948 ; AVX512-NEXT: kmovd %k0, %eax
949 ; AVX512-NEXT: testb %al, %al
950 ; AVX512-NEXT: setne %al
952 %a = icmp ugt <2 x i64> %x, %y
; Single step: bring lane 1 down onto lane 0 and OR; lane 0 = lane0 | lane1.
953 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
954 %c = or <2 x i1> %a, %b
955 %d = extractelement <2 x i1> %c, i32 0
; Any-of reduction over a != compare of two <4 x i32> vectors.
; The <4 x i1> compare result is OR-reduced with a two-step shuffle/or tree
; and lane 0 is extracted.
; Note the inverted form expected on the +sse4.2 and +avx runs: the code
; compares for equality (pcmpeqd), collects the 4-bit lane mask and checks
; that it is not 15, i.e. "not all lanes equal". The +avx512 run expects a
; direct vpcmpneqd into a mask register followed by a non-zero test.
959 define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
960 ; SSE-LABEL: bool_reduction_v4i32:
962 ; SSE-NEXT: pcmpeqd %xmm1, %xmm0
963 ; SSE-NEXT: movmskps %xmm0, %eax
964 ; SSE-NEXT: cmpl $15, %eax
965 ; SSE-NEXT: setne %al
968 ; AVX-LABEL: bool_reduction_v4i32:
970 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
971 ; AVX-NEXT: vmovmskps %xmm0, %eax
972 ; AVX-NEXT: cmpl $15, %eax
973 ; AVX-NEXT: setne %al
976 ; AVX512-LABEL: bool_reduction_v4i32:
978 ; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k0
979 ; AVX512-NEXT: kmovd %k0, %eax
980 ; AVX512-NEXT: testb %al, %al
981 ; AVX512-NEXT: setne %al
983 %a = icmp ne <4 x i32> %x, %y
; Step 1: fold lanes 2-3 onto lanes 0-1.
984 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
985 %b = or <4 x i1> %s1, %a
; Step 2: fold lane 1 onto lane 0; lane 0 now holds the OR of all lanes.
986 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
987 %c = or <4 x i1> %s2, %b
988 %d = extractelement <4 x i1> %c, i32 0
; Any-of reduction over a signed < compare of two <8 x i16> vectors.
; The <8 x i1> compare result is OR-reduced with a three-step shuffle/or tree
; and lane 0 is extracted.
; The expected code performs the compare as a signed "greater than" with the
; operands swapped (pcmpgtw / vpcmpgtw) and tests the collected mask for
; non-zero.
992 define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) {
993 ; SSE-LABEL: bool_reduction_v8i16:
995 ; SSE-NEXT: pcmpgtw %xmm0, %xmm1
996 ; SSE-NEXT: pmovmskb %xmm1, %eax
997 ; SSE-NEXT: testl %eax, %eax
998 ; SSE-NEXT: setne %al
1001 ; AVX-LABEL: bool_reduction_v8i16:
1003 ; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1004 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1005 ; AVX-NEXT: testl %eax, %eax
1006 ; AVX-NEXT: setne %al
1009 ; AVX512-LABEL: bool_reduction_v8i16:
1011 ; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k0
1012 ; AVX512-NEXT: kmovd %k0, %eax
1013 ; AVX512-NEXT: testb %al, %al
1014 ; AVX512-NEXT: setne %al
1016 %a = icmp slt <8 x i16> %x, %y
; Step 1: fold lanes 4-7 onto lanes 0-3.
1017 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1018 %b = or <8 x i1> %s1, %a
; Step 2: fold lanes 2-3 onto lanes 0-1.
1019 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1020 %c = or <8 x i1> %s2, %b
; Step 3: fold lane 1 onto lane 0; lane 0 now holds the OR of all lanes.
1021 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1022 %d = or <8 x i1> %s3, %c
1023 %e = extractelement <8 x i1> %d, i32 0
; Any-of reduction over a signed > compare of two <16 x i8> vectors.
; The <16 x i1> compare result is OR-reduced with a four-step shuffle/or tree
; and lane 0 is extracted.
; The expected code is one pcmpgtb / vpcmpgtb followed by a non-zero test of
; the byte mask; the +avx512 run tests the mask register directly with
; kortestw rather than moving it to a general-purpose register.
1027 define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) {
1028 ; SSE-LABEL: bool_reduction_v16i8:
1030 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0
1031 ; SSE-NEXT: pmovmskb %xmm0, %eax
1032 ; SSE-NEXT: testl %eax, %eax
1033 ; SSE-NEXT: setne %al
1036 ; AVX-LABEL: bool_reduction_v16i8:
1038 ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
1039 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1040 ; AVX-NEXT: testl %eax, %eax
1041 ; AVX-NEXT: setne %al
1044 ; AVX512-LABEL: bool_reduction_v16i8:
1046 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
1047 ; AVX512-NEXT: kortestw %k0, %k0
1048 ; AVX512-NEXT: setne %al
1050 %a = icmp sgt <16 x i8> %x, %y
; Step 1: fold lanes 8-15 onto lanes 0-7.
1051 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1052 %b = or <16 x i1> %s1, %a
; Step 2: fold lanes 4-7 onto lanes 0-3.
1053 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1054 %c = or <16 x i1> %s2, %b
; Step 3: fold lanes 2-3 onto lanes 0-1.
1055 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1056 %d = or <16 x i1> %s3, %c
; Step 4: fold lane 1 onto lane 0; lane 0 now holds the OR of all lanes.
1057 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1058 %e = or <16 x i1> %s4, %d
1059 %f = extractelement <16 x i1> %e, i32 0
; Any-of reduction over a signed < compare of two <4 x i64> vectors.
; The <4 x i1> compare result is OR-reduced with a two-step shuffle/or tree
; and lane 0 is extracted.
; The expected code performs the compare as pcmpgtq with the operands
; swapped. The +sse4.2 run compares two 128-bit halves and merges them with
; packssdw; the +avx run splits the 256-bit inputs with vextractf128,
; compares each half and rejoins them with vinsertf128; the +avx2 and
; +avx512 runs compare the full 256-bit registers directly.
1063 define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
1064 ; SSE-LABEL: bool_reduction_v4i64:
1066 ; SSE-NEXT: pcmpgtq %xmm1, %xmm3
1067 ; SSE-NEXT: pcmpgtq %xmm0, %xmm2
1068 ; SSE-NEXT: packssdw %xmm3, %xmm2
1069 ; SSE-NEXT: movmskps %xmm2, %eax
1070 ; SSE-NEXT: testl %eax, %eax
1071 ; SSE-NEXT: setne %al
1074 ; AVX1-LABEL: bool_reduction_v4i64:
1076 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1077 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1078 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1079 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
1080 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1081 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
1082 ; AVX1-NEXT: testl %eax, %eax
1083 ; AVX1-NEXT: setne %al
1084 ; AVX1-NEXT: vzeroupper
1087 ; AVX2-LABEL: bool_reduction_v4i64:
1089 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
1090 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
1091 ; AVX2-NEXT: testl %eax, %eax
1092 ; AVX2-NEXT: setne %al
1093 ; AVX2-NEXT: vzeroupper
1096 ; AVX512-LABEL: bool_reduction_v4i64:
1098 ; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0
1099 ; AVX512-NEXT: kmovd %k0, %eax
1100 ; AVX512-NEXT: testb %al, %al
1101 ; AVX512-NEXT: setne %al
1102 ; AVX512-NEXT: vzeroupper
1104 %a = icmp slt <4 x i64> %x, %y
; Step 1: fold lanes 2-3 onto lanes 0-1.
1105 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1106 %b = or <4 x i1> %s1, %a
; Step 2: fold lane 1 onto lane 0; lane 0 now holds the OR of all lanes.
1107 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1108 %c = or <4 x i1> %s2, %b
1109 %d = extractelement <4 x i1> %c, i32 0
; Any-of reduction over an unsigned <= compare of two <8 x i32> vectors.
; The <8 x i1> compare result is OR-reduced with a three-step shuffle/or tree
; and lane 0 is extracted.
; On the +sse4.2, +avx and +avx2 runs the expected code expresses x <= y as
; "unsigned min(x, y) == x" (pminud followed by pcmpeqd against x); the
; +avx512 run expects the unsigned mask compare vpcmpleud instead. The
; +sse4.2 and +avx runs work on 128-bit halves and recombine them before
; collecting the lane mask.
1113 define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
1114 ; SSE-LABEL: bool_reduction_v8i32:
1116 ; SSE-NEXT: pminud %xmm1, %xmm3
1117 ; SSE-NEXT: pcmpeqd %xmm1, %xmm3
1118 ; SSE-NEXT: pminud %xmm0, %xmm2
1119 ; SSE-NEXT: pcmpeqd %xmm0, %xmm2
1120 ; SSE-NEXT: packssdw %xmm3, %xmm2
1121 ; SSE-NEXT: pmovmskb %xmm2, %eax
1122 ; SSE-NEXT: testl %eax, %eax
1123 ; SSE-NEXT: setne %al
1126 ; AVX1-LABEL: bool_reduction_v8i32:
1128 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1129 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1130 ; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
1131 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2
1132 ; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm1
1133 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1134 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1135 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1136 ; AVX1-NEXT: testl %eax, %eax
1137 ; AVX1-NEXT: setne %al
1138 ; AVX1-NEXT: vzeroupper
1141 ; AVX2-LABEL: bool_reduction_v8i32:
1143 ; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1
1144 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
1145 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1146 ; AVX2-NEXT: testl %eax, %eax
1147 ; AVX2-NEXT: setne %al
1148 ; AVX2-NEXT: vzeroupper
1151 ; AVX512-LABEL: bool_reduction_v8i32:
1153 ; AVX512-NEXT: vpcmpleud %ymm1, %ymm0, %k0
1154 ; AVX512-NEXT: kmovd %k0, %eax
1155 ; AVX512-NEXT: testb %al, %al
1156 ; AVX512-NEXT: setne %al
1157 ; AVX512-NEXT: vzeroupper
1159 %a = icmp ule <8 x i32> %x, %y
; Step 1: fold lanes 4-7 onto lanes 0-3.
1160 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1161 %b = or <8 x i1> %s1, %a
; Step 2: fold lanes 2-3 onto lanes 0-1.
1162 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1163 %c = or <8 x i1> %s2, %b
; Step 3: fold lane 1 onto lane 0; lane 0 now holds the OR of all lanes.
1164 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1165 %d = or <8 x i1> %s3, %c
1166 %e = extractelement <8 x i1> %d, i32 0
; Any-of reduction over an == compare of two <16 x i16> vectors.
; The <16 x i1> compare result is OR-reduced with a four-step shuffle/or tree
; and lane 0 is extracted.
; The +sse4.2 and +avx runs compare two 128-bit halves and narrow them into
; one register with packsswb / vpacksswb before reading the byte mask; the
; +avx2 run reads the byte mask of the full 256-bit compare; the +avx512 run
; tests the mask register directly with kortestw.
1170 define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
1171 ; SSE-LABEL: bool_reduction_v16i16:
1173 ; SSE-NEXT: pcmpeqw %xmm3, %xmm1
1174 ; SSE-NEXT: pcmpeqw %xmm2, %xmm0
1175 ; SSE-NEXT: packsswb %xmm1, %xmm0
1176 ; SSE-NEXT: pmovmskb %xmm0, %eax
1177 ; SSE-NEXT: testl %eax, %eax
1178 ; SSE-NEXT: setne %al
1181 ; AVX1-LABEL: bool_reduction_v16i16:
1183 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1184 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1185 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2
1186 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
1187 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
1188 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1189 ; AVX1-NEXT: testl %eax, %eax
1190 ; AVX1-NEXT: setne %al
1191 ; AVX1-NEXT: vzeroupper
1194 ; AVX2-LABEL: bool_reduction_v16i16:
1196 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1197 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1198 ; AVX2-NEXT: testl %eax, %eax
1199 ; AVX2-NEXT: setne %al
1200 ; AVX2-NEXT: vzeroupper
1203 ; AVX512-LABEL: bool_reduction_v16i16:
1205 ; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
1206 ; AVX512-NEXT: kortestw %k0, %k0
1207 ; AVX512-NEXT: setne %al
1208 ; AVX512-NEXT: vzeroupper
1210 %a = icmp eq <16 x i16> %x, %y
; Step 1: fold lanes 8-15 onto lanes 0-7.
1211 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1212 %b = or <16 x i1> %s1, %a
; Step 2: fold lanes 4-7 onto lanes 0-3.
1213 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1214 %c = or <16 x i1> %s2, %b
; Step 3: fold lanes 2-3 onto lanes 0-1.
1215 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1216 %d = or <16 x i1> %s3, %c
; Step 4: fold lane 1 onto lane 0; lane 0 now holds the OR of all lanes.
1217 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1218 %e = or <16 x i1> %s4, %d
1219 %f = extractelement <16 x i1> %e, i32 0
1223 define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
1224 ; SSE-LABEL: bool_reduction_v32i8:
1226 ; SSE-NEXT: pcmpeqb %xmm3, %xmm1
1227 ; SSE-NEXT: pcmpeqb %xmm2, %xmm0
1228 ; SSE-NEXT: por %xmm1, %xmm0
1229 ; SSE-NEXT: pmovmskb %xmm0, %eax
1230 ; SSE-NEXT: testl %eax, %eax
1231 ; SSE-NEXT: setne %al
1234 ; AVX1-LABEL: bool_reduction_v32i8:
1236 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1237 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1238 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2
1239 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1240 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1241 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1242 ; AVX1-NEXT: testl %eax, %eax
1243 ; AVX1-NEXT: setne %al
1244 ; AVX1-NEXT: vzeroupper
1247 ; AVX2-LABEL: bool_reduction_v32i8:
1249 ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
1250 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1251 ; AVX2-NEXT: testl %eax, %eax
1252 ; AVX2-NEXT: setne %al
1253 ; AVX2-NEXT: vzeroupper
1256 ; AVX512-LABEL: bool_reduction_v32i8:
1258 ; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
1259 ; AVX512-NEXT: kortestd %k0, %k0
1260 ; AVX512-NEXT: setne %al
1261 ; AVX512-NEXT: vzeroupper
1263 %a = icmp eq <32 x i8> %x, %y
1264 %s1 = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1265 %b = or <32 x i1> %s1, %a
1266 %s2 = shufflevector <32 x i1> %b, <32 x i1> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1267 %c = or <32 x i1> %s2, %b
1268 %s3 = shufflevector <32 x i1> %c, <32 x i1> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1269 %d = or <32 x i1> %s3, %c
1270 %s4 = shufflevector <32 x i1> %d, <32 x i1> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1271 %e = or <32 x i1> %s4, %d
1272 %s5 = shufflevector <32 x i1> %e, <32 x i1> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1273 %f = or <32 x i1> %s5, %e
1274 %g = extractelement <32 x i1> %f, i32 0