1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
; All-of reduction of a <2 x double> ordered greater-than compare.
; The <2 x i1> compare mask is sign-extended to <2 x i64>, lane 1 is shuffled
; down and ANDed into lane 0, and lane 0 is extracted as %3. %3 is therefore
; all-ones only when both lanes compared true, and zero otherwise.
; The checks below expect a single movmskpd of the compare result that is
; tested against the full two-lane mask (3).
7 define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
8 ; SSE-LABEL: test_v2f64_sext:
10 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
11 ; SSE-NEXT: movmskpd %xmm1, %ecx
12 ; SSE-NEXT: xorl %eax, %eax
13 ; SSE-NEXT: cmpl $3, %ecx
18 ; AVX-LABEL: test_v2f64_sext:
20 ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
21 ; AVX-NEXT: vmovmskpd %xmm0, %ecx
22 ; AVX-NEXT: xorl %eax, %eax
23 ; AVX-NEXT: cmpl $3, %ecx
28 ; AVX512-LABEL: test_v2f64_sext:
30 ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
31 ; AVX512-NEXT: vmovmskpd %xmm0, %ecx
32 ; AVX512-NEXT: xorl %eax, %eax
33 ; AVX512-NEXT: cmpl $3, %ecx
34 ; AVX512-NEXT: sete %al
35 ; AVX512-NEXT: negq %rax
; Compare, widen every i1 to an i64 lane, then fold lane 1 into lane 0.
37 %c = fcmp ogt <2 x double> %a0, %a1
38 %s = sext <2 x i1> %c to <2 x i64>
39 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
40 %2 = and <2 x i64> %s, %1
41 %3 = extractelement <2 x i64> %2, i32 0
; All-of reduction of a <4 x double> ordered greater-than compare.
; The mask is sign-extended to <4 x i64> and reduced with two shuffle+AND
; steps (lanes 2,3 onto 0,1, then lane 1 onto lane 0); lane 0 is extracted
; as %5.
; The checks below expect the 128-bit lowering to AND the two compare halves
; and test the movmskpd result against 3, while the 256-bit lowerings test a
; single ymm movmskpd against the four-lane mask (15).
45 define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
46 ; SSE-LABEL: test_v4f64_sext:
48 ; SSE-NEXT: cmpltpd %xmm1, %xmm3
49 ; SSE-NEXT: cmpltpd %xmm0, %xmm2
50 ; SSE-NEXT: andpd %xmm3, %xmm2
51 ; SSE-NEXT: movmskpd %xmm2, %ecx
52 ; SSE-NEXT: xorl %eax, %eax
53 ; SSE-NEXT: cmpl $3, %ecx
58 ; AVX-LABEL: test_v4f64_sext:
60 ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
61 ; AVX-NEXT: vmovmskpd %ymm0, %ecx
62 ; AVX-NEXT: xorl %eax, %eax
63 ; AVX-NEXT: cmpl $15, %ecx
66 ; AVX-NEXT: vzeroupper
69 ; AVX512-LABEL: test_v4f64_sext:
71 ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
72 ; AVX512-NEXT: vmovmskpd %ymm0, %ecx
73 ; AVX512-NEXT: xorl %eax, %eax
74 ; AVX512-NEXT: cmpl $15, %ecx
75 ; AVX512-NEXT: sete %al
76 ; AVX512-NEXT: negq %rax
77 ; AVX512-NEXT: vzeroupper
; Compare, widen to i64 lanes, then halve the live lanes twice with shuffle+AND.
79 %c = fcmp ogt <4 x double> %a0, %a1
80 %s = sext <4 x i1> %c to <4 x i64>
81 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
82 %2 = and <4 x i64> %s, %1
83 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
84 %4 = and <4 x i64> %2, %3
85 %5 = extractelement <4 x i64> %4, i64 0
; Variant of the <4 x double> all-of reduction in which the compare mask is
; sign-extended to <4 x i32> (a 128-bit vector) instead of <4 x i64>.
; The reduction runs on i32 lanes, lane 0 is extracted as %5, and the scalar
; is then sign-extended to i64 as %6.
; The checks below expect the mask to be narrowed to four 32-bit lanes
; (packssdw, or a k-register masked move in the AVX512 run) before a movmskps
; that is tested against 15.
89 define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
90 ; SSE-LABEL: test_v4f64_legal_sext:
92 ; SSE-NEXT: cmpltpd %xmm1, %xmm3
93 ; SSE-NEXT: cmpltpd %xmm0, %xmm2
94 ; SSE-NEXT: packssdw %xmm3, %xmm2
95 ; SSE-NEXT: movmskps %xmm2, %ecx
96 ; SSE-NEXT: xorl %eax, %eax
97 ; SSE-NEXT: cmpl $15, %ecx
102 ; AVX-LABEL: test_v4f64_legal_sext:
104 ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
105 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
106 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
107 ; AVX-NEXT: vmovmskps %xmm0, %ecx
108 ; AVX-NEXT: xorl %eax, %eax
109 ; AVX-NEXT: cmpl $15, %ecx
111 ; AVX-NEXT: negq %rax
112 ; AVX-NEXT: vzeroupper
115 ; AVX512-LABEL: test_v4f64_legal_sext:
117 ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
118 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
119 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
120 ; AVX512-NEXT: vmovmskps %xmm0, %ecx
121 ; AVX512-NEXT: xorl %eax, %eax
122 ; AVX512-NEXT: cmpl $15, %ecx
123 ; AVX512-NEXT: sete %al
124 ; AVX512-NEXT: negq %rax
125 ; AVX512-NEXT: vzeroupper
; Compare, widen only to i32 lanes, reduce, then widen the scalar to i64.
127 %c = fcmp ogt <4 x double> %a0, %a1
128 %s = sext <4 x i1> %c to <4 x i32>
129 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
130 %2 = and <4 x i32> %s, %1
131 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
132 %4 = and <4 x i32> %2, %3
133 %5 = extractelement <4 x i32> %4, i64 0
134 %6 = sext i32 %5 to i64
; All-of reduction of a <4 x float> ordered greater-than compare.
; The mask is sign-extended to <4 x i32> and reduced with two shuffle+AND
; steps; lane 0 is extracted as %5.
; The checks below expect a movmskps of the compare result tested against the
; four-lane mask (15).
138 define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
139 ; SSE-LABEL: test_v4f32_sext:
141 ; SSE-NEXT: cmpltps %xmm0, %xmm1
142 ; SSE-NEXT: movmskps %xmm1, %ecx
143 ; SSE-NEXT: xorl %eax, %eax
144 ; SSE-NEXT: cmpl $15, %ecx
146 ; SSE-NEXT: negl %eax
149 ; AVX-LABEL: test_v4f32_sext:
151 ; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
152 ; AVX-NEXT: vmovmskps %xmm0, %ecx
153 ; AVX-NEXT: xorl %eax, %eax
154 ; AVX-NEXT: cmpl $15, %ecx
156 ; AVX-NEXT: negl %eax
159 ; AVX512-LABEL: test_v4f32_sext:
161 ; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
162 ; AVX512-NEXT: vmovmskps %xmm0, %ecx
163 ; AVX512-NEXT: xorl %eax, %eax
164 ; AVX512-NEXT: cmpl $15, %ecx
165 ; AVX512-NEXT: sete %al
166 ; AVX512-NEXT: negl %eax
; Compare, widen to i32 lanes, then fold lanes 2,3 and finally lane 1 into lane 0.
168 %c = fcmp ogt <4 x float> %a0, %a1
169 %s = sext <4 x i1> %c to <4 x i32>
170 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
171 %2 = and <4 x i32> %s, %1
172 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
173 %4 = and <4 x i32> %2, %3
174 %5 = extractelement <4 x i32> %4, i32 0
; All-of reduction of an <8 x float> ordered greater-than compare.
; The mask is sign-extended to <8 x i32> and reduced with three shuffle+AND
; steps (lanes 4-7, then 2-3, then 1, each folded toward lane 0); lane 0 is
; extracted as %7.
; The checks below expect the 128-bit lowering to AND the two compare halves
; and test movmskps against 15, while the 256-bit lowerings test a single ymm
; movmskps against the eight-lane mask (255).
178 define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
179 ; SSE-LABEL: test_v8f32_sext:
181 ; SSE-NEXT: cmpltps %xmm1, %xmm3
182 ; SSE-NEXT: cmpltps %xmm0, %xmm2
183 ; SSE-NEXT: andps %xmm3, %xmm2
184 ; SSE-NEXT: movmskps %xmm2, %ecx
185 ; SSE-NEXT: xorl %eax, %eax
186 ; SSE-NEXT: cmpl $15, %ecx
188 ; SSE-NEXT: negl %eax
191 ; AVX-LABEL: test_v8f32_sext:
193 ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
194 ; AVX-NEXT: vmovmskps %ymm0, %ecx
195 ; AVX-NEXT: xorl %eax, %eax
196 ; AVX-NEXT: cmpl $255, %ecx
198 ; AVX-NEXT: negl %eax
199 ; AVX-NEXT: vzeroupper
202 ; AVX512-LABEL: test_v8f32_sext:
204 ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
205 ; AVX512-NEXT: vmovmskps %ymm0, %ecx
206 ; AVX512-NEXT: xorl %eax, %eax
207 ; AVX512-NEXT: cmpl $255, %ecx
208 ; AVX512-NEXT: sete %al
209 ; AVX512-NEXT: negl %eax
210 ; AVX512-NEXT: vzeroupper
; Compare, widen to i32 lanes, then halve the live lanes three times.
212 %c = fcmp ogt <8 x float> %a0, %a1
213 %s = sext <8 x i1> %c to <8 x i32>
214 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
215 %2 = and <8 x i32> %s, %1
216 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
217 %4 = and <8 x i32> %2, %3
218 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
219 %6 = and <8 x i32> %4, %5
220 %7 = extractelement <8 x i32> %6, i32 0
; Variant of the <8 x float> all-of reduction in which the compare mask is
; sign-extended to <8 x i16> (a 128-bit vector) instead of <8 x i32>.
; The reduction runs on i16 lanes, lane 0 is extracted as %7, and the scalar
; is then sign-extended to i32 as %8.
; The checks below expect the mask to be narrowed to eight 16-bit lanes
; (packssdw, or vpmovm2w in the AVX512 run) before a pmovmskb that is tested
; against 0xFFFF.
224 define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
225 ; SSE-LABEL: test_v8f32_legal_sext:
227 ; SSE-NEXT: cmpltps %xmm1, %xmm3
228 ; SSE-NEXT: cmpltps %xmm0, %xmm2
229 ; SSE-NEXT: packssdw %xmm3, %xmm2
230 ; SSE-NEXT: pmovmskb %xmm2, %ecx
231 ; SSE-NEXT: xorl %eax, %eax
232 ; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
234 ; SSE-NEXT: negl %eax
237 ; AVX-LABEL: test_v8f32_legal_sext:
239 ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
240 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
241 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
242 ; AVX-NEXT: vpmovmskb %xmm0, %ecx
243 ; AVX-NEXT: xorl %eax, %eax
244 ; AVX-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
246 ; AVX-NEXT: negl %eax
247 ; AVX-NEXT: vzeroupper
250 ; AVX512-LABEL: test_v8f32_legal_sext:
252 ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0
253 ; AVX512-NEXT: vpmovm2w %k0, %xmm0
254 ; AVX512-NEXT: vpmovmskb %xmm0, %ecx
255 ; AVX512-NEXT: xorl %eax, %eax
256 ; AVX512-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
257 ; AVX512-NEXT: sete %al
258 ; AVX512-NEXT: negl %eax
259 ; AVX512-NEXT: vzeroupper
; Compare, widen only to i16 lanes, reduce, then widen the scalar to i32.
261 %c = fcmp ogt <8 x float> %a0, %a1
262 %s = sext <8 x i1> %c to <8 x i16>
263 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
264 %2 = and <8 x i16> %s, %1
265 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
266 %4 = and <8 x i16> %2, %3
267 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
268 %6 = and <8 x i16> %4, %5
269 %7 = extractelement <8 x i16> %6, i32 0
270 %8 = sext i16 %7 to i32
; All-of reduction of a <2 x i64> signed greater-than compare.
; The mask is sign-extended to <2 x i64>, lane 1 is ANDed into lane 0 via a
; shuffle, and lane 0 is extracted as %3.
; The checks below expect pcmpgtq followed by a movmskpd that is tested
; against the two-lane mask (3).
274 define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
275 ; SSE-LABEL: test_v2i64_sext:
277 ; SSE-NEXT: pcmpgtq %xmm1, %xmm0
278 ; SSE-NEXT: movmskpd %xmm0, %ecx
279 ; SSE-NEXT: xorl %eax, %eax
280 ; SSE-NEXT: cmpl $3, %ecx
282 ; SSE-NEXT: negq %rax
285 ; AVX-LABEL: test_v2i64_sext:
287 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
288 ; AVX-NEXT: vmovmskpd %xmm0, %ecx
289 ; AVX-NEXT: xorl %eax, %eax
290 ; AVX-NEXT: cmpl $3, %ecx
292 ; AVX-NEXT: negq %rax
295 ; AVX512-LABEL: test_v2i64_sext:
297 ; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
298 ; AVX512-NEXT: vmovmskpd %xmm0, %ecx
299 ; AVX512-NEXT: xorl %eax, %eax
300 ; AVX512-NEXT: cmpl $3, %ecx
301 ; AVX512-NEXT: sete %al
302 ; AVX512-NEXT: negq %rax
; Compare, widen every i1 to an i64 lane, then fold lane 1 into lane 0.
304 %c = icmp sgt <2 x i64> %a0, %a1
305 %s = sext <2 x i1> %c to <2 x i64>
306 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
307 %2 = and <2 x i64> %s, %1
308 %3 = extractelement <2 x i64> %2, i32 0
; All-of reduction of a <4 x i64> signed greater-than compare.
; The mask is sign-extended to <4 x i64> and reduced with two shuffle+AND
; steps; lane 0 is extracted as %5.
; The checks below cover four lowerings. The 128-bit one ANDs two pcmpgtq
; halves and tests movmskpd against 3. The AVX1 one compares the two halves
; separately and reassembles a ymm before testing movmskpd against 15. The
; AVX2 and AVX512 ones use a single ymm vpcmpgtq and test against 15.
312 define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
313 ; SSE-LABEL: test_v4i64_sext:
315 ; SSE-NEXT: pcmpgtq %xmm3, %xmm1
316 ; SSE-NEXT: pcmpgtq %xmm2, %xmm0
317 ; SSE-NEXT: pand %xmm1, %xmm0
318 ; SSE-NEXT: movmskpd %xmm0, %ecx
319 ; SSE-NEXT: xorl %eax, %eax
320 ; SSE-NEXT: cmpl $3, %ecx
322 ; SSE-NEXT: negq %rax
325 ; AVX1-LABEL: test_v4i64_sext:
327 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
328 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
329 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
330 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
331 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
332 ; AVX1-NEXT: vmovmskpd %ymm0, %ecx
333 ; AVX1-NEXT: xorl %eax, %eax
334 ; AVX1-NEXT: cmpl $15, %ecx
335 ; AVX1-NEXT: sete %al
336 ; AVX1-NEXT: negq %rax
337 ; AVX1-NEXT: vzeroupper
340 ; AVX2-LABEL: test_v4i64_sext:
342 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
343 ; AVX2-NEXT: vmovmskpd %ymm0, %ecx
344 ; AVX2-NEXT: xorl %eax, %eax
345 ; AVX2-NEXT: cmpl $15, %ecx
346 ; AVX2-NEXT: sete %al
347 ; AVX2-NEXT: negq %rax
348 ; AVX2-NEXT: vzeroupper
351 ; AVX512-LABEL: test_v4i64_sext:
353 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
354 ; AVX512-NEXT: vmovmskpd %ymm0, %ecx
355 ; AVX512-NEXT: xorl %eax, %eax
356 ; AVX512-NEXT: cmpl $15, %ecx
357 ; AVX512-NEXT: sete %al
358 ; AVX512-NEXT: negq %rax
359 ; AVX512-NEXT: vzeroupper
; Compare, widen to i64 lanes, then halve the live lanes twice with shuffle+AND.
361 %c = icmp sgt <4 x i64> %a0, %a1
362 %s = sext <4 x i1> %c to <4 x i64>
363 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
364 %2 = and <4 x i64> %s, %1
365 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
366 %4 = and <4 x i64> %2, %3
367 %5 = extractelement <4 x i64> %4, i64 0
; Variant of the <4 x i64> all-of reduction in which the compare mask is
; sign-extended to <4 x i32> (a 128-bit vector) instead of <4 x i64>.
; The reduction runs on i32 lanes, lane 0 is extracted as %5, and the scalar
; is then sign-extended to i64 as %6.
; The checks below expect the mask to be narrowed to four 32-bit lanes
; (packssdw, or a k-register masked move in the AVX512 run) before a movmskps
; that is tested against 15.
371 define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
372 ; SSE-LABEL: test_v4i64_legal_sext:
374 ; SSE-NEXT: pcmpgtq %xmm3, %xmm1
375 ; SSE-NEXT: pcmpgtq %xmm2, %xmm0
376 ; SSE-NEXT: packssdw %xmm1, %xmm0
377 ; SSE-NEXT: movmskps %xmm0, %ecx
378 ; SSE-NEXT: xorl %eax, %eax
379 ; SSE-NEXT: cmpl $15, %ecx
381 ; SSE-NEXT: negq %rax
384 ; AVX1-LABEL: test_v4i64_legal_sext:
386 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
387 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
388 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
389 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
390 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
391 ; AVX1-NEXT: vmovmskps %xmm0, %ecx
392 ; AVX1-NEXT: xorl %eax, %eax
393 ; AVX1-NEXT: cmpl $15, %ecx
394 ; AVX1-NEXT: sete %al
395 ; AVX1-NEXT: negq %rax
396 ; AVX1-NEXT: vzeroupper
399 ; AVX2-LABEL: test_v4i64_legal_sext:
401 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
402 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
403 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
404 ; AVX2-NEXT: vmovmskps %xmm0, %ecx
405 ; AVX2-NEXT: xorl %eax, %eax
406 ; AVX2-NEXT: cmpl $15, %ecx
407 ; AVX2-NEXT: sete %al
408 ; AVX2-NEXT: negq %rax
409 ; AVX2-NEXT: vzeroupper
412 ; AVX512-LABEL: test_v4i64_legal_sext:
414 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
415 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
416 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
417 ; AVX512-NEXT: vmovmskps %xmm0, %ecx
418 ; AVX512-NEXT: xorl %eax, %eax
419 ; AVX512-NEXT: cmpl $15, %ecx
420 ; AVX512-NEXT: sete %al
421 ; AVX512-NEXT: negq %rax
422 ; AVX512-NEXT: vzeroupper
; Compare, widen only to i32 lanes, reduce, then widen the scalar to i64.
424 %c = icmp sgt <4 x i64> %a0, %a1
425 %s = sext <4 x i1> %c to <4 x i32>
426 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
427 %2 = and <4 x i32> %s, %1
428 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
429 %4 = and <4 x i32> %2, %3
430 %5 = extractelement <4 x i32> %4, i64 0
431 %6 = sext i32 %5 to i64
; All-of reduction of a <4 x i32> signed greater-than compare.
; The mask is sign-extended to <4 x i32> and reduced with two shuffle+AND
; steps; lane 0 is extracted as %5.
; The checks below expect pcmpgtd followed by a movmskps that is tested
; against the four-lane mask (15).
435 define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
436 ; SSE-LABEL: test_v4i32_sext:
438 ; SSE-NEXT: pcmpgtd %xmm1, %xmm0
439 ; SSE-NEXT: movmskps %xmm0, %ecx
440 ; SSE-NEXT: xorl %eax, %eax
441 ; SSE-NEXT: cmpl $15, %ecx
443 ; SSE-NEXT: negl %eax
446 ; AVX-LABEL: test_v4i32_sext:
448 ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
449 ; AVX-NEXT: vmovmskps %xmm0, %ecx
450 ; AVX-NEXT: xorl %eax, %eax
451 ; AVX-NEXT: cmpl $15, %ecx
453 ; AVX-NEXT: negl %eax
456 ; AVX512-LABEL: test_v4i32_sext:
458 ; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
459 ; AVX512-NEXT: vmovmskps %xmm0, %ecx
460 ; AVX512-NEXT: xorl %eax, %eax
461 ; AVX512-NEXT: cmpl $15, %ecx
462 ; AVX512-NEXT: sete %al
463 ; AVX512-NEXT: negl %eax
; Compare, widen to i32 lanes, then fold lanes 2,3 and finally lane 1 into lane 0.
465 %c = icmp sgt <4 x i32> %a0, %a1
466 %s = sext <4 x i1> %c to <4 x i32>
467 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
468 %2 = and <4 x i32> %s, %1
469 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
470 %4 = and <4 x i32> %2, %3
471 %5 = extractelement <4 x i32> %4, i32 0
; All-of reduction of an <8 x i32> signed greater-than compare.
; The mask is sign-extended to <8 x i32> and reduced with three shuffle+AND
; steps; lane 0 is extracted as %7.
; The checks below cover four lowerings. The 128-bit one ANDs two pcmpgtd
; halves and tests movmskps against 15. The AVX1 one compares the two halves
; separately and reassembles a ymm before testing movmskps against 255. The
; AVX2 and AVX512 ones use a single ymm vpcmpgtd and test against 255.
475 define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
476 ; SSE-LABEL: test_v8i32_sext:
478 ; SSE-NEXT: pcmpgtd %xmm3, %xmm1
479 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0
480 ; SSE-NEXT: pand %xmm1, %xmm0
481 ; SSE-NEXT: movmskps %xmm0, %ecx
482 ; SSE-NEXT: xorl %eax, %eax
483 ; SSE-NEXT: cmpl $15, %ecx
485 ; SSE-NEXT: negl %eax
488 ; AVX1-LABEL: test_v8i32_sext:
490 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
491 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
492 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
493 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
494 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
495 ; AVX1-NEXT: vmovmskps %ymm0, %ecx
496 ; AVX1-NEXT: xorl %eax, %eax
497 ; AVX1-NEXT: cmpl $255, %ecx
498 ; AVX1-NEXT: sete %al
499 ; AVX1-NEXT: negl %eax
500 ; AVX1-NEXT: vzeroupper
503 ; AVX2-LABEL: test_v8i32_sext:
505 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
506 ; AVX2-NEXT: vmovmskps %ymm0, %ecx
507 ; AVX2-NEXT: xorl %eax, %eax
508 ; AVX2-NEXT: cmpl $255, %ecx
509 ; AVX2-NEXT: sete %al
510 ; AVX2-NEXT: negl %eax
511 ; AVX2-NEXT: vzeroupper
514 ; AVX512-LABEL: test_v8i32_sext:
516 ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
517 ; AVX512-NEXT: vmovmskps %ymm0, %ecx
518 ; AVX512-NEXT: xorl %eax, %eax
519 ; AVX512-NEXT: cmpl $255, %ecx
520 ; AVX512-NEXT: sete %al
521 ; AVX512-NEXT: negl %eax
522 ; AVX512-NEXT: vzeroupper
; Compare, widen to i32 lanes, then halve the live lanes three times.
524 %c = icmp sgt <8 x i32> %a0, %a1
525 %s = sext <8 x i1> %c to <8 x i32>
526 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
527 %2 = and <8 x i32> %s, %1
528 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
529 %4 = and <8 x i32> %2, %3
530 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
531 %6 = and <8 x i32> %4, %5
532 %7 = extractelement <8 x i32> %6, i32 0
; Variant of the <8 x i32> all-of reduction in which the compare mask is
; sign-extended to <8 x i16> (a 128-bit vector) instead of <8 x i32>.
; The reduction runs on i16 lanes, lane 0 is extracted as %7, and the scalar
; is then sign-extended to i32 as %8.
; The checks below expect the mask to be narrowed to eight 16-bit lanes
; (packssdw, or vpmovm2w in the AVX512 run) before a pmovmskb that is tested
; against 0xFFFF.
536 define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
537 ; SSE-LABEL: test_v8i32_legal_sext:
539 ; SSE-NEXT: pcmpgtd %xmm3, %xmm1
540 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0
541 ; SSE-NEXT: packssdw %xmm1, %xmm0
542 ; SSE-NEXT: pmovmskb %xmm0, %ecx
543 ; SSE-NEXT: xorl %eax, %eax
544 ; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
546 ; SSE-NEXT: negl %eax
549 ; AVX1-LABEL: test_v8i32_legal_sext:
551 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
552 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
553 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
554 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
555 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
556 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
557 ; AVX1-NEXT: xorl %eax, %eax
558 ; AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
559 ; AVX1-NEXT: sete %al
560 ; AVX1-NEXT: negl %eax
561 ; AVX1-NEXT: vzeroupper
564 ; AVX2-LABEL: test_v8i32_legal_sext:
566 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
567 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
568 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
569 ; AVX2-NEXT: vpmovmskb %xmm0, %ecx
570 ; AVX2-NEXT: xorl %eax, %eax
571 ; AVX2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
572 ; AVX2-NEXT: sete %al
573 ; AVX2-NEXT: negl %eax
574 ; AVX2-NEXT: vzeroupper
577 ; AVX512-LABEL: test_v8i32_legal_sext:
579 ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
580 ; AVX512-NEXT: vpmovm2w %k0, %xmm0
581 ; AVX512-NEXT: vpmovmskb %xmm0, %ecx
582 ; AVX512-NEXT: xorl %eax, %eax
583 ; AVX512-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
584 ; AVX512-NEXT: sete %al
585 ; AVX512-NEXT: negl %eax
586 ; AVX512-NEXT: vzeroupper
; Compare, widen only to i16 lanes, reduce, then widen the scalar to i32.
588 %c = icmp sgt <8 x i32> %a0, %a1
589 %s = sext <8 x i1> %c to <8 x i16>
590 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
591 %2 = and <8 x i16> %s, %1
592 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
593 %4 = and <8 x i16> %2, %3
594 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
595 %6 = and <8 x i16> %4, %5
596 %7 = extractelement <8 x i16> %6, i32 0
597 %8 = sext i16 %7 to i32
; All-of reduction of an <8 x i16> signed greater-than compare.
; The mask is sign-extended to <8 x i16> and reduced with three shuffle+AND
; steps; lane 0 is extracted as %7.
; The checks below expect pcmpgtw followed by a byte-granular pmovmskb that is
; tested against 0xFFFF, with the i16 result taken from the low half of eax.
601 define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
602 ; SSE-LABEL: test_v8i16_sext:
604 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0
605 ; SSE-NEXT: pmovmskb %xmm0, %ecx
606 ; SSE-NEXT: xorl %eax, %eax
607 ; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
609 ; SSE-NEXT: negl %eax
610 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
613 ; AVX-LABEL: test_v8i16_sext:
615 ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
616 ; AVX-NEXT: vpmovmskb %xmm0, %ecx
617 ; AVX-NEXT: xorl %eax, %eax
618 ; AVX-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
620 ; AVX-NEXT: negl %eax
621 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
624 ; AVX512-LABEL: test_v8i16_sext:
626 ; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
627 ; AVX512-NEXT: vpmovmskb %xmm0, %ecx
628 ; AVX512-NEXT: xorl %eax, %eax
629 ; AVX512-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
630 ; AVX512-NEXT: sete %al
631 ; AVX512-NEXT: negl %eax
632 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; Compare, widen to i16 lanes, then halve the live lanes three times.
634 %c = icmp sgt <8 x i16> %a0, %a1
635 %s = sext <8 x i1> %c to <8 x i16>
636 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
637 %2 = and <8 x i16> %s, %1
638 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
639 %4 = and <8 x i16> %2, %3
640 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
641 %6 = and <8 x i16> %4, %5
642 %7 = extractelement <8 x i16> %6, i32 0
; All-of reduction of a <16 x i16> signed greater-than compare.
; The mask is sign-extended to <16 x i16> and reduced with four shuffle+AND
; steps (lanes 8-15, 4-7, 2-3, then 1, each folded toward lane 0); lane 0 is
; extracted as %9.
; The checks below expect the 128-bit and AVX1 lowerings to AND the two
; compare halves and test pmovmskb against 0xFFFF, while the AVX2 and AVX512
; lowerings test a 32-bit ymm pmovmskb result against -1.
646 define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
647 ; SSE-LABEL: test_v16i16_sext:
649 ; SSE-NEXT: pcmpgtw %xmm3, %xmm1
650 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0
651 ; SSE-NEXT: pand %xmm1, %xmm0
652 ; SSE-NEXT: pmovmskb %xmm0, %ecx
653 ; SSE-NEXT: xorl %eax, %eax
654 ; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
656 ; SSE-NEXT: negl %eax
657 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
660 ; AVX1-LABEL: test_v16i16_sext:
662 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
663 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
664 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
665 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
666 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
667 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
668 ; AVX1-NEXT: xorl %eax, %eax
669 ; AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
670 ; AVX1-NEXT: sete %al
671 ; AVX1-NEXT: negl %eax
672 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
673 ; AVX1-NEXT: vzeroupper
676 ; AVX2-LABEL: test_v16i16_sext:
678 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
679 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
680 ; AVX2-NEXT: xorl %eax, %eax
681 ; AVX2-NEXT: cmpl $-1, %ecx
682 ; AVX2-NEXT: sete %al
683 ; AVX2-NEXT: negl %eax
684 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
685 ; AVX2-NEXT: vzeroupper
688 ; AVX512-LABEL: test_v16i16_sext:
690 ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
691 ; AVX512-NEXT: vpmovmskb %ymm0, %ecx
692 ; AVX512-NEXT: xorl %eax, %eax
693 ; AVX512-NEXT: cmpl $-1, %ecx
694 ; AVX512-NEXT: sete %al
695 ; AVX512-NEXT: negl %eax
696 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
697 ; AVX512-NEXT: vzeroupper
; Compare, widen to i16 lanes, then halve the live lanes four times.
699 %c = icmp sgt <16 x i16> %a0, %a1
700 %s = sext <16 x i1> %c to <16 x i16>
701 %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
702 %2 = and <16 x i16> %s, %1
703 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
704 %4 = and <16 x i16> %2, %3
705 %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
706 %6 = and <16 x i16> %4, %5
707 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
708 %8 = and <16 x i16> %6, %7
709 %9 = extractelement <16 x i16> %8, i32 0
; Variant of the <16 x i16> all-of reduction in which the compare mask is
; sign-extended to <16 x i8> (a 128-bit vector) instead of <16 x i16>.
; The reduction runs on i8 lanes, lane 0 is extracted as %9, and the scalar
; is then sign-extended to i16 as %10.
; The checks below expect the mask to be narrowed to sixteen byte lanes
; (packsswb, or vpmovm2b in the AVX512 run) before pmovmskb is tested against
; 0xFFFF; the AVX2 lowering instead tests a ymm pmovmskb against -1. The
; scalar widening shows up as movsbl.
713 define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
714 ; SSE-LABEL: test_v16i16_legal_sext:
716 ; SSE-NEXT: pcmpgtw %xmm3, %xmm1
717 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0
718 ; SSE-NEXT: packsswb %xmm1, %xmm0
719 ; SSE-NEXT: pmovmskb %xmm0, %eax
720 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
723 ; SSE-NEXT: movsbl %al, %eax
724 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
727 ; AVX1-LABEL: test_v16i16_legal_sext:
729 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
730 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
731 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
732 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
733 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
734 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
735 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
736 ; AVX1-NEXT: sete %al
737 ; AVX1-NEXT: negb %al
738 ; AVX1-NEXT: movsbl %al, %eax
739 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
740 ; AVX1-NEXT: vzeroupper
743 ; AVX2-LABEL: test_v16i16_legal_sext:
745 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
746 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
747 ; AVX2-NEXT: cmpl $-1, %eax
748 ; AVX2-NEXT: sete %al
749 ; AVX2-NEXT: negb %al
750 ; AVX2-NEXT: movsbl %al, %eax
751 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
752 ; AVX2-NEXT: vzeroupper
755 ; AVX512-LABEL: test_v16i16_legal_sext:
757 ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
758 ; AVX512-NEXT: vpmovm2b %k0, %xmm0
759 ; AVX512-NEXT: vpmovmskb %xmm0, %eax
760 ; AVX512-NEXT: cmpl $65535, %eax # imm = 0xFFFF
761 ; AVX512-NEXT: sete %al
762 ; AVX512-NEXT: negb %al
763 ; AVX512-NEXT: movsbl %al, %eax
764 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
765 ; AVX512-NEXT: vzeroupper
; Compare, widen only to i8 lanes, reduce, then widen the scalar to i16.
767 %c = icmp sgt <16 x i16> %a0, %a1
768 %s = sext <16 x i1> %c to <16 x i8>
769 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
770 %2 = and <16 x i8> %s, %1
771 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
772 %4 = and <16 x i8> %2, %3
773 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
774 %6 = and <16 x i8> %4, %5
775 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
776 %8 = and <16 x i8> %6, %7
777 %9 = extractelement <16 x i8> %8, i32 0
778 %10 = sext i8 %9 to i16
; All-of reduction of a <16 x i8> signed greater-than compare.
; The mask is sign-extended to <16 x i8> and reduced with four shuffle+AND
; steps; lane 0 is extracted as %9.
; The checks below expect pcmpgtb followed by a pmovmskb that is tested
; against the sixteen-lane mask (0xFFFF).
782 define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
783 ; SSE-LABEL: test_v16i8_sext:
785 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0
786 ; SSE-NEXT: pmovmskb %xmm0, %eax
787 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
792 ; AVX-LABEL: test_v16i8_sext:
794 ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
795 ; AVX-NEXT: vpmovmskb %xmm0, %eax
796 ; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
801 ; AVX512-LABEL: test_v16i8_sext:
803 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
804 ; AVX512-NEXT: vpmovmskb %xmm0, %eax
805 ; AVX512-NEXT: cmpl $65535, %eax # imm = 0xFFFF
806 ; AVX512-NEXT: sete %al
807 ; AVX512-NEXT: negb %al
; Compare, widen to i8 lanes, then halve the live lanes four times.
809 %c = icmp sgt <16 x i8> %a0, %a1
810 %s = sext <16 x i1> %c to <16 x i8>
811 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
812 %2 = and <16 x i8> %s, %1
813 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
814 %4 = and <16 x i8> %2, %3
815 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
816 %6 = and <16 x i8> %4, %5
817 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
818 %8 = and <16 x i8> %6, %7
819 %9 = extractelement <16 x i8> %8, i32 0
823 define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
824 ; SSE-LABEL: test_v32i8_sext:
826 ; SSE-NEXT: pcmpgtb %xmm3, %xmm1
827 ; SSE-NEXT: pcmpgtb %xmm2, %xmm0
828 ; SSE-NEXT: pand %xmm1, %xmm0
829 ; SSE-NEXT: pmovmskb %xmm0, %eax
830 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
835 ; AVX1-LABEL: test_v32i8_sext:
837 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
838 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
839 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
840 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
841 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
842 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
843 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
844 ; AVX1-NEXT: sete %al
845 ; AVX1-NEXT: negb %al
846 ; AVX1-NEXT: vzeroupper
849 ; AVX2-LABEL: test_v32i8_sext:
851 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
852 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
853 ; AVX2-NEXT: cmpl $-1, %eax
854 ; AVX2-NEXT: sete %al
855 ; AVX2-NEXT: negb %al
856 ; AVX2-NEXT: vzeroupper
859 ; AVX512-LABEL: test_v32i8_sext:
861 ; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
862 ; AVX512-NEXT: vpmovmskb %ymm0, %eax
863 ; AVX512-NEXT: cmpl $-1, %eax
864 ; AVX512-NEXT: sete %al
865 ; AVX512-NEXT: negb %al
866 ; AVX512-NEXT: vzeroupper
868 %c = icmp sgt <32 x i8> %a0, %a1
869 %s = sext <32 x i1> %c to <32 x i8>
870 %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
871 %2 = and <32 x i8> %s, %1
872 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
873 %4 = and <32 x i8> %2, %3
874 %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
875 %6 = and <32 x i8> %4, %5
876 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
877 %8 = and <32 x i8> %6, %7
878 %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
879 %10 = and <32 x i8> %8, %9
880 %11 = extractelement <32 x i8> %10, i32 0
; All-of reduction performed directly on the <2 x i1> result of an ordered
; greater-than compare, with no sign-extension step.
; Lane 1 is shuffled down and ANDed into lane 0, and lane 0 is extracted as
; the i1 value %d.
; The checks below expect a movmskpd (or a kmovd from a k-register in the
; AVX512 run) whose low byte is compared against the two-lane mask (3).
884 define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) {
885 ; SSE-LABEL: bool_reduction_v2f64:
887 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
888 ; SSE-NEXT: movmskpd %xmm1, %eax
889 ; SSE-NEXT: cmpb $3, %al
893 ; AVX-LABEL: bool_reduction_v2f64:
895 ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
896 ; AVX-NEXT: vmovmskpd %xmm0, %eax
897 ; AVX-NEXT: cmpb $3, %al
901 ; AVX512-LABEL: bool_reduction_v2f64:
903 ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0
904 ; AVX512-NEXT: kmovd %k0, %eax
905 ; AVX512-NEXT: cmpb $3, %al
906 ; AVX512-NEXT: sete %al
; Compare, then fold lane 1 into lane 0 on the i1 vector itself.
908 %a = fcmp ogt <2 x double> %x, %y
909 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
910 %c = and <2 x i1> %a, %b
911 %d = extractelement <2 x i1> %c, i32 0
; All-of reduction performed directly on the <4 x i1> result of an ordered
; equality compare, with no sign-extension step.
; Two shuffle+AND steps fold lanes 2,3 and then lane 1 into lane 0; each AND
; takes the shuffled vector as its first operand. Lane 0 is extracted as the
; i1 value %d.
; The checks below expect a movmskps (or a kmovd from a k-register in the
; AVX512 run) whose low byte is compared against the four-lane mask (15).
915 define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) {
916 ; SSE-LABEL: bool_reduction_v4f32:
918 ; SSE-NEXT: cmpeqps %xmm1, %xmm0
919 ; SSE-NEXT: movmskps %xmm0, %eax
920 ; SSE-NEXT: cmpb $15, %al
924 ; AVX-LABEL: bool_reduction_v4f32:
926 ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
927 ; AVX-NEXT: vmovmskps %xmm0, %eax
928 ; AVX-NEXT: cmpb $15, %al
932 ; AVX512-LABEL: bool_reduction_v4f32:
934 ; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0
935 ; AVX512-NEXT: kmovd %k0, %eax
936 ; AVX512-NEXT: cmpb $15, %al
937 ; AVX512-NEXT: sete %al
; Compare, then halve the live lanes twice on the i1 vector itself.
939 %a = fcmp oeq <4 x float> %x, %y
940 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
941 %b = and <4 x i1> %s1, %a
942 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
943 %c = and <4 x i1> %s2, %b
944 %d = extractelement <4 x i1> %c, i32 0
; All-of boolean reduction over <4 x double> (a 256-bit input).
; The IR compares %x and %y lane-wise with `fcmp oge`, folds the four i1 lanes
; with two shuffle+and steps and extracts lane 0.
; The assertions expect `oge` as a less-or-equal compare with swapped operands.
; The SSE run expects the two 128-bit halves to be compared separately and
; packed into one register before the mask is read; the AVX and AVX512 runs
; expect one 256-bit compare. In every run the mask is tested against 15.
948 define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) {
949 ; SSE-LABEL: bool_reduction_v4f64:
951 ; SSE-NEXT: cmplepd %xmm1, %xmm3
952 ; SSE-NEXT: cmplepd %xmm0, %xmm2
953 ; SSE-NEXT: packssdw %xmm3, %xmm2
954 ; SSE-NEXT: movmskps %xmm2, %eax
955 ; SSE-NEXT: cmpb $15, %al
959 ; AVX-LABEL: bool_reduction_v4f64:
961 ; AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
962 ; AVX-NEXT: vmovmskpd %ymm0, %eax
963 ; AVX-NEXT: cmpb $15, %al
965 ; AVX-NEXT: vzeroupper
968 ; AVX512-LABEL: bool_reduction_v4f64:
970 ; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k0
971 ; AVX512-NEXT: kmovd %k0, %eax
972 ; AVX512-NEXT: cmpb $15, %al
973 ; AVX512-NEXT: sete %al
974 ; AVX512-NEXT: vzeroupper
; Reduction ladder: lanes 2,3 are folded onto lanes 0,1, then lane 1 onto
; lane 0.
976 %a = fcmp oge <4 x double> %x, %y
977 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
978 %b = and <4 x i1> %s1, %a
979 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
980 %c = and <4 x i1> %s2, %b
981 %d = extractelement <4 x i1> %c, i32 0
; All-of boolean reduction over <8 x float> (a 256-bit input).
; The IR compares %x and %y lane-wise with `fcmp une`, folds the eight i1
; lanes with three shuffle+and steps and extracts lane 0.
; The SSE run expects the two 128-bit compare results to be narrowed with two
; pack instructions so that a byte mask can be read; the AVX and AVX512 runs
; expect one 256-bit compare. In every run the low byte of the mask is tested
; against -1 (all eight lane bits set).
985 define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) {
986 ; SSE-LABEL: bool_reduction_v8f32:
988 ; SSE-NEXT: cmpneqps %xmm3, %xmm1
989 ; SSE-NEXT: cmpneqps %xmm2, %xmm0
990 ; SSE-NEXT: packssdw %xmm1, %xmm0
991 ; SSE-NEXT: packsswb %xmm0, %xmm0
992 ; SSE-NEXT: pmovmskb %xmm0, %eax
993 ; SSE-NEXT: cmpb $-1, %al
997 ; AVX-LABEL: bool_reduction_v8f32:
999 ; AVX-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0
1000 ; AVX-NEXT: vmovmskps %ymm0, %eax
1001 ; AVX-NEXT: cmpb $-1, %al
1002 ; AVX-NEXT: sete %al
1003 ; AVX-NEXT: vzeroupper
1006 ; AVX512-LABEL: bool_reduction_v8f32:
1008 ; AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k0
1009 ; AVX512-NEXT: kmovd %k0, %eax
1010 ; AVX512-NEXT: cmpb $-1, %al
1011 ; AVX512-NEXT: sete %al
1012 ; AVX512-NEXT: vzeroupper
; Reduction ladder: 8 -> 4 -> 2 -> 1 live lanes, each step folding the upper
; half onto the lower half with an `and`.
1014 %a = fcmp une <8 x float> %x, %y
1015 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1016 %b = and <8 x i1> %s1, %a
1017 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1018 %c = and <8 x i1> %s2, %b
1019 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1020 %d = and <8 x i1> %s3, %c
1021 %e = extractelement <8 x i1> %d, i32 0
; All-of boolean reduction over <2 x i64>.
; The IR compares %x and %y lane-wise with the unsigned predicate `icmp ugt`,
; ANDs lane 1 into lane 0 and extracts lane 0.
; The SSE and AVX runs expect both operands to be XORed with
; 9223372036854775808 (only the top bit of each 64-bit lane set) before a
; signed greater-than compare, which is how the unsigned compare is expressed
; there. The AVX512 run expects a direct unsigned compare into a mask
; register. In every run the lane mask is tested against 3.
1025 define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
1026 ; SSE-LABEL: bool_reduction_v2i64:
1028 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1029 ; SSE-NEXT: pxor %xmm2, %xmm1
1030 ; SSE-NEXT: pxor %xmm2, %xmm0
1031 ; SSE-NEXT: pcmpgtq %xmm1, %xmm0
1032 ; SSE-NEXT: movmskpd %xmm0, %eax
1033 ; SSE-NEXT: cmpb $3, %al
1034 ; SSE-NEXT: sete %al
1037 ; AVX-LABEL: bool_reduction_v2i64:
1039 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1040 ; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
1041 ; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
1042 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
1043 ; AVX-NEXT: vmovmskpd %xmm0, %eax
1044 ; AVX-NEXT: cmpb $3, %al
1045 ; AVX-NEXT: sete %al
1048 ; AVX512-LABEL: bool_reduction_v2i64:
1050 ; AVX512-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0
1051 ; AVX512-NEXT: kmovd %k0, %eax
1052 ; AVX512-NEXT: cmpb $3, %al
1053 ; AVX512-NEXT: sete %al
; Reduction: bring lane 1 down to lane 0, AND it with the original compare
; result, then read lane 0.
1055 %a = icmp ugt <2 x i64> %x, %y
1056 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
1057 %c = and <2 x i1> %a, %b
1058 %d = extractelement <2 x i1> %c, i32 0
; All-of boolean reduction over <4 x i32>.
; The IR compares %x and %y lane-wise with `icmp ne`, folds the four i1 lanes
; with two shuffle+and steps and extracts lane 0.
; The SSE and AVX runs expect an equality compare whose 4-bit lane mask is
; inverted with `xorl $15` before being tested against 15. The AVX512 run
; expects a direct not-equal compare into a mask register, so no inversion
; appears there.
1062 define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
1063 ; SSE-LABEL: bool_reduction_v4i32:
1065 ; SSE-NEXT: pcmpeqd %xmm1, %xmm0
1066 ; SSE-NEXT: movmskps %xmm0, %eax
1067 ; SSE-NEXT: xorl $15, %eax
1068 ; SSE-NEXT: cmpb $15, %al
1069 ; SSE-NEXT: sete %al
1072 ; AVX-LABEL: bool_reduction_v4i32:
1074 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1075 ; AVX-NEXT: vmovmskps %xmm0, %eax
1076 ; AVX-NEXT: xorl $15, %eax
1077 ; AVX-NEXT: cmpb $15, %al
1078 ; AVX-NEXT: sete %al
1081 ; AVX512-LABEL: bool_reduction_v4i32:
1083 ; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k0
1084 ; AVX512-NEXT: kmovd %k0, %eax
1085 ; AVX512-NEXT: cmpb $15, %al
1086 ; AVX512-NEXT: sete %al
; Reduction ladder: lanes 2,3 are folded onto lanes 0,1, then lane 1 onto
; lane 0.
1088 %a = icmp ne <4 x i32> %x, %y
1089 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1090 %b = and <4 x i1> %s1, %a
1091 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1092 %c = and <4 x i1> %s2, %b
1093 %d = extractelement <4 x i1> %c, i32 0
; All-of boolean reduction over <8 x i16>.
; The IR compares %x and %y lane-wise with `icmp slt`, folds the eight i1
; lanes with three shuffle+and steps and extracts lane 0.
; The assertions expect `slt` as a signed greater-than compare with swapped
; operands. The SSE and AVX runs expect the 16-bit results to be packed to
; bytes so that a byte mask can be read; the AVX512 run expects the compare to
; write a mask register. In every run the low byte of the mask is tested
; against -1 (all eight lane bits set).
1097 define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) {
1098 ; SSE-LABEL: bool_reduction_v8i16:
1100 ; SSE-NEXT: pcmpgtw %xmm0, %xmm1
1101 ; SSE-NEXT: packsswb %xmm1, %xmm1
1102 ; SSE-NEXT: pmovmskb %xmm1, %eax
1103 ; SSE-NEXT: cmpb $-1, %al
1104 ; SSE-NEXT: sete %al
1107 ; AVX-LABEL: bool_reduction_v8i16:
1109 ; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1110 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
1111 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1112 ; AVX-NEXT: cmpb $-1, %al
1113 ; AVX-NEXT: sete %al
1116 ; AVX512-LABEL: bool_reduction_v8i16:
1118 ; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k0
1119 ; AVX512-NEXT: kmovd %k0, %eax
1120 ; AVX512-NEXT: cmpb $-1, %al
1121 ; AVX512-NEXT: sete %al
; Reduction ladder: 8 -> 4 -> 2 -> 1 live lanes, each step folding the upper
; half onto the lower half with an `and`.
1123 %a = icmp slt <8 x i16> %x, %y
1124 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1125 %b = and <8 x i1> %s1, %a
1126 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1127 %c = and <8 x i1> %s2, %b
1128 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1129 %d = and <8 x i1> %s3, %c
1130 %e = extractelement <8 x i1> %d, i32 0
; All-of boolean reduction over <16 x i8>.
; The IR compares %x and %y lane-wise with `icmp sgt`, folds the sixteen i1
; lanes with four shuffle+and steps and extracts lane 0.
; The SSE and AVX runs expect the byte mask of the compare result to be tested
; against -1 as a 16-bit value (all sixteen lane bits set). The AVX512 run
; expects the mask register to be tested with `kortestw` followed by `setb`.
; NOTE(review): per the Intel SDM, KORTEST sets the carry flag when the OR of
; its operands is all ones, which is what `setb` would read here - confirm.
1134 define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) {
1135 ; SSE-LABEL: bool_reduction_v16i8:
1137 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0
1138 ; SSE-NEXT: pmovmskb %xmm0, %eax
1139 ; SSE-NEXT: cmpw $-1, %ax
1140 ; SSE-NEXT: sete %al
1143 ; AVX-LABEL: bool_reduction_v16i8:
1145 ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
1146 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1147 ; AVX-NEXT: cmpw $-1, %ax
1148 ; AVX-NEXT: sete %al
1151 ; AVX512-LABEL: bool_reduction_v16i8:
1153 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
1154 ; AVX512-NEXT: kortestw %k0, %k0
1155 ; AVX512-NEXT: setb %al
; Reduction ladder: 16 -> 8 -> 4 -> 2 -> 1 live lanes, each step folding the
; upper half onto the lower half with an `and`.
1157 %a = icmp sgt <16 x i8> %x, %y
1158 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1159 %b = and <16 x i1> %s1, %a
1160 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1161 %c = and <16 x i1> %s2, %b
1162 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1163 %d = and <16 x i1> %s3, %c
1164 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1165 %e = and <16 x i1> %s4, %d
1166 %f = extractelement <16 x i1> %e, i32 0
; All-of boolean reduction over <4 x i64> (a 256-bit input).
; The IR compares %x and %y lane-wise with `icmp slt`, folds the four i1 lanes
; with two shuffle+and steps and extracts lane 0.
; The assertions expect `slt` as a signed greater-than compare with swapped
; operands. The SSE run expects the two 128-bit halves to be compared
; separately and packed together. The AVX1 run expects the upper halves to be
; extracted, compared as 128-bit vectors and reinserted. The AVX2 and AVX512
; runs expect one 256-bit compare. In every run the mask is tested against 15.
1170 define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
1171 ; SSE-LABEL: bool_reduction_v4i64:
1173 ; SSE-NEXT: pcmpgtq %xmm1, %xmm3
1174 ; SSE-NEXT: pcmpgtq %xmm0, %xmm2
1175 ; SSE-NEXT: packssdw %xmm3, %xmm2
1176 ; SSE-NEXT: movmskps %xmm2, %eax
1177 ; SSE-NEXT: cmpb $15, %al
1178 ; SSE-NEXT: sete %al
1181 ; AVX1-LABEL: bool_reduction_v4i64:
1183 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1184 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1185 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1186 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
1187 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1188 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
1189 ; AVX1-NEXT: cmpb $15, %al
1190 ; AVX1-NEXT: sete %al
1191 ; AVX1-NEXT: vzeroupper
1194 ; AVX2-LABEL: bool_reduction_v4i64:
1196 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
1197 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
1198 ; AVX2-NEXT: cmpb $15, %al
1199 ; AVX2-NEXT: sete %al
1200 ; AVX2-NEXT: vzeroupper
1203 ; AVX512-LABEL: bool_reduction_v4i64:
1205 ; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0
1206 ; AVX512-NEXT: kmovd %k0, %eax
1207 ; AVX512-NEXT: cmpb $15, %al
1208 ; AVX512-NEXT: sete %al
1209 ; AVX512-NEXT: vzeroupper
; Reduction ladder: lanes 2,3 are folded onto lanes 0,1, then lane 1 onto
; lane 0.
1211 %a = icmp slt <4 x i64> %x, %y
1212 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1213 %b = and <4 x i1> %s1, %a
1214 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1215 %c = and <4 x i1> %s2, %b
1216 %d = extractelement <4 x i1> %c, i32 0
; All-of boolean reduction over <8 x i32> (a 256-bit input).
; The IR compares %x and %y lane-wise with the unsigned predicate `icmp ule`,
; folds the eight i1 lanes with three shuffle+and steps and extracts lane 0.
; Outside AVX512 the assertions expect an unsigned minimum followed by an
; equality compare against %x (x <= y exactly when min(x, y) == x). The SSE
; and AVX1 runs do this per 128-bit half; the AVX2 run does it on the whole
; 256-bit vector. The AVX512 run expects a direct unsigned less-or-equal
; compare into a mask register. In every run the low byte of the mask is
; tested against -1 (all eight lane bits set).
1220 define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
1221 ; SSE-LABEL: bool_reduction_v8i32:
1223 ; SSE-NEXT: pminud %xmm1, %xmm3
1224 ; SSE-NEXT: pcmpeqd %xmm1, %xmm3
1225 ; SSE-NEXT: pminud %xmm0, %xmm2
1226 ; SSE-NEXT: pcmpeqd %xmm0, %xmm2
1227 ; SSE-NEXT: packssdw %xmm3, %xmm2
1228 ; SSE-NEXT: packsswb %xmm2, %xmm2
1229 ; SSE-NEXT: pmovmskb %xmm2, %eax
1230 ; SSE-NEXT: cmpb $-1, %al
1231 ; SSE-NEXT: sete %al
1234 ; AVX1-LABEL: bool_reduction_v8i32:
1236 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1237 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1238 ; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
1239 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2
1240 ; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm1
1241 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1242 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1243 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1244 ; AVX1-NEXT: cmpb $-1, %al
1245 ; AVX1-NEXT: sete %al
1246 ; AVX1-NEXT: vzeroupper
1249 ; AVX2-LABEL: bool_reduction_v8i32:
1251 ; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1
1252 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
1253 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1254 ; AVX2-NEXT: cmpb $-1, %al
1255 ; AVX2-NEXT: sete %al
1256 ; AVX2-NEXT: vzeroupper
1259 ; AVX512-LABEL: bool_reduction_v8i32:
1261 ; AVX512-NEXT: vpcmpleud %ymm1, %ymm0, %k0
1262 ; AVX512-NEXT: kmovd %k0, %eax
1263 ; AVX512-NEXT: cmpb $-1, %al
1264 ; AVX512-NEXT: sete %al
1265 ; AVX512-NEXT: vzeroupper
; Reduction ladder: 8 -> 4 -> 2 -> 1 live lanes, each step folding the upper
; half onto the lower half with an `and`.
1267 %a = icmp ule <8 x i32> %x, %y
1268 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1269 %b = and <8 x i1> %s1, %a
1270 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1271 %c = and <8 x i1> %s2, %b
1272 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1273 %d = and <8 x i1> %s3, %c
1274 %e = extractelement <8 x i1> %d, i32 0
; All-of boolean reduction over <16 x i16> (a 256-bit input).
; The IR compares %x and %y lane-wise with `icmp eq`, folds the sixteen i1
; lanes with four shuffle+and steps and extracts lane 0.
; The SSE and AVX1 runs expect the two 128-bit compare results to be packed to
; bytes and the byte mask tested against -1 as a 16-bit value. The AVX2 run
; expects the byte mask of the full 256-bit compare to be tested against -1 as
; a 32-bit value. The AVX512 run expects `kortestw` followed by `setb` on the
; mask register.
; NOTE(review): per the Intel SDM, KORTEST sets the carry flag when the OR of
; its operands is all ones, which is what `setb` would read here - confirm.
1278 define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
1279 ; SSE-LABEL: bool_reduction_v16i16:
1281 ; SSE-NEXT: pcmpeqw %xmm3, %xmm1
1282 ; SSE-NEXT: pcmpeqw %xmm2, %xmm0
1283 ; SSE-NEXT: packsswb %xmm1, %xmm0
1284 ; SSE-NEXT: pmovmskb %xmm0, %eax
1285 ; SSE-NEXT: cmpw $-1, %ax
1286 ; SSE-NEXT: sete %al
1289 ; AVX1-LABEL: bool_reduction_v16i16:
1291 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1292 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1293 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2
1294 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
1295 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
1296 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1297 ; AVX1-NEXT: cmpw $-1, %ax
1298 ; AVX1-NEXT: sete %al
1299 ; AVX1-NEXT: vzeroupper
1302 ; AVX2-LABEL: bool_reduction_v16i16:
1304 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1305 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1306 ; AVX2-NEXT: cmpl $-1, %eax
1307 ; AVX2-NEXT: sete %al
1308 ; AVX2-NEXT: vzeroupper
1311 ; AVX512-LABEL: bool_reduction_v16i16:
1313 ; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
1314 ; AVX512-NEXT: kortestw %k0, %k0
1315 ; AVX512-NEXT: setb %al
1316 ; AVX512-NEXT: vzeroupper
; Reduction ladder: 16 -> 8 -> 4 -> 2 -> 1 live lanes, each step folding the
; upper half onto the lower half with an `and`.
1318 %a = icmp eq <16 x i16> %x, %y
1319 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1320 %b = and <16 x i1> %s1, %a
1321 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1322 %c = and <16 x i1> %s2, %b
1323 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1324 %d = and <16 x i1> %s3, %c
1325 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1326 %e = and <16 x i1> %s4, %d
1327 %f = extractelement <16 x i1> %e, i32 0
1331 define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
1332 ; SSE-LABEL: bool_reduction_v32i8:
1334 ; SSE-NEXT: pcmpeqb %xmm3, %xmm1
1335 ; SSE-NEXT: pcmpeqb %xmm2, %xmm0
1336 ; SSE-NEXT: pand %xmm1, %xmm0
1337 ; SSE-NEXT: pmovmskb %xmm0, %eax
1338 ; SSE-NEXT: cmpw $-1, %ax
1339 ; SSE-NEXT: sete %al
1342 ; AVX1-LABEL: bool_reduction_v32i8:
1344 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1345 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1346 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2
1347 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1348 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
1349 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1350 ; AVX1-NEXT: cmpw $-1, %ax
1351 ; AVX1-NEXT: sete %al
1352 ; AVX1-NEXT: vzeroupper
1355 ; AVX2-LABEL: bool_reduction_v32i8:
1357 ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
1358 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1359 ; AVX2-NEXT: cmpl $-1, %eax
1360 ; AVX2-NEXT: sete %al
1361 ; AVX2-NEXT: vzeroupper
1364 ; AVX512-LABEL: bool_reduction_v32i8:
1366 ; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
1367 ; AVX512-NEXT: kortestd %k0, %k0
1368 ; AVX512-NEXT: setb %al
1369 ; AVX512-NEXT: vzeroupper
1371 %a = icmp eq <32 x i8> %x, %y
1372 %s1 = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1373 %b = and <32 x i1> %s1, %a
1374 %s2 = shufflevector <32 x i1> %b, <32 x i1> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1375 %c = and <32 x i1> %s2, %b
1376 %s3 = shufflevector <32 x i1> %c, <32 x i1> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1377 %d = and <32 x i1> %s3, %c
1378 %s4 = shufflevector <32 x i1> %d, <32 x i1> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1379 %e = and <32 x i1> %s4, %d
1380 %s5 = shufflevector <32 x i1> %e, <32 x i1> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1381 %f = and <32 x i1> %s5, %e
1382 %g = extractelement <32 x i1> %f, i32 0