1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX12,AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX12,AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
; Sign-bit mask test for <2 x i64>: `icmp slt` against zero yields a
; <2 x i1> mask and both i1 lanes are then extracted individually.
; The bitcast in between is an identity cast (<2 x i1> to <2 x i1>).
; The expected asm shows the two lanes being added; the IR that combines
; and returns them is not visible in this excerpt.
13 define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind {
14 ; SSE-LABEL: bitcast_v2i64_to_v2i1:
16 ; SSE-NEXT: movmskpd %xmm0, %ecx
17 ; SSE-NEXT: movl %ecx, %eax
19 ; SSE-NEXT: addb %cl, %al
22 ; AVX12-LABEL: bitcast_v2i64_to_v2i1:
24 ; AVX12-NEXT: vmovmskpd %xmm0, %ecx
25 ; AVX12-NEXT: movl %ecx, %eax
26 ; AVX12-NEXT: shrb %al
27 ; AVX12-NEXT: addb %cl, %al
30 ; AVX512-LABEL: bitcast_v2i64_to_v2i1:
32 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
33 ; AVX512-NEXT: vpcmpgtq %xmm0, %xmm1, %k0
34 ; AVX512-NEXT: kshiftrw $1, %k0, %k1
35 ; AVX512-NEXT: kmovd %k1, %ecx
36 ; AVX512-NEXT: kmovd %k0, %eax
37 ; AVX512-NEXT: addb %cl, %al
38 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
40 %1 = icmp slt <2 x i64> %a0, zeroinitializer
41 %2 = bitcast <2 x i1> %1 to <2 x i1>
42 %3 = extractelement <2 x i1> %2, i32 0
43 %4 = extractelement <2 x i1> %2, i32 1
; Low-bit test for <2 x i64>: each element is truncated to i1 and the
; resulting <2 x i1> mask is bitcast to a scalar i2.
; NOTE(review): the comparison on the i2 value is not visible in this
; excerpt; the expected asm uses `sete` after a zero test, so it is
; presumably an equality check against 0 - confirm against the full file.
48 define i1 @trunc_v2i64_cmp(<2 x i64> %a0) nounwind {
49 ; SSE2-SSSE3-LABEL: trunc_v2i64_cmp:
50 ; SSE2-SSSE3: # %bb.0:
51 ; SSE2-SSSE3-NEXT: psllq $63, %xmm0
52 ; SSE2-SSSE3-NEXT: movmskpd %xmm0, %eax
53 ; SSE2-SSSE3-NEXT: testl %eax, %eax
54 ; SSE2-SSSE3-NEXT: sete %al
55 ; SSE2-SSSE3-NEXT: retq
57 ; SSE41-LABEL: trunc_v2i64_cmp:
59 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
60 ; SSE41-NEXT: sete %al
63 ; AVX12-LABEL: trunc_v2i64_cmp:
65 ; AVX12-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
66 ; AVX12-NEXT: sete %al
69 ; AVX512-LABEL: trunc_v2i64_cmp:
71 ; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1]
72 ; AVX512-NEXT: vptest %xmm1, %xmm0
73 ; AVX512-NEXT: sete %al
75 %1 = trunc <2 x i64> %a0 to <2 x i1>
76 %2 = bitcast <2 x i1> %1 to i2
; Sign-bit mask test for <4 x i32>: `icmp slt` against zero yields a
; <4 x i1> mask, which is bitcast to <2 x i2>; both i2 halves are then
; extracted. The expected asm splits the 4-bit mask with a shift by 2 and
; a mask of 3, then adds the two halves.
81 define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
82 ; SSE-LABEL: bitcast_v4i32_to_v2i2:
84 ; SSE-NEXT: movmskps %xmm0, %eax
85 ; SSE-NEXT: movl %eax, %ecx
86 ; SSE-NEXT: shrb $2, %cl
87 ; SSE-NEXT: andb $3, %al
88 ; SSE-NEXT: addb %cl, %al
89 ; SSE-NEXT: # kill: def $al killed $al killed $eax
92 ; AVX-LABEL: bitcast_v4i32_to_v2i2:
94 ; AVX-NEXT: vmovmskps %xmm0, %eax
95 ; AVX-NEXT: movl %eax, %ecx
96 ; AVX-NEXT: shrb $2, %cl
97 ; AVX-NEXT: andb $3, %al
98 ; AVX-NEXT: addb %cl, %al
99 ; AVX-NEXT: # kill: def $al killed $al killed $eax
101 %1 = icmp slt <4 x i32> %a0, zeroinitializer
102 %2 = bitcast <4 x i1> %1 to <2 x i2>
103 %3 = extractelement <2 x i2> %2, i32 0
104 %4 = extractelement <2 x i2> %2, i32 1
; Low-bit test for <4 x i32>: each element is truncated to i1, the
; <4 x i1> mask is bitcast to i4 and compared `eq` with -1 (all ones),
; i.e. the result is true when every element has its low bit set.
109 define i1 @trunc_v4i32_cmp(<4 x i32> %a0) nounwind {
110 ; SSE2-SSSE3-LABEL: trunc_v4i32_cmp:
111 ; SSE2-SSSE3: # %bb.0:
112 ; SSE2-SSSE3-NEXT: pslld $31, %xmm0
113 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
114 ; SSE2-SSSE3-NEXT: xorl $15, %eax
115 ; SSE2-SSSE3-NEXT: sete %al
116 ; SSE2-SSSE3-NEXT: retq
118 ; SSE41-LABEL: trunc_v4i32_cmp:
120 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
121 ; SSE41-NEXT: setb %al
124 ; AVX12-LABEL: trunc_v4i32_cmp:
126 ; AVX12-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
127 ; AVX12-NEXT: setb %al
130 ; AVX512-LABEL: trunc_v4i32_cmp:
132 ; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967297,4294967297]
133 ; AVX512-NEXT: vptest %xmm1, %xmm0
134 ; AVX512-NEXT: setb %al
136 %1 = trunc <4 x i32> %a0 to <4 x i1>
137 %2 = bitcast <4 x i1> %1 to i4
138 %3 = icmp eq i4 %2, -1
; Sign-bit mask test for <8 x i16>: `icmp slt` against zero yields an
; <8 x i1> mask, which is bitcast to <2 x i4>; both i4 halves are then
; extracted. The expected asm splits the 8-bit mask with a shift by 4 and
; a mask of 15, then adds the two halves.
142 define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
143 ; SSE-LABEL: bitcast_v8i16_to_v2i4:
145 ; SSE-NEXT: packsswb %xmm0, %xmm0
146 ; SSE-NEXT: pmovmskb %xmm0, %eax
147 ; SSE-NEXT: movl %eax, %ecx
148 ; SSE-NEXT: shrb $4, %cl
149 ; SSE-NEXT: andb $15, %al
150 ; SSE-NEXT: addb %cl, %al
151 ; SSE-NEXT: # kill: def $al killed $al killed $eax
154 ; AVX12-LABEL: bitcast_v8i16_to_v2i4:
156 ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
157 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
158 ; AVX12-NEXT: movl %eax, %ecx
159 ; AVX12-NEXT: shrb $4, %cl
160 ; AVX12-NEXT: andb $15, %al
161 ; AVX12-NEXT: addb %cl, %al
162 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
165 ; AVX512-LABEL: bitcast_v8i16_to_v2i4:
167 ; AVX512-NEXT: vpmovw2m %xmm0, %k0
168 ; AVX512-NEXT: kmovd %k0, %eax
169 ; AVX512-NEXT: movl %eax, %ecx
170 ; AVX512-NEXT: shrb $4, %cl
171 ; AVX512-NEXT: andb $15, %al
172 ; AVX512-NEXT: addb %cl, %al
173 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
175 %1 = icmp slt <8 x i16> %a0, zeroinitializer
176 %2 = bitcast <8 x i1> %1 to <2 x i4>
177 %3 = extractelement <2 x i4> %2, i32 0
178 %4 = extractelement <2 x i4> %2, i32 1
; Low-bit test for <8 x i16>: each element is truncated to i1, the
; <8 x i1> mask is bitcast to i8 and compared `ne` with 0, i.e. the
; result is true when at least one element has its low bit set.
183 define i1 @trunc_v8i16_cmp(<8 x i16> %a0) nounwind {
184 ; SSE2-SSSE3-LABEL: trunc_v8i16_cmp:
185 ; SSE2-SSSE3: # %bb.0:
186 ; SSE2-SSSE3-NEXT: psllw $7, %xmm0
187 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
188 ; SSE2-SSSE3-NEXT: testl $21845, %eax # imm = 0x5555
189 ; SSE2-SSSE3-NEXT: setne %al
190 ; SSE2-SSSE3-NEXT: retq
192 ; SSE41-LABEL: trunc_v8i16_cmp:
194 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
195 ; SSE41-NEXT: setne %al
198 ; AVX12-LABEL: trunc_v8i16_cmp:
200 ; AVX12-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
201 ; AVX12-NEXT: setne %al
204 ; AVX512-LABEL: trunc_v8i16_cmp:
206 ; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [281479271743489,281479271743489]
207 ; AVX512-NEXT: vptest %xmm1, %xmm0
208 ; AVX512-NEXT: setne %al
210 %1 = trunc <8 x i16> %a0 to <8 x i1>
211 %2 = bitcast <8 x i1> %1 to i8
212 %3 = icmp ne i8 %2, 0
; Sign-bit mask test for <16 x i8>: `icmp slt` against zero yields a
; <16 x i1> mask, which is bitcast to <2 x i8>; both i8 halves are then
; extracted. The expected asm obtains the upper half with a shift by 8
; and adds it to the lower half.
216 define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
217 ; SSE-LABEL: bitcast_v16i8_to_v2i8:
219 ; SSE-NEXT: pmovmskb %xmm0, %ecx
220 ; SSE-NEXT: movl %ecx, %eax
221 ; SSE-NEXT: shrl $8, %eax
222 ; SSE-NEXT: addb %cl, %al
223 ; SSE-NEXT: # kill: def $al killed $al killed $eax
226 ; AVX12-LABEL: bitcast_v16i8_to_v2i8:
228 ; AVX12-NEXT: vpmovmskb %xmm0, %ecx
229 ; AVX12-NEXT: movl %ecx, %eax
230 ; AVX12-NEXT: shrl $8, %eax
231 ; AVX12-NEXT: addb %cl, %al
232 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
235 ; AVX512-LABEL: bitcast_v16i8_to_v2i8:
237 ; AVX512-NEXT: vpmovb2m %xmm0, %k0
238 ; AVX512-NEXT: kshiftrw $8, %k0, %k1
239 ; AVX512-NEXT: kmovd %k0, %ecx
240 ; AVX512-NEXT: kmovd %k1, %eax
241 ; AVX512-NEXT: addb %cl, %al
242 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
244 %1 = icmp slt <16 x i8> %a0, zeroinitializer
245 %2 = bitcast <16 x i1> %1 to <2 x i8>
246 %3 = extractelement <2 x i8> %2, i32 0
247 %4 = extractelement <2 x i8> %2, i32 1
; Low-bit test for <16 x i8>: each element is truncated to i1, the
; <16 x i1> mask is bitcast to i16 and compared `ne` with -1 (all ones),
; i.e. the result is true when at least one element has its low bit clear.
252 define i1 @trunc_v16i8_cmp(<16 x i8> %a0) nounwind {
253 ; SSE2-SSSE3-LABEL: trunc_v16i8_cmp:
254 ; SSE2-SSSE3: # %bb.0:
255 ; SSE2-SSSE3-NEXT: psllw $7, %xmm0
256 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
257 ; SSE2-SSSE3-NEXT: xorl $65535, %eax # imm = 0xFFFF
258 ; SSE2-SSSE3-NEXT: setne %al
259 ; SSE2-SSSE3-NEXT: retq
261 ; SSE41-LABEL: trunc_v16i8_cmp:
263 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
264 ; SSE41-NEXT: setae %al
267 ; AVX12-LABEL: trunc_v16i8_cmp:
269 ; AVX12-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
270 ; AVX12-NEXT: setae %al
273 ; AVX512-LABEL: trunc_v16i8_cmp:
275 ; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [72340172838076673,72340172838076673]
276 ; AVX512-NEXT: vptest %xmm1, %xmm0
277 ; AVX512-NEXT: setae %al
279 %1 = trunc <16 x i8> %a0 to <16 x i1>
280 %2 = bitcast <16 x i1> %1 to i16
281 %3 = icmp ne i16 %2, -1
; Sign-bit mask test for <4 x i64> (256-bit input): `icmp slt` against
; zero yields a <4 x i1> mask, which is bitcast to <2 x i2>; both i2
; halves are then extracted. The expected asm splits the 4-bit mask with
; a shift by 2 and a mask of 3, then adds the two halves.
289 define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
290 ; SSE-LABEL: bitcast_v4i64_to_v2i2:
292 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
293 ; SSE-NEXT: movmskps %xmm0, %eax
294 ; SSE-NEXT: movl %eax, %ecx
295 ; SSE-NEXT: shrb $2, %cl
296 ; SSE-NEXT: andb $3, %al
297 ; SSE-NEXT: addb %cl, %al
298 ; SSE-NEXT: # kill: def $al killed $al killed $eax
301 ; AVX-LABEL: bitcast_v4i64_to_v2i2:
303 ; AVX-NEXT: vmovmskpd %ymm0, %eax
304 ; AVX-NEXT: movl %eax, %ecx
305 ; AVX-NEXT: shrb $2, %cl
306 ; AVX-NEXT: andb $3, %al
307 ; AVX-NEXT: addb %cl, %al
308 ; AVX-NEXT: # kill: def $al killed $al killed $eax
309 ; AVX-NEXT: vzeroupper
311 %1 = icmp slt <4 x i64> %a0, zeroinitializer
312 %2 = bitcast <4 x i1> %1 to <2 x i2>
313 %3 = extractelement <2 x i2> %2, i32 0
314 %4 = extractelement <2 x i2> %2, i32 1
; Low-bit test for <4 x i64>: each element is truncated to i1, the
; <4 x i1> mask is bitcast to i4 and compared `ne` with 0, i.e. the
; result is true when at least one element has its low bit set.
319 define i1 @trunc_v4i64_cmp(<4 x i64> %a0) nounwind {
320 ; SSE2-SSSE3-LABEL: trunc_v4i64_cmp:
321 ; SSE2-SSSE3: # %bb.0:
322 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
323 ; SSE2-SSSE3-NEXT: pslld $31, %xmm0
324 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
325 ; SSE2-SSSE3-NEXT: testl %eax, %eax
326 ; SSE2-SSSE3-NEXT: setne %al
327 ; SSE2-SSSE3-NEXT: retq
329 ; SSE41-LABEL: trunc_v4i64_cmp:
331 ; SSE41-NEXT: por %xmm1, %xmm0
332 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
333 ; SSE41-NEXT: setne %al
336 ; AVX1-LABEL: trunc_v4i64_cmp:
338 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
339 ; AVX1-NEXT: setne %al
340 ; AVX1-NEXT: vzeroupper
343 ; AVX2-LABEL: trunc_v4i64_cmp:
345 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
346 ; AVX2-NEXT: vptest %ymm1, %ymm0
347 ; AVX2-NEXT: setne %al
348 ; AVX2-NEXT: vzeroupper
351 ; AVX512-LABEL: trunc_v4i64_cmp:
353 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
354 ; AVX512-NEXT: vptest %ymm1, %ymm0
355 ; AVX512-NEXT: setne %al
356 ; AVX512-NEXT: vzeroupper
358 %1 = trunc <4 x i64> %a0 to <4 x i1>
359 %2 = bitcast <4 x i1> %1 to i4
360 %3 = icmp ne i4 %2, 0
; Sign-bit mask test for <8 x i32> (256-bit input): `icmp slt` against
; zero yields an <8 x i1> mask, which is bitcast to <2 x i4>; both i4
; halves are then extracted. The expected asm splits the 8-bit mask with
; a shift by 4 and a mask of 15, then adds the two halves.
364 define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
365 ; SSE-LABEL: bitcast_v8i32_to_v2i4:
367 ; SSE-NEXT: packssdw %xmm1, %xmm0
368 ; SSE-NEXT: packsswb %xmm0, %xmm0
369 ; SSE-NEXT: pmovmskb %xmm0, %eax
370 ; SSE-NEXT: movl %eax, %ecx
371 ; SSE-NEXT: shrb $4, %cl
372 ; SSE-NEXT: andb $15, %al
373 ; SSE-NEXT: addb %cl, %al
374 ; SSE-NEXT: # kill: def $al killed $al killed $eax
377 ; AVX-LABEL: bitcast_v8i32_to_v2i4:
379 ; AVX-NEXT: vmovmskps %ymm0, %eax
380 ; AVX-NEXT: movl %eax, %ecx
381 ; AVX-NEXT: shrb $4, %cl
382 ; AVX-NEXT: andb $15, %al
383 ; AVX-NEXT: addb %cl, %al
384 ; AVX-NEXT: # kill: def $al killed $al killed $eax
385 ; AVX-NEXT: vzeroupper
387 %1 = icmp slt <8 x i32> %a0, zeroinitializer
388 %2 = bitcast <8 x i1> %1 to <2 x i4>
389 %3 = extractelement <2 x i4> %2, i32 0
390 %4 = extractelement <2 x i4> %2, i32 1
; Low-bit test for <8 x i32>: each element is truncated to i1, the
; <8 x i1> mask is bitcast to i8 and compared `ne` with -1 (all ones),
; i.e. the result is true when at least one element has its low bit clear.
; NOTE(review): the function name says "v8i132" but the operand type is
; <8 x i32>; this looks like a typo for "v8i32". Renaming would also
; require regenerating the check labels, so it is left unchanged here.
395 define i1 @trunc_v8i132_cmp(<8 x i32> %a0) nounwind {
396 ; SSE2-SSSE3-LABEL: trunc_v8i132_cmp:
397 ; SSE2-SSSE3: # %bb.0:
398 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
399 ; SSE2-SSSE3-NEXT: pslld $31, %xmm0
400 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
401 ; SSE2-SSSE3-NEXT: xorl $15, %eax
402 ; SSE2-SSSE3-NEXT: setne %al
403 ; SSE2-SSSE3-NEXT: retq
405 ; SSE41-LABEL: trunc_v8i132_cmp:
407 ; SSE41-NEXT: pand %xmm1, %xmm0
408 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
409 ; SSE41-NEXT: setae %al
412 ; AVX1-LABEL: trunc_v8i132_cmp:
414 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
415 ; AVX1-NEXT: setae %al
416 ; AVX1-NEXT: vzeroupper
419 ; AVX2-LABEL: trunc_v8i132_cmp:
421 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
422 ; AVX2-NEXT: vptest %ymm1, %ymm0
423 ; AVX2-NEXT: setae %al
424 ; AVX2-NEXT: vzeroupper
427 ; AVX512-LABEL: trunc_v8i132_cmp:
429 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
430 ; AVX512-NEXT: vptest %ymm1, %ymm0
431 ; AVX512-NEXT: setae %al
432 ; AVX512-NEXT: vzeroupper
434 %1 = trunc <8 x i32> %a0 to <8 x i1>
435 %2 = bitcast <8 x i1> %1 to i8
436 %3 = icmp ne i8 %2, -1
; Sign-bit mask test for <16 x i16> (256-bit input): `icmp slt` against
; zero yields a <16 x i1> mask, which is bitcast to <2 x i8>; both i8
; halves are then extracted. The expected asm obtains the upper half with
; a shift by 8 and adds it to the lower half.
440 define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
441 ; SSE-LABEL: bitcast_v16i16_to_v2i8:
443 ; SSE-NEXT: packsswb %xmm1, %xmm0
444 ; SSE-NEXT: pmovmskb %xmm0, %ecx
445 ; SSE-NEXT: movl %ecx, %eax
446 ; SSE-NEXT: shrl $8, %eax
447 ; SSE-NEXT: addb %cl, %al
448 ; SSE-NEXT: # kill: def $al killed $al killed $eax
451 ; AVX1-LABEL: bitcast_v16i16_to_v2i8:
453 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
454 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
455 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
456 ; AVX1-NEXT: movl %ecx, %eax
457 ; AVX1-NEXT: shrl $8, %eax
458 ; AVX1-NEXT: addb %cl, %al
459 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
460 ; AVX1-NEXT: vzeroupper
463 ; AVX2-LABEL: bitcast_v16i16_to_v2i8:
465 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
466 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
467 ; AVX2-NEXT: vpmovmskb %xmm0, %ecx
468 ; AVX2-NEXT: movl %ecx, %eax
469 ; AVX2-NEXT: shrl $8, %eax
470 ; AVX2-NEXT: addb %cl, %al
471 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
472 ; AVX2-NEXT: vzeroupper
475 ; AVX512-LABEL: bitcast_v16i16_to_v2i8:
477 ; AVX512-NEXT: vpmovw2m %ymm0, %k0
478 ; AVX512-NEXT: kshiftrw $8, %k0, %k1
479 ; AVX512-NEXT: kmovd %k0, %ecx
480 ; AVX512-NEXT: kmovd %k1, %eax
481 ; AVX512-NEXT: addb %cl, %al
482 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
483 ; AVX512-NEXT: vzeroupper
485 %1 = icmp slt <16 x i16> %a0, zeroinitializer
486 %2 = bitcast <16 x i1> %1 to <2 x i8>
487 %3 = extractelement <2 x i8> %2, i32 0
488 %4 = extractelement <2 x i8> %2, i32 1
; Low-bit test for <16 x i16>: each element is truncated to i1, the
; <16 x i1> mask is bitcast to i16 and compared `eq` with 0, i.e. the
; result is true when no element has its low bit set.
493 define i1 @trunc_v16i16_cmp(<16 x i16> %a0) nounwind {
494 ; SSE2-SSSE3-LABEL: trunc_v16i16_cmp:
495 ; SSE2-SSSE3: # %bb.0:
496 ; SSE2-SSSE3-NEXT: por %xmm1, %xmm0
497 ; SSE2-SSSE3-NEXT: psllw $7, %xmm0
498 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
499 ; SSE2-SSSE3-NEXT: testl $21845, %eax # imm = 0x5555
500 ; SSE2-SSSE3-NEXT: sete %al
501 ; SSE2-SSSE3-NEXT: retq
503 ; SSE41-LABEL: trunc_v16i16_cmp:
505 ; SSE41-NEXT: por %xmm1, %xmm0
506 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
507 ; SSE41-NEXT: sete %al
510 ; AVX1-LABEL: trunc_v16i16_cmp:
512 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
513 ; AVX1-NEXT: sete %al
514 ; AVX1-NEXT: vzeroupper
517 ; AVX2-LABEL: trunc_v16i16_cmp:
519 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
520 ; AVX2-NEXT: vptest %ymm1, %ymm0
521 ; AVX2-NEXT: sete %al
522 ; AVX2-NEXT: vzeroupper
525 ; AVX512-LABEL: trunc_v16i16_cmp:
527 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
528 ; AVX512-NEXT: vptest %ymm1, %ymm0
529 ; AVX512-NEXT: sete %al
530 ; AVX512-NEXT: vzeroupper
532 %1 = trunc <16 x i16> %a0 to <16 x i1>
533 %2 = bitcast <16 x i1> %1 to i16
534 %3 = icmp eq i16 %2, 0
; Sign-bit mask test for <32 x i8> (256-bit input): `icmp slt` against
; zero yields a <32 x i1> mask, which is bitcast to <2 x i16>; both i16
; halves are then extracted. The expected asm adds the two 16-bit halves
; of the mask.
538 define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
539 ; SSE-LABEL: bitcast_v32i8_to_v2i16:
541 ; SSE-NEXT: pmovmskb %xmm1, %ecx
542 ; SSE-NEXT: pmovmskb %xmm0, %eax
543 ; SSE-NEXT: addl %ecx, %eax
544 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
547 ; AVX1-LABEL: bitcast_v32i8_to_v2i16:
549 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
550 ; AVX1-NEXT: vpmovmskb %xmm1, %ecx
551 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
552 ; AVX1-NEXT: addl %ecx, %eax
553 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
554 ; AVX1-NEXT: vzeroupper
557 ; AVX2-LABEL: bitcast_v32i8_to_v2i16:
559 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
560 ; AVX2-NEXT: movl %ecx, %eax
561 ; AVX2-NEXT: shrl $16, %eax
562 ; AVX2-NEXT: addl %ecx, %eax
563 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
564 ; AVX2-NEXT: vzeroupper
567 ; AVX512-LABEL: bitcast_v32i8_to_v2i16:
569 ; AVX512-NEXT: vpmovb2m %ymm0, %k0
570 ; AVX512-NEXT: kshiftrd $16, %k0, %k1
571 ; AVX512-NEXT: kmovd %k0, %ecx
572 ; AVX512-NEXT: kmovd %k1, %eax
573 ; AVX512-NEXT: addl %ecx, %eax
574 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
575 ; AVX512-NEXT: vzeroupper
577 %1 = icmp slt <32 x i8> %a0, zeroinitializer
578 %2 = bitcast <32 x i1> %1 to <2 x i16>
579 %3 = extractelement <2 x i16> %2, i32 0
580 %4 = extractelement <2 x i16> %2, i32 1
; Low-bit test for <32 x i8>: each element is truncated to i1, the
; <32 x i1> mask is bitcast to i32 and compared `eq` with -1 (all ones),
; i.e. the result is true when every element has its low bit set.
585 define i1 @trunc_v32i8_cmp(<32 x i8> %a0) nounwind {
586 ; SSE2-SSSE3-LABEL: trunc_v32i8_cmp:
587 ; SSE2-SSSE3: # %bb.0:
588 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
589 ; SSE2-SSSE3-NEXT: psllw $7, %xmm0
590 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
591 ; SSE2-SSSE3-NEXT: xorl $65535, %eax # imm = 0xFFFF
592 ; SSE2-SSSE3-NEXT: sete %al
593 ; SSE2-SSSE3-NEXT: retq
595 ; SSE41-LABEL: trunc_v32i8_cmp:
597 ; SSE41-NEXT: pand %xmm1, %xmm0
598 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
599 ; SSE41-NEXT: setb %al
602 ; AVX1-LABEL: trunc_v32i8_cmp:
604 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
605 ; AVX1-NEXT: setb %al
606 ; AVX1-NEXT: vzeroupper
609 ; AVX2-LABEL: trunc_v32i8_cmp:
611 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
612 ; AVX2-NEXT: vptest %ymm1, %ymm0
613 ; AVX2-NEXT: setb %al
614 ; AVX2-NEXT: vzeroupper
617 ; AVX512-LABEL: trunc_v32i8_cmp:
619 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
620 ; AVX512-NEXT: vptest %ymm1, %ymm0
621 ; AVX512-NEXT: setb %al
622 ; AVX512-NEXT: vzeroupper
624 %1 = trunc <32 x i8> %a0 to <32 x i1>
625 %2 = bitcast <32 x i1> %1 to i32
626 %3 = icmp eq i32 %2, -1
; Sign-bit mask test for <8 x i64> (512-bit input): `icmp slt` against
; zero yields an <8 x i1> mask, which is bitcast to <2 x i4>; both i4
; halves are then extracted. The expected asm splits the 8-bit mask with
; a shift by 4 and a mask of 15, then adds the two halves.
634 define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
635 ; SSE-LABEL: bitcast_v8i64_to_v2i4:
637 ; SSE-NEXT: packssdw %xmm3, %xmm2
638 ; SSE-NEXT: packssdw %xmm1, %xmm0
639 ; SSE-NEXT: packssdw %xmm2, %xmm0
640 ; SSE-NEXT: packsswb %xmm0, %xmm0
641 ; SSE-NEXT: pmovmskb %xmm0, %eax
642 ; SSE-NEXT: movl %eax, %ecx
643 ; SSE-NEXT: shrb $4, %cl
644 ; SSE-NEXT: andb $15, %al
645 ; SSE-NEXT: addb %cl, %al
646 ; SSE-NEXT: # kill: def $al killed $al killed $eax
649 ; AVX1-LABEL: bitcast_v8i64_to_v2i4:
651 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
652 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
653 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
654 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
655 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
656 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
657 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
658 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
659 ; AVX1-NEXT: vmovmskps %ymm0, %eax
660 ; AVX1-NEXT: movl %eax, %ecx
661 ; AVX1-NEXT: shrb $4, %cl
662 ; AVX1-NEXT: andb $15, %al
663 ; AVX1-NEXT: addb %cl, %al
664 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
665 ; AVX1-NEXT: vzeroupper
668 ; AVX2-LABEL: bitcast_v8i64_to_v2i4:
670 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
671 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
672 ; AVX2-NEXT: vmovmskps %ymm0, %eax
673 ; AVX2-NEXT: movl %eax, %ecx
674 ; AVX2-NEXT: shrb $4, %cl
675 ; AVX2-NEXT: andb $15, %al
676 ; AVX2-NEXT: addb %cl, %al
677 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
678 ; AVX2-NEXT: vzeroupper
681 ; AVX512-LABEL: bitcast_v8i64_to_v2i4:
683 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
684 ; AVX512-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
685 ; AVX512-NEXT: kmovd %k0, %eax
686 ; AVX512-NEXT: movl %eax, %ecx
687 ; AVX512-NEXT: shrb $4, %cl
688 ; AVX512-NEXT: andb $15, %al
689 ; AVX512-NEXT: addb %cl, %al
690 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
691 ; AVX512-NEXT: vzeroupper
693 %1 = icmp slt <8 x i64> %a0, zeroinitializer
694 %2 = bitcast <8 x i1> %1 to <2 x i4>
695 %3 = extractelement <2 x i4> %2, i32 0
696 %4 = extractelement <2 x i4> %2, i32 1
; Low-bit test for <8 x i64>: each element is truncated to i1, the
; <8 x i1> mask is bitcast to i8 and compared `eq` with -1 (all ones),
; i.e. the result is true when every element has its low bit set.
701 define i1 @trunc_v8i64_cmp(<8 x i64> %a0) nounwind {
702 ; SSE2-SSSE3-LABEL: trunc_v8i64_cmp:
703 ; SSE2-SSSE3: # %bb.0:
704 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
705 ; SSE2-SSSE3-NEXT: pslld $16, %xmm2
706 ; SSE2-SSSE3-NEXT: psrad $16, %xmm2
707 ; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
708 ; SSE2-SSSE3-NEXT: pslld $16, %xmm0
709 ; SSE2-SSSE3-NEXT: psrad $16, %xmm0
710 ; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0
711 ; SSE2-SSSE3-NEXT: psllw $15, %xmm0
712 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
713 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
714 ; SSE2-SSSE3-NEXT: cmpb $-1, %al
715 ; SSE2-SSSE3-NEXT: sete %al
716 ; SSE2-SSSE3-NEXT: retq
718 ; SSE41-LABEL: trunc_v8i64_cmp:
720 ; SSE41-NEXT: pand %xmm3, %xmm1
721 ; SSE41-NEXT: pand %xmm2, %xmm0
722 ; SSE41-NEXT: pand %xmm1, %xmm0
723 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
724 ; SSE41-NEXT: setb %al
727 ; AVX1-LABEL: trunc_v8i64_cmp:
729 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
730 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
731 ; AVX1-NEXT: setb %al
732 ; AVX1-NEXT: vzeroupper
735 ; AVX2-LABEL: trunc_v8i64_cmp:
737 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
738 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
739 ; AVX2-NEXT: vptest %ymm1, %ymm0
740 ; AVX2-NEXT: setb %al
741 ; AVX2-NEXT: vzeroupper
744 ; AVX512-LABEL: trunc_v8i64_cmp:
746 ; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1]
747 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
748 ; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
749 ; AVX512-NEXT: kortestw %k0, %k0
750 ; AVX512-NEXT: sete %al
751 ; AVX512-NEXT: vzeroupper
753 %1 = trunc <8 x i64> %a0 to <8 x i1>
754 %2 = bitcast <8 x i1> %1 to i8
755 %3 = icmp eq i8 %2, -1
; Sign-bit mask test for <16 x i32> (512-bit input): `icmp slt` against
; zero yields a <16 x i1> mask, which is bitcast to <2 x i8>; both i8
; halves are then extracted. The expected asm obtains the upper half with
; a shift by 8 and adds it to the lower half.
759 define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
760 ; SSE-LABEL: bitcast_v16i32_to_v2i8:
762 ; SSE-NEXT: packssdw %xmm3, %xmm2
763 ; SSE-NEXT: packssdw %xmm1, %xmm0
764 ; SSE-NEXT: packsswb %xmm2, %xmm0
765 ; SSE-NEXT: pmovmskb %xmm0, %ecx
766 ; SSE-NEXT: movl %ecx, %eax
767 ; SSE-NEXT: shrl $8, %eax
768 ; SSE-NEXT: addb %cl, %al
769 ; SSE-NEXT: # kill: def $al killed $al killed $eax
772 ; AVX1-LABEL: bitcast_v16i32_to_v2i8:
774 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
775 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
776 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
777 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
778 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
779 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
780 ; AVX1-NEXT: movl %ecx, %eax
781 ; AVX1-NEXT: shrl $8, %eax
782 ; AVX1-NEXT: addb %cl, %al
783 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
784 ; AVX1-NEXT: vzeroupper
787 ; AVX2-LABEL: bitcast_v16i32_to_v2i8:
789 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
790 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1
791 ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
792 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
793 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
794 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
795 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
796 ; AVX2-NEXT: vpmovmskb %xmm0, %ecx
797 ; AVX2-NEXT: movl %ecx, %eax
798 ; AVX2-NEXT: shrl $8, %eax
799 ; AVX2-NEXT: addb %cl, %al
800 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
801 ; AVX2-NEXT: vzeroupper
804 ; AVX512-LABEL: bitcast_v16i32_to_v2i8:
806 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
807 ; AVX512-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
808 ; AVX512-NEXT: kshiftrw $8, %k0, %k1
809 ; AVX512-NEXT: kmovd %k0, %ecx
810 ; AVX512-NEXT: kmovd %k1, %eax
811 ; AVX512-NEXT: addb %cl, %al
812 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
813 ; AVX512-NEXT: vzeroupper
815 %1 = icmp slt <16 x i32> %a0, zeroinitializer
816 %2 = bitcast <16 x i1> %1 to <2 x i8>
817 %3 = extractelement <2 x i8> %2, i32 0
818 %4 = extractelement <2 x i8> %2, i32 1
; Low-bit test for <16 x i32>: each element is truncated to i1, the
; <16 x i1> mask is bitcast to i16 and compared `eq` with 0, i.e. the
; result is true when no element has its low bit set.
823 define i1 @trunc_v16i32_cmp(<16 x i32> %a0) nounwind {
824 ; SSE2-SSSE3-LABEL: trunc_v16i32_cmp:
825 ; SSE2-SSSE3: # %bb.0:
826 ; SSE2-SSSE3-NEXT: por %xmm3, %xmm1
827 ; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
828 ; SSE2-SSSE3-NEXT: por %xmm1, %xmm0
829 ; SSE2-SSSE3-NEXT: pslld $31, %xmm0
830 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
831 ; SSE2-SSSE3-NEXT: testl %eax, %eax
832 ; SSE2-SSSE3-NEXT: sete %al
833 ; SSE2-SSSE3-NEXT: retq
835 ; SSE41-LABEL: trunc_v16i32_cmp:
837 ; SSE41-NEXT: por %xmm3, %xmm1
838 ; SSE41-NEXT: por %xmm2, %xmm0
839 ; SSE41-NEXT: por %xmm1, %xmm0
840 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
841 ; SSE41-NEXT: sete %al
844 ; AVX1-LABEL: trunc_v16i32_cmp:
846 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
847 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
848 ; AVX1-NEXT: sete %al
849 ; AVX1-NEXT: vzeroupper
852 ; AVX2-LABEL: trunc_v16i32_cmp:
854 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
855 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
856 ; AVX2-NEXT: vptest %ymm1, %ymm0
857 ; AVX2-NEXT: sete %al
858 ; AVX2-NEXT: vzeroupper
861 ; AVX512-LABEL: trunc_v16i32_cmp:
863 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
864 ; AVX512-NEXT: kortestw %k0, %k0
865 ; AVX512-NEXT: sete %al
866 ; AVX512-NEXT: vzeroupper
868 %1 = trunc <16 x i32> %a0 to <16 x i1>
869 %2 = bitcast <16 x i1> %1 to i16
870 %3 = icmp eq i16 %2, 0
; Sign-bit mask test for <32 x i16> (512-bit input): `icmp slt` against
; zero yields a <32 x i1> mask, which is bitcast to <2 x i16>; both i16
; halves are then extracted. The expected asm adds the two 16-bit halves
; of the mask.
874 define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
875 ; SSE-LABEL: bitcast_v32i16_to_v2i16:
877 ; SSE-NEXT: packsswb %xmm3, %xmm2
878 ; SSE-NEXT: pmovmskb %xmm2, %ecx
879 ; SSE-NEXT: packsswb %xmm1, %xmm0
880 ; SSE-NEXT: pmovmskb %xmm0, %eax
881 ; SSE-NEXT: addl %ecx, %eax
882 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
885 ; AVX1-LABEL: bitcast_v32i16_to_v2i16:
887 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
888 ; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
889 ; AVX1-NEXT: vpmovmskb %xmm1, %ecx
890 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
891 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
892 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
893 ; AVX1-NEXT: addl %ecx, %eax
894 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
895 ; AVX1-NEXT: vzeroupper
898 ; AVX2-LABEL: bitcast_v32i16_to_v2i16:
900 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
901 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
902 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
903 ; AVX2-NEXT: movl %ecx, %eax
904 ; AVX2-NEXT: shrl $16, %eax
905 ; AVX2-NEXT: addl %ecx, %eax
906 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
907 ; AVX2-NEXT: vzeroupper
910 ; AVX512-LABEL: bitcast_v32i16_to_v2i16:
912 ; AVX512-NEXT: vpmovw2m %zmm0, %k0
913 ; AVX512-NEXT: kshiftrd $16, %k0, %k1
914 ; AVX512-NEXT: kmovd %k0, %ecx
915 ; AVX512-NEXT: kmovd %k1, %eax
916 ; AVX512-NEXT: addl %ecx, %eax
917 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
918 ; AVX512-NEXT: vzeroupper
920 %1 = icmp slt <32 x i16> %a0, zeroinitializer
921 %2 = bitcast <32 x i1> %1 to <2 x i16>
922 %3 = extractelement <2 x i16> %2, i32 0
923 %4 = extractelement <2 x i16> %2, i32 1
; Low-bit test for <32 x i16>: each element is truncated to i1, the
; <32 x i1> mask is bitcast to i32 and compared `ne` with -1 (all ones),
; i.e. the result is true when at least one element has its low bit clear.
928 define i1 @trunc_v32i16_cmp(<32 x i16> %a0) nounwind {
929 ; SSE2-SSSE3-LABEL: trunc_v32i16_cmp:
930 ; SSE2-SSSE3: # %bb.0:
931 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
932 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
933 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
934 ; SSE2-SSSE3-NEXT: psllw $7, %xmm0
935 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
936 ; SSE2-SSSE3-NEXT: notl %eax
937 ; SSE2-SSSE3-NEXT: testl $21845, %eax # imm = 0x5555
938 ; SSE2-SSSE3-NEXT: setne %al
939 ; SSE2-SSSE3-NEXT: retq
941 ; SSE41-LABEL: trunc_v32i16_cmp:
943 ; SSE41-NEXT: pand %xmm3, %xmm1
944 ; SSE41-NEXT: pand %xmm2, %xmm0
945 ; SSE41-NEXT: pand %xmm1, %xmm0
946 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
947 ; SSE41-NEXT: setae %al
950 ; AVX1-LABEL: trunc_v32i16_cmp:
952 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
953 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
954 ; AVX1-NEXT: setae %al
955 ; AVX1-NEXT: vzeroupper
958 ; AVX2-LABEL: trunc_v32i16_cmp:
960 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
961 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
962 ; AVX2-NEXT: vptest %ymm1, %ymm0
963 ; AVX2-NEXT: setae %al
964 ; AVX2-NEXT: vzeroupper
967 ; AVX512-LABEL: trunc_v32i16_cmp:
969 ; AVX512-NEXT: vpbroadcastw {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
970 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
971 ; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
972 ; AVX512-NEXT: kortestw %k0, %k0
973 ; AVX512-NEXT: setne %al
974 ; AVX512-NEXT: vzeroupper
976 %1 = trunc <32 x i16> %a0 to <32 x i1>
977 %2 = bitcast <32 x i1> %1 to i32
978 %3 = icmp ne i32 %2, -1
; Sign-bit mask test for <64 x i8> (512-bit input): `icmp slt` against
; zero yields a <64 x i1> mask, which is bitcast to <2 x i32>; both i32
; halves are then extracted. The expected asm adds the two 32-bit halves
; of the mask.
982 define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
983 ; SSE2-SSSE3-LABEL: bitcast_v64i8_to_v2i32:
984 ; SSE2-SSSE3: # %bb.0:
985 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
986 ; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %ecx
987 ; SSE2-SSSE3-NEXT: shll $16, %ecx
988 ; SSE2-SSSE3-NEXT: orl %eax, %ecx
989 ; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax
990 ; SSE2-SSSE3-NEXT: pmovmskb %xmm3, %edx
991 ; SSE2-SSSE3-NEXT: shll $16, %edx
992 ; SSE2-SSSE3-NEXT: orl %eax, %edx
993 ; SSE2-SSSE3-NEXT: shlq $32, %rdx
994 ; SSE2-SSSE3-NEXT: orq %rcx, %rdx
995 ; SSE2-SSSE3-NEXT: movq %rdx, %xmm0
996 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
997 ; SSE2-SSSE3-NEXT: movd %xmm0, %eax
998 ; SSE2-SSSE3-NEXT: addl %ecx, %eax
999 ; SSE2-SSSE3-NEXT: retq
1001 ; SSE41-LABEL: bitcast_v64i8_to_v2i32:
1003 ; SSE41-NEXT: pmovmskb %xmm2, %eax
1004 ; SSE41-NEXT: pmovmskb %xmm3, %ecx
1005 ; SSE41-NEXT: shll $16, %ecx
1006 ; SSE41-NEXT: orl %eax, %ecx
1007 ; SSE41-NEXT: pmovmskb %xmm0, %edx
1008 ; SSE41-NEXT: pmovmskb %xmm1, %eax
1009 ; SSE41-NEXT: shll $16, %eax
1010 ; SSE41-NEXT: orl %edx, %eax
1011 ; SSE41-NEXT: addl %ecx, %eax
1014 ; AVX1-LABEL: bitcast_v64i8_to_v2i32:
1016 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
1017 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1018 ; AVX1-NEXT: vpmovmskb %xmm1, %ecx
1019 ; AVX1-NEXT: shll $16, %ecx
1020 ; AVX1-NEXT: orl %eax, %ecx
1021 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
1022 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1023 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1024 ; AVX1-NEXT: shll $16, %eax
1025 ; AVX1-NEXT: orl %edx, %eax
1026 ; AVX1-NEXT: addl %ecx, %eax
1027 ; AVX1-NEXT: vzeroupper
1030 ; AVX2-LABEL: bitcast_v64i8_to_v2i32:
1032 ; AVX2-NEXT: vpmovmskb %ymm1, %ecx
1033 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1034 ; AVX2-NEXT: addl %ecx, %eax
1035 ; AVX2-NEXT: vzeroupper
1038 ; AVX512-LABEL: bitcast_v64i8_to_v2i32:
1040 ; AVX512-NEXT: vpmovb2m %zmm0, %k0
1041 ; AVX512-NEXT: kshiftrq $32, %k0, %k1
1042 ; AVX512-NEXT: kmovd %k0, %ecx
1043 ; AVX512-NEXT: kmovd %k1, %eax
1044 ; AVX512-NEXT: addl %ecx, %eax
1045 ; AVX512-NEXT: vzeroupper
1047 %1 = icmp slt <64 x i8> %a0, zeroinitializer
1048 %2 = bitcast <64 x i1> %1 to <2 x i32>
1049 %3 = extractelement <2 x i32> %2, i32 0
1050 %4 = extractelement <2 x i32> %2, i32 1
; Low-bit test for <64 x i8>: each element is truncated to i1, the
; <64 x i1> mask is bitcast to i64 and compared `ne` with 0, i.e. the
; result is true when at least one element has its low bit set.
1055 define i1 @trunc_v64i8_cmp(<64 x i8> %a0) nounwind {
1056 ; SSE2-SSSE3-LABEL: trunc_v64i8_cmp:
1057 ; SSE2-SSSE3: # %bb.0:
1058 ; SSE2-SSSE3-NEXT: por %xmm3, %xmm1
1059 ; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
1060 ; SSE2-SSSE3-NEXT: por %xmm1, %xmm0
1061 ; SSE2-SSSE3-NEXT: psllw $7, %xmm0
1062 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
1063 ; SSE2-SSSE3-NEXT: testl %eax, %eax
1064 ; SSE2-SSSE3-NEXT: setne %al
1065 ; SSE2-SSSE3-NEXT: retq
1067 ; SSE41-LABEL: trunc_v64i8_cmp:
1069 ; SSE41-NEXT: por %xmm3, %xmm1
1070 ; SSE41-NEXT: por %xmm2, %xmm0
1071 ; SSE41-NEXT: por %xmm1, %xmm0
1072 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1073 ; SSE41-NEXT: setne %al
1076 ; AVX1-LABEL: trunc_v64i8_cmp:
1078 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1079 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
1080 ; AVX1-NEXT: setne %al
1081 ; AVX1-NEXT: vzeroupper
1084 ; AVX2-LABEL: trunc_v64i8_cmp:
1086 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1087 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
1088 ; AVX2-NEXT: vptest %ymm1, %ymm0
1089 ; AVX2-NEXT: setne %al
1090 ; AVX2-NEXT: vzeroupper
1093 ; AVX512-LABEL: trunc_v64i8_cmp:
1095 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1096 ; AVX512-NEXT: kortestw %k0, %k0
1097 ; AVX512-NEXT: setne %al
1098 ; AVX512-NEXT: vzeroupper
1100 %1 = trunc <64 x i8> %a0 to <64 x i1>
1101 %2 = bitcast <64 x i1> %1 to i64
1102 %3 = icmp ne i64 %2, 0
; bitcast_v128i8_to_v2i64 - build a 128-bit sign mask from %a0 (icmp slt
; against zero yields one i1 per byte), reinterpret the <128 x i1> result as
; <2 x i64> and extract both 64-bit elements.
;
; What the expected code shows: each vector register contributes its mask bits
; through pmovmskb / vpmovmskb (or vpmovb2m + kmovq in the avx512 run), and the
; pieces are stitched into 64-bit halves with shll $16 / orl and shlq $32 / orq.
; NOTE(review): every expected sequence finishes with an addq of the two
; halves, so the function presumably returns their sum - confirm against the
; instruction that consumes %3 and %4.
;
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
1106 define i64 @bitcast_v128i8_to_v2i64(<128 x i8> %a0) nounwind {
1107 ; SSE-LABEL: bitcast_v128i8_to_v2i64:
1109 ; SSE-NEXT: pmovmskb %xmm4, %eax
1110 ; SSE-NEXT: pmovmskb %xmm5, %ecx
1111 ; SSE-NEXT: shll $16, %ecx
1112 ; SSE-NEXT: orl %eax, %ecx
1113 ; SSE-NEXT: pmovmskb %xmm6, %eax
1114 ; SSE-NEXT: pmovmskb %xmm7, %edx
1115 ; SSE-NEXT: shll $16, %edx
1116 ; SSE-NEXT: orl %eax, %edx
1117 ; SSE-NEXT: shlq $32, %rdx
1118 ; SSE-NEXT: orq %rcx, %rdx
1119 ; SSE-NEXT: pmovmskb %xmm0, %eax
1120 ; SSE-NEXT: pmovmskb %xmm1, %ecx
1121 ; SSE-NEXT: shll $16, %ecx
1122 ; SSE-NEXT: orl %eax, %ecx
1123 ; SSE-NEXT: pmovmskb %xmm2, %esi
1124 ; SSE-NEXT: pmovmskb %xmm3, %eax
1125 ; SSE-NEXT: shll $16, %eax
1126 ; SSE-NEXT: orl %esi, %eax
1127 ; SSE-NEXT: shlq $32, %rax
1128 ; SSE-NEXT: orq %rcx, %rax
1129 ; SSE-NEXT: addq %rdx, %rax
1132 ; AVX1-LABEL: bitcast_v128i8_to_v2i64:
1134 ; AVX1-NEXT: vpmovmskb %xmm2, %eax
1135 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
1136 ; AVX1-NEXT: vpmovmskb %xmm2, %edx
1137 ; AVX1-NEXT: shll $16, %edx
1138 ; AVX1-NEXT: orl %eax, %edx
1139 ; AVX1-NEXT: vpmovmskb %xmm3, %eax
1140 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
1141 ; AVX1-NEXT: vpmovmskb %xmm2, %ecx
1142 ; AVX1-NEXT: shll $16, %ecx
1143 ; AVX1-NEXT: orl %eax, %ecx
1144 ; AVX1-NEXT: shlq $32, %rcx
1145 ; AVX1-NEXT: orq %rdx, %rcx
1146 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1147 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1148 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
1149 ; AVX1-NEXT: shll $16, %edx
1150 ; AVX1-NEXT: orl %eax, %edx
1151 ; AVX1-NEXT: vpmovmskb %xmm1, %esi
1152 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
1153 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1154 ; AVX1-NEXT: shll $16, %eax
1155 ; AVX1-NEXT: orl %esi, %eax
1156 ; AVX1-NEXT: shlq $32, %rax
1157 ; AVX1-NEXT: orq %rdx, %rax
1158 ; AVX1-NEXT: addq %rcx, %rax
1159 ; AVX1-NEXT: vzeroupper
1162 ; AVX2-LABEL: bitcast_v128i8_to_v2i64:
1164 ; AVX2-NEXT: vpmovmskb %ymm3, %eax
1165 ; AVX2-NEXT: shlq $32, %rax
1166 ; AVX2-NEXT: vpmovmskb %ymm2, %ecx
1167 ; AVX2-NEXT: orq %rax, %rcx
1168 ; AVX2-NEXT: vpmovmskb %ymm1, %edx
1169 ; AVX2-NEXT: shlq $32, %rdx
1170 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1171 ; AVX2-NEXT: orq %rdx, %rax
1172 ; AVX2-NEXT: addq %rcx, %rax
1173 ; AVX2-NEXT: vzeroupper
1176 ; AVX512-LABEL: bitcast_v128i8_to_v2i64:
1178 ; AVX512-NEXT: vpmovb2m %zmm1, %k0
1179 ; AVX512-NEXT: kmovq %k0, %rcx
1180 ; AVX512-NEXT: vpmovb2m %zmm0, %k0
1181 ; AVX512-NEXT: kmovq %k0, %rax
1182 ; AVX512-NEXT: addq %rcx, %rax
1183 ; AVX512-NEXT: vzeroupper
1185 %1 = icmp slt <128 x i8> %a0, zeroinitializer
1186 %2 = bitcast <128 x i1> %1 to <2 x i64>
1187 %3 = extractelement <2 x i64> %2, i32 0
1188 %4 = extractelement <2 x i64> %2, i32 1
; trunc_v128i8_cmp - truncate every byte of %a0 to its low bit, reinterpret the
; 128 resulting i1 lanes as an i128 (bitcast) and compare that value against -1
; with icmp ne, i.e. the result is true unless the low bit of every byte is set.
;
; What the expected code shows: psllw $7 moves bit 0 of each byte into the
; byte's sign position so pmovmskb / vpmovmskb / vpmovb2m can collect it. The
; two 64-bit mask halves are assembled in general-purpose registers, moved
; into one xmm register (movq + punpcklqdq) and compared with an all-ones
; register produced by pcmpeqd reg,reg. The comparison is done either with
; pcmpeqb + pmovmskb + cmpl $65535 + setne, or with ptest + setae.
;
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
1193 define i1 @trunc_v128i8_cmp(<128 x i8> %a0) nounwind {
1194 ; SSE2-SSSE3-LABEL: trunc_v128i8_cmp:
1195 ; SSE2-SSSE3: # %bb.0:
1196 ; SSE2-SSSE3-NEXT: psllw $7, %xmm0
1197 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
1198 ; SSE2-SSSE3-NEXT: psllw $7, %xmm1
1199 ; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %ecx
1200 ; SSE2-SSSE3-NEXT: shll $16, %ecx
1201 ; SSE2-SSSE3-NEXT: orl %eax, %ecx
1202 ; SSE2-SSSE3-NEXT: psllw $7, %xmm2
1203 ; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %edx
1204 ; SSE2-SSSE3-NEXT: psllw $7, %xmm3
1205 ; SSE2-SSSE3-NEXT: pmovmskb %xmm3, %eax
1206 ; SSE2-SSSE3-NEXT: shll $16, %eax
1207 ; SSE2-SSSE3-NEXT: orl %edx, %eax
1208 ; SSE2-SSSE3-NEXT: shlq $32, %rax
1209 ; SSE2-SSSE3-NEXT: orq %rcx, %rax
1210 ; SSE2-SSSE3-NEXT: psllw $7, %xmm4
1211 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %ecx
1212 ; SSE2-SSSE3-NEXT: psllw $7, %xmm5
1213 ; SSE2-SSSE3-NEXT: pmovmskb %xmm5, %edx
1214 ; SSE2-SSSE3-NEXT: shll $16, %edx
1215 ; SSE2-SSSE3-NEXT: orl %ecx, %edx
1216 ; SSE2-SSSE3-NEXT: psllw $7, %xmm6
1217 ; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %ecx
1218 ; SSE2-SSSE3-NEXT: psllw $7, %xmm7
1219 ; SSE2-SSSE3-NEXT: pmovmskb %xmm7, %esi
1220 ; SSE2-SSSE3-NEXT: shll $16, %esi
1221 ; SSE2-SSSE3-NEXT: orl %ecx, %esi
1222 ; SSE2-SSSE3-NEXT: shlq $32, %rsi
1223 ; SSE2-SSSE3-NEXT: orq %rdx, %rsi
1224 ; SSE2-SSSE3-NEXT: movq %rsi, %xmm0
1225 ; SSE2-SSSE3-NEXT: movq %rax, %xmm1
1226 ; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1227 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm0
1228 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
1229 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
1230 ; SSE2-SSSE3-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1231 ; SSE2-SSSE3-NEXT: setne %al
1232 ; SSE2-SSSE3-NEXT: retq
1234 ; SSE41-LABEL: trunc_v128i8_cmp:
1236 ; SSE41-NEXT: psllw $7, %xmm0
1237 ; SSE41-NEXT: pmovmskb %xmm0, %eax
1238 ; SSE41-NEXT: psllw $7, %xmm1
1239 ; SSE41-NEXT: pmovmskb %xmm1, %ecx
1240 ; SSE41-NEXT: shll $16, %ecx
1241 ; SSE41-NEXT: orl %eax, %ecx
1242 ; SSE41-NEXT: psllw $7, %xmm2
1243 ; SSE41-NEXT: pmovmskb %xmm2, %edx
1244 ; SSE41-NEXT: psllw $7, %xmm3
1245 ; SSE41-NEXT: pmovmskb %xmm3, %eax
1246 ; SSE41-NEXT: shll $16, %eax
1247 ; SSE41-NEXT: orl %edx, %eax
1248 ; SSE41-NEXT: shlq $32, %rax
1249 ; SSE41-NEXT: orq %rcx, %rax
1250 ; SSE41-NEXT: psllw $7, %xmm4
1251 ; SSE41-NEXT: pmovmskb %xmm4, %ecx
1252 ; SSE41-NEXT: psllw $7, %xmm5
1253 ; SSE41-NEXT: pmovmskb %xmm5, %edx
1254 ; SSE41-NEXT: shll $16, %edx
1255 ; SSE41-NEXT: orl %ecx, %edx
1256 ; SSE41-NEXT: psllw $7, %xmm6
1257 ; SSE41-NEXT: pmovmskb %xmm6, %ecx
1258 ; SSE41-NEXT: psllw $7, %xmm7
1259 ; SSE41-NEXT: pmovmskb %xmm7, %esi
1260 ; SSE41-NEXT: shll $16, %esi
1261 ; SSE41-NEXT: orl %ecx, %esi
1262 ; SSE41-NEXT: shlq $32, %rsi
1263 ; SSE41-NEXT: orq %rdx, %rsi
1264 ; SSE41-NEXT: movq %rsi, %xmm0
1265 ; SSE41-NEXT: movq %rax, %xmm1
1266 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1267 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
1268 ; SSE41-NEXT: ptest %xmm0, %xmm1
1269 ; SSE41-NEXT: setae %al
1272 ; AVX1-LABEL: trunc_v128i8_cmp:
1274 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm4
1275 ; AVX1-NEXT: vpmovmskb %xmm4, %eax
1276 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1277 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1278 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
1279 ; AVX1-NEXT: shll $16, %ecx
1280 ; AVX1-NEXT: orl %eax, %ecx
1281 ; AVX1-NEXT: vpsllw $7, %xmm1, %xmm0
1282 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
1283 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
1284 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1285 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1286 ; AVX1-NEXT: shll $16, %eax
1287 ; AVX1-NEXT: orl %edx, %eax
1288 ; AVX1-NEXT: shlq $32, %rax
1289 ; AVX1-NEXT: orq %rcx, %rax
1290 ; AVX1-NEXT: vpsllw $7, %xmm2, %xmm0
1291 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
1292 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm0
1293 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1294 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
1295 ; AVX1-NEXT: shll $16, %edx
1296 ; AVX1-NEXT: orl %ecx, %edx
1297 ; AVX1-NEXT: vpsllw $7, %xmm3, %xmm0
1298 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
1299 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm0
1300 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1301 ; AVX1-NEXT: vpmovmskb %xmm0, %esi
1302 ; AVX1-NEXT: shll $16, %esi
1303 ; AVX1-NEXT: orl %ecx, %esi
1304 ; AVX1-NEXT: shlq $32, %rsi
1305 ; AVX1-NEXT: orq %rdx, %rsi
1306 ; AVX1-NEXT: vmovq %rsi, %xmm0
1307 ; AVX1-NEXT: vmovq %rax, %xmm1
1308 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1309 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1310 ; AVX1-NEXT: vptest %xmm1, %xmm0
1311 ; AVX1-NEXT: setae %al
1312 ; AVX1-NEXT: vzeroupper
1315 ; AVX2-LABEL: trunc_v128i8_cmp:
1317 ; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1
1318 ; AVX2-NEXT: vpmovmskb %ymm1, %eax
1319 ; AVX2-NEXT: shlq $32, %rax
1320 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
1321 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
1322 ; AVX2-NEXT: orq %rax, %rcx
1323 ; AVX2-NEXT: vpsllw $7, %ymm3, %ymm0
1324 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1325 ; AVX2-NEXT: shlq $32, %rax
1326 ; AVX2-NEXT: vpsllw $7, %ymm2, %ymm0
1327 ; AVX2-NEXT: vpmovmskb %ymm0, %edx
1328 ; AVX2-NEXT: orq %rax, %rdx
1329 ; AVX2-NEXT: vmovq %rdx, %xmm0
1330 ; AVX2-NEXT: vmovq %rcx, %xmm1
1331 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1332 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1333 ; AVX2-NEXT: vptest %xmm1, %xmm0
1334 ; AVX2-NEXT: setae %al
1335 ; AVX2-NEXT: vzeroupper
1338 ; AVX512-LABEL: trunc_v128i8_cmp:
1340 ; AVX512-NEXT: vpsllw $7, %zmm0, %zmm0
1341 ; AVX512-NEXT: vpmovb2m %zmm0, %k0
1342 ; AVX512-NEXT: kmovq %k0, %rax
1343 ; AVX512-NEXT: vpsllw $7, %zmm1, %zmm0
1344 ; AVX512-NEXT: vpmovb2m %zmm0, %k0
1345 ; AVX512-NEXT: kmovq %k0, %rcx
1346 ; AVX512-NEXT: vmovq %rcx, %xmm0
1347 ; AVX512-NEXT: vmovq %rax, %xmm1
1348 ; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1349 ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1350 ; AVX512-NEXT: vptest %xmm1, %xmm0
1351 ; AVX512-NEXT: setae %al
1352 ; AVX512-NEXT: vzeroupper
1354 %1 = trunc <128 x i8> %a0 to <128 x i1>
1355 %2 = bitcast <128 x i1> %1 to i128
1356 %3 = icmp ne i128 %2, -1
; PR58546 - compare each of the 16 float lanes of %a0 against zero with
; fcmp uno (unordered), reinterpret the <16 x i1> result as <2 x i8>, extract
; both bytes and insert them into a [2 x i8] aggregate (element 0 = mask byte 0,
; element 1 = mask byte 1).
;
; What the expected code shows: the two mask bytes end up in %al and %dl. The
; pre-avx512 runs narrow the compare results with packssdw / packsswb, read
; them with a single pmovmskb and derive the second byte with shrl $8. The
; avx512 run compares straight into a mask register and uses kshiftrw $8.
; NOTE(review): the name suggests a regression test for bug report 58546 -
; confirm against the tracker.
;
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
1360 define [2 x i8] @PR58546(<16 x float> %a0) {
1361 ; SSE-LABEL: PR58546:
1363 ; SSE-NEXT: xorps %xmm4, %xmm4
1364 ; SSE-NEXT: cmpunordps %xmm4, %xmm3
1365 ; SSE-NEXT: cmpunordps %xmm4, %xmm2
1366 ; SSE-NEXT: packssdw %xmm3, %xmm2
1367 ; SSE-NEXT: cmpunordps %xmm4, %xmm1
1368 ; SSE-NEXT: cmpunordps %xmm4, %xmm0
1369 ; SSE-NEXT: packssdw %xmm1, %xmm0
1370 ; SSE-NEXT: packsswb %xmm2, %xmm0
1371 ; SSE-NEXT: pmovmskb %xmm0, %eax
1372 ; SSE-NEXT: movl %eax, %edx
1373 ; SSE-NEXT: shrl $8, %edx
1374 ; SSE-NEXT: # kill: def $al killed $al killed $eax
1375 ; SSE-NEXT: # kill: def $dl killed $dl killed $edx
1378 ; AVX1-LABEL: PR58546:
1380 ; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
1381 ; AVX1-NEXT: vcmpunordps %ymm2, %ymm1, %ymm1
1382 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1383 ; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
1384 ; AVX1-NEXT: vcmpunordps %ymm2, %ymm0, %ymm0
1385 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1386 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1387 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1388 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1389 ; AVX1-NEXT: movl %eax, %edx
1390 ; AVX1-NEXT: shrl $8, %edx
1391 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
1392 ; AVX1-NEXT: # kill: def $dl killed $dl killed $edx
1393 ; AVX1-NEXT: vzeroupper
1396 ; AVX2-LABEL: PR58546:
1398 ; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
1399 ; AVX2-NEXT: vcmpunordps %ymm2, %ymm1, %ymm1
1400 ; AVX2-NEXT: vcmpunordps %ymm2, %ymm0, %ymm0
1401 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1402 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1403 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1404 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
1405 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
1406 ; AVX2-NEXT: movl %eax, %edx
1407 ; AVX2-NEXT: shrl $8, %edx
1408 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
1409 ; AVX2-NEXT: # kill: def $dl killed $dl killed $edx
1410 ; AVX2-NEXT: vzeroupper
1413 ; AVX512-LABEL: PR58546:
1415 ; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
1416 ; AVX512-NEXT: vcmpunordps %zmm1, %zmm0, %k0
1417 ; AVX512-NEXT: kshiftrw $8, %k0, %k1
1418 ; AVX512-NEXT: kmovd %k0, %eax
1419 ; AVX512-NEXT: kmovd %k1, %edx
1420 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
1421 ; AVX512-NEXT: # kill: def $dl killed $dl killed $edx
1422 ; AVX512-NEXT: vzeroupper
1424 %1 = fcmp uno <16 x float> %a0, zeroinitializer
1425 %2 = bitcast <16 x i1> %1 to <2 x i8>
1426 %3 = extractelement <2 x i8> %2, i64 0
1427 %4 = extractelement <2 x i8> %2, i64 1
1428 %5 = insertvalue [2 x i8] poison, i8 %3, 0
1429 %6 = insertvalue [2 x i8] %5, i8 %4, 1
; PR59526 - per lane, select between "mask lane < 0" and false using "a == b"
; as the condition, which is a lane-wise AND of the two compares. %mask is
; read as an <8 x i32> with align 1 (an unaligned load). The eight i1 results
; are bitcast to i8, tested with icmp ne 0 ("any lane set") and the i1 is
; zero-extended to i8.
;
; What the expected code shows: the AVX1 and AVX2 runs fold the whole pattern
; into vtestps with the load as memory operand. The SSE runs AND the equality
; result with the loaded data, pack, and test the pmovmskb result against
; 0xAAAA. The avx512 run uses vpcmpgtd from memory, masked by the equality
; result in %k1.
; NOTE(review): the name suggests a regression test for bug report 59526 -
; confirm against the tracker.
;
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
1433 define i8 @PR59526(<8 x i32> %a, <8 x i32> %b, ptr %mask) {
1434 ; SSE-LABEL: PR59526:
1436 ; SSE-NEXT: pcmpeqd %xmm2, %xmm0
1437 ; SSE-NEXT: pcmpeqd %xmm3, %xmm1
1438 ; SSE-NEXT: movdqu (%rdi), %xmm2
1439 ; SSE-NEXT: pand %xmm0, %xmm2
1440 ; SSE-NEXT: movdqu 16(%rdi), %xmm0
1441 ; SSE-NEXT: pand %xmm1, %xmm0
1442 ; SSE-NEXT: packssdw %xmm0, %xmm2
1443 ; SSE-NEXT: pmovmskb %xmm2, %eax
1444 ; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA
1445 ; SSE-NEXT: setne %al
1448 ; AVX1-LABEL: PR59526:
1450 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1451 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1452 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2
1453 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1454 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1455 ; AVX1-NEXT: vtestps (%rdi), %ymm0
1456 ; AVX1-NEXT: setne %al
1457 ; AVX1-NEXT: vzeroupper
1460 ; AVX2-LABEL: PR59526:
1462 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
1463 ; AVX2-NEXT: vtestps (%rdi), %ymm0
1464 ; AVX2-NEXT: setne %al
1465 ; AVX2-NEXT: vzeroupper
1468 ; AVX512-LABEL: PR59526:
1470 ; AVX512-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
1471 ; AVX512-NEXT: vpxor %xmm0, %xmm0, %xmm0
1472 ; AVX512-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 {%k1}
1473 ; AVX512-NEXT: kmovd %k0, %eax
1474 ; AVX512-NEXT: testb %al, %al
1475 ; AVX512-NEXT: setne %al
1476 ; AVX512-NEXT: vzeroupper
1478 %cmp.eq = icmp eq <8 x i32> %a, %b
1479 %load = load <8 x i32>, ptr %mask, align 1
1480 %cmp.slt = icmp slt <8 x i32> %load, zeroinitializer
1481 %sel = select <8 x i1> %cmp.eq, <8 x i1> %cmp.slt, <8 x i1> zeroinitializer
1482 %bc = bitcast <8 x i1> %sel to i8
1483 %cmp = icmp ne i8 %bc, 0
1484 %conv = zext i1 %cmp to i8