1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,KNL
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,SKX
9 define i1 @allones_v16i8_sign(<16 x i8> %arg) {
10 ; SSE-LABEL: allones_v16i8_sign:
12 ; SSE-NEXT: pmovmskb %xmm0, %eax
13 ; SSE-NEXT: cmpw $-1, %ax
17 ; AVX-LABEL: allones_v16i8_sign:
19 ; AVX-NEXT: vpmovmskb %xmm0, %eax
20 ; AVX-NEXT: cmpw $-1, %ax
23 %tmp = icmp slt <16 x i8> %arg, zeroinitializer
24 %tmp1 = bitcast <16 x i1> %tmp to i16
25 %tmp2 = icmp eq i16 %tmp1, -1
29 define i1 @allzeros_v16i8_sign(<16 x i8> %arg) {
30 ; SSE-LABEL: allzeros_v16i8_sign:
32 ; SSE-NEXT: pmovmskb %xmm0, %eax
33 ; SSE-NEXT: testl %eax, %eax
37 ; AVX-LABEL: allzeros_v16i8_sign:
39 ; AVX-NEXT: vpmovmskb %xmm0, %eax
40 ; AVX-NEXT: testl %eax, %eax
43 %tmp = icmp slt <16 x i8> %arg, zeroinitializer
44 %tmp1 = bitcast <16 x i1> %tmp to i16
45 %tmp2 = icmp eq i16 %tmp1, 0
49 define i1 @allones_v32i8_sign(<32 x i8> %arg) {
50 ; SSE-LABEL: allones_v32i8_sign:
52 ; SSE-NEXT: pand %xmm1, %xmm0
53 ; SSE-NEXT: pmovmskb %xmm0, %eax
54 ; SSE-NEXT: cmpw $-1, %ax
58 ; AVX1-LABEL: allones_v32i8_sign:
60 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
61 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
62 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
63 ; AVX1-NEXT: cmpw $-1, %ax
65 ; AVX1-NEXT: vzeroupper
68 ; AVX2-LABEL: allones_v32i8_sign:
70 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
71 ; AVX2-NEXT: cmpl $-1, %eax
73 ; AVX2-NEXT: vzeroupper
76 ; AVX512-LABEL: allones_v32i8_sign:
78 ; AVX512-NEXT: vpmovmskb %ymm0, %eax
79 ; AVX512-NEXT: cmpl $-1, %eax
80 ; AVX512-NEXT: sete %al
81 ; AVX512-NEXT: vzeroupper
83 %tmp = icmp slt <32 x i8> %arg, zeroinitializer
84 %tmp1 = bitcast <32 x i1> %tmp to i32
85 %tmp2 = icmp eq i32 %tmp1, -1
89 define i1 @allzeros_v32i8_sign(<32 x i8> %arg) {
90 ; SSE-LABEL: allzeros_v32i8_sign:
92 ; SSE-NEXT: por %xmm1, %xmm0
93 ; SSE-NEXT: pmovmskb %xmm0, %eax
94 ; SSE-NEXT: testl %eax, %eax
98 ; AVX1-LABEL: allzeros_v32i8_sign:
100 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
101 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
102 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
103 ; AVX1-NEXT: testl %eax, %eax
104 ; AVX1-NEXT: sete %al
105 ; AVX1-NEXT: vzeroupper
108 ; AVX2-LABEL: allzeros_v32i8_sign:
110 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
111 ; AVX2-NEXT: testl %eax, %eax
112 ; AVX2-NEXT: sete %al
113 ; AVX2-NEXT: vzeroupper
116 ; AVX512-LABEL: allzeros_v32i8_sign:
118 ; AVX512-NEXT: vpmovmskb %ymm0, %eax
119 ; AVX512-NEXT: testl %eax, %eax
120 ; AVX512-NEXT: sete %al
121 ; AVX512-NEXT: vzeroupper
123 %tmp = icmp slt <32 x i8> %arg, zeroinitializer
124 %tmp1 = bitcast <32 x i1> %tmp to i32
125 %tmp2 = icmp eq i32 %tmp1, 0
129 define i1 @allones_v64i8_sign(<64 x i8> %arg) {
130 ; SSE-LABEL: allones_v64i8_sign:
132 ; SSE-NEXT: pand %xmm2, %xmm0
133 ; SSE-NEXT: pand %xmm1, %xmm0
134 ; SSE-NEXT: pand %xmm3, %xmm0
135 ; SSE-NEXT: pmovmskb %xmm0, %eax
136 ; SSE-NEXT: cmpw $-1, %ax
140 ; AVX1-LABEL: allones_v64i8_sign:
142 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
143 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
144 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
145 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
146 ; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
147 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
148 ; AVX1-NEXT: cmpw $-1, %ax
149 ; AVX1-NEXT: sete %al
150 ; AVX1-NEXT: vzeroupper
153 ; AVX2-LABEL: allones_v64i8_sign:
155 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
156 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
157 ; AVX2-NEXT: cmpl $-1, %eax
158 ; AVX2-NEXT: sete %al
159 ; AVX2-NEXT: vzeroupper
162 ; KNL-LABEL: allones_v64i8_sign:
164 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
165 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
166 ; KNL-NEXT: vpmovmskb %ymm0, %eax
167 ; KNL-NEXT: cmpl $-1, %eax
169 ; KNL-NEXT: vzeroupper
172 ; SKX-LABEL: allones_v64i8_sign:
174 ; SKX-NEXT: vpmovb2m %zmm0, %k0
175 ; SKX-NEXT: kortestq %k0, %k0
177 ; SKX-NEXT: vzeroupper
179 %tmp = icmp slt <64 x i8> %arg, zeroinitializer
180 %tmp1 = bitcast <64 x i1> %tmp to i64
181 %tmp2 = icmp eq i64 %tmp1, -1
185 define i1 @allzeros_v64i8_sign(<64 x i8> %arg) {
186 ; SSE-LABEL: allzeros_v64i8_sign:
188 ; SSE-NEXT: por %xmm3, %xmm1
189 ; SSE-NEXT: por %xmm2, %xmm1
190 ; SSE-NEXT: por %xmm0, %xmm1
191 ; SSE-NEXT: pmovmskb %xmm1, %eax
192 ; SSE-NEXT: testl %eax, %eax
196 ; AVX1-LABEL: allzeros_v64i8_sign:
198 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
199 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
200 ; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2
201 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
202 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
203 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
204 ; AVX1-NEXT: testl %eax, %eax
205 ; AVX1-NEXT: sete %al
206 ; AVX1-NEXT: vzeroupper
209 ; AVX2-LABEL: allzeros_v64i8_sign:
211 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
212 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
213 ; AVX2-NEXT: testl %eax, %eax
214 ; AVX2-NEXT: sete %al
215 ; AVX2-NEXT: vzeroupper
218 ; KNL-LABEL: allzeros_v64i8_sign:
220 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
221 ; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
222 ; KNL-NEXT: vpmovmskb %ymm0, %eax
223 ; KNL-NEXT: testl %eax, %eax
225 ; KNL-NEXT: vzeroupper
228 ; SKX-LABEL: allzeros_v64i8_sign:
230 ; SKX-NEXT: vpmovb2m %zmm0, %k0
231 ; SKX-NEXT: kortestq %k0, %k0
233 ; SKX-NEXT: vzeroupper
235 %tmp = icmp slt <64 x i8> %arg, zeroinitializer
236 %tmp1 = bitcast <64 x i1> %tmp to i64
237 %tmp2 = icmp eq i64 %tmp1, 0
241 define i1 @allones_v8i16_sign(<8 x i16> %arg) {
242 ; SSE-LABEL: allones_v8i16_sign:
244 ; SSE-NEXT: packsswb %xmm0, %xmm0
245 ; SSE-NEXT: pmovmskb %xmm0, %eax
246 ; SSE-NEXT: cmpb $-1, %al
250 ; AVX1OR2-LABEL: allones_v8i16_sign:
252 ; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
253 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
254 ; AVX1OR2-NEXT: cmpb $-1, %al
255 ; AVX1OR2-NEXT: sete %al
258 ; KNL-LABEL: allones_v8i16_sign:
260 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
261 ; KNL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
262 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
263 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
264 ; KNL-NEXT: kmovw %k0, %eax
265 ; KNL-NEXT: cmpb $-1, %al
267 ; KNL-NEXT: vzeroupper
270 ; SKX-LABEL: allones_v8i16_sign:
272 ; SKX-NEXT: vpmovw2m %xmm0, %k0
273 ; SKX-NEXT: kortestb %k0, %k0
276 %tmp = icmp slt <8 x i16> %arg, zeroinitializer
277 %tmp1 = bitcast <8 x i1> %tmp to i8
278 %tmp2 = icmp eq i8 %tmp1, -1
282 define i1 @allzeros_v8i16_sign(<8 x i16> %arg) {
283 ; SSE-LABEL: allzeros_v8i16_sign:
285 ; SSE-NEXT: pmovmskb %xmm0, %eax
286 ; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA
290 ; AVX1OR2-LABEL: allzeros_v8i16_sign:
292 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
293 ; AVX1OR2-NEXT: testl $43690, %eax # imm = 0xAAAA
294 ; AVX1OR2-NEXT: sete %al
297 ; KNL-LABEL: allzeros_v8i16_sign:
299 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
300 ; KNL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
301 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
302 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
303 ; KNL-NEXT: kmovw %k0, %eax
304 ; KNL-NEXT: testb %al, %al
306 ; KNL-NEXT: vzeroupper
309 ; SKX-LABEL: allzeros_v8i16_sign:
311 ; SKX-NEXT: vpmovw2m %xmm0, %k0
312 ; SKX-NEXT: kortestb %k0, %k0
315 %tmp = icmp slt <8 x i16> %arg, zeroinitializer
316 %tmp1 = bitcast <8 x i1> %tmp to i8
317 %tmp2 = icmp eq i8 %tmp1, 0
321 define i1 @allones_v16i16_sign(<16 x i16> %arg) {
322 ; SSE-LABEL: allones_v16i16_sign:
324 ; SSE-NEXT: packsswb %xmm1, %xmm0
325 ; SSE-NEXT: pmovmskb %xmm0, %eax
326 ; SSE-NEXT: cmpw $-1, %ax
330 ; AVX1-LABEL: allones_v16i16_sign:
332 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
333 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
334 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
335 ; AVX1-NEXT: cmpw $-1, %ax
336 ; AVX1-NEXT: sete %al
337 ; AVX1-NEXT: vzeroupper
340 ; AVX2-LABEL: allones_v16i16_sign:
342 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
343 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
344 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
345 ; AVX2-NEXT: cmpw $-1, %ax
346 ; AVX2-NEXT: sete %al
347 ; AVX2-NEXT: vzeroupper
350 ; KNL-LABEL: allones_v16i16_sign:
352 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
353 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
354 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
355 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
356 ; KNL-NEXT: kortestw %k0, %k0
358 ; KNL-NEXT: vzeroupper
361 ; SKX-LABEL: allones_v16i16_sign:
363 ; SKX-NEXT: vpmovw2m %ymm0, %k0
364 ; SKX-NEXT: kortestw %k0, %k0
366 ; SKX-NEXT: vzeroupper
368 %tmp = icmp slt <16 x i16> %arg, zeroinitializer
369 %tmp1 = bitcast <16 x i1> %tmp to i16
370 %tmp2 = icmp eq i16 %tmp1, -1
374 define i1 @allzeros_v16i16_sign(<16 x i16> %arg) {
375 ; SSE-LABEL: allzeros_v16i16_sign:
377 ; SSE-NEXT: packsswb %xmm1, %xmm0
378 ; SSE-NEXT: pmovmskb %xmm0, %eax
379 ; SSE-NEXT: testl %eax, %eax
383 ; AVX1-LABEL: allzeros_v16i16_sign:
385 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
386 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
387 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
388 ; AVX1-NEXT: testl %eax, %eax
389 ; AVX1-NEXT: sete %al
390 ; AVX1-NEXT: vzeroupper
393 ; AVX2-LABEL: allzeros_v16i16_sign:
395 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
396 ; AVX2-NEXT: testl $-1431655766, %eax # imm = 0xAAAAAAAA
397 ; AVX2-NEXT: sete %al
398 ; AVX2-NEXT: vzeroupper
401 ; KNL-LABEL: allzeros_v16i16_sign:
403 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
404 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
405 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
406 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
407 ; KNL-NEXT: kortestw %k0, %k0
409 ; KNL-NEXT: vzeroupper
412 ; SKX-LABEL: allzeros_v16i16_sign:
414 ; SKX-NEXT: vpmovw2m %ymm0, %k0
415 ; SKX-NEXT: kortestw %k0, %k0
417 ; SKX-NEXT: vzeroupper
419 %tmp = icmp slt <16 x i16> %arg, zeroinitializer
420 %tmp1 = bitcast <16 x i1> %tmp to i16
421 %tmp2 = icmp eq i16 %tmp1, 0
425 define i1 @allones_v32i16_sign(<32 x i16> %arg) {
426 ; SSE-LABEL: allones_v32i16_sign:
428 ; SSE-NEXT: packsswb %xmm1, %xmm0
429 ; SSE-NEXT: packsswb %xmm3, %xmm2
430 ; SSE-NEXT: pand %xmm0, %xmm2
431 ; SSE-NEXT: pmovmskb %xmm2, %eax
432 ; SSE-NEXT: cmpw $-1, %ax
436 ; AVX1-LABEL: allones_v32i16_sign:
438 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
439 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
440 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
441 ; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
442 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
443 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
444 ; AVX1-NEXT: cmpw $-1, %ax
445 ; AVX1-NEXT: sete %al
446 ; AVX1-NEXT: vzeroupper
449 ; AVX2-LABEL: allones_v32i16_sign:
451 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
452 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
453 ; AVX2-NEXT: cmpl $-1, %eax
454 ; AVX2-NEXT: sete %al
455 ; AVX2-NEXT: vzeroupper
458 ; KNL-LABEL: allones_v32i16_sign:
460 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
461 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
462 ; KNL-NEXT: vpmovsxwd %ymm2, %zmm2
463 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
464 ; KNL-NEXT: kmovw %k0, %eax
465 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
466 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
467 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
468 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
469 ; KNL-NEXT: kmovw %k0, %ecx
470 ; KNL-NEXT: andl %eax, %ecx
471 ; KNL-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
473 ; KNL-NEXT: vzeroupper
476 ; SKX-LABEL: allones_v32i16_sign:
478 ; SKX-NEXT: vpmovw2m %zmm0, %k0
479 ; SKX-NEXT: kortestd %k0, %k0
481 ; SKX-NEXT: vzeroupper
483 %tmp = icmp slt <32 x i16> %arg, zeroinitializer
484 %tmp1 = bitcast <32 x i1> %tmp to i32
485 %tmp2 = icmp eq i32 %tmp1, -1
489 define i1 @allzeros_v32i16_sign(<32 x i16> %arg) {
490 ; SSE-LABEL: allzeros_v32i16_sign:
492 ; SSE-NEXT: packsswb %xmm3, %xmm2
493 ; SSE-NEXT: packsswb %xmm1, %xmm0
494 ; SSE-NEXT: por %xmm2, %xmm0
495 ; SSE-NEXT: pmovmskb %xmm0, %eax
496 ; SSE-NEXT: testl %eax, %eax
500 ; AVX1-LABEL: allzeros_v32i16_sign:
502 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
503 ; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
504 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
505 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
506 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
507 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
508 ; AVX1-NEXT: testl %eax, %eax
509 ; AVX1-NEXT: sete %al
510 ; AVX1-NEXT: vzeroupper
513 ; AVX2-LABEL: allzeros_v32i16_sign:
515 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
516 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
517 ; AVX2-NEXT: testl %eax, %eax
518 ; AVX2-NEXT: sete %al
519 ; AVX2-NEXT: vzeroupper
522 ; KNL-LABEL: allzeros_v32i16_sign:
524 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
525 ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
526 ; KNL-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
527 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
528 ; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
529 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
530 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
531 ; KNL-NEXT: kortestw %k0, %k0
533 ; KNL-NEXT: vzeroupper
536 ; SKX-LABEL: allzeros_v32i16_sign:
538 ; SKX-NEXT: vpmovw2m %zmm0, %k0
539 ; SKX-NEXT: kortestd %k0, %k0
541 ; SKX-NEXT: vzeroupper
543 %tmp = icmp slt <32 x i16> %arg, zeroinitializer
544 %tmp1 = bitcast <32 x i1> %tmp to i32
545 %tmp2 = icmp eq i32 %tmp1, 0
549 define i1 @allones_v4i32_sign(<4 x i32> %arg) {
550 ; SSE-LABEL: allones_v4i32_sign:
552 ; SSE-NEXT: movmskps %xmm0, %eax
553 ; SSE-NEXT: cmpb $15, %al
557 ; AVX-LABEL: allones_v4i32_sign:
559 ; AVX-NEXT: vmovmskps %xmm0, %eax
560 ; AVX-NEXT: cmpb $15, %al
563 %tmp = icmp slt <4 x i32> %arg, zeroinitializer
564 %tmp1 = bitcast <4 x i1> %tmp to i4
565 %tmp2 = icmp eq i4 %tmp1, -1
569 define i1 @allzeros_v4i32_sign(<4 x i32> %arg) {
570 ; SSE-LABEL: allzeros_v4i32_sign:
572 ; SSE-NEXT: movmskps %xmm0, %eax
573 ; SSE-NEXT: testl %eax, %eax
577 ; AVX-LABEL: allzeros_v4i32_sign:
579 ; AVX-NEXT: vmovmskps %xmm0, %eax
580 ; AVX-NEXT: testl %eax, %eax
583 %tmp = icmp slt <4 x i32> %arg, zeroinitializer
584 %tmp1 = bitcast <4 x i1> %tmp to i4
585 %tmp2 = icmp eq i4 %tmp1, 0
589 define i1 @allones_v8i32_sign(<8 x i32> %arg) {
590 ; SSE-LABEL: allones_v8i32_sign:
592 ; SSE-NEXT: packssdw %xmm1, %xmm0
593 ; SSE-NEXT: packsswb %xmm0, %xmm0
594 ; SSE-NEXT: pmovmskb %xmm0, %eax
595 ; SSE-NEXT: cmpb $-1, %al
599 ; AVX-LABEL: allones_v8i32_sign:
601 ; AVX-NEXT: vmovmskps %ymm0, %eax
602 ; AVX-NEXT: cmpb $-1, %al
604 ; AVX-NEXT: vzeroupper
606 %tmp = icmp slt <8 x i32> %arg, zeroinitializer
607 %tmp1 = bitcast <8 x i1> %tmp to i8
608 %tmp2 = icmp eq i8 %tmp1, -1
612 define i1 @allzeros_v8i32_sign(<8 x i32> %arg) {
613 ; SSE-LABEL: allzeros_v8i32_sign:
615 ; SSE-NEXT: packssdw %xmm1, %xmm0
616 ; SSE-NEXT: pmovmskb %xmm0, %eax
617 ; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA
621 ; AVX-LABEL: allzeros_v8i32_sign:
623 ; AVX-NEXT: vmovmskps %ymm0, %eax
624 ; AVX-NEXT: testl %eax, %eax
626 ; AVX-NEXT: vzeroupper
628 %tmp = icmp slt <8 x i32> %arg, zeroinitializer
629 %tmp1 = bitcast <8 x i1> %tmp to i8
630 %tmp2 = icmp eq i8 %tmp1, 0
634 define i1 @allones_v16i32_sign(<16 x i32> %arg) {
635 ; SSE-LABEL: allones_v16i32_sign:
637 ; SSE-NEXT: packssdw %xmm3, %xmm2
638 ; SSE-NEXT: packssdw %xmm1, %xmm0
639 ; SSE-NEXT: packsswb %xmm2, %xmm0
640 ; SSE-NEXT: pmovmskb %xmm0, %eax
641 ; SSE-NEXT: cmpw $-1, %ax
645 ; AVX1-LABEL: allones_v16i32_sign:
647 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
648 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
649 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
650 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
651 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
652 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
653 ; AVX1-NEXT: cmpw $-1, %ax
654 ; AVX1-NEXT: sete %al
655 ; AVX1-NEXT: vzeroupper
658 ; AVX2-LABEL: allones_v16i32_sign:
660 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
661 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1
662 ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
663 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
664 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
665 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
666 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
667 ; AVX2-NEXT: cmpw $-1, %ax
668 ; AVX2-NEXT: sete %al
669 ; AVX2-NEXT: vzeroupper
672 ; KNL-LABEL: allones_v16i32_sign:
674 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
675 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
676 ; KNL-NEXT: kortestw %k0, %k0
678 ; KNL-NEXT: vzeroupper
681 ; SKX-LABEL: allones_v16i32_sign:
683 ; SKX-NEXT: vpmovd2m %zmm0, %k0
684 ; SKX-NEXT: kortestw %k0, %k0
686 ; SKX-NEXT: vzeroupper
688 %tmp = icmp slt <16 x i32> %arg, zeroinitializer
689 %tmp1 = bitcast <16 x i1> %tmp to i16
690 %tmp2 = icmp eq i16 %tmp1, -1
694 define i1 @allzeros_v16i32_sign(<16 x i32> %arg) {
695 ; SSE-LABEL: allzeros_v16i32_sign:
697 ; SSE-NEXT: packssdw %xmm3, %xmm2
698 ; SSE-NEXT: packssdw %xmm1, %xmm0
699 ; SSE-NEXT: packsswb %xmm2, %xmm0
700 ; SSE-NEXT: pmovmskb %xmm0, %eax
701 ; SSE-NEXT: testl %eax, %eax
705 ; AVX1-LABEL: allzeros_v16i32_sign:
707 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
708 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
709 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
710 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
711 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
712 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
713 ; AVX1-NEXT: testl %eax, %eax
714 ; AVX1-NEXT: sete %al
715 ; AVX1-NEXT: vzeroupper
718 ; AVX2-LABEL: allzeros_v16i32_sign:
720 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
721 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1
722 ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
723 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
724 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
725 ; AVX2-NEXT: testl %eax, %eax
726 ; AVX2-NEXT: sete %al
727 ; AVX2-NEXT: vzeroupper
730 ; KNL-LABEL: allzeros_v16i32_sign:
732 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
733 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
734 ; KNL-NEXT: kortestw %k0, %k0
736 ; KNL-NEXT: vzeroupper
739 ; SKX-LABEL: allzeros_v16i32_sign:
741 ; SKX-NEXT: vpmovd2m %zmm0, %k0
742 ; SKX-NEXT: kortestw %k0, %k0
744 ; SKX-NEXT: vzeroupper
746 %tmp = icmp slt <16 x i32> %arg, zeroinitializer
747 %tmp1 = bitcast <16 x i1> %tmp to i16
748 %tmp2 = icmp eq i16 %tmp1, 0
752 define i1 @allones_v4i64_sign(<4 x i64> %arg) {
753 ; SSE-LABEL: allones_v4i64_sign:
755 ; SSE-NEXT: packssdw %xmm1, %xmm0
756 ; SSE-NEXT: movmskps %xmm0, %eax
757 ; SSE-NEXT: cmpb $15, %al
761 ; AVX-LABEL: allones_v4i64_sign:
763 ; AVX-NEXT: vmovmskpd %ymm0, %eax
764 ; AVX-NEXT: cmpb $15, %al
766 ; AVX-NEXT: vzeroupper
768 %tmp = icmp slt <4 x i64> %arg, zeroinitializer
769 %tmp1 = bitcast <4 x i1> %tmp to i4
770 %tmp2 = icmp eq i4 %tmp1, -1
774 define i1 @allzeros_v4i64_sign(<4 x i64> %arg) {
775 ; SSE-LABEL: allzeros_v4i64_sign:
777 ; SSE-NEXT: packssdw %xmm1, %xmm0
778 ; SSE-NEXT: movmskps %xmm0, %eax
779 ; SSE-NEXT: testl %eax, %eax
783 ; AVX-LABEL: allzeros_v4i64_sign:
785 ; AVX-NEXT: vmovmskpd %ymm0, %eax
786 ; AVX-NEXT: testl %eax, %eax
788 ; AVX-NEXT: vzeroupper
790 %tmp = icmp slt <4 x i64> %arg, zeroinitializer
791 %tmp1 = bitcast <4 x i1> %tmp to i4
792 %tmp2 = icmp eq i4 %tmp1, 0
796 define i1 @allones_v8i64_sign(<8 x i64> %arg) {
797 ; SSE-LABEL: allones_v8i64_sign:
799 ; SSE-NEXT: packssdw %xmm3, %xmm2
800 ; SSE-NEXT: packssdw %xmm1, %xmm0
801 ; SSE-NEXT: packssdw %xmm2, %xmm0
802 ; SSE-NEXT: packsswb %xmm0, %xmm0
803 ; SSE-NEXT: pmovmskb %xmm0, %eax
804 ; SSE-NEXT: cmpb $-1, %al
808 ; AVX1-LABEL: allones_v8i64_sign:
810 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
811 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
812 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
813 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
814 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
815 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
816 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
817 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
818 ; AVX1-NEXT: vmovmskps %ymm0, %eax
819 ; AVX1-NEXT: cmpb $-1, %al
820 ; AVX1-NEXT: sete %al
821 ; AVX1-NEXT: vzeroupper
824 ; AVX2-LABEL: allones_v8i64_sign:
826 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
827 ; AVX2-NEXT: vmovmskps %ymm0, %eax
828 ; AVX2-NEXT: cmpb $-1, %al
829 ; AVX2-NEXT: sete %al
830 ; AVX2-NEXT: vzeroupper
833 ; KNL-LABEL: allones_v8i64_sign:
835 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
836 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
837 ; KNL-NEXT: kmovw %k0, %eax
838 ; KNL-NEXT: cmpb $-1, %al
840 ; KNL-NEXT: vzeroupper
843 ; SKX-LABEL: allones_v8i64_sign:
845 ; SKX-NEXT: vpmovq2m %zmm0, %k0
846 ; SKX-NEXT: kortestb %k0, %k0
848 ; SKX-NEXT: vzeroupper
850 %tmp = icmp slt <8 x i64> %arg, zeroinitializer
851 %tmp1 = bitcast <8 x i1> %tmp to i8
852 %tmp2 = icmp eq i8 %tmp1, -1
856 define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
857 ; SSE-LABEL: allzeros_v8i64_sign:
859 ; SSE-NEXT: packssdw %xmm3, %xmm2
860 ; SSE-NEXT: packssdw %xmm1, %xmm0
861 ; SSE-NEXT: packssdw %xmm2, %xmm0
862 ; SSE-NEXT: pmovmskb %xmm0, %eax
863 ; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA
867 ; AVX1-LABEL: allzeros_v8i64_sign:
869 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
870 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
871 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
872 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
873 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
874 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
875 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
876 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
877 ; AVX1-NEXT: vmovmskps %ymm0, %eax
878 ; AVX1-NEXT: testl %eax, %eax
879 ; AVX1-NEXT: sete %al
880 ; AVX1-NEXT: vzeroupper
883 ; AVX2-LABEL: allzeros_v8i64_sign:
885 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
886 ; AVX2-NEXT: vmovmskps %ymm0, %eax
887 ; AVX2-NEXT: testl %eax, %eax
888 ; AVX2-NEXT: sete %al
889 ; AVX2-NEXT: vzeroupper
892 ; KNL-LABEL: allzeros_v8i64_sign:
894 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
895 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
896 ; KNL-NEXT: kmovw %k0, %eax
897 ; KNL-NEXT: testb %al, %al
899 ; KNL-NEXT: vzeroupper
902 ; SKX-LABEL: allzeros_v8i64_sign:
904 ; SKX-NEXT: vpmovq2m %zmm0, %k0
905 ; SKX-NEXT: kortestb %k0, %k0
907 ; SKX-NEXT: vzeroupper
909 %tmp = icmp slt <8 x i64> %arg, zeroinitializer
910 %tmp1 = bitcast <8 x i1> %tmp to i8
911 %tmp2 = icmp eq i8 %tmp1, 0
915 define i1 @allones_v16i8_and1(<16 x i8> %arg) {
916 ; SSE-LABEL: allones_v16i8_and1:
918 ; SSE-NEXT: psllw $7, %xmm0
919 ; SSE-NEXT: pmovmskb %xmm0, %eax
920 ; SSE-NEXT: cmpw $-1, %ax
924 ; AVX1OR2-LABEL: allones_v16i8_and1:
926 ; AVX1OR2-NEXT: vpsllw $7, %xmm0, %xmm0
927 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
928 ; AVX1OR2-NEXT: cmpw $-1, %ax
929 ; AVX1OR2-NEXT: sete %al
932 ; KNL-LABEL: allones_v16i8_and1:
934 ; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
935 ; KNL-NEXT: vpmovmskb %xmm0, %eax
936 ; KNL-NEXT: cmpw $-1, %ax
940 ; SKX-LABEL: allones_v16i8_and1:
942 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
943 ; SKX-NEXT: kortestw %k0, %k0
946 %tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
947 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
948 %tmp2 = bitcast <16 x i1> %tmp1 to i16
949 %tmp3 = icmp eq i16 %tmp2, -1
953 define i1 @allzeros_v16i8_not(<16 x i8> %a0) {
954 ; SSE2-LABEL: allzeros_v16i8_not:
956 ; SSE2-NEXT: pxor %xmm1, %xmm1
957 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
958 ; SSE2-NEXT: pmovmskb %xmm1, %eax
959 ; SSE2-NEXT: cmpw $-1, %ax
960 ; SSE2-NEXT: setne %al
963 ; SSE41-LABEL: allzeros_v16i8_not:
965 ; SSE41-NEXT: ptest %xmm0, %xmm0
966 ; SSE41-NEXT: setne %al
969 ; AVX1OR2-LABEL: allzeros_v16i8_not:
971 ; AVX1OR2-NEXT: vptest %xmm0, %xmm0
972 ; AVX1OR2-NEXT: setne %al
975 ; KNL-LABEL: allzeros_v16i8_not:
977 ; KNL-NEXT: vptest %xmm0, %xmm0
978 ; KNL-NEXT: setne %al
981 ; SKX-LABEL: allzeros_v16i8_not:
983 ; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k0
984 ; SKX-NEXT: kortestw %k0, %k0
985 ; SKX-NEXT: setae %al
987 %1 = icmp eq <16 x i8> %a0, zeroinitializer
988 %2 = bitcast <16 x i1> %1 to i16
989 %3 = icmp ne i16 %2, -1
993 define i1 @allzeros_v2i64_not(<2 x i64> %a0) {
994 ; SSE2-LABEL: allzeros_v2i64_not:
996 ; SSE2-NEXT: pxor %xmm1, %xmm1
997 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
998 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
999 ; SSE2-NEXT: pand %xmm1, %xmm0
1000 ; SSE2-NEXT: movmskpd %xmm0, %eax
1001 ; SSE2-NEXT: cmpb $3, %al
1002 ; SSE2-NEXT: setne %al
1005 ; SSE41-LABEL: allzeros_v2i64_not:
1007 ; SSE41-NEXT: ptest %xmm0, %xmm0
1008 ; SSE41-NEXT: setne %al
1011 ; AVX1OR2-LABEL: allzeros_v2i64_not:
1013 ; AVX1OR2-NEXT: vptest %xmm0, %xmm0
1014 ; AVX1OR2-NEXT: setne %al
1015 ; AVX1OR2-NEXT: retq
1017 ; KNL-LABEL: allzeros_v2i64_not:
1019 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1020 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1021 ; KNL-NEXT: kmovw %k0, %eax
1022 ; KNL-NEXT: testb $3, %al
1023 ; KNL-NEXT: setne %al
1024 ; KNL-NEXT: vzeroupper
1027 ; SKX-LABEL: allzeros_v2i64_not:
1029 ; SKX-NEXT: vptestnmq %xmm0, %xmm0, %k0
1030 ; SKX-NEXT: kmovd %k0, %eax
1031 ; SKX-NEXT: cmpb $3, %al
1032 ; SKX-NEXT: setne %al
1034 %1 = icmp eq <2 x i64> %a0, zeroinitializer
1035 %2 = bitcast <2 x i1> %1 to i2
1036 %3 = icmp ne i2 %2, -1
1040 define i1 @allzeros_v8i32_not(<8 x i32> %a0) {
1041 ; SSE-LABEL: allzeros_v8i32_not:
1043 ; SSE-NEXT: pxor %xmm2, %xmm2
1044 ; SSE-NEXT: pcmpeqd %xmm2, %xmm1
1045 ; SSE-NEXT: pcmpeqd %xmm2, %xmm0
1046 ; SSE-NEXT: packssdw %xmm1, %xmm0
1047 ; SSE-NEXT: packsswb %xmm0, %xmm0
1048 ; SSE-NEXT: pmovmskb %xmm0, %eax
1049 ; SSE-NEXT: cmpb $-1, %al
1050 ; SSE-NEXT: setne %al
1053 ; AVX1-LABEL: allzeros_v8i32_not:
1055 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1056 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1057 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
1058 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
1059 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1060 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1061 ; AVX1-NEXT: cmpb $-1, %al
1062 ; AVX1-NEXT: setne %al
1063 ; AVX1-NEXT: vzeroupper
1066 ; AVX2-LABEL: allzeros_v8i32_not:
1068 ; AVX2-NEXT: vptest %ymm0, %ymm0
1069 ; AVX2-NEXT: setne %al
1070 ; AVX2-NEXT: vzeroupper
1073 ; KNL-LABEL: allzeros_v8i32_not:
1075 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1076 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
1077 ; KNL-NEXT: kmovw %k0, %eax
1078 ; KNL-NEXT: cmpb $-1, %al
1079 ; KNL-NEXT: setne %al
1080 ; KNL-NEXT: vzeroupper
1083 ; SKX-LABEL: allzeros_v8i32_not:
1085 ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k0
1086 ; SKX-NEXT: kortestb %k0, %k0
1087 ; SKX-NEXT: setae %al
1088 ; SKX-NEXT: vzeroupper
1090 %1 = icmp eq <8 x i32> %a0, zeroinitializer
1091 %2 = bitcast <8 x i1> %1 to i8
1092 %3 = icmp ne i8 %2, -1
1096 define i1 @allzeros_v8i64_not(<8 x i64> %a0) {
1097 ; SSE2-LABEL: allzeros_v8i64_not:
1099 ; SSE2-NEXT: pxor %xmm4, %xmm4
1100 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
1101 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
1102 ; SSE2-NEXT: pand %xmm3, %xmm5
1103 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
1104 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
1105 ; SSE2-NEXT: pand %xmm2, %xmm3
1106 ; SSE2-NEXT: packssdw %xmm5, %xmm3
1107 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
1108 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
1109 ; SSE2-NEXT: pand %xmm1, %xmm2
1110 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
1111 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
1112 ; SSE2-NEXT: pand %xmm0, %xmm1
1113 ; SSE2-NEXT: packssdw %xmm2, %xmm1
1114 ; SSE2-NEXT: packssdw %xmm3, %xmm1
1115 ; SSE2-NEXT: packsswb %xmm1, %xmm1
1116 ; SSE2-NEXT: pmovmskb %xmm1, %eax
1117 ; SSE2-NEXT: cmpb $-1, %al
1118 ; SSE2-NEXT: setne %al
1121 ; SSE41-LABEL: allzeros_v8i64_not:
1123 ; SSE41-NEXT: pxor %xmm4, %xmm4
1124 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm3
1125 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm2
1126 ; SSE41-NEXT: packssdw %xmm3, %xmm2
1127 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
1128 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
1129 ; SSE41-NEXT: packssdw %xmm1, %xmm0
1130 ; SSE41-NEXT: packssdw %xmm2, %xmm0
1131 ; SSE41-NEXT: packsswb %xmm0, %xmm0
1132 ; SSE41-NEXT: pmovmskb %xmm0, %eax
1133 ; SSE41-NEXT: cmpb $-1, %al
1134 ; SSE41-NEXT: setne %al
1137 ; AVX1-LABEL: allzeros_v8i64_not:
1139 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1140 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1141 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
1142 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
1143 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1144 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1145 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
1146 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
1147 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1148 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1149 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1150 ; AVX1-NEXT: cmpb $-1, %al
1151 ; AVX1-NEXT: setne %al
1152 ; AVX1-NEXT: vzeroupper
1155 ; AVX2-LABEL: allzeros_v8i64_not:
1157 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1158 ; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
1159 ; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
1160 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1161 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1162 ; AVX2-NEXT: cmpb $-1, %al
1163 ; AVX2-NEXT: setne %al
1164 ; AVX2-NEXT: vzeroupper
1167 ; KNL-LABEL: allzeros_v8i64_not:
1169 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
1170 ; KNL-NEXT: kmovw %k0, %eax
1171 ; KNL-NEXT: cmpb $-1, %al
1172 ; KNL-NEXT: setne %al
1173 ; KNL-NEXT: vzeroupper
1176 ; SKX-LABEL: allzeros_v8i64_not:
1178 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
1179 ; SKX-NEXT: kortestb %k0, %k0
1180 ; SKX-NEXT: setae %al
1181 ; SKX-NEXT: vzeroupper
1183 %1 = icmp eq <8 x i64> %a0, zeroinitializer
1184 %2 = bitcast <8 x i1> %1 to i8
1185 %3 = icmp ne i8 %2, -1
1189 define i1 @allzeros_v16i8_and1(<16 x i8> %arg) {
1190 ; SSE-LABEL: allzeros_v16i8_and1:
1192 ; SSE-NEXT: psllw $7, %xmm0
1193 ; SSE-NEXT: pmovmskb %xmm0, %eax
1194 ; SSE-NEXT: testl %eax, %eax
1195 ; SSE-NEXT: sete %al
1198 ; AVX1OR2-LABEL: allzeros_v16i8_and1:
1200 ; AVX1OR2-NEXT: vpsllw $7, %xmm0, %xmm0
1201 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
1202 ; AVX1OR2-NEXT: testl %eax, %eax
1203 ; AVX1OR2-NEXT: sete %al
1204 ; AVX1OR2-NEXT: retq
1206 ; KNL-LABEL: allzeros_v16i8_and1:
1208 ; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
1209 ; KNL-NEXT: vpmovmskb %xmm0, %eax
1210 ; KNL-NEXT: testl %eax, %eax
1211 ; KNL-NEXT: sete %al
1214 ; SKX-LABEL: allzeros_v16i8_and1:
1216 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
1217 ; SKX-NEXT: kortestw %k0, %k0
1218 ; SKX-NEXT: sete %al
1220 %tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1221 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
1222 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1223 %tmp3 = icmp eq i16 %tmp2, 0
; Tests lowering of "all lanes of (<32 x i8> %arg & 1) nonzero": mask bitcast to i32, == -1.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
1227 define i1 @allones_v32i8_and1(<32 x i8> %arg) {
1228 ; SSE-LABEL: allones_v32i8_and1:
1230 ; SSE-NEXT: psllw $7, %xmm0
1231 ; SSE-NEXT: psllw $7, %xmm1
1232 ; SSE-NEXT: pand %xmm0, %xmm1
1233 ; SSE-NEXT: pmovmskb %xmm1, %eax
1234 ; SSE-NEXT: cmpw $-1, %ax
1235 ; SSE-NEXT: sete %al
1238 ; AVX1-LABEL: allones_v32i8_and1:
1240 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm1
1241 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1242 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1243 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1244 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1245 ; AVX1-NEXT: cmpw $-1, %ax
1246 ; AVX1-NEXT: sete %al
1247 ; AVX1-NEXT: vzeroupper
1250 ; AVX2-LABEL: allones_v32i8_and1:
1252 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
1253 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1254 ; AVX2-NEXT: cmpl $-1, %eax
1255 ; AVX2-NEXT: sete %al
1256 ; AVX2-NEXT: vzeroupper
1259 ; KNL-LABEL: allones_v32i8_and1:
1261 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
1262 ; KNL-NEXT: vpmovmskb %ymm0, %eax
1263 ; KNL-NEXT: cmpl $-1, %eax
1264 ; KNL-NEXT: sete %al
1265 ; KNL-NEXT: vzeroupper
1268 ; SKX-LABEL: allones_v32i8_and1:
1270 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
1271 ; SKX-NEXT: kortestd %k0, %k0
1272 ; SKX-NEXT: setb %al
1273 ; SKX-NEXT: vzeroupper
1275 %tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1276 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
1277 %tmp2 = bitcast <32 x i1> %tmp1 to i32
1278 %tmp3 = icmp eq i32 %tmp2, -1
; Tests lowering of "no lane of (<32 x i8> %arg & 1) nonzero": mask bitcast to i32, == 0.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
1282 define i1 @allzeros_v32i8_and1(<32 x i8> %arg) {
1283 ; SSE-LABEL: allzeros_v32i8_and1:
1285 ; SSE-NEXT: por %xmm1, %xmm0
1286 ; SSE-NEXT: psllw $7, %xmm0
1287 ; SSE-NEXT: pmovmskb %xmm0, %eax
1288 ; SSE-NEXT: testl %eax, %eax
1289 ; SSE-NEXT: sete %al
1292 ; AVX1-LABEL: allzeros_v32i8_and1:
1294 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1295 ; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1
1296 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1297 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1298 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1299 ; AVX1-NEXT: testl %eax, %eax
1300 ; AVX1-NEXT: sete %al
1301 ; AVX1-NEXT: vzeroupper
1304 ; AVX2-LABEL: allzeros_v32i8_and1:
1306 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
1307 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1308 ; AVX2-NEXT: testl %eax, %eax
1309 ; AVX2-NEXT: sete %al
1310 ; AVX2-NEXT: vzeroupper
1313 ; KNL-LABEL: allzeros_v32i8_and1:
1315 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
1316 ; KNL-NEXT: vpmovmskb %ymm0, %eax
1317 ; KNL-NEXT: testl %eax, %eax
1318 ; KNL-NEXT: sete %al
1319 ; KNL-NEXT: vzeroupper
1322 ; SKX-LABEL: allzeros_v32i8_and1:
1324 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
1325 ; SKX-NEXT: kortestd %k0, %k0
1326 ; SKX-NEXT: sete %al
1327 ; SKX-NEXT: vzeroupper
1329 %tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1330 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
1331 %tmp2 = bitcast <32 x i1> %tmp1 to i32
1332 %tmp3 = icmp eq i32 %tmp2, 0
; Tests lowering of "all lanes of (<64 x i8> %arg & 1) nonzero": mask bitcast to i64, == -1.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
1336 define i1 @allones_v64i8_and1(<64 x i8> %arg) {
1337 ; SSE-LABEL: allones_v64i8_and1:
1339 ; SSE-NEXT: psllw $7, %xmm1
1340 ; SSE-NEXT: psllw $7, %xmm3
1341 ; SSE-NEXT: psllw $7, %xmm0
1342 ; SSE-NEXT: psllw $7, %xmm2
1343 ; SSE-NEXT: pand %xmm0, %xmm2
1344 ; SSE-NEXT: pand %xmm1, %xmm2
1345 ; SSE-NEXT: pand %xmm3, %xmm2
1346 ; SSE-NEXT: pmovmskb %xmm2, %eax
1347 ; SSE-NEXT: cmpw $-1, %ax
1348 ; SSE-NEXT: sete %al
1351 ; AVX1-LABEL: allones_v64i8_and1:
1353 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1354 ; AVX1-NEXT: vpsllw $7, %xmm2, %xmm2
1355 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1356 ; AVX1-NEXT: vpsllw $7, %xmm3, %xmm3
1357 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1358 ; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1
1359 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
1360 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
1361 ; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
1362 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1363 ; AVX1-NEXT: cmpw $-1, %ax
1364 ; AVX1-NEXT: sete %al
1365 ; AVX1-NEXT: vzeroupper
1368 ; AVX2-LABEL: allones_v64i8_and1:
1370 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
1371 ; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1
1372 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
1373 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1374 ; AVX2-NEXT: cmpl $-1, %eax
1375 ; AVX2-NEXT: sete %al
1376 ; AVX2-NEXT: vzeroupper
1379 ; KNL-LABEL: allones_v64i8_and1:
1381 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm1
1382 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
1383 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
1384 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
1385 ; KNL-NEXT: vpmovmskb %ymm0, %eax
1386 ; KNL-NEXT: cmpl $-1, %eax
1387 ; KNL-NEXT: sete %al
1388 ; KNL-NEXT: vzeroupper
1391 ; SKX-LABEL: allones_v64i8_and1:
1393 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
1394 ; SKX-NEXT: kortestq %k0, %k0
1395 ; SKX-NEXT: setb %al
1396 ; SKX-NEXT: vzeroupper
1398 %tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1399 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
1400 %tmp2 = bitcast <64 x i1> %tmp1 to i64
1401 %tmp3 = icmp eq i64 %tmp2, -1
; Tests lowering of "no lane of (<64 x i8> %arg & 1) nonzero": mask bitcast to i64, == 0.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
1405 define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
1406 ; SSE-LABEL: allzeros_v64i8_and1:
1408 ; SSE-NEXT: por %xmm3, %xmm1
1409 ; SSE-NEXT: por %xmm2, %xmm1
1410 ; SSE-NEXT: por %xmm0, %xmm1
1411 ; SSE-NEXT: psllw $7, %xmm1
1412 ; SSE-NEXT: pmovmskb %xmm1, %eax
1413 ; SSE-NEXT: testl %eax, %eax
1414 ; SSE-NEXT: sete %al
1417 ; AVX1-LABEL: allzeros_v64i8_and1:
1419 ; AVX1-NEXT: vpsllw $7, %xmm1, %xmm2
1420 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm3
1421 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1422 ; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1
1423 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1424 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1425 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1426 ; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0
1427 ; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
1428 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1429 ; AVX1-NEXT: testl %eax, %eax
1430 ; AVX1-NEXT: sete %al
1431 ; AVX1-NEXT: vzeroupper
1434 ; AVX2-LABEL: allzeros_v64i8_and1:
1436 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1437 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
1438 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1439 ; AVX2-NEXT: testl %eax, %eax
1440 ; AVX2-NEXT: sete %al
1441 ; AVX2-NEXT: vzeroupper
1444 ; KNL-LABEL: allzeros_v64i8_and1:
1446 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1447 ; KNL-NEXT: vpsllw $7, %ymm1, %ymm1
1448 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
1449 ; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
1450 ; KNL-NEXT: vpmovmskb %ymm0, %eax
1451 ; KNL-NEXT: testl %eax, %eax
1452 ; KNL-NEXT: sete %al
1453 ; KNL-NEXT: vzeroupper
1456 ; SKX-LABEL: allzeros_v64i8_and1:
1458 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
1459 ; SKX-NEXT: kortestq %k0, %k0
1460 ; SKX-NEXT: sete %al
1461 ; SKX-NEXT: vzeroupper
1463 %tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1464 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
1465 %tmp2 = bitcast <64 x i1> %tmp1 to i64
1466 %tmp3 = icmp eq i64 %tmp2, 0
; Tests lowering of "all lanes of (<8 x i16> %arg & 1) nonzero": mask bitcast to i8, == -1.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
1470 define i1 @allones_v8i16_and1(<8 x i16> %arg) {
1471 ; SSE-LABEL: allones_v8i16_and1:
1473 ; SSE-NEXT: psllw $15, %xmm0
1474 ; SSE-NEXT: packsswb %xmm0, %xmm0
1475 ; SSE-NEXT: pmovmskb %xmm0, %eax
1476 ; SSE-NEXT: cmpb $-1, %al
1477 ; SSE-NEXT: sete %al
1480 ; AVX1OR2-LABEL: allones_v8i16_and1:
1482 ; AVX1OR2-NEXT: vpsllw $15, %xmm0, %xmm0
1483 ; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
1484 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
1485 ; AVX1OR2-NEXT: cmpb $-1, %al
1486 ; AVX1OR2-NEXT: sete %al
1487 ; AVX1OR2-NEXT: retq
1489 ; KNL-LABEL: allones_v8i16_and1:
1491 ; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
1492 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
1493 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1494 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1495 ; KNL-NEXT: kmovw %k0, %eax
1496 ; KNL-NEXT: cmpb $-1, %al
1497 ; KNL-NEXT: sete %al
1498 ; KNL-NEXT: vzeroupper
1501 ; SKX-LABEL: allones_v8i16_and1:
1503 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
1504 ; SKX-NEXT: kortestb %k0, %k0
1505 ; SKX-NEXT: setb %al
1507 %tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1508 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
1509 %tmp2 = bitcast <8 x i1> %tmp1 to i8
1510 %tmp3 = icmp eq i8 %tmp2, -1
; Tests lowering of "no lane of (<8 x i16> %arg & 1) nonzero": mask bitcast to i8, == 0.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
1514 define i1 @allzeros_v8i16_and1(<8 x i16> %arg) {
1515 ; SSE-LABEL: allzeros_v8i16_and1:
1517 ; SSE-NEXT: psllw $15, %xmm0
1518 ; SSE-NEXT: pmovmskb %xmm0, %eax
1519 ; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA
1520 ; SSE-NEXT: sete %al
1523 ; AVX1OR2-LABEL: allzeros_v8i16_and1:
1525 ; AVX1OR2-NEXT: vpsllw $15, %xmm0, %xmm0
1526 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
1527 ; AVX1OR2-NEXT: testl $43690, %eax # imm = 0xAAAA
1528 ; AVX1OR2-NEXT: sete %al
1529 ; AVX1OR2-NEXT: retq
1531 ; KNL-LABEL: allzeros_v8i16_and1:
1533 ; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
1534 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
1535 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1536 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1537 ; KNL-NEXT: kmovw %k0, %eax
1538 ; KNL-NEXT: testb %al, %al
1539 ; KNL-NEXT: sete %al
1540 ; KNL-NEXT: vzeroupper
1543 ; SKX-LABEL: allzeros_v8i16_and1:
1545 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
1546 ; SKX-NEXT: kortestb %k0, %k0
1547 ; SKX-NEXT: sete %al
1549 %tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1550 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
1551 %tmp2 = bitcast <8 x i1> %tmp1 to i8
1552 %tmp3 = icmp eq i8 %tmp2, 0
; Tests lowering of "all lanes of (<16 x i16> %arg & 1) nonzero": mask bitcast to i16, == -1.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
1556 define i1 @allones_v16i16_and1(<16 x i16> %arg) {
1557 ; SSE-LABEL: allones_v16i16_and1:
1559 ; SSE-NEXT: psllw $15, %xmm1
1560 ; SSE-NEXT: psllw $15, %xmm0
1561 ; SSE-NEXT: packsswb %xmm1, %xmm0
1562 ; SSE-NEXT: pmovmskb %xmm0, %eax
1563 ; SSE-NEXT: cmpw $-1, %ax
1564 ; SSE-NEXT: sete %al
1567 ; AVX1-LABEL: allones_v16i16_and1:
1569 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1570 ; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
1571 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
1572 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1573 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1574 ; AVX1-NEXT: cmpw $-1, %ax
1575 ; AVX1-NEXT: sete %al
1576 ; AVX1-NEXT: vzeroupper
1579 ; AVX2-LABEL: allones_v16i16_and1:
1581 ; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
1582 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1583 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1584 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
1585 ; AVX2-NEXT: cmpw $-1, %ax
1586 ; AVX2-NEXT: sete %al
1587 ; AVX2-NEXT: vzeroupper
1590 ; KNL-LABEL: allones_v16i16_and1:
1592 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
1593 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
1594 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
1595 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1596 ; KNL-NEXT: kortestw %k0, %k0
1597 ; KNL-NEXT: setb %al
1598 ; KNL-NEXT: vzeroupper
1601 ; SKX-LABEL: allones_v16i16_and1:
1603 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
1604 ; SKX-NEXT: kortestw %k0, %k0
1605 ; SKX-NEXT: setb %al
1606 ; SKX-NEXT: vzeroupper
1608 %tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1609 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
1610 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1611 %tmp3 = icmp eq i16 %tmp2, -1
; Tests lowering of "all lanes of (<32 x i16> %arg & 1) nonzero": mask bitcast to i32, == -1.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
1615 define i1 @allones_v32i16_and1(<32 x i16> %arg) {
1616 ; SSE-LABEL: allones_v32i16_and1:
1618 ; SSE-NEXT: psllw $15, %xmm1
1619 ; SSE-NEXT: psllw $15, %xmm0
1620 ; SSE-NEXT: packsswb %xmm1, %xmm0
1621 ; SSE-NEXT: psllw $15, %xmm3
1622 ; SSE-NEXT: psllw $15, %xmm2
1623 ; SSE-NEXT: packsswb %xmm3, %xmm2
1624 ; SSE-NEXT: pand %xmm0, %xmm2
1625 ; SSE-NEXT: pmovmskb %xmm2, %eax
1626 ; SSE-NEXT: cmpw $-1, %ax
1627 ; SSE-NEXT: sete %al
1630 ; AVX1-LABEL: allones_v32i16_and1:
1632 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1633 ; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
1634 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
1635 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
1636 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1637 ; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
1638 ; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
1639 ; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
1640 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
1641 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1642 ; AVX1-NEXT: cmpw $-1, %ax
1643 ; AVX1-NEXT: sete %al
1644 ; AVX1-NEXT: vzeroupper
1647 ; AVX2-LABEL: allones_v32i16_and1:
1649 ; AVX2-NEXT: vpsllw $15, %ymm1, %ymm1
1650 ; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
1651 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
1652 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1653 ; AVX2-NEXT: cmpl $-1, %eax
1654 ; AVX2-NEXT: sete %al
1655 ; AVX2-NEXT: vzeroupper
1658 ; KNL-LABEL: allones_v32i16_and1:
1660 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm1
1661 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
1662 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
1663 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
1664 ; KNL-NEXT: kmovw %k0, %eax
1665 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
1666 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
1667 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
1668 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
1669 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1670 ; KNL-NEXT: kmovw %k0, %ecx
1671 ; KNL-NEXT: andl %eax, %ecx
1672 ; KNL-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
1673 ; KNL-NEXT: sete %al
1674 ; KNL-NEXT: vzeroupper
1677 ; SKX-LABEL: allones_v32i16_and1:
1679 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
1680 ; SKX-NEXT: kortestd %k0, %k0
1681 ; SKX-NEXT: setb %al
1682 ; SKX-NEXT: vzeroupper
1684 %tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1685 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
1686 %tmp2 = bitcast <32 x i1> %tmp1 to i32
1687 %tmp3 = icmp eq i32 %tmp2, -1
; Tests lowering of "no lane of (<32 x i16> %arg & 1) nonzero": mask bitcast to i32, == 0.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
1691 define i1 @allzeros_v32i16_and1(<32 x i16> %arg) {
1692 ; SSE-LABEL: allzeros_v32i16_and1:
1694 ; SSE-NEXT: psllw $15, %xmm3
1695 ; SSE-NEXT: psllw $15, %xmm2
1696 ; SSE-NEXT: packsswb %xmm3, %xmm2
1697 ; SSE-NEXT: psllw $15, %xmm1
1698 ; SSE-NEXT: psllw $15, %xmm0
1699 ; SSE-NEXT: packsswb %xmm1, %xmm0
1700 ; SSE-NEXT: por %xmm2, %xmm0
1701 ; SSE-NEXT: pmovmskb %xmm0, %eax
1702 ; SSE-NEXT: testl %eax, %eax
1703 ; SSE-NEXT: sete %al
1706 ; AVX1-LABEL: allzeros_v32i16_and1:
1708 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1709 ; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
1710 ; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
1711 ; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
1712 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1713 ; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
1714 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
1715 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
1716 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1717 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1718 ; AVX1-NEXT: testl %eax, %eax
1719 ; AVX1-NEXT: sete %al
1720 ; AVX1-NEXT: vzeroupper
1723 ; AVX2-LABEL: allzeros_v32i16_and1:
1725 ; AVX2-NEXT: vpsllw $15, %ymm1, %ymm1
1726 ; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
1727 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
1728 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1729 ; AVX2-NEXT: testl %eax, %eax
1730 ; AVX2-NEXT: sete %al
1731 ; AVX2-NEXT: vzeroupper
1734 ; KNL-LABEL: allzeros_v32i16_and1:
1736 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1737 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
1738 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
1739 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
1740 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
1741 ; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
1742 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
1743 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1744 ; KNL-NEXT: kortestw %k0, %k0
1745 ; KNL-NEXT: sete %al
1746 ; KNL-NEXT: vzeroupper
1749 ; SKX-LABEL: allzeros_v32i16_and1:
1751 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
1752 ; SKX-NEXT: kortestd %k0, %k0
1753 ; SKX-NEXT: sete %al
1754 ; SKX-NEXT: vzeroupper
1756 %tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1757 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
1758 %tmp2 = bitcast <32 x i1> %tmp1 to i32
1759 %tmp3 = icmp eq i32 %tmp2, 0
; Tests lowering of "no lane of (<16 x i16> %arg & 1) nonzero": mask bitcast to i16, == 0.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
1763 define i1 @allzeros_v16i16_and1(<16 x i16> %arg) {
1764 ; SSE-LABEL: allzeros_v16i16_and1:
1766 ; SSE-NEXT: psllw $15, %xmm1
1767 ; SSE-NEXT: psllw $15, %xmm0
1768 ; SSE-NEXT: packsswb %xmm1, %xmm0
1769 ; SSE-NEXT: pmovmskb %xmm0, %eax
1770 ; SSE-NEXT: testl %eax, %eax
1771 ; SSE-NEXT: sete %al
1774 ; AVX1-LABEL: allzeros_v16i16_and1:
1776 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1777 ; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
1778 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
1779 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1780 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1781 ; AVX1-NEXT: testl %eax, %eax
1782 ; AVX1-NEXT: sete %al
1783 ; AVX1-NEXT: vzeroupper
1786 ; AVX2-LABEL: allzeros_v16i16_and1:
1788 ; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
1789 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1790 ; AVX2-NEXT: testl $-1431655766, %eax # imm = 0xAAAAAAAA
1791 ; AVX2-NEXT: sete %al
1792 ; AVX2-NEXT: vzeroupper
1795 ; KNL-LABEL: allzeros_v16i16_and1:
1797 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
1798 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
1799 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
1800 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1801 ; KNL-NEXT: kortestw %k0, %k0
1802 ; KNL-NEXT: sete %al
1803 ; KNL-NEXT: vzeroupper
1806 ; SKX-LABEL: allzeros_v16i16_and1:
1808 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
1809 ; SKX-NEXT: kortestw %k0, %k0
1810 ; SKX-NEXT: sete %al
1811 ; SKX-NEXT: vzeroupper
1813 %tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1814 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
1815 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1816 %tmp3 = icmp eq i16 %tmp2, 0
; Tests lowering of "all lanes of (<4 x i32> %arg & 1) nonzero": mask bitcast to i4, == -1.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
1820 define i1 @allones_v4i32_and1(<4 x i32> %arg) {
1821 ; SSE-LABEL: allones_v4i32_and1:
1823 ; SSE-NEXT: pslld $31, %xmm0
1824 ; SSE-NEXT: movmskps %xmm0, %eax
1825 ; SSE-NEXT: cmpb $15, %al
1826 ; SSE-NEXT: sete %al
1829 ; AVX1OR2-LABEL: allones_v4i32_and1:
1831 ; AVX1OR2-NEXT: vpslld $31, %xmm0, %xmm0
1832 ; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
1833 ; AVX1OR2-NEXT: cmpb $15, %al
1834 ; AVX1OR2-NEXT: sete %al
1835 ; AVX1OR2-NEXT: retq
1837 ; KNL-LABEL: allones_v4i32_and1:
1839 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1840 ; KNL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1841 ; KNL-NEXT: kmovw %k0, %eax
1842 ; KNL-NEXT: testb $15, %al
1843 ; KNL-NEXT: sete %al
1844 ; KNL-NEXT: vzeroupper
1847 ; SKX-LABEL: allones_v4i32_and1:
1849 ; SKX-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
1850 ; SKX-NEXT: kmovd %k0, %eax
1851 ; SKX-NEXT: cmpb $15, %al
1852 ; SKX-NEXT: sete %al
1854 %tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
1855 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
1856 %tmp2 = bitcast <4 x i1> %tmp1 to i4
1857 %tmp3 = icmp eq i4 %tmp2, -1
; Tests lowering of "no lane of (<4 x i32> %arg & 1) nonzero": mask bitcast to i4, == 0.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
1861 define i1 @allzeros_v4i32_and1(<4 x i32> %arg) {
1862 ; SSE-LABEL: allzeros_v4i32_and1:
1864 ; SSE-NEXT: pslld $31, %xmm0
1865 ; SSE-NEXT: movmskps %xmm0, %eax
1866 ; SSE-NEXT: testl %eax, %eax
1867 ; SSE-NEXT: sete %al
1870 ; AVX1OR2-LABEL: allzeros_v4i32_and1:
1872 ; AVX1OR2-NEXT: vpslld $31, %xmm0, %xmm0
1873 ; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
1874 ; AVX1OR2-NEXT: testl %eax, %eax
1875 ; AVX1OR2-NEXT: sete %al
1876 ; AVX1OR2-NEXT: retq
1878 ; KNL-LABEL: allzeros_v4i32_and1:
1880 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1881 ; KNL-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1882 ; KNL-NEXT: kmovw %k0, %eax
1883 ; KNL-NEXT: testb $15, %al
1884 ; KNL-NEXT: sete %al
1885 ; KNL-NEXT: vzeroupper
1888 ; SKX-LABEL: allzeros_v4i32_and1:
1890 ; SKX-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
1891 ; SKX-NEXT: kortestb %k0, %k0
1892 ; SKX-NEXT: sete %al
1894 %tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
1895 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
1896 %tmp2 = bitcast <4 x i1> %tmp1 to i4
1897 %tmp3 = icmp eq i4 %tmp2, 0
; Tests lowering of "all lanes of (<8 x i32> %arg & 1) nonzero": mask bitcast to i8, == -1.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
1901 define i1 @allones_v8i32_and1(<8 x i32> %arg) {
1902 ; SSE-LABEL: allones_v8i32_and1:
1904 ; SSE-NEXT: pslld $31, %xmm1
1905 ; SSE-NEXT: pslld $31, %xmm0
1906 ; SSE-NEXT: packssdw %xmm1, %xmm0
1907 ; SSE-NEXT: packsswb %xmm0, %xmm0
1908 ; SSE-NEXT: pmovmskb %xmm0, %eax
1909 ; SSE-NEXT: cmpb $-1, %al
1910 ; SSE-NEXT: sete %al
1913 ; AVX1-LABEL: allones_v8i32_and1:
1915 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
1916 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1917 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
1918 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1919 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1920 ; AVX1-NEXT: cmpb $-1, %al
1921 ; AVX1-NEXT: sete %al
1922 ; AVX1-NEXT: vzeroupper
1925 ; AVX2-LABEL: allones_v8i32_and1:
1927 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
1928 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1929 ; AVX2-NEXT: cmpb $-1, %al
1930 ; AVX2-NEXT: sete %al
1931 ; AVX2-NEXT: vzeroupper
1934 ; KNL-LABEL: allones_v8i32_and1:
1936 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1937 ; KNL-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1938 ; KNL-NEXT: kmovw %k0, %eax
1939 ; KNL-NEXT: cmpb $-1, %al
1940 ; KNL-NEXT: sete %al
1941 ; KNL-NEXT: vzeroupper
1944 ; SKX-LABEL: allones_v8i32_and1:
1946 ; SKX-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
1947 ; SKX-NEXT: kortestb %k0, %k0
1948 ; SKX-NEXT: setb %al
1949 ; SKX-NEXT: vzeroupper
1951 %tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1952 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
1953 %tmp2 = bitcast <8 x i1> %tmp1 to i8
1954 %tmp3 = icmp eq i8 %tmp2, -1
; Tests lowering of "no lane of (<8 x i32> %arg & 1) nonzero": mask bitcast to i8, == 0.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
1958 define i1 @allzeros_v8i32_and1(<8 x i32> %arg) {
1959 ; SSE-LABEL: allzeros_v8i32_and1:
1961 ; SSE-NEXT: pslld $31, %xmm1
1962 ; SSE-NEXT: pslld $31, %xmm0
1963 ; SSE-NEXT: packssdw %xmm1, %xmm0
1964 ; SSE-NEXT: pmovmskb %xmm0, %eax
1965 ; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA
1966 ; SSE-NEXT: sete %al
1969 ; AVX1-LABEL: allzeros_v8i32_and1:
1971 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
1972 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1973 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
1974 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1975 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1976 ; AVX1-NEXT: testl %eax, %eax
1977 ; AVX1-NEXT: sete %al
1978 ; AVX1-NEXT: vzeroupper
1981 ; AVX2-LABEL: allzeros_v8i32_and1:
1983 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
1984 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1985 ; AVX2-NEXT: testl %eax, %eax
1986 ; AVX2-NEXT: sete %al
1987 ; AVX2-NEXT: vzeroupper
1990 ; KNL-LABEL: allzeros_v8i32_and1:
1992 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1993 ; KNL-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1994 ; KNL-NEXT: kmovw %k0, %eax
1995 ; KNL-NEXT: testb %al, %al
1996 ; KNL-NEXT: sete %al
1997 ; KNL-NEXT: vzeroupper
2000 ; SKX-LABEL: allzeros_v8i32_and1:
2002 ; SKX-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
2003 ; SKX-NEXT: kortestb %k0, %k0
2004 ; SKX-NEXT: sete %al
2005 ; SKX-NEXT: vzeroupper
2007 %tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2008 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
2009 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2010 %tmp3 = icmp eq i8 %tmp2, 0
; Tests lowering of "all lanes of (<16 x i32> %arg & 1) nonzero": mask bitcast to i16, == -1.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
2014 define i1 @allones_v16i32_and1(<16 x i32> %arg) {
2015 ; SSE-LABEL: allones_v16i32_and1:
2017 ; SSE-NEXT: pslld $31, %xmm3
2018 ; SSE-NEXT: pslld $31, %xmm2
2019 ; SSE-NEXT: packssdw %xmm3, %xmm2
2020 ; SSE-NEXT: pslld $31, %xmm1
2021 ; SSE-NEXT: pslld $31, %xmm0
2022 ; SSE-NEXT: packssdw %xmm1, %xmm0
2023 ; SSE-NEXT: packsswb %xmm2, %xmm0
2024 ; SSE-NEXT: pmovmskb %xmm0, %eax
2025 ; SSE-NEXT: cmpw $-1, %ax
2026 ; SSE-NEXT: sete %al
2029 ; AVX1-LABEL: allones_v16i32_and1:
2031 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2032 ; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
2033 ; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
2034 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2035 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2036 ; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
2037 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
2038 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2039 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2040 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2041 ; AVX1-NEXT: cmpw $-1, %ax
2042 ; AVX1-NEXT: sete %al
2043 ; AVX1-NEXT: vzeroupper
2046 ; AVX2-LABEL: allones_v16i32_and1:
2048 ; AVX2-NEXT: vpslld $31, %ymm1, %ymm1
2049 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
2050 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
2051 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
2052 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
2053 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2054 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2055 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
2056 ; AVX2-NEXT: cmpw $-1, %ax
2057 ; AVX2-NEXT: sete %al
2058 ; AVX2-NEXT: vzeroupper
2061 ; AVX512-LABEL: allones_v16i32_and1:
2063 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
2064 ; AVX512-NEXT: kortestw %k0, %k0
2065 ; AVX512-NEXT: setb %al
2066 ; AVX512-NEXT: vzeroupper
2068 %tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2069 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
2070 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2071 %tmp3 = icmp eq i16 %tmp2, -1
; Tests lowering of "no lane of (<16 x i32> %arg & 1) nonzero": mask bitcast to i16, == 0.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
2075 define i1 @allzeros_v16i32_and1(<16 x i32> %arg) {
2076 ; SSE-LABEL: allzeros_v16i32_and1:
2078 ; SSE-NEXT: pslld $31, %xmm3
2079 ; SSE-NEXT: pslld $31, %xmm2
2080 ; SSE-NEXT: packssdw %xmm3, %xmm2
2081 ; SSE-NEXT: pslld $31, %xmm1
2082 ; SSE-NEXT: pslld $31, %xmm0
2083 ; SSE-NEXT: packssdw %xmm1, %xmm0
2084 ; SSE-NEXT: packsswb %xmm2, %xmm0
2085 ; SSE-NEXT: pmovmskb %xmm0, %eax
2086 ; SSE-NEXT: testl %eax, %eax
2087 ; SSE-NEXT: sete %al
2090 ; AVX1-LABEL: allzeros_v16i32_and1:
2092 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2093 ; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
2094 ; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
2095 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2096 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2097 ; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
2098 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
2099 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2100 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2101 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2102 ; AVX1-NEXT: testl %eax, %eax
2103 ; AVX1-NEXT: sete %al
2104 ; AVX1-NEXT: vzeroupper
2107 ; AVX2-LABEL: allzeros_v16i32_and1:
2109 ; AVX2-NEXT: vpslld $31, %ymm1, %ymm1
2110 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
2111 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
2112 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
2113 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
2114 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2115 ; AVX2-NEXT: testl %eax, %eax
2116 ; AVX2-NEXT: sete %al
2117 ; AVX2-NEXT: vzeroupper
2120 ; AVX512-LABEL: allzeros_v16i32_and1:
2122 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
2123 ; AVX512-NEXT: kortestw %k0, %k0
2124 ; AVX512-NEXT: sete %al
2125 ; AVX512-NEXT: vzeroupper
2127 %tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2128 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
2129 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2130 %tmp3 = icmp eq i16 %tmp2, 0
; Tests lowering of "all lanes of (<2 x i64> %arg & 1) nonzero": mask bitcast to i2, == -1.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
2134 define i1 @allones_v2i64_and1(<2 x i64> %arg) {
2135 ; SSE-LABEL: allones_v2i64_and1:
2137 ; SSE-NEXT: psllq $63, %xmm0
2138 ; SSE-NEXT: movmskpd %xmm0, %eax
2139 ; SSE-NEXT: cmpb $3, %al
2140 ; SSE-NEXT: sete %al
2143 ; AVX1OR2-LABEL: allones_v2i64_and1:
2145 ; AVX1OR2-NEXT: vpsllq $63, %xmm0, %xmm0
2146 ; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
2147 ; AVX1OR2-NEXT: cmpb $3, %al
2148 ; AVX1OR2-NEXT: sete %al
2149 ; AVX1OR2-NEXT: retq
2151 ; KNL-LABEL: allones_v2i64_and1:
2153 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2154 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1]
2155 ; KNL-NEXT: vptestnmq %zmm1, %zmm0, %k0
2156 ; KNL-NEXT: kmovw %k0, %eax
2157 ; KNL-NEXT: testb $3, %al
2158 ; KNL-NEXT: sete %al
2159 ; KNL-NEXT: vzeroupper
2162 ; SKX-LABEL: allones_v2i64_and1:
2164 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
2165 ; SKX-NEXT: kmovd %k0, %eax
2166 ; SKX-NEXT: cmpb $3, %al
2167 ; SKX-NEXT: sete %al
2169 %tmp = and <2 x i64> %arg, <i64 1, i64 1>
2170 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
2171 %tmp2 = bitcast <2 x i1> %tmp1 to i2
2172 %tmp3 = icmp eq i2 %tmp2, -1
; Tests lowering of "no lane of (<2 x i64> %arg & 1) nonzero": mask bitcast to i2, == 0.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
2176 define i1 @allzeros_v2i64_and1(<2 x i64> %arg) {
2177 ; SSE-LABEL: allzeros_v2i64_and1:
2179 ; SSE-NEXT: psllq $63, %xmm0
2180 ; SSE-NEXT: movmskpd %xmm0, %eax
2181 ; SSE-NEXT: testl %eax, %eax
2182 ; SSE-NEXT: sete %al
2185 ; AVX1OR2-LABEL: allzeros_v2i64_and1:
2187 ; AVX1OR2-NEXT: vpsllq $63, %xmm0, %xmm0
2188 ; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
2189 ; AVX1OR2-NEXT: testl %eax, %eax
2190 ; AVX1OR2-NEXT: sete %al
2191 ; AVX1OR2-NEXT: retq
2193 ; KNL-LABEL: allzeros_v2i64_and1:
2195 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2196 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1]
2197 ; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
2198 ; KNL-NEXT: kmovw %k0, %eax
2199 ; KNL-NEXT: testb $3, %al
2200 ; KNL-NEXT: sete %al
2201 ; KNL-NEXT: vzeroupper
2204 ; SKX-LABEL: allzeros_v2i64_and1:
2206 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
2207 ; SKX-NEXT: kortestb %k0, %k0
2208 ; SKX-NEXT: sete %al
2210 %tmp = and <2 x i64> %arg, <i64 1, i64 1>
2211 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
2212 %tmp2 = bitcast <2 x i1> %tmp1 to i2
2213 %tmp3 = icmp eq i2 %tmp2, 0
; Tests lowering of "all lanes of (<4 x i64> %arg & 1) nonzero": mask bitcast to i4, == -1.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
2217 define i1 @allones_v4i64_and1(<4 x i64> %arg) {
2218 ; SSE-LABEL: allones_v4i64_and1:
2220 ; SSE-NEXT: psllq $63, %xmm1
2221 ; SSE-NEXT: psllq $63, %xmm0
2222 ; SSE-NEXT: packssdw %xmm1, %xmm0
2223 ; SSE-NEXT: movmskps %xmm0, %eax
2224 ; SSE-NEXT: cmpb $15, %al
2225 ; SSE-NEXT: sete %al
2228 ; AVX1-LABEL: allones_v4i64_and1:
2230 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
2231 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2232 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
2233 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2234 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
2235 ; AVX1-NEXT: cmpb $15, %al
2236 ; AVX1-NEXT: sete %al
2237 ; AVX1-NEXT: vzeroupper
2240 ; AVX2-LABEL: allones_v4i64_and1:
2242 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
2243 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
2244 ; AVX2-NEXT: cmpb $15, %al
2245 ; AVX2-NEXT: sete %al
2246 ; AVX2-NEXT: vzeroupper
2249 ; KNL-LABEL: allones_v4i64_and1:
2251 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2252 ; KNL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
2253 ; KNL-NEXT: kmovw %k0, %eax
2254 ; KNL-NEXT: testb $15, %al
2255 ; KNL-NEXT: sete %al
2256 ; KNL-NEXT: vzeroupper
2259 ; SKX-LABEL: allones_v4i64_and1:
2261 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
2262 ; SKX-NEXT: kmovd %k0, %eax
2263 ; SKX-NEXT: cmpb $15, %al
2264 ; SKX-NEXT: sete %al
2265 ; SKX-NEXT: vzeroupper
2267 %tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
2268 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
2269 %tmp2 = bitcast <4 x i1> %tmp1 to i4
2270 %tmp3 = icmp eq i4 %tmp2, -1
; Tests lowering of "no lane of (<4 x i64> %arg & 1) nonzero": mask bitcast to i4, == 0.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
2274 define i1 @allzeros_v4i64_and1(<4 x i64> %arg) {
2275 ; SSE-LABEL: allzeros_v4i64_and1:
2277 ; SSE-NEXT: psllq $63, %xmm1
2278 ; SSE-NEXT: psllq $63, %xmm0
2279 ; SSE-NEXT: packssdw %xmm1, %xmm0
2280 ; SSE-NEXT: movmskps %xmm0, %eax
2281 ; SSE-NEXT: testl %eax, %eax
2282 ; SSE-NEXT: sete %al
2285 ; AVX1-LABEL: allzeros_v4i64_and1:
2287 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
2288 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2289 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
2290 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2291 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
2292 ; AVX1-NEXT: testl %eax, %eax
2293 ; AVX1-NEXT: sete %al
2294 ; AVX1-NEXT: vzeroupper
2297 ; AVX2-LABEL: allzeros_v4i64_and1:
2299 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
2300 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
2301 ; AVX2-NEXT: testl %eax, %eax
2302 ; AVX2-NEXT: sete %al
2303 ; AVX2-NEXT: vzeroupper
2306 ; KNL-LABEL: allzeros_v4i64_and1:
2308 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2309 ; KNL-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
2310 ; KNL-NEXT: kmovw %k0, %eax
2311 ; KNL-NEXT: testb $15, %al
2312 ; KNL-NEXT: sete %al
2313 ; KNL-NEXT: vzeroupper
2316 ; SKX-LABEL: allzeros_v4i64_and1:
2318 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
2319 ; SKX-NEXT: kortestb %k0, %k0
2320 ; SKX-NEXT: sete %al
2321 ; SKX-NEXT: vzeroupper
2323 %tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
2324 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
2325 %tmp2 = bitcast <4 x i1> %tmp1 to i4
2326 %tmp3 = icmp eq i4 %tmp2, 0
; Tests lowering of "all lanes of (<8 x i64> %arg & 1) nonzero": mask bitcast to i8, == -1.
; CHECK lines autogenerated by update_llc_test_checks.py -- regenerate, do not hand-edit.
; NOTE(review): trailing 'ret i1 %tmp3' / '}' lines appear lost in extraction -- confirm upstream.
2330 define i1 @allones_v8i64_and1(<8 x i64> %arg) {
2331 ; SSE-LABEL: allones_v8i64_and1:
2333 ; SSE-NEXT: psllq $63, %xmm3
2334 ; SSE-NEXT: psllq $63, %xmm2
2335 ; SSE-NEXT: packssdw %xmm3, %xmm2
2336 ; SSE-NEXT: psllq $63, %xmm1
2337 ; SSE-NEXT: psllq $63, %xmm0
2338 ; SSE-NEXT: packssdw %xmm1, %xmm0
2339 ; SSE-NEXT: packssdw %xmm2, %xmm0
2340 ; SSE-NEXT: packsswb %xmm0, %xmm0
2341 ; SSE-NEXT: pmovmskb %xmm0, %eax
2342 ; SSE-NEXT: cmpb $-1, %al
2343 ; SSE-NEXT: sete %al
2346 ; AVX1-LABEL: allones_v8i64_and1:
2348 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2349 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2350 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
2351 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
2352 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
2353 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
2354 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2355 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2356 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2357 ; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
2358 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2359 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2360 ; AVX1-NEXT: vmovmskps %ymm0, %eax
2361 ; AVX1-NEXT: cmpb $-1, %al
2362 ; AVX1-NEXT: sete %al
2363 ; AVX1-NEXT: vzeroupper
2366 ; AVX2-LABEL: allones_v8i64_and1:
2368 ; AVX2-NEXT: vpsllq $63, %ymm1, %ymm1
2369 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
2370 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
2371 ; AVX2-NEXT: vmovmskps %ymm0, %eax
2372 ; AVX2-NEXT: cmpb $-1, %al
2373 ; AVX2-NEXT: sete %al
2374 ; AVX2-NEXT: vzeroupper
2377 ; KNL-LABEL: allones_v8i64_and1:
2379 ; KNL-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
2380 ; KNL-NEXT: kmovw %k0, %eax
2381 ; KNL-NEXT: cmpb $-1, %al
2382 ; KNL-NEXT: sete %al
2383 ; KNL-NEXT: vzeroupper
2386 ; SKX-LABEL: allones_v8i64_and1:
2388 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
2389 ; SKX-NEXT: kortestb %k0, %k0
2390 ; SKX-NEXT: setb %al
2391 ; SKX-NEXT: vzeroupper
2393 %tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
2394 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
2395 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2396 %tmp3 = icmp eq i8 %tmp2, -1
2400 define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
2401 ; SSE-LABEL: allzeros_v8i64_and1:
2403 ; SSE-NEXT: psllq $63, %xmm3
2404 ; SSE-NEXT: psllq $63, %xmm2
2405 ; SSE-NEXT: packssdw %xmm3, %xmm2
2406 ; SSE-NEXT: psllq $63, %xmm1
2407 ; SSE-NEXT: psllq $63, %xmm0
2408 ; SSE-NEXT: packssdw %xmm1, %xmm0
2409 ; SSE-NEXT: packssdw %xmm2, %xmm0
2410 ; SSE-NEXT: pmovmskb %xmm0, %eax
2411 ; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA
2412 ; SSE-NEXT: sete %al
2415 ; AVX1-LABEL: allzeros_v8i64_and1:
2417 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2418 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2419 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
2420 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
2421 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
2422 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
2423 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2424 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2425 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2426 ; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
2427 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2428 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2429 ; AVX1-NEXT: vmovmskps %ymm0, %eax
2430 ; AVX1-NEXT: testl %eax, %eax
2431 ; AVX1-NEXT: sete %al
2432 ; AVX1-NEXT: vzeroupper
2435 ; AVX2-LABEL: allzeros_v8i64_and1:
2437 ; AVX2-NEXT: vpsllq $63, %ymm1, %ymm1
2438 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
2439 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
2440 ; AVX2-NEXT: vmovmskps %ymm0, %eax
2441 ; AVX2-NEXT: testl %eax, %eax
2442 ; AVX2-NEXT: sete %al
2443 ; AVX2-NEXT: vzeroupper
2446 ; KNL-LABEL: allzeros_v8i64_and1:
2448 ; KNL-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
2449 ; KNL-NEXT: kmovw %k0, %eax
2450 ; KNL-NEXT: testb %al, %al
2451 ; KNL-NEXT: sete %al
2452 ; KNL-NEXT: vzeroupper
2455 ; SKX-LABEL: allzeros_v8i64_and1:
2457 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
2458 ; SKX-NEXT: kortestb %k0, %k0
2459 ; SKX-NEXT: sete %al
2460 ; SKX-NEXT: vzeroupper
2462 %tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
2463 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
2464 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2465 %tmp3 = icmp eq i8 %tmp2, 0
2469 define i1 @allones_v16i8_and4(<16 x i8> %arg) {
2470 ; SSE-LABEL: allones_v16i8_and4:
2472 ; SSE-NEXT: psllw $5, %xmm0
2473 ; SSE-NEXT: pmovmskb %xmm0, %eax
2474 ; SSE-NEXT: cmpw $-1, %ax
2475 ; SSE-NEXT: sete %al
2478 ; AVX1OR2-LABEL: allones_v16i8_and4:
2480 ; AVX1OR2-NEXT: vpsllw $5, %xmm0, %xmm0
2481 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
2482 ; AVX1OR2-NEXT: cmpw $-1, %ax
2483 ; AVX1OR2-NEXT: sete %al
2484 ; AVX1OR2-NEXT: retq
2486 ; KNL-LABEL: allones_v16i8_and4:
2488 ; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
2489 ; KNL-NEXT: vpmovmskb %xmm0, %eax
2490 ; KNL-NEXT: cmpw $-1, %ax
2491 ; KNL-NEXT: sete %al
2494 ; SKX-LABEL: allones_v16i8_and4:
2496 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
2497 ; SKX-NEXT: kortestw %k0, %k0
2498 ; SKX-NEXT: setb %al
2500 %tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2501 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
2502 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2503 %tmp3 = icmp eq i16 %tmp2, -1
2507 define i1 @allzeros_v16i8_and4(<16 x i8> %arg) {
2508 ; SSE-LABEL: allzeros_v16i8_and4:
2510 ; SSE-NEXT: psllw $5, %xmm0
2511 ; SSE-NEXT: pmovmskb %xmm0, %eax
2512 ; SSE-NEXT: testl %eax, %eax
2513 ; SSE-NEXT: sete %al
2516 ; AVX1OR2-LABEL: allzeros_v16i8_and4:
2518 ; AVX1OR2-NEXT: vpsllw $5, %xmm0, %xmm0
2519 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
2520 ; AVX1OR2-NEXT: testl %eax, %eax
2521 ; AVX1OR2-NEXT: sete %al
2522 ; AVX1OR2-NEXT: retq
2524 ; KNL-LABEL: allzeros_v16i8_and4:
2526 ; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
2527 ; KNL-NEXT: vpmovmskb %xmm0, %eax
2528 ; KNL-NEXT: testl %eax, %eax
2529 ; KNL-NEXT: sete %al
2532 ; SKX-LABEL: allzeros_v16i8_and4:
2534 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
2535 ; SKX-NEXT: kortestw %k0, %k0
2536 ; SKX-NEXT: sete %al
2538 %tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2539 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
2540 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2541 %tmp3 = icmp eq i16 %tmp2, 0
2545 define i1 @allones_v32i8_and4(<32 x i8> %arg) {
2546 ; SSE-LABEL: allones_v32i8_and4:
2548 ; SSE-NEXT: psllw $5, %xmm0
2549 ; SSE-NEXT: psllw $5, %xmm1
2550 ; SSE-NEXT: pand %xmm0, %xmm1
2551 ; SSE-NEXT: pmovmskb %xmm1, %eax
2552 ; SSE-NEXT: cmpw $-1, %ax
2553 ; SSE-NEXT: sete %al
2556 ; AVX1-LABEL: allones_v32i8_and4:
2558 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm1
2559 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2560 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2561 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2562 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2563 ; AVX1-NEXT: cmpw $-1, %ax
2564 ; AVX1-NEXT: sete %al
2565 ; AVX1-NEXT: vzeroupper
2568 ; AVX2-LABEL: allones_v32i8_and4:
2570 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
2571 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2572 ; AVX2-NEXT: cmpl $-1, %eax
2573 ; AVX2-NEXT: sete %al
2574 ; AVX2-NEXT: vzeroupper
2577 ; KNL-LABEL: allones_v32i8_and4:
2579 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
2580 ; KNL-NEXT: vpmovmskb %ymm0, %eax
2581 ; KNL-NEXT: cmpl $-1, %eax
2582 ; KNL-NEXT: sete %al
2583 ; KNL-NEXT: vzeroupper
2586 ; SKX-LABEL: allones_v32i8_and4:
2588 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
2589 ; SKX-NEXT: kortestd %k0, %k0
2590 ; SKX-NEXT: setb %al
2591 ; SKX-NEXT: vzeroupper
2593 %tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2594 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
2595 %tmp2 = bitcast <32 x i1> %tmp1 to i32
2596 %tmp3 = icmp eq i32 %tmp2, -1
2600 define i1 @allzeros_v32i8_and4(<32 x i8> %arg) {
2601 ; SSE-LABEL: allzeros_v32i8_and4:
2603 ; SSE-NEXT: por %xmm1, %xmm0
2604 ; SSE-NEXT: psllw $5, %xmm0
2605 ; SSE-NEXT: pmovmskb %xmm0, %eax
2606 ; SSE-NEXT: testl %eax, %eax
2607 ; SSE-NEXT: sete %al
2610 ; AVX1-LABEL: allzeros_v32i8_and4:
2612 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2613 ; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
2614 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2615 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
2616 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2617 ; AVX1-NEXT: testl %eax, %eax
2618 ; AVX1-NEXT: sete %al
2619 ; AVX1-NEXT: vzeroupper
2622 ; AVX2-LABEL: allzeros_v32i8_and4:
2624 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
2625 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2626 ; AVX2-NEXT: testl %eax, %eax
2627 ; AVX2-NEXT: sete %al
2628 ; AVX2-NEXT: vzeroupper
2631 ; KNL-LABEL: allzeros_v32i8_and4:
2633 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
2634 ; KNL-NEXT: vpmovmskb %ymm0, %eax
2635 ; KNL-NEXT: testl %eax, %eax
2636 ; KNL-NEXT: sete %al
2637 ; KNL-NEXT: vzeroupper
2640 ; SKX-LABEL: allzeros_v32i8_and4:
2642 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
2643 ; SKX-NEXT: kortestd %k0, %k0
2644 ; SKX-NEXT: sete %al
2645 ; SKX-NEXT: vzeroupper
2647 %tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2648 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
2649 %tmp2 = bitcast <32 x i1> %tmp1 to i32
2650 %tmp3 = icmp eq i32 %tmp2, 0
2654 define i1 @allones_v64i8_and4(<64 x i8> %arg) {
2655 ; SSE-LABEL: allones_v64i8_and4:
2657 ; SSE-NEXT: psllw $5, %xmm1
2658 ; SSE-NEXT: psllw $5, %xmm3
2659 ; SSE-NEXT: psllw $5, %xmm0
2660 ; SSE-NEXT: psllw $5, %xmm2
2661 ; SSE-NEXT: pand %xmm0, %xmm2
2662 ; SSE-NEXT: pand %xmm1, %xmm2
2663 ; SSE-NEXT: pand %xmm3, %xmm2
2664 ; SSE-NEXT: pmovmskb %xmm2, %eax
2665 ; SSE-NEXT: cmpw $-1, %ax
2666 ; SSE-NEXT: sete %al
2669 ; AVX1-LABEL: allones_v64i8_and4:
2671 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2672 ; AVX1-NEXT: vpsllw $5, %xmm2, %xmm2
2673 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2674 ; AVX1-NEXT: vpsllw $5, %xmm3, %xmm3
2675 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2676 ; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
2677 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
2678 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
2679 ; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
2680 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2681 ; AVX1-NEXT: cmpw $-1, %ax
2682 ; AVX1-NEXT: sete %al
2683 ; AVX1-NEXT: vzeroupper
2686 ; AVX2-LABEL: allones_v64i8_and4:
2688 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
2689 ; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
2690 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
2691 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2692 ; AVX2-NEXT: cmpl $-1, %eax
2693 ; AVX2-NEXT: sete %al
2694 ; AVX2-NEXT: vzeroupper
2697 ; KNL-LABEL: allones_v64i8_and4:
2699 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm1
2700 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
2701 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
2702 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
2703 ; KNL-NEXT: vpmovmskb %ymm0, %eax
2704 ; KNL-NEXT: cmpl $-1, %eax
2705 ; KNL-NEXT: sete %al
2706 ; KNL-NEXT: vzeroupper
2709 ; SKX-LABEL: allones_v64i8_and4:
2711 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
2712 ; SKX-NEXT: kortestq %k0, %k0
2713 ; SKX-NEXT: setb %al
2714 ; SKX-NEXT: vzeroupper
2716 %tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2717 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
2718 %tmp2 = bitcast <64 x i1> %tmp1 to i64
2719 %tmp3 = icmp eq i64 %tmp2, -1
2723 define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
2724 ; SSE-LABEL: allzeros_v64i8_and4:
2726 ; SSE-NEXT: por %xmm3, %xmm1
2727 ; SSE-NEXT: por %xmm2, %xmm1
2728 ; SSE-NEXT: por %xmm0, %xmm1
2729 ; SSE-NEXT: psllw $5, %xmm1
2730 ; SSE-NEXT: pmovmskb %xmm1, %eax
2731 ; SSE-NEXT: testl %eax, %eax
2732 ; SSE-NEXT: sete %al
2735 ; AVX1-LABEL: allzeros_v64i8_and4:
2737 ; AVX1-NEXT: vpsllw $5, %xmm1, %xmm2
2738 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm3
2739 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2740 ; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
2741 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2742 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2743 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
2744 ; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0
2745 ; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
2746 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2747 ; AVX1-NEXT: testl %eax, %eax
2748 ; AVX1-NEXT: sete %al
2749 ; AVX1-NEXT: vzeroupper
2752 ; AVX2-LABEL: allzeros_v64i8_and4:
2754 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
2755 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
2756 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2757 ; AVX2-NEXT: testl %eax, %eax
2758 ; AVX2-NEXT: sete %al
2759 ; AVX2-NEXT: vzeroupper
2762 ; KNL-LABEL: allzeros_v64i8_and4:
2764 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
2765 ; KNL-NEXT: vpsllw $5, %ymm1, %ymm1
2766 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
2767 ; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
2768 ; KNL-NEXT: vpmovmskb %ymm0, %eax
2769 ; KNL-NEXT: testl %eax, %eax
2770 ; KNL-NEXT: sete %al
2771 ; KNL-NEXT: vzeroupper
2774 ; SKX-LABEL: allzeros_v64i8_and4:
2776 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
2777 ; SKX-NEXT: kortestq %k0, %k0
2778 ; SKX-NEXT: sete %al
2779 ; SKX-NEXT: vzeroupper
2781 %tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2782 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
2783 %tmp2 = bitcast <64 x i1> %tmp1 to i64
2784 %tmp3 = icmp eq i64 %tmp2, 0
2788 define i1 @allones_v8i16_and4(<8 x i16> %arg) {
2789 ; SSE-LABEL: allones_v8i16_and4:
2791 ; SSE-NEXT: psllw $13, %xmm0
2792 ; SSE-NEXT: packsswb %xmm0, %xmm0
2793 ; SSE-NEXT: pmovmskb %xmm0, %eax
2794 ; SSE-NEXT: cmpb $-1, %al
2795 ; SSE-NEXT: sete %al
2798 ; AVX1OR2-LABEL: allones_v8i16_and4:
2800 ; AVX1OR2-NEXT: vpsllw $13, %xmm0, %xmm0
2801 ; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2802 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
2803 ; AVX1OR2-NEXT: cmpb $-1, %al
2804 ; AVX1OR2-NEXT: sete %al
2805 ; AVX1OR2-NEXT: retq
2807 ; KNL-LABEL: allones_v8i16_and4:
2809 ; KNL-NEXT: vpsllw $13, %xmm0, %xmm0
2810 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
2811 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
2812 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
2813 ; KNL-NEXT: kmovw %k0, %eax
2814 ; KNL-NEXT: cmpb $-1, %al
2815 ; KNL-NEXT: sete %al
2816 ; KNL-NEXT: vzeroupper
2819 ; SKX-LABEL: allones_v8i16_and4:
2821 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
2822 ; SKX-NEXT: kortestb %k0, %k0
2823 ; SKX-NEXT: setb %al
2825 %tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2826 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
2827 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2828 %tmp3 = icmp eq i8 %tmp2, -1
2832 define i1 @allzeros_v8i16_and4(<8 x i16> %arg) {
2833 ; SSE-LABEL: allzeros_v8i16_and4:
2835 ; SSE-NEXT: psllw $13, %xmm0
2836 ; SSE-NEXT: pmovmskb %xmm0, %eax
2837 ; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA
2838 ; SSE-NEXT: sete %al
2841 ; AVX1OR2-LABEL: allzeros_v8i16_and4:
2843 ; AVX1OR2-NEXT: vpsllw $13, %xmm0, %xmm0
2844 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
2845 ; AVX1OR2-NEXT: testl $43690, %eax # imm = 0xAAAA
2846 ; AVX1OR2-NEXT: sete %al
2847 ; AVX1OR2-NEXT: retq
2849 ; KNL-LABEL: allzeros_v8i16_and4:
2851 ; KNL-NEXT: vpsllw $13, %xmm0, %xmm0
2852 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
2853 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
2854 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
2855 ; KNL-NEXT: kmovw %k0, %eax
2856 ; KNL-NEXT: testb %al, %al
2857 ; KNL-NEXT: sete %al
2858 ; KNL-NEXT: vzeroupper
2861 ; SKX-LABEL: allzeros_v8i16_and4:
2863 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
2864 ; SKX-NEXT: kortestb %k0, %k0
2865 ; SKX-NEXT: sete %al
2867 %tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2868 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
2869 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2870 %tmp3 = icmp eq i8 %tmp2, 0
2874 define i1 @allones_v16i16_and4(<16 x i16> %arg) {
2875 ; SSE-LABEL: allones_v16i16_and4:
2877 ; SSE-NEXT: psllw $13, %xmm1
2878 ; SSE-NEXT: psllw $13, %xmm0
2879 ; SSE-NEXT: packsswb %xmm1, %xmm0
2880 ; SSE-NEXT: pmovmskb %xmm0, %eax
2881 ; SSE-NEXT: cmpw $-1, %ax
2882 ; SSE-NEXT: sete %al
2885 ; AVX1-LABEL: allones_v16i16_and4:
2887 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2888 ; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
2889 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
2890 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2891 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2892 ; AVX1-NEXT: cmpw $-1, %ax
2893 ; AVX1-NEXT: sete %al
2894 ; AVX1-NEXT: vzeroupper
2897 ; AVX2-LABEL: allones_v16i16_and4:
2899 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
2900 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2901 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2902 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
2903 ; AVX2-NEXT: cmpw $-1, %ax
2904 ; AVX2-NEXT: sete %al
2905 ; AVX2-NEXT: vzeroupper
2908 ; KNL-LABEL: allones_v16i16_and4:
2910 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
2911 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
2912 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
2913 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2914 ; KNL-NEXT: kortestw %k0, %k0
2915 ; KNL-NEXT: setb %al
2916 ; KNL-NEXT: vzeroupper
2919 ; SKX-LABEL: allones_v16i16_and4:
2921 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
2922 ; SKX-NEXT: kortestw %k0, %k0
2923 ; SKX-NEXT: setb %al
2924 ; SKX-NEXT: vzeroupper
2926 %tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2927 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
2928 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2929 %tmp3 = icmp eq i16 %tmp2, -1
2933 define i1 @allones_v32i16_and4(<32 x i16> %arg) {
2934 ; SSE-LABEL: allones_v32i16_and4:
2936 ; SSE-NEXT: psllw $13, %xmm1
2937 ; SSE-NEXT: psllw $13, %xmm0
2938 ; SSE-NEXT: packsswb %xmm1, %xmm0
2939 ; SSE-NEXT: psllw $13, %xmm3
2940 ; SSE-NEXT: psllw $13, %xmm2
2941 ; SSE-NEXT: packsswb %xmm3, %xmm2
2942 ; SSE-NEXT: pand %xmm0, %xmm2
2943 ; SSE-NEXT: pmovmskb %xmm2, %eax
2944 ; SSE-NEXT: cmpw $-1, %ax
2945 ; SSE-NEXT: sete %al
2948 ; AVX1-LABEL: allones_v32i16_and4:
2950 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2951 ; AVX1-NEXT: vpsllw $13, %xmm2, %xmm2
2952 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
2953 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
2954 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2955 ; AVX1-NEXT: vpsllw $13, %xmm2, %xmm2
2956 ; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
2957 ; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
2958 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
2959 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2960 ; AVX1-NEXT: cmpw $-1, %ax
2961 ; AVX1-NEXT: sete %al
2962 ; AVX1-NEXT: vzeroupper
2965 ; AVX2-LABEL: allones_v32i16_and4:
2967 ; AVX2-NEXT: vpsllw $13, %ymm1, %ymm1
2968 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
2969 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
2970 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2971 ; AVX2-NEXT: cmpl $-1, %eax
2972 ; AVX2-NEXT: sete %al
2973 ; AVX2-NEXT: vzeroupper
2976 ; KNL-LABEL: allones_v32i16_and4:
2978 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm1
2979 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
2980 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
2981 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2982 ; KNL-NEXT: kmovw %k0, %eax
2983 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
2984 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
2985 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
2986 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
2987 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2988 ; KNL-NEXT: kmovw %k0, %ecx
2989 ; KNL-NEXT: andl %eax, %ecx
2990 ; KNL-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
2991 ; KNL-NEXT: sete %al
2992 ; KNL-NEXT: vzeroupper
2995 ; SKX-LABEL: allones_v32i16_and4:
2997 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
2998 ; SKX-NEXT: kortestd %k0, %k0
2999 ; SKX-NEXT: setb %al
3000 ; SKX-NEXT: vzeroupper
3002 %tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
3003 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
3004 %tmp2 = bitcast <32 x i1> %tmp1 to i32
3005 %tmp3 = icmp eq i32 %tmp2, -1
3009 define i1 @allzeros_v32i16_and4(<32 x i16> %arg) {
3010 ; SSE-LABEL: allzeros_v32i16_and4:
3012 ; SSE-NEXT: psllw $13, %xmm3
3013 ; SSE-NEXT: psllw $13, %xmm2
3014 ; SSE-NEXT: packsswb %xmm3, %xmm2
3015 ; SSE-NEXT: psllw $13, %xmm1
3016 ; SSE-NEXT: psllw $13, %xmm0
3017 ; SSE-NEXT: packsswb %xmm1, %xmm0
3018 ; SSE-NEXT: por %xmm2, %xmm0
3019 ; SSE-NEXT: pmovmskb %xmm0, %eax
3020 ; SSE-NEXT: testl %eax, %eax
3021 ; SSE-NEXT: sete %al
3024 ; AVX1-LABEL: allzeros_v32i16_and4:
3026 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3027 ; AVX1-NEXT: vpsllw $13, %xmm2, %xmm2
3028 ; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
3029 ; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
3030 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3031 ; AVX1-NEXT: vpsllw $13, %xmm2, %xmm2
3032 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
3033 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
3034 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
3035 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3036 ; AVX1-NEXT: testl %eax, %eax
3037 ; AVX1-NEXT: sete %al
3038 ; AVX1-NEXT: vzeroupper
3041 ; AVX2-LABEL: allzeros_v32i16_and4:
3043 ; AVX2-NEXT: vpsllw $13, %ymm1, %ymm1
3044 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
3045 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
3046 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
3047 ; AVX2-NEXT: testl %eax, %eax
3048 ; AVX2-NEXT: sete %al
3049 ; AVX2-NEXT: vzeroupper
3052 ; KNL-LABEL: allzeros_v32i16_and4:
3054 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
3055 ; KNL-NEXT: vpsllw $13, %ymm1, %ymm1
3056 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
3057 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
3058 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
3059 ; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
3060 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3061 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3062 ; KNL-NEXT: kortestw %k0, %k0
3063 ; KNL-NEXT: sete %al
3064 ; KNL-NEXT: vzeroupper
3067 ; SKX-LABEL: allzeros_v32i16_and4:
3069 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
3070 ; SKX-NEXT: kortestd %k0, %k0
3071 ; SKX-NEXT: sete %al
3072 ; SKX-NEXT: vzeroupper
3074 %tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
3075 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
3076 %tmp2 = bitcast <32 x i1> %tmp1 to i32
3077 %tmp3 = icmp eq i32 %tmp2, 0
3081 define i1 @allzeros_v16i16_and4(<16 x i16> %arg) {
3082 ; SSE-LABEL: allzeros_v16i16_and4:
3084 ; SSE-NEXT: psllw $13, %xmm1
3085 ; SSE-NEXT: psllw $13, %xmm0
3086 ; SSE-NEXT: packsswb %xmm1, %xmm0
3087 ; SSE-NEXT: pmovmskb %xmm0, %eax
3088 ; SSE-NEXT: testl %eax, %eax
3089 ; SSE-NEXT: sete %al
3092 ; AVX1-LABEL: allzeros_v16i16_and4:
3094 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3095 ; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
3096 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
3097 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3098 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3099 ; AVX1-NEXT: testl %eax, %eax
3100 ; AVX1-NEXT: sete %al
3101 ; AVX1-NEXT: vzeroupper
3104 ; AVX2-LABEL: allzeros_v16i16_and4:
3106 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
3107 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
3108 ; AVX2-NEXT: testl $-1431655766, %eax # imm = 0xAAAAAAAA
3109 ; AVX2-NEXT: sete %al
3110 ; AVX2-NEXT: vzeroupper
3113 ; KNL-LABEL: allzeros_v16i16_and4:
3115 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
3116 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
3117 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3118 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3119 ; KNL-NEXT: kortestw %k0, %k0
3120 ; KNL-NEXT: sete %al
3121 ; KNL-NEXT: vzeroupper
3124 ; SKX-LABEL: allzeros_v16i16_and4:
3126 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
3127 ; SKX-NEXT: kortestw %k0, %k0
3128 ; SKX-NEXT: sete %al
3129 ; SKX-NEXT: vzeroupper
3131 %tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
3132 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
3133 %tmp2 = bitcast <16 x i1> %tmp1 to i16
3134 %tmp3 = icmp eq i16 %tmp2, 0
3138 define i1 @allones_v4i32_and4(<4 x i32> %arg) {
3139 ; SSE-LABEL: allones_v4i32_and4:
3141 ; SSE-NEXT: pslld $29, %xmm0
3142 ; SSE-NEXT: movmskps %xmm0, %eax
3143 ; SSE-NEXT: cmpb $15, %al
3144 ; SSE-NEXT: sete %al
3147 ; AVX1OR2-LABEL: allones_v4i32_and4:
3149 ; AVX1OR2-NEXT: vpslld $29, %xmm0, %xmm0
3150 ; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
3151 ; AVX1OR2-NEXT: cmpb $15, %al
3152 ; AVX1OR2-NEXT: sete %al
3153 ; AVX1OR2-NEXT: retq
3155 ; KNL-LABEL: allones_v4i32_and4:
3157 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3158 ; KNL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
3159 ; KNL-NEXT: kmovw %k0, %eax
3160 ; KNL-NEXT: testb $15, %al
3161 ; KNL-NEXT: sete %al
3162 ; KNL-NEXT: vzeroupper
3165 ; SKX-LABEL: allones_v4i32_and4:
3167 ; SKX-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
3168 ; SKX-NEXT: kmovd %k0, %eax
3169 ; SKX-NEXT: cmpb $15, %al
3170 ; SKX-NEXT: sete %al
3172 %tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
3173 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
3174 %tmp2 = bitcast <4 x i1> %tmp1 to i4
3175 %tmp3 = icmp eq i4 %tmp2, -1
3179 define i1 @allzeros_v4i32_and4(<4 x i32> %arg) {
3180 ; SSE-LABEL: allzeros_v4i32_and4:
3182 ; SSE-NEXT: pslld $29, %xmm0
3183 ; SSE-NEXT: movmskps %xmm0, %eax
3184 ; SSE-NEXT: testl %eax, %eax
3185 ; SSE-NEXT: sete %al
3188 ; AVX1OR2-LABEL: allzeros_v4i32_and4:
3190 ; AVX1OR2-NEXT: vpslld $29, %xmm0, %xmm0
3191 ; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
3192 ; AVX1OR2-NEXT: testl %eax, %eax
3193 ; AVX1OR2-NEXT: sete %al
3194 ; AVX1OR2-NEXT: retq
3196 ; KNL-LABEL: allzeros_v4i32_and4:
3198 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3199 ; KNL-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
3200 ; KNL-NEXT: kmovw %k0, %eax
3201 ; KNL-NEXT: testb $15, %al
3202 ; KNL-NEXT: sete %al
3203 ; KNL-NEXT: vzeroupper
3206 ; SKX-LABEL: allzeros_v4i32_and4:
3208 ; SKX-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
3209 ; SKX-NEXT: kortestb %k0, %k0
3210 ; SKX-NEXT: sete %al
3212 %tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
3213 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
3214 %tmp2 = bitcast <4 x i1> %tmp1 to i4
3215 %tmp3 = icmp eq i4 %tmp2, 0
3219 define i1 @allones_v8i32_and4(<8 x i32> %arg) {
3220 ; SSE-LABEL: allones_v8i32_and4:
3222 ; SSE-NEXT: pslld $29, %xmm1
3223 ; SSE-NEXT: pslld $29, %xmm0
3224 ; SSE-NEXT: packssdw %xmm1, %xmm0
3225 ; SSE-NEXT: packsswb %xmm0, %xmm0
3226 ; SSE-NEXT: pmovmskb %xmm0, %eax
3227 ; SSE-NEXT: cmpb $-1, %al
3228 ; SSE-NEXT: sete %al
3231 ; AVX1-LABEL: allones_v8i32_and4:
3233 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm1
3234 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3235 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
3236 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3237 ; AVX1-NEXT: vmovmskps %ymm0, %eax
3238 ; AVX1-NEXT: cmpb $-1, %al
3239 ; AVX1-NEXT: sete %al
3240 ; AVX1-NEXT: vzeroupper
3243 ; AVX2-LABEL: allones_v8i32_and4:
3245 ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
3246 ; AVX2-NEXT: vmovmskps %ymm0, %eax
3247 ; AVX2-NEXT: cmpb $-1, %al
3248 ; AVX2-NEXT: sete %al
3249 ; AVX2-NEXT: vzeroupper
3252 ; KNL-LABEL: allones_v8i32_and4:
3254 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3255 ; KNL-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
3256 ; KNL-NEXT: kmovw %k0, %eax
3257 ; KNL-NEXT: cmpb $-1, %al
3258 ; KNL-NEXT: sete %al
3259 ; KNL-NEXT: vzeroupper
3262 ; SKX-LABEL: allones_v8i32_and4:
3264 ; SKX-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
3265 ; SKX-NEXT: kortestb %k0, %k0
3266 ; SKX-NEXT: setb %al
3267 ; SKX-NEXT: vzeroupper
3269 %tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3270 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
3271 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3272 %tmp3 = icmp eq i8 %tmp2, -1
3276 define i1 @allzeros_v8i32_and4(<8 x i32> %arg) {
3277 ; SSE-LABEL: allzeros_v8i32_and4:
3279 ; SSE-NEXT: pslld $29, %xmm1
3280 ; SSE-NEXT: pslld $29, %xmm0
3281 ; SSE-NEXT: packssdw %xmm1, %xmm0
3282 ; SSE-NEXT: pmovmskb %xmm0, %eax
3283 ; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA
3284 ; SSE-NEXT: sete %al
3287 ; AVX1-LABEL: allzeros_v8i32_and4:
3289 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm1
3290 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3291 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
3292 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3293 ; AVX1-NEXT: vmovmskps %ymm0, %eax
3294 ; AVX1-NEXT: testl %eax, %eax
3295 ; AVX1-NEXT: sete %al
3296 ; AVX1-NEXT: vzeroupper
3299 ; AVX2-LABEL: allzeros_v8i32_and4:
3301 ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
3302 ; AVX2-NEXT: vmovmskps %ymm0, %eax
3303 ; AVX2-NEXT: testl %eax, %eax
3304 ; AVX2-NEXT: sete %al
3305 ; AVX2-NEXT: vzeroupper
3308 ; KNL-LABEL: allzeros_v8i32_and4:
3310 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3311 ; KNL-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
3312 ; KNL-NEXT: kmovw %k0, %eax
3313 ; KNL-NEXT: testb %al, %al
3314 ; KNL-NEXT: sete %al
3315 ; KNL-NEXT: vzeroupper
3318 ; SKX-LABEL: allzeros_v8i32_and4:
3320 ; SKX-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
3321 ; SKX-NEXT: kortestb %k0, %k0
3322 ; SKX-NEXT: sete %al
3323 ; SKX-NEXT: vzeroupper
3325 %tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3326 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
3327 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3328 %tmp3 = icmp eq i8 %tmp2, 0
3332 define i1 @allones_v16i32_and4(<16 x i32> %arg) {
3333 ; SSE-LABEL: allones_v16i32_and4:
3335 ; SSE-NEXT: pslld $29, %xmm3
3336 ; SSE-NEXT: pslld $29, %xmm2
3337 ; SSE-NEXT: packssdw %xmm3, %xmm2
3338 ; SSE-NEXT: pslld $29, %xmm1
3339 ; SSE-NEXT: pslld $29, %xmm0
3340 ; SSE-NEXT: packssdw %xmm1, %xmm0
3341 ; SSE-NEXT: packsswb %xmm2, %xmm0
3342 ; SSE-NEXT: pmovmskb %xmm0, %eax
3343 ; SSE-NEXT: cmpw $-1, %ax
3344 ; SSE-NEXT: sete %al
3347 ; AVX1-LABEL: allones_v16i32_and4:
3349 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3350 ; AVX1-NEXT: vpslld $29, %xmm2, %xmm2
3351 ; AVX1-NEXT: vpslld $29, %xmm1, %xmm1
3352 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
3353 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3354 ; AVX1-NEXT: vpslld $29, %xmm2, %xmm2
3355 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
3356 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
3357 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3358 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3359 ; AVX1-NEXT: cmpw $-1, %ax
3360 ; AVX1-NEXT: sete %al
3361 ; AVX1-NEXT: vzeroupper
3364 ; AVX2-LABEL: allones_v16i32_and4:
3366 ; AVX2-NEXT: vpslld $29, %ymm1, %ymm1
3367 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
3368 ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
3369 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
3370 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
3371 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3372 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3373 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
3374 ; AVX2-NEXT: cmpw $-1, %ax
3375 ; AVX2-NEXT: sete %al
3376 ; AVX2-NEXT: vzeroupper
3379 ; AVX512-LABEL: allones_v16i32_and4:
3381 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
3382 ; AVX512-NEXT: kortestw %k0, %k0
3383 ; AVX512-NEXT: setb %al
3384 ; AVX512-NEXT: vzeroupper
3386 %tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3387 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
3388 %tmp2 = bitcast <16 x i1> %tmp1 to i16
3389 %tmp3 = icmp eq i16 %tmp2, -1
; Checks "is bit 2 (mask 4) clear in ALL 16 i32 lanes" (no lane matches):
; same shift+pack lowering as the allones variant, but the reduced mask is
; tested against zero (testl / kortestw + sete) instead of all-ones.
3393 define i1 @allzeros_v16i32_and4(<16 x i32> %arg) {
3394 ; SSE-LABEL: allzeros_v16i32_and4:
3396 ; SSE-NEXT: pslld $29, %xmm3
3397 ; SSE-NEXT: pslld $29, %xmm2
3398 ; SSE-NEXT: packssdw %xmm3, %xmm2
3399 ; SSE-NEXT: pslld $29, %xmm1
3400 ; SSE-NEXT: pslld $29, %xmm0
3401 ; SSE-NEXT: packssdw %xmm1, %xmm0
3402 ; SSE-NEXT: packsswb %xmm2, %xmm0
3403 ; SSE-NEXT: pmovmskb %xmm0, %eax
3404 ; SSE-NEXT: testl %eax, %eax
3405 ; SSE-NEXT: sete %al
3408 ; AVX1-LABEL: allzeros_v16i32_and4:
3410 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3411 ; AVX1-NEXT: vpslld $29, %xmm2, %xmm2
3412 ; AVX1-NEXT: vpslld $29, %xmm1, %xmm1
3413 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
3414 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3415 ; AVX1-NEXT: vpslld $29, %xmm2, %xmm2
3416 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
3417 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
3418 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3419 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3420 ; AVX1-NEXT: testl %eax, %eax
3421 ; AVX1-NEXT: sete %al
3422 ; AVX1-NEXT: vzeroupper
3425 ; AVX2-LABEL: allzeros_v16i32_and4:
3427 ; AVX2-NEXT: vpslld $29, %ymm1, %ymm1
3428 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
3429 ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
3430 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
3431 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
3432 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
3433 ; AVX2-NEXT: testl %eax, %eax
3434 ; AVX2-NEXT: sete %al
3435 ; AVX2-NEXT: vzeroupper
3438 ; AVX512-LABEL: allzeros_v16i32_and4:
3440 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
3441 ; AVX512-NEXT: kortestw %k0, %k0
3442 ; AVX512-NEXT: sete %al
3443 ; AVX512-NEXT: vzeroupper
3445 %tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3446 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
3447 %tmp2 = bitcast <16 x i1> %tmp1 to i16
3448 %tmp3 = icmp eq i16 %tmp2, 0
; Checks "bit 2 set in BOTH i64 lanes": SSE/AVX shift bit 2 to the sign bit
; (psllq $61) and compare the 2-bit movmskpd result against 3 (0b11);
; KNL widens to zmm and uses vptestnmq, SKX uses 128-bit vptestmq directly.
3452 define i1 @allones_v2i64_and4(<2 x i64> %arg) {
3453 ; SSE-LABEL: allones_v2i64_and4:
3455 ; SSE-NEXT: psllq $61, %xmm0
3456 ; SSE-NEXT: movmskpd %xmm0, %eax
3457 ; SSE-NEXT: cmpb $3, %al
3458 ; SSE-NEXT: sete %al
3461 ; AVX1OR2-LABEL: allones_v2i64_and4:
3463 ; AVX1OR2-NEXT: vpsllq $61, %xmm0, %xmm0
3464 ; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
3465 ; AVX1OR2-NEXT: cmpb $3, %al
3466 ; AVX1OR2-NEXT: sete %al
3467 ; AVX1OR2-NEXT: retq
3469 ; KNL-LABEL: allones_v2i64_and4:
3471 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3472 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4]
3473 ; KNL-NEXT: vptestnmq %zmm1, %zmm0, %k0
3474 ; KNL-NEXT: kmovw %k0, %eax
3475 ; KNL-NEXT: testb $3, %al
3476 ; KNL-NEXT: sete %al
3477 ; KNL-NEXT: vzeroupper
3480 ; SKX-LABEL: allones_v2i64_and4:
3482 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
3483 ; SKX-NEXT: kmovd %k0, %eax
3484 ; SKX-NEXT: cmpb $3, %al
3485 ; SKX-NEXT: sete %al
3487 %tmp = and <2 x i64> %arg, <i64 4, i64 4>
3488 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
3489 %tmp2 = bitcast <2 x i1> %tmp1 to i2
3490 %tmp3 = icmp eq i2 %tmp2, -1
; Checks "bit 2 clear in BOTH i64 lanes": same psllq+movmskpd lowering as the
; allones variant but tested against zero; SKX can use kortestb on the
; vptestmq mask directly.
3494 define i1 @allzeros_v2i64_and4(<2 x i64> %arg) {
3495 ; SSE-LABEL: allzeros_v2i64_and4:
3497 ; SSE-NEXT: psllq $61, %xmm0
3498 ; SSE-NEXT: movmskpd %xmm0, %eax
3499 ; SSE-NEXT: testl %eax, %eax
3500 ; SSE-NEXT: sete %al
3503 ; AVX1OR2-LABEL: allzeros_v2i64_and4:
3505 ; AVX1OR2-NEXT: vpsllq $61, %xmm0, %xmm0
3506 ; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
3507 ; AVX1OR2-NEXT: testl %eax, %eax
3508 ; AVX1OR2-NEXT: sete %al
3509 ; AVX1OR2-NEXT: retq
3511 ; KNL-LABEL: allzeros_v2i64_and4:
3513 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3514 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4]
3515 ; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
3516 ; KNL-NEXT: kmovw %k0, %eax
3517 ; KNL-NEXT: testb $3, %al
3518 ; KNL-NEXT: sete %al
3519 ; KNL-NEXT: vzeroupper
3522 ; SKX-LABEL: allzeros_v2i64_and4:
3524 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
3525 ; SKX-NEXT: kortestb %k0, %k0
3526 ; SKX-NEXT: sete %al
3528 %tmp = and <2 x i64> %arg, <i64 4, i64 4>
3529 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
3530 %tmp2 = bitcast <2 x i1> %tmp1 to i2
3531 %tmp3 = icmp eq i2 %tmp2, 0
; Checks "bit 2 set in ALL 4 i64 lanes": 256-bit variant — psllq $61 per half
; on SSE with a pack to a 4-bit movmskps, or ymm movmskpd on AVX, compared
; against 15 (0b1111); AVX512 variants use vptest(n)mq with a broadcast constant.
3536 define i1 @allones_v4i64_and4(<4 x i64> %arg) {
3537 ; SSE-LABEL: allones_v4i64_and4:
3538 ; SSE-NEXT: psllq $61, %xmm1
3539 ; SSE-NEXT: psllq $61, %xmm0
3540 ; SSE-NEXT: packssdw %xmm1, %xmm0
3541 ; SSE-NEXT: movmskps %xmm0, %eax
3542 ; SSE-NEXT: cmpb $15, %al
3543 ; SSE-NEXT: sete %al
3546 ; AVX1-LABEL: allones_v4i64_and4:
3548 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm1
3549 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3550 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
3551 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3552 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
3553 ; AVX1-NEXT: cmpb $15, %al
3554 ; AVX1-NEXT: sete %al
3555 ; AVX1-NEXT: vzeroupper
3558 ; AVX2-LABEL: allones_v4i64_and4:
3560 ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
3561 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
3562 ; AVX2-NEXT: cmpb $15, %al
3563 ; AVX2-NEXT: sete %al
3564 ; AVX2-NEXT: vzeroupper
3567 ; KNL-LABEL: allones_v4i64_and4:
3569 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3570 ; KNL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
3571 ; KNL-NEXT: kmovw %k0, %eax
3572 ; KNL-NEXT: testb $15, %al
3573 ; KNL-NEXT: sete %al
3574 ; KNL-NEXT: vzeroupper
3577 ; SKX-LABEL: allones_v4i64_and4:
3579 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
3580 ; SKX-NEXT: kmovd %k0, %eax
3581 ; SKX-NEXT: cmpb $15, %al
3582 ; SKX-NEXT: sete %al
3583 ; SKX-NEXT: vzeroupper
3585 %tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
3586 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
3587 %tmp2 = bitcast <4 x i1> %tmp1 to i4
3588 %tmp3 = icmp eq i4 %tmp2, -1
; Checks "bit 2 clear in ALL 4 i64 lanes": same lowering shapes as the
; allones variant but the 4-bit mask is tested against zero
; (testl / kortestb + sete).
3592 define i1 @allzeros_v4i64_and4(<4 x i64> %arg) {
3593 ; SSE-LABEL: allzeros_v4i64_and4:
3595 ; SSE-NEXT: psllq $61, %xmm1
3596 ; SSE-NEXT: psllq $61, %xmm0
3597 ; SSE-NEXT: packssdw %xmm1, %xmm0
3598 ; SSE-NEXT: movmskps %xmm0, %eax
3599 ; SSE-NEXT: testl %eax, %eax
3600 ; SSE-NEXT: sete %al
3603 ; AVX1-LABEL: allzeros_v4i64_and4:
3605 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm1
3606 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3607 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
3608 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3609 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
3610 ; AVX1-NEXT: testl %eax, %eax
3611 ; AVX1-NEXT: sete %al
3612 ; AVX1-NEXT: vzeroupper
3615 ; AVX2-LABEL: allzeros_v4i64_and4:
3617 ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
3618 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
3619 ; AVX2-NEXT: testl %eax, %eax
3620 ; AVX2-NEXT: sete %al
3621 ; AVX2-NEXT: vzeroupper
3624 ; KNL-LABEL: allzeros_v4i64_and4:
3626 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3627 ; KNL-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
3628 ; KNL-NEXT: kmovw %k0, %eax
3629 ; KNL-NEXT: testb $15, %al
3630 ; KNL-NEXT: sete %al
3631 ; KNL-NEXT: vzeroupper
3634 ; SKX-LABEL: allzeros_v4i64_and4:
3636 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
3637 ; SKX-NEXT: kortestb %k0, %k0
3638 ; SKX-NEXT: sete %al
3639 ; SKX-NEXT: vzeroupper
3641 %tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
3642 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
3643 %tmp2 = bitcast <4 x i1> %tmp1 to i4
3644 %tmp3 = icmp eq i4 %tmp2, 0
; Checks "bit 2 set in ALL 8 i64 lanes": 512-bit variant — SSE packs four
; shifted xmm halves down to an 8-bit pmovmskb vs -1; AVX1 needs explicit
; vpcmpgtq for the sign extraction before packing; KNL/SKX fold everything
; into a single vptestmq {1to8} with cmpb/kortestb+setb on the 8-bit k-mask.
3648 define i1 @allones_v8i64_and4(<8 x i64> %arg) {
3649 ; SSE-LABEL: allones_v8i64_and4:
3651 ; SSE-NEXT: psllq $61, %xmm3
3652 ; SSE-NEXT: psllq $61, %xmm2
3653 ; SSE-NEXT: packssdw %xmm3, %xmm2
3654 ; SSE-NEXT: psllq $61, %xmm1
3655 ; SSE-NEXT: psllq $61, %xmm0
3656 ; SSE-NEXT: packssdw %xmm1, %xmm0
3657 ; SSE-NEXT: packssdw %xmm2, %xmm0
3658 ; SSE-NEXT: packsswb %xmm0, %xmm0
3659 ; SSE-NEXT: pmovmskb %xmm0, %eax
3660 ; SSE-NEXT: cmpb $-1, %al
3661 ; SSE-NEXT: sete %al
3664 ; AVX1-LABEL: allones_v8i64_and4:
3666 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3667 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
3668 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
3669 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
3670 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
3671 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
3672 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
3673 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3674 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
3675 ; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
3676 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
3677 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3678 ; AVX1-NEXT: vmovmskps %ymm0, %eax
3679 ; AVX1-NEXT: cmpb $-1, %al
3680 ; AVX1-NEXT: sete %al
3681 ; AVX1-NEXT: vzeroupper
3684 ; AVX2-LABEL: allones_v8i64_and4:
3686 ; AVX2-NEXT: vpsllq $61, %ymm1, %ymm1
3687 ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
3688 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
3689 ; AVX2-NEXT: vmovmskps %ymm0, %eax
3690 ; AVX2-NEXT: cmpb $-1, %al
3691 ; AVX2-NEXT: sete %al
3692 ; AVX2-NEXT: vzeroupper
3695 ; KNL-LABEL: allones_v8i64_and4:
3697 ; KNL-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
3698 ; KNL-NEXT: kmovw %k0, %eax
3699 ; KNL-NEXT: cmpb $-1, %al
3700 ; KNL-NEXT: sete %al
3701 ; KNL-NEXT: vzeroupper
3704 ; SKX-LABEL: allones_v8i64_and4:
3706 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
3707 ; SKX-NEXT: kortestb %k0, %k0
3708 ; SKX-NEXT: setb %al
3709 ; SKX-NEXT: vzeroupper
3711 %tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
3712 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
3713 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3714 %tmp3 = icmp eq i8 %tmp2, -1
; Checks "bit 2 clear in ALL 8 i64 lanes": zero-test counterpart of
; allones_v8i64_and4. Note the SSE path skips the final packsswb and instead
; tests only the odd (sign-carrying word) bits of pmovmskb via mask 0xAAAA.
3718 define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
3719 ; SSE-LABEL: allzeros_v8i64_and4:
3721 ; SSE-NEXT: psllq $61, %xmm3
3722 ; SSE-NEXT: psllq $61, %xmm2
3723 ; SSE-NEXT: packssdw %xmm3, %xmm2
3724 ; SSE-NEXT: psllq $61, %xmm1
3725 ; SSE-NEXT: psllq $61, %xmm0
3726 ; SSE-NEXT: packssdw %xmm1, %xmm0
3727 ; SSE-NEXT: packssdw %xmm2, %xmm0
3728 ; SSE-NEXT: pmovmskb %xmm0, %eax
3729 ; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA
3730 ; SSE-NEXT: sete %al
3733 ; AVX1-LABEL: allzeros_v8i64_and4:
3735 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3736 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
3737 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
3738 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
3739 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
3740 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
3741 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
3742 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3743 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
3744 ; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
3745 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
3746 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3747 ; AVX1-NEXT: vmovmskps %ymm0, %eax
3748 ; AVX1-NEXT: testl %eax, %eax
3749 ; AVX1-NEXT: sete %al
3750 ; AVX1-NEXT: vzeroupper
3753 ; AVX2-LABEL: allzeros_v8i64_and4:
3755 ; AVX2-NEXT: vpsllq $61, %ymm1, %ymm1
3756 ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
3757 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
3758 ; AVX2-NEXT: vmovmskps %ymm0, %eax
3759 ; AVX2-NEXT: testl %eax, %eax
3760 ; AVX2-NEXT: sete %al
3761 ; AVX2-NEXT: vzeroupper
3764 ; KNL-LABEL: allzeros_v8i64_and4:
3766 ; KNL-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
3767 ; KNL-NEXT: kmovw %k0, %eax
3768 ; KNL-NEXT: testb %al, %al
3769 ; KNL-NEXT: sete %al
3770 ; KNL-NEXT: vzeroupper
3773 ; SKX-LABEL: allzeros_v8i64_and4:
3775 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
3776 ; SKX-NEXT: kortestb %k0, %k0
3777 ; SKX-NEXT: sete %al
3778 ; SKX-NEXT: vzeroupper
3780 %tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
3781 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
3782 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3783 %tmp3 = icmp eq i8 %tmp2, 0
3787 ; The below are IR patterns that should directly represent the behavior of a
3788 ; MOVMSK instruction.
; IR idiom for MOVMSKPD: sign-bit compare of <2 x i64>, bitcast the <2 x i1>
; to i2, zext to i32 — expected to lower to a single (v)movmskpd.
3790 define i32 @movmskpd(<2 x double> %x) {
3791 ; SSE-LABEL: movmskpd:
3793 ; SSE-NEXT: movmskpd %xmm0, %eax
3796 ; AVX-LABEL: movmskpd:
3798 ; AVX-NEXT: vmovmskpd %xmm0, %eax
3800 %a = bitcast <2 x double> %x to <2 x i64>
3801 %b = icmp slt <2 x i64> %a, zeroinitializer
3802 %c = bitcast <2 x i1> %b to i2
3803 %d = zext i2 %c to i32
; IR idiom for MOVMSKPS: sign-bit compare of <4 x i32>, bitcast to i4, zext —
; expected to lower to a single (v)movmskps.
3807 define i32 @movmskps(<4 x float> %x) {
3808 ; SSE-LABEL: movmskps:
3810 ; SSE-NEXT: movmskps %xmm0, %eax
3813 ; AVX-LABEL: movmskps:
3815 ; AVX-NEXT: vmovmskps %xmm0, %eax
3817 %a = bitcast <4 x float> %x to <4 x i32>
3818 %b = icmp slt <4 x i32> %a, zeroinitializer
3819 %c = bitcast <4 x i1> %b to i4
3820 %d = zext i4 %c to i32
; 256-bit MOVMSKPD idiom: AVX gets one ymm vmovmskpd; SSE (no 256-bit regs)
; packs the two halves and uses movmskps instead.
3824 define i32 @movmskpd256(<4 x double> %x) {
3825 ; SSE-LABEL: movmskpd256:
3827 ; SSE-NEXT: packssdw %xmm1, %xmm0
3828 ; SSE-NEXT: movmskps %xmm0, %eax
3831 ; AVX-LABEL: movmskpd256:
3833 ; AVX-NEXT: vmovmskpd %ymm0, %eax
3834 ; AVX-NEXT: vzeroupper
3836 %a = bitcast <4 x double> %x to <4 x i64>
3837 %b = icmp slt <4 x i64> %a, zeroinitializer
3838 %c = bitcast <4 x i1> %b to i4
3839 %d = zext i4 %c to i32
; 256-bit MOVMSKPS idiom: AVX gets one ymm vmovmskps; SSE packs both halves
; down to bytes and reads the mask with pmovmskb + movzbl.
3843 define i32 @movmskps256(<8 x float> %x) {
3844 ; SSE-LABEL: movmskps256:
3846 ; SSE-NEXT: packssdw %xmm1, %xmm0
3847 ; SSE-NEXT: packsswb %xmm0, %xmm0
3848 ; SSE-NEXT: pmovmskb %xmm0, %eax
3849 ; SSE-NEXT: movzbl %al, %eax
3852 ; AVX-LABEL: movmskps256:
3854 ; AVX-NEXT: vmovmskps %ymm0, %eax
3855 ; AVX-NEXT: vzeroupper
3857 %a = bitcast <8 x float> %x to <8 x i32>
3858 %b = icmp slt <8 x i32> %a, zeroinitializer
3859 %c = bitcast <8 x i1> %b to i8
3860 %d = zext i8 %c to i32
; IR idiom for PMOVMSKB: sign-bit compare of <16 x i8>, bitcast to i16, zext —
; expected to lower to a single (v)pmovmskb.
3864 define i32 @movmskb(<16 x i8> %x) {
3865 ; SSE-LABEL: movmskb:
3867 ; SSE-NEXT: pmovmskb %xmm0, %eax
3870 ; AVX-LABEL: movmskb:
3872 ; AVX-NEXT: vpmovmskb %xmm0, %eax
3874 %a = icmp slt <16 x i8> %x, zeroinitializer
3875 %b = bitcast <16 x i1> %a to i16
3876 %c = zext i16 %b to i32
; 256-bit PMOVMSKB idiom: AVX2/AVX512 use one ymm vpmovmskb; SSE and AVX1
; combine two 128-bit masks with shll $16 + orl.
3880 define i32 @movmskb256(<32 x i8> %x) {
3881 ; SSE-LABEL: movmskb256:
3883 ; SSE-NEXT: pmovmskb %xmm0, %ecx
3884 ; SSE-NEXT: pmovmskb %xmm1, %eax
3885 ; SSE-NEXT: shll $16, %eax
3886 ; SSE-NEXT: orl %ecx, %eax
3889 ; AVX1-LABEL: movmskb256:
3891 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
3892 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3893 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3894 ; AVX1-NEXT: shll $16, %eax
3895 ; AVX1-NEXT: orl %ecx, %eax
3896 ; AVX1-NEXT: vzeroupper
3899 ; AVX2-LABEL: movmskb256:
3901 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
3902 ; AVX2-NEXT: vzeroupper
3905 ; AVX512-LABEL: movmskb256:
3907 ; AVX512-NEXT: vpmovmskb %ymm0, %eax
3908 ; AVX512-NEXT: vzeroupper
3910 %a = icmp slt <32 x i8> %x, zeroinitializer
3911 %b = bitcast <32 x i1> %a to i32
3915 ; Multiple extract elements from a vector compare.
; Multiple extractelements (lanes 3, 8, 15) from one <16 x i8> equality
; compare, combined with xor/and: SSE/AVX reuse a single pmovmskb and pick
; bits out with shifts and masks; AVX512 routes through k-register shifts.
3917 define i1 @movmsk_v16i8(<16 x i8> %x, <16 x i8> %y) {
3918 ; SSE-LABEL: movmsk_v16i8:
3920 ; SSE-NEXT: pcmpeqb %xmm1, %xmm0
3921 ; SSE-NEXT: pmovmskb %xmm0, %eax
3922 ; SSE-NEXT: movl %eax, %ecx
3923 ; SSE-NEXT: shrl $15, %ecx
3924 ; SSE-NEXT: movl %eax, %edx
3925 ; SSE-NEXT: shrl $8, %edx
3926 ; SSE-NEXT: andl $1, %edx
3927 ; SSE-NEXT: andl $8, %eax
3928 ; SSE-NEXT: shrl $3, %eax
3929 ; SSE-NEXT: xorl %edx, %eax
3930 ; SSE-NEXT: andl %ecx, %eax
3931 ; SSE-NEXT: # kill: def $al killed $al killed $eax
3934 ; AVX1OR2-LABEL: movmsk_v16i8:
3936 ; AVX1OR2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3937 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
3938 ; AVX1OR2-NEXT: movl %eax, %ecx
3939 ; AVX1OR2-NEXT: shrl $15, %ecx
3940 ; AVX1OR2-NEXT: movl %eax, %edx
3941 ; AVX1OR2-NEXT: shrl $8, %edx
3942 ; AVX1OR2-NEXT: andl $1, %edx
3943 ; AVX1OR2-NEXT: andl $8, %eax
3944 ; AVX1OR2-NEXT: shrl $3, %eax
3945 ; AVX1OR2-NEXT: xorl %edx, %eax
3946 ; AVX1OR2-NEXT: andl %ecx, %eax
3947 ; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax
3948 ; AVX1OR2-NEXT: retq
3950 ; KNL-LABEL: movmsk_v16i8:
3952 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3953 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
3954 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3955 ; KNL-NEXT: kshiftrw $15, %k0, %k1
3956 ; KNL-NEXT: kmovw %k1, %ecx
3957 ; KNL-NEXT: kshiftrw $8, %k0, %k1
3958 ; KNL-NEXT: kmovw %k1, %edx
3959 ; KNL-NEXT: kshiftrw $3, %k0, %k0
3960 ; KNL-NEXT: kmovw %k0, %eax
3961 ; KNL-NEXT: xorb %dl, %al
3962 ; KNL-NEXT: andb %cl, %al
3963 ; KNL-NEXT: # kill: def $al killed $al killed $eax
3964 ; KNL-NEXT: vzeroupper
3967 ; SKX-LABEL: movmsk_v16i8:
3969 ; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
3970 ; SKX-NEXT: kshiftrw $15, %k0, %k1
3971 ; SKX-NEXT: kmovd %k1, %ecx
3972 ; SKX-NEXT: kshiftrw $8, %k0, %k1
3973 ; SKX-NEXT: kmovd %k1, %edx
3974 ; SKX-NEXT: kshiftrw $3, %k0, %k0
3975 ; SKX-NEXT: kmovd %k0, %eax
3976 ; SKX-NEXT: xorb %dl, %al
3977 ; SKX-NEXT: andb %cl, %al
3978 ; SKX-NEXT: # kill: def $al killed $al killed $eax
3980 %cmp = icmp eq <16 x i8> %x, %y
3981 %e1 = extractelement <16 x i1> %cmp, i32 3
3982 %e2 = extractelement <16 x i1> %cmp, i32 8
3983 %e3 = extractelement <16 x i1> %cmp, i32 15
3984 %u1 = xor i1 %e1, %e2
3985 %u2 = and i1 %e3, %u1
; AND-reduction of lanes 0, 1, 4, 7 of an <8 x i16> sgt compare: lowered to a
; single mask extraction, inverted, and tested against bit pattern -109
; (0b10010011 = lanes 0,1,4,7) with sete.
3989 define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) {
3990 ; SSE-LABEL: movmsk_v8i16:
3992 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0
3993 ; SSE-NEXT: packsswb %xmm0, %xmm0
3994 ; SSE-NEXT: pmovmskb %xmm0, %eax
3995 ; SSE-NEXT: notb %al
3996 ; SSE-NEXT: testb $-109, %al
3997 ; SSE-NEXT: sete %al
4000 ; AVX1OR2-LABEL: movmsk_v8i16:
4002 ; AVX1OR2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
4003 ; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
4004 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
4005 ; AVX1OR2-NEXT: notb %al
4006 ; AVX1OR2-NEXT: testb $-109, %al
4007 ; AVX1OR2-NEXT: sete %al
4008 ; AVX1OR2-NEXT: retq
4010 ; KNL-LABEL: movmsk_v8i16:
4012 ; KNL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
4013 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
4014 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
4015 ; KNL-NEXT: kmovw %k0, %eax
4016 ; KNL-NEXT: testb $-109, %al
4017 ; KNL-NEXT: sete %al
4018 ; KNL-NEXT: vzeroupper
4021 ; SKX-LABEL: movmsk_v8i16:
4023 ; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
4024 ; SKX-NEXT: knotb %k0, %k0
4025 ; SKX-NEXT: kmovd %k0, %eax
4026 ; SKX-NEXT: testb $-109, %al
4027 ; SKX-NEXT: sete %al
4029 %cmp = icmp sgt <8 x i16> %x, %y
4030 %e1 = extractelement <8 x i1> %cmp, i32 0
4031 %e2 = extractelement <8 x i1> %cmp, i32 1
4032 %e3 = extractelement <8 x i1> %cmp, i32 7
4033 %e4 = extractelement <8 x i1> %cmp, i32 4
4034 %u1 = and i1 %e1, %e2
4035 %u2 = and i1 %e3, %e4
4036 %u3 = and i1 %u1, %u2
4040 ; TODO: Replace shift+mask chain with AND+CMP.
; XOR of lanes 2 and 3 of a <4 x i32> slt compare: currently lowered via
; movmskps plus a shift/mask chain (the TODO above notes AND+CMP would be
; shorter); AVX512 paths use k-register shifts instead.
4041 define i1 @movmsk_v4i32(<4 x i32> %x, <4 x i32> %y) {
4042 ; SSE-LABEL: movmsk_v4i32:
4044 ; SSE-NEXT: pcmpgtd %xmm0, %xmm1
4045 ; SSE-NEXT: movmskps %xmm1, %eax
4046 ; SSE-NEXT: movl %eax, %ecx
4047 ; SSE-NEXT: shrb $3, %cl
4048 ; SSE-NEXT: andb $4, %al
4049 ; SSE-NEXT: shrb $2, %al
4050 ; SSE-NEXT: xorb %cl, %al
4051 ; SSE-NEXT: # kill: def $al killed $al killed $eax
4054 ; AVX1OR2-LABEL: movmsk_v4i32:
4056 ; AVX1OR2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
4057 ; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
4058 ; AVX1OR2-NEXT: movl %eax, %ecx
4059 ; AVX1OR2-NEXT: shrb $3, %cl
4060 ; AVX1OR2-NEXT: andb $4, %al
4061 ; AVX1OR2-NEXT: shrb $2, %al
4062 ; AVX1OR2-NEXT: xorb %cl, %al
4063 ; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax
4064 ; AVX1OR2-NEXT: retq
4066 ; KNL-LABEL: movmsk_v4i32:
4068 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4069 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4070 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
4071 ; KNL-NEXT: kshiftrw $3, %k0, %k1
4072 ; KNL-NEXT: kmovw %k1, %ecx
4073 ; KNL-NEXT: kshiftrw $2, %k0, %k0
4074 ; KNL-NEXT: kmovw %k0, %eax
4075 ; KNL-NEXT: xorb %cl, %al
4076 ; KNL-NEXT: # kill: def $al killed $al killed $eax
4077 ; KNL-NEXT: vzeroupper
4080 ; SKX-LABEL: movmsk_v4i32:
4082 ; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
4083 ; SKX-NEXT: kshiftrb $3, %k0, %k1
4084 ; SKX-NEXT: kmovd %k1, %ecx
4085 ; SKX-NEXT: kshiftrb $2, %k0, %k0
4086 ; SKX-NEXT: kmovd %k0, %eax
4087 ; SKX-NEXT: xorb %cl, %al
4088 ; SKX-NEXT: # kill: def $al killed $al killed $eax
4090 %cmp = icmp slt <4 x i32> %x, %y
4091 %e1 = extractelement <4 x i1> %cmp, i32 2
4092 %e2 = extractelement <4 x i1> %cmp, i32 3
4093 %u1 = xor i1 %e1, %e2
; AND of both lanes of a <2 x i64> "ne" compare: the ne is implemented as
; eq + inverted mask (xorl $3) on SSE/AVX (SSE2 needs a pshufd+pand to emulate
; 64-bit pcmpeq); SKX has a native vpcmpneqq into a k-register.
4097 define i1 @movmsk_and_v2i64(<2 x i64> %x, <2 x i64> %y) {
4098 ; SSE2-LABEL: movmsk_and_v2i64:
4100 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
4101 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
4102 ; SSE2-NEXT: pand %xmm0, %xmm1
4103 ; SSE2-NEXT: movmskpd %xmm1, %eax
4104 ; SSE2-NEXT: xorl $3, %eax
4105 ; SSE2-NEXT: cmpb $3, %al
4106 ; SSE2-NEXT: sete %al
4109 ; SSE41-LABEL: movmsk_and_v2i64:
4111 ; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
4112 ; SSE41-NEXT: movmskpd %xmm0, %eax
4113 ; SSE41-NEXT: xorl $3, %eax
4114 ; SSE41-NEXT: cmpb $3, %al
4115 ; SSE41-NEXT: sete %al
4118 ; AVX1OR2-LABEL: movmsk_and_v2i64:
4120 ; AVX1OR2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
4121 ; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
4122 ; AVX1OR2-NEXT: xorl $3, %eax
4123 ; AVX1OR2-NEXT: cmpb $3, %al
4124 ; AVX1OR2-NEXT: sete %al
4125 ; AVX1OR2-NEXT: retq
4127 ; KNL-LABEL: movmsk_and_v2i64:
4129 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4130 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4131 ; KNL-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4132 ; KNL-NEXT: kmovw %k0, %eax
4133 ; KNL-NEXT: testb $3, %al
4134 ; KNL-NEXT: sete %al
4135 ; KNL-NEXT: vzeroupper
4138 ; SKX-LABEL: movmsk_and_v2i64:
4140 ; SKX-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
4141 ; SKX-NEXT: kmovd %k0, %eax
4142 ; SKX-NEXT: cmpb $3, %al
4143 ; SKX-NEXT: sete %al
4145 %cmp = icmp ne <2 x i64> %x, %y
4146 %e1 = extractelement <2 x i1> %cmp, i32 0
4147 %e2 = extractelement <2 x i1> %cmp, i32 1
4148 %u1 = and i1 %e1, %e2
; OR of both lanes of a <2 x i64> "ne" compare: "any lane differs" becomes
; "not all lanes equal", i.e. eq-mask != 3 (setne); SKX tests the neq k-mask
; with kortestb + setne.
4152 define i1 @movmsk_or_v2i64(<2 x i64> %x, <2 x i64> %y) {
4153 ; SSE2-LABEL: movmsk_or_v2i64:
4155 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
4156 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
4157 ; SSE2-NEXT: pand %xmm0, %xmm1
4158 ; SSE2-NEXT: movmskpd %xmm1, %eax
4159 ; SSE2-NEXT: cmpl $3, %eax
4160 ; SSE2-NEXT: setne %al
4163 ; SSE41-LABEL: movmsk_or_v2i64:
4165 ; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
4166 ; SSE41-NEXT: movmskpd %xmm0, %eax
4167 ; SSE41-NEXT: cmpl $3, %eax
4168 ; SSE41-NEXT: setne %al
4171 ; AVX1OR2-LABEL: movmsk_or_v2i64:
4173 ; AVX1OR2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
4174 ; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
4175 ; AVX1OR2-NEXT: cmpl $3, %eax
4176 ; AVX1OR2-NEXT: setne %al
4177 ; AVX1OR2-NEXT: retq
4179 ; KNL-LABEL: movmsk_or_v2i64:
4181 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4182 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4183 ; KNL-NEXT: vpcmpneqq %zmm1, %zmm0, %k0
4184 ; KNL-NEXT: kmovw %k0, %eax
4185 ; KNL-NEXT: testb $3, %al
4186 ; KNL-NEXT: setne %al
4187 ; KNL-NEXT: vzeroupper
4190 ; SKX-LABEL: movmsk_or_v2i64:
4192 ; SKX-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
4193 ; SKX-NEXT: kortestb %k0, %k0
4194 ; SKX-NEXT: setne %al
4196 %cmp = icmp ne <2 x i64> %x, %y
4197 %e1 = extractelement <2 x i1> %cmp, i32 0
4198 %e2 = extractelement <2 x i1> %cmp, i32 1
4199 %u1 = or i1 %e1, %e2
; OR of lanes 1-3 of an fcmp ueq compare: SSE synthesizes ueq as eq|unord;
; AVX/AVX512 use the single vcmpeq_uqps predicate. The lane subset is tested
; with mask 14 (0b1110) + setne.
4203 define i1 @movmsk_v4f32(<4 x float> %x, <4 x float> %y) {
4204 ; SSE-LABEL: movmsk_v4f32:
4206 ; SSE-NEXT: movaps %xmm0, %xmm2
4207 ; SSE-NEXT: cmpeqps %xmm1, %xmm2
4208 ; SSE-NEXT: cmpunordps %xmm1, %xmm0
4209 ; SSE-NEXT: orps %xmm2, %xmm0
4210 ; SSE-NEXT: movmskps %xmm0, %eax
4211 ; SSE-NEXT: testb $14, %al
4212 ; SSE-NEXT: setne %al
4215 ; AVX1OR2-LABEL: movmsk_v4f32:
4217 ; AVX1OR2-NEXT: vcmpeq_uqps %xmm1, %xmm0, %xmm0
4218 ; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
4219 ; AVX1OR2-NEXT: testb $14, %al
4220 ; AVX1OR2-NEXT: setne %al
4221 ; AVX1OR2-NEXT: retq
4223 ; KNL-LABEL: movmsk_v4f32:
4225 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4226 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4227 ; KNL-NEXT: vcmpeq_uqps %zmm1, %zmm0, %k0
4228 ; KNL-NEXT: kmovw %k0, %eax
4229 ; KNL-NEXT: testb $14, %al
4230 ; KNL-NEXT: setne %al
4231 ; KNL-NEXT: vzeroupper
4234 ; SKX-LABEL: movmsk_v4f32:
4236 ; SKX-NEXT: vcmpeq_uqps %xmm1, %xmm0, %k0
4237 ; SKX-NEXT: kmovd %k0, %eax
4238 ; SKX-NEXT: testb $14, %al
4239 ; SKX-NEXT: setne %al
4241 %cmp = fcmp ueq <4 x float> %x, %y
4242 %e1 = extractelement <4 x i1> %cmp, i32 1
4243 %e2 = extractelement <4 x i1> %cmp, i32 2
4244 %e3 = extractelement <4 x i1> %cmp, i32 3
4245 %u1 = or i1 %e1, %e2
4246 %u2 = or i1 %u1, %e3
; AND of both lanes of fcmp oge: lowered as the commuted cmplepd (y <= x) with
; the full movmskpd compared against 3; KNL inverts a k-mask, SKX reads the
; k-register directly.
4250 define i1 @movmsk_and_v2f64(<2 x double> %x, <2 x double> %y) {
4251 ; SSE-LABEL: movmsk_and_v2f64:
4253 ; SSE-NEXT: cmplepd %xmm0, %xmm1
4254 ; SSE-NEXT: movmskpd %xmm1, %eax
4255 ; SSE-NEXT: cmpb $3, %al
4256 ; SSE-NEXT: sete %al
4259 ; AVX1OR2-LABEL: movmsk_and_v2f64:
4261 ; AVX1OR2-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
4262 ; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
4263 ; AVX1OR2-NEXT: cmpb $3, %al
4264 ; AVX1OR2-NEXT: sete %al
4265 ; AVX1OR2-NEXT: retq
4267 ; KNL-LABEL: movmsk_and_v2f64:
4269 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4270 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4271 ; KNL-NEXT: vcmplepd %zmm0, %zmm1, %k0
4272 ; KNL-NEXT: knotw %k0, %k0
4273 ; KNL-NEXT: kmovw %k0, %eax
4274 ; KNL-NEXT: testb $3, %al
4275 ; KNL-NEXT: sete %al
4276 ; KNL-NEXT: vzeroupper
4279 ; SKX-LABEL: movmsk_and_v2f64:
4281 ; SKX-NEXT: vcmplepd %xmm0, %xmm1, %k0
4282 ; SKX-NEXT: kmovd %k0, %eax
4283 ; SKX-NEXT: cmpb $3, %al
4284 ; SKX-NEXT: sete %al
4286 %cmp = fcmp oge <2 x double> %x, %y
4287 %e1 = extractelement <2 x i1> %cmp, i32 0
4288 %e2 = extractelement <2 x i1> %cmp, i32 1
4289 %u1 = and i1 %e1, %e2
; OR of both lanes of fcmp oge: "any lane" test on the commuted cmplepd mask
; (testl/setne); SKX uses kortestb + setne on the k-register.
4293 define i1 @movmsk_or_v2f64(<2 x double> %x, <2 x double> %y) {
4294 ; SSE-LABEL: movmsk_or_v2f64:
4296 ; SSE-NEXT: cmplepd %xmm0, %xmm1
4297 ; SSE-NEXT: movmskpd %xmm1, %eax
4298 ; SSE-NEXT: testl %eax, %eax
4299 ; SSE-NEXT: setne %al
4302 ; AVX1OR2-LABEL: movmsk_or_v2f64:
4304 ; AVX1OR2-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
4305 ; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
4306 ; AVX1OR2-NEXT: testl %eax, %eax
4307 ; AVX1OR2-NEXT: setne %al
4308 ; AVX1OR2-NEXT: retq
4310 ; KNL-LABEL: movmsk_or_v2f64:
4312 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4313 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4314 ; KNL-NEXT: vcmplepd %zmm0, %zmm1, %k0
4315 ; KNL-NEXT: kmovw %k0, %eax
4316 ; KNL-NEXT: testb $3, %al
4317 ; KNL-NEXT: setne %al
4318 ; KNL-NEXT: vzeroupper
4321 ; SKX-LABEL: movmsk_or_v2f64:
4323 ; SKX-NEXT: vcmplepd %xmm0, %xmm1, %k0
4324 ; SKX-NEXT: kortestb %k0, %k0
4325 ; SKX-NEXT: setne %al
4327 %cmp = fcmp oge <2 x double> %x, %y
4328 %e1 = extractelement <2 x i1> %cmp, i32 0
4329 %e2 = extractelement <2 x i1> %cmp, i32 1
4330 %u1 = or i1 %e1, %e2
4334 define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
4335 ; SSE-LABEL: PR39665_c_ray:
4337 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
4338 ; SSE-NEXT: movmskpd %xmm1, %eax
4339 ; SSE-NEXT: cmpb $3, %al
4340 ; SSE-NEXT: movl $42, %ecx
4341 ; SSE-NEXT: movl $99, %eax
4342 ; SSE-NEXT: cmovel %ecx, %eax
4345 ; AVX1OR2-LABEL: PR39665_c_ray:
4347 ; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
4348 ; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
4349 ; AVX1OR2-NEXT: cmpb $3, %al
4350 ; AVX1OR2-NEXT: movl $42, %ecx
4351 ; AVX1OR2-NEXT: movl $99, %eax
4352 ; AVX1OR2-NEXT: cmovel %ecx, %eax
4353 ; AVX1OR2-NEXT: retq
4355 ; KNL-LABEL: PR39665_c_ray:
4357 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4358 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4359 ; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k0
4360 ; KNL-NEXT: kshiftrw $1, %k0, %k1
4361 ; KNL-NEXT: kmovw %k1, %eax
4362 ; KNL-NEXT: kmovw %k0, %ecx
4363 ; KNL-NEXT: testb $1, %al
4364 ; KNL-NEXT: movl $42, %eax
4365 ; KNL-NEXT: movl $99, %edx
4366 ; KNL-NEXT: cmovel %edx, %eax
4367 ; KNL-NEXT: testb $1, %cl
4368 ; KNL-NEXT: cmovel %edx, %eax
4369 ; KNL-NEXT: vzeroupper
4372 ; SKX-LABEL: PR39665_c_ray:
4374 ; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
4375 ; SKX-NEXT: kshiftrb $1, %k0, %k1
4376 ; SKX-NEXT: kmovd %k1, %eax
4377 ; SKX-NEXT: kmovd %k0, %ecx
4378 ; SKX-NEXT: testb $1, %al
4379 ; SKX-NEXT: movl $42, %eax
4380 ; SKX-NEXT: movl $99, %edx
4381 ; SKX-NEXT: cmovel %edx, %eax
4382 ; SKX-NEXT: testb $1, %cl
4383 ; SKX-NEXT: cmovel %edx, %eax
4385 %cmp = fcmp ogt <2 x double> %x, %y
4386 %e1 = extractelement <2 x i1> %cmp, i32 0
4387 %e2 = extractelement <2 x i1> %cmp, i32 1
4388 %u = and i1 %e1, %e2
4389 %r = select i1 %u, i32 42, i32 99