1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,KNL
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,SKX
; Sign-mask "all ones" test: each i8 lane of %arg is compared `slt` with zero, the <16 x i1>
; result is bitcast to i16, and that mask is tested for equality with -1 (every lane negative).
9 define i1 @allones_v16i8_sign(<16 x i8> %arg) {
10 ; SSE-LABEL: allones_v16i8_sign:
12 ; SSE-NEXT: pmovmskb %xmm0, %eax
13 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
17 ; AVX-LABEL: allones_v16i8_sign:
19 ; AVX-NEXT: vpmovmskb %xmm0, %eax
20 ; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
23 %tmp = icmp slt <16 x i8> %arg, zeroinitializer
24 %tmp1 = bitcast <16 x i1> %tmp to i16
25 %tmp2 = icmp eq i16 %tmp1, -1
; Sign-mask "all zeros" test: each i8 lane of %arg is compared `slt` with zero, the <16 x i1>
; result is bitcast to i16, and that mask is tested for equality with 0 (no lane negative).
29 define i1 @allzeros_v16i8_sign(<16 x i8> %arg) {
30 ; SSE-LABEL: allzeros_v16i8_sign:
32 ; SSE-NEXT: pmovmskb %xmm0, %eax
33 ; SSE-NEXT: testl %eax, %eax
37 ; AVX-LABEL: allzeros_v16i8_sign:
39 ; AVX-NEXT: vpmovmskb %xmm0, %eax
40 ; AVX-NEXT: testl %eax, %eax
43 %tmp = icmp slt <16 x i8> %arg, zeroinitializer
44 %tmp1 = bitcast <16 x i1> %tmp to i16
45 %tmp2 = icmp eq i16 %tmp1, 0
; Sign-mask "all ones" test on <32 x i8>: lane-wise `icmp slt` against zero, bitcast of the
; <32 x i1> result to i32, then equality with -1 (every lane negative).
49 define i1 @allones_v32i8_sign(<32 x i8> %arg) {
50 ; SSE-LABEL: allones_v32i8_sign:
52 ; SSE-NEXT: pand %xmm1, %xmm0
53 ; SSE-NEXT: pmovmskb %xmm0, %eax
54 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
58 ; AVX1-LABEL: allones_v32i8_sign:
60 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
61 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
62 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
63 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
65 ; AVX1-NEXT: vzeroupper
68 ; AVX2-LABEL: allones_v32i8_sign:
70 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
71 ; AVX2-NEXT: cmpl $-1, %eax
73 ; AVX2-NEXT: vzeroupper
76 ; AVX512-LABEL: allones_v32i8_sign:
78 ; AVX512-NEXT: vpmovmskb %ymm0, %eax
79 ; AVX512-NEXT: cmpl $-1, %eax
80 ; AVX512-NEXT: sete %al
81 ; AVX512-NEXT: vzeroupper
83 %tmp = icmp slt <32 x i8> %arg, zeroinitializer
84 %tmp1 = bitcast <32 x i1> %tmp to i32
85 %tmp2 = icmp eq i32 %tmp1, -1
; Sign-mask "all zeros" test on <32 x i8>: lane-wise `icmp slt` against zero, bitcast of the
; <32 x i1> result to i32, then equality with 0 (no lane negative).
89 define i1 @allzeros_v32i8_sign(<32 x i8> %arg) {
90 ; SSE-LABEL: allzeros_v32i8_sign:
92 ; SSE-NEXT: por %xmm1, %xmm0
93 ; SSE-NEXT: pmovmskb %xmm0, %eax
94 ; SSE-NEXT: testl %eax, %eax
98 ; AVX1-LABEL: allzeros_v32i8_sign:
100 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
101 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
102 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
103 ; AVX1-NEXT: testl %eax, %eax
104 ; AVX1-NEXT: sete %al
105 ; AVX1-NEXT: vzeroupper
108 ; AVX2-LABEL: allzeros_v32i8_sign:
110 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
111 ; AVX2-NEXT: testl %eax, %eax
112 ; AVX2-NEXT: sete %al
113 ; AVX2-NEXT: vzeroupper
116 ; AVX512-LABEL: allzeros_v32i8_sign:
118 ; AVX512-NEXT: vpmovmskb %ymm0, %eax
119 ; AVX512-NEXT: testl %eax, %eax
120 ; AVX512-NEXT: sete %al
121 ; AVX512-NEXT: vzeroupper
123 %tmp = icmp slt <32 x i8> %arg, zeroinitializer
124 %tmp1 = bitcast <32 x i1> %tmp to i32
125 %tmp2 = icmp eq i32 %tmp1, 0
; Sign-mask "all ones" test on <64 x i8>: lane-wise `icmp slt` against zero, bitcast of the
; <64 x i1> result to i64, then equality with -1 (every lane negative).
129 define i1 @allones_v64i8_sign(<64 x i8> %arg) {
130 ; SSE-LABEL: allones_v64i8_sign:
132 ; SSE-NEXT: pand %xmm2, %xmm0
133 ; SSE-NEXT: pand %xmm3, %xmm1
134 ; SSE-NEXT: pand %xmm0, %xmm1
135 ; SSE-NEXT: pmovmskb %xmm1, %eax
136 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
140 ; AVX1-LABEL: allones_v64i8_sign:
142 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
143 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
144 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
145 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
146 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
147 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
148 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
149 ; AVX1-NEXT: sete %al
150 ; AVX1-NEXT: vzeroupper
153 ; AVX2-LABEL: allones_v64i8_sign:
155 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
156 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
157 ; AVX2-NEXT: cmpl $-1, %eax
158 ; AVX2-NEXT: sete %al
159 ; AVX2-NEXT: vzeroupper
162 ; KNL-LABEL: allones_v64i8_sign:
164 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
165 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
166 ; KNL-NEXT: vpmovmskb %ymm0, %eax
167 ; KNL-NEXT: cmpl $-1, %eax
169 ; KNL-NEXT: vzeroupper
172 ; SKX-LABEL: allones_v64i8_sign:
174 ; SKX-NEXT: vpmovb2m %zmm0, %k0
175 ; SKX-NEXT: kortestq %k0, %k0
177 ; SKX-NEXT: vzeroupper
179 %tmp = icmp slt <64 x i8> %arg, zeroinitializer
180 %tmp1 = bitcast <64 x i1> %tmp to i64
181 %tmp2 = icmp eq i64 %tmp1, -1
; Sign-mask "all zeros" test on <64 x i8>: lane-wise `icmp slt` against zero, bitcast of the
; <64 x i1> result to i64, then equality with 0 (no lane negative).
185 define i1 @allzeros_v64i8_sign(<64 x i8> %arg) {
186 ; SSE-LABEL: allzeros_v64i8_sign:
188 ; SSE-NEXT: por %xmm3, %xmm1
189 ; SSE-NEXT: por %xmm2, %xmm0
190 ; SSE-NEXT: por %xmm1, %xmm0
191 ; SSE-NEXT: pmovmskb %xmm0, %eax
192 ; SSE-NEXT: testl %eax, %eax
196 ; AVX1-LABEL: allzeros_v64i8_sign:
198 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
199 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
200 ; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2
201 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
202 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
203 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
204 ; AVX1-NEXT: testl %eax, %eax
205 ; AVX1-NEXT: sete %al
206 ; AVX1-NEXT: vzeroupper
209 ; AVX2-LABEL: allzeros_v64i8_sign:
211 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
212 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
213 ; AVX2-NEXT: testl %eax, %eax
214 ; AVX2-NEXT: sete %al
215 ; AVX2-NEXT: vzeroupper
218 ; KNL-LABEL: allzeros_v64i8_sign:
220 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
221 ; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
222 ; KNL-NEXT: vpmovmskb %ymm0, %eax
223 ; KNL-NEXT: testl %eax, %eax
225 ; KNL-NEXT: vzeroupper
228 ; SKX-LABEL: allzeros_v64i8_sign:
230 ; SKX-NEXT: vpmovb2m %zmm0, %k0
231 ; SKX-NEXT: kortestq %k0, %k0
233 ; SKX-NEXT: vzeroupper
235 %tmp = icmp slt <64 x i8> %arg, zeroinitializer
236 %tmp1 = bitcast <64 x i1> %tmp to i64
237 %tmp2 = icmp eq i64 %tmp1, 0
; Sign-mask "all ones" test on <8 x i16>: lane-wise `icmp slt` against zero, bitcast of the
; <8 x i1> result to i8, then equality with -1 (every lane negative).
241 define i1 @allones_v8i16_sign(<8 x i16> %arg) {
242 ; SSE-LABEL: allones_v8i16_sign:
244 ; SSE-NEXT: packsswb %xmm0, %xmm0
245 ; SSE-NEXT: pmovmskb %xmm0, %eax
246 ; SSE-NEXT: cmpb $-1, %al
250 ; AVX1OR2-LABEL: allones_v8i16_sign:
252 ; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
253 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
254 ; AVX1OR2-NEXT: cmpb $-1, %al
255 ; AVX1OR2-NEXT: sete %al
258 ; KNL-LABEL: allones_v8i16_sign:
260 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
261 ; KNL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
262 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
263 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
264 ; KNL-NEXT: kmovw %k0, %eax
265 ; KNL-NEXT: cmpb $-1, %al
267 ; KNL-NEXT: vzeroupper
270 ; SKX-LABEL: allones_v8i16_sign:
272 ; SKX-NEXT: vpmovw2m %xmm0, %k0
273 ; SKX-NEXT: kortestb %k0, %k0
276 %tmp = icmp slt <8 x i16> %arg, zeroinitializer
277 %tmp1 = bitcast <8 x i1> %tmp to i8
278 %tmp2 = icmp eq i8 %tmp1, -1
; Sign-mask "all zeros" test on <8 x i16>: lane-wise `icmp slt` against zero, bitcast of the
; <8 x i1> result to i8, then equality with 0 (no lane negative).
282 define i1 @allzeros_v8i16_sign(<8 x i16> %arg) {
283 ; SSE-LABEL: allzeros_v8i16_sign:
285 ; SSE-NEXT: pmovmskb %xmm0, %eax
286 ; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA
290 ; AVX1OR2-LABEL: allzeros_v8i16_sign:
292 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
293 ; AVX1OR2-NEXT: testl $43690, %eax # imm = 0xAAAA
294 ; AVX1OR2-NEXT: sete %al
297 ; KNL-LABEL: allzeros_v8i16_sign:
299 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
300 ; KNL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
301 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
302 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
303 ; KNL-NEXT: kortestw %k0, %k0
305 ; KNL-NEXT: vzeroupper
308 ; SKX-LABEL: allzeros_v8i16_sign:
310 ; SKX-NEXT: vpmovw2m %xmm0, %k0
311 ; SKX-NEXT: kortestb %k0, %k0
314 %tmp = icmp slt <8 x i16> %arg, zeroinitializer
315 %tmp1 = bitcast <8 x i1> %tmp to i8
316 %tmp2 = icmp eq i8 %tmp1, 0
; Sign-mask "all ones" test on <16 x i16>: lane-wise `icmp slt` against zero, bitcast of the
; <16 x i1> result to i16, then equality with -1 (every lane negative).
320 define i1 @allones_v16i16_sign(<16 x i16> %arg) {
321 ; SSE-LABEL: allones_v16i16_sign:
323 ; SSE-NEXT: packsswb %xmm1, %xmm0
324 ; SSE-NEXT: pmovmskb %xmm0, %eax
325 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
329 ; AVX1-LABEL: allones_v16i16_sign:
331 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
332 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
333 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
334 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
335 ; AVX1-NEXT: sete %al
336 ; AVX1-NEXT: vzeroupper
339 ; AVX2-LABEL: allones_v16i16_sign:
341 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
342 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
343 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
344 ; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
345 ; AVX2-NEXT: sete %al
346 ; AVX2-NEXT: vzeroupper
349 ; KNL-LABEL: allones_v16i16_sign:
351 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
352 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
353 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
354 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
355 ; KNL-NEXT: kortestw %k0, %k0
357 ; KNL-NEXT: vzeroupper
360 ; SKX-LABEL: allones_v16i16_sign:
362 ; SKX-NEXT: vpmovw2m %ymm0, %k0
363 ; SKX-NEXT: kortestw %k0, %k0
365 ; SKX-NEXT: vzeroupper
367 %tmp = icmp slt <16 x i16> %arg, zeroinitializer
368 %tmp1 = bitcast <16 x i1> %tmp to i16
369 %tmp2 = icmp eq i16 %tmp1, -1
; Sign-mask "all zeros" test on <16 x i16>: lane-wise `icmp slt` against zero, bitcast of the
; <16 x i1> result to i16, then equality with 0 (no lane negative).
373 define i1 @allzeros_v16i16_sign(<16 x i16> %arg) {
374 ; SSE-LABEL: allzeros_v16i16_sign:
376 ; SSE-NEXT: packsswb %xmm1, %xmm0
377 ; SSE-NEXT: pmovmskb %xmm0, %eax
378 ; SSE-NEXT: testl %eax, %eax
382 ; AVX1-LABEL: allzeros_v16i16_sign:
384 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
385 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
386 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
387 ; AVX1-NEXT: testl %eax, %eax
388 ; AVX1-NEXT: sete %al
389 ; AVX1-NEXT: vzeroupper
392 ; AVX2-LABEL: allzeros_v16i16_sign:
394 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
395 ; AVX2-NEXT: testl $-1431655766, %eax # imm = 0xAAAAAAAA
396 ; AVX2-NEXT: sete %al
397 ; AVX2-NEXT: vzeroupper
400 ; KNL-LABEL: allzeros_v16i16_sign:
402 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
403 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
404 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
405 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
406 ; KNL-NEXT: kortestw %k0, %k0
408 ; KNL-NEXT: vzeroupper
411 ; SKX-LABEL: allzeros_v16i16_sign:
413 ; SKX-NEXT: vpmovw2m %ymm0, %k0
414 ; SKX-NEXT: kortestw %k0, %k0
416 ; SKX-NEXT: vzeroupper
418 %tmp = icmp slt <16 x i16> %arg, zeroinitializer
419 %tmp1 = bitcast <16 x i1> %tmp to i16
420 %tmp2 = icmp eq i16 %tmp1, 0
; Sign-mask "all ones" test on <32 x i16>: lane-wise `icmp slt` against zero, bitcast of the
; <32 x i1> result to i32, then equality with -1 (every lane negative).
424 define i1 @allones_v32i16_sign(<32 x i16> %arg) {
425 ; SSE-LABEL: allones_v32i16_sign:
427 ; SSE-NEXT: packsswb %xmm1, %xmm0
428 ; SSE-NEXT: packsswb %xmm3, %xmm2
429 ; SSE-NEXT: pand %xmm0, %xmm2
430 ; SSE-NEXT: pmovmskb %xmm2, %eax
431 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
435 ; AVX1-LABEL: allones_v32i16_sign:
437 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
438 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
439 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
440 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
441 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
442 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
443 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
444 ; AVX1-NEXT: sete %al
445 ; AVX1-NEXT: vzeroupper
448 ; AVX2-LABEL: allones_v32i16_sign:
450 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
451 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
452 ; AVX2-NEXT: cmpl $-1, %eax
453 ; AVX2-NEXT: sete %al
454 ; AVX2-NEXT: vzeroupper
457 ; KNL-LABEL: allones_v32i16_sign:
459 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
460 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
461 ; KNL-NEXT: vpmovsxwd %ymm2, %zmm2
462 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
463 ; KNL-NEXT: kmovw %k0, %eax
464 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
465 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
466 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
467 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
468 ; KNL-NEXT: kmovw %k0, %ecx
469 ; KNL-NEXT: andl %eax, %ecx
470 ; KNL-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
472 ; KNL-NEXT: vzeroupper
475 ; SKX-LABEL: allones_v32i16_sign:
477 ; SKX-NEXT: vpmovw2m %zmm0, %k0
478 ; SKX-NEXT: kortestd %k0, %k0
480 ; SKX-NEXT: vzeroupper
482 %tmp = icmp slt <32 x i16> %arg, zeroinitializer
483 %tmp1 = bitcast <32 x i1> %tmp to i32
484 %tmp2 = icmp eq i32 %tmp1, -1
; Sign-mask "all zeros" test on <32 x i16>: lane-wise `icmp slt` against zero, bitcast of the
; <32 x i1> result to i32, then equality with 0 (no lane negative).
488 define i1 @allzeros_v32i16_sign(<32 x i16> %arg) {
489 ; SSE-LABEL: allzeros_v32i16_sign:
491 ; SSE-NEXT: packsswb %xmm3, %xmm2
492 ; SSE-NEXT: packsswb %xmm1, %xmm0
493 ; SSE-NEXT: por %xmm2, %xmm0
494 ; SSE-NEXT: pmovmskb %xmm0, %eax
495 ; SSE-NEXT: testl %eax, %eax
499 ; AVX1-LABEL: allzeros_v32i16_sign:
501 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
502 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
503 ; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2
504 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
505 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
506 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
507 ; AVX1-NEXT: testl %eax, %eax
508 ; AVX1-NEXT: sete %al
509 ; AVX1-NEXT: vzeroupper
512 ; AVX2-LABEL: allzeros_v32i16_sign:
514 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
515 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
516 ; AVX2-NEXT: testl %eax, %eax
517 ; AVX2-NEXT: sete %al
518 ; AVX2-NEXT: vzeroupper
521 ; KNL-LABEL: allzeros_v32i16_sign:
523 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
524 ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
525 ; KNL-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
526 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
527 ; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
528 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
529 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
530 ; KNL-NEXT: kortestw %k0, %k0
532 ; KNL-NEXT: vzeroupper
535 ; SKX-LABEL: allzeros_v32i16_sign:
537 ; SKX-NEXT: vpmovw2m %zmm0, %k0
538 ; SKX-NEXT: kortestd %k0, %k0
540 ; SKX-NEXT: vzeroupper
542 %tmp = icmp slt <32 x i16> %arg, zeroinitializer
543 %tmp1 = bitcast <32 x i1> %tmp to i32
544 %tmp2 = icmp eq i32 %tmp1, 0
; Sign-mask "all ones" test on <4 x i32>: lane-wise `icmp slt` against zero, bitcast of the
; <4 x i1> result to the non-byte-sized i4, then equality with -1 (every lane negative).
548 define i1 @allones_v4i32_sign(<4 x i32> %arg) {
549 ; SSE-LABEL: allones_v4i32_sign:
551 ; SSE-NEXT: movmskps %xmm0, %eax
552 ; SSE-NEXT: cmpl $15, %eax
556 ; AVX-LABEL: allones_v4i32_sign:
558 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
559 ; AVX-NEXT: vtestps %xmm1, %xmm0
562 %tmp = icmp slt <4 x i32> %arg, zeroinitializer
563 %tmp1 = bitcast <4 x i1> %tmp to i4
564 %tmp2 = icmp eq i4 %tmp1, -1
; Sign-mask "all zeros" test on <4 x i32>: lane-wise `icmp slt` against zero, bitcast of the
; <4 x i1> result to the non-byte-sized i4, then equality with 0 (no lane negative).
568 define i1 @allzeros_v4i32_sign(<4 x i32> %arg) {
569 ; SSE-LABEL: allzeros_v4i32_sign:
571 ; SSE-NEXT: movmskps %xmm0, %eax
572 ; SSE-NEXT: testl %eax, %eax
576 ; AVX-LABEL: allzeros_v4i32_sign:
578 ; AVX-NEXT: vtestps %xmm0, %xmm0
581 %tmp = icmp slt <4 x i32> %arg, zeroinitializer
582 %tmp1 = bitcast <4 x i1> %tmp to i4
583 %tmp2 = icmp eq i4 %tmp1, 0
; Sign-mask "all ones" test on <8 x i32>: lane-wise `icmp slt` against zero, bitcast of the
; <8 x i1> result to i8, then equality with -1 (every lane negative).
587 define i1 @allones_v8i32_sign(<8 x i32> %arg) {
588 ; SSE-LABEL: allones_v8i32_sign:
590 ; SSE-NEXT: packssdw %xmm1, %xmm0
591 ; SSE-NEXT: packsswb %xmm0, %xmm0
592 ; SSE-NEXT: pmovmskb %xmm0, %eax
593 ; SSE-NEXT: cmpb $-1, %al
597 ; AVX1-LABEL: allones_v8i32_sign:
599 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
600 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
601 ; AVX1-NEXT: vtestps %ymm1, %ymm0
602 ; AVX1-NEXT: setb %al
603 ; AVX1-NEXT: vzeroupper
606 ; AVX2-LABEL: allones_v8i32_sign:
608 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
609 ; AVX2-NEXT: vtestps %ymm1, %ymm0
610 ; AVX2-NEXT: setb %al
611 ; AVX2-NEXT: vzeroupper
614 ; AVX512-LABEL: allones_v8i32_sign:
616 ; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
617 ; AVX512-NEXT: vtestps %ymm1, %ymm0
618 ; AVX512-NEXT: setb %al
619 ; AVX512-NEXT: vzeroupper
621 %tmp = icmp slt <8 x i32> %arg, zeroinitializer
622 %tmp1 = bitcast <8 x i1> %tmp to i8
623 %tmp2 = icmp eq i8 %tmp1, -1
; Sign-mask "all zeros" test on <8 x i32>: lane-wise `icmp slt` against zero, bitcast of the
; <8 x i1> result to i8, then equality with 0 (no lane negative).
627 define i1 @allzeros_v8i32_sign(<8 x i32> %arg) {
628 ; SSE-LABEL: allzeros_v8i32_sign:
630 ; SSE-NEXT: packssdw %xmm1, %xmm0
631 ; SSE-NEXT: pmovmskb %xmm0, %eax
632 ; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA
636 ; AVX-LABEL: allzeros_v8i32_sign:
638 ; AVX-NEXT: vtestps %ymm0, %ymm0
640 ; AVX-NEXT: vzeroupper
642 %tmp = icmp slt <8 x i32> %arg, zeroinitializer
643 %tmp1 = bitcast <8 x i1> %tmp to i8
644 %tmp2 = icmp eq i8 %tmp1, 0
; Sign-mask "all ones" test on <16 x i32>: lane-wise `icmp slt` against zero, bitcast of the
; <16 x i1> result to i16, then equality with -1 (every lane negative).
648 define i1 @allones_v16i32_sign(<16 x i32> %arg) {
649 ; SSE-LABEL: allones_v16i32_sign:
651 ; SSE-NEXT: packssdw %xmm3, %xmm2
652 ; SSE-NEXT: packssdw %xmm1, %xmm0
653 ; SSE-NEXT: packsswb %xmm2, %xmm0
654 ; SSE-NEXT: pmovmskb %xmm0, %eax
655 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
659 ; AVX1-LABEL: allones_v16i32_sign:
661 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
662 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
663 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
664 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
665 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
666 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
667 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
668 ; AVX1-NEXT: sete %al
669 ; AVX1-NEXT: vzeroupper
672 ; AVX2-LABEL: allones_v16i32_sign:
674 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
675 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1
676 ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
677 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
678 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
679 ; AVX2-NEXT: cmpl $-1, %eax
680 ; AVX2-NEXT: sete %al
681 ; AVX2-NEXT: vzeroupper
684 ; KNL-LABEL: allones_v16i32_sign:
686 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
687 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
688 ; KNL-NEXT: kortestw %k0, %k0
690 ; KNL-NEXT: vzeroupper
693 ; SKX-LABEL: allones_v16i32_sign:
695 ; SKX-NEXT: vpmovd2m %zmm0, %k0
696 ; SKX-NEXT: kortestw %k0, %k0
698 ; SKX-NEXT: vzeroupper
700 %tmp = icmp slt <16 x i32> %arg, zeroinitializer
701 %tmp1 = bitcast <16 x i1> %tmp to i16
702 %tmp2 = icmp eq i16 %tmp1, -1
; Sign-mask "all zeros" test on <16 x i32>: lane-wise `icmp slt` against zero, bitcast of the
; <16 x i1> result to i16, then equality with 0 (no lane negative).
706 define i1 @allzeros_v16i32_sign(<16 x i32> %arg) {
707 ; SSE-LABEL: allzeros_v16i32_sign:
709 ; SSE-NEXT: packssdw %xmm3, %xmm2
710 ; SSE-NEXT: packssdw %xmm1, %xmm0
711 ; SSE-NEXT: packsswb %xmm2, %xmm0
712 ; SSE-NEXT: pmovmskb %xmm0, %eax
713 ; SSE-NEXT: testl %eax, %eax
717 ; AVX1-LABEL: allzeros_v16i32_sign:
719 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
720 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
721 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
722 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
723 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
724 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
725 ; AVX1-NEXT: testl %eax, %eax
726 ; AVX1-NEXT: sete %al
727 ; AVX1-NEXT: vzeroupper
730 ; AVX2-LABEL: allzeros_v16i32_sign:
732 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
733 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1
734 ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
735 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
736 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
737 ; AVX2-NEXT: testl %eax, %eax
738 ; AVX2-NEXT: sete %al
739 ; AVX2-NEXT: vzeroupper
742 ; KNL-LABEL: allzeros_v16i32_sign:
744 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
745 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
746 ; KNL-NEXT: kortestw %k0, %k0
748 ; KNL-NEXT: vzeroupper
751 ; SKX-LABEL: allzeros_v16i32_sign:
753 ; SKX-NEXT: vpmovd2m %zmm0, %k0
754 ; SKX-NEXT: kortestw %k0, %k0
756 ; SKX-NEXT: vzeroupper
758 %tmp = icmp slt <16 x i32> %arg, zeroinitializer
759 %tmp1 = bitcast <16 x i1> %tmp to i16
760 %tmp2 = icmp eq i16 %tmp1, 0
; Sign-mask "all ones" test on <4 x i64>: lane-wise `icmp slt` against zero, bitcast of the
; <4 x i1> result to the non-byte-sized i4, then equality with -1 (every lane negative).
764 define i1 @allones_v4i64_sign(<4 x i64> %arg) {
765 ; SSE-LABEL: allones_v4i64_sign:
767 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
768 ; SSE-NEXT: movmskps %xmm0, %eax
769 ; SSE-NEXT: cmpl $15, %eax
773 ; AVX1-LABEL: allones_v4i64_sign:
775 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
776 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
777 ; AVX1-NEXT: vtestpd %ymm1, %ymm0
778 ; AVX1-NEXT: setb %al
779 ; AVX1-NEXT: vzeroupper
782 ; AVX2-LABEL: allones_v4i64_sign:
784 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
785 ; AVX2-NEXT: vtestpd %ymm1, %ymm0
786 ; AVX2-NEXT: setb %al
787 ; AVX2-NEXT: vzeroupper
790 ; AVX512-LABEL: allones_v4i64_sign:
792 ; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
793 ; AVX512-NEXT: vtestpd %ymm1, %ymm0
794 ; AVX512-NEXT: setb %al
795 ; AVX512-NEXT: vzeroupper
797 %tmp = icmp slt <4 x i64> %arg, zeroinitializer
798 %tmp1 = bitcast <4 x i1> %tmp to i4
799 %tmp2 = icmp eq i4 %tmp1, -1
; Sign-mask "all zeros" test on <4 x i64>: lane-wise `icmp slt` against zero, bitcast of the
; <4 x i1> result to the non-byte-sized i4, then equality with 0 (no lane negative).
803 define i1 @allzeros_v4i64_sign(<4 x i64> %arg) {
804 ; SSE-LABEL: allzeros_v4i64_sign:
806 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
807 ; SSE-NEXT: movmskps %xmm0, %eax
808 ; SSE-NEXT: testl %eax, %eax
812 ; AVX-LABEL: allzeros_v4i64_sign:
814 ; AVX-NEXT: vtestpd %ymm0, %ymm0
816 ; AVX-NEXT: vzeroupper
818 %tmp = icmp slt <4 x i64> %arg, zeroinitializer
819 %tmp1 = bitcast <4 x i1> %tmp to i4
820 %tmp2 = icmp eq i4 %tmp1, 0
; Sign-mask "all ones" test on <8 x i64>: lane-wise `icmp slt` against zero, bitcast of the
; <8 x i1> result to i8, then equality with -1 (every lane negative).
824 define i1 @allones_v8i64_sign(<8 x i64> %arg) {
825 ; SSE-LABEL: allones_v8i64_sign:
827 ; SSE-NEXT: packssdw %xmm3, %xmm2
828 ; SSE-NEXT: packssdw %xmm1, %xmm0
829 ; SSE-NEXT: packssdw %xmm2, %xmm0
830 ; SSE-NEXT: packsswb %xmm0, %xmm0
831 ; SSE-NEXT: pmovmskb %xmm0, %eax
832 ; SSE-NEXT: cmpb $-1, %al
836 ; AVX1-LABEL: allones_v8i64_sign:
838 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
839 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
840 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
841 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
842 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
843 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
844 ; AVX1-NEXT: vtestps %xmm1, %xmm0
845 ; AVX1-NEXT: setb %al
846 ; AVX1-NEXT: vzeroupper
849 ; AVX2-LABEL: allones_v8i64_sign:
851 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
852 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
853 ; AVX2-NEXT: vtestps %ymm1, %ymm0
854 ; AVX2-NEXT: setb %al
855 ; AVX2-NEXT: vzeroupper
858 ; KNL-LABEL: allones_v8i64_sign:
860 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
861 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
862 ; KNL-NEXT: kmovw %k0, %eax
863 ; KNL-NEXT: cmpb $-1, %al
865 ; KNL-NEXT: vzeroupper
868 ; SKX-LABEL: allones_v8i64_sign:
870 ; SKX-NEXT: vpmovq2m %zmm0, %k0
871 ; SKX-NEXT: kortestb %k0, %k0
873 ; SKX-NEXT: vzeroupper
875 %tmp = icmp slt <8 x i64> %arg, zeroinitializer
876 %tmp1 = bitcast <8 x i1> %tmp to i8
877 %tmp2 = icmp eq i8 %tmp1, -1
; Sign-mask "all zeros" test on <8 x i64>: lane-wise `icmp slt` against zero, bitcast of the
; <8 x i1> result to i8, then equality with 0 (no lane negative).
881 define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
882 ; SSE-LABEL: allzeros_v8i64_sign:
884 ; SSE-NEXT: packssdw %xmm3, %xmm2
885 ; SSE-NEXT: packssdw %xmm1, %xmm0
886 ; SSE-NEXT: packssdw %xmm2, %xmm0
887 ; SSE-NEXT: pmovmskb %xmm0, %eax
888 ; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA
892 ; AVX1-LABEL: allzeros_v8i64_sign:
894 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
895 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
896 ; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2
897 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
898 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
899 ; AVX1-NEXT: vtestps %xmm0, %xmm0
900 ; AVX1-NEXT: sete %al
901 ; AVX1-NEXT: vzeroupper
904 ; AVX2-LABEL: allzeros_v8i64_sign:
906 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
907 ; AVX2-NEXT: vtestps %ymm0, %ymm0
908 ; AVX2-NEXT: sete %al
909 ; AVX2-NEXT: vzeroupper
912 ; KNL-LABEL: allzeros_v8i64_sign:
914 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
915 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
916 ; KNL-NEXT: kmovw %k0, %eax
917 ; KNL-NEXT: testb %al, %al
919 ; KNL-NEXT: vzeroupper
922 ; SKX-LABEL: allzeros_v8i64_sign:
924 ; SKX-NEXT: vpmovq2m %zmm0, %k0
925 ; SKX-NEXT: kortestb %k0, %k0
927 ; SKX-NEXT: vzeroupper
929 %tmp = icmp slt <8 x i64> %arg, zeroinitializer
930 %tmp1 = bitcast <8 x i1> %tmp to i8
931 %tmp2 = icmp eq i8 %tmp1, 0
; Low-bit "all ones" test: each i8 lane of %arg is ANDed with 1 and tested `ne` zero; the
; <16 x i1> result is bitcast to i16 and compared for equality with -1 (bit 0 set in every lane).
935 define i1 @allones_v16i8_and1(<16 x i8> %arg) {
936 ; SSE-LABEL: allones_v16i8_and1:
938 ; SSE-NEXT: psllw $7, %xmm0
939 ; SSE-NEXT: pmovmskb %xmm0, %eax
940 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
944 ; AVX1OR2-LABEL: allones_v16i8_and1:
946 ; AVX1OR2-NEXT: vpsllw $7, %xmm0, %xmm0
947 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
948 ; AVX1OR2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
949 ; AVX1OR2-NEXT: sete %al
952 ; KNL-LABEL: allones_v16i8_and1:
954 ; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
955 ; KNL-NEXT: vpmovmskb %xmm0, %eax
956 ; KNL-NEXT: cmpl $65535, %eax # imm = 0xFFFF
960 ; SKX-LABEL: allones_v16i8_and1:
962 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
963 ; SKX-NEXT: kortestw %k0, %k0
966 %tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
967 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
968 %tmp2 = bitcast <16 x i1> %tmp1 to i16
969 %tmp3 = icmp eq i16 %tmp2, -1
; Negated all-zeros test: lane-wise `icmp eq` of %a0 against zero gives a <16 x i1> mask of zero
; lanes; it is bitcast to i16 and `icmp ne`-compared with -1 (true when some lane is non-zero).
973 define i1 @allzeros_v16i8_not(<16 x i8> %a0) {
974 ; SSE2-LABEL: allzeros_v16i8_not:
976 ; SSE2-NEXT: pxor %xmm1, %xmm1
977 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
978 ; SSE2-NEXT: pmovmskb %xmm1, %eax
979 ; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
980 ; SSE2-NEXT: setne %al
983 ; SSE41-LABEL: allzeros_v16i8_not:
985 ; SSE41-NEXT: ptest %xmm0, %xmm0
986 ; SSE41-NEXT: setne %al
989 ; AVX-LABEL: allzeros_v16i8_not:
991 ; AVX-NEXT: vptest %xmm0, %xmm0
992 ; AVX-NEXT: setne %al
994 %1 = icmp eq <16 x i8> %a0, zeroinitializer
995 %2 = bitcast <16 x i1> %1 to i16
996 %3 = icmp ne i16 %2, -1
; Negated all-zeros test on <2 x i64>: lane-wise `icmp eq` against zero, bitcast of the
; <2 x i1> mask to i2, then `icmp ne` with -1 (true when some lane is non-zero).
1000 define i1 @allzeros_v2i64_not(<2 x i64> %a0) {
1001 ; SSE2-LABEL: allzeros_v2i64_not:
1003 ; SSE2-NEXT: pxor %xmm1, %xmm1
1004 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
1005 ; SSE2-NEXT: movmskps %xmm1, %eax
1006 ; SSE2-NEXT: xorl $15, %eax
1007 ; SSE2-NEXT: setne %al
1010 ; SSE41-LABEL: allzeros_v2i64_not:
1012 ; SSE41-NEXT: ptest %xmm0, %xmm0
1013 ; SSE41-NEXT: setne %al
1016 ; AVX-LABEL: allzeros_v2i64_not:
1018 ; AVX-NEXT: vptest %xmm0, %xmm0
1019 ; AVX-NEXT: setne %al
1021 %1 = icmp eq <2 x i64> %a0, zeroinitializer
1022 %2 = bitcast <2 x i1> %1 to i2
1023 %3 = icmp ne i2 %2, -1
; Negated all-zeros test on <8 x i32>: lane-wise `icmp eq` against zero, bitcast of the
; <8 x i1> mask to i8, then `icmp ne` with -1 (true when some lane is non-zero).
1027 define i1 @allzeros_v8i32_not(<8 x i32> %a0) {
1028 ; SSE2-LABEL: allzeros_v8i32_not:
1030 ; SSE2-NEXT: por %xmm1, %xmm0
1031 ; SSE2-NEXT: pxor %xmm1, %xmm1
1032 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
1033 ; SSE2-NEXT: movmskps %xmm1, %eax
1034 ; SSE2-NEXT: xorl $15, %eax
1035 ; SSE2-NEXT: setne %al
1038 ; SSE41-LABEL: allzeros_v8i32_not:
1040 ; SSE41-NEXT: por %xmm1, %xmm0
1041 ; SSE41-NEXT: ptest %xmm0, %xmm0
1042 ; SSE41-NEXT: setne %al
1045 ; AVX-LABEL: allzeros_v8i32_not:
1047 ; AVX-NEXT: vptest %ymm0, %ymm0
1048 ; AVX-NEXT: setne %al
1049 ; AVX-NEXT: vzeroupper
1051 %1 = icmp eq <8 x i32> %a0, zeroinitializer
1052 %2 = bitcast <8 x i1> %1 to i8
1053 %3 = icmp ne i8 %2, -1
; Negated all-zeros test on <8 x i64>: lane-wise `icmp eq` against zero, bitcast of the
; <8 x i1> mask to i8, then `icmp ne` with -1 (true when some lane is non-zero).
1057 define i1 @allzeros_v8i64_not(<8 x i64> %a0) {
1058 ; SSE2-LABEL: allzeros_v8i64_not:
1060 ; SSE2-NEXT: por %xmm3, %xmm1
1061 ; SSE2-NEXT: por %xmm2, %xmm0
1062 ; SSE2-NEXT: por %xmm1, %xmm0
1063 ; SSE2-NEXT: pxor %xmm1, %xmm1
1064 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
1065 ; SSE2-NEXT: movmskps %xmm1, %eax
1066 ; SSE2-NEXT: xorl $15, %eax
1067 ; SSE2-NEXT: setne %al
1070 ; SSE41-LABEL: allzeros_v8i64_not:
1072 ; SSE41-NEXT: por %xmm3, %xmm1
1073 ; SSE41-NEXT: por %xmm2, %xmm0
1074 ; SSE41-NEXT: por %xmm1, %xmm0
1075 ; SSE41-NEXT: ptest %xmm0, %xmm0
1076 ; SSE41-NEXT: setne %al
1079 ; AVX1-LABEL: allzeros_v8i64_not:
1081 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1082 ; AVX1-NEXT: vptest %ymm0, %ymm0
1083 ; AVX1-NEXT: setne %al
1084 ; AVX1-NEXT: vzeroupper
1087 ; AVX2-LABEL: allzeros_v8i64_not:
1089 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1090 ; AVX2-NEXT: vptest %ymm0, %ymm0
1091 ; AVX2-NEXT: setne %al
1092 ; AVX2-NEXT: vzeroupper
1095 ; AVX512-LABEL: allzeros_v8i64_not:
1097 ; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
1098 ; AVX512-NEXT: kortestw %k0, %k0
1099 ; AVX512-NEXT: setne %al
1100 ; AVX512-NEXT: vzeroupper
1102 %1 = icmp eq <8 x i64> %a0, zeroinitializer
1103 %2 = bitcast <8 x i1> %1 to i8
1104 %3 = icmp ne i8 %2, -1
; Low-bit "all zeros" test: each i8 lane of %arg is ANDed with 1 and tested `ne` zero; the
; <16 x i1> result is bitcast to i16 and compared for equality with 0 (bit 0 clear in every lane).
1108 define i1 @allzeros_v16i8_and1(<16 x i8> %arg) {
1109 ; SSE2-LABEL: allzeros_v16i8_and1:
1111 ; SSE2-NEXT: psllw $7, %xmm0
1112 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1113 ; SSE2-NEXT: testl %eax, %eax
1114 ; SSE2-NEXT: sete %al
1117 ; SSE41-LABEL: allzeros_v16i8_and1:
1119 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1120 ; SSE41-NEXT: sete %al
1123 ; AVX1OR2-LABEL: allzeros_v16i8_and1:
1125 ; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1126 ; AVX1OR2-NEXT: sete %al
1127 ; AVX1OR2-NEXT: retq
1129 ; KNL-LABEL: allzeros_v16i8_and1:
1131 ; KNL-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1132 ; KNL-NEXT: sete %al
1135 ; SKX-LABEL: allzeros_v16i8_and1:
1137 ; SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [72340172838076673,72340172838076673]
1138 ; SKX-NEXT: vptest %xmm1, %xmm0
1139 ; SKX-NEXT: sete %al
1141 %tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1142 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
1143 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1144 %tmp3 = icmp eq i16 %tmp2, 0
; Low-bit "all ones" test on <32 x i8>: each lane is ANDed with 1 and tested `ne` zero; the
; <32 x i1> result is bitcast to i32 and compared for equality with -1 (bit 0 set in every lane).
1148 define i1 @allones_v32i8_and1(<32 x i8> %arg) {
1149 ; SSE-LABEL: allones_v32i8_and1:
1151 ; SSE-NEXT: pand %xmm1, %xmm0
1152 ; SSE-NEXT: psllw $7, %xmm0
1153 ; SSE-NEXT: pmovmskb %xmm0, %eax
1154 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1155 ; SSE-NEXT: sete %al
1158 ; AVX1-LABEL: allones_v32i8_and1:
1160 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1161 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
1162 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1163 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1164 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1165 ; AVX1-NEXT: sete %al
1166 ; AVX1-NEXT: vzeroupper
1169 ; AVX2-LABEL: allones_v32i8_and1:
1171 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
1172 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1173 ; AVX2-NEXT: cmpl $-1, %eax
1174 ; AVX2-NEXT: sete %al
1175 ; AVX2-NEXT: vzeroupper
1178 ; KNL-LABEL: allones_v32i8_and1:
1180 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
1181 ; KNL-NEXT: vpmovmskb %ymm0, %eax
1182 ; KNL-NEXT: cmpl $-1, %eax
1183 ; KNL-NEXT: sete %al
1184 ; KNL-NEXT: vzeroupper
1187 ; SKX-LABEL: allones_v32i8_and1:
1189 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
1190 ; SKX-NEXT: kortestd %k0, %k0
1191 ; SKX-NEXT: setb %al
1192 ; SKX-NEXT: vzeroupper
1194 %tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1195 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
1196 %tmp2 = bitcast <32 x i1> %tmp1 to i32
1197 %tmp3 = icmp eq i32 %tmp2, -1
; Low-bit "all zeros" test on <32 x i8>: each lane is ANDed with 1 and tested `ne` zero; the
; <32 x i1> result is bitcast to i32 and compared for equality with 0 (bit 0 clear in every lane).
1201 define i1 @allzeros_v32i8_and1(<32 x i8> %arg) {
1202 ; SSE2-LABEL: allzeros_v32i8_and1:
1204 ; SSE2-NEXT: por %xmm1, %xmm0
1205 ; SSE2-NEXT: psllw $7, %xmm0
1206 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1207 ; SSE2-NEXT: testl %eax, %eax
1208 ; SSE2-NEXT: sete %al
1211 ; SSE41-LABEL: allzeros_v32i8_and1:
1213 ; SSE41-NEXT: por %xmm1, %xmm0
1214 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1215 ; SSE41-NEXT: sete %al
1218 ; AVX1-LABEL: allzeros_v32i8_and1:
1220 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
1221 ; AVX1-NEXT: sete %al
1222 ; AVX1-NEXT: vzeroupper
1225 ; AVX2-LABEL: allzeros_v32i8_and1:
1227 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
1228 ; AVX2-NEXT: vptest %ymm1, %ymm0
1229 ; AVX2-NEXT: sete %al
1230 ; AVX2-NEXT: vzeroupper
1233 ; AVX512-LABEL: allzeros_v32i8_and1:
1235 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
1236 ; AVX512-NEXT: vptest %ymm1, %ymm0
1237 ; AVX512-NEXT: sete %al
1238 ; AVX512-NEXT: vzeroupper
1240 %tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1241 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
1242 %tmp2 = bitcast <32 x i1> %tmp1 to i32
1243 %tmp3 = icmp eq i32 %tmp2, 0
1247 define i1 @allones_v64i8_and1(<64 x i8> %arg) {
1248 ; SSE-LABEL: allones_v64i8_and1:
1250 ; SSE-NEXT: pand %xmm2, %xmm0
1251 ; SSE-NEXT: pand %xmm3, %xmm1
1252 ; SSE-NEXT: pand %xmm0, %xmm1
1253 ; SSE-NEXT: psllw $7, %xmm1
1254 ; SSE-NEXT: pmovmskb %xmm1, %eax
1255 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1256 ; SSE-NEXT: sete %al
1259 ; AVX1-LABEL: allones_v64i8_and1:
1261 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1262 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1263 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
1264 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
1265 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
1266 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1267 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1268 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1269 ; AVX1-NEXT: sete %al
1270 ; AVX1-NEXT: vzeroupper
1273 ; AVX2-LABEL: allones_v64i8_and1:
1275 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
1276 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
1277 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1278 ; AVX2-NEXT: cmpl $-1, %eax
1279 ; AVX2-NEXT: sete %al
1280 ; AVX2-NEXT: vzeroupper
1283 ; KNL-LABEL: allones_v64i8_and1:
1285 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1286 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
1287 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
1288 ; KNL-NEXT: vpmovmskb %ymm0, %eax
1289 ; KNL-NEXT: cmpl $-1, %eax
1290 ; KNL-NEXT: sete %al
1291 ; KNL-NEXT: vzeroupper
1294 ; SKX-LABEL: allones_v64i8_and1:
1296 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
1297 ; SKX-NEXT: kortestq %k0, %k0
1298 ; SKX-NEXT: setb %al
1299 ; SKX-NEXT: vzeroupper
1301 %tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1302 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
1303 %tmp2 = bitcast <64 x i1> %tmp1 to i64
1304 %tmp3 = icmp eq i64 %tmp2, -1
1308 define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
1309 ; SSE2-LABEL: allzeros_v64i8_and1:
1311 ; SSE2-NEXT: por %xmm3, %xmm1
1312 ; SSE2-NEXT: por %xmm2, %xmm0
1313 ; SSE2-NEXT: por %xmm1, %xmm0
1314 ; SSE2-NEXT: psllw $7, %xmm0
1315 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1316 ; SSE2-NEXT: testl %eax, %eax
1317 ; SSE2-NEXT: sete %al
1320 ; SSE41-LABEL: allzeros_v64i8_and1:
1322 ; SSE41-NEXT: por %xmm3, %xmm1
1323 ; SSE41-NEXT: por %xmm2, %xmm0
1324 ; SSE41-NEXT: por %xmm1, %xmm0
1325 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1326 ; SSE41-NEXT: sete %al
1329 ; AVX1-LABEL: allzeros_v64i8_and1:
1331 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1332 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
1333 ; AVX1-NEXT: sete %al
1334 ; AVX1-NEXT: vzeroupper
1337 ; AVX2-LABEL: allzeros_v64i8_and1:
1339 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1340 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
1341 ; AVX2-NEXT: vptest %ymm1, %ymm0
1342 ; AVX2-NEXT: sete %al
1343 ; AVX2-NEXT: vzeroupper
1346 ; AVX512-LABEL: allzeros_v64i8_and1:
1348 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1349 ; AVX512-NEXT: kortestw %k0, %k0
1350 ; AVX512-NEXT: sete %al
1351 ; AVX512-NEXT: vzeroupper
1353 %tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1354 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
1355 %tmp2 = bitcast <64 x i1> %tmp1 to i64
1356 %tmp3 = icmp eq i64 %tmp2, 0
; allones, <8 x i16>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <8 x i1> result to i8 and test it eq -1, so %tmp3 is true
; only when bit 0 is set in all 8 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
1360 define i1 @allones_v8i16_and1(<8 x i16> %arg) {
1361 ; SSE-LABEL: allones_v8i16_and1:
1363 ; SSE-NEXT: psllw $15, %xmm0
1364 ; SSE-NEXT: packsswb %xmm0, %xmm0
1365 ; SSE-NEXT: pmovmskb %xmm0, %eax
1366 ; SSE-NEXT: cmpb $-1, %al
1367 ; SSE-NEXT: sete %al
1370 ; AVX1OR2-LABEL: allones_v8i16_and1:
1372 ; AVX1OR2-NEXT: vpsllw $15, %xmm0, %xmm0
1373 ; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
1374 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
1375 ; AVX1OR2-NEXT: cmpb $-1, %al
1376 ; AVX1OR2-NEXT: sete %al
1377 ; AVX1OR2-NEXT: retq
1379 ; KNL-LABEL: allones_v8i16_and1:
1381 ; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
1382 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
1383 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1384 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1385 ; KNL-NEXT: kmovw %k0, %eax
1386 ; KNL-NEXT: cmpb $-1, %al
1387 ; KNL-NEXT: sete %al
1388 ; KNL-NEXT: vzeroupper
1391 ; SKX-LABEL: allones_v8i16_and1:
1393 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
1394 ; SKX-NEXT: kortestb %k0, %k0
1395 ; SKX-NEXT: setb %al
1397 %tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1398 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
1399 %tmp2 = bitcast <8 x i1> %tmp1 to i8
1400 %tmp3 = icmp eq i8 %tmp2, -1
; allzeros, <8 x i16>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <8 x i1> result to i8 and test it eq 0, so %tmp3 is true
; only when bit 0 is clear in all 8 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
1404 define i1 @allzeros_v8i16_and1(<8 x i16> %arg) {
1405 ; SSE2-LABEL: allzeros_v8i16_and1:
1407 ; SSE2-NEXT: psllw $7, %xmm0
1408 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1409 ; SSE2-NEXT: testl $21845, %eax # imm = 0x5555
1410 ; SSE2-NEXT: sete %al
1413 ; SSE41-LABEL: allzeros_v8i16_and1:
1415 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1416 ; SSE41-NEXT: sete %al
1419 ; AVX1OR2-LABEL: allzeros_v8i16_and1:
1421 ; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1422 ; AVX1OR2-NEXT: sete %al
1423 ; AVX1OR2-NEXT: retq
1425 ; KNL-LABEL: allzeros_v8i16_and1:
1427 ; KNL-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1428 ; KNL-NEXT: sete %al
1431 ; SKX-LABEL: allzeros_v8i16_and1:
1433 ; SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [281479271743489,281479271743489]
1434 ; SKX-NEXT: vptest %xmm1, %xmm0
1435 ; SKX-NEXT: sete %al
1437 %tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1438 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
1439 %tmp2 = bitcast <8 x i1> %tmp1 to i8
1440 %tmp3 = icmp eq i8 %tmp2, 0
; allones, <16 x i16>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <16 x i1> result to i16 and test it eq -1, so %tmp3 is true
; only when bit 0 is set in all 16 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
1444 define i1 @allones_v16i16_and1(<16 x i16> %arg) {
1445 ; SSE-LABEL: allones_v16i16_and1:
1447 ; SSE-NEXT: psllw $15, %xmm1
1448 ; SSE-NEXT: psllw $15, %xmm0
1449 ; SSE-NEXT: packsswb %xmm1, %xmm0
1450 ; SSE-NEXT: pmovmskb %xmm0, %eax
1451 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1452 ; SSE-NEXT: sete %al
1455 ; AVX1-LABEL: allones_v16i16_and1:
1457 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1458 ; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
1459 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
1460 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1461 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1462 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1463 ; AVX1-NEXT: sete %al
1464 ; AVX1-NEXT: vzeroupper
1467 ; AVX2-LABEL: allones_v16i16_and1:
1469 ; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
1470 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1471 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1472 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
1473 ; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1474 ; AVX2-NEXT: sete %al
1475 ; AVX2-NEXT: vzeroupper
1478 ; KNL-LABEL: allones_v16i16_and1:
1480 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
1481 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
1482 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
1483 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1484 ; KNL-NEXT: kortestw %k0, %k0
1485 ; KNL-NEXT: setb %al
1486 ; KNL-NEXT: vzeroupper
1489 ; SKX-LABEL: allones_v16i16_and1:
1491 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
1492 ; SKX-NEXT: kortestw %k0, %k0
1493 ; SKX-NEXT: setb %al
1494 ; SKX-NEXT: vzeroupper
1496 %tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1497 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
1498 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1499 %tmp3 = icmp eq i16 %tmp2, -1
; allones, <32 x i16>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <32 x i1> result to i32 and test it eq -1, so %tmp3 is true
; only when bit 0 is set in all 32 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
1503 define i1 @allones_v32i16_and1(<32 x i16> %arg) {
1504 ; SSE-LABEL: allones_v32i16_and1:
1506 ; SSE-NEXT: pand %xmm3, %xmm1
1507 ; SSE-NEXT: psllw $15, %xmm1
1508 ; SSE-NEXT: pand %xmm2, %xmm0
1509 ; SSE-NEXT: psllw $15, %xmm0
1510 ; SSE-NEXT: packsswb %xmm1, %xmm0
1511 ; SSE-NEXT: pmovmskb %xmm0, %eax
1512 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1513 ; SSE-NEXT: sete %al
1516 ; AVX1-LABEL: allones_v32i16_and1:
1518 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1519 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1520 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
1521 ; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
1522 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
1523 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
1524 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
1525 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1526 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1527 ; AVX1-NEXT: sete %al
1528 ; AVX1-NEXT: vzeroupper
1531 ; AVX2-LABEL: allones_v32i16_and1:
1533 ; AVX2-NEXT: vpsllw $15, %ymm1, %ymm1
1534 ; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
1535 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
1536 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1537 ; AVX2-NEXT: cmpl $-1, %eax
1538 ; AVX2-NEXT: sete %al
1539 ; AVX2-NEXT: vzeroupper
1542 ; KNL-LABEL: allones_v32i16_and1:
1544 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm1
1545 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
1546 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
1547 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
1548 ; KNL-NEXT: kmovw %k0, %eax
1549 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
1550 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
1551 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
1552 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
1553 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1554 ; KNL-NEXT: kmovw %k0, %ecx
1555 ; KNL-NEXT: andl %eax, %ecx
1556 ; KNL-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
1557 ; KNL-NEXT: sete %al
1558 ; KNL-NEXT: vzeroupper
1561 ; SKX-LABEL: allones_v32i16_and1:
1563 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
1564 ; SKX-NEXT: kortestd %k0, %k0
1565 ; SKX-NEXT: setb %al
1566 ; SKX-NEXT: vzeroupper
1568 %tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1569 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
1570 %tmp2 = bitcast <32 x i1> %tmp1 to i32
1571 %tmp3 = icmp eq i32 %tmp2, -1
; allzeros, <32 x i16>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <32 x i1> result to i32 and test it eq 0, so %tmp3 is true
; only when bit 0 is clear in all 32 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
1575 define i1 @allzeros_v32i16_and1(<32 x i16> %arg) {
1576 ; SSE2-LABEL: allzeros_v32i16_and1:
1578 ; SSE2-NEXT: por %xmm3, %xmm1
1579 ; SSE2-NEXT: por %xmm2, %xmm0
1580 ; SSE2-NEXT: por %xmm1, %xmm0
1581 ; SSE2-NEXT: psllw $7, %xmm0
1582 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1583 ; SSE2-NEXT: testl $21845, %eax # imm = 0x5555
1584 ; SSE2-NEXT: sete %al
1587 ; SSE41-LABEL: allzeros_v32i16_and1:
1589 ; SSE41-NEXT: por %xmm3, %xmm1
1590 ; SSE41-NEXT: por %xmm2, %xmm0
1591 ; SSE41-NEXT: por %xmm1, %xmm0
1592 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1593 ; SSE41-NEXT: sete %al
1596 ; AVX1-LABEL: allzeros_v32i16_and1:
1598 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1599 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
1600 ; AVX1-NEXT: sete %al
1601 ; AVX1-NEXT: vzeroupper
1604 ; AVX2-LABEL: allzeros_v32i16_and1:
1606 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1607 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
1608 ; AVX2-NEXT: vptest %ymm1, %ymm0
1609 ; AVX2-NEXT: sete %al
1610 ; AVX2-NEXT: vzeroupper
1613 ; AVX512-LABEL: allzeros_v32i16_and1:
1615 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1616 ; AVX512-NEXT: kortestw %k0, %k0
1617 ; AVX512-NEXT: sete %al
1618 ; AVX512-NEXT: vzeroupper
1620 %tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1621 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
1622 %tmp2 = bitcast <32 x i1> %tmp1 to i32
1623 %tmp3 = icmp eq i32 %tmp2, 0
; allzeros, <16 x i16>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <16 x i1> result to i16 and test it eq 0, so %tmp3 is true
; only when bit 0 is clear in all 16 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
1627 define i1 @allzeros_v16i16_and1(<16 x i16> %arg) {
1628 ; SSE2-LABEL: allzeros_v16i16_and1:
1630 ; SSE2-NEXT: por %xmm1, %xmm0
1631 ; SSE2-NEXT: psllw $7, %xmm0
1632 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1633 ; SSE2-NEXT: testl $21845, %eax # imm = 0x5555
1634 ; SSE2-NEXT: sete %al
1637 ; SSE41-LABEL: allzeros_v16i16_and1:
1639 ; SSE41-NEXT: por %xmm1, %xmm0
1640 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1641 ; SSE41-NEXT: sete %al
1644 ; AVX1-LABEL: allzeros_v16i16_and1:
1646 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
1647 ; AVX1-NEXT: sete %al
1648 ; AVX1-NEXT: vzeroupper
1651 ; AVX2-LABEL: allzeros_v16i16_and1:
1653 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
1654 ; AVX2-NEXT: vptest %ymm1, %ymm0
1655 ; AVX2-NEXT: sete %al
1656 ; AVX2-NEXT: vzeroupper
1659 ; AVX512-LABEL: allzeros_v16i16_and1:
1661 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
1662 ; AVX512-NEXT: vptest %ymm1, %ymm0
1663 ; AVX512-NEXT: sete %al
1664 ; AVX512-NEXT: vzeroupper
1666 %tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1667 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
1668 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1669 %tmp3 = icmp eq i16 %tmp2, 0
; allones, <4 x i32>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <4 x i1> result to i4 and test it eq -1, so %tmp3 is true
; only when bit 0 is set in all 4 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
1673 define i1 @allones_v4i32_and1(<4 x i32> %arg) {
1674 ; SSE-LABEL: allones_v4i32_and1:
1676 ; SSE-NEXT: pslld $31, %xmm0
1677 ; SSE-NEXT: movmskps %xmm0, %eax
1678 ; SSE-NEXT: cmpl $15, %eax
1679 ; SSE-NEXT: sete %al
1682 ; AVX1OR2-LABEL: allones_v4i32_and1:
1684 ; AVX1OR2-NEXT: vpslld $31, %xmm0, %xmm0
1685 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1686 ; AVX1OR2-NEXT: vtestps %xmm1, %xmm0
1687 ; AVX1OR2-NEXT: setb %al
1688 ; AVX1OR2-NEXT: retq
1690 ; KNL-LABEL: allones_v4i32_and1:
1692 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1693 ; KNL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1694 ; KNL-NEXT: kmovw %k0, %eax
1695 ; KNL-NEXT: testb $15, %al
1696 ; KNL-NEXT: sete %al
1697 ; KNL-NEXT: vzeroupper
1700 ; SKX-LABEL: allones_v4i32_and1:
1702 ; SKX-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
1703 ; SKX-NEXT: kmovd %k0, %eax
1704 ; SKX-NEXT: cmpb $15, %al
1705 ; SKX-NEXT: sete %al
1707 %tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
1708 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
1709 %tmp2 = bitcast <4 x i1> %tmp1 to i4
1710 %tmp3 = icmp eq i4 %tmp2, -1
; allzeros, <4 x i32>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <4 x i1> result to i4 and test it eq 0, so %tmp3 is true
; only when bit 0 is clear in all 4 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
1714 define i1 @allzeros_v4i32_and1(<4 x i32> %arg) {
1715 ; SSE2-LABEL: allzeros_v4i32_and1:
1717 ; SSE2-NEXT: pslld $31, %xmm0
1718 ; SSE2-NEXT: movmskps %xmm0, %eax
1719 ; SSE2-NEXT: testl %eax, %eax
1720 ; SSE2-NEXT: sete %al
1723 ; SSE41-LABEL: allzeros_v4i32_and1:
1725 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1726 ; SSE41-NEXT: sete %al
1729 ; AVX1OR2-LABEL: allzeros_v4i32_and1:
1731 ; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1732 ; AVX1OR2-NEXT: sete %al
1733 ; AVX1OR2-NEXT: retq
1735 ; KNL-LABEL: allzeros_v4i32_and1:
1737 ; KNL-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1738 ; KNL-NEXT: sete %al
1741 ; SKX-LABEL: allzeros_v4i32_and1:
1743 ; SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967297,4294967297]
1744 ; SKX-NEXT: vptest %xmm1, %xmm0
1745 ; SKX-NEXT: sete %al
1747 %tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
1748 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
1749 %tmp2 = bitcast <4 x i1> %tmp1 to i4
1750 %tmp3 = icmp eq i4 %tmp2, 0
; allones, <8 x i32>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <8 x i1> result to i8 and test it eq -1, so %tmp3 is true
; only when bit 0 is set in all 8 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
1754 define i1 @allones_v8i32_and1(<8 x i32> %arg) {
1755 ; SSE-LABEL: allones_v8i32_and1:
1757 ; SSE-NEXT: pslld $31, %xmm1
1758 ; SSE-NEXT: pslld $31, %xmm0
1759 ; SSE-NEXT: packssdw %xmm1, %xmm0
1760 ; SSE-NEXT: packsswb %xmm0, %xmm0
1761 ; SSE-NEXT: pmovmskb %xmm0, %eax
1762 ; SSE-NEXT: cmpb $-1, %al
1763 ; SSE-NEXT: sete %al
1766 ; AVX1-LABEL: allones_v8i32_and1:
1768 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1769 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1770 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
1771 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1772 ; AVX1-NEXT: vtestps %xmm1, %xmm0
1773 ; AVX1-NEXT: setb %al
1774 ; AVX1-NEXT: vzeroupper
1777 ; AVX2-LABEL: allones_v8i32_and1:
1779 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
1780 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
1781 ; AVX2-NEXT: vtestps %ymm1, %ymm0
1782 ; AVX2-NEXT: setb %al
1783 ; AVX2-NEXT: vzeroupper
1786 ; KNL-LABEL: allones_v8i32_and1:
1788 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1789 ; KNL-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1790 ; KNL-NEXT: kmovw %k0, %eax
1791 ; KNL-NEXT: cmpb $-1, %al
1792 ; KNL-NEXT: sete %al
1793 ; KNL-NEXT: vzeroupper
1796 ; SKX-LABEL: allones_v8i32_and1:
1798 ; SKX-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
1799 ; SKX-NEXT: kortestb %k0, %k0
1800 ; SKX-NEXT: setb %al
1801 ; SKX-NEXT: vzeroupper
1803 %tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1804 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
1805 %tmp2 = bitcast <8 x i1> %tmp1 to i8
1806 %tmp3 = icmp eq i8 %tmp2, -1
; allzeros, <8 x i32>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <8 x i1> result to i8 and test it eq 0, so %tmp3 is true
; only when bit 0 is clear in all 8 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
1810 define i1 @allzeros_v8i32_and1(<8 x i32> %arg) {
1811 ; SSE2-LABEL: allzeros_v8i32_and1:
1813 ; SSE2-NEXT: por %xmm1, %xmm0
1814 ; SSE2-NEXT: pslld $31, %xmm0
1815 ; SSE2-NEXT: movmskps %xmm0, %eax
1816 ; SSE2-NEXT: testl %eax, %eax
1817 ; SSE2-NEXT: sete %al
1820 ; SSE41-LABEL: allzeros_v8i32_and1:
1822 ; SSE41-NEXT: por %xmm1, %xmm0
1823 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1824 ; SSE41-NEXT: sete %al
1827 ; AVX1-LABEL: allzeros_v8i32_and1:
1829 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
1830 ; AVX1-NEXT: sete %al
1831 ; AVX1-NEXT: vzeroupper
1834 ; AVX2-LABEL: allzeros_v8i32_and1:
1836 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
1837 ; AVX2-NEXT: vptest %ymm1, %ymm0
1838 ; AVX2-NEXT: sete %al
1839 ; AVX2-NEXT: vzeroupper
1842 ; AVX512-LABEL: allzeros_v8i32_and1:
1844 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
1845 ; AVX512-NEXT: vptest %ymm1, %ymm0
1846 ; AVX512-NEXT: sete %al
1847 ; AVX512-NEXT: vzeroupper
1849 %tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1850 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
1851 %tmp2 = bitcast <8 x i1> %tmp1 to i8
1852 %tmp3 = icmp eq i8 %tmp2, 0
; allones, <16 x i32>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <16 x i1> result to i16 and test it eq -1, so %tmp3 is true
; only when bit 0 is set in all 16 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
1856 define i1 @allones_v16i32_and1(<16 x i32> %arg) {
1857 ; SSE-LABEL: allones_v16i32_and1:
1859 ; SSE-NEXT: pslld $31, %xmm3
1860 ; SSE-NEXT: pslld $31, %xmm2
1861 ; SSE-NEXT: packssdw %xmm3, %xmm2
1862 ; SSE-NEXT: pslld $31, %xmm1
1863 ; SSE-NEXT: pslld $31, %xmm0
1864 ; SSE-NEXT: packssdw %xmm1, %xmm0
1865 ; SSE-NEXT: packsswb %xmm2, %xmm0
1866 ; SSE-NEXT: pmovmskb %xmm0, %eax
1867 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1868 ; SSE-NEXT: sete %al
1871 ; AVX1-LABEL: allones_v16i32_and1:
1873 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1874 ; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
1875 ; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
1876 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1877 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1878 ; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
1879 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
1880 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1881 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1882 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1883 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1884 ; AVX1-NEXT: sete %al
1885 ; AVX1-NEXT: vzeroupper
1888 ; AVX2-LABEL: allones_v16i32_and1:
1890 ; AVX2-NEXT: vpslld $31, %ymm1, %ymm1
1891 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
1892 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
1893 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
1894 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1895 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1896 ; AVX2-NEXT: cmpl $-1, %eax
1897 ; AVX2-NEXT: sete %al
1898 ; AVX2-NEXT: vzeroupper
1901 ; AVX512-LABEL: allones_v16i32_and1:
1903 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1904 ; AVX512-NEXT: kortestw %k0, %k0
1905 ; AVX512-NEXT: setb %al
1906 ; AVX512-NEXT: vzeroupper
1908 %tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1909 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
1910 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1911 %tmp3 = icmp eq i16 %tmp2, -1
; allzeros, <16 x i32>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <16 x i1> result to i16 and test it eq 0, so %tmp3 is true
; only when bit 0 is clear in all 16 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
1915 define i1 @allzeros_v16i32_and1(<16 x i32> %arg) {
1916 ; SSE2-LABEL: allzeros_v16i32_and1:
1918 ; SSE2-NEXT: por %xmm3, %xmm1
1919 ; SSE2-NEXT: por %xmm2, %xmm0
1920 ; SSE2-NEXT: por %xmm1, %xmm0
1921 ; SSE2-NEXT: pslld $31, %xmm0
1922 ; SSE2-NEXT: movmskps %xmm0, %eax
1923 ; SSE2-NEXT: testl %eax, %eax
1924 ; SSE2-NEXT: sete %al
1927 ; SSE41-LABEL: allzeros_v16i32_and1:
1929 ; SSE41-NEXT: por %xmm3, %xmm1
1930 ; SSE41-NEXT: por %xmm2, %xmm0
1931 ; SSE41-NEXT: por %xmm1, %xmm0
1932 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1933 ; SSE41-NEXT: sete %al
1936 ; AVX1-LABEL: allzeros_v16i32_and1:
1938 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1939 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
1940 ; AVX1-NEXT: sete %al
1941 ; AVX1-NEXT: vzeroupper
1944 ; AVX2-LABEL: allzeros_v16i32_and1:
1946 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1947 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
1948 ; AVX2-NEXT: vptest %ymm1, %ymm0
1949 ; AVX2-NEXT: sete %al
1950 ; AVX2-NEXT: vzeroupper
1953 ; AVX512-LABEL: allzeros_v16i32_and1:
1955 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1956 ; AVX512-NEXT: kortestw %k0, %k0
1957 ; AVX512-NEXT: sete %al
1958 ; AVX512-NEXT: vzeroupper
1960 %tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1961 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
1962 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1963 %tmp3 = icmp eq i16 %tmp2, 0
; allones, <2 x i64>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <2 x i1> result to i2 and test it eq -1, so %tmp3 is true
; only when bit 0 is set in both lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
1967 define i1 @allones_v2i64_and1(<2 x i64> %arg) {
1968 ; SSE-LABEL: allones_v2i64_and1:
1970 ; SSE-NEXT: psllq $63, %xmm0
1971 ; SSE-NEXT: movmskpd %xmm0, %eax
1972 ; SSE-NEXT: cmpl $3, %eax
1973 ; SSE-NEXT: sete %al
1976 ; AVX1OR2-LABEL: allones_v2i64_and1:
1978 ; AVX1OR2-NEXT: vpsllq $63, %xmm0, %xmm0
1979 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1980 ; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0
1981 ; AVX1OR2-NEXT: setb %al
1982 ; AVX1OR2-NEXT: retq
1984 ; KNL-LABEL: allones_v2i64_and1:
1986 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1987 ; KNL-NEXT: vpmovsxbq {{.*#+}} xmm1 = [1,1]
1988 ; KNL-NEXT: vptestnmq %zmm1, %zmm0, %k0
1989 ; KNL-NEXT: kmovw %k0, %eax
1990 ; KNL-NEXT: testb $3, %al
1991 ; KNL-NEXT: sete %al
1992 ; KNL-NEXT: vzeroupper
1995 ; SKX-LABEL: allones_v2i64_and1:
1997 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0
1998 ; SKX-NEXT: kmovd %k0, %eax
1999 ; SKX-NEXT: cmpb $3, %al
2000 ; SKX-NEXT: sete %al
2002 %tmp = and <2 x i64> %arg, <i64 1, i64 1>
2003 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
2004 %tmp2 = bitcast <2 x i1> %tmp1 to i2
2005 %tmp3 = icmp eq i2 %tmp2, -1
; allzeros, <2 x i64>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <2 x i1> result to i2 and test it eq 0, so %tmp3 is true
; only when bit 0 is clear in both lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
2009 define i1 @allzeros_v2i64_and1(<2 x i64> %arg) {
2010 ; SSE2-LABEL: allzeros_v2i64_and1:
2012 ; SSE2-NEXT: pslld $31, %xmm0
2013 ; SSE2-NEXT: movmskps %xmm0, %eax
2014 ; SSE2-NEXT: testb $5, %al
2015 ; SSE2-NEXT: sete %al
2018 ; SSE41-LABEL: allzeros_v2i64_and1:
2020 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2021 ; SSE41-NEXT: sete %al
2024 ; AVX1OR2-LABEL: allzeros_v2i64_and1:
2026 ; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2027 ; AVX1OR2-NEXT: sete %al
2028 ; AVX1OR2-NEXT: retq
2030 ; KNL-LABEL: allzeros_v2i64_and1:
2032 ; KNL-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2033 ; KNL-NEXT: sete %al
2036 ; SKX-LABEL: allzeros_v2i64_and1:
2038 ; SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1]
2039 ; SKX-NEXT: vptest %xmm1, %xmm0
2040 ; SKX-NEXT: sete %al
2042 %tmp = and <2 x i64> %arg, <i64 1, i64 1>
2043 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
2044 %tmp2 = bitcast <2 x i1> %tmp1 to i2
2045 %tmp3 = icmp eq i2 %tmp2, 0
; allones, <4 x i64>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <4 x i1> result to i4 and test it eq -1, so %tmp3 is true
; only when bit 0 is set in all 4 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
2049 define i1 @allones_v4i64_and1(<4 x i64> %arg) {
2050 ; SSE-LABEL: allones_v4i64_and1:
2052 ; SSE-NEXT: psllq $63, %xmm1
2053 ; SSE-NEXT: psllq $63, %xmm0
2054 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
2055 ; SSE-NEXT: movmskps %xmm0, %eax
2056 ; SSE-NEXT: cmpl $15, %eax
2057 ; SSE-NEXT: sete %al
2060 ; AVX1-LABEL: allones_v4i64_and1:
2062 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2063 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2064 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
2065 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
2066 ; AVX1-NEXT: vtestpd %xmm1, %xmm0
2067 ; AVX1-NEXT: setb %al
2068 ; AVX1-NEXT: vzeroupper
2071 ; AVX2-LABEL: allones_v4i64_and1:
2073 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
2074 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
2075 ; AVX2-NEXT: vtestpd %ymm1, %ymm0
2076 ; AVX2-NEXT: setb %al
2077 ; AVX2-NEXT: vzeroupper
2080 ; KNL-LABEL: allones_v4i64_and1:
2082 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2083 ; KNL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
2084 ; KNL-NEXT: kmovw %k0, %eax
2085 ; KNL-NEXT: testb $15, %al
2086 ; KNL-NEXT: sete %al
2087 ; KNL-NEXT: vzeroupper
2090 ; SKX-LABEL: allones_v4i64_and1:
2092 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
2093 ; SKX-NEXT: kmovd %k0, %eax
2094 ; SKX-NEXT: cmpb $15, %al
2095 ; SKX-NEXT: sete %al
2096 ; SKX-NEXT: vzeroupper
2098 %tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
2099 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
2100 %tmp2 = bitcast <4 x i1> %tmp1 to i4
2101 %tmp3 = icmp eq i4 %tmp2, -1
; allzeros, <4 x i64>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <4 x i1> result to i4 and test it eq 0, so %tmp3 is true
; only when bit 0 is clear in all 4 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
2105 define i1 @allzeros_v4i64_and1(<4 x i64> %arg) {
2106 ; SSE2-LABEL: allzeros_v4i64_and1:
2108 ; SSE2-NEXT: por %xmm1, %xmm0
2109 ; SSE2-NEXT: pslld $31, %xmm0
2110 ; SSE2-NEXT: movmskps %xmm0, %eax
2111 ; SSE2-NEXT: testb $5, %al
2112 ; SSE2-NEXT: sete %al
2115 ; SSE41-LABEL: allzeros_v4i64_and1:
2117 ; SSE41-NEXT: por %xmm1, %xmm0
2118 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2119 ; SSE41-NEXT: sete %al
2122 ; AVX1-LABEL: allzeros_v4i64_and1:
2124 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
2125 ; AVX1-NEXT: sete %al
2126 ; AVX1-NEXT: vzeroupper
2129 ; AVX2-LABEL: allzeros_v4i64_and1:
2131 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
2132 ; AVX2-NEXT: vptest %ymm1, %ymm0
2133 ; AVX2-NEXT: sete %al
2134 ; AVX2-NEXT: vzeroupper
2137 ; AVX512-LABEL: allzeros_v4i64_and1:
2139 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
2140 ; AVX512-NEXT: vptest %ymm1, %ymm0
2141 ; AVX512-NEXT: sete %al
2142 ; AVX512-NEXT: vzeroupper
2144 %tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
2145 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
2146 %tmp2 = bitcast <4 x i1> %tmp1 to i4
2147 %tmp3 = icmp eq i4 %tmp2, 0
; allones, <8 x i64>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <8 x i1> result to i8 and test it eq -1, so %tmp3 is true
; only when bit 0 is set in all 8 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
2151 define i1 @allones_v8i64_and1(<8 x i64> %arg) {
2152 ; SSE-LABEL: allones_v8i64_and1:
2154 ; SSE-NEXT: psllq $63, %xmm3
2155 ; SSE-NEXT: psllq $63, %xmm2
2156 ; SSE-NEXT: packssdw %xmm3, %xmm2
2157 ; SSE-NEXT: psllq $63, %xmm1
2158 ; SSE-NEXT: psllq $63, %xmm0
2159 ; SSE-NEXT: packssdw %xmm1, %xmm0
2160 ; SSE-NEXT: packssdw %xmm2, %xmm0
2161 ; SSE-NEXT: packsswb %xmm0, %xmm0
2162 ; SSE-NEXT: pmovmskb %xmm0, %eax
2163 ; SSE-NEXT: cmpb $-1, %al
2164 ; SSE-NEXT: sete %al
2167 ; AVX1-LABEL: allones_v8i64_and1:
2169 ; AVX1-NEXT: vpsllq $63, %xmm1, %xmm2
2170 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm3
2171 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
2172 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2173 ; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
2174 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2175 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
2176 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2177 ; AVX1-NEXT: vpackssdw %xmm0, %xmm2, %xmm0
2178 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
2179 ; AVX1-NEXT: vtestps %xmm1, %xmm0
2180 ; AVX1-NEXT: setb %al
2181 ; AVX1-NEXT: vzeroupper
2184 ; AVX2-LABEL: allones_v8i64_and1:
2186 ; AVX2-NEXT: vpsllq $63, %ymm1, %ymm1
2187 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
2188 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
2189 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
2190 ; AVX2-NEXT: vtestps %ymm1, %ymm0
2191 ; AVX2-NEXT: setb %al
2192 ; AVX2-NEXT: vzeroupper
2195 ; KNL-LABEL: allones_v8i64_and1:
2197 ; KNL-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
2198 ; KNL-NEXT: kmovw %k0, %eax
2199 ; KNL-NEXT: cmpb $-1, %al
2200 ; KNL-NEXT: sete %al
2201 ; KNL-NEXT: vzeroupper
2204 ; SKX-LABEL: allones_v8i64_and1:
2206 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
2207 ; SKX-NEXT: kortestb %k0, %k0
2208 ; SKX-NEXT: setb %al
2209 ; SKX-NEXT: vzeroupper
2211 %tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
2212 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
2213 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2214 %tmp3 = icmp eq i8 %tmp2, -1
; allzeros, <8 x i64>, mask 1: AND every lane of %arg with 1, compare the lanes
; ne 0, bitcast the <8 x i1> result to i8 and test it eq 0, so %tmp3 is true
; only when bit 0 is clear in all 8 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
2218 define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
2219 ; SSE2-LABEL: allzeros_v8i64_and1:
2221 ; SSE2-NEXT: por %xmm3, %xmm1
2222 ; SSE2-NEXT: por %xmm2, %xmm0
2223 ; SSE2-NEXT: por %xmm1, %xmm0
2224 ; SSE2-NEXT: pslld $31, %xmm0
2225 ; SSE2-NEXT: movmskps %xmm0, %eax
2226 ; SSE2-NEXT: testb $5, %al
2227 ; SSE2-NEXT: sete %al
2230 ; SSE41-LABEL: allzeros_v8i64_and1:
2232 ; SSE41-NEXT: por %xmm3, %xmm1
2233 ; SSE41-NEXT: por %xmm2, %xmm0
2234 ; SSE41-NEXT: por %xmm1, %xmm0
2235 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2236 ; SSE41-NEXT: sete %al
2239 ; AVX1-LABEL: allzeros_v8i64_and1:
2241 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
2242 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
2243 ; AVX1-NEXT: sete %al
2244 ; AVX1-NEXT: vzeroupper
2247 ; AVX2-LABEL: allzeros_v8i64_and1:
2249 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
2250 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
2251 ; AVX2-NEXT: vptest %ymm1, %ymm0
2252 ; AVX2-NEXT: sete %al
2253 ; AVX2-NEXT: vzeroupper
2256 ; AVX512-LABEL: allzeros_v8i64_and1:
2258 ; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1]
2259 ; AVX512-NEXT: vptestmd %zmm1, %zmm0, %k0
2260 ; AVX512-NEXT: kortestw %k0, %k0
2261 ; AVX512-NEXT: sete %al
2262 ; AVX512-NEXT: vzeroupper
2264 %tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
2265 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
2266 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2267 %tmp3 = icmp eq i8 %tmp2, 0
; allones, <16 x i8>, mask 4: AND every lane of %arg with 4, compare the lanes
; ne 0, bitcast the <16 x i1> result to i16 and test it eq -1, so %tmp3 is true
; only when bit 2 is set in all 16 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
2271 define i1 @allones_v16i8_and4(<16 x i8> %arg) {
2272 ; SSE-LABEL: allones_v16i8_and4:
2274 ; SSE-NEXT: psllw $5, %xmm0
2275 ; SSE-NEXT: pmovmskb %xmm0, %eax
2276 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2277 ; SSE-NEXT: sete %al
2280 ; AVX1OR2-LABEL: allones_v16i8_and4:
2282 ; AVX1OR2-NEXT: vpsllw $5, %xmm0, %xmm0
2283 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
2284 ; AVX1OR2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2285 ; AVX1OR2-NEXT: sete %al
2286 ; AVX1OR2-NEXT: retq
2288 ; KNL-LABEL: allones_v16i8_and4:
2290 ; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
2291 ; KNL-NEXT: vpmovmskb %xmm0, %eax
2292 ; KNL-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2293 ; KNL-NEXT: sete %al
2296 ; SKX-LABEL: allones_v16i8_and4:
2298 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
2299 ; SKX-NEXT: kortestw %k0, %k0
2300 ; SKX-NEXT: setb %al
2302 %tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2303 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
2304 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2305 %tmp3 = icmp eq i16 %tmp2, -1
; allzeros, <16 x i8>, mask 4: AND every lane of %arg with 4, compare the lanes
; ne 0, bitcast the <16 x i1> result to i16 and test it eq 0, so %tmp3 is true
; only when bit 2 is clear in all 16 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
2309 define i1 @allzeros_v16i8_and4(<16 x i8> %arg) {
2310 ; SSE2-LABEL: allzeros_v16i8_and4:
2312 ; SSE2-NEXT: psllw $5, %xmm0
2313 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2314 ; SSE2-NEXT: testl %eax, %eax
2315 ; SSE2-NEXT: sete %al
2318 ; SSE41-LABEL: allzeros_v16i8_and4:
2320 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2321 ; SSE41-NEXT: sete %al
2324 ; AVX1OR2-LABEL: allzeros_v16i8_and4:
2326 ; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2327 ; AVX1OR2-NEXT: sete %al
2328 ; AVX1OR2-NEXT: retq
2330 ; KNL-LABEL: allzeros_v16i8_and4:
2332 ; KNL-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2333 ; KNL-NEXT: sete %al
2336 ; SKX-LABEL: allzeros_v16i8_and4:
2338 ; SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [289360691352306692,289360691352306692]
2339 ; SKX-NEXT: vptest %xmm1, %xmm0
2340 ; SKX-NEXT: sete %al
2342 %tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2343 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
2344 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2345 %tmp3 = icmp eq i16 %tmp2, 0
; allones, <32 x i8>, mask 4: AND every lane of %arg with 4, compare the lanes
; ne 0, bitcast the <32 x i1> result to i32 and test it eq -1, so %tmp3 is true
; only when bit 2 is set in all 32 lanes.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); regenerate, do not hand-edit.
; NOTE(review): %tmp3 is presumably the returned i1 - the ret is not visible in this excerpt.
2349 define i1 @allones_v32i8_and4(<32 x i8> %arg) {
2350 ; SSE-LABEL: allones_v32i8_and4:
2352 ; SSE-NEXT: pand %xmm1, %xmm0
2353 ; SSE-NEXT: psllw $5, %xmm0
2354 ; SSE-NEXT: pmovmskb %xmm0, %eax
2355 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2356 ; SSE-NEXT: sete %al
2359 ; AVX1-LABEL: allones_v32i8_and4:
2361 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2362 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
2363 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2364 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2365 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2366 ; AVX1-NEXT: sete %al
2367 ; AVX1-NEXT: vzeroupper
2370 ; AVX2-LABEL: allones_v32i8_and4:
2372 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
2373 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2374 ; AVX2-NEXT: cmpl $-1, %eax
2375 ; AVX2-NEXT: sete %al
2376 ; AVX2-NEXT: vzeroupper
2379 ; KNL-LABEL: allones_v32i8_and4:
2381 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
2382 ; KNL-NEXT: vpmovmskb %ymm0, %eax
2383 ; KNL-NEXT: cmpl $-1, %eax
2384 ; KNL-NEXT: sete %al
2385 ; KNL-NEXT: vzeroupper
2388 ; SKX-LABEL: allones_v32i8_and4:
2390 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
2391 ; SKX-NEXT: kortestd %k0, %k0
2392 ; SKX-NEXT: setb %al
2393 ; SKX-NEXT: vzeroupper
2395 %tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2396 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
2397 %tmp2 = bitcast <32 x i1> %tmp1 to i32
2398 %tmp3 = icmp eq i32 %tmp2, -1
; Masks every i8 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <32 x i1> result to i32 and compares it with 0, so %tmp3 is
; true only when bit 2 is clear in all 32 lanes.
; In the checks below, 289360691352306692 is 0x0404040404040404, i.e. the
; i8 4 splat viewed as 64-bit elements.
2402 define i1 @allzeros_v32i8_and4(<32 x i8> %arg) {
2403 ; SSE2-LABEL: allzeros_v32i8_and4:
2405 ; SSE2-NEXT: por %xmm1, %xmm0
2406 ; SSE2-NEXT: psllw $5, %xmm0
2407 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2408 ; SSE2-NEXT: testl %eax, %eax
2409 ; SSE2-NEXT: sete %al
2412 ; SSE41-LABEL: allzeros_v32i8_and4:
2414 ; SSE41-NEXT: por %xmm1, %xmm0
2415 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2416 ; SSE41-NEXT: sete %al
2419 ; AVX1-LABEL: allzeros_v32i8_and4:
2421 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
2422 ; AVX1-NEXT: sete %al
2423 ; AVX1-NEXT: vzeroupper
2426 ; AVX2-LABEL: allzeros_v32i8_and4:
2428 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [289360691352306692,289360691352306692,289360691352306692,289360691352306692]
2429 ; AVX2-NEXT: vptest %ymm1, %ymm0
2430 ; AVX2-NEXT: sete %al
2431 ; AVX2-NEXT: vzeroupper
2434 ; AVX512-LABEL: allzeros_v32i8_and4:
2436 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [289360691352306692,289360691352306692,289360691352306692,289360691352306692]
2437 ; AVX512-NEXT: vptest %ymm1, %ymm0
2438 ; AVX512-NEXT: sete %al
2439 ; AVX512-NEXT: vzeroupper
2441 %tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2442 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
2443 %tmp2 = bitcast <32 x i1> %tmp1 to i32
2444 %tmp3 = icmp eq i32 %tmp2, 0
2448 define i1 @allones_v64i8_and4(<64 x i8> %arg) {
2449 ; SSE-LABEL: allones_v64i8_and4:
2451 ; SSE-NEXT: pand %xmm2, %xmm0
2452 ; SSE-NEXT: pand %xmm3, %xmm1
2453 ; SSE-NEXT: pand %xmm0, %xmm1
2454 ; SSE-NEXT: psllw $5, %xmm1
2455 ; SSE-NEXT: pmovmskb %xmm1, %eax
2456 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2457 ; SSE-NEXT: sete %al
2460 ; AVX1-LABEL: allones_v64i8_and4:
2462 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2463 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2464 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
2465 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
2466 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
2467 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2468 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2469 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2470 ; AVX1-NEXT: sete %al
2471 ; AVX1-NEXT: vzeroupper
2474 ; AVX2-LABEL: allones_v64i8_and4:
2476 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
2477 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
2478 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2479 ; AVX2-NEXT: cmpl $-1, %eax
2480 ; AVX2-NEXT: sete %al
2481 ; AVX2-NEXT: vzeroupper
2484 ; KNL-LABEL: allones_v64i8_and4:
2486 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
2487 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
2488 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
2489 ; KNL-NEXT: vpmovmskb %ymm0, %eax
2490 ; KNL-NEXT: cmpl $-1, %eax
2491 ; KNL-NEXT: sete %al
2492 ; KNL-NEXT: vzeroupper
2495 ; SKX-LABEL: allones_v64i8_and4:
2497 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
2498 ; SKX-NEXT: kortestq %k0, %k0
2499 ; SKX-NEXT: setb %al
2500 ; SKX-NEXT: vzeroupper
2502 %tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2503 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
2504 %tmp2 = bitcast <64 x i1> %tmp1 to i64
2505 %tmp3 = icmp eq i64 %tmp2, -1
2509 define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
2510 ; SSE2-LABEL: allzeros_v64i8_and4:
2512 ; SSE2-NEXT: por %xmm3, %xmm1
2513 ; SSE2-NEXT: por %xmm2, %xmm0
2514 ; SSE2-NEXT: por %xmm1, %xmm0
2515 ; SSE2-NEXT: psllw $5, %xmm0
2516 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2517 ; SSE2-NEXT: testl %eax, %eax
2518 ; SSE2-NEXT: sete %al
2521 ; SSE41-LABEL: allzeros_v64i8_and4:
2523 ; SSE41-NEXT: por %xmm3, %xmm1
2524 ; SSE41-NEXT: por %xmm2, %xmm0
2525 ; SSE41-NEXT: por %xmm1, %xmm0
2526 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2527 ; SSE41-NEXT: sete %al
2530 ; AVX1-LABEL: allzeros_v64i8_and4:
2532 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
2533 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
2534 ; AVX1-NEXT: sete %al
2535 ; AVX1-NEXT: vzeroupper
2538 ; AVX2-LABEL: allzeros_v64i8_and4:
2540 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
2541 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [289360691352306692,289360691352306692,289360691352306692,289360691352306692]
2542 ; AVX2-NEXT: vptest %ymm1, %ymm0
2543 ; AVX2-NEXT: sete %al
2544 ; AVX2-NEXT: vzeroupper
2547 ; AVX512-LABEL: allzeros_v64i8_and4:
2549 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
2550 ; AVX512-NEXT: kortestw %k0, %k0
2551 ; AVX512-NEXT: sete %al
2552 ; AVX512-NEXT: vzeroupper
2554 %tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2555 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
2556 %tmp2 = bitcast <64 x i1> %tmp1 to i64
2557 %tmp3 = icmp eq i64 %tmp2, 0
; Masks every i16 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <8 x i1> result to i8 and compares it with -1 (all bits set),
; so %tmp3 is true only when bit 2 is set in all 8 lanes.
; The shifts by 13 in the checks move bit 2 of each i16 lane into that lane's
; sign bit.
2561 define i1 @allones_v8i16_and4(<8 x i16> %arg) {
2562 ; SSE-LABEL: allones_v8i16_and4:
2564 ; SSE-NEXT: psllw $13, %xmm0
2565 ; SSE-NEXT: packsswb %xmm0, %xmm0
2566 ; SSE-NEXT: pmovmskb %xmm0, %eax
2567 ; SSE-NEXT: cmpb $-1, %al
2568 ; SSE-NEXT: sete %al
2571 ; AVX1OR2-LABEL: allones_v8i16_and4:
2573 ; AVX1OR2-NEXT: vpsllw $13, %xmm0, %xmm0
2574 ; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2575 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
2576 ; AVX1OR2-NEXT: cmpb $-1, %al
2577 ; AVX1OR2-NEXT: sete %al
2578 ; AVX1OR2-NEXT: retq
2580 ; KNL-LABEL: allones_v8i16_and4:
2582 ; KNL-NEXT: vpsllw $13, %xmm0, %xmm0
2583 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
2584 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
2585 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
2586 ; KNL-NEXT: kmovw %k0, %eax
2587 ; KNL-NEXT: cmpb $-1, %al
2588 ; KNL-NEXT: sete %al
2589 ; KNL-NEXT: vzeroupper
2592 ; SKX-LABEL: allones_v8i16_and4:
2594 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
2595 ; SKX-NEXT: kortestb %k0, %k0
2596 ; SKX-NEXT: setb %al
2598 %tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2599 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
2600 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2601 %tmp3 = icmp eq i8 %tmp2, -1
; Masks every i16 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <8 x i1> result to i8 and compares it with 0, so %tmp3 is true
; only when bit 2 is clear in all 8 lanes.
; In the checks below, 1125917086973956 is 0x0004000400040004 (the i16 4 splat
; viewed as 64-bit elements), and the 0x5555 test mask keeps only the pmovmskb
; bits of the low byte of each i16 lane, where psllw $5 places bit 2.
2605 define i1 @allzeros_v8i16_and4(<8 x i16> %arg) {
2606 ; SSE2-LABEL: allzeros_v8i16_and4:
2608 ; SSE2-NEXT: psllw $5, %xmm0
2609 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2610 ; SSE2-NEXT: testl $21845, %eax # imm = 0x5555
2611 ; SSE2-NEXT: sete %al
2614 ; SSE41-LABEL: allzeros_v8i16_and4:
2616 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2617 ; SSE41-NEXT: sete %al
2620 ; AVX1OR2-LABEL: allzeros_v8i16_and4:
2622 ; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2623 ; AVX1OR2-NEXT: sete %al
2624 ; AVX1OR2-NEXT: retq
2626 ; KNL-LABEL: allzeros_v8i16_and4:
2628 ; KNL-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2629 ; KNL-NEXT: sete %al
2632 ; SKX-LABEL: allzeros_v8i16_and4:
2634 ; SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1125917086973956,1125917086973956]
2635 ; SKX-NEXT: vptest %xmm1, %xmm0
2636 ; SKX-NEXT: sete %al
2638 %tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2639 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
2640 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2641 %tmp3 = icmp eq i8 %tmp2, 0
; Masks every i16 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <16 x i1> result to i16 and compares it with -1 (all bits set),
; so %tmp3 is true only when bit 2 is set in all 16 lanes.
; The shifts by 13 in the checks move bit 2 of each i16 lane into that lane's
; sign bit.
2645 define i1 @allones_v16i16_and4(<16 x i16> %arg) {
2646 ; SSE-LABEL: allones_v16i16_and4:
2648 ; SSE-NEXT: psllw $13, %xmm1
2649 ; SSE-NEXT: psllw $13, %xmm0
2650 ; SSE-NEXT: packsswb %xmm1, %xmm0
2651 ; SSE-NEXT: pmovmskb %xmm0, %eax
2652 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2653 ; SSE-NEXT: sete %al
2656 ; AVX1-LABEL: allones_v16i16_and4:
2658 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2659 ; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
2660 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
2661 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2662 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2663 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2664 ; AVX1-NEXT: sete %al
2665 ; AVX1-NEXT: vzeroupper
2668 ; AVX2-LABEL: allones_v16i16_and4:
2670 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
2671 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2672 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2673 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
2674 ; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2675 ; AVX2-NEXT: sete %al
2676 ; AVX2-NEXT: vzeroupper
2679 ; KNL-LABEL: allones_v16i16_and4:
2681 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
2682 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
2683 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
2684 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2685 ; KNL-NEXT: kortestw %k0, %k0
2686 ; KNL-NEXT: setb %al
2687 ; KNL-NEXT: vzeroupper
2690 ; SKX-LABEL: allones_v16i16_and4:
2692 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
2693 ; SKX-NEXT: kortestw %k0, %k0
2694 ; SKX-NEXT: setb %al
2695 ; SKX-NEXT: vzeroupper
2697 %tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2698 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
2699 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2700 %tmp3 = icmp eq i16 %tmp2, -1
; Masks every i16 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <32 x i1> result to i32 and compares it with -1 (all bits set),
; so %tmp3 is true only when bit 2 is set in all 32 lanes.
; The shifts by 13 in the checks move bit 2 of each i16 lane into that lane's
; sign bit.
2704 define i1 @allones_v32i16_and4(<32 x i16> %arg) {
2705 ; SSE-LABEL: allones_v32i16_and4:
2707 ; SSE-NEXT: pand %xmm3, %xmm1
2708 ; SSE-NEXT: psllw $13, %xmm1
2709 ; SSE-NEXT: pand %xmm2, %xmm0
2710 ; SSE-NEXT: psllw $13, %xmm0
2711 ; SSE-NEXT: packsswb %xmm1, %xmm0
2712 ; SSE-NEXT: pmovmskb %xmm0, %eax
2713 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2714 ; SSE-NEXT: sete %al
2717 ; AVX1-LABEL: allones_v32i16_and4:
2719 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2720 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2721 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
2722 ; AVX1-NEXT: vpsllw $13, %xmm2, %xmm2
2723 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
2724 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
2725 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
2726 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2727 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2728 ; AVX1-NEXT: sete %al
2729 ; AVX1-NEXT: vzeroupper
2732 ; AVX2-LABEL: allones_v32i16_and4:
2734 ; AVX2-NEXT: vpsllw $13, %ymm1, %ymm1
2735 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
2736 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
2737 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2738 ; AVX2-NEXT: cmpl $-1, %eax
2739 ; AVX2-NEXT: sete %al
2740 ; AVX2-NEXT: vzeroupper
2743 ; KNL-LABEL: allones_v32i16_and4:
2745 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm1
2746 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
2747 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
2748 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2749 ; KNL-NEXT: kmovw %k0, %eax
2750 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
2751 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
2752 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
2753 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
2754 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2755 ; KNL-NEXT: kmovw %k0, %ecx
2756 ; KNL-NEXT: andl %eax, %ecx
2757 ; KNL-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
2758 ; KNL-NEXT: sete %al
2759 ; KNL-NEXT: vzeroupper
2762 ; SKX-LABEL: allones_v32i16_and4:
2764 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
2765 ; SKX-NEXT: kortestd %k0, %k0
2766 ; SKX-NEXT: setb %al
2767 ; SKX-NEXT: vzeroupper
2769 %tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2770 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
2771 %tmp2 = bitcast <32 x i1> %tmp1 to i32
2772 %tmp3 = icmp eq i32 %tmp2, -1
; Masks every i16 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <32 x i1> result to i32 and compares it with 0, so %tmp3 is
; true only when bit 2 is clear in all 32 lanes.
; In the checks below, 1125917086973956 is 0x0004000400040004 (the i16 4 splat
; viewed as 64-bit elements), and the 0x5555 test mask keeps only the pmovmskb
; bits of the low byte of each i16 lane, where psllw $5 places bit 2.
2776 define i1 @allzeros_v32i16_and4(<32 x i16> %arg) {
2777 ; SSE2-LABEL: allzeros_v32i16_and4:
2779 ; SSE2-NEXT: por %xmm3, %xmm1
2780 ; SSE2-NEXT: por %xmm2, %xmm0
2781 ; SSE2-NEXT: por %xmm1, %xmm0
2782 ; SSE2-NEXT: psllw $5, %xmm0
2783 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2784 ; SSE2-NEXT: testl $21845, %eax # imm = 0x5555
2785 ; SSE2-NEXT: sete %al
2788 ; SSE41-LABEL: allzeros_v32i16_and4:
2790 ; SSE41-NEXT: por %xmm3, %xmm1
2791 ; SSE41-NEXT: por %xmm2, %xmm0
2792 ; SSE41-NEXT: por %xmm1, %xmm0
2793 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2794 ; SSE41-NEXT: sete %al
2797 ; AVX1-LABEL: allzeros_v32i16_and4:
2799 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
2800 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
2801 ; AVX1-NEXT: sete %al
2802 ; AVX1-NEXT: vzeroupper
2805 ; AVX2-LABEL: allzeros_v32i16_and4:
2807 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
2808 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1125917086973956,1125917086973956,1125917086973956,1125917086973956]
2809 ; AVX2-NEXT: vptest %ymm1, %ymm0
2810 ; AVX2-NEXT: sete %al
2811 ; AVX2-NEXT: vzeroupper
2814 ; AVX512-LABEL: allzeros_v32i16_and4:
2816 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
2817 ; AVX512-NEXT: kortestw %k0, %k0
2818 ; AVX512-NEXT: sete %al
2819 ; AVX512-NEXT: vzeroupper
2821 %tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2822 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
2823 %tmp2 = bitcast <32 x i1> %tmp1 to i32
2824 %tmp3 = icmp eq i32 %tmp2, 0
; Masks every i16 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <16 x i1> result to i16 and compares it with 0, so %tmp3 is
; true only when bit 2 is clear in all 16 lanes.
; In the checks below, 1125917086973956 is 0x0004000400040004 (the i16 4 splat
; viewed as 64-bit elements), and the 0x5555 test mask keeps only the pmovmskb
; bits of the low byte of each i16 lane, where psllw $5 places bit 2.
2828 define i1 @allzeros_v16i16_and4(<16 x i16> %arg) {
2829 ; SSE2-LABEL: allzeros_v16i16_and4:
2831 ; SSE2-NEXT: por %xmm1, %xmm0
2832 ; SSE2-NEXT: psllw $5, %xmm0
2833 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2834 ; SSE2-NEXT: testl $21845, %eax # imm = 0x5555
2835 ; SSE2-NEXT: sete %al
2838 ; SSE41-LABEL: allzeros_v16i16_and4:
2840 ; SSE41-NEXT: por %xmm1, %xmm0
2841 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2842 ; SSE41-NEXT: sete %al
2845 ; AVX1-LABEL: allzeros_v16i16_and4:
2847 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
2848 ; AVX1-NEXT: sete %al
2849 ; AVX1-NEXT: vzeroupper
2852 ; AVX2-LABEL: allzeros_v16i16_and4:
2854 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1125917086973956,1125917086973956,1125917086973956,1125917086973956]
2855 ; AVX2-NEXT: vptest %ymm1, %ymm0
2856 ; AVX2-NEXT: sete %al
2857 ; AVX2-NEXT: vzeroupper
2860 ; AVX512-LABEL: allzeros_v16i16_and4:
2862 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1125917086973956,1125917086973956,1125917086973956,1125917086973956]
2863 ; AVX512-NEXT: vptest %ymm1, %ymm0
2864 ; AVX512-NEXT: sete %al
2865 ; AVX512-NEXT: vzeroupper
2867 %tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2868 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
2869 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2870 %tmp3 = icmp eq i16 %tmp2, 0
; Masks every i32 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <4 x i1> result to i4 and compares it with -1 (all bits set),
; so %tmp3 is true only when bit 2 is set in all 4 lanes.
; The shifts by 29 in the checks move bit 2 of each i32 lane into that lane's
; sign bit.
2874 define i1 @allones_v4i32_and4(<4 x i32> %arg) {
2875 ; SSE-LABEL: allones_v4i32_and4:
2877 ; SSE-NEXT: pslld $29, %xmm0
2878 ; SSE-NEXT: movmskps %xmm0, %eax
2879 ; SSE-NEXT: cmpl $15, %eax
2880 ; SSE-NEXT: sete %al
2883 ; AVX1OR2-LABEL: allones_v4i32_and4:
2885 ; AVX1OR2-NEXT: vpslld $29, %xmm0, %xmm0
2886 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
2887 ; AVX1OR2-NEXT: vtestps %xmm1, %xmm0
2888 ; AVX1OR2-NEXT: setb %al
2889 ; AVX1OR2-NEXT: retq
2891 ; KNL-LABEL: allones_v4i32_and4:
2893 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2894 ; KNL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
2895 ; KNL-NEXT: kmovw %k0, %eax
2896 ; KNL-NEXT: testb $15, %al
2897 ; KNL-NEXT: sete %al
2898 ; KNL-NEXT: vzeroupper
2901 ; SKX-LABEL: allones_v4i32_and4:
2903 ; SKX-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
2904 ; SKX-NEXT: kmovd %k0, %eax
2905 ; SKX-NEXT: cmpb $15, %al
2906 ; SKX-NEXT: sete %al
2908 %tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
2909 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
2910 %tmp2 = bitcast <4 x i1> %tmp1 to i4
2911 %tmp3 = icmp eq i4 %tmp2, -1
; Masks every i32 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <4 x i1> result to i4 and compares it with 0, so %tmp3 is true
; only when bit 2 is clear in all 4 lanes.
; In the checks below, 17179869188 is 0x0000000400000004, i.e. the i32 4 splat
; viewed as 64-bit elements.
2915 define i1 @allzeros_v4i32_and4(<4 x i32> %arg) {
2916 ; SSE2-LABEL: allzeros_v4i32_and4:
2918 ; SSE2-NEXT: pslld $29, %xmm0
2919 ; SSE2-NEXT: movmskps %xmm0, %eax
2920 ; SSE2-NEXT: testl %eax, %eax
2921 ; SSE2-NEXT: sete %al
2924 ; SSE41-LABEL: allzeros_v4i32_and4:
2926 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2927 ; SSE41-NEXT: sete %al
2930 ; AVX1OR2-LABEL: allzeros_v4i32_and4:
2932 ; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2933 ; AVX1OR2-NEXT: sete %al
2934 ; AVX1OR2-NEXT: retq
2936 ; KNL-LABEL: allzeros_v4i32_and4:
2938 ; KNL-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2939 ; KNL-NEXT: sete %al
2942 ; SKX-LABEL: allzeros_v4i32_and4:
2944 ; SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17179869188,17179869188]
2945 ; SKX-NEXT: vptest %xmm1, %xmm0
2946 ; SKX-NEXT: sete %al
2948 %tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
2949 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
2950 %tmp2 = bitcast <4 x i1> %tmp1 to i4
2951 %tmp3 = icmp eq i4 %tmp2, 0
; Masks every i32 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <8 x i1> result to i8 and compares it with -1 (all bits set),
; so %tmp3 is true only when bit 2 is set in all 8 lanes.
; The shifts by 29 in the checks move bit 2 of each i32 lane into that lane's
; sign bit.
2955 define i1 @allones_v8i32_and4(<8 x i32> %arg) {
2956 ; SSE-LABEL: allones_v8i32_and4:
2958 ; SSE-NEXT: pslld $29, %xmm1
2959 ; SSE-NEXT: pslld $29, %xmm0
2960 ; SSE-NEXT: packssdw %xmm1, %xmm0
2961 ; SSE-NEXT: packsswb %xmm0, %xmm0
2962 ; SSE-NEXT: pmovmskb %xmm0, %eax
2963 ; SSE-NEXT: cmpb $-1, %al
2964 ; SSE-NEXT: sete %al
2967 ; AVX1-LABEL: allones_v8i32_and4:
2969 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2970 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2971 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
2972 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
2973 ; AVX1-NEXT: vtestps %xmm1, %xmm0
2974 ; AVX1-NEXT: setb %al
2975 ; AVX1-NEXT: vzeroupper
2978 ; AVX2-LABEL: allones_v8i32_and4:
2980 ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
2981 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
2982 ; AVX2-NEXT: vtestps %ymm1, %ymm0
2983 ; AVX2-NEXT: setb %al
2984 ; AVX2-NEXT: vzeroupper
2987 ; KNL-LABEL: allones_v8i32_and4:
2989 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2990 ; KNL-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
2991 ; KNL-NEXT: kmovw %k0, %eax
2992 ; KNL-NEXT: cmpb $-1, %al
2993 ; KNL-NEXT: sete %al
2994 ; KNL-NEXT: vzeroupper
2997 ; SKX-LABEL: allones_v8i32_and4:
2999 ; SKX-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
3000 ; SKX-NEXT: kortestb %k0, %k0
3001 ; SKX-NEXT: setb %al
3002 ; SKX-NEXT: vzeroupper
3004 %tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3005 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
3006 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3007 %tmp3 = icmp eq i8 %tmp2, -1
; Masks every i32 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <8 x i1> result to i8 and compares it with 0, so %tmp3 is true
; only when bit 2 is clear in all 8 lanes.
; In the checks below, 17179869188 is 0x0000000400000004, i.e. the i32 4 splat
; viewed as 64-bit elements.
3011 define i1 @allzeros_v8i32_and4(<8 x i32> %arg) {
3012 ; SSE2-LABEL: allzeros_v8i32_and4:
3014 ; SSE2-NEXT: por %xmm1, %xmm0
3015 ; SSE2-NEXT: pslld $29, %xmm0
3016 ; SSE2-NEXT: movmskps %xmm0, %eax
3017 ; SSE2-NEXT: testl %eax, %eax
3018 ; SSE2-NEXT: sete %al
3021 ; SSE41-LABEL: allzeros_v8i32_and4:
3023 ; SSE41-NEXT: por %xmm1, %xmm0
3024 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3025 ; SSE41-NEXT: sete %al
3028 ; AVX1-LABEL: allzeros_v8i32_and4:
3030 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
3031 ; AVX1-NEXT: sete %al
3032 ; AVX1-NEXT: vzeroupper
3035 ; AVX2-LABEL: allzeros_v8i32_and4:
3037 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17179869188,17179869188,17179869188,17179869188]
3038 ; AVX2-NEXT: vptest %ymm1, %ymm0
3039 ; AVX2-NEXT: sete %al
3040 ; AVX2-NEXT: vzeroupper
3043 ; AVX512-LABEL: allzeros_v8i32_and4:
3045 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17179869188,17179869188,17179869188,17179869188]
3046 ; AVX512-NEXT: vptest %ymm1, %ymm0
3047 ; AVX512-NEXT: sete %al
3048 ; AVX512-NEXT: vzeroupper
3050 %tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3051 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
3052 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3053 %tmp3 = icmp eq i8 %tmp2, 0
; Masks every i32 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <16 x i1> result to i16 and compares it with -1 (all bits set),
; so %tmp3 is true only when bit 2 is set in all 16 lanes.
; The shifts by 29 in the checks move bit 2 of each i32 lane into that lane's
; sign bit.
3057 define i1 @allones_v16i32_and4(<16 x i32> %arg) {
3058 ; SSE-LABEL: allones_v16i32_and4:
3060 ; SSE-NEXT: pslld $29, %xmm3
3061 ; SSE-NEXT: pslld $29, %xmm2
3062 ; SSE-NEXT: packssdw %xmm3, %xmm2
3063 ; SSE-NEXT: pslld $29, %xmm1
3064 ; SSE-NEXT: pslld $29, %xmm0
3065 ; SSE-NEXT: packssdw %xmm1, %xmm0
3066 ; SSE-NEXT: packsswb %xmm2, %xmm0
3067 ; SSE-NEXT: pmovmskb %xmm0, %eax
3068 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
3069 ; SSE-NEXT: sete %al
3072 ; AVX1-LABEL: allones_v16i32_and4:
3074 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3075 ; AVX1-NEXT: vpslld $29, %xmm2, %xmm2
3076 ; AVX1-NEXT: vpslld $29, %xmm1, %xmm1
3077 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
3078 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3079 ; AVX1-NEXT: vpslld $29, %xmm2, %xmm2
3080 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
3081 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
3082 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3083 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3084 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
3085 ; AVX1-NEXT: sete %al
3086 ; AVX1-NEXT: vzeroupper
3089 ; AVX2-LABEL: allones_v16i32_and4:
3091 ; AVX2-NEXT: vpslld $29, %ymm1, %ymm1
3092 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
3093 ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
3094 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
3095 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
3096 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
3097 ; AVX2-NEXT: cmpl $-1, %eax
3098 ; AVX2-NEXT: sete %al
3099 ; AVX2-NEXT: vzeroupper
3102 ; AVX512-LABEL: allones_v16i32_and4:
3104 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
3105 ; AVX512-NEXT: kortestw %k0, %k0
3106 ; AVX512-NEXT: setb %al
3107 ; AVX512-NEXT: vzeroupper
3109 %tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3110 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
3111 %tmp2 = bitcast <16 x i1> %tmp1 to i16
3112 %tmp3 = icmp eq i16 %tmp2, -1
; Masks every i32 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <16 x i1> result to i16 and compares it with 0, so %tmp3 is
; true only when bit 2 is clear in all 16 lanes.
; In the checks below, 17179869188 is 0x0000000400000004, i.e. the i32 4 splat
; viewed as 64-bit elements.
3116 define i1 @allzeros_v16i32_and4(<16 x i32> %arg) {
3117 ; SSE2-LABEL: allzeros_v16i32_and4:
3119 ; SSE2-NEXT: por %xmm3, %xmm1
3120 ; SSE2-NEXT: por %xmm2, %xmm0
3121 ; SSE2-NEXT: por %xmm1, %xmm0
3122 ; SSE2-NEXT: pslld $29, %xmm0
3123 ; SSE2-NEXT: movmskps %xmm0, %eax
3124 ; SSE2-NEXT: testl %eax, %eax
3125 ; SSE2-NEXT: sete %al
3128 ; SSE41-LABEL: allzeros_v16i32_and4:
3130 ; SSE41-NEXT: por %xmm3, %xmm1
3131 ; SSE41-NEXT: por %xmm2, %xmm0
3132 ; SSE41-NEXT: por %xmm1, %xmm0
3133 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3134 ; SSE41-NEXT: sete %al
3137 ; AVX1-LABEL: allzeros_v16i32_and4:
3139 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
3140 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
3141 ; AVX1-NEXT: sete %al
3142 ; AVX1-NEXT: vzeroupper
3145 ; AVX2-LABEL: allzeros_v16i32_and4:
3147 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
3148 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17179869188,17179869188,17179869188,17179869188]
3149 ; AVX2-NEXT: vptest %ymm1, %ymm0
3150 ; AVX2-NEXT: sete %al
3151 ; AVX2-NEXT: vzeroupper
3154 ; AVX512-LABEL: allzeros_v16i32_and4:
3156 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
3157 ; AVX512-NEXT: kortestw %k0, %k0
3158 ; AVX512-NEXT: sete %al
3159 ; AVX512-NEXT: vzeroupper
3161 %tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3162 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
3163 %tmp2 = bitcast <16 x i1> %tmp1 to i16
3164 %tmp3 = icmp eq i16 %tmp2, 0
; Masks every i64 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <2 x i1> result to i2 and compares it with -1 (all bits set),
; so %tmp3 is true only when bit 2 is set in both lanes.
; The shifts by 61 in the checks move bit 2 of each i64 lane into that lane's
; sign bit.
3168 define i1 @allones_v2i64_and4(<2 x i64> %arg) {
3169 ; SSE-LABEL: allones_v2i64_and4:
3171 ; SSE-NEXT: psllq $61, %xmm0
3172 ; SSE-NEXT: movmskpd %xmm0, %eax
3173 ; SSE-NEXT: cmpl $3, %eax
3174 ; SSE-NEXT: sete %al
3177 ; AVX1OR2-LABEL: allones_v2i64_and4:
3179 ; AVX1OR2-NEXT: vpsllq $61, %xmm0, %xmm0
3180 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
3181 ; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0
3182 ; AVX1OR2-NEXT: setb %al
3183 ; AVX1OR2-NEXT: retq
3185 ; KNL-LABEL: allones_v2i64_and4:
3187 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3188 ; KNL-NEXT: vpmovsxbq {{.*#+}} xmm1 = [4,4]
3189 ; KNL-NEXT: vptestnmq %zmm1, %zmm0, %k0
3190 ; KNL-NEXT: kmovw %k0, %eax
3191 ; KNL-NEXT: testb $3, %al
3192 ; KNL-NEXT: sete %al
3193 ; KNL-NEXT: vzeroupper
3196 ; SKX-LABEL: allones_v2i64_and4:
3198 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0
3199 ; SKX-NEXT: kmovd %k0, %eax
3200 ; SKX-NEXT: cmpb $3, %al
3201 ; SKX-NEXT: sete %al
3203 %tmp = and <2 x i64> %arg, <i64 4, i64 4>
3204 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
3205 %tmp2 = bitcast <2 x i1> %tmp1 to i2
3206 %tmp3 = icmp eq i2 %tmp2, -1
; Masks every i64 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <2 x i1> result to i2 and compares it with 0, so %tmp3 is true
; only when bit 2 is clear in both lanes.
; In the SSE2 checks the test mask 5 keeps movmskps bits 0 and 2, i.e. the low
; 32-bit half of each i64 lane, which is where bit 2 lives.
3210 define i1 @allzeros_v2i64_and4(<2 x i64> %arg) {
3211 ; SSE2-LABEL: allzeros_v2i64_and4:
3213 ; SSE2-NEXT: pslld $29, %xmm0
3214 ; SSE2-NEXT: movmskps %xmm0, %eax
3215 ; SSE2-NEXT: testb $5, %al
3216 ; SSE2-NEXT: sete %al
3219 ; SSE41-LABEL: allzeros_v2i64_and4:
3221 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3222 ; SSE41-NEXT: sete %al
3225 ; AVX1OR2-LABEL: allzeros_v2i64_and4:
3227 ; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3228 ; AVX1OR2-NEXT: sete %al
3229 ; AVX1OR2-NEXT: retq
3231 ; KNL-LABEL: allzeros_v2i64_and4:
3233 ; KNL-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3234 ; KNL-NEXT: sete %al
3237 ; SKX-LABEL: allzeros_v2i64_and4:
3239 ; SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4]
3240 ; SKX-NEXT: vptest %xmm1, %xmm0
3241 ; SKX-NEXT: sete %al
3243 %tmp = and <2 x i64> %arg, <i64 4, i64 4>
3244 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
3245 %tmp2 = bitcast <2 x i1> %tmp1 to i2
3246 %tmp3 = icmp eq i2 %tmp2, 0
; Masks every i64 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <4 x i1> result to i4 and compares it with -1 (all bits set),
; so %tmp3 is true only when bit 2 is set in all 4 lanes.
; The shifts by 61 in the checks move bit 2 of each i64 lane into that lane's
; sign bit.
3250 define i1 @allones_v4i64_and4(<4 x i64> %arg) {
3251 ; SSE-LABEL: allones_v4i64_and4:
3253 ; SSE-NEXT: psllq $61, %xmm1
3254 ; SSE-NEXT: psllq $61, %xmm0
3255 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
3256 ; SSE-NEXT: movmskps %xmm0, %eax
3257 ; SSE-NEXT: cmpl $15, %eax
3258 ; SSE-NEXT: sete %al
3261 ; AVX1-LABEL: allones_v4i64_and4:
3263 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3264 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3265 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
3266 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
3267 ; AVX1-NEXT: vtestpd %xmm1, %xmm0
3268 ; AVX1-NEXT: setb %al
3269 ; AVX1-NEXT: vzeroupper
3272 ; AVX2-LABEL: allones_v4i64_and4:
3274 ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
3275 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
3276 ; AVX2-NEXT: vtestpd %ymm1, %ymm0
3277 ; AVX2-NEXT: setb %al
3278 ; AVX2-NEXT: vzeroupper
3281 ; KNL-LABEL: allones_v4i64_and4:
3283 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3284 ; KNL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
3285 ; KNL-NEXT: kmovw %k0, %eax
3286 ; KNL-NEXT: testb $15, %al
3287 ; KNL-NEXT: sete %al
3288 ; KNL-NEXT: vzeroupper
3291 ; SKX-LABEL: allones_v4i64_and4:
3293 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
3294 ; SKX-NEXT: kmovd %k0, %eax
3295 ; SKX-NEXT: cmpb $15, %al
3296 ; SKX-NEXT: sete %al
3297 ; SKX-NEXT: vzeroupper
3299 %tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
3300 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
3301 %tmp2 = bitcast <4 x i1> %tmp1 to i4
3302 %tmp3 = icmp eq i4 %tmp2, -1
; Masks every i64 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <4 x i1> result to i4 and compares it with 0, so %tmp3 is true
; only when bit 2 is clear in all 4 lanes.
; In the SSE2 checks the test mask 5 keeps movmskps bits 0 and 2, i.e. the low
; 32-bit half of each i64 lane, which is where bit 2 lives.
3306 define i1 @allzeros_v4i64_and4(<4 x i64> %arg) {
3307 ; SSE2-LABEL: allzeros_v4i64_and4:
3309 ; SSE2-NEXT: por %xmm1, %xmm0
3310 ; SSE2-NEXT: pslld $29, %xmm0
3311 ; SSE2-NEXT: movmskps %xmm0, %eax
3312 ; SSE2-NEXT: testb $5, %al
3313 ; SSE2-NEXT: sete %al
3316 ; SSE41-LABEL: allzeros_v4i64_and4:
3318 ; SSE41-NEXT: por %xmm1, %xmm0
3319 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3320 ; SSE41-NEXT: sete %al
3323 ; AVX1-LABEL: allzeros_v4i64_and4:
3325 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
3326 ; AVX1-NEXT: sete %al
3327 ; AVX1-NEXT: vzeroupper
3330 ; AVX2-LABEL: allzeros_v4i64_and4:
3332 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
3333 ; AVX2-NEXT: vptest %ymm1, %ymm0
3334 ; AVX2-NEXT: sete %al
3335 ; AVX2-NEXT: vzeroupper
3338 ; AVX512-LABEL: allzeros_v4i64_and4:
3340 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
3341 ; AVX512-NEXT: vptest %ymm1, %ymm0
3342 ; AVX512-NEXT: sete %al
3343 ; AVX512-NEXT: vzeroupper
3345 %tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
3346 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
3347 %tmp2 = bitcast <4 x i1> %tmp1 to i4
3348 %tmp3 = icmp eq i4 %tmp2, 0
; Masks every i64 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <8 x i1> result to i8 and compares it with -1 (all bits set),
; so %tmp3 is true only when bit 2 is set in all 8 lanes.
; The shifts by 61 in the checks move bit 2 of each i64 lane into that lane's
; sign bit.
3352 define i1 @allones_v8i64_and4(<8 x i64> %arg) {
3353 ; SSE-LABEL: allones_v8i64_and4:
3355 ; SSE-NEXT: psllq $61, %xmm3
3356 ; SSE-NEXT: psllq $61, %xmm2
3357 ; SSE-NEXT: packssdw %xmm3, %xmm2
3358 ; SSE-NEXT: psllq $61, %xmm1
3359 ; SSE-NEXT: psllq $61, %xmm0
3360 ; SSE-NEXT: packssdw %xmm1, %xmm0
3361 ; SSE-NEXT: packssdw %xmm2, %xmm0
3362 ; SSE-NEXT: packsswb %xmm0, %xmm0
3363 ; SSE-NEXT: pmovmskb %xmm0, %eax
3364 ; SSE-NEXT: cmpb $-1, %al
3365 ; SSE-NEXT: sete %al
3368 ; AVX1-LABEL: allones_v8i64_and4:
3370 ; AVX1-NEXT: vpsllq $61, %xmm1, %xmm2
3371 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm3
3372 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
3373 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
3374 ; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
3375 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3376 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
3377 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3378 ; AVX1-NEXT: vpackssdw %xmm0, %xmm2, %xmm0
3379 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
3380 ; AVX1-NEXT: vtestps %xmm1, %xmm0
3381 ; AVX1-NEXT: setb %al
3382 ; AVX1-NEXT: vzeroupper
3385 ; AVX2-LABEL: allones_v8i64_and4:
3387 ; AVX2-NEXT: vpsllq $61, %ymm1, %ymm1
3388 ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
3389 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
3390 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
3391 ; AVX2-NEXT: vtestps %ymm1, %ymm0
3392 ; AVX2-NEXT: setb %al
3393 ; AVX2-NEXT: vzeroupper
3396 ; KNL-LABEL: allones_v8i64_and4:
3398 ; KNL-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
3399 ; KNL-NEXT: kmovw %k0, %eax
3400 ; KNL-NEXT: cmpb $-1, %al
3401 ; KNL-NEXT: sete %al
3402 ; KNL-NEXT: vzeroupper
3405 ; SKX-LABEL: allones_v8i64_and4:
3407 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
3408 ; SKX-NEXT: kortestb %k0, %k0
3409 ; SKX-NEXT: setb %al
3410 ; SKX-NEXT: vzeroupper
3412 %tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
3413 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
3414 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3415 %tmp3 = icmp eq i8 %tmp2, -1
; Masks every i64 lane of %arg with 4 (bit 2), tests each lane for non-zero,
; bitcasts the <8 x i1> result to i8 and compares it with 0, so %tmp3 is true
; only when bit 2 is clear in all 8 lanes.
; In the SSE2 checks the test mask 5 keeps movmskps bits 0 and 2, i.e. the low
; 32-bit half of each i64 lane, which is where bit 2 lives.
3419 define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
3420 ; SSE2-LABEL: allzeros_v8i64_and4:
3422 ; SSE2-NEXT: por %xmm3, %xmm1
3423 ; SSE2-NEXT: por %xmm2, %xmm0
3424 ; SSE2-NEXT: por %xmm1, %xmm0
3425 ; SSE2-NEXT: pslld $29, %xmm0
3426 ; SSE2-NEXT: movmskps %xmm0, %eax
3427 ; SSE2-NEXT: testb $5, %al
3428 ; SSE2-NEXT: sete %al
3431 ; SSE41-LABEL: allzeros_v8i64_and4:
3433 ; SSE41-NEXT: por %xmm3, %xmm1
3434 ; SSE41-NEXT: por %xmm2, %xmm0
3435 ; SSE41-NEXT: por %xmm1, %xmm0
3436 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3437 ; SSE41-NEXT: sete %al
3440 ; AVX1-LABEL: allzeros_v8i64_and4:
3442 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
3443 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
3444 ; AVX1-NEXT: sete %al
3445 ; AVX1-NEXT: vzeroupper
3448 ; AVX2-LABEL: allzeros_v8i64_and4:
3450 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
3451 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
3452 ; AVX2-NEXT: vptest %ymm1, %ymm0
3453 ; AVX2-NEXT: sete %al
3454 ; AVX2-NEXT: vzeroupper
3457 ; AVX512-LABEL: allzeros_v8i64_and4:
3459 ; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4]
3460 ; AVX512-NEXT: vptestmd %zmm1, %zmm0, %k0
3461 ; AVX512-NEXT: kortestw %k0, %k0
3462 ; AVX512-NEXT: sete %al
3463 ; AVX512-NEXT: vzeroupper
3465 %tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
3466 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
3467 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3468 %tmp3 = icmp eq i8 %tmp2, 0
3472 ; FCMP may use ISD::SETNE when nnan is set, so don't attempt to use LowerVectorAllEqual.
; The IR bitcasts the <8 x i1> result of 'fcmp nnan une %a0, 0.0' to i8 and
; checks that it is non-zero (%3), i.e. at least one lane compares unequal.
3473 define i1 @allzeros_v8f32_nnan(<8 x float> %a0) {
3474 ; SSE-LABEL: allzeros_v8f32_nnan:
3476 ; SSE-NEXT: xorps %xmm2, %xmm2
3477 ; SSE-NEXT: cmpneqps %xmm2, %xmm1
3478 ; SSE-NEXT: cmpneqps %xmm2, %xmm0
3479 ; SSE-NEXT: packssdw %xmm1, %xmm0
3480 ; SSE-NEXT: pmovmskb %xmm0, %eax
3481 ; SSE-NEXT: testl %eax, %eax
3482 ; SSE-NEXT: setne %al
3485 ; AVX1OR2-LABEL: allzeros_v8f32_nnan:
3487 ; AVX1OR2-NEXT: vxorps %xmm1, %xmm1, %xmm1
3488 ; AVX1OR2-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0
3489 ; AVX1OR2-NEXT: vtestps %ymm0, %ymm0
3490 ; AVX1OR2-NEXT: setne %al
3491 ; AVX1OR2-NEXT: vzeroupper
3492 ; AVX1OR2-NEXT: retq
3494 ; KNL-LABEL: allzeros_v8f32_nnan:
3496 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3497 ; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1
3498 ; KNL-NEXT: vcmpneqps %zmm1, %zmm0, %k0
3499 ; KNL-NEXT: kmovw %k0, %eax
3500 ; KNL-NEXT: testb %al, %al
3501 ; KNL-NEXT: setne %al
3502 ; KNL-NEXT: vzeroupper
3505 ; SKX-LABEL: allzeros_v8f32_nnan:
3507 ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1
3508 ; SKX-NEXT: vcmpneqps %ymm1, %ymm0, %k0
3509 ; SKX-NEXT: kortestb %k0, %k0
3510 ; SKX-NEXT: setne %al
3511 ; SKX-NEXT: vzeroupper
3513 %1 = fcmp nnan une <8 x float> %a0, zeroinitializer
3514 %2 = bitcast <8 x i1> %1 to i8
3515 %3 = icmp ne i8 %2, 0
3519 ; The IR patterns below should map directly onto the behavior of a
3520 ; MOVMSK instruction.
; Sign bits of the two double lanes (icmp slt 0 on the bitcast i64 lanes) are
; packed into an i2 and zero-extended to i32; the expectations are a single
; (v)movmskpd on the xmm register.
3522 define i32 @movmskpd(<2 x double> %x) {
3523 ; SSE-LABEL: movmskpd:
3525 ; SSE-NEXT: movmskpd %xmm0, %eax
3528 ; AVX-LABEL: movmskpd:
3530 ; AVX-NEXT: vmovmskpd %xmm0, %eax
3532 %a = bitcast <2 x double> %x to <2 x i64>
3533 %b = icmp slt <2 x i64> %a, zeroinitializer
3534 %c = bitcast <2 x i1> %b to i2
3535 %d = zext i2 %c to i32
; Sign bits of the four float lanes (icmp slt 0 on the bitcast i32 lanes) are
; packed into an i4 and zero-extended to i32; the expectations are a single
; (v)movmskps on the xmm register.
3539 define i32 @movmskps(<4 x float> %x) {
3540 ; SSE-LABEL: movmskps:
3542 ; SSE-NEXT: movmskps %xmm0, %eax
3545 ; AVX-LABEL: movmskps:
3547 ; AVX-NEXT: vmovmskps %xmm0, %eax
3549 %a = bitcast <4 x float> %x to <4 x i32>
3550 %b = icmp slt <4 x i32> %a, zeroinitializer
3551 %c = bitcast <4 x i1> %b to i4
3552 %d = zext i4 %c to i32
; 256-bit variant: sign bits of four double lanes packed into an i4 and
; zero-extended to i32. The SSE expectation shuffles the odd dwords of the two
; xmm halves together and uses movmskps; AVX uses one vmovmskpd on ymm.
3556 define i32 @movmskpd256(<4 x double> %x) {
3557 ; SSE-LABEL: movmskpd256:
3559 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
3560 ; SSE-NEXT: movmskps %xmm0, %eax
3563 ; AVX-LABEL: movmskpd256:
3565 ; AVX-NEXT: vmovmskpd %ymm0, %eax
3566 ; AVX-NEXT: vzeroupper
3568 %a = bitcast <4 x double> %x to <4 x i64>
3569 %b = icmp slt <4 x i64> %a, zeroinitializer
3570 %c = bitcast <4 x i1> %b to i4
3571 %d = zext i4 %c to i32
; 256-bit variant: sign bits of eight float lanes packed into an i8 and
; zero-extended to i32. The SSE expectation packs the two xmm halves down to
; bytes and uses pmovmskb + movzbl; AVX uses one vmovmskps on ymm.
3575 define i32 @movmskps256(<8 x float> %x) {
3576 ; SSE-LABEL: movmskps256:
3578 ; SSE-NEXT: packssdw %xmm1, %xmm0
3579 ; SSE-NEXT: packsswb %xmm0, %xmm0
3580 ; SSE-NEXT: pmovmskb %xmm0, %eax
3581 ; SSE-NEXT: movzbl %al, %eax
3584 ; AVX-LABEL: movmskps256:
3586 ; AVX-NEXT: vmovmskps %ymm0, %eax
3587 ; AVX-NEXT: vzeroupper
3589 %a = bitcast <8 x float> %x to <8 x i32>
3590 %b = icmp slt <8 x i32> %a, zeroinitializer
3591 %c = bitcast <8 x i1> %b to i8
3592 %d = zext i8 %c to i32
; Sign bits of the sixteen i8 lanes (icmp slt 0) are packed into an i16 and
; zero-extended to i32; the expectations are a single (v)pmovmskb.
3596 define i32 @movmskb(<16 x i8> %x) {
3597 ; SSE-LABEL: movmskb:
3599 ; SSE-NEXT: pmovmskb %xmm0, %eax
3602 ; AVX-LABEL: movmskb:
3604 ; AVX-NEXT: vpmovmskb %xmm0, %eax
3606 %a = icmp slt <16 x i8> %x, zeroinitializer
3607 %b = bitcast <16 x i1> %a to i16
3608 %c = zext i16 %b to i32
; 256-bit variant: sign bits of thirty-two i8 lanes bitcast directly to i32.
; The SSE and AVX1 expectations combine two 128-bit (v)pmovmskb results with
; shll $16 / orl; AVX2 and AVX512 use one vpmovmskb on ymm.
3612 define i32 @movmskb256(<32 x i8> %x) {
3613 ; SSE-LABEL: movmskb256:
3615 ; SSE-NEXT: pmovmskb %xmm0, %ecx
3616 ; SSE-NEXT: pmovmskb %xmm1, %eax
3617 ; SSE-NEXT: shll $16, %eax
3618 ; SSE-NEXT: orl %ecx, %eax
3621 ; AVX1-LABEL: movmskb256:
3623 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
3624 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3625 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3626 ; AVX1-NEXT: shll $16, %eax
3627 ; AVX1-NEXT: orl %ecx, %eax
3628 ; AVX1-NEXT: vzeroupper
3631 ; AVX2-LABEL: movmskb256:
3633 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
3634 ; AVX2-NEXT: vzeroupper
3637 ; AVX512-LABEL: movmskb256:
3639 ; AVX512-NEXT: vpmovmskb %ymm0, %eax
3640 ; AVX512-NEXT: vzeroupper
3642 %a = icmp slt <32 x i8> %x, zeroinitializer
3643 %b = bitcast <32 x i1> %a to i32
3647 ; Extract multiple elements from a vector compare.
; Extracts lanes 3, 8 and 15 of 'icmp eq <16 x i8> %x, %y' and computes
; %e3 & (%e1 ^ %e2) as %u2.
3649 define i1 @movmsk_v16i8(<16 x i8> %x, <16 x i8> %y) {
3650 ; SSE-LABEL: movmsk_v16i8:
3652 ; SSE-NEXT: pcmpeqb %xmm1, %xmm0
3653 ; SSE-NEXT: pmovmskb %xmm0, %eax
3654 ; SSE-NEXT: movl %eax, %ecx
3655 ; SSE-NEXT: shrl $15, %ecx
3656 ; SSE-NEXT: movl %eax, %edx
3657 ; SSE-NEXT: shrl $8, %edx
3658 ; SSE-NEXT: andl $1, %edx
3659 ; SSE-NEXT: andl $8, %eax
3660 ; SSE-NEXT: shrl $3, %eax
3661 ; SSE-NEXT: xorl %edx, %eax
3662 ; SSE-NEXT: andl %ecx, %eax
3663 ; SSE-NEXT: # kill: def $al killed $al killed $eax
3666 ; AVX1OR2-LABEL: movmsk_v16i8:
3668 ; AVX1OR2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3669 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
3670 ; AVX1OR2-NEXT: movl %eax, %ecx
3671 ; AVX1OR2-NEXT: shrl $15, %ecx
3672 ; AVX1OR2-NEXT: movl %eax, %edx
3673 ; AVX1OR2-NEXT: shrl $8, %edx
3674 ; AVX1OR2-NEXT: andl $1, %edx
3675 ; AVX1OR2-NEXT: andl $8, %eax
3676 ; AVX1OR2-NEXT: shrl $3, %eax
3677 ; AVX1OR2-NEXT: xorl %edx, %eax
3678 ; AVX1OR2-NEXT: andl %ecx, %eax
3679 ; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax
3680 ; AVX1OR2-NEXT: retq
3682 ; KNL-LABEL: movmsk_v16i8:
3684 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3685 ; KNL-NEXT: vpextrb $15, %xmm0, %ecx
3686 ; KNL-NEXT: vpextrb $8, %xmm0, %edx
3687 ; KNL-NEXT: vpextrb $3, %xmm0, %eax
3688 ; KNL-NEXT: xorl %edx, %eax
3689 ; KNL-NEXT: andl %ecx, %eax
3690 ; KNL-NEXT: # kill: def $al killed $al killed $eax
3693 ; SKX-LABEL: movmsk_v16i8:
3695 ; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
3696 ; SKX-NEXT: kshiftrw $15, %k0, %k1
3697 ; SKX-NEXT: kmovd %k1, %ecx
3698 ; SKX-NEXT: kshiftrw $8, %k0, %k1
3699 ; SKX-NEXT: kmovd %k1, %edx
3700 ; SKX-NEXT: kshiftrw $3, %k0, %k0
3701 ; SKX-NEXT: kmovd %k0, %eax
3702 ; SKX-NEXT: xorb %dl, %al
3703 ; SKX-NEXT: andb %cl, %al
3704 ; SKX-NEXT: # kill: def $al killed $al killed $eax
3706 %cmp = icmp eq <16 x i8> %x, %y
3707 %e1 = extractelement <16 x i1> %cmp, i32 3
3708 %e2 = extractelement <16 x i1> %cmp, i32 8
3709 %e3 = extractelement <16 x i1> %cmp, i32 15
3710 %u1 = xor i1 %e1, %e2
3711 %u2 = and i1 %e3, %u1
; ANDs together lanes 0, 1, 4 and 7 of 'icmp sgt <8 x i16> %x, %y' (%u3).
; The immediate -109 in the expectations is 0x93 as an i8, i.e. mask bits
; 0, 1, 4 and 7.
3715 define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) {
3716 ; SSE-LABEL: movmsk_v8i16:
3718 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0
3719 ; SSE-NEXT: packsswb %xmm0, %xmm0
3720 ; SSE-NEXT: pmovmskb %xmm0, %eax
3721 ; SSE-NEXT: notb %al
3722 ; SSE-NEXT: testb $-109, %al
3723 ; SSE-NEXT: sete %al
3726 ; AVX1OR2-LABEL: movmsk_v8i16:
3728 ; AVX1OR2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
3729 ; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
3730 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
3731 ; AVX1OR2-NEXT: notb %al
3732 ; AVX1OR2-NEXT: testb $-109, %al
3733 ; AVX1OR2-NEXT: sete %al
3734 ; AVX1OR2-NEXT: retq
3736 ; KNL-LABEL: movmsk_v8i16:
3738 ; KNL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
3739 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
3740 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
3741 ; KNL-NEXT: kmovw %k0, %eax
3742 ; KNL-NEXT: testb $-109, %al
3743 ; KNL-NEXT: sete %al
3744 ; KNL-NEXT: vzeroupper
3747 ; SKX-LABEL: movmsk_v8i16:
3749 ; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
3750 ; SKX-NEXT: knotb %k0, %k0
3751 ; SKX-NEXT: kmovd %k0, %eax
3752 ; SKX-NEXT: testb $-109, %al
3753 ; SKX-NEXT: sete %al
3755 %cmp = icmp sgt <8 x i16> %x, %y
3756 %e1 = extractelement <8 x i1> %cmp, i32 0
3757 %e2 = extractelement <8 x i1> %cmp, i32 1
3758 %e3 = extractelement <8 x i1> %cmp, i32 7
3759 %e4 = extractelement <8 x i1> %cmp, i32 4
3760 %u1 = and i1 %e1, %e2
3761 %u2 = and i1 %e3, %e4
3762 %u3 = and i1 %u1, %u2
3766 ; TODO: Replace shift+mask chain with AND+CMP.
; XORs lanes 2 and 3 of 'icmp slt <4 x i32> %x, %y' (%u1).
3767 define i1 @movmsk_v4i32(<4 x i32> %x, <4 x i32> %y) {
3768 ; SSE-LABEL: movmsk_v4i32:
3770 ; SSE-NEXT: pcmpgtd %xmm0, %xmm1
3771 ; SSE-NEXT: movmskps %xmm1, %eax
3772 ; SSE-NEXT: movl %eax, %ecx
3773 ; SSE-NEXT: shrb $3, %cl
3774 ; SSE-NEXT: andb $4, %al
3775 ; SSE-NEXT: shrb $2, %al
3776 ; SSE-NEXT: xorb %cl, %al
3777 ; SSE-NEXT: # kill: def $al killed $al killed $eax
3780 ; AVX1OR2-LABEL: movmsk_v4i32:
3782 ; AVX1OR2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
3783 ; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
3784 ; AVX1OR2-NEXT: movl %eax, %ecx
3785 ; AVX1OR2-NEXT: shrb $3, %cl
3786 ; AVX1OR2-NEXT: andb $4, %al
3787 ; AVX1OR2-NEXT: shrb $2, %al
3788 ; AVX1OR2-NEXT: xorb %cl, %al
3789 ; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax
3790 ; AVX1OR2-NEXT: retq
3792 ; KNL-LABEL: movmsk_v4i32:
3794 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3795 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3796 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
3797 ; KNL-NEXT: kshiftrw $3, %k0, %k1
3798 ; KNL-NEXT: kmovw %k1, %ecx
3799 ; KNL-NEXT: kshiftrw $2, %k0, %k0
3800 ; KNL-NEXT: kmovw %k0, %eax
3801 ; KNL-NEXT: xorb %cl, %al
3802 ; KNL-NEXT: # kill: def $al killed $al killed $eax
3803 ; KNL-NEXT: vzeroupper
3806 ; SKX-LABEL: movmsk_v4i32:
3808 ; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
3809 ; SKX-NEXT: kshiftrb $3, %k0, %k1
3810 ; SKX-NEXT: kmovd %k1, %ecx
3811 ; SKX-NEXT: kshiftrb $2, %k0, %k0
3812 ; SKX-NEXT: kmovd %k0, %eax
3813 ; SKX-NEXT: xorb %cl, %al
3814 ; SKX-NEXT: # kill: def $al killed $al killed $eax
3816 %cmp = icmp slt <4 x i32> %x, %y
3817 %e1 = extractelement <4 x i1> %cmp, i32 2
3818 %e2 = extractelement <4 x i1> %cmp, i32 3
3819 %u1 = xor i1 %e1, %e2
; ANDs both lanes of 'icmp ne <2 x i64> %x, %y' (%u1), i.e. "every lane
; differs". Most expectations compute the equality mask instead and check
; that it is zero.
3823 define i1 @movmsk_and_v2i64(<2 x i64> %x, <2 x i64> %y) {
3824 ; SSE2-LABEL: movmsk_and_v2i64:
3826 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
3827 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
3828 ; SSE2-NEXT: pand %xmm0, %xmm1
3829 ; SSE2-NEXT: movmskpd %xmm1, %eax
3830 ; SSE2-NEXT: testl %eax, %eax
3831 ; SSE2-NEXT: sete %al
3834 ; SSE41-LABEL: movmsk_and_v2i64:
3836 ; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
3837 ; SSE41-NEXT: movmskpd %xmm0, %eax
3838 ; SSE41-NEXT: testl %eax, %eax
3839 ; SSE41-NEXT: sete %al
3842 ; AVX1OR2-LABEL: movmsk_and_v2i64:
3844 ; AVX1OR2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
3845 ; AVX1OR2-NEXT: vtestpd %xmm0, %xmm0
3846 ; AVX1OR2-NEXT: sete %al
3847 ; AVX1OR2-NEXT: retq
3849 ; KNL-LABEL: movmsk_and_v2i64:
3851 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3852 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3853 ; KNL-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3854 ; KNL-NEXT: kmovw %k0, %eax
3855 ; KNL-NEXT: testb $3, %al
3856 ; KNL-NEXT: sete %al
3857 ; KNL-NEXT: vzeroupper
3860 ; SKX-LABEL: movmsk_and_v2i64:
3862 ; SKX-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
3863 ; SKX-NEXT: kmovd %k0, %eax
3864 ; SKX-NEXT: cmpb $3, %al
3865 ; SKX-NEXT: sete %al
3867 %cmp = icmp ne <2 x i64> %x, %y
3868 %e1 = extractelement <2 x i1> %cmp, i32 0
3869 %e2 = extractelement <2 x i1> %cmp, i32 1
3870 %u1 = and i1 %e1, %e2
; ORs both lanes of 'icmp ne <2 x i64> %x, %y' (%u1), i.e. "any lane differs".
; The SSE41 and AVX expectations reduce this to xor + (v)ptest.
3874 define i1 @movmsk_or_v2i64(<2 x i64> %x, <2 x i64> %y) {
3875 ; SSE2-LABEL: movmsk_or_v2i64:
3877 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
3878 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
3879 ; SSE2-NEXT: pand %xmm0, %xmm1
3880 ; SSE2-NEXT: movmskpd %xmm1, %eax
3881 ; SSE2-NEXT: cmpl $3, %eax
3882 ; SSE2-NEXT: setne %al
3885 ; SSE41-LABEL: movmsk_or_v2i64:
3887 ; SSE41-NEXT: pxor %xmm1, %xmm0
3888 ; SSE41-NEXT: ptest %xmm0, %xmm0
3889 ; SSE41-NEXT: setne %al
3892 ; AVX-LABEL: movmsk_or_v2i64:
3894 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
3895 ; AVX-NEXT: vptest %xmm0, %xmm0
3896 ; AVX-NEXT: setne %al
3898 %cmp = icmp ne <2 x i64> %x, %y
3899 %e1 = extractelement <2 x i1> %cmp, i32 0
3900 %e2 = extractelement <2 x i1> %cmp, i32 1
3901 %u1 = or i1 %e1, %e2
; ORs lanes 1, 2 and 3 of 'fcmp ueq <4 x float> %x, %y' (%u2). The immediate
; 14 in the expectations is 0b1110, i.e. mask bits 1, 2 and 3.
3905 define i1 @movmsk_v4f32(<4 x float> %x, <4 x float> %y) {
3906 ; SSE-LABEL: movmsk_v4f32:
3908 ; SSE-NEXT: movaps %xmm0, %xmm2
3909 ; SSE-NEXT: cmpeqps %xmm1, %xmm2
3910 ; SSE-NEXT: cmpunordps %xmm1, %xmm0
3911 ; SSE-NEXT: orps %xmm2, %xmm0
3912 ; SSE-NEXT: movmskps %xmm0, %eax
3913 ; SSE-NEXT: testb $14, %al
3914 ; SSE-NEXT: setne %al
3917 ; AVX1OR2-LABEL: movmsk_v4f32:
3919 ; AVX1OR2-NEXT: vcmpeq_uqps %xmm1, %xmm0, %xmm0
3920 ; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
3921 ; AVX1OR2-NEXT: testb $14, %al
3922 ; AVX1OR2-NEXT: setne %al
3923 ; AVX1OR2-NEXT: retq
3925 ; KNL-LABEL: movmsk_v4f32:
3927 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3928 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3929 ; KNL-NEXT: vcmpeq_uqps %zmm1, %zmm0, %k0
3930 ; KNL-NEXT: kmovw %k0, %eax
3931 ; KNL-NEXT: testb $14, %al
3932 ; KNL-NEXT: setne %al
3933 ; KNL-NEXT: vzeroupper
3936 ; SKX-LABEL: movmsk_v4f32:
3938 ; SKX-NEXT: vcmpeq_uqps %xmm1, %xmm0, %k0
3939 ; SKX-NEXT: kmovd %k0, %eax
3940 ; SKX-NEXT: testb $14, %al
3941 ; SKX-NEXT: setne %al
3943 %cmp = fcmp ueq <4 x float> %x, %y
3944 %e1 = extractelement <4 x i1> %cmp, i32 1
3945 %e2 = extractelement <4 x i1> %cmp, i32 2
3946 %e3 = extractelement <4 x i1> %cmp, i32 3
3947 %u1 = or i1 %e1, %e2
3948 %u2 = or i1 %u1, %e3
; ANDs both lanes of 'fcmp oge <2 x double> %x, %y' (%u1). The expectations
; implement oge as (v)cmplepd with the operands swapped.
3952 define i1 @movmsk_and_v2f64(<2 x double> %x, <2 x double> %y) {
3953 ; SSE-LABEL: movmsk_and_v2f64:
3955 ; SSE-NEXT: cmplepd %xmm0, %xmm1
3956 ; SSE-NEXT: movmskpd %xmm1, %eax
3957 ; SSE-NEXT: cmpl $3, %eax
3958 ; SSE-NEXT: sete %al
3961 ; AVX1OR2-LABEL: movmsk_and_v2f64:
3963 ; AVX1OR2-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
3964 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
3965 ; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0
3966 ; AVX1OR2-NEXT: setb %al
3967 ; AVX1OR2-NEXT: retq
3969 ; KNL-LABEL: movmsk_and_v2f64:
3971 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3972 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3973 ; KNL-NEXT: vcmplepd %zmm0, %zmm1, %k0
3974 ; KNL-NEXT: knotw %k0, %k0
3975 ; KNL-NEXT: kmovw %k0, %eax
3976 ; KNL-NEXT: testb $3, %al
3977 ; KNL-NEXT: sete %al
3978 ; KNL-NEXT: vzeroupper
3981 ; SKX-LABEL: movmsk_and_v2f64:
3983 ; SKX-NEXT: vcmplepd %xmm0, %xmm1, %k0
3984 ; SKX-NEXT: kmovd %k0, %eax
3985 ; SKX-NEXT: cmpb $3, %al
3986 ; SKX-NEXT: sete %al
3988 %cmp = fcmp oge <2 x double> %x, %y
3989 %e1 = extractelement <2 x i1> %cmp, i32 0
3990 %e2 = extractelement <2 x i1> %cmp, i32 1
3991 %u1 = and i1 %e1, %e2
; ORs both lanes of 'fcmp oge <2 x double> %x, %y' (%u1). The expectations
; implement oge as (v)cmplepd with the operands swapped and test the mask for
; non-zero.
3995 define i1 @movmsk_or_v2f64(<2 x double> %x, <2 x double> %y) {
3996 ; SSE-LABEL: movmsk_or_v2f64:
3998 ; SSE-NEXT: cmplepd %xmm0, %xmm1
3999 ; SSE-NEXT: movmskpd %xmm1, %eax
4000 ; SSE-NEXT: testl %eax, %eax
4001 ; SSE-NEXT: setne %al
4004 ; AVX1OR2-LABEL: movmsk_or_v2f64:
4006 ; AVX1OR2-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
4007 ; AVX1OR2-NEXT: vtestpd %xmm0, %xmm0
4008 ; AVX1OR2-NEXT: setne %al
4009 ; AVX1OR2-NEXT: retq
4011 ; KNL-LABEL: movmsk_or_v2f64:
4013 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4014 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4015 ; KNL-NEXT: vcmplepd %zmm0, %zmm1, %k0
4016 ; KNL-NEXT: kmovw %k0, %eax
4017 ; KNL-NEXT: testb $3, %al
4018 ; KNL-NEXT: setne %al
4019 ; KNL-NEXT: vzeroupper
4022 ; SKX-LABEL: movmsk_or_v2f64:
4024 ; SKX-NEXT: vcmplepd %xmm0, %xmm1, %k0
4025 ; SKX-NEXT: kortestb %k0, %k0
4026 ; SKX-NEXT: setne %al
4028 %cmp = fcmp oge <2 x double> %x, %y
4029 %e1 = extractelement <2 x i1> %cmp, i32 0
4030 %e2 = extractelement <2 x i1> %cmp, i32 1
4031 %u1 = or i1 %e1, %e2
4035 ; Extract an element at a non-constant index.
; Extracts lane %z (a runtime index) of 'icmp eq <16 x i8> %x, %y'. The SSE,
; AVX1OR2 and KNL expectations bit-test the (v)pmovmskb result with btl; the
; SKX expectation stores the vpmovm2b-expanded mask to the stack and loads the
; indexed byte.
4037 define i1 @movmsk_v16i8_var(<16 x i8> %x, <16 x i8> %y, i32 %z) {
4038 ; SSE-LABEL: movmsk_v16i8_var:
4040 ; SSE-NEXT: pcmpeqb %xmm1, %xmm0
4041 ; SSE-NEXT: pmovmskb %xmm0, %eax
4042 ; SSE-NEXT: btl %edi, %eax
4043 ; SSE-NEXT: setb %al
4046 ; AVX1OR2-LABEL: movmsk_v16i8_var:
4048 ; AVX1OR2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
4049 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
4050 ; AVX1OR2-NEXT: btl %edi, %eax
4051 ; AVX1OR2-NEXT: setb %al
4052 ; AVX1OR2-NEXT: retq
4054 ; KNL-LABEL: movmsk_v16i8_var:
4056 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
4057 ; KNL-NEXT: vpmovmskb %xmm0, %eax
4058 ; KNL-NEXT: btl %edi, %eax
4059 ; KNL-NEXT: setb %al
4062 ; SKX-LABEL: movmsk_v16i8_var:
4064 ; SKX-NEXT: # kill: def $edi killed $edi def $rdi
4065 ; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
4066 ; SKX-NEXT: vpmovm2b %k0, %xmm0
4067 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4068 ; SKX-NEXT: andl $15, %edi
4069 ; SKX-NEXT: movzbl -24(%rsp,%rdi), %eax
4071 %cmp = icmp eq <16 x i8> %x, %y
4072 %val = extractelement <16 x i1> %cmp, i32 %z
; Extracts lane %z (a runtime index) of 'icmp sgt <8 x i16> %x, %y'. The SSE
; and AVX1OR2 expectations bit-test the packed (v)pmovmskb result with btl;
; the KNL and SKX expectations store the expanded mask to the stack and load
; the indexed element.
4076 define i1 @movmsk_v8i16_var(<8 x i16> %x, <8 x i16> %y, i32 %z) {
4077 ; SSE-LABEL: movmsk_v8i16_var:
4079 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0
4080 ; SSE-NEXT: packsswb %xmm0, %xmm0
4081 ; SSE-NEXT: pmovmskb %xmm0, %eax
4082 ; SSE-NEXT: btl %edi, %eax
4083 ; SSE-NEXT: setb %al
4086 ; AVX1OR2-LABEL: movmsk_v8i16_var:
4088 ; AVX1OR2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
4089 ; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
4090 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
4091 ; AVX1OR2-NEXT: btl %edi, %eax
4092 ; AVX1OR2-NEXT: setb %al
4093 ; AVX1OR2-NEXT: retq
4095 ; KNL-LABEL: movmsk_v8i16_var:
4097 ; KNL-NEXT: # kill: def $edi killed $edi def $rdi
4098 ; KNL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
4099 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
4100 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
4101 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
4102 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
4103 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4104 ; KNL-NEXT: andl $7, %edi
4105 ; KNL-NEXT: movzbl -24(%rsp,%rdi,2), %eax
4106 ; KNL-NEXT: vzeroupper
4109 ; SKX-LABEL: movmsk_v8i16_var:
4111 ; SKX-NEXT: # kill: def $edi killed $edi def $rdi
4112 ; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
4113 ; SKX-NEXT: vpmovm2w %k0, %xmm0
4114 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4115 ; SKX-NEXT: andl $7, %edi
4116 ; SKX-NEXT: movzbl -24(%rsp,%rdi,2), %eax
4118 %cmp = icmp sgt <8 x i16> %x, %y
4119 %val = extractelement <8 x i1> %cmp, i32 %z
; Extracts lane %z (a runtime index) of 'icmp slt <4 x i32> %x, %y'. The SSE
; and AVX1OR2 expectations bit-test the (v)movmskps result with btl; the KNL
; and SKX expectations store the expanded mask to the stack and load the
; indexed element.
4123 define i1 @movmsk_v4i32_var(<4 x i32> %x, <4 x i32> %y, i32 %z) {
4124 ; SSE-LABEL: movmsk_v4i32_var:
4126 ; SSE-NEXT: pcmpgtd %xmm0, %xmm1
4127 ; SSE-NEXT: movmskps %xmm1, %eax
4128 ; SSE-NEXT: btl %edi, %eax
4129 ; SSE-NEXT: setb %al
4132 ; AVX1OR2-LABEL: movmsk_v4i32_var:
4134 ; AVX1OR2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
4135 ; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
4136 ; AVX1OR2-NEXT: btl %edi, %eax
4137 ; AVX1OR2-NEXT: setb %al
4138 ; AVX1OR2-NEXT: retq
4140 ; KNL-LABEL: movmsk_v4i32_var:
4142 ; KNL-NEXT: # kill: def $edi killed $edi def $rdi
4143 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4144 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4145 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
4146 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
4147 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4148 ; KNL-NEXT: andl $3, %edi
4149 ; KNL-NEXT: movzbl -24(%rsp,%rdi,4), %eax
4150 ; KNL-NEXT: vzeroupper
4153 ; SKX-LABEL: movmsk_v4i32_var:
4155 ; SKX-NEXT: # kill: def $edi killed $edi def $rdi
4156 ; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
4157 ; SKX-NEXT: vpmovm2d %k0, %xmm0
4158 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4159 ; SKX-NEXT: andl $3, %edi
4160 ; SKX-NEXT: movzbl -24(%rsp,%rdi,4), %eax
4162 %cmp = icmp slt <4 x i32> %x, %y
4163 %val = extractelement <4 x i1> %cmp, i32 %z
; Extracts lane %z (a runtime index) of 'icmp ne <2 x i64> %x, %y'. The SSE
; and AVX1OR2 expectations compute the equality mask, invert its two bits
; with xorl $3, and bit-test with btl; the KNL and SKX expectations store the
; expanded mask to the stack and load the indexed element.
4167 define i1 @movmsk_v2i64_var(<2 x i64> %x, <2 x i64> %y, i32 %z) {
4168 ; SSE2-LABEL: movmsk_v2i64_var:
4170 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
4171 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
4172 ; SSE2-NEXT: pand %xmm0, %xmm1
4173 ; SSE2-NEXT: movmskpd %xmm1, %eax
4174 ; SSE2-NEXT: xorl $3, %eax
4175 ; SSE2-NEXT: btl %edi, %eax
4176 ; SSE2-NEXT: setb %al
4179 ; SSE41-LABEL: movmsk_v2i64_var:
4181 ; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
4182 ; SSE41-NEXT: movmskpd %xmm0, %eax
4183 ; SSE41-NEXT: xorl $3, %eax
4184 ; SSE41-NEXT: btl %edi, %eax
4185 ; SSE41-NEXT: setb %al
4188 ; AVX1OR2-LABEL: movmsk_v2i64_var:
4190 ; AVX1OR2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
4191 ; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
4192 ; AVX1OR2-NEXT: xorl $3, %eax
4193 ; AVX1OR2-NEXT: btl %edi, %eax
4194 ; AVX1OR2-NEXT: setb %al
4195 ; AVX1OR2-NEXT: retq
4197 ; KNL-LABEL: movmsk_v2i64_var:
4199 ; KNL-NEXT: # kill: def $edi killed $edi def $rdi
4200 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4201 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4202 ; KNL-NEXT: vpcmpneqq %zmm1, %zmm0, %k1
4203 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
4204 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4205 ; KNL-NEXT: andl $1, %edi
4206 ; KNL-NEXT: movzbl -24(%rsp,%rdi,8), %eax
4207 ; KNL-NEXT: vzeroupper
4210 ; SKX-LABEL: movmsk_v2i64_var:
4212 ; SKX-NEXT: # kill: def $edi killed $edi def $rdi
4213 ; SKX-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
4214 ; SKX-NEXT: vpmovm2q %k0, %xmm0
4215 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4216 ; SKX-NEXT: andl $1, %edi
4217 ; SKX-NEXT: movzbl -24(%rsp,%rdi,8), %eax
4219 %cmp = icmp ne <2 x i64> %x, %y
4220 %val = extractelement <2 x i1> %cmp, i32 %z
; Extracts lane %z (a runtime index) of 'fcmp ueq <4 x float> %x, %y'. The SSE
; and AVX1OR2 expectations bit-test the (v)movmskps result with btl; the KNL
; and SKX expectations store the expanded mask to the stack and load the
; indexed element.
4224 define i1 @movmsk_v4f32_var(<4 x float> %x, <4 x float> %y, i32 %z) {
4225 ; SSE-LABEL: movmsk_v4f32_var:
4227 ; SSE-NEXT: movaps %xmm0, %xmm2
4228 ; SSE-NEXT: cmpeqps %xmm1, %xmm2
4229 ; SSE-NEXT: cmpunordps %xmm1, %xmm0
4230 ; SSE-NEXT: orps %xmm2, %xmm0
4231 ; SSE-NEXT: movmskps %xmm0, %eax
4232 ; SSE-NEXT: btl %edi, %eax
4233 ; SSE-NEXT: setb %al
4236 ; AVX1OR2-LABEL: movmsk_v4f32_var:
4238 ; AVX1OR2-NEXT: vcmpeq_uqps %xmm1, %xmm0, %xmm0
4239 ; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
4240 ; AVX1OR2-NEXT: btl %edi, %eax
4241 ; AVX1OR2-NEXT: setb %al
4242 ; AVX1OR2-NEXT: retq
4244 ; KNL-LABEL: movmsk_v4f32_var:
4246 ; KNL-NEXT: # kill: def $edi killed $edi def $rdi
4247 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4248 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4249 ; KNL-NEXT: vcmpeq_uqps %zmm1, %zmm0, %k1
4250 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
4251 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4252 ; KNL-NEXT: andl $3, %edi
4253 ; KNL-NEXT: movzbl -24(%rsp,%rdi,4), %eax
4254 ; KNL-NEXT: vzeroupper
4257 ; SKX-LABEL: movmsk_v4f32_var:
4259 ; SKX-NEXT: # kill: def $edi killed $edi def $rdi
4260 ; SKX-NEXT: vcmpeq_uqps %xmm1, %xmm0, %k0
4261 ; SKX-NEXT: vpmovm2d %k0, %xmm0
4262 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4263 ; SKX-NEXT: andl $3, %edi
4264 ; SKX-NEXT: movzbl -24(%rsp,%rdi,4), %eax
4266 %cmp = fcmp ueq <4 x float> %x, %y
4267 %val = extractelement <4 x i1> %cmp, i32 %z
; Extracts lane %z (a runtime index) of 'fcmp oge <2 x double> %x, %y'. The
; SSE and AVX1OR2 expectations bit-test the (v)movmskpd result with btl; the
; KNL and SKX expectations store the expanded mask to the stack and load the
; indexed element.
4271 define i1 @movmsk_v2f64_var(<2 x double> %x, <2 x double> %y, i32 %z) {
4272 ; SSE-LABEL: movmsk_v2f64_var:
4274 ; SSE-NEXT: cmplepd %xmm0, %xmm1
4275 ; SSE-NEXT: movmskpd %xmm1, %eax
4276 ; SSE-NEXT: btl %edi, %eax
4277 ; SSE-NEXT: setb %al
4280 ; AVX1OR2-LABEL: movmsk_v2f64_var:
4282 ; AVX1OR2-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
4283 ; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
4284 ; AVX1OR2-NEXT: btl %edi, %eax
4285 ; AVX1OR2-NEXT: setb %al
4286 ; AVX1OR2-NEXT: retq
4288 ; KNL-LABEL: movmsk_v2f64_var:
4290 ; KNL-NEXT: # kill: def $edi killed $edi def $rdi
4291 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4292 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4293 ; KNL-NEXT: vcmplepd %zmm0, %zmm1, %k1
4294 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
4295 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4296 ; KNL-NEXT: andl $1, %edi
4297 ; KNL-NEXT: movzbl -24(%rsp,%rdi,8), %eax
4298 ; KNL-NEXT: vzeroupper
4301 ; SKX-LABEL: movmsk_v2f64_var:
4303 ; SKX-NEXT: # kill: def $edi killed $edi def $rdi
4304 ; SKX-NEXT: vcmplepd %xmm0, %xmm1, %k0
4305 ; SKX-NEXT: vpmovm2q %k0, %xmm0
4306 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4307 ; SKX-NEXT: andl $1, %edi
4308 ; SKX-NEXT: movzbl -24(%rsp,%rdi,8), %eax
4310 %cmp = fcmp oge <2 x double> %x, %y
4311 %val = extractelement <2 x i1> %cmp, i32 %z
4315 ; TODO: We expect a result similar to PR39665_c_ray_opt,
4316 ; but this is not the case in practice.
; Scalar form: extracts both lanes of 'fcmp ogt <2 x double> %x, %y', ANDs
; them, and selects 42 when both are true, otherwise 99 (%r). The current
; expectations test each mask bit separately and use two cmovs.
4317 define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
4318 ; SSE-LABEL: PR39665_c_ray:
4320 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
4321 ; SSE-NEXT: movmskpd %xmm1, %ecx
4322 ; SSE-NEXT: testb $2, %cl
4323 ; SSE-NEXT: movl $42, %eax
4324 ; SSE-NEXT: movl $99, %edx
4325 ; SSE-NEXT: cmovel %edx, %eax
4326 ; SSE-NEXT: testb $1, %cl
4327 ; SSE-NEXT: cmovel %edx, %eax
4330 ; AVX1OR2-LABEL: PR39665_c_ray:
4332 ; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
4333 ; AVX1OR2-NEXT: vmovmskpd %xmm0, %ecx
4334 ; AVX1OR2-NEXT: testb $2, %cl
4335 ; AVX1OR2-NEXT: movl $42, %eax
4336 ; AVX1OR2-NEXT: movl $99, %edx
4337 ; AVX1OR2-NEXT: cmovel %edx, %eax
4338 ; AVX1OR2-NEXT: testb $1, %cl
4339 ; AVX1OR2-NEXT: cmovel %edx, %eax
4340 ; AVX1OR2-NEXT: retq
4342 ; KNL-LABEL: PR39665_c_ray:
4344 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4345 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4346 ; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k0
4347 ; KNL-NEXT: kmovw %k0, %ecx
4348 ; KNL-NEXT: testb $2, %cl
4349 ; KNL-NEXT: movl $42, %eax
4350 ; KNL-NEXT: movl $99, %edx
4351 ; KNL-NEXT: cmovel %edx, %eax
4352 ; KNL-NEXT: testb $1, %cl
4353 ; KNL-NEXT: cmovel %edx, %eax
4354 ; KNL-NEXT: vzeroupper
4357 ; SKX-LABEL: PR39665_c_ray:
4359 ; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
4360 ; SKX-NEXT: kmovd %k0, %ecx
4361 ; SKX-NEXT: testb $2, %cl
4362 ; SKX-NEXT: movl $42, %eax
4363 ; SKX-NEXT: movl $99, %edx
4364 ; SKX-NEXT: cmovel %edx, %eax
4365 ; SKX-NEXT: testb $1, %cl
4366 ; SKX-NEXT: cmovel %edx, %eax
4368 %cmp = fcmp ogt <2 x double> %x, %y
4369 %e1 = extractelement <2 x i1> %cmp, i32 0
4370 %e2 = extractelement <2 x i1> %cmp, i32 1
4371 %u = and i1 %e1, %e2
4372 %r = select i1 %u, i32 42, i32 99
; Vector form of the same reduction: lane 1 of the 'fcmp ogt' result is
; shuffled into lane 0, ANDed with the original mask as a vector, and lane 0
; of that AND drives the 42/99 select (%r). The expectations use a single
; all-lanes mask test and one cmov.
4376 define i32 @PR39665_c_ray_opt(<2 x double> %x, <2 x double> %y) {
4377 ; SSE-LABEL: PR39665_c_ray_opt:
4379 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
4380 ; SSE-NEXT: movmskpd %xmm1, %eax
4381 ; SSE-NEXT: cmpl $3, %eax
4382 ; SSE-NEXT: movl $42, %ecx
4383 ; SSE-NEXT: movl $99, %eax
4384 ; SSE-NEXT: cmovel %ecx, %eax
4387 ; AVX1OR2-LABEL: PR39665_c_ray_opt:
4389 ; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
4390 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
4391 ; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0
4392 ; AVX1OR2-NEXT: movl $42, %ecx
4393 ; AVX1OR2-NEXT: movl $99, %eax
4394 ; AVX1OR2-NEXT: cmovbl %ecx, %eax
4395 ; AVX1OR2-NEXT: retq
4397 ; KNL-LABEL: PR39665_c_ray_opt:
4399 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4400 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4401 ; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k0
4402 ; KNL-NEXT: knotw %k0, %k0
4403 ; KNL-NEXT: kmovw %k0, %eax
4404 ; KNL-NEXT: testb $3, %al
4405 ; KNL-NEXT: movl $42, %ecx
4406 ; KNL-NEXT: movl $99, %eax
4407 ; KNL-NEXT: cmovel %ecx, %eax
4408 ; KNL-NEXT: vzeroupper
4411 ; SKX-LABEL: PR39665_c_ray_opt:
4413 ; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
4414 ; SKX-NEXT: kmovd %k0, %eax
4415 ; SKX-NEXT: cmpb $3, %al
4416 ; SKX-NEXT: movl $42, %ecx
4417 ; SKX-NEXT: movl $99, %eax
4418 ; SKX-NEXT: cmovel %ecx, %eax
4420 %cmp = fcmp ogt <2 x double> %x, %y
4421 %shift = shufflevector <2 x i1> %cmp, <2 x i1> poison, <2 x i32> <i32 1, i32 undef>
4422 %1 = and <2 x i1> %cmp, %shift
4423 %u = extractelement <2 x i1> %1, i64 0
4424 %r = select i1 %u, i32 42, i32 99
; Select form of the same reduction: the AND of the two 'fcmp fast ogt' lanes
; is written as 'select i1 %b0, i1 %b1, i1 false', and the result drives the
; 42/99 select (%r). The expectations use a single mask test and one cmov.
4428 define i32 @PR39665_c_ray_select(<2 x double> %x, <2 x double> %y) {
4429 ; SSE-LABEL: PR39665_c_ray_select:
4431 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
4432 ; SSE-NEXT: movmskpd %xmm1, %eax
4433 ; SSE-NEXT: cmpl $3, %eax
4434 ; SSE-NEXT: movl $42, %ecx
4435 ; SSE-NEXT: movl $99, %eax
4436 ; SSE-NEXT: cmovel %ecx, %eax
4439 ; AVX1OR2-LABEL: PR39665_c_ray_select:
4441 ; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
4442 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
4443 ; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0
4444 ; AVX1OR2-NEXT: movl $42, %ecx
4445 ; AVX1OR2-NEXT: movl $99, %eax
4446 ; AVX1OR2-NEXT: cmovbl %ecx, %eax
4447 ; AVX1OR2-NEXT: retq
4449 ; KNL-LABEL: PR39665_c_ray_select:
4451 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4452 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4453 ; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k0
4454 ; KNL-NEXT: knotw %k0, %k0
4455 ; KNL-NEXT: kmovw %k0, %eax
4456 ; KNL-NEXT: testb $3, %al
4457 ; KNL-NEXT: movl $42, %ecx
4458 ; KNL-NEXT: movl $99, %eax
4459 ; KNL-NEXT: cmovel %ecx, %eax
4460 ; KNL-NEXT: vzeroupper
4463 ; SKX-LABEL: PR39665_c_ray_select:
4465 ; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
4466 ; SKX-NEXT: knotw %k0, %k0
4467 ; SKX-NEXT: kmovd %k0, %eax
4468 ; SKX-NEXT: testb $3, %al
4469 ; SKX-NEXT: movl $42, %ecx
4470 ; SKX-NEXT: movl $99, %eax
4471 ; SKX-NEXT: cmovel %ecx, %eax
4473 %cmp = fcmp fast ogt <2 x double> %x, %y
4474 %b0 = extractelement <2 x i1> %cmp, i64 0
4475 %b1 = extractelement <2 x i1> %cmp, i64 1
4476 %b = select i1 %b0, i1 %b1, i1 false
4477 %r = select i1 %b, i32 42, i32 99
4481 define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) {
4482 ; SSE2-LABEL: pr67287:
4483 ; SSE2: # %bb.0: # %entry
4484 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4485 ; SSE2-NEXT: pxor %xmm1, %xmm1
4486 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
4487 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
4488 ; SSE2-NEXT: movmskpd %xmm0, %eax
4489 ; SSE2-NEXT: testl %eax, %eax
4490 ; SSE2-NEXT: setne %al
4491 ; SSE2-NEXT: movd %xmm1, %ecx
4492 ; SSE2-NEXT: orb %al, %cl
4493 ; SSE2-NEXT: testb $1, %cl
4494 ; SSE2-NEXT: je .LBB98_2
4495 ; SSE2-NEXT: # %bb.1:
4496 ; SSE2-NEXT: movw $0, 0
4497 ; SSE2-NEXT: .LBB98_2: # %middle.block
4498 ; SSE2-NEXT: xorl %eax, %eax
4501 ; SSE41-LABEL: pr67287:
4502 ; SSE41: # %bb.0: # %entry
4503 ; SSE41-NEXT: pxor %xmm1, %xmm1
4504 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
4505 ; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
4506 ; SSE41-NEXT: movmskpd %xmm0, %eax
4507 ; SSE41-NEXT: testl %eax, %eax
4508 ; SSE41-NEXT: setne %al
4509 ; SSE41-NEXT: movd %xmm0, %ecx
4510 ; SSE41-NEXT: orb %al, %cl
4511 ; SSE41-NEXT: testb $1, %cl
4512 ; SSE41-NEXT: je .LBB98_2
4513 ; SSE41-NEXT: # %bb.1:
4514 ; SSE41-NEXT: movw $0, 0
4515 ; SSE41-NEXT: .LBB98_2: # %middle.block
4516 ; SSE41-NEXT: xorl %eax, %eax
4519 ; AVX1-LABEL: pr67287:
4520 ; AVX1: # %bb.0: # %entry
4521 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
4522 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
4523 ; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
4524 ; AVX1-NEXT: vtestpd %xmm0, %xmm0
4525 ; AVX1-NEXT: setne %al
4526 ; AVX1-NEXT: vmovd %xmm0, %ecx
4527 ; AVX1-NEXT: orb %al, %cl
4528 ; AVX1-NEXT: testb $1, %cl
4529 ; AVX1-NEXT: je .LBB98_2
4530 ; AVX1-NEXT: # %bb.1:
4531 ; AVX1-NEXT: movw $0, 0
4532 ; AVX1-NEXT: .LBB98_2: # %middle.block
4533 ; AVX1-NEXT: xorl %eax, %eax
4536 ; AVX2-LABEL: pr67287:
4537 ; AVX2: # %bb.0: # %entry
4538 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
4539 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
4540 ; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
4541 ; AVX2-NEXT: vtestpd %xmm0, %xmm0
4542 ; AVX2-NEXT: setne %al
4543 ; AVX2-NEXT: vmovd %xmm0, %ecx
4544 ; AVX2-NEXT: orb %al, %cl
4545 ; AVX2-NEXT: testb $1, %cl
4546 ; AVX2-NEXT: je .LBB98_2
4547 ; AVX2-NEXT: # %bb.1:
4548 ; AVX2-NEXT: movw $0, 0
4549 ; AVX2-NEXT: .LBB98_2: # %middle.block
4550 ; AVX2-NEXT: xorl %eax, %eax
4553 ; KNL-LABEL: pr67287:
4554 ; KNL: # %bb.0: # %entry
4555 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
4556 ; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
4557 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
4558 ; KNL-NEXT: kmovw %k0, %eax
4559 ; KNL-NEXT: kmovw %k0, %ecx
4560 ; KNL-NEXT: testb $3, %cl
4561 ; KNL-NEXT: setne %cl
4562 ; KNL-NEXT: orb %cl, %al
4563 ; KNL-NEXT: testb $1, %al
4564 ; KNL-NEXT: je .LBB98_2
4565 ; KNL-NEXT: # %bb.1:
4566 ; KNL-NEXT: movw $0, 0
4567 ; KNL-NEXT: .LBB98_2: # %middle.block
4568 ; KNL-NEXT: xorl %eax, %eax
4569 ; KNL-NEXT: vzeroupper
4572 ; SKX-LABEL: pr67287:
4573 ; SKX: # %bb.0: # %entry
4574 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
4575 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
4576 ; SKX-NEXT: vptestnmq %xmm0, %xmm0, %k0
4577 ; SKX-NEXT: kortestb %k0, %k0
4578 ; SKX-NEXT: setne %al
4579 ; SKX-NEXT: kmovd %k0, %ecx
4580 ; SKX-NEXT: orb %al, %cl
4581 ; SKX-NEXT: testb $1, %cl
4582 ; SKX-NEXT: je .LBB98_2
4583 ; SKX-NEXT: # %bb.1:
4584 ; SKX-NEXT: movw $0, 0
4585 ; SKX-NEXT: .LBB98_2: # %middle.block
4586 ; SKX-NEXT: xorl %eax, %eax
4589 %0 = and <2 x i64> %broadcast.splatinsert25, <i64 4294967295, i64 4294967295>
4590 %1 = icmp eq <2 x i64> %0, zeroinitializer
4591 %shift = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <2 x i32> <i32 1, i32 poison>
4592 %2 = or <2 x i1> %1, %shift
4593 %3 = extractelement <2 x i1> %2, i64 0
4594 %4 = extractelement <2 x i1> %1, i64 0
4596 br i1 %5, label %6, label %middle.block
4599 store i16 0, ptr null, align 2
4600 br label %middle.block
4602 middle.block: ; preds = %6, %entry