1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,KNL
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,SKX
; allones_v16i8_sign - icmp slt against zero yields one i1 per lane (set when
; the lane is negative). The <16 x i1> mask is bitcast to i16 and compared
; eq -1, so %tmp2 is true only when every lane has its sign bit set.
9 define i1 @allones_v16i8_sign(<16 x i8> %arg) {
10 ; SSE-LABEL: allones_v16i8_sign:
12 ; SSE-NEXT: pmovmskb %xmm0, %eax
13 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
17 ; AVX-LABEL: allones_v16i8_sign:
19 ; AVX-NEXT: vpmovmskb %xmm0, %eax
20 ; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
23 %tmp = icmp slt <16 x i8> %arg, zeroinitializer
24 %tmp1 = bitcast <16 x i1> %tmp to i16
25 %tmp2 = icmp eq i16 %tmp1, -1
; allzeros_v16i8_sign - icmp slt against zero yields one i1 per lane (set when
; the lane is negative). The <16 x i1> mask is bitcast to i16 and compared
; eq 0, so %tmp2 is true only when no lane has its sign bit set.
29 define i1 @allzeros_v16i8_sign(<16 x i8> %arg) {
30 ; SSE-LABEL: allzeros_v16i8_sign:
32 ; SSE-NEXT: pmovmskb %xmm0, %eax
33 ; SSE-NEXT: testl %eax, %eax
37 ; AVX-LABEL: allzeros_v16i8_sign:
39 ; AVX-NEXT: vpmovmskb %xmm0, %eax
40 ; AVX-NEXT: testl %eax, %eax
43 %tmp = icmp slt <16 x i8> %arg, zeroinitializer
44 %tmp1 = bitcast <16 x i1> %tmp to i16
45 %tmp2 = icmp eq i16 %tmp1, 0
; allones_v32i8_sign - 32-lane i8 variant of the sign-mask test. The per-lane
; "is negative" mask (icmp slt zero) is bitcast to i32 and compared eq -1, so
; %tmp2 is true only when all 32 lanes are negative.
49 define i1 @allones_v32i8_sign(<32 x i8> %arg) {
50 ; SSE-LABEL: allones_v32i8_sign:
52 ; SSE-NEXT: pand %xmm1, %xmm0
53 ; SSE-NEXT: pmovmskb %xmm0, %eax
54 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
58 ; AVX1-LABEL: allones_v32i8_sign:
60 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
61 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
62 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
63 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
65 ; AVX1-NEXT: vzeroupper
68 ; AVX2-LABEL: allones_v32i8_sign:
70 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
71 ; AVX2-NEXT: cmpl $-1, %eax
73 ; AVX2-NEXT: vzeroupper
76 ; AVX512-LABEL: allones_v32i8_sign:
78 ; AVX512-NEXT: vpmovmskb %ymm0, %eax
79 ; AVX512-NEXT: cmpl $-1, %eax
80 ; AVX512-NEXT: sete %al
81 ; AVX512-NEXT: vzeroupper
83 %tmp = icmp slt <32 x i8> %arg, zeroinitializer
84 %tmp1 = bitcast <32 x i1> %tmp to i32
85 %tmp2 = icmp eq i32 %tmp1, -1
; allzeros_v32i8_sign - 32-lane i8 variant of the sign-mask test. The per-lane
; "is negative" mask (icmp slt zero) is bitcast to i32 and compared eq 0, so
; %tmp2 is true only when none of the 32 lanes is negative.
89 define i1 @allzeros_v32i8_sign(<32 x i8> %arg) {
90 ; SSE-LABEL: allzeros_v32i8_sign:
92 ; SSE-NEXT: por %xmm1, %xmm0
93 ; SSE-NEXT: pmovmskb %xmm0, %eax
94 ; SSE-NEXT: testl %eax, %eax
98 ; AVX1-LABEL: allzeros_v32i8_sign:
100 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
101 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
102 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
103 ; AVX1-NEXT: testl %eax, %eax
104 ; AVX1-NEXT: sete %al
105 ; AVX1-NEXT: vzeroupper
108 ; AVX2-LABEL: allzeros_v32i8_sign:
110 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
111 ; AVX2-NEXT: testl %eax, %eax
112 ; AVX2-NEXT: sete %al
113 ; AVX2-NEXT: vzeroupper
116 ; AVX512-LABEL: allzeros_v32i8_sign:
118 ; AVX512-NEXT: vpmovmskb %ymm0, %eax
119 ; AVX512-NEXT: testl %eax, %eax
120 ; AVX512-NEXT: sete %al
121 ; AVX512-NEXT: vzeroupper
123 %tmp = icmp slt <32 x i8> %arg, zeroinitializer
124 %tmp1 = bitcast <32 x i1> %tmp to i32
125 %tmp2 = icmp eq i32 %tmp1, 0
; allones_v64i8_sign - 64-lane i8 variant of the sign-mask test. The per-lane
; "is negative" mask (icmp slt zero) is bitcast to i64 and compared eq -1, so
; %tmp2 is true only when all 64 lanes are negative.
129 define i1 @allones_v64i8_sign(<64 x i8> %arg) {
130 ; SSE-LABEL: allones_v64i8_sign:
132 ; SSE-NEXT: pand %xmm2, %xmm0
133 ; SSE-NEXT: pand %xmm3, %xmm1
134 ; SSE-NEXT: pand %xmm0, %xmm1
135 ; SSE-NEXT: pmovmskb %xmm1, %eax
136 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
140 ; AVX1-LABEL: allones_v64i8_sign:
142 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
143 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
144 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
145 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
146 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
147 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
148 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
149 ; AVX1-NEXT: sete %al
150 ; AVX1-NEXT: vzeroupper
153 ; AVX2-LABEL: allones_v64i8_sign:
155 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
156 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
157 ; AVX2-NEXT: cmpl $-1, %eax
158 ; AVX2-NEXT: sete %al
159 ; AVX2-NEXT: vzeroupper
162 ; KNL-LABEL: allones_v64i8_sign:
164 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
165 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
166 ; KNL-NEXT: vpmovmskb %ymm0, %eax
167 ; KNL-NEXT: cmpl $-1, %eax
169 ; KNL-NEXT: vzeroupper
172 ; SKX-LABEL: allones_v64i8_sign:
174 ; SKX-NEXT: vpmovb2m %zmm0, %k0
175 ; SKX-NEXT: kortestq %k0, %k0
177 ; SKX-NEXT: vzeroupper
179 %tmp = icmp slt <64 x i8> %arg, zeroinitializer
180 %tmp1 = bitcast <64 x i1> %tmp to i64
181 %tmp2 = icmp eq i64 %tmp1, -1
; allzeros_v64i8_sign - 64-lane i8 variant of the sign-mask test. The per-lane
; "is negative" mask (icmp slt zero) is bitcast to i64 and compared eq 0, so
; %tmp2 is true only when none of the 64 lanes is negative.
185 define i1 @allzeros_v64i8_sign(<64 x i8> %arg) {
186 ; SSE-LABEL: allzeros_v64i8_sign:
188 ; SSE-NEXT: por %xmm3, %xmm1
189 ; SSE-NEXT: por %xmm2, %xmm0
190 ; SSE-NEXT: por %xmm1, %xmm0
191 ; SSE-NEXT: pmovmskb %xmm0, %eax
192 ; SSE-NEXT: testl %eax, %eax
196 ; AVX1-LABEL: allzeros_v64i8_sign:
198 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
199 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
200 ; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2
201 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
202 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
203 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
204 ; AVX1-NEXT: testl %eax, %eax
205 ; AVX1-NEXT: sete %al
206 ; AVX1-NEXT: vzeroupper
209 ; AVX2-LABEL: allzeros_v64i8_sign:
211 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
212 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
213 ; AVX2-NEXT: testl %eax, %eax
214 ; AVX2-NEXT: sete %al
215 ; AVX2-NEXT: vzeroupper
218 ; KNL-LABEL: allzeros_v64i8_sign:
220 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
221 ; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
222 ; KNL-NEXT: vpmovmskb %ymm0, %eax
223 ; KNL-NEXT: testl %eax, %eax
225 ; KNL-NEXT: vzeroupper
228 ; SKX-LABEL: allzeros_v64i8_sign:
230 ; SKX-NEXT: vpmovb2m %zmm0, %k0
231 ; SKX-NEXT: kortestq %k0, %k0
233 ; SKX-NEXT: vzeroupper
235 %tmp = icmp slt <64 x i8> %arg, zeroinitializer
236 %tmp1 = bitcast <64 x i1> %tmp to i64
237 %tmp2 = icmp eq i64 %tmp1, 0
; allones_v8i16_sign - sign-mask test on 8 x i16. icmp slt against zero marks
; negative lanes; the <8 x i1> mask is bitcast to i8 and compared eq -1, so
; %tmp2 is true only when all 8 lanes are negative.
241 define i1 @allones_v8i16_sign(<8 x i16> %arg) {
242 ; SSE-LABEL: allones_v8i16_sign:
244 ; SSE-NEXT: packsswb %xmm0, %xmm0
245 ; SSE-NEXT: pmovmskb %xmm0, %eax
246 ; SSE-NEXT: cmpb $-1, %al
250 ; AVX1OR2-LABEL: allones_v8i16_sign:
252 ; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
253 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
254 ; AVX1OR2-NEXT: cmpb $-1, %al
255 ; AVX1OR2-NEXT: sete %al
258 ; KNL-LABEL: allones_v8i16_sign:
260 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
261 ; KNL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
262 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
263 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
264 ; KNL-NEXT: kmovw %k0, %eax
265 ; KNL-NEXT: cmpb $-1, %al
267 ; KNL-NEXT: vzeroupper
270 ; SKX-LABEL: allones_v8i16_sign:
272 ; SKX-NEXT: vpmovw2m %xmm0, %k0
273 ; SKX-NEXT: kortestb %k0, %k0
276 %tmp = icmp slt <8 x i16> %arg, zeroinitializer
277 %tmp1 = bitcast <8 x i1> %tmp to i8
278 %tmp2 = icmp eq i8 %tmp1, -1
; allzeros_v8i16_sign - sign-mask test on 8 x i16. icmp slt against zero marks
; negative lanes; the <8 x i1> mask is bitcast to i8 and compared eq 0, so
; %tmp2 is true only when none of the 8 lanes is negative.
282 define i1 @allzeros_v8i16_sign(<8 x i16> %arg) {
283 ; SSE-LABEL: allzeros_v8i16_sign:
285 ; SSE-NEXT: pmovmskb %xmm0, %eax
286 ; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA
290 ; AVX1OR2-LABEL: allzeros_v8i16_sign:
292 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
293 ; AVX1OR2-NEXT: testl $43690, %eax # imm = 0xAAAA
294 ; AVX1OR2-NEXT: sete %al
297 ; KNL-LABEL: allzeros_v8i16_sign:
299 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
300 ; KNL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
301 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
302 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
303 ; KNL-NEXT: kortestw %k0, %k0
305 ; KNL-NEXT: vzeroupper
308 ; SKX-LABEL: allzeros_v8i16_sign:
310 ; SKX-NEXT: vpmovw2m %xmm0, %k0
311 ; SKX-NEXT: kortestb %k0, %k0
314 %tmp = icmp slt <8 x i16> %arg, zeroinitializer
315 %tmp1 = bitcast <8 x i1> %tmp to i8
316 %tmp2 = icmp eq i8 %tmp1, 0
; allones_v16i16_sign - sign-mask test on 16 x i16. The "is negative" mask
; (icmp slt zero) is bitcast to i16 and compared eq -1, so %tmp2 is true only
; when all 16 lanes are negative.
320 define i1 @allones_v16i16_sign(<16 x i16> %arg) {
321 ; SSE-LABEL: allones_v16i16_sign:
323 ; SSE-NEXT: packsswb %xmm1, %xmm0
324 ; SSE-NEXT: pmovmskb %xmm0, %eax
325 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
329 ; AVX1-LABEL: allones_v16i16_sign:
331 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
332 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
333 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
334 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
335 ; AVX1-NEXT: sete %al
336 ; AVX1-NEXT: vzeroupper
339 ; AVX2-LABEL: allones_v16i16_sign:
341 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
342 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
343 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
344 ; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
345 ; AVX2-NEXT: sete %al
346 ; AVX2-NEXT: vzeroupper
349 ; KNL-LABEL: allones_v16i16_sign:
351 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
352 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
353 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
354 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
355 ; KNL-NEXT: kortestw %k0, %k0
357 ; KNL-NEXT: vzeroupper
360 ; SKX-LABEL: allones_v16i16_sign:
362 ; SKX-NEXT: vpmovw2m %ymm0, %k0
363 ; SKX-NEXT: kortestw %k0, %k0
365 ; SKX-NEXT: vzeroupper
367 %tmp = icmp slt <16 x i16> %arg, zeroinitializer
368 %tmp1 = bitcast <16 x i1> %tmp to i16
369 %tmp2 = icmp eq i16 %tmp1, -1
; allzeros_v16i16_sign - sign-mask test on 16 x i16. The "is negative" mask
; (icmp slt zero) is bitcast to i16 and compared eq 0, so %tmp2 is true only
; when none of the 16 lanes is negative.
373 define i1 @allzeros_v16i16_sign(<16 x i16> %arg) {
374 ; SSE-LABEL: allzeros_v16i16_sign:
376 ; SSE-NEXT: packsswb %xmm1, %xmm0
377 ; SSE-NEXT: pmovmskb %xmm0, %eax
378 ; SSE-NEXT: testl %eax, %eax
382 ; AVX1-LABEL: allzeros_v16i16_sign:
384 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
385 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
386 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
387 ; AVX1-NEXT: testl %eax, %eax
388 ; AVX1-NEXT: sete %al
389 ; AVX1-NEXT: vzeroupper
392 ; AVX2-LABEL: allzeros_v16i16_sign:
394 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
395 ; AVX2-NEXT: testl $-1431655766, %eax # imm = 0xAAAAAAAA
396 ; AVX2-NEXT: sete %al
397 ; AVX2-NEXT: vzeroupper
400 ; KNL-LABEL: allzeros_v16i16_sign:
402 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
403 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
404 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
405 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
406 ; KNL-NEXT: kortestw %k0, %k0
408 ; KNL-NEXT: vzeroupper
411 ; SKX-LABEL: allzeros_v16i16_sign:
413 ; SKX-NEXT: vpmovw2m %ymm0, %k0
414 ; SKX-NEXT: kortestw %k0, %k0
416 ; SKX-NEXT: vzeroupper
418 %tmp = icmp slt <16 x i16> %arg, zeroinitializer
419 %tmp1 = bitcast <16 x i1> %tmp to i16
420 %tmp2 = icmp eq i16 %tmp1, 0
; allones_v32i16_sign - sign-mask test on 32 x i16. The "is negative" mask
; (icmp slt zero) is bitcast to i32 and compared eq -1, so %tmp2 is true only
; when all 32 lanes are negative.
424 define i1 @allones_v32i16_sign(<32 x i16> %arg) {
425 ; SSE-LABEL: allones_v32i16_sign:
427 ; SSE-NEXT: packsswb %xmm1, %xmm0
428 ; SSE-NEXT: packsswb %xmm3, %xmm2
429 ; SSE-NEXT: pand %xmm0, %xmm2
430 ; SSE-NEXT: pmovmskb %xmm2, %eax
431 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
435 ; AVX1-LABEL: allones_v32i16_sign:
437 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
438 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
439 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
440 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
441 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
442 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
443 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
444 ; AVX1-NEXT: sete %al
445 ; AVX1-NEXT: vzeroupper
448 ; AVX2-LABEL: allones_v32i16_sign:
450 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
451 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
452 ; AVX2-NEXT: cmpl $-1, %eax
453 ; AVX2-NEXT: sete %al
454 ; AVX2-NEXT: vzeroupper
457 ; KNL-LABEL: allones_v32i16_sign:
459 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
460 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
461 ; KNL-NEXT: vpmovsxwd %ymm2, %zmm2
462 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
463 ; KNL-NEXT: kmovw %k0, %eax
464 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
465 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
466 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
467 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
468 ; KNL-NEXT: kmovw %k0, %ecx
469 ; KNL-NEXT: andl %eax, %ecx
470 ; KNL-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
472 ; KNL-NEXT: vzeroupper
475 ; SKX-LABEL: allones_v32i16_sign:
477 ; SKX-NEXT: vpmovw2m %zmm0, %k0
478 ; SKX-NEXT: kortestd %k0, %k0
480 ; SKX-NEXT: vzeroupper
482 %tmp = icmp slt <32 x i16> %arg, zeroinitializer
483 %tmp1 = bitcast <32 x i1> %tmp to i32
484 %tmp2 = icmp eq i32 %tmp1, -1
; allzeros_v32i16_sign - sign-mask test on 32 x i16. The "is negative" mask
; (icmp slt zero) is bitcast to i32 and compared eq 0, so %tmp2 is true only
; when none of the 32 lanes is negative.
488 define i1 @allzeros_v32i16_sign(<32 x i16> %arg) {
489 ; SSE-LABEL: allzeros_v32i16_sign:
491 ; SSE-NEXT: packsswb %xmm3, %xmm2
492 ; SSE-NEXT: packsswb %xmm1, %xmm0
493 ; SSE-NEXT: por %xmm2, %xmm0
494 ; SSE-NEXT: pmovmskb %xmm0, %eax
495 ; SSE-NEXT: testl %eax, %eax
499 ; AVX1-LABEL: allzeros_v32i16_sign:
501 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
502 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
503 ; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2
504 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
505 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
506 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
507 ; AVX1-NEXT: testl %eax, %eax
508 ; AVX1-NEXT: sete %al
509 ; AVX1-NEXT: vzeroupper
512 ; AVX2-LABEL: allzeros_v32i16_sign:
514 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
515 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
516 ; AVX2-NEXT: testl %eax, %eax
517 ; AVX2-NEXT: sete %al
518 ; AVX2-NEXT: vzeroupper
521 ; KNL-LABEL: allzeros_v32i16_sign:
523 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
524 ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
525 ; KNL-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
526 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
527 ; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0
528 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
529 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
530 ; KNL-NEXT: kortestw %k0, %k0
532 ; KNL-NEXT: vzeroupper
535 ; SKX-LABEL: allzeros_v32i16_sign:
537 ; SKX-NEXT: vpmovw2m %zmm0, %k0
538 ; SKX-NEXT: kortestd %k0, %k0
540 ; SKX-NEXT: vzeroupper
542 %tmp = icmp slt <32 x i16> %arg, zeroinitializer
543 %tmp1 = bitcast <32 x i1> %tmp to i32
544 %tmp2 = icmp eq i32 %tmp1, 0
; allones_v4i32_sign - sign-mask test on 4 x i32. The "is negative" mask
; (icmp slt zero) is bitcast to the non-power-of-two-byte type i4 and compared
; eq -1, so %tmp2 is true only when all 4 lanes are negative.
548 define i1 @allones_v4i32_sign(<4 x i32> %arg) {
549 ; SSE-LABEL: allones_v4i32_sign:
551 ; SSE-NEXT: movmskps %xmm0, %eax
552 ; SSE-NEXT: cmpl $15, %eax
556 ; AVX-LABEL: allones_v4i32_sign:
558 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
559 ; AVX-NEXT: vtestps %xmm1, %xmm0
562 %tmp = icmp slt <4 x i32> %arg, zeroinitializer
563 %tmp1 = bitcast <4 x i1> %tmp to i4
564 %tmp2 = icmp eq i4 %tmp1, -1
; allzeros_v4i32_sign - sign-mask test on 4 x i32. The "is negative" mask
; (icmp slt zero) is bitcast to i4 and compared eq 0, so %tmp2 is true only
; when none of the 4 lanes is negative.
568 define i1 @allzeros_v4i32_sign(<4 x i32> %arg) {
569 ; SSE-LABEL: allzeros_v4i32_sign:
571 ; SSE-NEXT: movmskps %xmm0, %eax
572 ; SSE-NEXT: testl %eax, %eax
576 ; AVX-LABEL: allzeros_v4i32_sign:
578 ; AVX-NEXT: vtestps %xmm0, %xmm0
581 %tmp = icmp slt <4 x i32> %arg, zeroinitializer
582 %tmp1 = bitcast <4 x i1> %tmp to i4
583 %tmp2 = icmp eq i4 %tmp1, 0
; allones_v8i32_sign - sign-mask test on 8 x i32. The "is negative" mask
; (icmp slt zero) is bitcast to i8 and compared eq -1, so %tmp2 is true only
; when all 8 lanes are negative.
587 define i1 @allones_v8i32_sign(<8 x i32> %arg) {
588 ; SSE-LABEL: allones_v8i32_sign:
590 ; SSE-NEXT: packssdw %xmm1, %xmm0
591 ; SSE-NEXT: packsswb %xmm0, %xmm0
592 ; SSE-NEXT: pmovmskb %xmm0, %eax
593 ; SSE-NEXT: cmpb $-1, %al
597 ; AVX1-LABEL: allones_v8i32_sign:
599 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
600 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
601 ; AVX1-NEXT: vtestps %ymm1, %ymm0
602 ; AVX1-NEXT: setb %al
603 ; AVX1-NEXT: vzeroupper
606 ; AVX2-LABEL: allones_v8i32_sign:
608 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
609 ; AVX2-NEXT: vtestps %ymm1, %ymm0
610 ; AVX2-NEXT: setb %al
611 ; AVX2-NEXT: vzeroupper
614 ; AVX512-LABEL: allones_v8i32_sign:
616 ; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
617 ; AVX512-NEXT: vtestps %ymm1, %ymm0
618 ; AVX512-NEXT: setb %al
619 ; AVX512-NEXT: vzeroupper
621 %tmp = icmp slt <8 x i32> %arg, zeroinitializer
622 %tmp1 = bitcast <8 x i1> %tmp to i8
623 %tmp2 = icmp eq i8 %tmp1, -1
; allzeros_v8i32_sign - sign-mask test on 8 x i32. The "is negative" mask
; (icmp slt zero) is bitcast to i8 and compared eq 0, so %tmp2 is true only
; when none of the 8 lanes is negative.
627 define i1 @allzeros_v8i32_sign(<8 x i32> %arg) {
628 ; SSE-LABEL: allzeros_v8i32_sign:
630 ; SSE-NEXT: packssdw %xmm1, %xmm0
631 ; SSE-NEXT: pmovmskb %xmm0, %eax
632 ; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA
636 ; AVX-LABEL: allzeros_v8i32_sign:
638 ; AVX-NEXT: vtestps %ymm0, %ymm0
640 ; AVX-NEXT: vzeroupper
642 %tmp = icmp slt <8 x i32> %arg, zeroinitializer
643 %tmp1 = bitcast <8 x i1> %tmp to i8
644 %tmp2 = icmp eq i8 %tmp1, 0
; allones_v16i32_sign - sign-mask test on 16 x i32. The "is negative" mask
; (icmp slt zero) is bitcast to i16 and compared eq -1, so %tmp2 is true only
; when all 16 lanes are negative.
648 define i1 @allones_v16i32_sign(<16 x i32> %arg) {
649 ; SSE-LABEL: allones_v16i32_sign:
651 ; SSE-NEXT: packssdw %xmm3, %xmm2
652 ; SSE-NEXT: packssdw %xmm1, %xmm0
653 ; SSE-NEXT: packsswb %xmm2, %xmm0
654 ; SSE-NEXT: pmovmskb %xmm0, %eax
655 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
659 ; AVX1-LABEL: allones_v16i32_sign:
661 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
662 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
663 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
664 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
665 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
666 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
667 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
668 ; AVX1-NEXT: sete %al
669 ; AVX1-NEXT: vzeroupper
672 ; AVX2-LABEL: allones_v16i32_sign:
674 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
675 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1
676 ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
677 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
678 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
679 ; AVX2-NEXT: cmpl $-1, %eax
680 ; AVX2-NEXT: sete %al
681 ; AVX2-NEXT: vzeroupper
684 ; KNL-LABEL: allones_v16i32_sign:
686 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
687 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
688 ; KNL-NEXT: kortestw %k0, %k0
690 ; KNL-NEXT: vzeroupper
693 ; SKX-LABEL: allones_v16i32_sign:
695 ; SKX-NEXT: vpmovd2m %zmm0, %k0
696 ; SKX-NEXT: kortestw %k0, %k0
698 ; SKX-NEXT: vzeroupper
700 %tmp = icmp slt <16 x i32> %arg, zeroinitializer
701 %tmp1 = bitcast <16 x i1> %tmp to i16
702 %tmp2 = icmp eq i16 %tmp1, -1
; allzeros_v16i32_sign - sign-mask test on 16 x i32. The "is negative" mask
; (icmp slt zero) is bitcast to i16 and compared eq 0, so %tmp2 is true only
; when none of the 16 lanes is negative.
706 define i1 @allzeros_v16i32_sign(<16 x i32> %arg) {
707 ; SSE-LABEL: allzeros_v16i32_sign:
709 ; SSE-NEXT: packssdw %xmm3, %xmm2
710 ; SSE-NEXT: packssdw %xmm1, %xmm0
711 ; SSE-NEXT: packsswb %xmm2, %xmm0
712 ; SSE-NEXT: pmovmskb %xmm0, %eax
713 ; SSE-NEXT: testl %eax, %eax
717 ; AVX1-LABEL: allzeros_v16i32_sign:
719 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
720 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
721 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
722 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
723 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
724 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
725 ; AVX1-NEXT: testl %eax, %eax
726 ; AVX1-NEXT: sete %al
727 ; AVX1-NEXT: vzeroupper
730 ; AVX2-LABEL: allzeros_v16i32_sign:
732 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
733 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1
734 ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
735 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
736 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
737 ; AVX2-NEXT: testl %eax, %eax
738 ; AVX2-NEXT: sete %al
739 ; AVX2-NEXT: vzeroupper
742 ; KNL-LABEL: allzeros_v16i32_sign:
744 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
745 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
746 ; KNL-NEXT: kortestw %k0, %k0
748 ; KNL-NEXT: vzeroupper
751 ; SKX-LABEL: allzeros_v16i32_sign:
753 ; SKX-NEXT: vpmovd2m %zmm0, %k0
754 ; SKX-NEXT: kortestw %k0, %k0
756 ; SKX-NEXT: vzeroupper
758 %tmp = icmp slt <16 x i32> %arg, zeroinitializer
759 %tmp1 = bitcast <16 x i1> %tmp to i16
760 %tmp2 = icmp eq i16 %tmp1, 0
; allones_v4i64_sign - sign-mask test on 4 x i64. The "is negative" mask
; (icmp slt zero) is bitcast to i4 and compared eq -1, so %tmp2 is true only
; when all 4 lanes are negative.
764 define i1 @allones_v4i64_sign(<4 x i64> %arg) {
765 ; SSE-LABEL: allones_v4i64_sign:
767 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
768 ; SSE-NEXT: movmskps %xmm0, %eax
769 ; SSE-NEXT: cmpl $15, %eax
773 ; AVX1-LABEL: allones_v4i64_sign:
775 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
776 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
777 ; AVX1-NEXT: vtestpd %ymm1, %ymm0
778 ; AVX1-NEXT: setb %al
779 ; AVX1-NEXT: vzeroupper
782 ; AVX2-LABEL: allones_v4i64_sign:
784 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
785 ; AVX2-NEXT: vtestpd %ymm1, %ymm0
786 ; AVX2-NEXT: setb %al
787 ; AVX2-NEXT: vzeroupper
790 ; AVX512-LABEL: allones_v4i64_sign:
792 ; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
793 ; AVX512-NEXT: vtestpd %ymm1, %ymm0
794 ; AVX512-NEXT: setb %al
795 ; AVX512-NEXT: vzeroupper
797 %tmp = icmp slt <4 x i64> %arg, zeroinitializer
798 %tmp1 = bitcast <4 x i1> %tmp to i4
799 %tmp2 = icmp eq i4 %tmp1, -1
; allzeros_v4i64_sign - sign-mask test on 4 x i64. The "is negative" mask
; (icmp slt zero) is bitcast to i4 and compared eq 0, so %tmp2 is true only
; when none of the 4 lanes is negative.
803 define i1 @allzeros_v4i64_sign(<4 x i64> %arg) {
804 ; SSE-LABEL: allzeros_v4i64_sign:
806 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
807 ; SSE-NEXT: movmskps %xmm0, %eax
808 ; SSE-NEXT: testl %eax, %eax
812 ; AVX-LABEL: allzeros_v4i64_sign:
814 ; AVX-NEXT: vtestpd %ymm0, %ymm0
816 ; AVX-NEXT: vzeroupper
818 %tmp = icmp slt <4 x i64> %arg, zeroinitializer
819 %tmp1 = bitcast <4 x i1> %tmp to i4
820 %tmp2 = icmp eq i4 %tmp1, 0
; allones_v8i64_sign - sign-mask test on 8 x i64. The "is negative" mask
; (icmp slt zero) is bitcast to i8 and compared eq -1, so %tmp2 is true only
; when all 8 lanes are negative.
824 define i1 @allones_v8i64_sign(<8 x i64> %arg) {
825 ; SSE-LABEL: allones_v8i64_sign:
827 ; SSE-NEXT: packssdw %xmm3, %xmm2
828 ; SSE-NEXT: packssdw %xmm1, %xmm0
829 ; SSE-NEXT: packssdw %xmm2, %xmm0
830 ; SSE-NEXT: packsswb %xmm0, %xmm0
831 ; SSE-NEXT: pmovmskb %xmm0, %eax
832 ; SSE-NEXT: cmpb $-1, %al
836 ; AVX1-LABEL: allones_v8i64_sign:
838 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
839 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
840 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
841 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
842 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
843 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
844 ; AVX1-NEXT: vtestps %xmm1, %xmm0
845 ; AVX1-NEXT: setb %al
846 ; AVX1-NEXT: vzeroupper
849 ; AVX2-LABEL: allones_v8i64_sign:
851 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
852 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
853 ; AVX2-NEXT: vtestps %ymm1, %ymm0
854 ; AVX2-NEXT: setb %al
855 ; AVX2-NEXT: vzeroupper
858 ; KNL-LABEL: allones_v8i64_sign:
860 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
861 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
862 ; KNL-NEXT: kmovw %k0, %eax
863 ; KNL-NEXT: cmpb $-1, %al
865 ; KNL-NEXT: vzeroupper
868 ; SKX-LABEL: allones_v8i64_sign:
870 ; SKX-NEXT: vpmovq2m %zmm0, %k0
871 ; SKX-NEXT: kortestb %k0, %k0
873 ; SKX-NEXT: vzeroupper
875 %tmp = icmp slt <8 x i64> %arg, zeroinitializer
876 %tmp1 = bitcast <8 x i1> %tmp to i8
877 %tmp2 = icmp eq i8 %tmp1, -1
; allzeros_v8i64_sign - sign-mask test on 8 x i64. The "is negative" mask
; (icmp slt zero) is bitcast to i8 and compared eq 0, so %tmp2 is true only
; when none of the 8 lanes is negative.
881 define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
882 ; SSE-LABEL: allzeros_v8i64_sign:
884 ; SSE-NEXT: packssdw %xmm3, %xmm2
885 ; SSE-NEXT: packssdw %xmm1, %xmm0
886 ; SSE-NEXT: packssdw %xmm2, %xmm0
887 ; SSE-NEXT: pmovmskb %xmm0, %eax
888 ; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA
892 ; AVX1-LABEL: allzeros_v8i64_sign:
894 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
895 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
896 ; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2
897 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
898 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
899 ; AVX1-NEXT: vtestps %xmm0, %xmm0
900 ; AVX1-NEXT: sete %al
901 ; AVX1-NEXT: vzeroupper
904 ; AVX2-LABEL: allzeros_v8i64_sign:
906 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
907 ; AVX2-NEXT: vtestps %ymm0, %ymm0
908 ; AVX2-NEXT: sete %al
909 ; AVX2-NEXT: vzeroupper
912 ; KNL-LABEL: allzeros_v8i64_sign:
914 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
915 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
916 ; KNL-NEXT: kmovw %k0, %eax
917 ; KNL-NEXT: testb %al, %al
919 ; KNL-NEXT: vzeroupper
922 ; SKX-LABEL: allzeros_v8i64_sign:
924 ; SKX-NEXT: vpmovq2m %zmm0, %k0
925 ; SKX-NEXT: kortestb %k0, %k0
927 ; SKX-NEXT: vzeroupper
929 %tmp = icmp slt <8 x i64> %arg, zeroinitializer
930 %tmp1 = bitcast <8 x i1> %tmp to i8
931 %tmp2 = icmp eq i8 %tmp1, 0
; allones_v16i8_and1 - low-bit test on 16 x i8. Each lane is masked with 1 and
; compared ne zero, giving an i1 that is set when bit 0 of the lane is set.
; The <16 x i1> mask is bitcast to i16 and compared eq -1, so %tmp3 is true
; only when every lane has bit 0 set.
935 define i1 @allones_v16i8_and1(<16 x i8> %arg) {
936 ; SSE-LABEL: allones_v16i8_and1:
938 ; SSE-NEXT: psllw $7, %xmm0
939 ; SSE-NEXT: pmovmskb %xmm0, %eax
940 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
944 ; AVX1OR2-LABEL: allones_v16i8_and1:
946 ; AVX1OR2-NEXT: vpsllw $7, %xmm0, %xmm0
947 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
948 ; AVX1OR2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
949 ; AVX1OR2-NEXT: sete %al
952 ; KNL-LABEL: allones_v16i8_and1:
954 ; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
955 ; KNL-NEXT: vpmovmskb %xmm0, %eax
956 ; KNL-NEXT: cmpl $65535, %eax # imm = 0xFFFF
960 ; SKX-LABEL: allones_v16i8_and1:
962 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
963 ; SKX-NEXT: kortestw %k0, %k0
966 %tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
967 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
968 %tmp2 = bitcast <16 x i1> %tmp1 to i16
969 %tmp3 = icmp eq i16 %tmp2, -1
; allzeros_v16i8_not - negated all-zeros test on 16 x i8. icmp eq against zero
; marks the lanes that are zero; the <16 x i1> mask is bitcast to i16 and
; compared ne -1, so %3 is true when at least one lane is nonzero.
973 define i1 @allzeros_v16i8_not(<16 x i8> %a0) {
974 ; SSE2-LABEL: allzeros_v16i8_not:
976 ; SSE2-NEXT: pxor %xmm1, %xmm1
977 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
978 ; SSE2-NEXT: pmovmskb %xmm1, %eax
979 ; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
980 ; SSE2-NEXT: setne %al
983 ; SSE41-LABEL: allzeros_v16i8_not:
985 ; SSE41-NEXT: ptest %xmm0, %xmm0
986 ; SSE41-NEXT: setne %al
989 ; AVX-LABEL: allzeros_v16i8_not:
991 ; AVX-NEXT: vptest %xmm0, %xmm0
992 ; AVX-NEXT: setne %al
994 %1 = icmp eq <16 x i8> %a0, zeroinitializer
995 %2 = bitcast <16 x i1> %1 to i16
996 %3 = icmp ne i16 %2, -1
; allzeros_v2i64_not - negated all-zeros test on 2 x i64. icmp eq against zero
; marks the lanes that are zero; the <2 x i1> mask is bitcast to i2 and
; compared ne -1, so %3 is true when at least one lane is nonzero.
1000 define i1 @allzeros_v2i64_not(<2 x i64> %a0) {
1001 ; SSE2-LABEL: allzeros_v2i64_not:
1003 ; SSE2-NEXT: pxor %xmm1, %xmm1
1004 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
1005 ; SSE2-NEXT: movmskps %xmm1, %eax
1006 ; SSE2-NEXT: xorl $15, %eax
1007 ; SSE2-NEXT: setne %al
1010 ; SSE41-LABEL: allzeros_v2i64_not:
1012 ; SSE41-NEXT: ptest %xmm0, %xmm0
1013 ; SSE41-NEXT: setne %al
1016 ; AVX-LABEL: allzeros_v2i64_not:
1018 ; AVX-NEXT: vptest %xmm0, %xmm0
1019 ; AVX-NEXT: setne %al
1021 %1 = icmp eq <2 x i64> %a0, zeroinitializer
1022 %2 = bitcast <2 x i1> %1 to i2
1023 %3 = icmp ne i2 %2, -1
; allzeros_v8i32_not - negated all-zeros test on 8 x i32. icmp eq against zero
; marks the lanes that are zero; the <8 x i1> mask is bitcast to i8 and
; compared ne -1, so %3 is true when at least one lane is nonzero.
1027 define i1 @allzeros_v8i32_not(<8 x i32> %a0) {
1028 ; SSE2-LABEL: allzeros_v8i32_not:
1030 ; SSE2-NEXT: por %xmm1, %xmm0
1031 ; SSE2-NEXT: pxor %xmm1, %xmm1
1032 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
1033 ; SSE2-NEXT: movmskps %xmm1, %eax
1034 ; SSE2-NEXT: xorl $15, %eax
1035 ; SSE2-NEXT: setne %al
1038 ; SSE41-LABEL: allzeros_v8i32_not:
1040 ; SSE41-NEXT: por %xmm1, %xmm0
1041 ; SSE41-NEXT: ptest %xmm0, %xmm0
1042 ; SSE41-NEXT: setne %al
1045 ; AVX-LABEL: allzeros_v8i32_not:
1047 ; AVX-NEXT: vptest %ymm0, %ymm0
1048 ; AVX-NEXT: setne %al
1049 ; AVX-NEXT: vzeroupper
1051 %1 = icmp eq <8 x i32> %a0, zeroinitializer
1052 %2 = bitcast <8 x i1> %1 to i8
1053 %3 = icmp ne i8 %2, -1
; allzeros_v8i64_not - negated all-zeros test on 8 x i64. icmp eq against zero
; marks the lanes that are zero; the <8 x i1> mask is bitcast to i8 and
; compared ne -1, so %3 is true when at least one lane is nonzero.
1057 define i1 @allzeros_v8i64_not(<8 x i64> %a0) {
1058 ; SSE2-LABEL: allzeros_v8i64_not:
1060 ; SSE2-NEXT: por %xmm3, %xmm1
1061 ; SSE2-NEXT: por %xmm2, %xmm0
1062 ; SSE2-NEXT: por %xmm1, %xmm0
1063 ; SSE2-NEXT: pxor %xmm1, %xmm1
1064 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
1065 ; SSE2-NEXT: movmskps %xmm1, %eax
1066 ; SSE2-NEXT: xorl $15, %eax
1067 ; SSE2-NEXT: setne %al
1070 ; SSE41-LABEL: allzeros_v8i64_not:
1072 ; SSE41-NEXT: por %xmm3, %xmm1
1073 ; SSE41-NEXT: por %xmm2, %xmm0
1074 ; SSE41-NEXT: por %xmm1, %xmm0
1075 ; SSE41-NEXT: ptest %xmm0, %xmm0
1076 ; SSE41-NEXT: setne %al
1079 ; AVX1-LABEL: allzeros_v8i64_not:
1081 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1082 ; AVX1-NEXT: vptest %ymm0, %ymm0
1083 ; AVX1-NEXT: setne %al
1084 ; AVX1-NEXT: vzeroupper
1087 ; AVX2-LABEL: allzeros_v8i64_not:
1089 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1090 ; AVX2-NEXT: vptest %ymm0, %ymm0
1091 ; AVX2-NEXT: setne %al
1092 ; AVX2-NEXT: vzeroupper
1095 ; AVX512-LABEL: allzeros_v8i64_not:
1097 ; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
1098 ; AVX512-NEXT: kortestw %k0, %k0
1099 ; AVX512-NEXT: setne %al
1100 ; AVX512-NEXT: vzeroupper
1102 %1 = icmp eq <8 x i64> %a0, zeroinitializer
1103 %2 = bitcast <8 x i1> %1 to i8
1104 %3 = icmp ne i8 %2, -1
; allzeros_v16i8_and1 - low-bit test on 16 x i8. Each lane is masked with 1
; and compared ne zero, giving an i1 that is set when bit 0 of the lane is set.
; The <16 x i1> mask is bitcast to i16 and compared eq 0, so %tmp3 is true
; only when no lane has bit 0 set.
1108 define i1 @allzeros_v16i8_and1(<16 x i8> %arg) {
1109 ; SSE2-LABEL: allzeros_v16i8_and1:
1111 ; SSE2-NEXT: psllw $7, %xmm0
1112 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1113 ; SSE2-NEXT: testl %eax, %eax
1114 ; SSE2-NEXT: sete %al
1117 ; SSE41-LABEL: allzeros_v16i8_and1:
1119 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1120 ; SSE41-NEXT: sete %al
1123 ; AVX1OR2-LABEL: allzeros_v16i8_and1:
1125 ; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1126 ; AVX1OR2-NEXT: sete %al
1127 ; AVX1OR2-NEXT: retq
1129 ; KNL-LABEL: allzeros_v16i8_and1:
1131 ; KNL-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1132 ; KNL-NEXT: sete %al
1135 ; SKX-LABEL: allzeros_v16i8_and1:
1137 ; SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [72340172838076673,72340172838076673]
1138 ; SKX-NEXT: vptest %xmm1, %xmm0
1139 ; SKX-NEXT: sete %al
1141 %tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1142 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
1143 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1144 %tmp3 = icmp eq i16 %tmp2, 0
; allones_v32i8_and1 - low-bit test on 32 x i8. Each lane is masked with 1 and
; compared ne zero; the <32 x i1> mask is bitcast to i32 and compared eq -1,
; so %tmp3 is true only when every lane has bit 0 set.
1148 define i1 @allones_v32i8_and1(<32 x i8> %arg) {
1149 ; SSE-LABEL: allones_v32i8_and1:
1151 ; SSE-NEXT: pand %xmm1, %xmm0
1152 ; SSE-NEXT: psllw $7, %xmm0
1153 ; SSE-NEXT: pmovmskb %xmm0, %eax
1154 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1155 ; SSE-NEXT: sete %al
1158 ; AVX1-LABEL: allones_v32i8_and1:
1160 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1161 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
1162 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1163 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1164 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1165 ; AVX1-NEXT: sete %al
1166 ; AVX1-NEXT: vzeroupper
1169 ; AVX2-LABEL: allones_v32i8_and1:
1171 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
1172 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1173 ; AVX2-NEXT: cmpl $-1, %eax
1174 ; AVX2-NEXT: sete %al
1175 ; AVX2-NEXT: vzeroupper
1178 ; KNL-LABEL: allones_v32i8_and1:
1180 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
1181 ; KNL-NEXT: vpmovmskb %ymm0, %eax
1182 ; KNL-NEXT: cmpl $-1, %eax
1183 ; KNL-NEXT: sete %al
1184 ; KNL-NEXT: vzeroupper
1187 ; SKX-LABEL: allones_v32i8_and1:
1189 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
1190 ; SKX-NEXT: kortestd %k0, %k0
1191 ; SKX-NEXT: setb %al
1192 ; SKX-NEXT: vzeroupper
1194 %tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1195 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
1196 %tmp2 = bitcast <32 x i1> %tmp1 to i32
1197 %tmp3 = icmp eq i32 %tmp2, -1
; allzeros_v32i8_and1 - low-bit test on 32 x i8. Each lane is masked with 1
; and compared ne zero; the <32 x i1> mask is bitcast to i32 and compared
; eq 0, so %tmp3 is true only when no lane has bit 0 set.
1201 define i1 @allzeros_v32i8_and1(<32 x i8> %arg) {
1202 ; SSE2-LABEL: allzeros_v32i8_and1:
1204 ; SSE2-NEXT: por %xmm1, %xmm0
1205 ; SSE2-NEXT: psllw $7, %xmm0
1206 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1207 ; SSE2-NEXT: testl %eax, %eax
1208 ; SSE2-NEXT: sete %al
1211 ; SSE41-LABEL: allzeros_v32i8_and1:
1213 ; SSE41-NEXT: por %xmm1, %xmm0
1214 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1215 ; SSE41-NEXT: sete %al
1218 ; AVX1-LABEL: allzeros_v32i8_and1:
1220 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
1221 ; AVX1-NEXT: sete %al
1222 ; AVX1-NEXT: vzeroupper
1225 ; AVX2-LABEL: allzeros_v32i8_and1:
1227 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
1228 ; AVX2-NEXT: vptest %ymm1, %ymm0
1229 ; AVX2-NEXT: sete %al
1230 ; AVX2-NEXT: vzeroupper
1233 ; AVX512-LABEL: allzeros_v32i8_and1:
1235 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
1236 ; AVX512-NEXT: vptest %ymm1, %ymm0
1237 ; AVX512-NEXT: sete %al
1238 ; AVX512-NEXT: vzeroupper
1240 %tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1241 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
1242 %tmp2 = bitcast <32 x i1> %tmp1 to i32
1243 %tmp3 = icmp eq i32 %tmp2, 0
1247 define i1 @allones_v64i8_and1(<64 x i8> %arg) {
1248 ; SSE-LABEL: allones_v64i8_and1:
1250 ; SSE-NEXT: pand %xmm2, %xmm0
1251 ; SSE-NEXT: pand %xmm3, %xmm1
1252 ; SSE-NEXT: pand %xmm0, %xmm1
1253 ; SSE-NEXT: psllw $7, %xmm1
1254 ; SSE-NEXT: pmovmskb %xmm1, %eax
1255 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1256 ; SSE-NEXT: sete %al
1259 ; AVX1-LABEL: allones_v64i8_and1:
1261 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1262 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1263 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
1264 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
1265 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
1266 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1267 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1268 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1269 ; AVX1-NEXT: sete %al
1270 ; AVX1-NEXT: vzeroupper
1273 ; AVX2-LABEL: allones_v64i8_and1:
1275 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
1276 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
1277 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1278 ; AVX2-NEXT: cmpl $-1, %eax
1279 ; AVX2-NEXT: sete %al
1280 ; AVX2-NEXT: vzeroupper
1283 ; KNL-LABEL: allones_v64i8_and1:
1285 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1286 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
1287 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
1288 ; KNL-NEXT: vpmovmskb %ymm0, %eax
1289 ; KNL-NEXT: cmpl $-1, %eax
1290 ; KNL-NEXT: sete %al
1291 ; KNL-NEXT: vzeroupper
1294 ; SKX-LABEL: allones_v64i8_and1:
1296 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
1297 ; SKX-NEXT: kortestq %k0, %k0
1298 ; SKX-NEXT: setb %al
1299 ; SKX-NEXT: vzeroupper
1301 %tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1302 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
1303 %tmp2 = bitcast <64 x i1> %tmp1 to i64
1304 %tmp3 = icmp eq i64 %tmp2, -1
1308 define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
1309 ; SSE2-LABEL: allzeros_v64i8_and1:
1311 ; SSE2-NEXT: por %xmm3, %xmm1
1312 ; SSE2-NEXT: por %xmm2, %xmm0
1313 ; SSE2-NEXT: por %xmm1, %xmm0
1314 ; SSE2-NEXT: psllw $7, %xmm0
1315 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1316 ; SSE2-NEXT: testl %eax, %eax
1317 ; SSE2-NEXT: sete %al
1320 ; SSE41-LABEL: allzeros_v64i8_and1:
1322 ; SSE41-NEXT: por %xmm3, %xmm1
1323 ; SSE41-NEXT: por %xmm2, %xmm0
1324 ; SSE41-NEXT: por %xmm1, %xmm0
1325 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1326 ; SSE41-NEXT: sete %al
1329 ; AVX1-LABEL: allzeros_v64i8_and1:
1331 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1332 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
1333 ; AVX1-NEXT: sete %al
1334 ; AVX1-NEXT: vzeroupper
1337 ; AVX2-LABEL: allzeros_v64i8_and1:
1339 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1340 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
1341 ; AVX2-NEXT: vptest %ymm1, %ymm0
1342 ; AVX2-NEXT: sete %al
1343 ; AVX2-NEXT: vzeroupper
1346 ; AVX512-LABEL: allzeros_v64i8_and1:
1348 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1349 ; AVX512-NEXT: kortestw %k0, %k0
1350 ; AVX512-NEXT: sete %al
1351 ; AVX512-NEXT: vzeroupper
1353 %tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1354 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
1355 %tmp2 = bitcast <64 x i1> %tmp1 to i64
1356 %tmp3 = icmp eq i64 %tmp2, 0
; All-lanes-set reduction on bit 0 of a <8 x i16> vector.
; Each lane is masked with 1 and compared ne 0, the <8 x i1> result is
; bitcast to i8, and %tmp3 is true only when that mask equals -1
; (every lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
1360 define i1 @allones_v8i16_and1(<8 x i16> %arg) {
1361 ; SSE-LABEL: allones_v8i16_and1:
1363 ; SSE-NEXT: psllw $15, %xmm0
1364 ; SSE-NEXT: packsswb %xmm0, %xmm0
1365 ; SSE-NEXT: pmovmskb %xmm0, %eax
1366 ; SSE-NEXT: cmpb $-1, %al
1367 ; SSE-NEXT: sete %al
1370 ; AVX1OR2-LABEL: allones_v8i16_and1:
1372 ; AVX1OR2-NEXT: vpsllw $15, %xmm0, %xmm0
1373 ; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
1374 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
1375 ; AVX1OR2-NEXT: cmpb $-1, %al
1376 ; AVX1OR2-NEXT: sete %al
1377 ; AVX1OR2-NEXT: retq
1379 ; KNL-LABEL: allones_v8i16_and1:
1381 ; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
1382 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
1383 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1384 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1385 ; KNL-NEXT: kmovw %k0, %eax
1386 ; KNL-NEXT: cmpb $-1, %al
1387 ; KNL-NEXT: sete %al
1388 ; KNL-NEXT: vzeroupper
1391 ; SKX-LABEL: allones_v8i16_and1:
1393 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
1394 ; SKX-NEXT: kortestb %k0, %k0
1395 ; SKX-NEXT: setb %al
1397 %tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1398 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
1399 %tmp2 = bitcast <8 x i1> %tmp1 to i8
1400 %tmp3 = icmp eq i8 %tmp2, -1
; No-lane-set reduction on bit 0 of a <8 x i16> vector.
; Each lane is masked with 1 and compared ne 0, the <8 x i1> result is
; bitcast to i8, and %tmp3 is true only when that mask equals 0
; (no lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
1404 define i1 @allzeros_v8i16_and1(<8 x i16> %arg) {
1405 ; SSE2-LABEL: allzeros_v8i16_and1:
1407 ; SSE2-NEXT: psllw $7, %xmm0
1408 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1409 ; SSE2-NEXT: testl $21845, %eax # imm = 0x5555
1410 ; SSE2-NEXT: sete %al
1413 ; SSE41-LABEL: allzeros_v8i16_and1:
1415 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1416 ; SSE41-NEXT: sete %al
1419 ; AVX1OR2-LABEL: allzeros_v8i16_and1:
1421 ; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1422 ; AVX1OR2-NEXT: sete %al
1423 ; AVX1OR2-NEXT: retq
1425 ; KNL-LABEL: allzeros_v8i16_and1:
1427 ; KNL-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1428 ; KNL-NEXT: sete %al
1431 ; SKX-LABEL: allzeros_v8i16_and1:
1433 ; SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [281479271743489,281479271743489]
1434 ; SKX-NEXT: vptest %xmm1, %xmm0
1435 ; SKX-NEXT: sete %al
1437 %tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1438 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
1439 %tmp2 = bitcast <8 x i1> %tmp1 to i8
1440 %tmp3 = icmp eq i8 %tmp2, 0
; All-lanes-set reduction on bit 0 of a <16 x i16> vector.
; Each lane is masked with 1 and compared ne 0, the <16 x i1> result is
; bitcast to i16, and %tmp3 is true only when that mask equals -1
; (every lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
1444 define i1 @allones_v16i16_and1(<16 x i16> %arg) {
1445 ; SSE-LABEL: allones_v16i16_and1:
1447 ; SSE-NEXT: psllw $15, %xmm1
1448 ; SSE-NEXT: psllw $15, %xmm0
1449 ; SSE-NEXT: packsswb %xmm1, %xmm0
1450 ; SSE-NEXT: pmovmskb %xmm0, %eax
1451 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1452 ; SSE-NEXT: sete %al
1455 ; AVX1-LABEL: allones_v16i16_and1:
1457 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1458 ; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
1459 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
1460 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1461 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1462 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1463 ; AVX1-NEXT: sete %al
1464 ; AVX1-NEXT: vzeroupper
1467 ; AVX2-LABEL: allones_v16i16_and1:
1469 ; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
1470 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1471 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1472 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
1473 ; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1474 ; AVX2-NEXT: sete %al
1475 ; AVX2-NEXT: vzeroupper
1478 ; KNL-LABEL: allones_v16i16_and1:
1480 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
1481 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
1482 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
1483 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1484 ; KNL-NEXT: kortestw %k0, %k0
1485 ; KNL-NEXT: setb %al
1486 ; KNL-NEXT: vzeroupper
1489 ; SKX-LABEL: allones_v16i16_and1:
1491 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
1492 ; SKX-NEXT: kortestw %k0, %k0
1493 ; SKX-NEXT: setb %al
1494 ; SKX-NEXT: vzeroupper
1496 %tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1497 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
1498 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1499 %tmp3 = icmp eq i16 %tmp2, -1
; All-lanes-set reduction on bit 0 of a <32 x i16> vector.
; Each lane is masked with 1 and compared ne 0, the <32 x i1> result is
; bitcast to i32, and %tmp3 is true only when that mask equals -1
; (every lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
1503 define i1 @allones_v32i16_and1(<32 x i16> %arg) {
1504 ; SSE-LABEL: allones_v32i16_and1:
1506 ; SSE-NEXT: pand %xmm3, %xmm1
1507 ; SSE-NEXT: psllw $15, %xmm1
1508 ; SSE-NEXT: pand %xmm2, %xmm0
1509 ; SSE-NEXT: psllw $15, %xmm0
1510 ; SSE-NEXT: packsswb %xmm1, %xmm0
1511 ; SSE-NEXT: pmovmskb %xmm0, %eax
1512 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1513 ; SSE-NEXT: sete %al
1516 ; AVX1-LABEL: allones_v32i16_and1:
1518 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1519 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1520 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
1521 ; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
1522 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
1523 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
1524 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
1525 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1526 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1527 ; AVX1-NEXT: sete %al
1528 ; AVX1-NEXT: vzeroupper
1531 ; AVX2-LABEL: allones_v32i16_and1:
1533 ; AVX2-NEXT: vpsllw $15, %ymm1, %ymm1
1534 ; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
1535 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
1536 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1537 ; AVX2-NEXT: cmpl $-1, %eax
1538 ; AVX2-NEXT: sete %al
1539 ; AVX2-NEXT: vzeroupper
1542 ; KNL-LABEL: allones_v32i16_and1:
1544 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm1
1545 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
1546 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
1547 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
1548 ; KNL-NEXT: kmovw %k0, %eax
1549 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
1550 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
1551 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
1552 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
1553 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1554 ; KNL-NEXT: kmovw %k0, %ecx
1555 ; KNL-NEXT: andl %eax, %ecx
1556 ; KNL-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
1557 ; KNL-NEXT: sete %al
1558 ; KNL-NEXT: vzeroupper
1561 ; SKX-LABEL: allones_v32i16_and1:
1563 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
1564 ; SKX-NEXT: kortestd %k0, %k0
1565 ; SKX-NEXT: setb %al
1566 ; SKX-NEXT: vzeroupper
1568 %tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1569 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
1570 %tmp2 = bitcast <32 x i1> %tmp1 to i32
1571 %tmp3 = icmp eq i32 %tmp2, -1
; No-lane-set reduction on bit 0 of a <32 x i16> vector.
; Each lane is masked with 1 and compared ne 0, the <32 x i1> result is
; bitcast to i32, and %tmp3 is true only when that mask equals 0
; (no lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
1575 define i1 @allzeros_v32i16_and1(<32 x i16> %arg) {
1576 ; SSE2-LABEL: allzeros_v32i16_and1:
1578 ; SSE2-NEXT: por %xmm3, %xmm1
1579 ; SSE2-NEXT: por %xmm2, %xmm0
1580 ; SSE2-NEXT: por %xmm1, %xmm0
1581 ; SSE2-NEXT: psllw $7, %xmm0
1582 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1583 ; SSE2-NEXT: testl $21845, %eax # imm = 0x5555
1584 ; SSE2-NEXT: sete %al
1587 ; SSE41-LABEL: allzeros_v32i16_and1:
1589 ; SSE41-NEXT: por %xmm3, %xmm1
1590 ; SSE41-NEXT: por %xmm2, %xmm0
1591 ; SSE41-NEXT: por %xmm1, %xmm0
1592 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1593 ; SSE41-NEXT: sete %al
1596 ; AVX1-LABEL: allzeros_v32i16_and1:
1598 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1599 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
1600 ; AVX1-NEXT: sete %al
1601 ; AVX1-NEXT: vzeroupper
1604 ; AVX2-LABEL: allzeros_v32i16_and1:
1606 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1607 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
1608 ; AVX2-NEXT: vptest %ymm1, %ymm0
1609 ; AVX2-NEXT: sete %al
1610 ; AVX2-NEXT: vzeroupper
1613 ; AVX512-LABEL: allzeros_v32i16_and1:
1615 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1616 ; AVX512-NEXT: kortestw %k0, %k0
1617 ; AVX512-NEXT: sete %al
1618 ; AVX512-NEXT: vzeroupper
1620 %tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1621 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
1622 %tmp2 = bitcast <32 x i1> %tmp1 to i32
1623 %tmp3 = icmp eq i32 %tmp2, 0
; No-lane-set reduction on bit 0 of a <16 x i16> vector.
; Each lane is masked with 1 and compared ne 0, the <16 x i1> result is
; bitcast to i16, and %tmp3 is true only when that mask equals 0
; (no lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
1627 define i1 @allzeros_v16i16_and1(<16 x i16> %arg) {
1628 ; SSE2-LABEL: allzeros_v16i16_and1:
1630 ; SSE2-NEXT: por %xmm1, %xmm0
1631 ; SSE2-NEXT: psllw $7, %xmm0
1632 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1633 ; SSE2-NEXT: testl $21845, %eax # imm = 0x5555
1634 ; SSE2-NEXT: sete %al
1637 ; SSE41-LABEL: allzeros_v16i16_and1:
1639 ; SSE41-NEXT: por %xmm1, %xmm0
1640 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1641 ; SSE41-NEXT: sete %al
1644 ; AVX1-LABEL: allzeros_v16i16_and1:
1646 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
1647 ; AVX1-NEXT: sete %al
1648 ; AVX1-NEXT: vzeroupper
1651 ; AVX2-LABEL: allzeros_v16i16_and1:
1653 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
1654 ; AVX2-NEXT: vptest %ymm1, %ymm0
1655 ; AVX2-NEXT: sete %al
1656 ; AVX2-NEXT: vzeroupper
1659 ; AVX512-LABEL: allzeros_v16i16_and1:
1661 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
1662 ; AVX512-NEXT: vptest %ymm1, %ymm0
1663 ; AVX512-NEXT: sete %al
1664 ; AVX512-NEXT: vzeroupper
1666 %tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1667 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
1668 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1669 %tmp3 = icmp eq i16 %tmp2, 0
; All-lanes-set reduction on bit 0 of a <4 x i32> vector.
; Each lane is masked with 1 and compared ne 0, the <4 x i1> result is
; bitcast to i4, and %tmp3 is true only when that mask equals -1
; (every lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
1673 define i1 @allones_v4i32_and1(<4 x i32> %arg) {
1674 ; SSE-LABEL: allones_v4i32_and1:
1676 ; SSE-NEXT: pslld $31, %xmm0
1677 ; SSE-NEXT: movmskps %xmm0, %eax
1678 ; SSE-NEXT: cmpl $15, %eax
1679 ; SSE-NEXT: sete %al
1682 ; AVX1OR2-LABEL: allones_v4i32_and1:
1684 ; AVX1OR2-NEXT: vpslld $31, %xmm0, %xmm0
1685 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1686 ; AVX1OR2-NEXT: vtestps %xmm1, %xmm0
1687 ; AVX1OR2-NEXT: setb %al
1688 ; AVX1OR2-NEXT: retq
1690 ; KNL-LABEL: allones_v4i32_and1:
1692 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1693 ; KNL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1694 ; KNL-NEXT: kmovw %k0, %eax
1695 ; KNL-NEXT: testb $15, %al
1696 ; KNL-NEXT: sete %al
1697 ; KNL-NEXT: vzeroupper
1700 ; SKX-LABEL: allones_v4i32_and1:
1702 ; SKX-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
1703 ; SKX-NEXT: kmovd %k0, %eax
1704 ; SKX-NEXT: cmpb $15, %al
1705 ; SKX-NEXT: sete %al
1707 %tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
1708 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
1709 %tmp2 = bitcast <4 x i1> %tmp1 to i4
1710 %tmp3 = icmp eq i4 %tmp2, -1
; No-lane-set reduction on bit 0 of a <4 x i32> vector.
; Each lane is masked with 1 and compared ne 0, the <4 x i1> result is
; bitcast to i4, and %tmp3 is true only when that mask equals 0
; (no lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
1714 define i1 @allzeros_v4i32_and1(<4 x i32> %arg) {
1715 ; SSE2-LABEL: allzeros_v4i32_and1:
1717 ; SSE2-NEXT: pslld $31, %xmm0
1718 ; SSE2-NEXT: movmskps %xmm0, %eax
1719 ; SSE2-NEXT: testl %eax, %eax
1720 ; SSE2-NEXT: sete %al
1723 ; SSE41-LABEL: allzeros_v4i32_and1:
1725 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1726 ; SSE41-NEXT: sete %al
1729 ; AVX1OR2-LABEL: allzeros_v4i32_and1:
1731 ; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1732 ; AVX1OR2-NEXT: sete %al
1733 ; AVX1OR2-NEXT: retq
1735 ; KNL-LABEL: allzeros_v4i32_and1:
1737 ; KNL-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1738 ; KNL-NEXT: sete %al
1741 ; SKX-LABEL: allzeros_v4i32_and1:
1743 ; SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967297,4294967297]
1744 ; SKX-NEXT: vptest %xmm1, %xmm0
1745 ; SKX-NEXT: sete %al
1747 %tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
1748 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
1749 %tmp2 = bitcast <4 x i1> %tmp1 to i4
1750 %tmp3 = icmp eq i4 %tmp2, 0
; All-lanes-set reduction on bit 0 of a <8 x i32> vector.
; Each lane is masked with 1 and compared ne 0, the <8 x i1> result is
; bitcast to i8, and %tmp3 is true only when that mask equals -1
; (every lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
1754 define i1 @allones_v8i32_and1(<8 x i32> %arg) {
1755 ; SSE-LABEL: allones_v8i32_and1:
1757 ; SSE-NEXT: pslld $31, %xmm1
1758 ; SSE-NEXT: pslld $31, %xmm0
1759 ; SSE-NEXT: packssdw %xmm1, %xmm0
1760 ; SSE-NEXT: packsswb %xmm0, %xmm0
1761 ; SSE-NEXT: pmovmskb %xmm0, %eax
1762 ; SSE-NEXT: cmpb $-1, %al
1763 ; SSE-NEXT: sete %al
1766 ; AVX1-LABEL: allones_v8i32_and1:
1768 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1769 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1770 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
1771 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1772 ; AVX1-NEXT: vtestps %xmm1, %xmm0
1773 ; AVX1-NEXT: setb %al
1774 ; AVX1-NEXT: vzeroupper
1777 ; AVX2-LABEL: allones_v8i32_and1:
1779 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
1780 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
1781 ; AVX2-NEXT: vtestps %ymm1, %ymm0
1782 ; AVX2-NEXT: setb %al
1783 ; AVX2-NEXT: vzeroupper
1786 ; KNL-LABEL: allones_v8i32_and1:
1788 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1789 ; KNL-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1790 ; KNL-NEXT: kmovw %k0, %eax
1791 ; KNL-NEXT: cmpb $-1, %al
1792 ; KNL-NEXT: sete %al
1793 ; KNL-NEXT: vzeroupper
1796 ; SKX-LABEL: allones_v8i32_and1:
1798 ; SKX-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
1799 ; SKX-NEXT: kortestb %k0, %k0
1800 ; SKX-NEXT: setb %al
1801 ; SKX-NEXT: vzeroupper
1803 %tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1804 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
1805 %tmp2 = bitcast <8 x i1> %tmp1 to i8
1806 %tmp3 = icmp eq i8 %tmp2, -1
; No-lane-set reduction on bit 0 of a <8 x i32> vector.
; Each lane is masked with 1 and compared ne 0, the <8 x i1> result is
; bitcast to i8, and %tmp3 is true only when that mask equals 0
; (no lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
1810 define i1 @allzeros_v8i32_and1(<8 x i32> %arg) {
1811 ; SSE2-LABEL: allzeros_v8i32_and1:
1813 ; SSE2-NEXT: por %xmm1, %xmm0
1814 ; SSE2-NEXT: pslld $31, %xmm0
1815 ; SSE2-NEXT: movmskps %xmm0, %eax
1816 ; SSE2-NEXT: testl %eax, %eax
1817 ; SSE2-NEXT: sete %al
1820 ; SSE41-LABEL: allzeros_v8i32_and1:
1822 ; SSE41-NEXT: por %xmm1, %xmm0
1823 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1824 ; SSE41-NEXT: sete %al
1827 ; AVX1-LABEL: allzeros_v8i32_and1:
1829 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
1830 ; AVX1-NEXT: sete %al
1831 ; AVX1-NEXT: vzeroupper
1834 ; AVX2-LABEL: allzeros_v8i32_and1:
1836 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
1837 ; AVX2-NEXT: vptest %ymm1, %ymm0
1838 ; AVX2-NEXT: sete %al
1839 ; AVX2-NEXT: vzeroupper
1842 ; AVX512-LABEL: allzeros_v8i32_and1:
1844 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
1845 ; AVX512-NEXT: vptest %ymm1, %ymm0
1846 ; AVX512-NEXT: sete %al
1847 ; AVX512-NEXT: vzeroupper
1849 %tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1850 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
1851 %tmp2 = bitcast <8 x i1> %tmp1 to i8
1852 %tmp3 = icmp eq i8 %tmp2, 0
; All-lanes-set reduction on bit 0 of a <16 x i32> vector.
; Each lane is masked with 1 and compared ne 0, the <16 x i1> result is
; bitcast to i16, and %tmp3 is true only when that mask equals -1
; (every lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
1856 define i1 @allones_v16i32_and1(<16 x i32> %arg) {
1857 ; SSE-LABEL: allones_v16i32_and1:
1859 ; SSE-NEXT: pslld $31, %xmm3
1860 ; SSE-NEXT: pslld $31, %xmm2
1861 ; SSE-NEXT: packssdw %xmm3, %xmm2
1862 ; SSE-NEXT: pslld $31, %xmm1
1863 ; SSE-NEXT: pslld $31, %xmm0
1864 ; SSE-NEXT: packssdw %xmm1, %xmm0
1865 ; SSE-NEXT: packsswb %xmm2, %xmm0
1866 ; SSE-NEXT: pmovmskb %xmm0, %eax
1867 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1868 ; SSE-NEXT: sete %al
1871 ; AVX1-LABEL: allones_v16i32_and1:
1873 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1874 ; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
1875 ; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
1876 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1877 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1878 ; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
1879 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
1880 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1881 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1882 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1883 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
1884 ; AVX1-NEXT: sete %al
1885 ; AVX1-NEXT: vzeroupper
1888 ; AVX2-LABEL: allones_v16i32_and1:
1890 ; AVX2-NEXT: vpslld $31, %ymm1, %ymm1
1891 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
1892 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
1893 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
1894 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1895 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1896 ; AVX2-NEXT: cmpl $-1, %eax
1897 ; AVX2-NEXT: sete %al
1898 ; AVX2-NEXT: vzeroupper
1901 ; AVX512-LABEL: allones_v16i32_and1:
1903 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1904 ; AVX512-NEXT: kortestw %k0, %k0
1905 ; AVX512-NEXT: setb %al
1906 ; AVX512-NEXT: vzeroupper
1908 %tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1909 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
1910 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1911 %tmp3 = icmp eq i16 %tmp2, -1
; No-lane-set reduction on bit 0 of a <16 x i32> vector.
; Each lane is masked with 1 and compared ne 0, the <16 x i1> result is
; bitcast to i16, and %tmp3 is true only when that mask equals 0
; (no lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
1915 define i1 @allzeros_v16i32_and1(<16 x i32> %arg) {
1916 ; SSE2-LABEL: allzeros_v16i32_and1:
1918 ; SSE2-NEXT: por %xmm3, %xmm1
1919 ; SSE2-NEXT: por %xmm2, %xmm0
1920 ; SSE2-NEXT: por %xmm1, %xmm0
1921 ; SSE2-NEXT: pslld $31, %xmm0
1922 ; SSE2-NEXT: movmskps %xmm0, %eax
1923 ; SSE2-NEXT: testl %eax, %eax
1924 ; SSE2-NEXT: sete %al
1927 ; SSE41-LABEL: allzeros_v16i32_and1:
1929 ; SSE41-NEXT: por %xmm3, %xmm1
1930 ; SSE41-NEXT: por %xmm2, %xmm0
1931 ; SSE41-NEXT: por %xmm1, %xmm0
1932 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1933 ; SSE41-NEXT: sete %al
1936 ; AVX1-LABEL: allzeros_v16i32_and1:
1938 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1939 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
1940 ; AVX1-NEXT: sete %al
1941 ; AVX1-NEXT: vzeroupper
1944 ; AVX2-LABEL: allzeros_v16i32_and1:
1946 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1947 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
1948 ; AVX2-NEXT: vptest %ymm1, %ymm0
1949 ; AVX2-NEXT: sete %al
1950 ; AVX2-NEXT: vzeroupper
1953 ; AVX512-LABEL: allzeros_v16i32_and1:
1955 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
1956 ; AVX512-NEXT: kortestw %k0, %k0
1957 ; AVX512-NEXT: sete %al
1958 ; AVX512-NEXT: vzeroupper
1960 %tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1961 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
1962 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1963 %tmp3 = icmp eq i16 %tmp2, 0
; All-lanes-set reduction on bit 0 of a <2 x i64> vector.
; Each lane is masked with 1 and compared ne 0, the <2 x i1> result is
; bitcast to i2, and %tmp3 is true only when that mask equals -1
; (every lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
1967 define i1 @allones_v2i64_and1(<2 x i64> %arg) {
1968 ; SSE-LABEL: allones_v2i64_and1:
1970 ; SSE-NEXT: psllq $63, %xmm0
1971 ; SSE-NEXT: movmskpd %xmm0, %eax
1972 ; SSE-NEXT: cmpl $3, %eax
1973 ; SSE-NEXT: sete %al
1976 ; AVX1OR2-LABEL: allones_v2i64_and1:
1978 ; AVX1OR2-NEXT: vpsllq $63, %xmm0, %xmm0
1979 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1980 ; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0
1981 ; AVX1OR2-NEXT: setb %al
1982 ; AVX1OR2-NEXT: retq
1984 ; KNL-LABEL: allones_v2i64_and1:
1986 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1987 ; KNL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1]
1988 ; KNL-NEXT: vptestnmq %zmm1, %zmm0, %k0
1989 ; KNL-NEXT: kmovw %k0, %eax
1990 ; KNL-NEXT: testb $3, %al
1991 ; KNL-NEXT: sete %al
1992 ; KNL-NEXT: vzeroupper
1995 ; SKX-LABEL: allones_v2i64_and1:
1997 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0
1998 ; SKX-NEXT: kmovd %k0, %eax
1999 ; SKX-NEXT: cmpb $3, %al
2000 ; SKX-NEXT: sete %al
2002 %tmp = and <2 x i64> %arg, <i64 1, i64 1>
2003 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
2004 %tmp2 = bitcast <2 x i1> %tmp1 to i2
2005 %tmp3 = icmp eq i2 %tmp2, -1
; No-lane-set reduction on bit 0 of a <2 x i64> vector.
; Each lane is masked with 1 and compared ne 0, the <2 x i1> result is
; bitcast to i2, and %tmp3 is true only when that mask equals 0
; (no lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
2009 define i1 @allzeros_v2i64_and1(<2 x i64> %arg) {
2010 ; SSE2-LABEL: allzeros_v2i64_and1:
2012 ; SSE2-NEXT: pslld $31, %xmm0
2013 ; SSE2-NEXT: movmskps %xmm0, %eax
2014 ; SSE2-NEXT: testb $5, %al
2015 ; SSE2-NEXT: sete %al
2018 ; SSE41-LABEL: allzeros_v2i64_and1:
2020 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2021 ; SSE41-NEXT: sete %al
2024 ; AVX1OR2-LABEL: allzeros_v2i64_and1:
2026 ; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2027 ; AVX1OR2-NEXT: sete %al
2028 ; AVX1OR2-NEXT: retq
2030 ; KNL-LABEL: allzeros_v2i64_and1:
2032 ; KNL-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2033 ; KNL-NEXT: sete %al
2036 ; SKX-LABEL: allzeros_v2i64_and1:
2038 ; SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1]
2039 ; SKX-NEXT: vptest %xmm1, %xmm0
2040 ; SKX-NEXT: sete %al
2042 %tmp = and <2 x i64> %arg, <i64 1, i64 1>
2043 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
2044 %tmp2 = bitcast <2 x i1> %tmp1 to i2
2045 %tmp3 = icmp eq i2 %tmp2, 0
; All-lanes-set reduction on bit 0 of a <4 x i64> vector.
; Each lane is masked with 1 and compared ne 0, the <4 x i1> result is
; bitcast to i4, and %tmp3 is true only when that mask equals -1
; (every lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
2049 define i1 @allones_v4i64_and1(<4 x i64> %arg) {
2050 ; SSE-LABEL: allones_v4i64_and1:
2052 ; SSE-NEXT: psllq $63, %xmm1
2053 ; SSE-NEXT: psllq $63, %xmm0
2054 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
2055 ; SSE-NEXT: movmskps %xmm0, %eax
2056 ; SSE-NEXT: cmpl $15, %eax
2057 ; SSE-NEXT: sete %al
2060 ; AVX1-LABEL: allones_v4i64_and1:
2062 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2063 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2064 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
2065 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
2066 ; AVX1-NEXT: vtestpd %xmm1, %xmm0
2067 ; AVX1-NEXT: setb %al
2068 ; AVX1-NEXT: vzeroupper
2071 ; AVX2-LABEL: allones_v4i64_and1:
2073 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
2074 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
2075 ; AVX2-NEXT: vtestpd %ymm1, %ymm0
2076 ; AVX2-NEXT: setb %al
2077 ; AVX2-NEXT: vzeroupper
2080 ; KNL-LABEL: allones_v4i64_and1:
2082 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2083 ; KNL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
2084 ; KNL-NEXT: kmovw %k0, %eax
2085 ; KNL-NEXT: testb $15, %al
2086 ; KNL-NEXT: sete %al
2087 ; KNL-NEXT: vzeroupper
2090 ; SKX-LABEL: allones_v4i64_and1:
2092 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
2093 ; SKX-NEXT: kmovd %k0, %eax
2094 ; SKX-NEXT: cmpb $15, %al
2095 ; SKX-NEXT: sete %al
2096 ; SKX-NEXT: vzeroupper
2098 %tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
2099 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
2100 %tmp2 = bitcast <4 x i1> %tmp1 to i4
2101 %tmp3 = icmp eq i4 %tmp2, -1
; No-lane-set reduction on bit 0 of a <4 x i64> vector.
; Each lane is masked with 1 and compared ne 0, the <4 x i1> result is
; bitcast to i4, and %tmp3 is true only when that mask equals 0
; (no lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
2105 define i1 @allzeros_v4i64_and1(<4 x i64> %arg) {
2106 ; SSE2-LABEL: allzeros_v4i64_and1:
2108 ; SSE2-NEXT: por %xmm1, %xmm0
2109 ; SSE2-NEXT: pslld $31, %xmm0
2110 ; SSE2-NEXT: movmskps %xmm0, %eax
2111 ; SSE2-NEXT: testb $5, %al
2112 ; SSE2-NEXT: sete %al
2115 ; SSE41-LABEL: allzeros_v4i64_and1:
2117 ; SSE41-NEXT: por %xmm1, %xmm0
2118 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2119 ; SSE41-NEXT: sete %al
2122 ; AVX1-LABEL: allzeros_v4i64_and1:
2124 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
2125 ; AVX1-NEXT: sete %al
2126 ; AVX1-NEXT: vzeroupper
2129 ; AVX2-LABEL: allzeros_v4i64_and1:
2131 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
2132 ; AVX2-NEXT: vptest %ymm1, %ymm0
2133 ; AVX2-NEXT: sete %al
2134 ; AVX2-NEXT: vzeroupper
2137 ; AVX512-LABEL: allzeros_v4i64_and1:
2139 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
2140 ; AVX512-NEXT: vptest %ymm1, %ymm0
2141 ; AVX512-NEXT: sete %al
2142 ; AVX512-NEXT: vzeroupper
2144 %tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
2145 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
2146 %tmp2 = bitcast <4 x i1> %tmp1 to i4
2147 %tmp3 = icmp eq i4 %tmp2, 0
; All-lanes-set reduction on bit 0 of a <8 x i64> vector.
; Each lane is masked with 1 and compared ne 0, the <8 x i1> result is
; bitcast to i8, and %tmp3 is true only when that mask equals -1
; (every lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
2151 define i1 @allones_v8i64_and1(<8 x i64> %arg) {
2152 ; SSE-LABEL: allones_v8i64_and1:
2154 ; SSE-NEXT: psllq $63, %xmm3
2155 ; SSE-NEXT: psllq $63, %xmm2
2156 ; SSE-NEXT: packssdw %xmm3, %xmm2
2157 ; SSE-NEXT: psllq $63, %xmm1
2158 ; SSE-NEXT: psllq $63, %xmm0
2159 ; SSE-NEXT: packssdw %xmm1, %xmm0
2160 ; SSE-NEXT: packssdw %xmm2, %xmm0
2161 ; SSE-NEXT: packsswb %xmm0, %xmm0
2162 ; SSE-NEXT: pmovmskb %xmm0, %eax
2163 ; SSE-NEXT: cmpb $-1, %al
2164 ; SSE-NEXT: sete %al
2167 ; AVX1-LABEL: allones_v8i64_and1:
2169 ; AVX1-NEXT: vpsllq $63, %xmm1, %xmm2
2170 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm3
2171 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
2172 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2173 ; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
2174 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2175 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
2176 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2177 ; AVX1-NEXT: vpackssdw %xmm0, %xmm2, %xmm0
2178 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
2179 ; AVX1-NEXT: vtestps %xmm1, %xmm0
2180 ; AVX1-NEXT: setb %al
2181 ; AVX1-NEXT: vzeroupper
2184 ; AVX2-LABEL: allones_v8i64_and1:
2186 ; AVX2-NEXT: vpsllq $63, %ymm1, %ymm1
2187 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
2188 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
2189 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
2190 ; AVX2-NEXT: vtestps %ymm1, %ymm0
2191 ; AVX2-NEXT: setb %al
2192 ; AVX2-NEXT: vzeroupper
2195 ; KNL-LABEL: allones_v8i64_and1:
2197 ; KNL-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
2198 ; KNL-NEXT: kmovw %k0, %eax
2199 ; KNL-NEXT: cmpb $-1, %al
2200 ; KNL-NEXT: sete %al
2201 ; KNL-NEXT: vzeroupper
2204 ; SKX-LABEL: allones_v8i64_and1:
2206 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
2207 ; SKX-NEXT: kortestb %k0, %k0
2208 ; SKX-NEXT: setb %al
2209 ; SKX-NEXT: vzeroupper
2211 %tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
2212 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
2213 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2214 %tmp3 = icmp eq i8 %tmp2, -1
; No-lane-set reduction on bit 0 of a <8 x i64> vector.
; Each lane is masked with 1 and compared ne 0, the <8 x i1> result is
; bitcast to i8, and %tmp3 is true only when that mask equals 0
; (no lane has bit 0 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
2218 define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
2219 ; SSE2-LABEL: allzeros_v8i64_and1:
2221 ; SSE2-NEXT: por %xmm3, %xmm1
2222 ; SSE2-NEXT: por %xmm2, %xmm0
2223 ; SSE2-NEXT: por %xmm1, %xmm0
2224 ; SSE2-NEXT: pslld $31, %xmm0
2225 ; SSE2-NEXT: movmskps %xmm0, %eax
2226 ; SSE2-NEXT: testb $5, %al
2227 ; SSE2-NEXT: sete %al
2230 ; SSE41-LABEL: allzeros_v8i64_and1:
2232 ; SSE41-NEXT: por %xmm3, %xmm1
2233 ; SSE41-NEXT: por %xmm2, %xmm0
2234 ; SSE41-NEXT: por %xmm1, %xmm0
2235 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2236 ; SSE41-NEXT: sete %al
2239 ; AVX1-LABEL: allzeros_v8i64_and1:
2241 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
2242 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
2243 ; AVX1-NEXT: sete %al
2244 ; AVX1-NEXT: vzeroupper
2247 ; AVX2-LABEL: allzeros_v8i64_and1:
2249 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
2250 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
2251 ; AVX2-NEXT: vptest %ymm1, %ymm0
2252 ; AVX2-NEXT: sete %al
2253 ; AVX2-NEXT: vzeroupper
2256 ; AVX512-LABEL: allzeros_v8i64_and1:
2258 ; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1]
2259 ; AVX512-NEXT: vptestmd %zmm1, %zmm0, %k0
2260 ; AVX512-NEXT: kortestw %k0, %k0
2261 ; AVX512-NEXT: sete %al
2262 ; AVX512-NEXT: vzeroupper
2264 %tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
2265 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
2266 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2267 %tmp3 = icmp eq i8 %tmp2, 0
; All-lanes-set reduction on bit 2 (mask value 4) of a <16 x i8> vector.
; Each lane is masked with 4 and compared ne 0, the <16 x i1> result is
; bitcast to i16, and %tmp3 is true only when that mask equals -1
; (every lane has bit 2 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
2271 define i1 @allones_v16i8_and4(<16 x i8> %arg) {
2272 ; SSE-LABEL: allones_v16i8_and4:
2274 ; SSE-NEXT: psllw $5, %xmm0
2275 ; SSE-NEXT: pmovmskb %xmm0, %eax
2276 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2277 ; SSE-NEXT: sete %al
2280 ; AVX1OR2-LABEL: allones_v16i8_and4:
2282 ; AVX1OR2-NEXT: vpsllw $5, %xmm0, %xmm0
2283 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
2284 ; AVX1OR2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2285 ; AVX1OR2-NEXT: sete %al
2286 ; AVX1OR2-NEXT: retq
2288 ; KNL-LABEL: allones_v16i8_and4:
2290 ; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
2291 ; KNL-NEXT: vpmovmskb %xmm0, %eax
2292 ; KNL-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2293 ; KNL-NEXT: sete %al
2296 ; SKX-LABEL: allones_v16i8_and4:
2298 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
2299 ; SKX-NEXT: kortestw %k0, %k0
2300 ; SKX-NEXT: setb %al
2302 %tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2303 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
2304 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2305 %tmp3 = icmp eq i16 %tmp2, -1
; No-lane-set reduction on bit 2 (mask value 4) of a <16 x i8> vector.
; Each lane is masked with 4 and compared ne 0, the <16 x i1> result is
; bitcast to i16, and %tmp3 is true only when that mask equals 0
; (no lane has bit 2 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
2309 define i1 @allzeros_v16i8_and4(<16 x i8> %arg) {
2310 ; SSE2-LABEL: allzeros_v16i8_and4:
2312 ; SSE2-NEXT: psllw $5, %xmm0
2313 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2314 ; SSE2-NEXT: testl %eax, %eax
2315 ; SSE2-NEXT: sete %al
2318 ; SSE41-LABEL: allzeros_v16i8_and4:
2320 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2321 ; SSE41-NEXT: sete %al
2324 ; AVX1OR2-LABEL: allzeros_v16i8_and4:
2326 ; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2327 ; AVX1OR2-NEXT: sete %al
2328 ; AVX1OR2-NEXT: retq
2330 ; KNL-LABEL: allzeros_v16i8_and4:
2332 ; KNL-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2333 ; KNL-NEXT: sete %al
2336 ; SKX-LABEL: allzeros_v16i8_and4:
2338 ; SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [289360691352306692,289360691352306692]
2339 ; SKX-NEXT: vptest %xmm1, %xmm0
2340 ; SKX-NEXT: sete %al
2342 %tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2343 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
2344 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2345 %tmp3 = icmp eq i16 %tmp2, 0
; All-lanes-set reduction on bit 2 (mask value 4) of a <32 x i8> vector.
; Each lane is masked with 4 and compared ne 0, the <32 x i1> result is
; bitcast to i32, and %tmp3 is true only when that mask equals -1
; (every lane has bit 2 set).
; The assertion lines are autogenerated (see the file header); regenerate
; them instead of editing by hand.
2349 define i1 @allones_v32i8_and4(<32 x i8> %arg) {
2350 ; SSE-LABEL: allones_v32i8_and4:
2352 ; SSE-NEXT: pand %xmm1, %xmm0
2353 ; SSE-NEXT: psllw $5, %xmm0
2354 ; SSE-NEXT: pmovmskb %xmm0, %eax
2355 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2356 ; SSE-NEXT: sete %al
2359 ; AVX1-LABEL: allones_v32i8_and4:
2361 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2362 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
2363 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2364 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2365 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2366 ; AVX1-NEXT: sete %al
2367 ; AVX1-NEXT: vzeroupper
2370 ; AVX2-LABEL: allones_v32i8_and4:
2372 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
2373 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2374 ; AVX2-NEXT: cmpl $-1, %eax
2375 ; AVX2-NEXT: sete %al
2376 ; AVX2-NEXT: vzeroupper
2379 ; KNL-LABEL: allones_v32i8_and4:
2381 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
2382 ; KNL-NEXT: vpmovmskb %ymm0, %eax
2383 ; KNL-NEXT: cmpl $-1, %eax
2384 ; KNL-NEXT: sete %al
2385 ; KNL-NEXT: vzeroupper
2388 ; SKX-LABEL: allones_v32i8_and4:
2390 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
2391 ; SKX-NEXT: kortestd %k0, %k0
2392 ; SKX-NEXT: setb %al
2393 ; SKX-NEXT: vzeroupper
2395 %tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2396 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
2397 %tmp2 = bitcast <32 x i1> %tmp1 to i32
2398 %tmp3 = icmp eq i32 %tmp2, -1
; Masks every i8 lane of %arg with 4 (bit 2), compares the lanes against zero,
; bitcasts the <32 x i1> result to i32 and compares that mask with 0, i.e.
; tests that no lane has bit 2 set.
; In the expected output, 289360691352306692 is 0x0404040404040404, the
; mask constant replicated across a 64-bit element.
2402 define i1 @allzeros_v32i8_and4(<32 x i8> %arg) {
2403 ; SSE2-LABEL: allzeros_v32i8_and4:
2405 ; SSE2-NEXT: por %xmm1, %xmm0
2406 ; SSE2-NEXT: psllw $5, %xmm0
2407 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2408 ; SSE2-NEXT: testl %eax, %eax
2409 ; SSE2-NEXT: sete %al
2412 ; SSE41-LABEL: allzeros_v32i8_and4:
2414 ; SSE41-NEXT: por %xmm1, %xmm0
2415 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2416 ; SSE41-NEXT: sete %al
2419 ; AVX1-LABEL: allzeros_v32i8_and4:
2421 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
2422 ; AVX1-NEXT: sete %al
2423 ; AVX1-NEXT: vzeroupper
2426 ; AVX2-LABEL: allzeros_v32i8_and4:
2428 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [289360691352306692,289360691352306692,289360691352306692,289360691352306692]
2429 ; AVX2-NEXT: vptest %ymm1, %ymm0
2430 ; AVX2-NEXT: sete %al
2431 ; AVX2-NEXT: vzeroupper
2434 ; AVX512-LABEL: allzeros_v32i8_and4:
2436 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [289360691352306692,289360691352306692,289360691352306692,289360691352306692]
2437 ; AVX512-NEXT: vptest %ymm1, %ymm0
2438 ; AVX512-NEXT: sete %al
2439 ; AVX512-NEXT: vzeroupper
2441 %tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2442 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
2443 %tmp2 = bitcast <32 x i1> %tmp1 to i32
2444 %tmp3 = icmp eq i32 %tmp2, 0
2448 define i1 @allones_v64i8_and4(<64 x i8> %arg) {
2449 ; SSE-LABEL: allones_v64i8_and4:
2451 ; SSE-NEXT: pand %xmm2, %xmm0
2452 ; SSE-NEXT: pand %xmm3, %xmm1
2453 ; SSE-NEXT: pand %xmm0, %xmm1
2454 ; SSE-NEXT: psllw $5, %xmm1
2455 ; SSE-NEXT: pmovmskb %xmm1, %eax
2456 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2457 ; SSE-NEXT: sete %al
2460 ; AVX1-LABEL: allones_v64i8_and4:
2462 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2463 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2464 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
2465 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
2466 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
2467 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2468 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2469 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2470 ; AVX1-NEXT: sete %al
2471 ; AVX1-NEXT: vzeroupper
2474 ; AVX2-LABEL: allones_v64i8_and4:
2476 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
2477 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
2478 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2479 ; AVX2-NEXT: cmpl $-1, %eax
2480 ; AVX2-NEXT: sete %al
2481 ; AVX2-NEXT: vzeroupper
2484 ; KNL-LABEL: allones_v64i8_and4:
2486 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
2487 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
2488 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
2489 ; KNL-NEXT: vpmovmskb %ymm0, %eax
2490 ; KNL-NEXT: cmpl $-1, %eax
2491 ; KNL-NEXT: sete %al
2492 ; KNL-NEXT: vzeroupper
2495 ; SKX-LABEL: allones_v64i8_and4:
2497 ; SKX-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
2498 ; SKX-NEXT: kortestq %k0, %k0
2499 ; SKX-NEXT: setb %al
2500 ; SKX-NEXT: vzeroupper
2502 %tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2503 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
2504 %tmp2 = bitcast <64 x i1> %tmp1 to i64
2505 %tmp3 = icmp eq i64 %tmp2, -1
2509 define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
2510 ; SSE2-LABEL: allzeros_v64i8_and4:
2512 ; SSE2-NEXT: por %xmm3, %xmm1
2513 ; SSE2-NEXT: por %xmm2, %xmm0
2514 ; SSE2-NEXT: por %xmm1, %xmm0
2515 ; SSE2-NEXT: psllw $5, %xmm0
2516 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2517 ; SSE2-NEXT: testl %eax, %eax
2518 ; SSE2-NEXT: sete %al
2521 ; SSE41-LABEL: allzeros_v64i8_and4:
2523 ; SSE41-NEXT: por %xmm3, %xmm1
2524 ; SSE41-NEXT: por %xmm2, %xmm0
2525 ; SSE41-NEXT: por %xmm1, %xmm0
2526 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2527 ; SSE41-NEXT: sete %al
2530 ; AVX1-LABEL: allzeros_v64i8_and4:
2532 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
2533 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
2534 ; AVX1-NEXT: sete %al
2535 ; AVX1-NEXT: vzeroupper
2538 ; AVX2-LABEL: allzeros_v64i8_and4:
2540 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
2541 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [289360691352306692,289360691352306692,289360691352306692,289360691352306692]
2542 ; AVX2-NEXT: vptest %ymm1, %ymm0
2543 ; AVX2-NEXT: sete %al
2544 ; AVX2-NEXT: vzeroupper
2547 ; AVX512-LABEL: allzeros_v64i8_and4:
2549 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
2550 ; AVX512-NEXT: kortestw %k0, %k0
2551 ; AVX512-NEXT: sete %al
2552 ; AVX512-NEXT: vzeroupper
2554 %tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2555 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
2556 %tmp2 = bitcast <64 x i1> %tmp1 to i64
2557 %tmp3 = icmp eq i64 %tmp2, 0
; Masks every i16 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <8 x i1> result to i8 and compares that mask with -1,
; i.e. tests that every lane has bit 2 set.
; In the expected output, the shift by 13 moves bit 2 of each 16-bit lane
; into that lane's sign bit.
2561 define i1 @allones_v8i16_and4(<8 x i16> %arg) {
2562 ; SSE-LABEL: allones_v8i16_and4:
2564 ; SSE-NEXT: psllw $13, %xmm0
2565 ; SSE-NEXT: packsswb %xmm0, %xmm0
2566 ; SSE-NEXT: pmovmskb %xmm0, %eax
2567 ; SSE-NEXT: cmpb $-1, %al
2568 ; SSE-NEXT: sete %al
2571 ; AVX1OR2-LABEL: allones_v8i16_and4:
2573 ; AVX1OR2-NEXT: vpsllw $13, %xmm0, %xmm0
2574 ; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2575 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
2576 ; AVX1OR2-NEXT: cmpb $-1, %al
2577 ; AVX1OR2-NEXT: sete %al
2578 ; AVX1OR2-NEXT: retq
2580 ; KNL-LABEL: allones_v8i16_and4:
2582 ; KNL-NEXT: vpsllw $13, %xmm0, %xmm0
2583 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
2584 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
2585 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
2586 ; KNL-NEXT: kmovw %k0, %eax
2587 ; KNL-NEXT: cmpb $-1, %al
2588 ; KNL-NEXT: sete %al
2589 ; KNL-NEXT: vzeroupper
2592 ; SKX-LABEL: allones_v8i16_and4:
2594 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
2595 ; SKX-NEXT: kortestb %k0, %k0
2596 ; SKX-NEXT: setb %al
2598 %tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2599 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
2600 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2601 %tmp3 = icmp eq i8 %tmp2, -1
; Masks every i16 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <8 x i1> result to i8 and compares that mask with 0,
; i.e. tests that no lane has bit 2 set.
; In the expected SSE2 output, psllw $5 puts bit 2 in the sign bit of each
; lane's low byte and the 0x5555 test keeps only those even byte positions
; of the pmovmskb result. 1125917086973956 is 0x0004000400040004.
2605 define i1 @allzeros_v8i16_and4(<8 x i16> %arg) {
2606 ; SSE2-LABEL: allzeros_v8i16_and4:
2608 ; SSE2-NEXT: psllw $5, %xmm0
2609 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2610 ; SSE2-NEXT: testl $21845, %eax # imm = 0x5555
2611 ; SSE2-NEXT: sete %al
2614 ; SSE41-LABEL: allzeros_v8i16_and4:
2616 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2617 ; SSE41-NEXT: sete %al
2620 ; AVX1OR2-LABEL: allzeros_v8i16_and4:
2622 ; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2623 ; AVX1OR2-NEXT: sete %al
2624 ; AVX1OR2-NEXT: retq
2626 ; KNL-LABEL: allzeros_v8i16_and4:
2628 ; KNL-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2629 ; KNL-NEXT: sete %al
2632 ; SKX-LABEL: allzeros_v8i16_and4:
2634 ; SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1125917086973956,1125917086973956]
2635 ; SKX-NEXT: vptest %xmm1, %xmm0
2636 ; SKX-NEXT: sete %al
2638 %tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2639 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
2640 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2641 %tmp3 = icmp eq i8 %tmp2, 0
; Masks every i16 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <16 x i1> result to i16 and compares that mask with -1,
; i.e. tests that every lane has bit 2 set.
; In the expected output, the shift by 13 moves bit 2 of each 16-bit lane
; into that lane's sign bit.
2645 define i1 @allones_v16i16_and4(<16 x i16> %arg) {
2646 ; SSE-LABEL: allones_v16i16_and4:
2648 ; SSE-NEXT: psllw $13, %xmm1
2649 ; SSE-NEXT: psllw $13, %xmm0
2650 ; SSE-NEXT: packsswb %xmm1, %xmm0
2651 ; SSE-NEXT: pmovmskb %xmm0, %eax
2652 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2653 ; SSE-NEXT: sete %al
2656 ; AVX1-LABEL: allones_v16i16_and4:
2658 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2659 ; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
2660 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
2661 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2662 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2663 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2664 ; AVX1-NEXT: sete %al
2665 ; AVX1-NEXT: vzeroupper
2668 ; AVX2-LABEL: allones_v16i16_and4:
2670 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
2671 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2672 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2673 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
2674 ; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2675 ; AVX2-NEXT: sete %al
2676 ; AVX2-NEXT: vzeroupper
2679 ; KNL-LABEL: allones_v16i16_and4:
2681 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
2682 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
2683 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
2684 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2685 ; KNL-NEXT: kortestw %k0, %k0
2686 ; KNL-NEXT: setb %al
2687 ; KNL-NEXT: vzeroupper
2690 ; SKX-LABEL: allones_v16i16_and4:
2692 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
2693 ; SKX-NEXT: kortestw %k0, %k0
2694 ; SKX-NEXT: setb %al
2695 ; SKX-NEXT: vzeroupper
2697 %tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2698 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
2699 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2700 %tmp3 = icmp eq i16 %tmp2, -1
; Masks every i16 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <32 x i1> result to i32 and compares that mask with -1,
; i.e. tests that every lane has bit 2 set.
; In the expected output, the shift by 13 moves bit 2 of each 16-bit lane
; into that lane's sign bit; the KNL run handles the two 256-bit halves
; separately and ANDs their 16-bit masks before the 0xFFFF compare.
2704 define i1 @allones_v32i16_and4(<32 x i16> %arg) {
2705 ; SSE-LABEL: allones_v32i16_and4:
2707 ; SSE-NEXT: pand %xmm3, %xmm1
2708 ; SSE-NEXT: psllw $13, %xmm1
2709 ; SSE-NEXT: pand %xmm2, %xmm0
2710 ; SSE-NEXT: psllw $13, %xmm0
2711 ; SSE-NEXT: packsswb %xmm1, %xmm0
2712 ; SSE-NEXT: pmovmskb %xmm0, %eax
2713 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2714 ; SSE-NEXT: sete %al
2717 ; AVX1-LABEL: allones_v32i16_and4:
2719 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2720 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2721 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
2722 ; AVX1-NEXT: vpsllw $13, %xmm2, %xmm2
2723 ; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
2724 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
2725 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
2726 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2727 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
2728 ; AVX1-NEXT: sete %al
2729 ; AVX1-NEXT: vzeroupper
2732 ; AVX2-LABEL: allones_v32i16_and4:
2734 ; AVX2-NEXT: vpsllw $13, %ymm1, %ymm1
2735 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
2736 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
2737 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2738 ; AVX2-NEXT: cmpl $-1, %eax
2739 ; AVX2-NEXT: sete %al
2740 ; AVX2-NEXT: vzeroupper
2743 ; KNL-LABEL: allones_v32i16_and4:
2745 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm1
2746 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
2747 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
2748 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2749 ; KNL-NEXT: kmovw %k0, %eax
2750 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
2751 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
2752 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
2753 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
2754 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2755 ; KNL-NEXT: kmovw %k0, %ecx
2756 ; KNL-NEXT: andl %eax, %ecx
2757 ; KNL-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
2758 ; KNL-NEXT: sete %al
2759 ; KNL-NEXT: vzeroupper
2762 ; SKX-LABEL: allones_v32i16_and4:
2764 ; SKX-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
2765 ; SKX-NEXT: kortestd %k0, %k0
2766 ; SKX-NEXT: setb %al
2767 ; SKX-NEXT: vzeroupper
2769 %tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2770 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
2771 %tmp2 = bitcast <32 x i1> %tmp1 to i32
2772 %tmp3 = icmp eq i32 %tmp2, -1
; Masks every i16 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <32 x i1> result to i32 and compares that mask with 0,
; i.e. tests that no lane has bit 2 set.
; In the expected SSE2 output, psllw $5 puts bit 2 in the sign bit of each
; lane's low byte and the 0x5555 test keeps only those even byte positions
; of the pmovmskb result. 1125917086973956 is 0x0004000400040004.
2776 define i1 @allzeros_v32i16_and4(<32 x i16> %arg) {
2777 ; SSE2-LABEL: allzeros_v32i16_and4:
2779 ; SSE2-NEXT: por %xmm3, %xmm1
2780 ; SSE2-NEXT: por %xmm2, %xmm0
2781 ; SSE2-NEXT: por %xmm1, %xmm0
2782 ; SSE2-NEXT: psllw $5, %xmm0
2783 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2784 ; SSE2-NEXT: testl $21845, %eax # imm = 0x5555
2785 ; SSE2-NEXT: sete %al
2788 ; SSE41-LABEL: allzeros_v32i16_and4:
2790 ; SSE41-NEXT: por %xmm3, %xmm1
2791 ; SSE41-NEXT: por %xmm2, %xmm0
2792 ; SSE41-NEXT: por %xmm1, %xmm0
2793 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2794 ; SSE41-NEXT: sete %al
2797 ; AVX1-LABEL: allzeros_v32i16_and4:
2799 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
2800 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
2801 ; AVX1-NEXT: sete %al
2802 ; AVX1-NEXT: vzeroupper
2805 ; AVX2-LABEL: allzeros_v32i16_and4:
2807 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
2808 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1125917086973956,1125917086973956,1125917086973956,1125917086973956]
2809 ; AVX2-NEXT: vptest %ymm1, %ymm0
2810 ; AVX2-NEXT: sete %al
2811 ; AVX2-NEXT: vzeroupper
2814 ; AVX512-LABEL: allzeros_v32i16_and4:
2816 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
2817 ; AVX512-NEXT: kortestw %k0, %k0
2818 ; AVX512-NEXT: sete %al
2819 ; AVX512-NEXT: vzeroupper
2821 %tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2822 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
2823 %tmp2 = bitcast <32 x i1> %tmp1 to i32
2824 %tmp3 = icmp eq i32 %tmp2, 0
; Masks every i16 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <16 x i1> result to i16 and compares that mask with 0,
; i.e. tests that no lane has bit 2 set.
; In the expected SSE2 output, psllw $5 puts bit 2 in the sign bit of each
; lane's low byte and the 0x5555 test keeps only those even byte positions
; of the pmovmskb result. 1125917086973956 is 0x0004000400040004.
2828 define i1 @allzeros_v16i16_and4(<16 x i16> %arg) {
2829 ; SSE2-LABEL: allzeros_v16i16_and4:
2831 ; SSE2-NEXT: por %xmm1, %xmm0
2832 ; SSE2-NEXT: psllw $5, %xmm0
2833 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2834 ; SSE2-NEXT: testl $21845, %eax # imm = 0x5555
2835 ; SSE2-NEXT: sete %al
2838 ; SSE41-LABEL: allzeros_v16i16_and4:
2840 ; SSE41-NEXT: por %xmm1, %xmm0
2841 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2842 ; SSE41-NEXT: sete %al
2845 ; AVX1-LABEL: allzeros_v16i16_and4:
2847 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
2848 ; AVX1-NEXT: sete %al
2849 ; AVX1-NEXT: vzeroupper
2852 ; AVX2-LABEL: allzeros_v16i16_and4:
2854 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1125917086973956,1125917086973956,1125917086973956,1125917086973956]
2855 ; AVX2-NEXT: vptest %ymm1, %ymm0
2856 ; AVX2-NEXT: sete %al
2857 ; AVX2-NEXT: vzeroupper
2860 ; AVX512-LABEL: allzeros_v16i16_and4:
2862 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1125917086973956,1125917086973956,1125917086973956,1125917086973956]
2863 ; AVX512-NEXT: vptest %ymm1, %ymm0
2864 ; AVX512-NEXT: sete %al
2865 ; AVX512-NEXT: vzeroupper
2867 %tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2868 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
2869 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2870 %tmp3 = icmp eq i16 %tmp2, 0
; Masks every i32 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <4 x i1> result to i4 and compares that mask with -1
; (the 4-bit all-ones value, 15), i.e. tests that every lane has bit 2 set.
; In the expected output, the shift by 29 moves bit 2 of each 32-bit lane
; into that lane's sign bit.
2874 define i1 @allones_v4i32_and4(<4 x i32> %arg) {
2875 ; SSE-LABEL: allones_v4i32_and4:
2877 ; SSE-NEXT: pslld $29, %xmm0
2878 ; SSE-NEXT: movmskps %xmm0, %eax
2879 ; SSE-NEXT: cmpl $15, %eax
2880 ; SSE-NEXT: sete %al
2883 ; AVX1OR2-LABEL: allones_v4i32_and4:
2885 ; AVX1OR2-NEXT: vpslld $29, %xmm0, %xmm0
2886 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
2887 ; AVX1OR2-NEXT: vtestps %xmm1, %xmm0
2888 ; AVX1OR2-NEXT: setb %al
2889 ; AVX1OR2-NEXT: retq
2891 ; KNL-LABEL: allones_v4i32_and4:
2893 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2894 ; KNL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
2895 ; KNL-NEXT: kmovw %k0, %eax
2896 ; KNL-NEXT: testb $15, %al
2897 ; KNL-NEXT: sete %al
2898 ; KNL-NEXT: vzeroupper
2901 ; SKX-LABEL: allones_v4i32_and4:
2903 ; SKX-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
2904 ; SKX-NEXT: kmovd %k0, %eax
2905 ; SKX-NEXT: cmpb $15, %al
2906 ; SKX-NEXT: sete %al
2908 %tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
2909 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
2910 %tmp2 = bitcast <4 x i1> %tmp1 to i4
2911 %tmp3 = icmp eq i4 %tmp2, -1
; Masks every i32 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <4 x i1> result to i4 and compares that mask with 0,
; i.e. tests that no lane has bit 2 set.
; In the expected output, pslld $29 moves bit 2 of each 32-bit lane into
; that lane's sign bit, and 17179869188 is 0x0000000400000004.
2915 define i1 @allzeros_v4i32_and4(<4 x i32> %arg) {
2916 ; SSE2-LABEL: allzeros_v4i32_and4:
2918 ; SSE2-NEXT: pslld $29, %xmm0
2919 ; SSE2-NEXT: movmskps %xmm0, %eax
2920 ; SSE2-NEXT: testl %eax, %eax
2921 ; SSE2-NEXT: sete %al
2924 ; SSE41-LABEL: allzeros_v4i32_and4:
2926 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2927 ; SSE41-NEXT: sete %al
2930 ; AVX1OR2-LABEL: allzeros_v4i32_and4:
2932 ; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2933 ; AVX1OR2-NEXT: sete %al
2934 ; AVX1OR2-NEXT: retq
2936 ; KNL-LABEL: allzeros_v4i32_and4:
2938 ; KNL-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2939 ; KNL-NEXT: sete %al
2942 ; SKX-LABEL: allzeros_v4i32_and4:
2944 ; SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17179869188,17179869188]
2945 ; SKX-NEXT: vptest %xmm1, %xmm0
2946 ; SKX-NEXT: sete %al
2948 %tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
2949 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
2950 %tmp2 = bitcast <4 x i1> %tmp1 to i4
2951 %tmp3 = icmp eq i4 %tmp2, 0
; Masks every i32 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <8 x i1> result to i8 and compares that mask with -1,
; i.e. tests that every lane has bit 2 set.
; In the expected output, the shift by 29 moves bit 2 of each 32-bit lane
; into that lane's sign bit.
2955 define i1 @allones_v8i32_and4(<8 x i32> %arg) {
2956 ; SSE-LABEL: allones_v8i32_and4:
2958 ; SSE-NEXT: pslld $29, %xmm1
2959 ; SSE-NEXT: pslld $29, %xmm0
2960 ; SSE-NEXT: packssdw %xmm1, %xmm0
2961 ; SSE-NEXT: packsswb %xmm0, %xmm0
2962 ; SSE-NEXT: pmovmskb %xmm0, %eax
2963 ; SSE-NEXT: cmpb $-1, %al
2964 ; SSE-NEXT: sete %al
2967 ; AVX1-LABEL: allones_v8i32_and4:
2969 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2970 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2971 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
2972 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
2973 ; AVX1-NEXT: vtestps %xmm1, %xmm0
2974 ; AVX1-NEXT: setb %al
2975 ; AVX1-NEXT: vzeroupper
2978 ; AVX2-LABEL: allones_v8i32_and4:
2980 ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
2981 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
2982 ; AVX2-NEXT: vtestps %ymm1, %ymm0
2983 ; AVX2-NEXT: setb %al
2984 ; AVX2-NEXT: vzeroupper
2987 ; KNL-LABEL: allones_v8i32_and4:
2989 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2990 ; KNL-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
2991 ; KNL-NEXT: kmovw %k0, %eax
2992 ; KNL-NEXT: cmpb $-1, %al
2993 ; KNL-NEXT: sete %al
2994 ; KNL-NEXT: vzeroupper
2997 ; SKX-LABEL: allones_v8i32_and4:
2999 ; SKX-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
3000 ; SKX-NEXT: kortestb %k0, %k0
3001 ; SKX-NEXT: setb %al
3002 ; SKX-NEXT: vzeroupper
3004 %tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3005 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
3006 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3007 %tmp3 = icmp eq i8 %tmp2, -1
; Masks every i32 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <8 x i1> result to i8 and compares that mask with 0,
; i.e. tests that no lane has bit 2 set.
; In the expected output, pslld $29 moves bit 2 of each 32-bit lane into
; that lane's sign bit, and 17179869188 is 0x0000000400000004.
3011 define i1 @allzeros_v8i32_and4(<8 x i32> %arg) {
3012 ; SSE2-LABEL: allzeros_v8i32_and4:
3014 ; SSE2-NEXT: por %xmm1, %xmm0
3015 ; SSE2-NEXT: pslld $29, %xmm0
3016 ; SSE2-NEXT: movmskps %xmm0, %eax
3017 ; SSE2-NEXT: testl %eax, %eax
3018 ; SSE2-NEXT: sete %al
3021 ; SSE41-LABEL: allzeros_v8i32_and4:
3023 ; SSE41-NEXT: por %xmm1, %xmm0
3024 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3025 ; SSE41-NEXT: sete %al
3028 ; AVX1-LABEL: allzeros_v8i32_and4:
3030 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
3031 ; AVX1-NEXT: sete %al
3032 ; AVX1-NEXT: vzeroupper
3035 ; AVX2-LABEL: allzeros_v8i32_and4:
3037 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17179869188,17179869188,17179869188,17179869188]
3038 ; AVX2-NEXT: vptest %ymm1, %ymm0
3039 ; AVX2-NEXT: sete %al
3040 ; AVX2-NEXT: vzeroupper
3043 ; AVX512-LABEL: allzeros_v8i32_and4:
3045 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17179869188,17179869188,17179869188,17179869188]
3046 ; AVX512-NEXT: vptest %ymm1, %ymm0
3047 ; AVX512-NEXT: sete %al
3048 ; AVX512-NEXT: vzeroupper
3050 %tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3051 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
3052 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3053 %tmp3 = icmp eq i8 %tmp2, 0
; Masks every i32 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <16 x i1> result to i16 and compares that mask with -1,
; i.e. tests that every lane has bit 2 set.
; In the expected output, the shift by 29 moves bit 2 of each 32-bit lane
; into that lane's sign bit before the lanes are packed down.
3057 define i1 @allones_v16i32_and4(<16 x i32> %arg) {
3058 ; SSE-LABEL: allones_v16i32_and4:
3060 ; SSE-NEXT: pslld $29, %xmm3
3061 ; SSE-NEXT: pslld $29, %xmm2
3062 ; SSE-NEXT: packssdw %xmm3, %xmm2
3063 ; SSE-NEXT: pslld $29, %xmm1
3064 ; SSE-NEXT: pslld $29, %xmm0
3065 ; SSE-NEXT: packssdw %xmm1, %xmm0
3066 ; SSE-NEXT: packsswb %xmm2, %xmm0
3067 ; SSE-NEXT: pmovmskb %xmm0, %eax
3068 ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
3069 ; SSE-NEXT: sete %al
3072 ; AVX1-LABEL: allones_v16i32_and4:
3074 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3075 ; AVX1-NEXT: vpslld $29, %xmm2, %xmm2
3076 ; AVX1-NEXT: vpslld $29, %xmm1, %xmm1
3077 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
3078 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3079 ; AVX1-NEXT: vpslld $29, %xmm2, %xmm2
3080 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
3081 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
3082 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3083 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3084 ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
3085 ; AVX1-NEXT: sete %al
3086 ; AVX1-NEXT: vzeroupper
3089 ; AVX2-LABEL: allones_v16i32_and4:
3091 ; AVX2-NEXT: vpslld $29, %ymm1, %ymm1
3092 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
3093 ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
3094 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
3095 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
3096 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
3097 ; AVX2-NEXT: cmpl $-1, %eax
3098 ; AVX2-NEXT: sete %al
3099 ; AVX2-NEXT: vzeroupper
3102 ; AVX512-LABEL: allones_v16i32_and4:
3104 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
3105 ; AVX512-NEXT: kortestw %k0, %k0
3106 ; AVX512-NEXT: setb %al
3107 ; AVX512-NEXT: vzeroupper
3109 %tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3110 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
3111 %tmp2 = bitcast <16 x i1> %tmp1 to i16
3112 %tmp3 = icmp eq i16 %tmp2, -1
; Masks every i32 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <16 x i1> result to i16 and compares that mask with 0,
; i.e. tests that no lane has bit 2 set.
; In the expected output, the narrower targets first OR the vector parts
; together; 17179869188 is 0x0000000400000004.
3116 define i1 @allzeros_v16i32_and4(<16 x i32> %arg) {
3117 ; SSE2-LABEL: allzeros_v16i32_and4:
3119 ; SSE2-NEXT: por %xmm3, %xmm1
3120 ; SSE2-NEXT: por %xmm2, %xmm0
3121 ; SSE2-NEXT: por %xmm1, %xmm0
3122 ; SSE2-NEXT: pslld $29, %xmm0
3123 ; SSE2-NEXT: movmskps %xmm0, %eax
3124 ; SSE2-NEXT: testl %eax, %eax
3125 ; SSE2-NEXT: sete %al
3128 ; SSE41-LABEL: allzeros_v16i32_and4:
3130 ; SSE41-NEXT: por %xmm3, %xmm1
3131 ; SSE41-NEXT: por %xmm2, %xmm0
3132 ; SSE41-NEXT: por %xmm1, %xmm0
3133 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3134 ; SSE41-NEXT: sete %al
3137 ; AVX1-LABEL: allzeros_v16i32_and4:
3139 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
3140 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
3141 ; AVX1-NEXT: sete %al
3142 ; AVX1-NEXT: vzeroupper
3145 ; AVX2-LABEL: allzeros_v16i32_and4:
3147 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
3148 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17179869188,17179869188,17179869188,17179869188]
3149 ; AVX2-NEXT: vptest %ymm1, %ymm0
3150 ; AVX2-NEXT: sete %al
3151 ; AVX2-NEXT: vzeroupper
3154 ; AVX512-LABEL: allzeros_v16i32_and4:
3156 ; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
3157 ; AVX512-NEXT: kortestw %k0, %k0
3158 ; AVX512-NEXT: sete %al
3159 ; AVX512-NEXT: vzeroupper
3161 %tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3162 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
3163 %tmp2 = bitcast <16 x i1> %tmp1 to i16
3164 %tmp3 = icmp eq i16 %tmp2, 0
; Masks every i64 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <2 x i1> result to i2 and compares that mask with -1
; (the 2-bit all-ones value, 3), i.e. tests that every lane has bit 2 set.
; In the expected output, the shift by 61 moves bit 2 of each 64-bit lane
; into that lane's sign bit.
3168 define i1 @allones_v2i64_and4(<2 x i64> %arg) {
3169 ; SSE-LABEL: allones_v2i64_and4:
3171 ; SSE-NEXT: psllq $61, %xmm0
3172 ; SSE-NEXT: movmskpd %xmm0, %eax
3173 ; SSE-NEXT: cmpl $3, %eax
3174 ; SSE-NEXT: sete %al
3177 ; AVX1OR2-LABEL: allones_v2i64_and4:
3179 ; AVX1OR2-NEXT: vpsllq $61, %xmm0, %xmm0
3180 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
3181 ; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0
3182 ; AVX1OR2-NEXT: setb %al
3183 ; AVX1OR2-NEXT: retq
3185 ; KNL-LABEL: allones_v2i64_and4:
3187 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3188 ; KNL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4]
3189 ; KNL-NEXT: vptestnmq %zmm1, %zmm0, %k0
3190 ; KNL-NEXT: kmovw %k0, %eax
3191 ; KNL-NEXT: testb $3, %al
3192 ; KNL-NEXT: sete %al
3193 ; KNL-NEXT: vzeroupper
3196 ; SKX-LABEL: allones_v2i64_and4:
3198 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0
3199 ; SKX-NEXT: kmovd %k0, %eax
3200 ; SKX-NEXT: cmpb $3, %al
3201 ; SKX-NEXT: sete %al
3203 %tmp = and <2 x i64> %arg, <i64 4, i64 4>
3204 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
3205 %tmp2 = bitcast <2 x i1> %tmp1 to i2
3206 %tmp3 = icmp eq i2 %tmp2, -1
; Masks every i64 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <2 x i1> result to i2 and compares that mask with 0,
; i.e. tests that no lane has bit 2 set.
; In the expected SSE2 output, pslld $29 puts bit 2 in the sign bit of each
; lane's low 32-bit half and the testb $5 keeps only those two positions
; (bits 0 and 2) of the movmskps result.
3210 define i1 @allzeros_v2i64_and4(<2 x i64> %arg) {
3211 ; SSE2-LABEL: allzeros_v2i64_and4:
3213 ; SSE2-NEXT: pslld $29, %xmm0
3214 ; SSE2-NEXT: movmskps %xmm0, %eax
3215 ; SSE2-NEXT: testb $5, %al
3216 ; SSE2-NEXT: sete %al
3219 ; SSE41-LABEL: allzeros_v2i64_and4:
3221 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3222 ; SSE41-NEXT: sete %al
3225 ; AVX1OR2-LABEL: allzeros_v2i64_and4:
3227 ; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3228 ; AVX1OR2-NEXT: sete %al
3229 ; AVX1OR2-NEXT: retq
3231 ; KNL-LABEL: allzeros_v2i64_and4:
3233 ; KNL-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3234 ; KNL-NEXT: sete %al
3237 ; SKX-LABEL: allzeros_v2i64_and4:
3239 ; SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4]
3240 ; SKX-NEXT: vptest %xmm1, %xmm0
3241 ; SKX-NEXT: sete %al
3243 %tmp = and <2 x i64> %arg, <i64 4, i64 4>
3244 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
3245 %tmp2 = bitcast <2 x i1> %tmp1 to i2
3246 %tmp3 = icmp eq i2 %tmp2, 0
; Masks every i64 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <4 x i1> result to i4 and compares that mask with -1
; (the 4-bit all-ones value, 15), i.e. tests that every lane has bit 2 set.
; In the expected output, the shift by 61 moves bit 2 of each 64-bit lane
; into that lane's sign bit.
3250 define i1 @allones_v4i64_and4(<4 x i64> %arg) {
3251 ; SSE-LABEL: allones_v4i64_and4:
3253 ; SSE-NEXT: psllq $61, %xmm1
3254 ; SSE-NEXT: psllq $61, %xmm0
3255 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
3256 ; SSE-NEXT: movmskps %xmm0, %eax
3257 ; SSE-NEXT: cmpl $15, %eax
3258 ; SSE-NEXT: sete %al
3261 ; AVX1-LABEL: allones_v4i64_and4:
3263 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3264 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3265 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
3266 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
3267 ; AVX1-NEXT: vtestpd %xmm1, %xmm0
3268 ; AVX1-NEXT: setb %al
3269 ; AVX1-NEXT: vzeroupper
3272 ; AVX2-LABEL: allones_v4i64_and4:
3274 ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
3275 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
3276 ; AVX2-NEXT: vtestpd %ymm1, %ymm0
3277 ; AVX2-NEXT: setb %al
3278 ; AVX2-NEXT: vzeroupper
3281 ; KNL-LABEL: allones_v4i64_and4:
3283 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3284 ; KNL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
3285 ; KNL-NEXT: kmovw %k0, %eax
3286 ; KNL-NEXT: testb $15, %al
3287 ; KNL-NEXT: sete %al
3288 ; KNL-NEXT: vzeroupper
3291 ; SKX-LABEL: allones_v4i64_and4:
3293 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
3294 ; SKX-NEXT: kmovd %k0, %eax
3295 ; SKX-NEXT: cmpb $15, %al
3296 ; SKX-NEXT: sete %al
3297 ; SKX-NEXT: vzeroupper
3299 %tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
3300 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
3301 %tmp2 = bitcast <4 x i1> %tmp1 to i4
3302 %tmp3 = icmp eq i4 %tmp2, -1
; Masks every i64 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <4 x i1> result to i4 and compares that mask with 0,
; i.e. tests that no lane has bit 2 set.
; In the expected SSE2 output, the two halves are ORed first, pslld $29 puts
; bit 2 in the sign bit of each lane's low 32-bit half, and the testb $5
; keeps only those two positions (bits 0 and 2) of the movmskps result.
3306 define i1 @allzeros_v4i64_and4(<4 x i64> %arg) {
3307 ; SSE2-LABEL: allzeros_v4i64_and4:
3309 ; SSE2-NEXT: por %xmm1, %xmm0
3310 ; SSE2-NEXT: pslld $29, %xmm0
3311 ; SSE2-NEXT: movmskps %xmm0, %eax
3312 ; SSE2-NEXT: testb $5, %al
3313 ; SSE2-NEXT: sete %al
3316 ; SSE41-LABEL: allzeros_v4i64_and4:
3318 ; SSE41-NEXT: por %xmm1, %xmm0
3319 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3320 ; SSE41-NEXT: sete %al
3323 ; AVX1-LABEL: allzeros_v4i64_and4:
3325 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
3326 ; AVX1-NEXT: sete %al
3327 ; AVX1-NEXT: vzeroupper
3330 ; AVX2-LABEL: allzeros_v4i64_and4:
3332 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
3333 ; AVX2-NEXT: vptest %ymm1, %ymm0
3334 ; AVX2-NEXT: sete %al
3335 ; AVX2-NEXT: vzeroupper
3338 ; AVX512-LABEL: allzeros_v4i64_and4:
3340 ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
3341 ; AVX512-NEXT: vptest %ymm1, %ymm0
3342 ; AVX512-NEXT: sete %al
3343 ; AVX512-NEXT: vzeroupper
3345 %tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
3346 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
3347 %tmp2 = bitcast <4 x i1> %tmp1 to i4
3348 %tmp3 = icmp eq i4 %tmp2, 0
; Masks every i64 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <8 x i1> result to i8 and compares that mask with -1,
; i.e. tests that every lane has bit 2 set.
; In the expected output, the shift by 61 moves bit 2 of each 64-bit lane
; into that lane's sign bit before the lanes are packed down.
3352 define i1 @allones_v8i64_and4(<8 x i64> %arg) {
3353 ; SSE-LABEL: allones_v8i64_and4:
3355 ; SSE-NEXT: psllq $61, %xmm3
3356 ; SSE-NEXT: psllq $61, %xmm2
3357 ; SSE-NEXT: packssdw %xmm3, %xmm2
3358 ; SSE-NEXT: psllq $61, %xmm1
3359 ; SSE-NEXT: psllq $61, %xmm0
3360 ; SSE-NEXT: packssdw %xmm1, %xmm0
3361 ; SSE-NEXT: packssdw %xmm2, %xmm0
3362 ; SSE-NEXT: packsswb %xmm0, %xmm0
3363 ; SSE-NEXT: pmovmskb %xmm0, %eax
3364 ; SSE-NEXT: cmpb $-1, %al
3365 ; SSE-NEXT: sete %al
3368 ; AVX1-LABEL: allones_v8i64_and4:
3370 ; AVX1-NEXT: vpsllq $61, %xmm1, %xmm2
3371 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm3
3372 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
3373 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
3374 ; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
3375 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3376 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
3377 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3378 ; AVX1-NEXT: vpackssdw %xmm0, %xmm2, %xmm0
3379 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
3380 ; AVX1-NEXT: vtestps %xmm1, %xmm0
3381 ; AVX1-NEXT: setb %al
3382 ; AVX1-NEXT: vzeroupper
3385 ; AVX2-LABEL: allones_v8i64_and4:
3387 ; AVX2-NEXT: vpsllq $61, %ymm1, %ymm1
3388 ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
3389 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
3390 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
3391 ; AVX2-NEXT: vtestps %ymm1, %ymm0
3392 ; AVX2-NEXT: setb %al
3393 ; AVX2-NEXT: vzeroupper
3396 ; KNL-LABEL: allones_v8i64_and4:
3398 ; KNL-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
3399 ; KNL-NEXT: kmovw %k0, %eax
3400 ; KNL-NEXT: cmpb $-1, %al
3401 ; KNL-NEXT: sete %al
3402 ; KNL-NEXT: vzeroupper
3405 ; SKX-LABEL: allones_v8i64_and4:
3407 ; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
3408 ; SKX-NEXT: kortestb %k0, %k0
3409 ; SKX-NEXT: setb %al
3410 ; SKX-NEXT: vzeroupper
3412 %tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
3413 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
3414 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3415 %tmp3 = icmp eq i8 %tmp2, -1
; Masks every i64 lane of %arg with 4 (bit 2), compares the lanes against
; zero, bitcasts the <8 x i1> result to i8 and compares that mask with 0,
; i.e. tests that no lane has bit 2 set.
; In the expected SSE2 output, the four parts are ORed first, pslld $29 puts
; bit 2 in the sign bit of each lane's low 32-bit half, and the testb $5
; keeps only those two positions (bits 0 and 2) of the movmskps result.
3419 define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
3420 ; SSE2-LABEL: allzeros_v8i64_and4:
3422 ; SSE2-NEXT: por %xmm3, %xmm1
3423 ; SSE2-NEXT: por %xmm2, %xmm0
3424 ; SSE2-NEXT: por %xmm1, %xmm0
3425 ; SSE2-NEXT: pslld $29, %xmm0
3426 ; SSE2-NEXT: movmskps %xmm0, %eax
3427 ; SSE2-NEXT: testb $5, %al
3428 ; SSE2-NEXT: sete %al
3431 ; SSE41-LABEL: allzeros_v8i64_and4:
3433 ; SSE41-NEXT: por %xmm3, %xmm1
3434 ; SSE41-NEXT: por %xmm2, %xmm0
3435 ; SSE41-NEXT: por %xmm1, %xmm0
3436 ; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3437 ; SSE41-NEXT: sete %al
3440 ; AVX1-LABEL: allzeros_v8i64_and4:
3442 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
3443 ; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
3444 ; AVX1-NEXT: sete %al
3445 ; AVX1-NEXT: vzeroupper
3448 ; AVX2-LABEL: allzeros_v8i64_and4:
3450 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
3451 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
3452 ; AVX2-NEXT: vptest %ymm1, %ymm0
3453 ; AVX2-NEXT: sete %al
3454 ; AVX2-NEXT: vzeroupper
3457 ; AVX512-LABEL: allzeros_v8i64_and4:
3459 ; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4]
3460 ; AVX512-NEXT: vptestmd %zmm1, %zmm0, %k0
3461 ; AVX512-NEXT: kortestw %k0, %k0
3462 ; AVX512-NEXT: sete %al
3463 ; AVX512-NEXT: vzeroupper
3465 %tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
3466 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
3467 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3468 %tmp3 = icmp eq i8 %tmp2, 0
3472 ; FCMP may use ISD::SETNE when nnan; don't attempt to use LowerVectorAllEqual in that case.
; Test input: fcmp nnan une of the eight float lanes of %a0 against zero,
; bitcast of the <8 x i1> result to i8, then icmp ne with 0 (true when at
; least one lane compares not-equal to zero).
3473 define i1 @allzeros_v8f32_nnan(<8 x float> %a0) {
3474 ; SSE-LABEL: allzeros_v8f32_nnan:
3476 ; SSE-NEXT: xorps %xmm2, %xmm2
3477 ; SSE-NEXT: cmpneqps %xmm2, %xmm1
3478 ; SSE-NEXT: cmpneqps %xmm2, %xmm0
3479 ; SSE-NEXT: packssdw %xmm1, %xmm0
3480 ; SSE-NEXT: pmovmskb %xmm0, %eax
3481 ; SSE-NEXT: testl %eax, %eax
3482 ; SSE-NEXT: setne %al
3485 ; AVX1OR2-LABEL: allzeros_v8f32_nnan:
3487 ; AVX1OR2-NEXT: vxorps %xmm1, %xmm1, %xmm1
3488 ; AVX1OR2-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0
3489 ; AVX1OR2-NEXT: vtestps %ymm0, %ymm0
3490 ; AVX1OR2-NEXT: setne %al
3491 ; AVX1OR2-NEXT: vzeroupper
3492 ; AVX1OR2-NEXT: retq
3494 ; KNL-LABEL: allzeros_v8f32_nnan:
3496 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3497 ; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1
3498 ; KNL-NEXT: vcmpneqps %zmm1, %zmm0, %k0
3499 ; KNL-NEXT: kmovw %k0, %eax
3500 ; KNL-NEXT: testb %al, %al
3501 ; KNL-NEXT: setne %al
3502 ; KNL-NEXT: vzeroupper
3505 ; SKX-LABEL: allzeros_v8f32_nnan:
3507 ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1
3508 ; SKX-NEXT: vcmpneqps %ymm1, %ymm0, %k0
3509 ; SKX-NEXT: kortestb %k0, %k0
3510 ; SKX-NEXT: setne %al
3511 ; SKX-NEXT: vzeroupper
3513 %1 = fcmp nnan une <8 x float> %a0, zeroinitializer
3514 %2 = bitcast <8 x i1> %1 to i8
3515 %3 = icmp ne i8 %2, 0
3519 ; The IR patterns below should directly represent the behavior of a
3520 ; MOVMSK instruction.
; Test input: reinterprets %x as <2 x i64>, tests each lane's sign with
; icmp slt 0, packs the two i1 results into an i2 and zero-extends to i32.
; The expected assembly is a single (v)movmskpd.
3522 define i32 @movmskpd(<2 x double> %x) {
3523 ; SSE-LABEL: movmskpd:
3525 ; SSE-NEXT: movmskpd %xmm0, %eax
3528 ; AVX-LABEL: movmskpd:
3530 ; AVX-NEXT: vmovmskpd %xmm0, %eax
3532 %a = bitcast <2 x double> %x to <2 x i64>
3533 %b = icmp slt <2 x i64> %a, zeroinitializer
3534 %c = bitcast <2 x i1> %b to i2
3535 %d = zext i2 %c to i32
; Test input: reinterprets %x as <4 x i32>, tests each lane's sign with
; icmp slt 0, packs the four i1 results into an i4 and zero-extends to i32.
; The expected assembly is a single (v)movmskps.
3539 define i32 @movmskps(<4 x float> %x) {
3540 ; SSE-LABEL: movmskps:
3542 ; SSE-NEXT: movmskps %xmm0, %eax
3545 ; AVX-LABEL: movmskps:
3547 ; AVX-NEXT: vmovmskps %xmm0, %eax
3549 %a = bitcast <4 x float> %x to <4 x i32>
3550 %b = icmp slt <4 x i32> %a, zeroinitializer
3551 %c = bitcast <4 x i1> %b to i4
3552 %d = zext i4 %c to i32
; Test input: 256-bit variant of the movmskpd pattern - sign test of each of
; the four i64 lanes of %x, packed into an i4 and zero-extended to i32.
; The SSE runs expect a shufps that gathers dwords 1 and 3 of each input
; register followed by movmskps; the AVX runs expect vmovmskpd on %ymm0.
3556 define i32 @movmskpd256(<4 x double> %x) {
3557 ; SSE-LABEL: movmskpd256:
3559 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
3560 ; SSE-NEXT: movmskps %xmm0, %eax
3563 ; AVX-LABEL: movmskpd256:
3565 ; AVX-NEXT: vmovmskpd %ymm0, %eax
3566 ; AVX-NEXT: vzeroupper
3568 %a = bitcast <4 x double> %x to <4 x i64>
3569 %b = icmp slt <4 x i64> %a, zeroinitializer
3570 %c = bitcast <4 x i1> %b to i4
3571 %d = zext i4 %c to i32
; Test input: 256-bit variant of the movmskps pattern - sign test of each of
; the eight i32 lanes of %x, bitcast to i8 and zero-extended to i32.
; The SSE runs expect packssdw + packsswb + pmovmskb + movzbl; the AVX runs
; expect vmovmskps on %ymm0.
3575 define i32 @movmskps256(<8 x float> %x) {
3576 ; SSE-LABEL: movmskps256:
3578 ; SSE-NEXT: packssdw %xmm1, %xmm0
3579 ; SSE-NEXT: packsswb %xmm0, %xmm0
3580 ; SSE-NEXT: pmovmskb %xmm0, %eax
3581 ; SSE-NEXT: movzbl %al, %eax
3584 ; AVX-LABEL: movmskps256:
3586 ; AVX-NEXT: vmovmskps %ymm0, %eax
3587 ; AVX-NEXT: vzeroupper
3589 %a = bitcast <8 x float> %x to <8 x i32>
3590 %b = icmp slt <8 x i32> %a, zeroinitializer
3591 %c = bitcast <8 x i1> %b to i8
3592 %d = zext i8 %c to i32
; Test input: sign test (icmp slt 0) of each of the sixteen i8 lanes of %x,
; bitcast to i16 and zero-extended to i32. The expected assembly is a single
; (v)pmovmskb.
3596 define i32 @movmskb(<16 x i8> %x) {
3597 ; SSE-LABEL: movmskb:
3599 ; SSE-NEXT: pmovmskb %xmm0, %eax
3602 ; AVX-LABEL: movmskb:
3604 ; AVX-NEXT: vpmovmskb %xmm0, %eax
3606 %a = icmp slt <16 x i8> %x, zeroinitializer
3607 %b = bitcast <16 x i1> %a to i16
3608 %c = zext i16 %b to i32
; Test input: sign test (icmp slt 0) of each of the thirty-two i8 lanes of
; %x, bitcast directly to i32.
; The SSE and AVX1 runs expect two 16-bit (v)pmovmskb results merged with
; shll $16 / orl; the AVX2 and AVX512 runs expect one vpmovmskb on %ymm0.
3612 define i32 @movmskb256(<32 x i8> %x) {
3613 ; SSE-LABEL: movmskb256:
3615 ; SSE-NEXT: pmovmskb %xmm0, %ecx
3616 ; SSE-NEXT: pmovmskb %xmm1, %eax
3617 ; SSE-NEXT: shll $16, %eax
3618 ; SSE-NEXT: orl %ecx, %eax
3621 ; AVX1-LABEL: movmskb256:
3623 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
3624 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3625 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3626 ; AVX1-NEXT: shll $16, %eax
3627 ; AVX1-NEXT: orl %ecx, %eax
3628 ; AVX1-NEXT: vzeroupper
3631 ; AVX2-LABEL: movmskb256:
3633 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
3634 ; AVX2-NEXT: vzeroupper
3637 ; AVX512-LABEL: movmskb256:
3639 ; AVX512-NEXT: vpmovmskb %ymm0, %eax
3640 ; AVX512-NEXT: vzeroupper
3642 %a = icmp slt <32 x i8> %x, zeroinitializer
3643 %b = bitcast <32 x i1> %a to i32
3647 ; Multiple extract elements from a vector compare.
; Test input: lane-wise icmp eq of %x and %y, then three constant-index
; extracts (lanes 3, 8 and 15) combined as %e3 & (%e1 ^ %e2).
3649 define i1 @movmsk_v16i8(<16 x i8> %x, <16 x i8> %y) {
3650 ; SSE-LABEL: movmsk_v16i8:
3652 ; SSE-NEXT: pcmpeqb %xmm1, %xmm0
3653 ; SSE-NEXT: pmovmskb %xmm0, %eax
3654 ; SSE-NEXT: movl %eax, %ecx
3655 ; SSE-NEXT: shrl $15, %ecx
3656 ; SSE-NEXT: movl %eax, %edx
3657 ; SSE-NEXT: shrl $8, %edx
3658 ; SSE-NEXT: andl $1, %edx
3659 ; SSE-NEXT: andl $8, %eax
3660 ; SSE-NEXT: shrl $3, %eax
3661 ; SSE-NEXT: xorl %edx, %eax
3662 ; SSE-NEXT: andl %ecx, %eax
3663 ; SSE-NEXT: # kill: def $al killed $al killed $eax
3666 ; AVX1OR2-LABEL: movmsk_v16i8:
3668 ; AVX1OR2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3669 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
3670 ; AVX1OR2-NEXT: movl %eax, %ecx
3671 ; AVX1OR2-NEXT: shrl $15, %ecx
3672 ; AVX1OR2-NEXT: movl %eax, %edx
3673 ; AVX1OR2-NEXT: shrl $8, %edx
3674 ; AVX1OR2-NEXT: andl $1, %edx
3675 ; AVX1OR2-NEXT: andl $8, %eax
3676 ; AVX1OR2-NEXT: shrl $3, %eax
3677 ; AVX1OR2-NEXT: xorl %edx, %eax
3678 ; AVX1OR2-NEXT: andl %ecx, %eax
3679 ; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax
3680 ; AVX1OR2-NEXT: retq
3682 ; KNL-LABEL: movmsk_v16i8:
3684 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
3685 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
3686 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3687 ; KNL-NEXT: kshiftrw $15, %k0, %k1
3688 ; KNL-NEXT: kmovw %k1, %ecx
3689 ; KNL-NEXT: kshiftrw $8, %k0, %k1
3690 ; KNL-NEXT: kmovw %k1, %edx
3691 ; KNL-NEXT: kshiftrw $3, %k0, %k0
3692 ; KNL-NEXT: kmovw %k0, %eax
3693 ; KNL-NEXT: xorb %dl, %al
3694 ; KNL-NEXT: andb %cl, %al
3695 ; KNL-NEXT: # kill: def $al killed $al killed $eax
3696 ; KNL-NEXT: vzeroupper
3699 ; SKX-LABEL: movmsk_v16i8:
3701 ; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
3702 ; SKX-NEXT: kshiftrw $15, %k0, %k1
3703 ; SKX-NEXT: kmovd %k1, %ecx
3704 ; SKX-NEXT: kshiftrw $8, %k0, %k1
3705 ; SKX-NEXT: kmovd %k1, %edx
3706 ; SKX-NEXT: kshiftrw $3, %k0, %k0
3707 ; SKX-NEXT: kmovd %k0, %eax
3708 ; SKX-NEXT: xorb %dl, %al
3709 ; SKX-NEXT: andb %cl, %al
3710 ; SKX-NEXT: # kill: def $al killed $al killed $eax
3712 %cmp = icmp eq <16 x i8> %x, %y
3713 %e1 = extractelement <16 x i1> %cmp, i32 3
3714 %e2 = extractelement <16 x i1> %cmp, i32 8
3715 %e3 = extractelement <16 x i1> %cmp, i32 15
3716 %u1 = xor i1 %e1, %e2
3717 %u2 = and i1 %e3, %u1
; Test input: lane-wise icmp sgt of %x and %y, then the AND of the compare
; results for lanes 0, 1, 4 and 7.
; In the expected assembly the immediate -109 (0x93 as a byte) has exactly
; bits 0, 1, 4 and 7 set, matching the extracted lanes.
3721 define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) {
3722 ; SSE-LABEL: movmsk_v8i16:
3724 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0
3725 ; SSE-NEXT: packsswb %xmm0, %xmm0
3726 ; SSE-NEXT: pmovmskb %xmm0, %eax
3727 ; SSE-NEXT: notb %al
3728 ; SSE-NEXT: testb $-109, %al
3729 ; SSE-NEXT: sete %al
3732 ; AVX1OR2-LABEL: movmsk_v8i16:
3734 ; AVX1OR2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
3735 ; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
3736 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
3737 ; AVX1OR2-NEXT: notb %al
3738 ; AVX1OR2-NEXT: testb $-109, %al
3739 ; AVX1OR2-NEXT: sete %al
3740 ; AVX1OR2-NEXT: retq
3742 ; KNL-LABEL: movmsk_v8i16:
3744 ; KNL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
3745 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
3746 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
3747 ; KNL-NEXT: kmovw %k0, %eax
3748 ; KNL-NEXT: testb $-109, %al
3749 ; KNL-NEXT: sete %al
3750 ; KNL-NEXT: vzeroupper
3753 ; SKX-LABEL: movmsk_v8i16:
3755 ; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
3756 ; SKX-NEXT: knotb %k0, %k0
3757 ; SKX-NEXT: kmovd %k0, %eax
3758 ; SKX-NEXT: testb $-109, %al
3759 ; SKX-NEXT: sete %al
3761 %cmp = icmp sgt <8 x i16> %x, %y
3762 %e1 = extractelement <8 x i1> %cmp, i32 0
3763 %e2 = extractelement <8 x i1> %cmp, i32 1
3764 %e3 = extractelement <8 x i1> %cmp, i32 7
3765 %e4 = extractelement <8 x i1> %cmp, i32 4
3766 %u1 = and i1 %e1, %e2
3767 %u2 = and i1 %e3, %e4
3768 %u3 = and i1 %u1, %u2
3772 ; TODO: Replace shift+mask chain with AND+CMP.
; Test input: lane-wise icmp slt of %x and %y, then the XOR of the compare
; results for lanes 2 and 3.
3773 define i1 @movmsk_v4i32(<4 x i32> %x, <4 x i32> %y) {
3774 ; SSE-LABEL: movmsk_v4i32:
3776 ; SSE-NEXT: pcmpgtd %xmm0, %xmm1
3777 ; SSE-NEXT: movmskps %xmm1, %eax
3778 ; SSE-NEXT: movl %eax, %ecx
3779 ; SSE-NEXT: shrb $3, %cl
3780 ; SSE-NEXT: andb $4, %al
3781 ; SSE-NEXT: shrb $2, %al
3782 ; SSE-NEXT: xorb %cl, %al
3783 ; SSE-NEXT: # kill: def $al killed $al killed $eax
3786 ; AVX1OR2-LABEL: movmsk_v4i32:
3788 ; AVX1OR2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
3789 ; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
3790 ; AVX1OR2-NEXT: movl %eax, %ecx
3791 ; AVX1OR2-NEXT: shrb $3, %cl
3792 ; AVX1OR2-NEXT: andb $4, %al
3793 ; AVX1OR2-NEXT: shrb $2, %al
3794 ; AVX1OR2-NEXT: xorb %cl, %al
3795 ; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax
3796 ; AVX1OR2-NEXT: retq
3798 ; KNL-LABEL: movmsk_v4i32:
3800 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3801 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3802 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
3803 ; KNL-NEXT: kshiftrw $3, %k0, %k1
3804 ; KNL-NEXT: kmovw %k1, %ecx
3805 ; KNL-NEXT: kshiftrw $2, %k0, %k0
3806 ; KNL-NEXT: kmovw %k0, %eax
3807 ; KNL-NEXT: xorb %cl, %al
3808 ; KNL-NEXT: # kill: def $al killed $al killed $eax
3809 ; KNL-NEXT: vzeroupper
3812 ; SKX-LABEL: movmsk_v4i32:
3814 ; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
3815 ; SKX-NEXT: kshiftrb $3, %k0, %k1
3816 ; SKX-NEXT: kmovd %k1, %ecx
3817 ; SKX-NEXT: kshiftrb $2, %k0, %k0
3818 ; SKX-NEXT: kmovd %k0, %eax
3819 ; SKX-NEXT: xorb %cl, %al
3820 ; SKX-NEXT: # kill: def $al killed $al killed $eax
3822 %cmp = icmp slt <4 x i32> %x, %y
3823 %e1 = extractelement <4 x i1> %cmp, i32 2
3824 %e2 = extractelement <4 x i1> %cmp, i32 3
3825 %u1 = xor i1 %e1, %e2
; Test input: lane-wise icmp ne of %x and %y, then the AND of both lanes'
; results (true when the two vectors differ in every lane).
3829 define i1 @movmsk_and_v2i64(<2 x i64> %x, <2 x i64> %y) {
3830 ; SSE2-LABEL: movmsk_and_v2i64:
3832 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
3833 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
3834 ; SSE2-NEXT: pand %xmm0, %xmm1
3835 ; SSE2-NEXT: movmskpd %xmm1, %eax
3836 ; SSE2-NEXT: testl %eax, %eax
3837 ; SSE2-NEXT: sete %al
3840 ; SSE41-LABEL: movmsk_and_v2i64:
3842 ; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
3843 ; SSE41-NEXT: movmskpd %xmm0, %eax
3844 ; SSE41-NEXT: testl %eax, %eax
3845 ; SSE41-NEXT: sete %al
3848 ; AVX1OR2-LABEL: movmsk_and_v2i64:
3850 ; AVX1OR2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
3851 ; AVX1OR2-NEXT: vtestpd %xmm0, %xmm0
3852 ; AVX1OR2-NEXT: sete %al
3853 ; AVX1OR2-NEXT: retq
3855 ; KNL-LABEL: movmsk_and_v2i64:
3857 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3858 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3859 ; KNL-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3860 ; KNL-NEXT: kmovw %k0, %eax
3861 ; KNL-NEXT: testb $3, %al
3862 ; KNL-NEXT: sete %al
3863 ; KNL-NEXT: vzeroupper
3866 ; SKX-LABEL: movmsk_and_v2i64:
3868 ; SKX-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
3869 ; SKX-NEXT: kmovd %k0, %eax
3870 ; SKX-NEXT: cmpb $3, %al
3871 ; SKX-NEXT: sete %al
3873 %cmp = icmp ne <2 x i64> %x, %y
3874 %e1 = extractelement <2 x i1> %cmp, i32 0
3875 %e2 = extractelement <2 x i1> %cmp, i32 1
3876 %u1 = and i1 %e1, %e2
; Test input: lane-wise icmp ne of %x and %y, then the OR of both lanes'
; results (true when the two vectors differ in at least one lane).
3880 define i1 @movmsk_or_v2i64(<2 x i64> %x, <2 x i64> %y) {
3881 ; SSE2-LABEL: movmsk_or_v2i64:
3883 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
3884 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
3885 ; SSE2-NEXT: pand %xmm0, %xmm1
3886 ; SSE2-NEXT: movmskpd %xmm1, %eax
3887 ; SSE2-NEXT: cmpl $3, %eax
3888 ; SSE2-NEXT: setne %al
3891 ; SSE41-LABEL: movmsk_or_v2i64:
3893 ; SSE41-NEXT: pxor %xmm1, %xmm0
3894 ; SSE41-NEXT: ptest %xmm0, %xmm0
3895 ; SSE41-NEXT: setne %al
3898 ; AVX-LABEL: movmsk_or_v2i64:
3900 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
3901 ; AVX-NEXT: vptest %xmm0, %xmm0
3902 ; AVX-NEXT: setne %al
3904 %cmp = icmp ne <2 x i64> %x, %y
3905 %e1 = extractelement <2 x i1> %cmp, i32 0
3906 %e2 = extractelement <2 x i1> %cmp, i32 1
3907 %u1 = or i1 %e1, %e2
; Test input: lane-wise fcmp ueq of %x and %y, then the OR of the compare
; results for lanes 1, 2 and 3 (lane 0 is not used).
; In the expected assembly the immediate 14 (0b1110) selects mask bits 1-3.
3911 define i1 @movmsk_v4f32(<4 x float> %x, <4 x float> %y) {
3912 ; SSE-LABEL: movmsk_v4f32:
3914 ; SSE-NEXT: movaps %xmm0, %xmm2
3915 ; SSE-NEXT: cmpeqps %xmm1, %xmm2
3916 ; SSE-NEXT: cmpunordps %xmm1, %xmm0
3917 ; SSE-NEXT: orps %xmm2, %xmm0
3918 ; SSE-NEXT: movmskps %xmm0, %eax
3919 ; SSE-NEXT: testb $14, %al
3920 ; SSE-NEXT: setne %al
3923 ; AVX1OR2-LABEL: movmsk_v4f32:
3925 ; AVX1OR2-NEXT: vcmpeq_uqps %xmm1, %xmm0, %xmm0
3926 ; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
3927 ; AVX1OR2-NEXT: testb $14, %al
3928 ; AVX1OR2-NEXT: setne %al
3929 ; AVX1OR2-NEXT: retq
3931 ; KNL-LABEL: movmsk_v4f32:
3933 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3934 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3935 ; KNL-NEXT: vcmpeq_uqps %zmm1, %zmm0, %k0
3936 ; KNL-NEXT: kmovw %k0, %eax
3937 ; KNL-NEXT: testb $14, %al
3938 ; KNL-NEXT: setne %al
3939 ; KNL-NEXT: vzeroupper
3942 ; SKX-LABEL: movmsk_v4f32:
3944 ; SKX-NEXT: vcmpeq_uqps %xmm1, %xmm0, %k0
3945 ; SKX-NEXT: kmovd %k0, %eax
3946 ; SKX-NEXT: testb $14, %al
3947 ; SKX-NEXT: setne %al
3949 %cmp = fcmp ueq <4 x float> %x, %y
3950 %e1 = extractelement <4 x i1> %cmp, i32 1
3951 %e2 = extractelement <4 x i1> %cmp, i32 2
3952 %e3 = extractelement <4 x i1> %cmp, i32 3
3953 %u1 = or i1 %e1, %e2
3954 %u2 = or i1 %u1, %e3
; Test input: lane-wise fcmp oge of %x and %y, then the AND of both lanes'
; results (true when the ordered >= compare holds in every lane).
3958 define i1 @movmsk_and_v2f64(<2 x double> %x, <2 x double> %y) {
3959 ; SSE-LABEL: movmsk_and_v2f64:
3961 ; SSE-NEXT: cmplepd %xmm0, %xmm1
3962 ; SSE-NEXT: movmskpd %xmm1, %eax
3963 ; SSE-NEXT: cmpl $3, %eax
3964 ; SSE-NEXT: sete %al
3967 ; AVX1OR2-LABEL: movmsk_and_v2f64:
3969 ; AVX1OR2-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
3970 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
3971 ; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0
3972 ; AVX1OR2-NEXT: setb %al
3973 ; AVX1OR2-NEXT: retq
3975 ; KNL-LABEL: movmsk_and_v2f64:
3977 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3978 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3979 ; KNL-NEXT: vcmplepd %zmm0, %zmm1, %k0
3980 ; KNL-NEXT: knotw %k0, %k0
3981 ; KNL-NEXT: kmovw %k0, %eax
3982 ; KNL-NEXT: testb $3, %al
3983 ; KNL-NEXT: sete %al
3984 ; KNL-NEXT: vzeroupper
3987 ; SKX-LABEL: movmsk_and_v2f64:
3989 ; SKX-NEXT: vcmplepd %xmm0, %xmm1, %k0
3990 ; SKX-NEXT: kmovd %k0, %eax
3991 ; SKX-NEXT: cmpb $3, %al
3992 ; SKX-NEXT: sete %al
3994 %cmp = fcmp oge <2 x double> %x, %y
3995 %e1 = extractelement <2 x i1> %cmp, i32 0
3996 %e2 = extractelement <2 x i1> %cmp, i32 1
3997 %u1 = and i1 %e1, %e2
; Test input: lane-wise fcmp oge of %x and %y, then the OR of both lanes'
; results (true when the ordered >= compare holds in at least one lane).
4001 define i1 @movmsk_or_v2f64(<2 x double> %x, <2 x double> %y) {
4002 ; SSE-LABEL: movmsk_or_v2f64:
4004 ; SSE-NEXT: cmplepd %xmm0, %xmm1
4005 ; SSE-NEXT: movmskpd %xmm1, %eax
4006 ; SSE-NEXT: testl %eax, %eax
4007 ; SSE-NEXT: setne %al
4010 ; AVX1OR2-LABEL: movmsk_or_v2f64:
4012 ; AVX1OR2-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
4013 ; AVX1OR2-NEXT: vtestpd %xmm0, %xmm0
4014 ; AVX1OR2-NEXT: setne %al
4015 ; AVX1OR2-NEXT: retq
4017 ; KNL-LABEL: movmsk_or_v2f64:
4019 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4020 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4021 ; KNL-NEXT: vcmplepd %zmm0, %zmm1, %k0
4022 ; KNL-NEXT: kmovw %k0, %eax
4023 ; KNL-NEXT: testb $3, %al
4024 ; KNL-NEXT: setne %al
4025 ; KNL-NEXT: vzeroupper
4028 ; SKX-LABEL: movmsk_or_v2f64:
4030 ; SKX-NEXT: vcmplepd %xmm0, %xmm1, %k0
4031 ; SKX-NEXT: kortestb %k0, %k0
4032 ; SKX-NEXT: setne %al
4034 %cmp = fcmp oge <2 x double> %x, %y
4035 %e1 = extractelement <2 x i1> %cmp, i32 0
4036 %e2 = extractelement <2 x i1> %cmp, i32 1
4037 %u1 = or i1 %e1, %e2
4041 ; Extract elements from a non-constant index.
; Test input: lane-wise icmp eq of %x and %y, then extraction of the i1 at
; the run-time index %z.
; The SSE, AVX1/AVX2 and KNL runs expect (v)pmovmskb + btl; the SKX run
; expects the mask to be expanded, spilled to the stack and indexed.
4043 define i1 @movmsk_v16i8_var(<16 x i8> %x, <16 x i8> %y, i32 %z) {
4044 ; SSE-LABEL: movmsk_v16i8_var:
4046 ; SSE-NEXT: pcmpeqb %xmm1, %xmm0
4047 ; SSE-NEXT: pmovmskb %xmm0, %eax
4048 ; SSE-NEXT: btl %edi, %eax
4049 ; SSE-NEXT: setb %al
4052 ; AVX1OR2-LABEL: movmsk_v16i8_var:
4054 ; AVX1OR2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
4055 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
4056 ; AVX1OR2-NEXT: btl %edi, %eax
4057 ; AVX1OR2-NEXT: setb %al
4058 ; AVX1OR2-NEXT: retq
4060 ; KNL-LABEL: movmsk_v16i8_var:
4062 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
4063 ; KNL-NEXT: vpmovmskb %xmm0, %eax
4064 ; KNL-NEXT: btl %edi, %eax
4065 ; KNL-NEXT: setb %al
4068 ; SKX-LABEL: movmsk_v16i8_var:
4070 ; SKX-NEXT: # kill: def $edi killed $edi def $rdi
4071 ; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
4072 ; SKX-NEXT: vpmovm2b %k0, %xmm0
4073 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4074 ; SKX-NEXT: andl $15, %edi
4075 ; SKX-NEXT: movzbl -24(%rsp,%rdi), %eax
4077 %cmp = icmp eq <16 x i8> %x, %y
4078 %val = extractelement <16 x i1> %cmp, i32 %z
; Test input: lane-wise icmp sgt of %x and %y, then extraction of the i1 at
; the run-time index %z.
; The SSE and AVX1/AVX2 runs expect a byte mask tested with btl; the KNL and
; SKX runs expect the mask to be expanded, spilled to the stack and indexed.
4082 define i1 @movmsk_v8i16_var(<8 x i16> %x, <8 x i16> %y, i32 %z) {
4083 ; SSE-LABEL: movmsk_v8i16_var:
4085 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0
4086 ; SSE-NEXT: packsswb %xmm0, %xmm0
4087 ; SSE-NEXT: pmovmskb %xmm0, %eax
4088 ; SSE-NEXT: btl %edi, %eax
4089 ; SSE-NEXT: setb %al
4092 ; AVX1OR2-LABEL: movmsk_v8i16_var:
4094 ; AVX1OR2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
4095 ; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
4096 ; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
4097 ; AVX1OR2-NEXT: btl %edi, %eax
4098 ; AVX1OR2-NEXT: setb %al
4099 ; AVX1OR2-NEXT: retq
4101 ; KNL-LABEL: movmsk_v8i16_var:
4103 ; KNL-NEXT: # kill: def $edi killed $edi def $rdi
4104 ; KNL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
4105 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
4106 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
4107 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
4108 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
4109 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4110 ; KNL-NEXT: andl $7, %edi
4111 ; KNL-NEXT: movzbl -24(%rsp,%rdi,2), %eax
4112 ; KNL-NEXT: vzeroupper
4115 ; SKX-LABEL: movmsk_v8i16_var:
4117 ; SKX-NEXT: # kill: def $edi killed $edi def $rdi
4118 ; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
4119 ; SKX-NEXT: vpmovm2w %k0, %xmm0
4120 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4121 ; SKX-NEXT: andl $7, %edi
4122 ; SKX-NEXT: movzbl -24(%rsp,%rdi,2), %eax
4124 %cmp = icmp sgt <8 x i16> %x, %y
4125 %val = extractelement <8 x i1> %cmp, i32 %z
; Test input: lane-wise icmp slt of %x and %y, then extraction of the i1 at
; the run-time index %z.
; The SSE and AVX1/AVX2 runs expect (v)movmskps + btl; the KNL and SKX runs
; expect the mask to be expanded, spilled to the stack and indexed.
4129 define i1 @movmsk_v4i32_var(<4 x i32> %x, <4 x i32> %y, i32 %z) {
4130 ; SSE-LABEL: movmsk_v4i32_var:
4132 ; SSE-NEXT: pcmpgtd %xmm0, %xmm1
4133 ; SSE-NEXT: movmskps %xmm1, %eax
4134 ; SSE-NEXT: btl %edi, %eax
4135 ; SSE-NEXT: setb %al
4138 ; AVX1OR2-LABEL: movmsk_v4i32_var:
4140 ; AVX1OR2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
4141 ; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
4142 ; AVX1OR2-NEXT: btl %edi, %eax
4143 ; AVX1OR2-NEXT: setb %al
4144 ; AVX1OR2-NEXT: retq
4146 ; KNL-LABEL: movmsk_v4i32_var:
4148 ; KNL-NEXT: # kill: def $edi killed $edi def $rdi
4149 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4150 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4151 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
4152 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
4153 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4154 ; KNL-NEXT: andl $3, %edi
4155 ; KNL-NEXT: movzbl -24(%rsp,%rdi,4), %eax
4156 ; KNL-NEXT: vzeroupper
4159 ; SKX-LABEL: movmsk_v4i32_var:
4161 ; SKX-NEXT: # kill: def $edi killed $edi def $rdi
4162 ; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
4163 ; SKX-NEXT: vpmovm2d %k0, %xmm0
4164 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4165 ; SKX-NEXT: andl $3, %edi
4166 ; SKX-NEXT: movzbl -24(%rsp,%rdi,4), %eax
4168 %cmp = icmp slt <4 x i32> %x, %y
4169 %val = extractelement <4 x i1> %cmp, i32 %z
; Test input: lane-wise icmp ne of %x and %y, then extraction of the i1 at
; the run-time index %z.
; The SSE and AVX1/AVX2 runs expect an equality mask from (v)movmskpd whose
; two bits are flipped with xorl $3 before btl; the KNL and SKX runs expect
; the mask to be expanded, spilled to the stack and indexed.
4173 define i1 @movmsk_v2i64_var(<2 x i64> %x, <2 x i64> %y, i32 %z) {
4174 ; SSE2-LABEL: movmsk_v2i64_var:
4176 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
4177 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
4178 ; SSE2-NEXT: pand %xmm0, %xmm1
4179 ; SSE2-NEXT: movmskpd %xmm1, %eax
4180 ; SSE2-NEXT: xorl $3, %eax
4181 ; SSE2-NEXT: btl %edi, %eax
4182 ; SSE2-NEXT: setb %al
4185 ; SSE41-LABEL: movmsk_v2i64_var:
4187 ; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
4188 ; SSE41-NEXT: movmskpd %xmm0, %eax
4189 ; SSE41-NEXT: xorl $3, %eax
4190 ; SSE41-NEXT: btl %edi, %eax
4191 ; SSE41-NEXT: setb %al
4194 ; AVX1OR2-LABEL: movmsk_v2i64_var:
4196 ; AVX1OR2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
4197 ; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
4198 ; AVX1OR2-NEXT: xorl $3, %eax
4199 ; AVX1OR2-NEXT: btl %edi, %eax
4200 ; AVX1OR2-NEXT: setb %al
4201 ; AVX1OR2-NEXT: retq
4203 ; KNL-LABEL: movmsk_v2i64_var:
4205 ; KNL-NEXT: # kill: def $edi killed $edi def $rdi
4206 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4207 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4208 ; KNL-NEXT: vpcmpneqq %zmm1, %zmm0, %k1
4209 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
4210 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4211 ; KNL-NEXT: andl $1, %edi
4212 ; KNL-NEXT: movzbl -24(%rsp,%rdi,8), %eax
4213 ; KNL-NEXT: vzeroupper
4216 ; SKX-LABEL: movmsk_v2i64_var:
4218 ; SKX-NEXT: # kill: def $edi killed $edi def $rdi
4219 ; SKX-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
4220 ; SKX-NEXT: vpmovm2q %k0, %xmm0
4221 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4222 ; SKX-NEXT: andl $1, %edi
4223 ; SKX-NEXT: movzbl -24(%rsp,%rdi,8), %eax
4225 %cmp = icmp ne <2 x i64> %x, %y
4226 %val = extractelement <2 x i1> %cmp, i32 %z
; Test input: lane-wise fcmp ueq of %x and %y, then extraction of the i1 at
; the run-time index %z.
; The SSE and AVX1/AVX2 runs expect (v)movmskps + btl; the KNL and SKX runs
; expect the mask to be expanded, spilled to the stack and indexed.
4230 define i1 @movmsk_v4f32_var(<4 x float> %x, <4 x float> %y, i32 %z) {
4231 ; SSE-LABEL: movmsk_v4f32_var:
4233 ; SSE-NEXT: movaps %xmm0, %xmm2
4234 ; SSE-NEXT: cmpeqps %xmm1, %xmm2
4235 ; SSE-NEXT: cmpunordps %xmm1, %xmm0
4236 ; SSE-NEXT: orps %xmm2, %xmm0
4237 ; SSE-NEXT: movmskps %xmm0, %eax
4238 ; SSE-NEXT: btl %edi, %eax
4239 ; SSE-NEXT: setb %al
4242 ; AVX1OR2-LABEL: movmsk_v4f32_var:
4244 ; AVX1OR2-NEXT: vcmpeq_uqps %xmm1, %xmm0, %xmm0
4245 ; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
4246 ; AVX1OR2-NEXT: btl %edi, %eax
4247 ; AVX1OR2-NEXT: setb %al
4248 ; AVX1OR2-NEXT: retq
4250 ; KNL-LABEL: movmsk_v4f32_var:
4252 ; KNL-NEXT: # kill: def $edi killed $edi def $rdi
4253 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4254 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4255 ; KNL-NEXT: vcmpeq_uqps %zmm1, %zmm0, %k1
4256 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
4257 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4258 ; KNL-NEXT: andl $3, %edi
4259 ; KNL-NEXT: movzbl -24(%rsp,%rdi,4), %eax
4260 ; KNL-NEXT: vzeroupper
4263 ; SKX-LABEL: movmsk_v4f32_var:
4265 ; SKX-NEXT: # kill: def $edi killed $edi def $rdi
4266 ; SKX-NEXT: vcmpeq_uqps %xmm1, %xmm0, %k0
4267 ; SKX-NEXT: vpmovm2d %k0, %xmm0
4268 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4269 ; SKX-NEXT: andl $3, %edi
4270 ; SKX-NEXT: movzbl -24(%rsp,%rdi,4), %eax
4272 %cmp = fcmp ueq <4 x float> %x, %y
4273 %val = extractelement <4 x i1> %cmp, i32 %z
; Test input: lane-wise fcmp oge of %x and %y, then extraction of the i1 at
; the run-time index %z.
; The SSE and AVX1/AVX2 runs expect (v)movmskpd + btl; the KNL and SKX runs
; expect the mask to be expanded, spilled to the stack and indexed.
4277 define i1 @movmsk_v2f64_var(<2 x double> %x, <2 x double> %y, i32 %z) {
4278 ; SSE-LABEL: movmsk_v2f64_var:
4280 ; SSE-NEXT: cmplepd %xmm0, %xmm1
4281 ; SSE-NEXT: movmskpd %xmm1, %eax
4282 ; SSE-NEXT: btl %edi, %eax
4283 ; SSE-NEXT: setb %al
4286 ; AVX1OR2-LABEL: movmsk_v2f64_var:
4288 ; AVX1OR2-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
4289 ; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
4290 ; AVX1OR2-NEXT: btl %edi, %eax
4291 ; AVX1OR2-NEXT: setb %al
4292 ; AVX1OR2-NEXT: retq
4294 ; KNL-LABEL: movmsk_v2f64_var:
4296 ; KNL-NEXT: # kill: def $edi killed $edi def $rdi
4297 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4298 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4299 ; KNL-NEXT: vcmplepd %zmm0, %zmm1, %k1
4300 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
4301 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4302 ; KNL-NEXT: andl $1, %edi
4303 ; KNL-NEXT: movzbl -24(%rsp,%rdi,8), %eax
4304 ; KNL-NEXT: vzeroupper
4307 ; SKX-LABEL: movmsk_v2f64_var:
4309 ; SKX-NEXT: # kill: def $edi killed $edi def $rdi
4310 ; SKX-NEXT: vcmplepd %xmm0, %xmm1, %k0
4311 ; SKX-NEXT: vpmovm2q %k0, %xmm0
4312 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
4313 ; SKX-NEXT: andl $1, %edi
4314 ; SKX-NEXT: movzbl -24(%rsp,%rdi,8), %eax
4316 %cmp = fcmp oge <2 x double> %x, %y
4317 %val = extractelement <2 x i1> %cmp, i32 %z
4321 ; TODO: We expect codegen similar to that of PR39665_c_ray_opt,
4322 ; but this is not the case in practice.
; Test input: lane-wise fcmp ogt of %x and %y; both lanes are extracted as
; scalars and ANDed, and the result selects between 42 (true) and 99 (false).
; The expected assembly tests mask bits 2 and 1 separately with two cmovs.
4323 define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
4324 ; SSE-LABEL: PR39665_c_ray:
4326 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
4327 ; SSE-NEXT: movmskpd %xmm1, %ecx
4328 ; SSE-NEXT: testb $2, %cl
4329 ; SSE-NEXT: movl $42, %eax
4330 ; SSE-NEXT: movl $99, %edx
4331 ; SSE-NEXT: cmovel %edx, %eax
4332 ; SSE-NEXT: testb $1, %cl
4333 ; SSE-NEXT: cmovel %edx, %eax
4336 ; AVX1OR2-LABEL: PR39665_c_ray:
4338 ; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
4339 ; AVX1OR2-NEXT: vmovmskpd %xmm0, %ecx
4340 ; AVX1OR2-NEXT: testb $2, %cl
4341 ; AVX1OR2-NEXT: movl $42, %eax
4342 ; AVX1OR2-NEXT: movl $99, %edx
4343 ; AVX1OR2-NEXT: cmovel %edx, %eax
4344 ; AVX1OR2-NEXT: testb $1, %cl
4345 ; AVX1OR2-NEXT: cmovel %edx, %eax
4346 ; AVX1OR2-NEXT: retq
4348 ; KNL-LABEL: PR39665_c_ray:
4350 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4351 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4352 ; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k0
4353 ; KNL-NEXT: kmovw %k0, %ecx
4354 ; KNL-NEXT: testb $2, %cl
4355 ; KNL-NEXT: movl $42, %eax
4356 ; KNL-NEXT: movl $99, %edx
4357 ; KNL-NEXT: cmovel %edx, %eax
4358 ; KNL-NEXT: testb $1, %cl
4359 ; KNL-NEXT: cmovel %edx, %eax
4360 ; KNL-NEXT: vzeroupper
4363 ; SKX-LABEL: PR39665_c_ray:
4365 ; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
4366 ; SKX-NEXT: kmovd %k0, %ecx
4367 ; SKX-NEXT: testb $2, %cl
4368 ; SKX-NEXT: movl $42, %eax
4369 ; SKX-NEXT: movl $99, %edx
4370 ; SKX-NEXT: cmovel %edx, %eax
4371 ; SKX-NEXT: testb $1, %cl
4372 ; SKX-NEXT: cmovel %edx, %eax
4374 %cmp = fcmp ogt <2 x double> %x, %y
4375 %e1 = extractelement <2 x i1> %cmp, i32 0
4376 %e2 = extractelement <2 x i1> %cmp, i32 1
4377 %u = and i1 %e1, %e2
4378 %r = select i1 %u, i32 42, i32 99
; Test input: lane-wise fcmp ogt of %x and %y; lane 1 of the compare result
; is shuffled into lane 0, ANDed with the original vector, and lane 0 of that
; AND selects between 42 (true) and 99 (false). Only lane 0 of %shift is
; consumed, so its second mask element is a don't-care and is spelled
; `poison`, matching the other shuffles in this file.
4382 define i32 @PR39665_c_ray_opt(<2 x double> %x, <2 x double> %y) {
4383 ; SSE-LABEL: PR39665_c_ray_opt:
4385 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
4386 ; SSE-NEXT: movmskpd %xmm1, %eax
4387 ; SSE-NEXT: cmpl $3, %eax
4388 ; SSE-NEXT: movl $42, %ecx
4389 ; SSE-NEXT: movl $99, %eax
4390 ; SSE-NEXT: cmovel %ecx, %eax
4393 ; AVX1OR2-LABEL: PR39665_c_ray_opt:
4395 ; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
4396 ; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
4397 ; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0
4398 ; AVX1OR2-NEXT: movl $42, %ecx
4399 ; AVX1OR2-NEXT: movl $99, %eax
4400 ; AVX1OR2-NEXT: cmovbl %ecx, %eax
4401 ; AVX1OR2-NEXT: retq
4403 ; KNL-LABEL: PR39665_c_ray_opt:
4405 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4406 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4407 ; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k0
4408 ; KNL-NEXT: knotw %k0, %k0
4409 ; KNL-NEXT: kmovw %k0, %eax
4410 ; KNL-NEXT: testb $3, %al
4411 ; KNL-NEXT: movl $42, %ecx
4412 ; KNL-NEXT: movl $99, %eax
4413 ; KNL-NEXT: cmovel %ecx, %eax
4414 ; KNL-NEXT: vzeroupper
4417 ; SKX-LABEL: PR39665_c_ray_opt:
4419 ; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
4420 ; SKX-NEXT: kmovd %k0, %eax
4421 ; SKX-NEXT: cmpb $3, %al
4422 ; SKX-NEXT: movl $42, %ecx
4423 ; SKX-NEXT: movl $99, %eax
4424 ; SKX-NEXT: cmovel %ecx, %eax
4426 %cmp = fcmp ogt <2 x double> %x, %y
4427 %shift = shufflevector <2 x i1> %cmp, <2 x i1> poison, <2 x i32> <i32 1, i32 poison>
4428 %1 = and <2 x i1> %cmp, %shift
4429 %u = extractelement <2 x i1> %1, i64 0
4430 %r = select i1 %u, i32 42, i32 99
4434 define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) {
4435 ; SSE2-LABEL: pr67287:
4436 ; SSE2: # %bb.0: # %entry
4437 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4438 ; SSE2-NEXT: pxor %xmm1, %xmm1
4439 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
4440 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
4441 ; SSE2-NEXT: movmskpd %xmm0, %eax
4442 ; SSE2-NEXT: testl %eax, %eax
4443 ; SSE2-NEXT: jne .LBB97_2
4444 ; SSE2-NEXT: # %bb.1: # %entry
4445 ; SSE2-NEXT: movd %xmm1, %eax
4446 ; SSE2-NEXT: testb $1, %al
4447 ; SSE2-NEXT: jne .LBB97_2
4448 ; SSE2-NEXT: # %bb.3: # %middle.block
4449 ; SSE2-NEXT: xorl %eax, %eax
4451 ; SSE2-NEXT: .LBB97_2:
4452 ; SSE2-NEXT: movw $0, 0
4453 ; SSE2-NEXT: xorl %eax, %eax
4456 ; SSE41-LABEL: pr67287:
4457 ; SSE41: # %bb.0: # %entry
4458 ; SSE41-NEXT: pxor %xmm1, %xmm1
4459 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
4460 ; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
4461 ; SSE41-NEXT: movmskpd %xmm0, %eax
4462 ; SSE41-NEXT: testl %eax, %eax
4463 ; SSE41-NEXT: jne .LBB97_2
4464 ; SSE41-NEXT: # %bb.1: # %entry
4465 ; SSE41-NEXT: movd %xmm0, %eax
4466 ; SSE41-NEXT: testb $1, %al
4467 ; SSE41-NEXT: jne .LBB97_2
4468 ; SSE41-NEXT: # %bb.3: # %middle.block
4469 ; SSE41-NEXT: xorl %eax, %eax
4471 ; SSE41-NEXT: .LBB97_2:
4472 ; SSE41-NEXT: movw $0, 0
4473 ; SSE41-NEXT: xorl %eax, %eax
4476 ; AVX1-LABEL: pr67287:
4477 ; AVX1: # %bb.0: # %entry
4478 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
4479 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
4480 ; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
4481 ; AVX1-NEXT: vtestpd %xmm0, %xmm0
4482 ; AVX1-NEXT: jne .LBB97_2
4483 ; AVX1-NEXT: # %bb.1: # %entry
4484 ; AVX1-NEXT: vmovd %xmm0, %eax
4485 ; AVX1-NEXT: testb $1, %al
4486 ; AVX1-NEXT: jne .LBB97_2
4487 ; AVX1-NEXT: # %bb.3: # %middle.block
4488 ; AVX1-NEXT: xorl %eax, %eax
4490 ; AVX1-NEXT: .LBB97_2:
4491 ; AVX1-NEXT: movw $0, 0
4492 ; AVX1-NEXT: xorl %eax, %eax
4495 ; AVX2-LABEL: pr67287:
4496 ; AVX2: # %bb.0: # %entry
4497 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
4498 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
4499 ; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
4500 ; AVX2-NEXT: vtestpd %xmm0, %xmm0
4501 ; AVX2-NEXT: jne .LBB97_2
4502 ; AVX2-NEXT: # %bb.1: # %entry
4503 ; AVX2-NEXT: vmovd %xmm0, %eax
4504 ; AVX2-NEXT: testb $1, %al
4505 ; AVX2-NEXT: jne .LBB97_2
4506 ; AVX2-NEXT: # %bb.3: # %middle.block
4507 ; AVX2-NEXT: xorl %eax, %eax
4509 ; AVX2-NEXT: .LBB97_2:
4510 ; AVX2-NEXT: movw $0, 0
4511 ; AVX2-NEXT: xorl %eax, %eax
4514 ; KNL-LABEL: pr67287:
4515 ; KNL: # %bb.0: # %entry
4516 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
4517 ; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
4518 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
4519 ; KNL-NEXT: kmovw %k0, %eax
4520 ; KNL-NEXT: testb $3, %al
4521 ; KNL-NEXT: jne .LBB97_2
4522 ; KNL-NEXT: # %bb.1: # %entry
4523 ; KNL-NEXT: kmovw %k0, %eax
4524 ; KNL-NEXT: testb $1, %al
4525 ; KNL-NEXT: jne .LBB97_2
4526 ; KNL-NEXT: # %bb.3: # %middle.block
4527 ; KNL-NEXT: xorl %eax, %eax
4528 ; KNL-NEXT: vzeroupper
4530 ; KNL-NEXT: .LBB97_2:
4531 ; KNL-NEXT: movw $0, 0
4532 ; KNL-NEXT: xorl %eax, %eax
4533 ; KNL-NEXT: vzeroupper
4536 ; SKX-LABEL: pr67287:
4537 ; SKX: # %bb.0: # %entry
4538 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
4539 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
4540 ; SKX-NEXT: vptestnmq %xmm0, %xmm0, %k0
4541 ; SKX-NEXT: kortestb %k0, %k0
4542 ; SKX-NEXT: jne .LBB97_2
4543 ; SKX-NEXT: # %bb.1: # %entry
4544 ; SKX-NEXT: kmovd %k0, %eax
4545 ; SKX-NEXT: testb $1, %al
4546 ; SKX-NEXT: jne .LBB97_2
4547 ; SKX-NEXT: # %bb.3: # %middle.block
4548 ; SKX-NEXT: xorl %eax, %eax
4550 ; SKX-NEXT: .LBB97_2:
4551 ; SKX-NEXT: movw $0, 0
4552 ; SKX-NEXT: xorl %eax, %eax
4555 %0 = and <2 x i64> %broadcast.splatinsert25, <i64 4294967295, i64 4294967295>
4556 %1 = icmp eq <2 x i64> %0, zeroinitializer
4557 %shift = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <2 x i32> <i32 1, i32 poison>
4558 %2 = or <2 x i1> %1, %shift
4559 %3 = extractelement <2 x i1> %2, i64 0
4560 %4 = extractelement <2 x i1> %1, i64 0
4562 br i1 %5, label %6, label %middle.block
4565 store i16 0, ptr null, align 2
4566 br label %middle.block
4568 middle.block: ; preds = %6, %entry