1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=KNL
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=SKX
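;
; These tests reduce a vector of per-lane sign-bit (or low-bit) predicates to a
; single i1 "all ones" / "all zeros" result. They check that the reduction
; selects the movmsk-style instructions (pmovmskb/movmskps/movmskpd) followed by
; a scalar cmp/test on SSE2/AVX/AVX2, and mask-register sequences
; (vpmovb2m/vpmovw2m/vpmovd2m/vpmovq2m or vpcmpgt*/vptestm* plus kortest) on the
; AVX-512 targets (KNL/SKX).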
8 define i1 @allones_v16i8_sign(<16 x i8> %arg) {
9 ; SSE2-LABEL: allones_v16i8_sign:
11 ; SSE2-NEXT: pmovmskb %xmm0, %eax
12 ; SSE2-NEXT: cmpw $-1, %ax
16 ; AVX-LABEL: allones_v16i8_sign:
18 ; AVX-NEXT: vpmovmskb %xmm0, %eax
19 ; AVX-NEXT: cmpw $-1, %ax
23 ; KNL-LABEL: allones_v16i8_sign:
25 ; KNL-NEXT: vpmovmskb %xmm0, %eax
26 ; KNL-NEXT: cmpw $-1, %ax
30 ; SKX-LABEL: allones_v16i8_sign:
32 ; SKX-NEXT: vpmovb2m %xmm0, %k0
33 ; SKX-NEXT: kortestw %k0, %k0
36 %tmp = icmp slt <16 x i8> %arg, zeroinitializer
37 %tmp1 = bitcast <16 x i1> %tmp to i16
38 %tmp2 = icmp eq i16 %tmp1, -1
42 define i1 @allzeros_v16i8_sign(<16 x i8> %arg) {
43 ; SSE2-LABEL: allzeros_v16i8_sign:
45 ; SSE2-NEXT: pmovmskb %xmm0, %eax
46 ; SSE2-NEXT: testw %ax, %ax
50 ; AVX-LABEL: allzeros_v16i8_sign:
52 ; AVX-NEXT: vpmovmskb %xmm0, %eax
53 ; AVX-NEXT: testw %ax, %ax
57 ; KNL-LABEL: allzeros_v16i8_sign:
59 ; KNL-NEXT: vpmovmskb %xmm0, %eax
60 ; KNL-NEXT: testw %ax, %ax
64 ; SKX-LABEL: allzeros_v16i8_sign:
66 ; SKX-NEXT: vpmovb2m %xmm0, %k0
67 ; SKX-NEXT: kortestw %k0, %k0
70 %tmp = icmp slt <16 x i8> %arg, zeroinitializer
71 %tmp1 = bitcast <16 x i1> %tmp to i16
72 %tmp2 = icmp eq i16 %tmp1, 0
76 define i1 @allones_v32i8_sign(<32 x i8> %arg) {
77 ; SSE2-LABEL: allones_v32i8_sign:
79 ; SSE2-NEXT: pmovmskb %xmm0, %eax
80 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
81 ; SSE2-NEXT: shll $16, %ecx
82 ; SSE2-NEXT: orl %eax, %ecx
83 ; SSE2-NEXT: cmpl $-1, %ecx
87 ; AVX1-LABEL: allones_v32i8_sign:
89 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
90 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
91 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
92 ; AVX1-NEXT: shll $16, %ecx
93 ; AVX1-NEXT: orl %eax, %ecx
94 ; AVX1-NEXT: cmpl $-1, %ecx
96 ; AVX1-NEXT: vzeroupper
99 ; AVX2-LABEL: allones_v32i8_sign:
101 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
102 ; AVX2-NEXT: cmpl $-1, %eax
103 ; AVX2-NEXT: sete %al
104 ; AVX2-NEXT: vzeroupper
107 ; KNL-LABEL: allones_v32i8_sign:
109 ; KNL-NEXT: vpmovmskb %ymm0, %eax
110 ; KNL-NEXT: cmpl $-1, %eax
112 ; KNL-NEXT: vzeroupper
115 ; SKX-LABEL: allones_v32i8_sign:
117 ; SKX-NEXT: vpmovb2m %ymm0, %k0
118 ; SKX-NEXT: kortestd %k0, %k0
120 ; SKX-NEXT: vzeroupper
122 %tmp = icmp slt <32 x i8> %arg, zeroinitializer
123 %tmp1 = bitcast <32 x i1> %tmp to i32
124 %tmp2 = icmp eq i32 %tmp1, -1
128 define i1 @allzeros_v32i8_sign(<32 x i8> %arg) {
129 ; SSE2-LABEL: allzeros_v32i8_sign:
131 ; SSE2-NEXT: pmovmskb %xmm0, %eax
132 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
133 ; SSE2-NEXT: shll $16, %ecx
134 ; SSE2-NEXT: orl %eax, %ecx
135 ; SSE2-NEXT: sete %al
138 ; AVX1-LABEL: allzeros_v32i8_sign:
140 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
141 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
142 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
143 ; AVX1-NEXT: shll $16, %ecx
144 ; AVX1-NEXT: orl %eax, %ecx
145 ; AVX1-NEXT: sete %al
146 ; AVX1-NEXT: vzeroupper
149 ; AVX2-LABEL: allzeros_v32i8_sign:
151 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
152 ; AVX2-NEXT: testl %eax, %eax
153 ; AVX2-NEXT: sete %al
154 ; AVX2-NEXT: vzeroupper
157 ; KNL-LABEL: allzeros_v32i8_sign:
159 ; KNL-NEXT: vpmovmskb %ymm0, %eax
160 ; KNL-NEXT: testl %eax, %eax
162 ; KNL-NEXT: vzeroupper
165 ; SKX-LABEL: allzeros_v32i8_sign:
167 ; SKX-NEXT: vpmovb2m %ymm0, %k0
168 ; SKX-NEXT: kortestd %k0, %k0
170 ; SKX-NEXT: vzeroupper
172 %tmp = icmp slt <32 x i8> %arg, zeroinitializer
173 %tmp1 = bitcast <32 x i1> %tmp to i32
174 %tmp2 = icmp eq i32 %tmp1, 0
178 define i1 @allones_v64i8_sign(<64 x i8> %arg) {
179 ; SSE2-LABEL: allones_v64i8_sign:
181 ; SSE2-NEXT: pmovmskb %xmm0, %eax
182 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
183 ; SSE2-NEXT: shll $16, %ecx
184 ; SSE2-NEXT: orl %eax, %ecx
185 ; SSE2-NEXT: pmovmskb %xmm2, %eax
186 ; SSE2-NEXT: pmovmskb %xmm3, %edx
187 ; SSE2-NEXT: shll $16, %edx
188 ; SSE2-NEXT: orl %eax, %edx
189 ; SSE2-NEXT: shlq $32, %rdx
190 ; SSE2-NEXT: orq %rcx, %rdx
191 ; SSE2-NEXT: cmpq $-1, %rdx
192 ; SSE2-NEXT: sete %al
195 ; AVX1-LABEL: allones_v64i8_sign:
197 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
198 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
199 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
200 ; AVX1-NEXT: shll $16, %ecx
201 ; AVX1-NEXT: orl %eax, %ecx
202 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
203 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
204 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
205 ; AVX1-NEXT: shll $16, %edx
206 ; AVX1-NEXT: orl %eax, %edx
207 ; AVX1-NEXT: shlq $32, %rdx
208 ; AVX1-NEXT: orq %rcx, %rdx
209 ; AVX1-NEXT: cmpq $-1, %rdx
210 ; AVX1-NEXT: sete %al
211 ; AVX1-NEXT: vzeroupper
214 ; AVX2-LABEL: allones_v64i8_sign:
216 ; AVX2-NEXT: vpmovmskb %ymm1, %eax
217 ; AVX2-NEXT: shlq $32, %rax
218 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
219 ; AVX2-NEXT: orq %rax, %rcx
220 ; AVX2-NEXT: cmpq $-1, %rcx
221 ; AVX2-NEXT: sete %al
222 ; AVX2-NEXT: vzeroupper
225 ; KNL-LABEL: allones_v64i8_sign:
227 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
228 ; KNL-NEXT: vpmovmskb %ymm1, %eax
229 ; KNL-NEXT: shlq $32, %rax
230 ; KNL-NEXT: vpmovmskb %ymm0, %ecx
231 ; KNL-NEXT: orq %rax, %rcx
232 ; KNL-NEXT: cmpq $-1, %rcx
234 ; KNL-NEXT: vzeroupper
237 ; SKX-LABEL: allones_v64i8_sign:
239 ; SKX-NEXT: vpmovb2m %zmm0, %k0
240 ; SKX-NEXT: kortestq %k0, %k0
242 ; SKX-NEXT: vzeroupper
244 %tmp = icmp slt <64 x i8> %arg, zeroinitializer
245 %tmp1 = bitcast <64 x i1> %tmp to i64
246 %tmp2 = icmp eq i64 %tmp1, -1
250 define i1 @allzeros_v64i8_sign(<64 x i8> %arg) {
251 ; SSE2-LABEL: allzeros_v64i8_sign:
253 ; SSE2-NEXT: pmovmskb %xmm0, %eax
254 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
255 ; SSE2-NEXT: shll $16, %ecx
256 ; SSE2-NEXT: orl %eax, %ecx
257 ; SSE2-NEXT: pmovmskb %xmm2, %eax
258 ; SSE2-NEXT: pmovmskb %xmm3, %edx
259 ; SSE2-NEXT: shll $16, %edx
260 ; SSE2-NEXT: orl %eax, %edx
261 ; SSE2-NEXT: shlq $32, %rdx
262 ; SSE2-NEXT: orq %rcx, %rdx
263 ; SSE2-NEXT: sete %al
266 ; AVX1-LABEL: allzeros_v64i8_sign:
268 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
269 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
270 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
271 ; AVX1-NEXT: shll $16, %ecx
272 ; AVX1-NEXT: orl %eax, %ecx
273 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
274 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
275 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
276 ; AVX1-NEXT: shll $16, %edx
277 ; AVX1-NEXT: orl %eax, %edx
278 ; AVX1-NEXT: shlq $32, %rdx
279 ; AVX1-NEXT: orq %rcx, %rdx
280 ; AVX1-NEXT: sete %al
281 ; AVX1-NEXT: vzeroupper
284 ; AVX2-LABEL: allzeros_v64i8_sign:
286 ; AVX2-NEXT: vpmovmskb %ymm1, %eax
287 ; AVX2-NEXT: shlq $32, %rax
288 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
289 ; AVX2-NEXT: orq %rax, %rcx
290 ; AVX2-NEXT: sete %al
291 ; AVX2-NEXT: vzeroupper
294 ; KNL-LABEL: allzeros_v64i8_sign:
296 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
297 ; KNL-NEXT: vpmovmskb %ymm1, %eax
298 ; KNL-NEXT: shlq $32, %rax
299 ; KNL-NEXT: vpmovmskb %ymm0, %ecx
300 ; KNL-NEXT: orq %rax, %rcx
302 ; KNL-NEXT: vzeroupper
305 ; SKX-LABEL: allzeros_v64i8_sign:
307 ; SKX-NEXT: vpmovb2m %zmm0, %k0
308 ; SKX-NEXT: kortestq %k0, %k0
310 ; SKX-NEXT: vzeroupper
312 %tmp = icmp slt <64 x i8> %arg, zeroinitializer
313 %tmp1 = bitcast <64 x i1> %tmp to i64
314 %tmp2 = icmp eq i64 %tmp1, 0
318 define i1 @allones_v8i16_sign(<8 x i16> %arg) {
319 ; SSE2-LABEL: allones_v8i16_sign:
321 ; SSE2-NEXT: packsswb %xmm0, %xmm0
322 ; SSE2-NEXT: pmovmskb %xmm0, %eax
323 ; SSE2-NEXT: cmpb $-1, %al
324 ; SSE2-NEXT: sete %al
327 ; AVX-LABEL: allones_v8i16_sign:
329 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
330 ; AVX-NEXT: vpmovmskb %xmm0, %eax
331 ; AVX-NEXT: cmpb $-1, %al
335 ; KNL-LABEL: allones_v8i16_sign:
337 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
338 ; KNL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
339 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
340 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
341 ; KNL-NEXT: kmovw %k0, %eax
342 ; KNL-NEXT: cmpb $-1, %al
344 ; KNL-NEXT: vzeroupper
347 ; SKX-LABEL: allones_v8i16_sign:
349 ; SKX-NEXT: vpmovw2m %xmm0, %k0
350 ; SKX-NEXT: kortestb %k0, %k0
353 %tmp = icmp slt <8 x i16> %arg, zeroinitializer
354 %tmp1 = bitcast <8 x i1> %tmp to i8
355 %tmp2 = icmp eq i8 %tmp1, -1
359 define i1 @allzeros_v8i16_sign(<8 x i16> %arg) {
360 ; SSE2-LABEL: allzeros_v8i16_sign:
362 ; SSE2-NEXT: packsswb %xmm0, %xmm0
363 ; SSE2-NEXT: pmovmskb %xmm0, %eax
364 ; SSE2-NEXT: testb %al, %al
365 ; SSE2-NEXT: sete %al
368 ; AVX-LABEL: allzeros_v8i16_sign:
370 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
371 ; AVX-NEXT: vpmovmskb %xmm0, %eax
372 ; AVX-NEXT: testb %al, %al
376 ; KNL-LABEL: allzeros_v8i16_sign:
378 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
379 ; KNL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
380 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
381 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
382 ; KNL-NEXT: kmovw %k0, %eax
383 ; KNL-NEXT: testb %al, %al
385 ; KNL-NEXT: vzeroupper
388 ; SKX-LABEL: allzeros_v8i16_sign:
390 ; SKX-NEXT: vpmovw2m %xmm0, %k0
391 ; SKX-NEXT: kortestb %k0, %k0
394 %tmp = icmp slt <8 x i16> %arg, zeroinitializer
395 %tmp1 = bitcast <8 x i1> %tmp to i8
396 %tmp2 = icmp eq i8 %tmp1, 0
400 define i1 @allones_v16i16_sign(<16 x i16> %arg) {
401 ; SSE2-LABEL: allones_v16i16_sign:
403 ; SSE2-NEXT: packsswb %xmm1, %xmm0
404 ; SSE2-NEXT: pmovmskb %xmm0, %eax
405 ; SSE2-NEXT: cmpw $-1, %ax
406 ; SSE2-NEXT: sete %al
409 ; AVX1-LABEL: allones_v16i16_sign:
411 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
412 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
413 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
414 ; AVX1-NEXT: cmpw $-1, %ax
415 ; AVX1-NEXT: sete %al
416 ; AVX1-NEXT: vzeroupper
419 ; AVX2-LABEL: allones_v16i16_sign:
421 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
422 ; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
423 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
424 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
425 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
426 ; AVX2-NEXT: cmpw $-1, %ax
427 ; AVX2-NEXT: sete %al
428 ; AVX2-NEXT: vzeroupper
431 ; KNL-LABEL: allones_v16i16_sign:
433 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
434 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
435 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
436 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
437 ; KNL-NEXT: kortestw %k0, %k0
439 ; KNL-NEXT: vzeroupper
442 ; SKX-LABEL: allones_v16i16_sign:
444 ; SKX-NEXT: vpmovw2m %ymm0, %k0
445 ; SKX-NEXT: kortestw %k0, %k0
447 ; SKX-NEXT: vzeroupper
449 %tmp = icmp slt <16 x i16> %arg, zeroinitializer
450 %tmp1 = bitcast <16 x i1> %tmp to i16
451 %tmp2 = icmp eq i16 %tmp1, -1
455 define i1 @allzeros_v16i16_sign(<16 x i16> %arg) {
456 ; SSE2-LABEL: allzeros_v16i16_sign:
458 ; SSE2-NEXT: packsswb %xmm1, %xmm0
459 ; SSE2-NEXT: pmovmskb %xmm0, %eax
460 ; SSE2-NEXT: testw %ax, %ax
461 ; SSE2-NEXT: sete %al
464 ; AVX1-LABEL: allzeros_v16i16_sign:
466 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
467 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
468 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
469 ; AVX1-NEXT: testw %ax, %ax
470 ; AVX1-NEXT: sete %al
471 ; AVX1-NEXT: vzeroupper
474 ; AVX2-LABEL: allzeros_v16i16_sign:
476 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
477 ; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
478 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
479 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
480 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
481 ; AVX2-NEXT: testw %ax, %ax
482 ; AVX2-NEXT: sete %al
483 ; AVX2-NEXT: vzeroupper
486 ; KNL-LABEL: allzeros_v16i16_sign:
488 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
489 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
490 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
491 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
492 ; KNL-NEXT: kortestw %k0, %k0
494 ; KNL-NEXT: vzeroupper
497 ; SKX-LABEL: allzeros_v16i16_sign:
499 ; SKX-NEXT: vpmovw2m %ymm0, %k0
500 ; SKX-NEXT: kortestw %k0, %k0
502 ; SKX-NEXT: vzeroupper
504 %tmp = icmp slt <16 x i16> %arg, zeroinitializer
505 %tmp1 = bitcast <16 x i1> %tmp to i16
506 %tmp2 = icmp eq i16 %tmp1, 0
510 define i1 @allones_v32i16_sign(<32 x i16> %arg) {
511 ; SSE2-LABEL: allones_v32i16_sign:
513 ; SSE2-NEXT: packsswb %xmm1, %xmm0
514 ; SSE2-NEXT: pmovmskb %xmm0, %eax
515 ; SSE2-NEXT: packsswb %xmm3, %xmm2
516 ; SSE2-NEXT: pmovmskb %xmm2, %ecx
517 ; SSE2-NEXT: shll $16, %ecx
518 ; SSE2-NEXT: orl %eax, %ecx
519 ; SSE2-NEXT: cmpl $-1, %ecx
520 ; SSE2-NEXT: sete %al
523 ; AVX1-LABEL: allones_v32i16_sign:
525 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
526 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
527 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
528 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
529 ; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
530 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
531 ; AVX1-NEXT: shll $16, %ecx
532 ; AVX1-NEXT: orl %eax, %ecx
533 ; AVX1-NEXT: cmpl $-1, %ecx
534 ; AVX1-NEXT: sete %al
535 ; AVX1-NEXT: vzeroupper
538 ; AVX2-LABEL: allones_v32i16_sign:
540 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
541 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
542 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
543 ; AVX2-NEXT: cmpl $-1, %eax
544 ; AVX2-NEXT: sete %al
545 ; AVX2-NEXT: vzeroupper
548 ; KNL-LABEL: allones_v32i16_sign:
550 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
551 ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
552 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
553 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
554 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
555 ; KNL-NEXT: kmovw %k0, %eax
556 ; KNL-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm0
557 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
558 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
559 ; KNL-NEXT: kmovw %k0, %ecx
560 ; KNL-NEXT: shll $16, %ecx
561 ; KNL-NEXT: orl %eax, %ecx
562 ; KNL-NEXT: cmpl $-1, %ecx
564 ; KNL-NEXT: vzeroupper
567 ; SKX-LABEL: allones_v32i16_sign:
569 ; SKX-NEXT: vpmovw2m %zmm0, %k0
570 ; SKX-NEXT: kortestd %k0, %k0
572 ; SKX-NEXT: vzeroupper
574 %tmp = icmp slt <32 x i16> %arg, zeroinitializer
575 %tmp1 = bitcast <32 x i1> %tmp to i32
576 %tmp2 = icmp eq i32 %tmp1, -1
580 define i1 @allzeros_v32i16_sign(<32 x i16> %arg) {
581 ; SSE2-LABEL: allzeros_v32i16_sign:
583 ; SSE2-NEXT: packsswb %xmm1, %xmm0
584 ; SSE2-NEXT: pmovmskb %xmm0, %eax
585 ; SSE2-NEXT: packsswb %xmm3, %xmm2
586 ; SSE2-NEXT: pmovmskb %xmm2, %ecx
587 ; SSE2-NEXT: shll $16, %ecx
588 ; SSE2-NEXT: orl %eax, %ecx
589 ; SSE2-NEXT: sete %al
592 ; AVX1-LABEL: allzeros_v32i16_sign:
594 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
595 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
596 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
597 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
598 ; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
599 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
600 ; AVX1-NEXT: shll $16, %ecx
601 ; AVX1-NEXT: orl %eax, %ecx
602 ; AVX1-NEXT: sete %al
603 ; AVX1-NEXT: vzeroupper
606 ; AVX2-LABEL: allzeros_v32i16_sign:
608 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
609 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
610 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
611 ; AVX2-NEXT: testl %eax, %eax
612 ; AVX2-NEXT: sete %al
613 ; AVX2-NEXT: vzeroupper
616 ; KNL-LABEL: allzeros_v32i16_sign:
618 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
619 ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
620 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
621 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
622 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
623 ; KNL-NEXT: kmovw %k0, %eax
624 ; KNL-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm0
625 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
626 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
627 ; KNL-NEXT: kmovw %k0, %ecx
628 ; KNL-NEXT: shll $16, %ecx
629 ; KNL-NEXT: orl %eax, %ecx
631 ; KNL-NEXT: vzeroupper
634 ; SKX-LABEL: allzeros_v32i16_sign:
636 ; SKX-NEXT: vpmovw2m %zmm0, %k0
637 ; SKX-NEXT: kortestd %k0, %k0
639 ; SKX-NEXT: vzeroupper
641 %tmp = icmp slt <32 x i16> %arg, zeroinitializer
642 %tmp1 = bitcast <32 x i1> %tmp to i32
643 %tmp2 = icmp eq i32 %tmp1, 0
647 define i1 @allones_v4i32_sign(<4 x i32> %arg) {
648 ; SSE2-LABEL: allones_v4i32_sign:
650 ; SSE2-NEXT: movmskps %xmm0, %eax
651 ; SSE2-NEXT: cmpb $15, %al
652 ; SSE2-NEXT: sete %al
655 ; AVX-LABEL: allones_v4i32_sign:
657 ; AVX-NEXT: vmovmskps %xmm0, %eax
658 ; AVX-NEXT: cmpb $15, %al
662 ; KNL-LABEL: allones_v4i32_sign:
664 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
665 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
666 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
667 ; KNL-NEXT: kmovw %k0, %eax
668 ; KNL-NEXT: andb $15, %al
669 ; KNL-NEXT: cmpb $15, %al
671 ; KNL-NEXT: vzeroupper
674 ; SKX-LABEL: allones_v4i32_sign:
676 ; SKX-NEXT: vpmovd2m %xmm0, %k0
677 ; SKX-NEXT: kmovd %k0, %eax
678 ; SKX-NEXT: andb $15, %al
679 ; SKX-NEXT: cmpb $15, %al
682 %tmp = icmp slt <4 x i32> %arg, zeroinitializer
683 %tmp1 = bitcast <4 x i1> %tmp to i4
684 %tmp2 = icmp eq i4 %tmp1, -1
688 define i1 @allzeros_v4i32_sign(<4 x i32> %arg) {
689 ; SSE2-LABEL: allzeros_v4i32_sign:
691 ; SSE2-NEXT: movmskps %xmm0, %eax
692 ; SSE2-NEXT: testb %al, %al
693 ; SSE2-NEXT: sete %al
696 ; AVX-LABEL: allzeros_v4i32_sign:
698 ; AVX-NEXT: vmovmskps %xmm0, %eax
699 ; AVX-NEXT: testb %al, %al
703 ; KNL-LABEL: allzeros_v4i32_sign:
705 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
706 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
707 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
708 ; KNL-NEXT: kmovw %k0, %eax
709 ; KNL-NEXT: testb $15, %al
711 ; KNL-NEXT: vzeroupper
714 ; SKX-LABEL: allzeros_v4i32_sign:
716 ; SKX-NEXT: vpmovd2m %xmm0, %k0
717 ; SKX-NEXT: kmovd %k0, %eax
718 ; SKX-NEXT: testb $15, %al
721 %tmp = icmp slt <4 x i32> %arg, zeroinitializer
722 %tmp1 = bitcast <4 x i1> %tmp to i4
723 %tmp2 = icmp eq i4 %tmp1, 0
727 define i1 @allones_v8i32_sign(<8 x i32> %arg) {
728 ; SSE2-LABEL: allones_v8i32_sign:
730 ; SSE2-NEXT: packssdw %xmm1, %xmm0
731 ; SSE2-NEXT: packsswb %xmm0, %xmm0
732 ; SSE2-NEXT: pmovmskb %xmm0, %eax
733 ; SSE2-NEXT: cmpb $-1, %al
734 ; SSE2-NEXT: sete %al
737 ; AVX-LABEL: allones_v8i32_sign:
739 ; AVX-NEXT: vmovmskps %ymm0, %eax
740 ; AVX-NEXT: cmpb $-1, %al
742 ; AVX-NEXT: vzeroupper
745 ; KNL-LABEL: allones_v8i32_sign:
747 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
748 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
749 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
750 ; KNL-NEXT: kmovw %k0, %eax
751 ; KNL-NEXT: cmpb $-1, %al
753 ; KNL-NEXT: vzeroupper
756 ; SKX-LABEL: allones_v8i32_sign:
758 ; SKX-NEXT: vpmovd2m %ymm0, %k0
759 ; SKX-NEXT: kortestb %k0, %k0
761 ; SKX-NEXT: vzeroupper
763 %tmp = icmp slt <8 x i32> %arg, zeroinitializer
764 %tmp1 = bitcast <8 x i1> %tmp to i8
765 %tmp2 = icmp eq i8 %tmp1, -1
769 define i1 @allzeros_v8i32_sign(<8 x i32> %arg) {
770 ; SSE2-LABEL: allzeros_v8i32_sign:
772 ; SSE2-NEXT: packssdw %xmm1, %xmm0
773 ; SSE2-NEXT: packsswb %xmm0, %xmm0
774 ; SSE2-NEXT: pmovmskb %xmm0, %eax
775 ; SSE2-NEXT: testb %al, %al
776 ; SSE2-NEXT: sete %al
779 ; AVX-LABEL: allzeros_v8i32_sign:
781 ; AVX-NEXT: vmovmskps %ymm0, %eax
782 ; AVX-NEXT: testb %al, %al
784 ; AVX-NEXT: vzeroupper
787 ; KNL-LABEL: allzeros_v8i32_sign:
789 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
790 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
791 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
792 ; KNL-NEXT: kmovw %k0, %eax
793 ; KNL-NEXT: testb %al, %al
795 ; KNL-NEXT: vzeroupper
798 ; SKX-LABEL: allzeros_v8i32_sign:
800 ; SKX-NEXT: vpmovd2m %ymm0, %k0
801 ; SKX-NEXT: kortestb %k0, %k0
803 ; SKX-NEXT: vzeroupper
805 %tmp = icmp slt <8 x i32> %arg, zeroinitializer
806 %tmp1 = bitcast <8 x i1> %tmp to i8
807 %tmp2 = icmp eq i8 %tmp1, 0
811 define i1 @allones_v16i32_sign(<16 x i32> %arg) {
812 ; SSE2-LABEL: allones_v16i32_sign:
814 ; SSE2-NEXT: packssdw %xmm3, %xmm2
815 ; SSE2-NEXT: packssdw %xmm1, %xmm0
816 ; SSE2-NEXT: packsswb %xmm2, %xmm0
817 ; SSE2-NEXT: pmovmskb %xmm0, %eax
818 ; SSE2-NEXT: cmpw $-1, %ax
819 ; SSE2-NEXT: sete %al
822 ; AVX1-LABEL: allones_v16i32_sign:
824 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
825 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
826 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
827 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
828 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
829 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
830 ; AVX1-NEXT: cmpw $-1, %ax
831 ; AVX1-NEXT: sete %al
832 ; AVX1-NEXT: vzeroupper
835 ; AVX2-LABEL: allones_v16i32_sign:
837 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
838 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1
839 ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
840 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
841 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
842 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
843 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
844 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
845 ; AVX2-NEXT: cmpw $-1, %ax
846 ; AVX2-NEXT: sete %al
847 ; AVX2-NEXT: vzeroupper
850 ; KNL-LABEL: allones_v16i32_sign:
852 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
853 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
854 ; KNL-NEXT: kortestw %k0, %k0
856 ; KNL-NEXT: vzeroupper
859 ; SKX-LABEL: allones_v16i32_sign:
861 ; SKX-NEXT: vpmovd2m %zmm0, %k0
862 ; SKX-NEXT: kortestw %k0, %k0
864 ; SKX-NEXT: vzeroupper
866 %tmp = icmp slt <16 x i32> %arg, zeroinitializer
867 %tmp1 = bitcast <16 x i1> %tmp to i16
868 %tmp2 = icmp eq i16 %tmp1, -1
872 define i1 @allzeros_v16i32_sign(<16 x i32> %arg) {
873 ; SSE2-LABEL: allzeros_v16i32_sign:
875 ; SSE2-NEXT: packssdw %xmm3, %xmm2
876 ; SSE2-NEXT: packssdw %xmm1, %xmm0
877 ; SSE2-NEXT: packsswb %xmm2, %xmm0
878 ; SSE2-NEXT: pmovmskb %xmm0, %eax
879 ; SSE2-NEXT: testw %ax, %ax
880 ; SSE2-NEXT: sete %al
883 ; AVX1-LABEL: allzeros_v16i32_sign:
885 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
886 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
887 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
888 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
889 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
890 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
891 ; AVX1-NEXT: testw %ax, %ax
892 ; AVX1-NEXT: sete %al
893 ; AVX1-NEXT: vzeroupper
896 ; AVX2-LABEL: allzeros_v16i32_sign:
898 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
899 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1
900 ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
901 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
902 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
903 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
904 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
905 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
906 ; AVX2-NEXT: testw %ax, %ax
907 ; AVX2-NEXT: sete %al
908 ; AVX2-NEXT: vzeroupper
911 ; KNL-LABEL: allzeros_v16i32_sign:
913 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
914 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
915 ; KNL-NEXT: kortestw %k0, %k0
917 ; KNL-NEXT: vzeroupper
920 ; SKX-LABEL: allzeros_v16i32_sign:
922 ; SKX-NEXT: vpmovd2m %zmm0, %k0
923 ; SKX-NEXT: kortestw %k0, %k0
925 ; SKX-NEXT: vzeroupper
927 %tmp = icmp slt <16 x i32> %arg, zeroinitializer
928 %tmp1 = bitcast <16 x i1> %tmp to i16
929 %tmp2 = icmp eq i16 %tmp1, 0
933 define i1 @allones_v4i64_sign(<4 x i64> %arg) {
934 ; SSE2-LABEL: allones_v4i64_sign:
936 ; SSE2-NEXT: packssdw %xmm1, %xmm0
937 ; SSE2-NEXT: movmskps %xmm0, %eax
938 ; SSE2-NEXT: cmpb $15, %al
939 ; SSE2-NEXT: sete %al
942 ; AVX-LABEL: allones_v4i64_sign:
944 ; AVX-NEXT: vmovmskpd %ymm0, %eax
945 ; AVX-NEXT: cmpb $15, %al
947 ; AVX-NEXT: vzeroupper
950 ; KNL-LABEL: allones_v4i64_sign:
952 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
953 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
954 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
955 ; KNL-NEXT: kmovw %k0, %eax
956 ; KNL-NEXT: andb $15, %al
957 ; KNL-NEXT: cmpb $15, %al
959 ; KNL-NEXT: vzeroupper
962 ; SKX-LABEL: allones_v4i64_sign:
964 ; SKX-NEXT: vpmovq2m %ymm0, %k0
965 ; SKX-NEXT: kmovd %k0, %eax
966 ; SKX-NEXT: andb $15, %al
967 ; SKX-NEXT: cmpb $15, %al
969 ; SKX-NEXT: vzeroupper
971 %tmp = icmp slt <4 x i64> %arg, zeroinitializer
972 %tmp1 = bitcast <4 x i1> %tmp to i4
973 %tmp2 = icmp eq i4 %tmp1, -1
977 define i1 @allzeros_v4i64_sign(<4 x i64> %arg) {
978 ; SSE2-LABEL: allzeros_v4i64_sign:
980 ; SSE2-NEXT: packssdw %xmm1, %xmm0
981 ; SSE2-NEXT: movmskps %xmm0, %eax
982 ; SSE2-NEXT: testb %al, %al
983 ; SSE2-NEXT: sete %al
986 ; AVX-LABEL: allzeros_v4i64_sign:
988 ; AVX-NEXT: vmovmskpd %ymm0, %eax
989 ; AVX-NEXT: testb %al, %al
991 ; AVX-NEXT: vzeroupper
994 ; KNL-LABEL: allzeros_v4i64_sign:
996 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
997 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
998 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
999 ; KNL-NEXT: kmovw %k0, %eax
1000 ; KNL-NEXT: testb $15, %al
1001 ; KNL-NEXT: sete %al
1002 ; KNL-NEXT: vzeroupper
1005 ; SKX-LABEL: allzeros_v4i64_sign:
1007 ; SKX-NEXT: vpmovq2m %ymm0, %k0
1008 ; SKX-NEXT: kmovd %k0, %eax
1009 ; SKX-NEXT: testb $15, %al
1010 ; SKX-NEXT: sete %al
1011 ; SKX-NEXT: vzeroupper
1013 %tmp = icmp slt <4 x i64> %arg, zeroinitializer
1014 %tmp1 = bitcast <4 x i1> %tmp to i4
1015 %tmp2 = icmp eq i4 %tmp1, 0
1019 define i1 @allones_v8i64_sign(<8 x i64> %arg) {
1020 ; SSE2-LABEL: allones_v8i64_sign:
1022 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
1023 ; SSE2-NEXT: pxor %xmm4, %xmm3
1024 ; SSE2-NEXT: movdqa %xmm4, %xmm5
1025 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
1026 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
1027 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
1028 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1029 ; SSE2-NEXT: pand %xmm6, %xmm3
1030 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1031 ; SSE2-NEXT: por %xmm3, %xmm5
1032 ; SSE2-NEXT: pxor %xmm4, %xmm2
1033 ; SSE2-NEXT: movdqa %xmm4, %xmm3
1034 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1035 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
1036 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
1037 ; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
1038 ; SSE2-NEXT: pand %xmm6, %xmm7
1039 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
1040 ; SSE2-NEXT: por %xmm7, %xmm2
1041 ; SSE2-NEXT: packssdw %xmm5, %xmm2
1042 ; SSE2-NEXT: pxor %xmm4, %xmm1
1043 ; SSE2-NEXT: movdqa %xmm4, %xmm3
1044 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
1045 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
1046 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
1047 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1048 ; SSE2-NEXT: pand %xmm5, %xmm1
1049 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1050 ; SSE2-NEXT: por %xmm1, %xmm3
1051 ; SSE2-NEXT: pxor %xmm4, %xmm0
1052 ; SSE2-NEXT: movdqa %xmm4, %xmm1
1053 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
1054 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
1055 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
1056 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1057 ; SSE2-NEXT: pand %xmm5, %xmm0
1058 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1059 ; SSE2-NEXT: por %xmm0, %xmm1
1060 ; SSE2-NEXT: packssdw %xmm3, %xmm1
1061 ; SSE2-NEXT: packssdw %xmm2, %xmm1
1062 ; SSE2-NEXT: packsswb %xmm0, %xmm1
1063 ; SSE2-NEXT: pmovmskb %xmm1, %eax
1064 ; SSE2-NEXT: cmpb $-1, %al
1065 ; SSE2-NEXT: sete %al
1068 ; AVX1-LABEL: allones_v8i64_sign:
1070 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1071 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1072 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1073 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
1074 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1075 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1076 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1077 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1078 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1079 ; AVX1-NEXT: cmpb $-1, %al
1080 ; AVX1-NEXT: sete %al
1081 ; AVX1-NEXT: vzeroupper
1084 ; AVX2-LABEL: allones_v8i64_sign:
1086 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1087 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1088 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1089 ; AVX2-NEXT: cmpb $-1, %al
1090 ; AVX2-NEXT: sete %al
1091 ; AVX2-NEXT: vzeroupper
1094 ; KNL-LABEL: allones_v8i64_sign:
1096 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1097 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
1098 ; KNL-NEXT: kmovw %k0, %eax
1099 ; KNL-NEXT: cmpb $-1, %al
1100 ; KNL-NEXT: sete %al
1101 ; KNL-NEXT: vzeroupper
1104 ; SKX-LABEL: allones_v8i64_sign:
1106 ; SKX-NEXT: vpmovq2m %zmm0, %k0
1107 ; SKX-NEXT: kortestb %k0, %k0
1108 ; SKX-NEXT: setb %al
1109 ; SKX-NEXT: vzeroupper
1111 %tmp = icmp slt <8 x i64> %arg, zeroinitializer
1112 %tmp1 = bitcast <8 x i1> %tmp to i8
1113 %tmp2 = icmp eq i8 %tmp1, -1
1117 define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
1118 ; SSE2-LABEL: allzeros_v8i64_sign:
1120 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
1121 ; SSE2-NEXT: pxor %xmm4, %xmm3
1122 ; SSE2-NEXT: movdqa %xmm4, %xmm5
1123 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
1124 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
1125 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
1126 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1127 ; SSE2-NEXT: pand %xmm6, %xmm3
1128 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1129 ; SSE2-NEXT: por %xmm3, %xmm5
1130 ; SSE2-NEXT: pxor %xmm4, %xmm2
1131 ; SSE2-NEXT: movdqa %xmm4, %xmm3
1132 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1133 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
1134 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
1135 ; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
1136 ; SSE2-NEXT: pand %xmm6, %xmm7
1137 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
1138 ; SSE2-NEXT: por %xmm7, %xmm2
1139 ; SSE2-NEXT: packssdw %xmm5, %xmm2
1140 ; SSE2-NEXT: pxor %xmm4, %xmm1
1141 ; SSE2-NEXT: movdqa %xmm4, %xmm3
1142 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
1143 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
1144 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
1145 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1146 ; SSE2-NEXT: pand %xmm5, %xmm1
1147 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1148 ; SSE2-NEXT: por %xmm1, %xmm3
1149 ; SSE2-NEXT: pxor %xmm4, %xmm0
1150 ; SSE2-NEXT: movdqa %xmm4, %xmm1
1151 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
1152 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
1153 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
1154 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1155 ; SSE2-NEXT: pand %xmm5, %xmm0
1156 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1157 ; SSE2-NEXT: por %xmm0, %xmm1
1158 ; SSE2-NEXT: packssdw %xmm3, %xmm1
1159 ; SSE2-NEXT: packssdw %xmm2, %xmm1
1160 ; SSE2-NEXT: packsswb %xmm0, %xmm1
1161 ; SSE2-NEXT: pmovmskb %xmm1, %eax
1162 ; SSE2-NEXT: testb %al, %al
1163 ; SSE2-NEXT: sete %al
1166 ; AVX1-LABEL: allzeros_v8i64_sign:
1168 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1169 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1170 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1171 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
1172 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1173 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1174 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1175 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1176 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1177 ; AVX1-NEXT: testb %al, %al
1178 ; AVX1-NEXT: sete %al
1179 ; AVX1-NEXT: vzeroupper
1182 ; AVX2-LABEL: allzeros_v8i64_sign:
1184 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1185 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1186 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1187 ; AVX2-NEXT: testb %al, %al
1188 ; AVX2-NEXT: sete %al
1189 ; AVX2-NEXT: vzeroupper
1192 ; KNL-LABEL: allzeros_v8i64_sign:
1194 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1195 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
1196 ; KNL-NEXT: kmovw %k0, %eax
1197 ; KNL-NEXT: testb %al, %al
1198 ; KNL-NEXT: sete %al
1199 ; KNL-NEXT: vzeroupper
1202 ; SKX-LABEL: allzeros_v8i64_sign:
1204 ; SKX-NEXT: vpmovq2m %zmm0, %k0
1205 ; SKX-NEXT: kortestb %k0, %k0
1206 ; SKX-NEXT: sete %al
1207 ; SKX-NEXT: vzeroupper
1209 %tmp = icmp slt <8 x i64> %arg, zeroinitializer
1210 %tmp1 = bitcast <8 x i1> %tmp to i8
1211 %tmp2 = icmp eq i8 %tmp1, 0
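;
; The tests below mask each element with 1 before the all-ones/all-zeros
; reduction, so the predicate comes from the low bit of each lane rather than
; the sign bit. SSE2/AVX/AVX2 shift that bit into the sign position
; (psllw $7 / psllw $15 / pslld $31) before the movmsk, while SKX uses
; vptestmb/vptestmw/vptestmd against a constant-1 vector plus kortest.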
1215 define i1 @allones_v16i8_and1(<16 x i8> %arg) {
1216 ; SSE2-LABEL: allones_v16i8_and1:
1218 ; SSE2-NEXT: psllw $7, %xmm0
1219 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1220 ; SSE2-NEXT: cmpw $-1, %ax
1221 ; SSE2-NEXT: sete %al
1224 ; AVX-LABEL: allones_v16i8_and1:
1226 ; AVX-NEXT: vpsllw $7, %xmm0, %xmm0
1227 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1228 ; AVX-NEXT: cmpw $-1, %ax
1229 ; AVX-NEXT: sete %al
1232 ; KNL-LABEL: allones_v16i8_and1:
1234 ; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
1235 ; KNL-NEXT: vpmovmskb %xmm0, %eax
1236 ; KNL-NEXT: cmpw $-1, %ax
1237 ; KNL-NEXT: sete %al
1240 ; SKX-LABEL: allones_v16i8_and1:
1242 ; SKX-NEXT: vptestmb {{.*}}(%rip), %xmm0, %k0
1243 ; SKX-NEXT: kortestw %k0, %k0
1244 ; SKX-NEXT: setb %al
1246 %tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1247 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
1248 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1249 %tmp3 = icmp eq i16 %tmp2, -1
1253 define i1 @allzeros_v16i8_and1(<16 x i8> %arg) {
1254 ; SSE2-LABEL: allzeros_v16i8_and1:
1256 ; SSE2-NEXT: psllw $7, %xmm0
1257 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1258 ; SSE2-NEXT: testw %ax, %ax
1259 ; SSE2-NEXT: sete %al
1262 ; AVX-LABEL: allzeros_v16i8_and1:
1264 ; AVX-NEXT: vpsllw $7, %xmm0, %xmm0
1265 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1266 ; AVX-NEXT: testw %ax, %ax
1267 ; AVX-NEXT: sete %al
1270 ; KNL-LABEL: allzeros_v16i8_and1:
1272 ; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
1273 ; KNL-NEXT: vpmovmskb %xmm0, %eax
1274 ; KNL-NEXT: testw %ax, %ax
1275 ; KNL-NEXT: sete %al
1278 ; SKX-LABEL: allzeros_v16i8_and1:
1280 ; SKX-NEXT: vptestmb {{.*}}(%rip), %xmm0, %k0
1281 ; SKX-NEXT: kortestw %k0, %k0
1282 ; SKX-NEXT: sete %al
1284 %tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1285 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
1286 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1287 %tmp3 = icmp eq i16 %tmp2, 0
1291 define i1 @allones_v32i8_and1(<32 x i8> %arg) {
1292 ; SSE2-LABEL: allones_v32i8_and1:
1294 ; SSE2-NEXT: psllw $7, %xmm0
1295 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1296 ; SSE2-NEXT: psllw $7, %xmm1
1297 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
1298 ; SSE2-NEXT: shll $16, %ecx
1299 ; SSE2-NEXT: orl %eax, %ecx
1300 ; SSE2-NEXT: cmpl $-1, %ecx
1301 ; SSE2-NEXT: sete %al
1304 ; AVX1-LABEL: allones_v32i8_and1:
1306 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm1
1307 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
1308 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1309 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1310 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
1311 ; AVX1-NEXT: shll $16, %ecx
1312 ; AVX1-NEXT: orl %eax, %ecx
1313 ; AVX1-NEXT: cmpl $-1, %ecx
1314 ; AVX1-NEXT: sete %al
1315 ; AVX1-NEXT: vzeroupper
1318 ; AVX2-LABEL: allones_v32i8_and1:
1320 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
1321 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1322 ; AVX2-NEXT: cmpl $-1, %eax
1323 ; AVX2-NEXT: sete %al
1324 ; AVX2-NEXT: vzeroupper
1327 ; KNL-LABEL: allones_v32i8_and1:
1329 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
1330 ; KNL-NEXT: vpmovmskb %ymm0, %eax
1331 ; KNL-NEXT: cmpl $-1, %eax
1332 ; KNL-NEXT: sete %al
1333 ; KNL-NEXT: vzeroupper
1336 ; SKX-LABEL: allones_v32i8_and1:
1338 ; SKX-NEXT: vptestmb {{.*}}(%rip), %ymm0, %k0
1339 ; SKX-NEXT: kortestd %k0, %k0
1340 ; SKX-NEXT: setb %al
1341 ; SKX-NEXT: vzeroupper
1343 %tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1344 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
1345 %tmp2 = bitcast <32 x i1> %tmp1 to i32
1346 %tmp3 = icmp eq i32 %tmp2, -1
1350 define i1 @allzeros_v32i8_and1(<32 x i8> %arg) {
1351 ; SSE2-LABEL: allzeros_v32i8_and1:
1353 ; SSE2-NEXT: psllw $7, %xmm0
1354 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1355 ; SSE2-NEXT: psllw $7, %xmm1
1356 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
1357 ; SSE2-NEXT: shll $16, %ecx
1358 ; SSE2-NEXT: orl %eax, %ecx
1359 ; SSE2-NEXT: sete %al
1362 ; AVX1-LABEL: allzeros_v32i8_and1:
1364 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm1
1365 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
1366 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1367 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1368 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
1369 ; AVX1-NEXT: shll $16, %ecx
1370 ; AVX1-NEXT: orl %eax, %ecx
1371 ; AVX1-NEXT: sete %al
1372 ; AVX1-NEXT: vzeroupper
1375 ; AVX2-LABEL: allzeros_v32i8_and1:
1377 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
1378 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1379 ; AVX2-NEXT: testl %eax, %eax
1380 ; AVX2-NEXT: sete %al
1381 ; AVX2-NEXT: vzeroupper
1384 ; KNL-LABEL: allzeros_v32i8_and1:
1386 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
1387 ; KNL-NEXT: vpmovmskb %ymm0, %eax
1388 ; KNL-NEXT: testl %eax, %eax
1389 ; KNL-NEXT: sete %al
1390 ; KNL-NEXT: vzeroupper
1393 ; SKX-LABEL: allzeros_v32i8_and1:
1395 ; SKX-NEXT: vptestmb {{.*}}(%rip), %ymm0, %k0
1396 ; SKX-NEXT: kortestd %k0, %k0
1397 ; SKX-NEXT: sete %al
1398 ; SKX-NEXT: vzeroupper
1400 %tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1401 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
1402 %tmp2 = bitcast <32 x i1> %tmp1 to i32
1403 %tmp3 = icmp eq i32 %tmp2, 0
1407 define i1 @allones_v64i8_and1(<64 x i8> %arg) {
1408 ; SSE2-LABEL: allones_v64i8_and1:
1410 ; SSE2-NEXT: psllw $7, %xmm0
1411 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1412 ; SSE2-NEXT: psllw $7, %xmm1
1413 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
1414 ; SSE2-NEXT: shll $16, %ecx
1415 ; SSE2-NEXT: orl %eax, %ecx
1416 ; SSE2-NEXT: psllw $7, %xmm2
1417 ; SSE2-NEXT: pmovmskb %xmm2, %eax
1418 ; SSE2-NEXT: psllw $7, %xmm3
1419 ; SSE2-NEXT: pmovmskb %xmm3, %edx
1420 ; SSE2-NEXT: shll $16, %edx
1421 ; SSE2-NEXT: orl %eax, %edx
1422 ; SSE2-NEXT: shlq $32, %rdx
1423 ; SSE2-NEXT: orq %rcx, %rdx
1424 ; SSE2-NEXT: cmpq $-1, %rdx
1425 ; SSE2-NEXT: sete %al
1428 ; AVX1-LABEL: allones_v64i8_and1:
1430 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm2
1431 ; AVX1-NEXT: vpmovmskb %xmm2, %eax
1432 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1433 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1434 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
1435 ; AVX1-NEXT: shll $16, %ecx
1436 ; AVX1-NEXT: orl %eax, %ecx
1437 ; AVX1-NEXT: vpsllw $7, %xmm1, %xmm0
1438 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1439 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
1440 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1441 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
1442 ; AVX1-NEXT: shll $16, %edx
1443 ; AVX1-NEXT: orl %eax, %edx
1444 ; AVX1-NEXT: shlq $32, %rdx
1445 ; AVX1-NEXT: orq %rcx, %rdx
1446 ; AVX1-NEXT: cmpq $-1, %rdx
1447 ; AVX1-NEXT: sete %al
1448 ; AVX1-NEXT: vzeroupper
1451 ; AVX2-LABEL: allones_v64i8_and1:
1453 ; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1
1454 ; AVX2-NEXT: vpmovmskb %ymm1, %eax
1455 ; AVX2-NEXT: shlq $32, %rax
1456 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
1457 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
1458 ; AVX2-NEXT: orq %rax, %rcx
1459 ; AVX2-NEXT: cmpq $-1, %rcx
1460 ; AVX2-NEXT: sete %al
1461 ; AVX2-NEXT: vzeroupper
1464 ; KNL-LABEL: allones_v64i8_and1:
1466 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1467 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
1468 ; KNL-NEXT: vpsllw $7, %ymm1, %ymm1
1469 ; KNL-NEXT: vpmovmskb %ymm1, %eax
1470 ; KNL-NEXT: shlq $32, %rax
1471 ; KNL-NEXT: vpmovmskb %ymm0, %ecx
1472 ; KNL-NEXT: orq %rax, %rcx
1473 ; KNL-NEXT: cmpq $-1, %rcx
1474 ; KNL-NEXT: sete %al
1475 ; KNL-NEXT: vzeroupper
1478 ; SKX-LABEL: allones_v64i8_and1:
1480 ; SKX-NEXT: vptestmb {{.*}}(%rip), %zmm0, %k0
1481 ; SKX-NEXT: kortestq %k0, %k0
1482 ; SKX-NEXT: setb %al
1483 ; SKX-NEXT: vzeroupper
1485 %tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1486 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
1487 %tmp2 = bitcast <64 x i1> %tmp1 to i64
1488 %tmp3 = icmp eq i64 %tmp2, -1
1492 define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
1493 ; SSE2-LABEL: allzeros_v64i8_and1:
1495 ; SSE2-NEXT: psllw $7, %xmm0
1496 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1497 ; SSE2-NEXT: psllw $7, %xmm1
1498 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
1499 ; SSE2-NEXT: shll $16, %ecx
1500 ; SSE2-NEXT: orl %eax, %ecx
1501 ; SSE2-NEXT: psllw $7, %xmm2
1502 ; SSE2-NEXT: pmovmskb %xmm2, %eax
1503 ; SSE2-NEXT: psllw $7, %xmm3
1504 ; SSE2-NEXT: pmovmskb %xmm3, %edx
1505 ; SSE2-NEXT: shll $16, %edx
1506 ; SSE2-NEXT: orl %eax, %edx
1507 ; SSE2-NEXT: shlq $32, %rdx
1508 ; SSE2-NEXT: orq %rcx, %rdx
1509 ; SSE2-NEXT: sete %al
1512 ; AVX1-LABEL: allzeros_v64i8_and1:
1514 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm2
1515 ; AVX1-NEXT: vpmovmskb %xmm2, %eax
1516 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1517 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1518 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
1519 ; AVX1-NEXT: shll $16, %ecx
1520 ; AVX1-NEXT: orl %eax, %ecx
1521 ; AVX1-NEXT: vpsllw $7, %xmm1, %xmm0
1522 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1523 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
1524 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1525 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
1526 ; AVX1-NEXT: shll $16, %edx
1527 ; AVX1-NEXT: orl %eax, %edx
1528 ; AVX1-NEXT: shlq $32, %rdx
1529 ; AVX1-NEXT: orq %rcx, %rdx
1530 ; AVX1-NEXT: sete %al
1531 ; AVX1-NEXT: vzeroupper
1534 ; AVX2-LABEL: allzeros_v64i8_and1:
1536 ; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1
1537 ; AVX2-NEXT: vpmovmskb %ymm1, %eax
1538 ; AVX2-NEXT: shlq $32, %rax
1539 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
1540 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
1541 ; AVX2-NEXT: orq %rax, %rcx
1542 ; AVX2-NEXT: sete %al
1543 ; AVX2-NEXT: vzeroupper
1546 ; KNL-LABEL: allzeros_v64i8_and1:
1548 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1549 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
1550 ; KNL-NEXT: vpsllw $7, %ymm1, %ymm1
1551 ; KNL-NEXT: vpmovmskb %ymm1, %eax
1552 ; KNL-NEXT: shlq $32, %rax
1553 ; KNL-NEXT: vpmovmskb %ymm0, %ecx
1554 ; KNL-NEXT: orq %rax, %rcx
1555 ; KNL-NEXT: sete %al
1556 ; KNL-NEXT: vzeroupper
1559 ; SKX-LABEL: allzeros_v64i8_and1:
1561 ; SKX-NEXT: vptestmb {{.*}}(%rip), %zmm0, %k0
1562 ; SKX-NEXT: kortestq %k0, %k0
1563 ; SKX-NEXT: sete %al
1564 ; SKX-NEXT: vzeroupper
1566 %tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1567 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
1568 %tmp2 = bitcast <64 x i1> %tmp1 to i64
1569 %tmp3 = icmp eq i64 %tmp2, 0
1573 define i1 @allones_v8i16_and1(<8 x i16> %arg) {
1574 ; SSE2-LABEL: allones_v8i16_and1:
1576 ; SSE2-NEXT: psllw $15, %xmm0
1577 ; SSE2-NEXT: packsswb %xmm0, %xmm0
1578 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1579 ; SSE2-NEXT: cmpb $-1, %al
1580 ; SSE2-NEXT: sete %al
1583 ; AVX-LABEL: allones_v8i16_and1:
1585 ; AVX-NEXT: vpsllw $15, %xmm0, %xmm0
1586 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
1587 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1588 ; AVX-NEXT: cmpb $-1, %al
1589 ; AVX-NEXT: sete %al
1592 ; KNL-LABEL: allones_v8i16_and1:
1594 ; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
1595 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
1596 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1597 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1598 ; KNL-NEXT: kmovw %k0, %eax
1599 ; KNL-NEXT: cmpb $-1, %al
1600 ; KNL-NEXT: sete %al
1601 ; KNL-NEXT: vzeroupper
1604 ; SKX-LABEL: allones_v8i16_and1:
1606 ; SKX-NEXT: vptestmw {{.*}}(%rip), %xmm0, %k0
1607 ; SKX-NEXT: kortestb %k0, %k0
1608 ; SKX-NEXT: setb %al
1610 %tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1611 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
1612 %tmp2 = bitcast <8 x i1> %tmp1 to i8
1613 %tmp3 = icmp eq i8 %tmp2, -1
1617 define i1 @allzeros_v8i16_and1(<8 x i16> %arg) {
1618 ; SSE2-LABEL: allzeros_v8i16_and1:
1620 ; SSE2-NEXT: psllw $15, %xmm0
1621 ; SSE2-NEXT: packsswb %xmm0, %xmm0
1622 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1623 ; SSE2-NEXT: testb %al, %al
1624 ; SSE2-NEXT: sete %al
1627 ; AVX-LABEL: allzeros_v8i16_and1:
1629 ; AVX-NEXT: vpsllw $15, %xmm0, %xmm0
1630 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
1631 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1632 ; AVX-NEXT: testb %al, %al
1633 ; AVX-NEXT: sete %al
1636 ; KNL-LABEL: allzeros_v8i16_and1:
1638 ; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
1639 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
1640 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1641 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1642 ; KNL-NEXT: kmovw %k0, %eax
1643 ; KNL-NEXT: testb %al, %al
1644 ; KNL-NEXT: sete %al
1645 ; KNL-NEXT: vzeroupper
1648 ; SKX-LABEL: allzeros_v8i16_and1:
1650 ; SKX-NEXT: vptestmw {{.*}}(%rip), %xmm0, %k0
1651 ; SKX-NEXT: kortestb %k0, %k0
1652 ; SKX-NEXT: sete %al
1654 %tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1655 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
1656 %tmp2 = bitcast <8 x i1> %tmp1 to i8
1657 %tmp3 = icmp eq i8 %tmp2, 0
1661 define i1 @allones_v16i16_and1(<16 x i16> %arg) {
1662 ; SSE2-LABEL: allones_v16i16_and1:
1664 ; SSE2-NEXT: psllw $15, %xmm1
1665 ; SSE2-NEXT: psllw $15, %xmm0
1666 ; SSE2-NEXT: packsswb %xmm1, %xmm0
1667 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1668 ; SSE2-NEXT: cmpw $-1, %ax
1669 ; SSE2-NEXT: sete %al
1672 ; AVX1-LABEL: allones_v16i16_and1:
1674 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1675 ; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
1676 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
1677 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1678 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1679 ; AVX1-NEXT: cmpw $-1, %ax
1680 ; AVX1-NEXT: sete %al
1681 ; AVX1-NEXT: vzeroupper
1684 ; AVX2-LABEL: allones_v16i16_and1:
1686 ; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
1687 ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
1688 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1689 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1690 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
1691 ; AVX2-NEXT: cmpw $-1, %ax
1692 ; AVX2-NEXT: sete %al
1693 ; AVX2-NEXT: vzeroupper
1696 ; KNL-LABEL: allones_v16i16_and1:
1698 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
1699 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
1700 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
1701 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1702 ; KNL-NEXT: kortestw %k0, %k0
1703 ; KNL-NEXT: setb %al
1704 ; KNL-NEXT: vzeroupper
1707 ; SKX-LABEL: allones_v16i16_and1:
1709 ; SKX-NEXT: vptestmw {{.*}}(%rip), %ymm0, %k0
1710 ; SKX-NEXT: kortestw %k0, %k0
1711 ; SKX-NEXT: setb %al
1712 ; SKX-NEXT: vzeroupper
1714 %tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1715 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
1716 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1717 %tmp3 = icmp eq i16 %tmp2, -1
1721 define i1 @allones_v32i16_and1(<32 x i16> %arg) {
1722 ; SSE2-LABEL: allones_v32i16_and1:
1724 ; SSE2-NEXT: psllw $15, %xmm1
1725 ; SSE2-NEXT: psllw $15, %xmm0
1726 ; SSE2-NEXT: packsswb %xmm1, %xmm0
1727 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1728 ; SSE2-NEXT: psllw $15, %xmm3
1729 ; SSE2-NEXT: psllw $15, %xmm2
1730 ; SSE2-NEXT: packsswb %xmm3, %xmm2
1731 ; SSE2-NEXT: pmovmskb %xmm2, %ecx
1732 ; SSE2-NEXT: shll $16, %ecx
1733 ; SSE2-NEXT: orl %eax, %ecx
1734 ; SSE2-NEXT: cmpl $-1, %ecx
1735 ; SSE2-NEXT: sete %al
1738 ; AVX1-LABEL: allones_v32i16_and1:
1740 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1741 ; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
1742 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
1743 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
1744 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1745 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
1746 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
1747 ; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
1748 ; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
1749 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
1750 ; AVX1-NEXT: shll $16, %ecx
1751 ; AVX1-NEXT: orl %eax, %ecx
1752 ; AVX1-NEXT: cmpl $-1, %ecx
1753 ; AVX1-NEXT: sete %al
1754 ; AVX1-NEXT: vzeroupper
1757 ; AVX2-LABEL: allones_v32i16_and1:
1759 ; AVX2-NEXT: vpsllw $15, %ymm1, %ymm1
1760 ; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
1761 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
1762 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1763 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1764 ; AVX2-NEXT: cmpl $-1, %eax
1765 ; AVX2-NEXT: sete %al
1766 ; AVX2-NEXT: vzeroupper
1769 ; KNL-LABEL: allones_v32i16_and1:
1771 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1772 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
1773 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
1774 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
1775 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1776 ; KNL-NEXT: kmovw %k0, %eax
1777 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm0
1778 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
1779 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
1780 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1781 ; KNL-NEXT: kmovw %k0, %ecx
1782 ; KNL-NEXT: shll $16, %ecx
1783 ; KNL-NEXT: orl %eax, %ecx
1784 ; KNL-NEXT: cmpl $-1, %ecx
1785 ; KNL-NEXT: sete %al
1786 ; KNL-NEXT: vzeroupper
1789 ; SKX-LABEL: allones_v32i16_and1:
1791 ; SKX-NEXT: vptestmw {{.*}}(%rip), %zmm0, %k0
1792 ; SKX-NEXT: kortestd %k0, %k0
1793 ; SKX-NEXT: setb %al
1794 ; SKX-NEXT: vzeroupper
1796 %tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1797 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
1798 %tmp2 = bitcast <32 x i1> %tmp1 to i32
1799 %tmp3 = icmp eq i32 %tmp2, -1
1803 define i1 @allzeros_v32i16_and1(<32 x i16> %arg) {
1804 ; SSE2-LABEL: allzeros_v32i16_and1:
1806 ; SSE2-NEXT: psllw $15, %xmm1
1807 ; SSE2-NEXT: psllw $15, %xmm0
1808 ; SSE2-NEXT: packsswb %xmm1, %xmm0
1809 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1810 ; SSE2-NEXT: psllw $15, %xmm3
1811 ; SSE2-NEXT: psllw $15, %xmm2
1812 ; SSE2-NEXT: packsswb %xmm3, %xmm2
1813 ; SSE2-NEXT: pmovmskb %xmm2, %ecx
1814 ; SSE2-NEXT: shll $16, %ecx
1815 ; SSE2-NEXT: orl %eax, %ecx
1816 ; SSE2-NEXT: sete %al
1819 ; AVX1-LABEL: allzeros_v32i16_and1:
1821 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1822 ; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
1823 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
1824 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
1825 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1826 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
1827 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
1828 ; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
1829 ; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
1830 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
1831 ; AVX1-NEXT: shll $16, %ecx
1832 ; AVX1-NEXT: orl %eax, %ecx
1833 ; AVX1-NEXT: sete %al
1834 ; AVX1-NEXT: vzeroupper
1837 ; AVX2-LABEL: allzeros_v32i16_and1:
1839 ; AVX2-NEXT: vpsllw $15, %ymm1, %ymm1
1840 ; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
1841 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
1842 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1843 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1844 ; AVX2-NEXT: testl %eax, %eax
1845 ; AVX2-NEXT: sete %al
1846 ; AVX2-NEXT: vzeroupper
1849 ; KNL-LABEL: allzeros_v32i16_and1:
1851 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1852 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
1853 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
1854 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
1855 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1856 ; KNL-NEXT: kmovw %k0, %eax
1857 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm0
1858 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
1859 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
1860 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1861 ; KNL-NEXT: kmovw %k0, %ecx
1862 ; KNL-NEXT: shll $16, %ecx
1863 ; KNL-NEXT: orl %eax, %ecx
1864 ; KNL-NEXT: sete %al
1865 ; KNL-NEXT: vzeroupper
1868 ; SKX-LABEL: allzeros_v32i16_and1:
1870 ; SKX-NEXT: vptestmw {{.*}}(%rip), %zmm0, %k0
1871 ; SKX-NEXT: kortestd %k0, %k0
1872 ; SKX-NEXT: sete %al
1873 ; SKX-NEXT: vzeroupper
1875 %tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1876 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
1877 %tmp2 = bitcast <32 x i1> %tmp1 to i32
1878 %tmp3 = icmp eq i32 %tmp2, 0
1882 define i1 @allzeros_v16i16_and1(<16 x i16> %arg) {
1883 ; SSE2-LABEL: allzeros_v16i16_and1:
1885 ; SSE2-NEXT: psllw $15, %xmm1
1886 ; SSE2-NEXT: psllw $15, %xmm0
1887 ; SSE2-NEXT: packsswb %xmm1, %xmm0
1888 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1889 ; SSE2-NEXT: testw %ax, %ax
1890 ; SSE2-NEXT: sete %al
1893 ; AVX1-LABEL: allzeros_v16i16_and1:
1895 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1896 ; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
1897 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
1898 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1899 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1900 ; AVX1-NEXT: testw %ax, %ax
1901 ; AVX1-NEXT: sete %al
1902 ; AVX1-NEXT: vzeroupper
1905 ; AVX2-LABEL: allzeros_v16i16_and1:
1907 ; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
1908 ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
1909 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1910 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1911 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
1912 ; AVX2-NEXT: testw %ax, %ax
1913 ; AVX2-NEXT: sete %al
1914 ; AVX2-NEXT: vzeroupper
1917 ; KNL-LABEL: allzeros_v16i16_and1:
1919 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
1920 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
1921 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
1922 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1923 ; KNL-NEXT: kortestw %k0, %k0
1924 ; KNL-NEXT: sete %al
1925 ; KNL-NEXT: vzeroupper
1928 ; SKX-LABEL: allzeros_v16i16_and1:
1930 ; SKX-NEXT: vptestmw {{.*}}(%rip), %ymm0, %k0
1931 ; SKX-NEXT: kortestw %k0, %k0
1932 ; SKX-NEXT: sete %al
1933 ; SKX-NEXT: vzeroupper
1935 %tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1936 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
1937 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1938 %tmp3 = icmp eq i16 %tmp2, 0
1942 define i1 @allones_v4i32_and1(<4 x i32> %arg) {
1943 ; SSE2-LABEL: allones_v4i32_and1:
1945 ; SSE2-NEXT: pslld $31, %xmm0
1946 ; SSE2-NEXT: movmskps %xmm0, %eax
1947 ; SSE2-NEXT: cmpb $15, %al
1948 ; SSE2-NEXT: sete %al
1951 ; AVX-LABEL: allones_v4i32_and1:
1953 ; AVX-NEXT: vpslld $31, %xmm0, %xmm0
1954 ; AVX-NEXT: vmovmskps %xmm0, %eax
1955 ; AVX-NEXT: cmpb $15, %al
1956 ; AVX-NEXT: sete %al
1959 ; KNL-LABEL: allones_v4i32_and1:
1961 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1962 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
1963 ; KNL-NEXT: kmovw %k0, %eax
1964 ; KNL-NEXT: andb $15, %al
1965 ; KNL-NEXT: cmpb $15, %al
1966 ; KNL-NEXT: sete %al
1967 ; KNL-NEXT: vzeroupper
1970 ; SKX-LABEL: allones_v4i32_and1:
1972 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
1973 ; SKX-NEXT: kmovd %k0, %eax
1974 ; SKX-NEXT: andb $15, %al
1975 ; SKX-NEXT: cmpb $15, %al
1976 ; SKX-NEXT: sete %al
1978 %tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
1979 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
1980 %tmp2 = bitcast <4 x i1> %tmp1 to i4
1981 %tmp3 = icmp eq i4 %tmp2, -1
1985 define i1 @allzeros_v4i32_and1(<4 x i32> %arg) {
1986 ; SSE2-LABEL: allzeros_v4i32_and1:
1988 ; SSE2-NEXT: pslld $31, %xmm0
1989 ; SSE2-NEXT: movmskps %xmm0, %eax
1990 ; SSE2-NEXT: testb %al, %al
1991 ; SSE2-NEXT: sete %al
1994 ; AVX-LABEL: allzeros_v4i32_and1:
1996 ; AVX-NEXT: vpslld $31, %xmm0, %xmm0
1997 ; AVX-NEXT: vmovmskps %xmm0, %eax
1998 ; AVX-NEXT: testb %al, %al
1999 ; AVX-NEXT: sete %al
2002 ; KNL-LABEL: allzeros_v4i32_and1:
2004 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2005 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
2006 ; KNL-NEXT: kmovw %k0, %eax
2007 ; KNL-NEXT: testb $15, %al
2008 ; KNL-NEXT: sete %al
2009 ; KNL-NEXT: vzeroupper
2012 ; SKX-LABEL: allzeros_v4i32_and1:
2014 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
2015 ; SKX-NEXT: kmovd %k0, %eax
2016 ; SKX-NEXT: testb $15, %al
2017 ; SKX-NEXT: sete %al
2019 %tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
2020 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
2021 %tmp2 = bitcast <4 x i1> %tmp1 to i4
2022 %tmp3 = icmp eq i4 %tmp2, 0
2026 define i1 @allones_v8i32_and1(<8 x i32> %arg) {
2027 ; SSE2-LABEL: allones_v8i32_and1:
2029 ; SSE2-NEXT: pslld $31, %xmm1
2030 ; SSE2-NEXT: pslld $31, %xmm0
2031 ; SSE2-NEXT: packssdw %xmm1, %xmm0
2032 ; SSE2-NEXT: packsswb %xmm0, %xmm0
2033 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2034 ; SSE2-NEXT: cmpb $-1, %al
2035 ; SSE2-NEXT: sete %al
2038 ; AVX1-LABEL: allones_v8i32_and1:
2040 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
2041 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2042 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
2043 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2044 ; AVX1-NEXT: vmovmskps %ymm0, %eax
2045 ; AVX1-NEXT: cmpb $-1, %al
2046 ; AVX1-NEXT: sete %al
2047 ; AVX1-NEXT: vzeroupper
2050 ; AVX2-LABEL: allones_v8i32_and1:
2052 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
2053 ; AVX2-NEXT: vmovmskps %ymm0, %eax
2054 ; AVX2-NEXT: cmpb $-1, %al
2055 ; AVX2-NEXT: sete %al
2056 ; AVX2-NEXT: vzeroupper
2059 ; KNL-LABEL: allones_v8i32_and1:
2061 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2062 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
2063 ; KNL-NEXT: kmovw %k0, %eax
2064 ; KNL-NEXT: cmpb $-1, %al
2065 ; KNL-NEXT: sete %al
2066 ; KNL-NEXT: vzeroupper
2069 ; SKX-LABEL: allones_v8i32_and1:
2071 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to8}, %ymm0, %k0
2072 ; SKX-NEXT: kortestb %k0, %k0
2073 ; SKX-NEXT: setb %al
2074 ; SKX-NEXT: vzeroupper
2076 %tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2077 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
2078 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2079 %tmp3 = icmp eq i8 %tmp2, -1
2083 define i1 @allzeros_v8i32_and1(<8 x i32> %arg) {
2084 ; SSE2-LABEL: allzeros_v8i32_and1:
2086 ; SSE2-NEXT: pslld $31, %xmm1
2087 ; SSE2-NEXT: pslld $31, %xmm0
2088 ; SSE2-NEXT: packssdw %xmm1, %xmm0
2089 ; SSE2-NEXT: packsswb %xmm0, %xmm0
2090 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2091 ; SSE2-NEXT: testb %al, %al
2092 ; SSE2-NEXT: sete %al
2095 ; AVX1-LABEL: allzeros_v8i32_and1:
2097 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
2098 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2099 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
2100 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2101 ; AVX1-NEXT: vmovmskps %ymm0, %eax
2102 ; AVX1-NEXT: testb %al, %al
2103 ; AVX1-NEXT: sete %al
2104 ; AVX1-NEXT: vzeroupper
2107 ; AVX2-LABEL: allzeros_v8i32_and1:
2109 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
2110 ; AVX2-NEXT: vmovmskps %ymm0, %eax
2111 ; AVX2-NEXT: testb %al, %al
2112 ; AVX2-NEXT: sete %al
2113 ; AVX2-NEXT: vzeroupper
2116 ; KNL-LABEL: allzeros_v8i32_and1:
2118 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2119 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
2120 ; KNL-NEXT: kmovw %k0, %eax
2121 ; KNL-NEXT: testb %al, %al
2122 ; KNL-NEXT: sete %al
2123 ; KNL-NEXT: vzeroupper
2126 ; SKX-LABEL: allzeros_v8i32_and1:
2128 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to8}, %ymm0, %k0
2129 ; SKX-NEXT: kortestb %k0, %k0
2130 ; SKX-NEXT: sete %al
2131 ; SKX-NEXT: vzeroupper
2133 %tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2134 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
2135 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2136 %tmp3 = icmp eq i8 %tmp2, 0
2140 define i1 @allones_v16i32_and1(<16 x i32> %arg) {
2141 ; SSE2-LABEL: allones_v16i32_and1:
2143 ; SSE2-NEXT: pslld $31, %xmm3
2144 ; SSE2-NEXT: pslld $31, %xmm2
2145 ; SSE2-NEXT: packssdw %xmm3, %xmm2
2146 ; SSE2-NEXT: pslld $31, %xmm1
2147 ; SSE2-NEXT: pslld $31, %xmm0
2148 ; SSE2-NEXT: packssdw %xmm1, %xmm0
2149 ; SSE2-NEXT: packsswb %xmm2, %xmm0
2150 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2151 ; SSE2-NEXT: cmpw $-1, %ax
2152 ; SSE2-NEXT: sete %al
2155 ; AVX1-LABEL: allones_v16i32_and1:
2157 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2158 ; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
2159 ; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
2160 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2161 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2162 ; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
2163 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
2164 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2165 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2166 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2167 ; AVX1-NEXT: cmpw $-1, %ax
2168 ; AVX1-NEXT: sete %al
2169 ; AVX1-NEXT: vzeroupper
2172 ; AVX2-LABEL: allones_v16i32_and1:
2174 ; AVX2-NEXT: vpslld $31, %ymm1, %ymm1
2175 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
2176 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
2177 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
2178 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
2179 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2180 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2181 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2182 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
2183 ; AVX2-NEXT: cmpw $-1, %ax
2184 ; AVX2-NEXT: sete %al
2185 ; AVX2-NEXT: vzeroupper
2188 ; KNL-LABEL: allones_v16i32_and1:
2190 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
2191 ; KNL-NEXT: kortestw %k0, %k0
2192 ; KNL-NEXT: setb %al
2193 ; KNL-NEXT: vzeroupper
2196 ; SKX-LABEL: allones_v16i32_and1:
2198 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
2199 ; SKX-NEXT: kortestw %k0, %k0
2200 ; SKX-NEXT: setb %al
2201 ; SKX-NEXT: vzeroupper
2203 %tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2204 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
2205 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2206 %tmp3 = icmp eq i16 %tmp2, -1
2210 define i1 @allzeros_v16i32_and1(<16 x i32> %arg) {
2211 ; SSE2-LABEL: allzeros_v16i32_and1:
2213 ; SSE2-NEXT: pslld $31, %xmm3
2214 ; SSE2-NEXT: pslld $31, %xmm2
2215 ; SSE2-NEXT: packssdw %xmm3, %xmm2
2216 ; SSE2-NEXT: pslld $31, %xmm1
2217 ; SSE2-NEXT: pslld $31, %xmm0
2218 ; SSE2-NEXT: packssdw %xmm1, %xmm0
2219 ; SSE2-NEXT: packsswb %xmm2, %xmm0
2220 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2221 ; SSE2-NEXT: testw %ax, %ax
2222 ; SSE2-NEXT: sete %al
2225 ; AVX1-LABEL: allzeros_v16i32_and1:
2227 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2228 ; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
2229 ; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
2230 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2231 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2232 ; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
2233 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
2234 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2235 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2236 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2237 ; AVX1-NEXT: testw %ax, %ax
2238 ; AVX1-NEXT: sete %al
2239 ; AVX1-NEXT: vzeroupper
2242 ; AVX2-LABEL: allzeros_v16i32_and1:
2244 ; AVX2-NEXT: vpslld $31, %ymm1, %ymm1
2245 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
2246 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
2247 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
2248 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
2249 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2250 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2251 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2252 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
2253 ; AVX2-NEXT: testw %ax, %ax
2254 ; AVX2-NEXT: sete %al
2255 ; AVX2-NEXT: vzeroupper
2258 ; KNL-LABEL: allzeros_v16i32_and1:
2260 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
2261 ; KNL-NEXT: kortestw %k0, %k0
2262 ; KNL-NEXT: sete %al
2263 ; KNL-NEXT: vzeroupper
2266 ; SKX-LABEL: allzeros_v16i32_and1:
2268 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
2269 ; SKX-NEXT: kortestw %k0, %k0
2270 ; SKX-NEXT: sete %al
2271 ; SKX-NEXT: vzeroupper
2273 %tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2274 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
2275 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2276 %tmp3 = icmp eq i16 %tmp2, 0
2280 define i1 @allones_v2i64_and1(<2 x i64> %arg) {
2281 ; SSE2-LABEL: allones_v2i64_and1:
2283 ; SSE2-NEXT: psllq $63, %xmm0
2284 ; SSE2-NEXT: movmskpd %xmm0, %eax
2285 ; SSE2-NEXT: cmpb $3, %al
2286 ; SSE2-NEXT: sete %al
2289 ; AVX-LABEL: allones_v2i64_and1:
2291 ; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
2292 ; AVX-NEXT: vmovmskpd %xmm0, %eax
2293 ; AVX-NEXT: cmpb $3, %al
2294 ; AVX-NEXT: sete %al
2297 ; KNL-LABEL: allones_v2i64_and1:
2299 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2300 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1]
2301 ; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
2302 ; KNL-NEXT: kmovw %k0, %eax
2303 ; KNL-NEXT: andb $3, %al
2304 ; KNL-NEXT: cmpb $3, %al
2305 ; KNL-NEXT: sete %al
2306 ; KNL-NEXT: vzeroupper
2309 ; SKX-LABEL: allones_v2i64_and1:
2311 ; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
2312 ; SKX-NEXT: kmovd %k0, %eax
2313 ; SKX-NEXT: andb $3, %al
2314 ; SKX-NEXT: cmpb $3, %al
2315 ; SKX-NEXT: sete %al
2317 %tmp = and <2 x i64> %arg, <i64 1, i64 1>
2318 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
2319 %tmp2 = bitcast <2 x i1> %tmp1 to i2
2320 %tmp3 = icmp eq i2 %tmp2, -1
2324 define i1 @allzeros_v2i64_and1(<2 x i64> %arg) {
2325 ; SSE2-LABEL: allzeros_v2i64_and1:
2327 ; SSE2-NEXT: psllq $63, %xmm0
2328 ; SSE2-NEXT: movmskpd %xmm0, %eax
2329 ; SSE2-NEXT: testb %al, %al
2330 ; SSE2-NEXT: sete %al
2333 ; AVX-LABEL: allzeros_v2i64_and1:
2335 ; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
2336 ; AVX-NEXT: vmovmskpd %xmm0, %eax
2337 ; AVX-NEXT: testb %al, %al
2338 ; AVX-NEXT: sete %al
2341 ; KNL-LABEL: allzeros_v2i64_and1:
2343 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2344 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1]
2345 ; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
2346 ; KNL-NEXT: kmovw %k0, %eax
2347 ; KNL-NEXT: testb $3, %al
2348 ; KNL-NEXT: sete %al
2349 ; KNL-NEXT: vzeroupper
2352 ; SKX-LABEL: allzeros_v2i64_and1:
2354 ; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
2355 ; SKX-NEXT: kmovd %k0, %eax
2356 ; SKX-NEXT: testb $3, %al
2357 ; SKX-NEXT: sete %al
2359 %tmp = and <2 x i64> %arg, <i64 1, i64 1>
2360 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
2361 %tmp2 = bitcast <2 x i1> %tmp1 to i2
2362 %tmp3 = icmp eq i2 %tmp2, 0
2366 define i1 @allones_v4i64_and1(<4 x i64> %arg) {
2367 ; SSE2-LABEL: allones_v4i64_and1:
2369 ; SSE2-NEXT: psllq $63, %xmm1
2370 ; SSE2-NEXT: psllq $63, %xmm0
2371 ; SSE2-NEXT: packssdw %xmm1, %xmm0
2372 ; SSE2-NEXT: movmskps %xmm0, %eax
2373 ; SSE2-NEXT: cmpb $15, %al
2374 ; SSE2-NEXT: sete %al
2377 ; AVX1-LABEL: allones_v4i64_and1:
2379 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
2380 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2381 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
2382 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2383 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
2384 ; AVX1-NEXT: cmpb $15, %al
2385 ; AVX1-NEXT: sete %al
2386 ; AVX1-NEXT: vzeroupper
2389 ; AVX2-LABEL: allones_v4i64_and1:
2391 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
2392 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
2393 ; AVX2-NEXT: cmpb $15, %al
2394 ; AVX2-NEXT: sete %al
2395 ; AVX2-NEXT: vzeroupper
2398 ; KNL-LABEL: allones_v4i64_and1:
2400 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2401 ; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
2402 ; KNL-NEXT: kmovw %k0, %eax
2403 ; KNL-NEXT: andb $15, %al
2404 ; KNL-NEXT: cmpb $15, %al
2405 ; KNL-NEXT: sete %al
2406 ; KNL-NEXT: vzeroupper
2409 ; SKX-LABEL: allones_v4i64_and1:
2411 ; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
2412 ; SKX-NEXT: kmovd %k0, %eax
2413 ; SKX-NEXT: andb $15, %al
2414 ; SKX-NEXT: cmpb $15, %al
2415 ; SKX-NEXT: sete %al
2416 ; SKX-NEXT: vzeroupper
2418 %tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
2419 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
2420 %tmp2 = bitcast <4 x i1> %tmp1 to i4
2421 %tmp3 = icmp eq i4 %tmp2, -1
2425 define i1 @allzeros_v4i64_and1(<4 x i64> %arg) {
2426 ; SSE2-LABEL: allzeros_v4i64_and1:
2428 ; SSE2-NEXT: psllq $63, %xmm1
2429 ; SSE2-NEXT: psllq $63, %xmm0
2430 ; SSE2-NEXT: packssdw %xmm1, %xmm0
2431 ; SSE2-NEXT: movmskps %xmm0, %eax
2432 ; SSE2-NEXT: testb %al, %al
2433 ; SSE2-NEXT: sete %al
2436 ; AVX1-LABEL: allzeros_v4i64_and1:
2438 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
2439 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2440 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
2441 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2442 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
2443 ; AVX1-NEXT: testb %al, %al
2444 ; AVX1-NEXT: sete %al
2445 ; AVX1-NEXT: vzeroupper
2448 ; AVX2-LABEL: allzeros_v4i64_and1:
2450 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
2451 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
2452 ; AVX2-NEXT: testb %al, %al
2453 ; AVX2-NEXT: sete %al
2454 ; AVX2-NEXT: vzeroupper
2457 ; KNL-LABEL: allzeros_v4i64_and1:
2459 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2460 ; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
2461 ; KNL-NEXT: kmovw %k0, %eax
2462 ; KNL-NEXT: testb $15, %al
2463 ; KNL-NEXT: sete %al
2464 ; KNL-NEXT: vzeroupper
2467 ; SKX-LABEL: allzeros_v4i64_and1:
2469 ; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
2470 ; SKX-NEXT: kmovd %k0, %eax
2471 ; SKX-NEXT: testb $15, %al
2472 ; SKX-NEXT: sete %al
2473 ; SKX-NEXT: vzeroupper
2475 %tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
2476 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
2477 %tmp2 = bitcast <4 x i1> %tmp1 to i4
2478 %tmp3 = icmp eq i4 %tmp2, 0
2482 define i1 @allones_v8i64_and1(<8 x i64> %arg) {
2483 ; SSE2-LABEL: allones_v8i64_and1:
2485 ; SSE2-NEXT: psllq $63, %xmm3
2486 ; SSE2-NEXT: psllq $63, %xmm2
2487 ; SSE2-NEXT: packssdw %xmm3, %xmm2
2488 ; SSE2-NEXT: psllq $63, %xmm1
2489 ; SSE2-NEXT: psllq $63, %xmm0
2490 ; SSE2-NEXT: packssdw %xmm1, %xmm0
2491 ; SSE2-NEXT: packssdw %xmm2, %xmm0
2492 ; SSE2-NEXT: packsswb %xmm0, %xmm0
2493 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2494 ; SSE2-NEXT: cmpb $-1, %al
2495 ; SSE2-NEXT: sete %al
2498 ; AVX1-LABEL: allones_v8i64_and1:
2500 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2501 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2502 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
2503 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
2504 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
2505 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
2506 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2507 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2508 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2509 ; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
2510 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2511 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2512 ; AVX1-NEXT: vmovmskps %ymm0, %eax
2513 ; AVX1-NEXT: cmpb $-1, %al
2514 ; AVX1-NEXT: sete %al
2515 ; AVX1-NEXT: vzeroupper
2518 ; AVX2-LABEL: allones_v8i64_and1:
2520 ; AVX2-NEXT: vpsllq $63, %ymm1, %ymm1
2521 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
2522 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
2523 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2524 ; AVX2-NEXT: vmovmskps %ymm0, %eax
2525 ; AVX2-NEXT: cmpb $-1, %al
2526 ; AVX2-NEXT: sete %al
2527 ; AVX2-NEXT: vzeroupper
2530 ; KNL-LABEL: allones_v8i64_and1:
2532 ; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
2533 ; KNL-NEXT: kmovw %k0, %eax
2534 ; KNL-NEXT: cmpb $-1, %al
2535 ; KNL-NEXT: sete %al
2536 ; KNL-NEXT: vzeroupper
2539 ; SKX-LABEL: allones_v8i64_and1:
2541 ; SKX-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
2542 ; SKX-NEXT: kortestb %k0, %k0
2543 ; SKX-NEXT: setb %al
2544 ; SKX-NEXT: vzeroupper
2546 %tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
2547 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
2548 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2549 %tmp3 = icmp eq i8 %tmp2, -1
2553 define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
2554 ; SSE2-LABEL: allzeros_v8i64_and1:
2556 ; SSE2-NEXT: psllq $63, %xmm3
2557 ; SSE2-NEXT: psllq $63, %xmm2
2558 ; SSE2-NEXT: packssdw %xmm3, %xmm2
2559 ; SSE2-NEXT: psllq $63, %xmm1
2560 ; SSE2-NEXT: psllq $63, %xmm0
2561 ; SSE2-NEXT: packssdw %xmm1, %xmm0
2562 ; SSE2-NEXT: packssdw %xmm2, %xmm0
2563 ; SSE2-NEXT: packsswb %xmm0, %xmm0
2564 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2565 ; SSE2-NEXT: testb %al, %al
2566 ; SSE2-NEXT: sete %al
2569 ; AVX1-LABEL: allzeros_v8i64_and1:
2571 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2572 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2573 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
2574 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
2575 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
2576 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
2577 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2578 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2579 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2580 ; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
2581 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2582 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2583 ; AVX1-NEXT: vmovmskps %ymm0, %eax
2584 ; AVX1-NEXT: testb %al, %al
2585 ; AVX1-NEXT: sete %al
2586 ; AVX1-NEXT: vzeroupper
2589 ; AVX2-LABEL: allzeros_v8i64_and1:
2591 ; AVX2-NEXT: vpsllq $63, %ymm1, %ymm1
2592 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
2593 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
2594 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2595 ; AVX2-NEXT: vmovmskps %ymm0, %eax
2596 ; AVX2-NEXT: testb %al, %al
2597 ; AVX2-NEXT: sete %al
2598 ; AVX2-NEXT: vzeroupper
2601 ; KNL-LABEL: allzeros_v8i64_and1:
2603 ; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
2604 ; KNL-NEXT: kmovw %k0, %eax
2605 ; KNL-NEXT: testb %al, %al
2606 ; KNL-NEXT: sete %al
2607 ; KNL-NEXT: vzeroupper
2610 ; SKX-LABEL: allzeros_v8i64_and1:
2612 ; SKX-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
2613 ; SKX-NEXT: kortestb %k0, %k0
2614 ; SKX-NEXT: sete %al
2615 ; SKX-NEXT: vzeroupper
2617 %tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
2618 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
2619 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2620 %tmp3 = icmp eq i8 %tmp2, 0
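; Note: the tests below repeat the all-ones/all-zeros reductions with an AND
; mask of 4 (bit 2) instead of 1. Based on the check lines that follow, the
; expected lowering first moves that bit into the sign position before using
; MOVMSK (e.g. psllw $5 for i8, psllw $13 for i16, pslld $29 for i32,
; psllq $61 for i64), or tests it directly with VPTESTM/KORTEST on AVX-512
; targets.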
2624 define i1 @allones_v16i8_and4(<16 x i8> %arg) {
2625 ; SSE2-LABEL: allones_v16i8_and4:
2627 ; SSE2-NEXT: psllw $5, %xmm0
2628 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2629 ; SSE2-NEXT: cmpw $-1, %ax
2630 ; SSE2-NEXT: sete %al
2633 ; AVX-LABEL: allones_v16i8_and4:
2635 ; AVX-NEXT: vpsllw $5, %xmm0, %xmm0
2636 ; AVX-NEXT: vpmovmskb %xmm0, %eax
2637 ; AVX-NEXT: cmpw $-1, %ax
2638 ; AVX-NEXT: sete %al
2641 ; KNL-LABEL: allones_v16i8_and4:
2643 ; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
2644 ; KNL-NEXT: vpmovmskb %xmm0, %eax
2645 ; KNL-NEXT: cmpw $-1, %ax
2646 ; KNL-NEXT: sete %al
2649 ; SKX-LABEL: allones_v16i8_and4:
2651 ; SKX-NEXT: vptestmb {{.*}}(%rip), %xmm0, %k0
2652 ; SKX-NEXT: kortestw %k0, %k0
2653 ; SKX-NEXT: setb %al
2655 %tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2656 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
2657 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2658 %tmp3 = icmp eq i16 %tmp2, -1
2662 define i1 @allzeros_v16i8_and4(<16 x i8> %arg) {
2663 ; SSE2-LABEL: allzeros_v16i8_and4:
2665 ; SSE2-NEXT: psllw $5, %xmm0
2666 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2667 ; SSE2-NEXT: testw %ax, %ax
2668 ; SSE2-NEXT: sete %al
2671 ; AVX-LABEL: allzeros_v16i8_and4:
2673 ; AVX-NEXT: vpsllw $5, %xmm0, %xmm0
2674 ; AVX-NEXT: vpmovmskb %xmm0, %eax
2675 ; AVX-NEXT: testw %ax, %ax
2676 ; AVX-NEXT: sete %al
2679 ; KNL-LABEL: allzeros_v16i8_and4:
2681 ; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
2682 ; KNL-NEXT: vpmovmskb %xmm0, %eax
2683 ; KNL-NEXT: testw %ax, %ax
2684 ; KNL-NEXT: sete %al
2687 ; SKX-LABEL: allzeros_v16i8_and4:
2689 ; SKX-NEXT: vptestmb {{.*}}(%rip), %xmm0, %k0
2690 ; SKX-NEXT: kortestw %k0, %k0
2691 ; SKX-NEXT: sete %al
2693 %tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2694 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
2695 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2696 %tmp3 = icmp eq i16 %tmp2, 0
2700 define i1 @allones_v32i8_and4(<32 x i8> %arg) {
2701 ; SSE2-LABEL: allones_v32i8_and4:
2703 ; SSE2-NEXT: psllw $5, %xmm0
2704 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2705 ; SSE2-NEXT: psllw $5, %xmm1
2706 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
2707 ; SSE2-NEXT: shll $16, %ecx
2708 ; SSE2-NEXT: orl %eax, %ecx
2709 ; SSE2-NEXT: cmpl $-1, %ecx
2710 ; SSE2-NEXT: sete %al
2713 ; AVX1-LABEL: allones_v32i8_and4:
2715 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm1
2716 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
2717 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2718 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2719 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
2720 ; AVX1-NEXT: shll $16, %ecx
2721 ; AVX1-NEXT: orl %eax, %ecx
2722 ; AVX1-NEXT: cmpl $-1, %ecx
2723 ; AVX1-NEXT: sete %al
2724 ; AVX1-NEXT: vzeroupper
2727 ; AVX2-LABEL: allones_v32i8_and4:
2729 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
2730 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2731 ; AVX2-NEXT: cmpl $-1, %eax
2732 ; AVX2-NEXT: sete %al
2733 ; AVX2-NEXT: vzeroupper
2736 ; KNL-LABEL: allones_v32i8_and4:
2738 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
2739 ; KNL-NEXT: vpmovmskb %ymm0, %eax
2740 ; KNL-NEXT: cmpl $-1, %eax
2741 ; KNL-NEXT: sete %al
2742 ; KNL-NEXT: vzeroupper
2745 ; SKX-LABEL: allones_v32i8_and4:
2747 ; SKX-NEXT: vptestmb {{.*}}(%rip), %ymm0, %k0
2748 ; SKX-NEXT: kortestd %k0, %k0
2749 ; SKX-NEXT: setb %al
2750 ; SKX-NEXT: vzeroupper
2752 %tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2753 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
2754 %tmp2 = bitcast <32 x i1> %tmp1 to i32
2755 %tmp3 = icmp eq i32 %tmp2, -1
2759 define i1 @allzeros_v32i8_and4(<32 x i8> %arg) {
2760 ; SSE2-LABEL: allzeros_v32i8_and4:
2762 ; SSE2-NEXT: psllw $5, %xmm0
2763 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2764 ; SSE2-NEXT: psllw $5, %xmm1
2765 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
2766 ; SSE2-NEXT: shll $16, %ecx
2767 ; SSE2-NEXT: orl %eax, %ecx
2768 ; SSE2-NEXT: sete %al
2771 ; AVX1-LABEL: allzeros_v32i8_and4:
2773 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm1
2774 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
2775 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2776 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2777 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
2778 ; AVX1-NEXT: shll $16, %ecx
2779 ; AVX1-NEXT: orl %eax, %ecx
2780 ; AVX1-NEXT: sete %al
2781 ; AVX1-NEXT: vzeroupper
2784 ; AVX2-LABEL: allzeros_v32i8_and4:
2786 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
2787 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2788 ; AVX2-NEXT: testl %eax, %eax
2789 ; AVX2-NEXT: sete %al
2790 ; AVX2-NEXT: vzeroupper
2793 ; KNL-LABEL: allzeros_v32i8_and4:
2795 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
2796 ; KNL-NEXT: vpmovmskb %ymm0, %eax
2797 ; KNL-NEXT: testl %eax, %eax
2798 ; KNL-NEXT: sete %al
2799 ; KNL-NEXT: vzeroupper
2802 ; SKX-LABEL: allzeros_v32i8_and4:
2804 ; SKX-NEXT: vptestmb {{.*}}(%rip), %ymm0, %k0
2805 ; SKX-NEXT: kortestd %k0, %k0
2806 ; SKX-NEXT: sete %al
2807 ; SKX-NEXT: vzeroupper
2809 %tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2810 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
2811 %tmp2 = bitcast <32 x i1> %tmp1 to i32
2812 %tmp3 = icmp eq i32 %tmp2, 0
2816 define i1 @allones_v64i8_and4(<64 x i8> %arg) {
2817 ; SSE2-LABEL: allones_v64i8_and4:
2819 ; SSE2-NEXT: psllw $5, %xmm0
2820 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2821 ; SSE2-NEXT: psllw $5, %xmm1
2822 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
2823 ; SSE2-NEXT: shll $16, %ecx
2824 ; SSE2-NEXT: orl %eax, %ecx
2825 ; SSE2-NEXT: psllw $5, %xmm2
2826 ; SSE2-NEXT: pmovmskb %xmm2, %eax
2827 ; SSE2-NEXT: psllw $5, %xmm3
2828 ; SSE2-NEXT: pmovmskb %xmm3, %edx
2829 ; SSE2-NEXT: shll $16, %edx
2830 ; SSE2-NEXT: orl %eax, %edx
2831 ; SSE2-NEXT: shlq $32, %rdx
2832 ; SSE2-NEXT: orq %rcx, %rdx
2833 ; SSE2-NEXT: cmpq $-1, %rdx
2834 ; SSE2-NEXT: sete %al
2837 ; AVX1-LABEL: allones_v64i8_and4:
2839 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm2
2840 ; AVX1-NEXT: vpmovmskb %xmm2, %eax
2841 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2842 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2843 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
2844 ; AVX1-NEXT: shll $16, %ecx
2845 ; AVX1-NEXT: orl %eax, %ecx
2846 ; AVX1-NEXT: vpsllw $5, %xmm1, %xmm0
2847 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2848 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
2849 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2850 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
2851 ; AVX1-NEXT: shll $16, %edx
2852 ; AVX1-NEXT: orl %eax, %edx
2853 ; AVX1-NEXT: shlq $32, %rdx
2854 ; AVX1-NEXT: orq %rcx, %rdx
2855 ; AVX1-NEXT: cmpq $-1, %rdx
2856 ; AVX1-NEXT: sete %al
2857 ; AVX1-NEXT: vzeroupper
2860 ; AVX2-LABEL: allones_v64i8_and4:
2862 ; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
2863 ; AVX2-NEXT: vpmovmskb %ymm1, %eax
2864 ; AVX2-NEXT: shlq $32, %rax
2865 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
2866 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
2867 ; AVX2-NEXT: orq %rax, %rcx
2868 ; AVX2-NEXT: cmpq $-1, %rcx
2869 ; AVX2-NEXT: sete %al
2870 ; AVX2-NEXT: vzeroupper
2873 ; KNL-LABEL: allones_v64i8_and4:
2875 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
2876 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
2877 ; KNL-NEXT: vpsllw $5, %ymm1, %ymm1
2878 ; KNL-NEXT: vpmovmskb %ymm1, %eax
2879 ; KNL-NEXT: shlq $32, %rax
2880 ; KNL-NEXT: vpmovmskb %ymm0, %ecx
2881 ; KNL-NEXT: orq %rax, %rcx
2882 ; KNL-NEXT: cmpq $-1, %rcx
2883 ; KNL-NEXT: sete %al
2884 ; KNL-NEXT: vzeroupper
2887 ; SKX-LABEL: allones_v64i8_and4:
2889 ; SKX-NEXT: vptestmb {{.*}}(%rip), %zmm0, %k0
2890 ; SKX-NEXT: kortestq %k0, %k0
2891 ; SKX-NEXT: setb %al
2892 ; SKX-NEXT: vzeroupper
2894 %tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2895 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
2896 %tmp2 = bitcast <64 x i1> %tmp1 to i64
2897 %tmp3 = icmp eq i64 %tmp2, -1
2901 define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
2902 ; SSE2-LABEL: allzeros_v64i8_and4:
2904 ; SSE2-NEXT: psllw $5, %xmm0
2905 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2906 ; SSE2-NEXT: psllw $5, %xmm1
2907 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
2908 ; SSE2-NEXT: shll $16, %ecx
2909 ; SSE2-NEXT: orl %eax, %ecx
2910 ; SSE2-NEXT: psllw $5, %xmm2
2911 ; SSE2-NEXT: pmovmskb %xmm2, %eax
2912 ; SSE2-NEXT: psllw $5, %xmm3
2913 ; SSE2-NEXT: pmovmskb %xmm3, %edx
2914 ; SSE2-NEXT: shll $16, %edx
2915 ; SSE2-NEXT: orl %eax, %edx
2916 ; SSE2-NEXT: shlq $32, %rdx
2917 ; SSE2-NEXT: orq %rcx, %rdx
2918 ; SSE2-NEXT: sete %al
2921 ; AVX1-LABEL: allzeros_v64i8_and4:
2923 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm2
2924 ; AVX1-NEXT: vpmovmskb %xmm2, %eax
2925 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2926 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2927 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
2928 ; AVX1-NEXT: shll $16, %ecx
2929 ; AVX1-NEXT: orl %eax, %ecx
2930 ; AVX1-NEXT: vpsllw $5, %xmm1, %xmm0
2931 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2932 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
2933 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2934 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
2935 ; AVX1-NEXT: shll $16, %edx
2936 ; AVX1-NEXT: orl %eax, %edx
2937 ; AVX1-NEXT: shlq $32, %rdx
2938 ; AVX1-NEXT: orq %rcx, %rdx
2939 ; AVX1-NEXT: sete %al
2940 ; AVX1-NEXT: vzeroupper
2943 ; AVX2-LABEL: allzeros_v64i8_and4:
2945 ; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
2946 ; AVX2-NEXT: vpmovmskb %ymm1, %eax
2947 ; AVX2-NEXT: shlq $32, %rax
2948 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
2949 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
2950 ; AVX2-NEXT: orq %rax, %rcx
2951 ; AVX2-NEXT: sete %al
2952 ; AVX2-NEXT: vzeroupper
2955 ; KNL-LABEL: allzeros_v64i8_and4:
2957 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
2958 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
2959 ; KNL-NEXT: vpsllw $5, %ymm1, %ymm1
2960 ; KNL-NEXT: vpmovmskb %ymm1, %eax
2961 ; KNL-NEXT: shlq $32, %rax
2962 ; KNL-NEXT: vpmovmskb %ymm0, %ecx
2963 ; KNL-NEXT: orq %rax, %rcx
2964 ; KNL-NEXT: sete %al
2965 ; KNL-NEXT: vzeroupper
2968 ; SKX-LABEL: allzeros_v64i8_and4:
2970 ; SKX-NEXT: vptestmb {{.*}}(%rip), %zmm0, %k0
2971 ; SKX-NEXT: kortestq %k0, %k0
2972 ; SKX-NEXT: sete %al
2973 ; SKX-NEXT: vzeroupper
2975 %tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2976 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
2977 %tmp2 = bitcast <64 x i1> %tmp1 to i64
2978 %tmp3 = icmp eq i64 %tmp2, 0
2982 define i1 @allones_v8i16_and4(<8 x i16> %arg) {
2983 ; SSE2-LABEL: allones_v8i16_and4:
2985 ; SSE2-NEXT: psllw $13, %xmm0
2986 ; SSE2-NEXT: packsswb %xmm0, %xmm0
2987 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2988 ; SSE2-NEXT: cmpb $-1, %al
2989 ; SSE2-NEXT: sete %al
2992 ; AVX-LABEL: allones_v8i16_and4:
2994 ; AVX-NEXT: vpsllw $13, %xmm0, %xmm0
2995 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2996 ; AVX-NEXT: vpmovmskb %xmm0, %eax
2997 ; AVX-NEXT: cmpb $-1, %al
2998 ; AVX-NEXT: sete %al
3001 ; KNL-LABEL: allones_v8i16_and4:
3003 ; KNL-NEXT: vpsllw $13, %xmm0, %xmm0
3004 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
3005 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
3006 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
3007 ; KNL-NEXT: kmovw %k0, %eax
3008 ; KNL-NEXT: cmpb $-1, %al
3009 ; KNL-NEXT: sete %al
3010 ; KNL-NEXT: vzeroupper
3013 ; SKX-LABEL: allones_v8i16_and4:
3015 ; SKX-NEXT: vptestmw {{.*}}(%rip), %xmm0, %k0
3016 ; SKX-NEXT: kortestb %k0, %k0
3017 ; SKX-NEXT: setb %al
3019 %tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
3020 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
3021 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3022 %tmp3 = icmp eq i8 %tmp2, -1
3026 define i1 @allzeros_v8i16_and4(<8 x i16> %arg) {
3027 ; SSE2-LABEL: allzeros_v8i16_and4:
3029 ; SSE2-NEXT: psllw $13, %xmm0
3030 ; SSE2-NEXT: packsswb %xmm0, %xmm0
3031 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3032 ; SSE2-NEXT: testb %al, %al
3033 ; SSE2-NEXT: sete %al
3036 ; AVX-LABEL: allzeros_v8i16_and4:
3038 ; AVX-NEXT: vpsllw $13, %xmm0, %xmm0
3039 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
3040 ; AVX-NEXT: vpmovmskb %xmm0, %eax
3041 ; AVX-NEXT: testb %al, %al
3042 ; AVX-NEXT: sete %al
3045 ; KNL-LABEL: allzeros_v8i16_and4:
3047 ; KNL-NEXT: vpsllw $13, %xmm0, %xmm0
3048 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
3049 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
3050 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
3051 ; KNL-NEXT: kmovw %k0, %eax
3052 ; KNL-NEXT: testb %al, %al
3053 ; KNL-NEXT: sete %al
3054 ; KNL-NEXT: vzeroupper
3057 ; SKX-LABEL: allzeros_v8i16_and4:
3059 ; SKX-NEXT: vptestmw {{.*}}(%rip), %xmm0, %k0
3060 ; SKX-NEXT: kortestb %k0, %k0
3061 ; SKX-NEXT: sete %al
3063 %tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
3064 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
3065 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3066 %tmp3 = icmp eq i8 %tmp2, 0
3070 define i1 @allones_v16i16_and4(<16 x i16> %arg) {
3071 ; SSE2-LABEL: allones_v16i16_and4:
3073 ; SSE2-NEXT: psllw $13, %xmm1
3074 ; SSE2-NEXT: psllw $13, %xmm0
3075 ; SSE2-NEXT: packsswb %xmm1, %xmm0
3076 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3077 ; SSE2-NEXT: cmpw $-1, %ax
3078 ; SSE2-NEXT: sete %al
3081 ; AVX1-LABEL: allones_v16i16_and4:
3083 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3084 ; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
3085 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
3086 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3087 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3088 ; AVX1-NEXT: cmpw $-1, %ax
3089 ; AVX1-NEXT: sete %al
3090 ; AVX1-NEXT: vzeroupper
3093 ; AVX2-LABEL: allones_v16i16_and4:
3095 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
3096 ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
3097 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3098 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3099 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
3100 ; AVX2-NEXT: cmpw $-1, %ax
3101 ; AVX2-NEXT: sete %al
3102 ; AVX2-NEXT: vzeroupper
3105 ; KNL-LABEL: allones_v16i16_and4:
3107 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
3108 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
3109 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3110 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3111 ; KNL-NEXT: kortestw %k0, %k0
3112 ; KNL-NEXT: setb %al
3113 ; KNL-NEXT: vzeroupper
3116 ; SKX-LABEL: allones_v16i16_and4:
3118 ; SKX-NEXT: vptestmw {{.*}}(%rip), %ymm0, %k0
3119 ; SKX-NEXT: kortestw %k0, %k0
3120 ; SKX-NEXT: setb %al
3121 ; SKX-NEXT: vzeroupper
3123 %tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
3124 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
3125 %tmp2 = bitcast <16 x i1> %tmp1 to i16
3126 %tmp3 = icmp eq i16 %tmp2, -1
3130 define i1 @allones_v32i16_and4(<32 x i16> %arg) {
3131 ; SSE2-LABEL: allones_v32i16_and4:
3133 ; SSE2-NEXT: psllw $13, %xmm1
3134 ; SSE2-NEXT: psllw $13, %xmm0
3135 ; SSE2-NEXT: packsswb %xmm1, %xmm0
3136 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3137 ; SSE2-NEXT: psllw $13, %xmm3
3138 ; SSE2-NEXT: psllw $13, %xmm2
3139 ; SSE2-NEXT: packsswb %xmm3, %xmm2
3140 ; SSE2-NEXT: pmovmskb %xmm2, %ecx
3141 ; SSE2-NEXT: shll $16, %ecx
3142 ; SSE2-NEXT: orl %eax, %ecx
3143 ; SSE2-NEXT: cmpl $-1, %ecx
3144 ; SSE2-NEXT: sete %al
3147 ; AVX1-LABEL: allones_v32i16_and4:
3149 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3150 ; AVX1-NEXT: vpsllw $13, %xmm2, %xmm2
3151 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
3152 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
3153 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3154 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
3155 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
3156 ; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
3157 ; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
3158 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
3159 ; AVX1-NEXT: shll $16, %ecx
3160 ; AVX1-NEXT: orl %eax, %ecx
3161 ; AVX1-NEXT: cmpl $-1, %ecx
3162 ; AVX1-NEXT: sete %al
3163 ; AVX1-NEXT: vzeroupper
3166 ; AVX2-LABEL: allones_v32i16_and4:
3168 ; AVX2-NEXT: vpsllw $13, %ymm1, %ymm1
3169 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
3170 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
3171 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3172 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
3173 ; AVX2-NEXT: cmpl $-1, %eax
3174 ; AVX2-NEXT: sete %al
3175 ; AVX2-NEXT: vzeroupper
3178 ; KNL-LABEL: allones_v32i16_and4:
3180 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
3181 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
3182 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
3183 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3184 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3185 ; KNL-NEXT: kmovw %k0, %eax
3186 ; KNL-NEXT: vpsllw $13, %ymm1, %ymm0
3187 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
3188 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3189 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3190 ; KNL-NEXT: kmovw %k0, %ecx
3191 ; KNL-NEXT: shll $16, %ecx
3192 ; KNL-NEXT: orl %eax, %ecx
3193 ; KNL-NEXT: cmpl $-1, %ecx
3194 ; KNL-NEXT: sete %al
3195 ; KNL-NEXT: vzeroupper
3198 ; SKX-LABEL: allones_v32i16_and4:
3200 ; SKX-NEXT: vptestmw {{.*}}(%rip), %zmm0, %k0
3201 ; SKX-NEXT: kortestd %k0, %k0
3202 ; SKX-NEXT: setb %al
3203 ; SKX-NEXT: vzeroupper
3205 %tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
3206 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
3207 %tmp2 = bitcast <32 x i1> %tmp1 to i32
3208 %tmp3 = icmp eq i32 %tmp2, -1
3212 define i1 @allzeros_v32i16_and4(<32 x i16> %arg) {
3213 ; SSE2-LABEL: allzeros_v32i16_and4:
3215 ; SSE2-NEXT: psllw $13, %xmm1
3216 ; SSE2-NEXT: psllw $13, %xmm0
3217 ; SSE2-NEXT: packsswb %xmm1, %xmm0
3218 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3219 ; SSE2-NEXT: psllw $13, %xmm3
3220 ; SSE2-NEXT: psllw $13, %xmm2
3221 ; SSE2-NEXT: packsswb %xmm3, %xmm2
3222 ; SSE2-NEXT: pmovmskb %xmm2, %ecx
3223 ; SSE2-NEXT: shll $16, %ecx
3224 ; SSE2-NEXT: orl %eax, %ecx
3225 ; SSE2-NEXT: sete %al
3228 ; AVX1-LABEL: allzeros_v32i16_and4:
3230 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3231 ; AVX1-NEXT: vpsllw $13, %xmm2, %xmm2
3232 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
3233 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
3234 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3235 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
3236 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
3237 ; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
3238 ; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
3239 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
3240 ; AVX1-NEXT: shll $16, %ecx
3241 ; AVX1-NEXT: orl %eax, %ecx
3242 ; AVX1-NEXT: sete %al
3243 ; AVX1-NEXT: vzeroupper
3246 ; AVX2-LABEL: allzeros_v32i16_and4:
3248 ; AVX2-NEXT: vpsllw $13, %ymm1, %ymm1
3249 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
3250 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
3251 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3252 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
3253 ; AVX2-NEXT: testl %eax, %eax
3254 ; AVX2-NEXT: sete %al
3255 ; AVX2-NEXT: vzeroupper
3258 ; KNL-LABEL: allzeros_v32i16_and4:
3260 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
3261 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
3262 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
3263 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3264 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3265 ; KNL-NEXT: kmovw %k0, %eax
3266 ; KNL-NEXT: vpsllw $13, %ymm1, %ymm0
3267 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
3268 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3269 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3270 ; KNL-NEXT: kmovw %k0, %ecx
3271 ; KNL-NEXT: shll $16, %ecx
3272 ; KNL-NEXT: orl %eax, %ecx
3273 ; KNL-NEXT: sete %al
3274 ; KNL-NEXT: vzeroupper
3277 ; SKX-LABEL: allzeros_v32i16_and4:
3279 ; SKX-NEXT: vptestmw {{.*}}(%rip), %zmm0, %k0
3280 ; SKX-NEXT: kortestd %k0, %k0
3281 ; SKX-NEXT: sete %al
3282 ; SKX-NEXT: vzeroupper
3284 %tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
3285 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
3286 %tmp2 = bitcast <32 x i1> %tmp1 to i32
3287 %tmp3 = icmp eq i32 %tmp2, 0
3291 define i1 @allzeros_v16i16_and4(<16 x i16> %arg) {
3292 ; SSE2-LABEL: allzeros_v16i16_and4:
3294 ; SSE2-NEXT: psllw $13, %xmm1
3295 ; SSE2-NEXT: psllw $13, %xmm0
3296 ; SSE2-NEXT: packsswb %xmm1, %xmm0
3297 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3298 ; SSE2-NEXT: testw %ax, %ax
3299 ; SSE2-NEXT: sete %al
3302 ; AVX1-LABEL: allzeros_v16i16_and4:
3304 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3305 ; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
3306 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
3307 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3308 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3309 ; AVX1-NEXT: testw %ax, %ax
3310 ; AVX1-NEXT: sete %al
3311 ; AVX1-NEXT: vzeroupper
3314 ; AVX2-LABEL: allzeros_v16i16_and4:
3316 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
3317 ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
3318 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3319 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3320 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
3321 ; AVX2-NEXT: testw %ax, %ax
3322 ; AVX2-NEXT: sete %al
3323 ; AVX2-NEXT: vzeroupper
3326 ; KNL-LABEL: allzeros_v16i16_and4:
3328 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
3329 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
3330 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3331 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3332 ; KNL-NEXT: kortestw %k0, %k0
3333 ; KNL-NEXT: sete %al
3334 ; KNL-NEXT: vzeroupper
3337 ; SKX-LABEL: allzeros_v16i16_and4:
3339 ; SKX-NEXT: vptestmw {{.*}}(%rip), %ymm0, %k0
3340 ; SKX-NEXT: kortestw %k0, %k0
3341 ; SKX-NEXT: sete %al
3342 ; SKX-NEXT: vzeroupper
3344 %tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
3345 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
3346 %tmp2 = bitcast <16 x i1> %tmp1 to i16
3347 %tmp3 = icmp eq i16 %tmp2, 0
3351 define i1 @allones_v4i32_and4(<4 x i32> %arg) {
3352 ; SSE2-LABEL: allones_v4i32_and4:
3354 ; SSE2-NEXT: pslld $29, %xmm0
3355 ; SSE2-NEXT: movmskps %xmm0, %eax
3356 ; SSE2-NEXT: cmpb $15, %al
3357 ; SSE2-NEXT: sete %al
3360 ; AVX-LABEL: allones_v4i32_and4:
3362 ; AVX-NEXT: vpslld $29, %xmm0, %xmm0
3363 ; AVX-NEXT: vmovmskps %xmm0, %eax
3364 ; AVX-NEXT: cmpb $15, %al
3365 ; AVX-NEXT: sete %al
3368 ; KNL-LABEL: allones_v4i32_and4:
3370 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3371 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
3372 ; KNL-NEXT: kmovw %k0, %eax
3373 ; KNL-NEXT: andb $15, %al
3374 ; KNL-NEXT: cmpb $15, %al
3375 ; KNL-NEXT: sete %al
3376 ; KNL-NEXT: vzeroupper
3379 ; SKX-LABEL: allones_v4i32_and4:
3381 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
3382 ; SKX-NEXT: kmovd %k0, %eax
3383 ; SKX-NEXT: andb $15, %al
3384 ; SKX-NEXT: cmpb $15, %al
3385 ; SKX-NEXT: sete %al
3387 %tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
3388 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
3389 %tmp2 = bitcast <4 x i1> %tmp1 to i4
3390 %tmp3 = icmp eq i4 %tmp2, -1
3394 define i1 @allzeros_v4i32_and4(<4 x i32> %arg) {
3395 ; SSE2-LABEL: allzeros_v4i32_and4:
3397 ; SSE2-NEXT: pslld $29, %xmm0
3398 ; SSE2-NEXT: movmskps %xmm0, %eax
3399 ; SSE2-NEXT: testb %al, %al
3400 ; SSE2-NEXT: sete %al
3403 ; AVX-LABEL: allzeros_v4i32_and4:
3405 ; AVX-NEXT: vpslld $29, %xmm0, %xmm0
3406 ; AVX-NEXT: vmovmskps %xmm0, %eax
3407 ; AVX-NEXT: testb %al, %al
3408 ; AVX-NEXT: sete %al
3411 ; KNL-LABEL: allzeros_v4i32_and4:
3413 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3414 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
3415 ; KNL-NEXT: kmovw %k0, %eax
3416 ; KNL-NEXT: testb $15, %al
3417 ; KNL-NEXT: sete %al
3418 ; KNL-NEXT: vzeroupper
3421 ; SKX-LABEL: allzeros_v4i32_and4:
3423 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
3424 ; SKX-NEXT: kmovd %k0, %eax
3425 ; SKX-NEXT: testb $15, %al
3426 ; SKX-NEXT: sete %al
3428 %tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
3429 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
3430 %tmp2 = bitcast <4 x i1> %tmp1 to i4
3431 %tmp3 = icmp eq i4 %tmp2, 0
3435 define i1 @allones_v8i32_and4(<8 x i32> %arg) {
3436 ; SSE2-LABEL: allones_v8i32_and4:
3438 ; SSE2-NEXT: pslld $29, %xmm1
3439 ; SSE2-NEXT: pslld $29, %xmm0
3440 ; SSE2-NEXT: packssdw %xmm1, %xmm0
3441 ; SSE2-NEXT: packsswb %xmm0, %xmm0
3442 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3443 ; SSE2-NEXT: cmpb $-1, %al
3444 ; SSE2-NEXT: sete %al
3447 ; AVX1-LABEL: allones_v8i32_and4:
3449 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm1
3450 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3451 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
3452 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3453 ; AVX1-NEXT: vmovmskps %ymm0, %eax
3454 ; AVX1-NEXT: cmpb $-1, %al
3455 ; AVX1-NEXT: sete %al
3456 ; AVX1-NEXT: vzeroupper
3459 ; AVX2-LABEL: allones_v8i32_and4:
3461 ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
3462 ; AVX2-NEXT: vmovmskps %ymm0, %eax
3463 ; AVX2-NEXT: cmpb $-1, %al
3464 ; AVX2-NEXT: sete %al
3465 ; AVX2-NEXT: vzeroupper
3468 ; KNL-LABEL: allones_v8i32_and4:
3470 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3471 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
3472 ; KNL-NEXT: kmovw %k0, %eax
3473 ; KNL-NEXT: cmpb $-1, %al
3474 ; KNL-NEXT: sete %al
3475 ; KNL-NEXT: vzeroupper
3478 ; SKX-LABEL: allones_v8i32_and4:
3480 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to8}, %ymm0, %k0
3481 ; SKX-NEXT: kortestb %k0, %k0
3482 ; SKX-NEXT: setb %al
3483 ; SKX-NEXT: vzeroupper
3485 %tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3486 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
3487 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3488 %tmp3 = icmp eq i8 %tmp2, -1
3492 define i1 @allzeros_v8i32_and4(<8 x i32> %arg) {
3493 ; SSE2-LABEL: allzeros_v8i32_and4:
3495 ; SSE2-NEXT: pslld $29, %xmm1
3496 ; SSE2-NEXT: pslld $29, %xmm0
3497 ; SSE2-NEXT: packssdw %xmm1, %xmm0
3498 ; SSE2-NEXT: packsswb %xmm0, %xmm0
3499 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3500 ; SSE2-NEXT: testb %al, %al
3501 ; SSE2-NEXT: sete %al
3504 ; AVX1-LABEL: allzeros_v8i32_and4:
3506 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm1
3507 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3508 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
3509 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3510 ; AVX1-NEXT: vmovmskps %ymm0, %eax
3511 ; AVX1-NEXT: testb %al, %al
3512 ; AVX1-NEXT: sete %al
3513 ; AVX1-NEXT: vzeroupper
3516 ; AVX2-LABEL: allzeros_v8i32_and4:
3518 ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
3519 ; AVX2-NEXT: vmovmskps %ymm0, %eax
3520 ; AVX2-NEXT: testb %al, %al
3521 ; AVX2-NEXT: sete %al
3522 ; AVX2-NEXT: vzeroupper
3525 ; KNL-LABEL: allzeros_v8i32_and4:
3527 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3528 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
3529 ; KNL-NEXT: kmovw %k0, %eax
3530 ; KNL-NEXT: testb %al, %al
3531 ; KNL-NEXT: sete %al
3532 ; KNL-NEXT: vzeroupper
3535 ; SKX-LABEL: allzeros_v8i32_and4:
3537 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to8}, %ymm0, %k0
3538 ; SKX-NEXT: kortestb %k0, %k0
3539 ; SKX-NEXT: sete %al
3540 ; SKX-NEXT: vzeroupper
3542 %tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3543 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
3544 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3545 %tmp3 = icmp eq i8 %tmp2, 0
3549 define i1 @allones_v16i32_and4(<16 x i32> %arg) {
3550 ; SSE2-LABEL: allones_v16i32_and4:
3552 ; SSE2-NEXT: pslld $29, %xmm3
3553 ; SSE2-NEXT: pslld $29, %xmm2
3554 ; SSE2-NEXT: packssdw %xmm3, %xmm2
3555 ; SSE2-NEXT: pslld $29, %xmm1
3556 ; SSE2-NEXT: pslld $29, %xmm0
3557 ; SSE2-NEXT: packssdw %xmm1, %xmm0
3558 ; SSE2-NEXT: packsswb %xmm2, %xmm0
3559 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3560 ; SSE2-NEXT: cmpw $-1, %ax
3561 ; SSE2-NEXT: sete %al
3564 ; AVX1-LABEL: allones_v16i32_and4:
3566 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3567 ; AVX1-NEXT: vpslld $29, %xmm2, %xmm2
3568 ; AVX1-NEXT: vpslld $29, %xmm1, %xmm1
3569 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
3570 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3571 ; AVX1-NEXT: vpslld $29, %xmm2, %xmm2
3572 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
3573 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
3574 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3575 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3576 ; AVX1-NEXT: cmpw $-1, %ax
3577 ; AVX1-NEXT: sete %al
3578 ; AVX1-NEXT: vzeroupper
3581 ; AVX2-LABEL: allones_v16i32_and4:
3583 ; AVX2-NEXT: vpslld $29, %ymm1, %ymm1
3584 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
3585 ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
3586 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
3587 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
3588 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3589 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3590 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3591 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
3592 ; AVX2-NEXT: cmpw $-1, %ax
3593 ; AVX2-NEXT: sete %al
3594 ; AVX2-NEXT: vzeroupper
3597 ; KNL-LABEL: allones_v16i32_and4:
3599 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
3600 ; KNL-NEXT: kortestw %k0, %k0
3601 ; KNL-NEXT: setb %al
3602 ; KNL-NEXT: vzeroupper
3605 ; SKX-LABEL: allones_v16i32_and4:
3607 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
3608 ; SKX-NEXT: kortestw %k0, %k0
3609 ; SKX-NEXT: setb %al
3610 ; SKX-NEXT: vzeroupper
3612 %tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3613 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
3614 %tmp2 = bitcast <16 x i1> %tmp1 to i16
3615 %tmp3 = icmp eq i16 %tmp2, -1
3619 define i1 @allzeros_v16i32_and4(<16 x i32> %arg) {
3620 ; SSE2-LABEL: allzeros_v16i32_and4:
3622 ; SSE2-NEXT: pslld $29, %xmm3
3623 ; SSE2-NEXT: pslld $29, %xmm2
3624 ; SSE2-NEXT: packssdw %xmm3, %xmm2
3625 ; SSE2-NEXT: pslld $29, %xmm1
3626 ; SSE2-NEXT: pslld $29, %xmm0
3627 ; SSE2-NEXT: packssdw %xmm1, %xmm0
3628 ; SSE2-NEXT: packsswb %xmm2, %xmm0
3629 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3630 ; SSE2-NEXT: testw %ax, %ax
3631 ; SSE2-NEXT: sete %al
3634 ; AVX1-LABEL: allzeros_v16i32_and4:
3636 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3637 ; AVX1-NEXT: vpslld $29, %xmm2, %xmm2
3638 ; AVX1-NEXT: vpslld $29, %xmm1, %xmm1
3639 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
3640 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3641 ; AVX1-NEXT: vpslld $29, %xmm2, %xmm2
3642 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
3643 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
3644 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3645 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3646 ; AVX1-NEXT: testw %ax, %ax
3647 ; AVX1-NEXT: sete %al
3648 ; AVX1-NEXT: vzeroupper
3651 ; AVX2-LABEL: allzeros_v16i32_and4:
3653 ; AVX2-NEXT: vpslld $29, %ymm1, %ymm1
3654 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
3655 ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
3656 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
3657 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
3658 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3659 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3660 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3661 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
3662 ; AVX2-NEXT: testw %ax, %ax
3663 ; AVX2-NEXT: sete %al
3664 ; AVX2-NEXT: vzeroupper
3667 ; KNL-LABEL: allzeros_v16i32_and4:
3669 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
3670 ; KNL-NEXT: kortestw %k0, %k0
3671 ; KNL-NEXT: sete %al
3672 ; KNL-NEXT: vzeroupper
3675 ; SKX-LABEL: allzeros_v16i32_and4:
3677 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
3678 ; SKX-NEXT: kortestw %k0, %k0
3679 ; SKX-NEXT: sete %al
3680 ; SKX-NEXT: vzeroupper
3682 %tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3683 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
3684 %tmp2 = bitcast <16 x i1> %tmp1 to i16
3685 %tmp3 = icmp eq i16 %tmp2, 0
3689 define i1 @allones_v2i64_and4(<2 x i64> %arg) {
3690 ; SSE2-LABEL: allones_v2i64_and4:
3692 ; SSE2-NEXT: psllq $61, %xmm0
3693 ; SSE2-NEXT: movmskpd %xmm0, %eax
3694 ; SSE2-NEXT: cmpb $3, %al
3695 ; SSE2-NEXT: sete %al
3698 ; AVX-LABEL: allones_v2i64_and4:
3700 ; AVX-NEXT: vpsllq $61, %xmm0, %xmm0
3701 ; AVX-NEXT: vmovmskpd %xmm0, %eax
3702 ; AVX-NEXT: cmpb $3, %al
3703 ; AVX-NEXT: sete %al
3706 ; KNL-LABEL: allones_v2i64_and4:
3708 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3709 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4]
3710 ; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
3711 ; KNL-NEXT: kmovw %k0, %eax
3712 ; KNL-NEXT: andb $3, %al
3713 ; KNL-NEXT: cmpb $3, %al
3714 ; KNL-NEXT: sete %al
3715 ; KNL-NEXT: vzeroupper
3718 ; SKX-LABEL: allones_v2i64_and4:
3720 ; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
3721 ; SKX-NEXT: kmovd %k0, %eax
3722 ; SKX-NEXT: andb $3, %al
3723 ; SKX-NEXT: cmpb $3, %al
3724 ; SKX-NEXT: sete %al
3726 %tmp = and <2 x i64> %arg, <i64 4, i64 4>
3727 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
3728 %tmp2 = bitcast <2 x i1> %tmp1 to i2
3729 %tmp3 = icmp eq i2 %tmp2, -1
3733 define i1 @allzeros_v2i64_and4(<2 x i64> %arg) {
3734 ; SSE2-LABEL: allzeros_v2i64_and4:
3736 ; SSE2-NEXT: psllq $61, %xmm0
3737 ; SSE2-NEXT: movmskpd %xmm0, %eax
3738 ; SSE2-NEXT: testb %al, %al
3739 ; SSE2-NEXT: sete %al
3742 ; AVX-LABEL: allzeros_v2i64_and4:
3744 ; AVX-NEXT: vpsllq $61, %xmm0, %xmm0
3745 ; AVX-NEXT: vmovmskpd %xmm0, %eax
3746 ; AVX-NEXT: testb %al, %al
3747 ; AVX-NEXT: sete %al
3750 ; KNL-LABEL: allzeros_v2i64_and4:
3752 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3753 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4]
3754 ; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
3755 ; KNL-NEXT: kmovw %k0, %eax
3756 ; KNL-NEXT: testb $3, %al
3757 ; KNL-NEXT: sete %al
3758 ; KNL-NEXT: vzeroupper
3761 ; SKX-LABEL: allzeros_v2i64_and4:
3763 ; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
3764 ; SKX-NEXT: kmovd %k0, %eax
3765 ; SKX-NEXT: testb $3, %al
3766 ; SKX-NEXT: sete %al
3768 %tmp = and <2 x i64> %arg, <i64 4, i64 4>
3769 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
3770 %tmp2 = bitcast <2 x i1> %tmp1 to i2
3771 %tmp3 = icmp eq i2 %tmp2, 0
3775 define i1 @allones_v4i64_and4(<4 x i64> %arg) {
3776 ; SSE2-LABEL: allones_v4i64_and4:
3778 ; SSE2-NEXT: psllq $61, %xmm1
3779 ; SSE2-NEXT: psllq $61, %xmm0
3780 ; SSE2-NEXT: packssdw %xmm1, %xmm0
3781 ; SSE2-NEXT: movmskps %xmm0, %eax
3782 ; SSE2-NEXT: cmpb $15, %al
3783 ; SSE2-NEXT: sete %al
3786 ; AVX1-LABEL: allones_v4i64_and4:
3788 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm1
3789 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3790 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
3791 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3792 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
3793 ; AVX1-NEXT: cmpb $15, %al
3794 ; AVX1-NEXT: sete %al
3795 ; AVX1-NEXT: vzeroupper
3798 ; AVX2-LABEL: allones_v4i64_and4:
3800 ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
3801 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
3802 ; AVX2-NEXT: cmpb $15, %al
3803 ; AVX2-NEXT: sete %al
3804 ; AVX2-NEXT: vzeroupper
3807 ; KNL-LABEL: allones_v4i64_and4:
3809 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3810 ; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
3811 ; KNL-NEXT: kmovw %k0, %eax
3812 ; KNL-NEXT: andb $15, %al
3813 ; KNL-NEXT: cmpb $15, %al
3814 ; KNL-NEXT: sete %al
3815 ; KNL-NEXT: vzeroupper
3818 ; SKX-LABEL: allones_v4i64_and4:
3820 ; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
3821 ; SKX-NEXT: kmovd %k0, %eax
3822 ; SKX-NEXT: andb $15, %al
3823 ; SKX-NEXT: cmpb $15, %al
3824 ; SKX-NEXT: sete %al
3825 ; SKX-NEXT: vzeroupper
3827 %tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
3828 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
3829 %tmp2 = bitcast <4 x i1> %tmp1 to i4
3830 %tmp3 = icmp eq i4 %tmp2, -1
3834 define i1 @allzeros_v4i64_and4(<4 x i64> %arg) {
3835 ; SSE2-LABEL: allzeros_v4i64_and4:
3837 ; SSE2-NEXT: psllq $61, %xmm1
3838 ; SSE2-NEXT: psllq $61, %xmm0
3839 ; SSE2-NEXT: packssdw %xmm1, %xmm0
3840 ; SSE2-NEXT: movmskps %xmm0, %eax
3841 ; SSE2-NEXT: testb %al, %al
3842 ; SSE2-NEXT: sete %al
3845 ; AVX1-LABEL: allzeros_v4i64_and4:
3847 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm1
3848 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3849 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
3850 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3851 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
3852 ; AVX1-NEXT: testb %al, %al
3853 ; AVX1-NEXT: sete %al
3854 ; AVX1-NEXT: vzeroupper
3857 ; AVX2-LABEL: allzeros_v4i64_and4:
3859 ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
3860 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
3861 ; AVX2-NEXT: testb %al, %al
3862 ; AVX2-NEXT: sete %al
3863 ; AVX2-NEXT: vzeroupper
3866 ; KNL-LABEL: allzeros_v4i64_and4:
3868 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3869 ; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
3870 ; KNL-NEXT: kmovw %k0, %eax
3871 ; KNL-NEXT: testb $15, %al
3872 ; KNL-NEXT: sete %al
3873 ; KNL-NEXT: vzeroupper
3876 ; SKX-LABEL: allzeros_v4i64_and4:
3878 ; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
3879 ; SKX-NEXT: kmovd %k0, %eax
3880 ; SKX-NEXT: testb $15, %al
3881 ; SKX-NEXT: sete %al
3882 ; SKX-NEXT: vzeroupper
3884 %tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
3885 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
3886 %tmp2 = bitcast <4 x i1> %tmp1 to i4
3887 %tmp3 = icmp eq i4 %tmp2, 0
3891 define i1 @allones_v8i64_and4(<8 x i64> %arg) {
3892 ; SSE2-LABEL: allones_v8i64_and4:
3894 ; SSE2-NEXT: psllq $61, %xmm3
3895 ; SSE2-NEXT: psllq $61, %xmm2
3896 ; SSE2-NEXT: packssdw %xmm3, %xmm2
3897 ; SSE2-NEXT: psllq $61, %xmm1
3898 ; SSE2-NEXT: psllq $61, %xmm0
3899 ; SSE2-NEXT: packssdw %xmm1, %xmm0
3900 ; SSE2-NEXT: packssdw %xmm2, %xmm0
3901 ; SSE2-NEXT: packsswb %xmm0, %xmm0
3902 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3903 ; SSE2-NEXT: cmpb $-1, %al
3904 ; SSE2-NEXT: sete %al
3907 ; AVX1-LABEL: allones_v8i64_and4:
3909 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3910 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
3911 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
3912 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
3913 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
3914 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
3915 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
3916 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3917 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
3918 ; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
3919 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
3920 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3921 ; AVX1-NEXT: vmovmskps %ymm0, %eax
3922 ; AVX1-NEXT: cmpb $-1, %al
3923 ; AVX1-NEXT: sete %al
3924 ; AVX1-NEXT: vzeroupper
3927 ; AVX2-LABEL: allones_v8i64_and4:
3929 ; AVX2-NEXT: vpsllq $61, %ymm1, %ymm1
3930 ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
3931 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
3932 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3933 ; AVX2-NEXT: vmovmskps %ymm0, %eax
3934 ; AVX2-NEXT: cmpb $-1, %al
3935 ; AVX2-NEXT: sete %al
3936 ; AVX2-NEXT: vzeroupper
3939 ; KNL-LABEL: allones_v8i64_and4:
3941 ; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
3942 ; KNL-NEXT: kmovw %k0, %eax
3943 ; KNL-NEXT: cmpb $-1, %al
3944 ; KNL-NEXT: sete %al
3945 ; KNL-NEXT: vzeroupper
3948 ; SKX-LABEL: allones_v8i64_and4:
3950 ; SKX-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
3951 ; SKX-NEXT: kortestb %k0, %k0
3952 ; SKX-NEXT: setb %al
3953 ; SKX-NEXT: vzeroupper
3955 %tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
3956 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
3957 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3958 %tmp3 = icmp eq i8 %tmp2, -1
define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
; SSE2-LABEL: allzeros_v8i64_and4:
; SSE2-NEXT: psllq $61, %xmm3
; SSE2-NEXT: psllq $61, %xmm2
; SSE2-NEXT: packssdw %xmm3, %xmm2
; SSE2-NEXT: psllq $61, %xmm1
; SSE2-NEXT: psllq $61, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: packssdw %xmm2, %xmm0
; SSE2-NEXT: packsswb %xmm0, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: testb %al, %al
; SSE2-NEXT: sete %al
; AVX1-LABEL: allzeros_v8i64_and4:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: testb %al, %al
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX2-LABEL: allzeros_v8i64_and4:
; AVX2-NEXT: vpsllq $61, %ymm1, %ymm1
; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vmovmskps %ymm0, %eax
; AVX2-NEXT: testb %al, %al
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; KNL-LABEL: allzeros_v8i64_and4:
; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allzeros_v8i64_and4:
; SKX-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
%tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
%tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
%tmp3 = icmp eq i8 %tmp2, 0
ret i1 %tmp3
}
; The below are IR patterns that should directly represent the behavior of a
; MOVMSK instruction.
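;
; As a rough illustration (not part of the test input), the first pattern below
; is what one would write in C with the SSE2 intrinsic; the helper name is made up:
;
;   #include <emmintrin.h>
;   // Collect the sign bits of both doubles into the low two bits of an int.
;   static int movmskpd_c(__m128d x) { return _mm_movemask_pd(x); }
;
; The IR form spells the same thing out as a signed compare against zero, a
; bitcast of the <N x i1> result to an iN integer, and a zext to i32.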
define i32 @movmskpd(<2 x double> %x) {
; SSE2-LABEL: movmskpd:
; SSE2-NEXT: movmskpd %xmm0, %eax
; AVX-LABEL: movmskpd:
; AVX-NEXT: vmovmskpd %xmm0, %eax
; KNL-LABEL: movmskpd:
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andl $3, %eax
; KNL-NEXT: vzeroupper
; SKX-LABEL: movmskpd:
; SKX-NEXT: vpmovq2m %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $3, %eax
%a = bitcast <2 x double> %x to <2 x i64>
%b = icmp slt <2 x i64> %a, zeroinitializer
%c = bitcast <2 x i1> %b to i2
%d = zext i2 %c to i32
ret i32 %d
}
define i32 @movmskps(<4 x float> %x) {
; SSE2-LABEL: movmskps:
; SSE2-NEXT: movmskps %xmm0, %eax
; AVX-LABEL: movmskps:
; AVX-NEXT: vmovmskps %xmm0, %eax
; KNL-LABEL: movmskps:
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andl $15, %eax
; KNL-NEXT: vzeroupper
; SKX-LABEL: movmskps:
; SKX-NEXT: vpmovd2m %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $15, %eax
%a = bitcast <4 x float> %x to <4 x i32>
%b = icmp slt <4 x i32> %a, zeroinitializer
%c = bitcast <4 x i1> %b to i4
%d = zext i4 %c to i32
ret i32 %d
}
define i32 @movmskpd256(<4 x double> %x) {
; SSE2-LABEL: movmskpd256:
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
; AVX-LABEL: movmskpd256:
; AVX-NEXT: vmovmskpd %ymm0, %eax
; AVX-NEXT: vzeroupper
; KNL-LABEL: movmskpd256:
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andl $15, %eax
; KNL-NEXT: vzeroupper
; SKX-LABEL: movmskpd256:
; SKX-NEXT: vpmovq2m %ymm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $15, %eax
; SKX-NEXT: vzeroupper
%a = bitcast <4 x double> %x to <4 x i64>
%b = icmp slt <4 x i64> %a, zeroinitializer
%c = bitcast <4 x i1> %b to i4
%d = zext i4 %c to i32
ret i32 %d
}
define i32 @movmskps256(<8 x float> %x) {
; SSE2-LABEL: movmskps256:
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: packsswb %xmm0, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: movzbl %al, %eax
; AVX-LABEL: movmskps256:
; AVX-NEXT: vmovmskps %ymm0, %eax
; AVX-NEXT: vzeroupper
; KNL-LABEL: movmskps256:
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: vzeroupper
; SKX-LABEL: movmskps256:
; SKX-NEXT: vpmovd2m %ymm0, %k0
; SKX-NEXT: kmovb %k0, %eax
; SKX-NEXT: vzeroupper
%a = bitcast <8 x float> %x to <8 x i32>
%b = icmp slt <8 x i32> %a, zeroinitializer
%c = bitcast <8 x i1> %b to i8
%d = zext i8 %c to i32
ret i32 %d
}
define i32 @movmskb(<16 x i8> %x) {
; SSE2-LABEL: movmskb:
; SSE2-NEXT: pmovmskb %xmm0, %eax
; AVX-LABEL: movmskb:
; AVX-NEXT: vpmovmskb %xmm0, %eax
; KNL-LABEL: movmskb:
; KNL-NEXT: vpmovmskb %xmm0, %eax
; SKX-LABEL: movmskb:
; SKX-NEXT: vpmovb2m %xmm0, %k0
; SKX-NEXT: kmovw %k0, %eax
%a = icmp slt <16 x i8> %x, zeroinitializer
%b = bitcast <16 x i1> %a to i16
%c = zext i16 %b to i32
ret i32 %c
}
define i32 @movmskb256(<32 x i8> %x) {
; SSE2-LABEL: movmskb256:
; SSE2-NEXT: pmovmskb %xmm0, %ecx
; SSE2-NEXT: pmovmskb %xmm1, %eax
; SSE2-NEXT: shll $16, %eax
; SSE2-NEXT: orl %ecx, %eax
; AVX1-LABEL: movmskb256:
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: shll $16, %eax
; AVX1-NEXT: orl %ecx, %eax
; AVX1-NEXT: vzeroupper
; AVX2-LABEL: movmskb256:
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: vzeroupper
; KNL-LABEL: movmskb256:
; KNL-NEXT: vpmovmskb %ymm0, %eax
; KNL-NEXT: vzeroupper
; SKX-LABEL: movmskb256:
; SKX-NEXT: vpmovb2m %ymm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: vzeroupper
%a = icmp slt <32 x i8> %x, zeroinitializer
%b = bitcast <32 x i1> %a to i32
ret i32 %b
}
; Multiple extract elements from a vector compare.
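;
; Illustrative C sketch (an assumed scalar equivalent of movmsk_v16i8 below,
; not the test input): take a movemask of the compare and combine single bits.
;
;   #include <emmintrin.h>
;   // Compute (bit3 ^ bit8) & bit15 of the byte-compare mask.
;   static int movmsk_v16i8_c(__m128i x, __m128i y) {
;     int m = _mm_movemask_epi8(_mm_cmpeq_epi8(x, y));
;     return (((m >> 3) ^ (m >> 8)) & (m >> 15)) & 1;
;   }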
define i1 @movmsk_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE2-LABEL: movmsk_v16i8:
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shrl $15, %ecx
; SSE2-NEXT: movl %eax, %edx
; SSE2-NEXT: shrl $8, %edx
; SSE2-NEXT: andl $1, %edx
; SSE2-NEXT: andl $8, %eax
; SSE2-NEXT: shrl $3, %eax
; SSE2-NEXT: xorl %edx, %eax
; SSE2-NEXT: andl %ecx, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; AVX-LABEL: movmsk_v16i8:
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: shrl $15, %ecx
; AVX-NEXT: movl %eax, %edx
; AVX-NEXT: shrl $8, %edx
; AVX-NEXT: andl $1, %edx
; AVX-NEXT: andl $8, %eax
; AVX-NEXT: shrl $3, %eax
; AVX-NEXT: xorl %edx, %eax
; AVX-NEXT: andl %ecx, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; KNL-LABEL: movmsk_v16i8:
; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftrw $8, %k0, %k1
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftrw $3, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: xorb %dl, %al
; KNL-NEXT: andb %cl, %al
; KNL-NEXT: # kill: def $al killed $al killed $eax
; KNL-NEXT: vzeroupper
; SKX-LABEL: movmsk_v16i8:
; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k1
; SKX-NEXT: kmovd %k1, %ecx
; SKX-NEXT: kshiftrw $8, %k0, %k1
; SKX-NEXT: kmovd %k1, %edx
; SKX-NEXT: kshiftrw $3, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: xorb %dl, %al
; SKX-NEXT: andb %cl, %al
; SKX-NEXT: # kill: def $al killed $al killed $eax
%cmp = icmp eq <16 x i8> %x, %y
%e1 = extractelement <16 x i1> %cmp, i32 3
%e2 = extractelement <16 x i1> %cmp, i32 8
%e3 = extractelement <16 x i1> %cmp, i32 15
%u1 = xor i1 %e1, %e2
%u2 = and i1 %e3, %u1
ret i1 %u2
}
; TODO: Replace shift+mask chain with NOT+TEST+SETE
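;
; Sketch of the idea behind that TODO, assuming the combined lanes are 0, 1, 4
; and 7 (mask 0x93) as in movmsk_v8i16 below: an all-of test over selected
; movemask bits can invert the mask and test those bits for zero, e.g. in C:
;
;   // True iff bits 0, 1, 4 and 7 are all set in m.
;   static int all_of_selected_bits(int m) { return (~m & 0x93) == 0; }
;
; which maps to NOT + TEST $0x93 + SETE rather than a per-bit shift+mask chain.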
define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: movmsk_v8i16:
; SSE2-NEXT: pcmpgtw %xmm1, %xmm0
; SSE2-NEXT: packsswb %xmm0, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %ecx
; SSE2-NEXT: movl %ecx, %eax
; SSE2-NEXT: shrb $7, %al
; SSE2-NEXT: movl %ecx, %edx
; SSE2-NEXT: andb $16, %dl
; SSE2-NEXT: shrb $4, %dl
; SSE2-NEXT: andb %al, %dl
; SSE2-NEXT: movl %ecx, %eax
; SSE2-NEXT: shrb %al
; SSE2-NEXT: andb %dl, %al
; SSE2-NEXT: andb %cl, %al
; AVX-LABEL: movmsk_v8i16:
; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %ecx
; AVX-NEXT: movl %ecx, %eax
; AVX-NEXT: shrb $7, %al
; AVX-NEXT: movl %ecx, %edx
; AVX-NEXT: andb $16, %dl
; AVX-NEXT: shrb $4, %dl
; AVX-NEXT: andb %al, %dl
; AVX-NEXT: movl %ecx, %eax
; AVX-NEXT: shrb %al
; AVX-NEXT: andb %dl, %al
; AVX-NEXT: andb %cl, %al
; KNL-LABEL: movmsk_v8i16:
; KNL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftrw $7, %k0, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: andb %cl, %al
; KNL-NEXT: andb %dl, %al
; KNL-NEXT: andb %sil, %al
; KNL-NEXT: # kill: def $al killed $al killed $eax
; KNL-NEXT: vzeroupper
; SKX-LABEL: movmsk_v8i16:
; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
; SKX-NEXT: kshiftrb $4, %k0, %k1
; SKX-NEXT: kmovd %k1, %ecx
; SKX-NEXT: kshiftrb $7, %k0, %k1
; SKX-NEXT: kmovd %k1, %eax
; SKX-NEXT: kshiftrb $1, %k0, %k1
; SKX-NEXT: kmovd %k1, %edx
; SKX-NEXT: kmovd %k0, %esi
; SKX-NEXT: andb %cl, %al
; SKX-NEXT: andb %dl, %al
; SKX-NEXT: andb %sil, %al
; SKX-NEXT: # kill: def $al killed $al killed $eax
%cmp = icmp sgt <8 x i16> %x, %y
%e1 = extractelement <8 x i1> %cmp, i32 0
%e2 = extractelement <8 x i1> %cmp, i32 1
%e3 = extractelement <8 x i1> %cmp, i32 7
%e4 = extractelement <8 x i1> %cmp, i32 4
%u1 = and i1 %e1, %e2
%u2 = and i1 %e3, %e4
%u3 = and i1 %u1, %u2
ret i1 %u3
}
; TODO: Replace shift+mask chain with AND+CMP.
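;
; One possible reading of that TODO (an assumption, not the committed codegen):
; the xor of mask bits 2 and 3 in movmsk_v4i32 below is true exactly when
; (mask & 0xC) is neither 0 nor 0xC, so the shift+mask chain could become an
; AND of the two bits followed by compares, e.g. in C:
;
;   // True iff exactly one of bits 2 and 3 is set in m.
;   static int xor_of_bits_2_3(int m) { return (m & 0xC) != 0 && (m & 0xC) != 0xC; }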
define i1 @movmsk_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: movmsk_v4i32:
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: movmskps %xmm1, %eax
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shrb $3, %cl
; SSE2-NEXT: andb $4, %al
; SSE2-NEXT: shrb $2, %al
; SSE2-NEXT: xorb %cl, %al
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; AVX-LABEL: movmsk_v4i32:
; AVX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovmskps %xmm0, %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: shrb $3, %cl
; AVX-NEXT: andb $4, %al
; AVX-NEXT: shrb $2, %al
; AVX-NEXT: xorb %cl, %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; KNL-LABEL: movmsk_v4i32:
; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftrw $2, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: xorb %cl, %al
; KNL-NEXT: # kill: def $al killed $al killed $eax
; KNL-NEXT: vzeroupper
; SKX-LABEL: movmsk_v4i32:
; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
; SKX-NEXT: kshiftrb $3, %k0, %k1
; SKX-NEXT: kmovd %k1, %ecx
; SKX-NEXT: kshiftrb $2, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: xorb %cl, %al
; SKX-NEXT: # kill: def $al killed $al killed $eax
%cmp = icmp slt <4 x i32> %x, %y
%e1 = extractelement <4 x i1> %cmp, i32 2
%e2 = extractelement <4 x i1> %cmp, i32 3
%u1 = xor i1 %e1, %e2
ret i1 %u1
}
define i1 @movmsk_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: movmsk_v2i64:
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: movmskpd %xmm1, %eax
; SSE2-NEXT: xorl $3, %eax
; SSE2-NEXT: cmpb $3, %al
; SSE2-NEXT: sete %al
; AVX-LABEL: movmsk_v2i64:
; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovmskpd %xmm0, %eax
; AVX-NEXT: xorl $3, %eax
; AVX-NEXT: cmpb $3, %al
; AVX-NEXT: sete %al
; KNL-LABEL: movmsk_v2i64:
; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vpcmpneqq %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andb %cl, %al
; KNL-NEXT: # kill: def $al killed $al killed $eax
; KNL-NEXT: vzeroupper
; SKX-LABEL: movmsk_v2i64:
; SKX-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
; SKX-NEXT: kshiftrb $1, %k0, %k1
; SKX-NEXT: kmovd %k1, %ecx
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb %cl, %al
; SKX-NEXT: # kill: def $al killed $al killed $eax
%cmp = icmp ne <2 x i64> %x, %y
%e1 = extractelement <2 x i1> %cmp, i32 0
%e2 = extractelement <2 x i1> %cmp, i32 1
%u1 = and i1 %e1, %e2
ret i1 %u1
}
define i1 @movmsk_v4f32(<4 x float> %x, <4 x float> %y) {
; SSE2-LABEL: movmsk_v4f32:
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: cmpeqps %xmm1, %xmm2
; SSE2-NEXT: cmpunordps %xmm1, %xmm0
; SSE2-NEXT: orps %xmm2, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
; SSE2-NEXT: testb $14, %al
; SSE2-NEXT: setne %al
; AVX-LABEL: movmsk_v4f32:
; AVX-NEXT: vcmpeq_uqps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovmskps %xmm0, %eax
; AVX-NEXT: testb $14, %al
; AVX-NEXT: setne %al
; KNL-LABEL: movmsk_v4f32:
; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vcmpeq_uqps %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftrw $2, %k0, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftrw $1, %k0, %k0
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: orb %cl, %al
; KNL-NEXT: orb %dl, %al
; KNL-NEXT: # kill: def $al killed $al killed $eax
; KNL-NEXT: vzeroupper
; SKX-LABEL: movmsk_v4f32:
; SKX-NEXT: vcmpeq_uqps %xmm1, %xmm0, %k0
; SKX-NEXT: kshiftrb $3, %k0, %k1
; SKX-NEXT: kmovd %k1, %ecx
; SKX-NEXT: kshiftrb $2, %k0, %k1
; SKX-NEXT: kmovd %k1, %eax
; SKX-NEXT: kshiftrb $1, %k0, %k0
; SKX-NEXT: kmovd %k0, %edx
; SKX-NEXT: orb %cl, %al
; SKX-NEXT: orb %dl, %al
; SKX-NEXT: # kill: def $al killed $al killed $eax
%cmp = fcmp ueq <4 x float> %x, %y
%e1 = extractelement <4 x i1> %cmp, i32 1
%e2 = extractelement <4 x i1> %cmp, i32 2
%e3 = extractelement <4 x i1> %cmp, i32 3
%u1 = or i1 %e1, %e2
%u2 = or i1 %u1, %e3
ret i1 %u2
}
define i1 @movmsk_v2f64(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: movmsk_v2f64:
; SSE2-NEXT: cmplepd %xmm0, %xmm1
; SSE2-NEXT: movmskpd %xmm1, %eax
; SSE2-NEXT: cmpb $3, %al
; SSE2-NEXT: sete %al
; AVX-LABEL: movmsk_v2f64:
; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovmskpd %xmm0, %eax
; AVX-NEXT: cmpb $3, %al
; AVX-NEXT: sete %al
; KNL-LABEL: movmsk_v2f64:
; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vcmplepd %zmm0, %zmm1, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andb %cl, %al
; KNL-NEXT: # kill: def $al killed $al killed $eax
; KNL-NEXT: vzeroupper
; SKX-LABEL: movmsk_v2f64:
; SKX-NEXT: vcmplepd %xmm0, %xmm1, %k0
; SKX-NEXT: kshiftrb $1, %k0, %k1
; SKX-NEXT: kmovd %k1, %ecx
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb %cl, %al
; SKX-NEXT: # kill: def $al killed $al killed $eax
%cmp = fcmp oge <2 x double> %x, %y
%e1 = extractelement <2 x i1> %cmp, i32 0
%e2 = extractelement <2 x i1> %cmp, i32 1
%u1 = and i1 %e1, %e2
ret i1 %u1
}
define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: PR39665_c_ray:
; SSE2-NEXT: cmpltpd %xmm0, %xmm1
; SSE2-NEXT: movmskpd %xmm1, %eax
; SSE2-NEXT: cmpb $3, %al
; SSE2-NEXT: movl $42, %ecx
; SSE2-NEXT: movl $99, %eax
; SSE2-NEXT: cmovel %ecx, %eax
; AVX-LABEL: PR39665_c_ray:
; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovmskpd %xmm0, %eax
; AVX-NEXT: cmpb $3, %al
; AVX-NEXT: movl $42, %ecx
; AVX-NEXT: movl $99, %eax
; AVX-NEXT: cmovel %ecx, %eax
; KNL-LABEL: PR39665_c_ray:
; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: testb $1, %al
; KNL-NEXT: movl $42, %eax
; KNL-NEXT: movl $99, %edx
; KNL-NEXT: cmovel %edx, %eax
; KNL-NEXT: testb $1, %cl
; KNL-NEXT: cmovel %edx, %eax
; KNL-NEXT: vzeroupper
; SKX-LABEL: PR39665_c_ray:
; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
; SKX-NEXT: kshiftrb $1, %k0, %k1
; SKX-NEXT: kmovd %k1, %eax
; SKX-NEXT: kmovd %k0, %ecx
; SKX-NEXT: testb $1, %al
; SKX-NEXT: movl $42, %eax
; SKX-NEXT: movl $99, %edx
; SKX-NEXT: cmovel %edx, %eax
; SKX-NEXT: testb $1, %cl
; SKX-NEXT: cmovel %edx, %eax
%cmp = fcmp ogt <2 x double> %x, %y
%e1 = extractelement <2 x i1> %cmp, i32 0
%e2 = extractelement <2 x i1> %cmp, i32 1
%u = and i1 %e1, %e2
%r = select i1 %u, i32 42, i32 99
ret i32 %r
}