1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
; Test case: truncate each lane of the <2 x i64> argument to i1 and OR-reduce
; the resulting <2 x i1> mask to one i1 (%b) with
; llvm.experimental.vector.reduce.or.v2i1.
; The prefixed comment lines in the body are FileCheck assertions autogenerated
; by utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.
14 define i1 @trunc_v2i64_v2i1(<2 x i64>) {
15 ; SSE-LABEL: trunc_v2i64_v2i1:
17 ; SSE-NEXT: psllq $63, %xmm0
18 ; SSE-NEXT: movmskpd %xmm0, %eax
19 ; SSE-NEXT: testb %al, %al
23 ; AVX-LABEL: trunc_v2i64_v2i1:
25 ; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
26 ; AVX-NEXT: vmovmskpd %xmm0, %eax
27 ; AVX-NEXT: testb %al, %al
31 ; AVX512F-LABEL: trunc_v2i64_v2i1:
33 ; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0
34 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
35 ; AVX512F-NEXT: kmovw %k0, %eax
36 ; AVX512F-NEXT: testb $3, %al
37 ; AVX512F-NEXT: setne %al
38 ; AVX512F-NEXT: vzeroupper
41 ; AVX512BW-LABEL: trunc_v2i64_v2i1:
43 ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
44 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
45 ; AVX512BW-NEXT: kmovd %k0, %eax
46 ; AVX512BW-NEXT: testb $3, %al
47 ; AVX512BW-NEXT: setne %al
48 ; AVX512BW-NEXT: vzeroupper
51 ; AVX512VL-LABEL: trunc_v2i64_v2i1:
53 ; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0
54 ; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k0
55 ; AVX512VL-NEXT: kmovd %k0, %eax
56 ; AVX512VL-NEXT: testb $3, %al
57 ; AVX512VL-NEXT: setne %al
59 %a = trunc <2 x i64> %0 to <2 x i1>
60 %b = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %a)
; Test case: truncate each lane of the <4 x i32> argument to i1 and OR-reduce
; the resulting <4 x i1> mask to one i1 (%b) with
; llvm.experimental.vector.reduce.or.v4i1.
; The prefixed comment lines in the body are FileCheck assertions autogenerated
; by utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.
64 define i1 @trunc_v4i32_v4i1(<4 x i32>) {
65 ; SSE-LABEL: trunc_v4i32_v4i1:
67 ; SSE-NEXT: pslld $31, %xmm0
68 ; SSE-NEXT: movmskps %xmm0, %eax
69 ; SSE-NEXT: testb %al, %al
73 ; AVX-LABEL: trunc_v4i32_v4i1:
75 ; AVX-NEXT: vpslld $31, %xmm0, %xmm0
76 ; AVX-NEXT: vmovmskps %xmm0, %eax
77 ; AVX-NEXT: testb %al, %al
81 ; AVX512F-LABEL: trunc_v4i32_v4i1:
83 ; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
84 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
85 ; AVX512F-NEXT: kmovw %k0, %eax
86 ; AVX512F-NEXT: testb $15, %al
87 ; AVX512F-NEXT: setne %al
88 ; AVX512F-NEXT: vzeroupper
91 ; AVX512BW-LABEL: trunc_v4i32_v4i1:
93 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
94 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
95 ; AVX512BW-NEXT: kmovd %k0, %eax
96 ; AVX512BW-NEXT: testb $15, %al
97 ; AVX512BW-NEXT: setne %al
98 ; AVX512BW-NEXT: vzeroupper
101 ; AVX512VL-LABEL: trunc_v4i32_v4i1:
103 ; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
104 ; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k0
105 ; AVX512VL-NEXT: kmovd %k0, %eax
106 ; AVX512VL-NEXT: testb $15, %al
107 ; AVX512VL-NEXT: setne %al
108 ; AVX512VL-NEXT: retq
109 %a = trunc <4 x i32> %0 to <4 x i1>
110 %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
; Test case: truncate each lane of the <8 x i8> argument to i1 and OR-reduce
; the resulting <8 x i1> mask to one i1 (%b) with
; llvm.experimental.vector.reduce.or.v8i1.
; NOTE(review): the function name says v8i16, but the argument and the trunc
; both use <8 x i8> - confirm whether that is intentional; changing the type
; requires regenerating every assertion below.
; The prefixed comment lines in the body are FileCheck assertions autogenerated
; by utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.
114 define i1 @trunc_v8i16_v8i1(<8 x i8>) {
115 ; SSE2-LABEL: trunc_v8i16_v8i1:
117 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
118 ; SSE2-NEXT: psllw $15, %xmm0
119 ; SSE2-NEXT: packsswb %xmm0, %xmm0
120 ; SSE2-NEXT: pmovmskb %xmm0, %eax
121 ; SSE2-NEXT: testb %al, %al
122 ; SSE2-NEXT: setne %al
125 ; SSE41-LABEL: trunc_v8i16_v8i1:
127 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
128 ; SSE41-NEXT: psllw $15, %xmm0
129 ; SSE41-NEXT: packsswb %xmm0, %xmm0
130 ; SSE41-NEXT: pmovmskb %xmm0, %eax
131 ; SSE41-NEXT: testb %al, %al
132 ; SSE41-NEXT: setne %al
135 ; AVX-LABEL: trunc_v8i16_v8i1:
137 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
138 ; AVX-NEXT: vpsllw $15, %xmm0, %xmm0
139 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
140 ; AVX-NEXT: vpmovmskb %xmm0, %eax
141 ; AVX-NEXT: testb %al, %al
142 ; AVX-NEXT: setne %al
145 ; AVX512F-LABEL: trunc_v8i16_v8i1:
147 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
148 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
149 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
150 ; AVX512F-NEXT: kmovw %k0, %eax
151 ; AVX512F-NEXT: testb %al, %al
152 ; AVX512F-NEXT: setne %al
153 ; AVX512F-NEXT: vzeroupper
156 ; AVX512BW-LABEL: trunc_v8i16_v8i1:
158 ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
159 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
160 ; AVX512BW-NEXT: kmovd %k0, %eax
161 ; AVX512BW-NEXT: testb %al, %al
162 ; AVX512BW-NEXT: setne %al
163 ; AVX512BW-NEXT: vzeroupper
164 ; AVX512BW-NEXT: retq
166 ; AVX512VL-LABEL: trunc_v8i16_v8i1:
168 ; AVX512VL-NEXT: vpsllw $7, %xmm0, %xmm0
169 ; AVX512VL-NEXT: vpmovb2m %xmm0, %k0
170 ; AVX512VL-NEXT: kmovd %k0, %eax
171 ; AVX512VL-NEXT: testb %al, %al
172 ; AVX512VL-NEXT: setne %al
173 ; AVX512VL-NEXT: retq
174 %a = trunc <8 x i8> %0 to <8 x i1>
175 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; Test case: truncate each lane of the <16 x i8> argument to i1 and OR-reduce
; the resulting <16 x i1> mask to one i1 (%b) with
; llvm.experimental.vector.reduce.or.v16i1.
; The prefixed comment lines in the body are FileCheck assertions autogenerated
; by utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.
179 define i1 @trunc_v16i8_v16i1(<16 x i8>) {
180 ; SSE-LABEL: trunc_v16i8_v16i1:
182 ; SSE-NEXT: psllw $7, %xmm0
183 ; SSE-NEXT: pmovmskb %xmm0, %eax
184 ; SSE-NEXT: testw %ax, %ax
185 ; SSE-NEXT: setne %al
188 ; AVX-LABEL: trunc_v16i8_v16i1:
190 ; AVX-NEXT: vpsllw $7, %xmm0, %xmm0
191 ; AVX-NEXT: vpmovmskb %xmm0, %eax
192 ; AVX-NEXT: testw %ax, %ax
193 ; AVX-NEXT: setne %al
196 ; AVX512-LABEL: trunc_v16i8_v16i1:
198 ; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0
199 ; AVX512-NEXT: vpmovmskb %xmm0, %eax
200 ; AVX512-NEXT: testw %ax, %ax
201 ; AVX512-NEXT: setne %al
203 %a = trunc <16 x i8> %0 to <16 x i1>
204 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; Test case: truncate each lane of the <4 x i64> argument to i1 and OR-reduce
; the resulting <4 x i1> mask to one i1 (%b) with
; llvm.experimental.vector.reduce.or.v4i1.
; The prefixed comment lines in the body are FileCheck assertions autogenerated
; by utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.
208 define i1 @trunc_v4i64_v4i1(<4 x i64>) {
209 ; SSE-LABEL: trunc_v4i64_v4i1:
211 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
212 ; SSE-NEXT: pslld $31, %xmm0
213 ; SSE-NEXT: movmskps %xmm0, %eax
214 ; SSE-NEXT: testb %al, %al
215 ; SSE-NEXT: setne %al
218 ; AVX-LABEL: trunc_v4i64_v4i1:
220 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
221 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
222 ; AVX-NEXT: vpslld $31, %xmm0, %xmm0
223 ; AVX-NEXT: vmovmskps %xmm0, %eax
224 ; AVX-NEXT: testb %al, %al
225 ; AVX-NEXT: setne %al
226 ; AVX-NEXT: vzeroupper
229 ; AVX512F-LABEL: trunc_v4i64_v4i1:
231 ; AVX512F-NEXT: vpsllq $63, %ymm0, %ymm0
232 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
233 ; AVX512F-NEXT: kmovw %k0, %eax
234 ; AVX512F-NEXT: testb $15, %al
235 ; AVX512F-NEXT: setne %al
236 ; AVX512F-NEXT: vzeroupper
239 ; AVX512BW-LABEL: trunc_v4i64_v4i1:
241 ; AVX512BW-NEXT: vpsllq $63, %ymm0, %ymm0
242 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
243 ; AVX512BW-NEXT: kmovd %k0, %eax
244 ; AVX512BW-NEXT: testb $15, %al
245 ; AVX512BW-NEXT: setne %al
246 ; AVX512BW-NEXT: vzeroupper
247 ; AVX512BW-NEXT: retq
249 ; AVX512VL-LABEL: trunc_v4i64_v4i1:
251 ; AVX512VL-NEXT: vpsllq $63, %ymm0, %ymm0
252 ; AVX512VL-NEXT: vptestmq %ymm0, %ymm0, %k0
253 ; AVX512VL-NEXT: kmovd %k0, %eax
254 ; AVX512VL-NEXT: testb $15, %al
255 ; AVX512VL-NEXT: setne %al
256 ; AVX512VL-NEXT: vzeroupper
257 ; AVX512VL-NEXT: retq
258 %a = trunc <4 x i64> %0 to <4 x i1>
259 %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
; Test case: truncate each lane of the <8 x i32> argument to i1 and OR-reduce
; the resulting <8 x i1> mask to one i1 (%b) with
; llvm.experimental.vector.reduce.or.v8i1.
; The prefixed comment lines in the body are FileCheck assertions autogenerated
; by utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.
263 define i1 @trunc_v8i32_v8i1(<8 x i32>) {
264 ; SSE2-LABEL: trunc_v8i32_v8i1:
266 ; SSE2-NEXT: pslld $16, %xmm1
267 ; SSE2-NEXT: psrad $16, %xmm1
268 ; SSE2-NEXT: pslld $16, %xmm0
269 ; SSE2-NEXT: psrad $16, %xmm0
270 ; SSE2-NEXT: packssdw %xmm1, %xmm0
271 ; SSE2-NEXT: psllw $15, %xmm0
272 ; SSE2-NEXT: packsswb %xmm0, %xmm0
273 ; SSE2-NEXT: pmovmskb %xmm0, %eax
274 ; SSE2-NEXT: testb %al, %al
275 ; SSE2-NEXT: setne %al
278 ; SSE41-LABEL: trunc_v8i32_v8i1:
280 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
281 ; SSE41-NEXT: pshufb %xmm2, %xmm1
282 ; SSE41-NEXT: pshufb %xmm2, %xmm0
283 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
284 ; SSE41-NEXT: psllw $15, %xmm0
285 ; SSE41-NEXT: packsswb %xmm0, %xmm0
286 ; SSE41-NEXT: pmovmskb %xmm0, %eax
287 ; SSE41-NEXT: testb %al, %al
288 ; SSE41-NEXT: setne %al
291 ; AVX1-LABEL: trunc_v8i32_v8i1:
293 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
294 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
295 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
296 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
297 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
298 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
299 ; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
300 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
301 ; AVX1-NEXT: testb %al, %al
302 ; AVX1-NEXT: setne %al
303 ; AVX1-NEXT: vzeroupper
306 ; AVX2-LABEL: trunc_v8i32_v8i1:
308 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
309 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
310 ; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
311 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
312 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
313 ; AVX2-NEXT: testb %al, %al
314 ; AVX2-NEXT: setne %al
315 ; AVX2-NEXT: vzeroupper
318 ; AVX512F-LABEL: trunc_v8i32_v8i1:
320 ; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
321 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
322 ; AVX512F-NEXT: kmovw %k0, %eax
323 ; AVX512F-NEXT: testb %al, %al
324 ; AVX512F-NEXT: setne %al
325 ; AVX512F-NEXT: vzeroupper
328 ; AVX512BW-LABEL: trunc_v8i32_v8i1:
330 ; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0
331 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
332 ; AVX512BW-NEXT: kmovd %k0, %eax
333 ; AVX512BW-NEXT: testb %al, %al
334 ; AVX512BW-NEXT: setne %al
335 ; AVX512BW-NEXT: vzeroupper
336 ; AVX512BW-NEXT: retq
338 ; AVX512VL-LABEL: trunc_v8i32_v8i1:
340 ; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
341 ; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
342 ; AVX512VL-NEXT: kmovd %k0, %eax
343 ; AVX512VL-NEXT: testb %al, %al
344 ; AVX512VL-NEXT: setne %al
345 ; AVX512VL-NEXT: vzeroupper
346 ; AVX512VL-NEXT: retq
347 %a = trunc <8 x i32> %0 to <8 x i1>
348 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; Test case: truncate each lane of the <16 x i16> argument to i1 and OR-reduce
; the resulting <16 x i1> mask to one i1 (%b) with
; llvm.experimental.vector.reduce.or.v16i1.
; The prefixed comment lines in the body are FileCheck assertions autogenerated
; by utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.
352 define i1 @trunc_v16i16_v16i1(<16 x i16>) {
353 ; SSE2-LABEL: trunc_v16i16_v16i1:
355 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
356 ; SSE2-NEXT: pand %xmm2, %xmm1
357 ; SSE2-NEXT: pand %xmm2, %xmm0
358 ; SSE2-NEXT: packuswb %xmm1, %xmm0
359 ; SSE2-NEXT: psllw $7, %xmm0
360 ; SSE2-NEXT: pmovmskb %xmm0, %eax
361 ; SSE2-NEXT: testw %ax, %ax
362 ; SSE2-NEXT: setne %al
365 ; SSE41-LABEL: trunc_v16i16_v16i1:
367 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
368 ; SSE41-NEXT: pshufb %xmm2, %xmm1
369 ; SSE41-NEXT: pshufb %xmm2, %xmm0
370 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
371 ; SSE41-NEXT: psllw $7, %xmm0
372 ; SSE41-NEXT: pmovmskb %xmm0, %eax
373 ; SSE41-NEXT: testw %ax, %ax
374 ; SSE41-NEXT: setne %al
377 ; AVX1-LABEL: trunc_v16i16_v16i1:
379 ; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
380 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
381 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
382 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
383 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
384 ; AVX1-NEXT: testw %ax, %ax
385 ; AVX1-NEXT: setne %al
386 ; AVX1-NEXT: vzeroupper
389 ; AVX2-LABEL: trunc_v16i16_v16i1:
391 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
392 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
393 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
394 ; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
395 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
396 ; AVX2-NEXT: testw %ax, %ax
397 ; AVX2-NEXT: setne %al
398 ; AVX2-NEXT: vzeroupper
401 ; AVX512F-LABEL: trunc_v16i16_v16i1:
403 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
404 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
405 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
406 ; AVX512F-NEXT: kortestw %k0, %k0
407 ; AVX512F-NEXT: setne %al
408 ; AVX512F-NEXT: vzeroupper
411 ; AVX512BW-LABEL: trunc_v16i16_v16i1:
413 ; AVX512BW-NEXT: vpsllw $15, %ymm0, %ymm0
414 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
415 ; AVX512BW-NEXT: kortestw %k0, %k0
416 ; AVX512BW-NEXT: setne %al
417 ; AVX512BW-NEXT: vzeroupper
418 ; AVX512BW-NEXT: retq
420 ; AVX512VL-LABEL: trunc_v16i16_v16i1:
422 ; AVX512VL-NEXT: vpsllw $15, %ymm0, %ymm0
423 ; AVX512VL-NEXT: vpmovw2m %ymm0, %k0
424 ; AVX512VL-NEXT: kortestw %k0, %k0
425 ; AVX512VL-NEXT: setne %al
426 ; AVX512VL-NEXT: vzeroupper
427 ; AVX512VL-NEXT: retq
428 %a = trunc <16 x i16> %0 to <16 x i1>
429 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; Test case: truncate each lane of the <32 x i8> argument to i1 and OR-reduce
; the resulting <32 x i1> mask to one i1 (%b) with
; llvm.experimental.vector.reduce.or.v32i1.
; The prefixed comment lines in the body are FileCheck assertions autogenerated
; by utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.
433 define i1 @trunc_v32i8_v32i1(<32 x i8>) {
434 ; SSE-LABEL: trunc_v32i8_v32i1:
436 ; SSE-NEXT: por %xmm1, %xmm0
437 ; SSE-NEXT: psllw $7, %xmm0
438 ; SSE-NEXT: pmovmskb %xmm0, %eax
439 ; SSE-NEXT: testw %ax, %ax
440 ; SSE-NEXT: setne %al
443 ; AVX1-LABEL: trunc_v32i8_v32i1:
445 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
446 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
447 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
448 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
449 ; AVX1-NEXT: testw %ax, %ax
450 ; AVX1-NEXT: setne %al
451 ; AVX1-NEXT: vzeroupper
454 ; AVX2-LABEL: trunc_v32i8_v32i1:
456 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
457 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
458 ; AVX2-NEXT: testl %eax, %eax
459 ; AVX2-NEXT: setne %al
460 ; AVX2-NEXT: vzeroupper
463 ; AVX512F-LABEL: trunc_v32i8_v32i1:
465 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
466 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
467 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
468 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
469 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
470 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
471 ; AVX512F-NEXT: korw %k1, %k0, %k0
472 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
473 ; AVX512F-NEXT: korw %k1, %k0, %k0
474 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
475 ; AVX512F-NEXT: korw %k1, %k0, %k0
476 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
477 ; AVX512F-NEXT: korw %k1, %k0, %k0
478 ; AVX512F-NEXT: kmovw %k0, %eax
479 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
480 ; AVX512F-NEXT: vzeroupper
483 ; AVX512BW-LABEL: trunc_v32i8_v32i1:
485 ; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
486 ; AVX512BW-NEXT: vpmovmskb %ymm0, %eax
487 ; AVX512BW-NEXT: testl %eax, %eax
488 ; AVX512BW-NEXT: setne %al
489 ; AVX512BW-NEXT: vzeroupper
490 ; AVX512BW-NEXT: retq
492 ; AVX512VL-LABEL: trunc_v32i8_v32i1:
494 ; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0
495 ; AVX512VL-NEXT: vpmovmskb %ymm0, %eax
496 ; AVX512VL-NEXT: testl %eax, %eax
497 ; AVX512VL-NEXT: setne %al
498 ; AVX512VL-NEXT: vzeroupper
499 ; AVX512VL-NEXT: retq
500 %a = trunc <32 x i8> %0 to <32 x i1>
501 %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
; Test case: truncate each lane of the <8 x i64> argument to i1 and OR-reduce
; the resulting <8 x i1> mask to one i1 (%b) with
; llvm.experimental.vector.reduce.or.v8i1.
; The prefixed comment lines in the body are FileCheck assertions autogenerated
; by utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.
505 define i1 @trunc_v8i64_v8i1(<8 x i64>) {
506 ; SSE2-LABEL: trunc_v8i64_v8i1:
508 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
509 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
510 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
511 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
512 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
513 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
514 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7]
515 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
516 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
517 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
518 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
519 ; SSE2-NEXT: psllw $15, %xmm2
520 ; SSE2-NEXT: packsswb %xmm0, %xmm2
521 ; SSE2-NEXT: pmovmskb %xmm2, %eax
522 ; SSE2-NEXT: testb %al, %al
523 ; SSE2-NEXT: setne %al
526 ; SSE41-LABEL: trunc_v8i64_v8i1:
528 ; SSE41-NEXT: pxor %xmm4, %xmm4
529 ; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
530 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
531 ; SSE41-NEXT: packusdw %xmm3, %xmm2
532 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
533 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
534 ; SSE41-NEXT: packusdw %xmm1, %xmm0
535 ; SSE41-NEXT: packusdw %xmm2, %xmm0
536 ; SSE41-NEXT: psllw $15, %xmm0
537 ; SSE41-NEXT: packsswb %xmm0, %xmm0
538 ; SSE41-NEXT: pmovmskb %xmm0, %eax
539 ; SSE41-NEXT: testb %al, %al
540 ; SSE41-NEXT: setne %al
543 ; AVX1-LABEL: trunc_v8i64_v8i1:
545 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535]
546 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
547 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
548 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
549 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
550 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
551 ; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
552 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
553 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
554 ; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
555 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
556 ; AVX1-NEXT: testb %al, %al
557 ; AVX1-NEXT: setne %al
558 ; AVX1-NEXT: vzeroupper
561 ; AVX2-LABEL: trunc_v8i64_v8i1:
563 ; AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2
564 ; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
565 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2
566 ; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
567 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
568 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
569 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
570 ; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
571 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
572 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
573 ; AVX2-NEXT: testb %al, %al
574 ; AVX2-NEXT: setne %al
575 ; AVX2-NEXT: vzeroupper
578 ; AVX512F-LABEL: trunc_v8i64_v8i1:
580 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
581 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
582 ; AVX512F-NEXT: kmovw %k0, %eax
583 ; AVX512F-NEXT: testb %al, %al
584 ; AVX512F-NEXT: setne %al
585 ; AVX512F-NEXT: vzeroupper
588 ; AVX512BW-LABEL: trunc_v8i64_v8i1:
590 ; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0
591 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
592 ; AVX512BW-NEXT: kmovd %k0, %eax
593 ; AVX512BW-NEXT: testb %al, %al
594 ; AVX512BW-NEXT: setne %al
595 ; AVX512BW-NEXT: vzeroupper
596 ; AVX512BW-NEXT: retq
598 ; AVX512VL-LABEL: trunc_v8i64_v8i1:
600 ; AVX512VL-NEXT: vpsllq $63, %zmm0, %zmm0
601 ; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k0
602 ; AVX512VL-NEXT: kmovd %k0, %eax
603 ; AVX512VL-NEXT: testb %al, %al
604 ; AVX512VL-NEXT: setne %al
605 ; AVX512VL-NEXT: vzeroupper
606 ; AVX512VL-NEXT: retq
607 %a = trunc <8 x i64> %0 to <8 x i1>
608 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; Test case: truncate each lane of the <16 x i32> argument to i1 and OR-reduce
; the resulting <16 x i1> mask to one i1 (%b) with
; llvm.experimental.vector.reduce.or.v16i1.
; The prefixed comment lines in the body are FileCheck assertions autogenerated
; by utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.
612 define i1 @trunc_v16i32_v16i1(<16 x i32>) {
613 ; SSE2-LABEL: trunc_v16i32_v16i1:
615 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
616 ; SSE2-NEXT: pand %xmm4, %xmm3
617 ; SSE2-NEXT: pand %xmm4, %xmm2
618 ; SSE2-NEXT: packuswb %xmm3, %xmm2
619 ; SSE2-NEXT: pand %xmm4, %xmm1
620 ; SSE2-NEXT: pand %xmm4, %xmm0
621 ; SSE2-NEXT: packuswb %xmm1, %xmm0
622 ; SSE2-NEXT: packuswb %xmm2, %xmm0
623 ; SSE2-NEXT: psllw $7, %xmm0
624 ; SSE2-NEXT: pmovmskb %xmm0, %eax
625 ; SSE2-NEXT: testw %ax, %ax
626 ; SSE2-NEXT: setne %al
629 ; SSE41-LABEL: trunc_v16i32_v16i1:
631 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
632 ; SSE41-NEXT: pand %xmm4, %xmm3
633 ; SSE41-NEXT: pand %xmm4, %xmm2
634 ; SSE41-NEXT: packusdw %xmm3, %xmm2
635 ; SSE41-NEXT: pand %xmm4, %xmm1
636 ; SSE41-NEXT: pand %xmm4, %xmm0
637 ; SSE41-NEXT: packusdw %xmm1, %xmm0
638 ; SSE41-NEXT: packuswb %xmm2, %xmm0
639 ; SSE41-NEXT: psllw $7, %xmm0
640 ; SSE41-NEXT: pmovmskb %xmm0, %eax
641 ; SSE41-NEXT: testw %ax, %ax
642 ; SSE41-NEXT: setne %al
645 ; AVX1-LABEL: trunc_v16i32_v16i1:
647 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
648 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
649 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
650 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
651 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
652 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
653 ; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
654 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
655 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
656 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
657 ; AVX1-NEXT: testw %ax, %ax
658 ; AVX1-NEXT: setne %al
659 ; AVX1-NEXT: vzeroupper
662 ; AVX2-LABEL: trunc_v16i32_v16i1:
664 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
665 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
666 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
667 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
668 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
669 ; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
670 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
671 ; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
672 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
673 ; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
674 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
675 ; AVX2-NEXT: testw %ax, %ax
676 ; AVX2-NEXT: setne %al
677 ; AVX2-NEXT: vzeroupper
680 ; AVX512-LABEL: trunc_v16i32_v16i1:
682 ; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
683 ; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
684 ; AVX512-NEXT: kortestw %k0, %k0
685 ; AVX512-NEXT: setne %al
686 ; AVX512-NEXT: vzeroupper
688 %a = trunc <16 x i32> %0 to <16 x i1>
689 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; Test case: truncate each lane of the <32 x i16> argument to i1 and OR-reduce
; the resulting <32 x i1> mask to one i1 (%b) with
; llvm.experimental.vector.reduce.or.v32i1.
; The prefixed comment lines in the body are FileCheck assertions autogenerated
; by utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.
693 define i1 @trunc_v32i16_v32i1(<32 x i16>) {
694 ; SSE2-LABEL: trunc_v32i16_v32i1:
696 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
697 ; SSE2-NEXT: pand %xmm4, %xmm3
698 ; SSE2-NEXT: pand %xmm4, %xmm2
699 ; SSE2-NEXT: packuswb %xmm3, %xmm2
700 ; SSE2-NEXT: pand %xmm4, %xmm1
701 ; SSE2-NEXT: pand %xmm4, %xmm0
702 ; SSE2-NEXT: packuswb %xmm1, %xmm0
703 ; SSE2-NEXT: por %xmm2, %xmm0
704 ; SSE2-NEXT: psllw $7, %xmm0
705 ; SSE2-NEXT: pmovmskb %xmm0, %eax
706 ; SSE2-NEXT: testw %ax, %ax
707 ; SSE2-NEXT: setne %al
710 ; SSE41-LABEL: trunc_v32i16_v32i1:
712 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
713 ; SSE41-NEXT: pshufb %xmm4, %xmm3
714 ; SSE41-NEXT: pshufb %xmm4, %xmm2
715 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
716 ; SSE41-NEXT: pshufb %xmm4, %xmm1
717 ; SSE41-NEXT: pshufb %xmm4, %xmm0
718 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
719 ; SSE41-NEXT: por %xmm2, %xmm0
720 ; SSE41-NEXT: psllw $7, %xmm0
721 ; SSE41-NEXT: pmovmskb %xmm0, %eax
722 ; SSE41-NEXT: testw %ax, %ax
723 ; SSE41-NEXT: setne %al
726 ; AVX1-LABEL: trunc_v32i16_v32i1:
728 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
729 ; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
730 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
731 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
732 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
733 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
734 ; AVX1-NEXT: testw %ax, %ax
735 ; AVX1-NEXT: setne %al
736 ; AVX1-NEXT: vzeroupper
739 ; AVX2-LABEL: trunc_v32i16_v32i1:
741 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
742 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
743 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
744 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm2
745 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
746 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
747 ; AVX2-NEXT: vpackuswb %ymm0, %ymm2, %ymm0
748 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
749 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
750 ; AVX2-NEXT: testl %eax, %eax
751 ; AVX2-NEXT: setne %al
752 ; AVX2-NEXT: vzeroupper
755 ; AVX512F-LABEL: trunc_v32i16_v32i1:
757 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
758 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
759 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
760 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
761 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
762 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
763 ; AVX512F-NEXT: korw %k1, %k0, %k0
764 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
765 ; AVX512F-NEXT: korw %k1, %k0, %k0
766 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
767 ; AVX512F-NEXT: korw %k1, %k0, %k0
768 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
769 ; AVX512F-NEXT: korw %k1, %k0, %k0
770 ; AVX512F-NEXT: kmovw %k0, %eax
771 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
772 ; AVX512F-NEXT: vzeroupper
775 ; AVX512BW-LABEL: trunc_v32i16_v32i1:
777 ; AVX512BW-NEXT: vpsllw $15, %zmm0, %zmm0
778 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
779 ; AVX512BW-NEXT: kortestd %k0, %k0
780 ; AVX512BW-NEXT: setne %al
781 ; AVX512BW-NEXT: vzeroupper
782 ; AVX512BW-NEXT: retq
784 ; AVX512VL-LABEL: trunc_v32i16_v32i1:
786 ; AVX512VL-NEXT: vpsllw $15, %zmm0, %zmm0
787 ; AVX512VL-NEXT: vpmovw2m %zmm0, %k0
788 ; AVX512VL-NEXT: kortestd %k0, %k0
789 ; AVX512VL-NEXT: setne %al
790 ; AVX512VL-NEXT: vzeroupper
791 ; AVX512VL-NEXT: retq
792 %a = trunc <32 x i16> %0 to <32 x i1>
793 %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
; Test case: truncate each lane of the <64 x i8> argument to i1 and OR-reduce
; the resulting <64 x i1> mask to one i1 (%b) with
; llvm.experimental.vector.reduce.or.v64i1.
; The prefixed comment lines in the body are FileCheck assertions autogenerated
; by utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.
797 define i1 @trunc_v64i8_v64i1(<64 x i8>) {
798 ; SSE-LABEL: trunc_v64i8_v64i1:
800 ; SSE-NEXT: por %xmm3, %xmm1
801 ; SSE-NEXT: por %xmm2, %xmm1
802 ; SSE-NEXT: por %xmm0, %xmm1
803 ; SSE-NEXT: psllw $7, %xmm1
804 ; SSE-NEXT: pmovmskb %xmm1, %eax
805 ; SSE-NEXT: testw %ax, %ax
806 ; SSE-NEXT: setne %al
809 ; AVX1-LABEL: trunc_v64i8_v64i1:
811 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
812 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
813 ; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
814 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
815 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
816 ; AVX1-NEXT: testw %ax, %ax
817 ; AVX1-NEXT: setne %al
818 ; AVX1-NEXT: vzeroupper
821 ; AVX2-LABEL: trunc_v64i8_v64i1:
823 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
824 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
825 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
826 ; AVX2-NEXT: testl %eax, %eax
827 ; AVX2-NEXT: setne %al
828 ; AVX2-NEXT: vzeroupper
831 ; AVX512F-LABEL: trunc_v64i8_v64i1:
833 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
834 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
835 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
836 ; AVX512F-NEXT: vpor %xmm2, %xmm3, %xmm2
837 ; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1
838 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
839 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
840 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
841 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
842 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
843 ; AVX512F-NEXT: korw %k1, %k0, %k0
844 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
845 ; AVX512F-NEXT: korw %k1, %k0, %k0
846 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
847 ; AVX512F-NEXT: korw %k1, %k0, %k0
848 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
849 ; AVX512F-NEXT: korw %k1, %k0, %k0
850 ; AVX512F-NEXT: kmovw %k0, %eax
851 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
852 ; AVX512F-NEXT: vzeroupper
855 ; AVX512BW-LABEL: trunc_v64i8_v64i1:
857 ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0
858 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
859 ; AVX512BW-NEXT: kortestq %k0, %k0
860 ; AVX512BW-NEXT: setne %al
861 ; AVX512BW-NEXT: vzeroupper
862 ; AVX512BW-NEXT: retq
864 ; AVX512VL-LABEL: trunc_v64i8_v64i1:
866 ; AVX512VL-NEXT: vpsllw $7, %zmm0, %zmm0
867 ; AVX512VL-NEXT: vpmovb2m %zmm0, %k0
868 ; AVX512VL-NEXT: kortestq %k0, %k0
869 ; AVX512VL-NEXT: setne %al
870 ; AVX512VL-NEXT: vzeroupper
871 ; AVX512VL-NEXT: retq
872 %a = trunc <64 x i8> %0 to <64 x i1>
873 %b = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> %a)
; Test case: compare each lane of the <2 x i64> argument for equality with zero
; and OR-reduce the resulting <2 x i1> mask to one i1 (%b) with
; llvm.experimental.vector.reduce.or.v2i1, so %b is set when any lane is zero.
; The prefixed comment lines in the body are FileCheck assertions autogenerated
; by utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.
881 define i1 @icmp_v2i64_v2i1(<2 x i64>) {
882 ; SSE2-LABEL: icmp_v2i64_v2i1:
884 ; SSE2-NEXT: pxor %xmm1, %xmm1
885 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
886 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
887 ; SSE2-NEXT: pand %xmm1, %xmm0
888 ; SSE2-NEXT: movmskpd %xmm0, %eax
889 ; SSE2-NEXT: testb %al, %al
890 ; SSE2-NEXT: setne %al
893 ; SSE41-LABEL: icmp_v2i64_v2i1:
895 ; SSE41-NEXT: pxor %xmm1, %xmm1
896 ; SSE41-NEXT: pcmpeqq %xmm0, %xmm1
897 ; SSE41-NEXT: movmskpd %xmm1, %eax
898 ; SSE41-NEXT: testb %al, %al
899 ; SSE41-NEXT: setne %al
902 ; AVX-LABEL: icmp_v2i64_v2i1:
904 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
905 ; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
906 ; AVX-NEXT: vmovmskpd %xmm0, %eax
907 ; AVX-NEXT: testb %al, %al
908 ; AVX-NEXT: setne %al
911 ; AVX512F-LABEL: icmp_v2i64_v2i1:
913 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
914 ; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
915 ; AVX512F-NEXT: kmovw %k0, %eax
916 ; AVX512F-NEXT: testb $3, %al
917 ; AVX512F-NEXT: setne %al
918 ; AVX512F-NEXT: vzeroupper
921 ; AVX512BW-LABEL: icmp_v2i64_v2i1:
923 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
924 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
925 ; AVX512BW-NEXT: kmovd %k0, %eax
926 ; AVX512BW-NEXT: testb $3, %al
927 ; AVX512BW-NEXT: setne %al
928 ; AVX512BW-NEXT: vzeroupper
929 ; AVX512BW-NEXT: retq
931 ; AVX512VL-LABEL: icmp_v2i64_v2i1:
933 ; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k0
934 ; AVX512VL-NEXT: kmovd %k0, %eax
935 ; AVX512VL-NEXT: testb $3, %al
936 ; AVX512VL-NEXT: setne %al
937 ; AVX512VL-NEXT: retq
938 %a = icmp eq <2 x i64> %0, zeroinitializer
939 %b = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %a)
; Test case: compare each lane of the <4 x i32> argument for equality with zero
; and OR-reduce the resulting <4 x i1> mask to one i1 (%b) with
; llvm.experimental.vector.reduce.or.v4i1, so %b is set when any lane is zero.
; The prefixed comment lines in the body are FileCheck assertions autogenerated
; by utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.
943 define i1 @icmp_v4i32_v4i1(<4 x i32>) {
944 ; SSE-LABEL: icmp_v4i32_v4i1:
946 ; SSE-NEXT: pxor %xmm1, %xmm1
947 ; SSE-NEXT: pcmpeqd %xmm0, %xmm1
948 ; SSE-NEXT: movmskps %xmm1, %eax
949 ; SSE-NEXT: testb %al, %al
950 ; SSE-NEXT: setne %al
953 ; AVX-LABEL: icmp_v4i32_v4i1:
955 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
956 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
957 ; AVX-NEXT: vmovmskps %xmm0, %eax
958 ; AVX-NEXT: testb %al, %al
959 ; AVX-NEXT: setne %al
962 ; AVX512F-LABEL: icmp_v4i32_v4i1:
964 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
965 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
966 ; AVX512F-NEXT: kmovw %k0, %eax
967 ; AVX512F-NEXT: testb $15, %al
968 ; AVX512F-NEXT: setne %al
969 ; AVX512F-NEXT: vzeroupper
972 ; AVX512BW-LABEL: icmp_v4i32_v4i1:
974 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
975 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
976 ; AVX512BW-NEXT: kmovd %k0, %eax
977 ; AVX512BW-NEXT: testb $15, %al
978 ; AVX512BW-NEXT: setne %al
979 ; AVX512BW-NEXT: vzeroupper
980 ; AVX512BW-NEXT: retq
982 ; AVX512VL-LABEL: icmp_v4i32_v4i1:
984 ; AVX512VL-NEXT: vptestnmd %xmm0, %xmm0, %k0
985 ; AVX512VL-NEXT: kmovd %k0, %eax
986 ; AVX512VL-NEXT: testb $15, %al
987 ; AVX512VL-NEXT: setne %al
988 ; AVX512VL-NEXT: retq
989 %a = icmp eq <4 x i32> %0, zeroinitializer
990 %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
; Test case: compare each lane of the <8 x i8> argument for equality with zero
; and OR-reduce the resulting <8 x i1> mask to one i1 (%b) with
; llvm.experimental.vector.reduce.or.v8i1, so %b is set when any lane is zero.
; NOTE(review): the function name says v8i16, but the argument and the icmp
; both use <8 x i8> - confirm whether that is intentional; changing the type
; requires regenerating every assertion below.
; The prefixed comment lines in the body are FileCheck assertions autogenerated
; by utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.
994 define i1 @icmp_v8i16_v8i1(<8 x i8>) {
995 ; SSE2-LABEL: icmp_v8i16_v8i1:
997 ; SSE2-NEXT: pxor %xmm1, %xmm1
998 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
999 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1000 ; SSE2-NEXT: packsswb %xmm0, %xmm0
1001 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1002 ; SSE2-NEXT: testb %al, %al
1003 ; SSE2-NEXT: setne %al
1006 ; SSE41-LABEL: icmp_v8i16_v8i1:
1008 ; SSE41-NEXT: pxor %xmm1, %xmm1
1009 ; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
1010 ; SSE41-NEXT: pmovsxbw %xmm1, %xmm0
1011 ; SSE41-NEXT: packsswb %xmm0, %xmm0
1012 ; SSE41-NEXT: pmovmskb %xmm0, %eax
1013 ; SSE41-NEXT: testb %al, %al
1014 ; SSE41-NEXT: setne %al
1017 ; AVX-LABEL: icmp_v8i16_v8i1:
1019 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1020 ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1021 ; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
1022 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
1023 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1024 ; AVX-NEXT: testb %al, %al
1025 ; AVX-NEXT: setne %al
1028 ; AVX512F-LABEL: icmp_v8i16_v8i1:
1030 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1031 ; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1032 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1033 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1034 ; AVX512F-NEXT: kmovw %k0, %eax
1035 ; AVX512F-NEXT: testb %al, %al
1036 ; AVX512F-NEXT: setne %al
1037 ; AVX512F-NEXT: vzeroupper
1038 ; AVX512F-NEXT: retq
1040 ; AVX512BW-LABEL: icmp_v8i16_v8i1:
1041 ; AVX512BW: # %bb.0:
1042 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1043 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
1044 ; AVX512BW-NEXT: kmovd %k0, %eax
1045 ; AVX512BW-NEXT: testb %al, %al
1046 ; AVX512BW-NEXT: setne %al
1047 ; AVX512BW-NEXT: vzeroupper
1048 ; AVX512BW-NEXT: retq
1050 ; AVX512VL-LABEL: icmp_v8i16_v8i1:
1051 ; AVX512VL: # %bb.0:
1052 ; AVX512VL-NEXT: vptestnmb %xmm0, %xmm0, %k0
1053 ; AVX512VL-NEXT: kmovd %k0, %eax
1054 ; AVX512VL-NEXT: testb %al, %al
1055 ; AVX512VL-NEXT: setne %al
1056 ; AVX512VL-NEXT: retq
1057 %a = icmp eq <8 x i8> %0, zeroinitializer
1058 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; Any-lane-is-zero test on a full 128-bit byte vector: each lane of the
; <16 x i8> argument is compared for equality with zero and the <16 x i1>
; mask is OR-reduced to one i1.
; Per the assertions, the SSE, AVX and AVX512F runs use a byte compare followed
; by pmovmskb and a 16-bit test (testw), while the AVX512BW and AVX512VL runs
; produce a mask register with vptestnmb and test it with kortestw.
1062 define i1 @icmp_v16i8_v16i1(<16 x i8>) {
1063 ; SSE-LABEL: icmp_v16i8_v16i1:
1065 ; SSE-NEXT: pxor %xmm1, %xmm1
1066 ; SSE-NEXT: pcmpeqb %xmm0, %xmm1
1067 ; SSE-NEXT: pmovmskb %xmm1, %eax
1068 ; SSE-NEXT: testw %ax, %ax
1069 ; SSE-NEXT: setne %al
1072 ; AVX-LABEL: icmp_v16i8_v16i1:
1074 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1075 ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1076 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1077 ; AVX-NEXT: testw %ax, %ax
1078 ; AVX-NEXT: setne %al
1081 ; AVX512F-LABEL: icmp_v16i8_v16i1:
1083 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1084 ; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1085 ; AVX512F-NEXT: vpmovmskb %xmm0, %eax
1086 ; AVX512F-NEXT: testw %ax, %ax
1087 ; AVX512F-NEXT: setne %al
1088 ; AVX512F-NEXT: retq
1090 ; AVX512BW-LABEL: icmp_v16i8_v16i1:
1091 ; AVX512BW: # %bb.0:
1092 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1093 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
1094 ; AVX512BW-NEXT: kortestw %k0, %k0
1095 ; AVX512BW-NEXT: setne %al
1096 ; AVX512BW-NEXT: vzeroupper
1097 ; AVX512BW-NEXT: retq
1099 ; AVX512VL-LABEL: icmp_v16i8_v16i1:
1100 ; AVX512VL: # %bb.0:
1101 ; AVX512VL-NEXT: vptestnmb %xmm0, %xmm0, %k0
1102 ; AVX512VL-NEXT: kortestw %k0, %k0
1103 ; AVX512VL-NEXT: setne %al
1104 ; AVX512VL-NEXT: retq
; IR under test: lane-wise compare against zero, then OR-reduce the i1 mask.
1105 %a = icmp eq <16 x i8> %0, zeroinitializer
1106 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; Any-lane-is-zero test on a 256-bit vector of quadwords: each lane of the
; <4 x i64> argument is compared for equality with zero and the <4 x i1> mask
; is OR-reduced to one i1.
; Per the assertions, the SSE2 run builds the 64-bit equality from pcmpeqd plus
; a pshufd/pand of the two 32-bit halves, whereas the SSE41 run uses pcmpeqq.
; The three AVX512 runs use vptestnmq and then "testb $15", so only the low
; four mask bits (one per lane) are tested.
1110 define i1 @icmp_v4i64_v4i1(<4 x i64>) {
1111 ; SSE2-LABEL: icmp_v4i64_v4i1:
1113 ; SSE2-NEXT: pxor %xmm2, %xmm2
1114 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
1115 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
1116 ; SSE2-NEXT: pand %xmm1, %xmm3
1117 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
1118 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
1119 ; SSE2-NEXT: pand %xmm0, %xmm1
1120 ; SSE2-NEXT: packssdw %xmm3, %xmm1
1121 ; SSE2-NEXT: movmskps %xmm1, %eax
1122 ; SSE2-NEXT: testb %al, %al
1123 ; SSE2-NEXT: setne %al
1126 ; SSE41-LABEL: icmp_v4i64_v4i1:
1128 ; SSE41-NEXT: pxor %xmm2, %xmm2
1129 ; SSE41-NEXT: pcmpeqq %xmm2, %xmm1
1130 ; SSE41-NEXT: pcmpeqq %xmm2, %xmm0
1131 ; SSE41-NEXT: packssdw %xmm1, %xmm0
1132 ; SSE41-NEXT: movmskps %xmm0, %eax
1133 ; SSE41-NEXT: testb %al, %al
1134 ; SSE41-NEXT: setne %al
1137 ; AVX1-LABEL: icmp_v4i64_v4i1:
1139 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1140 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1141 ; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
1142 ; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
1143 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1144 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
1145 ; AVX1-NEXT: testb %al, %al
1146 ; AVX1-NEXT: setne %al
1147 ; AVX1-NEXT: vzeroupper
1150 ; AVX2-LABEL: icmp_v4i64_v4i1:
1152 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1153 ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
1154 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
1155 ; AVX2-NEXT: testb %al, %al
1156 ; AVX2-NEXT: setne %al
1157 ; AVX2-NEXT: vzeroupper
1160 ; AVX512F-LABEL: icmp_v4i64_v4i1:
1162 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1163 ; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
1164 ; AVX512F-NEXT: kmovw %k0, %eax
1165 ; AVX512F-NEXT: testb $15, %al
1166 ; AVX512F-NEXT: setne %al
1167 ; AVX512F-NEXT: vzeroupper
1168 ; AVX512F-NEXT: retq
1170 ; AVX512BW-LABEL: icmp_v4i64_v4i1:
1171 ; AVX512BW: # %bb.0:
1172 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1173 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
1174 ; AVX512BW-NEXT: kmovd %k0, %eax
1175 ; AVX512BW-NEXT: testb $15, %al
1176 ; AVX512BW-NEXT: setne %al
1177 ; AVX512BW-NEXT: vzeroupper
1178 ; AVX512BW-NEXT: retq
1180 ; AVX512VL-LABEL: icmp_v4i64_v4i1:
1181 ; AVX512VL: # %bb.0:
1182 ; AVX512VL-NEXT: vptestnmq %ymm0, %ymm0, %k0
1183 ; AVX512VL-NEXT: kmovd %k0, %eax
1184 ; AVX512VL-NEXT: testb $15, %al
1185 ; AVX512VL-NEXT: setne %al
1186 ; AVX512VL-NEXT: vzeroupper
1187 ; AVX512VL-NEXT: retq
; IR under test: lane-wise compare against zero, then OR-reduce the i1 mask.
1188 %a = icmp eq <4 x i64> %0, zeroinitializer
1189 %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
; Any-lane-is-zero test on a 256-bit vector of dwords: each lane of the
; <8 x i32> argument is compared for equality with zero and the <8 x i1> mask
; is OR-reduced to one i1.
; Per the assertions, the SSE runs compare the two 128-bit halves separately
; and pack the results down to bytes before pmovmskb, the AVX1 and AVX2 runs
; read the sign bits with vmovmskps on a ymm register, and the AVX512 runs use
; vptestnmd followed by an 8-bit test of the mask.
1193 define i1 @icmp_v8i32_v8i1(<8 x i32>) {
1194 ; SSE-LABEL: icmp_v8i32_v8i1:
1196 ; SSE-NEXT: pxor %xmm2, %xmm2
1197 ; SSE-NEXT: pcmpeqd %xmm2, %xmm1
1198 ; SSE-NEXT: pcmpeqd %xmm2, %xmm0
1199 ; SSE-NEXT: packssdw %xmm1, %xmm0
1200 ; SSE-NEXT: packsswb %xmm0, %xmm0
1201 ; SSE-NEXT: pmovmskb %xmm0, %eax
1202 ; SSE-NEXT: testb %al, %al
1203 ; SSE-NEXT: setne %al
1206 ; AVX1-LABEL: icmp_v8i32_v8i1:
1208 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1209 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1210 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
1211 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
1212 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1213 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1214 ; AVX1-NEXT: testb %al, %al
1215 ; AVX1-NEXT: setne %al
1216 ; AVX1-NEXT: vzeroupper
1219 ; AVX2-LABEL: icmp_v8i32_v8i1:
1221 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1222 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
1223 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1224 ; AVX2-NEXT: testb %al, %al
1225 ; AVX2-NEXT: setne %al
1226 ; AVX2-NEXT: vzeroupper
1229 ; AVX512F-LABEL: icmp_v8i32_v8i1:
1231 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1232 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
1233 ; AVX512F-NEXT: kmovw %k0, %eax
1234 ; AVX512F-NEXT: testb %al, %al
1235 ; AVX512F-NEXT: setne %al
1236 ; AVX512F-NEXT: vzeroupper
1237 ; AVX512F-NEXT: retq
1239 ; AVX512BW-LABEL: icmp_v8i32_v8i1:
1240 ; AVX512BW: # %bb.0:
1241 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1242 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
1243 ; AVX512BW-NEXT: kmovd %k0, %eax
1244 ; AVX512BW-NEXT: testb %al, %al
1245 ; AVX512BW-NEXT: setne %al
1246 ; AVX512BW-NEXT: vzeroupper
1247 ; AVX512BW-NEXT: retq
1249 ; AVX512VL-LABEL: icmp_v8i32_v8i1:
1250 ; AVX512VL: # %bb.0:
1251 ; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
1252 ; AVX512VL-NEXT: kmovd %k0, %eax
1253 ; AVX512VL-NEXT: testb %al, %al
1254 ; AVX512VL-NEXT: setne %al
1255 ; AVX512VL-NEXT: vzeroupper
1256 ; AVX512VL-NEXT: retq
; IR under test: lane-wise compare against zero, then OR-reduce the i1 mask.
1257 %a = icmp eq <8 x i32> %0, zeroinitializer
1258 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; Any-lane-is-zero test on a 256-bit vector of words: each lane of the
; <16 x i16> argument is compared for equality with zero and the <16 x i1>
; mask is OR-reduced to one i1.
; Per the assertions, the SSE, AVX1 and AVX2 runs pack the word compare results
; to bytes and test the 16-bit pmovmskb result, the AVX512F run sign-extends
; the compare result to dwords for vptestmd, and the AVX512BW and AVX512VL runs
; use vptestnmw; all AVX512 runs finish with kortestw.
1262 define i1 @icmp_v16i16_v16i1(<16 x i16>) {
1263 ; SSE-LABEL: icmp_v16i16_v16i1:
1265 ; SSE-NEXT: pxor %xmm2, %xmm2
1266 ; SSE-NEXT: pcmpeqw %xmm2, %xmm1
1267 ; SSE-NEXT: pcmpeqw %xmm2, %xmm0
1268 ; SSE-NEXT: packsswb %xmm1, %xmm0
1269 ; SSE-NEXT: pmovmskb %xmm0, %eax
1270 ; SSE-NEXT: testw %ax, %ax
1271 ; SSE-NEXT: setne %al
1274 ; AVX1-LABEL: icmp_v16i16_v16i1:
1276 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1277 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1278 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
1279 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
1280 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1281 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1282 ; AVX1-NEXT: testw %ax, %ax
1283 ; AVX1-NEXT: setne %al
1284 ; AVX1-NEXT: vzeroupper
1287 ; AVX2-LABEL: icmp_v16i16_v16i1:
1289 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1290 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1291 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1292 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1293 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
1294 ; AVX2-NEXT: testw %ax, %ax
1295 ; AVX2-NEXT: setne %al
1296 ; AVX2-NEXT: vzeroupper
1299 ; AVX512F-LABEL: icmp_v16i16_v16i1:
1301 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1302 ; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1303 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
1304 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1305 ; AVX512F-NEXT: kortestw %k0, %k0
1306 ; AVX512F-NEXT: setne %al
1307 ; AVX512F-NEXT: vzeroupper
1308 ; AVX512F-NEXT: retq
1310 ; AVX512BW-LABEL: icmp_v16i16_v16i1:
1311 ; AVX512BW: # %bb.0:
1312 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1313 ; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
1314 ; AVX512BW-NEXT: kortestw %k0, %k0
1315 ; AVX512BW-NEXT: setne %al
1316 ; AVX512BW-NEXT: vzeroupper
1317 ; AVX512BW-NEXT: retq
1319 ; AVX512VL-LABEL: icmp_v16i16_v16i1:
1320 ; AVX512VL: # %bb.0:
1321 ; AVX512VL-NEXT: vptestnmw %ymm0, %ymm0, %k0
1322 ; AVX512VL-NEXT: kortestw %k0, %k0
1323 ; AVX512VL-NEXT: setne %al
1324 ; AVX512VL-NEXT: vzeroupper
1325 ; AVX512VL-NEXT: retq
; IR under test: lane-wise compare against zero, then OR-reduce the i1 mask.
1326 %a = icmp eq <16 x i16> %0, zeroinitializer
1327 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; Any-lane-is-zero test on a 256-bit vector of bytes: each lane of the
; <32 x i8> argument is compared for equality with zero and the <32 x i1>
; mask is OR-reduced to one i1.
; Per the assertions, the SSE and AVX1 runs OR the two 128-bit compare results
; together before a single pmovmskb, the AVX2 run tests the full 32-bit
; vpmovmskb result (testl), and the AVX512BW and AVX512VL runs use vptestnmb
; with kortestd. The AVX512F run instead folds the 16-bit mask register with a
; kshiftrw/korw ladder (shifts of 8, 4, 2 and 1) and returns bit 0 via kmovw.
1331 define i1 @icmp_v32i8_v32i1(<32 x i8>) {
1332 ; SSE-LABEL: icmp_v32i8_v32i1:
1334 ; SSE-NEXT: pxor %xmm2, %xmm2
1335 ; SSE-NEXT: pcmpeqb %xmm2, %xmm1
1336 ; SSE-NEXT: pcmpeqb %xmm2, %xmm0
1337 ; SSE-NEXT: por %xmm1, %xmm0
1338 ; SSE-NEXT: pmovmskb %xmm0, %eax
1339 ; SSE-NEXT: testw %ax, %ax
1340 ; SSE-NEXT: setne %al
1343 ; AVX1-LABEL: icmp_v32i8_v32i1:
1345 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1346 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1347 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
1348 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
1349 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1350 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1351 ; AVX1-NEXT: testw %ax, %ax
1352 ; AVX1-NEXT: setne %al
1353 ; AVX1-NEXT: vzeroupper
1356 ; AVX2-LABEL: icmp_v32i8_v32i1:
1358 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1359 ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
1360 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1361 ; AVX2-NEXT: testl %eax, %eax
1362 ; AVX2-NEXT: setne %al
1363 ; AVX2-NEXT: vzeroupper
1366 ; AVX512F-LABEL: icmp_v32i8_v32i1:
1368 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1369 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
1370 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
1371 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
1372 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1373 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1374 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
1375 ; AVX512F-NEXT: korw %k1, %k0, %k0
1376 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
1377 ; AVX512F-NEXT: korw %k1, %k0, %k0
1378 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
1379 ; AVX512F-NEXT: korw %k1, %k0, %k0
1380 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
1381 ; AVX512F-NEXT: korw %k1, %k0, %k0
1382 ; AVX512F-NEXT: kmovw %k0, %eax
1383 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
1384 ; AVX512F-NEXT: vzeroupper
1385 ; AVX512F-NEXT: retq
1387 ; AVX512BW-LABEL: icmp_v32i8_v32i1:
1388 ; AVX512BW: # %bb.0:
1389 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1390 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
1391 ; AVX512BW-NEXT: kortestd %k0, %k0
1392 ; AVX512BW-NEXT: setne %al
1393 ; AVX512BW-NEXT: vzeroupper
1394 ; AVX512BW-NEXT: retq
1396 ; AVX512VL-LABEL: icmp_v32i8_v32i1:
1397 ; AVX512VL: # %bb.0:
1398 ; AVX512VL-NEXT: vptestnmb %ymm0, %ymm0, %k0
1399 ; AVX512VL-NEXT: kortestd %k0, %k0
1400 ; AVX512VL-NEXT: setne %al
1401 ; AVX512VL-NEXT: vzeroupper
1402 ; AVX512VL-NEXT: retq
; IR under test: lane-wise compare against zero, then OR-reduce the i1 mask.
1403 %a = icmp eq <32 x i8> %0, zeroinitializer
1404 %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
; Any-lane-is-zero test on a 512-bit vector of quadwords: each lane of the
; <8 x i64> argument is compared for equality with zero and the <8 x i1> mask
; is OR-reduced to one i1.
; Per the assertions, the SSE2 run builds each 64-bit equality from pcmpeqd
; plus a pshufd/pand of the two 32-bit halves, while the SSE41, AVX1 and AVX2
; runs use (v)pcmpeqq; the results are then packed down before the movmsk.
; All three AVX512 runs use a single vptestnmq on the zmm register followed by
; an 8-bit test of the mask.
1408 define i1 @icmp_v8i64_v8i1(<8 x i64>) {
1409 ; SSE2-LABEL: icmp_v8i64_v8i1:
1411 ; SSE2-NEXT: pxor %xmm4, %xmm4
1412 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
1413 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
1414 ; SSE2-NEXT: pand %xmm3, %xmm5
1415 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
1416 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
1417 ; SSE2-NEXT: pand %xmm2, %xmm3
1418 ; SSE2-NEXT: packssdw %xmm5, %xmm3
1419 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
1420 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
1421 ; SSE2-NEXT: pand %xmm1, %xmm2
1422 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
1423 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
1424 ; SSE2-NEXT: pand %xmm0, %xmm1
1425 ; SSE2-NEXT: packssdw %xmm2, %xmm1
1426 ; SSE2-NEXT: packssdw %xmm3, %xmm1
1427 ; SSE2-NEXT: packsswb %xmm0, %xmm1
1428 ; SSE2-NEXT: pmovmskb %xmm1, %eax
1429 ; SSE2-NEXT: testb %al, %al
1430 ; SSE2-NEXT: setne %al
1433 ; SSE41-LABEL: icmp_v8i64_v8i1:
1435 ; SSE41-NEXT: pxor %xmm4, %xmm4
1436 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm3
1437 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm2
1438 ; SSE41-NEXT: packssdw %xmm3, %xmm2
1439 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
1440 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
1441 ; SSE41-NEXT: packssdw %xmm1, %xmm0
1442 ; SSE41-NEXT: packssdw %xmm2, %xmm0
1443 ; SSE41-NEXT: packsswb %xmm0, %xmm0
1444 ; SSE41-NEXT: pmovmskb %xmm0, %eax
1445 ; SSE41-NEXT: testb %al, %al
1446 ; SSE41-NEXT: setne %al
1449 ; AVX1-LABEL: icmp_v8i64_v8i1:
1451 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1452 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1453 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
1454 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
1455 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1456 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1457 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
1458 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
1459 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1460 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1461 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1462 ; AVX1-NEXT: testb %al, %al
1463 ; AVX1-NEXT: setne %al
1464 ; AVX1-NEXT: vzeroupper
1467 ; AVX2-LABEL: icmp_v8i64_v8i1:
1469 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1470 ; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
1471 ; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
1472 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1473 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1474 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1475 ; AVX2-NEXT: testb %al, %al
1476 ; AVX2-NEXT: setne %al
1477 ; AVX2-NEXT: vzeroupper
1480 ; AVX512F-LABEL: icmp_v8i64_v8i1:
1482 ; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
1483 ; AVX512F-NEXT: kmovw %k0, %eax
1484 ; AVX512F-NEXT: testb %al, %al
1485 ; AVX512F-NEXT: setne %al
1486 ; AVX512F-NEXT: vzeroupper
1487 ; AVX512F-NEXT: retq
1489 ; AVX512BW-LABEL: icmp_v8i64_v8i1:
1490 ; AVX512BW: # %bb.0:
1491 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
1492 ; AVX512BW-NEXT: kmovd %k0, %eax
1493 ; AVX512BW-NEXT: testb %al, %al
1494 ; AVX512BW-NEXT: setne %al
1495 ; AVX512BW-NEXT: vzeroupper
1496 ; AVX512BW-NEXT: retq
1498 ; AVX512VL-LABEL: icmp_v8i64_v8i1:
1499 ; AVX512VL: # %bb.0:
1500 ; AVX512VL-NEXT: vptestnmq %zmm0, %zmm0, %k0
1501 ; AVX512VL-NEXT: kmovd %k0, %eax
1502 ; AVX512VL-NEXT: testb %al, %al
1503 ; AVX512VL-NEXT: setne %al
1504 ; AVX512VL-NEXT: vzeroupper
1505 ; AVX512VL-NEXT: retq
; IR under test: lane-wise compare against zero, then OR-reduce the i1 mask.
1506 %a = icmp eq <8 x i64> %0, zeroinitializer
1507 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; Any-lane-is-zero test on a 512-bit vector of dwords: each lane of the
; <16 x i32> argument is compared for equality with zero and the <16 x i1>
; mask is OR-reduced to one i1.
; Per the assertions, the SSE, AVX1 and AVX2 runs compare each 128/256-bit
; piece, pack the results down to one byte per lane and test the 16-bit
; pmovmskb result. The three AVX512 runs are covered by the shared AVX512
; prefix, which expects vptestnmd on the zmm register followed by kortestw.
1511 define i1 @icmp_v16i32_v16i1(<16 x i32>) {
1512 ; SSE-LABEL: icmp_v16i32_v16i1:
1514 ; SSE-NEXT: pxor %xmm4, %xmm4
1515 ; SSE-NEXT: pcmpeqd %xmm4, %xmm3
1516 ; SSE-NEXT: pcmpeqd %xmm4, %xmm2
1517 ; SSE-NEXT: packssdw %xmm3, %xmm2
1518 ; SSE-NEXT: pcmpeqd %xmm4, %xmm1
1519 ; SSE-NEXT: pcmpeqd %xmm4, %xmm0
1520 ; SSE-NEXT: packssdw %xmm1, %xmm0
1521 ; SSE-NEXT: packsswb %xmm2, %xmm0
1522 ; SSE-NEXT: pmovmskb %xmm0, %eax
1523 ; SSE-NEXT: testw %ax, %ax
1524 ; SSE-NEXT: setne %al
1527 ; AVX1-LABEL: icmp_v16i32_v16i1:
1529 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1530 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1531 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
1532 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
1533 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1534 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1535 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
1536 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
1537 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1538 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1539 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1540 ; AVX1-NEXT: testw %ax, %ax
1541 ; AVX1-NEXT: setne %al
1542 ; AVX1-NEXT: vzeroupper
1545 ; AVX2-LABEL: icmp_v16i32_v16i1:
1547 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1548 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
1549 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
1550 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1551 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1552 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1553 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1554 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
1555 ; AVX2-NEXT: testw %ax, %ax
1556 ; AVX2-NEXT: setne %al
1557 ; AVX2-NEXT: vzeroupper
1560 ; AVX512-LABEL: icmp_v16i32_v16i1:
1562 ; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
1563 ; AVX512-NEXT: kortestw %k0, %k0
1564 ; AVX512-NEXT: setne %al
1565 ; AVX512-NEXT: vzeroupper
; IR under test: lane-wise compare against zero, then OR-reduce the i1 mask.
1567 %a = icmp eq <16 x i32> %0, zeroinitializer
1568 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; Any-lane-is-zero test on a 512-bit vector of words: each lane of the
; <32 x i16> argument is compared for equality with zero and the <32 x i1>
; mask is OR-reduced to one i1.
; Per the assertions, the SSE and AVX1 runs pack the word compares to bytes and
; OR the two 16-byte halves together before pmovmskb; the AVX1 sequence also
; contains a "vpsllw $7" before vpmovmskb that the SSE sequence does not.
; The AVX2 run tests a 32-bit vpmovmskb result, the AVX512F run folds a 16-bit
; mask register with a kshiftrw/korw ladder, and the AVX512BW and AVX512VL runs
; use vptestnmw with kortestd.
1572 define i1 @icmp_v32i16_v32i1(<32 x i16>) {
1573 ; SSE-LABEL: icmp_v32i16_v32i1:
1575 ; SSE-NEXT: pxor %xmm4, %xmm4
1576 ; SSE-NEXT: pcmpeqw %xmm4, %xmm1
1577 ; SSE-NEXT: pcmpeqw %xmm4, %xmm0
1578 ; SSE-NEXT: packsswb %xmm1, %xmm0
1579 ; SSE-NEXT: pcmpeqw %xmm4, %xmm3
1580 ; SSE-NEXT: pcmpeqw %xmm4, %xmm2
1581 ; SSE-NEXT: packsswb %xmm3, %xmm2
1582 ; SSE-NEXT: por %xmm0, %xmm2
1583 ; SSE-NEXT: pmovmskb %xmm2, %eax
1584 ; SSE-NEXT: testw %ax, %ax
1585 ; SSE-NEXT: setne %al
1588 ; AVX1-LABEL: icmp_v32i16_v32i1:
1590 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1591 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1592 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
1593 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
1594 ; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
1595 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1596 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
1597 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
1598 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
1599 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1600 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1601 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1602 ; AVX1-NEXT: testw %ax, %ax
1603 ; AVX1-NEXT: setne %al
1604 ; AVX1-NEXT: vzeroupper
1607 ; AVX2-LABEL: icmp_v32i16_v32i1:
1609 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1610 ; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
1611 ; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
1612 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
1613 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1614 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1615 ; AVX2-NEXT: testl %eax, %eax
1616 ; AVX2-NEXT: setne %al
1617 ; AVX2-NEXT: vzeroupper
1620 ; AVX512F-LABEL: icmp_v32i16_v32i1:
1622 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1623 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1624 ; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
1625 ; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
1626 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
1627 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
1628 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1629 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
1630 ; AVX512F-NEXT: korw %k1, %k0, %k0
1631 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
1632 ; AVX512F-NEXT: korw %k1, %k0, %k0
1633 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
1634 ; AVX512F-NEXT: korw %k1, %k0, %k0
1635 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
1636 ; AVX512F-NEXT: korw %k1, %k0, %k0
1637 ; AVX512F-NEXT: kmovw %k0, %eax
1638 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
1639 ; AVX512F-NEXT: vzeroupper
1640 ; AVX512F-NEXT: retq
1642 ; AVX512BW-LABEL: icmp_v32i16_v32i1:
1643 ; AVX512BW: # %bb.0:
1644 ; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
1645 ; AVX512BW-NEXT: kortestd %k0, %k0
1646 ; AVX512BW-NEXT: setne %al
1647 ; AVX512BW-NEXT: vzeroupper
1648 ; AVX512BW-NEXT: retq
1650 ; AVX512VL-LABEL: icmp_v32i16_v32i1:
1651 ; AVX512VL: # %bb.0:
1652 ; AVX512VL-NEXT: vptestnmw %zmm0, %zmm0, %k0
1653 ; AVX512VL-NEXT: kortestd %k0, %k0
1654 ; AVX512VL-NEXT: setne %al
1655 ; AVX512VL-NEXT: vzeroupper
1656 ; AVX512VL-NEXT: retq
; IR under test: lane-wise compare against zero, then OR-reduce the i1 mask.
1657 %a = icmp eq <32 x i16> %0, zeroinitializer
1658 %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
; Any-lane-is-zero test on a 512-bit vector of bytes: each lane of the
; <64 x i8> argument is compared for equality with zero and the <64 x i1>
; mask is OR-reduced to one i1.
; Per the assertions, the SSE and AVX1 runs OR all four 128-bit compare results
; together and then emit a "psllw $7" / "vpsllw $7" before pmovmskb; the AVX2
; run ORs the two 256-bit compare results and tests a 32-bit vpmovmskb result.
; The AVX512F run folds a 16-bit mask register with a kshiftrw/korw ladder,
; and the AVX512BW and AVX512VL runs use vptestnmb on the zmm register with
; kortestq.
1662 define i1 @icmp_v64i8_v64i1(<64 x i8>) {
1663 ; SSE-LABEL: icmp_v64i8_v64i1:
1665 ; SSE-NEXT: pxor %xmm4, %xmm4
1666 ; SSE-NEXT: pcmpeqb %xmm4, %xmm2
1667 ; SSE-NEXT: pcmpeqb %xmm4, %xmm0
1668 ; SSE-NEXT: pcmpeqb %xmm4, %xmm3
1669 ; SSE-NEXT: pcmpeqb %xmm4, %xmm1
1670 ; SSE-NEXT: por %xmm3, %xmm1
1671 ; SSE-NEXT: por %xmm2, %xmm1
1672 ; SSE-NEXT: por %xmm0, %xmm1
1673 ; SSE-NEXT: psllw $7, %xmm1
1674 ; SSE-NEXT: pmovmskb %xmm1, %eax
1675 ; SSE-NEXT: testw %ax, %ax
1676 ; SSE-NEXT: setne %al
1679 ; AVX1-LABEL: icmp_v64i8_v64i1:
1681 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1682 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm3
1683 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm4
1684 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1685 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
1686 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1687 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
1688 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1689 ; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
1690 ; AVX1-NEXT: vpor %xmm0, %xmm4, %xmm0
1691 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1692 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1693 ; AVX1-NEXT: testw %ax, %ax
1694 ; AVX1-NEXT: setne %al
1695 ; AVX1-NEXT: vzeroupper
1698 ; AVX2-LABEL: icmp_v64i8_v64i1:
1700 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1701 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
1702 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
1703 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1704 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1705 ; AVX2-NEXT: testl %eax, %eax
1706 ; AVX2-NEXT: setne %al
1707 ; AVX2-NEXT: vzeroupper
1710 ; AVX512F-LABEL: icmp_v64i8_v64i1:
1712 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1713 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1714 ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
1715 ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
1716 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
1717 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
1718 ; AVX512F-NEXT: vpor %xmm2, %xmm3, %xmm2
1719 ; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1
1720 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
1721 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1722 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1723 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
1724 ; AVX512F-NEXT: korw %k1, %k0, %k0
1725 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
1726 ; AVX512F-NEXT: korw %k1, %k0, %k0
1727 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
1728 ; AVX512F-NEXT: korw %k1, %k0, %k0
1729 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
1730 ; AVX512F-NEXT: korw %k1, %k0, %k0
1731 ; AVX512F-NEXT: kmovw %k0, %eax
1732 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
1733 ; AVX512F-NEXT: vzeroupper
1734 ; AVX512F-NEXT: retq
1736 ; AVX512BW-LABEL: icmp_v64i8_v64i1:
1737 ; AVX512BW: # %bb.0:
1738 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
1739 ; AVX512BW-NEXT: kortestq %k0, %k0
1740 ; AVX512BW-NEXT: setne %al
1741 ; AVX512BW-NEXT: vzeroupper
1742 ; AVX512BW-NEXT: retq
1744 ; AVX512VL-LABEL: icmp_v64i8_v64i1:
1745 ; AVX512VL: # %bb.0:
1746 ; AVX512VL-NEXT: vptestnmb %zmm0, %zmm0, %k0
1747 ; AVX512VL-NEXT: kortestq %k0, %k0
1748 ; AVX512VL-NEXT: setne %al
1749 ; AVX512VL-NEXT: vzeroupper
1750 ; AVX512VL-NEXT: retq
; IR under test: lane-wise compare against zero, then OR-reduce the i1 mask.
1751 %a = icmp eq <64 x i8> %0, zeroinitializer
1752 %b = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> %a)
; Declarations of the OR-reduction intrinsic, one overload per <N x i1> mask
; width (N = 2, 4, 8, 16, 32, 64) called by the test functions in this file.
; Each takes a vector of i1 and yields a single i1.
1756 declare i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1>)
1757 declare i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1>)
1758 declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1>)
1759 declare i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1>)
1760 declare i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1>)
1761 declare i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1>)