1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
; Truncates each lane of the <2 x i64> argument to i1 and OR-reduces the two
; lanes with llvm.experimental.vector.reduce.or.v2i1.
; The assertions below are the autogenerated llc expectations for the
; configurations listed in the RUN lines at the top of the file.
; In the AVX512F, AVX512BW and AVX512VL expectations the mask is checked with
; `testb $3`, i.e. only the low two mask bits are examined.
14 define i1 @trunc_v2i64_v2i1(<2 x i64>) {
15 ; SSE-LABEL: trunc_v2i64_v2i1:
17 ; SSE-NEXT: psllq $63, %xmm0
18 ; SSE-NEXT: movmskpd %xmm0, %eax
19 ; SSE-NEXT: testb %al, %al
23 ; AVX-LABEL: trunc_v2i64_v2i1:
25 ; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
26 ; AVX-NEXT: vmovmskpd %xmm0, %eax
27 ; AVX-NEXT: testb %al, %al
31 ; AVX512F-LABEL: trunc_v2i64_v2i1:
33 ; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0
34 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
35 ; AVX512F-NEXT: kmovw %k0, %eax
36 ; AVX512F-NEXT: testb $3, %al
37 ; AVX512F-NEXT: setne %al
38 ; AVX512F-NEXT: vzeroupper
41 ; AVX512BW-LABEL: trunc_v2i64_v2i1:
43 ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
44 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
45 ; AVX512BW-NEXT: kmovd %k0, %eax
46 ; AVX512BW-NEXT: testb $3, %al
47 ; AVX512BW-NEXT: setne %al
48 ; AVX512BW-NEXT: vzeroupper
51 ; AVX512VL-LABEL: trunc_v2i64_v2i1:
53 ; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0
54 ; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k0
55 ; AVX512VL-NEXT: kmovd %k0, %eax
56 ; AVX512VL-NEXT: testb $3, %al
57 ; AVX512VL-NEXT: setne %al
59 %a = trunc <2 x i64> %0 to <2 x i1>
60 %b = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %a)
; Truncates each lane of the <4 x i32> argument to i1 and OR-reduces the four
; lanes with llvm.experimental.vector.reduce.or.v4i1.
; The assertions below are the autogenerated llc expectations for the
; configurations listed in the RUN lines at the top of the file.
; In the AVX512F, AVX512BW and AVX512VL expectations the mask is checked with
; `testb $15`, i.e. only the low four mask bits are examined.
64 define i1 @trunc_v4i32_v4i1(<4 x i32>) {
65 ; SSE-LABEL: trunc_v4i32_v4i1:
67 ; SSE-NEXT: pslld $31, %xmm0
68 ; SSE-NEXT: movmskps %xmm0, %eax
69 ; SSE-NEXT: testb %al, %al
73 ; AVX-LABEL: trunc_v4i32_v4i1:
75 ; AVX-NEXT: vpslld $31, %xmm0, %xmm0
76 ; AVX-NEXT: vmovmskps %xmm0, %eax
77 ; AVX-NEXT: testb %al, %al
81 ; AVX512F-LABEL: trunc_v4i32_v4i1:
83 ; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
84 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
85 ; AVX512F-NEXT: kmovw %k0, %eax
86 ; AVX512F-NEXT: testb $15, %al
87 ; AVX512F-NEXT: setne %al
88 ; AVX512F-NEXT: vzeroupper
91 ; AVX512BW-LABEL: trunc_v4i32_v4i1:
93 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
94 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
95 ; AVX512BW-NEXT: kmovd %k0, %eax
96 ; AVX512BW-NEXT: testb $15, %al
97 ; AVX512BW-NEXT: setne %al
98 ; AVX512BW-NEXT: vzeroupper
101 ; AVX512VL-LABEL: trunc_v4i32_v4i1:
103 ; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
104 ; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k0
105 ; AVX512VL-NEXT: kmovd %k0, %eax
106 ; AVX512VL-NEXT: testb $15, %al
107 ; AVX512VL-NEXT: setne %al
108 ; AVX512VL-NEXT: retq
109 %a = trunc <4 x i32> %0 to <4 x i1>
110 %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
; Truncates each lane of the <8 x i8> argument to i1 and OR-reduces the eight
; lanes with llvm.experimental.vector.reduce.or.v8i1.
; The assertions below are the autogenerated llc expectations for the
; configurations listed in the RUN lines at the top of the file.
; NOTE(review): the function name says v8i16, but both the argument and the
; trunc operand are <8 x i8>. Confirm whether that is intentional before
; changing either one; the expectations below were presumably generated from
; the <8 x i8> form and would need regenerating if the type changes.
114 define i1 @trunc_v8i16_v8i1(<8 x i8>) {
115 ; SSE-LABEL: trunc_v8i16_v8i1:
117 ; SSE-NEXT: psllw $15, %xmm0
118 ; SSE-NEXT: packsswb %xmm0, %xmm0
119 ; SSE-NEXT: pmovmskb %xmm0, %eax
120 ; SSE-NEXT: testb %al, %al
121 ; SSE-NEXT: setne %al
124 ; AVX-LABEL: trunc_v8i16_v8i1:
126 ; AVX-NEXT: vpsllw $15, %xmm0, %xmm0
127 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
128 ; AVX-NEXT: vpmovmskb %xmm0, %eax
129 ; AVX-NEXT: testb %al, %al
130 ; AVX-NEXT: setne %al
133 ; AVX512F-LABEL: trunc_v8i16_v8i1:
135 ; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
136 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
137 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
138 ; AVX512F-NEXT: kmovw %k0, %eax
139 ; AVX512F-NEXT: testb %al, %al
140 ; AVX512F-NEXT: setne %al
141 ; AVX512F-NEXT: vzeroupper
144 ; AVX512BW-LABEL: trunc_v8i16_v8i1:
146 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
147 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
148 ; AVX512BW-NEXT: kmovd %k0, %eax
149 ; AVX512BW-NEXT: testb %al, %al
150 ; AVX512BW-NEXT: setne %al
151 ; AVX512BW-NEXT: vzeroupper
152 ; AVX512BW-NEXT: retq
154 ; AVX512VL-LABEL: trunc_v8i16_v8i1:
156 ; AVX512VL-NEXT: vpsllw $15, %xmm0, %xmm0
157 ; AVX512VL-NEXT: vpmovw2m %xmm0, %k0
158 ; AVX512VL-NEXT: kmovd %k0, %eax
159 ; AVX512VL-NEXT: testb %al, %al
160 ; AVX512VL-NEXT: setne %al
161 ; AVX512VL-NEXT: retq
162 %a = trunc <8 x i8> %0 to <8 x i1>
163 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; Truncates each lane of the <16 x i8> argument to i1 and OR-reduces the
; sixteen lanes with llvm.experimental.vector.reduce.or.v16i1.
; The assertions below are the autogenerated llc expectations for the
; configurations listed in the RUN lines at the top of the file.
; All three AVX512 configurations share one set of expectations here (the
; common AVX512 prefix), and every variant tests a 16-bit mask (`testw`).
167 define i1 @trunc_v16i8_v16i1(<16 x i8>) {
168 ; SSE-LABEL: trunc_v16i8_v16i1:
170 ; SSE-NEXT: psllw $7, %xmm0
171 ; SSE-NEXT: pmovmskb %xmm0, %eax
172 ; SSE-NEXT: testw %ax, %ax
173 ; SSE-NEXT: setne %al
176 ; AVX-LABEL: trunc_v16i8_v16i1:
178 ; AVX-NEXT: vpsllw $7, %xmm0, %xmm0
179 ; AVX-NEXT: vpmovmskb %xmm0, %eax
180 ; AVX-NEXT: testw %ax, %ax
181 ; AVX-NEXT: setne %al
184 ; AVX512-LABEL: trunc_v16i8_v16i1:
186 ; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0
187 ; AVX512-NEXT: vpmovmskb %xmm0, %eax
188 ; AVX512-NEXT: testw %ax, %ax
189 ; AVX512-NEXT: setne %al
191 %a = trunc <16 x i8> %0 to <16 x i1>
192 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; Truncates each lane of the <4 x i64> argument to i1 and OR-reduces the four
; lanes with llvm.experimental.vector.reduce.or.v4i1.
; The assertions below are the autogenerated llc expectations for the
; configurations listed in the RUN lines at the top of the file.
; In the AVX512F, AVX512BW and AVX512VL expectations the mask is checked with
; `testb $15`, i.e. only the low four mask bits are examined.
196 define i1 @trunc_v4i64_v4i1(<4 x i64>) {
197 ; SSE-LABEL: trunc_v4i64_v4i1:
199 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
200 ; SSE-NEXT: pslld $31, %xmm0
201 ; SSE-NEXT: movmskps %xmm0, %eax
202 ; SSE-NEXT: testb %al, %al
203 ; SSE-NEXT: setne %al
206 ; AVX-LABEL: trunc_v4i64_v4i1:
208 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
209 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
210 ; AVX-NEXT: vpslld $31, %xmm0, %xmm0
211 ; AVX-NEXT: vmovmskps %xmm0, %eax
212 ; AVX-NEXT: testb %al, %al
213 ; AVX-NEXT: setne %al
214 ; AVX-NEXT: vzeroupper
217 ; AVX512F-LABEL: trunc_v4i64_v4i1:
219 ; AVX512F-NEXT: vpsllq $63, %ymm0, %ymm0
220 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
221 ; AVX512F-NEXT: kmovw %k0, %eax
222 ; AVX512F-NEXT: testb $15, %al
223 ; AVX512F-NEXT: setne %al
224 ; AVX512F-NEXT: vzeroupper
227 ; AVX512BW-LABEL: trunc_v4i64_v4i1:
229 ; AVX512BW-NEXT: vpsllq $63, %ymm0, %ymm0
230 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
231 ; AVX512BW-NEXT: kmovd %k0, %eax
232 ; AVX512BW-NEXT: testb $15, %al
233 ; AVX512BW-NEXT: setne %al
234 ; AVX512BW-NEXT: vzeroupper
235 ; AVX512BW-NEXT: retq
237 ; AVX512VL-LABEL: trunc_v4i64_v4i1:
239 ; AVX512VL-NEXT: vpsllq $63, %ymm0, %ymm0
240 ; AVX512VL-NEXT: vptestmq %ymm0, %ymm0, %k0
241 ; AVX512VL-NEXT: kmovd %k0, %eax
242 ; AVX512VL-NEXT: testb $15, %al
243 ; AVX512VL-NEXT: setne %al
244 ; AVX512VL-NEXT: vzeroupper
245 ; AVX512VL-NEXT: retq
246 %a = trunc <4 x i64> %0 to <4 x i1>
247 %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
; Truncates each lane of the <8 x i32> argument to i1 and OR-reduces the eight
; lanes with llvm.experimental.vector.reduce.or.v8i1.
; The assertions below are the autogenerated llc expectations for the
; configurations listed in the RUN lines at the top of the file.
; Unlike the narrower cases, the expected output differs per feature level
; here, so SSE2/SSE41 and AVX1/AVX2 each have their own set of expectations.
251 define i1 @trunc_v8i32_v8i1(<8 x i32>) {
252 ; SSE2-LABEL: trunc_v8i32_v8i1:
254 ; SSE2-NEXT: pslld $16, %xmm1
255 ; SSE2-NEXT: psrad $16, %xmm1
256 ; SSE2-NEXT: pslld $16, %xmm0
257 ; SSE2-NEXT: psrad $16, %xmm0
258 ; SSE2-NEXT: packssdw %xmm1, %xmm0
259 ; SSE2-NEXT: psllw $15, %xmm0
260 ; SSE2-NEXT: packsswb %xmm0, %xmm0
261 ; SSE2-NEXT: pmovmskb %xmm0, %eax
262 ; SSE2-NEXT: testb %al, %al
263 ; SSE2-NEXT: setne %al
266 ; SSE41-LABEL: trunc_v8i32_v8i1:
268 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
269 ; SSE41-NEXT: pshufb %xmm2, %xmm1
270 ; SSE41-NEXT: pshufb %xmm2, %xmm0
271 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
272 ; SSE41-NEXT: psllw $15, %xmm0
273 ; SSE41-NEXT: packsswb %xmm0, %xmm0
274 ; SSE41-NEXT: pmovmskb %xmm0, %eax
275 ; SSE41-NEXT: testb %al, %al
276 ; SSE41-NEXT: setne %al
279 ; AVX1-LABEL: trunc_v8i32_v8i1:
281 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
282 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
283 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
284 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
285 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
286 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
287 ; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
288 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
289 ; AVX1-NEXT: testb %al, %al
290 ; AVX1-NEXT: setne %al
291 ; AVX1-NEXT: vzeroupper
294 ; AVX2-LABEL: trunc_v8i32_v8i1:
296 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
297 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
298 ; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
299 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
300 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
301 ; AVX2-NEXT: testb %al, %al
302 ; AVX2-NEXT: setne %al
303 ; AVX2-NEXT: vzeroupper
306 ; AVX512F-LABEL: trunc_v8i32_v8i1:
308 ; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
309 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
310 ; AVX512F-NEXT: kmovw %k0, %eax
311 ; AVX512F-NEXT: testb %al, %al
312 ; AVX512F-NEXT: setne %al
313 ; AVX512F-NEXT: vzeroupper
316 ; AVX512BW-LABEL: trunc_v8i32_v8i1:
318 ; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0
319 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
320 ; AVX512BW-NEXT: kmovd %k0, %eax
321 ; AVX512BW-NEXT: testb %al, %al
322 ; AVX512BW-NEXT: setne %al
323 ; AVX512BW-NEXT: vzeroupper
324 ; AVX512BW-NEXT: retq
326 ; AVX512VL-LABEL: trunc_v8i32_v8i1:
328 ; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
329 ; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
330 ; AVX512VL-NEXT: kmovd %k0, %eax
331 ; AVX512VL-NEXT: testb %al, %al
332 ; AVX512VL-NEXT: setne %al
333 ; AVX512VL-NEXT: vzeroupper
334 ; AVX512VL-NEXT: retq
335 %a = trunc <8 x i32> %0 to <8 x i1>
336 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; Truncates each lane of the <16 x i16> argument to i1 and OR-reduces the
; sixteen lanes with llvm.experimental.vector.reduce.or.v16i1.
; The assertions below are the autogenerated llc expectations for the
; configurations listed in the RUN lines at the top of the file.
; The three AVX512 expectations test the mask register directly with
; `kortestw` instead of first moving it to a general-purpose register.
340 define i1 @trunc_v16i16_v16i1(<16 x i16>) {
341 ; SSE2-LABEL: trunc_v16i16_v16i1:
343 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
344 ; SSE2-NEXT: pand %xmm2, %xmm1
345 ; SSE2-NEXT: pand %xmm2, %xmm0
346 ; SSE2-NEXT: packuswb %xmm1, %xmm0
347 ; SSE2-NEXT: psllw $7, %xmm0
348 ; SSE2-NEXT: pmovmskb %xmm0, %eax
349 ; SSE2-NEXT: testw %ax, %ax
350 ; SSE2-NEXT: setne %al
353 ; SSE41-LABEL: trunc_v16i16_v16i1:
355 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
356 ; SSE41-NEXT: pshufb %xmm2, %xmm1
357 ; SSE41-NEXT: pshufb %xmm2, %xmm0
358 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
359 ; SSE41-NEXT: psllw $7, %xmm0
360 ; SSE41-NEXT: pmovmskb %xmm0, %eax
361 ; SSE41-NEXT: testw %ax, %ax
362 ; SSE41-NEXT: setne %al
365 ; AVX1-LABEL: trunc_v16i16_v16i1:
367 ; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
368 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
369 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
370 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
371 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
372 ; AVX1-NEXT: testw %ax, %ax
373 ; AVX1-NEXT: setne %al
374 ; AVX1-NEXT: vzeroupper
377 ; AVX2-LABEL: trunc_v16i16_v16i1:
379 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
380 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
381 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
382 ; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
383 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
384 ; AVX2-NEXT: testw %ax, %ax
385 ; AVX2-NEXT: setne %al
386 ; AVX2-NEXT: vzeroupper
389 ; AVX512F-LABEL: trunc_v16i16_v16i1:
391 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
392 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
393 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
394 ; AVX512F-NEXT: kortestw %k0, %k0
395 ; AVX512F-NEXT: setne %al
396 ; AVX512F-NEXT: vzeroupper
399 ; AVX512BW-LABEL: trunc_v16i16_v16i1:
401 ; AVX512BW-NEXT: vpsllw $15, %ymm0, %ymm0
402 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
403 ; AVX512BW-NEXT: kortestw %k0, %k0
404 ; AVX512BW-NEXT: setne %al
405 ; AVX512BW-NEXT: vzeroupper
406 ; AVX512BW-NEXT: retq
408 ; AVX512VL-LABEL: trunc_v16i16_v16i1:
410 ; AVX512VL-NEXT: vpsllw $15, %ymm0, %ymm0
411 ; AVX512VL-NEXT: vpmovw2m %ymm0, %k0
412 ; AVX512VL-NEXT: kortestw %k0, %k0
413 ; AVX512VL-NEXT: setne %al
414 ; AVX512VL-NEXT: vzeroupper
415 ; AVX512VL-NEXT: retq
416 %a = trunc <16 x i16> %0 to <16 x i1>
417 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; Truncates each lane of the <32 x i8> argument to i1 and OR-reduces the
; thirty-two lanes with llvm.experimental.vector.reduce.or.v32i1.
; The assertions below are the autogenerated llc expectations for the
; configurations listed in the RUN lines at the top of the file.
; The AVX512F expectation is the outlier: it ORs the two 128-bit halves and
; then folds the 16-bit mask with a kshiftrw/korw sequence (shifts of 8, 4, 2
; and 1), while the AVX2, AVX512BW and AVX512VL expectations use vpmovmskb
; on the full ymm register followed by `testl`.
421 define i1 @trunc_v32i8_v32i1(<32 x i8>) {
422 ; SSE-LABEL: trunc_v32i8_v32i1:
424 ; SSE-NEXT: por %xmm1, %xmm0
425 ; SSE-NEXT: psllw $7, %xmm0
426 ; SSE-NEXT: pmovmskb %xmm0, %eax
427 ; SSE-NEXT: testw %ax, %ax
428 ; SSE-NEXT: setne %al
431 ; AVX1-LABEL: trunc_v32i8_v32i1:
433 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
434 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
435 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
436 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
437 ; AVX1-NEXT: testw %ax, %ax
438 ; AVX1-NEXT: setne %al
439 ; AVX1-NEXT: vzeroupper
442 ; AVX2-LABEL: trunc_v32i8_v32i1:
444 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
445 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
446 ; AVX2-NEXT: testl %eax, %eax
447 ; AVX2-NEXT: setne %al
448 ; AVX2-NEXT: vzeroupper
451 ; AVX512F-LABEL: trunc_v32i8_v32i1:
453 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
454 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
455 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
456 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
457 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
458 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
459 ; AVX512F-NEXT: korw %k1, %k0, %k0
460 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
461 ; AVX512F-NEXT: korw %k1, %k0, %k0
462 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
463 ; AVX512F-NEXT: korw %k1, %k0, %k0
464 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
465 ; AVX512F-NEXT: korw %k1, %k0, %k0
466 ; AVX512F-NEXT: kmovw %k0, %eax
467 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
468 ; AVX512F-NEXT: vzeroupper
471 ; AVX512BW-LABEL: trunc_v32i8_v32i1:
473 ; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
474 ; AVX512BW-NEXT: vpmovmskb %ymm0, %eax
475 ; AVX512BW-NEXT: testl %eax, %eax
476 ; AVX512BW-NEXT: setne %al
477 ; AVX512BW-NEXT: vzeroupper
478 ; AVX512BW-NEXT: retq
480 ; AVX512VL-LABEL: trunc_v32i8_v32i1:
482 ; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0
483 ; AVX512VL-NEXT: vpmovmskb %ymm0, %eax
484 ; AVX512VL-NEXT: testl %eax, %eax
485 ; AVX512VL-NEXT: setne %al
486 ; AVX512VL-NEXT: vzeroupper
487 ; AVX512VL-NEXT: retq
488 %a = trunc <32 x i8> %0 to <32 x i1>
489 %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
; Truncates each lane of the <8 x i64> argument to i1 and OR-reduces the eight
; lanes with llvm.experimental.vector.reduce.or.v8i1.
; The assertions below are the autogenerated llc expectations for the
; configurations listed in the RUN lines at the top of the file.
; The pre-AVX512 expectations first narrow the 64-bit lanes with shuffle/pack
; sequences spread over several registers; the AVX512 expectations operate
; on a single zmm register (vpsllq $63 followed by vptestmq).
493 define i1 @trunc_v8i64_v8i1(<8 x i64>) {
494 ; SSE2-LABEL: trunc_v8i64_v8i1:
496 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
497 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
498 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
499 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
500 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
501 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
502 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7]
503 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
504 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
505 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
506 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
507 ; SSE2-NEXT: psllw $15, %xmm2
508 ; SSE2-NEXT: packsswb %xmm0, %xmm2
509 ; SSE2-NEXT: pmovmskb %xmm2, %eax
510 ; SSE2-NEXT: testb %al, %al
511 ; SSE2-NEXT: setne %al
514 ; SSE41-LABEL: trunc_v8i64_v8i1:
516 ; SSE41-NEXT: pxor %xmm4, %xmm4
517 ; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
518 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
519 ; SSE41-NEXT: packusdw %xmm3, %xmm2
520 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
521 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
522 ; SSE41-NEXT: packusdw %xmm1, %xmm0
523 ; SSE41-NEXT: packusdw %xmm2, %xmm0
524 ; SSE41-NEXT: psllw $15, %xmm0
525 ; SSE41-NEXT: packsswb %xmm0, %xmm0
526 ; SSE41-NEXT: pmovmskb %xmm0, %eax
527 ; SSE41-NEXT: testb %al, %al
528 ; SSE41-NEXT: setne %al
531 ; AVX1-LABEL: trunc_v8i64_v8i1:
533 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535]
534 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
535 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
536 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
537 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
538 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
539 ; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
540 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
541 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
542 ; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
543 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
544 ; AVX1-NEXT: testb %al, %al
545 ; AVX1-NEXT: setne %al
546 ; AVX1-NEXT: vzeroupper
549 ; AVX2-LABEL: trunc_v8i64_v8i1:
551 ; AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2
552 ; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
553 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2
554 ; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
555 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
556 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
557 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
558 ; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
559 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
560 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
561 ; AVX2-NEXT: testb %al, %al
562 ; AVX2-NEXT: setne %al
563 ; AVX2-NEXT: vzeroupper
566 ; AVX512F-LABEL: trunc_v8i64_v8i1:
568 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
569 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
570 ; AVX512F-NEXT: kmovw %k0, %eax
571 ; AVX512F-NEXT: testb %al, %al
572 ; AVX512F-NEXT: setne %al
573 ; AVX512F-NEXT: vzeroupper
576 ; AVX512BW-LABEL: trunc_v8i64_v8i1:
578 ; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0
579 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
580 ; AVX512BW-NEXT: kmovd %k0, %eax
581 ; AVX512BW-NEXT: testb %al, %al
582 ; AVX512BW-NEXT: setne %al
583 ; AVX512BW-NEXT: vzeroupper
584 ; AVX512BW-NEXT: retq
586 ; AVX512VL-LABEL: trunc_v8i64_v8i1:
588 ; AVX512VL-NEXT: vpsllq $63, %zmm0, %zmm0
589 ; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k0
590 ; AVX512VL-NEXT: kmovd %k0, %eax
591 ; AVX512VL-NEXT: testb %al, %al
592 ; AVX512VL-NEXT: setne %al
593 ; AVX512VL-NEXT: vzeroupper
594 ; AVX512VL-NEXT: retq
595 %a = trunc <8 x i64> %0 to <8 x i1>
596 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; Truncates each lane of the <16 x i32> argument to i1 and OR-reduces the
; sixteen lanes with llvm.experimental.vector.reduce.or.v16i1.
; The assertions below are the autogenerated llc expectations for the
; configurations listed in the RUN lines at the top of the file.
; All three AVX512 configurations share one set of expectations here (the
; common AVX512 prefix): vpslld $31, vptestmd, then kortestw on the mask.
600 define i1 @trunc_v16i32_v16i1(<16 x i32>) {
601 ; SSE2-LABEL: trunc_v16i32_v16i1:
603 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
604 ; SSE2-NEXT: pand %xmm4, %xmm3
605 ; SSE2-NEXT: pand %xmm4, %xmm2
606 ; SSE2-NEXT: packuswb %xmm3, %xmm2
607 ; SSE2-NEXT: pand %xmm4, %xmm1
608 ; SSE2-NEXT: pand %xmm4, %xmm0
609 ; SSE2-NEXT: packuswb %xmm1, %xmm0
610 ; SSE2-NEXT: packuswb %xmm2, %xmm0
611 ; SSE2-NEXT: psllw $7, %xmm0
612 ; SSE2-NEXT: pmovmskb %xmm0, %eax
613 ; SSE2-NEXT: testw %ax, %ax
614 ; SSE2-NEXT: setne %al
617 ; SSE41-LABEL: trunc_v16i32_v16i1:
619 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
620 ; SSE41-NEXT: pand %xmm4, %xmm3
621 ; SSE41-NEXT: pand %xmm4, %xmm2
622 ; SSE41-NEXT: packusdw %xmm3, %xmm2
623 ; SSE41-NEXT: pand %xmm4, %xmm1
624 ; SSE41-NEXT: pand %xmm4, %xmm0
625 ; SSE41-NEXT: packusdw %xmm1, %xmm0
626 ; SSE41-NEXT: packuswb %xmm2, %xmm0
627 ; SSE41-NEXT: psllw $7, %xmm0
628 ; SSE41-NEXT: pmovmskb %xmm0, %eax
629 ; SSE41-NEXT: testw %ax, %ax
630 ; SSE41-NEXT: setne %al
633 ; AVX1-LABEL: trunc_v16i32_v16i1:
635 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
636 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
637 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
638 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
639 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
640 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
641 ; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
642 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
643 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
644 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
645 ; AVX1-NEXT: testw %ax, %ax
646 ; AVX1-NEXT: setne %al
647 ; AVX1-NEXT: vzeroupper
650 ; AVX2-LABEL: trunc_v16i32_v16i1:
652 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
653 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
654 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
655 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
656 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
657 ; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
658 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
659 ; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
660 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
661 ; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
662 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
663 ; AVX2-NEXT: testw %ax, %ax
664 ; AVX2-NEXT: setne %al
665 ; AVX2-NEXT: vzeroupper
668 ; AVX512-LABEL: trunc_v16i32_v16i1:
670 ; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
671 ; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
672 ; AVX512-NEXT: kortestw %k0, %k0
673 ; AVX512-NEXT: setne %al
674 ; AVX512-NEXT: vzeroupper
676 %a = trunc <16 x i32> %0 to <16 x i1>
677 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; Truncates each lane of the <32 x i16> argument to i1 and OR-reduces the
; thirty-two lanes with llvm.experimental.vector.reduce.or.v32i1.
; The assertions below are the autogenerated llc expectations for the
; configurations listed in the RUN lines at the top of the file.
; The AVX512F expectation ORs the two ymm halves and folds the 16-bit mask
; with a kshiftrw/korw sequence; the AVX512BW and AVX512VL expectations test
; a 32-bit mask directly with `kortestd`.
681 define i1 @trunc_v32i16_v32i1(<32 x i16>) {
682 ; SSE2-LABEL: trunc_v32i16_v32i1:
684 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
685 ; SSE2-NEXT: pand %xmm4, %xmm3
686 ; SSE2-NEXT: pand %xmm4, %xmm2
687 ; SSE2-NEXT: packuswb %xmm3, %xmm2
688 ; SSE2-NEXT: pand %xmm4, %xmm1
689 ; SSE2-NEXT: pand %xmm4, %xmm0
690 ; SSE2-NEXT: packuswb %xmm1, %xmm0
691 ; SSE2-NEXT: por %xmm2, %xmm0
692 ; SSE2-NEXT: psllw $7, %xmm0
693 ; SSE2-NEXT: pmovmskb %xmm0, %eax
694 ; SSE2-NEXT: testw %ax, %ax
695 ; SSE2-NEXT: setne %al
698 ; SSE41-LABEL: trunc_v32i16_v32i1:
700 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
701 ; SSE41-NEXT: pshufb %xmm4, %xmm3
702 ; SSE41-NEXT: pshufb %xmm4, %xmm2
703 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
704 ; SSE41-NEXT: pshufb %xmm4, %xmm1
705 ; SSE41-NEXT: pshufb %xmm4, %xmm0
706 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
707 ; SSE41-NEXT: por %xmm2, %xmm0
708 ; SSE41-NEXT: psllw $7, %xmm0
709 ; SSE41-NEXT: pmovmskb %xmm0, %eax
710 ; SSE41-NEXT: testw %ax, %ax
711 ; SSE41-NEXT: setne %al
714 ; AVX1-LABEL: trunc_v32i16_v32i1:
716 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
717 ; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
718 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
719 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
720 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
721 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
722 ; AVX1-NEXT: testw %ax, %ax
723 ; AVX1-NEXT: setne %al
724 ; AVX1-NEXT: vzeroupper
727 ; AVX2-LABEL: trunc_v32i16_v32i1:
729 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
730 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
731 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
732 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm2
733 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
734 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
735 ; AVX2-NEXT: vpackuswb %ymm0, %ymm2, %ymm0
736 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
737 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
738 ; AVX2-NEXT: testl %eax, %eax
739 ; AVX2-NEXT: setne %al
740 ; AVX2-NEXT: vzeroupper
743 ; AVX512F-LABEL: trunc_v32i16_v32i1:
745 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
746 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
747 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
748 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
749 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
750 ; AVX512F-NEXT: korw %k1, %k0, %k0
751 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
752 ; AVX512F-NEXT: korw %k1, %k0, %k0
753 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
754 ; AVX512F-NEXT: korw %k1, %k0, %k0
755 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
756 ; AVX512F-NEXT: korw %k1, %k0, %k0
757 ; AVX512F-NEXT: kmovw %k0, %eax
758 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
759 ; AVX512F-NEXT: vzeroupper
762 ; AVX512BW-LABEL: trunc_v32i16_v32i1:
764 ; AVX512BW-NEXT: vpsllw $15, %zmm0, %zmm0
765 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
766 ; AVX512BW-NEXT: kortestd %k0, %k0
767 ; AVX512BW-NEXT: setne %al
768 ; AVX512BW-NEXT: vzeroupper
769 ; AVX512BW-NEXT: retq
771 ; AVX512VL-LABEL: trunc_v32i16_v32i1:
773 ; AVX512VL-NEXT: vpsllw $15, %zmm0, %zmm0
774 ; AVX512VL-NEXT: vpmovw2m %zmm0, %k0
775 ; AVX512VL-NEXT: kortestd %k0, %k0
776 ; AVX512VL-NEXT: setne %al
777 ; AVX512VL-NEXT: vzeroupper
778 ; AVX512VL-NEXT: retq
779 %a = trunc <32 x i16> %0 to <32 x i1>
780 %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
; Truncates each lane of the <64 x i8> argument to i1 and OR-reduces the
; sixty-four lanes with llvm.experimental.vector.reduce.or.v64i1.
; The assertions below are the autogenerated llc expectations for the
; configurations listed in the RUN lines at the top of the file.
; None of the expectations for this width end in a test/setne pair: the SSE
; and AVX variants OR the vector down to a single byte with shuffles and
; shifts and extract it, and the AVX512 variants fold the mask register with
; kshiftr/kor steps before moving it to eax.
784 define i1 @trunc_v64i8_v64i1(<64 x i8>) {
785 ; SSE2-LABEL: trunc_v64i8_v64i1:
787 ; SSE2-NEXT: por %xmm3, %xmm1
788 ; SSE2-NEXT: por %xmm2, %xmm1
789 ; SSE2-NEXT: por %xmm0, %xmm1
790 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
791 ; SSE2-NEXT: por %xmm1, %xmm0
792 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
793 ; SSE2-NEXT: por %xmm0, %xmm1
794 ; SSE2-NEXT: movdqa %xmm1, %xmm0
795 ; SSE2-NEXT: psrld $16, %xmm0
796 ; SSE2-NEXT: por %xmm1, %xmm0
797 ; SSE2-NEXT: movdqa %xmm0, %xmm1
798 ; SSE2-NEXT: psrlw $8, %xmm1
799 ; SSE2-NEXT: por %xmm0, %xmm1
800 ; SSE2-NEXT: movd %xmm1, %eax
801 ; SSE2-NEXT: # kill: def $al killed $al killed $eax
804 ; SSE41-LABEL: trunc_v64i8_v64i1:
806 ; SSE41-NEXT: por %xmm3, %xmm1
807 ; SSE41-NEXT: por %xmm2, %xmm1
808 ; SSE41-NEXT: por %xmm0, %xmm1
809 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
810 ; SSE41-NEXT: por %xmm1, %xmm0
811 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
812 ; SSE41-NEXT: por %xmm0, %xmm1
813 ; SSE41-NEXT: movdqa %xmm1, %xmm0
814 ; SSE41-NEXT: psrld $16, %xmm0
815 ; SSE41-NEXT: por %xmm1, %xmm0
816 ; SSE41-NEXT: movdqa %xmm0, %xmm1
817 ; SSE41-NEXT: psrlw $8, %xmm1
818 ; SSE41-NEXT: por %xmm0, %xmm1
819 ; SSE41-NEXT: pextrb $0, %xmm1, %eax
820 ; SSE41-NEXT: # kill: def $al killed $al killed $eax
823 ; AVX1-LABEL: trunc_v64i8_v64i1:
825 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
826 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
827 ; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
828 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
829 ; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
830 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
831 ; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
832 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
833 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
834 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
835 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
836 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax
837 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
838 ; AVX1-NEXT: vzeroupper
841 ; AVX2-LABEL: trunc_v64i8_v64i1:
843 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
844 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
845 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
846 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
847 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
848 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
849 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
850 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
851 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
852 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
853 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
854 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax
855 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
856 ; AVX2-NEXT: vzeroupper
859 ; AVX512F-LABEL: trunc_v64i8_v64i1:
861 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
862 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
863 ; AVX512F-NEXT: vpor %xmm2, %xmm3, %xmm2
864 ; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1
865 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
866 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
867 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
868 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
869 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
870 ; AVX512F-NEXT: korw %k1, %k0, %k0
871 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
872 ; AVX512F-NEXT: korw %k1, %k0, %k0
873 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
874 ; AVX512F-NEXT: korw %k1, %k0, %k0
875 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
876 ; AVX512F-NEXT: korw %k1, %k0, %k0
877 ; AVX512F-NEXT: kmovw %k0, %eax
878 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
879 ; AVX512F-NEXT: vzeroupper
882 ; AVX512BW-LABEL: trunc_v64i8_v64i1:
884 ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0
885 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
886 ; AVX512BW-NEXT: kshiftrq $32, %k0, %k1
887 ; AVX512BW-NEXT: korq %k1, %k0, %k0
888 ; AVX512BW-NEXT: kshiftrq $16, %k0, %k1
889 ; AVX512BW-NEXT: korq %k1, %k0, %k0
890 ; AVX512BW-NEXT: kshiftrq $8, %k0, %k1
891 ; AVX512BW-NEXT: korq %k1, %k0, %k0
892 ; AVX512BW-NEXT: kshiftrq $4, %k0, %k1
893 ; AVX512BW-NEXT: korq %k1, %k0, %k0
894 ; AVX512BW-NEXT: kshiftrq $2, %k0, %k1
895 ; AVX512BW-NEXT: korq %k1, %k0, %k0
896 ; AVX512BW-NEXT: kshiftrq $1, %k0, %k1
897 ; AVX512BW-NEXT: korq %k1, %k0, %k0
898 ; AVX512BW-NEXT: kmovd %k0, %eax
899 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
900 ; AVX512BW-NEXT: vzeroupper
901 ; AVX512BW-NEXT: retq
903 ; AVX512VL-LABEL: trunc_v64i8_v64i1:
905 ; AVX512VL-NEXT: vpsllw $7, %zmm0, %zmm0
906 ; AVX512VL-NEXT: vpmovb2m %zmm0, %k0
907 ; AVX512VL-NEXT: kshiftrq $32, %k0, %k1
908 ; AVX512VL-NEXT: korq %k1, %k0, %k0
909 ; AVX512VL-NEXT: kshiftrq $16, %k0, %k1
910 ; AVX512VL-NEXT: korq %k1, %k0, %k0
911 ; AVX512VL-NEXT: kshiftrq $8, %k0, %k1
912 ; AVX512VL-NEXT: korq %k1, %k0, %k0
913 ; AVX512VL-NEXT: kshiftrq $4, %k0, %k1
914 ; AVX512VL-NEXT: korq %k1, %k0, %k0
915 ; AVX512VL-NEXT: kshiftrq $2, %k0, %k1
916 ; AVX512VL-NEXT: korq %k1, %k0, %k0
917 ; AVX512VL-NEXT: kshiftrq $1, %k0, %k1
918 ; AVX512VL-NEXT: korq %k1, %k0, %k0
919 ; AVX512VL-NEXT: kmovd %k0, %eax
920 ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
921 ; AVX512VL-NEXT: vzeroupper
922 ; AVX512VL-NEXT: retq
923 %a = trunc <64 x i8> %0 to <64 x i1>
924 %b = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> %a)
; Compares each lane of the <2 x i64> argument for equality with zero
; (icmp eq against zeroinitializer) and OR-reduces the resulting <2 x i1>
; with llvm.experimental.vector.reduce.or.v2i1.
; The assertions below are the autogenerated llc expectations for the
; configurations listed in the RUN lines at the top of the file.
; SSE2 has its own expectation built from pcmpeqd plus a shuffle and pand,
; whereas the SSE41 expectation uses pcmpeqq directly.
932 define i1 @icmp_v2i64_v2i1(<2 x i64>) {
933 ; SSE2-LABEL: icmp_v2i64_v2i1:
935 ; SSE2-NEXT: pxor %xmm1, %xmm1
936 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
937 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
938 ; SSE2-NEXT: pand %xmm1, %xmm0
939 ; SSE2-NEXT: movmskpd %xmm0, %eax
940 ; SSE2-NEXT: testb %al, %al
941 ; SSE2-NEXT: setne %al
944 ; SSE41-LABEL: icmp_v2i64_v2i1:
946 ; SSE41-NEXT: pxor %xmm1, %xmm1
947 ; SSE41-NEXT: pcmpeqq %xmm0, %xmm1
948 ; SSE41-NEXT: movmskpd %xmm1, %eax
949 ; SSE41-NEXT: testb %al, %al
950 ; SSE41-NEXT: setne %al
953 ; AVX-LABEL: icmp_v2i64_v2i1:
955 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
956 ; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
957 ; AVX-NEXT: vmovmskpd %xmm0, %eax
958 ; AVX-NEXT: testb %al, %al
959 ; AVX-NEXT: setne %al
962 ; AVX512F-LABEL: icmp_v2i64_v2i1:
964 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
965 ; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
966 ; AVX512F-NEXT: kmovw %k0, %eax
967 ; AVX512F-NEXT: testb $3, %al
968 ; AVX512F-NEXT: setne %al
969 ; AVX512F-NEXT: vzeroupper
972 ; AVX512BW-LABEL: icmp_v2i64_v2i1:
974 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
975 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
976 ; AVX512BW-NEXT: kmovd %k0, %eax
977 ; AVX512BW-NEXT: testb $3, %al
978 ; AVX512BW-NEXT: setne %al
979 ; AVX512BW-NEXT: vzeroupper
980 ; AVX512BW-NEXT: retq
982 ; AVX512VL-LABEL: icmp_v2i64_v2i1:
984 ; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k0
985 ; AVX512VL-NEXT: kmovd %k0, %eax
986 ; AVX512VL-NEXT: testb $3, %al
987 ; AVX512VL-NEXT: setne %al
988 ; AVX512VL-NEXT: retq
989 %a = icmp eq <2 x i64> %0, zeroinitializer
990 %b = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %a)
; Compares each lane of the <4 x i32> argument for equality with zero
; (icmp eq against zeroinitializer) and OR-reduces the resulting <4 x i1>
; with llvm.experimental.vector.reduce.or.v4i1.
; The assertions below are the autogenerated llc expectations for the
; configurations listed in the RUN lines at the top of the file.
; In the AVX512F, AVX512BW and AVX512VL expectations the mask is checked with
; `testb $15`, i.e. only the low four mask bits are examined.
994 define i1 @icmp_v4i32_v4i1(<4 x i32>) {
995 ; SSE-LABEL: icmp_v4i32_v4i1:
997 ; SSE-NEXT: pxor %xmm1, %xmm1
998 ; SSE-NEXT: pcmpeqd %xmm0, %xmm1
999 ; SSE-NEXT: movmskps %xmm1, %eax
1000 ; SSE-NEXT: testb %al, %al
1001 ; SSE-NEXT: setne %al
1004 ; AVX-LABEL: icmp_v4i32_v4i1:
1006 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1007 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1008 ; AVX-NEXT: vmovmskps %xmm0, %eax
1009 ; AVX-NEXT: testb %al, %al
1010 ; AVX-NEXT: setne %al
1013 ; AVX512F-LABEL: icmp_v4i32_v4i1:
1015 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1016 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
1017 ; AVX512F-NEXT: kmovw %k0, %eax
1018 ; AVX512F-NEXT: testb $15, %al
1019 ; AVX512F-NEXT: setne %al
1020 ; AVX512F-NEXT: vzeroupper
1021 ; AVX512F-NEXT: retq
1023 ; AVX512BW-LABEL: icmp_v4i32_v4i1:
1024 ; AVX512BW: # %bb.0:
1025 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1026 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
1027 ; AVX512BW-NEXT: kmovd %k0, %eax
1028 ; AVX512BW-NEXT: testb $15, %al
1029 ; AVX512BW-NEXT: setne %al
1030 ; AVX512BW-NEXT: vzeroupper
1031 ; AVX512BW-NEXT: retq
1033 ; AVX512VL-LABEL: icmp_v4i32_v4i1:
1034 ; AVX512VL: # %bb.0:
1035 ; AVX512VL-NEXT: vptestnmd %xmm0, %xmm0, %k0
1036 ; AVX512VL-NEXT: kmovd %k0, %eax
1037 ; AVX512VL-NEXT: testb $15, %al
1038 ; AVX512VL-NEXT: setne %al
1039 ; AVX512VL-NEXT: retq
1040 %a = icmp eq <4 x i32> %0, zeroinitializer
1041 %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
; Compares every lane of the <8 x i8> argument with zero and OR-reduces the
; resulting <8 x i1> mask with llvm.experimental.vector.reduce.or.v8i1.
; NOTE(review) - despite the "v8i16" in the name, the argument and the compare
; below both use <8 x i8>. The expectations mask the input first (pand / vpand
; with a constant-pool operand, or a 255,0,... byte pattern) and then compare
; or test 16-bit lanes. Confirm whether <8 x i16> was intended; retyping the
; test requires regenerating the check lines, not editing them by hand.
1045 define i1 @icmp_v8i16_v8i1(<8 x i8>) {
1046 ; SSE-LABEL: icmp_v8i16_v8i1:
1048 ; SSE-NEXT: pand {{.*}}(%rip), %xmm0
1049 ; SSE-NEXT: pxor %xmm1, %xmm1
1050 ; SSE-NEXT: pcmpeqw %xmm0, %xmm1
1051 ; SSE-NEXT: packsswb %xmm0, %xmm1
1052 ; SSE-NEXT: pmovmskb %xmm1, %eax
1053 ; SSE-NEXT: testb %al, %al
1054 ; SSE-NEXT: setne %al
1057 ; AVX-LABEL: icmp_v8i16_v8i1:
1059 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1060 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1061 ; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
1062 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
1063 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1064 ; AVX-NEXT: testb %al, %al
1065 ; AVX-NEXT: setne %al
1068 ; AVX512F-LABEL: icmp_v8i16_v8i1:
1070 ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1071 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1072 ; AVX512F-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
1073 ; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
1074 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
1075 ; AVX512F-NEXT: kmovw %k0, %eax
1076 ; AVX512F-NEXT: testb %al, %al
1077 ; AVX512F-NEXT: setne %al
1078 ; AVX512F-NEXT: vzeroupper
1079 ; AVX512F-NEXT: retq
1081 ; AVX512BW-LABEL: icmp_v8i16_v8i1:
1082 ; AVX512BW: # %bb.0:
1083 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1084 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
1085 ; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k0
1086 ; AVX512BW-NEXT: kmovd %k0, %eax
1087 ; AVX512BW-NEXT: testb %al, %al
1088 ; AVX512BW-NEXT: setne %al
1089 ; AVX512BW-NEXT: vzeroupper
1090 ; AVX512BW-NEXT: retq
1092 ; AVX512VL-LABEL: icmp_v8i16_v8i1:
1093 ; AVX512VL: # %bb.0:
1094 ; AVX512VL-NEXT: vptestnmw {{.*}}(%rip), %xmm0, %k0
1095 ; AVX512VL-NEXT: kmovd %k0, %eax
1096 ; AVX512VL-NEXT: testb %al, %al
1097 ; AVX512VL-NEXT: setne %al
1098 ; AVX512VL-NEXT: retq
1099 %a = icmp eq <8 x i8> %0, zeroinitializer
1100 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; Compares every lane of the <16 x i8> argument with zero and OR-reduces the
; resulting <16 x i1> mask with llvm.experimental.vector.reduce.or.v16i1.
; The AVX512F expectations use the same vpxor / vpcmpeqb / vpmovmskb sequence
; as the AVX ones, while the AVX512BW and AVX512VL expectations use vptestnmb
; followed by kortestw.
1104 define i1 @icmp_v16i8_v16i1(<16 x i8>) {
1105 ; SSE-LABEL: icmp_v16i8_v16i1:
1107 ; SSE-NEXT: pxor %xmm1, %xmm1
1108 ; SSE-NEXT: pcmpeqb %xmm0, %xmm1
1109 ; SSE-NEXT: pmovmskb %xmm1, %eax
1110 ; SSE-NEXT: testw %ax, %ax
1111 ; SSE-NEXT: setne %al
1114 ; AVX-LABEL: icmp_v16i8_v16i1:
1116 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1117 ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1118 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1119 ; AVX-NEXT: testw %ax, %ax
1120 ; AVX-NEXT: setne %al
1123 ; AVX512F-LABEL: icmp_v16i8_v16i1:
1125 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1126 ; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1127 ; AVX512F-NEXT: vpmovmskb %xmm0, %eax
1128 ; AVX512F-NEXT: testw %ax, %ax
1129 ; AVX512F-NEXT: setne %al
1130 ; AVX512F-NEXT: retq
1132 ; AVX512BW-LABEL: icmp_v16i8_v16i1:
1133 ; AVX512BW: # %bb.0:
1134 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1135 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
1136 ; AVX512BW-NEXT: kortestw %k0, %k0
1137 ; AVX512BW-NEXT: setne %al
1138 ; AVX512BW-NEXT: vzeroupper
1139 ; AVX512BW-NEXT: retq
1141 ; AVX512VL-LABEL: icmp_v16i8_v16i1:
1142 ; AVX512VL: # %bb.0:
1143 ; AVX512VL-NEXT: vptestnmb %xmm0, %xmm0, %k0
1144 ; AVX512VL-NEXT: kortestw %k0, %k0
1145 ; AVX512VL-NEXT: setne %al
1146 ; AVX512VL-NEXT: retq
1147 %a = icmp eq <16 x i8> %0, zeroinitializer
1148 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; Compares every lane of the <4 x i64> argument with zero and OR-reduces the
; resulting <4 x i1> mask with llvm.experimental.vector.reduce.or.v4i1.
; The SSE2 expectations build the 64-bit equality from pcmpeqd results that
; are ANDed with a pshufd-swapped copy of themselves, whereas the SSE41
; expectations use pcmpeqq directly. All three AVX512 expectations examine
; only the low four mask bits (testb $15).
1152 define i1 @icmp_v4i64_v4i1(<4 x i64>) {
1153 ; SSE2-LABEL: icmp_v4i64_v4i1:
1155 ; SSE2-NEXT: pxor %xmm2, %xmm2
1156 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
1157 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
1158 ; SSE2-NEXT: pand %xmm1, %xmm3
1159 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
1160 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
1161 ; SSE2-NEXT: pand %xmm0, %xmm1
1162 ; SSE2-NEXT: packssdw %xmm3, %xmm1
1163 ; SSE2-NEXT: movmskps %xmm1, %eax
1164 ; SSE2-NEXT: testb %al, %al
1165 ; SSE2-NEXT: setne %al
1168 ; SSE41-LABEL: icmp_v4i64_v4i1:
1170 ; SSE41-NEXT: pxor %xmm2, %xmm2
1171 ; SSE41-NEXT: pcmpeqq %xmm2, %xmm1
1172 ; SSE41-NEXT: pcmpeqq %xmm2, %xmm0
1173 ; SSE41-NEXT: packssdw %xmm1, %xmm0
1174 ; SSE41-NEXT: movmskps %xmm0, %eax
1175 ; SSE41-NEXT: testb %al, %al
1176 ; SSE41-NEXT: setne %al
1179 ; AVX1-LABEL: icmp_v4i64_v4i1:
1181 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1182 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1183 ; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
1184 ; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
1185 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1186 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
1187 ; AVX1-NEXT: testb %al, %al
1188 ; AVX1-NEXT: setne %al
1189 ; AVX1-NEXT: vzeroupper
1192 ; AVX2-LABEL: icmp_v4i64_v4i1:
1194 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1195 ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
1196 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
1197 ; AVX2-NEXT: testb %al, %al
1198 ; AVX2-NEXT: setne %al
1199 ; AVX2-NEXT: vzeroupper
1202 ; AVX512F-LABEL: icmp_v4i64_v4i1:
1204 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1205 ; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
1206 ; AVX512F-NEXT: kmovw %k0, %eax
1207 ; AVX512F-NEXT: testb $15, %al
1208 ; AVX512F-NEXT: setne %al
1209 ; AVX512F-NEXT: vzeroupper
1210 ; AVX512F-NEXT: retq
1212 ; AVX512BW-LABEL: icmp_v4i64_v4i1:
1213 ; AVX512BW: # %bb.0:
1214 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1215 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
1216 ; AVX512BW-NEXT: kmovd %k0, %eax
1217 ; AVX512BW-NEXT: testb $15, %al
1218 ; AVX512BW-NEXT: setne %al
1219 ; AVX512BW-NEXT: vzeroupper
1220 ; AVX512BW-NEXT: retq
1222 ; AVX512VL-LABEL: icmp_v4i64_v4i1:
1223 ; AVX512VL: # %bb.0:
1224 ; AVX512VL-NEXT: vptestnmq %ymm0, %ymm0, %k0
1225 ; AVX512VL-NEXT: kmovd %k0, %eax
1226 ; AVX512VL-NEXT: testb $15, %al
1227 ; AVX512VL-NEXT: setne %al
1228 ; AVX512VL-NEXT: vzeroupper
1229 ; AVX512VL-NEXT: retq
1230 %a = icmp eq <4 x i64> %0, zeroinitializer
1231 %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
; Compares every lane of the <8 x i32> argument with zero and OR-reduces the
; resulting <8 x i1> mask with llvm.experimental.vector.reduce.or.v8i1.
; The SSE expectations narrow the two compare results with packssdw and
; packsswb before pmovmskb; the AVX1 and AVX2 expectations read the mask with
; a single 256-bit vmovmskps.
1235 define i1 @icmp_v8i32_v8i1(<8 x i32>) {
1236 ; SSE-LABEL: icmp_v8i32_v8i1:
1238 ; SSE-NEXT: pxor %xmm2, %xmm2
1239 ; SSE-NEXT: pcmpeqd %xmm2, %xmm1
1240 ; SSE-NEXT: pcmpeqd %xmm2, %xmm0
1241 ; SSE-NEXT: packssdw %xmm1, %xmm0
1242 ; SSE-NEXT: packsswb %xmm0, %xmm0
1243 ; SSE-NEXT: pmovmskb %xmm0, %eax
1244 ; SSE-NEXT: testb %al, %al
1245 ; SSE-NEXT: setne %al
1248 ; AVX1-LABEL: icmp_v8i32_v8i1:
1250 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1251 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1252 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
1253 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
1254 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1255 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1256 ; AVX1-NEXT: testb %al, %al
1257 ; AVX1-NEXT: setne %al
1258 ; AVX1-NEXT: vzeroupper
1261 ; AVX2-LABEL: icmp_v8i32_v8i1:
1263 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1264 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
1265 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1266 ; AVX2-NEXT: testb %al, %al
1267 ; AVX2-NEXT: setne %al
1268 ; AVX2-NEXT: vzeroupper
1271 ; AVX512F-LABEL: icmp_v8i32_v8i1:
1273 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1274 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
1275 ; AVX512F-NEXT: kmovw %k0, %eax
1276 ; AVX512F-NEXT: testb %al, %al
1277 ; AVX512F-NEXT: setne %al
1278 ; AVX512F-NEXT: vzeroupper
1279 ; AVX512F-NEXT: retq
1281 ; AVX512BW-LABEL: icmp_v8i32_v8i1:
1282 ; AVX512BW: # %bb.0:
1283 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1284 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
1285 ; AVX512BW-NEXT: kmovd %k0, %eax
1286 ; AVX512BW-NEXT: testb %al, %al
1287 ; AVX512BW-NEXT: setne %al
1288 ; AVX512BW-NEXT: vzeroupper
1289 ; AVX512BW-NEXT: retq
1291 ; AVX512VL-LABEL: icmp_v8i32_v8i1:
1292 ; AVX512VL: # %bb.0:
1293 ; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
1294 ; AVX512VL-NEXT: kmovd %k0, %eax
1295 ; AVX512VL-NEXT: testb %al, %al
1296 ; AVX512VL-NEXT: setne %al
1297 ; AVX512VL-NEXT: vzeroupper
1298 ; AVX512VL-NEXT: retq
1299 %a = icmp eq <8 x i32> %0, zeroinitializer
1300 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; Compares every lane of the <16 x i16> argument with zero and OR-reduces the
; resulting <16 x i1> mask with llvm.experimental.vector.reduce.or.v16i1.
; The AVX512F expectations sign-extend the vpcmpeqw result to 32-bit lanes
; (vpmovsxwd) and test it with vptestmd + kortestw, while the AVX512BW and
; AVX512VL expectations use vptestnmw on the input directly.
1304 define i1 @icmp_v16i16_v16i1(<16 x i16>) {
1305 ; SSE-LABEL: icmp_v16i16_v16i1:
1307 ; SSE-NEXT: pxor %xmm2, %xmm2
1308 ; SSE-NEXT: pcmpeqw %xmm2, %xmm1
1309 ; SSE-NEXT: pcmpeqw %xmm2, %xmm0
1310 ; SSE-NEXT: packsswb %xmm1, %xmm0
1311 ; SSE-NEXT: pmovmskb %xmm0, %eax
1312 ; SSE-NEXT: testw %ax, %ax
1313 ; SSE-NEXT: setne %al
1316 ; AVX1-LABEL: icmp_v16i16_v16i1:
1318 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1319 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1320 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
1321 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
1322 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1323 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1324 ; AVX1-NEXT: testw %ax, %ax
1325 ; AVX1-NEXT: setne %al
1326 ; AVX1-NEXT: vzeroupper
1329 ; AVX2-LABEL: icmp_v16i16_v16i1:
1331 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1332 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1333 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1334 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1335 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
1336 ; AVX2-NEXT: testw %ax, %ax
1337 ; AVX2-NEXT: setne %al
1338 ; AVX2-NEXT: vzeroupper
1341 ; AVX512F-LABEL: icmp_v16i16_v16i1:
1343 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1344 ; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1345 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
1346 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1347 ; AVX512F-NEXT: kortestw %k0, %k0
1348 ; AVX512F-NEXT: setne %al
1349 ; AVX512F-NEXT: vzeroupper
1350 ; AVX512F-NEXT: retq
1352 ; AVX512BW-LABEL: icmp_v16i16_v16i1:
1353 ; AVX512BW: # %bb.0:
1354 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1355 ; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
1356 ; AVX512BW-NEXT: kortestw %k0, %k0
1357 ; AVX512BW-NEXT: setne %al
1358 ; AVX512BW-NEXT: vzeroupper
1359 ; AVX512BW-NEXT: retq
1361 ; AVX512VL-LABEL: icmp_v16i16_v16i1:
1362 ; AVX512VL: # %bb.0:
1363 ; AVX512VL-NEXT: vptestnmw %ymm0, %ymm0, %k0
1364 ; AVX512VL-NEXT: kortestw %k0, %k0
1365 ; AVX512VL-NEXT: setne %al
1366 ; AVX512VL-NEXT: vzeroupper
1367 ; AVX512VL-NEXT: retq
1368 %a = icmp eq <16 x i16> %0, zeroinitializer
1369 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; Compares every lane of the <32 x i8> argument with zero and OR-reduces the
; resulting <32 x i1> mask with llvm.experimental.vector.reduce.or.v32i1.
; The SSE and AVX1 expectations OR the two 128-bit compare results together
; before a single pmovmskb, and the AVX2 expectations test the full 32-bit
; mask (testl). The AVX512F expectations finish with a kshiftrw/korw ladder
; (shifts of 8, 4, 2 and 1) instead of a flag test, while the AVX512BW and
; AVX512VL expectations use vptestnmb + kortestd.
1373 define i1 @icmp_v32i8_v32i1(<32 x i8>) {
1374 ; SSE-LABEL: icmp_v32i8_v32i1:
1376 ; SSE-NEXT: pxor %xmm2, %xmm2
1377 ; SSE-NEXT: pcmpeqb %xmm2, %xmm1
1378 ; SSE-NEXT: pcmpeqb %xmm2, %xmm0
1379 ; SSE-NEXT: por %xmm1, %xmm0
1380 ; SSE-NEXT: pmovmskb %xmm0, %eax
1381 ; SSE-NEXT: testw %ax, %ax
1382 ; SSE-NEXT: setne %al
1385 ; AVX1-LABEL: icmp_v32i8_v32i1:
1387 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1388 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1389 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
1390 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
1391 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1392 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1393 ; AVX1-NEXT: testw %ax, %ax
1394 ; AVX1-NEXT: setne %al
1395 ; AVX1-NEXT: vzeroupper
1398 ; AVX2-LABEL: icmp_v32i8_v32i1:
1400 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1401 ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
1402 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1403 ; AVX2-NEXT: testl %eax, %eax
1404 ; AVX2-NEXT: setne %al
1405 ; AVX2-NEXT: vzeroupper
1408 ; AVX512F-LABEL: icmp_v32i8_v32i1:
1410 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1411 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
1412 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
1413 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
1414 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1415 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1416 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
1417 ; AVX512F-NEXT: korw %k1, %k0, %k0
1418 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
1419 ; AVX512F-NEXT: korw %k1, %k0, %k0
1420 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
1421 ; AVX512F-NEXT: korw %k1, %k0, %k0
1422 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
1423 ; AVX512F-NEXT: korw %k1, %k0, %k0
1424 ; AVX512F-NEXT: kmovw %k0, %eax
1425 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
1426 ; AVX512F-NEXT: vzeroupper
1427 ; AVX512F-NEXT: retq
1429 ; AVX512BW-LABEL: icmp_v32i8_v32i1:
1430 ; AVX512BW: # %bb.0:
1431 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1432 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
1433 ; AVX512BW-NEXT: kortestd %k0, %k0
1434 ; AVX512BW-NEXT: setne %al
1435 ; AVX512BW-NEXT: vzeroupper
1436 ; AVX512BW-NEXT: retq
1438 ; AVX512VL-LABEL: icmp_v32i8_v32i1:
1439 ; AVX512VL: # %bb.0:
1440 ; AVX512VL-NEXT: vptestnmb %ymm0, %ymm0, %k0
1441 ; AVX512VL-NEXT: kortestd %k0, %k0
1442 ; AVX512VL-NEXT: setne %al
1443 ; AVX512VL-NEXT: vzeroupper
1444 ; AVX512VL-NEXT: retq
1445 %a = icmp eq <32 x i8> %0, zeroinitializer
1446 %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
; Compares every lane of the <8 x i64> argument with zero and OR-reduces the
; resulting <8 x i1> mask with llvm.experimental.vector.reduce.or.v8i1.
; The SSE2 expectations build each 64-bit equality from pcmpeqd results that
; are ANDed with a pshufd-swapped copy of themselves, whereas the SSE41
; expectations use pcmpeqq. All three AVX512 expectations use a single
; vptestnmq on zmm0 and test the whole low mask byte (testb %al, %al).
1450 define i1 @icmp_v8i64_v8i1(<8 x i64>) {
1451 ; SSE2-LABEL: icmp_v8i64_v8i1:
1453 ; SSE2-NEXT: pxor %xmm4, %xmm4
1454 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
1455 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
1456 ; SSE2-NEXT: pand %xmm3, %xmm5
1457 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
1458 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
1459 ; SSE2-NEXT: pand %xmm2, %xmm3
1460 ; SSE2-NEXT: packssdw %xmm5, %xmm3
1461 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
1462 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
1463 ; SSE2-NEXT: pand %xmm1, %xmm2
1464 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
1465 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
1466 ; SSE2-NEXT: pand %xmm0, %xmm1
1467 ; SSE2-NEXT: packssdw %xmm2, %xmm1
1468 ; SSE2-NEXT: packssdw %xmm3, %xmm1
1469 ; SSE2-NEXT: packsswb %xmm0, %xmm1
1470 ; SSE2-NEXT: pmovmskb %xmm1, %eax
1471 ; SSE2-NEXT: testb %al, %al
1472 ; SSE2-NEXT: setne %al
1475 ; SSE41-LABEL: icmp_v8i64_v8i1:
1477 ; SSE41-NEXT: pxor %xmm4, %xmm4
1478 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm3
1479 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm2
1480 ; SSE41-NEXT: packssdw %xmm3, %xmm2
1481 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
1482 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
1483 ; SSE41-NEXT: packssdw %xmm1, %xmm0
1484 ; SSE41-NEXT: packssdw %xmm2, %xmm0
1485 ; SSE41-NEXT: packsswb %xmm0, %xmm0
1486 ; SSE41-NEXT: pmovmskb %xmm0, %eax
1487 ; SSE41-NEXT: testb %al, %al
1488 ; SSE41-NEXT: setne %al
1491 ; AVX1-LABEL: icmp_v8i64_v8i1:
1493 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1494 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1495 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
1496 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
1497 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1498 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1499 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
1500 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
1501 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1502 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1503 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1504 ; AVX1-NEXT: testb %al, %al
1505 ; AVX1-NEXT: setne %al
1506 ; AVX1-NEXT: vzeroupper
1509 ; AVX2-LABEL: icmp_v8i64_v8i1:
1511 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1512 ; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
1513 ; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
1514 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1515 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1516 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1517 ; AVX2-NEXT: testb %al, %al
1518 ; AVX2-NEXT: setne %al
1519 ; AVX2-NEXT: vzeroupper
1522 ; AVX512F-LABEL: icmp_v8i64_v8i1:
1524 ; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
1525 ; AVX512F-NEXT: kmovw %k0, %eax
1526 ; AVX512F-NEXT: testb %al, %al
1527 ; AVX512F-NEXT: setne %al
1528 ; AVX512F-NEXT: vzeroupper
1529 ; AVX512F-NEXT: retq
1531 ; AVX512BW-LABEL: icmp_v8i64_v8i1:
1532 ; AVX512BW: # %bb.0:
1533 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
1534 ; AVX512BW-NEXT: kmovd %k0, %eax
1535 ; AVX512BW-NEXT: testb %al, %al
1536 ; AVX512BW-NEXT: setne %al
1537 ; AVX512BW-NEXT: vzeroupper
1538 ; AVX512BW-NEXT: retq
1540 ; AVX512VL-LABEL: icmp_v8i64_v8i1:
1541 ; AVX512VL: # %bb.0:
1542 ; AVX512VL-NEXT: vptestnmq %zmm0, %zmm0, %k0
1543 ; AVX512VL-NEXT: kmovd %k0, %eax
1544 ; AVX512VL-NEXT: testb %al, %al
1545 ; AVX512VL-NEXT: setne %al
1546 ; AVX512VL-NEXT: vzeroupper
1547 ; AVX512VL-NEXT: retq
1548 %a = icmp eq <8 x i64> %0, zeroinitializer
1549 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; Compares every lane of the <16 x i32> argument with zero and OR-reduces the
; resulting <16 x i1> mask with llvm.experimental.vector.reduce.or.v16i1.
; The AVX512 expectations here are written once under the shared AVX512 check
; prefix (vptestnmd on zmm0 followed by kortestw) instead of being repeated
; per AVX512 configuration.
1553 define i1 @icmp_v16i32_v16i1(<16 x i32>) {
1554 ; SSE-LABEL: icmp_v16i32_v16i1:
1556 ; SSE-NEXT: pxor %xmm4, %xmm4
1557 ; SSE-NEXT: pcmpeqd %xmm4, %xmm3
1558 ; SSE-NEXT: pcmpeqd %xmm4, %xmm2
1559 ; SSE-NEXT: packssdw %xmm3, %xmm2
1560 ; SSE-NEXT: pcmpeqd %xmm4, %xmm1
1561 ; SSE-NEXT: pcmpeqd %xmm4, %xmm0
1562 ; SSE-NEXT: packssdw %xmm1, %xmm0
1563 ; SSE-NEXT: packsswb %xmm2, %xmm0
1564 ; SSE-NEXT: pmovmskb %xmm0, %eax
1565 ; SSE-NEXT: testw %ax, %ax
1566 ; SSE-NEXT: setne %al
1569 ; AVX1-LABEL: icmp_v16i32_v16i1:
1571 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1572 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1573 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
1574 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
1575 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1576 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1577 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
1578 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
1579 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1580 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1581 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1582 ; AVX1-NEXT: testw %ax, %ax
1583 ; AVX1-NEXT: setne %al
1584 ; AVX1-NEXT: vzeroupper
1587 ; AVX2-LABEL: icmp_v16i32_v16i1:
1589 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1590 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
1591 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
1592 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1593 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1594 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1595 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1596 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
1597 ; AVX2-NEXT: testw %ax, %ax
1598 ; AVX2-NEXT: setne %al
1599 ; AVX2-NEXT: vzeroupper
1602 ; AVX512-LABEL: icmp_v16i32_v16i1:
1604 ; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
1605 ; AVX512-NEXT: kortestw %k0, %k0
1606 ; AVX512-NEXT: setne %al
1607 ; AVX512-NEXT: vzeroupper
1609 %a = icmp eq <16 x i32> %0, zeroinitializer
1610 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; Compares every lane of the <32 x i16> argument with zero and OR-reduces the
; resulting <32 x i1> mask with llvm.experimental.vector.reduce.or.v32i1.
; The AVX512F expectations OR the two 256-bit compare results, sign-extend
; them (vpmovsxwd) and finish with a kshiftrw/korw ladder (shifts of 8, 4, 2
; and 1), while the AVX512BW and AVX512VL expectations use vptestnmw on zmm0
; followed by kortestd.
; NOTE(review) - the AVX1 expectation contains a vpsllw $7 before vpmovmskb
; that no other configuration in this function has; it looks like a codegen
; difference worth confirming rather than something to edit in the checks.
1614 define i1 @icmp_v32i16_v32i1(<32 x i16>) {
1615 ; SSE-LABEL: icmp_v32i16_v32i1:
1617 ; SSE-NEXT: pxor %xmm4, %xmm4
1618 ; SSE-NEXT: pcmpeqw %xmm4, %xmm1
1619 ; SSE-NEXT: pcmpeqw %xmm4, %xmm0
1620 ; SSE-NEXT: packsswb %xmm1, %xmm0
1621 ; SSE-NEXT: pcmpeqw %xmm4, %xmm3
1622 ; SSE-NEXT: pcmpeqw %xmm4, %xmm2
1623 ; SSE-NEXT: packsswb %xmm3, %xmm2
1624 ; SSE-NEXT: por %xmm0, %xmm2
1625 ; SSE-NEXT: pmovmskb %xmm2, %eax
1626 ; SSE-NEXT: testw %ax, %ax
1627 ; SSE-NEXT: setne %al
1630 ; AVX1-LABEL: icmp_v32i16_v32i1:
1632 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1633 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1634 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
1635 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
1636 ; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
1637 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1638 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
1639 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
1640 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
1641 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1642 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1643 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1644 ; AVX1-NEXT: testw %ax, %ax
1645 ; AVX1-NEXT: setne %al
1646 ; AVX1-NEXT: vzeroupper
1649 ; AVX2-LABEL: icmp_v32i16_v32i1:
1651 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1652 ; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
1653 ; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
1654 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
1655 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1656 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1657 ; AVX2-NEXT: testl %eax, %eax
1658 ; AVX2-NEXT: setne %al
1659 ; AVX2-NEXT: vzeroupper
1662 ; AVX512F-LABEL: icmp_v32i16_v32i1:
1664 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1665 ; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
1666 ; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
1667 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
1668 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
1669 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1670 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
1671 ; AVX512F-NEXT: korw %k1, %k0, %k0
1672 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
1673 ; AVX512F-NEXT: korw %k1, %k0, %k0
1674 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
1675 ; AVX512F-NEXT: korw %k1, %k0, %k0
1676 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
1677 ; AVX512F-NEXT: korw %k1, %k0, %k0
1678 ; AVX512F-NEXT: kmovw %k0, %eax
1679 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
1680 ; AVX512F-NEXT: vzeroupper
1681 ; AVX512F-NEXT: retq
1683 ; AVX512BW-LABEL: icmp_v32i16_v32i1:
1684 ; AVX512BW: # %bb.0:
1685 ; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
1686 ; AVX512BW-NEXT: kortestd %k0, %k0
1687 ; AVX512BW-NEXT: setne %al
1688 ; AVX512BW-NEXT: vzeroupper
1689 ; AVX512BW-NEXT: retq
1691 ; AVX512VL-LABEL: icmp_v32i16_v32i1:
1692 ; AVX512VL: # %bb.0:
1693 ; AVX512VL-NEXT: vptestnmw %zmm0, %zmm0, %k0
1694 ; AVX512VL-NEXT: kortestd %k0, %k0
1695 ; AVX512VL-NEXT: setne %al
1696 ; AVX512VL-NEXT: vzeroupper
1697 ; AVX512VL-NEXT: retq
1698 %a = icmp eq <32 x i16> %0, zeroinitializer
1699 %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
; Compares every lane of the <64 x i8> argument with zero and OR-reduces the
; resulting <64 x i1> mask with llvm.experimental.vector.reduce.or.v64i1.
; The SSE, AVX1 and AVX2 expectations OR all compare results into one
; register and end with negl + sbbb instead of the test + setne pair used by
; the narrower tests in this file. The AVX512F expectations end with a
; kshiftrw/korw ladder (shifts of 8, 4, 2 and 1); the AVX512BW and AVX512VL
; expectations reduce the 64-bit mask with a kshiftrq/korq ladder (shifts of
; 32 down to 1) before moving bit 0 into al.
1703 define i1 @icmp_v64i8_v64i1(<64 x i8>) {
1704 ; SSE-LABEL: icmp_v64i8_v64i1:
1706 ; SSE-NEXT: pxor %xmm4, %xmm4
1707 ; SSE-NEXT: pcmpeqb %xmm4, %xmm2
1708 ; SSE-NEXT: pcmpeqb %xmm4, %xmm0
1709 ; SSE-NEXT: pcmpeqb %xmm4, %xmm3
1710 ; SSE-NEXT: pcmpeqb %xmm4, %xmm1
1711 ; SSE-NEXT: por %xmm3, %xmm1
1712 ; SSE-NEXT: por %xmm2, %xmm1
1713 ; SSE-NEXT: por %xmm0, %xmm1
1714 ; SSE-NEXT: pmovmskb %xmm1, %eax
1715 ; SSE-NEXT: negl %eax
1716 ; SSE-NEXT: sbbb %al, %al
1719 ; AVX1-LABEL: icmp_v64i8_v64i1:
1721 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1722 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm3
1723 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm4
1724 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1725 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
1726 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1727 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
1728 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1729 ; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
1730 ; AVX1-NEXT: vpor %xmm0, %xmm4, %xmm0
1731 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1732 ; AVX1-NEXT: negl %eax
1733 ; AVX1-NEXT: sbbb %al, %al
1734 ; AVX1-NEXT: vzeroupper
1737 ; AVX2-LABEL: icmp_v64i8_v64i1:
1739 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1740 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
1741 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
1742 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1743 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1744 ; AVX2-NEXT: negl %eax
1745 ; AVX2-NEXT: sbbb %al, %al
1746 ; AVX2-NEXT: vzeroupper
1749 ; AVX512F-LABEL: icmp_v64i8_v64i1:
1751 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1752 ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
1753 ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
1754 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
1755 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
1756 ; AVX512F-NEXT: vpor %xmm2, %xmm3, %xmm2
1757 ; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1
1758 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
1759 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1760 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1761 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
1762 ; AVX512F-NEXT: korw %k1, %k0, %k0
1763 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
1764 ; AVX512F-NEXT: korw %k1, %k0, %k0
1765 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
1766 ; AVX512F-NEXT: korw %k1, %k0, %k0
1767 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
1768 ; AVX512F-NEXT: korw %k1, %k0, %k0
1769 ; AVX512F-NEXT: kmovw %k0, %eax
1770 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
1771 ; AVX512F-NEXT: vzeroupper
1772 ; AVX512F-NEXT: retq
1774 ; AVX512BW-LABEL: icmp_v64i8_v64i1:
1775 ; AVX512BW: # %bb.0:
1776 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
1777 ; AVX512BW-NEXT: kshiftrq $32, %k0, %k1
1778 ; AVX512BW-NEXT: korq %k1, %k0, %k0
1779 ; AVX512BW-NEXT: kshiftrq $16, %k0, %k1
1780 ; AVX512BW-NEXT: korq %k1, %k0, %k0
1781 ; AVX512BW-NEXT: kshiftrq $8, %k0, %k1
1782 ; AVX512BW-NEXT: korq %k1, %k0, %k0
1783 ; AVX512BW-NEXT: kshiftrq $4, %k0, %k1
1784 ; AVX512BW-NEXT: korq %k1, %k0, %k0
1785 ; AVX512BW-NEXT: kshiftrq $2, %k0, %k1
1786 ; AVX512BW-NEXT: korq %k1, %k0, %k0
1787 ; AVX512BW-NEXT: kshiftrq $1, %k0, %k1
1788 ; AVX512BW-NEXT: korq %k1, %k0, %k0
1789 ; AVX512BW-NEXT: kmovd %k0, %eax
1790 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
1791 ; AVX512BW-NEXT: vzeroupper
1792 ; AVX512BW-NEXT: retq
1794 ; AVX512VL-LABEL: icmp_v64i8_v64i1:
1795 ; AVX512VL: # %bb.0:
1796 ; AVX512VL-NEXT: vptestnmb %zmm0, %zmm0, %k0
1797 ; AVX512VL-NEXT: kshiftrq $32, %k0, %k1
1798 ; AVX512VL-NEXT: korq %k1, %k0, %k0
1799 ; AVX512VL-NEXT: kshiftrq $16, %k0, %k1
1800 ; AVX512VL-NEXT: korq %k1, %k0, %k0
1801 ; AVX512VL-NEXT: kshiftrq $8, %k0, %k1
1802 ; AVX512VL-NEXT: korq %k1, %k0, %k0
1803 ; AVX512VL-NEXT: kshiftrq $4, %k0, %k1
1804 ; AVX512VL-NEXT: korq %k1, %k0, %k0
1805 ; AVX512VL-NEXT: kshiftrq $2, %k0, %k1
1806 ; AVX512VL-NEXT: korq %k1, %k0, %k0
1807 ; AVX512VL-NEXT: kshiftrq $1, %k0, %k1
1808 ; AVX512VL-NEXT: korq %k1, %k0, %k0
1809 ; AVX512VL-NEXT: kmovd %k0, %eax
1810 ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
1811 ; AVX512VL-NEXT: vzeroupper
1812 ; AVX512VL-NEXT: retq
1813 %a = icmp eq <64 x i8> %0, zeroinitializer
1814 %b = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> %a)
; Declarations of the experimental OR-reduction intrinsics called by the test
; functions in this file, one per <N x i1> mask width (2, 4, 8, 16, 32 and 64
; lanes). Each takes a vector of i1 and yields a single i1.
1818 declare i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1>)
1819 declare i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1>)
1820 declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1>)
1821 declare i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1>)
1822 declare i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1>)
1823 declare i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1>)