1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
; Codegen test: truncate each lane of a <2 x i64> argument to i1 and OR-reduce
; the resulting <2 x i1> mask with llvm.experimental.vector.reduce.or.v2i1.
; The per-target assembly expectations below are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
14 define i1 @trunc_v2i64_v2i1(<2 x i64>) {
15 ; SSE-LABEL: trunc_v2i64_v2i1:
17 ; SSE-NEXT: psllq $63, %xmm0
18 ; SSE-NEXT: movmskpd %xmm0, %eax
19 ; SSE-NEXT: testb %al, %al
23 ; AVX-LABEL: trunc_v2i64_v2i1:
25 ; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
26 ; AVX-NEXT: vmovmskpd %xmm0, %eax
27 ; AVX-NEXT: testb %al, %al
31 ; AVX512F-LABEL: trunc_v2i64_v2i1:
33 ; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0
34 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
35 ; AVX512F-NEXT: kmovw %k0, %eax
36 ; AVX512F-NEXT: testb $3, %al
37 ; AVX512F-NEXT: setne %al
38 ; AVX512F-NEXT: vzeroupper
41 ; AVX512BW-LABEL: trunc_v2i64_v2i1:
43 ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
44 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
45 ; AVX512BW-NEXT: kmovd %k0, %eax
46 ; AVX512BW-NEXT: testb $3, %al
47 ; AVX512BW-NEXT: setne %al
48 ; AVX512BW-NEXT: vzeroupper
51 ; AVX512VL-LABEL: trunc_v2i64_v2i1:
53 ; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0
54 ; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k0
55 ; AVX512VL-NEXT: kmovd %k0, %eax
56 ; AVX512VL-NEXT: testb $3, %al
57 ; AVX512VL-NEXT: setne %al
; IR under test: trunc keeps only the low bit of every i64 lane; %b is the OR of
; the two resulting i1 lanes.
59 %a = trunc <2 x i64> %0 to <2 x i1>
60 %b = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %a)
; Codegen test: truncate each lane of a <4 x i32> argument to i1 and OR-reduce
; the resulting <4 x i1> mask with llvm.experimental.vector.reduce.or.v4i1.
; The per-target assembly expectations below are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
64 define i1 @trunc_v4i32_v4i1(<4 x i32>) {
65 ; SSE-LABEL: trunc_v4i32_v4i1:
67 ; SSE-NEXT: pslld $31, %xmm0
68 ; SSE-NEXT: movmskps %xmm0, %eax
69 ; SSE-NEXT: testb %al, %al
73 ; AVX-LABEL: trunc_v4i32_v4i1:
75 ; AVX-NEXT: vpslld $31, %xmm0, %xmm0
76 ; AVX-NEXT: vmovmskps %xmm0, %eax
77 ; AVX-NEXT: testb %al, %al
81 ; AVX512F-LABEL: trunc_v4i32_v4i1:
83 ; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
84 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
85 ; AVX512F-NEXT: kmovw %k0, %eax
86 ; AVX512F-NEXT: testb $15, %al
87 ; AVX512F-NEXT: setne %al
88 ; AVX512F-NEXT: vzeroupper
91 ; AVX512BW-LABEL: trunc_v4i32_v4i1:
93 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
94 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
95 ; AVX512BW-NEXT: kmovd %k0, %eax
96 ; AVX512BW-NEXT: testb $15, %al
97 ; AVX512BW-NEXT: setne %al
98 ; AVX512BW-NEXT: vzeroupper
101 ; AVX512VL-LABEL: trunc_v4i32_v4i1:
103 ; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
104 ; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k0
105 ; AVX512VL-NEXT: kmovd %k0, %eax
106 ; AVX512VL-NEXT: testb $15, %al
107 ; AVX512VL-NEXT: setne %al
108 ; AVX512VL-NEXT: retq
; IR under test: trunc keeps only the low bit of every i32 lane; %b is the OR of
; the four resulting i1 lanes.
109 %a = trunc <4 x i32> %0 to <4 x i1>
110 %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
; Codegen test: truncate each lane of the 8-element argument to i1 and OR-reduce
; the resulting <8 x i1> mask with llvm.experimental.vector.reduce.or.v8i1.
; NOTE(review): the function name says v8i16, but both the argument and the
; trunc operate on <8 x i8>; the expected assembly accordingly begins by
; widening bytes to words (punpcklbw / pmovzxbw). Confirm whether <8 x i16> was
; intended; changing the type requires regenerating every expectation below.
; The per-target assembly expectations are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
114 define i1 @trunc_v8i16_v8i1(<8 x i8>) {
115 ; SSE2-LABEL: trunc_v8i16_v8i1:
117 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
118 ; SSE2-NEXT: psllw $15, %xmm0
119 ; SSE2-NEXT: packsswb %xmm0, %xmm0
120 ; SSE2-NEXT: pmovmskb %xmm0, %eax
121 ; SSE2-NEXT: testb %al, %al
122 ; SSE2-NEXT: setne %al
125 ; SSE41-LABEL: trunc_v8i16_v8i1:
127 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
128 ; SSE41-NEXT: psllw $15, %xmm0
129 ; SSE41-NEXT: packsswb %xmm0, %xmm0
130 ; SSE41-NEXT: pmovmskb %xmm0, %eax
131 ; SSE41-NEXT: testb %al, %al
132 ; SSE41-NEXT: setne %al
135 ; AVX-LABEL: trunc_v8i16_v8i1:
137 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
138 ; AVX-NEXT: vpsllw $15, %xmm0, %xmm0
139 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
140 ; AVX-NEXT: vpmovmskb %xmm0, %eax
141 ; AVX-NEXT: testb %al, %al
142 ; AVX-NEXT: setne %al
145 ; AVX512F-LABEL: trunc_v8i16_v8i1:
147 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
148 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
149 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
150 ; AVX512F-NEXT: kmovw %k0, %eax
151 ; AVX512F-NEXT: testb %al, %al
152 ; AVX512F-NEXT: setne %al
153 ; AVX512F-NEXT: vzeroupper
156 ; AVX512BW-LABEL: trunc_v8i16_v8i1:
158 ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
159 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
160 ; AVX512BW-NEXT: kmovd %k0, %eax
161 ; AVX512BW-NEXT: testb %al, %al
162 ; AVX512BW-NEXT: setne %al
163 ; AVX512BW-NEXT: vzeroupper
164 ; AVX512BW-NEXT: retq
166 ; AVX512VL-LABEL: trunc_v8i16_v8i1:
168 ; AVX512VL-NEXT: vpsllw $7, %xmm0, %xmm0
169 ; AVX512VL-NEXT: vpmovb2m %xmm0, %k0
170 ; AVX512VL-NEXT: kmovd %k0, %eax
171 ; AVX512VL-NEXT: testb %al, %al
172 ; AVX512VL-NEXT: setne %al
173 ; AVX512VL-NEXT: retq
; IR under test: trunc keeps only the low bit of every i8 lane; %b is the OR of
; the eight resulting i1 lanes.
174 %a = trunc <8 x i8> %0 to <8 x i1>
175 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; Codegen test: truncate each lane of a <16 x i8> argument to i1 and OR-reduce
; the resulting <16 x i1> mask with llvm.experimental.vector.reduce.or.v16i1.
; All three AVX-512 configurations share a single expectation block here.
; The per-target assembly expectations below are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
179 define i1 @trunc_v16i8_v16i1(<16 x i8>) {
180 ; SSE-LABEL: trunc_v16i8_v16i1:
182 ; SSE-NEXT: psllw $7, %xmm0
183 ; SSE-NEXT: pmovmskb %xmm0, %eax
184 ; SSE-NEXT: testw %ax, %ax
185 ; SSE-NEXT: setne %al
188 ; AVX-LABEL: trunc_v16i8_v16i1:
190 ; AVX-NEXT: vpsllw $7, %xmm0, %xmm0
191 ; AVX-NEXT: vpmovmskb %xmm0, %eax
192 ; AVX-NEXT: testw %ax, %ax
193 ; AVX-NEXT: setne %al
196 ; AVX512-LABEL: trunc_v16i8_v16i1:
198 ; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0
199 ; AVX512-NEXT: vpmovmskb %xmm0, %eax
200 ; AVX512-NEXT: testw %ax, %ax
201 ; AVX512-NEXT: setne %al
; IR under test: trunc keeps only the low bit of every i8 lane; %b is the OR of
; the sixteen resulting i1 lanes.
203 %a = trunc <16 x i8> %0 to <16 x i1>
204 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; Codegen test: truncate each lane of a 256-bit <4 x i64> argument to i1 and
; OR-reduce the resulting <4 x i1> mask with
; llvm.experimental.vector.reduce.or.v4i1.
; The per-target assembly expectations below are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
208 define i1 @trunc_v4i64_v4i1(<4 x i64>) {
209 ; SSE-LABEL: trunc_v4i64_v4i1:
211 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
212 ; SSE-NEXT: pslld $31, %xmm0
213 ; SSE-NEXT: movmskps %xmm0, %eax
214 ; SSE-NEXT: testb %al, %al
215 ; SSE-NEXT: setne %al
218 ; AVX-LABEL: trunc_v4i64_v4i1:
220 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
221 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
222 ; AVX-NEXT: vpslld $31, %xmm0, %xmm0
223 ; AVX-NEXT: vmovmskps %xmm0, %eax
224 ; AVX-NEXT: testb %al, %al
225 ; AVX-NEXT: setne %al
226 ; AVX-NEXT: vzeroupper
229 ; AVX512F-LABEL: trunc_v4i64_v4i1:
231 ; AVX512F-NEXT: vpsllq $63, %ymm0, %ymm0
232 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
233 ; AVX512F-NEXT: kmovw %k0, %eax
234 ; AVX512F-NEXT: testb $15, %al
235 ; AVX512F-NEXT: setne %al
236 ; AVX512F-NEXT: vzeroupper
239 ; AVX512BW-LABEL: trunc_v4i64_v4i1:
241 ; AVX512BW-NEXT: vpsllq $63, %ymm0, %ymm0
242 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
243 ; AVX512BW-NEXT: kmovd %k0, %eax
244 ; AVX512BW-NEXT: testb $15, %al
245 ; AVX512BW-NEXT: setne %al
246 ; AVX512BW-NEXT: vzeroupper
247 ; AVX512BW-NEXT: retq
249 ; AVX512VL-LABEL: trunc_v4i64_v4i1:
251 ; AVX512VL-NEXT: vpsllq $63, %ymm0, %ymm0
252 ; AVX512VL-NEXT: vptestmq %ymm0, %ymm0, %k0
253 ; AVX512VL-NEXT: kmovd %k0, %eax
254 ; AVX512VL-NEXT: testb $15, %al
255 ; AVX512VL-NEXT: setne %al
256 ; AVX512VL-NEXT: vzeroupper
257 ; AVX512VL-NEXT: retq
; IR under test: trunc keeps only the low bit of every i64 lane; %b is the OR of
; the four resulting i1 lanes.
258 %a = trunc <4 x i64> %0 to <4 x i1>
259 %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
; Codegen test: truncate each lane of a 256-bit <8 x i32> argument to i1 and
; OR-reduce the resulting <8 x i1> mask with
; llvm.experimental.vector.reduce.or.v8i1.
; The per-target assembly expectations below are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
263 define i1 @trunc_v8i32_v8i1(<8 x i32>) {
264 ; SSE2-LABEL: trunc_v8i32_v8i1:
266 ; SSE2-NEXT: pslld $16, %xmm1
267 ; SSE2-NEXT: psrad $16, %xmm1
268 ; SSE2-NEXT: pslld $16, %xmm0
269 ; SSE2-NEXT: psrad $16, %xmm0
270 ; SSE2-NEXT: packssdw %xmm1, %xmm0
271 ; SSE2-NEXT: psllw $15, %xmm0
272 ; SSE2-NEXT: packsswb %xmm0, %xmm0
273 ; SSE2-NEXT: pmovmskb %xmm0, %eax
274 ; SSE2-NEXT: testb %al, %al
275 ; SSE2-NEXT: setne %al
278 ; SSE41-LABEL: trunc_v8i32_v8i1:
280 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
281 ; SSE41-NEXT: pshufb %xmm2, %xmm1
282 ; SSE41-NEXT: pshufb %xmm2, %xmm0
283 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
284 ; SSE41-NEXT: psllw $15, %xmm0
285 ; SSE41-NEXT: packsswb %xmm0, %xmm0
286 ; SSE41-NEXT: pmovmskb %xmm0, %eax
287 ; SSE41-NEXT: testb %al, %al
288 ; SSE41-NEXT: setne %al
291 ; AVX1-LABEL: trunc_v8i32_v8i1:
293 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
294 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
295 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
296 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
297 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
298 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
299 ; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
300 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
301 ; AVX1-NEXT: testb %al, %al
302 ; AVX1-NEXT: setne %al
303 ; AVX1-NEXT: vzeroupper
306 ; AVX2-LABEL: trunc_v8i32_v8i1:
308 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
309 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
310 ; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
311 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
312 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
313 ; AVX2-NEXT: testb %al, %al
314 ; AVX2-NEXT: setne %al
315 ; AVX2-NEXT: vzeroupper
318 ; AVX512F-LABEL: trunc_v8i32_v8i1:
320 ; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
321 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
322 ; AVX512F-NEXT: kmovw %k0, %eax
323 ; AVX512F-NEXT: testb %al, %al
324 ; AVX512F-NEXT: setne %al
325 ; AVX512F-NEXT: vzeroupper
328 ; AVX512BW-LABEL: trunc_v8i32_v8i1:
330 ; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0
331 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
332 ; AVX512BW-NEXT: kmovd %k0, %eax
333 ; AVX512BW-NEXT: testb %al, %al
334 ; AVX512BW-NEXT: setne %al
335 ; AVX512BW-NEXT: vzeroupper
336 ; AVX512BW-NEXT: retq
338 ; AVX512VL-LABEL: trunc_v8i32_v8i1:
340 ; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
341 ; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
342 ; AVX512VL-NEXT: kmovd %k0, %eax
343 ; AVX512VL-NEXT: testb %al, %al
344 ; AVX512VL-NEXT: setne %al
345 ; AVX512VL-NEXT: vzeroupper
346 ; AVX512VL-NEXT: retq
; IR under test: trunc keeps only the low bit of every i32 lane; %b is the OR of
; the eight resulting i1 lanes.
347 %a = trunc <8 x i32> %0 to <8 x i1>
348 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; Codegen test: truncate each lane of a 256-bit <16 x i16> argument to i1 and
; OR-reduce the resulting <16 x i1> mask with
; llvm.experimental.vector.reduce.or.v16i1.
; The per-target assembly expectations below are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
352 define i1 @trunc_v16i16_v16i1(<16 x i16>) {
353 ; SSE2-LABEL: trunc_v16i16_v16i1:
355 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
356 ; SSE2-NEXT: pand %xmm2, %xmm1
357 ; SSE2-NEXT: pand %xmm2, %xmm0
358 ; SSE2-NEXT: packuswb %xmm1, %xmm0
359 ; SSE2-NEXT: psllw $7, %xmm0
360 ; SSE2-NEXT: pmovmskb %xmm0, %eax
361 ; SSE2-NEXT: testw %ax, %ax
362 ; SSE2-NEXT: setne %al
365 ; SSE41-LABEL: trunc_v16i16_v16i1:
367 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
368 ; SSE41-NEXT: pshufb %xmm2, %xmm1
369 ; SSE41-NEXT: pshufb %xmm2, %xmm0
370 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
371 ; SSE41-NEXT: psllw $7, %xmm0
372 ; SSE41-NEXT: pmovmskb %xmm0, %eax
373 ; SSE41-NEXT: testw %ax, %ax
374 ; SSE41-NEXT: setne %al
377 ; AVX1-LABEL: trunc_v16i16_v16i1:
379 ; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
380 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
381 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
382 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
383 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
384 ; AVX1-NEXT: testw %ax, %ax
385 ; AVX1-NEXT: setne %al
386 ; AVX1-NEXT: vzeroupper
389 ; AVX2-LABEL: trunc_v16i16_v16i1:
391 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
392 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
393 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
394 ; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
395 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
396 ; AVX2-NEXT: testw %ax, %ax
397 ; AVX2-NEXT: setne %al
398 ; AVX2-NEXT: vzeroupper
401 ; AVX512F-LABEL: trunc_v16i16_v16i1:
403 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
404 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
405 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
406 ; AVX512F-NEXT: kortestw %k0, %k0
407 ; AVX512F-NEXT: setne %al
408 ; AVX512F-NEXT: vzeroupper
411 ; AVX512BW-LABEL: trunc_v16i16_v16i1:
413 ; AVX512BW-NEXT: vpsllw $15, %ymm0, %ymm0
414 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
415 ; AVX512BW-NEXT: kortestw %k0, %k0
416 ; AVX512BW-NEXT: setne %al
417 ; AVX512BW-NEXT: vzeroupper
418 ; AVX512BW-NEXT: retq
420 ; AVX512VL-LABEL: trunc_v16i16_v16i1:
422 ; AVX512VL-NEXT: vpsllw $15, %ymm0, %ymm0
423 ; AVX512VL-NEXT: vpmovw2m %ymm0, %k0
424 ; AVX512VL-NEXT: kortestw %k0, %k0
425 ; AVX512VL-NEXT: setne %al
426 ; AVX512VL-NEXT: vzeroupper
427 ; AVX512VL-NEXT: retq
; IR under test: trunc keeps only the low bit of every i16 lane; %b is the OR of
; the sixteen resulting i1 lanes.
428 %a = trunc <16 x i16> %0 to <16 x i1>
429 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; Codegen test: truncate each lane of a 256-bit <32 x i8> argument to i1 and
; OR-reduce the resulting <32 x i1> mask with
; llvm.experimental.vector.reduce.or.v32i1.
; The per-target assembly expectations below are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
433 define i1 @trunc_v32i8_v32i1(<32 x i8>) {
434 ; SSE-LABEL: trunc_v32i8_v32i1:
436 ; SSE-NEXT: por %xmm1, %xmm0
437 ; SSE-NEXT: psllw $7, %xmm0
438 ; SSE-NEXT: pmovmskb %xmm0, %eax
439 ; SSE-NEXT: testw %ax, %ax
440 ; SSE-NEXT: setne %al
443 ; AVX1-LABEL: trunc_v32i8_v32i1:
445 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
446 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
447 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
448 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
449 ; AVX1-NEXT: testw %ax, %ax
450 ; AVX1-NEXT: setne %al
451 ; AVX1-NEXT: vzeroupper
454 ; AVX2-LABEL: trunc_v32i8_v32i1:
456 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
457 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
458 ; AVX2-NEXT: testl %eax, %eax
459 ; AVX2-NEXT: setne %al
460 ; AVX2-NEXT: vzeroupper
463 ; AVX512F-LABEL: trunc_v32i8_v32i1:
465 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
466 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
467 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
468 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
469 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
470 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
471 ; AVX512F-NEXT: korw %k1, %k0, %k0
472 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
473 ; AVX512F-NEXT: korw %k1, %k0, %k0
474 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
475 ; AVX512F-NEXT: korw %k1, %k0, %k0
476 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
477 ; AVX512F-NEXT: korw %k1, %k0, %k0
478 ; AVX512F-NEXT: kmovw %k0, %eax
479 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
480 ; AVX512F-NEXT: vzeroupper
483 ; AVX512BW-LABEL: trunc_v32i8_v32i1:
485 ; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
486 ; AVX512BW-NEXT: vpmovmskb %ymm0, %eax
487 ; AVX512BW-NEXT: testl %eax, %eax
488 ; AVX512BW-NEXT: setne %al
489 ; AVX512BW-NEXT: vzeroupper
490 ; AVX512BW-NEXT: retq
492 ; AVX512VL-LABEL: trunc_v32i8_v32i1:
494 ; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0
495 ; AVX512VL-NEXT: vpmovmskb %ymm0, %eax
496 ; AVX512VL-NEXT: testl %eax, %eax
497 ; AVX512VL-NEXT: setne %al
498 ; AVX512VL-NEXT: vzeroupper
499 ; AVX512VL-NEXT: retq
; IR under test: trunc keeps only the low bit of every i8 lane; %b is the OR of
; the thirty-two resulting i1 lanes.
500 %a = trunc <32 x i8> %0 to <32 x i1>
501 %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
; Codegen test: truncate each lane of a 512-bit <8 x i64> argument to i1 and
; OR-reduce the resulting <8 x i1> mask with
; llvm.experimental.vector.reduce.or.v8i1.
; The per-target assembly expectations below are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
505 define i1 @trunc_v8i64_v8i1(<8 x i64>) {
506 ; SSE2-LABEL: trunc_v8i64_v8i1:
508 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
509 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
510 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
511 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
512 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
513 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
514 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7]
515 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
516 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
517 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
518 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
519 ; SSE2-NEXT: psllw $15, %xmm2
520 ; SSE2-NEXT: packsswb %xmm0, %xmm2
521 ; SSE2-NEXT: pmovmskb %xmm2, %eax
522 ; SSE2-NEXT: testb %al, %al
523 ; SSE2-NEXT: setne %al
526 ; SSE41-LABEL: trunc_v8i64_v8i1:
528 ; SSE41-NEXT: pxor %xmm4, %xmm4
529 ; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
530 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
531 ; SSE41-NEXT: packusdw %xmm3, %xmm2
532 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
533 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
534 ; SSE41-NEXT: packusdw %xmm1, %xmm0
535 ; SSE41-NEXT: packusdw %xmm2, %xmm0
536 ; SSE41-NEXT: psllw $15, %xmm0
537 ; SSE41-NEXT: packsswb %xmm0, %xmm0
538 ; SSE41-NEXT: pmovmskb %xmm0, %eax
539 ; SSE41-NEXT: testb %al, %al
540 ; SSE41-NEXT: setne %al
543 ; AVX1-LABEL: trunc_v8i64_v8i1:
545 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535]
546 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
547 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
548 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
549 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
550 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
551 ; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
552 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
553 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
554 ; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
555 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
556 ; AVX1-NEXT: testb %al, %al
557 ; AVX1-NEXT: setne %al
558 ; AVX1-NEXT: vzeroupper
561 ; AVX2-LABEL: trunc_v8i64_v8i1:
563 ; AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2
564 ; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
565 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2
566 ; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
567 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
568 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
569 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
570 ; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
571 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
572 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
573 ; AVX2-NEXT: testb %al, %al
574 ; AVX2-NEXT: setne %al
575 ; AVX2-NEXT: vzeroupper
578 ; AVX512F-LABEL: trunc_v8i64_v8i1:
580 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
581 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
582 ; AVX512F-NEXT: kmovw %k0, %eax
583 ; AVX512F-NEXT: testb %al, %al
584 ; AVX512F-NEXT: setne %al
585 ; AVX512F-NEXT: vzeroupper
588 ; AVX512BW-LABEL: trunc_v8i64_v8i1:
590 ; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0
591 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
592 ; AVX512BW-NEXT: kmovd %k0, %eax
593 ; AVX512BW-NEXT: testb %al, %al
594 ; AVX512BW-NEXT: setne %al
595 ; AVX512BW-NEXT: vzeroupper
596 ; AVX512BW-NEXT: retq
598 ; AVX512VL-LABEL: trunc_v8i64_v8i1:
600 ; AVX512VL-NEXT: vpsllq $63, %zmm0, %zmm0
601 ; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k0
602 ; AVX512VL-NEXT: kmovd %k0, %eax
603 ; AVX512VL-NEXT: testb %al, %al
604 ; AVX512VL-NEXT: setne %al
605 ; AVX512VL-NEXT: vzeroupper
606 ; AVX512VL-NEXT: retq
; IR under test: trunc keeps only the low bit of every i64 lane; %b is the OR of
; the eight resulting i1 lanes.
607 %a = trunc <8 x i64> %0 to <8 x i1>
608 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; Codegen test: truncate each lane of a 512-bit <16 x i32> argument to i1 and
; OR-reduce the resulting <16 x i1> mask with
; llvm.experimental.vector.reduce.or.v16i1.
; All three AVX-512 configurations share a single expectation block here.
; The per-target assembly expectations below are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
612 define i1 @trunc_v16i32_v16i1(<16 x i32>) {
613 ; SSE2-LABEL: trunc_v16i32_v16i1:
615 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
616 ; SSE2-NEXT: pand %xmm4, %xmm3
617 ; SSE2-NEXT: pand %xmm4, %xmm2
618 ; SSE2-NEXT: packuswb %xmm3, %xmm2
619 ; SSE2-NEXT: pand %xmm4, %xmm1
620 ; SSE2-NEXT: pand %xmm4, %xmm0
621 ; SSE2-NEXT: packuswb %xmm1, %xmm0
622 ; SSE2-NEXT: packuswb %xmm2, %xmm0
623 ; SSE2-NEXT: psllw $7, %xmm0
624 ; SSE2-NEXT: pmovmskb %xmm0, %eax
625 ; SSE2-NEXT: testw %ax, %ax
626 ; SSE2-NEXT: setne %al
629 ; SSE41-LABEL: trunc_v16i32_v16i1:
631 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
632 ; SSE41-NEXT: pand %xmm4, %xmm3
633 ; SSE41-NEXT: pand %xmm4, %xmm2
634 ; SSE41-NEXT: packusdw %xmm3, %xmm2
635 ; SSE41-NEXT: pand %xmm4, %xmm1
636 ; SSE41-NEXT: pand %xmm4, %xmm0
637 ; SSE41-NEXT: packusdw %xmm1, %xmm0
638 ; SSE41-NEXT: packuswb %xmm2, %xmm0
639 ; SSE41-NEXT: psllw $7, %xmm0
640 ; SSE41-NEXT: pmovmskb %xmm0, %eax
641 ; SSE41-NEXT: testw %ax, %ax
642 ; SSE41-NEXT: setne %al
645 ; AVX1-LABEL: trunc_v16i32_v16i1:
647 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
648 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
649 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
650 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
651 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
652 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
653 ; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
654 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
655 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
656 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
657 ; AVX1-NEXT: testw %ax, %ax
658 ; AVX1-NEXT: setne %al
659 ; AVX1-NEXT: vzeroupper
662 ; AVX2-LABEL: trunc_v16i32_v16i1:
664 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
665 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
666 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
667 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
668 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
669 ; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
670 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
671 ; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
672 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
673 ; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
674 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
675 ; AVX2-NEXT: testw %ax, %ax
676 ; AVX2-NEXT: setne %al
677 ; AVX2-NEXT: vzeroupper
680 ; AVX512-LABEL: trunc_v16i32_v16i1:
682 ; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
683 ; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
684 ; AVX512-NEXT: kortestw %k0, %k0
685 ; AVX512-NEXT: setne %al
686 ; AVX512-NEXT: vzeroupper
; IR under test: trunc keeps only the low bit of every i32 lane; %b is the OR of
; the sixteen resulting i1 lanes.
688 %a = trunc <16 x i32> %0 to <16 x i1>
689 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; Codegen test: truncate each lane of a 512-bit <32 x i16> argument to i1 and
; OR-reduce the resulting <32 x i1> mask with
; llvm.experimental.vector.reduce.or.v32i1.
; The per-target assembly expectations below are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
693 define i1 @trunc_v32i16_v32i1(<32 x i16>) {
694 ; SSE2-LABEL: trunc_v32i16_v32i1:
696 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
697 ; SSE2-NEXT: pand %xmm4, %xmm3
698 ; SSE2-NEXT: pand %xmm4, %xmm2
699 ; SSE2-NEXT: packuswb %xmm3, %xmm2
700 ; SSE2-NEXT: pand %xmm4, %xmm1
701 ; SSE2-NEXT: pand %xmm4, %xmm0
702 ; SSE2-NEXT: packuswb %xmm1, %xmm0
703 ; SSE2-NEXT: por %xmm2, %xmm0
704 ; SSE2-NEXT: psllw $7, %xmm0
705 ; SSE2-NEXT: pmovmskb %xmm0, %eax
706 ; SSE2-NEXT: testw %ax, %ax
707 ; SSE2-NEXT: setne %al
710 ; SSE41-LABEL: trunc_v32i16_v32i1:
712 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
713 ; SSE41-NEXT: pshufb %xmm4, %xmm3
714 ; SSE41-NEXT: pshufb %xmm4, %xmm2
715 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
716 ; SSE41-NEXT: pshufb %xmm4, %xmm1
717 ; SSE41-NEXT: pshufb %xmm4, %xmm0
718 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
719 ; SSE41-NEXT: por %xmm2, %xmm0
720 ; SSE41-NEXT: psllw $7, %xmm0
721 ; SSE41-NEXT: pmovmskb %xmm0, %eax
722 ; SSE41-NEXT: testw %ax, %ax
723 ; SSE41-NEXT: setne %al
726 ; AVX1-LABEL: trunc_v32i16_v32i1:
728 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
729 ; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
730 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
731 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
732 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
733 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
734 ; AVX1-NEXT: testw %ax, %ax
735 ; AVX1-NEXT: setne %al
736 ; AVX1-NEXT: vzeroupper
739 ; AVX2-LABEL: trunc_v32i16_v32i1:
741 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
742 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
743 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
744 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm2
745 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
746 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
747 ; AVX2-NEXT: vpackuswb %ymm0, %ymm2, %ymm0
748 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
749 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
750 ; AVX2-NEXT: testl %eax, %eax
751 ; AVX2-NEXT: setne %al
752 ; AVX2-NEXT: vzeroupper
755 ; AVX512F-LABEL: trunc_v32i16_v32i1:
757 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
758 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
759 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
760 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
761 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
762 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
763 ; AVX512F-NEXT: korw %k1, %k0, %k0
764 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
765 ; AVX512F-NEXT: korw %k1, %k0, %k0
766 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
767 ; AVX512F-NEXT: korw %k1, %k0, %k0
768 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
769 ; AVX512F-NEXT: korw %k1, %k0, %k0
770 ; AVX512F-NEXT: kmovw %k0, %eax
771 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
772 ; AVX512F-NEXT: vzeroupper
775 ; AVX512BW-LABEL: trunc_v32i16_v32i1:
777 ; AVX512BW-NEXT: vpsllw $15, %zmm0, %zmm0
778 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
779 ; AVX512BW-NEXT: kortestd %k0, %k0
780 ; AVX512BW-NEXT: setne %al
781 ; AVX512BW-NEXT: vzeroupper
782 ; AVX512BW-NEXT: retq
784 ; AVX512VL-LABEL: trunc_v32i16_v32i1:
786 ; AVX512VL-NEXT: vpsllw $15, %zmm0, %zmm0
787 ; AVX512VL-NEXT: vpmovw2m %zmm0, %k0
788 ; AVX512VL-NEXT: kortestd %k0, %k0
789 ; AVX512VL-NEXT: setne %al
790 ; AVX512VL-NEXT: vzeroupper
791 ; AVX512VL-NEXT: retq
; IR under test: trunc keeps only the low bit of every i16 lane; %b is the OR of
; the thirty-two resulting i1 lanes.
792 %a = trunc <32 x i16> %0 to <32 x i1>
793 %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
; Codegen test: truncate each lane of a 512-bit <64 x i8> argument to i1 and
; OR-reduce the resulting <64 x i1> mask with
; llvm.experimental.vector.reduce.or.v64i1.
; The per-target assembly expectations below are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
797 define i1 @trunc_v64i8_v64i1(<64 x i8>) {
798 ; SSE2-LABEL: trunc_v64i8_v64i1:
800 ; SSE2-NEXT: por %xmm3, %xmm1
801 ; SSE2-NEXT: por %xmm2, %xmm1
802 ; SSE2-NEXT: por %xmm0, %xmm1
803 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
804 ; SSE2-NEXT: por %xmm1, %xmm0
805 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
806 ; SSE2-NEXT: por %xmm0, %xmm1
807 ; SSE2-NEXT: movdqa %xmm1, %xmm0
808 ; SSE2-NEXT: psrld $16, %xmm0
809 ; SSE2-NEXT: por %xmm1, %xmm0
810 ; SSE2-NEXT: movdqa %xmm0, %xmm1
811 ; SSE2-NEXT: psrlw $8, %xmm1
812 ; SSE2-NEXT: por %xmm0, %xmm1
813 ; SSE2-NEXT: movd %xmm1, %eax
814 ; SSE2-NEXT: # kill: def $al killed $al killed $eax
817 ; SSE41-LABEL: trunc_v64i8_v64i1:
819 ; SSE41-NEXT: por %xmm3, %xmm1
820 ; SSE41-NEXT: por %xmm2, %xmm1
821 ; SSE41-NEXT: por %xmm0, %xmm1
822 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
823 ; SSE41-NEXT: por %xmm1, %xmm0
824 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
825 ; SSE41-NEXT: por %xmm0, %xmm1
826 ; SSE41-NEXT: movdqa %xmm1, %xmm0
827 ; SSE41-NEXT: psrld $16, %xmm0
828 ; SSE41-NEXT: por %xmm1, %xmm0
829 ; SSE41-NEXT: movdqa %xmm0, %xmm1
830 ; SSE41-NEXT: psrlw $8, %xmm1
831 ; SSE41-NEXT: por %xmm0, %xmm1
832 ; SSE41-NEXT: pextrb $0, %xmm1, %eax
833 ; SSE41-NEXT: # kill: def $al killed $al killed $eax
836 ; AVX1-LABEL: trunc_v64i8_v64i1:
838 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
839 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
840 ; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
841 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
842 ; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
843 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
844 ; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
845 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
846 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
847 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
848 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
849 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax
850 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
851 ; AVX1-NEXT: vzeroupper
854 ; AVX2-LABEL: trunc_v64i8_v64i1:
856 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
857 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
858 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
859 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
860 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
861 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
862 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
863 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
864 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
865 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
866 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
867 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax
868 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
869 ; AVX2-NEXT: vzeroupper
872 ; AVX512F-LABEL: trunc_v64i8_v64i1:
874 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
875 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
876 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
877 ; AVX512F-NEXT: vpor %xmm2, %xmm3, %xmm2
878 ; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1
879 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
880 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
881 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
882 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
883 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
884 ; AVX512F-NEXT: korw %k1, %k0, %k0
885 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
886 ; AVX512F-NEXT: korw %k1, %k0, %k0
887 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
888 ; AVX512F-NEXT: korw %k1, %k0, %k0
889 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
890 ; AVX512F-NEXT: korw %k1, %k0, %k0
891 ; AVX512F-NEXT: kmovw %k0, %eax
892 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
893 ; AVX512F-NEXT: vzeroupper
896 ; AVX512BW-LABEL: trunc_v64i8_v64i1:
898 ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0
899 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
900 ; AVX512BW-NEXT: kshiftrq $32, %k0, %k1
901 ; AVX512BW-NEXT: korq %k1, %k0, %k0
902 ; AVX512BW-NEXT: kshiftrq $16, %k0, %k1
903 ; AVX512BW-NEXT: korq %k1, %k0, %k0
904 ; AVX512BW-NEXT: kshiftrq $8, %k0, %k1
905 ; AVX512BW-NEXT: korq %k1, %k0, %k0
906 ; AVX512BW-NEXT: kshiftrq $4, %k0, %k1
907 ; AVX512BW-NEXT: korq %k1, %k0, %k0
908 ; AVX512BW-NEXT: kshiftrq $2, %k0, %k1
909 ; AVX512BW-NEXT: korq %k1, %k0, %k0
910 ; AVX512BW-NEXT: kshiftrq $1, %k0, %k1
911 ; AVX512BW-NEXT: korq %k1, %k0, %k0
912 ; AVX512BW-NEXT: kmovd %k0, %eax
913 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
914 ; AVX512BW-NEXT: vzeroupper
915 ; AVX512BW-NEXT: retq
917 ; AVX512VL-LABEL: trunc_v64i8_v64i1:
919 ; AVX512VL-NEXT: vpsllw $7, %zmm0, %zmm0
920 ; AVX512VL-NEXT: vpmovb2m %zmm0, %k0
921 ; AVX512VL-NEXT: kshiftrq $32, %k0, %k1
922 ; AVX512VL-NEXT: korq %k1, %k0, %k0
923 ; AVX512VL-NEXT: kshiftrq $16, %k0, %k1
924 ; AVX512VL-NEXT: korq %k1, %k0, %k0
925 ; AVX512VL-NEXT: kshiftrq $8, %k0, %k1
926 ; AVX512VL-NEXT: korq %k1, %k0, %k0
927 ; AVX512VL-NEXT: kshiftrq $4, %k0, %k1
928 ; AVX512VL-NEXT: korq %k1, %k0, %k0
929 ; AVX512VL-NEXT: kshiftrq $2, %k0, %k1
930 ; AVX512VL-NEXT: korq %k1, %k0, %k0
931 ; AVX512VL-NEXT: kshiftrq $1, %k0, %k1
932 ; AVX512VL-NEXT: korq %k1, %k0, %k0
933 ; AVX512VL-NEXT: kmovd %k0, %eax
934 ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
935 ; AVX512VL-NEXT: vzeroupper
936 ; AVX512VL-NEXT: retq
; IR under test: trunc keeps only the low bit of every i8 lane; %b is the OR of
; the sixty-four resulting i1 lanes.
937 %a = trunc <64 x i8> %0 to <64 x i1>
938 %b = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> %a)
; Codegen test: compare each lane of a <2 x i64> argument against zero and
; OR-reduce the resulting <2 x i1> mask with
; llvm.experimental.vector.reduce.or.v2i1.
; The per-target assembly expectations below are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
946 define i1 @icmp_v2i64_v2i1(<2 x i64>) {
947 ; SSE2-LABEL: icmp_v2i64_v2i1:
949 ; SSE2-NEXT: pxor %xmm1, %xmm1
950 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
951 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
952 ; SSE2-NEXT: pand %xmm1, %xmm0
953 ; SSE2-NEXT: movmskpd %xmm0, %eax
954 ; SSE2-NEXT: testb %al, %al
955 ; SSE2-NEXT: setne %al
958 ; SSE41-LABEL: icmp_v2i64_v2i1:
960 ; SSE41-NEXT: pxor %xmm1, %xmm1
961 ; SSE41-NEXT: pcmpeqq %xmm0, %xmm1
962 ; SSE41-NEXT: movmskpd %xmm1, %eax
963 ; SSE41-NEXT: testb %al, %al
964 ; SSE41-NEXT: setne %al
967 ; AVX-LABEL: icmp_v2i64_v2i1:
969 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
970 ; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
971 ; AVX-NEXT: vmovmskpd %xmm0, %eax
972 ; AVX-NEXT: testb %al, %al
973 ; AVX-NEXT: setne %al
976 ; AVX512F-LABEL: icmp_v2i64_v2i1:
978 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
979 ; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
980 ; AVX512F-NEXT: kmovw %k0, %eax
981 ; AVX512F-NEXT: testb $3, %al
982 ; AVX512F-NEXT: setne %al
983 ; AVX512F-NEXT: vzeroupper
986 ; AVX512BW-LABEL: icmp_v2i64_v2i1:
988 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
989 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
990 ; AVX512BW-NEXT: kmovd %k0, %eax
991 ; AVX512BW-NEXT: testb $3, %al
992 ; AVX512BW-NEXT: setne %al
993 ; AVX512BW-NEXT: vzeroupper
994 ; AVX512BW-NEXT: retq
996 ; AVX512VL-LABEL: icmp_v2i64_v2i1:
998 ; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k0
999 ; AVX512VL-NEXT: kmovd %k0, %eax
1000 ; AVX512VL-NEXT: testb $3, %al
1001 ; AVX512VL-NEXT: setne %al
1002 ; AVX512VL-NEXT: retq
; IR under test: a lane of %a is true when the matching i64 element equals zero;
; %b is the OR of the two lanes, i.e. true when any element is zero.
1003 %a = icmp eq <2 x i64> %0, zeroinitializer
1004 %b = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %a)
; Codegen test: compare each lane of a <4 x i32> argument against zero and
; OR-reduce the resulting <4 x i1> mask with
; llvm.experimental.vector.reduce.or.v4i1.
; The per-target assembly expectations below are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
1008 define i1 @icmp_v4i32_v4i1(<4 x i32>) {
1009 ; SSE-LABEL: icmp_v4i32_v4i1:
1011 ; SSE-NEXT: pxor %xmm1, %xmm1
1012 ; SSE-NEXT: pcmpeqd %xmm0, %xmm1
1013 ; SSE-NEXT: movmskps %xmm1, %eax
1014 ; SSE-NEXT: testb %al, %al
1015 ; SSE-NEXT: setne %al
1018 ; AVX-LABEL: icmp_v4i32_v4i1:
1020 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1021 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1022 ; AVX-NEXT: vmovmskps %xmm0, %eax
1023 ; AVX-NEXT: testb %al, %al
1024 ; AVX-NEXT: setne %al
1027 ; AVX512F-LABEL: icmp_v4i32_v4i1:
1029 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1030 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
1031 ; AVX512F-NEXT: kmovw %k0, %eax
1032 ; AVX512F-NEXT: testb $15, %al
1033 ; AVX512F-NEXT: setne %al
1034 ; AVX512F-NEXT: vzeroupper
1035 ; AVX512F-NEXT: retq
1037 ; AVX512BW-LABEL: icmp_v4i32_v4i1:
1038 ; AVX512BW: # %bb.0:
1039 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1040 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
1041 ; AVX512BW-NEXT: kmovd %k0, %eax
1042 ; AVX512BW-NEXT: testb $15, %al
1043 ; AVX512BW-NEXT: setne %al
1044 ; AVX512BW-NEXT: vzeroupper
1045 ; AVX512BW-NEXT: retq
1047 ; AVX512VL-LABEL: icmp_v4i32_v4i1:
1048 ; AVX512VL: # %bb.0:
1049 ; AVX512VL-NEXT: vptestnmd %xmm0, %xmm0, %k0
1050 ; AVX512VL-NEXT: kmovd %k0, %eax
1051 ; AVX512VL-NEXT: testb $15, %al
1052 ; AVX512VL-NEXT: setne %al
1053 ; AVX512VL-NEXT: retq
; IR under test: a lane of %a is true when the matching i32 element equals zero;
; %b is the OR of the four lanes, i.e. true when any element is zero.
1054 %a = icmp eq <4 x i32> %0, zeroinitializer
1055 %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
; icmp_v8i16_v8i1 - compares every lane of the <8 x i8> argument with zero
; (icmp eq against zeroinitializer) and OR-reduces the resulting <8 x i1> mask
; with llvm.experimental.vector.reduce.or.v8i1.
; NOTE(review): the function name says v8i16, but both the parameter and the
; icmp operate on <8 x i8>, and the expectations use byte compares
; (pcmpeqb / vptestnmb). Sibling tests follow an icmp_v<N><elt>_v<N>i1 naming
; pattern, so this looks like it was meant to take <8 x i16> - confirm. Changing
; the type requires regenerating the assertions with
; utils/update_llc_test_checks.py; do not edit them by hand.
1059 define i1 @icmp_v8i16_v8i1(<8 x i8>) {
1060 ; SSE2-LABEL: icmp_v8i16_v8i1:
1062 ; SSE2-NEXT: pxor %xmm1, %xmm1
1063 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
1064 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1065 ; SSE2-NEXT: packsswb %xmm0, %xmm0
1066 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1067 ; SSE2-NEXT: testb %al, %al
1068 ; SSE2-NEXT: setne %al
1071 ; SSE41-LABEL: icmp_v8i16_v8i1:
1073 ; SSE41-NEXT: pxor %xmm1, %xmm1
1074 ; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
1075 ; SSE41-NEXT: pmovsxbw %xmm1, %xmm0
1076 ; SSE41-NEXT: packsswb %xmm0, %xmm0
1077 ; SSE41-NEXT: pmovmskb %xmm0, %eax
1078 ; SSE41-NEXT: testb %al, %al
1079 ; SSE41-NEXT: setne %al
1082 ; AVX-LABEL: icmp_v8i16_v8i1:
1084 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1085 ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1086 ; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
1087 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
1088 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1089 ; AVX-NEXT: testb %al, %al
1090 ; AVX-NEXT: setne %al
1093 ; AVX512F-LABEL: icmp_v8i16_v8i1:
1095 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1096 ; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1097 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1098 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1099 ; AVX512F-NEXT: kmovw %k0, %eax
1100 ; AVX512F-NEXT: testb %al, %al
1101 ; AVX512F-NEXT: setne %al
1102 ; AVX512F-NEXT: vzeroupper
1103 ; AVX512F-NEXT: retq
1105 ; AVX512BW-LABEL: icmp_v8i16_v8i1:
1106 ; AVX512BW: # %bb.0:
1107 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1108 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
1109 ; AVX512BW-NEXT: kmovd %k0, %eax
1110 ; AVX512BW-NEXT: testb %al, %al
1111 ; AVX512BW-NEXT: setne %al
1112 ; AVX512BW-NEXT: vzeroupper
1113 ; AVX512BW-NEXT: retq
1115 ; AVX512VL-LABEL: icmp_v8i16_v8i1:
1116 ; AVX512VL: # %bb.0:
1117 ; AVX512VL-NEXT: vptestnmb %xmm0, %xmm0, %k0
1118 ; AVX512VL-NEXT: kmovd %k0, %eax
1119 ; AVX512VL-NEXT: testb %al, %al
1120 ; AVX512VL-NEXT: setne %al
1121 ; AVX512VL-NEXT: retq
1122 %a = icmp eq <8 x i8> %0, zeroinitializer
1123 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; icmp_v16i8_v16i1 - compares every lane of the <16 x i8> argument with zero
; (icmp eq against zeroinitializer) and OR-reduces the resulting <16 x i1> mask
; with llvm.experimental.vector.reduce.or.v16i1.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them rather than editing by hand.
; The AVX512F expectation is the same compare + vpmovmskb + testw sequence as
; the AVX one; the AVX512BW and AVX512VL expectations use vptestnmb followed by
; kortestw.
1127 define i1 @icmp_v16i8_v16i1(<16 x i8>) {
1128 ; SSE-LABEL: icmp_v16i8_v16i1:
1130 ; SSE-NEXT: pxor %xmm1, %xmm1
1131 ; SSE-NEXT: pcmpeqb %xmm0, %xmm1
1132 ; SSE-NEXT: pmovmskb %xmm1, %eax
1133 ; SSE-NEXT: testw %ax, %ax
1134 ; SSE-NEXT: setne %al
1137 ; AVX-LABEL: icmp_v16i8_v16i1:
1139 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1140 ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1141 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1142 ; AVX-NEXT: testw %ax, %ax
1143 ; AVX-NEXT: setne %al
1146 ; AVX512F-LABEL: icmp_v16i8_v16i1:
1148 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1149 ; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1150 ; AVX512F-NEXT: vpmovmskb %xmm0, %eax
1151 ; AVX512F-NEXT: testw %ax, %ax
1152 ; AVX512F-NEXT: setne %al
1153 ; AVX512F-NEXT: retq
1155 ; AVX512BW-LABEL: icmp_v16i8_v16i1:
1156 ; AVX512BW: # %bb.0:
1157 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1158 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
1159 ; AVX512BW-NEXT: kortestw %k0, %k0
1160 ; AVX512BW-NEXT: setne %al
1161 ; AVX512BW-NEXT: vzeroupper
1162 ; AVX512BW-NEXT: retq
1164 ; AVX512VL-LABEL: icmp_v16i8_v16i1:
1165 ; AVX512VL: # %bb.0:
1166 ; AVX512VL-NEXT: vptestnmb %xmm0, %xmm0, %k0
1167 ; AVX512VL-NEXT: kortestw %k0, %k0
1168 ; AVX512VL-NEXT: setne %al
1169 ; AVX512VL-NEXT: retq
1170 %a = icmp eq <16 x i8> %0, zeroinitializer
1171 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; icmp_v4i64_v4i1 - compares every lane of the <4 x i64> argument with zero
; (icmp eq against zeroinitializer) and OR-reduces the resulting <4 x i1> mask
; with llvm.experimental.vector.reduce.or.v4i1.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them rather than editing by hand.
; The SSE2 expectation builds each 64-bit compare from pcmpeqd + pshufd + pand,
; where the SSE41 expectation uses pcmpeqq. All AVX512 expectations keep only
; the low four mask bits (testb $15).
1175 define i1 @icmp_v4i64_v4i1(<4 x i64>) {
1176 ; SSE2-LABEL: icmp_v4i64_v4i1:
1178 ; SSE2-NEXT: pxor %xmm2, %xmm2
1179 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
1180 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
1181 ; SSE2-NEXT: pand %xmm1, %xmm3
1182 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
1183 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
1184 ; SSE2-NEXT: pand %xmm0, %xmm1
1185 ; SSE2-NEXT: packssdw %xmm3, %xmm1
1186 ; SSE2-NEXT: movmskps %xmm1, %eax
1187 ; SSE2-NEXT: testb %al, %al
1188 ; SSE2-NEXT: setne %al
1191 ; SSE41-LABEL: icmp_v4i64_v4i1:
1193 ; SSE41-NEXT: pxor %xmm2, %xmm2
1194 ; SSE41-NEXT: pcmpeqq %xmm2, %xmm1
1195 ; SSE41-NEXT: pcmpeqq %xmm2, %xmm0
1196 ; SSE41-NEXT: packssdw %xmm1, %xmm0
1197 ; SSE41-NEXT: movmskps %xmm0, %eax
1198 ; SSE41-NEXT: testb %al, %al
1199 ; SSE41-NEXT: setne %al
1202 ; AVX1-LABEL: icmp_v4i64_v4i1:
1204 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1205 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1206 ; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
1207 ; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
1208 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1209 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
1210 ; AVX1-NEXT: testb %al, %al
1211 ; AVX1-NEXT: setne %al
1212 ; AVX1-NEXT: vzeroupper
1215 ; AVX2-LABEL: icmp_v4i64_v4i1:
1217 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1218 ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
1219 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
1220 ; AVX2-NEXT: testb %al, %al
1221 ; AVX2-NEXT: setne %al
1222 ; AVX2-NEXT: vzeroupper
1225 ; AVX512F-LABEL: icmp_v4i64_v4i1:
1227 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1228 ; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
1229 ; AVX512F-NEXT: kmovw %k0, %eax
1230 ; AVX512F-NEXT: testb $15, %al
1231 ; AVX512F-NEXT: setne %al
1232 ; AVX512F-NEXT: vzeroupper
1233 ; AVX512F-NEXT: retq
1235 ; AVX512BW-LABEL: icmp_v4i64_v4i1:
1236 ; AVX512BW: # %bb.0:
1237 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1238 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
1239 ; AVX512BW-NEXT: kmovd %k0, %eax
1240 ; AVX512BW-NEXT: testb $15, %al
1241 ; AVX512BW-NEXT: setne %al
1242 ; AVX512BW-NEXT: vzeroupper
1243 ; AVX512BW-NEXT: retq
1245 ; AVX512VL-LABEL: icmp_v4i64_v4i1:
1246 ; AVX512VL: # %bb.0:
1247 ; AVX512VL-NEXT: vptestnmq %ymm0, %ymm0, %k0
1248 ; AVX512VL-NEXT: kmovd %k0, %eax
1249 ; AVX512VL-NEXT: testb $15, %al
1250 ; AVX512VL-NEXT: setne %al
1251 ; AVX512VL-NEXT: vzeroupper
1252 ; AVX512VL-NEXT: retq
1253 %a = icmp eq <4 x i64> %0, zeroinitializer
1254 %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
; icmp_v8i32_v8i1 - compares every lane of the <8 x i32> argument with zero
; (icmp eq against zeroinitializer) and OR-reduces the resulting <8 x i1> mask
; with llvm.experimental.vector.reduce.or.v8i1.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them rather than editing by hand.
; With eight lanes the mask fills a whole byte, so every expectation ends in a
; plain testb %al, %al with no immediate mask.
1258 define i1 @icmp_v8i32_v8i1(<8 x i32>) {
1259 ; SSE-LABEL: icmp_v8i32_v8i1:
1261 ; SSE-NEXT: pxor %xmm2, %xmm2
1262 ; SSE-NEXT: pcmpeqd %xmm2, %xmm1
1263 ; SSE-NEXT: pcmpeqd %xmm2, %xmm0
1264 ; SSE-NEXT: packssdw %xmm1, %xmm0
1265 ; SSE-NEXT: packsswb %xmm0, %xmm0
1266 ; SSE-NEXT: pmovmskb %xmm0, %eax
1267 ; SSE-NEXT: testb %al, %al
1268 ; SSE-NEXT: setne %al
1271 ; AVX1-LABEL: icmp_v8i32_v8i1:
1273 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1274 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1275 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
1276 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
1277 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1278 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1279 ; AVX1-NEXT: testb %al, %al
1280 ; AVX1-NEXT: setne %al
1281 ; AVX1-NEXT: vzeroupper
1284 ; AVX2-LABEL: icmp_v8i32_v8i1:
1286 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1287 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
1288 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1289 ; AVX2-NEXT: testb %al, %al
1290 ; AVX2-NEXT: setne %al
1291 ; AVX2-NEXT: vzeroupper
1294 ; AVX512F-LABEL: icmp_v8i32_v8i1:
1296 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1297 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
1298 ; AVX512F-NEXT: kmovw %k0, %eax
1299 ; AVX512F-NEXT: testb %al, %al
1300 ; AVX512F-NEXT: setne %al
1301 ; AVX512F-NEXT: vzeroupper
1302 ; AVX512F-NEXT: retq
1304 ; AVX512BW-LABEL: icmp_v8i32_v8i1:
1305 ; AVX512BW: # %bb.0:
1306 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1307 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
1308 ; AVX512BW-NEXT: kmovd %k0, %eax
1309 ; AVX512BW-NEXT: testb %al, %al
1310 ; AVX512BW-NEXT: setne %al
1311 ; AVX512BW-NEXT: vzeroupper
1312 ; AVX512BW-NEXT: retq
1314 ; AVX512VL-LABEL: icmp_v8i32_v8i1:
1315 ; AVX512VL: # %bb.0:
1316 ; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
1317 ; AVX512VL-NEXT: kmovd %k0, %eax
1318 ; AVX512VL-NEXT: testb %al, %al
1319 ; AVX512VL-NEXT: setne %al
1320 ; AVX512VL-NEXT: vzeroupper
1321 ; AVX512VL-NEXT: retq
1322 %a = icmp eq <8 x i32> %0, zeroinitializer
1323 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; icmp_v16i16_v16i1 - compares every lane of the <16 x i16> argument with zero
; (icmp eq against zeroinitializer) and OR-reduces the resulting <16 x i1> mask
; with llvm.experimental.vector.reduce.or.v16i1.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them rather than editing by hand.
; The AVX512F expectation does the word compare in ymm, sign-extends the result
; to zmm (vpmovsxwd) and only then forms the k-mask; the AVX512BW and AVX512VL
; expectations form the mask directly with vptestnmw.
1327 define i1 @icmp_v16i16_v16i1(<16 x i16>) {
1328 ; SSE-LABEL: icmp_v16i16_v16i1:
1330 ; SSE-NEXT: pxor %xmm2, %xmm2
1331 ; SSE-NEXT: pcmpeqw %xmm2, %xmm1
1332 ; SSE-NEXT: pcmpeqw %xmm2, %xmm0
1333 ; SSE-NEXT: packsswb %xmm1, %xmm0
1334 ; SSE-NEXT: pmovmskb %xmm0, %eax
1335 ; SSE-NEXT: testw %ax, %ax
1336 ; SSE-NEXT: setne %al
1339 ; AVX1-LABEL: icmp_v16i16_v16i1:
1341 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1342 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1343 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
1344 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
1345 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1346 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1347 ; AVX1-NEXT: testw %ax, %ax
1348 ; AVX1-NEXT: setne %al
1349 ; AVX1-NEXT: vzeroupper
1352 ; AVX2-LABEL: icmp_v16i16_v16i1:
1354 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1355 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1356 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1357 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1358 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
1359 ; AVX2-NEXT: testw %ax, %ax
1360 ; AVX2-NEXT: setne %al
1361 ; AVX2-NEXT: vzeroupper
1364 ; AVX512F-LABEL: icmp_v16i16_v16i1:
1366 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1367 ; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1368 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
1369 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1370 ; AVX512F-NEXT: kortestw %k0, %k0
1371 ; AVX512F-NEXT: setne %al
1372 ; AVX512F-NEXT: vzeroupper
1373 ; AVX512F-NEXT: retq
1375 ; AVX512BW-LABEL: icmp_v16i16_v16i1:
1376 ; AVX512BW: # %bb.0:
1377 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1378 ; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
1379 ; AVX512BW-NEXT: kortestw %k0, %k0
1380 ; AVX512BW-NEXT: setne %al
1381 ; AVX512BW-NEXT: vzeroupper
1382 ; AVX512BW-NEXT: retq
1384 ; AVX512VL-LABEL: icmp_v16i16_v16i1:
1385 ; AVX512VL: # %bb.0:
1386 ; AVX512VL-NEXT: vptestnmw %ymm0, %ymm0, %k0
1387 ; AVX512VL-NEXT: kortestw %k0, %k0
1388 ; AVX512VL-NEXT: setne %al
1389 ; AVX512VL-NEXT: vzeroupper
1390 ; AVX512VL-NEXT: retq
1391 %a = icmp eq <16 x i16> %0, zeroinitializer
1392 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; icmp_v32i8_v32i1 - compares every lane of the <32 x i8> argument with zero
; (icmp eq against zeroinitializer) and OR-reduces the resulting <32 x i1> mask
; with llvm.experimental.vector.reduce.or.v32i1.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them rather than editing by hand.
; The SSE and AVX1 expectations OR the two 128-bit compare results together
; before a single pmovmskb; AVX2 uses one 256-bit vpmovmskb and testl.
; NOTE(review): the AVX512F expectation reduces the k-mask with a
; kshiftrw/korw chain (shifts of 8, 4, 2, 1) and returns bit 0, whereas the
; AVX512BW and AVX512VL expectations use a single kortestd. That may be a
; missed optimization in the recorded output rather than intended - confirm.
1396 define i1 @icmp_v32i8_v32i1(<32 x i8>) {
1397 ; SSE-LABEL: icmp_v32i8_v32i1:
1399 ; SSE-NEXT: pxor %xmm2, %xmm2
1400 ; SSE-NEXT: pcmpeqb %xmm2, %xmm1
1401 ; SSE-NEXT: pcmpeqb %xmm2, %xmm0
1402 ; SSE-NEXT: por %xmm1, %xmm0
1403 ; SSE-NEXT: pmovmskb %xmm0, %eax
1404 ; SSE-NEXT: testw %ax, %ax
1405 ; SSE-NEXT: setne %al
1408 ; AVX1-LABEL: icmp_v32i8_v32i1:
1410 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1411 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1412 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
1413 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
1414 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1415 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1416 ; AVX1-NEXT: testw %ax, %ax
1417 ; AVX1-NEXT: setne %al
1418 ; AVX1-NEXT: vzeroupper
1421 ; AVX2-LABEL: icmp_v32i8_v32i1:
1423 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1424 ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
1425 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1426 ; AVX2-NEXT: testl %eax, %eax
1427 ; AVX2-NEXT: setne %al
1428 ; AVX2-NEXT: vzeroupper
1431 ; AVX512F-LABEL: icmp_v32i8_v32i1:
1433 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1434 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
1435 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
1436 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
1437 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1438 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1439 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
1440 ; AVX512F-NEXT: korw %k1, %k0, %k0
1441 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
1442 ; AVX512F-NEXT: korw %k1, %k0, %k0
1443 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
1444 ; AVX512F-NEXT: korw %k1, %k0, %k0
1445 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
1446 ; AVX512F-NEXT: korw %k1, %k0, %k0
1447 ; AVX512F-NEXT: kmovw %k0, %eax
1448 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
1449 ; AVX512F-NEXT: vzeroupper
1450 ; AVX512F-NEXT: retq
1452 ; AVX512BW-LABEL: icmp_v32i8_v32i1:
1453 ; AVX512BW: # %bb.0:
1454 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1455 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
1456 ; AVX512BW-NEXT: kortestd %k0, %k0
1457 ; AVX512BW-NEXT: setne %al
1458 ; AVX512BW-NEXT: vzeroupper
1459 ; AVX512BW-NEXT: retq
1461 ; AVX512VL-LABEL: icmp_v32i8_v32i1:
1462 ; AVX512VL: # %bb.0:
1463 ; AVX512VL-NEXT: vptestnmb %ymm0, %ymm0, %k0
1464 ; AVX512VL-NEXT: kortestd %k0, %k0
1465 ; AVX512VL-NEXT: setne %al
1466 ; AVX512VL-NEXT: vzeroupper
1467 ; AVX512VL-NEXT: retq
1468 %a = icmp eq <32 x i8> %0, zeroinitializer
1469 %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
; icmp_v8i64_v8i1 - compares every lane of the <8 x i64> argument with zero
; (icmp eq against zeroinitializer) and OR-reduces the resulting <8 x i1> mask
; with llvm.experimental.vector.reduce.or.v8i1.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them rather than editing by hand.
; The SSE2 expectation builds each 64-bit compare from pcmpeqd + pshufd + pand,
; where the SSE41 expectation uses pcmpeqq. The three AVX512 expectations share
; the same vptestnmq on zmm0 and differ only in kmovw versus kmovd.
1473 define i1 @icmp_v8i64_v8i1(<8 x i64>) {
1474 ; SSE2-LABEL: icmp_v8i64_v8i1:
1476 ; SSE2-NEXT: pxor %xmm4, %xmm4
1477 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
1478 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
1479 ; SSE2-NEXT: pand %xmm3, %xmm5
1480 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
1481 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
1482 ; SSE2-NEXT: pand %xmm2, %xmm3
1483 ; SSE2-NEXT: packssdw %xmm5, %xmm3
1484 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
1485 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
1486 ; SSE2-NEXT: pand %xmm1, %xmm2
1487 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
1488 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
1489 ; SSE2-NEXT: pand %xmm0, %xmm1
1490 ; SSE2-NEXT: packssdw %xmm2, %xmm1
1491 ; SSE2-NEXT: packssdw %xmm3, %xmm1
1492 ; SSE2-NEXT: packsswb %xmm0, %xmm1
1493 ; SSE2-NEXT: pmovmskb %xmm1, %eax
1494 ; SSE2-NEXT: testb %al, %al
1495 ; SSE2-NEXT: setne %al
1498 ; SSE41-LABEL: icmp_v8i64_v8i1:
1500 ; SSE41-NEXT: pxor %xmm4, %xmm4
1501 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm3
1502 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm2
1503 ; SSE41-NEXT: packssdw %xmm3, %xmm2
1504 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
1505 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
1506 ; SSE41-NEXT: packssdw %xmm1, %xmm0
1507 ; SSE41-NEXT: packssdw %xmm2, %xmm0
1508 ; SSE41-NEXT: packsswb %xmm0, %xmm0
1509 ; SSE41-NEXT: pmovmskb %xmm0, %eax
1510 ; SSE41-NEXT: testb %al, %al
1511 ; SSE41-NEXT: setne %al
1514 ; AVX1-LABEL: icmp_v8i64_v8i1:
1516 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1517 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1518 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
1519 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
1520 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1521 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1522 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
1523 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
1524 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1525 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1526 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1527 ; AVX1-NEXT: testb %al, %al
1528 ; AVX1-NEXT: setne %al
1529 ; AVX1-NEXT: vzeroupper
1532 ; AVX2-LABEL: icmp_v8i64_v8i1:
1534 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1535 ; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
1536 ; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
1537 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1538 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1539 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1540 ; AVX2-NEXT: testb %al, %al
1541 ; AVX2-NEXT: setne %al
1542 ; AVX2-NEXT: vzeroupper
1545 ; AVX512F-LABEL: icmp_v8i64_v8i1:
1547 ; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
1548 ; AVX512F-NEXT: kmovw %k0, %eax
1549 ; AVX512F-NEXT: testb %al, %al
1550 ; AVX512F-NEXT: setne %al
1551 ; AVX512F-NEXT: vzeroupper
1552 ; AVX512F-NEXT: retq
1554 ; AVX512BW-LABEL: icmp_v8i64_v8i1:
1555 ; AVX512BW: # %bb.0:
1556 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
1557 ; AVX512BW-NEXT: kmovd %k0, %eax
1558 ; AVX512BW-NEXT: testb %al, %al
1559 ; AVX512BW-NEXT: setne %al
1560 ; AVX512BW-NEXT: vzeroupper
1561 ; AVX512BW-NEXT: retq
1563 ; AVX512VL-LABEL: icmp_v8i64_v8i1:
1564 ; AVX512VL: # %bb.0:
1565 ; AVX512VL-NEXT: vptestnmq %zmm0, %zmm0, %k0
1566 ; AVX512VL-NEXT: kmovd %k0, %eax
1567 ; AVX512VL-NEXT: testb %al, %al
1568 ; AVX512VL-NEXT: setne %al
1569 ; AVX512VL-NEXT: vzeroupper
1570 ; AVX512VL-NEXT: retq
1571 %a = icmp eq <8 x i64> %0, zeroinitializer
1572 %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
; icmp_v16i32_v16i1 - compares every lane of the <16 x i32> argument with zero
; (icmp eq against zeroinitializer) and OR-reduces the resulting <16 x i1> mask
; with llvm.experimental.vector.reduce.or.v16i1.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them rather than editing by hand.
; All AVX512 runs share one expectation here (the common AVX512 check prefix),
; a vptestnmd on zmm0 followed by kortestw.
1576 define i1 @icmp_v16i32_v16i1(<16 x i32>) {
1577 ; SSE-LABEL: icmp_v16i32_v16i1:
1579 ; SSE-NEXT: pxor %xmm4, %xmm4
1580 ; SSE-NEXT: pcmpeqd %xmm4, %xmm3
1581 ; SSE-NEXT: pcmpeqd %xmm4, %xmm2
1582 ; SSE-NEXT: packssdw %xmm3, %xmm2
1583 ; SSE-NEXT: pcmpeqd %xmm4, %xmm1
1584 ; SSE-NEXT: pcmpeqd %xmm4, %xmm0
1585 ; SSE-NEXT: packssdw %xmm1, %xmm0
1586 ; SSE-NEXT: packsswb %xmm2, %xmm0
1587 ; SSE-NEXT: pmovmskb %xmm0, %eax
1588 ; SSE-NEXT: testw %ax, %ax
1589 ; SSE-NEXT: setne %al
1592 ; AVX1-LABEL: icmp_v16i32_v16i1:
1594 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1595 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1596 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
1597 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
1598 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1599 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1600 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
1601 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
1602 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1603 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1604 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1605 ; AVX1-NEXT: testw %ax, %ax
1606 ; AVX1-NEXT: setne %al
1607 ; AVX1-NEXT: vzeroupper
1610 ; AVX2-LABEL: icmp_v16i32_v16i1:
1612 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1613 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
1614 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
1615 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1616 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1617 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1618 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1619 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
1620 ; AVX2-NEXT: testw %ax, %ax
1621 ; AVX2-NEXT: setne %al
1622 ; AVX2-NEXT: vzeroupper
1625 ; AVX512-LABEL: icmp_v16i32_v16i1:
1627 ; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
1628 ; AVX512-NEXT: kortestw %k0, %k0
1629 ; AVX512-NEXT: setne %al
1630 ; AVX512-NEXT: vzeroupper
1632 %a = icmp eq <16 x i32> %0, zeroinitializer
1633 %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
; icmp_v32i16_v32i1 - compares every lane of the <32 x i16> argument with zero
; (icmp eq against zeroinitializer) and OR-reduces the resulting <32 x i1> mask
; with llvm.experimental.vector.reduce.or.v32i1.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them rather than editing by hand.
; NOTE(review): the AVX1 expectation has a vpsllw $7 between the final vpor and
; vpmovmskb that none of the other runs have, and the AVX512F expectation
; reduces the k-mask with a kshiftrw/korw chain where AVX512BW and AVX512VL use
; a single kortestd. Both look like codegen inefficiencies captured in the
; recorded output rather than requirements - confirm before relying on them.
1637 define i1 @icmp_v32i16_v32i1(<32 x i16>) {
1638 ; SSE-LABEL: icmp_v32i16_v32i1:
1640 ; SSE-NEXT: pxor %xmm4, %xmm4
1641 ; SSE-NEXT: pcmpeqw %xmm4, %xmm1
1642 ; SSE-NEXT: pcmpeqw %xmm4, %xmm0
1643 ; SSE-NEXT: packsswb %xmm1, %xmm0
1644 ; SSE-NEXT: pcmpeqw %xmm4, %xmm3
1645 ; SSE-NEXT: pcmpeqw %xmm4, %xmm2
1646 ; SSE-NEXT: packsswb %xmm3, %xmm2
1647 ; SSE-NEXT: por %xmm0, %xmm2
1648 ; SSE-NEXT: pmovmskb %xmm2, %eax
1649 ; SSE-NEXT: testw %ax, %ax
1650 ; SSE-NEXT: setne %al
1653 ; AVX1-LABEL: icmp_v32i16_v32i1:
1655 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1656 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1657 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
1658 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
1659 ; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
1660 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1661 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
1662 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
1663 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
1664 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1665 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
1666 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1667 ; AVX1-NEXT: testw %ax, %ax
1668 ; AVX1-NEXT: setne %al
1669 ; AVX1-NEXT: vzeroupper
1672 ; AVX2-LABEL: icmp_v32i16_v32i1:
1674 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1675 ; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
1676 ; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
1677 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
1678 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1679 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1680 ; AVX2-NEXT: testl %eax, %eax
1681 ; AVX2-NEXT: setne %al
1682 ; AVX2-NEXT: vzeroupper
1685 ; AVX512F-LABEL: icmp_v32i16_v32i1:
1687 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1688 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1689 ; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
1690 ; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
1691 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
1692 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
1693 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1694 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
1695 ; AVX512F-NEXT: korw %k1, %k0, %k0
1696 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
1697 ; AVX512F-NEXT: korw %k1, %k0, %k0
1698 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
1699 ; AVX512F-NEXT: korw %k1, %k0, %k0
1700 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
1701 ; AVX512F-NEXT: korw %k1, %k0, %k0
1702 ; AVX512F-NEXT: kmovw %k0, %eax
1703 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
1704 ; AVX512F-NEXT: vzeroupper
1705 ; AVX512F-NEXT: retq
1707 ; AVX512BW-LABEL: icmp_v32i16_v32i1:
1708 ; AVX512BW: # %bb.0:
1709 ; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
1710 ; AVX512BW-NEXT: kortestd %k0, %k0
1711 ; AVX512BW-NEXT: setne %al
1712 ; AVX512BW-NEXT: vzeroupper
1713 ; AVX512BW-NEXT: retq
1715 ; AVX512VL-LABEL: icmp_v32i16_v32i1:
1716 ; AVX512VL: # %bb.0:
1717 ; AVX512VL-NEXT: vptestnmw %zmm0, %zmm0, %k0
1718 ; AVX512VL-NEXT: kortestd %k0, %k0
1719 ; AVX512VL-NEXT: setne %al
1720 ; AVX512VL-NEXT: vzeroupper
1721 ; AVX512VL-NEXT: retq
1722 %a = icmp eq <32 x i16> %0, zeroinitializer
1723 %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
; icmp_v64i8_v64i1 - compares every lane of the <64 x i8> argument with zero
; (icmp eq against zeroinitializer) and OR-reduces the resulting <64 x i1> mask
; with llvm.experimental.vector.reduce.or.v64i1.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them rather than editing by hand.
; The SSE, AVX1 and AVX2 expectations OR the compare results together, take one
; byte mask and turn "mask is non-zero" into the result with negl + sbbb instead
; of the test + setne pair the narrower tests use.
; NOTE(review): all three AVX512 expectations reduce the k-mask with a
; shift/or chain (kshiftrw/korw on AVX512F, kshiftrq/korq on AVX512BW and
; AVX512VL) and return bit 0; none of them uses a kortest. That may be a missed
; optimization in the recorded output - confirm.
1727 define i1 @icmp_v64i8_v64i1(<64 x i8>) {
1728 ; SSE-LABEL: icmp_v64i8_v64i1:
1730 ; SSE-NEXT: pxor %xmm4, %xmm4
1731 ; SSE-NEXT: pcmpeqb %xmm4, %xmm2
1732 ; SSE-NEXT: pcmpeqb %xmm4, %xmm0
1733 ; SSE-NEXT: pcmpeqb %xmm4, %xmm3
1734 ; SSE-NEXT: pcmpeqb %xmm4, %xmm1
1735 ; SSE-NEXT: por %xmm3, %xmm1
1736 ; SSE-NEXT: por %xmm2, %xmm1
1737 ; SSE-NEXT: por %xmm0, %xmm1
1738 ; SSE-NEXT: pmovmskb %xmm1, %eax
1739 ; SSE-NEXT: negl %eax
1740 ; SSE-NEXT: sbbb %al, %al
1743 ; AVX1-LABEL: icmp_v64i8_v64i1:
1745 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1746 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm3
1747 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm4
1748 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1749 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
1750 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1751 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
1752 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1753 ; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
1754 ; AVX1-NEXT: vpor %xmm0, %xmm4, %xmm0
1755 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1756 ; AVX1-NEXT: negl %eax
1757 ; AVX1-NEXT: sbbb %al, %al
1758 ; AVX1-NEXT: vzeroupper
1761 ; AVX2-LABEL: icmp_v64i8_v64i1:
1763 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1764 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
1765 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
1766 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1767 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1768 ; AVX2-NEXT: negl %eax
1769 ; AVX2-NEXT: sbbb %al, %al
1770 ; AVX2-NEXT: vzeroupper
1773 ; AVX512F-LABEL: icmp_v64i8_v64i1:
1775 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1776 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1777 ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
1778 ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
1779 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
1780 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
1781 ; AVX512F-NEXT: vpor %xmm2, %xmm3, %xmm2
1782 ; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1
1783 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
1784 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1785 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1786 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
1787 ; AVX512F-NEXT: korw %k1, %k0, %k0
1788 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
1789 ; AVX512F-NEXT: korw %k1, %k0, %k0
1790 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
1791 ; AVX512F-NEXT: korw %k1, %k0, %k0
1792 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
1793 ; AVX512F-NEXT: korw %k1, %k0, %k0
1794 ; AVX512F-NEXT: kmovw %k0, %eax
1795 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
1796 ; AVX512F-NEXT: vzeroupper
1797 ; AVX512F-NEXT: retq
1799 ; AVX512BW-LABEL: icmp_v64i8_v64i1:
1800 ; AVX512BW: # %bb.0:
1801 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
1802 ; AVX512BW-NEXT: kshiftrq $32, %k0, %k1
1803 ; AVX512BW-NEXT: korq %k1, %k0, %k0
1804 ; AVX512BW-NEXT: kshiftrq $16, %k0, %k1
1805 ; AVX512BW-NEXT: korq %k1, %k0, %k0
1806 ; AVX512BW-NEXT: kshiftrq $8, %k0, %k1
1807 ; AVX512BW-NEXT: korq %k1, %k0, %k0
1808 ; AVX512BW-NEXT: kshiftrq $4, %k0, %k1
1809 ; AVX512BW-NEXT: korq %k1, %k0, %k0
1810 ; AVX512BW-NEXT: kshiftrq $2, %k0, %k1
1811 ; AVX512BW-NEXT: korq %k1, %k0, %k0
1812 ; AVX512BW-NEXT: kshiftrq $1, %k0, %k1
1813 ; AVX512BW-NEXT: korq %k1, %k0, %k0
1814 ; AVX512BW-NEXT: kmovd %k0, %eax
1815 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
1816 ; AVX512BW-NEXT: vzeroupper
1817 ; AVX512BW-NEXT: retq
1819 ; AVX512VL-LABEL: icmp_v64i8_v64i1:
1820 ; AVX512VL: # %bb.0:
1821 ; AVX512VL-NEXT: vptestnmb %zmm0, %zmm0, %k0
1822 ; AVX512VL-NEXT: kshiftrq $32, %k0, %k1
1823 ; AVX512VL-NEXT: korq %k1, %k0, %k0
1824 ; AVX512VL-NEXT: kshiftrq $16, %k0, %k1
1825 ; AVX512VL-NEXT: korq %k1, %k0, %k0
1826 ; AVX512VL-NEXT: kshiftrq $8, %k0, %k1
1827 ; AVX512VL-NEXT: korq %k1, %k0, %k0
1828 ; AVX512VL-NEXT: kshiftrq $4, %k0, %k1
1829 ; AVX512VL-NEXT: korq %k1, %k0, %k0
1830 ; AVX512VL-NEXT: kshiftrq $2, %k0, %k1
1831 ; AVX512VL-NEXT: korq %k1, %k0, %k0
1832 ; AVX512VL-NEXT: kshiftrq $1, %k0, %k1
1833 ; AVX512VL-NEXT: korq %k1, %k0, %k0
1834 ; AVX512VL-NEXT: kmovd %k0, %eax
1835 ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
1836 ; AVX512VL-NEXT: vzeroupper
1837 ; AVX512VL-NEXT: retq
1838 %a = icmp eq <64 x i8> %0, zeroinitializer
1839 %b = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> %a)
; Declarations of the OR-reduction intrinsic overloads called by the tests in
; this file, one per <N x i1> mask width (2, 4, 8, 16, 32 and 64 lanes). Each
; takes a vector of i1 and returns a single i1.
1843 declare i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1>)
1844 declare i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1>)
1845 declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1>)
1846 declare i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1>)
1847 declare i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1>)
1848 declare i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1>)