1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2,X86-SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2,X64-SSE,X64-SSE2
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,X64-SSE,SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
; Truncates each lane of the <2 x i64> argument to i1 and folds the two lanes
; together with llvm.vector.reduce.xor.v2i1. The assertion lines are the
; autogenerated llc expectations, grouped by FileCheck prefix. The expected
; code shifts bit 0 of every lane into the sign position (shift by 63) before
; extracting a lane mask. The AVX512F and AVX512BW groups test the mask with
; $3, while the AVX512VL group tests the whole byte.
15 define i1 @trunc_v2i64_v2i1(<2 x i64>) nounwind {
16 ; SSE-LABEL: trunc_v2i64_v2i1:
18 ; SSE-NEXT: psllq $63, %xmm0
19 ; SSE-NEXT: movmskpd %xmm0, %eax
20 ; SSE-NEXT: testb %al, %al
22 ; SSE-NEXT: ret{{[l|q]}}
24 ; AVX-LABEL: trunc_v2i64_v2i1:
26 ; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
27 ; AVX-NEXT: vmovmskpd %xmm0, %eax
28 ; AVX-NEXT: testb %al, %al
32 ; AVX512F-LABEL: trunc_v2i64_v2i1:
34 ; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0
35 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
36 ; AVX512F-NEXT: kmovw %k0, %eax
37 ; AVX512F-NEXT: testb $3, %al
38 ; AVX512F-NEXT: setnp %al
39 ; AVX512F-NEXT: vzeroupper
42 ; AVX512BW-LABEL: trunc_v2i64_v2i1:
44 ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
45 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
46 ; AVX512BW-NEXT: kmovd %k0, %eax
47 ; AVX512BW-NEXT: testb $3, %al
48 ; AVX512BW-NEXT: setnp %al
49 ; AVX512BW-NEXT: vzeroupper
52 ; AVX512VL-LABEL: trunc_v2i64_v2i1:
54 ; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0
55 ; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k0
56 ; AVX512VL-NEXT: kmovd %k0, %eax
57 ; AVX512VL-NEXT: testb %al, %al
58 ; AVX512VL-NEXT: setnp %al
60 %a = trunc <2 x i64> %0 to <2 x i1>
61 %b = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a)
; Truncates each lane of the <4 x i32> argument to i1 and folds the four lanes
; together with llvm.vector.reduce.xor.v4i1. The assertion lines are the
; autogenerated llc expectations, grouped by FileCheck prefix. Bit 0 of every
; lane is shifted into the sign position (shift by 31) before a lane mask is
; extracted. The AVX512F and AVX512BW groups test the mask with $15, while the
; AVX512VL group tests the whole byte.
65 define i1 @trunc_v4i32_v4i1(<4 x i32>) nounwind {
66 ; SSE-LABEL: trunc_v4i32_v4i1:
68 ; SSE-NEXT: pslld $31, %xmm0
69 ; SSE-NEXT: movmskps %xmm0, %eax
70 ; SSE-NEXT: testb %al, %al
72 ; SSE-NEXT: ret{{[l|q]}}
74 ; AVX-LABEL: trunc_v4i32_v4i1:
76 ; AVX-NEXT: vpslld $31, %xmm0, %xmm0
77 ; AVX-NEXT: vmovmskps %xmm0, %eax
78 ; AVX-NEXT: testb %al, %al
82 ; AVX512F-LABEL: trunc_v4i32_v4i1:
84 ; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
85 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
86 ; AVX512F-NEXT: kmovw %k0, %eax
87 ; AVX512F-NEXT: testb $15, %al
88 ; AVX512F-NEXT: setnp %al
89 ; AVX512F-NEXT: vzeroupper
92 ; AVX512BW-LABEL: trunc_v4i32_v4i1:
94 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
95 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
96 ; AVX512BW-NEXT: kmovd %k0, %eax
97 ; AVX512BW-NEXT: testb $15, %al
98 ; AVX512BW-NEXT: setnp %al
99 ; AVX512BW-NEXT: vzeroupper
100 ; AVX512BW-NEXT: retq
102 ; AVX512VL-LABEL: trunc_v4i32_v4i1:
104 ; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
105 ; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k0
106 ; AVX512VL-NEXT: kmovd %k0, %eax
107 ; AVX512VL-NEXT: testb %al, %al
108 ; AVX512VL-NEXT: setnp %al
109 ; AVX512VL-NEXT: retq
110 %a = trunc <4 x i32> %0 to <4 x i1>
111 %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
; Truncates each lane of the <8 x i16> argument to i1 and folds the eight
; lanes together with llvm.vector.reduce.xor.v8i1. The assertion lines are the
; autogenerated llc expectations, grouped by FileCheck prefix. The SSE and AVX
; groups shift bit 0 into the word sign bit, pack the words to bytes
; (packsswb) and read the byte mask. The AVX512F group instead sign-extends
; the words to 64-bit lanes (vpmovsxwq) and uses a mask-register test.
115 define i1 @trunc_v8i16_v8i1(<8 x i16>) nounwind {
116 ; SSE-LABEL: trunc_v8i16_v8i1:
118 ; SSE-NEXT: psllw $15, %xmm0
119 ; SSE-NEXT: packsswb %xmm0, %xmm0
120 ; SSE-NEXT: pmovmskb %xmm0, %eax
121 ; SSE-NEXT: testb %al, %al
122 ; SSE-NEXT: setnp %al
123 ; SSE-NEXT: ret{{[l|q]}}
125 ; AVX-LABEL: trunc_v8i16_v8i1:
127 ; AVX-NEXT: vpsllw $15, %xmm0, %xmm0
128 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
129 ; AVX-NEXT: vpmovmskb %xmm0, %eax
130 ; AVX-NEXT: testb %al, %al
131 ; AVX-NEXT: setnp %al
134 ; AVX512F-LABEL: trunc_v8i16_v8i1:
136 ; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
137 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
138 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
139 ; AVX512F-NEXT: kmovw %k0, %eax
140 ; AVX512F-NEXT: testb %al, %al
141 ; AVX512F-NEXT: setnp %al
142 ; AVX512F-NEXT: vzeroupper
145 ; AVX512BW-LABEL: trunc_v8i16_v8i1:
147 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
148 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
149 ; AVX512BW-NEXT: kmovd %k0, %eax
150 ; AVX512BW-NEXT: testb %al, %al
151 ; AVX512BW-NEXT: setnp %al
152 ; AVX512BW-NEXT: vzeroupper
153 ; AVX512BW-NEXT: retq
155 ; AVX512VL-LABEL: trunc_v8i16_v8i1:
157 ; AVX512VL-NEXT: vpsllw $15, %xmm0, %xmm0
158 ; AVX512VL-NEXT: vpmovw2m %xmm0, %k0
159 ; AVX512VL-NEXT: kmovd %k0, %eax
160 ; AVX512VL-NEXT: testb %al, %al
161 ; AVX512VL-NEXT: setnp %al
162 ; AVX512VL-NEXT: retq
163 %a = trunc <8 x i16> %0 to <8 x i1>
164 %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
; Truncates each lane of the <16 x i8> argument to i1 and folds the sixteen
; lanes together with llvm.vector.reduce.xor.v16i1. The assertion lines are
; the autogenerated llc expectations, grouped by FileCheck prefix. Every group
; shifts bit 0 of each byte into the byte sign bit (word shift by 7), reads a
; 16-bit byte mask and folds its two halves with xorb %ah, %al before setnp.
; All three AVX512 configurations share the single AVX512 group.
168 define i1 @trunc_v16i8_v16i1(<16 x i8>) nounwind {
169 ; SSE-LABEL: trunc_v16i8_v16i1:
171 ; SSE-NEXT: psllw $7, %xmm0
172 ; SSE-NEXT: pmovmskb %xmm0, %eax
173 ; SSE-NEXT: xorb %ah, %al
174 ; SSE-NEXT: setnp %al
175 ; SSE-NEXT: ret{{[l|q]}}
177 ; AVX-LABEL: trunc_v16i8_v16i1:
179 ; AVX-NEXT: vpsllw $7, %xmm0, %xmm0
180 ; AVX-NEXT: vpmovmskb %xmm0, %eax
181 ; AVX-NEXT: xorb %ah, %al
182 ; AVX-NEXT: setnp %al
185 ; AVX512-LABEL: trunc_v16i8_v16i1:
187 ; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0
188 ; AVX512-NEXT: vpmovmskb %xmm0, %eax
189 ; AVX512-NEXT: xorb %ah, %al
190 ; AVX512-NEXT: setnp %al
192 %a = trunc <16 x i8> %0 to <16 x i1>
193 %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
; Truncates each lane of the <4 x i64> argument to i1 and folds the four lanes
; together with llvm.vector.reduce.xor.v4i1. The assertion lines are the
; autogenerated llc expectations, grouped by FileCheck prefix. The SSE and
; AVX1 groups first narrow to 32-bit lanes with a shuffle of elements [0,2],
; while AVX2 and the AVX512 groups shift the 64-bit lanes directly. The
; AVX512F and AVX512BW groups test the mask with $15.
197 define i1 @trunc_v4i64_v4i1(<4 x i64>) nounwind {
198 ; SSE-LABEL: trunc_v4i64_v4i1:
200 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
201 ; SSE-NEXT: pslld $31, %xmm0
202 ; SSE-NEXT: movmskps %xmm0, %eax
203 ; SSE-NEXT: testb %al, %al
204 ; SSE-NEXT: setnp %al
205 ; SSE-NEXT: ret{{[l|q]}}
207 ; AVX1-LABEL: trunc_v4i64_v4i1:
209 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
210 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
211 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
212 ; AVX1-NEXT: vmovmskps %xmm0, %eax
213 ; AVX1-NEXT: testb %al, %al
214 ; AVX1-NEXT: setnp %al
215 ; AVX1-NEXT: vzeroupper
218 ; AVX2-LABEL: trunc_v4i64_v4i1:
220 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
221 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
222 ; AVX2-NEXT: testb %al, %al
223 ; AVX2-NEXT: setnp %al
224 ; AVX2-NEXT: vzeroupper
227 ; AVX512F-LABEL: trunc_v4i64_v4i1:
229 ; AVX512F-NEXT: vpsllq $63, %ymm0, %ymm0
230 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
231 ; AVX512F-NEXT: kmovw %k0, %eax
232 ; AVX512F-NEXT: testb $15, %al
233 ; AVX512F-NEXT: setnp %al
234 ; AVX512F-NEXT: vzeroupper
237 ; AVX512BW-LABEL: trunc_v4i64_v4i1:
239 ; AVX512BW-NEXT: vpsllq $63, %ymm0, %ymm0
240 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
241 ; AVX512BW-NEXT: kmovd %k0, %eax
242 ; AVX512BW-NEXT: testb $15, %al
243 ; AVX512BW-NEXT: setnp %al
244 ; AVX512BW-NEXT: vzeroupper
245 ; AVX512BW-NEXT: retq
247 ; AVX512VL-LABEL: trunc_v4i64_v4i1:
249 ; AVX512VL-NEXT: vpsllq $63, %ymm0, %ymm0
250 ; AVX512VL-NEXT: vptestmq %ymm0, %ymm0, %k0
251 ; AVX512VL-NEXT: kmovd %k0, %eax
252 ; AVX512VL-NEXT: testb %al, %al
253 ; AVX512VL-NEXT: setnp %al
254 ; AVX512VL-NEXT: vzeroupper
255 ; AVX512VL-NEXT: retq
256 %a = trunc <4 x i64> %0 to <4 x i1>
257 %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
; Truncates each lane of the <8 x i32> argument to i1 and folds the eight
; lanes together with llvm.vector.reduce.xor.v8i1. The assertion lines are the
; autogenerated llc expectations, grouped by FileCheck prefix. SSE2 and SSE41
; have separate groups because they narrow the dwords to words differently
; (shift pair plus packssdw versus pblendw plus packusdw). The AVX and AVX512
; groups shift bit 0 of each dword into the sign position and read a lane mask.
261 define i1 @trunc_v8i32_v8i1(<8 x i32>) nounwind {
262 ; SSE2-LABEL: trunc_v8i32_v8i1:
264 ; SSE2-NEXT: pslld $16, %xmm1
265 ; SSE2-NEXT: psrad $16, %xmm1
266 ; SSE2-NEXT: pslld $16, %xmm0
267 ; SSE2-NEXT: psrad $16, %xmm0
268 ; SSE2-NEXT: packssdw %xmm1, %xmm0
269 ; SSE2-NEXT: psllw $15, %xmm0
270 ; SSE2-NEXT: packsswb %xmm0, %xmm0
271 ; SSE2-NEXT: pmovmskb %xmm0, %eax
272 ; SSE2-NEXT: testb %al, %al
273 ; SSE2-NEXT: setnp %al
274 ; SSE2-NEXT: ret{{[l|q]}}
276 ; SSE41-LABEL: trunc_v8i32_v8i1:
278 ; SSE41-NEXT: pxor %xmm2, %xmm2
279 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
280 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
281 ; SSE41-NEXT: packusdw %xmm1, %xmm0
282 ; SSE41-NEXT: psllw $15, %xmm0
283 ; SSE41-NEXT: packsswb %xmm0, %xmm0
284 ; SSE41-NEXT: pmovmskb %xmm0, %eax
285 ; SSE41-NEXT: testb %al, %al
286 ; SSE41-NEXT: setnp %al
289 ; AVX1-LABEL: trunc_v8i32_v8i1:
291 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
292 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
293 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
294 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
295 ; AVX1-NEXT: vmovmskps %ymm0, %eax
296 ; AVX1-NEXT: testb %al, %al
297 ; AVX1-NEXT: setnp %al
298 ; AVX1-NEXT: vzeroupper
301 ; AVX2-LABEL: trunc_v8i32_v8i1:
303 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
304 ; AVX2-NEXT: vmovmskps %ymm0, %eax
305 ; AVX2-NEXT: testb %al, %al
306 ; AVX2-NEXT: setnp %al
307 ; AVX2-NEXT: vzeroupper
310 ; AVX512F-LABEL: trunc_v8i32_v8i1:
312 ; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
313 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
314 ; AVX512F-NEXT: kmovw %k0, %eax
315 ; AVX512F-NEXT: testb %al, %al
316 ; AVX512F-NEXT: setnp %al
317 ; AVX512F-NEXT: vzeroupper
320 ; AVX512BW-LABEL: trunc_v8i32_v8i1:
322 ; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0
323 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
324 ; AVX512BW-NEXT: kmovd %k0, %eax
325 ; AVX512BW-NEXT: testb %al, %al
326 ; AVX512BW-NEXT: setnp %al
327 ; AVX512BW-NEXT: vzeroupper
328 ; AVX512BW-NEXT: retq
330 ; AVX512VL-LABEL: trunc_v8i32_v8i1:
332 ; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
333 ; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
334 ; AVX512VL-NEXT: kmovd %k0, %eax
335 ; AVX512VL-NEXT: testb %al, %al
336 ; AVX512VL-NEXT: setnp %al
337 ; AVX512VL-NEXT: vzeroupper
338 ; AVX512VL-NEXT: retq
339 %a = trunc <8 x i32> %0 to <8 x i1>
340 %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
; Truncates each lane of the <16 x i16> argument to i1 and folds the sixteen
; lanes together with llvm.vector.reduce.xor.v16i1. The assertion lines are
; the autogenerated llc expectations, grouped by FileCheck prefix. The SSE and
; AVX1/AVX2 groups mask each word to its low byte, pack to bytes, and fold the
; 16-bit byte mask with xorb %ah, %al. The AVX512 groups read a 16-bit mask
; register and fold its two bytes with shrl $8 followed by xorb.
344 define i1 @trunc_v16i16_v16i1(<16 x i16>) nounwind {
345 ; SSE2-LABEL: trunc_v16i16_v16i1:
347 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
348 ; SSE2-NEXT: pand %xmm2, %xmm1
349 ; SSE2-NEXT: pand %xmm2, %xmm0
350 ; SSE2-NEXT: packuswb %xmm1, %xmm0
351 ; SSE2-NEXT: psllw $7, %xmm0
352 ; SSE2-NEXT: pmovmskb %xmm0, %eax
353 ; SSE2-NEXT: xorb %ah, %al
354 ; SSE2-NEXT: setnp %al
355 ; SSE2-NEXT: ret{{[l|q]}}
357 ; SSE41-LABEL: trunc_v16i16_v16i1:
359 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
360 ; SSE41-NEXT: pand %xmm2, %xmm1
361 ; SSE41-NEXT: pand %xmm2, %xmm0
362 ; SSE41-NEXT: packuswb %xmm1, %xmm0
363 ; SSE41-NEXT: psllw $7, %xmm0
364 ; SSE41-NEXT: pmovmskb %xmm0, %eax
365 ; SSE41-NEXT: xorb %ah, %al
366 ; SSE41-NEXT: setnp %al
369 ; AVX1-LABEL: trunc_v16i16_v16i1:
371 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
372 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
373 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
374 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
375 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
376 ; AVX1-NEXT: xorb %ah, %al
377 ; AVX1-NEXT: setnp %al
378 ; AVX1-NEXT: vzeroupper
381 ; AVX2-LABEL: trunc_v16i16_v16i1:
383 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
384 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
385 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
386 ; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
387 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
388 ; AVX2-NEXT: xorb %ah, %al
389 ; AVX2-NEXT: setnp %al
390 ; AVX2-NEXT: vzeroupper
393 ; AVX512F-LABEL: trunc_v16i16_v16i1:
395 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
396 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
397 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
398 ; AVX512F-NEXT: kmovw %k0, %eax
399 ; AVX512F-NEXT: movl %eax, %ecx
400 ; AVX512F-NEXT: shrl $8, %ecx
401 ; AVX512F-NEXT: xorb %al, %cl
402 ; AVX512F-NEXT: setnp %al
403 ; AVX512F-NEXT: vzeroupper
406 ; AVX512BW-LABEL: trunc_v16i16_v16i1:
408 ; AVX512BW-NEXT: vpsllw $15, %ymm0, %ymm0
409 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
410 ; AVX512BW-NEXT: kmovd %k0, %eax
411 ; AVX512BW-NEXT: movl %eax, %ecx
412 ; AVX512BW-NEXT: shrl $8, %ecx
413 ; AVX512BW-NEXT: xorb %al, %cl
414 ; AVX512BW-NEXT: setnp %al
415 ; AVX512BW-NEXT: vzeroupper
416 ; AVX512BW-NEXT: retq
418 ; AVX512VL-LABEL: trunc_v16i16_v16i1:
420 ; AVX512VL-NEXT: vpsllw $15, %ymm0, %ymm0
421 ; AVX512VL-NEXT: vpmovw2m %ymm0, %k0
422 ; AVX512VL-NEXT: kmovd %k0, %eax
423 ; AVX512VL-NEXT: movl %eax, %ecx
424 ; AVX512VL-NEXT: shrl $8, %ecx
425 ; AVX512VL-NEXT: xorb %al, %cl
426 ; AVX512VL-NEXT: setnp %al
427 ; AVX512VL-NEXT: vzeroupper
428 ; AVX512VL-NEXT: retq
429 %a = trunc <16 x i16> %0 to <16 x i1>
430 %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
; Truncates each lane of the <32 x i8> argument to i1 and folds the thirty-two
; lanes together with llvm.vector.reduce.xor.v32i1. The assertion lines are
; the autogenerated llc expectations, grouped by FileCheck prefix. The SSE,
; AVX1 and AVX512F groups XOR the two 128-bit halves of the input first, so
; only 16 lanes remain to be reduced. The AVX2, AVX512BW and AVX512VL groups
; read a 32-bit byte mask and fold it with shrl $16, xorl and xorb %ch, %cl.
; The AVX512F group finishes with a kshiftrw/kxorw chain instead of setnp.
434 define i1 @trunc_v32i8_v32i1(<32 x i8>) nounwind {
435 ; SSE-LABEL: trunc_v32i8_v32i1:
437 ; SSE-NEXT: pxor %xmm1, %xmm0
438 ; SSE-NEXT: psllw $7, %xmm0
439 ; SSE-NEXT: pmovmskb %xmm0, %eax
440 ; SSE-NEXT: xorb %ah, %al
441 ; SSE-NEXT: setnp %al
442 ; SSE-NEXT: ret{{[l|q]}}
444 ; AVX1-LABEL: trunc_v32i8_v32i1:
446 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
447 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
448 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
449 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
450 ; AVX1-NEXT: xorb %ah, %al
451 ; AVX1-NEXT: setnp %al
452 ; AVX1-NEXT: vzeroupper
455 ; AVX2-LABEL: trunc_v32i8_v32i1:
457 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
458 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
459 ; AVX2-NEXT: movl %eax, %ecx
460 ; AVX2-NEXT: shrl $16, %ecx
461 ; AVX2-NEXT: xorl %eax, %ecx
462 ; AVX2-NEXT: xorb %ch, %cl
463 ; AVX2-NEXT: setnp %al
464 ; AVX2-NEXT: vzeroupper
467 ; AVX512F-LABEL: trunc_v32i8_v32i1:
469 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
470 ; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0
471 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
472 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
473 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
474 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
475 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
476 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
477 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
478 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
479 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
480 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
481 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
482 ; AVX512F-NEXT: kmovw %k0, %eax
483 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
484 ; AVX512F-NEXT: vzeroupper
487 ; AVX512BW-LABEL: trunc_v32i8_v32i1:
489 ; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
490 ; AVX512BW-NEXT: vpmovmskb %ymm0, %eax
491 ; AVX512BW-NEXT: movl %eax, %ecx
492 ; AVX512BW-NEXT: shrl $16, %ecx
493 ; AVX512BW-NEXT: xorl %eax, %ecx
494 ; AVX512BW-NEXT: xorb %ch, %cl
495 ; AVX512BW-NEXT: setnp %al
496 ; AVX512BW-NEXT: vzeroupper
497 ; AVX512BW-NEXT: retq
499 ; AVX512VL-LABEL: trunc_v32i8_v32i1:
501 ; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0
502 ; AVX512VL-NEXT: vpmovmskb %ymm0, %eax
503 ; AVX512VL-NEXT: movl %eax, %ecx
504 ; AVX512VL-NEXT: shrl $16, %ecx
505 ; AVX512VL-NEXT: xorl %eax, %ecx
506 ; AVX512VL-NEXT: xorb %ch, %cl
507 ; AVX512VL-NEXT: setnp %al
508 ; AVX512VL-NEXT: vzeroupper
509 ; AVX512VL-NEXT: retq
510 %a = trunc <32 x i8> %0 to <32 x i1>
511 %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
; Truncates each lane of the <8 x i64> argument to i1 and folds the eight
; lanes together with llvm.vector.reduce.xor.v8i1. The assertion lines are the
; autogenerated llc expectations, grouped by FileCheck prefix. The 32-bit and
; 64-bit SSE2 targets have separate groups. The X86-SSE2 group sets up a
; 16-byte aligned frame and takes one shuffle operand from memory, whereas
; the X64-SSE2 group uses xmm0-xmm3 only. The AVX512 groups work on a single
; zmm register.
515 define i1 @trunc_v8i64_v8i1(<8 x i64>) nounwind {
516 ; X86-SSE2-LABEL: trunc_v8i64_v8i1:
518 ; X86-SSE2-NEXT: pushl %ebp
519 ; X86-SSE2-NEXT: movl %esp, %ebp
520 ; X86-SSE2-NEXT: andl $-16, %esp
521 ; X86-SSE2-NEXT: subl $16, %esp
522 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
523 ; X86-SSE2-NEXT: pslld $16, %xmm0
524 ; X86-SSE2-NEXT: psrad $16, %xmm0
525 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],mem[0,2]
526 ; X86-SSE2-NEXT: pslld $16, %xmm2
527 ; X86-SSE2-NEXT: psrad $16, %xmm2
528 ; X86-SSE2-NEXT: packssdw %xmm2, %xmm0
529 ; X86-SSE2-NEXT: psllw $15, %xmm0
530 ; X86-SSE2-NEXT: packsswb %xmm0, %xmm0
531 ; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
532 ; X86-SSE2-NEXT: testb %al, %al
533 ; X86-SSE2-NEXT: setnp %al
534 ; X86-SSE2-NEXT: movl %ebp, %esp
535 ; X86-SSE2-NEXT: popl %ebp
536 ; X86-SSE2-NEXT: retl
538 ; X64-SSE2-LABEL: trunc_v8i64_v8i1:
540 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
541 ; X64-SSE2-NEXT: pslld $16, %xmm2
542 ; X64-SSE2-NEXT: psrad $16, %xmm2
543 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
544 ; X64-SSE2-NEXT: pslld $16, %xmm0
545 ; X64-SSE2-NEXT: psrad $16, %xmm0
546 ; X64-SSE2-NEXT: packssdw %xmm2, %xmm0
547 ; X64-SSE2-NEXT: psllw $15, %xmm0
548 ; X64-SSE2-NEXT: packsswb %xmm0, %xmm0
549 ; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
550 ; X64-SSE2-NEXT: testb %al, %al
551 ; X64-SSE2-NEXT: setnp %al
552 ; X64-SSE2-NEXT: retq
554 ; SSE41-LABEL: trunc_v8i64_v8i1:
556 ; SSE41-NEXT: pxor %xmm4, %xmm4
557 ; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
558 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
559 ; SSE41-NEXT: packusdw %xmm3, %xmm2
560 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
561 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
562 ; SSE41-NEXT: packusdw %xmm1, %xmm0
563 ; SSE41-NEXT: packusdw %xmm2, %xmm0
564 ; SSE41-NEXT: psllw $15, %xmm0
565 ; SSE41-NEXT: packsswb %xmm0, %xmm0
566 ; SSE41-NEXT: pmovmskb %xmm0, %eax
567 ; SSE41-NEXT: testb %al, %al
568 ; SSE41-NEXT: setnp %al
571 ; AVX1-LABEL: trunc_v8i64_v8i1:
573 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
574 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
575 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
576 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
577 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
578 ; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
579 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
580 ; AVX1-NEXT: vmovmskps %ymm0, %eax
581 ; AVX1-NEXT: testb %al, %al
582 ; AVX1-NEXT: setnp %al
583 ; AVX1-NEXT: vzeroupper
586 ; AVX2-LABEL: trunc_v8i64_v8i1:
588 ; AVX2-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
589 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
590 ; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
591 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
592 ; AVX2-NEXT: vmovmskps %ymm0, %eax
593 ; AVX2-NEXT: testb %al, %al
594 ; AVX2-NEXT: setnp %al
595 ; AVX2-NEXT: vzeroupper
598 ; AVX512F-LABEL: trunc_v8i64_v8i1:
600 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
601 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
602 ; AVX512F-NEXT: kmovw %k0, %eax
603 ; AVX512F-NEXT: testb %al, %al
604 ; AVX512F-NEXT: setnp %al
605 ; AVX512F-NEXT: vzeroupper
608 ; AVX512BW-LABEL: trunc_v8i64_v8i1:
610 ; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0
611 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
612 ; AVX512BW-NEXT: kmovd %k0, %eax
613 ; AVX512BW-NEXT: testb %al, %al
614 ; AVX512BW-NEXT: setnp %al
615 ; AVX512BW-NEXT: vzeroupper
616 ; AVX512BW-NEXT: retq
618 ; AVX512VL-LABEL: trunc_v8i64_v8i1:
620 ; AVX512VL-NEXT: vpsllq $63, %zmm0, %zmm0
621 ; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k0
622 ; AVX512VL-NEXT: kmovd %k0, %eax
623 ; AVX512VL-NEXT: testb %al, %al
624 ; AVX512VL-NEXT: setnp %al
625 ; AVX512VL-NEXT: vzeroupper
626 ; AVX512VL-NEXT: retq
627 %a = trunc <8 x i64> %0 to <8 x i1>
628 %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
; Truncates each lane of the <16 x i32> argument to i1 and folds the sixteen
; lanes together with llvm.vector.reduce.xor.v16i1. The assertion lines are
; the autogenerated llc expectations, grouped by FileCheck prefix. The SSE and
; AVX1/AVX2 groups mask every dword to its low byte, pack down to bytes, and
; fold the 16-bit byte mask with xorb %ah, %al. The X86-SSE2 group applies
; the mask to a memory operand at 8(%ebp). The AVX512 groups test a single zmm
; register and fold the 16-bit mask with shrl $8 and xorb.
632 define i1 @trunc_v16i32_v16i1(<16 x i32>) nounwind {
633 ; X86-SSE2-LABEL: trunc_v16i32_v16i1:
635 ; X86-SSE2-NEXT: pushl %ebp
636 ; X86-SSE2-NEXT: movl %esp, %ebp
637 ; X86-SSE2-NEXT: andl $-16, %esp
638 ; X86-SSE2-NEXT: subl $16, %esp
639 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
640 ; X86-SSE2-NEXT: pand %xmm3, %xmm1
641 ; X86-SSE2-NEXT: pand %xmm3, %xmm0
642 ; X86-SSE2-NEXT: packuswb %xmm1, %xmm0
643 ; X86-SSE2-NEXT: pand %xmm3, %xmm2
644 ; X86-SSE2-NEXT: pand 8(%ebp), %xmm3
645 ; X86-SSE2-NEXT: packuswb %xmm3, %xmm2
646 ; X86-SSE2-NEXT: packuswb %xmm2, %xmm0
647 ; X86-SSE2-NEXT: psllw $7, %xmm0
648 ; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
649 ; X86-SSE2-NEXT: xorb %ah, %al
650 ; X86-SSE2-NEXT: setnp %al
651 ; X86-SSE2-NEXT: movl %ebp, %esp
652 ; X86-SSE2-NEXT: popl %ebp
653 ; X86-SSE2-NEXT: retl
655 ; X64-SSE2-LABEL: trunc_v16i32_v16i1:
657 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
658 ; X64-SSE2-NEXT: pand %xmm4, %xmm3
659 ; X64-SSE2-NEXT: pand %xmm4, %xmm2
660 ; X64-SSE2-NEXT: packuswb %xmm3, %xmm2
661 ; X64-SSE2-NEXT: pand %xmm4, %xmm1
662 ; X64-SSE2-NEXT: pand %xmm4, %xmm0
663 ; X64-SSE2-NEXT: packuswb %xmm1, %xmm0
664 ; X64-SSE2-NEXT: packuswb %xmm2, %xmm0
665 ; X64-SSE2-NEXT: psllw $7, %xmm0
666 ; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
667 ; X64-SSE2-NEXT: xorb %ah, %al
668 ; X64-SSE2-NEXT: setnp %al
669 ; X64-SSE2-NEXT: retq
671 ; SSE41-LABEL: trunc_v16i32_v16i1:
673 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = [255,255,255,255]
674 ; SSE41-NEXT: pand %xmm4, %xmm3
675 ; SSE41-NEXT: pand %xmm4, %xmm2
676 ; SSE41-NEXT: packusdw %xmm3, %xmm2
677 ; SSE41-NEXT: pand %xmm4, %xmm1
678 ; SSE41-NEXT: pand %xmm4, %xmm0
679 ; SSE41-NEXT: packusdw %xmm1, %xmm0
680 ; SSE41-NEXT: packuswb %xmm2, %xmm0
681 ; SSE41-NEXT: psllw $7, %xmm0
682 ; SSE41-NEXT: pmovmskb %xmm0, %eax
683 ; SSE41-NEXT: xorb %ah, %al
684 ; SSE41-NEXT: setnp %al
687 ; AVX1-LABEL: trunc_v16i32_v16i1:
689 ; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
690 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
691 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
692 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
693 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
694 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
695 ; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
696 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
697 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
698 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
699 ; AVX1-NEXT: xorb %ah, %al
700 ; AVX1-NEXT: setnp %al
701 ; AVX1-NEXT: vzeroupper
704 ; AVX2-LABEL: trunc_v16i32_v16i1:
706 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
707 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
708 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
709 ; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
710 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
711 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
712 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
713 ; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
714 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
715 ; AVX2-NEXT: xorb %ah, %al
716 ; AVX2-NEXT: setnp %al
717 ; AVX2-NEXT: vzeroupper
720 ; AVX512F-LABEL: trunc_v16i32_v16i1:
722 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
723 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
724 ; AVX512F-NEXT: kmovw %k0, %eax
725 ; AVX512F-NEXT: movl %eax, %ecx
726 ; AVX512F-NEXT: shrl $8, %ecx
727 ; AVX512F-NEXT: xorb %al, %cl
728 ; AVX512F-NEXT: setnp %al
729 ; AVX512F-NEXT: vzeroupper
732 ; AVX512BW-LABEL: trunc_v16i32_v16i1:
734 ; AVX512BW-NEXT: vpslld $31, %zmm0, %zmm0
735 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
736 ; AVX512BW-NEXT: kmovd %k0, %eax
737 ; AVX512BW-NEXT: movl %eax, %ecx
738 ; AVX512BW-NEXT: shrl $8, %ecx
739 ; AVX512BW-NEXT: xorb %al, %cl
740 ; AVX512BW-NEXT: setnp %al
741 ; AVX512BW-NEXT: vzeroupper
742 ; AVX512BW-NEXT: retq
744 ; AVX512VL-LABEL: trunc_v16i32_v16i1:
746 ; AVX512VL-NEXT: vpslld $31, %zmm0, %zmm0
747 ; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k0
748 ; AVX512VL-NEXT: kmovd %k0, %eax
749 ; AVX512VL-NEXT: movl %eax, %ecx
750 ; AVX512VL-NEXT: shrl $8, %ecx
751 ; AVX512VL-NEXT: xorb %al, %cl
752 ; AVX512VL-NEXT: setnp %al
753 ; AVX512VL-NEXT: vzeroupper
754 ; AVX512VL-NEXT: retq
755 %a = trunc <16 x i32> %0 to <16 x i1>
756 %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
; Truncates each lane of the <32 x i16> argument to i1 and folds the thirty-two
; lanes together with llvm.vector.reduce.xor.v32i1. The assertion lines are
; the autogenerated llc expectations, grouped by FileCheck prefix. The SSE
; groups pack the words to two byte vectors and XOR them (pxor) before reading
; a 16-bit byte mask. The AVX1 and AVX512F groups XOR the two input halves
; up front, and the AVX512F group then reduces with a kshiftrw/kxorw chain.
; The AVX2, AVX512BW and AVX512VL groups fold a 32-bit mask with shrl $16,
; xorl and xorb %ch, %cl.
760 define i1 @trunc_v32i16_v32i1(<32 x i16>) nounwind {
761 ; X86-SSE2-LABEL: trunc_v32i16_v32i1:
763 ; X86-SSE2-NEXT: pushl %ebp
764 ; X86-SSE2-NEXT: movl %esp, %ebp
765 ; X86-SSE2-NEXT: andl $-16, %esp
766 ; X86-SSE2-NEXT: subl $16, %esp
767 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
768 ; X86-SSE2-NEXT: pand %xmm3, %xmm1
769 ; X86-SSE2-NEXT: pand %xmm3, %xmm0
770 ; X86-SSE2-NEXT: packuswb %xmm1, %xmm0
771 ; X86-SSE2-NEXT: pand %xmm3, %xmm2
772 ; X86-SSE2-NEXT: pand 8(%ebp), %xmm3
773 ; X86-SSE2-NEXT: packuswb %xmm3, %xmm2
774 ; X86-SSE2-NEXT: pxor %xmm0, %xmm2
775 ; X86-SSE2-NEXT: psllw $7, %xmm2
776 ; X86-SSE2-NEXT: pmovmskb %xmm2, %eax
777 ; X86-SSE2-NEXT: xorb %ah, %al
778 ; X86-SSE2-NEXT: setnp %al
779 ; X86-SSE2-NEXT: movl %ebp, %esp
780 ; X86-SSE2-NEXT: popl %ebp
781 ; X86-SSE2-NEXT: retl
783 ; X64-SSE2-LABEL: trunc_v32i16_v32i1:
785 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
786 ; X64-SSE2-NEXT: pand %xmm4, %xmm3
787 ; X64-SSE2-NEXT: pand %xmm4, %xmm2
788 ; X64-SSE2-NEXT: packuswb %xmm3, %xmm2
789 ; X64-SSE2-NEXT: pand %xmm4, %xmm1
790 ; X64-SSE2-NEXT: pand %xmm4, %xmm0
791 ; X64-SSE2-NEXT: packuswb %xmm1, %xmm0
792 ; X64-SSE2-NEXT: pxor %xmm2, %xmm0
793 ; X64-SSE2-NEXT: psllw $7, %xmm0
794 ; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
795 ; X64-SSE2-NEXT: xorb %ah, %al
796 ; X64-SSE2-NEXT: setnp %al
797 ; X64-SSE2-NEXT: retq
799 ; SSE41-LABEL: trunc_v32i16_v32i1:
801 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
802 ; SSE41-NEXT: pand %xmm4, %xmm3
803 ; SSE41-NEXT: pand %xmm4, %xmm2
804 ; SSE41-NEXT: packuswb %xmm3, %xmm2
805 ; SSE41-NEXT: pand %xmm4, %xmm1
806 ; SSE41-NEXT: pand %xmm4, %xmm0
807 ; SSE41-NEXT: packuswb %xmm1, %xmm0
808 ; SSE41-NEXT: pxor %xmm2, %xmm0
809 ; SSE41-NEXT: psllw $7, %xmm0
810 ; SSE41-NEXT: pmovmskb %xmm0, %eax
811 ; SSE41-NEXT: xorb %ah, %al
812 ; SSE41-NEXT: setnp %al
815 ; AVX1-LABEL: trunc_v32i16_v32i1:
817 ; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
818 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
819 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
820 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
821 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
822 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
823 ; AVX1-NEXT: xorb %ah, %al
824 ; AVX1-NEXT: setnp %al
825 ; AVX1-NEXT: vzeroupper
828 ; AVX2-LABEL: trunc_v32i16_v32i1:
830 ; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
831 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
832 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
833 ; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
834 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
835 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
836 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
837 ; AVX2-NEXT: movl %eax, %ecx
838 ; AVX2-NEXT: shrl $16, %ecx
839 ; AVX2-NEXT: xorl %eax, %ecx
840 ; AVX2-NEXT: xorb %ch, %cl
841 ; AVX2-NEXT: setnp %al
842 ; AVX2-NEXT: vzeroupper
845 ; AVX512F-LABEL: trunc_v32i16_v32i1:
847 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
848 ; AVX512F-NEXT: vpxor %ymm1, %ymm0, %ymm0
849 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
850 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
851 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
852 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
853 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
854 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
855 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
856 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
857 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
858 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
859 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
860 ; AVX512F-NEXT: kmovw %k0, %eax
861 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
862 ; AVX512F-NEXT: vzeroupper
865 ; AVX512BW-LABEL: trunc_v32i16_v32i1:
867 ; AVX512BW-NEXT: vpsllw $15, %zmm0, %zmm0
868 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
869 ; AVX512BW-NEXT: kmovd %k0, %eax
870 ; AVX512BW-NEXT: movl %eax, %ecx
871 ; AVX512BW-NEXT: shrl $16, %ecx
872 ; AVX512BW-NEXT: xorl %eax, %ecx
873 ; AVX512BW-NEXT: xorb %ch, %cl
874 ; AVX512BW-NEXT: setnp %al
875 ; AVX512BW-NEXT: vzeroupper
876 ; AVX512BW-NEXT: retq
878 ; AVX512VL-LABEL: trunc_v32i16_v32i1:
880 ; AVX512VL-NEXT: vpsllw $15, %zmm0, %zmm0
881 ; AVX512VL-NEXT: vpmovw2m %zmm0, %k0
882 ; AVX512VL-NEXT: kmovd %k0, %eax
883 ; AVX512VL-NEXT: movl %eax, %ecx
884 ; AVX512VL-NEXT: shrl $16, %ecx
885 ; AVX512VL-NEXT: xorl %eax, %ecx
886 ; AVX512VL-NEXT: xorb %ch, %cl
887 ; AVX512VL-NEXT: setnp %al
888 ; AVX512VL-NEXT: vzeroupper
889 ; AVX512VL-NEXT: retq
890 %a = trunc <32 x i16> %0 to <32 x i1>
891 %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
; Truncates each lane of the <64 x i8> argument to i1 and folds the sixty-four
; lanes together with llvm.vector.reduce.xor.v64i1. The assertion lines are
; the autogenerated llc expectations, grouped by FileCheck prefix. The SSE,
; AVX1, AVX2 and AVX512F groups XOR the input sub-vectors together first and
; then reduce a narrower mask. The 64-bit SSE2 and SSE4.1 runs share the
; X64-SSE group. The AVX512BW and AVX512VL groups move a full 64-bit mask to
; %rax (kmovq) and fold it 64 -> 32 -> 16 -> 8 bits before setnp.
895 define i1 @trunc_v64i8_v64i1(<64 x i8>) nounwind {
896 ; X86-SSE2-LABEL: trunc_v64i8_v64i1:
898 ; X86-SSE2-NEXT: pushl %ebp
899 ; X86-SSE2-NEXT: movl %esp, %ebp
900 ; X86-SSE2-NEXT: andl $-16, %esp
901 ; X86-SSE2-NEXT: subl $16, %esp
902 ; X86-SSE2-NEXT: pxor %xmm2, %xmm0
903 ; X86-SSE2-NEXT: pxor 8(%ebp), %xmm1
904 ; X86-SSE2-NEXT: pxor %xmm0, %xmm1
905 ; X86-SSE2-NEXT: psllw $7, %xmm1
906 ; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
907 ; X86-SSE2-NEXT: xorb %ah, %al
908 ; X86-SSE2-NEXT: setnp %al
909 ; X86-SSE2-NEXT: movl %ebp, %esp
910 ; X86-SSE2-NEXT: popl %ebp
911 ; X86-SSE2-NEXT: retl
913 ; X64-SSE-LABEL: trunc_v64i8_v64i1:
915 ; X64-SSE-NEXT: pxor %xmm3, %xmm1
916 ; X64-SSE-NEXT: pxor %xmm2, %xmm0
917 ; X64-SSE-NEXT: pxor %xmm1, %xmm0
918 ; X64-SSE-NEXT: psllw $7, %xmm0
919 ; X64-SSE-NEXT: pmovmskb %xmm0, %eax
920 ; X64-SSE-NEXT: xorb %ah, %al
921 ; X64-SSE-NEXT: setnp %al
924 ; AVX1-LABEL: trunc_v64i8_v64i1:
926 ; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
927 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
928 ; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
929 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
930 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
931 ; AVX1-NEXT: xorb %ah, %al
932 ; AVX1-NEXT: setnp %al
933 ; AVX1-NEXT: vzeroupper
936 ; AVX2-LABEL: trunc_v64i8_v64i1:
938 ; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
939 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
940 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
941 ; AVX2-NEXT: movl %eax, %ecx
942 ; AVX2-NEXT: shrl $16, %ecx
943 ; AVX2-NEXT: xorl %eax, %ecx
944 ; AVX2-NEXT: xorb %ch, %cl
945 ; AVX2-NEXT: setnp %al
946 ; AVX2-NEXT: vzeroupper
949 ; AVX512F-LABEL: trunc_v64i8_v64i1:
951 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
952 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
953 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
954 ; AVX512F-NEXT: vpxor %xmm2, %xmm3, %xmm2
955 ; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0
956 ; AVX512F-NEXT: vpxor %xmm2, %xmm0, %xmm0
957 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
958 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
959 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
960 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
961 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
962 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
963 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
964 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
965 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
966 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
967 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
968 ; AVX512F-NEXT: kmovw %k0, %eax
969 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
970 ; AVX512F-NEXT: vzeroupper
973 ; AVX512BW-LABEL: trunc_v64i8_v64i1:
975 ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0
976 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
977 ; AVX512BW-NEXT: kmovq %k0, %rax
978 ; AVX512BW-NEXT: movq %rax, %rcx
979 ; AVX512BW-NEXT: shrq $32, %rcx
980 ; AVX512BW-NEXT: xorl %eax, %ecx
981 ; AVX512BW-NEXT: movl %ecx, %eax
982 ; AVX512BW-NEXT: shrl $16, %eax
983 ; AVX512BW-NEXT: xorl %ecx, %eax
984 ; AVX512BW-NEXT: xorb %ah, %al
985 ; AVX512BW-NEXT: setnp %al
986 ; AVX512BW-NEXT: vzeroupper
987 ; AVX512BW-NEXT: retq
989 ; AVX512VL-LABEL: trunc_v64i8_v64i1:
991 ; AVX512VL-NEXT: vpsllw $7, %zmm0, %zmm0
992 ; AVX512VL-NEXT: vpmovb2m %zmm0, %k0
993 ; AVX512VL-NEXT: kmovq %k0, %rax
994 ; AVX512VL-NEXT: movq %rax, %rcx
995 ; AVX512VL-NEXT: shrq $32, %rcx
996 ; AVX512VL-NEXT: xorl %eax, %ecx
997 ; AVX512VL-NEXT: movl %ecx, %eax
998 ; AVX512VL-NEXT: shrl $16, %eax
999 ; AVX512VL-NEXT: xorl %ecx, %eax
1000 ; AVX512VL-NEXT: xorb %ah, %al
1001 ; AVX512VL-NEXT: setnp %al
1002 ; AVX512VL-NEXT: vzeroupper
1003 ; AVX512VL-NEXT: retq
1004 %a = trunc <64 x i8> %0 to <64 x i1>
1005 %b = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %a)
1010 ; Comparison With Zero
; Compares each lane of the <2 x i64> argument for equality with zero
; (icmp eq against zeroinitializer) and folds the two i1 results together with
; llvm.vector.reduce.xor.v2i1. The assertion lines are the autogenerated llc
; expectations, grouped by FileCheck prefix. The SSE2 group builds the 64-bit
; equality from a 32-bit compare (pcmpeqd) combined with a swapped copy
; (pshufd plus pand), while SSE41 and AVX use pcmpeqq directly. The AVX512
; groups use vptestnmq, and the AVX512F and AVX512BW groups test the mask
; with $3.
1013 define i1 @icmp0_v2i64_v2i1(<2 x i64>) nounwind {
1014 ; SSE2-LABEL: icmp0_v2i64_v2i1:
1016 ; SSE2-NEXT: pxor %xmm1, %xmm1
1017 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
1018 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
1019 ; SSE2-NEXT: pand %xmm1, %xmm0
1020 ; SSE2-NEXT: movmskpd %xmm0, %eax
1021 ; SSE2-NEXT: testb %al, %al
1022 ; SSE2-NEXT: setnp %al
1023 ; SSE2-NEXT: ret{{[l|q]}}
1025 ; SSE41-LABEL: icmp0_v2i64_v2i1:
1027 ; SSE41-NEXT: pxor %xmm1, %xmm1
1028 ; SSE41-NEXT: pcmpeqq %xmm0, %xmm1
1029 ; SSE41-NEXT: movmskpd %xmm1, %eax
1030 ; SSE41-NEXT: testb %al, %al
1031 ; SSE41-NEXT: setnp %al
1034 ; AVX-LABEL: icmp0_v2i64_v2i1:
1036 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1037 ; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
1038 ; AVX-NEXT: vmovmskpd %xmm0, %eax
1039 ; AVX-NEXT: testb %al, %al
1040 ; AVX-NEXT: setnp %al
1043 ; AVX512F-LABEL: icmp0_v2i64_v2i1:
1045 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1046 ; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
1047 ; AVX512F-NEXT: kmovw %k0, %eax
1048 ; AVX512F-NEXT: testb $3, %al
1049 ; AVX512F-NEXT: setnp %al
1050 ; AVX512F-NEXT: vzeroupper
1051 ; AVX512F-NEXT: retq
1053 ; AVX512BW-LABEL: icmp0_v2i64_v2i1:
1054 ; AVX512BW: # %bb.0:
1055 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1056 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
1057 ; AVX512BW-NEXT: kmovd %k0, %eax
1058 ; AVX512BW-NEXT: testb $3, %al
1059 ; AVX512BW-NEXT: setnp %al
1060 ; AVX512BW-NEXT: vzeroupper
1061 ; AVX512BW-NEXT: retq
1063 ; AVX512VL-LABEL: icmp0_v2i64_v2i1:
1064 ; AVX512VL: # %bb.0:
1065 ; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k0
1066 ; AVX512VL-NEXT: kmovd %k0, %eax
1067 ; AVX512VL-NEXT: testb %al, %al
1068 ; AVX512VL-NEXT: setnp %al
1069 ; AVX512VL-NEXT: retq
1070 %a = icmp eq <2 x i64> %0, zeroinitializer
1071 %b = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a)
; icmp0_v4i32_v4i1 compares the four i32 lanes of its argument against zero
; and XOR-reduces the resulting <4 x i1> mask with
; llvm.vector.reduce.xor.v4i1.
; The AVX512F and AVX512BW runs widen the xmm input to zmm for vptestnmd and
; therefore test only the low four mask bits (testb $15); the AVX512VL run
; works on xmm directly and tests the whole byte.
; The check lines are script-generated (update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
1075 define i1 @icmp0_v4i32_v4i1(<4 x i32>) nounwind {
1076 ; SSE-LABEL: icmp0_v4i32_v4i1:
1078 ; SSE-NEXT: pxor %xmm1, %xmm1
1079 ; SSE-NEXT: pcmpeqd %xmm0, %xmm1
1080 ; SSE-NEXT: movmskps %xmm1, %eax
1081 ; SSE-NEXT: testb %al, %al
1082 ; SSE-NEXT: setnp %al
1083 ; SSE-NEXT: ret{{[l|q]}}
1085 ; AVX-LABEL: icmp0_v4i32_v4i1:
1087 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1088 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1089 ; AVX-NEXT: vmovmskps %xmm0, %eax
1090 ; AVX-NEXT: testb %al, %al
1091 ; AVX-NEXT: setnp %al
1094 ; AVX512F-LABEL: icmp0_v4i32_v4i1:
1096 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1097 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
1098 ; AVX512F-NEXT: kmovw %k0, %eax
1099 ; AVX512F-NEXT: testb $15, %al
1100 ; AVX512F-NEXT: setnp %al
1101 ; AVX512F-NEXT: vzeroupper
1102 ; AVX512F-NEXT: retq
1104 ; AVX512BW-LABEL: icmp0_v4i32_v4i1:
1105 ; AVX512BW: # %bb.0:
1106 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1107 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
1108 ; AVX512BW-NEXT: kmovd %k0, %eax
1109 ; AVX512BW-NEXT: testb $15, %al
1110 ; AVX512BW-NEXT: setnp %al
1111 ; AVX512BW-NEXT: vzeroupper
1112 ; AVX512BW-NEXT: retq
1114 ; AVX512VL-LABEL: icmp0_v4i32_v4i1:
1115 ; AVX512VL: # %bb.0:
1116 ; AVX512VL-NEXT: vptestnmd %xmm0, %xmm0, %k0
1117 ; AVX512VL-NEXT: kmovd %k0, %eax
1118 ; AVX512VL-NEXT: testb %al, %al
1119 ; AVX512VL-NEXT: setnp %al
1120 ; AVX512VL-NEXT: retq
1121 %a = icmp eq <4 x i32> %0, zeroinitializer
1122 %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
; icmp0_v8i16_v8i1 compares the eight i16 lanes of its argument against zero
; and XOR-reduces the resulting <8 x i1> mask with
; llvm.vector.reduce.xor.v8i1.
; The SSE and AVX runs narrow the word compare to bytes with (v)packsswb
; before (v)pmovmskb. The AVX512F run sign-extends the compare result to
; qwords (vpmovsxwq) and uses vptestmq, whereas the AVX512BW and AVX512VL runs
; test the words directly with vptestnmw.
; The check lines are script-generated (update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
1126 define i1 @icmp0_v8i16_v8i1(<8 x i16>) nounwind {
1127 ; SSE-LABEL: icmp0_v8i16_v8i1:
1129 ; SSE-NEXT: pxor %xmm1, %xmm1
1130 ; SSE-NEXT: pcmpeqw %xmm0, %xmm1
1131 ; SSE-NEXT: packsswb %xmm1, %xmm1
1132 ; SSE-NEXT: pmovmskb %xmm1, %eax
1133 ; SSE-NEXT: testb %al, %al
1134 ; SSE-NEXT: setnp %al
1135 ; SSE-NEXT: ret{{[l|q]}}
1137 ; AVX-LABEL: icmp0_v8i16_v8i1:
1139 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1140 ; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
1141 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
1142 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1143 ; AVX-NEXT: testb %al, %al
1144 ; AVX-NEXT: setnp %al
1147 ; AVX512F-LABEL: icmp0_v8i16_v8i1:
1149 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1150 ; AVX512F-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
1151 ; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
1152 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
1153 ; AVX512F-NEXT: kmovw %k0, %eax
1154 ; AVX512F-NEXT: testb %al, %al
1155 ; AVX512F-NEXT: setnp %al
1156 ; AVX512F-NEXT: vzeroupper
1157 ; AVX512F-NEXT: retq
1159 ; AVX512BW-LABEL: icmp0_v8i16_v8i1:
1160 ; AVX512BW: # %bb.0:
1161 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1162 ; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
1163 ; AVX512BW-NEXT: kmovd %k0, %eax
1164 ; AVX512BW-NEXT: testb %al, %al
1165 ; AVX512BW-NEXT: setnp %al
1166 ; AVX512BW-NEXT: vzeroupper
1167 ; AVX512BW-NEXT: retq
1169 ; AVX512VL-LABEL: icmp0_v8i16_v8i1:
1170 ; AVX512VL: # %bb.0:
1171 ; AVX512VL-NEXT: vptestnmw %xmm0, %xmm0, %k0
1172 ; AVX512VL-NEXT: kmovd %k0, %eax
1173 ; AVX512VL-NEXT: testb %al, %al
1174 ; AVX512VL-NEXT: setnp %al
1175 ; AVX512VL-NEXT: retq
1176 %a = icmp eq <8 x i16> %0, zeroinitializer
1177 %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
; icmp0_v16i8_v16i1 compares the sixteen i8 lanes of its argument against
; zero and XOR-reduces the resulting <16 x i1> mask with
; llvm.vector.reduce.xor.v16i1.
; The mask is 16 bits wide here, so every run first XORs its two bytes
; together (xorb %ah, %al, or shrl $8 followed by xorb) and then applies
; setnp to that single byte. The AVX512F run expects the same
; vpcmpeqb + vpmovmskb sequence as plain AVX rather than a mask register.
; The check lines are script-generated (update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
1181 define i1 @icmp0_v16i8_v16i1(<16 x i8>) nounwind {
1182 ; SSE-LABEL: icmp0_v16i8_v16i1:
1184 ; SSE-NEXT: pxor %xmm1, %xmm1
1185 ; SSE-NEXT: pcmpeqb %xmm0, %xmm1
1186 ; SSE-NEXT: pmovmskb %xmm1, %eax
1187 ; SSE-NEXT: xorb %ah, %al
1188 ; SSE-NEXT: setnp %al
1189 ; SSE-NEXT: ret{{[l|q]}}
1191 ; AVX-LABEL: icmp0_v16i8_v16i1:
1193 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1194 ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1195 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1196 ; AVX-NEXT: xorb %ah, %al
1197 ; AVX-NEXT: setnp %al
1200 ; AVX512F-LABEL: icmp0_v16i8_v16i1:
1202 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1203 ; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1204 ; AVX512F-NEXT: vpmovmskb %xmm0, %eax
1205 ; AVX512F-NEXT: xorb %ah, %al
1206 ; AVX512F-NEXT: setnp %al
1207 ; AVX512F-NEXT: retq
1209 ; AVX512BW-LABEL: icmp0_v16i8_v16i1:
1210 ; AVX512BW: # %bb.0:
1211 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1212 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
1213 ; AVX512BW-NEXT: kmovd %k0, %eax
1214 ; AVX512BW-NEXT: movl %eax, %ecx
1215 ; AVX512BW-NEXT: shrl $8, %ecx
1216 ; AVX512BW-NEXT: xorb %al, %cl
1217 ; AVX512BW-NEXT: setnp %al
1218 ; AVX512BW-NEXT: vzeroupper
1219 ; AVX512BW-NEXT: retq
1221 ; AVX512VL-LABEL: icmp0_v16i8_v16i1:
1222 ; AVX512VL: # %bb.0:
1223 ; AVX512VL-NEXT: vptestnmb %xmm0, %xmm0, %k0
1224 ; AVX512VL-NEXT: kmovd %k0, %eax
1225 ; AVX512VL-NEXT: movl %eax, %ecx
1226 ; AVX512VL-NEXT: shrl $8, %ecx
1227 ; AVX512VL-NEXT: xorb %al, %cl
1228 ; AVX512VL-NEXT: setnp %al
1229 ; AVX512VL-NEXT: retq
1230 %a = icmp eq <16 x i8> %0, zeroinitializer
1231 %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
; icmp0_v4i64_v4i1 compares the four i64 lanes of a 256-bit argument against
; zero and XOR-reduces the resulting <4 x i1> mask with
; llvm.vector.reduce.xor.v4i1.
; The SSE2 run combines the dword compare results of each qword with
; shufps + andps; the SSE4.1 run uses pcmpeqq + packssdw; the AVX1 run
; compares the two 128-bit halves separately and rejoins them with
; vinsertf128; the AVX2 run compares the full ymm register at once.
; The check lines are script-generated (update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
1235 define i1 @icmp0_v4i64_v4i1(<4 x i64>) nounwind {
1236 ; SSE2-LABEL: icmp0_v4i64_v4i1:
1238 ; SSE2-NEXT: pxor %xmm2, %xmm2
1239 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
1240 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
1241 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1242 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm1[1,3]
1243 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
1244 ; SSE2-NEXT: andps %xmm2, %xmm0
1245 ; SSE2-NEXT: movmskps %xmm0, %eax
1246 ; SSE2-NEXT: testb %al, %al
1247 ; SSE2-NEXT: setnp %al
1248 ; SSE2-NEXT: ret{{[l|q]}}
1250 ; SSE41-LABEL: icmp0_v4i64_v4i1:
1252 ; SSE41-NEXT: pxor %xmm2, %xmm2
1253 ; SSE41-NEXT: pcmpeqq %xmm2, %xmm1
1254 ; SSE41-NEXT: pcmpeqq %xmm2, %xmm0
1255 ; SSE41-NEXT: packssdw %xmm1, %xmm0
1256 ; SSE41-NEXT: movmskps %xmm0, %eax
1257 ; SSE41-NEXT: testb %al, %al
1258 ; SSE41-NEXT: setnp %al
1261 ; AVX1-LABEL: icmp0_v4i64_v4i1:
1263 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1264 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1265 ; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
1266 ; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
1267 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1268 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
1269 ; AVX1-NEXT: testb %al, %al
1270 ; AVX1-NEXT: setnp %al
1271 ; AVX1-NEXT: vzeroupper
1274 ; AVX2-LABEL: icmp0_v4i64_v4i1:
1276 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1277 ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
1278 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
1279 ; AVX2-NEXT: testb %al, %al
1280 ; AVX2-NEXT: setnp %al
1281 ; AVX2-NEXT: vzeroupper
1284 ; AVX512F-LABEL: icmp0_v4i64_v4i1:
1286 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1287 ; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
1288 ; AVX512F-NEXT: kmovw %k0, %eax
1289 ; AVX512F-NEXT: testb $15, %al
1290 ; AVX512F-NEXT: setnp %al
1291 ; AVX512F-NEXT: vzeroupper
1292 ; AVX512F-NEXT: retq
1294 ; AVX512BW-LABEL: icmp0_v4i64_v4i1:
1295 ; AVX512BW: # %bb.0:
1296 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1297 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
1298 ; AVX512BW-NEXT: kmovd %k0, %eax
1299 ; AVX512BW-NEXT: testb $15, %al
1300 ; AVX512BW-NEXT: setnp %al
1301 ; AVX512BW-NEXT: vzeroupper
1302 ; AVX512BW-NEXT: retq
1304 ; AVX512VL-LABEL: icmp0_v4i64_v4i1:
1305 ; AVX512VL: # %bb.0:
1306 ; AVX512VL-NEXT: vptestnmq %ymm0, %ymm0, %k0
1307 ; AVX512VL-NEXT: kmovd %k0, %eax
1308 ; AVX512VL-NEXT: testb %al, %al
1309 ; AVX512VL-NEXT: setnp %al
1310 ; AVX512VL-NEXT: vzeroupper
1311 ; AVX512VL-NEXT: retq
1312 %a = icmp eq <4 x i64> %0, zeroinitializer
1313 %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
; icmp0_v8i32_v8i1 compares the eight i32 lanes of a 256-bit argument against
; zero and XOR-reduces the resulting <8 x i1> mask with
; llvm.vector.reduce.xor.v8i1.
; The SSE runs narrow the two dword compare results to bytes
; (packssdw + packsswb) before pmovmskb; the AVX1 and AVX2 runs read the
; eight sign bits directly with vmovmskps on ymm; the AVX-512 runs use
; vptestnmd into a mask register.
; The check lines are script-generated (update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
1317 define i1 @icmp0_v8i32_v8i1(<8 x i32>) nounwind {
1318 ; SSE-LABEL: icmp0_v8i32_v8i1:
1320 ; SSE-NEXT: pxor %xmm2, %xmm2
1321 ; SSE-NEXT: pcmpeqd %xmm2, %xmm1
1322 ; SSE-NEXT: pcmpeqd %xmm2, %xmm0
1323 ; SSE-NEXT: packssdw %xmm1, %xmm0
1324 ; SSE-NEXT: packsswb %xmm0, %xmm0
1325 ; SSE-NEXT: pmovmskb %xmm0, %eax
1326 ; SSE-NEXT: testb %al, %al
1327 ; SSE-NEXT: setnp %al
1328 ; SSE-NEXT: ret{{[l|q]}}
1330 ; AVX1-LABEL: icmp0_v8i32_v8i1:
1332 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1333 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1334 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
1335 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
1336 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1337 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1338 ; AVX1-NEXT: testb %al, %al
1339 ; AVX1-NEXT: setnp %al
1340 ; AVX1-NEXT: vzeroupper
1343 ; AVX2-LABEL: icmp0_v8i32_v8i1:
1345 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1346 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
1347 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1348 ; AVX2-NEXT: testb %al, %al
1349 ; AVX2-NEXT: setnp %al
1350 ; AVX2-NEXT: vzeroupper
1353 ; AVX512F-LABEL: icmp0_v8i32_v8i1:
1355 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1356 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
1357 ; AVX512F-NEXT: kmovw %k0, %eax
1358 ; AVX512F-NEXT: testb %al, %al
1359 ; AVX512F-NEXT: setnp %al
1360 ; AVX512F-NEXT: vzeroupper
1361 ; AVX512F-NEXT: retq
1363 ; AVX512BW-LABEL: icmp0_v8i32_v8i1:
1364 ; AVX512BW: # %bb.0:
1365 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1366 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
1367 ; AVX512BW-NEXT: kmovd %k0, %eax
1368 ; AVX512BW-NEXT: testb %al, %al
1369 ; AVX512BW-NEXT: setnp %al
1370 ; AVX512BW-NEXT: vzeroupper
1371 ; AVX512BW-NEXT: retq
1373 ; AVX512VL-LABEL: icmp0_v8i32_v8i1:
1374 ; AVX512VL: # %bb.0:
1375 ; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
1376 ; AVX512VL-NEXT: kmovd %k0, %eax
1377 ; AVX512VL-NEXT: testb %al, %al
1378 ; AVX512VL-NEXT: setnp %al
1379 ; AVX512VL-NEXT: vzeroupper
1380 ; AVX512VL-NEXT: retq
1381 %a = icmp eq <8 x i32> %0, zeroinitializer
1382 %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
; icmp0_v16i16_v16i1 compares the sixteen i16 lanes of a 256-bit argument
; against zero and XOR-reduces the resulting <16 x i1> mask with
; llvm.vector.reduce.xor.v16i1.
; The SSE, AVX1 and AVX2 runs pack the word compare results to bytes and
; fold the 16-bit pmovmskb result with xorb %ah, %al before setnp. The
; AVX512F run sign-extends the compare result to dwords (vpmovsxwd) to obtain
; a 16-bit mask via vptestmd; the AVX512BW and AVX512VL runs use vptestnmw.
; The check lines are script-generated (update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
1386 define i1 @icmp0_v16i16_v16i1(<16 x i16>) nounwind {
1387 ; SSE-LABEL: icmp0_v16i16_v16i1:
1389 ; SSE-NEXT: pxor %xmm2, %xmm2
1390 ; SSE-NEXT: pcmpeqw %xmm2, %xmm1
1391 ; SSE-NEXT: pcmpeqw %xmm2, %xmm0
1392 ; SSE-NEXT: packsswb %xmm1, %xmm0
1393 ; SSE-NEXT: pmovmskb %xmm0, %eax
1394 ; SSE-NEXT: xorb %ah, %al
1395 ; SSE-NEXT: setnp %al
1396 ; SSE-NEXT: ret{{[l|q]}}
1398 ; AVX1-LABEL: icmp0_v16i16_v16i1:
1400 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1401 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1402 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
1403 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
1404 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1405 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1406 ; AVX1-NEXT: xorb %ah, %al
1407 ; AVX1-NEXT: setnp %al
1408 ; AVX1-NEXT: vzeroupper
1411 ; AVX2-LABEL: icmp0_v16i16_v16i1:
1413 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1414 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1415 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1416 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1417 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
1418 ; AVX2-NEXT: xorb %ah, %al
1419 ; AVX2-NEXT: setnp %al
1420 ; AVX2-NEXT: vzeroupper
1423 ; AVX512F-LABEL: icmp0_v16i16_v16i1:
1425 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1426 ; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1427 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
1428 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1429 ; AVX512F-NEXT: kmovw %k0, %eax
1430 ; AVX512F-NEXT: movl %eax, %ecx
1431 ; AVX512F-NEXT: shrl $8, %ecx
1432 ; AVX512F-NEXT: xorb %al, %cl
1433 ; AVX512F-NEXT: setnp %al
1434 ; AVX512F-NEXT: vzeroupper
1435 ; AVX512F-NEXT: retq
1437 ; AVX512BW-LABEL: icmp0_v16i16_v16i1:
1438 ; AVX512BW: # %bb.0:
1439 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1440 ; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
1441 ; AVX512BW-NEXT: kmovd %k0, %eax
1442 ; AVX512BW-NEXT: movl %eax, %ecx
1443 ; AVX512BW-NEXT: shrl $8, %ecx
1444 ; AVX512BW-NEXT: xorb %al, %cl
1445 ; AVX512BW-NEXT: setnp %al
1446 ; AVX512BW-NEXT: vzeroupper
1447 ; AVX512BW-NEXT: retq
1449 ; AVX512VL-LABEL: icmp0_v16i16_v16i1:
1450 ; AVX512VL: # %bb.0:
1451 ; AVX512VL-NEXT: vptestnmw %ymm0, %ymm0, %k0
1452 ; AVX512VL-NEXT: kmovd %k0, %eax
1453 ; AVX512VL-NEXT: movl %eax, %ecx
1454 ; AVX512VL-NEXT: shrl $8, %ecx
1455 ; AVX512VL-NEXT: xorb %al, %cl
1456 ; AVX512VL-NEXT: setnp %al
1457 ; AVX512VL-NEXT: vzeroupper
1458 ; AVX512VL-NEXT: retq
1459 %a = icmp eq <16 x i16> %0, zeroinitializer
1460 %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
; icmp0_v32i8_v32i1 compares the thirty-two i8 lanes of a 256-bit argument
; against zero and XOR-reduces the resulting <32 x i1> mask with
; llvm.vector.reduce.xor.v32i1.
; The SSE and AVX1 runs XOR the two 128-bit compare results together as
; vectors (pxor / vpxor) before extracting a 16-bit mask. The AVX2, AVX512BW
; and AVX512VL runs extract a 32-bit mask and fold it in GPRs
; (shrl $16, xorl, xorb %ch, %cl). The AVX512F run finishes the reduction in
; mask registers with a kshiftrw / kxorw chain and returns the low bit
; without setnp.
; The check lines are script-generated (update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
1464 define i1 @icmp0_v32i8_v32i1(<32 x i8>) nounwind {
1465 ; SSE-LABEL: icmp0_v32i8_v32i1:
1467 ; SSE-NEXT: pxor %xmm2, %xmm2
1468 ; SSE-NEXT: pcmpeqb %xmm2, %xmm1
1469 ; SSE-NEXT: pcmpeqb %xmm2, %xmm0
1470 ; SSE-NEXT: pxor %xmm1, %xmm0
1471 ; SSE-NEXT: pmovmskb %xmm0, %eax
1472 ; SSE-NEXT: xorb %ah, %al
1473 ; SSE-NEXT: setnp %al
1474 ; SSE-NEXT: ret{{[l|q]}}
1476 ; AVX1-LABEL: icmp0_v32i8_v32i1:
1478 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1479 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1480 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
1481 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
1482 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1483 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1484 ; AVX1-NEXT: xorb %ah, %al
1485 ; AVX1-NEXT: setnp %al
1486 ; AVX1-NEXT: vzeroupper
1489 ; AVX2-LABEL: icmp0_v32i8_v32i1:
1491 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1492 ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
1493 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1494 ; AVX2-NEXT: movl %eax, %ecx
1495 ; AVX2-NEXT: shrl $16, %ecx
1496 ; AVX2-NEXT: xorl %eax, %ecx
1497 ; AVX2-NEXT: xorb %ch, %cl
1498 ; AVX2-NEXT: setnp %al
1499 ; AVX2-NEXT: vzeroupper
1502 ; AVX512F-LABEL: icmp0_v32i8_v32i1:
1504 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1505 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
1506 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
1507 ; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0
1508 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1509 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1510 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
1511 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
1512 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
1513 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
1514 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
1515 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
1516 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
1517 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
1518 ; AVX512F-NEXT: kmovw %k0, %eax
1519 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
1520 ; AVX512F-NEXT: vzeroupper
1521 ; AVX512F-NEXT: retq
1523 ; AVX512BW-LABEL: icmp0_v32i8_v32i1:
1524 ; AVX512BW: # %bb.0:
1525 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1526 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
1527 ; AVX512BW-NEXT: kmovd %k0, %eax
1528 ; AVX512BW-NEXT: movl %eax, %ecx
1529 ; AVX512BW-NEXT: shrl $16, %ecx
1530 ; AVX512BW-NEXT: xorl %eax, %ecx
1531 ; AVX512BW-NEXT: xorb %ch, %cl
1532 ; AVX512BW-NEXT: setnp %al
1533 ; AVX512BW-NEXT: vzeroupper
1534 ; AVX512BW-NEXT: retq
1536 ; AVX512VL-LABEL: icmp0_v32i8_v32i1:
1537 ; AVX512VL: # %bb.0:
1538 ; AVX512VL-NEXT: vptestnmb %ymm0, %ymm0, %k0
1539 ; AVX512VL-NEXT: kmovd %k0, %eax
1540 ; AVX512VL-NEXT: movl %eax, %ecx
1541 ; AVX512VL-NEXT: shrl $16, %ecx
1542 ; AVX512VL-NEXT: xorl %eax, %ecx
1543 ; AVX512VL-NEXT: xorb %ch, %cl
1544 ; AVX512VL-NEXT: setnp %al
1545 ; AVX512VL-NEXT: vzeroupper
1546 ; AVX512VL-NEXT: retq
1547 %a = icmp eq <32 x i8> %0, zeroinitializer
1548 %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
; icmp0_v8i64_v8i1 compares the eight i64 lanes of a 512-bit argument against
; zero and XOR-reduces the resulting <8 x i1> mask with
; llvm.vector.reduce.xor.v8i1.
; The i686 and x86-64 SSE2 runs have separate expectations. The i686 one sets
; up an ebp frame, realigns esp to 16 bytes and reads the fourth 128-bit chunk
; from 8(%ebp), while the x86-64 one takes all four chunks in xmm0-xmm3. Both
; build each 64-bit equality from pcmpeqd + pshufd + pand and narrow the
; result with packssdw / packsswb. All three AVX-512 runs need only a single
; vptestnmq on zmm0.
; The check lines are script-generated (update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
1552 define i1 @icmp0_v8i64_v8i1(<8 x i64>) nounwind {
1553 ; X86-SSE2-LABEL: icmp0_v8i64_v8i1:
1554 ; X86-SSE2: # %bb.0:
1555 ; X86-SSE2-NEXT: pushl %ebp
1556 ; X86-SSE2-NEXT: movl %esp, %ebp
1557 ; X86-SSE2-NEXT: andl $-16, %esp
1558 ; X86-SSE2-NEXT: subl $16, %esp
1559 ; X86-SSE2-NEXT: pxor %xmm3, %xmm3
1560 ; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm1
1561 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,0,3,2]
1562 ; X86-SSE2-NEXT: pand %xmm1, %xmm4
1563 ; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm0
1564 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
1565 ; X86-SSE2-NEXT: pand %xmm0, %xmm1
1566 ; X86-SSE2-NEXT: packssdw %xmm4, %xmm1
1567 ; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
1568 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2]
1569 ; X86-SSE2-NEXT: pand %xmm2, %xmm0
1570 ; X86-SSE2-NEXT: pcmpeqd 8(%ebp), %xmm3
1571 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
1572 ; X86-SSE2-NEXT: pand %xmm3, %xmm2
1573 ; X86-SSE2-NEXT: packssdw %xmm2, %xmm0
1574 ; X86-SSE2-NEXT: packssdw %xmm0, %xmm1
1575 ; X86-SSE2-NEXT: packsswb %xmm1, %xmm1
1576 ; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
1577 ; X86-SSE2-NEXT: testb %al, %al
1578 ; X86-SSE2-NEXT: setnp %al
1579 ; X86-SSE2-NEXT: movl %ebp, %esp
1580 ; X86-SSE2-NEXT: popl %ebp
1581 ; X86-SSE2-NEXT: retl
1583 ; X64-SSE2-LABEL: icmp0_v8i64_v8i1:
1584 ; X64-SSE2: # %bb.0:
1585 ; X64-SSE2-NEXT: pxor %xmm4, %xmm4
1586 ; X64-SSE2-NEXT: pcmpeqd %xmm4, %xmm3
1587 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
1588 ; X64-SSE2-NEXT: pand %xmm3, %xmm5
1589 ; X64-SSE2-NEXT: pcmpeqd %xmm4, %xmm2
1590 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
1591 ; X64-SSE2-NEXT: pand %xmm2, %xmm3
1592 ; X64-SSE2-NEXT: packssdw %xmm5, %xmm3
1593 ; X64-SSE2-NEXT: pcmpeqd %xmm4, %xmm1
1594 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
1595 ; X64-SSE2-NEXT: pand %xmm1, %xmm2
1596 ; X64-SSE2-NEXT: pcmpeqd %xmm4, %xmm0
1597 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
1598 ; X64-SSE2-NEXT: pand %xmm0, %xmm1
1599 ; X64-SSE2-NEXT: packssdw %xmm2, %xmm1
1600 ; X64-SSE2-NEXT: packssdw %xmm3, %xmm1
1601 ; X64-SSE2-NEXT: packsswb %xmm1, %xmm1
1602 ; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
1603 ; X64-SSE2-NEXT: testb %al, %al
1604 ; X64-SSE2-NEXT: setnp %al
1605 ; X64-SSE2-NEXT: retq
1607 ; SSE41-LABEL: icmp0_v8i64_v8i1:
1609 ; SSE41-NEXT: pxor %xmm4, %xmm4
1610 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm3
1611 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm2
1612 ; SSE41-NEXT: packssdw %xmm3, %xmm2
1613 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
1614 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
1615 ; SSE41-NEXT: packssdw %xmm1, %xmm0
1616 ; SSE41-NEXT: packssdw %xmm2, %xmm0
1617 ; SSE41-NEXT: packsswb %xmm0, %xmm0
1618 ; SSE41-NEXT: pmovmskb %xmm0, %eax
1619 ; SSE41-NEXT: testb %al, %al
1620 ; SSE41-NEXT: setnp %al
1623 ; AVX1-LABEL: icmp0_v8i64_v8i1:
1625 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1626 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1627 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
1628 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
1629 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1630 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1631 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
1632 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
1633 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1634 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1635 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1636 ; AVX1-NEXT: testb %al, %al
1637 ; AVX1-NEXT: setnp %al
1638 ; AVX1-NEXT: vzeroupper
1641 ; AVX2-LABEL: icmp0_v8i64_v8i1:
1643 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1644 ; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
1645 ; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
1646 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1647 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1648 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1649 ; AVX2-NEXT: testb %al, %al
1650 ; AVX2-NEXT: setnp %al
1651 ; AVX2-NEXT: vzeroupper
1654 ; AVX512F-LABEL: icmp0_v8i64_v8i1:
1656 ; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
1657 ; AVX512F-NEXT: kmovw %k0, %eax
1658 ; AVX512F-NEXT: testb %al, %al
1659 ; AVX512F-NEXT: setnp %al
1660 ; AVX512F-NEXT: vzeroupper
1661 ; AVX512F-NEXT: retq
1663 ; AVX512BW-LABEL: icmp0_v8i64_v8i1:
1664 ; AVX512BW: # %bb.0:
1665 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
1666 ; AVX512BW-NEXT: kmovd %k0, %eax
1667 ; AVX512BW-NEXT: testb %al, %al
1668 ; AVX512BW-NEXT: setnp %al
1669 ; AVX512BW-NEXT: vzeroupper
1670 ; AVX512BW-NEXT: retq
1672 ; AVX512VL-LABEL: icmp0_v8i64_v8i1:
1673 ; AVX512VL: # %bb.0:
1674 ; AVX512VL-NEXT: vptestnmq %zmm0, %zmm0, %k0
1675 ; AVX512VL-NEXT: kmovd %k0, %eax
1676 ; AVX512VL-NEXT: testb %al, %al
1677 ; AVX512VL-NEXT: setnp %al
1678 ; AVX512VL-NEXT: vzeroupper
1679 ; AVX512VL-NEXT: retq
1680 %a = icmp eq <8 x i64> %0, zeroinitializer
1681 %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
; icmp0_v16i32_v16i1 compares the sixteen i32 lanes of a 512-bit argument
; against zero and XOR-reduces the resulting <16 x i1> mask with
; llvm.vector.reduce.xor.v16i1.
; The i686 SSE2 run reads the fourth 128-bit chunk from 8(%ebp) after
; realigning the stack; the x86-64 SSE runs share one set of expectations.
; The non-AVX-512 runs narrow the dword compares to bytes (packssdw +
; packsswb) and fold the 16-bit pmovmskb result with xorb %ah, %al. The
; AVX-512 runs use a single vptestnmd on zmm0 and fold the 16-bit mask with
; shrl $8 + xorb.
; The check lines are script-generated (update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
1685 define i1 @icmp0_v16i32_v16i1(<16 x i32>) nounwind {
1686 ; X86-SSE2-LABEL: icmp0_v16i32_v16i1:
1687 ; X86-SSE2: # %bb.0:
1688 ; X86-SSE2-NEXT: pushl %ebp
1689 ; X86-SSE2-NEXT: movl %esp, %ebp
1690 ; X86-SSE2-NEXT: andl $-16, %esp
1691 ; X86-SSE2-NEXT: subl $16, %esp
1692 ; X86-SSE2-NEXT: pxor %xmm3, %xmm3
1693 ; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm1
1694 ; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm0
1695 ; X86-SSE2-NEXT: packssdw %xmm1, %xmm0
1696 ; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
1697 ; X86-SSE2-NEXT: pcmpeqd 8(%ebp), %xmm3
1698 ; X86-SSE2-NEXT: packssdw %xmm3, %xmm2
1699 ; X86-SSE2-NEXT: packsswb %xmm2, %xmm0
1700 ; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
1701 ; X86-SSE2-NEXT: xorb %ah, %al
1702 ; X86-SSE2-NEXT: setnp %al
1703 ; X86-SSE2-NEXT: movl %ebp, %esp
1704 ; X86-SSE2-NEXT: popl %ebp
1705 ; X86-SSE2-NEXT: retl
1707 ; X64-SSE-LABEL: icmp0_v16i32_v16i1:
1709 ; X64-SSE-NEXT: pxor %xmm4, %xmm4
1710 ; X64-SSE-NEXT: pcmpeqd %xmm4, %xmm3
1711 ; X64-SSE-NEXT: pcmpeqd %xmm4, %xmm2
1712 ; X64-SSE-NEXT: packssdw %xmm3, %xmm2
1713 ; X64-SSE-NEXT: pcmpeqd %xmm4, %xmm1
1714 ; X64-SSE-NEXT: pcmpeqd %xmm4, %xmm0
1715 ; X64-SSE-NEXT: packssdw %xmm1, %xmm0
1716 ; X64-SSE-NEXT: packsswb %xmm2, %xmm0
1717 ; X64-SSE-NEXT: pmovmskb %xmm0, %eax
1718 ; X64-SSE-NEXT: xorb %ah, %al
1719 ; X64-SSE-NEXT: setnp %al
1720 ; X64-SSE-NEXT: retq
1722 ; AVX1-LABEL: icmp0_v16i32_v16i1:
1724 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1725 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1726 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
1727 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
1728 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1729 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1730 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
1731 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
1732 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1733 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1734 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1735 ; AVX1-NEXT: xorb %ah, %al
1736 ; AVX1-NEXT: setnp %al
1737 ; AVX1-NEXT: vzeroupper
1740 ; AVX2-LABEL: icmp0_v16i32_v16i1:
1742 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1743 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
1744 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
1745 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1746 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1747 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
1748 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
1749 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
1750 ; AVX2-NEXT: xorb %ah, %al
1751 ; AVX2-NEXT: setnp %al
1752 ; AVX2-NEXT: vzeroupper
1755 ; AVX512F-LABEL: icmp0_v16i32_v16i1:
1757 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
1758 ; AVX512F-NEXT: kmovw %k0, %eax
1759 ; AVX512F-NEXT: movl %eax, %ecx
1760 ; AVX512F-NEXT: shrl $8, %ecx
1761 ; AVX512F-NEXT: xorb %al, %cl
1762 ; AVX512F-NEXT: setnp %al
1763 ; AVX512F-NEXT: vzeroupper
1764 ; AVX512F-NEXT: retq
1766 ; AVX512BW-LABEL: icmp0_v16i32_v16i1:
1767 ; AVX512BW: # %bb.0:
1768 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
1769 ; AVX512BW-NEXT: kmovd %k0, %eax
1770 ; AVX512BW-NEXT: movl %eax, %ecx
1771 ; AVX512BW-NEXT: shrl $8, %ecx
1772 ; AVX512BW-NEXT: xorb %al, %cl
1773 ; AVX512BW-NEXT: setnp %al
1774 ; AVX512BW-NEXT: vzeroupper
1775 ; AVX512BW-NEXT: retq
1777 ; AVX512VL-LABEL: icmp0_v16i32_v16i1:
1778 ; AVX512VL: # %bb.0:
1779 ; AVX512VL-NEXT: vptestnmd %zmm0, %zmm0, %k0
1780 ; AVX512VL-NEXT: kmovd %k0, %eax
1781 ; AVX512VL-NEXT: movl %eax, %ecx
1782 ; AVX512VL-NEXT: shrl $8, %ecx
1783 ; AVX512VL-NEXT: xorb %al, %cl
1784 ; AVX512VL-NEXT: setnp %al
1785 ; AVX512VL-NEXT: vzeroupper
1786 ; AVX512VL-NEXT: retq
1787 %a = icmp eq <16 x i32> %0, zeroinitializer
1788 %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
; icmp0_v32i16_v32i1 compares the thirty-two i16 lanes of a 512-bit argument
; against zero and XOR-reduces the resulting <32 x i1> mask with
; llvm.vector.reduce.xor.v32i1.
; The SSE and AVX1 runs XOR pairs of word compare results together as vectors
; before packing to bytes, so only a 16-bit mask has to be folded in a GPR.
; The AVX2 run packs both ymm compares, fixes the lane order with vpermq and
; folds a 32-bit mask. The AVX512F run splits zmm0 into two ymm halves, XORs
; the compares and finishes in mask registers with a kshiftrw / kxorw chain.
; The AVX512BW and AVX512VL runs use a single vptestnmw on zmm0.
; The check lines are script-generated (update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
1792 define i1 @icmp0_v32i16_v32i1(<32 x i16>) nounwind {
1793 ; X86-SSE2-LABEL: icmp0_v32i16_v32i1:
1794 ; X86-SSE2: # %bb.0:
1795 ; X86-SSE2-NEXT: pushl %ebp
1796 ; X86-SSE2-NEXT: movl %esp, %ebp
1797 ; X86-SSE2-NEXT: andl $-16, %esp
1798 ; X86-SSE2-NEXT: subl $16, %esp
1799 ; X86-SSE2-NEXT: pxor %xmm3, %xmm3
1800 ; X86-SSE2-NEXT: pcmpeqw %xmm3, %xmm1
1801 ; X86-SSE2-NEXT: pcmpeqw %xmm3, %xmm2
1802 ; X86-SSE2-NEXT: pcmpeqw %xmm3, %xmm0
1803 ; X86-SSE2-NEXT: pxor %xmm2, %xmm0
1804 ; X86-SSE2-NEXT: pcmpeqw 8(%ebp), %xmm3
1805 ; X86-SSE2-NEXT: pxor %xmm1, %xmm3
1806 ; X86-SSE2-NEXT: packsswb %xmm3, %xmm0
1807 ; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
1808 ; X86-SSE2-NEXT: xorb %ah, %al
1809 ; X86-SSE2-NEXT: setnp %al
1810 ; X86-SSE2-NEXT: movl %ebp, %esp
1811 ; X86-SSE2-NEXT: popl %ebp
1812 ; X86-SSE2-NEXT: retl
1814 ; X64-SSE-LABEL: icmp0_v32i16_v32i1:
1816 ; X64-SSE-NEXT: pxor %xmm4, %xmm4
1817 ; X64-SSE-NEXT: pcmpeqw %xmm4, %xmm2
1818 ; X64-SSE-NEXT: pcmpeqw %xmm4, %xmm0
1819 ; X64-SSE-NEXT: pxor %xmm2, %xmm0
1820 ; X64-SSE-NEXT: pcmpeqw %xmm4, %xmm3
1821 ; X64-SSE-NEXT: pcmpeqw %xmm4, %xmm1
1822 ; X64-SSE-NEXT: pxor %xmm3, %xmm1
1823 ; X64-SSE-NEXT: packsswb %xmm1, %xmm0
1824 ; X64-SSE-NEXT: pmovmskb %xmm0, %eax
1825 ; X64-SSE-NEXT: xorb %ah, %al
1826 ; X64-SSE-NEXT: setnp %al
1827 ; X64-SSE-NEXT: retq
1829 ; AVX1-LABEL: icmp0_v32i16_v32i1:
1831 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1832 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm3
1833 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm4
1834 ; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
1835 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1836 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
1837 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1838 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
1839 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1840 ; AVX1-NEXT: vpacksswb %xmm0, %xmm3, %xmm0
1841 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1842 ; AVX1-NEXT: xorb %ah, %al
1843 ; AVX1-NEXT: setnp %al
1844 ; AVX1-NEXT: vzeroupper
1847 ; AVX2-LABEL: icmp0_v32i16_v32i1:
1849 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1850 ; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
1851 ; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
1852 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
1853 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1854 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1855 ; AVX2-NEXT: movl %eax, %ecx
1856 ; AVX2-NEXT: shrl $16, %ecx
1857 ; AVX2-NEXT: xorl %eax, %ecx
1858 ; AVX2-NEXT: xorb %ch, %cl
1859 ; AVX2-NEXT: setnp %al
1860 ; AVX2-NEXT: vzeroupper
1863 ; AVX512F-LABEL: icmp0_v32i16_v32i1:
1865 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1866 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1867 ; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
1868 ; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
1869 ; AVX512F-NEXT: vpxor %ymm1, %ymm0, %ymm0
1870 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
1871 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1872 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
1873 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
1874 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
1875 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
1876 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
1877 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
1878 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
1879 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
1880 ; AVX512F-NEXT: kmovw %k0, %eax
1881 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
1882 ; AVX512F-NEXT: vzeroupper
1883 ; AVX512F-NEXT: retq
1885 ; AVX512BW-LABEL: icmp0_v32i16_v32i1:
1886 ; AVX512BW: # %bb.0:
1887 ; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
1888 ; AVX512BW-NEXT: kmovd %k0, %eax
1889 ; AVX512BW-NEXT: movl %eax, %ecx
1890 ; AVX512BW-NEXT: shrl $16, %ecx
1891 ; AVX512BW-NEXT: xorl %eax, %ecx
1892 ; AVX512BW-NEXT: xorb %ch, %cl
1893 ; AVX512BW-NEXT: setnp %al
1894 ; AVX512BW-NEXT: vzeroupper
1895 ; AVX512BW-NEXT: retq
1897 ; AVX512VL-LABEL: icmp0_v32i16_v32i1:
1898 ; AVX512VL: # %bb.0:
1899 ; AVX512VL-NEXT: vptestnmw %zmm0, %zmm0, %k0
1900 ; AVX512VL-NEXT: kmovd %k0, %eax
1901 ; AVX512VL-NEXT: movl %eax, %ecx
1902 ; AVX512VL-NEXT: shrl $16, %ecx
1903 ; AVX512VL-NEXT: xorl %eax, %ecx
1904 ; AVX512VL-NEXT: xorb %ch, %cl
1905 ; AVX512VL-NEXT: setnp %al
1906 ; AVX512VL-NEXT: vzeroupper
1907 ; AVX512VL-NEXT: retq
1908 %a = icmp eq <32 x i16> %0, zeroinitializer
1909 %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
; icmp0_v64i8_v64i1 compares the sixty-four i8 lanes of a 512-bit argument
; against zero and XOR-reduces the resulting <64 x i1> mask with
; llvm.vector.reduce.xor.v64i1.
; The SSE and AVX1 runs XOR all four 128-bit compare results together as
; vectors and fold a 16-bit mask; the AVX2 run XORs the two ymm compares and
; folds a 32-bit mask. The AVX512F run XORs the compares down to one xmm
; register and finishes in mask registers with a kshiftrw / kxorw chain. The
; AVX512BW and AVX512VL runs extract a 64-bit mask (kmovq) and fold it with
; shrq $32, shrl $16 and xorb %ah, %al.
; NOTE(review): the AVX512F expectation contains a vpslld $31 between
; vpmovsxbd and vptestmd that the otherwise similar icmp0_v32i8_v32i1
; expectation does not have. This looks like a redundant shift in the
; generated code rather than a problem with the test - confirm.
; The check lines are script-generated (update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
1913 define i1 @icmp0_v64i8_v64i1(<64 x i8>) nounwind {
1914 ; X86-SSE2-LABEL: icmp0_v64i8_v64i1:
1915 ; X86-SSE2: # %bb.0:
1916 ; X86-SSE2-NEXT: pushl %ebp
1917 ; X86-SSE2-NEXT: movl %esp, %ebp
1918 ; X86-SSE2-NEXT: andl $-16, %esp
1919 ; X86-SSE2-NEXT: subl $16, %esp
1920 ; X86-SSE2-NEXT: pxor %xmm3, %xmm3
1921 ; X86-SSE2-NEXT: pcmpeqb %xmm3, %xmm1
1922 ; X86-SSE2-NEXT: pcmpeqb %xmm3, %xmm2
1923 ; X86-SSE2-NEXT: pcmpeqb %xmm3, %xmm0
1924 ; X86-SSE2-NEXT: pxor %xmm2, %xmm0
1925 ; X86-SSE2-NEXT: pcmpeqb 8(%ebp), %xmm3
1926 ; X86-SSE2-NEXT: pxor %xmm1, %xmm3
1927 ; X86-SSE2-NEXT: pxor %xmm0, %xmm3
1928 ; X86-SSE2-NEXT: pmovmskb %xmm3, %eax
1929 ; X86-SSE2-NEXT: xorb %ah, %al
1930 ; X86-SSE2-NEXT: setnp %al
1931 ; X86-SSE2-NEXT: movl %ebp, %esp
1932 ; X86-SSE2-NEXT: popl %ebp
1933 ; X86-SSE2-NEXT: retl
1935 ; X64-SSE-LABEL: icmp0_v64i8_v64i1:
1937 ; X64-SSE-NEXT: pxor %xmm4, %xmm4
1938 ; X64-SSE-NEXT: pcmpeqb %xmm4, %xmm2
1939 ; X64-SSE-NEXT: pcmpeqb %xmm4, %xmm0
1940 ; X64-SSE-NEXT: pxor %xmm2, %xmm0
1941 ; X64-SSE-NEXT: pcmpeqb %xmm4, %xmm3
1942 ; X64-SSE-NEXT: pcmpeqb %xmm4, %xmm1
1943 ; X64-SSE-NEXT: pxor %xmm3, %xmm1
1944 ; X64-SSE-NEXT: pxor %xmm0, %xmm1
1945 ; X64-SSE-NEXT: pmovmskb %xmm1, %eax
1946 ; X64-SSE-NEXT: xorb %ah, %al
1947 ; X64-SSE-NEXT: setnp %al
1948 ; X64-SSE-NEXT: retq
1950 ; AVX1-LABEL: icmp0_v64i8_v64i1:
1952 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1953 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm3
1954 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm4
1955 ; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
1956 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1957 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
1958 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1959 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
1960 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1961 ; AVX1-NEXT: vpxor %xmm0, %xmm3, %xmm0
1962 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
1963 ; AVX1-NEXT: xorb %ah, %al
1964 ; AVX1-NEXT: setnp %al
1965 ; AVX1-NEXT: vzeroupper
1968 ; AVX2-LABEL: icmp0_v64i8_v64i1:
1970 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1971 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
1972 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
1973 ; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
1974 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
1975 ; AVX2-NEXT: movl %eax, %ecx
1976 ; AVX2-NEXT: shrl $16, %ecx
1977 ; AVX2-NEXT: xorl %eax, %ecx
1978 ; AVX2-NEXT: xorb %ch, %cl
1979 ; AVX2-NEXT: setnp %al
1980 ; AVX2-NEXT: vzeroupper
1983 ; AVX512F-LABEL: icmp0_v64i8_v64i1:
1985 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1986 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1987 ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
1988 ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
1989 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
1990 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
1991 ; AVX512F-NEXT: vpxor %xmm2, %xmm3, %xmm2
1992 ; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0
1993 ; AVX512F-NEXT: vpxor %xmm2, %xmm0, %xmm0
1994 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1995 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1996 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1997 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
1998 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
1999 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
2000 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
2001 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
2002 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
2003 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
2004 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
2005 ; AVX512F-NEXT: kmovw %k0, %eax
2006 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
2007 ; AVX512F-NEXT: vzeroupper
2008 ; AVX512F-NEXT: retq
2010 ; AVX512BW-LABEL: icmp0_v64i8_v64i1:
2011 ; AVX512BW: # %bb.0:
2012 ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
2013 ; AVX512BW-NEXT: kmovq %k0, %rax
2014 ; AVX512BW-NEXT: movq %rax, %rcx
2015 ; AVX512BW-NEXT: shrq $32, %rcx
2016 ; AVX512BW-NEXT: xorl %eax, %ecx
2017 ; AVX512BW-NEXT: movl %ecx, %eax
2018 ; AVX512BW-NEXT: shrl $16, %eax
2019 ; AVX512BW-NEXT: xorl %ecx, %eax
2020 ; AVX512BW-NEXT: xorb %ah, %al
2021 ; AVX512BW-NEXT: setnp %al
2022 ; AVX512BW-NEXT: vzeroupper
2023 ; AVX512BW-NEXT: retq
2025 ; AVX512VL-LABEL: icmp0_v64i8_v64i1:
2026 ; AVX512VL: # %bb.0:
2027 ; AVX512VL-NEXT: vptestnmb %zmm0, %zmm0, %k0
2028 ; AVX512VL-NEXT: kmovq %k0, %rax
2029 ; AVX512VL-NEXT: movq %rax, %rcx
2030 ; AVX512VL-NEXT: shrq $32, %rcx
2031 ; AVX512VL-NEXT: xorl %eax, %ecx
2032 ; AVX512VL-NEXT: movl %ecx, %eax
2033 ; AVX512VL-NEXT: shrl $16, %eax
2034 ; AVX512VL-NEXT: xorl %ecx, %eax
2035 ; AVX512VL-NEXT: xorb %ah, %al
2036 ; AVX512VL-NEXT: setnp %al
2037 ; AVX512VL-NEXT: vzeroupper
2038 ; AVX512VL-NEXT: retq
2039 %a = icmp eq <64 x i8> %0, zeroinitializer
2040 %b = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %a)
; Test: lane-wise equality compare of two <2 x i64> operands; the resulting
; <2 x i1> mask is folded to a single i1 with llvm.vector.reduce.xor.
; The per-target assertions below are autogenerated (see the NOTE at the top
; of the file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
2047 define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) nounwind {
2048 ; SSE2-LABEL: icmp_v2i64_v2i1:
2050 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
2051 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
2052 ; SSE2-NEXT: pand %xmm0, %xmm1
2053 ; SSE2-NEXT: movmskpd %xmm1, %eax
2054 ; SSE2-NEXT: testb %al, %al
2055 ; SSE2-NEXT: setnp %al
2056 ; SSE2-NEXT: ret{{[l|q]}}
2058 ; SSE41-LABEL: icmp_v2i64_v2i1:
2060 ; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
2061 ; SSE41-NEXT: movmskpd %xmm0, %eax
2062 ; SSE41-NEXT: testb %al, %al
2063 ; SSE41-NEXT: setnp %al
2066 ; AVX-LABEL: icmp_v2i64_v2i1:
2068 ; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
2069 ; AVX-NEXT: vmovmskpd %xmm0, %eax
2070 ; AVX-NEXT: testb %al, %al
2071 ; AVX-NEXT: setnp %al
2074 ; AVX512F-LABEL: icmp_v2i64_v2i1:
2076 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2077 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2078 ; AVX512F-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2079 ; AVX512F-NEXT: kmovw %k0, %eax
2080 ; AVX512F-NEXT: testb $3, %al
2081 ; AVX512F-NEXT: setnp %al
2082 ; AVX512F-NEXT: vzeroupper
2083 ; AVX512F-NEXT: retq
2085 ; AVX512BW-LABEL: icmp_v2i64_v2i1:
2086 ; AVX512BW: # %bb.0:
2087 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2088 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2089 ; AVX512BW-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2090 ; AVX512BW-NEXT: kmovd %k0, %eax
2091 ; AVX512BW-NEXT: testb $3, %al
2092 ; AVX512BW-NEXT: setnp %al
2093 ; AVX512BW-NEXT: vzeroupper
2094 ; AVX512BW-NEXT: retq
2096 ; AVX512VL-LABEL: icmp_v2i64_v2i1:
2097 ; AVX512VL: # %bb.0:
2098 ; AVX512VL-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
2099 ; AVX512VL-NEXT: kmovd %k0, %eax
2100 ; AVX512VL-NEXT: testb %al, %al
2101 ; AVX512VL-NEXT: setnp %al
2102 ; AVX512VL-NEXT: retq
; IR under test: per-lane equality mask (%a), then XOR reduction to i1 (%b).
2103 %a = icmp eq <2 x i64> %0, %1
2104 %b = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a)
; Test: lane-wise equality compare of two <4 x i32> operands; the resulting
; <4 x i1> mask is folded to a single i1 with llvm.vector.reduce.xor.
; The per-target assertions below are autogenerated (see the NOTE at the top
; of the file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
2108 define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) nounwind {
2109 ; SSE-LABEL: icmp_v4i32_v4i1:
2111 ; SSE-NEXT: pcmpeqd %xmm1, %xmm0
2112 ; SSE-NEXT: movmskps %xmm0, %eax
2113 ; SSE-NEXT: testb %al, %al
2114 ; SSE-NEXT: setnp %al
2115 ; SSE-NEXT: ret{{[l|q]}}
2117 ; AVX-LABEL: icmp_v4i32_v4i1:
2119 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2120 ; AVX-NEXT: vmovmskps %xmm0, %eax
2121 ; AVX-NEXT: testb %al, %al
2122 ; AVX-NEXT: setnp %al
2125 ; AVX512F-LABEL: icmp_v4i32_v4i1:
2127 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2128 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2129 ; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2130 ; AVX512F-NEXT: kmovw %k0, %eax
2131 ; AVX512F-NEXT: testb $15, %al
2132 ; AVX512F-NEXT: setnp %al
2133 ; AVX512F-NEXT: vzeroupper
2134 ; AVX512F-NEXT: retq
2136 ; AVX512BW-LABEL: icmp_v4i32_v4i1:
2137 ; AVX512BW: # %bb.0:
2138 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2139 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2140 ; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2141 ; AVX512BW-NEXT: kmovd %k0, %eax
2142 ; AVX512BW-NEXT: testb $15, %al
2143 ; AVX512BW-NEXT: setnp %al
2144 ; AVX512BW-NEXT: vzeroupper
2145 ; AVX512BW-NEXT: retq
2147 ; AVX512VL-LABEL: icmp_v4i32_v4i1:
2148 ; AVX512VL: # %bb.0:
2149 ; AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
2150 ; AVX512VL-NEXT: kmovd %k0, %eax
2151 ; AVX512VL-NEXT: testb %al, %al
2152 ; AVX512VL-NEXT: setnp %al
2153 ; AVX512VL-NEXT: retq
; IR under test: per-lane equality mask (%a), then XOR reduction to i1 (%b).
2154 %a = icmp eq <4 x i32> %0, %1
2155 %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
; Test: lane-wise equality compare of two <8 x i16> operands; the resulting
; <8 x i1> mask is folded to a single i1 with llvm.vector.reduce.xor.
; The per-target assertions below are autogenerated (see the NOTE at the top
; of the file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
2159 define i1 @icmp_v8i16_v8i1(<8 x i16>, <8 x i16>) nounwind {
2160 ; SSE-LABEL: icmp_v8i16_v8i1:
2162 ; SSE-NEXT: pcmpeqw %xmm1, %xmm0
2163 ; SSE-NEXT: packsswb %xmm0, %xmm0
2164 ; SSE-NEXT: pmovmskb %xmm0, %eax
2165 ; SSE-NEXT: testb %al, %al
2166 ; SSE-NEXT: setnp %al
2167 ; SSE-NEXT: ret{{[l|q]}}
2169 ; AVX-LABEL: icmp_v8i16_v8i1:
2171 ; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
2172 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2173 ; AVX-NEXT: vpmovmskb %xmm0, %eax
2174 ; AVX-NEXT: testb %al, %al
2175 ; AVX-NEXT: setnp %al
2178 ; AVX512F-LABEL: icmp_v8i16_v8i1:
2180 ; AVX512F-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
2181 ; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
2182 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
2183 ; AVX512F-NEXT: kmovw %k0, %eax
2184 ; AVX512F-NEXT: testb %al, %al
2185 ; AVX512F-NEXT: setnp %al
2186 ; AVX512F-NEXT: vzeroupper
2187 ; AVX512F-NEXT: retq
2189 ; AVX512BW-LABEL: icmp_v8i16_v8i1:
2190 ; AVX512BW: # %bb.0:
2191 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2192 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2193 ; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
2194 ; AVX512BW-NEXT: kmovd %k0, %eax
2195 ; AVX512BW-NEXT: testb %al, %al
2196 ; AVX512BW-NEXT: setnp %al
2197 ; AVX512BW-NEXT: vzeroupper
2198 ; AVX512BW-NEXT: retq
2200 ; AVX512VL-LABEL: icmp_v8i16_v8i1:
2201 ; AVX512VL: # %bb.0:
2202 ; AVX512VL-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
2203 ; AVX512VL-NEXT: kmovd %k0, %eax
2204 ; AVX512VL-NEXT: testb %al, %al
2205 ; AVX512VL-NEXT: setnp %al
2206 ; AVX512VL-NEXT: retq
; IR under test: per-lane equality mask (%a), then XOR reduction to i1 (%b).
2207 %a = icmp eq <8 x i16> %0, %1
2208 %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
; Test: lane-wise equality compare of two <16 x i8> operands; the resulting
; <16 x i1> mask is folded to a single i1 with llvm.vector.reduce.xor.
; The per-target assertions below are autogenerated (see the NOTE at the top
; of the file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
2212 define i1 @icmp_v16i8_v16i1(<16 x i8>, <16 x i8>) nounwind {
2213 ; SSE-LABEL: icmp_v16i8_v16i1:
2215 ; SSE-NEXT: pcmpeqb %xmm1, %xmm0
2216 ; SSE-NEXT: pmovmskb %xmm0, %eax
2217 ; SSE-NEXT: xorb %ah, %al
2218 ; SSE-NEXT: setnp %al
2219 ; SSE-NEXT: ret{{[l|q]}}
2221 ; AVX-LABEL: icmp_v16i8_v16i1:
2223 ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
2224 ; AVX-NEXT: vpmovmskb %xmm0, %eax
2225 ; AVX-NEXT: xorb %ah, %al
2226 ; AVX-NEXT: setnp %al
2229 ; AVX512F-LABEL: icmp_v16i8_v16i1:
2231 ; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
2232 ; AVX512F-NEXT: vpmovmskb %xmm0, %eax
2233 ; AVX512F-NEXT: xorb %ah, %al
2234 ; AVX512F-NEXT: setnp %al
2235 ; AVX512F-NEXT: retq
2237 ; AVX512BW-LABEL: icmp_v16i8_v16i1:
2238 ; AVX512BW: # %bb.0:
2239 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2240 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2241 ; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
2242 ; AVX512BW-NEXT: kmovd %k0, %eax
2243 ; AVX512BW-NEXT: movl %eax, %ecx
2244 ; AVX512BW-NEXT: shrl $8, %ecx
2245 ; AVX512BW-NEXT: xorb %al, %cl
2246 ; AVX512BW-NEXT: setnp %al
2247 ; AVX512BW-NEXT: vzeroupper
2248 ; AVX512BW-NEXT: retq
2250 ; AVX512VL-LABEL: icmp_v16i8_v16i1:
2251 ; AVX512VL: # %bb.0:
2252 ; AVX512VL-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
2253 ; AVX512VL-NEXT: kmovd %k0, %eax
2254 ; AVX512VL-NEXT: movl %eax, %ecx
2255 ; AVX512VL-NEXT: shrl $8, %ecx
2256 ; AVX512VL-NEXT: xorb %al, %cl
2257 ; AVX512VL-NEXT: setnp %al
2258 ; AVX512VL-NEXT: retq
; IR under test: per-lane equality mask (%a), then XOR reduction to i1 (%b).
2259 %a = icmp eq <16 x i8> %0, %1
2260 %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
; Test: lane-wise equality compare of two <4 x i64> operands (256 bits each);
; the resulting <4 x i1> mask is folded to a single i1 with
; llvm.vector.reduce.xor.
; The per-target assertions below are autogenerated (see the NOTE at the top
; of the file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
2264 define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) nounwind {
2265 ; X86-SSE2-LABEL: icmp_v4i64_v4i1:
2266 ; X86-SSE2: # %bb.0:
2267 ; X86-SSE2-NEXT: pushl %ebp
2268 ; X86-SSE2-NEXT: movl %esp, %ebp
2269 ; X86-SSE2-NEXT: andl $-16, %esp
2270 ; X86-SSE2-NEXT: subl $16, %esp
2271 ; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
2272 ; X86-SSE2-NEXT: pcmpeqd 8(%ebp), %xmm1
2273 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
2274 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm1[1,3]
2275 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
2276 ; X86-SSE2-NEXT: andps %xmm2, %xmm0
2277 ; X86-SSE2-NEXT: movmskps %xmm0, %eax
2278 ; X86-SSE2-NEXT: testb %al, %al
2279 ; X86-SSE2-NEXT: setnp %al
2280 ; X86-SSE2-NEXT: movl %ebp, %esp
2281 ; X86-SSE2-NEXT: popl %ebp
2282 ; X86-SSE2-NEXT: retl
2284 ; X64-SSE2-LABEL: icmp_v4i64_v4i1:
2285 ; X64-SSE2: # %bb.0:
2286 ; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm1
2287 ; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
2288 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
2289 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm1[1,3]
2290 ; X64-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
2291 ; X64-SSE2-NEXT: andps %xmm2, %xmm0
2292 ; X64-SSE2-NEXT: movmskps %xmm0, %eax
2293 ; X64-SSE2-NEXT: testb %al, %al
2294 ; X64-SSE2-NEXT: setnp %al
2295 ; X64-SSE2-NEXT: retq
2297 ; SSE41-LABEL: icmp_v4i64_v4i1:
2299 ; SSE41-NEXT: pcmpeqq %xmm3, %xmm1
2300 ; SSE41-NEXT: pcmpeqq %xmm2, %xmm0
2301 ; SSE41-NEXT: packssdw %xmm1, %xmm0
2302 ; SSE41-NEXT: movmskps %xmm0, %eax
2303 ; SSE41-NEXT: testb %al, %al
2304 ; SSE41-NEXT: setnp %al
2307 ; AVX1-LABEL: icmp_v4i64_v4i1:
2309 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2310 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2311 ; AVX1-NEXT: vpcmpeqq %xmm2, %xmm3, %xmm2
2312 ; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
2313 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2314 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
2315 ; AVX1-NEXT: testb %al, %al
2316 ; AVX1-NEXT: setnp %al
2317 ; AVX1-NEXT: vzeroupper
2320 ; AVX2-LABEL: icmp_v4i64_v4i1:
2322 ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
2323 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
2324 ; AVX2-NEXT: testb %al, %al
2325 ; AVX2-NEXT: setnp %al
2326 ; AVX2-NEXT: vzeroupper
2329 ; AVX512F-LABEL: icmp_v4i64_v4i1:
2331 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2332 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2333 ; AVX512F-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2334 ; AVX512F-NEXT: kmovw %k0, %eax
2335 ; AVX512F-NEXT: testb $15, %al
2336 ; AVX512F-NEXT: setnp %al
2337 ; AVX512F-NEXT: vzeroupper
2338 ; AVX512F-NEXT: retq
2340 ; AVX512BW-LABEL: icmp_v4i64_v4i1:
2341 ; AVX512BW: # %bb.0:
2342 ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2343 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2344 ; AVX512BW-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2345 ; AVX512BW-NEXT: kmovd %k0, %eax
2346 ; AVX512BW-NEXT: testb $15, %al
2347 ; AVX512BW-NEXT: setnp %al
2348 ; AVX512BW-NEXT: vzeroupper
2349 ; AVX512BW-NEXT: retq
2351 ; AVX512VL-LABEL: icmp_v4i64_v4i1:
2352 ; AVX512VL: # %bb.0:
2353 ; AVX512VL-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
2354 ; AVX512VL-NEXT: kmovd %k0, %eax
2355 ; AVX512VL-NEXT: testb %al, %al
2356 ; AVX512VL-NEXT: setnp %al
2357 ; AVX512VL-NEXT: vzeroupper
2358 ; AVX512VL-NEXT: retq
; IR under test: per-lane equality mask (%a), then XOR reduction to i1 (%b).
2359 %a = icmp eq <4 x i64> %0, %1
2360 %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
; Test: lane-wise equality compare of two <8 x i32> operands (256 bits each);
; the resulting <8 x i1> mask is folded to a single i1 with
; llvm.vector.reduce.xor.
; The per-target assertions below are autogenerated (see the NOTE at the top
; of the file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
2364 define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) nounwind {
2365 ; X86-SSE2-LABEL: icmp_v8i32_v8i1:
2366 ; X86-SSE2: # %bb.0:
2367 ; X86-SSE2-NEXT: pushl %ebp
2368 ; X86-SSE2-NEXT: movl %esp, %ebp
2369 ; X86-SSE2-NEXT: andl $-16, %esp
2370 ; X86-SSE2-NEXT: subl $16, %esp
2371 ; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
2372 ; X86-SSE2-NEXT: pcmpeqd 8(%ebp), %xmm1
2373 ; X86-SSE2-NEXT: packssdw %xmm1, %xmm0
2374 ; X86-SSE2-NEXT: packsswb %xmm0, %xmm0
2375 ; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
2376 ; X86-SSE2-NEXT: testb %al, %al
2377 ; X86-SSE2-NEXT: setnp %al
2378 ; X86-SSE2-NEXT: movl %ebp, %esp
2379 ; X86-SSE2-NEXT: popl %ebp
2380 ; X86-SSE2-NEXT: retl
2382 ; X64-SSE-LABEL: icmp_v8i32_v8i1:
2384 ; X64-SSE-NEXT: pcmpeqd %xmm3, %xmm1
2385 ; X64-SSE-NEXT: pcmpeqd %xmm2, %xmm0
2386 ; X64-SSE-NEXT: packssdw %xmm1, %xmm0
2387 ; X64-SSE-NEXT: packsswb %xmm0, %xmm0
2388 ; X64-SSE-NEXT: pmovmskb %xmm0, %eax
2389 ; X64-SSE-NEXT: testb %al, %al
2390 ; X64-SSE-NEXT: setnp %al
2391 ; X64-SSE-NEXT: retq
2393 ; AVX1-LABEL: icmp_v8i32_v8i1:
2395 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2396 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2397 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2
2398 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2399 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2400 ; AVX1-NEXT: vmovmskps %ymm0, %eax
2401 ; AVX1-NEXT: testb %al, %al
2402 ; AVX1-NEXT: setnp %al
2403 ; AVX1-NEXT: vzeroupper
2406 ; AVX2-LABEL: icmp_v8i32_v8i1:
2408 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
2409 ; AVX2-NEXT: vmovmskps %ymm0, %eax
2410 ; AVX2-NEXT: testb %al, %al
2411 ; AVX2-NEXT: setnp %al
2412 ; AVX2-NEXT: vzeroupper
2415 ; AVX512F-LABEL: icmp_v8i32_v8i1:
2417 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2418 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2419 ; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2420 ; AVX512F-NEXT: kmovw %k0, %eax
2421 ; AVX512F-NEXT: testb %al, %al
2422 ; AVX512F-NEXT: setnp %al
2423 ; AVX512F-NEXT: vzeroupper
2424 ; AVX512F-NEXT: retq
2426 ; AVX512BW-LABEL: icmp_v8i32_v8i1:
2427 ; AVX512BW: # %bb.0:
2428 ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2429 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2430 ; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2431 ; AVX512BW-NEXT: kmovd %k0, %eax
2432 ; AVX512BW-NEXT: testb %al, %al
2433 ; AVX512BW-NEXT: setnp %al
2434 ; AVX512BW-NEXT: vzeroupper
2435 ; AVX512BW-NEXT: retq
2437 ; AVX512VL-LABEL: icmp_v8i32_v8i1:
2438 ; AVX512VL: # %bb.0:
2439 ; AVX512VL-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
2440 ; AVX512VL-NEXT: kmovd %k0, %eax
2441 ; AVX512VL-NEXT: testb %al, %al
2442 ; AVX512VL-NEXT: setnp %al
2443 ; AVX512VL-NEXT: vzeroupper
2444 ; AVX512VL-NEXT: retq
; IR under test: per-lane equality mask (%a), then XOR reduction to i1 (%b).
2445 %a = icmp eq <8 x i32> %0, %1
2446 %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
; Test: lane-wise equality compare of two <16 x i16> operands (256 bits each);
; the resulting <16 x i1> mask is folded to a single i1 with
; llvm.vector.reduce.xor.
; The per-target assertions below are autogenerated (see the NOTE at the top
; of the file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
2450 define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) nounwind {
2451 ; X86-SSE2-LABEL: icmp_v16i16_v16i1:
2452 ; X86-SSE2: # %bb.0:
2453 ; X86-SSE2-NEXT: pushl %ebp
2454 ; X86-SSE2-NEXT: movl %esp, %ebp
2455 ; X86-SSE2-NEXT: andl $-16, %esp
2456 ; X86-SSE2-NEXT: subl $16, %esp
2457 ; X86-SSE2-NEXT: pcmpeqw %xmm2, %xmm0
2458 ; X86-SSE2-NEXT: pcmpeqw 8(%ebp), %xmm1
2459 ; X86-SSE2-NEXT: packsswb %xmm1, %xmm0
2460 ; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
2461 ; X86-SSE2-NEXT: xorb %ah, %al
2462 ; X86-SSE2-NEXT: setnp %al
2463 ; X86-SSE2-NEXT: movl %ebp, %esp
2464 ; X86-SSE2-NEXT: popl %ebp
2465 ; X86-SSE2-NEXT: retl
2467 ; X64-SSE-LABEL: icmp_v16i16_v16i1:
2469 ; X64-SSE-NEXT: pcmpeqw %xmm3, %xmm1
2470 ; X64-SSE-NEXT: pcmpeqw %xmm2, %xmm0
2471 ; X64-SSE-NEXT: packsswb %xmm1, %xmm0
2472 ; X64-SSE-NEXT: pmovmskb %xmm0, %eax
2473 ; X64-SSE-NEXT: xorb %ah, %al
2474 ; X64-SSE-NEXT: setnp %al
2475 ; X64-SSE-NEXT: retq
2477 ; AVX1-LABEL: icmp_v16i16_v16i1:
2479 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2480 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2481 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2
2482 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
2483 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
2484 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2485 ; AVX1-NEXT: xorb %ah, %al
2486 ; AVX1-NEXT: setnp %al
2487 ; AVX1-NEXT: vzeroupper
2490 ; AVX2-LABEL: icmp_v16i16_v16i1:
2492 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
2493 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2494 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2495 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
2496 ; AVX2-NEXT: xorb %ah, %al
2497 ; AVX2-NEXT: setnp %al
2498 ; AVX2-NEXT: vzeroupper
2501 ; AVX512F-LABEL: icmp_v16i16_v16i1:
2503 ; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
2504 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
2505 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
2506 ; AVX512F-NEXT: kmovw %k0, %eax
2507 ; AVX512F-NEXT: movl %eax, %ecx
2508 ; AVX512F-NEXT: shrl $8, %ecx
2509 ; AVX512F-NEXT: xorb %al, %cl
2510 ; AVX512F-NEXT: setnp %al
2511 ; AVX512F-NEXT: vzeroupper
2512 ; AVX512F-NEXT: retq
2514 ; AVX512BW-LABEL: icmp_v16i16_v16i1:
2515 ; AVX512BW: # %bb.0:
2516 ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2517 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2518 ; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
2519 ; AVX512BW-NEXT: kmovd %k0, %eax
2520 ; AVX512BW-NEXT: movl %eax, %ecx
2521 ; AVX512BW-NEXT: shrl $8, %ecx
2522 ; AVX512BW-NEXT: xorb %al, %cl
2523 ; AVX512BW-NEXT: setnp %al
2524 ; AVX512BW-NEXT: vzeroupper
2525 ; AVX512BW-NEXT: retq
2527 ; AVX512VL-LABEL: icmp_v16i16_v16i1:
2528 ; AVX512VL: # %bb.0:
2529 ; AVX512VL-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
2530 ; AVX512VL-NEXT: kmovd %k0, %eax
2531 ; AVX512VL-NEXT: movl %eax, %ecx
2532 ; AVX512VL-NEXT: shrl $8, %ecx
2533 ; AVX512VL-NEXT: xorb %al, %cl
2534 ; AVX512VL-NEXT: setnp %al
2535 ; AVX512VL-NEXT: vzeroupper
2536 ; AVX512VL-NEXT: retq
; IR under test: per-lane equality mask (%a), then XOR reduction to i1 (%b).
2537 %a = icmp eq <16 x i16> %0, %1
2538 %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
; Test: lane-wise equality compare of two <32 x i8> operands (256 bits each);
; the resulting <32 x i1> mask is folded to a single i1 with
; llvm.vector.reduce.xor.
; The per-target assertions below are autogenerated (see the NOTE at the top
; of the file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
2542 define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) nounwind {
2543 ; X86-SSE2-LABEL: icmp_v32i8_v32i1:
2544 ; X86-SSE2: # %bb.0:
2545 ; X86-SSE2-NEXT: pushl %ebp
2546 ; X86-SSE2-NEXT: movl %esp, %ebp
2547 ; X86-SSE2-NEXT: andl $-16, %esp
2548 ; X86-SSE2-NEXT: subl $16, %esp
2549 ; X86-SSE2-NEXT: pcmpeqb %xmm2, %xmm0
2550 ; X86-SSE2-NEXT: pcmpeqb 8(%ebp), %xmm1
2551 ; X86-SSE2-NEXT: pxor %xmm0, %xmm1
2552 ; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
2553 ; X86-SSE2-NEXT: xorb %ah, %al
2554 ; X86-SSE2-NEXT: setnp %al
2555 ; X86-SSE2-NEXT: movl %ebp, %esp
2556 ; X86-SSE2-NEXT: popl %ebp
2557 ; X86-SSE2-NEXT: retl
2559 ; X64-SSE-LABEL: icmp_v32i8_v32i1:
2561 ; X64-SSE-NEXT: pcmpeqb %xmm3, %xmm1
2562 ; X64-SSE-NEXT: pcmpeqb %xmm2, %xmm0
2563 ; X64-SSE-NEXT: pxor %xmm1, %xmm0
2564 ; X64-SSE-NEXT: pmovmskb %xmm0, %eax
2565 ; X64-SSE-NEXT: xorb %ah, %al
2566 ; X64-SSE-NEXT: setnp %al
2567 ; X64-SSE-NEXT: retq
2569 ; AVX1-LABEL: icmp_v32i8_v32i1:
2571 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2572 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2573 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2
2574 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
2575 ; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
2576 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2577 ; AVX1-NEXT: xorb %ah, %al
2578 ; AVX1-NEXT: setnp %al
2579 ; AVX1-NEXT: vzeroupper
2582 ; AVX2-LABEL: icmp_v32i8_v32i1:
2584 ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
2585 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2586 ; AVX2-NEXT: movl %eax, %ecx
2587 ; AVX2-NEXT: shrl $16, %ecx
2588 ; AVX2-NEXT: xorl %eax, %ecx
2589 ; AVX2-NEXT: xorb %ch, %cl
2590 ; AVX2-NEXT: setnp %al
2591 ; AVX2-NEXT: vzeroupper
2594 ; AVX512F-LABEL: icmp_v32i8_v32i1:
2596 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
2597 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
2598 ; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0
2599 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
2600 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
2601 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
2602 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
2603 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
2604 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
2605 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
2606 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
2607 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
2608 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
2609 ; AVX512F-NEXT: kmovw %k0, %eax
2610 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
2611 ; AVX512F-NEXT: vzeroupper
2612 ; AVX512F-NEXT: retq
2614 ; AVX512BW-LABEL: icmp_v32i8_v32i1:
2615 ; AVX512BW: # %bb.0:
2616 ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2617 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2618 ; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
2619 ; AVX512BW-NEXT: kmovd %k0, %eax
2620 ; AVX512BW-NEXT: movl %eax, %ecx
2621 ; AVX512BW-NEXT: shrl $16, %ecx
2622 ; AVX512BW-NEXT: xorl %eax, %ecx
2623 ; AVX512BW-NEXT: xorb %ch, %cl
2624 ; AVX512BW-NEXT: setnp %al
2625 ; AVX512BW-NEXT: vzeroupper
2626 ; AVX512BW-NEXT: retq
2628 ; AVX512VL-LABEL: icmp_v32i8_v32i1:
2629 ; AVX512VL: # %bb.0:
2630 ; AVX512VL-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
2631 ; AVX512VL-NEXT: kmovd %k0, %eax
2632 ; AVX512VL-NEXT: movl %eax, %ecx
2633 ; AVX512VL-NEXT: shrl $16, %ecx
2634 ; AVX512VL-NEXT: xorl %eax, %ecx
2635 ; AVX512VL-NEXT: xorb %ch, %cl
2636 ; AVX512VL-NEXT: setnp %al
2637 ; AVX512VL-NEXT: vzeroupper
2638 ; AVX512VL-NEXT: retq
; IR under test: per-lane equality mask (%a), then XOR reduction to i1 (%b).
2639 %a = icmp eq <32 x i8> %0, %1
2640 %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
; Test: lane-wise equality compare of two <8 x i64> operands (512 bits each);
; the resulting <8 x i1> mask is folded to a single i1 with
; llvm.vector.reduce.xor.
; The per-target assertions below are autogenerated (see the NOTE at the top
; of the file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
2644 define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) nounwind {
2645 ; X86-SSE2-LABEL: icmp_v8i64_v8i1:
2646 ; X86-SSE2: # %bb.0:
2647 ; X86-SSE2-NEXT: pushl %ebp
2648 ; X86-SSE2-NEXT: movl %esp, %ebp
2649 ; X86-SSE2-NEXT: andl $-16, %esp
2650 ; X86-SSE2-NEXT: subl $16, %esp
2651 ; X86-SSE2-NEXT: movdqa 8(%ebp), %xmm3
2652 ; X86-SSE2-NEXT: pcmpeqd 72(%ebp), %xmm3
2653 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,0,3,2]
2654 ; X86-SSE2-NEXT: pand %xmm3, %xmm4
2655 ; X86-SSE2-NEXT: pcmpeqd 56(%ebp), %xmm2
2656 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
2657 ; X86-SSE2-NEXT: pand %xmm2, %xmm3
2658 ; X86-SSE2-NEXT: packssdw %xmm4, %xmm3
2659 ; X86-SSE2-NEXT: pcmpeqd 40(%ebp), %xmm1
2660 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
2661 ; X86-SSE2-NEXT: pand %xmm1, %xmm2
2662 ; X86-SSE2-NEXT: pcmpeqd 24(%ebp), %xmm0
2663 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
2664 ; X86-SSE2-NEXT: pand %xmm0, %xmm1
2665 ; X86-SSE2-NEXT: packssdw %xmm2, %xmm1
2666 ; X86-SSE2-NEXT: packssdw %xmm3, %xmm1
2667 ; X86-SSE2-NEXT: packsswb %xmm1, %xmm1
2668 ; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
2669 ; X86-SSE2-NEXT: testb %al, %al
2670 ; X86-SSE2-NEXT: setnp %al
2671 ; X86-SSE2-NEXT: movl %ebp, %esp
2672 ; X86-SSE2-NEXT: popl %ebp
2673 ; X86-SSE2-NEXT: retl
2675 ; X64-SSE2-LABEL: icmp_v8i64_v8i1:
2676 ; X64-SSE2: # %bb.0:
2677 ; X64-SSE2-NEXT: pcmpeqd %xmm7, %xmm3
2678 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm3[1,0,3,2]
2679 ; X64-SSE2-NEXT: pand %xmm3, %xmm7
2680 ; X64-SSE2-NEXT: pcmpeqd %xmm6, %xmm2
2681 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
2682 ; X64-SSE2-NEXT: pand %xmm2, %xmm3
2683 ; X64-SSE2-NEXT: packssdw %xmm7, %xmm3
2684 ; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm1
2685 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
2686 ; X64-SSE2-NEXT: pand %xmm1, %xmm2
2687 ; X64-SSE2-NEXT: pcmpeqd %xmm4, %xmm0
2688 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
2689 ; X64-SSE2-NEXT: pand %xmm0, %xmm1
2690 ; X64-SSE2-NEXT: packssdw %xmm2, %xmm1
2691 ; X64-SSE2-NEXT: packssdw %xmm3, %xmm1
2692 ; X64-SSE2-NEXT: packsswb %xmm1, %xmm1
2693 ; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
2694 ; X64-SSE2-NEXT: testb %al, %al
2695 ; X64-SSE2-NEXT: setnp %al
2696 ; X64-SSE2-NEXT: retq
2698 ; SSE41-LABEL: icmp_v8i64_v8i1:
2700 ; SSE41-NEXT: pcmpeqq %xmm7, %xmm3
2701 ; SSE41-NEXT: pcmpeqq %xmm6, %xmm2
2702 ; SSE41-NEXT: packssdw %xmm3, %xmm2
2703 ; SSE41-NEXT: pcmpeqq %xmm5, %xmm1
2704 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
2705 ; SSE41-NEXT: packssdw %xmm1, %xmm0
2706 ; SSE41-NEXT: packssdw %xmm2, %xmm0
2707 ; SSE41-NEXT: packsswb %xmm0, %xmm0
2708 ; SSE41-NEXT: pmovmskb %xmm0, %eax
2709 ; SSE41-NEXT: testb %al, %al
2710 ; SSE41-NEXT: setnp %al
2713 ; AVX1-LABEL: icmp_v8i64_v8i1:
2715 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
2716 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
2717 ; AVX1-NEXT: vpcmpeqq %xmm4, %xmm5, %xmm4
2718 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
2719 ; AVX1-NEXT: vpackssdw %xmm4, %xmm1, %xmm1
2720 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
2721 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
2722 ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm4, %xmm3
2723 ; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
2724 ; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
2725 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2726 ; AVX1-NEXT: vmovmskps %ymm0, %eax
2727 ; AVX1-NEXT: testb %al, %al
2728 ; AVX1-NEXT: setnp %al
2729 ; AVX1-NEXT: vzeroupper
2732 ; AVX2-LABEL: icmp_v8i64_v8i1:
2734 ; AVX2-NEXT: vpcmpeqq %ymm3, %ymm1, %ymm1
2735 ; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
2736 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
2737 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2738 ; AVX2-NEXT: vmovmskps %ymm0, %eax
2739 ; AVX2-NEXT: testb %al, %al
2740 ; AVX2-NEXT: setnp %al
2741 ; AVX2-NEXT: vzeroupper
2744 ; AVX512F-LABEL: icmp_v8i64_v8i1:
2746 ; AVX512F-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2747 ; AVX512F-NEXT: kmovw %k0, %eax
2748 ; AVX512F-NEXT: testb %al, %al
2749 ; AVX512F-NEXT: setnp %al
2750 ; AVX512F-NEXT: vzeroupper
2751 ; AVX512F-NEXT: retq
2753 ; AVX512BW-LABEL: icmp_v8i64_v8i1:
2754 ; AVX512BW: # %bb.0:
2755 ; AVX512BW-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2756 ; AVX512BW-NEXT: kmovd %k0, %eax
2757 ; AVX512BW-NEXT: testb %al, %al
2758 ; AVX512BW-NEXT: setnp %al
2759 ; AVX512BW-NEXT: vzeroupper
2760 ; AVX512BW-NEXT: retq
2762 ; AVX512VL-LABEL: icmp_v8i64_v8i1:
2763 ; AVX512VL: # %bb.0:
2764 ; AVX512VL-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2765 ; AVX512VL-NEXT: kmovd %k0, %eax
2766 ; AVX512VL-NEXT: testb %al, %al
2767 ; AVX512VL-NEXT: setnp %al
2768 ; AVX512VL-NEXT: vzeroupper
2769 ; AVX512VL-NEXT: retq
; IR under test: per-lane equality mask (%a), then XOR reduction to i1 (%b).
2770 %a = icmp eq <8 x i64> %0, %1
2771 %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
; Test: lane-wise equality compare of two <16 x i32> operands (512 bits each);
; the resulting <16 x i1> mask is folded to a single i1 with
; llvm.vector.reduce.xor.
; The per-target assertions below are autogenerated (see the NOTE at the top
; of the file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
2775 define i1 @icmp_v16i32_v16i1(<16 x i32>, <16 x i32>) nounwind {
2776 ; X86-SSE2-LABEL: icmp_v16i32_v16i1:
2777 ; X86-SSE2: # %bb.0:
2778 ; X86-SSE2-NEXT: pushl %ebp
2779 ; X86-SSE2-NEXT: movl %esp, %ebp
2780 ; X86-SSE2-NEXT: andl $-16, %esp
2781 ; X86-SSE2-NEXT: subl $16, %esp
2782 ; X86-SSE2-NEXT: movdqa 8(%ebp), %xmm3
2783 ; X86-SSE2-NEXT: pcmpeqd 72(%ebp), %xmm3
2784 ; X86-SSE2-NEXT: pcmpeqd 56(%ebp), %xmm2
2785 ; X86-SSE2-NEXT: packssdw %xmm3, %xmm2
2786 ; X86-SSE2-NEXT: pcmpeqd 40(%ebp), %xmm1
2787 ; X86-SSE2-NEXT: pcmpeqd 24(%ebp), %xmm0
2788 ; X86-SSE2-NEXT: packssdw %xmm1, %xmm0
2789 ; X86-SSE2-NEXT: packsswb %xmm2, %xmm0
2790 ; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
2791 ; X86-SSE2-NEXT: xorb %ah, %al
2792 ; X86-SSE2-NEXT: setnp %al
2793 ; X86-SSE2-NEXT: movl %ebp, %esp
2794 ; X86-SSE2-NEXT: popl %ebp
2795 ; X86-SSE2-NEXT: retl
2797 ; X64-SSE-LABEL: icmp_v16i32_v16i1:
2799 ; X64-SSE-NEXT: pcmpeqd %xmm7, %xmm3
2800 ; X64-SSE-NEXT: pcmpeqd %xmm6, %xmm2
2801 ; X64-SSE-NEXT: packssdw %xmm3, %xmm2
2802 ; X64-SSE-NEXT: pcmpeqd %xmm5, %xmm1
2803 ; X64-SSE-NEXT: pcmpeqd %xmm4, %xmm0
2804 ; X64-SSE-NEXT: packssdw %xmm1, %xmm0
2805 ; X64-SSE-NEXT: packsswb %xmm2, %xmm0
2806 ; X64-SSE-NEXT: pmovmskb %xmm0, %eax
2807 ; X64-SSE-NEXT: xorb %ah, %al
2808 ; X64-SSE-NEXT: setnp %al
2809 ; X64-SSE-NEXT: retq
2811 ; AVX1-LABEL: icmp_v16i32_v16i1:
2813 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
2814 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
2815 ; AVX1-NEXT: vpcmpeqd %xmm4, %xmm5, %xmm4
2816 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
2817 ; AVX1-NEXT: vpackssdw %xmm4, %xmm1, %xmm1
2818 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
2819 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
2820 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm4, %xmm3
2821 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
2822 ; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
2823 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2824 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2825 ; AVX1-NEXT: xorb %ah, %al
2826 ; AVX1-NEXT: setnp %al
2827 ; AVX1-NEXT: vzeroupper
2830 ; AVX2-LABEL: icmp_v16i32_v16i1:
2832 ; AVX2-NEXT: vpcmpeqd %ymm3, %ymm1, %ymm1
2833 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
2834 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
2835 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2836 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
2837 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
2838 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
2839 ; AVX2-NEXT: xorb %ah, %al
2840 ; AVX2-NEXT: setnp %al
2841 ; AVX2-NEXT: vzeroupper
2844 ; AVX512F-LABEL: icmp_v16i32_v16i1:
2846 ; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2847 ; AVX512F-NEXT: kmovw %k0, %eax
2848 ; AVX512F-NEXT: movl %eax, %ecx
2849 ; AVX512F-NEXT: shrl $8, %ecx
2850 ; AVX512F-NEXT: xorb %al, %cl
2851 ; AVX512F-NEXT: setnp %al
2852 ; AVX512F-NEXT: vzeroupper
2853 ; AVX512F-NEXT: retq
2855 ; AVX512BW-LABEL: icmp_v16i32_v16i1:
2856 ; AVX512BW: # %bb.0:
2857 ; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2858 ; AVX512BW-NEXT: kmovd %k0, %eax
2859 ; AVX512BW-NEXT: movl %eax, %ecx
2860 ; AVX512BW-NEXT: shrl $8, %ecx
2861 ; AVX512BW-NEXT: xorb %al, %cl
2862 ; AVX512BW-NEXT: setnp %al
2863 ; AVX512BW-NEXT: vzeroupper
2864 ; AVX512BW-NEXT: retq
2866 ; AVX512VL-LABEL: icmp_v16i32_v16i1:
2867 ; AVX512VL: # %bb.0:
2868 ; AVX512VL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2869 ; AVX512VL-NEXT: kmovd %k0, %eax
2870 ; AVX512VL-NEXT: movl %eax, %ecx
2871 ; AVX512VL-NEXT: shrl $8, %ecx
2872 ; AVX512VL-NEXT: xorb %al, %cl
2873 ; AVX512VL-NEXT: setnp %al
2874 ; AVX512VL-NEXT: vzeroupper
2875 ; AVX512VL-NEXT: retq
; IR under test: per-lane equality mask (%a), then XOR reduction to i1 (%b).
2876 %a = icmp eq <16 x i32> %0, %1
2877 %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
; Test: lane-wise equality compare of two <32 x i16> operands (512 bits each);
; the resulting <32 x i1> mask is folded to a single i1 with
; llvm.vector.reduce.xor.
; The per-target assertions below are autogenerated (see the NOTE at the top
; of the file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
2881 define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) nounwind {
2882 ; X86-SSE2-LABEL: icmp_v32i16_v32i1:
2883 ; X86-SSE2: # %bb.0:
2884 ; X86-SSE2-NEXT: pushl %ebp
2885 ; X86-SSE2-NEXT: movl %esp, %ebp
2886 ; X86-SSE2-NEXT: andl $-16, %esp
2887 ; X86-SSE2-NEXT: subl $16, %esp
2888 ; X86-SSE2-NEXT: movdqa 8(%ebp), %xmm3
2889 ; X86-SSE2-NEXT: pcmpeqw 56(%ebp), %xmm2
2890 ; X86-SSE2-NEXT: pcmpeqw 24(%ebp), %xmm0
2891 ; X86-SSE2-NEXT: pxor %xmm2, %xmm0
2892 ; X86-SSE2-NEXT: pcmpeqw 72(%ebp), %xmm3
2893 ; X86-SSE2-NEXT: pcmpeqw 40(%ebp), %xmm1
2894 ; X86-SSE2-NEXT: pxor %xmm3, %xmm1
2895 ; X86-SSE2-NEXT: packsswb %xmm1, %xmm0
2896 ; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
2897 ; X86-SSE2-NEXT: xorb %ah, %al
2898 ; X86-SSE2-NEXT: setnp %al
2899 ; X86-SSE2-NEXT: movl %ebp, %esp
2900 ; X86-SSE2-NEXT: popl %ebp
2901 ; X86-SSE2-NEXT: retl
2903 ; X64-SSE-LABEL: icmp_v32i16_v32i1:
2905 ; X64-SSE-NEXT: pcmpeqw %xmm6, %xmm2
2906 ; X64-SSE-NEXT: pcmpeqw %xmm4, %xmm0
2907 ; X64-SSE-NEXT: pxor %xmm2, %xmm0
2908 ; X64-SSE-NEXT: pcmpeqw %xmm7, %xmm3
2909 ; X64-SSE-NEXT: pcmpeqw %xmm5, %xmm1
2910 ; X64-SSE-NEXT: pxor %xmm3, %xmm1
2911 ; X64-SSE-NEXT: packsswb %xmm1, %xmm0
2912 ; X64-SSE-NEXT: pmovmskb %xmm0, %eax
2913 ; X64-SSE-NEXT: xorb %ah, %al
2914 ; X64-SSE-NEXT: setnp %al
2915 ; X64-SSE-NEXT: retq
2917 ; AVX1-LABEL: icmp_v32i16_v32i1:
2919 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm4
2920 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm5
2921 ; AVX1-NEXT: vpxor %xmm4, %xmm5, %xmm4
2922 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
2923 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2924 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
2925 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
2926 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2927 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
2928 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
2929 ; AVX1-NEXT: vpacksswb %xmm0, %xmm4, %xmm0
2930 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2931 ; AVX1-NEXT: xorb %ah, %al
2932 ; AVX1-NEXT: setnp %al
2933 ; AVX1-NEXT: vzeroupper
2936 ; AVX2-LABEL: icmp_v32i16_v32i1:
2938 ; AVX2-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
2939 ; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
2940 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
2941 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2942 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2943 ; AVX2-NEXT: movl %eax, %ecx
2944 ; AVX2-NEXT: shrl $16, %ecx
2945 ; AVX2-NEXT: xorl %eax, %ecx
2946 ; AVX2-NEXT: xorb %ch, %cl
2947 ; AVX2-NEXT: setnp %al
2948 ; AVX2-NEXT: vzeroupper
2951 ; AVX512F-LABEL: icmp_v32i16_v32i1:
2953 ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
2954 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
2955 ; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
2956 ; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
2957 ; AVX512F-NEXT: vpxor %ymm2, %ymm0, %ymm0
2958 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
2959 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
2960 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
2961 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
2962 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
2963 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
2964 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
2965 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
2966 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
2967 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
2968 ; AVX512F-NEXT: kmovw %k0, %eax
2969 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
2970 ; AVX512F-NEXT: vzeroupper
2971 ; AVX512F-NEXT: retq
2973 ; AVX512BW-LABEL: icmp_v32i16_v32i1:
2974 ; AVX512BW: # %bb.0:
2975 ; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
2976 ; AVX512BW-NEXT: kmovd %k0, %eax
2977 ; AVX512BW-NEXT: movl %eax, %ecx
2978 ; AVX512BW-NEXT: shrl $16, %ecx
2979 ; AVX512BW-NEXT: xorl %eax, %ecx
2980 ; AVX512BW-NEXT: xorb %ch, %cl
2981 ; AVX512BW-NEXT: setnp %al
2982 ; AVX512BW-NEXT: vzeroupper
2983 ; AVX512BW-NEXT: retq
2985 ; AVX512VL-LABEL: icmp_v32i16_v32i1:
2986 ; AVX512VL: # %bb.0:
2987 ; AVX512VL-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
2988 ; AVX512VL-NEXT: kmovd %k0, %eax
2989 ; AVX512VL-NEXT: movl %eax, %ecx
2990 ; AVX512VL-NEXT: shrl $16, %ecx
2991 ; AVX512VL-NEXT: xorl %eax, %ecx
2992 ; AVX512VL-NEXT: xorb %ch, %cl
2993 ; AVX512VL-NEXT: setnp %al
2994 ; AVX512VL-NEXT: vzeroupper
2995 ; AVX512VL-NEXT: retq
; IR under test: per-lane equality mask (%a), then XOR reduction to i1 (%b).
2996 %a = icmp eq <32 x i16> %0, %1
2997 %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
; icmp_v64i8_v64i1: compares the two <64 x i8> arguments for equality and
; XOR-reduces the resulting <64 x i1> mask to a single i1 through
; llvm.vector.reduce.xor.v64i1.
; The assertions below cover the X86-SSE2, X64-SSE, AVX1, AVX2, AVX512F,
; AVX512BW and AVX512VL check prefixes. Per the note at the top of this file
; they are produced by utils/update_llc_test_checks.py, so regenerate them with
; that script instead of editing them by hand.
3001 define i1 @icmp_v64i8_v64i1(<64 x i8>, <64 x i8>) nounwind {
3002 ; X86-SSE2-LABEL: icmp_v64i8_v64i1:
3003 ; X86-SSE2: # %bb.0:
3004 ; X86-SSE2-NEXT: pushl %ebp
3005 ; X86-SSE2-NEXT: movl %esp, %ebp
3006 ; X86-SSE2-NEXT: andl $-16, %esp
3007 ; X86-SSE2-NEXT: subl $16, %esp
3008 ; X86-SSE2-NEXT: movdqa 8(%ebp), %xmm3
3009 ; X86-SSE2-NEXT: pcmpeqb 56(%ebp), %xmm2
3010 ; X86-SSE2-NEXT: pcmpeqb 24(%ebp), %xmm0
3011 ; X86-SSE2-NEXT: pxor %xmm2, %xmm0
3012 ; X86-SSE2-NEXT: pcmpeqb 72(%ebp), %xmm3
3013 ; X86-SSE2-NEXT: pcmpeqb 40(%ebp), %xmm1
3014 ; X86-SSE2-NEXT: pxor %xmm3, %xmm1
3015 ; X86-SSE2-NEXT: pxor %xmm0, %xmm1
3016 ; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
3017 ; X86-SSE2-NEXT: xorb %ah, %al
3018 ; X86-SSE2-NEXT: setnp %al
3019 ; X86-SSE2-NEXT: movl %ebp, %esp
3020 ; X86-SSE2-NEXT: popl %ebp
3021 ; X86-SSE2-NEXT: retl
3023 ; X64-SSE-LABEL: icmp_v64i8_v64i1:
3025 ; X64-SSE-NEXT: pcmpeqb %xmm6, %xmm2
3026 ; X64-SSE-NEXT: pcmpeqb %xmm4, %xmm0
3027 ; X64-SSE-NEXT: pxor %xmm2, %xmm0
3028 ; X64-SSE-NEXT: pcmpeqb %xmm7, %xmm3
3029 ; X64-SSE-NEXT: pcmpeqb %xmm5, %xmm1
3030 ; X64-SSE-NEXT: pxor %xmm3, %xmm1
3031 ; X64-SSE-NEXT: pxor %xmm0, %xmm1
3032 ; X64-SSE-NEXT: pmovmskb %xmm1, %eax
3033 ; X64-SSE-NEXT: xorb %ah, %al
3034 ; X64-SSE-NEXT: setnp %al
3035 ; X64-SSE-NEXT: retq
3037 ; AVX1-LABEL: icmp_v64i8_v64i1:
3039 ; AVX1-NEXT: vpcmpeqb %xmm3, %xmm1, %xmm4
3040 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm5
3041 ; AVX1-NEXT: vpxor %xmm4, %xmm5, %xmm4
3042 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
3043 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
3044 ; AVX1-NEXT: vpcmpeqb %xmm3, %xmm1, %xmm1
3045 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
3046 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3047 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
3048 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
3049 ; AVX1-NEXT: vpxor %xmm0, %xmm4, %xmm0
3050 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3051 ; AVX1-NEXT: xorb %ah, %al
3052 ; AVX1-NEXT: setnp %al
3053 ; AVX1-NEXT: vzeroupper
3056 ; AVX2-LABEL: icmp_v64i8_v64i1:
3058 ; AVX2-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1
3059 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
3060 ; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
3061 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
3062 ; AVX2-NEXT: movl %eax, %ecx
3063 ; AVX2-NEXT: shrl $16, %ecx
3064 ; AVX2-NEXT: xorl %eax, %ecx
3065 ; AVX2-NEXT: xorb %ch, %cl
3066 ; AVX2-NEXT: setnp %al
3067 ; AVX2-NEXT: vzeroupper
3070 ; AVX512F-LABEL: icmp_v64i8_v64i1:
3072 ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
3073 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
3074 ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm3, %ymm2
3075 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
3076 ; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1
3077 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
3078 ; AVX512F-NEXT: vpxor %xmm1, %xmm3, %xmm1
3079 ; AVX512F-NEXT: vpxor %xmm2, %xmm0, %xmm0
3080 ; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0
3081 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
3082 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
3083 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
3084 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
3085 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
3086 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
3087 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
3088 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
3089 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
3090 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
3091 ; AVX512F-NEXT: kxorw %k1, %k0, %k0
3092 ; AVX512F-NEXT: kmovw %k0, %eax
3093 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
3094 ; AVX512F-NEXT: vzeroupper
3095 ; AVX512F-NEXT: retq
3097 ; AVX512BW-LABEL: icmp_v64i8_v64i1:
3098 ; AVX512BW: # %bb.0:
3099 ; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
3100 ; AVX512BW-NEXT: kmovq %k0, %rax
3101 ; AVX512BW-NEXT: movq %rax, %rcx
3102 ; AVX512BW-NEXT: shrq $32, %rcx
3103 ; AVX512BW-NEXT: xorl %eax, %ecx
3104 ; AVX512BW-NEXT: movl %ecx, %eax
3105 ; AVX512BW-NEXT: shrl $16, %eax
3106 ; AVX512BW-NEXT: xorl %ecx, %eax
3107 ; AVX512BW-NEXT: xorb %ah, %al
3108 ; AVX512BW-NEXT: setnp %al
3109 ; AVX512BW-NEXT: vzeroupper
3110 ; AVX512BW-NEXT: retq
3112 ; AVX512VL-LABEL: icmp_v64i8_v64i1:
3113 ; AVX512VL: # %bb.0:
3114 ; AVX512VL-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
3115 ; AVX512VL-NEXT: kmovq %k0, %rax
3116 ; AVX512VL-NEXT: movq %rax, %rcx
3117 ; AVX512VL-NEXT: shrq $32, %rcx
3118 ; AVX512VL-NEXT: xorl %eax, %ecx
3119 ; AVX512VL-NEXT: movl %ecx, %eax
3120 ; AVX512VL-NEXT: shrl $16, %eax
3121 ; AVX512VL-NEXT: xorl %ecx, %eax
3122 ; AVX512VL-NEXT: xorb %ah, %al
3123 ; AVX512VL-NEXT: setnp %al
3124 ; AVX512VL-NEXT: vzeroupper
3125 ; AVX512VL-NEXT: retq
; IR under test: lane-wise equality compare, then an XOR reduction over the
; 64 i1 lanes (the result is 1 when an odd number of lanes compared equal).
3126 %a = icmp eq <64 x i8> %0, %1
3127 %b = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %a)
; Declarations of the llvm.vector.reduce.xor intrinsic, one overload per
; <N x i1> operand width (N = 2, 4, 8, 16, 32, 64); each returns a single i1.
3131 declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1>)
3132 declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1>)
3133 declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1>)
3134 declare i1 @llvm.vector.reduce.xor.v16i1(<16 x i1>)
3135 declare i1 @llvm.vector.reduce.xor.v32i1(<32 x i1>)
3136 declare i1 @llvm.vector.reduce.xor.v64i1(<64 x i1>)