1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512 --check-prefixes=AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512 --check-prefixes=AVX512BW
; Signed greater-than compare of two <8 x i16> vectors; the <8 x i1> result
; is bitcast to an i8 bitmask. The expected assembly below leaves the mask in
; %eax (via pmovmskb, vpmovmskb or a k-register move, depending on the target).
9 define i8 @v8i16(<8 x i16> %a, <8 x i16> %b) {
10 ; SSE2-SSSE3-LABEL: v8i16:
11 ; SSE2-SSSE3: # %bb.0:
12 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
13 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
14 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
15 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
16 ; SSE2-SSSE3-NEXT: retq
20 ; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
21 ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
22 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
23 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
26 ; AVX512F-LABEL: v8i16:
28 ; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
29 ; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
30 ; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0
31 ; AVX512F-NEXT: kmovw %k0, %eax
32 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
33 ; AVX512F-NEXT: vzeroupper
36 ; AVX512BW-LABEL: v8i16:
38 ; AVX512BW-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
39 ; AVX512BW-NEXT: kmovd %k0, %eax
40 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
42 %x = icmp sgt <8 x i16> %a, %b
43 %res = bitcast <8 x i1> %x to i8
; Signed greater-than compare of two <4 x i32> vectors; the <4 x i1> result
; is bitcast to an i4 bitmask. The expected assembly extracts the mask with
; movmskps / vmovmskps or a k-register move.
47 define i4 @v4i32(<4 x i32> %a, <4 x i32> %b) {
48 ; SSE2-SSSE3-LABEL: v4i32:
49 ; SSE2-SSSE3: # %bb.0:
50 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
51 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
52 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
53 ; SSE2-SSSE3-NEXT: retq
57 ; AVX12-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
58 ; AVX12-NEXT: vmovmskps %xmm0, %eax
59 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
62 ; AVX512F-LABEL: v4i32:
64 ; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
65 ; AVX512F-NEXT: kmovw %k0, %eax
66 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
69 ; AVX512BW-LABEL: v4i32:
71 ; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
72 ; AVX512BW-NEXT: kmovd %k0, %eax
73 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
75 %x = icmp sgt <4 x i32> %a, %b
76 %res = bitcast <4 x i1> %x to i4
; Ordered greater-than compare (fcmp ogt) of two <4 x float> vectors; the
; <4 x i1> result is bitcast to an i4 bitmask. The expected assembly expresses
; the compare as a less-than with the two operands swapped.
80 define i4 @v4f32(<4 x float> %a, <4 x float> %b) {
81 ; SSE2-SSSE3-LABEL: v4f32:
82 ; SSE2-SSSE3: # %bb.0:
83 ; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm1
84 ; SSE2-SSSE3-NEXT: movmskps %xmm1, %eax
85 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
86 ; SSE2-SSSE3-NEXT: retq
90 ; AVX12-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
91 ; AVX12-NEXT: vmovmskps %xmm0, %eax
92 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
95 ; AVX512F-LABEL: v4f32:
97 ; AVX512F-NEXT: vcmpltps %xmm0, %xmm1, %k0
98 ; AVX512F-NEXT: kmovw %k0, %eax
99 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
102 ; AVX512BW-LABEL: v4f32:
104 ; AVX512BW-NEXT: vcmpltps %xmm0, %xmm1, %k0
105 ; AVX512BW-NEXT: kmovd %k0, %eax
106 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
107 ; AVX512BW-NEXT: retq
108 %x = fcmp ogt <4 x float> %a, %b
109 %res = bitcast <4 x i1> %x to i4
; Signed greater-than compare of two <16 x i8> vectors; the <16 x i1> result
; is bitcast to an i16 bitmask. Every expected sequence is a byte compare
; followed by a single mask extraction into %eax.
113 define i16 @v16i8(<16 x i8> %a, <16 x i8> %b) {
114 ; SSE2-SSSE3-LABEL: v16i8:
115 ; SSE2-SSSE3: # %bb.0:
116 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
117 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
118 ; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
119 ; SSE2-SSSE3-NEXT: retq
121 ; AVX12-LABEL: v16i8:
123 ; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
124 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
125 ; AVX12-NEXT: # kill: def $ax killed $ax killed $eax
128 ; AVX512F-LABEL: v16i8:
130 ; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
131 ; AVX512F-NEXT: vpmovmskb %xmm0, %eax
132 ; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax
135 ; AVX512BW-LABEL: v16i8:
137 ; AVX512BW-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
138 ; AVX512BW-NEXT: kmovd %k0, %eax
139 ; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
140 ; AVX512BW-NEXT: retq
141 %x = icmp sgt <16 x i8> %a, %b
142 %res = bitcast <16 x i1> %x to i16
; Signed greater-than compare of two <2 x i8> vectors; the <2 x i1> result is
; bitcast to an i2 bitmask. On the non-AVX512 targets the expected assembly
; widens the byte compare result before extracting the mask with movmskpd.
146 define i2 @v2i8(<2 x i8> %a, <2 x i8> %b) {
149 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm0
150 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
151 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
152 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
153 ; SSE2-NEXT: movmskpd %xmm0, %eax
154 ; SSE2-NEXT: # kill: def $al killed $al killed $eax
159 ; SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
160 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,0,0,u,u,0,0,u,u,1,1,u,u,1,1]
161 ; SSSE3-NEXT: movmskpd %xmm0, %eax
162 ; SSSE3-NEXT: # kill: def $al killed $al killed $eax
167 ; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
168 ; AVX12-NEXT: vpmovsxbq %xmm0, %xmm0
169 ; AVX12-NEXT: vmovmskpd %xmm0, %eax
170 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
173 ; AVX512F-LABEL: v2i8:
175 ; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
176 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
177 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
178 ; AVX512F-NEXT: kmovw %k0, %eax
179 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
180 ; AVX512F-NEXT: vzeroupper
183 ; AVX512BW-LABEL: v2i8:
185 ; AVX512BW-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
186 ; AVX512BW-NEXT: kmovd %k0, %eax
187 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
188 ; AVX512BW-NEXT: retq
189 %x = icmp sgt <2 x i8> %a, %b
190 %res = bitcast <2 x i1> %x to i2
; Signed greater-than compare of two <2 x i16> vectors; the <2 x i1> result
; is bitcast to an i2 bitmask. On the non-AVX512 targets the expected assembly
; widens the word compare result before extracting the mask with movmskpd.
194 define i2 @v2i16(<2 x i16> %a, <2 x i16> %b) {
195 ; SSE2-SSSE3-LABEL: v2i16:
196 ; SSE2-SSSE3: # %bb.0:
197 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
198 ; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
199 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
200 ; SSE2-SSSE3-NEXT: movmskpd %xmm0, %eax
201 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
202 ; SSE2-SSSE3-NEXT: retq
204 ; AVX12-LABEL: v2i16:
206 ; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
207 ; AVX12-NEXT: vpmovsxwq %xmm0, %xmm0
208 ; AVX12-NEXT: vmovmskpd %xmm0, %eax
209 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
212 ; AVX512F-LABEL: v2i16:
214 ; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
215 ; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
216 ; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0
217 ; AVX512F-NEXT: kmovw %k0, %eax
218 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
219 ; AVX512F-NEXT: vzeroupper
222 ; AVX512BW-LABEL: v2i16:
224 ; AVX512BW-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
225 ; AVX512BW-NEXT: kmovd %k0, %eax
226 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
227 ; AVX512BW-NEXT: retq
228 %x = icmp sgt <2 x i16> %a, %b
229 %res = bitcast <2 x i1> %x to i2
; Signed greater-than compare of two <2 x i32> vectors; the <2 x i1> result
; is bitcast to an i2 bitmask. On the non-AVX512 targets the expected assembly
; widens the dword compare result before extracting the mask with movmskpd.
233 define i2 @v2i32(<2 x i32> %a, <2 x i32> %b) {
234 ; SSE2-SSSE3-LABEL: v2i32:
235 ; SSE2-SSSE3: # %bb.0:
236 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
237 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
238 ; SSE2-SSSE3-NEXT: movmskpd %xmm0, %eax
239 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
240 ; SSE2-SSSE3-NEXT: retq
242 ; AVX12-LABEL: v2i32:
244 ; AVX12-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
245 ; AVX12-NEXT: vpmovsxdq %xmm0, %xmm0
246 ; AVX12-NEXT: vmovmskpd %xmm0, %eax
247 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
250 ; AVX512F-LABEL: v2i32:
252 ; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
253 ; AVX512F-NEXT: kmovw %k0, %eax
254 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
257 ; AVX512BW-LABEL: v2i32:
259 ; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
260 ; AVX512BW-NEXT: kmovd %k0, %eax
261 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
262 ; AVX512BW-NEXT: retq
263 %x = icmp sgt <2 x i32> %a, %b
264 %res = bitcast <2 x i1> %x to i2
; Signed greater-than compare of two <2 x i64> vectors; the <2 x i1> result
; is bitcast to an i2 bitmask. The SSE2/SSSE3 expected sequence assembles the
; 64-bit compare from 32-bit pcmpeqd/pcmpgtd pieces, while the AVX and AVX512
; sequences use vpcmpgtq directly.
268 define i2 @v2i64(<2 x i64> %a, <2 x i64> %b) {
269 ; SSE2-SSSE3-LABEL: v2i64:
270 ; SSE2-SSSE3: # %bb.0:
271 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
272 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
273 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0
274 ; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2
275 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
276 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
277 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
278 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
279 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
280 ; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
281 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
282 ; SSE2-SSSE3-NEXT: retq
284 ; AVX12-LABEL: v2i64:
286 ; AVX12-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
287 ; AVX12-NEXT: vmovmskpd %xmm0, %eax
288 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
291 ; AVX512F-LABEL: v2i64:
293 ; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
294 ; AVX512F-NEXT: kmovw %k0, %eax
295 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
298 ; AVX512BW-LABEL: v2i64:
300 ; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
301 ; AVX512BW-NEXT: kmovd %k0, %eax
302 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
303 ; AVX512BW-NEXT: retq
304 %x = icmp sgt <2 x i64> %a, %b
305 %res = bitcast <2 x i1> %x to i2
; Ordered greater-than compare (fcmp ogt) of two <2 x double> vectors; the
; <2 x i1> result is bitcast to an i2 bitmask. The expected assembly expresses
; the compare as a less-than with the two operands swapped.
309 define i2 @v2f64(<2 x double> %a, <2 x double> %b) {
310 ; SSE2-SSSE3-LABEL: v2f64:
311 ; SSE2-SSSE3: # %bb.0:
312 ; SSE2-SSSE3-NEXT: cmpltpd %xmm0, %xmm1
313 ; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
314 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
315 ; SSE2-SSSE3-NEXT: retq
317 ; AVX12-LABEL: v2f64:
319 ; AVX12-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
320 ; AVX12-NEXT: vmovmskpd %xmm0, %eax
321 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
324 ; AVX512F-LABEL: v2f64:
326 ; AVX512F-NEXT: vcmpltpd %xmm0, %xmm1, %k0
327 ; AVX512F-NEXT: kmovw %k0, %eax
328 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
331 ; AVX512BW-LABEL: v2f64:
333 ; AVX512BW-NEXT: vcmpltpd %xmm0, %xmm1, %k0
334 ; AVX512BW-NEXT: kmovd %k0, %eax
335 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
336 ; AVX512BW-NEXT: retq
337 %x = fcmp ogt <2 x double> %a, %b
338 %res = bitcast <2 x i1> %x to i2
; Signed greater-than compare of two <4 x i8> vectors; the <4 x i1> result is
; bitcast to an i4 bitmask. On the non-AVX512 targets the expected assembly
; widens the byte compare result before extracting the mask with movmskps.
342 define i4 @v4i8(<4 x i8> %a, <4 x i8> %b) {
343 ; SSE2-SSSE3-LABEL: v4i8:
344 ; SSE2-SSSE3: # %bb.0:
345 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
346 ; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
347 ; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
348 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
349 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
350 ; SSE2-SSSE3-NEXT: retq
354 ; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
355 ; AVX12-NEXT: vpmovsxbd %xmm0, %xmm0
356 ; AVX12-NEXT: vmovmskps %xmm0, %eax
357 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
360 ; AVX512F-LABEL: v4i8:
362 ; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
363 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
364 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
365 ; AVX512F-NEXT: kmovw %k0, %eax
366 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
367 ; AVX512F-NEXT: vzeroupper
370 ; AVX512BW-LABEL: v4i8:
372 ; AVX512BW-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
373 ; AVX512BW-NEXT: kmovd %k0, %eax
374 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
375 ; AVX512BW-NEXT: retq
376 %x = icmp sgt <4 x i8> %a, %b
377 %res = bitcast <4 x i1> %x to i4
; Signed greater-than compare of two <4 x i16> vectors; the <4 x i1> result
; is bitcast to an i4 bitmask. On the non-AVX512 targets the expected assembly
; widens the word compare result before extracting the mask with movmskps.
381 define i4 @v4i16(<4 x i16> %a, <4 x i16> %b) {
382 ; SSE2-SSSE3-LABEL: v4i16:
383 ; SSE2-SSSE3: # %bb.0:
384 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
385 ; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
386 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
387 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
388 ; SSE2-SSSE3-NEXT: retq
390 ; AVX12-LABEL: v4i16:
392 ; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
393 ; AVX12-NEXT: vpmovsxwd %xmm0, %xmm0
394 ; AVX12-NEXT: vmovmskps %xmm0, %eax
395 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
398 ; AVX512F-LABEL: v4i16:
400 ; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
401 ; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
402 ; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0
403 ; AVX512F-NEXT: kmovw %k0, %eax
404 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
405 ; AVX512F-NEXT: vzeroupper
408 ; AVX512BW-LABEL: v4i16:
410 ; AVX512BW-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
411 ; AVX512BW-NEXT: kmovd %k0, %eax
412 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
413 ; AVX512BW-NEXT: retq
414 %x = icmp sgt <4 x i16> %a, %b
415 %res = bitcast <4 x i1> %x to i4
; Signed greater-than compare of two <8 x i8> vectors; the <8 x i1> result is
; bitcast to an i8 bitmask. The SSE and AVX expected sequences widen the byte
; compare result and pack it again before the pmovmskb / vpmovmskb extraction.
419 define i8 @v8i8(<8 x i8> %a, <8 x i8> %b) {
420 ; SSE2-SSSE3-LABEL: v8i8:
421 ; SSE2-SSSE3: # %bb.0:
422 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
423 ; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
424 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
425 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
426 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
427 ; SSE2-SSSE3-NEXT: retq
431 ; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
432 ; AVX12-NEXT: vpmovsxbw %xmm0, %xmm0
433 ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
434 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
435 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
438 ; AVX512F-LABEL: v8i8:
440 ; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
441 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
442 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
443 ; AVX512F-NEXT: kmovw %k0, %eax
444 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
445 ; AVX512F-NEXT: vzeroupper
448 ; AVX512BW-LABEL: v8i8:
450 ; AVX512BW-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
451 ; AVX512BW-NEXT: kmovd %k0, %eax
452 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
453 ; AVX512BW-NEXT: retq
454 %x = icmp sgt <8 x i8> %a, %b
455 %res = bitcast <8 x i1> %x to i8
; Equality compare of two <16 x i8> vectors whose <16 x i1> result is widened
; to <64 x i1> by a shufflevector: lanes 0-15 come from the compare, and the
; remaining 48 lanes select elements of the zeroinitializer operand (mask
; indices 16-31, repeated). The widened mask is then bitcast to i64.
459 define i64 @v16i8_widened_with_zeroes(<16 x i8> %a, <16 x i8> %b) {
460 ; SSE2-SSSE3-LABEL: v16i8_widened_with_zeroes:
461 ; SSE2-SSSE3: # %bb.0: # %entry
462 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
463 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
464 ; SSE2-SSSE3-NEXT: retq
466 ; AVX1-LABEL: v16i8_widened_with_zeroes:
467 ; AVX1: # %bb.0: # %entry
468 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
469 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
472 ; AVX2-LABEL: v16i8_widened_with_zeroes:
473 ; AVX2: # %bb.0: # %entry
474 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
475 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
476 ; AVX2-NEXT: vzeroupper
479 ; AVX512F-LABEL: v16i8_widened_with_zeroes:
480 ; AVX512F: # %bb.0: # %entry
481 ; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
482 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
483 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
484 ; AVX512F-NEXT: kmovw %k0, %eax
485 ; AVX512F-NEXT: vzeroupper
488 ; AVX512BW-LABEL: v16i8_widened_with_zeroes:
489 ; AVX512BW: # %bb.0: # %entry
490 ; AVX512BW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
491 ; AVX512BW-NEXT: kmovq %k0, %rax
492 ; AVX512BW-NEXT: retq
494 %c = icmp eq <16 x i8> %a, %b
495 %d = shufflevector <16 x i1> %c, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
496 %e = bitcast <64 x i1> %d to i64
; Equality compare of two <16 x i8> vectors whose <16 x i1> result is widened
; to <64 x i1> by a shufflevector: lanes 0-15 come from the compare, and the
; remaining 48 lanes select elements of an all-ones (i1 -1) operand (mask
; indices 16-31, repeated). The widened mask is then bitcast to i64, so the
; expected assembly ORs constant set bits into the upper part of the result.
500 define i64 @v16i8_widened_with_ones(<16 x i8> %a, <16 x i8> %b) {
501 ; SSE2-SSSE3-LABEL: v16i8_widened_with_ones:
502 ; SSE2-SSSE3: # %bb.0: # %entry
503 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
504 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %ecx
505 ; SSE2-SSSE3-NEXT: orl $-65536, %ecx # imm = 0xFFFF0000
506 ; SSE2-SSSE3-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
507 ; SSE2-SSSE3-NEXT: orq %rcx, %rax
508 ; SSE2-SSSE3-NEXT: retq
510 ; AVX1-LABEL: v16i8_widened_with_ones:
511 ; AVX1: # %bb.0: # %entry
512 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
513 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
514 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
515 ; AVX1-NEXT: orl $-65536, %ecx # imm = 0xFFFF0000
516 ; AVX1-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
517 ; AVX1-NEXT: orq %rcx, %rax
520 ; AVX2-LABEL: v16i8_widened_with_ones:
521 ; AVX2: # %bb.0: # %entry
522 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
523 ; AVX2-NEXT: vinserti128 $1, {{.*}}(%rip), %ymm0, %ymm0
524 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
525 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
526 ; AVX2-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
527 ; AVX2-NEXT: orq %rcx, %rax
528 ; AVX2-NEXT: vzeroupper
531 ; AVX512F-LABEL: v16i8_widened_with_ones:
532 ; AVX512F: # %bb.0: # %entry
533 ; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
534 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
535 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
536 ; AVX512F-NEXT: kmovw %k0, %ecx
537 ; AVX512F-NEXT: orl $-65536, %ecx # imm = 0xFFFF0000
538 ; AVX512F-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
539 ; AVX512F-NEXT: orq %rcx, %rax
540 ; AVX512F-NEXT: vzeroupper
543 ; AVX512BW-LABEL: v16i8_widened_with_ones:
544 ; AVX512BW: # %bb.0: # %entry
545 ; AVX512BW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
546 ; AVX512BW-NEXT: kxnorw %k0, %k0, %k1
547 ; AVX512BW-NEXT: kunpckwd %k0, %k1, %k0
548 ; AVX512BW-NEXT: kxnord %k0, %k0, %k1
549 ; AVX512BW-NEXT: kunpckdq %k0, %k1, %k0
550 ; AVX512BW-NEXT: kmovq %k0, %rax
551 ; AVX512BW-NEXT: retq
553 %c = icmp eq <16 x i8> %a, %b
554 %d = shufflevector <16 x i1> %c, <16 x i1> <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
555 %e = bitcast <64 x i1> %d to i64
; Sign-bit test of a <16 x i8> vector (icmp slt against zero); the <16 x i1>
; result is bitcast to i16 and stored through %p. The SSE, AVX and AVX512BW
; expected sequences have no separate compare instruction: they read the sign
; bits directly with pmovmskb / vpmovmskb / vpmovb2m.
559 define void @bitcast_16i8_store(i16* %p, <16 x i8> %a0) {
560 ; SSE2-SSSE3-LABEL: bitcast_16i8_store:
561 ; SSE2-SSSE3: # %bb.0:
562 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
563 ; SSE2-SSSE3-NEXT: movw %ax, (%rdi)
564 ; SSE2-SSSE3-NEXT: retq
566 ; AVX12-LABEL: bitcast_16i8_store:
568 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
569 ; AVX12-NEXT: movw %ax, (%rdi)
572 ; AVX512F-LABEL: bitcast_16i8_store:
574 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
575 ; AVX512F-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
576 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
577 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
578 ; AVX512F-NEXT: kmovw %k0, (%rdi)
579 ; AVX512F-NEXT: vzeroupper
582 ; AVX512BW-LABEL: bitcast_16i8_store:
584 ; AVX512BW-NEXT: vpmovb2m %xmm0, %k0
585 ; AVX512BW-NEXT: kmovw %k0, (%rdi)
586 ; AVX512BW-NEXT: retq
587 %a1 = icmp slt <16 x i8> %a0, zeroinitializer
588 %a2 = bitcast <16 x i1> %a1 to i16
589 store i16 %a2, i16* %p
; Sign-bit test of an <8 x i16> vector (icmp slt against zero); the <8 x i1>
; result is bitcast to i8. The expected assembly writes the mask byte to
; (%rdi), so the value is presumably stored through %p; the store itself is
; not visible in this view (TODO confirm).
593 define void @bitcast_8i16_store(i8* %p, <8 x i16> %a0) {
594 ; SSE2-SSSE3-LABEL: bitcast_8i16_store:
595 ; SSE2-SSSE3: # %bb.0:
596 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
597 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
598 ; SSE2-SSSE3-NEXT: movb %al, (%rdi)
599 ; SSE2-SSSE3-NEXT: retq
601 ; AVX12-LABEL: bitcast_8i16_store:
603 ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
604 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
605 ; AVX12-NEXT: movb %al, (%rdi)
608 ; AVX512F-LABEL: bitcast_8i16_store:
610 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
611 ; AVX512F-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
612 ; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
613 ; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0
614 ; AVX512F-NEXT: kmovw %k0, %eax
615 ; AVX512F-NEXT: movb %al, (%rdi)
616 ; AVX512F-NEXT: vzeroupper
619 ; AVX512BW-LABEL: bitcast_8i16_store:
621 ; AVX512BW-NEXT: vpmovw2m %xmm0, %k0
622 ; AVX512BW-NEXT: kmovd %k0, %eax
623 ; AVX512BW-NEXT: movb %al, (%rdi)
624 ; AVX512BW-NEXT: retq
625 %a1 = icmp slt <8 x i16> %a0, zeroinitializer
626 %a2 = bitcast <8 x i1> %a1 to i8
; Sign-bit test of a <4 x i32> vector (icmp slt against zero); the <4 x i1>
; result is bitcast to i4. The expected assembly writes the mask as a full
; byte to (%rdi), so the value is presumably stored through %p; the store
; itself is not visible in this view (TODO confirm).
631 define void @bitcast_4i32_store(i4* %p, <4 x i32> %a0) {
632 ; SSE2-SSSE3-LABEL: bitcast_4i32_store:
633 ; SSE2-SSSE3: # %bb.0:
634 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
635 ; SSE2-SSSE3-NEXT: movb %al, (%rdi)
636 ; SSE2-SSSE3-NEXT: retq
638 ; AVX12-LABEL: bitcast_4i32_store:
640 ; AVX12-NEXT: vmovmskps %xmm0, %eax
641 ; AVX12-NEXT: movb %al, (%rdi)
644 ; AVX512F-LABEL: bitcast_4i32_store:
646 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
647 ; AVX512F-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
648 ; AVX512F-NEXT: kmovw %k0, %eax
649 ; AVX512F-NEXT: movb %al, (%rdi)
652 ; AVX512BW-LABEL: bitcast_4i32_store:
654 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
655 ; AVX512BW-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
656 ; AVX512BW-NEXT: kmovd %k0, %eax
657 ; AVX512BW-NEXT: movb %al, (%rdi)
658 ; AVX512BW-NEXT: retq
659 %a1 = icmp slt <4 x i32> %a0, zeroinitializer
660 %a2 = bitcast <4 x i1> %a1 to i4
665 define void @bitcast_2i64_store(i2* %p, <2 x i64> %a0) {
666 ; SSE2-SSSE3-LABEL: bitcast_2i64_store:
667 ; SSE2-SSSE3: # %bb.0:
668 ; SSE2-SSSE3-NEXT: movmskpd %xmm0, %eax
669 ; SSE2-SSSE3-NEXT: movb %al, (%rdi)
670 ; SSE2-SSSE3-NEXT: retq
672 ; AVX12-LABEL: bitcast_2i64_store:
674 ; AVX12-NEXT: vmovmskpd %xmm0, %eax
675 ; AVX12-NEXT: movb %al, (%rdi)
678 ; AVX512F-LABEL: bitcast_2i64_store:
680 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
681 ; AVX512F-NEXT: vpcmpgtq %xmm0, %xmm1, %k0
682 ; AVX512F-NEXT: kmovw %k0, %eax
683 ; AVX512F-NEXT: movb %al, (%rdi)
686 ; AVX512BW-LABEL: bitcast_2i64_store:
688 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
689 ; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %k0
690 ; AVX512BW-NEXT: kmovd %k0, %eax
691 ; AVX512BW-NEXT: movb %al, (%rdi)
692 ; AVX512BW-NEXT: retq
693 %a1 = icmp slt <2 x i64> %a0, zeroinitializer
694 %a2 = bitcast <2 x i1> %a1 to i2