1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefix=AVX512BW
; Signed greater-than compare of two <8 x i16> vectors; the <8 x i1> result
; is bitcast to an i8 bitmask.
; The SSE and AVX check lines expect the word mask to be packed to bytes
; (packsswb) before pmovmskb. The AVX512F lines expect a sign-extension to
; dwords followed by vptestmd, and the AVX512BW lines expect the compare to
; write a mask register directly.
9 define i8 @v8i16(<8 x i16> %a, <8 x i16> %b) {
10 ; SSE2-SSSE3-LABEL: v8i16:
11 ; SSE2-SSSE3: # %bb.0:
12 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
13 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
14 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
15 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
16 ; SSE2-SSSE3-NEXT: retq
20 ; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
21 ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
22 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
23 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
26 ; AVX512F-LABEL: v8i16:
28 ; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
29 ; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
30 ; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0
31 ; AVX512F-NEXT: kmovw %k0, %eax
32 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
33 ; AVX512F-NEXT: vzeroupper
36 ; AVX512BW-LABEL: v8i16:
38 ; AVX512BW-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
39 ; AVX512BW-NEXT: kmovd %k0, %eax
40 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
42 %x = icmp sgt <8 x i16> %a, %b
43 %res = bitcast <8 x i1> %x to i8
; Signed greater-than compare of two <4 x i32> vectors; the <4 x i1> result
; is bitcast to i4.
; The SSE and AVX check lines expect a dword compare followed by movmskps;
; both AVX512 configurations expect the compare to write k0 and a kmov to
; move the mask into a general-purpose register.
47 define i4 @v4i32(<4 x i32> %a, <4 x i32> %b) {
48 ; SSE2-SSSE3-LABEL: v4i32:
49 ; SSE2-SSSE3: # %bb.0:
50 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
51 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
52 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
53 ; SSE2-SSSE3-NEXT: retq
57 ; AVX12-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
58 ; AVX12-NEXT: vmovmskps %xmm0, %eax
59 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
62 ; AVX512F-LABEL: v4i32:
64 ; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
65 ; AVX512F-NEXT: kmovw %k0, %eax
66 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
69 ; AVX512BW-LABEL: v4i32:
71 ; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
72 ; AVX512BW-NEXT: kmovd %k0, %eax
73 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
75 %x = icmp sgt <4 x i32> %a, %b
76 %res = bitcast <4 x i1> %x to i4
; Ordered greater-than compare (fcmp ogt) of two <4 x float> vectors; the
; <4 x i1> result is bitcast to i4.
; Every configuration is checked to use the less-than compare (cmpltps) with
; the two operands swapped relative to the IR.
80 define i4 @v4f32(<4 x float> %a, <4 x float> %b) {
81 ; SSE2-SSSE3-LABEL: v4f32:
82 ; SSE2-SSSE3: # %bb.0:
83 ; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm1
84 ; SSE2-SSSE3-NEXT: movmskps %xmm1, %eax
85 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
86 ; SSE2-SSSE3-NEXT: retq
90 ; AVX12-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
91 ; AVX12-NEXT: vmovmskps %xmm0, %eax
92 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
95 ; AVX512F-LABEL: v4f32:
97 ; AVX512F-NEXT: vcmpltps %xmm0, %xmm1, %k0
98 ; AVX512F-NEXT: kmovw %k0, %eax
99 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
102 ; AVX512BW-LABEL: v4f32:
104 ; AVX512BW-NEXT: vcmpltps %xmm0, %xmm1, %k0
105 ; AVX512BW-NEXT: kmovd %k0, %eax
106 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
107 ; AVX512BW-NEXT: retq
108 %x = fcmp ogt <4 x float> %a, %b
109 %res = bitcast <4 x i1> %x to i4
; Signed greater-than compare of two <16 x i8> vectors; the <16 x i1> result
; is bitcast to an i16 bitmask.
; The AVX512F (no BW) check lines expect the same vpcmpgtb + vpmovmskb pair as
; the AVX lines; only the AVX512BW lines expect a compare into a mask register.
113 define i16 @v16i8(<16 x i8> %a, <16 x i8> %b) {
114 ; SSE2-SSSE3-LABEL: v16i8:
115 ; SSE2-SSSE3: # %bb.0:
116 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
117 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
118 ; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
119 ; SSE2-SSSE3-NEXT: retq
121 ; AVX12-LABEL: v16i8:
123 ; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
124 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
125 ; AVX12-NEXT: # kill: def $ax killed $ax killed $eax
128 ; AVX512F-LABEL: v16i8:
130 ; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
131 ; AVX512F-NEXT: vpmovmskb %xmm0, %eax
132 ; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax
135 ; AVX512BW-LABEL: v16i8:
137 ; AVX512BW-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
138 ; AVX512BW-NEXT: kmovd %k0, %eax
139 ; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
140 ; AVX512BW-NEXT: retq
141 %x = icmp sgt <16 x i8> %a, %b
142 %res = bitcast <16 x i1> %x to i16
; Signed greater-than compare of two <2 x i8> vectors; the <2 x i1> result
; is bitcast to i2.
; The SSE2 and SSSE3 runs have separate check lines here. The SSE2 sequence
; widens the byte mask with punpcklbw/pshuflw/pshufd, the SSSE3 sequence uses
; a single pshufb, and both finish with movmskpd.
146 define i2 @v2i8(<2 x i8> %a, <2 x i8> %b) {
149 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm0
150 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
151 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
152 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
153 ; SSE2-NEXT: movmskpd %xmm0, %eax
154 ; SSE2-NEXT: # kill: def $al killed $al killed $eax
159 ; SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
160 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,0,u,u,u,u,u,u,u,1]
161 ; SSSE3-NEXT: movmskpd %xmm0, %eax
162 ; SSSE3-NEXT: # kill: def $al killed $al killed $eax
167 ; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
168 ; AVX12-NEXT: vpmovsxbq %xmm0, %xmm0
169 ; AVX12-NEXT: vmovmskpd %xmm0, %eax
170 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
173 ; AVX512F-LABEL: v2i8:
175 ; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
176 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
177 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
178 ; AVX512F-NEXT: kmovw %k0, %eax
179 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
180 ; AVX512F-NEXT: vzeroupper
183 ; AVX512BW-LABEL: v2i8:
185 ; AVX512BW-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
186 ; AVX512BW-NEXT: kmovd %k0, %eax
187 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
188 ; AVX512BW-NEXT: retq
189 %x = icmp sgt <2 x i8> %a, %b
190 %res = bitcast <2 x i1> %x to i2
; Signed greater-than compare of two <2 x i16> vectors; the <2 x i1> result
; is bitcast to i2.
; The SSE check lines expect the word mask to be spread to 64-bit lanes with
; pshuflw/pshufd before movmskpd; the AVX lines expect vpmovsxwq for the same
; widening step.
194 define i2 @v2i16(<2 x i16> %a, <2 x i16> %b) {
195 ; SSE2-SSSE3-LABEL: v2i16:
196 ; SSE2-SSSE3: # %bb.0:
197 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
198 ; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
199 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
200 ; SSE2-SSSE3-NEXT: movmskpd %xmm0, %eax
201 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
202 ; SSE2-SSSE3-NEXT: retq
204 ; AVX12-LABEL: v2i16:
206 ; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
207 ; AVX12-NEXT: vpmovsxwq %xmm0, %xmm0
208 ; AVX12-NEXT: vmovmskpd %xmm0, %eax
209 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
212 ; AVX512F-LABEL: v2i16:
214 ; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
215 ; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
216 ; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0
217 ; AVX512F-NEXT: kmovw %k0, %eax
218 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
219 ; AVX512F-NEXT: vzeroupper
222 ; AVX512BW-LABEL: v2i16:
224 ; AVX512BW-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
225 ; AVX512BW-NEXT: kmovd %k0, %eax
226 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
227 ; AVX512BW-NEXT: retq
228 %x = icmp sgt <2 x i16> %a, %b
229 %res = bitcast <2 x i1> %x to i2
; Signed greater-than compare of two <2 x i32> vectors; the <2 x i1> result
; is bitcast to i2.
; The SSE check lines expect each dword mask to be duplicated into a 64-bit
; lane (pshufd) before movmskpd; the AVX lines expect vpmovsxdq instead. Both
; AVX512 configurations are checked to compare directly into k0.
233 define i2 @v2i32(<2 x i32> %a, <2 x i32> %b) {
234 ; SSE2-SSSE3-LABEL: v2i32:
235 ; SSE2-SSSE3: # %bb.0:
236 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
237 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
238 ; SSE2-SSSE3-NEXT: movmskpd %xmm0, %eax
239 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
240 ; SSE2-SSSE3-NEXT: retq
242 ; AVX12-LABEL: v2i32:
244 ; AVX12-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
245 ; AVX12-NEXT: vpmovsxdq %xmm0, %xmm0
246 ; AVX12-NEXT: vmovmskpd %xmm0, %eax
247 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
250 ; AVX512F-LABEL: v2i32:
252 ; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
253 ; AVX512F-NEXT: kmovw %k0, %eax
254 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
257 ; AVX512BW-LABEL: v2i32:
259 ; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
260 ; AVX512BW-NEXT: kmovd %k0, %eax
261 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
262 ; AVX512BW-NEXT: retq
263 %x = icmp sgt <2 x i32> %a, %b
264 %res = bitcast <2 x i1> %x to i2
; Signed greater-than compare of two <2 x i64> vectors; the <2 x i1> result
; is bitcast to i2.
; The SSE check lines expect the 64-bit compare to be assembled from 32-bit
; operations. Both inputs are xored with a 2147483648 constant, then pcmpeqd
; and pcmpgtd results are combined with pshufd/pand/por before movmskpd.
; The AVX lines expect a single vpcmpgtq followed by vmovmskpd.
268 define i2 @v2i64(<2 x i64> %a, <2 x i64> %b) {
269 ; SSE2-SSSE3-LABEL: v2i64:
270 ; SSE2-SSSE3: # %bb.0:
271 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
272 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
273 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0
274 ; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2
275 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
276 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
277 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
278 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
279 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
280 ; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
281 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
282 ; SSE2-SSSE3-NEXT: retq
284 ; AVX12-LABEL: v2i64:
286 ; AVX12-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
287 ; AVX12-NEXT: vmovmskpd %xmm0, %eax
288 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
291 ; AVX512F-LABEL: v2i64:
293 ; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
294 ; AVX512F-NEXT: kmovw %k0, %eax
295 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
298 ; AVX512BW-LABEL: v2i64:
300 ; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
301 ; AVX512BW-NEXT: kmovd %k0, %eax
302 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
303 ; AVX512BW-NEXT: retq
304 %x = icmp sgt <2 x i64> %a, %b
305 %res = bitcast <2 x i1> %x to i2
; Ordered greater-than compare (fcmp ogt) of two <2 x double> vectors; the
; <2 x i1> result is bitcast to i2.
; Every configuration is checked to use the less-than compare (cmpltpd) with
; the two operands swapped relative to the IR.
309 define i2 @v2f64(<2 x double> %a, <2 x double> %b) {
310 ; SSE2-SSSE3-LABEL: v2f64:
311 ; SSE2-SSSE3: # %bb.0:
312 ; SSE2-SSSE3-NEXT: cmpltpd %xmm0, %xmm1
313 ; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
314 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
315 ; SSE2-SSSE3-NEXT: retq
317 ; AVX12-LABEL: v2f64:
319 ; AVX12-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
320 ; AVX12-NEXT: vmovmskpd %xmm0, %eax
321 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
324 ; AVX512F-LABEL: v2f64:
326 ; AVX512F-NEXT: vcmpltpd %xmm0, %xmm1, %k0
327 ; AVX512F-NEXT: kmovw %k0, %eax
328 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
331 ; AVX512BW-LABEL: v2f64:
333 ; AVX512BW-NEXT: vcmpltpd %xmm0, %xmm1, %k0
334 ; AVX512BW-NEXT: kmovd %k0, %eax
335 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
336 ; AVX512BW-NEXT: retq
337 %x = fcmp ogt <2 x double> %a, %b
338 %res = bitcast <2 x i1> %x to i2
; Signed greater-than compare of two <4 x i8> vectors; the <4 x i1> result
; is bitcast to i4.
; The SSE check lines expect the byte mask to be widened to dwords with two
; unpack instructions before movmskps; the AVX lines expect vpmovsxbd for the
; same widening step.
342 define i4 @v4i8(<4 x i8> %a, <4 x i8> %b) {
343 ; SSE2-SSSE3-LABEL: v4i8:
344 ; SSE2-SSSE3: # %bb.0:
345 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
346 ; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
347 ; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
348 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
349 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
350 ; SSE2-SSSE3-NEXT: retq
354 ; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
355 ; AVX12-NEXT: vpmovsxbd %xmm0, %xmm0
356 ; AVX12-NEXT: vmovmskps %xmm0, %eax
357 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
360 ; AVX512F-LABEL: v4i8:
362 ; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
363 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
364 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
365 ; AVX512F-NEXT: kmovw %k0, %eax
366 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
367 ; AVX512F-NEXT: vzeroupper
370 ; AVX512BW-LABEL: v4i8:
372 ; AVX512BW-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
373 ; AVX512BW-NEXT: kmovd %k0, %eax
374 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
375 ; AVX512BW-NEXT: retq
376 %x = icmp sgt <4 x i8> %a, %b
377 %res = bitcast <4 x i1> %x to i4
; Signed greater-than compare of two <4 x i16> vectors; the <4 x i1> result
; is bitcast to i4.
; The SSE check lines expect the word mask to be widened to dwords with
; punpcklwd before movmskps; the AVX lines expect vpmovsxwd for the same
; widening step.
381 define i4 @v4i16(<4 x i16> %a, <4 x i16> %b) {
382 ; SSE2-SSSE3-LABEL: v4i16:
383 ; SSE2-SSSE3: # %bb.0:
384 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
385 ; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
386 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
387 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
388 ; SSE2-SSSE3-NEXT: retq
390 ; AVX12-LABEL: v4i16:
392 ; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
393 ; AVX12-NEXT: vpmovsxwd %xmm0, %xmm0
394 ; AVX12-NEXT: vmovmskps %xmm0, %eax
395 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
398 ; AVX512F-LABEL: v4i16:
400 ; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
401 ; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
402 ; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0
403 ; AVX512F-NEXT: kmovw %k0, %eax
404 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
405 ; AVX512F-NEXT: vzeroupper
408 ; AVX512BW-LABEL: v4i16:
410 ; AVX512BW-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
411 ; AVX512BW-NEXT: kmovd %k0, %eax
412 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
413 ; AVX512BW-NEXT: retq
414 %x = icmp sgt <4 x i16> %a, %b
415 %res = bitcast <4 x i1> %x to i4
; Signed greater-than compare of two <8 x i8> vectors; the <8 x i1> result
; is bitcast to an i8 bitmask.
; The SSE and AVX check lines expect a byte compare followed directly by
; pmovmskb. The AVX512F lines expect a sign-extension to dwords in a zmm
; register and vptestmd.
419 define i8 @v8i8(<8 x i8> %a, <8 x i8> %b) {
420 ; SSE2-SSSE3-LABEL: v8i8:
421 ; SSE2-SSSE3: # %bb.0:
422 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
423 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
424 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
425 ; SSE2-SSSE3-NEXT: retq
429 ; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
430 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
431 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
434 ; AVX512F-LABEL: v8i8:
436 ; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
437 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
438 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
439 ; AVX512F-NEXT: kmovw %k0, %eax
440 ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
441 ; AVX512F-NEXT: vzeroupper
444 ; AVX512BW-LABEL: v8i8:
446 ; AVX512BW-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
447 ; AVX512BW-NEXT: kmovd %k0, %eax
448 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
449 ; AVX512BW-NEXT: retq
450 %x = icmp sgt <8 x i8> %a, %b
451 %res = bitcast <8 x i1> %x to i8
; Equality compare of two <16 x i8> vectors whose <16 x i1> result is widened
; to <64 x i1> by a shufflevector and then bitcast to i64.
; Shuffle mask indices 0-15 select the compare bits; all remaining indices are
; in the 16-31 range and therefore select lanes of the zeroinitializer second
; operand, so the upper 48 result bits are zero.
; AVX1 and AVX2 have separate check lines for this function.
455 define i64 @v16i8_widened_with_zeroes(<16 x i8> %a, <16 x i8> %b) {
456 ; SSE2-SSSE3-LABEL: v16i8_widened_with_zeroes:
457 ; SSE2-SSSE3: # %bb.0: # %entry
458 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
459 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
460 ; SSE2-SSSE3-NEXT: retq
462 ; AVX1-LABEL: v16i8_widened_with_zeroes:
463 ; AVX1: # %bb.0: # %entry
464 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
465 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
468 ; AVX2-LABEL: v16i8_widened_with_zeroes:
469 ; AVX2: # %bb.0: # %entry
470 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
471 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
472 ; AVX2-NEXT: vzeroupper
475 ; AVX512F-LABEL: v16i8_widened_with_zeroes:
476 ; AVX512F: # %bb.0: # %entry
477 ; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
478 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
479 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
480 ; AVX512F-NEXT: kmovw %k0, %eax
481 ; AVX512F-NEXT: vzeroupper
484 ; AVX512BW-LABEL: v16i8_widened_with_zeroes:
485 ; AVX512BW: # %bb.0: # %entry
486 ; AVX512BW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
487 ; AVX512BW-NEXT: kmovq %k0, %rax
488 ; AVX512BW-NEXT: retq
490 %c = icmp eq <16 x i8> %a, %b
491 %d = shufflevector <16 x i1> %c, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
492 %e = bitcast <64 x i1> %d to i64
; Equality compare of two <16 x i8> vectors whose <16 x i1> result is widened
; to <64 x i1> by a shufflevector and then bitcast to i64.
; Shuffle mask indices 0-15 select the compare bits; all remaining indices are
; in the 16-31 range and therefore select lanes of the all-ones second
; operand, so the upper 48 result bits are set.
; The SSE, AVX1 and AVX512F check lines expect the set bits to be ORed in as
; the constants 0xFFFF0000 and 0xFFFFFFFF00000000; the AVX512BW lines expect
; them to be built in mask registers with kxnor/kunpck.
496 define i64 @v16i8_widened_with_ones(<16 x i8> %a, <16 x i8> %b) {
497 ; SSE2-SSSE3-LABEL: v16i8_widened_with_ones:
498 ; SSE2-SSSE3: # %bb.0: # %entry
499 ; SSE2-SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
500 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %ecx
501 ; SSE2-SSSE3-NEXT: orl $-65536, %ecx # imm = 0xFFFF0000
502 ; SSE2-SSSE3-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
503 ; SSE2-SSSE3-NEXT: orq %rcx, %rax
504 ; SSE2-SSSE3-NEXT: retq
506 ; AVX1-LABEL: v16i8_widened_with_ones:
507 ; AVX1: # %bb.0: # %entry
508 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
509 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
510 ; AVX1-NEXT: orl $-65536, %ecx # imm = 0xFFFF0000
511 ; AVX1-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
512 ; AVX1-NEXT: orq %rcx, %rax
515 ; AVX2-LABEL: v16i8_widened_with_ones:
516 ; AVX2: # %bb.0: # %entry
517 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
518 ; AVX2-NEXT: vinserti128 $1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
519 ; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
520 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
521 ; AVX2-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
522 ; AVX2-NEXT: orq %rcx, %rax
523 ; AVX2-NEXT: vzeroupper
526 ; AVX512F-LABEL: v16i8_widened_with_ones:
527 ; AVX512F: # %bb.0: # %entry
528 ; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
529 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
530 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
531 ; AVX512F-NEXT: kmovw %k0, %ecx
532 ; AVX512F-NEXT: orl $-65536, %ecx # imm = 0xFFFF0000
533 ; AVX512F-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
534 ; AVX512F-NEXT: orq %rcx, %rax
535 ; AVX512F-NEXT: vzeroupper
538 ; AVX512BW-LABEL: v16i8_widened_with_ones:
539 ; AVX512BW: # %bb.0: # %entry
540 ; AVX512BW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
541 ; AVX512BW-NEXT: kxnorw %k0, %k0, %k1
542 ; AVX512BW-NEXT: kunpckwd %k0, %k1, %k0
543 ; AVX512BW-NEXT: kxnord %k0, %k0, %k1
544 ; AVX512BW-NEXT: kunpckdq %k0, %k1, %k0
545 ; AVX512BW-NEXT: kmovq %k0, %rax
546 ; AVX512BW-NEXT: retq
548 %c = icmp eq <16 x i8> %a, %b
549 %d = shufflevector <16 x i1> %c, <16 x i1> <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
550 %e = bitcast <64 x i1> %d to i64
; Sign test (icmp slt against zero) of a <16 x i8> vector; the <16 x i1>
; result is bitcast to i16 and stored through %p.
; The SSE, AVX and AVX512BW check lines expect no explicit compare, because
; pmovmskb / vpmovb2m read the byte sign bits directly. The AVX512F lines
; expect an explicit compare against a zeroed register.
554 define void @bitcast_16i8_store(i16* %p, <16 x i8> %a0) {
555 ; SSE2-SSSE3-LABEL: bitcast_16i8_store:
556 ; SSE2-SSSE3: # %bb.0:
557 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
558 ; SSE2-SSSE3-NEXT: movw %ax, (%rdi)
559 ; SSE2-SSSE3-NEXT: retq
561 ; AVX12-LABEL: bitcast_16i8_store:
563 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
564 ; AVX12-NEXT: movw %ax, (%rdi)
567 ; AVX512F-LABEL: bitcast_16i8_store:
569 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
570 ; AVX512F-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
571 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
572 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
573 ; AVX512F-NEXT: kmovw %k0, (%rdi)
574 ; AVX512F-NEXT: vzeroupper
577 ; AVX512BW-LABEL: bitcast_16i8_store:
579 ; AVX512BW-NEXT: vpmovb2m %xmm0, %k0
580 ; AVX512BW-NEXT: kmovw %k0, (%rdi)
581 ; AVX512BW-NEXT: retq
582 %a1 = icmp slt <16 x i8> %a0, zeroinitializer
583 %a2 = bitcast <16 x i1> %a1 to i16
584 store i16 %a2, i16* %p
; Sign test (icmp slt against zero) of an <8 x i16> vector; the <8 x i1>
; result is bitcast to i8.
; NOTE(review): the IR store through %p is not visible in this view; the
; check lines show a byte store to (%rdi), so the i8 is presumably stored
; through %p - confirm against the full file.
; The SSE and AVX check lines expect packsswb + pmovmskb with no explicit
; compare; the AVX512BW lines expect vpmovw2m.
588 define void @bitcast_8i16_store(i8* %p, <8 x i16> %a0) {
589 ; SSE2-SSSE3-LABEL: bitcast_8i16_store:
590 ; SSE2-SSSE3: # %bb.0:
591 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
592 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
593 ; SSE2-SSSE3-NEXT: movb %al, (%rdi)
594 ; SSE2-SSSE3-NEXT: retq
596 ; AVX12-LABEL: bitcast_8i16_store:
598 ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
599 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
600 ; AVX12-NEXT: movb %al, (%rdi)
603 ; AVX512F-LABEL: bitcast_8i16_store:
605 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
606 ; AVX512F-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
607 ; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
608 ; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0
609 ; AVX512F-NEXT: kmovw %k0, %eax
610 ; AVX512F-NEXT: movb %al, (%rdi)
611 ; AVX512F-NEXT: vzeroupper
614 ; AVX512BW-LABEL: bitcast_8i16_store:
616 ; AVX512BW-NEXT: vpmovw2m %xmm0, %k0
617 ; AVX512BW-NEXT: kmovd %k0, %eax
618 ; AVX512BW-NEXT: movb %al, (%rdi)
619 ; AVX512BW-NEXT: retq
620 %a1 = icmp slt <8 x i16> %a0, zeroinitializer
621 %a2 = bitcast <8 x i1> %a1 to i8
; Sign test (icmp slt against zero) of a <4 x i32> vector; the <4 x i1>
; result is bitcast to i4.
; NOTE(review): the IR store through %p is not visible in this view; the
; check lines show a byte store to (%rdi), so the i4 is presumably stored
; through %p - confirm against the full file.
; The SSE and AVX check lines expect a bare movmskps with no explicit
; compare; both AVX512 configurations expect a compare against a zeroed
; register into k0.
626 define void @bitcast_4i32_store(i4* %p, <4 x i32> %a0) {
627 ; SSE2-SSSE3-LABEL: bitcast_4i32_store:
628 ; SSE2-SSSE3: # %bb.0:
629 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
630 ; SSE2-SSSE3-NEXT: movb %al, (%rdi)
631 ; SSE2-SSSE3-NEXT: retq
633 ; AVX12-LABEL: bitcast_4i32_store:
635 ; AVX12-NEXT: vmovmskps %xmm0, %eax
636 ; AVX12-NEXT: movb %al, (%rdi)
639 ; AVX512F-LABEL: bitcast_4i32_store:
641 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
642 ; AVX512F-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
643 ; AVX512F-NEXT: kmovw %k0, %eax
644 ; AVX512F-NEXT: movb %al, (%rdi)
647 ; AVX512BW-LABEL: bitcast_4i32_store:
649 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
650 ; AVX512BW-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
651 ; AVX512BW-NEXT: kmovd %k0, %eax
652 ; AVX512BW-NEXT: movb %al, (%rdi)
653 ; AVX512BW-NEXT: retq
654 %a1 = icmp slt <4 x i32> %a0, zeroinitializer
655 %a2 = bitcast <4 x i1> %a1 to i4
660 define void @bitcast_2i64_store(i2* %p, <2 x i64> %a0) {
661 ; SSE2-SSSE3-LABEL: bitcast_2i64_store:
662 ; SSE2-SSSE3: # %bb.0:
663 ; SSE2-SSSE3-NEXT: movmskpd %xmm0, %eax
664 ; SSE2-SSSE3-NEXT: movb %al, (%rdi)
665 ; SSE2-SSSE3-NEXT: retq
667 ; AVX12-LABEL: bitcast_2i64_store:
669 ; AVX12-NEXT: vmovmskpd %xmm0, %eax
670 ; AVX12-NEXT: movb %al, (%rdi)
673 ; AVX512F-LABEL: bitcast_2i64_store:
675 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
676 ; AVX512F-NEXT: vpcmpgtq %xmm0, %xmm1, %k0
677 ; AVX512F-NEXT: kmovw %k0, %eax
678 ; AVX512F-NEXT: movb %al, (%rdi)
681 ; AVX512BW-LABEL: bitcast_2i64_store:
683 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
684 ; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %k0
685 ; AVX512BW-NEXT: kmovd %k0, %eax
686 ; AVX512BW-NEXT: movb %al, (%rdi)
687 ; AVX512BW-NEXT: retq
688 %a1 = icmp slt <2 x i64> %a0, zeroinitializer
689 %a2 = bitcast <2 x i1> %a1 to i2