1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2-SSSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE2-SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX12,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX12,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
; Test: sign-bit mask of a <2 x i64> vector, reinterpreted as <2 x i1>.
; Each lane of %a0 is compared signed-less-than zero; the <2 x i1> result goes
; through a bitcast to the same <2 x i1> type, and elements 0 and 1 are then
; extracted as %3 and %4.
; The SSE and AVX1/AVX2 expectations read the sign bits with (v)movmskpd, while
; the AVX512 expectation compares into mask register %k0 (vpcmpgtq) instead.
; NOTE(review): the instructions consuming %3/%4 and the return are not visible
; in this excerpt; the expected code ends in an add, so the two elements are
; presumably summed -- confirm against the full file.
; The assertions below are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
12 define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind {
13 ; SSE2-SSSE3-LABEL: bitcast_v2i64_to_v2i1:
14 ; SSE2-SSSE3: # %bb.0:
15 ; SSE2-SSSE3-NEXT: movmskpd %xmm0, %ecx
16 ; SSE2-SSSE3-NEXT: movl %ecx, %eax
17 ; SSE2-SSSE3-NEXT: shrb %al
18 ; SSE2-SSSE3-NEXT: addb %cl, %al
19 ; SSE2-SSSE3-NEXT: retq
21 ; AVX12-LABEL: bitcast_v2i64_to_v2i1:
23 ; AVX12-NEXT: vmovmskpd %xmm0, %ecx
24 ; AVX12-NEXT: movl %ecx, %eax
25 ; AVX12-NEXT: shrb %al
26 ; AVX12-NEXT: addb %cl, %al
29 ; AVX512-LABEL: bitcast_v2i64_to_v2i1:
31 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
32 ; AVX512-NEXT: vpcmpgtq %xmm0, %xmm1, %k0
33 ; AVX512-NEXT: kshiftrw $1, %k0, %k1
34 ; AVX512-NEXT: kmovd %k1, %ecx
35 ; AVX512-NEXT: kmovd %k0, %eax
36 ; AVX512-NEXT: addb %cl, %al
37 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
39 %1 = icmp slt <2 x i64> %a0, zeroinitializer
40 %2 = bitcast <2 x i1> %1 to <2 x i1>
41 %3 = extractelement <2 x i1> %2, i32 0
42 %4 = extractelement <2 x i1> %2, i32 1
; Test: sign-bit mask of a <4 x i32> vector, reinterpreted as <2 x i2>.
; Each lane of %a0 is compared signed-less-than zero; the <4 x i1> result is
; bitcast to <2 x i2>, and elements 0 and 1 are extracted as %3 and %4.
; Both the SSE and the shared AVX expectations read the sign bits with
; (v)movmskps, then form one operand with `andb $3` and the other with
; `shrb $2` before adding them.
; NOTE(review): the instructions consuming %3/%4 and the return are not visible
; in this excerpt; presumably the two elements are summed -- confirm.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
47 define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
48 ; SSE2-SSSE3-LABEL: bitcast_v4i32_to_v2i2:
49 ; SSE2-SSSE3: # %bb.0:
50 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
51 ; SSE2-SSSE3-NEXT: movl %eax, %ecx
52 ; SSE2-SSSE3-NEXT: shrb $2, %cl
53 ; SSE2-SSSE3-NEXT: andb $3, %al
54 ; SSE2-SSSE3-NEXT: addb %cl, %al
55 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
56 ; SSE2-SSSE3-NEXT: retq
58 ; AVX-LABEL: bitcast_v4i32_to_v2i2:
60 ; AVX-NEXT: vmovmskps %xmm0, %eax
61 ; AVX-NEXT: movl %eax, %ecx
62 ; AVX-NEXT: shrb $2, %cl
63 ; AVX-NEXT: andb $3, %al
64 ; AVX-NEXT: addb %cl, %al
65 ; AVX-NEXT: # kill: def $al killed $al killed $eax
67 %1 = icmp slt <4 x i32> %a0, zeroinitializer
68 %2 = bitcast <4 x i1> %1 to <2 x i2>
69 %3 = extractelement <2 x i2> %2, i32 0
70 %4 = extractelement <2 x i2> %2, i32 1
; Test: sign-bit mask of an <8 x i16> vector, reinterpreted as <2 x i4>.
; Each lane of %a0 is compared signed-less-than zero; the <8 x i1> result is
; bitcast to <2 x i4>, and elements 0 and 1 are extracted as %3 and %4.
; The SSE and AVX1/AVX2 expectations pack the words to bytes ((v)packsswb) and
; use (v)pmovmskb; the AVX512 expectation uses vpmovw2m into a mask register.
; All of them then form the operands with `andb $15` and `shrb $4` and add them.
; NOTE(review): the instructions consuming %3/%4 and the return are not visible
; in this excerpt; presumably the two elements are summed -- confirm.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
75 define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
76 ; SSE2-SSSE3-LABEL: bitcast_v8i16_to_v2i4:
77 ; SSE2-SSSE3: # %bb.0:
78 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
79 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
80 ; SSE2-SSSE3-NEXT: movl %eax, %ecx
81 ; SSE2-SSSE3-NEXT: shrb $4, %cl
82 ; SSE2-SSSE3-NEXT: andb $15, %al
83 ; SSE2-SSSE3-NEXT: addb %cl, %al
84 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
85 ; SSE2-SSSE3-NEXT: retq
87 ; AVX12-LABEL: bitcast_v8i16_to_v2i4:
89 ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
90 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
91 ; AVX12-NEXT: movl %eax, %ecx
92 ; AVX12-NEXT: shrb $4, %cl
93 ; AVX12-NEXT: andb $15, %al
94 ; AVX12-NEXT: addb %cl, %al
95 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
98 ; AVX512-LABEL: bitcast_v8i16_to_v2i4:
100 ; AVX512-NEXT: vpmovw2m %xmm0, %k0
101 ; AVX512-NEXT: kmovd %k0, %eax
102 ; AVX512-NEXT: movl %eax, %ecx
103 ; AVX512-NEXT: shrb $4, %cl
104 ; AVX512-NEXT: andb $15, %al
105 ; AVX512-NEXT: addb %cl, %al
106 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
108 %1 = icmp slt <8 x i16> %a0, zeroinitializer
109 %2 = bitcast <8 x i1> %1 to <2 x i4>
110 %3 = extractelement <2 x i4> %2, i32 0
111 %4 = extractelement <2 x i4> %2, i32 1
; Test: sign-bit mask of a <16 x i8> vector, reinterpreted as <2 x i8>.
; Each lane of %a0 is compared signed-less-than zero; the <16 x i1> result is
; bitcast to <2 x i8>, and elements 0 and 1 are extracted as %3 and %4.
; The SSE expectation moves the pmovmskb result back into an XMM register,
; stores it to the stack and adds two bytes loaded from there; the shared AVX
; expectation stays in GPRs and obtains the second byte with `shrl $8`.
; NOTE(review): the instructions consuming %3/%4 and the return are not visible
; in this excerpt; presumably the two elements are summed -- confirm.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
116 define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
117 ; SSE2-SSSE3-LABEL: bitcast_v16i8_to_v2i8:
118 ; SSE2-SSSE3: # %bb.0:
119 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
120 ; SSE2-SSSE3-NEXT: movd %eax, %xmm0
121 ; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
122 ; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
123 ; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
124 ; SSE2-SSSE3-NEXT: retq
126 ; AVX-LABEL: bitcast_v16i8_to_v2i8:
128 ; AVX-NEXT: vpmovmskb %xmm0, %ecx
129 ; AVX-NEXT: movl %ecx, %eax
130 ; AVX-NEXT: shrl $8, %eax
131 ; AVX-NEXT: addb %cl, %al
132 ; AVX-NEXT: # kill: def $al killed $al killed $eax
134 %1 = icmp slt <16 x i8> %a0, zeroinitializer
135 %2 = bitcast <16 x i1> %1 to <2 x i8>
136 %3 = extractelement <2 x i8> %2, i32 0
137 %4 = extractelement <2 x i8> %2, i32 1
; Test: sign-bit mask of a 256-bit <4 x i64> vector, reinterpreted as <2 x i2>.
; Each lane of %a0 is compared signed-less-than zero; the <4 x i1> result is
; bitcast to <2 x i2>, and elements 0 and 1 are extracted as %3 and %4.
; The SSE expectation first combines the two XMM halves with packssdw and then
; uses movmskps; the shared AVX expectation applies vmovmskpd directly to %ymm0
; and issues vzeroupper afterwards.
; NOTE(review): the instructions consuming %3/%4 and the return are not visible
; in this excerpt; presumably the two elements are summed -- confirm.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
146 define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
147 ; SSE2-SSSE3-LABEL: bitcast_v4i64_to_v2i2:
148 ; SSE2-SSSE3: # %bb.0:
149 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
150 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
151 ; SSE2-SSSE3-NEXT: movl %eax, %ecx
152 ; SSE2-SSSE3-NEXT: shrb $2, %cl
153 ; SSE2-SSSE3-NEXT: andb $3, %al
154 ; SSE2-SSSE3-NEXT: addb %cl, %al
155 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
156 ; SSE2-SSSE3-NEXT: retq
158 ; AVX-LABEL: bitcast_v4i64_to_v2i2:
160 ; AVX-NEXT: vmovmskpd %ymm0, %eax
161 ; AVX-NEXT: movl %eax, %ecx
162 ; AVX-NEXT: shrb $2, %cl
163 ; AVX-NEXT: andb $3, %al
164 ; AVX-NEXT: addb %cl, %al
165 ; AVX-NEXT: # kill: def $al killed $al killed $eax
166 ; AVX-NEXT: vzeroupper
168 %1 = icmp slt <4 x i64> %a0, zeroinitializer
169 %2 = bitcast <4 x i1> %1 to <2 x i2>
170 %3 = extractelement <2 x i2> %2, i32 0
171 %4 = extractelement <2 x i2> %2, i32 1
; Test: sign-bit mask of a 256-bit <8 x i32> vector, reinterpreted as <2 x i4>.
; Each lane of %a0 is compared signed-less-than zero; the <8 x i1> result is
; bitcast to <2 x i4>, and elements 0 and 1 are extracted as %3 and %4.
; The SSE expectation narrows the two XMM halves with packssdw + packsswb and
; uses pmovmskb; the shared AVX expectation applies vmovmskps directly to %ymm0
; and issues vzeroupper afterwards.
; NOTE(review): the instructions consuming %3/%4 and the return are not visible
; in this excerpt; presumably the two elements are summed -- confirm.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
176 define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
177 ; SSE2-SSSE3-LABEL: bitcast_v8i32_to_v2i4:
178 ; SSE2-SSSE3: # %bb.0:
179 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
180 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
181 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
182 ; SSE2-SSSE3-NEXT: movl %eax, %ecx
183 ; SSE2-SSSE3-NEXT: shrb $4, %cl
184 ; SSE2-SSSE3-NEXT: andb $15, %al
185 ; SSE2-SSSE3-NEXT: addb %cl, %al
186 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
187 ; SSE2-SSSE3-NEXT: retq
189 ; AVX-LABEL: bitcast_v8i32_to_v2i4:
191 ; AVX-NEXT: vmovmskps %ymm0, %eax
192 ; AVX-NEXT: movl %eax, %ecx
193 ; AVX-NEXT: shrb $4, %cl
194 ; AVX-NEXT: andb $15, %al
195 ; AVX-NEXT: addb %cl, %al
196 ; AVX-NEXT: # kill: def $al killed $al killed $eax
197 ; AVX-NEXT: vzeroupper
199 %1 = icmp slt <8 x i32> %a0, zeroinitializer
200 %2 = bitcast <8 x i1> %1 to <2 x i4>
201 %3 = extractelement <2 x i4> %2, i32 0
202 %4 = extractelement <2 x i4> %2, i32 1
; Test: sign-bit mask of a 256-bit <16 x i16> vector, reinterpreted as <2 x i8>.
; Each lane of %a0 is compared signed-less-than zero; the <16 x i1> result is
; bitcast to <2 x i8>, and elements 0 and 1 are extracted as %3 and %4.
; AVX1 and AVX2 have separate expectations that differ only in the upper-half
; extract (vextractf128 vs vextracti128). The AVX512 expectation uses vpmovw2m,
; stores the mask to the stack with kmovw and reloads it as a vector.
; NOTE(review): the instructions consuming %3/%4 and the return are not visible
; in this excerpt; presumably the two elements are summed -- confirm.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
207 define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
208 ; SSE2-SSSE3-LABEL: bitcast_v16i16_to_v2i8:
209 ; SSE2-SSSE3: # %bb.0:
210 ; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
211 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
212 ; SSE2-SSSE3-NEXT: movd %eax, %xmm0
213 ; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
214 ; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
215 ; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
216 ; SSE2-SSSE3-NEXT: retq
218 ; AVX1-LABEL: bitcast_v16i16_to_v2i8:
220 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
221 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
222 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
223 ; AVX1-NEXT: movl %ecx, %eax
224 ; AVX1-NEXT: shrl $8, %eax
225 ; AVX1-NEXT: addb %cl, %al
226 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
227 ; AVX1-NEXT: vzeroupper
230 ; AVX2-LABEL: bitcast_v16i16_to_v2i8:
232 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
233 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
234 ; AVX2-NEXT: vpmovmskb %xmm0, %ecx
235 ; AVX2-NEXT: movl %ecx, %eax
236 ; AVX2-NEXT: shrl $8, %eax
237 ; AVX2-NEXT: addb %cl, %al
238 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
239 ; AVX2-NEXT: vzeroupper
242 ; AVX512-LABEL: bitcast_v16i16_to_v2i8:
244 ; AVX512-NEXT: vpmovw2m %ymm0, %k0
245 ; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
246 ; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
247 ; AVX512-NEXT: vmovd %xmm0, %ecx
248 ; AVX512-NEXT: vpextrb $1, %xmm0, %eax
249 ; AVX512-NEXT: addb %cl, %al
250 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
251 ; AVX512-NEXT: vzeroupper
253 %1 = icmp slt <16 x i16> %a0, zeroinitializer
254 %2 = bitcast <16 x i1> %1 to <2 x i8>
255 %3 = extractelement <2 x i8> %2, i32 0
256 %4 = extractelement <2 x i8> %2, i32 1
; Test: sign-bit mask of a 256-bit <32 x i8> vector, reinterpreted as <2 x i16>.
; Each lane of %a0 is compared signed-less-than zero; the <32 x i1> result is
; bitcast to <2 x i16>, and elements 0 and 1 are extracted as %3 and %4.
; The SSE and AVX1 expectations run (v)pmovmskb on each 128-bit half and add
; the two results; the AVX2 and AVX512 expectations run a single vpmovmskb on
; %ymm0 and obtain the second operand with `shrl $16`.
; NOTE(review): the instructions consuming %3/%4 and the return are not visible
; in this excerpt; presumably the two elements are summed -- confirm.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
261 define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
262 ; SSE2-SSSE3-LABEL: bitcast_v32i8_to_v2i16:
263 ; SSE2-SSSE3: # %bb.0:
264 ; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %ecx
265 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
266 ; SSE2-SSSE3-NEXT: addl %ecx, %eax
267 ; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
268 ; SSE2-SSSE3-NEXT: retq
270 ; AVX1-LABEL: bitcast_v32i8_to_v2i16:
272 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
273 ; AVX1-NEXT: vpmovmskb %xmm1, %ecx
274 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
275 ; AVX1-NEXT: addl %ecx, %eax
276 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
277 ; AVX1-NEXT: vzeroupper
280 ; AVX2-LABEL: bitcast_v32i8_to_v2i16:
282 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
283 ; AVX2-NEXT: movl %ecx, %eax
284 ; AVX2-NEXT: shrl $16, %eax
285 ; AVX2-NEXT: addl %ecx, %eax
286 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
287 ; AVX2-NEXT: vzeroupper
290 ; AVX512-LABEL: bitcast_v32i8_to_v2i16:
292 ; AVX512-NEXT: vpmovmskb %ymm0, %ecx
293 ; AVX512-NEXT: movl %ecx, %eax
294 ; AVX512-NEXT: shrl $16, %eax
295 ; AVX512-NEXT: addl %ecx, %eax
296 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
297 ; AVX512-NEXT: vzeroupper
299 %1 = icmp slt <32 x i8> %a0, zeroinitializer
300 %2 = bitcast <32 x i1> %1 to <2 x i16>
301 %3 = extractelement <2 x i16> %2, i32 0
302 %4 = extractelement <2 x i16> %2, i32 1
; Test: sign-bit mask of a 512-bit <8 x i64> vector, reinterpreted as <2 x i4>.
; Each lane of %a0 is compared signed-less-than zero; the <8 x i1> result is
; bitcast to <2 x i4>, and elements 0 and 1 are extracted as %3 and %4.
; Every configuration has its own expectation here: SSE narrows four XMM
; registers with pack instructions, AVX1 and AVX2 narrow two YMM registers and
; use vmovmskps, and AVX512 compares %zmm0 into mask register %k0 (vpcmpgtq).
; All of them finish with the `shrb $4` / `andb $15` / `addb` sequence.
; NOTE(review): the instructions consuming %3/%4 and the return are not visible
; in this excerpt; presumably the two elements are summed -- confirm.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
311 define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
312 ; SSE2-SSSE3-LABEL: bitcast_v8i64_to_v2i4:
313 ; SSE2-SSSE3: # %bb.0:
314 ; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
315 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
316 ; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0
317 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
318 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
319 ; SSE2-SSSE3-NEXT: movl %eax, %ecx
320 ; SSE2-SSSE3-NEXT: shrb $4, %cl
321 ; SSE2-SSSE3-NEXT: andb $15, %al
322 ; SSE2-SSSE3-NEXT: addb %cl, %al
323 ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
324 ; SSE2-SSSE3-NEXT: retq
326 ; AVX1-LABEL: bitcast_v8i64_to_v2i4:
328 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
329 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
330 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
331 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
332 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
333 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
334 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
335 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
336 ; AVX1-NEXT: vmovmskps %ymm0, %eax
337 ; AVX1-NEXT: movl %eax, %ecx
338 ; AVX1-NEXT: shrb $4, %cl
339 ; AVX1-NEXT: andb $15, %al
340 ; AVX1-NEXT: addb %cl, %al
341 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
342 ; AVX1-NEXT: vzeroupper
345 ; AVX2-LABEL: bitcast_v8i64_to_v2i4:
347 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
348 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
349 ; AVX2-NEXT: vmovmskps %ymm0, %eax
350 ; AVX2-NEXT: movl %eax, %ecx
351 ; AVX2-NEXT: shrb $4, %cl
352 ; AVX2-NEXT: andb $15, %al
353 ; AVX2-NEXT: addb %cl, %al
354 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
355 ; AVX2-NEXT: vzeroupper
358 ; AVX512-LABEL: bitcast_v8i64_to_v2i4:
360 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
361 ; AVX512-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
362 ; AVX512-NEXT: kmovd %k0, %eax
363 ; AVX512-NEXT: movl %eax, %ecx
364 ; AVX512-NEXT: shrb $4, %cl
365 ; AVX512-NEXT: andb $15, %al
366 ; AVX512-NEXT: addb %cl, %al
367 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
368 ; AVX512-NEXT: vzeroupper
370 %1 = icmp slt <8 x i64> %a0, zeroinitializer
371 %2 = bitcast <8 x i1> %1 to <2 x i4>
372 %3 = extractelement <2 x i4> %2, i32 0
373 %4 = extractelement <2 x i4> %2, i32 1
; Test: sign-bit mask of a 512-bit <16 x i32> vector, reinterpreted as <2 x i8>.
; Each lane of %a0 is compared signed-less-than zero; the <16 x i1> result is
; bitcast to <2 x i8>, and elements 0 and 1 are extracted as %3 and %4.
; Every configuration has its own expectation here. SSE and AVX512 route the
; mask through a stack slot, while AVX1 and AVX2 stay in GPRs and obtain the
; second byte with `shrl $8`. The AVX2 expectation also contains explicit
; vpcmpgtd compares against a zeroed register before packing.
; NOTE(review): the instructions consuming %3/%4 and the return are not visible
; in this excerpt; presumably the two elements are summed -- confirm.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
378 define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
379 ; SSE2-SSSE3-LABEL: bitcast_v16i32_to_v2i8:
380 ; SSE2-SSSE3: # %bb.0:
381 ; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
382 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
383 ; SSE2-SSSE3-NEXT: packsswb %xmm2, %xmm0
384 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
385 ; SSE2-SSSE3-NEXT: movd %eax, %xmm0
386 ; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
387 ; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
388 ; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
389 ; SSE2-SSSE3-NEXT: retq
391 ; AVX1-LABEL: bitcast_v16i32_to_v2i8:
393 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
394 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
395 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
396 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
397 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
398 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
399 ; AVX1-NEXT: movl %ecx, %eax
400 ; AVX1-NEXT: shrl $8, %eax
401 ; AVX1-NEXT: addb %cl, %al
402 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
403 ; AVX1-NEXT: vzeroupper
406 ; AVX2-LABEL: bitcast_v16i32_to_v2i8:
408 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
409 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1
410 ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
411 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
412 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
413 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
414 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
415 ; AVX2-NEXT: vpmovmskb %xmm0, %ecx
416 ; AVX2-NEXT: movl %ecx, %eax
417 ; AVX2-NEXT: shrl $8, %eax
418 ; AVX2-NEXT: addb %cl, %al
419 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
420 ; AVX2-NEXT: vzeroupper
423 ; AVX512-LABEL: bitcast_v16i32_to_v2i8:
425 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
426 ; AVX512-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
427 ; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
428 ; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
429 ; AVX512-NEXT: vmovd %xmm0, %ecx
430 ; AVX512-NEXT: vpextrb $1, %xmm0, %eax
431 ; AVX512-NEXT: addb %cl, %al
432 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
433 ; AVX512-NEXT: vzeroupper
435 %1 = icmp slt <16 x i32> %a0, zeroinitializer
436 %2 = bitcast <16 x i1> %1 to <2 x i8>
437 %3 = extractelement <2 x i8> %2, i32 0
438 %4 = extractelement <2 x i8> %2, i32 1
; Test: sign-bit mask of a 512-bit <32 x i16> vector, reinterpreted as <2 x i16>.
; Each lane of %a0 is compared signed-less-than zero; the <32 x i1> result is
; bitcast to <2 x i16>, and elements 0 and 1 are extracted as %3 and %4.
; The SSE and AVX1 expectations produce two separate 16-bit masks with
; (v)packsswb + (v)pmovmskb and add them; AVX2 builds one 32-bit mask and uses
; `shrl $16`; AVX512 uses vpmovw2m, stores the mask to the stack with kmovd and
; reloads it as a vector.
; NOTE(review): the instructions consuming %3/%4 and the return are not visible
; in this excerpt; presumably the two elements are summed -- confirm.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
443 define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
444 ; SSE2-SSSE3-LABEL: bitcast_v32i16_to_v2i16:
445 ; SSE2-SSSE3: # %bb.0:
446 ; SSE2-SSSE3-NEXT: packsswb %xmm3, %xmm2
447 ; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %ecx
448 ; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
449 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
450 ; SSE2-SSSE3-NEXT: addl %ecx, %eax
451 ; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
452 ; SSE2-SSSE3-NEXT: retq
454 ; AVX1-LABEL: bitcast_v32i16_to_v2i16:
456 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
457 ; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
458 ; AVX1-NEXT: vpmovmskb %xmm1, %ecx
459 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
460 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
461 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
462 ; AVX1-NEXT: addl %ecx, %eax
463 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
464 ; AVX1-NEXT: vzeroupper
467 ; AVX2-LABEL: bitcast_v32i16_to_v2i16:
469 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
470 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
471 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
472 ; AVX2-NEXT: movl %ecx, %eax
473 ; AVX2-NEXT: shrl $16, %eax
474 ; AVX2-NEXT: addl %ecx, %eax
475 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
476 ; AVX2-NEXT: vzeroupper
479 ; AVX512-LABEL: bitcast_v32i16_to_v2i16:
481 ; AVX512-NEXT: vpmovw2m %zmm0, %k0
482 ; AVX512-NEXT: kmovd %k0, -{{[0-9]+}}(%rsp)
483 ; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
484 ; AVX512-NEXT: vmovd %xmm0, %ecx
485 ; AVX512-NEXT: vpextrw $1, %xmm0, %eax
486 ; AVX512-NEXT: addl %ecx, %eax
487 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
488 ; AVX512-NEXT: vzeroupper
490 %1 = icmp slt <32 x i16> %a0, zeroinitializer
491 %2 = bitcast <32 x i1> %1 to <2 x i16>
492 %3 = extractelement <2 x i16> %2, i32 0
493 %4 = extractelement <2 x i16> %2, i32 1
; Test: sign-bit mask of a 512-bit <64 x i8> vector, reinterpreted as <2 x i32>.
; Each lane of %a0 is compared signed-less-than zero; the <64 x i1> result is
; bitcast to <2 x i32>, and elements 0 and 1 are extracted as %3 and %4.
; The SSE expectation assembles a 64-bit mask from four pmovmskb results and
; then moves it through an XMM register (movq + pshufd) to get the upper half;
; AVX1 assembles two 32-bit masks and adds them; AVX2 adds two 256-bit
; vpmovmskb results; AVX512 uses vpmovb2m and goes through a stack slot.
; NOTE(review): the instructions consuming %3/%4 and the return are not visible
; in this excerpt; presumably the two elements are summed -- confirm.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
498 define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
499 ; SSE2-SSSE3-LABEL: bitcast_v64i8_to_v2i32:
500 ; SSE2-SSSE3: # %bb.0:
501 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
502 ; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %ecx
503 ; SSE2-SSSE3-NEXT: shll $16, %ecx
504 ; SSE2-SSSE3-NEXT: orl %eax, %ecx
505 ; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax
506 ; SSE2-SSSE3-NEXT: pmovmskb %xmm3, %edx
507 ; SSE2-SSSE3-NEXT: shll $16, %edx
508 ; SSE2-SSSE3-NEXT: orl %eax, %edx
509 ; SSE2-SSSE3-NEXT: shlq $32, %rdx
510 ; SSE2-SSSE3-NEXT: orq %rcx, %rdx
511 ; SSE2-SSSE3-NEXT: movq %rdx, %xmm0
512 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
513 ; SSE2-SSSE3-NEXT: movd %xmm0, %eax
514 ; SSE2-SSSE3-NEXT: addl %ecx, %eax
515 ; SSE2-SSSE3-NEXT: retq
517 ; AVX1-LABEL: bitcast_v64i8_to_v2i32:
519 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
520 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
521 ; AVX1-NEXT: vpmovmskb %xmm1, %ecx
522 ; AVX1-NEXT: shll $16, %ecx
523 ; AVX1-NEXT: orl %eax, %ecx
524 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
525 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
526 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
527 ; AVX1-NEXT: shll $16, %eax
528 ; AVX1-NEXT: orl %edx, %eax
529 ; AVX1-NEXT: addl %ecx, %eax
530 ; AVX1-NEXT: vzeroupper
533 ; AVX2-LABEL: bitcast_v64i8_to_v2i32:
535 ; AVX2-NEXT: vpmovmskb %ymm1, %ecx
536 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
537 ; AVX2-NEXT: addl %ecx, %eax
538 ; AVX2-NEXT: vzeroupper
541 ; AVX512-LABEL: bitcast_v64i8_to_v2i32:
543 ; AVX512-NEXT: vpmovb2m %zmm0, %k0
544 ; AVX512-NEXT: kmovq %k0, -{{[0-9]+}}(%rsp)
545 ; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
546 ; AVX512-NEXT: vmovd %xmm0, %ecx
547 ; AVX512-NEXT: vpextrd $1, %xmm0, %eax
548 ; AVX512-NEXT: addl %ecx, %eax
549 ; AVX512-NEXT: vzeroupper
551 %1 = icmp slt <64 x i8> %a0, zeroinitializer
552 %2 = bitcast <64 x i1> %1 to <2 x i32>
553 %3 = extractelement <2 x i32> %2, i32 0
554 %4 = extractelement <2 x i32> %2, i32 1
; Test: sign-bit mask of a 1024-bit <128 x i8> vector, reinterpreted as <2 x i64>.
; Each lane of %a0 is compared signed-less-than zero; the <128 x i1> result is
; bitcast to <2 x i64>, and elements 0 and 1 are extracted as %3 and %4.
; The SSE, AVX1 and AVX2 expectations assemble each 64-bit half from
; (v)pmovmskb results using shifts and ors and then add the halves with addq;
; the AVX512 expectation uses one vpmovb2m + kmovq per ZMM register.
; NOTE(review): the instructions consuming %3/%4 and the return are not visible
; in this excerpt; presumably the two elements are summed -- confirm.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
559 define i64 @bitcast_v128i8_to_v2i64(<128 x i8> %a0) nounwind {
560 ; SSE2-SSSE3-LABEL: bitcast_v128i8_to_v2i64:
561 ; SSE2-SSSE3: # %bb.0:
562 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
563 ; SSE2-SSSE3-NEXT: pmovmskb %xmm5, %ecx
564 ; SSE2-SSSE3-NEXT: shll $16, %ecx
565 ; SSE2-SSSE3-NEXT: orl %eax, %ecx
566 ; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax
567 ; SSE2-SSSE3-NEXT: pmovmskb %xmm7, %edx
568 ; SSE2-SSSE3-NEXT: shll $16, %edx
569 ; SSE2-SSSE3-NEXT: orl %eax, %edx
570 ; SSE2-SSSE3-NEXT: shlq $32, %rdx
571 ; SSE2-SSSE3-NEXT: orq %rcx, %rdx
572 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
573 ; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %ecx
574 ; SSE2-SSSE3-NEXT: shll $16, %ecx
575 ; SSE2-SSSE3-NEXT: orl %eax, %ecx
576 ; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %esi
577 ; SSE2-SSSE3-NEXT: pmovmskb %xmm3, %eax
578 ; SSE2-SSSE3-NEXT: shll $16, %eax
579 ; SSE2-SSSE3-NEXT: orl %esi, %eax
580 ; SSE2-SSSE3-NEXT: shlq $32, %rax
581 ; SSE2-SSSE3-NEXT: orq %rcx, %rax
582 ; SSE2-SSSE3-NEXT: addq %rdx, %rax
583 ; SSE2-SSSE3-NEXT: retq
585 ; AVX1-LABEL: bitcast_v128i8_to_v2i64:
587 ; AVX1-NEXT: vpmovmskb %xmm2, %eax
588 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
589 ; AVX1-NEXT: vpmovmskb %xmm2, %edx
590 ; AVX1-NEXT: shll $16, %edx
591 ; AVX1-NEXT: orl %eax, %edx
592 ; AVX1-NEXT: vpmovmskb %xmm3, %eax
593 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
594 ; AVX1-NEXT: vpmovmskb %xmm2, %ecx
595 ; AVX1-NEXT: shll $16, %ecx
596 ; AVX1-NEXT: orl %eax, %ecx
597 ; AVX1-NEXT: shlq $32, %rcx
598 ; AVX1-NEXT: orq %rdx, %rcx
599 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
600 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
601 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
602 ; AVX1-NEXT: shll $16, %edx
603 ; AVX1-NEXT: orl %eax, %edx
604 ; AVX1-NEXT: vpmovmskb %xmm1, %esi
605 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
606 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
607 ; AVX1-NEXT: shll $16, %eax
608 ; AVX1-NEXT: orl %esi, %eax
609 ; AVX1-NEXT: shlq $32, %rax
610 ; AVX1-NEXT: orq %rdx, %rax
611 ; AVX1-NEXT: addq %rcx, %rax
612 ; AVX1-NEXT: vzeroupper
615 ; AVX2-LABEL: bitcast_v128i8_to_v2i64:
617 ; AVX2-NEXT: vpmovmskb %ymm3, %eax
618 ; AVX2-NEXT: shlq $32, %rax
619 ; AVX2-NEXT: vpmovmskb %ymm2, %ecx
620 ; AVX2-NEXT: orq %rax, %rcx
621 ; AVX2-NEXT: vpmovmskb %ymm1, %edx
622 ; AVX2-NEXT: shlq $32, %rdx
623 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
624 ; AVX2-NEXT: orq %rdx, %rax
625 ; AVX2-NEXT: addq %rcx, %rax
626 ; AVX2-NEXT: vzeroupper
629 ; AVX512-LABEL: bitcast_v128i8_to_v2i64:
631 ; AVX512-NEXT: vpmovb2m %zmm1, %k0
632 ; AVX512-NEXT: kmovq %k0, %rcx
633 ; AVX512-NEXT: vpmovb2m %zmm0, %k0
634 ; AVX512-NEXT: kmovq %k0, %rax
635 ; AVX512-NEXT: addq %rcx, %rax
636 ; AVX512-NEXT: vzeroupper
638 %1 = icmp slt <128 x i8> %a0, zeroinitializer
639 %2 = bitcast <128 x i1> %1 to <2 x i64>
640 %3 = extractelement <2 x i64> %2, i32 0
641 %4 = extractelement <2 x i64> %2, i32 1