1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
4 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
5 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
8 ; ExtractElement - Constant Index
; Extracts from a constant all-zero <2 x i64> vector using an undef index.
; Only the function labels are checked for the SSE and AVX runs here.
11 define i64 @extract_undef_index_from_zero_vec() nounwind {
12 ; SSE-LABEL: extract_undef_index_from_zero_vec:
16 ; AVX-LABEL: extract_undef_index_from_zero_vec:
19 %E = extractelement <2 x i64> zeroinitializer, i64 undef
; Same as the zero-vector case, but the source is a constant <2 x i64>
; vector whose lanes are both -1; the index is still undef.
23 define i64 @extract_undef_index_from_nonzero_vec() nounwind {
24 ; SSE-LABEL: extract_undef_index_from_nonzero_vec:
28 ; AVX-LABEL: extract_undef_index_from_nonzero_vec:
31 %E = extractelement <2 x i64> <i64 -1, i64 -1>, i64 undef
; Extracts byte 1 of a <16 x i8> with a constant i256 index.
; The SSE2 run expects the low dword moved to %eax and shifted right by 8;
; the SSE4.1 and AVX runs expect a single (v)pextrb $1.
35 define i8 @extractelement_v16i8_1(<16 x i8> %a) nounwind {
36 ; SSE2-LABEL: extractelement_v16i8_1:
38 ; SSE2-NEXT: movd %xmm0, %eax
39 ; SSE2-NEXT: shrl $8, %eax
40 ; SSE2-NEXT: # kill: def $al killed $al killed $eax
43 ; SSE41-LABEL: extractelement_v16i8_1:
45 ; SSE41-NEXT: pextrb $1, %xmm0, %eax
46 ; SSE41-NEXT: # kill: def $al killed $al killed $eax
49 ; AVX-LABEL: extractelement_v16i8_1:
51 ; AVX-NEXT: vpextrb $1, %xmm0, %eax
52 ; AVX-NEXT: # kill: def $al killed $al killed $eax
54 %b = extractelement <16 x i8> %a, i256 1
; Extracts byte 11 of a <16 x i8> with a constant i256 index.
; Byte 11 is the high byte of word 5, so the SSE2 run expects pextrw $5
; followed by a right shift of 8; SSE4.1 and AVX runs expect (v)pextrb $11.
58 define i8 @extractelement_v16i8_11(<16 x i8> %a) nounwind {
59 ; SSE2-LABEL: extractelement_v16i8_11:
61 ; SSE2-NEXT: pextrw $5, %xmm0, %eax
62 ; SSE2-NEXT: shrl $8, %eax
63 ; SSE2-NEXT: # kill: def $al killed $al killed $eax
66 ; SSE41-LABEL: extractelement_v16i8_11:
68 ; SSE41-NEXT: pextrb $11, %xmm0, %eax
69 ; SSE41-NEXT: # kill: def $al killed $al killed $eax
72 ; AVX-LABEL: extractelement_v16i8_11:
74 ; AVX-NEXT: vpextrb $11, %xmm0, %eax
75 ; AVX-NEXT: # kill: def $al killed $al killed $eax
77 %b = extractelement <16 x i8> %a, i256 11
; Extracts byte 14 of a <16 x i8> with a constant i256 index.
; Byte 14 is the low byte of word 7, so the SSE2 run expects pextrw $7 with
; no shift; SSE4.1 and AVX runs expect (v)pextrb $14.
81 define i8 @extractelement_v16i8_14(<16 x i8> %a) nounwind {
82 ; SSE2-LABEL: extractelement_v16i8_14:
84 ; SSE2-NEXT: pextrw $7, %xmm0, %eax
85 ; SSE2-NEXT: # kill: def $al killed $al killed $eax
88 ; SSE41-LABEL: extractelement_v16i8_14:
90 ; SSE41-NEXT: pextrb $14, %xmm0, %eax
91 ; SSE41-NEXT: # kill: def $al killed $al killed $eax
94 ; AVX-LABEL: extractelement_v16i8_14:
96 ; AVX-NEXT: vpextrb $14, %xmm0, %eax
97 ; AVX-NEXT: # kill: def $al killed $al killed $eax
99 %b = extractelement <16 x i8> %a, i256 14
; Extracts byte 1 of a <32 x i8> with a constant i256 index.
; All runs read the element from %xmm0; the expected sequences match the
; 16-byte case, with the AVX runs additionally expecting vzeroupper.
103 define i8 @extractelement_v32i8_1(<32 x i8> %a) nounwind {
104 ; SSE2-LABEL: extractelement_v32i8_1:
106 ; SSE2-NEXT: movd %xmm0, %eax
107 ; SSE2-NEXT: shrl $8, %eax
108 ; SSE2-NEXT: # kill: def $al killed $al killed $eax
111 ; SSE41-LABEL: extractelement_v32i8_1:
113 ; SSE41-NEXT: pextrb $1, %xmm0, %eax
114 ; SSE41-NEXT: # kill: def $al killed $al killed $eax
117 ; AVX-LABEL: extractelement_v32i8_1:
119 ; AVX-NEXT: vpextrb $1, %xmm0, %eax
120 ; AVX-NEXT: # kill: def $al killed $al killed $eax
121 ; AVX-NEXT: vzeroupper
123 %b = extractelement <32 x i8> %a, i256 1
; Extracts byte 17 of a <32 x i8> with a constant i256 index.
; The SSE runs read byte 1 (17 - 16) from %xmm1. The AVX1 and AVX2 runs first
; extract the upper 128 bits of %ymm0 (vextractf128 vs. vextracti128) and
; then expect vpextrb $1.
127 define i8 @extractelement_v32i8_17(<32 x i8> %a) nounwind {
128 ; SSE2-LABEL: extractelement_v32i8_17:
130 ; SSE2-NEXT: movd %xmm1, %eax
131 ; SSE2-NEXT: shrl $8, %eax
132 ; SSE2-NEXT: # kill: def $al killed $al killed $eax
135 ; SSE41-LABEL: extractelement_v32i8_17:
137 ; SSE41-NEXT: pextrb $1, %xmm1, %eax
138 ; SSE41-NEXT: # kill: def $al killed $al killed $eax
141 ; AVX1-LABEL: extractelement_v32i8_17:
143 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
144 ; AVX1-NEXT: vpextrb $1, %xmm0, %eax
145 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
146 ; AVX1-NEXT: vzeroupper
149 ; AVX2-LABEL: extractelement_v32i8_17:
151 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
152 ; AVX2-NEXT: vpextrb $1, %xmm0, %eax
153 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
154 ; AVX2-NEXT: vzeroupper
156 %b = extractelement <32 x i8> %a, i256 17
; Extracts word 0 of a <8 x i16> with a constant i256 index.
; All runs expect a plain (v)movd of the low lane into %eax.
; The %i parameter is not used by the extract shown here.
160 define i16 @extractelement_v8i16_0(<8 x i16> %a, i256 %i) nounwind {
161 ; SSE-LABEL: extractelement_v8i16_0:
163 ; SSE-NEXT: movd %xmm0, %eax
164 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
167 ; AVX-LABEL: extractelement_v8i16_0:
169 ; AVX-NEXT: vmovd %xmm0, %eax
170 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
172 %b = extractelement <8 x i16> %a, i256 0
; Extracts word 3 of a <8 x i16> with a constant i256 index.
; All runs expect a single (v)pextrw $3.
; The %i parameter is not used by the extract shown here.
176 define i16 @extractelement_v8i16_3(<8 x i16> %a, i256 %i) nounwind {
177 ; SSE-LABEL: extractelement_v8i16_3:
179 ; SSE-NEXT: pextrw $3, %xmm0, %eax
180 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
183 ; AVX-LABEL: extractelement_v8i16_3:
185 ; AVX-NEXT: vpextrw $3, %xmm0, %eax
186 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
188 %b = extractelement <8 x i16> %a, i256 3
; Extracts word 0 of a <16 x i16> with a constant i256 index.
; All runs expect a (v)movd from %xmm0; the AVX runs also expect vzeroupper.
; The %i parameter is not used by the extract shown here.
192 define i16 @extractelement_v16i16_0(<16 x i16> %a, i256 %i) nounwind {
193 ; SSE-LABEL: extractelement_v16i16_0:
195 ; SSE-NEXT: movd %xmm0, %eax
196 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
199 ; AVX-LABEL: extractelement_v16i16_0:
201 ; AVX-NEXT: vmovd %xmm0, %eax
202 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
203 ; AVX-NEXT: vzeroupper
205 %b = extractelement <16 x i16> %a, i256 0
; Extracts word 13 of a <16 x i16> with a constant i256 index.
; The SSE runs read word 5 (13 - 8) from %xmm1. The AVX1 and AVX2 runs first
; extract the upper 128 bits of %ymm0 and then expect vpextrw $5.
; The %i parameter is not used by the extract shown here.
209 define i16 @extractelement_v16i16_13(<16 x i16> %a, i256 %i) nounwind {
210 ; SSE-LABEL: extractelement_v16i16_13:
212 ; SSE-NEXT: pextrw $5, %xmm1, %eax
213 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
216 ; AVX1-LABEL: extractelement_v16i16_13:
218 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
219 ; AVX1-NEXT: vpextrw $5, %xmm0, %eax
220 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
221 ; AVX1-NEXT: vzeroupper
224 ; AVX2-LABEL: extractelement_v16i16_13:
226 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
227 ; AVX2-NEXT: vpextrw $5, %xmm0, %eax
228 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
229 ; AVX2-NEXT: vzeroupper
231 %b = extractelement <16 x i16> %a, i256 13
; Extracts dword 0 of a <4 x i32> with a constant i256 index.
; All runs expect a single (v)movd from %xmm0 to %eax.
235 define i32 @extractelement_v4i32_0(<4 x i32> %a) nounwind {
236 ; SSE-LABEL: extractelement_v4i32_0:
238 ; SSE-NEXT: movd %xmm0, %eax
241 ; AVX-LABEL: extractelement_v4i32_0:
243 ; AVX-NEXT: vmovd %xmm0, %eax
245 %b = extractelement <4 x i32> %a, i256 0
; Extracts dword 3 of a <4 x i32> with a constant i256 index.
; The SSE2 run expects lane 3 shuffled into the low lane (pshufd) followed by
; movd; the SSE4.1 and AVX runs expect a single (v)extractps $3.
249 define i32 @extractelement_v4i32_3(<4 x i32> %a) nounwind {
250 ; SSE2-LABEL: extractelement_v4i32_3:
252 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
253 ; SSE2-NEXT: movd %xmm0, %eax
256 ; SSE41-LABEL: extractelement_v4i32_3:
258 ; SSE41-NEXT: extractps $3, %xmm0, %eax
261 ; AVX-LABEL: extractelement_v4i32_3:
263 ; AVX-NEXT: vextractps $3, %xmm0, %eax
265 %b = extractelement <4 x i32> %a, i256 3
; Extracts dword 0 of a <8 x i32> with a constant i256 index.
; This test previously extracted index 4, which duplicated
; extractelement_v8i32_4 and left element 0 of the 256-bit vector untested.
; With index 0 the element is the low lane of %xmm0, so no upper-half
; extract is expected; the AVX runs still expect vzeroupper because the
; argument arrives in a %ymm register.
; NOTE: assertions in this file are autogenerated; re-run
; utils/update_llc_test_checks.py after this change to confirm them.
269 define i32 @extractelement_v8i32_0(<8 x i32> %a) nounwind {
270 ; SSE-LABEL: extractelement_v8i32_0:
272 ; SSE-NEXT: movd %xmm0, %eax
275 ; AVX1-LABEL: extractelement_v8i32_0:
278 ; AVX1-NEXT: vmovd %xmm0, %eax
279 ; AVX1-NEXT: vzeroupper
282 ; AVX2-LABEL: extractelement_v8i32_0:
285 ; AVX2-NEXT: vmovd %xmm0, %eax
286 ; AVX2-NEXT: vzeroupper
288 %b = extractelement <8 x i32> %a, i256 0
; Extracts dword 4 of a <8 x i32> with a constant i256 index.
; The SSE runs read the low lane of %xmm1. The AVX1 and AVX2 runs extract the
; upper 128 bits of %ymm0 (vextractf128 vs. vextracti128) and then vmovd.
292 define i32 @extractelement_v8i32_4(<8 x i32> %a) nounwind {
293 ; SSE-LABEL: extractelement_v8i32_4:
295 ; SSE-NEXT: movd %xmm1, %eax
298 ; AVX1-LABEL: extractelement_v8i32_4:
300 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
301 ; AVX1-NEXT: vmovd %xmm0, %eax
302 ; AVX1-NEXT: vzeroupper
305 ; AVX2-LABEL: extractelement_v8i32_4:
307 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
308 ; AVX2-NEXT: vmovd %xmm0, %eax
309 ; AVX2-NEXT: vzeroupper
311 %b = extractelement <8 x i32> %a, i256 4
; Extracts dword 7 of a <8 x i32>; note this test uses an i64 constant index
; rather than i256. The SSE runs read lane 3 (7 - 4) of %xmm1. Both AVX runs
; share one expectation: vextractf128 of the upper half, then vextractps $3.
315 define i32 @extractelement_v8i32_7(<8 x i32> %a) nounwind {
316 ; SSE2-LABEL: extractelement_v8i32_7:
318 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3]
319 ; SSE2-NEXT: movd %xmm0, %eax
322 ; SSE41-LABEL: extractelement_v8i32_7:
324 ; SSE41-NEXT: extractps $3, %xmm1, %eax
327 ; AVX-LABEL: extractelement_v8i32_7:
329 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
330 ; AVX-NEXT: vextractps $3, %xmm0, %eax
331 ; AVX-NEXT: vzeroupper
333 %b = extractelement <8 x i32> %a, i64 7
; Extracts qword 0 of a <2 x i64> with a constant i256 index.
; All runs expect a single (v)movq from %xmm0 to %rax.
; The %i parameter is not used by the extract shown here.
337 define i64 @extractelement_v2i64_0(<2 x i64> %a, i256 %i) nounwind {
338 ; SSE-LABEL: extractelement_v2i64_0:
340 ; SSE-NEXT: movq %xmm0, %rax
343 ; AVX-LABEL: extractelement_v2i64_0:
345 ; AVX-NEXT: vmovq %xmm0, %rax
347 %b = extractelement <2 x i64> %a, i256 0
; Extracts qword 1 of a <2 x i64> with a constant i256 index.
; The SSE2 run expects the high qword shuffled into the low half (pshufd)
; followed by movq; the SSE4.1 and AVX runs expect a single (v)pextrq $1.
; The %i parameter is not used by the extract shown here.
351 define i64 @extractelement_v2i64_1(<2 x i64> %a, i256 %i) nounwind {
352 ; SSE2-LABEL: extractelement_v2i64_1:
354 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
355 ; SSE2-NEXT: movq %xmm0, %rax
358 ; SSE41-LABEL: extractelement_v2i64_1:
360 ; SSE41-NEXT: pextrq $1, %xmm0, %rax
363 ; AVX-LABEL: extractelement_v2i64_1:
365 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
367 %b = extractelement <2 x i64> %a, i256 1
; Extracts qword 1 of a <4 x i64> with a constant i256 index.
; The element lives in %xmm0, so the expected sequences match the
; 128-bit case, with the AVX runs additionally expecting vzeroupper.
; The %i parameter is not used by the extract shown here.
371 define i64 @extractelement_v4i64_1(<4 x i64> %a, i256 %i) nounwind {
372 ; SSE2-LABEL: extractelement_v4i64_1:
374 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
375 ; SSE2-NEXT: movq %xmm0, %rax
378 ; SSE41-LABEL: extractelement_v4i64_1:
380 ; SSE41-NEXT: pextrq $1, %xmm0, %rax
383 ; AVX-LABEL: extractelement_v4i64_1:
385 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
386 ; AVX-NEXT: vzeroupper
388 %b = extractelement <4 x i64> %a, i256 1
; Extracts qword 3 of a <4 x i64> with a constant i256 index.
; The SSE runs read qword 1 (3 - 2) from %xmm1. The AVX1 and AVX2 runs first
; extract the upper 128 bits of %ymm0 and then expect vpextrq $1.
; The %i parameter is not used by the extract shown here.
392 define i64 @extractelement_v4i64_3(<4 x i64> %a, i256 %i) nounwind {
393 ; SSE2-LABEL: extractelement_v4i64_3:
395 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
396 ; SSE2-NEXT: movq %xmm0, %rax
399 ; SSE41-LABEL: extractelement_v4i64_3:
401 ; SSE41-NEXT: pextrq $1, %xmm1, %rax
404 ; AVX1-LABEL: extractelement_v4i64_3:
406 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
407 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
408 ; AVX1-NEXT: vzeroupper
411 ; AVX2-LABEL: extractelement_v4i64_3:
413 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
414 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
415 ; AVX2-NEXT: vzeroupper
417 %b = extractelement <4 x i64> %a, i256 3
422 ; ExtractElement - Variable Index
; Extracts a byte of a <16 x i8> at a runtime i256 index %i.
; All runs expect the index in %edi masked with 15, the vector stored to the
; stack, and a zero-extending byte load addressed by %rdi.
425 define i8 @extractelement_v16i8_var(<16 x i8> %a, i256 %i) nounwind {
426 ; SSE-LABEL: extractelement_v16i8_var:
428 ; SSE-NEXT: andl $15, %edi
429 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
430 ; SSE-NEXT: movzbl -24(%rsp,%rdi), %eax
433 ; AVX-LABEL: extractelement_v16i8_var:
435 ; AVX-NEXT: andl $15, %edi
436 ; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
437 ; AVX-NEXT: movzbl -24(%rsp,%rdi), %eax
439 %b = extractelement <16 x i8> %a, i256 %i
; Extracts a byte of a <32 x i8> at a runtime i256 index %i.
; The SSE runs expect the index masked with 31, both %xmm halves stored to the
; stack, and an indexed byte load. The AVX runs expect a frame set up with
; %rbp and %rsp aligned to 32 bytes, %ymm0 stored at (%rsp), the indexed
; load, then frame teardown and vzeroupper.
443 define i8 @extractelement_v32i8_var(<32 x i8> %a, i256 %i) nounwind {
444 ; SSE-LABEL: extractelement_v32i8_var:
446 ; SSE-NEXT: andl $31, %edi
447 ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
448 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
449 ; SSE-NEXT: movzbl -40(%rsp,%rdi), %eax
452 ; AVX-LABEL: extractelement_v32i8_var:
454 ; AVX-NEXT: pushq %rbp
455 ; AVX-NEXT: movq %rsp, %rbp
456 ; AVX-NEXT: andq $-32, %rsp
457 ; AVX-NEXT: subq $64, %rsp
458 ; AVX-NEXT: andl $31, %edi
459 ; AVX-NEXT: vmovaps %ymm0, (%rsp)
460 ; AVX-NEXT: movzbl (%rsp,%rdi), %eax
461 ; AVX-NEXT: movq %rbp, %rsp
462 ; AVX-NEXT: popq %rbp
463 ; AVX-NEXT: vzeroupper
465 %b = extractelement <32 x i8> %a, i256 %i
; Extracts a word of a <8 x i16> at a runtime i256 index %i.
; All runs expect the index masked with 7, the vector stored to the stack,
; and a zero-extending word load scaled by 2.
469 define i16 @extractelement_v8i16_var(<8 x i16> %a, i256 %i) nounwind {
470 ; SSE-LABEL: extractelement_v8i16_var:
472 ; SSE-NEXT: andl $7, %edi
473 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
474 ; SSE-NEXT: movzwl -24(%rsp,%rdi,2), %eax
477 ; AVX-LABEL: extractelement_v8i16_var:
479 ; AVX-NEXT: andl $7, %edi
480 ; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
481 ; AVX-NEXT: movzwl -24(%rsp,%rdi,2), %eax
483 %b = extractelement <8 x i16> %a, i256 %i
; Extracts a word of a <16 x i16> at a runtime i256 index %i.
; The SSE runs expect the index masked with 15, both %xmm halves stored to the
; stack, and a word load scaled by 2. The AVX runs expect a 32-byte-aligned
; frame holding %ymm0 at (%rsp), the scaled load, then teardown and vzeroupper.
487 define i16 @extractelement_v16i16_var(<16 x i16> %a, i256 %i) nounwind {
488 ; SSE-LABEL: extractelement_v16i16_var:
490 ; SSE-NEXT: andl $15, %edi
491 ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
492 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
493 ; SSE-NEXT: movzwl -40(%rsp,%rdi,2), %eax
496 ; AVX-LABEL: extractelement_v16i16_var:
498 ; AVX-NEXT: pushq %rbp
499 ; AVX-NEXT: movq %rsp, %rbp
500 ; AVX-NEXT: andq $-32, %rsp
501 ; AVX-NEXT: subq $64, %rsp
502 ; AVX-NEXT: andl $15, %edi
503 ; AVX-NEXT: vmovaps %ymm0, (%rsp)
504 ; AVX-NEXT: movzwl (%rsp,%rdi,2), %eax
505 ; AVX-NEXT: movq %rbp, %rsp
506 ; AVX-NEXT: popq %rbp
507 ; AVX-NEXT: vzeroupper
509 %b = extractelement <16 x i16> %a, i256 %i
; Extracts a dword of a <4 x i32> at a runtime i256 index %i.
; All runs expect the index masked with 3, the vector stored to the stack,
; and a dword load scaled by 4.
513 define i32 @extractelement_v4i32_var(<4 x i32> %a, i256 %i) nounwind {
514 ; SSE-LABEL: extractelement_v4i32_var:
516 ; SSE-NEXT: andl $3, %edi
517 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
518 ; SSE-NEXT: movl -24(%rsp,%rdi,4), %eax
521 ; AVX-LABEL: extractelement_v4i32_var:
523 ; AVX-NEXT: andl $3, %edi
524 ; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
525 ; AVX-NEXT: movl -24(%rsp,%rdi,4), %eax
527 %b = extractelement <4 x i32> %a, i256 %i
; Extracts a dword of a <8 x i32> at a runtime i256 index %i.
; The SSE runs expect the index masked with 7, both %xmm halves stored to the
; stack, and a dword load scaled by 4. The AVX runs expect a 32-byte-aligned
; frame holding %ymm0 at (%rsp), the scaled load, then teardown and vzeroupper.
531 define i32 @extractelement_v8i32_var(<8 x i32> %a, i256 %i) nounwind {
532 ; SSE-LABEL: extractelement_v8i32_var:
534 ; SSE-NEXT: andl $7, %edi
535 ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
536 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
537 ; SSE-NEXT: movl -40(%rsp,%rdi,4), %eax
540 ; AVX-LABEL: extractelement_v8i32_var:
542 ; AVX-NEXT: pushq %rbp
543 ; AVX-NEXT: movq %rsp, %rbp
544 ; AVX-NEXT: andq $-32, %rsp
545 ; AVX-NEXT: subq $64, %rsp
546 ; AVX-NEXT: andl $7, %edi
547 ; AVX-NEXT: vmovaps %ymm0, (%rsp)
548 ; AVX-NEXT: movl (%rsp,%rdi,4), %eax
549 ; AVX-NEXT: movq %rbp, %rsp
550 ; AVX-NEXT: popq %rbp
551 ; AVX-NEXT: vzeroupper
553 %b = extractelement <8 x i32> %a, i256 %i
; Extracts a qword of a <2 x i64> at a runtime i256 index %i.
; All runs expect the index masked with 1, the vector stored to the stack,
; and a qword load scaled by 8.
557 define i64 @extractelement_v2i64_var(<2 x i64> %a, i256 %i) nounwind {
558 ; SSE-LABEL: extractelement_v2i64_var:
560 ; SSE-NEXT: andl $1, %edi
561 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
562 ; SSE-NEXT: movq -24(%rsp,%rdi,8), %rax
565 ; AVX-LABEL: extractelement_v2i64_var:
567 ; AVX-NEXT: andl $1, %edi
568 ; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
569 ; AVX-NEXT: movq -24(%rsp,%rdi,8), %rax
571 %b = extractelement <2 x i64> %a, i256 %i
; Extracts a qword of a <4 x i64> at a runtime i256 index %i.
; The SSE runs expect the index masked with 3, both %xmm halves stored to the
; stack, and a qword load scaled by 8. The AVX runs expect a 32-byte-aligned
; frame holding %ymm0 at (%rsp), the scaled load, then teardown and vzeroupper.
575 define i64 @extractelement_v4i64_var(<4 x i64> %a, i256 %i) nounwind {
576 ; SSE-LABEL: extractelement_v4i64_var:
578 ; SSE-NEXT: andl $3, %edi
579 ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
580 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
581 ; SSE-NEXT: movq -40(%rsp,%rdi,8), %rax
584 ; AVX-LABEL: extractelement_v4i64_var:
586 ; AVX-NEXT: pushq %rbp
587 ; AVX-NEXT: movq %rsp, %rbp
588 ; AVX-NEXT: andq $-32, %rsp
589 ; AVX-NEXT: subq $64, %rsp
590 ; AVX-NEXT: andl $3, %edi
591 ; AVX-NEXT: vmovaps %ymm0, (%rsp)
592 ; AVX-NEXT: movq (%rsp,%rdi,8), %rax
593 ; AVX-NEXT: movq %rbp, %rsp
594 ; AVX-NEXT: popq %rbp
595 ; AVX-NEXT: vzeroupper
597 %b = extractelement <4 x i64> %a, i256 %i
602 ; ExtractElement - Constant (Out Of Range) Index
; Extracts from a <32 x i8> at constant i256 index -1, which is not a valid
; lane of a 32-element vector. Only the function labels are checked here.
; NOTE(review): the name lacks the "v" used by sibling tests (v32i8).
605 define i8 @extractelement_32i8_m1(<32 x i8> %a) nounwind {
606 ; SSE-LABEL: extractelement_32i8_m1:
610 ; AVX-LABEL: extractelement_32i8_m1:
613 %b = extractelement <32 x i8> %a, i256 -1
; Extracts from a <16 x i16> at constant i256 index -4, which is not a valid
; lane of a 16-element vector. Only the function labels are checked here.
; The %i parameter is not used by the extract shown here.
617 define i16 @extractelement_v16i16_m4(<16 x i16> %a, i256 %i) nounwind {
618 ; SSE-LABEL: extractelement_v16i16_m4:
622 ; AVX-LABEL: extractelement_v16i16_m4:
625 %b = extractelement <16 x i16> %a, i256 -4
; Extracts from a <8 x i32> at constant index 15 (an i64 index here, not
; i256), which is past the last lane of an 8-element vector.
; Only the function labels are checked here.
629 define i32 @extractelement_v8i32_15(<8 x i32> %a) nounwind {
630 ; SSE-LABEL: extractelement_v8i32_15:
634 ; AVX-LABEL: extractelement_v8i32_15:
637 %b = extractelement <8 x i32> %a, i64 15
; Extracts from a <4 x i64> at constant i256 index 4, one past the last lane
; of a 4-element vector. Only the function labels are checked here.
; The %i parameter is not used by the extract shown here.
641 define i64 @extractelement_v4i64_4(<4 x i64> %a, i256 %i) nounwind {
642 ; SSE-LABEL: extractelement_v4i64_4:
646 ; AVX-LABEL: extractelement_v4i64_4:
649 %b = extractelement <4 x i64> %a, i256 4