1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; Inserts %x into lane 3 of a zero vector, then extracts a lane selected by an
; i1 index. The index %c compares %y against itself with 'uge', so it is always
; true. The only instruction the checks below expect after the label is the
; return itself.
; NOTE(review): presumably the extract is treated as producing an undefined
; value (per the test name), which is why no value is materialized - confirm.
; NOTE(review): in the generated pattern ret{{[l|q]}}, '[l|q]' is a character
; class, so it also accepts a literal '|'. '[lq]' would be tighter, but the
; line is script-generated and harmless as is.
5 define i32 @extractelt_undef_insertelt(i32 %x, i32 %y) {
6 ; CHECK-LABEL: extractelt_undef_insertelt:
8 ; CHECK-NEXT: ret{{[l|q]}}
9 %b = insertelement <4 x i32> zeroinitializer, i32 %x, i64 3
10 %c = icmp uge i32 %y, %y
11 %d = extractelement <4 x i32> %b, i1 %c
; Bitcasts the scalar i32 %x to <4 x i8> and extracts lane 0.
; The checks below expect scalar code only: on X86 a single byte load from the
; stack argument into %al, on X64 a register copy from %edi to %eax whose low
; byte (%al) carries the result.
15 define i8 @extractelt_bitcast(i32 %x) nounwind {
16 ; X86-LABEL: extractelt_bitcast:
18 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
21 ; X64-LABEL: extractelt_bitcast:
23 ; X64-NEXT: movl %edi, %eax
24 ; X64-NEXT: # kill: def $al killed $al killed $eax
26 %bc = bitcast i32 %x to <4 x i8>
27 %ext = extractelement <4 x i8> %bc, i32 0
31 ; TODO: This should have folded to avoid vector ops, but the transform
32 ; is guarded by 'hasOneUse'. That limitation apparently makes some AMDGPU codegen better.
; Same bitcast-then-extract-lane-0 pattern as a plain i32 -> <4 x i8> extract,
; except that the bitcast vector %bc is additionally stored through %p, so it
; has two uses (the store and the extractelement).
; The checks below record the current output, which goes through vector
; instructions (movd, punpcklbw) on both targets before the low byte is read
; back out of %eax.
35 define i8 @extractelt_bitcast_extra_use(i32 %x, <4 x i8>* %p) nounwind {
36 ; X86-LABEL: extractelt_bitcast_extra_use:
38 ; X86-NEXT: pushl %eax
39 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
40 ; X86-NEXT: movd %eax, %xmm0
41 ; X86-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
42 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
43 ; X86-NEXT: movl %eax, (%ecx)
44 ; X86-NEXT: movd %xmm0, %eax
45 ; X86-NEXT: # kill: def $al killed $al killed $eax
49 ; X64-LABEL: extractelt_bitcast_extra_use:
51 ; X64-NEXT: movd %edi, %xmm0
52 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
53 ; X64-NEXT: movl %edi, (%rsi)
54 ; X64-NEXT: movd %xmm0, %eax
55 ; X64-NEXT: # kill: def $al killed $al killed $eax
57 %bc = bitcast i32 %x to <4 x i8>
58 store <4 x i8> %bc, <4 x i8>* %p
59 %ext = extractelement <4 x i8> %bc, i32 0
; Inserts the i64 %x into lane 0 of an undef <2 x i64>, reinterprets the vector
; as <4 x i32>, and extracts lane 0.
; The checks below expect scalar code only: a 32-bit load of the stack argument
; on X86, and a copy of %rdi to %rax on X64 with the result taken from %eax.
; NOTE(review): the "_le" suffix presumably refers to little-endian lane
; layout, under which lane 0 holds the low 32 bits of %x - confirm.
63 define i32 @trunc_i64_to_i32_le(i64 %x) {
64 ; X86-LABEL: trunc_i64_to_i32_le:
66 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
69 ; X64-LABEL: trunc_i64_to_i32_le:
71 ; X64-NEXT: movq %rdi, %rax
72 ; X64-NEXT: # kill: def $eax killed $eax killed $rax
74 %ins = insertelement <2 x i64> undef, i64 %x, i32 0
75 %bc = bitcast <2 x i64> %ins to <4 x i32>
76 %ext = extractelement <4 x i32> %bc, i32 0
; Inserts the i64 %x into lane 0 of an undef <2 x i64>, reinterprets the vector
; as <8 x i16>, and extracts lane 0.
; The checks below expect scalar code only: a 32-bit load of the stack argument
; on X86 and a copy of %rdi to %rax on X64, with the result taken from %ax in
; both cases.
; NOTE(review): the "_le" suffix presumably refers to little-endian lane
; layout, under which lane 0 holds the low 16 bits of %x - confirm.
80 define i16 @trunc_i64_to_i16_le(i64 %x) {
81 ; X86-LABEL: trunc_i64_to_i16_le:
83 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
84 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
87 ; X64-LABEL: trunc_i64_to_i16_le:
89 ; X64-NEXT: movq %rdi, %rax
90 ; X64-NEXT: # kill: def $ax killed $ax killed $rax
92 %ins = insertelement <2 x i64> undef, i64 %x, i32 0
93 %bc = bitcast <2 x i64> %ins to <8 x i16>
94 %ext = extractelement <8 x i16> %bc, i32 0
; Inserts the i32 %x into lane 0 of an undef <4 x i32>, reinterprets the vector
; as <16 x i8>, and extracts lane 0.
; The checks below expect scalar code only: a byte load of the stack argument
; into %al on X86, and a copy of %edi to %eax on X64 with the result taken
; from %al.
; NOTE(review): the "_le" suffix presumably refers to little-endian lane
; layout, under which lane 0 holds the low 8 bits of %x - confirm.
98 define i8 @trunc_i32_to_i8_le(i32 %x) {
99 ; X86-LABEL: trunc_i32_to_i8_le:
101 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
104 ; X64-LABEL: trunc_i32_to_i8_le:
106 ; X64-NEXT: movl %edi, %eax
107 ; X64-NEXT: # kill: def $al killed $al killed $eax
109 %ins = insertelement <4 x i32> undef, i32 %x, i32 0
110 %bc = bitcast <4 x i32> %ins to <16 x i8>
111 %ext = extractelement <16 x i8> %bc, i32 0