; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown -mattr=+mmx,+sse2,+ssse3 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+mmx,+sse2,+ssse3 | FileCheck %s --check-prefixes=X64,ALIGN
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+mmx,+sse2,+ssse3,sse-unaligned-mem | FileCheck %s --check-prefixes=X64,UNALIGN

; There are no MMX operations in @t1

define void @t1(i32 %a, ptr %P) nounwind {
; X86-LABEL: t1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $12, %ecx
; X86-NEXT:    movd %ecx, %xmm0
; X86-NEXT:    psllq $32, %xmm0
; X86-NEXT:    movq %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: t1:
; X64:       # %bb.0:
; X64-NEXT:    shll $12, %edi
; X64-NEXT:    movd %edi, %xmm0
; X64-NEXT:    psllq $32, %xmm0
; X64-NEXT:    movq %xmm0, (%rsi)
; X64-NEXT:    retq
  %tmp12 = shl i32 %a, 12
  %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
  %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
  %tmp23 = bitcast <2 x i32> %tmp22 to x86_mmx
  store x86_mmx %tmp23, ptr %P
  ret void
}

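; Element 0 of the load goes into lane 3 of a zero vector; this lowers to
; unpcklpd plus shufps.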
define <4 x float> @t2(ptr %P) nounwind {
; X86-LABEL: t2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorps %xmm0, %xmm0
; X86-NEXT:    xorps %xmm1, %xmm1
; X86-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; X86-NEXT:    retl
;
; X64-LABEL: t2:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
  ret <4 x float> %tmp2
}

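; Elements 2 and 3 of the load fill the low half and the high half is zero,
; so a single movlps into a zeroed register suffices.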
define <4 x float> @t3(ptr %P) nounwind {
; X86-LABEL: t3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorps %xmm0, %xmm0
; X86-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-NEXT:    retl
;
; X64-LABEL: t3:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
  ret <4 x float> %tmp2
}

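; Lane 0 takes element 3 of the load and the rest is zero; the aligned load
; folds straight into shufps as a memory operand.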
define <4 x float> @t4(ptr %P) nounwind {
; X86-LABEL: t4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorps %xmm1, %xmm1
; X86-NEXT:    xorps %xmm0, %xmm0
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X86-NEXT:    retl
;
; X64-LABEL: t4:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
  ret <4 x float> %tmp2
}

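; Same shuffle as @t4, but the load is only 4-byte aligned: an explicit
; movups is needed unless sse-unaligned-mem allows folding the unaligned
; load into shufps.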
define <4 x float> @t4_under_aligned(ptr %P) nounwind {
; X86-LABEL: t4_under_aligned:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movups (%eax), %xmm0
; X86-NEXT:    xorps %xmm1, %xmm1
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; X86-NEXT:    retl
;
; ALIGN-LABEL: t4_under_aligned:
; ALIGN:       # %bb.0:
; ALIGN-NEXT:    movups (%rdi), %xmm0
; ALIGN-NEXT:    xorps %xmm1, %xmm1
; ALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; ALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; ALIGN-NEXT:    retq
;
; UNALIGN-LABEL: t4_under_aligned:
; UNALIGN:       # %bb.0:
; UNALIGN-NEXT:    xorps %xmm1, %xmm1
; UNALIGN-NEXT:    xorps %xmm0, %xmm0
; UNALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; UNALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; UNALIGN-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P, align 4
  %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
  ret <4 x float> %tmp2
}

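; The mask only pins byte 1 into byte 0 and a zero into byte 15; shifting
; every word right by 8 bits (psrlw $8) satisfies both.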
define <16 x i8> @t5(<16 x i8> %x) nounwind {
; X86-LABEL: t5:
; X86:       # %bb.0:
; X86-NEXT:    psrlw $8, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: t5:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $8, %xmm0
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
  ret <16 x i8> %s
}

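; Like @t5 but with undef rather than zero in the trailing lanes; psrlw $8
; still matches.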
define <16 x i8> @t6(<16 x i8> %x) nounwind {
; X86-LABEL: t6:
; X86:       # %bb.0:
; X86-NEXT:    psrlw $8, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: t6:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $8, %xmm0
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

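; Bytes 1 and 2 must land in bytes 14 and 15, with everything below undef:
; a 13-byte left shift of the whole vector (pslldq $13) fits.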
define <16 x i8> @t7(<16 x i8> %x) nounwind {
; X86-LABEL: t7:
; X86:       # %bb.0:
; X86-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; X86-NEXT:    retl
;
; X64-LABEL: t7:
; X64:       # %bb.0:
; X64-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
  ret <16 x i8> %s
}

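; A whole-vector shift down by one byte with a zero filling byte 15,
; i.e. exactly psrldq $1.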
define <16 x i8> @t8(<16 x i8> %x) nounwind {
; X86-LABEL: t8:
; X86:       # %bb.0:
; X86-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X86-NEXT:    retl
;
; X64-LABEL: t8:
; X64:       # %bb.0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
  ret <16 x i8> %s
}

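; The same one-byte shift down, with undef instead of zero at the top;
; psrldq $1 still matches.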
define <16 x i8> @t9(<16 x i8> %x) nounwind {
; X86-LABEL: t9:
; X86:       # %bb.0:
; X86-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X86-NEXT:    retl
;
; X64-LABEL: t9:
; X64:       # %bb.0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 undef, i32 undef>
  ret <16 x i8> %s
}