1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2,+ssse3 | FileCheck %s --check-prefix=X32
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+ssse3 | FileCheck %s --check-prefix=X64
5 ; There are no MMX operations in @t1
; t1: shifts %a left by 12, places the result in element 1 of a <2 x i32>
; vector and the constant 0 in element 0, bitcasts that vector to x86_mmx and
; stores it through %P.
; The assertions for both RUN lines expect the value to be built in an XMM
; register (movd/movq, pshufd, and pslldq on x86_64) and written with a
; single movq store.
7 define void @t1(i32 %a, x86_mmx* %P) nounwind {
10 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
11 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
12 ; X32-NEXT: shll $12, %ecx
13 ; X32-NEXT: movd %ecx, %xmm0
14 ; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
15 ; X32-NEXT: movq %xmm0, (%eax)
20 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
21 ; X64-NEXT: shll $12, %edi
22 ; X64-NEXT: movq %rdi, %xmm0
23 ; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
24 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
25 ; X64-NEXT: movq %xmm0, (%rsi)
27 %tmp12 = shl i32 %a, 12
28 %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
29 %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
30 %tmp23 = bitcast <2 x i32> %tmp22 to x86_mmx
31 store x86_mmx %tmp23, x86_mmx* %P
; t2: loads a <4 x float> from %P and shuffles it with a zero vector using
; mask <4, 4, 4, 0>. Indices 4 and above select from the second (zero)
; operand, so lanes 0-2 are zero and lane 3 is element 0 of the loaded vector.
; The assertions for both RUN lines expect an aligned load (movaps), an xorps
; to materialize zero, and two shufps instructions.
35 define <4 x float> @t2(<4 x float>* %P) nounwind {
38 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
39 ; X32-NEXT: movaps (%eax), %xmm1
40 ; X32-NEXT: xorps %xmm0, %xmm0
41 ; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
42 ; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
47 ; X64-NEXT: movaps (%rdi), %xmm1
48 ; X64-NEXT: xorps %xmm0, %xmm0
49 ; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
50 ; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
52 %tmp1 = load <4 x float>, <4 x float>* %P
53 %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
; t3: loads a <4 x float> from %P and shuffles it with a zero vector using
; mask <2, 3, 4, 4>: lanes 0-1 take elements 2 and 3 of the loaded vector,
; lanes 2-3 take zero from the second operand.
; The assertions for both RUN lines expect an xorps to materialize zero
; followed by a single movlps from memory into the low half of the register.
57 define <4 x float> @t3(<4 x float>* %P) nounwind {
60 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
61 ; X32-NEXT: xorps %xmm0, %xmm0
62 ; X32-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
67 ; X64-NEXT: xorps %xmm0, %xmm0
68 ; X64-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
70 %tmp1 = load <4 x float>, <4 x float>* %P
71 %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
; t4: loads a <4 x float> from %P and shuffles it with a zero vector, this
; time with the zero vector as the FIRST operand. Mask <7, 0, 0, 0> puts
; element 3 of the loaded vector (index 7 = second operand, element 3) in
; lane 0 and zero in lanes 1-3.
; The assertions for both RUN lines expect an aligned load (movaps), an xorps
; to materialize zero, and two shufps instructions.
75 define <4 x float> @t4(<4 x float>* %P) nounwind {
78 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
79 ; X32-NEXT: movaps (%eax), %xmm0
80 ; X32-NEXT: xorps %xmm1, %xmm1
81 ; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
82 ; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
87 ; X64-NEXT: movaps (%rdi), %xmm0
88 ; X64-NEXT: xorps %xmm1, %xmm1
89 ; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
90 ; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
92 %tmp1 = load <4 x float>, <4 x float>* %P
93 %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
; t5: byte shuffle of %x with a zero vector. Only two lanes are defined:
; lane 0 takes byte 1 of %x, and lane 15 takes index 17 (byte 1 of the zero
; operand, i.e. zero). All other lanes are undef.
; The assertions for both RUN lines expect a single psrlw $8.
97 define <16 x i8> @t5(<16 x i8> %x) nounwind {
100 ; X32-NEXT: psrlw $8, %xmm0
105 ; X64-NEXT: psrlw $8, %xmm0
107 %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
; t6: byte shuffle of %x with an undef second operand. Only lane 0 is
; defined (byte 1 of %x); the remaining fifteen lanes are undef.
; The assertions for both RUN lines expect a single psrlw $8.
111 define <16 x i8> @t6(<16 x i8> %x) nounwind {
114 ; X32-NEXT: psrlw $8, %xmm0
119 ; X64-NEXT: psrlw $8, %xmm0
121 %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; t7: byte shuffle of %x with an undef second operand. Only the top two
; lanes are defined: lane 14 takes byte 1 of %x and lane 15 takes byte 2.
; The assertions for both RUN lines expect a single pslldq whose decoded
; result is thirteen zero bytes followed by bytes 0-2 of the input.
125 define <16 x i8> @t7(<16 x i8> %x) nounwind {
128 ; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
133 ; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
135 %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
; t8: byte shuffle of %x with a zero vector. Defined lanes: lane 0 = byte 1,
; lane 7 = byte 8, lane 8 = byte 9 of %x, and lane 15 = index 17 (a byte of
; the zero operand). Every defined lane i reads input byte i+1, with zero in
; the last lane.
; The assertions for both RUN lines expect a single psrldq whose decoded
; result is bytes 1-15 of the input followed by one zero byte.
139 define <16 x i8> @t8(<16 x i8> %x) nounwind {
142 ; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
147 ; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
149 %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
; t9: byte shuffle of %x with an undef second operand. Defined lanes:
; lane 0 = byte 1, lane 6 = byte 7, lane 7 = byte 8, lane 13 = byte 14 of %x.
; Every defined lane i reads input byte i+1; the remaining lanes are undef.
; The assertions for both RUN lines expect a single psrldq whose decoded
; result is bytes 1-15 of the input followed by one zero byte.
153 define <16 x i8> @t9(<16 x i8> %x) nounwind {
156 ; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
161 ; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
163 %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 undef, i32 undef>