1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=core2 -mattr=+sse2 | FileCheck %s
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=core2 -mattr=+sse2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
5 ; FIXME: Ideally we should be able to fold the entire body of @test1 into a
6 ; single paddd instruction. At the moment we produce the sequence
7 ; shufps+paddd+pshufd. This is fixed with the widening legalization.
; test1: a <2 x i32> add smuggled through a double bitcast (payload lives in
; the low 64 bits of xmm0). Default legalization widens the v2i32 add with a
; shufps/pshufd pair around the paddd; with
; -x86-experimental-vector-widening-legalization a single paddd suffices.
9 define double @test1(double %A) {
12 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
13 ; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0
14 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
17 ; CHECK-WIDE-LABEL: test1:
18 ; CHECK-WIDE: # %bb.0:
19 ; CHECK-WIDE-NEXT: paddd {{.*}}(%rip), %xmm0
20 ; CHECK-WIDE-NEXT: retq
21 %1 = bitcast double %A to <2 x i32>
22 %add = add <2 x i32> %1, <i32 3, i32 5>
23 %2 = bitcast <2 x i32> %add to double
; test2: same v2i32-through-double trick as @test1 but with two variable
; operands. No constant-pool shuffle dance is needed, so both legalization
; modes fold the whole body into a single register-register paddd.
27 define double @test2(double %A, double %B) {
30 ; CHECK-NEXT: paddd %xmm1, %xmm0
33 ; CHECK-WIDE-LABEL: test2:
34 ; CHECK-WIDE: # %bb.0:
35 ; CHECK-WIDE-NEXT: paddd %xmm1, %xmm0
36 ; CHECK-WIDE-NEXT: retq
37 %1 = bitcast double %A to <2 x i32>
38 %2 = bitcast double %B to <2 x i32>
39 %add = add <2 x i32> %1, %2
40 %3 = bitcast <2 x i32> %add to double
; test3: <2 x float> fadd viewed through an i64. The value round-trips
; GPR -> XMM -> GPR via movq; the fadd itself is a single addps against a
; constant-pool operand. Both legalization modes produce the same code.
44 define i64 @test3(i64 %A) {
47 ; CHECK-NEXT: movq %rdi, %xmm0
48 ; CHECK-NEXT: addps {{.*}}(%rip), %xmm0
49 ; CHECK-NEXT: movq %xmm0, %rax
52 ; CHECK-WIDE-LABEL: test3:
53 ; CHECK-WIDE: # %bb.0:
54 ; CHECK-WIDE-NEXT: movq %rdi, %xmm0
55 ; CHECK-WIDE-NEXT: addps {{.*}}(%rip), %xmm0
56 ; CHECK-WIDE-NEXT: movq %xmm0, %rax
57 ; CHECK-WIDE-NEXT: retq
58 %1 = bitcast i64 %A to <2 x float>
59 %add = fadd <2 x float> %1, <float 3.0, float 5.0>
60 %2 = bitcast <2 x float> %add to i64
64 ; FIXME: Ideally we should be able to fold the entire body of @test4 into a
65 ; single paddd instruction. This is fixed with the widening legalization.
; test4: integer twin of @test3 — a v2i32 add through an i64 bitcast.
; Default legalization brackets the paddd with a pshufd widen/narrow pair;
; the widening legalization needs only movq/paddd/movq.
67 define i64 @test4(i64 %A) {
70 ; CHECK-NEXT: movq %rdi, %xmm0
71 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
72 ; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0
73 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
74 ; CHECK-NEXT: movq %xmm0, %rax
77 ; CHECK-WIDE-LABEL: test4:
78 ; CHECK-WIDE: # %bb.0:
79 ; CHECK-WIDE-NEXT: movq %rdi, %xmm0
80 ; CHECK-WIDE-NEXT: paddd {{.*}}(%rip), %xmm0
81 ; CHECK-WIDE-NEXT: movq %xmm0, %rax
82 ; CHECK-WIDE-NEXT: retq
83 %1 = bitcast i64 %A to <2 x i32>
84 %add = add <2 x i32> %1, <i32 3, i32 5>
85 %2 = bitcast <2 x i32> %add to i64
; test5: <2 x float> fadd through a double bitcast. The operand is already
; in xmm0, so both legalization modes reduce the whole body to a single
; addps against a constant-pool operand.
89 define double @test5(double %A) {
92 ; CHECK-NEXT: addps {{.*}}(%rip), %xmm0
95 ; CHECK-WIDE-LABEL: test5:
96 ; CHECK-WIDE: # %bb.0:
97 ; CHECK-WIDE-NEXT: addps {{.*}}(%rip), %xmm0
98 ; CHECK-WIDE-NEXT: retq
99 %1 = bitcast double %A to <2 x float>
100 %add = fadd <2 x float> %1, <float 3.0, float 5.0>
101 %2 = bitcast <2 x float> %add to double
105 ; FIXME: Ideally we should be able to fold the entire body of @test6 into a
106 ; single paddw instruction. This is fixed with the widening legalization.
; test6: <4 x i16> add through a double bitcast. Default legalization
; unpacks the halfwords (punpcklwd), adds, then repacks with pshufb; the
; widening legalization folds the body into a single paddw.
108 define double @test6(double %A) {
109 ; CHECK-LABEL: test6:
111 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
112 ; CHECK-NEXT: paddw {{.*}}(%rip), %xmm0
113 ; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
116 ; CHECK-WIDE-LABEL: test6:
117 ; CHECK-WIDE: # %bb.0:
118 ; CHECK-WIDE-NEXT: paddw {{.*}}(%rip), %xmm0
119 ; CHECK-WIDE-NEXT: retq
120 %1 = bitcast double %A to <4 x i16>
121 %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6>
122 %2 = bitcast <4 x i16> %add to double
; test7: variable-operand twin of @test6 — two doubles reinterpreted as
; <4 x i16>. With no constant operand to reshuffle, both legalization modes
; emit a single register-register paddw.
126 define double @test7(double %A, double %B) {
127 ; CHECK-LABEL: test7:
129 ; CHECK-NEXT: paddw %xmm1, %xmm0
132 ; CHECK-WIDE-LABEL: test7:
133 ; CHECK-WIDE: # %bb.0:
134 ; CHECK-WIDE-NEXT: paddw %xmm1, %xmm0
135 ; CHECK-WIDE-NEXT: retq
136 %1 = bitcast double %A to <4 x i16>
137 %2 = bitcast double %B to <4 x i16>
138 %add = add <4 x i16> %1, %2
139 %3 = bitcast <4 x i16> %add to double
143 ; FIXME: Ideally we should be able to fold the entire body of @test8 into a
144 ; single paddb instruction. At the moment we produce the sequence
145 ; punpcklbw+paddb+pshufb. This is fixed with the widening legalization.
; test8: <8 x i8> add through a double bitcast. Default legalization unpacks
; the bytes (punpcklbw), adds, then repacks the low bytes with pshufb; the
; widening legalization folds the body into a single paddb.
147 define double @test8(double %A) {
148 ; CHECK-LABEL: test8:
150 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
151 ; CHECK-NEXT: paddb {{.*}}(%rip), %xmm0
152 ; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
155 ; CHECK-WIDE-LABEL: test8:
156 ; CHECK-WIDE: # %bb.0:
157 ; CHECK-WIDE-NEXT: paddb {{.*}}(%rip), %xmm0
158 ; CHECK-WIDE-NEXT: retq
159 %1 = bitcast double %A to <8 x i8>
160 %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10>
161 %2 = bitcast <8 x i8> %add to double
165 define double @test9(double %A, double %B) {
166 ; CHECK-LABEL: test9:
168 ; CHECK-NEXT: paddb %xmm1, %xmm0
171 ; CHECK-WIDE-LABEL: test9:
172 ; CHECK-WIDE: # %bb.0:
173 ; CHECK-WIDE-NEXT: paddb %xmm1, %xmm0
174 ; CHECK-WIDE-NEXT: retq
175 %1 = bitcast double %A to <8 x i8>
176 %2 = bitcast double %B to <8 x i8>
177 %add = add <8 x i8> %1, %2
178 %3 = bitcast <8 x i8> %add to double