; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512
; Test ADDSUB ISel patterns.
;
; Functions below are obtained from the following source:
;
; typedef double double2 __attribute__((ext_vector_type(2)));
; typedef double double4 __attribute__((ext_vector_type(4)));
; typedef float float4 __attribute__((ext_vector_type(4)));
; typedef float float8 __attribute__((ext_vector_type(8)));
;
; float4 test1(float4 A, float4 B) {
;   float4 X = A - B;
;   float4 Y = A + B;
;   return (float4){X[0], Y[1], X[2], Y[3]};
; }
;
; float8 test2(float8 A, float8 B) {
;   float8 X = A - B;
;   float8 Y = A + B;
;   return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
; }
;
; double4 test3(double4 A, double4 B) {
;   double4 X = A - B;
;   double4 Y = A + B;
;   return (double4){X[0], Y[1], X[2], Y[3]};
; }
;
; double2 test4(double2 A, double2 B) {
;   double2 X = A - B;
;   double2 Y = A + B;
;   return (double2){X[0], Y[1]};
; }

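; In each test the even result lanes take the subtracted value and the odd
; result lanes take the added value, which is exactly the lane pattern of the
; (V)ADDSUBPS/PD instructions; the shufflevector masks below encode that
; interleave.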
define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: test1:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sub = fsub <4 x float> %A, %B
  %add = fadd <4 x float> %A, %B
  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}

define <8 x float> @test2(<8 x float> %A, <8 x float> %B) {
; SSE-LABEL: test2:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps %xmm2, %xmm0
; SSE-NEXT:    addsubps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %sub = fsub <8 x float> %A, %B
  %add = fadd <8 x float> %A, %B
  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}

define <4 x double> @test3(<4 x double> %A, <4 x double> %B) {
; SSE-LABEL: test3:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd %xmm2, %xmm0
; SSE-NEXT:    addsubpd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %sub = fsub <4 x double> %A, %B
  %add = fadd <4 x double> %A, %B
  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}

define <2 x double> @test4(<2 x double> %A, <2 x double> %B) {
; SSE-LABEL: test4:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %add = fadd <2 x double> %A, %B
  %sub = fsub <2 x double> %A, %B
  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}

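; 512-bit tests. There is no 512-bit ADDSUB instruction, so under AVX512F the
; pattern is expanded instead: test5 performs the subtract and then merges in
; the add through an odd-lane mask (0xAAAA), while test6 performs both
; operations and interleaves the results with a shuffle.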
define <16 x float> @test5(<16 x float> %A, <16 x float> %B) {
; SSE-LABEL: test5:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps %xmm4, %xmm0
; SSE-NEXT:    addsubps %xmm5, %xmm1
; SSE-NEXT:    addsubps %xmm6, %xmm2
; SSE-NEXT:    addsubps %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: test5:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddsubps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddsubps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: test5:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vsubps %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; AVX512-NEXT:    kmovw %eax, %k1
; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm2 {%k1}
; AVX512-NEXT:    vmovaps %zmm2, %zmm0
; AVX512-NEXT:    retq
  %add = fadd <16 x float> %A, %B
  %sub = fsub <16 x float> %A, %B
  %vecinit2 = shufflevector <16 x float> %sub, <16 x float> %add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x float> %vecinit2
}

define <8 x double> @test6(<8 x double> %A, <8 x double> %B) {
; SSE-LABEL: test6:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd %xmm4, %xmm0
; SSE-NEXT:    addsubpd %xmm5, %xmm1
; SSE-NEXT:    addsubpd %xmm6, %xmm2
; SSE-NEXT:    addsubpd %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: test6:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddsubpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddsubpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: test6:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vsubpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm2[1],zmm0[2],zmm2[3],zmm0[4],zmm2[5],zmm0[6],zmm2[7]
; AVX512-NEXT:    retq
  %add = fadd <8 x double> %A, %B
  %sub = fsub <8 x double> %A, %B
  %vecinit2 = shufflevector <8 x double> %sub, <8 x double> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x double> %vecinit2
}

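; The following "b" tests take the second operand from memory and check that
; the load is folded into the (V)ADDSUB instruction's memory operand.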
define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
; SSE-LABEL: test1b:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1b:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %B
  %add = fadd <4 x float> %A, %1
  %sub = fsub <4 x float> %A, %1
  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}

define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {
; SSE-LABEL: test2b:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    addsubps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2b:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %B
  %add = fadd <8 x float> %A, %1
  %sub = fsub <8 x float> %A, %1
  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}

define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {
; SSE-LABEL: test3b:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3b:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %B
  %add = fadd <4 x double> %A, %1
  %sub = fsub <4 x double> %A, %1
  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}

define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
; SSE-LABEL: test4b:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4b:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %B
  %sub = fsub <2 x double> %A, %1
  %add = fadd <2 x double> %A, %1
  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}

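; The following "c" tests commute the shufflevector operands: the addition
; comes first and the subtraction second, with the mask indices swapped to
; match. ISel should still recognize the ADDSUB pattern.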
define <4 x float> @test1c(<4 x float> %A, <4 x float>* %B) {
; SSE-LABEL: test1c:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1c:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %B
  %add = fadd <4 x float> %A, %1
  %sub = fsub <4 x float> %A, %1
  %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x float> %vecinit6
}

define <8 x float> @test2c(<8 x float> %A, <8 x float>* %B) {
; SSE-LABEL: test2c:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    addsubps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2c:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %B
  %add = fadd <8 x float> %A, %1
  %sub = fsub <8 x float> %A, %1
  %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x float> %vecinit14
}

define <4 x double> @test3c(<4 x double> %A, <4 x double>* %B) {
; SSE-LABEL: test3c:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3c:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %B
  %add = fadd <4 x double> %A, %1
  %sub = fsub <4 x double> %A, %1
  %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %vecinit6
}

define <2 x double> @test4c(<2 x double> %A, <2 x double>* %B) {
; SSE-LABEL: test4c:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4c:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %B
  %sub = fsub <2 x double> %A, %1
  %add = fadd <2 x double> %A, %1
  %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %vecinit2
}