1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
3 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
4 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
5 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
6 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
7 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; Horizontal add of two <2 x double> inputs, written as scalar code:
;   result[0] = a[0] + a[1]
;   result[1] = b[0] + b[1]
; Every run configuration shares the CHECK prefix here, so all targets are
; expected to produce a single <2 x double> fadd fed by two shuffles of
; (a, b): mask <0, 2> selects a[0], b[0] and mask <1, 3> selects a[1], b[1]
; (mask indices >= 2 address the second shuffle operand).
13 define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) {
14 ; CHECK-LABEL: @test_v2f64(
15 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
16 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
17 ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
18 ; CHECK-NEXT: ret <2 x double> [[TMP3]]
; Scalar input pattern: extract every lane, add adjacent pairs, then rebuild
; the result vector one element at a time starting from undef.
20 %a0 = extractelement <2 x double> %a, i32 0
21 %a1 = extractelement <2 x double> %a, i32 1
22 %b0 = extractelement <2 x double> %b, i32 0
23 %b1 = extractelement <2 x double> %b, i32 1
24 %r0 = fadd double %a0, %a1
25 %r1 = fadd double %b0, %b1
26 %r00 = insertelement <2 x double> undef, double %r0, i32 0
27 %r01 = insertelement <2 x double> %r00, double %r1, i32 1
; Horizontal add of two <4 x float> inputs, written as scalar code:
;   result = { a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3] }
; Every run configuration shares the CHECK prefix here. The expected output
; is one <4 x float> fadd whose operands are the even-indexed lanes
; (mask <0, 2, 4, 6>) and the odd-indexed lanes (mask <1, 3, 5, 7>) of the
; (a, b) pair; mask indices >= 4 address the second shuffle operand.
31 define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b) {
32 ; CHECK-LABEL: @test_v4f32(
33 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
34 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
35 ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
36 ; CHECK-NEXT: ret <4 x float> [[TMP3]]
; Scalar input pattern: extract every lane, add adjacent pairs, then rebuild
; the result vector one element at a time starting from undef.
38 %a0 = extractelement <4 x float> %a, i32 0
39 %a1 = extractelement <4 x float> %a, i32 1
40 %a2 = extractelement <4 x float> %a, i32 2
41 %a3 = extractelement <4 x float> %a, i32 3
42 %b0 = extractelement <4 x float> %b, i32 0
43 %b1 = extractelement <4 x float> %b, i32 1
44 %b2 = extractelement <4 x float> %b, i32 2
45 %b3 = extractelement <4 x float> %b, i32 3
46 %r0 = fadd float %a0, %a1
47 %r1 = fadd float %a2, %a3
48 %r2 = fadd float %b0, %b1
49 %r3 = fadd float %b2, %b3
50 %r00 = insertelement <4 x float> undef, float %r0, i32 0
51 %r01 = insertelement <4 x float> %r00, float %r1, i32 1
52 %r02 = insertelement <4 x float> %r01, float %r2, i32 2
53 %r03 = insertelement <4 x float> %r02, float %r3, i32 3
; Integer horizontal add of two <2 x i64> inputs, written as scalar code:
;   result[0] = a[0] + a[1]
;   result[1] = b[0] + b[1]
; Every run configuration shares the CHECK prefix here. The expected output
; is one <2 x i64> add of two shuffles of (a, b): mask <0, 2> selects
; a[0], b[0] and mask <1, 3> selects a[1], b[1].
57 define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) {
58 ; CHECK-LABEL: @test_v2i64(
59 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 2>
60 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
61 ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
62 ; CHECK-NEXT: ret <2 x i64> [[TMP3]]
; Scalar input pattern: extract every lane, add adjacent pairs, then rebuild
; the result vector one element at a time starting from undef.
64 %a0 = extractelement <2 x i64> %a, i32 0
65 %a1 = extractelement <2 x i64> %a, i32 1
66 %b0 = extractelement <2 x i64> %b, i32 0
67 %b1 = extractelement <2 x i64> %b, i32 1
68 %r0 = add i64 %a0, %a1
69 %r1 = add i64 %b0, %b1
70 %r00 = insertelement <2 x i64> undef, i64 %r0, i32 0
71 %r01 = insertelement <2 x i64> %r00, i64 %r1, i32 1
; Integer horizontal add of two <4 x i32> inputs, written as scalar code:
;   result = { a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3] }
; Every run configuration shares the CHECK prefix here. The expected output
; is one <4 x i32> add whose operands are the even-indexed lanes
; (mask <0, 2, 4, 6>) and the odd-indexed lanes (mask <1, 3, 5, 7>) of the
; (a, b) pair.
75 define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) {
76 ; CHECK-LABEL: @test_v4i32(
77 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
78 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
79 ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
80 ; CHECK-NEXT: ret <4 x i32> [[TMP3]]
; Scalar input pattern: extract every lane, add adjacent pairs, then rebuild
; the result vector one element at a time starting from undef.
82 %a0 = extractelement <4 x i32> %a, i32 0
83 %a1 = extractelement <4 x i32> %a, i32 1
84 %a2 = extractelement <4 x i32> %a, i32 2
85 %a3 = extractelement <4 x i32> %a, i32 3
86 %b0 = extractelement <4 x i32> %b, i32 0
87 %b1 = extractelement <4 x i32> %b, i32 1
88 %b2 = extractelement <4 x i32> %b, i32 2
89 %b3 = extractelement <4 x i32> %b, i32 3
90 %r0 = add i32 %a0, %a1
91 %r1 = add i32 %a2, %a3
92 %r2 = add i32 %b0, %b1
93 %r3 = add i32 %b2, %b3
94 %r00 = insertelement <4 x i32> undef, i32 %r0, i32 0
95 %r01 = insertelement <4 x i32> %r00, i32 %r1, i32 1
96 %r02 = insertelement <4 x i32> %r01, i32 %r2, i32 2
97 %r03 = insertelement <4 x i32> %r02, i32 %r3, i32 3
; Integer horizontal add of two <8 x i16> inputs, written as scalar code:
;   result[0..3] = pairwise sums of a (a[0]+a[1], ..., a[6]+a[7])
;   result[4..7] = pairwise sums of b (b[0]+b[1], ..., b[6]+b[7])
; Every run configuration shares the CHECK prefix here. The expected output
; is one <8 x i16> add whose operands are the even-indexed lanes
; (mask <0, 2, ..., 14>) and the odd-indexed lanes (mask <1, 3, ..., 15>) of
; the (a, b) pair; mask indices >= 8 address the second shuffle operand.
101 define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
102 ; CHECK-LABEL: @test_v8i16(
103 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
104 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
105 ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
106 ; CHECK-NEXT: ret <8 x i16> [[TMP3]]
; Scalar input pattern: extract every lane, add adjacent pairs, then rebuild
; the result vector one element at a time starting from undef.
108 %a0 = extractelement <8 x i16> %a, i32 0
109 %a1 = extractelement <8 x i16> %a, i32 1
110 %a2 = extractelement <8 x i16> %a, i32 2
111 %a3 = extractelement <8 x i16> %a, i32 3
112 %a4 = extractelement <8 x i16> %a, i32 4
113 %a5 = extractelement <8 x i16> %a, i32 5
114 %a6 = extractelement <8 x i16> %a, i32 6
115 %a7 = extractelement <8 x i16> %a, i32 7
116 %b0 = extractelement <8 x i16> %b, i32 0
117 %b1 = extractelement <8 x i16> %b, i32 1
118 %b2 = extractelement <8 x i16> %b, i32 2
119 %b3 = extractelement <8 x i16> %b, i32 3
120 %b4 = extractelement <8 x i16> %b, i32 4
121 %b5 = extractelement <8 x i16> %b, i32 5
122 %b6 = extractelement <8 x i16> %b, i32 6
123 %b7 = extractelement <8 x i16> %b, i32 7
124 %r0 = add i16 %a0, %a1
125 %r1 = add i16 %a2, %a3
126 %r2 = add i16 %a4, %a5
127 %r3 = add i16 %a6, %a7
128 %r4 = add i16 %b0, %b1
129 %r5 = add i16 %b2, %b3
130 %r6 = add i16 %b4, %b5
131 %r7 = add i16 %b6, %b7
132 %r00 = insertelement <8 x i16> undef, i16 %r0, i32 0
133 %r01 = insertelement <8 x i16> %r00, i16 %r1, i32 1
134 %r02 = insertelement <8 x i16> %r01, i16 %r2, i32 2
135 %r03 = insertelement <8 x i16> %r02, i16 %r3, i32 3
136 %r04 = insertelement <8 x i16> %r03, i16 %r4, i32 4
137 %r05 = insertelement <8 x i16> %r04, i16 %r5, i32 5
138 %r06 = insertelement <8 x i16> %r05, i16 %r6, i32 6
139 %r07 = insertelement <8 x i16> %r06, i16 %r7, i32 7
; Horizontal add of two <4 x double> inputs, written as scalar code. Note the
; a/b interleaving of the result lanes:
;   result = { a[0]+a[1], b[0]+b[1], a[2]+a[3], b[2]+b[3] }
; Expectations differ per check prefix:
;  * SSE prefix (run without -mcpu) and SLM prefix (-mcpu=slm): the work is
;    split into two <2 x double> fadds. The first uses masks <0, 4> / <1, 5>
;    (a[0], b[0] and a[1], b[1]); the second uses masks <2, 6> / <3, 7>
;    (a[2], b[2] and a[3], b[3]). A final shufflevector with mask
;    <0, 1, 2, 3> concatenates the two halves.
;  * AVX prefix (corei7-avx, core-avx2, knl, skx runs): a single
;    <4 x double> fadd with masks <0, 4, 2, 6> and <1, 5, 3, 7>.
147 define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
148 ; SSE-LABEL: @test_v4f64(
149 ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
150 ; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
151 ; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
152 ; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
153 ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
154 ; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
155 ; SSE-NEXT: [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
156 ; SSE-NEXT: ret <4 x double> [[R032]]
158 ; SLM-LABEL: @test_v4f64(
159 ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
160 ; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
161 ; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
162 ; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
163 ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
164 ; SLM-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
165 ; SLM-NEXT: [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
166 ; SLM-NEXT: ret <4 x double> [[R032]]
168 ; AVX-LABEL: @test_v4f64(
169 ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
170 ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
171 ; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
172 ; AVX-NEXT: ret <4 x double> [[TMP3]]
; Scalar input pattern: extract every lane, add adjacent pairs (alternating
; between a and b), then rebuild the result vector element by element.
174 %a0 = extractelement <4 x double> %a, i32 0
175 %a1 = extractelement <4 x double> %a, i32 1
176 %a2 = extractelement <4 x double> %a, i32 2
177 %a3 = extractelement <4 x double> %a, i32 3
178 %b0 = extractelement <4 x double> %b, i32 0
179 %b1 = extractelement <4 x double> %b, i32 1
180 %b2 = extractelement <4 x double> %b, i32 2
181 %b3 = extractelement <4 x double> %b, i32 3
182 %r0 = fadd double %a0, %a1
183 %r1 = fadd double %b0, %b1
184 %r2 = fadd double %a2, %a3
185 %r3 = fadd double %b2, %b3
186 %r00 = insertelement <4 x double> undef, double %r0, i32 0
187 %r01 = insertelement <4 x double> %r00, double %r1, i32 1
188 %r02 = insertelement <4 x double> %r01, double %r2, i32 2
189 %r03 = insertelement <4 x double> %r02, double %r3, i32 3
190 ret <4 x double> %r03
; Horizontal add of two <8 x float> inputs, written as scalar code. The
; result alternates between a and b in groups of two lanes:
;   result = { a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3],
;              a[4]+a[5], a[6]+a[7], b[4]+b[5], b[6]+b[7] }
; Expectations differ per check prefix:
;  * SSE prefix (run without -mcpu) and AVX prefix (corei7-avx, core-avx2,
;    knl, skx runs): a single <8 x float> fadd with masks
;    <0, 2, 8, 10, 4, 6, 12, 14> and <1, 3, 9, 11, 5, 7, 13, 15>.
;  * SLM prefix (-mcpu=slm): two <4 x float> fadds, one for result lanes 0-3
;    (masks <0, 2, 8, 10> / <1, 3, 9, 11>) and one for result lanes 4-7
;    (masks <4, 6, 12, 14> / <5, 7, 13, 15>), concatenated by a final
;    shufflevector with mask <0, 1, ..., 7>.
193 define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
194 ; SSE-LABEL: @test_v8f32(
195 ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
196 ; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
197 ; SSE-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
198 ; SSE-NEXT: ret <8 x float> [[TMP3]]
200 ; SLM-LABEL: @test_v8f32(
201 ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
202 ; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
203 ; SLM-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
204 ; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
205 ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
206 ; SLM-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
207 ; SLM-NEXT: [[R072:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
208 ; SLM-NEXT: ret <8 x float> [[R072]]
210 ; AVX-LABEL: @test_v8f32(
211 ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
212 ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
213 ; AVX-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
214 ; AVX-NEXT: ret <8 x float> [[TMP3]]
; Scalar input pattern: extract every lane, add adjacent pairs in the
; a, a, b, b, a, a, b, b order, then rebuild the result vector element by
; element starting from undef.
216 %a0 = extractelement <8 x float> %a, i32 0
217 %a1 = extractelement <8 x float> %a, i32 1
218 %a2 = extractelement <8 x float> %a, i32 2
219 %a3 = extractelement <8 x float> %a, i32 3
220 %a4 = extractelement <8 x float> %a, i32 4
221 %a5 = extractelement <8 x float> %a, i32 5
222 %a6 = extractelement <8 x float> %a, i32 6
223 %a7 = extractelement <8 x float> %a, i32 7
224 %b0 = extractelement <8 x float> %b, i32 0
225 %b1 = extractelement <8 x float> %b, i32 1
226 %b2 = extractelement <8 x float> %b, i32 2
227 %b3 = extractelement <8 x float> %b, i32 3
228 %b4 = extractelement <8 x float> %b, i32 4
229 %b5 = extractelement <8 x float> %b, i32 5
230 %b6 = extractelement <8 x float> %b, i32 6
231 %b7 = extractelement <8 x float> %b, i32 7
232 %r0 = fadd float %a0, %a1
233 %r1 = fadd float %a2, %a3
234 %r2 = fadd float %b0, %b1
235 %r3 = fadd float %b2, %b3
236 %r4 = fadd float %a4, %a5
237 %r5 = fadd float %a6, %a7
238 %r6 = fadd float %b4, %b5
239 %r7 = fadd float %b6, %b7
240 %r00 = insertelement <8 x float> undef, float %r0, i32 0
241 %r01 = insertelement <8 x float> %r00, float %r1, i32 1
242 %r02 = insertelement <8 x float> %r01, float %r2, i32 2
243 %r03 = insertelement <8 x float> %r02, float %r3, i32 3
244 %r04 = insertelement <8 x float> %r03, float %r4, i32 4
245 %r05 = insertelement <8 x float> %r04, float %r5, i32 5
246 %r06 = insertelement <8 x float> %r05, float %r6, i32 6
247 %r07 = insertelement <8 x float> %r06, float %r7, i32 7
; Integer horizontal add of two <4 x i64> inputs, written as scalar code with
; a/b interleaved result lanes:
;   result = { a[0]+a[1], b[0]+b[1], a[2]+a[3], b[2]+b[3] }
; Every run configuration shares the CHECK prefix here. The expected output
; is one <4 x i64> add of two shuffles of (a, b), with masks <0, 4, 2, 6>
; and <1, 5, 3, 7>; mask indices >= 4 address the second shuffle operand.
251 define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) {
252 ; CHECK-LABEL: @test_v4i64(
253 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
254 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
255 ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
256 ; CHECK-NEXT: ret <4 x i64> [[TMP3]]
; Scalar input pattern: extract every lane, add adjacent pairs (alternating
; between a and b), then rebuild the result vector element by element.
258 %a0 = extractelement <4 x i64> %a, i32 0
259 %a1 = extractelement <4 x i64> %a, i32 1
260 %a2 = extractelement <4 x i64> %a, i32 2
261 %a3 = extractelement <4 x i64> %a, i32 3
262 %b0 = extractelement <4 x i64> %b, i32 0
263 %b1 = extractelement <4 x i64> %b, i32 1
264 %b2 = extractelement <4 x i64> %b, i32 2
265 %b3 = extractelement <4 x i64> %b, i32 3
266 %r0 = add i64 %a0, %a1
267 %r1 = add i64 %b0, %b1
268 %r2 = add i64 %a2, %a3
269 %r3 = add i64 %b2, %b3
270 %r00 = insertelement <4 x i64> undef, i64 %r0, i32 0
271 %r01 = insertelement <4 x i64> %r00, i64 %r1, i32 1
272 %r02 = insertelement <4 x i64> %r01, i64 %r2, i32 2
273 %r03 = insertelement <4 x i64> %r02, i64 %r3, i32 3
; Integer horizontal add of two <8 x i32> inputs, written as scalar code. The
; result alternates between a and b in groups of two lanes:
;   result = { a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3],
;              a[4]+a[5], a[6]+a[7], b[4]+b[5], b[6]+b[7] }
; Every run configuration shares the CHECK prefix here. The expected output
; is one <8 x i32> add of two shuffles of (a, b), with masks
; <0, 2, 8, 10, 4, 6, 12, 14> and <1, 3, 9, 11, 5, 7, 13, 15>; mask indices
; >= 8 address the second shuffle operand.
277 define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) {
278 ; CHECK-LABEL: @test_v8i32(
279 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
280 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
281 ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
282 ; CHECK-NEXT: ret <8 x i32> [[TMP3]]
; Scalar input pattern: extract every lane, add adjacent pairs in the
; a, a, b, b, a, a, b, b order, then rebuild the result vector element by
; element starting from undef.
284 %a0 = extractelement <8 x i32> %a, i32 0
285 %a1 = extractelement <8 x i32> %a, i32 1
286 %a2 = extractelement <8 x i32> %a, i32 2
287 %a3 = extractelement <8 x i32> %a, i32 3
288 %a4 = extractelement <8 x i32> %a, i32 4
289 %a5 = extractelement <8 x i32> %a, i32 5
290 %a6 = extractelement <8 x i32> %a, i32 6
291 %a7 = extractelement <8 x i32> %a, i32 7
292 %b0 = extractelement <8 x i32> %b, i32 0
293 %b1 = extractelement <8 x i32> %b, i32 1
294 %b2 = extractelement <8 x i32> %b, i32 2
295 %b3 = extractelement <8 x i32> %b, i32 3
296 %b4 = extractelement <8 x i32> %b, i32 4
297 %b5 = extractelement <8 x i32> %b, i32 5
298 %b6 = extractelement <8 x i32> %b, i32 6
299 %b7 = extractelement <8 x i32> %b, i32 7
300 %r0 = add i32 %a0, %a1
301 %r1 = add i32 %a2, %a3
302 %r2 = add i32 %b0, %b1
303 %r3 = add i32 %b2, %b3
304 %r4 = add i32 %a4, %a5
305 %r5 = add i32 %a6, %a7
306 %r6 = add i32 %b4, %b5
307 %r7 = add i32 %b6, %b7
308 %r00 = insertelement <8 x i32> undef, i32 %r0, i32 0
309 %r01 = insertelement <8 x i32> %r00, i32 %r1, i32 1
310 %r02 = insertelement <8 x i32> %r01, i32 %r2, i32 2
311 %r03 = insertelement <8 x i32> %r02, i32 %r3, i32 3
312 %r04 = insertelement <8 x i32> %r03, i32 %r4, i32 4
313 %r05 = insertelement <8 x i32> %r04, i32 %r5, i32 5
314 %r06 = insertelement <8 x i32> %r05, i32 %r6, i32 6
315 %r07 = insertelement <8 x i32> %r06, i32 %r7, i32 7
; Integer horizontal add of two <16 x i16> inputs, written as scalar code.
; The result alternates between a and b in groups of four lanes:
;   result[0..3]   = pairwise sums of a[0..7]
;   result[4..7]   = pairwise sums of b[0..7]
;   result[8..11]  = pairwise sums of a[8..15]
;   result[12..15] = pairwise sums of b[8..15]
; Expectations differ per check prefix:
;  * SSE prefix (run without -mcpu): two <8 x i16> adds, one for result
;    lanes 0-7 (masks <0, 2, 4, 6, 16, 18, 20, 22> and the matching odd
;    lanes) and one for result lanes 8-15 (masks <8, 10, 12, 14, 24, 26,
;    28, 30> and the matching odd lanes), concatenated by a final
;    shufflevector with mask <0, 1, ..., 15>.
;  * SLM prefix (-mcpu=slm) and AVX prefix (corei7-avx, core-avx2, knl, skx
;    runs): a single <16 x i16> add whose shuffle masks are the two SSE
;    half-masks placed back to back.
319 define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
320 ; SSE-LABEL: @test_v16i16(
321 ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22>
322 ; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23>
323 ; SSE-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
324 ; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
325 ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
326 ; SSE-NEXT: [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]]
327 ; SSE-NEXT: [[RV152:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
328 ; SSE-NEXT: ret <16 x i16> [[RV152]]
330 ; SLM-LABEL: @test_v16i16(
331 ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
332 ; SLM-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
333 ; SLM-NEXT: [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
334 ; SLM-NEXT: ret <16 x i16> [[TMP3]]
336 ; AVX-LABEL: @test_v16i16(
337 ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
338 ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
339 ; AVX-NEXT: [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
340 ; AVX-NEXT: ret <16 x i16> [[TMP3]]
; Scalar input pattern: extract all 16 lanes of each input, add adjacent
; pairs in groups of four (a, b, a, b), then rebuild the result vector element
; by element starting from undef.
342 %a0 = extractelement <16 x i16> %a, i32 0
343 %a1 = extractelement <16 x i16> %a, i32 1
344 %a2 = extractelement <16 x i16> %a, i32 2
345 %a3 = extractelement <16 x i16> %a, i32 3
346 %a4 = extractelement <16 x i16> %a, i32 4
347 %a5 = extractelement <16 x i16> %a, i32 5
348 %a6 = extractelement <16 x i16> %a, i32 6
349 %a7 = extractelement <16 x i16> %a, i32 7
350 %a8 = extractelement <16 x i16> %a, i32 8
351 %a9 = extractelement <16 x i16> %a, i32 9
352 %a10 = extractelement <16 x i16> %a, i32 10
353 %a11 = extractelement <16 x i16> %a, i32 11
354 %a12 = extractelement <16 x i16> %a, i32 12
355 %a13 = extractelement <16 x i16> %a, i32 13
356 %a14 = extractelement <16 x i16> %a, i32 14
357 %a15 = extractelement <16 x i16> %a, i32 15
358 %b0 = extractelement <16 x i16> %b, i32 0
359 %b1 = extractelement <16 x i16> %b, i32 1
360 %b2 = extractelement <16 x i16> %b, i32 2
361 %b3 = extractelement <16 x i16> %b, i32 3
362 %b4 = extractelement <16 x i16> %b, i32 4
363 %b5 = extractelement <16 x i16> %b, i32 5
364 %b6 = extractelement <16 x i16> %b, i32 6
365 %b7 = extractelement <16 x i16> %b, i32 7
366 %b8 = extractelement <16 x i16> %b, i32 8
367 %b9 = extractelement <16 x i16> %b, i32 9
368 %b10 = extractelement <16 x i16> %b, i32 10
369 %b11 = extractelement <16 x i16> %b, i32 11
370 %b12 = extractelement <16 x i16> %b, i32 12
371 %b13 = extractelement <16 x i16> %b, i32 13
372 %b14 = extractelement <16 x i16> %b, i32 14
373 %b15 = extractelement <16 x i16> %b, i32 15
374 %r0 = add i16 %a0 , %a1
375 %r1 = add i16 %a2 , %a3
376 %r2 = add i16 %a4 , %a5
377 %r3 = add i16 %a6 , %a7
378 %r4 = add i16 %b0 , %b1
379 %r5 = add i16 %b2 , %b3
380 %r6 = add i16 %b4 , %b5
381 %r7 = add i16 %b6 , %b7
382 %r8 = add i16 %a8 , %a9
383 %r9 = add i16 %a10, %a11
384 %r10 = add i16 %a12, %a13
385 %r11 = add i16 %a14, %a15
386 %r12 = add i16 %b8 , %b9
387 %r13 = add i16 %b10, %b11
388 %r14 = add i16 %b12, %b13
389 %r15 = add i16 %b14, %b15
390 %rv0 = insertelement <16 x i16> undef, i16 %r0 , i32 0
391 %rv1 = insertelement <16 x i16> %rv0 , i16 %r1 , i32 1
392 %rv2 = insertelement <16 x i16> %rv1 , i16 %r2 , i32 2
393 %rv3 = insertelement <16 x i16> %rv2 , i16 %r3 , i32 3
394 %rv4 = insertelement <16 x i16> %rv3 , i16 %r4 , i32 4
395 %rv5 = insertelement <16 x i16> %rv4 , i16 %r5 , i32 5
396 %rv6 = insertelement <16 x i16> %rv5 , i16 %r6 , i32 6
397 %rv7 = insertelement <16 x i16> %rv6 , i16 %r7 , i32 7
398 %rv8 = insertelement <16 x i16> %rv7 , i16 %r8 , i32 8
399 %rv9 = insertelement <16 x i16> %rv8 , i16 %r9 , i32 9
400 %rv10 = insertelement <16 x i16> %rv9 , i16 %r10, i32 10
401 %rv11 = insertelement <16 x i16> %rv10, i16 %r11, i32 11
402 %rv12 = insertelement <16 x i16> %rv11, i16 %r12, i32 12
403 %rv13 = insertelement <16 x i16> %rv12, i16 %r13, i32 13
404 %rv14 = insertelement <16 x i16> %rv13, i16 %r14, i32 14
405 %rv15 = insertelement <16 x i16> %rv14, i16 %r15, i32 15