1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
3 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
4 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
5 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
6 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
7 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
; add_sub_v8i32: lanes 0-3 of the result are a[i] + b[i], lanes 4-7 are a[i] - b[i].
; Every run configuration shares the same expectation (common CHECK prefix): one
; <8 x i32> add, one <8 x i32> sub, and a shufflevector taking lanes 0-3 from the
; add and lanes 4-7 (mask indices 12-15) from the sub.
9 define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) {
10 ; CHECK-LABEL: @add_sub_v8i32(
11 ; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], [[B:%.*]]
12 ; CHECK-NEXT: [[TMP2:%.*]] = sub <8 x i32> [[A]], [[B]]
13 ; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
14 ; CHECK-NEXT: ret <8 x i32> [[R7]]
; Scalar input: extract every lane of %a and %b, combine per lane, re-insert.
16 %a0 = extractelement <8 x i32> %a, i32 0
17 %a1 = extractelement <8 x i32> %a, i32 1
18 %a2 = extractelement <8 x i32> %a, i32 2
19 %a3 = extractelement <8 x i32> %a, i32 3
20 %a4 = extractelement <8 x i32> %a, i32 4
21 %a5 = extractelement <8 x i32> %a, i32 5
22 %a6 = extractelement <8 x i32> %a, i32 6
23 %a7 = extractelement <8 x i32> %a, i32 7
24 %b0 = extractelement <8 x i32> %b, i32 0
25 %b1 = extractelement <8 x i32> %b, i32 1
26 %b2 = extractelement <8 x i32> %b, i32 2
27 %b3 = extractelement <8 x i32> %b, i32 3
28 %b4 = extractelement <8 x i32> %b, i32 4
29 %b5 = extractelement <8 x i32> %b, i32 5
30 %b6 = extractelement <8 x i32> %b, i32 6
31 %b7 = extractelement <8 x i32> %b, i32 7
; Low half uses add, high half uses sub.
32 %ab0 = add i32 %a0, %b0
33 %ab1 = add i32 %a1, %b1
34 %ab2 = add i32 %a2, %b2
35 %ab3 = add i32 %a3, %b3
36 %ab4 = sub i32 %a4, %b4
37 %ab5 = sub i32 %a5, %b5
38 %ab6 = sub i32 %a6, %b6
39 %ab7 = sub i32 %a7, %b7
40 %r0 = insertelement <8 x i32> undef, i32 %ab0, i32 0
41 %r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
42 %r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
43 %r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
44 %r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
45 %r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
46 %r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
47 %r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
; add_and_v4i32: lanes 0-1 are a[i] + b[i], lanes 2-3 are a[i] & b[i].
; All run configurations expect a <4 x i32> add, a <4 x i32> and, and a
; shufflevector with mask <0, 1, 6, 7> (lanes 0-1 from the add, 2-3 from the and).
51 define <4 x i32> @add_and_v4i32(<4 x i32> %a, <4 x i32> %b) {
52 ; CHECK-LABEL: @add_and_v4i32(
53 ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A:%.*]], [[B:%.*]]
54 ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A]], [[B]]
55 ; CHECK-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
56 ; CHECK-NEXT: ret <4 x i32> [[R3]]
; Scalar input: extract every lane of %a and %b, combine per lane, re-insert.
58 %a0 = extractelement <4 x i32> %a, i32 0
59 %a1 = extractelement <4 x i32> %a, i32 1
60 %a2 = extractelement <4 x i32> %a, i32 2
61 %a3 = extractelement <4 x i32> %a, i32 3
62 %b0 = extractelement <4 x i32> %b, i32 0
63 %b1 = extractelement <4 x i32> %b, i32 1
64 %b2 = extractelement <4 x i32> %b, i32 2
65 %b3 = extractelement <4 x i32> %b, i32 3
; Low half uses add, high half uses and.
66 %ab0 = add i32 %a0, %b0
67 %ab1 = add i32 %a1, %b1
68 %ab2 = and i32 %a2, %b2
69 %ab3 = and i32 %a3, %b3
70 %r0 = insertelement <4 x i32> undef, i32 %ab0, i32 0
71 %r1 = insertelement <4 x i32> %r0, i32 %ab1, i32 1
72 %r2 = insertelement <4 x i32> %r1, i32 %ab2, i32 2
73 %r3 = insertelement <4 x i32> %r2, i32 %ab3, i32 3
; add_mul_v4i32: lanes 0 and 3 are a[i] * b[i], lanes 1 and 2 are a[i] + b[i].
; Expectations differ per target prefix:
;  - SSE, AVX and AVX512 prefixes expect a <4 x i32> mul, a <4 x i32> add, and a
;    shufflevector with mask <0, 5, 6, 3> (mul in lanes 0/3, add in lanes 1/2).
;  - The SLM prefix expects the scalar code to be left unvectorized.
;    NOTE(review): presumably a cost-model decision for -mcpu=slm; confirm
;    against the X86 cost tables before relying on this explanation.
77 define <4 x i32> @add_mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
78 ; SSE-LABEL: @add_mul_v4i32(
79 ; SSE-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
80 ; SSE-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A]], [[B]]
81 ; SSE-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
82 ; SSE-NEXT: ret <4 x i32> [[R3]]
84 ; SLM-LABEL: @add_mul_v4i32(
85 ; SLM-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 0
86 ; SLM-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
87 ; SLM-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
88 ; SLM-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
89 ; SLM-NEXT: [[B0:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 0
90 ; SLM-NEXT: [[B1:%.*]] = extractelement <4 x i32> [[B]], i32 1
91 ; SLM-NEXT: [[B2:%.*]] = extractelement <4 x i32> [[B]], i32 2
92 ; SLM-NEXT: [[B3:%.*]] = extractelement <4 x i32> [[B]], i32 3
93 ; SLM-NEXT: [[AB0:%.*]] = mul i32 [[A0]], [[B0]]
94 ; SLM-NEXT: [[AB1:%.*]] = add i32 [[A1]], [[B1]]
95 ; SLM-NEXT: [[AB2:%.*]] = add i32 [[A2]], [[B2]]
96 ; SLM-NEXT: [[AB3:%.*]] = mul i32 [[A3]], [[B3]]
97 ; SLM-NEXT: [[R0:%.*]] = insertelement <4 x i32> undef, i32 [[AB0]], i32 0
98 ; SLM-NEXT: [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[AB1]], i32 1
99 ; SLM-NEXT: [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[AB2]], i32 2
100 ; SLM-NEXT: [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[AB3]], i32 3
101 ; SLM-NEXT: ret <4 x i32> [[R3]]
103 ; AVX-LABEL: @add_mul_v4i32(
104 ; AVX-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
105 ; AVX-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A]], [[B]]
106 ; AVX-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
107 ; AVX-NEXT: ret <4 x i32> [[R3]]
109 ; AVX512-LABEL: @add_mul_v4i32(
110 ; AVX512-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
111 ; AVX512-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A]], [[B]]
112 ; AVX512-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
113 ; AVX512-NEXT: ret <4 x i32> [[R3]]
; Scalar input: extract every lane of %a and %b, combine per lane, re-insert.
115 %a0 = extractelement <4 x i32> %a, i32 0
116 %a1 = extractelement <4 x i32> %a, i32 1
117 %a2 = extractelement <4 x i32> %a, i32 2
118 %a3 = extractelement <4 x i32> %a, i32 3
119 %b0 = extractelement <4 x i32> %b, i32 0
120 %b1 = extractelement <4 x i32> %b, i32 1
121 %b2 = extractelement <4 x i32> %b, i32 2
122 %b3 = extractelement <4 x i32> %b, i32 3
; Outer lanes (0, 3) multiply; inner lanes (1, 2) add.
123 %ab0 = mul i32 %a0, %b0
124 %ab1 = add i32 %a1, %b1
125 %ab2 = add i32 %a2, %b2
126 %ab3 = mul i32 %a3, %b3
127 %r0 = insertelement <4 x i32> undef, i32 %ab0, i32 0
128 %r1 = insertelement <4 x i32> %r0, i32 %ab1, i32 1
129 %r2 = insertelement <4 x i32> %r1, i32 %ab2, i32 2
130 %r3 = insertelement <4 x i32> %r2, i32 %ab3, i32 3
; ashr_shl_v8i32: lanes 0-3 are a[i] ashr b[i], lanes 4-7 are a[i] shl b[i],
; with per-lane variable shift amounts taken from %b.
; Expectations differ per target prefix:
;  - The SSE prefix expects the four ashr lanes to remain scalar, an
;    <8 x i32> shl of the full inputs, and a shufflevector that takes lanes 0-3
;    from the scalar insert chain and lanes 4-7 from the vector shl.
;  - SLM, AVX and AVX512 prefixes expect an <8 x i32> ashr, an <8 x i32> shl and
;    a shufflevector with mask <0, 1, 2, 3, 12, 13, 14, 15>.
134 define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
135 ; SSE-LABEL: @ashr_shl_v8i32(
136 ; SSE-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
137 ; SSE-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
138 ; SSE-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
139 ; SSE-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
140 ; SSE-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
141 ; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
142 ; SSE-NEXT: [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
143 ; SSE-NEXT: [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
144 ; SSE-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
145 ; SSE-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
146 ; SSE-NEXT: [[AB2:%.*]] = ashr i32 [[A2]], [[B2]]
147 ; SSE-NEXT: [[AB3:%.*]] = ashr i32 [[A3]], [[B3]]
148 ; SSE-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[A]], [[B]]
149 ; SSE-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
150 ; SSE-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
151 ; SSE-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
152 ; SSE-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
153 ; SSE-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[R3]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
154 ; SSE-NEXT: ret <8 x i32> [[R7]]
156 ; SLM-LABEL: @ashr_shl_v8i32(
157 ; SLM-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
158 ; SLM-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
159 ; SLM-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
160 ; SLM-NEXT: ret <8 x i32> [[R7]]
162 ; AVX-LABEL: @ashr_shl_v8i32(
163 ; AVX-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
164 ; AVX-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
165 ; AVX-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
166 ; AVX-NEXT: ret <8 x i32> [[R7]]
168 ; AVX512-LABEL: @ashr_shl_v8i32(
169 ; AVX512-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
170 ; AVX512-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
171 ; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
172 ; AVX512-NEXT: ret <8 x i32> [[R7]]
; Scalar input: extract every lane of %a and %b, shift per lane, re-insert.
174 %a0 = extractelement <8 x i32> %a, i32 0
175 %a1 = extractelement <8 x i32> %a, i32 1
176 %a2 = extractelement <8 x i32> %a, i32 2
177 %a3 = extractelement <8 x i32> %a, i32 3
178 %a4 = extractelement <8 x i32> %a, i32 4
179 %a5 = extractelement <8 x i32> %a, i32 5
180 %a6 = extractelement <8 x i32> %a, i32 6
181 %a7 = extractelement <8 x i32> %a, i32 7
182 %b0 = extractelement <8 x i32> %b, i32 0
183 %b1 = extractelement <8 x i32> %b, i32 1
184 %b2 = extractelement <8 x i32> %b, i32 2
185 %b3 = extractelement <8 x i32> %b, i32 3
186 %b4 = extractelement <8 x i32> %b, i32 4
187 %b5 = extractelement <8 x i32> %b, i32 5
188 %b6 = extractelement <8 x i32> %b, i32 6
189 %b7 = extractelement <8 x i32> %b, i32 7
; Low half uses ashr, high half uses shl.
190 %ab0 = ashr i32 %a0, %b0
191 %ab1 = ashr i32 %a1, %b1
192 %ab2 = ashr i32 %a2, %b2
193 %ab3 = ashr i32 %a3, %b3
194 %ab4 = shl i32 %a4, %b4
195 %ab5 = shl i32 %a5, %b5
196 %ab6 = shl i32 %a6, %b6
197 %ab7 = shl i32 %a7, %b7
198 %r0 = insertelement <8 x i32> undef, i32 %ab0, i32 0
199 %r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
200 %r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
201 %r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
202 %r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
203 %r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
204 %r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
205 %r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
; ashr_shl_v8i32_const: lanes 0-3 are a[i] ashr 2, lanes 4-7 are a[i] shl 3
; (constant shift amounts, uniform within each half).
; Expectations differ per target prefix:
;  - SSE, SLM and AVX1 prefixes expect the input split into two <4 x i32>
;    halves, a <4 x i32> ashr by splat 2 and a <4 x i32> shl by splat 3, then a
;    concatenating shufflevector <0..7>.
;  - AVX2 and AVX512 prefixes expect an <8 x i32> ashr and an <8 x i32> shl, both
;    by <2, 2, 2, 2, 3, 3, 3, 3>, blended with mask <0, 1, 2, 3, 12, 13, 14, 15>.
209 define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) {
210 ; SSE-LABEL: @ashr_shl_v8i32_const(
211 ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
212 ; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
213 ; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
214 ; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
215 ; SSE-NEXT: [[R7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
216 ; SSE-NEXT: ret <8 x i32> [[R7]]
218 ; SLM-LABEL: @ashr_shl_v8i32_const(
219 ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
220 ; SLM-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
221 ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
222 ; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
223 ; SLM-NEXT: [[R7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
224 ; SLM-NEXT: ret <8 x i32> [[R7]]
226 ; AVX1-LABEL: @ashr_shl_v8i32_const(
227 ; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
228 ; AVX1-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
229 ; AVX1-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
230 ; AVX1-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
231 ; AVX1-NEXT: [[R7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
232 ; AVX1-NEXT: ret <8 x i32> [[R7]]
234 ; AVX2-LABEL: @ashr_shl_v8i32_const(
235 ; AVX2-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], <i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
236 ; AVX2-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], <i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
237 ; AVX2-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
238 ; AVX2-NEXT: ret <8 x i32> [[R7]]
240 ; AVX512-LABEL: @ashr_shl_v8i32_const(
241 ; AVX512-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], <i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
242 ; AVX512-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], <i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
243 ; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
244 ; AVX512-NEXT: ret <8 x i32> [[R7]]
; Scalar input: extract every lane of %a, shift by a constant, re-insert.
246 %a0 = extractelement <8 x i32> %a, i32 0
247 %a1 = extractelement <8 x i32> %a, i32 1
248 %a2 = extractelement <8 x i32> %a, i32 2
249 %a3 = extractelement <8 x i32> %a, i32 3
250 %a4 = extractelement <8 x i32> %a, i32 4
251 %a5 = extractelement <8 x i32> %a, i32 5
252 %a6 = extractelement <8 x i32> %a, i32 6
253 %a7 = extractelement <8 x i32> %a, i32 7
; Low half: arithmetic shift right by 2. High half: shift left by 3.
254 %ab0 = ashr i32 %a0, 2
255 %ab1 = ashr i32 %a1, 2
256 %ab2 = ashr i32 %a2, 2
257 %ab3 = ashr i32 %a3, 2
258 %ab4 = shl i32 %a4, 3
259 %ab5 = shl i32 %a5, 3
260 %ab6 = shl i32 %a6, 3
261 %ab7 = shl i32 %a7, 3
262 %r0 = insertelement <8 x i32> undef, i32 %ab0, i32 0
263 %r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
264 %r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
265 %r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
266 %r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
267 %r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
268 %r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
269 %r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
; ashr_lshr_shl_v8i32: three different opcodes in one build vector:
; lanes 0-1 are a[i] ashr b[i], lanes 2-5 are a[i] lshr b[i], lanes 6-7 are
; a[i] shl b[i]. The recorded expectations show only partial vectorization:
;  - The SSE prefix expects the scalar code to be left entirely unvectorized.
;  - SLM and AVX1 prefixes expect an <8 x i32> lshr whose lanes 2-5 are
;    extracted back out, with the ashr and shl lanes remaining scalar.
;  - AVX2 and AVX512 prefixes additionally expect <4 x i32> ashr and lshr on the
;    low halves (feeding lanes 0-1 and 2-3), an <8 x i32> lshr feeding lanes
;    4-5, and scalar shl for lanes 6-7.
273 define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
274 ; SSE-LABEL: @ashr_lshr_shl_v8i32(
275 ; SSE-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
276 ; SSE-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
277 ; SSE-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
278 ; SSE-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
279 ; SSE-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
280 ; SSE-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
281 ; SSE-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
282 ; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
283 ; SSE-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
284 ; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
285 ; SSE-NEXT: [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
286 ; SSE-NEXT: [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
287 ; SSE-NEXT: [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4
288 ; SSE-NEXT: [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
289 ; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
290 ; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
291 ; SSE-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
292 ; SSE-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
293 ; SSE-NEXT: [[AB2:%.*]] = lshr i32 [[A2]], [[B2]]
294 ; SSE-NEXT: [[AB3:%.*]] = lshr i32 [[A3]], [[B3]]
295 ; SSE-NEXT: [[AB4:%.*]] = lshr i32 [[A4]], [[B4]]
296 ; SSE-NEXT: [[AB5:%.*]] = lshr i32 [[A5]], [[B5]]
297 ; SSE-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
298 ; SSE-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
299 ; SSE-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
300 ; SSE-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
301 ; SSE-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
302 ; SSE-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
303 ; SSE-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
304 ; SSE-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
305 ; SSE-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
306 ; SSE-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
307 ; SSE-NEXT: ret <8 x i32> [[R7]]
309 ; SLM-LABEL: @ashr_lshr_shl_v8i32(
310 ; SLM-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
311 ; SLM-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
312 ; SLM-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
313 ; SLM-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
314 ; SLM-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
315 ; SLM-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
316 ; SLM-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
317 ; SLM-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
318 ; SLM-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
319 ; SLM-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
320 ; SLM-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A]], [[B]]
321 ; SLM-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
322 ; SLM-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
323 ; SLM-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
324 ; SLM-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
325 ; SLM-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
326 ; SLM-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP2]], i32 2
327 ; SLM-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
328 ; SLM-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP3]], i32 3
329 ; SLM-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4
330 ; SLM-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP4]], i32 4
331 ; SLM-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5
332 ; SLM-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP5]], i32 5
333 ; SLM-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
334 ; SLM-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
335 ; SLM-NEXT: ret <8 x i32> [[R7]]
337 ; AVX1-LABEL: @ashr_lshr_shl_v8i32(
338 ; AVX1-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
339 ; AVX1-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
340 ; AVX1-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
341 ; AVX1-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
342 ; AVX1-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
343 ; AVX1-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
344 ; AVX1-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
345 ; AVX1-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
346 ; AVX1-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
347 ; AVX1-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
348 ; AVX1-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A]], [[B]]
349 ; AVX1-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
350 ; AVX1-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
351 ; AVX1-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
352 ; AVX1-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
353 ; AVX1-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
354 ; AVX1-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP2]], i32 2
355 ; AVX1-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
356 ; AVX1-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP3]], i32 3
357 ; AVX1-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4
358 ; AVX1-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP4]], i32 4
359 ; AVX1-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5
360 ; AVX1-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP5]], i32 5
361 ; AVX1-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
362 ; AVX1-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
363 ; AVX1-NEXT: ret <8 x i32> [[R7]]
365 ; AVX2-LABEL: @ashr_lshr_shl_v8i32(
366 ; AVX2-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 6
367 ; AVX2-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
368 ; AVX2-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 6
369 ; AVX2-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
370 ; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
371 ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
372 ; AVX2-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]]
373 ; AVX2-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]]
374 ; AVX2-NEXT: [[TMP5:%.*]] = lshr <8 x i32> [[A]], [[B]]
375 ; AVX2-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
376 ; AVX2-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
377 ; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
378 ; AVX2-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP6]], i32 0
379 ; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
380 ; AVX2-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP7]], i32 1
381 ; AVX2-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
382 ; AVX2-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP8]], i32 2
383 ; AVX2-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
384 ; AVX2-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP9]], i32 3
385 ; AVX2-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP5]], i32 4
386 ; AVX2-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP10]], i32 4
387 ; AVX2-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP5]], i32 5
388 ; AVX2-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP11]], i32 5
389 ; AVX2-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
390 ; AVX2-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
391 ; AVX2-NEXT: ret <8 x i32> [[R7]]
393 ; AVX512-LABEL: @ashr_lshr_shl_v8i32(
394 ; AVX512-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 6
395 ; AVX512-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
396 ; AVX512-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 6
397 ; AVX512-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
398 ; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
399 ; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
400 ; AVX512-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]]
401 ; AVX512-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]]
402 ; AVX512-NEXT: [[TMP5:%.*]] = lshr <8 x i32> [[A]], [[B]]
403 ; AVX512-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
404 ; AVX512-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
405 ; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
406 ; AVX512-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP6]], i32 0
407 ; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
408 ; AVX512-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP7]], i32 1
409 ; AVX512-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
410 ; AVX512-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP8]], i32 2
411 ; AVX512-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
412 ; AVX512-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP9]], i32 3
413 ; AVX512-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP5]], i32 4
414 ; AVX512-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP10]], i32 4
415 ; AVX512-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP5]], i32 5
416 ; AVX512-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP11]], i32 5
417 ; AVX512-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
418 ; AVX512-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
419 ; AVX512-NEXT: ret <8 x i32> [[R7]]
; Scalar input: extract every lane of %a and %b, shift per lane, re-insert.
421 %a0 = extractelement <8 x i32> %a, i32 0
422 %a1 = extractelement <8 x i32> %a, i32 1
423 %a2 = extractelement <8 x i32> %a, i32 2
424 %a3 = extractelement <8 x i32> %a, i32 3
425 %a4 = extractelement <8 x i32> %a, i32 4
426 %a5 = extractelement <8 x i32> %a, i32 5
427 %a6 = extractelement <8 x i32> %a, i32 6
428 %a7 = extractelement <8 x i32> %a, i32 7
429 %b0 = extractelement <8 x i32> %b, i32 0
430 %b1 = extractelement <8 x i32> %b, i32 1
431 %b2 = extractelement <8 x i32> %b, i32 2
432 %b3 = extractelement <8 x i32> %b, i32 3
433 %b4 = extractelement <8 x i32> %b, i32 4
434 %b5 = extractelement <8 x i32> %b, i32 5
435 %b6 = extractelement <8 x i32> %b, i32 6
436 %b7 = extractelement <8 x i32> %b, i32 7
; Opcode layout: ashr x2, lshr x4, shl x2 (groups do not align to 4-lane halves).
437 %ab0 = ashr i32 %a0, %b0
438 %ab1 = ashr i32 %a1, %b1
439 %ab2 = lshr i32 %a2, %b2
440 %ab3 = lshr i32 %a3, %b3
441 %ab4 = lshr i32 %a4, %b4
442 %ab5 = lshr i32 %a5, %b5
443 %ab6 = shl i32 %a6, %b6
444 %ab7 = shl i32 %a7, %b7
445 %r0 = insertelement <8 x i32> undef, i32 %ab0, i32 0
446 %r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
447 %r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
448 %r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
449 %r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
450 %r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
451 %r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
452 %r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
; add_v8i32_undefs: every lane is an add of a[i] with a constant operand, where
; lanes 0 and 4 use undef and the others use 4, 8, 16 (repeated per half).
; All run configurations expect a single <8 x i32> add whose constant operand
; keeps undef in elements 0 and 4, with no extract/insert chain left over.
456 define <8 x i32> @add_v8i32_undefs(<8 x i32> %a) {
457 ; CHECK-LABEL: @add_v8i32_undefs(
458 ; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], <i32 undef, i32 4, i32 8, i32 16, i32 undef, i32 4, i32 8, i32 16>
459 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
; Scalar input: extract every lane of %a, add a constant (or undef), re-insert.
461 %a0 = extractelement <8 x i32> %a, i32 0
462 %a1 = extractelement <8 x i32> %a, i32 1
463 %a2 = extractelement <8 x i32> %a, i32 2
464 %a3 = extractelement <8 x i32> %a, i32 3
465 %a4 = extractelement <8 x i32> %a, i32 4
466 %a5 = extractelement <8 x i32> %a, i32 5
467 %a6 = extractelement <8 x i32> %a, i32 6
468 %a7 = extractelement <8 x i32> %a, i32 7
469 %ab0 = add i32 %a0, undef
470 %ab1 = add i32 %a1, 4
471 %ab2 = add i32 %a2, 8
472 %ab3 = add i32 %a3, 16
473 %ab4 = add i32 %a4, undef
474 %ab5 = add i32 %a5, 4
475 %ab6 = add i32 %a6, 8
476 %ab7 = add i32 %a7, 16
477 %r0 = insertelement <8 x i32> undef, i32 %ab0, i32 0
478 %r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
479 %r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
480 %r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
481 %r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
482 %r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
483 %r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
484 %r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
; sdiv_v8i32_undefs: same shape as the add-with-undefs case but using sdiv, so
; lanes 0 and 4 divide by undef while the others divide by 4, 8, 16.
; All run configurations expect the sdivs to stay scalar. The expected output
; contains no extract, sdiv or insert for lanes 0 and 4 at all; the insert chain
; starts at lane 1 and skips from lane 3 to lane 5.
; NOTE(review): presumably the undef-divisor lanes are folded away by the
; instcombine run because division by undef is undefined behaviour, and a
; vector sdiv with undef divisor elements would be unsafe - confirm against
; the LangRef before relying on this rationale.
488 define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) {
489 ; CHECK-LABEL: @sdiv_v8i32_undefs(
490 ; CHECK-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 1
491 ; CHECK-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
492 ; CHECK-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
493 ; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
494 ; CHECK-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
495 ; CHECK-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
496 ; CHECK-NEXT: [[AB1:%.*]] = sdiv i32 [[A1]], 4
497 ; CHECK-NEXT: [[AB2:%.*]] = sdiv i32 [[A2]], 8
498 ; CHECK-NEXT: [[AB3:%.*]] = sdiv i32 [[A3]], 16
499 ; CHECK-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4
500 ; CHECK-NEXT: [[AB6:%.*]] = sdiv i32 [[A6]], 8
501 ; CHECK-NEXT: [[AB7:%.*]] = sdiv i32 [[A7]], 16
502 ; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x i32> undef, i32 [[AB1]], i32 1
503 ; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
504 ; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
505 ; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB5]], i32 5
506 ; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
507 ; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
508 ; CHECK-NEXT: ret <8 x i32> [[R7]]
; Scalar input: extract every lane of %a, divide by a constant (or undef), re-insert.
510 %a0 = extractelement <8 x i32> %a, i32 0
511 %a1 = extractelement <8 x i32> %a, i32 1
512 %a2 = extractelement <8 x i32> %a, i32 2
513 %a3 = extractelement <8 x i32> %a, i32 3
514 %a4 = extractelement <8 x i32> %a, i32 4
515 %a5 = extractelement <8 x i32> %a, i32 5
516 %a6 = extractelement <8 x i32> %a, i32 6
517 %a7 = extractelement <8 x i32> %a, i32 7
518 %ab0 = sdiv i32 %a0, undef
519 %ab1 = sdiv i32 %a1, 4
520 %ab2 = sdiv i32 %a2, 8
521 %ab3 = sdiv i32 %a3, 16
522 %ab4 = sdiv i32 %a4, undef
523 %ab5 = sdiv i32 %a5, 4
524 %ab6 = sdiv i32 %a6, 8
525 %ab7 = sdiv i32 %a7, 16
526 %r0 = insertelement <8 x i32> undef, i32 %ab0, i32 0
527 %r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
528 %r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
529 %r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
530 %r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
531 %r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
532 %r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
533 %r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
; add_sub_v8i32_splat: combines each lane of %a with the same scalar %b.
; Lanes 0-3 are adds (operand order alternates between a[i] + b and b + a[i]),
; lanes 4-7 are b - a[i].
; All run configurations expect %b to be splatted (insertelement into lane 0
; followed by a zeroinitializer-mask shufflevector), then an <8 x i32> add and
; an <8 x i32> sub with the splat as the first operand, blended with mask
; <0, 1, 2, 3, 12, 13, 14, 15>.
537 define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) {
538 ; CHECK-LABEL: @add_sub_v8i32_splat(
539 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> undef, i32 [[B:%.*]], i32 0
540 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> zeroinitializer
541 ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[A:%.*]]
542 ; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP2]], [[A]]
543 ; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
544 ; CHECK-NEXT: ret <8 x i32> [[R7]]
; Scalar input: extract every lane of %a, combine with scalar %b, re-insert.
546 %a0 = extractelement <8 x i32> %a, i32 0
547 %a1 = extractelement <8 x i32> %a, i32 1
548 %a2 = extractelement <8 x i32> %a, i32 2
549 %a3 = extractelement <8 x i32> %a, i32 3
550 %a4 = extractelement <8 x i32> %a, i32 4
551 %a5 = extractelement <8 x i32> %a, i32 5
552 %a6 = extractelement <8 x i32> %a, i32 6
553 %a7 = extractelement <8 x i32> %a, i32 7
; The adds deliberately swap operand order on odd lanes; the subs always have
; %b on the left-hand side.
554 %ab0 = add i32 %a0, %b
555 %ab1 = add i32 %b, %a1
556 %ab2 = add i32 %a2, %b
557 %ab3 = add i32 %b, %a3
558 %ab4 = sub i32 %b, %a4
559 %ab5 = sub i32 %b, %a5
560 %ab6 = sub i32 %b, %a6
561 %ab7 = sub i32 %b, %a7
562 %r0 = insertelement <8 x i32> undef, i32 %ab0, i32 0
563 %r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
564 %r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
565 %r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
566 %r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
567 %r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
568 %r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
569 %r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7