1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; Both configurations run only the vector-combine pass on x86-64. The SSE and
; AVX check prefixes record where the expected output differs between the
; SSE2 and AVX2 feature sets; the common prefix covers identical output.
; The pass is selected with -passes= because the bare -vector-combine flag is
; legacy pass manager syntax that newer opt builds reject.
2 ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
3 ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
5 declare void @use_i8(i8)
6 declare void @use_f32(float)
8 ; Eliminating extract is profitable.
; Both operands come from lane 0 of <16 x i8> vectors. The checks expect a
; single vector add followed by one extract of lane 0.
10 define i8 @ext0_ext0_add(<16 x i8> %x, <16 x i8> %y) {
11 ; CHECK-LABEL: @ext0_ext0_add(
12 ; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[X:%.*]], [[Y:%.*]]
13 ; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
14 ; CHECK-NEXT: ret i8 [[R]]
16 %e0 = extractelement <16 x i8> %x, i32 0
17 %e1 = extractelement <16 x i8> %y, i32 0
22 ; Eliminating extract is still profitable. Flags propagate.
; Lane-1 extracts feed a scalar `add nsw nuw`. The checks expect a vector add
; that still carries nuw and nsw, followed by one extract of lane 1.
24 define i8 @ext1_ext1_add_flags(<16 x i8> %x, <16 x i8> %y) {
25 ; CHECK-LABEL: @ext1_ext1_add_flags(
26 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <16 x i8> [[X:%.*]], [[Y:%.*]]
27 ; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1
28 ; CHECK-NEXT: ret i8 [[R]]
30 %e0 = extractelement <16 x i8> %x, i32 1
31 %e1 = extractelement <16 x i8> %y, i32 1
32 %r = add nsw nuw i8 %e0, %e1
36 ; Negative test - eliminating extract is profitable, but vector shift is expensive.
; The checks expect the IR to stay scalar: two lane-1 extracts and a scalar
; shl, with no vector shift formed.
38 define i8 @ext1_ext1_shl(<16 x i8> %x, <16 x i8> %y) {
39 ; CHECK-LABEL: @ext1_ext1_shl(
40 ; CHECK-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 1
41 ; CHECK-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 1
42 ; CHECK-NEXT: [[R:%.*]] = shl i8 [[E0]], [[E1]]
43 ; CHECK-NEXT: ret i8 [[R]]
45 %e0 = extractelement <16 x i8> %x, i32 1
46 %e1 = extractelement <16 x i8> %y, i32 1
51 ; Negative test - eliminating extract is profitable, but vector multiply is expensive.
; The checks expect the IR to stay scalar: two lane-13 extracts and a scalar
; mul, with no vector multiply formed.
53 define i8 @ext13_ext13_mul(<16 x i8> %x, <16 x i8> %y) {
54 ; CHECK-LABEL: @ext13_ext13_mul(
55 ; CHECK-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 13
56 ; CHECK-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 13
57 ; CHECK-NEXT: [[R:%.*]] = mul i8 [[E0]], [[E1]]
58 ; CHECK-NEXT: ret i8 [[R]]
60 %e0 = extractelement <16 x i8> %x, i32 13
61 %e1 = extractelement <16 x i8> %y, i32 13
66 ; Negative test - cost is irrelevant because sdiv has potential UB.
; The checks expect the IR to stay scalar: two lane-0 extracts and a scalar
; sdiv, with no vector division formed.
68 define i8 @ext0_ext0_sdiv(<16 x i8> %x, <16 x i8> %y) {
69 ; CHECK-LABEL: @ext0_ext0_sdiv(
70 ; CHECK-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
71 ; CHECK-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 0
72 ; CHECK-NEXT: [[R:%.*]] = sdiv i8 [[E0]], [[E1]]
73 ; CHECK-NEXT: ret i8 [[R]]
75 %e0 = extractelement <16 x i8> %x, i32 0
76 %e1 = extractelement <16 x i8> %y, i32 0
81 ; Extracts are free and vector op has same cost as scalar, but we
82 ; speculatively transform to vector to create more optimization
; Lane-0 extracts of <2 x double> feed a scalar fadd. The checks expect a
; vector fadd followed by one extract of lane 0.
85 define double @ext0_ext0_fadd(<2 x double> %x, <2 x double> %y) {
86 ; CHECK-LABEL: @ext0_ext0_fadd(
87 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[X:%.*]], [[Y:%.*]]
88 ; CHECK-NEXT: [[R:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
89 ; CHECK-NEXT: ret double [[R]]
91 %e0 = extractelement <2 x double> %x, i32 0
92 %e1 = extractelement <2 x double> %y, i32 0
93 %r = fadd double %e0, %e1
97 ; Eliminating extract is profitable. Flags propagate.
; Lane-1 extracts feed a scalar `fsub fast`. The checks expect a vector fsub
; that keeps the fast flag, followed by one extract of lane 1.
99 define double @ext1_ext1_fsub(<2 x double> %x, <2 x double> %y) {
100 ; CHECK-LABEL: @ext1_ext1_fsub(
101 ; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <2 x double> [[X:%.*]], [[Y:%.*]]
102 ; CHECK-NEXT: [[R:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
103 ; CHECK-NEXT: ret double [[R]]
105 %e0 = extractelement <2 x double> %x, i32 1
106 %e1 = extractelement <2 x double> %y, i32 1
107 %r = fsub fast double %e0, %e1
111 ; Negative test - type mismatch.
; The operands are extracted from vectors of different widths (<2 x double>
; and <4 x double>). The checks expect the scalar extracts and the scalar
; `fadd fast` to be left unchanged.
113 define double @ext1_ext1_fadd_different_types(<2 x double> %x, <4 x double> %y) {
114 ; CHECK-LABEL: @ext1_ext1_fadd_different_types(
115 ; CHECK-NEXT: [[E0:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
116 ; CHECK-NEXT: [[E1:%.*]] = extractelement <4 x double> [[Y:%.*]], i32 1
117 ; CHECK-NEXT: [[R:%.*]] = fadd fast double [[E0]], [[E1]]
118 ; CHECK-NEXT: ret double [[R]]
120 %e0 = extractelement <2 x double> %x, i32 1
121 %e1 = extractelement <4 x double> %y, i32 1
122 %r = fadd fast double %e0, %e1
126 ; Disguised same vector operand; scalar code is not cheaper (with default
127 ; x86 target), so aggressively form vector binop.
; Two separate extracts read lane 1 of the same vector. The checks expect a
; vector add of %x with itself followed by one extract of lane 1.
129 define i32 @ext1_ext1_add_same_vec(<4 x i32> %x) {
130 ; CHECK-LABEL: @ext1_ext1_add_same_vec(
131 ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], [[X]]
132 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
133 ; CHECK-NEXT: ret i32 [[R]]
135 %e0 = extractelement <4 x i32> %x, i32 1
136 %e1 = extractelement <4 x i32> %x, i32 1
137 %r = add i32 %e0, %e1
141 ; Functionally equivalent to above test; should transform as above.
; A single lane-1 extract is used as both add operands. The checks expect a
; vector add of %x with itself followed by one extract of lane 1.
143 define i32 @ext1_ext1_add_same_vec_cse(<4 x i32> %x) {
144 ; CHECK-LABEL: @ext1_ext1_add_same_vec_cse(
145 ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], [[X]]
146 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
147 ; CHECK-NEXT: ret i32 [[R]]
149 %e0 = extractelement <4 x i32> %x, i32 1
150 %r = add i32 %e0, %e0
154 ; Don't assert if extract indices have different types.
; The two extracts use the same lane but spell the index with different
; integer types (i32 1 and i64 1). The checks expect a vector add of %x with
; itself and a final extract that uses an i32 index.
156 define i32 @ext1_ext1_add_same_vec_diff_idx_ty(<4 x i32> %x) {
157 ; CHECK-LABEL: @ext1_ext1_add_same_vec_diff_idx_ty(
158 ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], [[X]]
159 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
160 ; CHECK-NEXT: ret i32 [[R]]
162 %e0 = extractelement <4 x i32> %x, i32 1
163 %e1 = extractelement <4 x i32> %x, i64 1
164 %r = add i32 %e0, %e1
168 ; Negative test - same vector operand; scalar code is cheaper than general case
169 ; and vector code would be more expensive still.
; The first extract has an additional user (the call to @use_i8). Despite
; the `ext1` in the name, both extracts read lane 0. The checks expect the
; scalar extracts, the call, and the scalar add to be left unchanged.
171 define i8 @ext1_ext1_add_same_vec_extra_use0(<16 x i8> %x) {
172 ; CHECK-LABEL: @ext1_ext1_add_same_vec_extra_use0(
173 ; CHECK-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
174 ; CHECK-NEXT: call void @use_i8(i8 [[E0]])
175 ; CHECK-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[X]], i32 0
176 ; CHECK-NEXT: [[R:%.*]] = add i8 [[E0]], [[E1]]
177 ; CHECK-NEXT: ret i8 [[R]]
179 %e0 = extractelement <16 x i8> %x, i32 0
180 call void @use_i8(i8 %e0)
181 %e1 = extractelement <16 x i8> %x, i32 0
186 ; Negative test - same vector operand; scalar code is cheaper than general case
187 ; and vector code would be more expensive still.
; The second extract has an additional user (the call to @use_i8). Despite
; the `ext1` in the name, both extracts read lane 0. The checks expect the
; scalar extracts, the call, and the scalar add to be left unchanged.
189 define i8 @ext1_ext1_add_same_vec_extra_use1(<16 x i8> %x) {
190 ; CHECK-LABEL: @ext1_ext1_add_same_vec_extra_use1(
191 ; CHECK-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
192 ; CHECK-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[X]], i32 0
193 ; CHECK-NEXT: call void @use_i8(i8 [[E1]])
194 ; CHECK-NEXT: [[R:%.*]] = add i8 [[E0]], [[E1]]
195 ; CHECK-NEXT: ret i8 [[R]]
197 %e0 = extractelement <16 x i8> %x, i32 0
198 %e1 = extractelement <16 x i8> %x, i32 0
199 call void @use_i8(i8 %e1)
204 ; Negative test - same vector operand; scalar code is cheaper than general case
205 ; and vector code would be more expensive still.
; A single lane-0 extract is passed to @use_i8 and also used as both add
; operands. The checks expect the extract, the call, and the scalar add to be
; left unchanged.
207 define i8 @ext1_ext1_add_same_vec_cse_extra_use(<16 x i8> %x) {
208 ; CHECK-LABEL: @ext1_ext1_add_same_vec_cse_extra_use(
209 ; CHECK-NEXT: [[E:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
210 ; CHECK-NEXT: call void @use_i8(i8 [[E]])
211 ; CHECK-NEXT: [[R:%.*]] = add i8 [[E]], [[E]]
212 ; CHECK-NEXT: ret i8 [[R]]
214 %e = extractelement <16 x i8> %x, i32 0
215 call void @use_i8(i8 %e)
220 ; Vector code costs the same as scalar, so aggressively form vector op.
; The extract from %x has an additional user (the call to @use_i8), so it is
; expected to remain. The checks still expect a vector add of %x and %y with
; one extract of lane 0 for the result.
222 define i8 @ext1_ext1_add_uses1(<16 x i8> %x, <16 x i8> %y) {
223 ; CHECK-LABEL: @ext1_ext1_add_uses1(
224 ; CHECK-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
225 ; CHECK-NEXT: call void @use_i8(i8 [[E0]])
226 ; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[X]], [[Y:%.*]]
227 ; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
228 ; CHECK-NEXT: ret i8 [[R]]
230 %e0 = extractelement <16 x i8> %x, i32 0
231 call void @use_i8(i8 %e0)
232 %e1 = extractelement <16 x i8> %y, i32 0
237 ; Vector code costs the same as scalar, so aggressively form vector op.
; The extract from %y has an additional user (the call to @use_i8), so it is
; expected to remain. The checks still expect a vector add of %x and %y with
; one extract of lane 0 for the result.
239 define i8 @ext1_ext1_add_uses2(<16 x i8> %x, <16 x i8> %y) {
240 ; CHECK-LABEL: @ext1_ext1_add_uses2(
241 ; CHECK-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 0
242 ; CHECK-NEXT: call void @use_i8(i8 [[E1]])
243 ; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[X:%.*]], [[Y]]
244 ; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
245 ; CHECK-NEXT: ret i8 [[R]]
247 %e0 = extractelement <16 x i8> %x, i32 0
248 %e1 = extractelement <16 x i8> %y, i32 0
249 call void @use_i8(i8 %e1)
; The operands come from different lanes (0 of %x, 1 of %y). With the SSE2
; run the checks expect the scalar form to remain. With the AVX2 run they
; expect a shuffle that moves lane 1 of %y into lane 0, a vector `add nuw`,
; and one extract of lane 0.
254 define i8 @ext0_ext1_add(<16 x i8> %x, <16 x i8> %y) {
255 ; SSE-LABEL: @ext0_ext1_add(
256 ; SSE-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
257 ; SSE-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 1
258 ; SSE-NEXT: [[R:%.*]] = add nuw i8 [[E0]], [[E1]]
259 ; SSE-NEXT: ret i8 [[R]]
261 ; AVX-LABEL: @ext0_ext1_add(
262 ; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
263 ; AVX-NEXT: [[TMP1:%.*]] = add nuw <16 x i8> [[X:%.*]], [[SHIFT]]
264 ; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
265 ; AVX-NEXT: ret i8 [[R]]
267 %e0 = extractelement <16 x i8> %x, i32 0
268 %e1 = extractelement <16 x i8> %y, i32 1
269 %r = add nuw i8 %e0, %e1
; NOTE(review): the function name says `add`, but the operation under test
; is `sub nsw` (a non-commutative op, so operand order matters).
; With the SSE2 run the checks expect the scalar form to remain. With the
; AVX2 run they expect a shuffle that moves lane 5 of %x into lane 0, a
; vector `sub nsw` with the shuffled value on the left, and one extract of
; lane 0 (with an i64 index).
273 define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
274 ; SSE-LABEL: @ext5_ext0_add(
275 ; SSE-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 5
276 ; SSE-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 0
277 ; SSE-NEXT: [[R:%.*]] = sub nsw i8 [[E0]], [[E1]]
278 ; SSE-NEXT: ret i8 [[R]]
280 ; AVX-LABEL: @ext5_ext0_add(
281 ; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 5, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
282 ; AVX-NEXT: [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]]
283 ; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0
284 ; AVX-NEXT: ret i8 [[R]]
286 %e0 = extractelement <16 x i8> %x, i32 5
287 %e1 = extractelement <16 x i8> %y, i32 0
288 %r = sub nsw i8 %e0, %e1
; NOTE(review): the function name says `add`, but the expected output lines
; below show an `and` operation.
; With the SSE2 run the checks expect the scalar form to remain. With the
; AVX2 run they expect a shuffle that moves lane 6 of %y into lane 1, a
; vector `and`, and one extract of lane 1.
292 define i8 @ext1_ext6_add(<16 x i8> %x, <16 x i8> %y) {
293 ; SSE-LABEL: @ext1_ext6_add(
294 ; SSE-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 1
295 ; SSE-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 6
296 ; SSE-NEXT: [[R:%.*]] = and i8 [[E0]], [[E1]]
297 ; SSE-NEXT: ret i8 [[R]]
299 ; AVX-LABEL: @ext1_ext6_add(
300 ; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
301 ; AVX-NEXT: [[TMP1:%.*]] = and <16 x i8> [[X:%.*]], [[SHIFT]]
302 ; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1
303 ; AVX-NEXT: ret i8 [[R]]
305 %e0 = extractelement <16 x i8> %x, i32 1
306 %e1 = extractelement <16 x i8> %y, i32 6
; Lanes 1 and 0 of the same <4 x float> vector are multiplied. For both runs
; the checks expect a shuffle that moves lane 1 into lane 0, a vector fmul,
; and one extract of lane 0 (with an i64 index).
311 define float @ext1_ext0_fmul(<4 x float> %x) {
312 ; CHECK-LABEL: @ext1_ext0_fmul(
313 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
314 ; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[SHIFT]], [[X]]
315 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
316 ; CHECK-NEXT: ret float [[R]]
318 %e0 = extractelement <4 x float> %x, i32 1
319 %e1 = extractelement <4 x float> %x, i32 0
320 %r = fmul float %e0, %e1
; The lane-0 extract has an additional user (the call to @use_f32) and is
; expected to remain. The checks still expect a shuffle that moves lane 3
; into lane 0, a vector `fmul nnan`, and one extract of lane 0.
324 define float @ext0_ext3_fmul_extra_use1(<4 x float> %x) {
325 ; CHECK-LABEL: @ext0_ext3_fmul_extra_use1(
326 ; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
327 ; CHECK-NEXT: call void @use_f32(float [[E0]])
328 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
329 ; CHECK-NEXT: [[TMP1:%.*]] = fmul nnan <4 x float> [[X]], [[SHIFT]]
330 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
331 ; CHECK-NEXT: ret float [[R]]
333 %e0 = extractelement <4 x float> %x, i32 0
334 call void @use_f32(float %e0)
335 %e1 = extractelement <4 x float> %x, i32 3
336 %r = fmul nnan float %e0, %e1
; Here the lane-3 extract is the one with an additional user (the call to
; @use_f32). The checks expect the scalar extracts, the call, and the scalar
; `fmul ninf nsz` to be left unchanged.
340 define float @ext0_ext3_fmul_extra_use2(<4 x float> %x) {
341 ; CHECK-LABEL: @ext0_ext3_fmul_extra_use2(
342 ; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
343 ; CHECK-NEXT: [[E1:%.*]] = extractelement <4 x float> [[X]], i32 3
344 ; CHECK-NEXT: call void @use_f32(float [[E1]])
345 ; CHECK-NEXT: [[R:%.*]] = fmul ninf nsz float [[E0]], [[E1]]
346 ; CHECK-NEXT: ret float [[R]]
348 %e0 = extractelement <4 x float> %x, i32 0
349 %e1 = extractelement <4 x float> %x, i32 3
350 call void @use_f32(float %e1)
351 %r = fmul ninf nsz float %e0, %e1
; NOTE(review): the function name says `fmul`, but the operation under test
; is fadd.
; Lanes 0 and 4 of an <8 x float> vector are added. With the SSE2 run the
; checks expect the scalar form to remain. With the AVX2 run they expect a
; shuffle that moves lane 4 into lane 0, a vector fadd, and one extract of
; lane 0.
355 define float @ext0_ext4_fmul_v8f32(<8 x float> %x) {
356 ; SSE-LABEL: @ext0_ext4_fmul_v8f32(
357 ; SSE-NEXT: [[E0:%.*]] = extractelement <8 x float> [[X:%.*]], i32 0
358 ; SSE-NEXT: [[E1:%.*]] = extractelement <8 x float> [[X]], i32 4
359 ; SSE-NEXT: [[R:%.*]] = fadd float [[E0]], [[E1]]
360 ; SSE-NEXT: ret float [[R]]
362 ; AVX-LABEL: @ext0_ext4_fmul_v8f32(
363 ; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> <i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
364 ; AVX-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[X]], [[SHIFT]]
365 ; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i32 0
366 ; AVX-NEXT: ret float [[R]]
368 %e0 = extractelement <8 x float> %x, i32 0
369 %e1 = extractelement <8 x float> %x, i32 4
370 %r = fadd float %e0, %e1
; NOTE(review): the function name says `fmul`, but the operation under test
; is fadd.
; Lanes 7 and 4 of an <8 x float> vector are added. With the SSE2 run the
; checks expect the scalar form to remain. With the AVX2 run they expect a
; shuffle that moves lane 7 into lane 4, a vector fadd, and one extract of
; lane 4 (with an i64 index).
374 define float @ext7_ext4_fmul_v8f32(<8 x float> %x) {
375 ; SSE-LABEL: @ext7_ext4_fmul_v8f32(
376 ; SSE-NEXT: [[E0:%.*]] = extractelement <8 x float> [[X:%.*]], i32 7
377 ; SSE-NEXT: [[E1:%.*]] = extractelement <8 x float> [[X]], i32 4
378 ; SSE-NEXT: [[R:%.*]] = fadd float [[E0]], [[E1]]
379 ; SSE-NEXT: ret float [[R]]
381 ; AVX-LABEL: @ext7_ext4_fmul_v8f32(
382 ; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef>
383 ; AVX-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]]
384 ; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i64 4
385 ; AVX-NEXT: ret float [[R]]
387 %e0 = extractelement <8 x float> %x, i32 7
388 %e1 = extractelement <8 x float> %x, i32 4
389 %r = fadd float %e0, %e1
; NOTE(review): the function name says `fmul`, but the operation under test
; is fadd.
; Lanes 0 and 8 of a <16 x float> vector are added. For both runs the checks
; expect the scalar extracts and the scalar fadd to be left unchanged.
393 define float @ext0_ext8_fmul_v16f32(<16 x float> %x) {
394 ; CHECK-LABEL: @ext0_ext8_fmul_v16f32(
395 ; CHECK-NEXT: [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 0
396 ; CHECK-NEXT: [[E1:%.*]] = extractelement <16 x float> [[X]], i32 8
397 ; CHECK-NEXT: [[R:%.*]] = fadd float [[E0]], [[E1]]
398 ; CHECK-NEXT: ret float [[R]]
400 %e0 = extractelement <16 x float> %x, i32 0
401 %e1 = extractelement <16 x float> %x, i32 8
402 %r = fadd float %e0, %e1
; NOTE(review): the function name says `fmul`, but the operation under test
; is fadd.
; Lanes 14 and 15 of a <16 x float> vector are added. For both runs the
; checks expect the scalar extracts and the scalar fadd to be left unchanged.
406 define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
407 ; CHECK-LABEL: @ext14_ext15_fmul_v16f32(
408 ; CHECK-NEXT: [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
409 ; CHECK-NEXT: [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
410 ; CHECK-NEXT: [[R:%.*]] = fadd float [[E0]], [[E1]]
411 ; CHECK-NEXT: ret float [[R]]
413 %e0 = extractelement <16 x float> %x, i32 14
414 %e1 = extractelement <16 x float> %x, i32 15
415 %r = fadd float %e0, %e1
; Lanes 2 and 3 of %a are added and the sum is inserted into lane 3 of %b.
; The checks expect a shuffle that moves lane 2 into lane 3, a vector fadd,
; an extract of lane 3 (with an i64 index), and the original insertelement.
419 define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
420 ; CHECK-LABEL: @ins_bo_ext_ext(
421 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 2>
422 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
423 ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
424 ; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
425 ; CHECK-NEXT: ret <4 x float> [[V3]]
427 %a2 = extractelement <4 x float> %a, i32 2
428 %a3 = extractelement <4 x float> %a, i32 3
429 %a23 = fadd float %a2, %a3
430 %v3 = insertelement <4 x float> %b, float %a23, i32 3
434 ; TODO: This is conservatively left to extract from the lower index value,
435 ; but it is likely that extracting from index 3 is the better option.
; The scalar sum of lanes 2 and 3 is both passed to @use_f32 and inserted
; into lane 3 of %b. The checks expect a shuffle that moves lane 3 into
; lane 2, a vector fadd, and an extract of lane 2 that feeds both users.
437 define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
438 ; CHECK-LABEL: @ins_bo_ext_ext_uses(
439 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
440 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
441 ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
442 ; CHECK-NEXT: call void @use_f32(float [[A23]])
443 ; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
444 ; CHECK-NEXT: ret <4 x float> [[V3]]
446 %a2 = extractelement <4 x float> %a, i32 2
447 %a3 = extractelement <4 x float> %a, i32 3
448 %a23 = fadd float %a2, %a3
449 call void @use_f32(float %a23)
450 %v3 = insertelement <4 x float> %b, float %a23, i32 3
; Three pairwise sums (a2+a3, b0+b1, b2+b3) are inserted into lanes 1..3 of a
; poison vector; %a0 and %a1 are extracted but have no visible users here.
; The checks expect each scalar fadd to become a shuffle + vector fadd +
; single extract, while the insertelement chain is kept as is.
454 define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
455 ; CHECK-LABEL: @PR34724(
456 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
457 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
458 ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
459 ; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
460 ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
461 ; CHECK-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
462 ; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 2>
463 ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
464 ; CHECK-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
465 ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> poison, float [[A23]], i32 1
466 ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
467 ; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
468 ; CHECK-NEXT: ret <4 x float> [[V3]]
470 %a0 = extractelement <4 x float> %a, i32 0
471 %a1 = extractelement <4 x float> %a, i32 1
472 %a2 = extractelement <4 x float> %a, i32 2
473 %a3 = extractelement <4 x float> %a, i32 3
475 %b0 = extractelement <4 x float> %b, i32 0
476 %b1 = extractelement <4 x float> %b, i32 1
477 %b2 = extractelement <4 x float> %b, i32 2
478 %b3 = extractelement <4 x float> %b, i32 3
480 %a23 = fadd float %a2, %a3
481 %b01 = fadd float %b0, %b1
482 %b23 = fadd float %b2, %b3
484 %v1 = insertelement <4 x float> poison, float %a23, i32 1
485 %v2 = insertelement <4 x float> %v1, float %b01, i32 2
486 %v3 = insertelement <4 x float> %v2, float %b23, i32 3
; All four lanes of %z (= %x & %y) are or'ed together through a chain of
; scalar ops. The checks expect every scalar `or` to become a vector `or`
; whose other operand is a shuffle moving lane 1, 2 or 3 of %z into lane 0,
; with one final extract of lane 0.
490 define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
491 ; CHECK-LABEL: @ext_ext_or_reduction_v4i32(
492 ; CHECK-NEXT: [[Z:%.*]] = and <4 x i32> [[X:%.*]], [[Y:%.*]]
493 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
494 ; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[Z]], [[SHIFT]]
495 ; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
496 ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], [[SHIFT1]]
497 ; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
498 ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[SHIFT2]], [[TMP2]]
499 ; CHECK-NEXT: [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
500 ; CHECK-NEXT: ret i32 [[Z0123]]
502 %z = and <4 x i32> %x, %y
503 %z0 = extractelement <4 x i32> %z, i32 0
504 %z1 = extractelement <4 x i32> %z, i32 1
505 %z01 = or i32 %z0, %z1
506 %z2 = extractelement <4 x i32> %z, i32 2
507 %z012 = or i32 %z01, %z2
508 %z3 = extractelement <4 x i32> %z, i32 3
509 %z0123 = or i32 %z3, %z012
; Lanes 0, 1 and 2 of %x are summed (lane 3 is not used). The checks expect
; two vector adds, each taking a shuffle that moves lane 1 or lane 2 into
; lane 0, with one final extract of lane 0.
513 define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
514 ; CHECK-LABEL: @ext_ext_partial_add_reduction_v4i32(
515 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
516 ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[X]]
517 ; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
518 ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]]
519 ; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
520 ; CHECK-NEXT: ret i32 [[X210]]
522 %x0 = extractelement <4 x i32> %x, i32 0
523 %x1 = extractelement <4 x i32> %x, i32 1
524 %x10 = add i32 %x1, %x0
525 %x2 = extractelement <4 x i32> %x, i32 2
526 %x210 = add i32 %x2, %x10
; Lanes 0, 1 and 2 of %y are summed and lane 2 of %x is then added to that
; result. The checks expect three vector adds fed by shuffles that move the
; needed lane into lane 0 (two from %y, one from %x), with one final extract
; of lane 0.
530 define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x i32> %y) {
531 ; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32(
532 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
533 ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[Y]]
534 ; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> poison, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
535 ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]]
536 ; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
537 ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHIFT2]], [[TMP2]]
538 ; CHECK-NEXT: [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
539 ; CHECK-NEXT: ret i32 [[X2Y210]]
541 %y0 = extractelement <4 x i32> %y, i32 0
542 %y1 = extractelement <4 x i32> %y, i32 1
543 %y10 = add i32 %y1, %y0
544 %y2 = extractelement <4 x i32> %y, i32 2
545 %y210 = add i32 %y2, %y10
546 %x2 = extractelement <4 x i32> %x, i32 2
547 %x2y210 = add i32 %x2, %y210
; One operand is extracted from a constant vector and the other from %x, at
; different lanes. The checks expect the IR to be left unchanged; going by
; the function name, this is a regression test for a crash.
551 define i32 @constant_fold_crash(<4 x i32> %x) {
552 ; CHECK-LABEL: @constant_fold_crash(
553 ; CHECK-NEXT: [[A:%.*]] = extractelement <4 x i32> <i32 16, i32 17, i32 18, i32 19>, i32 1
554 ; CHECK-NEXT: [[B:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
555 ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
556 ; CHECK-NEXT: ret i32 [[C]]
558 %a = extractelement <4 x i32> <i32 16, i32 17, i32 18, i32 19>, i32 1
559 %b = extractelement <4 x i32> %x, i32 0
; Floating-point variant with the constant-vector extract as the second fadd
; operand. The checks expect the IR to be left unchanged; going by the
; function name, this is a regression test for a crash.
564 define float @constant_fold_crash_commute(<4 x float> %x) {
565 ; CHECK-LABEL: @constant_fold_crash_commute(
566 ; CHECK-NEXT: [[A:%.*]] = extractelement <4 x float> <float 1.600000e+01, float 1.700000e+01, float 1.800000e+01, float 1.900000e+01>, i32 3
567 ; CHECK-NEXT: [[B:%.*]] = extractelement <4 x float> [[X:%.*]], i32 1
568 ; CHECK-NEXT: [[C:%.*]] = fadd float [[B]], [[A]]
569 ; CHECK-NEXT: ret float [[C]]
571 %a = extractelement <4 x float> <float 16.0, float 17.0, float 18.0, float 19.0>, i32 3
572 %b = extractelement <4 x float> %x, i32 1
573 %c = fadd float %b, %a