1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -instcombine -S < %s | FileCheck %s
; Extract a constant lane (1) from a <4 x i32> vector load.
; The expected output keeps both the vector load and the extractelement;
; the only visible difference is that the lane index is printed as i64.
4 define i32 @extract_load(<4 x i32>* %p) {
6 ; CHECK-LABEL: @extract_load(
7 ; CHECK-NEXT: [[X:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4
8 ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> [[X]], i64 1
9 ; CHECK-NEXT: ret i32 [[EXT]]
11 %x = load <4 x i32>, <4 x i32>* %p, align 4
12 %ext = extractelement <4 x i32> %x, i32 1
; Floating-point variant: extract constant lane 3 from a <4 x double> load.
; The expected output keeps the vector load (align 32) and the extractelement,
; with the lane index printed as i64.
16 define double @extract_load_fp(<4 x double>* %p) {
18 ; CHECK-LABEL: @extract_load_fp(
19 ; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32
20 ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i64 3
21 ; CHECK-NEXT: ret double [[EXT]]
23 %x = load <4 x double>, <4 x double>* %p, align 32
24 %ext = extractelement <4 x double> %x, i32 3
; Extract constant lane 2 from a volatile <4 x double> load.
; The input load has no explicit alignment; the expected output still contains
; the volatile vector load (printed with align 32) followed by the extract.
28 define double @extract_load_volatile(<4 x double>* %p) {
30 ; CHECK-LABEL: @extract_load_volatile(
31 ; CHECK-NEXT: [[X:%.*]] = load volatile <4 x double>, <4 x double>* [[P:%.*]], align 32
32 ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i64 2
33 ; CHECK-NEXT: ret double [[EXT]]
35 %x = load volatile <4 x double>, <4 x double>* %p
36 %ext = extractelement <4 x double> %x, i32 2
; The loaded vector has two users: the lane-0 extract and a full-vector store
; to %p2. The expected output keeps the load (align 8), the extract and the
; store; the store, written without alignment, is printed with align 32.
40 define double @extract_load_extra_use(<4 x double>* %p, <4 x double>* %p2) {
42 ; CHECK-LABEL: @extract_load_extra_use(
43 ; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 8
44 ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i64 0
45 ; CHECK-NEXT: store <4 x double> [[X]], <4 x double>* [[P2:%.*]], align 32
46 ; CHECK-NEXT: ret double [[EXT]]
48 %x = load <4 x double>, <4 x double>* %p, align 8
49 %ext = extractelement <4 x double> %x, i32 0
50 store <4 x double> %x, <4 x double>* %p2
; Extract from a vector load using a variable lane index %y.
; The expected output keeps the vector load (printed with align 32) and the
; extractelement, and the index remains the i32 argument.
54 define double @extract_load_variable_index(<4 x double>* %p, i32 %y) {
56 ; CHECK-LABEL: @extract_load_variable_index(
57 ; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32
58 ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 [[Y:%.*]]
59 ; CHECK-NEXT: ret double [[EXT]]
61 %x = load <4 x double>, <4 x double>* %p
62 %ext = extractelement <4 x double> %x, i32 %y
; A float loaded (volatile) from %inout is splatted into a <4 x float> that
; feeds a loop-carried vector phi. Each iteration extracts lane 1 of the phi,
; stores it back (volatile), and multiplies the vector by a splat constant.
; The expected output has no vector values at all: the phi is a scalar float
; and the multiply is a scalar fmul by the same constant.
; The expected output also inverts the loop compare (ne -> eq, with the
; branch targets swapped) and adds nuw to the induction increment.
66 define void @scalarize_phi(i32 * %n, float * %inout) {
68 ; CHECK-LABEL: @scalarize_phi(
70 ; CHECK-NEXT: [[T0:%.*]] = load volatile float, float* [[INOUT:%.*]], align 4
71 ; CHECK-NEXT: br label [[FOR_COND:%.*]]
73 ; CHECK-NEXT: [[TMP0:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FOR_BODY:%.*]] ]
74 ; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
75 ; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[N:%.*]], align 4
76 ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], [[T1]]
77 ; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
79 ; CHECK-NEXT: store volatile float [[TMP0]], float* [[INOUT]], align 4
80 ; CHECK-NEXT: [[TMP1]] = fmul float [[TMP0]], 0x4002A3D700000000
81 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
82 ; CHECK-NEXT: br label [[FOR_COND]]
84 ; CHECK-NEXT: ret void
87 %t0 = load volatile float, float * %inout, align 4
88 %insert = insertelement <4 x float> undef, float %t0, i32 0
89 %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
; NOTE(review): %insert1 is not referenced by any other instruction visible
; here and has no counterpart in the expected output.
90 %insert1 = insertelement <4 x float> undef, float 3.0, i32 0
94 %x.0 = phi <4 x float> [ %splat, %entry ], [ %mul, %for.body ]
95 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
96 %t1 = load i32, i32 * %n, align 4
97 %cmp = icmp ne i32 %i.0, %t1
98 br i1 %cmp, label %for.body, label %for.end
101 %t2 = extractelement <4 x float> %x.0, i32 1
102 store volatile float %t2, float * %inout, align 4
103 %mul = fmul <4 x float> %x.0, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
104 %inc = add nsw i32 %i.0, 1
; Vector fadd with a splat constant, then extract of constant lane 2.
; The expected output extracts lane 2 of %x first and performs a scalar fadd
; with the splat value.
111 define float @extract_element_binop_splat_constant_index(<4 x float> %x) {
113 ; CHECK-LABEL: @extract_element_binop_splat_constant_index(
114 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
115 ; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], 0x4002A3D700000000
116 ; CHECK-NEXT: ret float [[R]]
118 %b = fadd <4 x float> %x, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
119 %r = extractelement <4 x float> %b, i32 2
; Vector fdiv whose dividend is the constant <42.0, undef>, followed by an
; extract of constant lane 0. The expected output is a scalar fdiv of 42.0 by
; lane 0 of %x; the undef lane is never read.
123 define double @extract_element_binop_splat_with_undef_constant_index(<2 x double> %x) {
125 ; CHECK-LABEL: @extract_element_binop_splat_with_undef_constant_index(
126 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[X:%.*]], i64 0
127 ; CHECK-NEXT: [[R:%.*]] = fdiv double 4.200000e+01, [[TMP1]]
128 ; CHECK-NEXT: ret double [[R]]
130 %b = fdiv <2 x double> <double 42.0, double undef>, %x
131 %r = extractelement <2 x double> %b, i32 0
; Vector fmul by the non-splat constant <42.0, 43.0>, then extract of constant
; lane 1. The expected output is a scalar fmul of lane 1 of %x by 43.0, the
; constant's lane-1 element.
135 define float @extract_element_binop_nonsplat_constant_index(<2 x float> %x) {
137 ; CHECK-LABEL: @extract_element_binop_nonsplat_constant_index(
138 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i64 1
139 ; CHECK-NEXT: [[R:%.*]] = fmul float [[TMP1]], 4.300000e+01
140 ; CHECK-NEXT: ret float [[R]]
142 %b = fmul <2 x float> %x, <float 42.0, float 43.0>
143 %r = extractelement <2 x float> %b, i32 1
; Vector sdiv by a splat constant (42 in every lane), then extract with a
; variable index %y. The expected output extracts lane %y of %x and performs a
; scalar sdiv by 42.
147 define i8 @extract_element_binop_splat_variable_index(<4 x i8> %x, i32 %y) {
149 ; CHECK-LABEL: @extract_element_binop_splat_variable_index(
150 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 [[Y:%.*]]
151 ; CHECK-NEXT: [[R:%.*]] = sdiv i8 [[TMP1]], 42
152 ; CHECK-NEXT: ret i8 [[R]]
154 %b = sdiv <4 x i8> %x, <i8 42, i8 42, i8 42, i8 42>
155 %r = extractelement <4 x i8> %b, i32 %y
; Vector mul by a constant that is 42 in every lane except an undef lane 2,
; then extract with a variable index %y. The expected output is identical to
; the input: the vector mul and the extract both remain.
159 define i8 @extract_element_binop_splat_with_undef_variable_index(<4 x i8> %x, i32 %y) {
161 ; CHECK-LABEL: @extract_element_binop_splat_with_undef_variable_index(
162 ; CHECK-NEXT: [[B:%.*]] = mul <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 undef, i8 42>
163 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
164 ; CHECK-NEXT: ret i8 [[R]]
166 %b = mul <4 x i8> %x, <i8 42, i8 42, i8 undef, i8 42>
167 %r = extractelement <4 x i8> %b, i32 %y
; Vector lshr by a non-splat constant (with one undef lane), then extract with
; a variable index %y. The expected output is identical to the input: the
; vector lshr and the extract both remain.
171 define i8 @extract_element_binop_nonsplat_variable_index(<4 x i8> %x, i32 %y) {
173 ; CHECK-LABEL: @extract_element_binop_nonsplat_variable_index(
174 ; CHECK-NEXT: [[B:%.*]] = lshr <4 x i8> [[X:%.*]], <i8 4, i8 3, i8 undef, i8 2>
175 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
176 ; CHECK-NEXT: ret i8 [[R]]
178 %b = lshr <4 x i8> %x, <i8 4, i8 3, i8 undef, i8 2>
179 %r = extractelement <4 x i8> %b, i32 %y
; Vector fadd of the argument %x and a loaded vector, then extract of constant
; lane 2. The expected output keeps the vector load (printed with align 16),
; extracts lane 2 from both the load and %x, and adds the two scalars. The
; fadd operands appear in the opposite order from the input.
183 define float @extract_element_load(<4 x float> %x, <4 x float>* %ptr) {
185 ; CHECK-LABEL: @extract_element_load(
186 ; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR:%.*]], align 16
187 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[LOAD]], i64 2
188 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
189 ; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], [[TMP2]]
190 ; CHECK-NEXT: ret float [[R]]
192 %load = load <4 x float>, <4 x float>* %ptr
193 %add = fadd <4 x float> %x, %load
194 %r = extractelement <4 x float> %add, i32 2
; Like @extract_element_load, but the loaded vector is also stored to %ptr1,
; so the load has a second user. The expected output keeps the vector fadd
; (operands in the opposite order from the input) and extracts lane 2 from its
; result; no scalar fadd is formed.
198 define float @extract_element_multi_Use_load(<4 x float> %x, <4 x float>* %ptr0, <4 x float>* %ptr1) {
200 ; CHECK-LABEL: @extract_element_multi_Use_load(
201 ; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR0:%.*]], align 16
202 ; CHECK-NEXT: store <4 x float> [[LOAD]], <4 x float>* [[PTR1:%.*]], align 16
203 ; CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[LOAD]], [[X:%.*]]
204 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[ADD]], i64 2
205 ; CHECK-NEXT: ret float [[R]]
207 %load = load <4 x float>, <4 x float>* %ptr0
208 store <4 x float> %load, <4 x float>* %ptr1
209 %add = fadd <4 x float> %x, %load
210 %r = extractelement <4 x float> %add, i32 2
; Vector fadd with a splat of 1.0, then extract with a variable index %y.
; The expected output extracts lane %y of %x and performs a scalar fadd of 1.0.
214 define float @extract_element_variable_index(<4 x float> %x, i32 %y) {
216 ; CHECK-LABEL: @extract_element_variable_index(
217 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 [[Y:%.*]]
218 ; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], 1.000000e+00
219 ; CHECK-NEXT: ret float [[R]]
221 %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
222 %r = extractelement <4 x float> %add, i32 %y
; %f is inserted into lane 0 of %A, the result is multiplied (fmul nnan) by
; %B, and lane 0 of the product is extracted. The expected output is a scalar
; fmul of lane 0 of %B with %f; the nnan flag is kept and %A does not appear.
226 define float @extelt_binop_insertelt(<4 x float> %A, <4 x float> %B, float %f) {
228 ; CHECK-LABEL: @extelt_binop_insertelt(
229 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
230 ; CHECK-NEXT: [[E:%.*]] = fmul nnan float [[TMP1]], [[F:%.*]]
231 ; CHECK-NEXT: ret float [[E]]
233 %C = insertelement <4 x float> %A, float %f, i32 0
234 %D = fmul nnan <4 x float> %C, %B
235 %E = extractelement <4 x float> %D, i32 0
239 ; We recurse to find a scalarizable operand.
; Input: %f is inserted into lane 0 of %A, then (%v + %B) * %B is computed on
; vectors and lane 0 is extracted. The expected output is fully scalar: lane 0
; of %B is extracted twice, added to %f, and the sum is multiplied (nsw kept)
; by the second extract.
240 define i32 @extelt_binop_binop_insertelt(<4 x i32> %A, <4 x i32> %B, i32 %f) {
242 ; CHECK-LABEL: @extelt_binop_binop_insertelt(
243 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[B:%.*]], i64 0
244 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], [[F:%.*]]
245 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[B]], i64 0
246 ; CHECK-NEXT: [[E:%.*]] = mul nsw i32 [[TMP2]], [[TMP3]]
247 ; CHECK-NEXT: ret i32 [[E]]
249 %v = insertelement <4 x i32> %A, i32 %f, i32 0
250 %C = add <4 x i32> %v, %B
251 %D = mul nsw <4 x i32> %C, %B
252 %E = extractelement <4 x i32> %D, i32 0
; Extract from a constant vector <1.0, 2.0, 3.0, 4.0> using a variable index.
; The expected output is the same single extractelement instruction.
256 define float @extract_element_constant_vector_variable_index(i32 %y) {
258 ; CHECK-LABEL: @extract_element_constant_vector_variable_index(
259 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, i32 [[Y:%.*]]
260 ; CHECK-NEXT: ret float [[R]]
262 %r = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %y
; A vector icmp eq against zero is and-ed with %y, then lane 2 is extracted.
; The expected output is scalar: lane 2 of %x is compared with 0, lane 2 of %y
; is extracted, and the two i1 values are and-ed.
266 define i1 @cheap_to_extract_icmp(<4 x i32> %x, <4 x i1> %y) {
268 ; CHECK-LABEL: @cheap_to_extract_icmp(
269 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i64 2
270 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
271 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i64 2
272 ; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
273 ; CHECK-NEXT: ret i1 [[R]]
275 %cmp = icmp eq <4 x i32> %x, zeroinitializer
276 %and = and <4 x i1> %cmp, %y
277 %r = extractelement <4 x i1> %and, i32 2
; Floating-point counterpart of @cheap_to_extract_icmp: a vector fcmp oeq
; against zero is and-ed with %y, then lane 2 is extracted. The expected
; output is a scalar fcmp oeq on lane 2 of %x and-ed with lane 2 of %y.
281 define i1 @cheap_to_extract_fcmp(<4 x float> %x, <4 x i1> %y) {
283 ; CHECK-LABEL: @cheap_to_extract_fcmp(
284 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
285 ; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
286 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i64 2
287 ; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
288 ; CHECK-NEXT: ret i1 [[R]]
290 %cmp = fcmp oeq <4 x float> %x, zeroinitializer
291 %and = and <4 x i1> %cmp, %y
292 %r = extractelement <4 x i1> %and, i32 2
; Vector icmp eq with a constant (zero) right-hand side, then extract of
; constant lane 0. The expected output extracts lane 0 of %arg and compares
; the scalar with 0.
296 define i1 @extractelt_vector_icmp_constrhs(<2 x i32> %arg) {
298 ; CHECK-LABEL: @extractelt_vector_icmp_constrhs(
299 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i64 0
300 ; CHECK-NEXT: [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
301 ; CHECK-NEXT: ret i1 [[EXT]]
303 %cmp = icmp eq <2 x i32> %arg, zeroinitializer
304 %ext = extractelement <2 x i1> %cmp, i32 0
; Vector fcmp oeq with a constant (zero) right-hand side, then extract of
; constant lane 0. The expected output extracts lane 0 of %arg and compares
; the scalar with 0.0.
308 define i1 @extractelt_vector_fcmp_constrhs(<2 x float> %arg) {
310 ; CHECK-LABEL: @extractelt_vector_fcmp_constrhs(
311 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i64 0
312 ; CHECK-NEXT: [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
313 ; CHECK-NEXT: ret i1 [[EXT]]
315 %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
316 %ext = extractelement <2 x i1> %cmp, i32 0
; Same as @extractelt_vector_icmp_constrhs but with a variable lane index
; %idx. The expected output still extracts from %arg first (using %idx) and
; compares the scalar with 0.
320 define i1 @extractelt_vector_icmp_constrhs_dynidx(<2 x i32> %arg, i32 %idx) {
322 ; CHECK-LABEL: @extractelt_vector_icmp_constrhs_dynidx(
323 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 [[IDX:%.*]]
324 ; CHECK-NEXT: [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
325 ; CHECK-NEXT: ret i1 [[EXT]]
327 %cmp = icmp eq <2 x i32> %arg, zeroinitializer
328 %ext = extractelement <2 x i1> %cmp, i32 %idx
; Same as @extractelt_vector_fcmp_constrhs but with a variable lane index
; %idx. The expected output still extracts from %arg first (using %idx) and
; compares the scalar with 0.0.
332 define i1 @extractelt_vector_fcmp_constrhs_dynidx(<2 x float> %arg, i32 %idx) {
334 ; CHECK-LABEL: @extractelt_vector_fcmp_constrhs_dynidx(
335 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 [[IDX:%.*]]
336 ; CHECK-NEXT: [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
337 ; CHECK-NEXT: ret i1 [[EXT]]
339 %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
340 %ext = extractelement <2 x i1> %cmp, i32 %idx
; The fadd result feeding the fcmp has a second user (a volatile store to an
; undef pointer). The expected output keeps the vector fadd, the store and the
; vector fcmp (operands in the opposite order from the input), and extracts
; lane 0 from the compare result; no scalar compare is formed.
; NOTE(review): %idx is declared but not used by any instruction visible
; here; the extract uses the constant index 0.
344 define i1 @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(<2 x float> %arg0, <2 x float> %arg1, <2 x float> %arg2, i32 %idx) {
346 ; CHECK-LABEL: @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(
347 ; CHECK-NEXT: [[ADD:%.*]] = fadd <2 x float> [[ARG1:%.*]], [[ARG2:%.*]]
348 ; CHECK-NEXT: store volatile <2 x float> [[ADD]], <2 x float>* undef, align 8
349 ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x float> [[ADD]], [[ARG0:%.*]]
350 ; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x i1> [[CMP]], i64 0
351 ; CHECK-NEXT: ret i1 [[EXT]]
353 %add = fadd <2 x float> %arg1, %arg2
354 store volatile <2 x float> %add, <2 x float>* undef
355 %cmp = fcmp oeq <2 x float> %arg0, %add
356 %ext = extractelement <2 x i1> %cmp, i32 0