1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -instcombine -S < %s | FileCheck %s
; Extract constant lane 1 from a <4 x i32> vector load.
; The CHECK lines expect the vector load and the extractelement to be emitted
; as written in the input.
4 define i32 @extract_load(<4 x i32>* %p) {
5 ; CHECK-LABEL: @extract_load(
6 ; CHECK-NEXT: [[X:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4
7 ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> [[X]], i32 1
8 ; CHECK-NEXT: ret i32 [[EXT]]
10 %x = load <4 x i32>, <4 x i32>* %p, align 4
11 %ext = extractelement <4 x i32> %x, i32 1
; Floating-point variant: extract constant lane 3 from a <4 x double> load.
; The CHECK lines expect the vector load (align 32) and the extractelement to
; be emitted as written in the input.
15 define double @extract_load_fp(<4 x double>* %p) {
16 ; CHECK-LABEL: @extract_load_fp(
17 ; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32
18 ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 3
19 ; CHECK-NEXT: ret double [[EXT]]
21 %x = load <4 x double>, <4 x double>* %p, align 32
22 %ext = extractelement <4 x double> %x, i32 3
; Extract constant lane 2 from a volatile <4 x double> load.
; The input load has no explicit alignment; the CHECK lines expect the
; volatile vector load to be kept (printed with align 32) followed by the
; extractelement.
26 define double @extract_load_volatile(<4 x double>* %p) {
27 ; CHECK-LABEL: @extract_load_volatile(
28 ; CHECK-NEXT: [[X:%.*]] = load volatile <4 x double>, <4 x double>* [[P:%.*]], align 32
29 ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 2
30 ; CHECK-NEXT: ret double [[EXT]]
32 %x = load volatile <4 x double>, <4 x double>* %p
33 %ext = extractelement <4 x double> %x, i32 2
; The loaded vector has a second use: it is also stored to %p2.
; The CHECK lines expect the vector load, the extract of lane 0 and the
; vector store (printed with align 32) to all remain.
37 define double @extract_load_extra_use(<4 x double>* %p, <4 x double>* %p2) {
38 ; CHECK-LABEL: @extract_load_extra_use(
39 ; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 8
40 ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 0
41 ; CHECK-NEXT: store <4 x double> [[X]], <4 x double>* [[P2:%.*]], align 32
42 ; CHECK-NEXT: ret double [[EXT]]
44 %x = load <4 x double>, <4 x double>* %p, align 8
45 %ext = extractelement <4 x double> %x, i32 0
46 store <4 x double> %x, <4 x double>* %p2
; Extract from a <4 x double> load using the non-constant index %y.
; The CHECK lines expect the vector load (printed with align 32) and the
; variable-index extractelement to remain.
50 define double @extract_load_variable_index(<4 x double>* %p, i32 %y) {
51 ; CHECK-LABEL: @extract_load_variable_index(
52 ; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32
53 ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 [[Y:%.*]]
54 ; CHECK-NEXT: ret double [[EXT]]
56 %x = load <4 x double>, <4 x double>* %p
57 %ext = extractelement <4 x double> %x, i32 %y
; Loop whose vector phi %x.0 starts as a splat of the volatile-loaded float %t0
; and is multiplied each iteration by a splat constant; only lane 1 of the phi
; is extracted and stored back through %inout.
; Per the CHECK lines, the expected output:
;   - replaces the <4 x float> phi and fmul with a scalar float phi and fmul,
;   - stores the scalar phi value directly (no extractelement),
;   - prints the loop test as `icmp eq` with the branch targets swapped
;     relative to the input `icmp ne`,
;   - prints the increment as `add nuw nsw` (the input has only `nsw`).
61 define void @scalarize_phi(i32 * %n, float * %inout) {
62 ; CHECK-LABEL: @scalarize_phi(
64 ; CHECK-NEXT: [[T0:%.*]] = load volatile float, float* [[INOUT:%.*]], align 4
65 ; CHECK-NEXT: br label [[FOR_COND:%.*]]
67 ; CHECK-NEXT: [[TMP0:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FOR_BODY:%.*]] ]
68 ; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
69 ; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[N:%.*]], align 4
70 ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], [[T1]]
71 ; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
73 ; CHECK-NEXT: store volatile float [[TMP0]], float* [[INOUT]], align 4
74 ; CHECK-NEXT: [[TMP1]] = fmul float [[TMP0]], 0x4002A3D700000000
75 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
76 ; CHECK-NEXT: br label [[FOR_COND]]
78 ; CHECK-NEXT: ret void
81 %t0 = load volatile float, float * %inout, align 4
82 %insert = insertelement <4 x float> poison, float %t0, i32 0
83 %splat = shufflevector <4 x float> %insert, <4 x float> poison, <4 x i32> zeroinitializer
84 %insert1 = insertelement <4 x float> poison, float 3.0, i32 0
88 %x.0 = phi <4 x float> [ %splat, %entry ], [ %mul, %for.body ]
89 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
90 %t1 = load i32, i32 * %n, align 4
91 %cmp = icmp ne i32 %i.0, %t1
92 br i1 %cmp, label %for.body, label %for.end
95 %t2 = extractelement <4 x float> %x.0, i32 1
96 store volatile float %t2, float * %inout, align 4
97 %mul = fmul <4 x float> %x.0, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
98 %inc = add nsw i32 %i.0, 1
; fadd of %x with a splat constant, then extract of constant lane 2.
; The CHECK lines expect lane 2 to be extracted from %x first, followed by a
; scalar fadd with the splat value.
105 define float @extract_element_binop_splat_constant_index(<4 x float> %x) {
106 ; CHECK-LABEL: @extract_element_binop_splat_constant_index(
107 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
108 ; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], 0x4002A3D700000000
109 ; CHECK-NEXT: ret float [[R]]
111 %b = fadd <4 x float> %x, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
112 %r = extractelement <4 x float> %b, i32 2
; fdiv whose dividend is the constant vector <42.0, undef>, then extract of
; constant lane 0 (the defined lane).
; The CHECK lines expect a scalar fdiv of 42.0 by lane 0 of %x.
116 define double @extract_element_binop_splat_with_undef_constant_index(<2 x double> %x) {
117 ; CHECK-LABEL: @extract_element_binop_splat_with_undef_constant_index(
118 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[X:%.*]], i32 0
119 ; CHECK-NEXT: [[R:%.*]] = fdiv double 4.200000e+01, [[TMP1]]
120 ; CHECK-NEXT: ret double [[R]]
122 %b = fdiv <2 x double> <double 42.0, double undef>, %x
123 %r = extractelement <2 x double> %b, i32 0
; fmul of %x by the non-splat constant <42.0, 43.0>, then extract of constant
; lane 1.
; The CHECK lines expect a scalar fmul of lane 1 of %x by 43.0 (the lane-1
; element of the constant).
127 define float @extract_element_binop_nonsplat_constant_index(<2 x float> %x) {
128 ; CHECK-LABEL: @extract_element_binop_nonsplat_constant_index(
129 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
130 ; CHECK-NEXT: [[R:%.*]] = fmul float [[TMP1]], 4.300000e+01
131 ; CHECK-NEXT: ret float [[R]]
133 %b = fmul <2 x float> %x, <float 42.0, float 43.0>
134 %r = extractelement <2 x float> %b, i32 1
; sdiv of %x by a full splat of 42, then extract at the variable index %y.
; The CHECK lines expect the extract to be applied to %x at %y, followed by a
; scalar sdiv by 42.
138 define i8 @extract_element_binop_splat_variable_index(<4 x i8> %x, i32 %y) {
139 ; CHECK-LABEL: @extract_element_binop_splat_variable_index(
140 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 [[Y:%.*]]
141 ; CHECK-NEXT: [[R:%.*]] = sdiv i8 [[TMP1]], 42
142 ; CHECK-NEXT: ret i8 [[R]]
144 %b = sdiv <4 x i8> %x, <i8 42, i8 42, i8 42, i8 42>
145 %r = extractelement <4 x i8> %b, i32 %y
; mul of %x by a splat of 42 that has an undef lane, then extract at the
; variable index %y.
; The CHECK lines expect the vector mul and the variable-index extract to
; remain as written (no scalar mul).
149 define i8 @extract_element_binop_splat_with_undef_variable_index(<4 x i8> %x, i32 %y) {
150 ; CHECK-LABEL: @extract_element_binop_splat_with_undef_variable_index(
151 ; CHECK-NEXT: [[B:%.*]] = mul <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 undef, i8 42>
152 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
153 ; CHECK-NEXT: ret i8 [[R]]
155 %b = mul <4 x i8> %x, <i8 42, i8 42, i8 undef, i8 42>
156 %r = extractelement <4 x i8> %b, i32 %y
; lshr of %x by the non-splat constant <4, 3, undef, 2>, then extract at the
; variable index %y.
; The CHECK lines expect the vector lshr and the variable-index extract to
; remain as written (no scalar lshr).
160 define i8 @extract_element_binop_nonsplat_variable_index(<4 x i8> %x, i32 %y) {
161 ; CHECK-LABEL: @extract_element_binop_nonsplat_variable_index(
162 ; CHECK-NEXT: [[B:%.*]] = lshr <4 x i8> [[X:%.*]], <i8 4, i8 3, i8 undef, i8 2>
163 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
164 ; CHECK-NEXT: ret i8 [[R]]
166 %b = lshr <4 x i8> %x, <i8 4, i8 3, i8 undef, i8 2>
167 %r = extractelement <4 x i8> %b, i32 %y
; fadd of %x with a loaded vector, then extract of constant lane 2.
; The CHECK lines expect the vector load to be kept (printed with align 16),
; lane 2 to be extracted from both the loaded value and %x, and the fadd to be
; performed on the two scalars.
171 define float @extract_element_load(<4 x float> %x, <4 x float>* %ptr) {
172 ; CHECK-LABEL: @extract_element_load(
173 ; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR:%.*]], align 16
174 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[LOAD]], i32 2
175 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
176 ; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], [[TMP2]]
177 ; CHECK-NEXT: ret float [[R]]
179 %load = load <4 x float>, <4 x float>* %ptr
180 %add = fadd <4 x float> %x, %load
181 %r = extractelement <4 x float> %add, i32 2
; Same shape as @extract_element_load, but the loaded vector is also stored to
; %ptr1, so the load has more than one use.
; The CHECK lines expect the vector fadd (printed with operands in the order
; LOAD, X) and the extract of lane 2 to remain; no scalar fadd is expected.
185 define float @extract_element_multi_Use_load(<4 x float> %x, <4 x float>* %ptr0, <4 x float>* %ptr1) {
186 ; CHECK-LABEL: @extract_element_multi_Use_load(
187 ; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR0:%.*]], align 16
188 ; CHECK-NEXT: store <4 x float> [[LOAD]], <4 x float>* [[PTR1:%.*]], align 16
189 ; CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[LOAD]], [[X:%.*]]
190 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[ADD]], i32 2
191 ; CHECK-NEXT: ret float [[R]]
193 %load = load <4 x float>, <4 x float>* %ptr0
194 store <4 x float> %load, <4 x float>* %ptr1
195 %add = fadd <4 x float> %x, %load
196 %r = extractelement <4 x float> %add, i32 2
; fadd of %x with a splat of 1.0, then extract at the variable index %y.
; The CHECK lines expect the extract to be applied to %x at %y, followed by a
; scalar fadd of 1.0.
200 define float @extract_element_variable_index(<4 x float> %x, i32 %y) {
201 ; CHECK-LABEL: @extract_element_variable_index(
202 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 [[Y:%.*]]
203 ; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], 1.000000e+00
204 ; CHECK-NEXT: ret float [[R]]
206 %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
207 %r = extractelement <4 x float> %add, i32 %y
; %f is inserted into lane 0 of %A, the result is multiplied (fmul nnan) by %B,
; and lane 0 of the product is extracted.
; The CHECK lines expect a scalar `fmul nnan` of lane 0 of %B and %f; %A does
; not appear in the expected output and the nnan flag is kept.
211 define float @extelt_binop_insertelt(<4 x float> %A, <4 x float> %B, float %f) {
212 ; CHECK-LABEL: @extelt_binop_insertelt(
213 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
214 ; CHECK-NEXT: [[E:%.*]] = fmul nnan float [[TMP1]], [[F:%.*]]
215 ; CHECK-NEXT: ret float [[E]]
217 %C = insertelement <4 x float> %A, float %f, i32 0
218 %D = fmul nnan <4 x float> %C, %B
219 %E = extractelement <4 x float> %D, i32 0
223 ; We recurse to find a scalarizable operand.
224 ; FIXME: We should propagate the IR flags including wrapping flags.
; Input: %f is inserted into lane 0 of %A, added to %B, the sum is multiplied
; (mul nsw) by %B, and lane 0 of the product is extracted.
; The CHECK lines expect scalar add and mul on lane 0, with lane 0 of %B
; extracted twice. The expected scalar mul has no `nsw` even though the input
; vector mul does; that dropped flag is what the FIXME above refers to.
226 define i32 @extelt_binop_binop_insertelt(<4 x i32> %A, <4 x i32> %B, i32 %f) {
227 ; CHECK-LABEL: @extelt_binop_binop_insertelt(
228 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 0
229 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], [[F:%.*]]
230 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[B]], i32 0
231 ; CHECK-NEXT: [[E:%.*]] = mul i32 [[TMP2]], [[TMP3]]
232 ; CHECK-NEXT: ret i32 [[E]]
234 %v = insertelement <4 x i32> %A, i32 %f, i32 0
235 %C = add <4 x i32> %v, %B
236 %D = mul nsw <4 x i32> %C, %B
237 %E = extractelement <4 x i32> %D, i32 0
; Extract from the constant vector <1.0, 2.0, 3.0, 4.0> at the variable
; index %y.
; The CHECK lines expect the extractelement on the constant vector to remain
; as written.
241 define float @extract_element_constant_vector_variable_index(i32 %y) {
242 ; CHECK-LABEL: @extract_element_constant_vector_variable_index(
243 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, i32 [[Y:%.*]]
244 ; CHECK-NEXT: ret float [[R]]
246 %r = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %y
; Vector `icmp eq %x, 0` is and'ed with %y, then constant lane 2 is extracted.
; The CHECK lines expect lane 2 of %x to be compared against 0 as a scalar,
; lane 2 of %y to be extracted, and the two i1 values to be and'ed.
250 define i1 @cheap_to_extract_icmp(<4 x i32> %x, <4 x i1> %y) {
251 ; CHECK-LABEL: @cheap_to_extract_icmp(
252 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
253 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
254 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i32 2
255 ; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
256 ; CHECK-NEXT: ret i1 [[R]]
258 %cmp = icmp eq <4 x i32> %x, zeroinitializer
259 %and = and <4 x i1> %cmp, %y
260 %r = extractelement <4 x i1> %and, i32 2
; Floating-point variant of @cheap_to_extract_icmp: vector `fcmp oeq %x, 0.0`
; is and'ed with %y, then constant lane 2 is extracted.
; The CHECK lines expect a scalar fcmp oeq on lane 2 of %x, an extract of
; lane 2 of %y, and a scalar `and` of the two i1 values.
264 define i1 @cheap_to_extract_fcmp(<4 x float> %x, <4 x i1> %y) {
265 ; CHECK-LABEL: @cheap_to_extract_fcmp(
266 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
267 ; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
268 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i32 2
269 ; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
270 ; CHECK-NEXT: ret i1 [[R]]
272 %cmp = fcmp oeq <4 x float> %x, zeroinitializer
273 %and = and <4 x i1> %cmp, %y
274 %r = extractelement <4 x i1> %and, i32 2
; Vector `icmp eq` against a constant (zeroinitializer) right-hand side, then
; extract of constant lane 0.
; The CHECK lines expect lane 0 of %arg to be extracted first and compared
; against 0 as a scalar.
278 define i1 @extractelt_vector_icmp_constrhs(<2 x i32> %arg) {
279 ; CHECK-LABEL: @extractelt_vector_icmp_constrhs(
280 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 0
281 ; CHECK-NEXT: [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
282 ; CHECK-NEXT: ret i1 [[EXT]]
284 %cmp = icmp eq <2 x i32> %arg, zeroinitializer
285 %ext = extractelement <2 x i1> %cmp, i32 0
; Vector `fcmp oeq` against a constant (zeroinitializer) right-hand side, then
; extract of constant lane 0.
; The CHECK lines expect lane 0 of %arg to be extracted first and compared
; against 0.0 as a scalar.
289 define i1 @extractelt_vector_fcmp_constrhs(<2 x float> %arg) {
290 ; CHECK-LABEL: @extractelt_vector_fcmp_constrhs(
291 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 0
292 ; CHECK-NEXT: [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
293 ; CHECK-NEXT: ret i1 [[EXT]]
295 %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
296 %ext = extractelement <2 x i1> %cmp, i32 0
; Same as @extractelt_vector_icmp_constrhs, but the extract uses the variable
; index %idx.
; The CHECK lines expect the extract to be applied to %arg at %idx, followed
; by a scalar `icmp eq` against 0.
300 define i1 @extractelt_vector_icmp_constrhs_dynidx(<2 x i32> %arg, i32 %idx) {
301 ; CHECK-LABEL: @extractelt_vector_icmp_constrhs_dynidx(
302 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 [[IDX:%.*]]
303 ; CHECK-NEXT: [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
304 ; CHECK-NEXT: ret i1 [[EXT]]
306 %cmp = icmp eq <2 x i32> %arg, zeroinitializer
307 %ext = extractelement <2 x i1> %cmp, i32 %idx
; Same as @extractelt_vector_fcmp_constrhs, but the extract uses the variable
; index %idx.
; The CHECK lines expect the extract to be applied to %arg at %idx, followed
; by a scalar `fcmp oeq` against 0.0.
311 define i1 @extractelt_vector_fcmp_constrhs_dynidx(<2 x float> %arg, i32 %idx) {
312 ; CHECK-LABEL: @extractelt_vector_fcmp_constrhs_dynidx(
313 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 [[IDX:%.*]]
314 ; CHECK-NEXT: [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
315 ; CHECK-NEXT: ret i1 [[EXT]]
317 %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
318 %ext = extractelement <2 x i1> %cmp, i32 %idx
; The fcmp compares %arg0 with %add, and %add has a second use (a volatile
; store to an undef pointer).
; The CHECK lines expect the vector fadd, the volatile store, the vector fcmp
; (printed with operands in the order ADD, ARG0) and the extract of lane 0 to
; all remain; no scalar fcmp is expected.
; NOTE(review): the %idx parameter is not used by any instruction shown here;
; the extract uses constant index 0. Confirm whether that is intended.
322 define i1 @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(<2 x float> %arg0, <2 x float> %arg1, <2 x float> %arg2, i32 %idx) {
323 ; CHECK-LABEL: @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(
324 ; CHECK-NEXT: [[ADD:%.*]] = fadd <2 x float> [[ARG1:%.*]], [[ARG2:%.*]]
325 ; CHECK-NEXT: store volatile <2 x float> [[ADD]], <2 x float>* undef, align 8
326 ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x float> [[ADD]], [[ARG0:%.*]]
327 ; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x i1> [[CMP]], i32 0
328 ; CHECK-NEXT: ret i1 [[EXT]]
330 %add = fadd <2 x float> %arg1, %arg2
331 store volatile <2 x float> %add, <2 x float>* undef
332 %cmp = fcmp oeq <2 x float> %arg0, %add
333 %ext = extractelement <2 x i1> %cmp, i32 0