1 ; RUN: opt < %s -sroa -S | FileCheck %s
2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
4 %S1 = type { i64, [42 x float] }
; test1: spills two <4 x i32> arguments into a [2 x <4 x i32>] alloca and then
; reads three individual i32 lanes back through GEPs into the array. The
; expected output replaces each scalar load with an extractelement taken
; directly from the corresponding argument.
6 define i32 @test1(<4 x i32> %x, <4 x i32> %y) {
9 %a = alloca [2 x <4 x i32>]
; Store %x into array element 0 and %y into array element 1.
12 %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0
13 store <4 x i32> %x, <4 x i32>* %a.x
14 %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1
15 store <4 x i32> %y, <4 x i32>* %a.y
; Scalar loads of lane 2 of element 0, then lanes 3 and 0 of element 1.
18 %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
19 %tmp1 = load i32, i32* %a.tmp1
20 %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
21 %tmp2 = load i32, i32* %a.tmp2
22 %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
23 %tmp3 = load i32, i32* %a.tmp3
; Expected: the three loads become lane extracts from %x and %y.
25 ; CHECK: extractelement <4 x i32> %x, i32 2
26 ; CHECK-NEXT: extractelement <4 x i32> %y, i32 3
27 ; CHECK-NEXT: extractelement <4 x i32> %y, i32 0
; Sum the three loaded lanes.
29 %tmp4 = add i32 %tmp1, %tmp2
30 %tmp5 = add i32 %tmp3, %tmp4
; test2: same store pattern as test1, but the third value is read with a
; <2 x i32> sub-vector load starting at lane 0 of the second array element,
; followed by an extractelement of lane 0. The expected output models the
; sub-vector load as a shufflevector of %y.
37 define i32 @test2(<4 x i32> %x, <4 x i32> %y) {
38 ; CHECK-LABEL: @test2(
40 %a = alloca [2 x <4 x i32>]
; Store %x into array element 0 and %y into array element 1.
43 %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0
44 store <4 x i32> %x, <4 x i32>* %a.x
45 %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1
46 store <4 x i32> %y, <4 x i32>* %a.y
; Two scalar lane loads, as in test1.
49 %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
50 %tmp1 = load i32, i32* %a.tmp1
51 %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
52 %tmp2 = load i32, i32* %a.tmp2
; Reinterpret the address of lane 0 of element 1 as a <2 x i32> pointer and
; load two lanes at once.
53 %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
54 %a.tmp3.cast = bitcast i32* %a.tmp3 to <2 x i32>*
55 %tmp3.vec = load <2 x i32>, <2 x i32>* %a.tmp3.cast
56 %tmp3 = extractelement <2 x i32> %tmp3.vec, i32 0
; Expected: scalar loads become extracts; the sub-vector load becomes a
; shufflevector selecting lanes 0 and 1 of %y.
58 ; CHECK: %[[extract1:.*]] = extractelement <4 x i32> %x, i32 2
59 ; CHECK-NEXT: %[[extract2:.*]] = extractelement <4 x i32> %y, i32 3
60 ; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> %y, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
61 ; CHECK-NEXT: %[[extract4:.*]] = extractelement <2 x i32> %[[extract3]], i32 0
63 %tmp4 = add i32 %tmp1, %tmp2
64 %tmp5 = add i32 %tmp3, %tmp4
; Expected: the adds consume the rewritten values and the final sum is returned.
66 ; CHECK-NEXT: %[[sum1:.*]] = add i32 %[[extract1]], %[[extract2]]
67 ; CHECK-NEXT: %[[sum2:.*]] = add i32 %[[extract4]], %[[sum1]]
68 ; CHECK-NEXT: ret i32 %[[sum2]]
; test3: same store pattern as test1, followed by two memset calls on the
; alloca: one overwriting the whole second element (16 bytes) with zero bytes,
; and one overwriting a single lane (4 bytes) of the first element with byte
; value -1. The expected output folds the memsets into a zeroinitializer
; vector and an insertelement of -1.
71 define i32 @test3(<4 x i32> %x, <4 x i32> %y) {
72 ; CHECK-LABEL: @test3(
74 %a = alloca [2 x <4 x i32>]
77 %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0
78 store <4 x i32> %x, <4 x i32>* %a.x
79 %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1
80 store <4 x i32> %y, <4 x i32>* %a.y
; Zero all 16 bytes of array element 1, overwriting the stored %y.
83 %a.y.cast = bitcast <4 x i32>* %a.y to i8*
84 call void @llvm.memset.p0i8.i32(i8* %a.y.cast, i8 0, i32 16, i1 false)
; Set the 4 bytes of lane 2 of array element 0 to byte value -1.
87 %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
88 %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
89 call void @llvm.memset.p0i8.i32(i8* %a.tmp1.cast, i8 -1, i32 4, i1 false)
90 %tmp1 = load i32, i32* %a.tmp1
91 %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
92 %tmp2 = load i32, i32* %a.tmp2
93 %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
94 %tmp3 = load i32, i32* %a.tmp3
; Expected: the single-lane memset becomes an insertelement of i32 -1 into %x,
; and the loads from element 1 read from a zeroinitializer vector.
96 ; CHECK: %[[insert:.*]] = insertelement <4 x i32> %x, i32 -1, i32 2
97 ; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
98 ; CHECK-NEXT: extractelement <4 x i32> zeroinitializer, i32 3
99 ; CHECK-NEXT: extractelement <4 x i32> zeroinitializer, i32 0
101 %tmp4 = add i32 %tmp1, %tmp2
102 %tmp5 = add i32 %tmp3, %tmp4
; test4: same store pattern as test1, followed by two memcpy calls that copy
; into the alloca from the external pointer %z: a full 16-byte copy into the
; second array element and a 4-byte copy of lane 2 into the first element.
; The expected output turns these into a vector load and a scalar load from %z.
109 define i32 @test4(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) {
110 ; CHECK-LABEL: @test4(
112 %a = alloca [2 x <4 x i32>]
115 %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0
116 store <4 x i32> %x, <4 x i32>* %a.x
117 %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1
118 store <4 x i32> %y, <4 x i32>* %a.y
; Copy all 16 bytes at %z over array element 1.
121 %a.y.cast = bitcast <4 x i32>* %a.y to i8*
122 %z.cast = bitcast <4 x i32>* %z to i8*
123 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.y.cast, i8* %z.cast, i32 16, i1 false)
; Copy 4 bytes from lane 2 of %z over lane 2 of array element 0.
126 %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
127 %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
128 %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2
129 %z.tmp1.cast = bitcast i32* %z.tmp1 to i8*
130 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.tmp1.cast, i8* %z.tmp1.cast, i32 4, i1 false)
131 %tmp1 = load i32, i32* %a.tmp1
132 %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
133 %tmp2 = load i32, i32* %a.tmp2
134 %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
135 %tmp3 = load i32, i32* %a.tmp3
; Expected: the 16-byte copy becomes a <4 x i32> load from %z; the 4-byte copy
; becomes a scalar load from %z inserted into lane 2 of %x.
137 ; CHECK: %[[load:.*]] = load <4 x i32>, <4 x i32>* %z
138 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2
139 ; CHECK-NEXT: %[[element_load:.*]] = load i32, i32* %[[gep]]
140 ; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x i32> %x, i32 %[[element_load]], i32 2
141 ; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
142 ; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3
143 ; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 0
145 %tmp4 = add i32 %tmp1, %tmp2
146 %tmp5 = add i32 %tmp3, %tmp4
153 declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) nounwind
155 ; Same as test4, but with a differently sized address space pointer as the source.
; test4_as1: mirrors test4, except that %z is a pointer into address space 1
; and the copies use the p0i8.p1i8 memcpy variant. The GEP on %z is written
; with i16 indices in the input, while the expected output GEP uses i64
; indices.
156 define i32 @test4_as1(<4 x i32> %x, <4 x i32> %y, <4 x i32> addrspace(1)* %z) {
157 ; CHECK-LABEL: @test4_as1(
159 %a = alloca [2 x <4 x i32>]
162 %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0
163 store <4 x i32> %x, <4 x i32>* %a.x
164 %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1
165 store <4 x i32> %y, <4 x i32>* %a.y
; Copy all 16 bytes at %z (address space 1) over array element 1.
168 %a.y.cast = bitcast <4 x i32>* %a.y to i8*
169 %z.cast = bitcast <4 x i32> addrspace(1)* %z to i8 addrspace(1)*
170 call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.y.cast, i8 addrspace(1)* %z.cast, i32 16, i1 false)
; Copy 4 bytes from lane 2 of %z over lane 2 of array element 0.
173 %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
174 %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
175 %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %z, i16 0, i16 2
176 %z.tmp1.cast = bitcast i32 addrspace(1)* %z.tmp1 to i8 addrspace(1)*
177 call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.tmp1.cast, i8 addrspace(1)* %z.tmp1.cast, i32 4, i1 false)
178 %tmp1 = load i32, i32* %a.tmp1
179 %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
180 %tmp2 = load i32, i32* %a.tmp2
181 %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
182 %tmp3 = load i32, i32* %a.tmp3
; Expected: same shape as test4, with the loads and the GEP carrying the
; addrspace(1) qualifier.
184 ; CHECK: %[[load:.*]] = load <4 x i32>, <4 x i32> addrspace(1)* %z
185 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %z, i64 0, i64 2
186 ; CHECK-NEXT: %[[element_load:.*]] = load i32, i32 addrspace(1)* %[[gep]]
187 ; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x i32> %x, i32 %[[element_load]], i32 2
188 ; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
189 ; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3
190 ; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 0
192 %tmp4 = add i32 %tmp1, %tmp2
193 %tmp5 = add i32 %tmp3, %tmp4
200 define i32 @test5(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) {
201 ; CHECK-LABEL: @test5(
202 ; The same as the above, but with reversed source and destination for the
203 ; element memcpy, and a self copy.
; Concretely: the 16-byte memcpy copies array element 1 over array element 0
; inside the same alloca, and the 4-byte memcpy copies lane 2 of element 0 out
; to lane 2 of %z.
205 %a = alloca [2 x <4 x i32>]
208 %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0
209 store <4 x i32> %x, <4 x i32>* %a.x
210 %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1
211 store <4 x i32> %y, <4 x i32>* %a.y
; Self copy: destination is element 0 (%a.x), source is element 1 (%a.y).
214 %a.y.cast = bitcast <4 x i32>* %a.y to i8*
215 %a.x.cast = bitcast <4 x i32>* %a.x to i8*
216 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.x.cast, i8* %a.y.cast, i32 16, i1 false)
; Element copy out of the alloca: destination is %z lane 2, source is lane 2
; of element 0.
219 %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
220 %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
221 %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2
222 %z.tmp1.cast = bitcast i32* %z.tmp1 to i8*
223 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z.tmp1.cast, i8* %a.tmp1.cast, i32 4, i1 false)
224 %tmp1 = load i32, i32* %a.tmp1
225 %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
226 %tmp2 = load i32, i32* %a.tmp2
227 %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
228 %tmp3 = load i32, i32* %a.tmp3
; Expected: after the self copy every read sees %y, so the outgoing copy is a
; store of lane 2 of %y to %z and all three loads are extracts from %y.
230 ; CHECK: %[[gep:.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2
231 ; CHECK-NEXT: %[[extract:.*]] = extractelement <4 x i32> %y, i32 2
232 ; CHECK-NEXT: store i32 %[[extract]], i32* %[[gep]]
233 ; CHECK-NEXT: extractelement <4 x i32> %y, i32 2
234 ; CHECK-NEXT: extractelement <4 x i32> %y, i32 3
235 ; CHECK-NEXT: extractelement <4 x i32> %y, i32 0
237 %tmp4 = add i32 %tmp1, %tmp2
238 %tmp5 = add i32 %tmp3, %tmp4
245 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
246 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
248 define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) {
249 ; CHECK-LABEL: @test6(
250 ; The old scalarrepl pass would wrongly drop the store to the second alloca.
; The alloca is a struct of two <4 x i64> vectors. Both fields are stored to,
; and a single i64 is then loaded from field 0 at a lane index given by the
; function argument %n. The expected output keeps both vector stores.
252 %tmp = alloca { <4 x i64>, <4 x i64> }
253 %p0 = getelementptr inbounds { <4 x i64>, <4 x i64> }, { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 0
254 store <4 x i64> %x, <4 x i64>* %p0
255 ; CHECK: store <4 x i64> %x,
256 %p1 = getelementptr inbounds { <4 x i64>, <4 x i64> }, { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 1
257 store <4 x i64> %y, <4 x i64>* %p1
258 ; CHECK: store <4 x i64> %y,
; The final GEP index is %n, so the loaded lane is not a constant in the IR.
259 %addr = getelementptr inbounds { <4 x i64>, <4 x i64> }, { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 0, i64 %n
260 %res = load i64, i64* %addr, align 4
; test_subvec_store: builds a <4 x i32> in an alloca using three overlapping
; <2 x i32> sub-vector stores (starting at lanes 0, 1 and 2) and one scalar
; store to lane 3, then loads the whole vector. The expected output expresses
; each sub-vector store as a select with a constant lane mask and the scalar
; store as an insertelement.
264 define <4 x i32> @test_subvec_store() {
265 ; CHECK-LABEL: @test_subvec_store(
267 %a = alloca <4 x i32>
; Store <0, 0> over lanes 0-1.
270 %a.gep0 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 0
271 %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>*
272 store <2 x i32> <i32 0, i32 0>, <2 x i32>* %a.cast0
274 ; CHECK: select <4 x i1> <i1 true, i1 true, i1 false, i1 false>
; Store <1, 1> over lanes 1-2.
276 %a.gep1 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 1
277 %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>*
278 store <2 x i32> <i32 1, i32 1>, <2 x i32>* %a.cast1
279 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false>
; Store <2, 2> over lanes 2-3.
281 %a.gep2 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 2
282 %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>*
283 store <2 x i32> <i32 2, i32 2>, <2 x i32>* %a.cast2
284 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true>
; Scalar store of 3 into lane 3.
286 %a.gep3 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 3
287 store i32 3, i32* %a.gep3
288 ; CHECK-NEXT: insertelement <4 x i32>
; Read back the assembled vector.
290 %ret = load <4 x i32>, <4 x i32>* %a
293 ; CHECK-NEXT: ret <4 x i32>
; test_subvec_load: stores the constant vector <0, 1, 2, 3> into an alloca and
; reads it back with three overlapping <2 x i32> sub-vector loads (starting at
; lanes 0, 1 and 2). The expected output expresses each sub-vector load as a
; shufflevector of the stored constant.
296 define <4 x i32> @test_subvec_load() {
297 ; CHECK-LABEL: @test_subvec_load(
299 %a = alloca <4 x i32>
301 store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a
; Load lanes 0-1.
304 %a.gep0 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 0
305 %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>*
306 %first = load <2 x i32>, <2 x i32>* %a.cast0
308 ; CHECK: %[[extract1:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
; Load lanes 1-2.
310 %a.gep1 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 1
311 %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>*
312 %second = load <2 x i32>, <2 x i32>* %a.cast1
313 ; CHECK-NEXT: %[[extract2:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 1, i32 2>
; Load lanes 2-3.
315 %a.gep2 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 2
316 %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>*
317 %third = load <2 x i32>, <2 x i32>* %a.cast2
318 ; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
; Combine the three sub-vectors into the returned <4 x i32>; these shuffles
; are expected to pass through with only their operands rewritten.
320 %tmp = shufflevector <2 x i32> %first, <2 x i32> %second, <2 x i32> <i32 0, i32 2>
321 %ret = shufflevector <2 x i32> %tmp, <2 x i32> %third, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
322 ; CHECK-NEXT: %[[tmp:.*]] = shufflevector <2 x i32> %[[extract1]], <2 x i32> %[[extract2]], <2 x i32> <i32 0, i32 2>
323 ; CHECK-NEXT: %[[ret:.*]] = shufflevector <2 x i32> %[[tmp]], <2 x i32> %[[extract3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
326 ; CHECK-NEXT: ret <4 x i32> %[[ret]]
329 declare void @llvm.memset.p0i32.i32(i32* nocapture, i32, i32, i1) nounwind
; test_subvec_memset: fills a <4 x float> alloca using three overlapping
; 8-byte memsets (starting at lanes 0, 1 and 2, with byte values 0, 1 and 3)
; and one 4-byte memset of lane 3 (byte value 7), then loads the whole vector.
; The expected output expresses the 8-byte memsets as selects with constant
; lane masks and the 4-byte memset as an insertelement.
331 define <4 x float> @test_subvec_memset() {
332 ; CHECK-LABEL: @test_subvec_memset(
334 %a = alloca <4 x float>
; 8 bytes of value 0 over lanes 0-1.
337 %a.gep0 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 0
338 %a.cast0 = bitcast float* %a.gep0 to i8*
339 call void @llvm.memset.p0i8.i32(i8* %a.cast0, i8 0, i32 8, i1 false)
341 ; CHECK: select <4 x i1> <i1 true, i1 true, i1 false, i1 false>
; 8 bytes of value 1 over lanes 1-2.
343 %a.gep1 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 1
344 %a.cast1 = bitcast float* %a.gep1 to i8*
345 call void @llvm.memset.p0i8.i32(i8* %a.cast1, i8 1, i32 8, i1 false)
346 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false>
; 8 bytes of value 3 over lanes 2-3.
348 %a.gep2 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 2
349 %a.cast2 = bitcast float* %a.gep2 to i8*
350 call void @llvm.memset.p0i8.i32(i8* %a.cast2, i8 3, i32 8, i1 false)
351 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true>
; 4 bytes of value 7 over lane 3.
353 %a.gep3 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 3
354 %a.cast3 = bitcast float* %a.gep3 to i8*
355 call void @llvm.memset.p0i8.i32(i8* %a.cast3, i8 7, i32 4, i1 false)
356 ; CHECK-NEXT: insertelement <4 x float>
; Read back the assembled vector.
358 %ret = load <4 x float>, <4 x float>* %a
361 ; CHECK-NEXT: ret <4 x float>
; test_subvec_memcpy: fills a <4 x float> alloca using three overlapping
; 8-byte memcpys from %x, %y and %z (starting at lanes 0, 1 and 2) and one
; 4-byte memcpy from %f into lane 3. It then copies 8 bytes starting at lane 2
; out to %out and loads the whole vector. The expected output turns each
; incoming copy into a typed load plus shuffle/select (or insertelement), and
; the outgoing copy into a shuffle plus a typed store.
364 define <4 x float> @test_subvec_memcpy(i8* %x, i8* %y, i8* %z, i8* %f, i8* %out) {
365 ; CHECK-LABEL: @test_subvec_memcpy(
367 %a = alloca <4 x float>
; 8 bytes from %x into lanes 0-1.
370 %a.gep0 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 0
371 %a.cast0 = bitcast float* %a.gep0 to i8*
372 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast0, i8* %x, i32 8, i1 false)
373 ; CHECK: %[[xptr:.*]] = bitcast i8* %x to <2 x float>*
374 ; CHECK-NEXT: %[[x:.*]] = load <2 x float>, <2 x float>* %[[xptr]]
375 ; CHECK-NEXT: %[[expand_x:.*]] = shufflevector <2 x float> %[[x]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
376 ; CHECK-NEXT: select <4 x i1> <i1 true, i1 true, i1 false, i1 false>
; 8 bytes from %y into lanes 1-2.
378 %a.gep1 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 1
379 %a.cast1 = bitcast float* %a.gep1 to i8*
380 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast1, i8* %y, i32 8, i1 false)
381 ; CHECK-NEXT: %[[yptr:.*]] = bitcast i8* %y to <2 x float>*
382 ; CHECK-NEXT: %[[y:.*]] = load <2 x float>, <2 x float>* %[[yptr]]
383 ; CHECK-NEXT: %[[expand_y:.*]] = shufflevector <2 x float> %[[y]], <2 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
384 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false>
; 8 bytes from %z into lanes 2-3.
386 %a.gep2 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 2
387 %a.cast2 = bitcast float* %a.gep2 to i8*
388 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast2, i8* %z, i32 8, i1 false)
389 ; CHECK-NEXT: %[[zptr:.*]] = bitcast i8* %z to <2 x float>*
390 ; CHECK-NEXT: %[[z:.*]] = load <2 x float>, <2 x float>* %[[zptr]]
391 ; CHECK-NEXT: %[[expand_z:.*]] = shufflevector <2 x float> %[[z]], <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
392 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true>
; 4 bytes from %f into lane 3.
394 %a.gep3 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 3
395 %a.cast3 = bitcast float* %a.gep3 to i8*
396 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i1 false)
397 ; CHECK-NEXT: %[[fptr:.*]] = bitcast i8* %f to float*
398 ; CHECK-NEXT: %[[f:.*]] = load float, float* %[[fptr]]
399 ; CHECK-NEXT: %[[insert_f:.*]] = insertelement <4 x float>
; 8 bytes out of the alloca (starting at lane 2) to %out.
401 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i1 false)
402 ; CHECK-NEXT: %[[outptr:.*]] = bitcast i8* %out to <2 x float>*
403 ; CHECK-NEXT: %[[extract_out:.*]] = shufflevector <4 x float> %[[insert_f]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
404 ; CHECK-NEXT: store <2 x float> %[[extract_out]], <2 x float>* %[[outptr]]
; Read back the assembled vector; the expected return value is the vector
; produced by the final insertelement.
406 %ret = load <4 x float>, <4 x float>* %a
409 ; CHECK-NEXT: ret <4 x float> %[[insert_f]]
412 define i32 @PR14212() {
413 ; CHECK-LABEL: @PR14212(
414 ; This caused a crash when "splitting" the load of the i32 in order to promote
415 ; the store of <3 x i8> properly. Heavily reduced from an OpenCL test case.
; The alloca holds a <3 x i8> with 4-byte alignment; it is written as a
; <3 x i8> and read back as a full i32 through a bitcast of the same pointer.
417 %retval = alloca <3 x i8>, align 4
420 store <3 x i8> undef, <3 x i8>* %retval, align 4
421 %cast = bitcast <3 x i8>* %retval to i32*
422 %load = load i32, i32* %cast, align 4
427 define <2 x i8> @PR14349.1(i32 %x) {
429 ; The first testcase for broken SROA rewriting of split integer loads and
430 ; stores due to smaller vector loads and stores. This particular test ensures
431 ; that we can rewrite a split store of an integer to a store of a vector.
; Write the whole i32 argument through %a.
436 store i32 %x, i32* %a
; Read the first two bytes back as a <2 x i8> through a bitcast of %a.
439 %cast = bitcast i32* %a to <2 x i8>*
440 %vec = load <2 x i8>, <2 x i8>* %cast
; Expected: the i32 is truncated to i16 and bitcast to the vector type, which
; is then returned.
444 ; CHECK: %[[trunc:.*]] = trunc i32 %x to i16
445 ; CHECK: %[[cast:.*]] = bitcast i16 %[[trunc]] to <2 x i8>
446 ; CHECK: ret <2 x i8> %[[cast]]
449 define i32 @PR14349.2(<2 x i8> %x) {
451 ; The second testcase for broken SROA rewriting of split integer loads and
452 ; stores due to smaller vector loads and stores. This particular test ensures
453 ; that we can rewrite a split load of an integer to a load of a vector.
; Write the <2 x i8> argument over the first two bytes of %a.
458 %cast = bitcast i32* %a to <2 x i8>*
459 store <2 x i8> %x, <2 x i8>* %cast
; Read the whole i32 back through %a.
462 %int = load i32, i32* %a
; Expected: the vector is bitcast to i16, zero-extended to i32 and or-ed into
; another i32 value. Note that the FileCheck variable named "trunc" below
; captures the result of the zext.
466 ; CHECK: %[[cast:.*]] = bitcast <2 x i8> %x to i16
467 ; CHECK: %[[trunc:.*]] = zext i16 %[[cast]] to i32
468 ; CHECK: %[[insert:.*]] = or i32 %{{.*}}, %[[trunc]]
469 ; CHECK: ret i32 %[[insert]]
472 define i32 @test7(<2 x i32> %x, <2 x i32> %y) {
473 ; Test that we can promote to vectors when the alloca doesn't mention any vector types.
474 ; CHECK-LABEL: @test7(
; The alloca is declared as [2 x i64] and only viewed as [2 x <2 x i32>]
; through a bitcast; all stores and loads go through that cast pointer.
476 %a = alloca [2 x i64]
477 %a.cast = bitcast [2 x i64]* %a to [2 x <2 x i32>]*
; Store %x into element 0 and %y into element 1.
480 %a.x = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 0
481 store <2 x i32> %x, <2 x i32>* %a.x
482 %a.y = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 1
483 store <2 x i32> %y, <2 x i32>* %a.y
; Scalar loads of lane 1 of element 0, then lanes 1 and 0 of element 1.
486 %a.tmp1 = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 0, i64 1
487 %tmp1 = load i32, i32* %a.tmp1
488 %a.tmp2 = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 1, i64 1
489 %tmp2 = load i32, i32* %a.tmp2
490 %a.tmp3 = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 1, i64 0
491 %tmp3 = load i32, i32* %a.tmp3
; Expected: the three loads become lane extracts from %x and %y.
493 ; CHECK: extractelement <2 x i32> %x, i32 1
494 ; CHECK-NEXT: extractelement <2 x i32> %y, i32 1
495 ; CHECK-NEXT: extractelement <2 x i32> %y, i32 0
497 %tmp4 = add i32 %tmp1, %tmp2
498 %tmp5 = add i32 %tmp3, %tmp4
505 define i32 @test8(<2 x i32> %x) {
506 ; Ensure that we can promote an alloca that doesn't mention a vector type based
507 ; on a single store with a vector type.
508 ; CHECK-LABEL: @test8(
; View the i64 pointer %a both as a <2 x i32> pointer and as an i32 pointer.
511 %a.vec = bitcast i64* %a to <2 x i32>*
512 %a.i32 = bitcast i64* %a to i32*
; The only vector-typed access: store %x through the vector view.
515 store <2 x i32> %x, <2 x i32>* %a.vec
; Scalar loads of i32 elements 0 and 1 through the i32 view.
518 %tmp1 = load i32, i32* %a.i32
519 %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1
520 %tmp2 = load i32, i32* %a.tmp2
; Expected: both loads become lane extracts from %x.
522 ; CHECK: extractelement <2 x i32> %x, i32 0
523 ; CHECK-NEXT: extractelement <2 x i32> %x, i32 1
525 %tmp4 = add i32 %tmp1, %tmp2
531 define <2 x i32> @test9(i32 %x, i32 %y) {
532 ; Ensure that we can promote an alloca that doesn't mention a vector type based
533 ; on a single load with a vector type.
534 ; CHECK-LABEL: @test9(
; View the i64 pointer %a both as a <2 x i32> pointer and as an i32 pointer.
537 %a.vec = bitcast i64* %a to <2 x i32>*
538 %a.i32 = bitcast i64* %a to i32*
; Scalar stores of %x to i32 element 0 and %y to i32 element 1.
541 store i32 %x, i32* %a.i32
542 %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1
543 store i32 %y, i32* %a.tmp2
; Expected: the stores become insertelements building a <2 x i32> from undef.
545 ; CHECK: %[[V1:.*]] = insertelement <2 x i32> undef, i32 %x, i32 0
546 ; CHECK-NEXT: %[[V2:.*]] = insertelement <2 x i32> %[[V1]], i32 %y, i32 1
; The only vector-typed access: load the whole slot through the vector view.
548 %result = load <2 x i32>, <2 x i32>* %a.vec
551 ret <2 x i32> %result
552 ; CHECK-NEXT: ret <2 x i32> %[[V2]]
555 define <2 x i32> @test10(<4 x i16> %x, i32 %y) {
556 ; If there are multiple different vector types used, we should select the one
557 ; with the widest elements.
558 ; CHECK-LABEL: @test10(
; Three views of the i64 pointer %a: <2 x i32>, <4 x i16> and plain i32.
561 %a.vec1 = bitcast i64* %a to <2 x i32>*
562 %a.vec2 = bitcast i64* %a to <4 x i16>*
563 %a.i32 = bitcast i64* %a to i32*
; Store %x through the <4 x i16> view, then overwrite i32 element 1 with %y.
566 store <4 x i16> %x, <4 x i16>* %a.vec2
567 %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1
568 store i32 %y, i32* %a.tmp2
; Expected: <2 x i32> is the working type, so %x is bitcast to it and %y is
; inserted as lane 1.
570 ; CHECK: %[[V1:.*]] = bitcast <4 x i16> %x to <2 x i32>
571 ; CHECK-NEXT: %[[V2:.*]] = insertelement <2 x i32> %[[V1]], i32 %y, i32 1
; Load the whole slot through the <2 x i32> view.
573 %result = load <2 x i32>, <2 x i32>* %a.vec1
576 ret <2 x i32> %result
577 ; CHECK-NEXT: ret <2 x i32> %[[V2]]
580 define <2 x float> @test11(<4 x i16> %x, i32 %y) {
581 ; If there are multiple different element types for different vector types,
582 ; pick the integer types. This isn't really important, but seems like the best
583 ; heuristic for making a deterministic decision.
584 ; CHECK-LABEL: @test11(
; Three views of the i64 pointer %a: <2 x float>, <4 x i16> and plain i32.
587 %a.vec1 = bitcast i64* %a to <2 x float>*
588 %a.vec2 = bitcast i64* %a to <4 x i16>*
589 %a.i32 = bitcast i64* %a to i32*
; Store %x through the <4 x i16> view, then overwrite i32 element 1 with %y.
592 store <4 x i16> %x, <4 x i16>* %a.vec2
593 %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1
594 store i32 %y, i32* %a.tmp2
; Expected: <4 x i16> is the working type, so %y is bitcast to <2 x i16>,
; widened with a shuffle and blended into lanes 2-3 of %x; the result is
; bitcast to <2 x float> only for the final load.
596 ; CHECK: %[[V1:.*]] = bitcast i32 %y to <2 x i16>
597 ; CHECK-NEXT: %[[V2:.*]] = shufflevector <2 x i16> %[[V1]], <2 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
598 ; CHECK-NEXT: %[[V3:.*]] = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i16> %[[V2]], <4 x i16> %x
599 ; CHECK-NEXT: %[[V4:.*]] = bitcast <4 x i16> %[[V3]] to <2 x float>
; Load the whole slot through the <2 x float> view.
601 %result = load <2 x float>, <2 x float>* %a.vec1
604 ret <2 x float> %result
605 ; CHECK-NEXT: ret <2 x float> %[[V4]]
; test12: the alloca is declared as <3 x i32> with 16-byte alignment, but it is
; written as a <4 x i32> and read back as a <4 x float> through pointer
; bitcasts. The expected output is a single bitcast of the stored undef value
; to <4 x float>, which is returned.
608 define <4 x float> @test12() {
609 ; CHECK-LABEL: @test12(
610 %a = alloca <3 x i32>, align 16
; Store a 4-lane vector through a cast of the 3-lane alloca.
613 %cast1 = bitcast <3 x i32>* %a to <4 x i32>*
614 store <4 x i32> undef, <4 x i32>* %cast1, align 16
; Load a 4-lane float vector through a chain of two casts.
617 %cast2 = bitcast <3 x i32>* %a to <3 x float>*
618 %cast3 = bitcast <3 x float>* %cast2 to <4 x float>*
619 %vec = load <4 x float>, <4 x float>* %cast3
622 ; CHECK: %[[ret:.*]] = bitcast <4 x i32> undef to <4 x float>
623 ; CHECK-NEXT: ret <4 x float> %[[ret]]