1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=sroa,amdgpu-promote-alloca < %s | FileCheck %s
4 ; Make sure that array allocas which are loaded and stored as multi-element aggregates are handled correctly.
5 ; Strictly speaking, the promote-alloca pass shouldn't have to deal with this case as it is non-canonical, but
6 ; the pass should handle it gracefully if it does occur.
7 ; The checks look for lines that previously caused issues in PromoteAlloca (non-canonical). Opt
8 ; should now leave these unchanged.
; Named aggregate types used by the tests in this file. %Block is the layout of
; @block, %gl_PerVertex is the layout of @pv, and %struct is the element type of
; the alloca in @alloca_struct.
10 %Block = type { [1 x float], i32 }
11 %gl_PerVertex = type { <4 x float>, float, [1 x float], [1 x float] }
12 %struct = type { i32, i32 }
; External addrspace(1) globals: @block is read by @promote_1d_aggr, and @pv is
; the destination that most tests write their result to.
14 @block = external addrspace(1) global %Block
15 @pv = external addrspace(1) global %gl_PerVertex
; Test: a [1 x float] alloca is written with a single whole-aggregate store and
; then read back as a scalar through a GEP whose index is only known at run time.
; The CHECK lines expect the [1 x float] alloca to remain, with the aggregate
; store split into an extractvalue followed by a scalar store. The i32 and
; <4 x float> allocas do not appear in the expected output.
17 define amdgpu_vs void @promote_1d_aggr() #0 {
18 ; CHECK-LABEL: @promote_1d_aggr(
19 ; CHECK-NEXT: [[F1:%.*]] = alloca [1 x float], align 4, addrspace(5)
20 ; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK:%.*]], ptr addrspace(1) @block, i32 0, i32 1
21 ; CHECK-NEXT: [[FOO1:%.*]] = load i32, ptr addrspace(1) [[FOO]], align 4
22 ; CHECK-NEXT: [[FOO3:%.*]] = load [1 x float], ptr addrspace(1) @block, align 4
23 ; CHECK-NEXT: [[FOO3_FCA_0_EXTRACT:%.*]] = extractvalue [1 x float] [[FOO3]], 0
24 ; CHECK-NEXT: [[FOO3_FCA_0_GEP:%.*]] = getelementptr inbounds [1 x float], ptr addrspace(5) [[F1]], i32 0, i32 0
25 ; CHECK-NEXT: store float [[FOO3_FCA_0_EXTRACT]], ptr addrspace(5) [[FOO3_FCA_0_GEP]], align 4
26 ; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [1 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO1]]
27 ; CHECK-NEXT: [[FOO6:%.*]] = load float, ptr addrspace(5) [[FOO5]], align 4
28 ; CHECK-NEXT: [[FOO9:%.*]] = insertelement <4 x float> undef, float [[FOO6]], i32 0
29 ; CHECK-NEXT: [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[FOO6]], i32 1
30 ; CHECK-NEXT: [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[FOO6]], i32 2
31 ; CHECK-NEXT: [[FOO12:%.*]] = insertelement <4 x float> [[FOO11]], float [[FOO6]], i32 3
32 ; CHECK-NEXT: store <4 x float> [[FOO12]], ptr addrspace(1) @pv, align 16
33 ; CHECK-NEXT: ret void
; Scratch slot that round-trips the dynamic index through memory.
35 %i = alloca i32, addrspace(5)
; The array alloca under test.
36 %f1 = alloca [1 x float], addrspace(5)
37 %foo = getelementptr %Block, ptr addrspace(1) @block, i32 0, i32 1
38 %foo1 = load i32, ptr addrspace(1) %foo
39 store i32 %foo1, ptr addrspace(5) %i
; Whole-aggregate load from the global and whole-aggregate store into the alloca.
40 %foo3 = load [1 x float], ptr addrspace(1) @block
41 store [1 x float] %foo3, ptr addrspace(5) %f1
; Scalar read of the alloca at an index that is only known at run time.
42 %foo4 = load i32, ptr addrspace(5) %i
43 %foo5 = getelementptr [1 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
44 %foo6 = load float, ptr addrspace(5) %foo5
; %foo7 is never stored to, so %foo8 reads uninitialized memory; the CHECK lines
; show it becoming undef.
45 %foo7 = alloca <4 x float>, addrspace(5)
46 %foo8 = load <4 x float>, ptr addrspace(5) %foo7
; Broadcast the loaded float into all four lanes and write the result to @pv.
47 %foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
48 %foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
49 %foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
50 %foo12 = insertelement <4 x float> %foo11, float %foo6, i32 3
51 store <4 x float> %foo12, ptr addrspace(1) @pv
; Layout of @block2: an i32 followed by a two-element float array.
; @promote_store_aggr loads the leading i32 and stores a [2 x float] into field 1.
55 %Block2 = type { i32, [2 x float] }
56 @block2 = external addrspace(1) global %Block2
; Test: a [2 x float] alloca is filled with two scalar stores and then read back
; with a single whole-aggregate load, which is stored to field 1 of @block2.
; The CHECK lines expect no alloca to remain: the aggregate is rebuilt from the
; two scalars with insertvalue.
58 define amdgpu_vs void @promote_store_aggr() #0 {
59 ; CHECK-LABEL: @promote_store_aggr(
60 ; CHECK-NEXT: [[FOO1:%.*]] = load i32, ptr addrspace(1) @block2, align 4
61 ; CHECK-NEXT: [[FOO3:%.*]] = sitofp i32 [[FOO1]] to float
62 ; CHECK-NEXT: [[FOO6_FCA_0_INSERT:%.*]] = insertvalue [2 x float] poison, float [[FOO3]], 0
63 ; CHECK-NEXT: [[FOO6_FCA_1_INSERT:%.*]] = insertvalue [2 x float] [[FOO6_FCA_0_INSERT]], float 2.000000e+00, 1
64 ; CHECK-NEXT: [[FOO7:%.*]] = getelementptr [[BLOCK2:%.*]], ptr addrspace(1) @block2, i32 0, i32 1
65 ; CHECK-NEXT: store [2 x float] [[FOO6_FCA_1_INSERT]], ptr addrspace(1) [[FOO7]], align 4
66 ; CHECK-NEXT: store <4 x float> splat (float 1.000000e+00), ptr addrspace(1) @pv, align 16
67 ; CHECK-NEXT: ret void
; Scratch slot that round-trips the loaded i32 through memory.
69 %i = alloca i32, addrspace(5)
; The array alloca under test.
70 %f1 = alloca [2 x float], addrspace(5)
71 %foo1 = load i32, ptr addrspace(1) @block2
72 store i32 %foo1, ptr addrspace(5) %i
73 %foo2 = load i32, ptr addrspace(5) %i
74 %foo3 = sitofp i32 %foo2 to float
; Element 0 is written through the alloca pointer itself, element 1 through a GEP.
75 store float %foo3, ptr addrspace(5) %f1
76 %foo5 = getelementptr [2 x float], ptr addrspace(5) %f1, i32 0, i32 1
77 store float 2.000000e+00, ptr addrspace(5) %foo5
; Whole-aggregate load of the alloca, forwarded to the global.
78 %foo6 = load [2 x float], ptr addrspace(5) %f1
79 %foo7 = getelementptr %Block2, ptr addrspace(1) @block2, i32 0, i32 1
80 store [2 x float] %foo6, ptr addrspace(1) %foo7
; Constant store to @pv that does not depend on the alloca.
81 store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, ptr addrspace(1) @pv
; Layout of @block3: a two-element float array followed by an i32.
; Several tests below read a run-time index from this global.
85 %Block3 = type { [2 x float], i32 }
86 @block3 = external addrspace(1) global %Block3
; Test: a [2 x float] alloca is written with a single whole-aggregate store and
; then read back as a scalar at an index that is only known at run time.
; The CHECK lines expect no alloca to remain: the aggregate is unpacked with
; extractvalue into a <2 x float> and the dynamic read becomes an extractelement.
88 define amdgpu_vs void @promote_load_from_store_aggr() #0 {
89 ; CHECK-LABEL: @promote_load_from_store_aggr(
90 ; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 1
91 ; CHECK-NEXT: [[FOO1:%.*]] = load i32, ptr addrspace(1) [[FOO]], align 4
92 ; CHECK-NEXT: [[FOO3:%.*]] = load [2 x float], ptr addrspace(1) @block3, align 4
93 ; CHECK-NEXT: [[FOO3_FCA_0_EXTRACT:%.*]] = extractvalue [2 x float] [[FOO3]], 0
94 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float [[FOO3_FCA_0_EXTRACT]], i32 0
95 ; CHECK-NEXT: [[FOO3_FCA_1_EXTRACT:%.*]] = extractvalue [2 x float] [[FOO3]], 1
96 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[FOO3_FCA_1_EXTRACT]], i32 1
97 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 [[FOO1]]
98 ; CHECK-NEXT: [[FOO9:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
99 ; CHECK-NEXT: [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[TMP3]], i32 1
100 ; CHECK-NEXT: [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[TMP3]], i32 2
101 ; CHECK-NEXT: [[FOO12:%.*]] = insertelement <4 x float> [[FOO11]], float [[TMP3]], i32 3
102 ; CHECK-NEXT: store <4 x float> [[FOO12]], ptr addrspace(1) @pv, align 16
103 ; CHECK-NEXT: ret void
; Scratch slot that round-trips the dynamic index through memory.
105 %i = alloca i32, addrspace(5)
; The array alloca under test.
106 %f1 = alloca [2 x float], addrspace(5)
107 %foo = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 1
108 %foo1 = load i32, ptr addrspace(1) %foo
109 store i32 %foo1, ptr addrspace(5) %i
; Whole-aggregate load from the global and whole-aggregate store into the alloca.
110 %foo3 = load [2 x float], ptr addrspace(1) @block3
111 store [2 x float] %foo3, ptr addrspace(5) %f1
; Scalar read of the alloca at an index that is only known at run time.
112 %foo4 = load i32, ptr addrspace(5) %i
113 %foo5 = getelementptr [2 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
114 %foo6 = load float, ptr addrspace(5) %foo5
; %foo7 is never stored to, so %foo8 reads uninitialized memory; the CHECK lines
; show it becoming undef.
115 %foo7 = alloca <4 x float>, addrspace(5)
116 %foo8 = load <4 x float>, ptr addrspace(5) %foo7
; Broadcast the loaded float into all four lanes and write the result to @pv.
117 %foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
118 %foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
119 %foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
120 %foo12 = insertelement <4 x float> %foo11, float %foo6, i32 3
121 store <4 x float> %foo12, ptr addrspace(1) @pv
; Test: a memmove whose source and destination are both inside the same
; [5 x float] alloca. It copies 16 bytes from element 1 to element 0, so the
; two ranges overlap. The CHECK lines expect everything to fold to a store of
; 1.0, the value that was written to element 1 before the move.
125 define amdgpu_vs void @promote_memmove_aggr() #0 {
126 ; CHECK-LABEL: @promote_memmove_aggr(
127 ; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(1) @pv, align 4
128 ; CHECK-NEXT: ret void
130 %f1 = alloca [5 x float], addrspace(5)
; Zero the whole array with one aggregate store, then set elements 1 and 3.
131 store [5 x float] zeroinitializer, ptr addrspace(5) %f1
132 %foo1 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 1
133 store float 1.0, ptr addrspace(5) %foo1
134 %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
135 store float 2.0, ptr addrspace(5) %foo2
; Destination is the start of the array; source is the address of element 1.
136 call void @llvm.memmove.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo1, i32 16, i1 false)
; Read element 0 after the move and write it to @pv.
137 %foo3 = load float, ptr addrspace(5) %f1
138 store float %foo3, ptr addrspace(1) @pv
; Test: a memcpy whose source and destination are both inside the same
; [5 x float] alloca. It copies 8 bytes from element 3 to element 0, after one
; constant-index store and one store at a run-time index.
; The CHECK lines expect no alloca to remain: the dynamic store becomes an
; insertelement into a constant vector and the memcpy becomes a shufflevector.
142 define amdgpu_vs void @promote_memcpy_aggr() #0 {
143 ; CHECK-LABEL: @promote_memcpy_aggr(
144 ; CHECK-NEXT: [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
145 ; CHECK-NEXT: [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
146 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <5 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 2.000000e+00, float 0.000000e+00>, float 3.000000e+00, i32 [[FOO4]]
147 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <5 x float> [[TMP1]], <5 x float> poison, <5 x i32> <i32 3, i32 4, i32 2, i32 3, i32 4>
148 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <5 x float> [[TMP2]], i32 0
149 ; CHECK-NEXT: store float [[TMP3]], ptr addrspace(1) @pv, align 4
150 ; CHECK-NEXT: ret void
152 %f1 = alloca [5 x float], addrspace(5)
; Zero the whole array with one aggregate store, then set element 3.
153 store [5 x float] zeroinitializer, ptr addrspace(5) %f1
155 %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
156 store float 2.0, ptr addrspace(5) %foo2
; Load a run-time index from @block3 and store 3.0 at that position.
158 %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
159 %foo4 = load i32, ptr addrspace(1) %foo3
160 %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
161 store float 3.0, ptr addrspace(5) %foo5
; Destination is the start of the array; source is the address of element 3.
163 call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo2, i32 8, i1 false)
; Read element 0 after the copy and write it to @pv.
164 %foo6 = load float, ptr addrspace(5) %f1
165 store float %foo6, ptr addrspace(1) @pv
; Test: a memcpy whose source and destination are the same pointer (%f1),
; copying 20 bytes. The CHECK lines expect everything to fold to a store of
; 0.0, the value element 0 received from the zeroinitializer store.
169 define amdgpu_vs void @promote_memcpy_identity_aggr() #0 {
170 ; CHECK-LABEL: @promote_memcpy_identity_aggr(
171 ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) @pv, align 4
172 ; CHECK-NEXT: ret void
174 %f1 = alloca [5 x float], addrspace(5)
; Zero the whole array with one aggregate store, then set elements 1 and 3.
175 store [5 x float] zeroinitializer, ptr addrspace(5) %f1
176 %foo1 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 1
177 store float 1.0, ptr addrspace(5) %foo1
178 %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
179 store float 2.0, ptr addrspace(5) %foo2
; Source and destination operands are both %f1.
180 call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %f1, i32 20, i1 false)
; Read element 0 after the copy and write it to @pv.
181 %foo3 = load float, ptr addrspace(5) %f1
182 store float %foo3, ptr addrspace(1) @pv
186 ; TODO: promote alloca even if there is a memcpy between different allocas
; Test: a memcpy from one [5 x float] alloca (%f1) to a different one (%f2).
; The CHECK lines expect both allocas and the memcpy call to remain in the
; output; only the zeroinitializer aggregate stores are split into per-element
; scalar stores.
187 define amdgpu_vs void @promote_memcpy_two_aggrs() #0 {
188 ; CHECK-LABEL: @promote_memcpy_two_aggrs(
189 ; CHECK-NEXT: [[F1:%.*]] = alloca [5 x float], align 4, addrspace(5)
190 ; CHECK-NEXT: [[F2:%.*]] = alloca [5 x float], align 4, addrspace(5)
191 ; CHECK-NEXT: [[DOTFCA_0_GEP1:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 0
192 ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP1]], align 4
193 ; CHECK-NEXT: [[DOTFCA_1_GEP2:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 1
194 ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP2]], align 4
195 ; CHECK-NEXT: [[DOTFCA_2_GEP3:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 2
196 ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP3]], align 4
197 ; CHECK-NEXT: [[DOTFCA_3_GEP4:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 3
198 ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP4]], align 4
199 ; CHECK-NEXT: [[DOTFCA_4_GEP5:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 4
200 ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP5]], align 4
201 ; CHECK-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 0
202 ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP]], align 4
203 ; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 1
204 ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP]], align 4
205 ; CHECK-NEXT: [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 2
206 ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP]], align 4
207 ; CHECK-NEXT: [[DOTFCA_3_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 3
208 ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP]], align 4
209 ; CHECK-NEXT: [[DOTFCA_4_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 4
210 ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP]], align 4
211 ; CHECK-NEXT: [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
212 ; CHECK-NEXT: [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
213 ; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO4]]
214 ; CHECK-NEXT: store float 3.000000e+00, ptr addrspace(5) [[FOO5]], align 4
215 ; CHECK-NEXT: call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 [[F2]], ptr addrspace(5) align 4 [[F1]], i32 8, i1 false)
216 ; CHECK-NEXT: [[FOO6:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 [[FOO4]]
217 ; CHECK-NEXT: [[FOO7:%.*]] = load float, ptr addrspace(5) [[FOO6]], align 4
218 ; CHECK-NEXT: store float [[FOO7]], ptr addrspace(1) @pv, align 4
219 ; CHECK-NEXT: ret void
; Two separate array allocas: %f1 is the memcpy source, %f2 the destination.
221 %f1 = alloca [5 x float], addrspace(5)
222 %f2 = alloca [5 x float], addrspace(5)
; Zero both arrays with whole-aggregate stores.
224 store [5 x float] zeroinitializer, ptr addrspace(5) %f1
225 store [5 x float] zeroinitializer, ptr addrspace(5) %f2
; Load a run-time index from @block3 and store 3.0 into %f1 at that position.
227 %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
228 %foo4 = load i32, ptr addrspace(1) %foo3
229 %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
230 store float 3.0, ptr addrspace(5) %foo5
; Copy the first 8 bytes of %f1 into %f2.
232 call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %f2, ptr addrspace(5) align 4 %f1, i32 8, i1 false)
; Read %f2 at the same run-time index and write the value to @pv.
234 %foo6 = getelementptr [5 x float], ptr addrspace(5) %f2, i32 0, i32 %foo4
235 %foo7 = load float, ptr addrspace(5) %foo6
236 store float %foo7, ptr addrspace(1) @pv
240 ; TODO: promote alloca even if there is a memcpy between the alloca and another memory space.
; Test: a memcpy from a [5 x float] alloca (addrspace 5) to the global @pv
; (addrspace 1). The CHECK lines expect the alloca and the memcpy call to
; remain in the output; only the zeroinitializer aggregate store is split into
; per-element scalar stores.
; NOTE(review): the %src argument is not referenced by any visible line of the body.
241 define amdgpu_vs void @promote_memcpy_p1p5_aggr(ptr addrspace(1) inreg %src) #0 {
242 ; CHECK-LABEL: @promote_memcpy_p1p5_aggr(
243 ; CHECK-NEXT: [[F1:%.*]] = alloca [5 x float], align 4, addrspace(5)
244 ; CHECK-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 0
245 ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP]], align 4
246 ; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 1
247 ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP]], align 4
248 ; CHECK-NEXT: [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 2
249 ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP]], align 4
250 ; CHECK-NEXT: [[DOTFCA_3_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 3
251 ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP]], align 4
252 ; CHECK-NEXT: [[DOTFCA_4_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 4
253 ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP]], align 4
254 ; CHECK-NEXT: [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
255 ; CHECK-NEXT: [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
256 ; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO4]]
257 ; CHECK-NEXT: store float 3.000000e+00, ptr addrspace(5) [[FOO5]], align 4
258 ; CHECK-NEXT: call void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) align 4 @pv, ptr addrspace(5) align 4 [[F1]], i32 8, i1 false)
259 ; CHECK-NEXT: ret void
261 %f1 = alloca [5 x float], addrspace(5)
; Zero the whole array with one aggregate store.
262 store [5 x float] zeroinitializer, ptr addrspace(5) %f1
; Load a run-time index from @block3 and store 3.0 at that position.
264 %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
265 %foo4 = load i32, ptr addrspace(1) %foo3
266 %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
267 store float 3.0, ptr addrspace(5) %foo5
; Copy the first 8 bytes of the alloca out to the global @pv.
269 call void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) align 4 @pv, ptr addrspace(5) align 4 %f1, i32 8, i1 false)
; Test: same shape as the in-alloca memcpy case, but using the
; llvm.memcpy.inline intrinsic. It copies 8 bytes from element 3 to element 0
; of one [5 x float] alloca, after a store at a run-time index.
; The CHECK lines expect no alloca to remain: the dynamic store becomes an
; insertelement into zeroinitializer and the copy becomes a shufflevector.
273 define amdgpu_vs void @promote_memcpy_inline_aggr() #0 {
274 ; CHECK-LABEL: @promote_memcpy_inline_aggr(
275 ; CHECK-NEXT: [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
276 ; CHECK-NEXT: [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
277 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <5 x float> zeroinitializer, float 3.000000e+00, i32 [[FOO4]]
278 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <5 x float> [[TMP1]], <5 x float> poison, <5 x i32> <i32 3, i32 4, i32 2, i32 3, i32 4>
279 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <5 x float> [[TMP2]], i32 0
280 ; CHECK-NEXT: store float [[TMP3]], ptr addrspace(1) @pv, align 4
281 ; CHECK-NEXT: ret void
283 %f1 = alloca [5 x float], addrspace(5)
; Zero the whole array with one aggregate store.
284 store [5 x float] zeroinitializer, ptr addrspace(5) %f1
; Address of element 3; used only as the source operand of the copy below.
286 %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
; Load a run-time index from @block3 and store 3.0 at that position.
287 %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
288 %foo4 = load i32, ptr addrspace(1) %foo3
289 %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
290 store float 3.0, ptr addrspace(5) %foo5
; Destination is the start of the array; source is the address of element 3.
292 call void @llvm.memcpy.inline.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo2, i32 8, i1 false)
; Read element 0 after the copy and write it to @pv.
293 %foo6 = load float, ptr addrspace(5) %f1
294 store float %foo6, ptr addrspace(1) @pv
; Declarations of the memory-transfer intrinsics called by the tests above:
; memcpy within addrspace(5), memcpy from addrspace(5) to addrspace(1), the
; inline memcpy variant, and memmove within addrspace(5).
298 declare void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
299 declare void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
300 declare void @llvm.memcpy.inline.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
301 declare void @llvm.memmove.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
; Globals used by @promote_double_aggr: @tmp_g supplies the input doubles and
; @frag_color receives the <4 x float> result.
303 @tmp_g = external addrspace(1) global { [4 x double], <2 x double>, <3 x double>, <4 x double> }
304 @frag_color = external addrspace(1) global <4 x float>
; Test: a [2 x double] alloca is written with a whole-aggregate store of a value
; built by insertvalue, and then accessed with scalar double loads and stores.
; The CHECK lines expect no alloca to remain: the elements are recovered with
; extractvalue and the loads and stores are replaced by direct use of the values.
306 define amdgpu_ps void @promote_double_aggr() #0 {
307 ; CHECK-LABEL: @promote_double_aggr(
308 ; CHECK-NEXT: [[FOO:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 0
309 ; CHECK-NEXT: [[FOO1:%.*]] = load double, ptr addrspace(1) [[FOO]], align 8
310 ; CHECK-NEXT: [[FOO2:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 1
311 ; CHECK-NEXT: [[FOO3:%.*]] = load double, ptr addrspace(1) [[FOO2]], align 8
312 ; CHECK-NEXT: [[FOO4:%.*]] = insertvalue [2 x double] undef, double [[FOO1]], 0
313 ; CHECK-NEXT: [[FOO5:%.*]] = insertvalue [2 x double] [[FOO4]], double [[FOO3]], 1
314 ; CHECK-NEXT: [[FOO5_FCA_0_EXTRACT:%.*]] = extractvalue [2 x double] [[FOO5]], 0
315 ; CHECK-NEXT: [[FOO5_FCA_1_EXTRACT:%.*]] = extractvalue [2 x double] [[FOO5]], 1
316 ; CHECK-NEXT: [[FOO10:%.*]] = fadd double [[FOO5_FCA_1_EXTRACT]], [[FOO5_FCA_1_EXTRACT]]
317 ; CHECK-NEXT: [[FOO16:%.*]] = fadd double [[FOO10]], [[FOO5_FCA_1_EXTRACT]]
318 ; CHECK-NEXT: [[FOO17:%.*]] = fptrunc double [[FOO16]] to float
319 ; CHECK-NEXT: [[FOO18:%.*]] = insertelement <4 x float> undef, float [[FOO17]], i32 0
320 ; CHECK-NEXT: [[FOO19:%.*]] = insertelement <4 x float> [[FOO18]], float [[FOO17]], i32 1
321 ; CHECK-NEXT: [[FOO20:%.*]] = insertelement <4 x float> [[FOO19]], float [[FOO17]], i32 2
322 ; CHECK-NEXT: [[FOO21:%.*]] = insertelement <4 x float> [[FOO20]], float [[FOO17]], i32 3
323 ; CHECK-NEXT: store <4 x float> [[FOO21]], ptr addrspace(1) @frag_color, align 16
324 ; CHECK-NEXT: ret void
; The array alloca under test.
326 %s = alloca [2 x double], addrspace(5)
; Load the first two doubles of the [4 x double] field of @tmp_g.
327 %foo = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 0
328 %foo1 = load double, ptr addrspace(1) %foo
329 %foo2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 1
330 %foo3 = load double, ptr addrspace(1) %foo2
; Pack them into a [2 x double] value and store it to the alloca in one go.
331 %foo4 = insertvalue [2 x double] undef, double %foo1, 0
332 %foo5 = insertvalue [2 x double] %foo4, double %foo3, 1
333 store [2 x double] %foo5, ptr addrspace(5) %s
; Element 1 is loaded twice and summed; the sum overwrites element 0.
334 %foo6 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1
335 %foo7 = load double, ptr addrspace(5) %foo6
336 %foo8 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1
337 %foo9 = load double, ptr addrspace(5) %foo8
338 %foo10 = fadd double %foo7, %foo9
339 store double %foo10, ptr addrspace(5) %s
; Reload element 0 (the sum) and element 1, add them, and truncate to float.
340 %foo13 = load double, ptr addrspace(5) %s
341 %foo14 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1
342 %foo15 = load double, ptr addrspace(5) %foo14
343 %foo16 = fadd double %foo13, %foo15
344 %foo17 = fptrunc double %foo16 to float
; Broadcast the float into all four lanes and write the result to @frag_color.
345 %foo18 = insertelement <4 x float> undef, float %foo17, i32 0
346 %foo19 = insertelement <4 x float> %foo18, float %foo17, i32 1
347 %foo20 = insertelement <4 x float> %foo19, float %foo17, i32 2
348 %foo21 = insertelement <4 x float> %foo20, float %foo17, i32 3
349 store <4 x float> %foo21, ptr addrspace(1) @frag_color
353 ; Don't crash on a type that isn't a valid vector element.
354 define amdgpu_kernel void @alloca_struct() #0 {
355 ; CHECK-LABEL: @alloca_struct(
357 ; CHECK-NEXT: ret void
360 %alloca = alloca [2 x %struct], align 4, addrspace(5)