1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; Aggregate wrapping an array of four i32 values. Every function in this test
; either receives it as a byval argument or allocates it and passes it byval.
4 %struct.ByValStruct = type { [4 x i32] }
6 ; GCN-LABEL: {{^}}void_func_byval_struct:
7 ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32{{$}}
9 ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
12 ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:16{{$}}
14 ; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:16{{$}}
; Callee receiving two %struct.ByValStruct arguments byval (align 4) through
; addrspace(5) pointers. Using volatile accesses it adds 1 to the first i32 of
; %arg0 and 2 to the first i32 of %arg1, then performs a volatile store of 9
; through the null addrspace(1) pointer. The body makes no calls.
16 define hidden void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
; First element of %arg0: load, add 1, store back.
18 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
19 %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
20 %add = add nsw i32 %tmp, 1
21 store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
; First element of %arg1: load, add 2, store back.
22 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
23 %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
24 %add3 = add nsw i32 %tmp1, 2
25 store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
; Volatile store of the constant 9 through a null addrspace(1) pointer.
26 store volatile i32 9, i32 addrspace(1)* null, align 4
30 ; Make sure the offset is folded and the function's frame register is used
31 ; rather than the global scratch wave offset.
32 ; GCN-LABEL: {{^}}void_func_byval_struct_use_outside_entry_block:
33 ; GCN-NOT: v_lshrrev_b32
36 ; GCN: s_and_saveexec_b64
37 ; GCN: s_cbranch_execz [[BB1:BB[0-9]+_[0-9]+]]
39 ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32{{$}}
41 ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
44 ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:16{{$}}
46 ; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:16{{$}}
50 ; GCN: s_or_b64 exec, exec
; Variant of the two-byval-struct callee with an extra i1 %cond argument. The
; function first branches on %cond to %bb0 or %bb1, so the byval pointers are
; dereferenced only after that conditional branch.
; NOTE(review): presumably the accesses below belong to %bb0 -- confirm.
51 define hidden void @void_func_byval_struct_use_outside_entry_block(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1, i1 %cond) #1 {
53 br i1 %cond, label %bb0, label %bb1
; First element of %arg0: load, add 1, store back.
56 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
57 %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
58 %add = add nsw i32 %tmp, 1
59 store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
; First element of %arg1: load, add 2, store back.
60 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
61 %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
62 %add3 = add nsw i32 %tmp1, 2
63 store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
; Volatile store of the constant 9 through a null addrspace(1) pointer.
64 store volatile i32 9, i32 addrspace(1)* null, align 4
71 ; GCN-LABEL: {{^}}void_func_byval_struct_non_leaf:
72 ; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:36
73 ; GCN-DAG: v_writelane_b32 v33, s34,
74 ; GCN: s_mov_b32 s34, s32
75 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s34{{$}}
76 ; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
77 ; GCN-DAG: buffer_store_dword v32, off, s[0:3], s34 offset:32
78 ; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32
80 ; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
81 ; GCN: buffer_store_dword [[ADD0]], off, s[0:3], s34{{$}}
83 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s34 offset:16{{$}}
84 ; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]
88 ; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s34 offset:16{{$}}
91 ; GCN-NOT: v_readlane_b32 s32
92 ; GCN-DAG: buffer_load_dword v32, off, s[0:3], s34 offset:32
93 ; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
94 ; GCN: v_readlane_b32 s34, v33,
95 ; GCN-DAG: buffer_load_dword v33, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
; Two-byval-struct callee that also makes a call. The update of %arg0 finishes
; before the call; for %arg1 the load and add happen before the call to
; @external_void_func_void and the store happens after it, so %add3 must
; survive across the call.
97 define void @void_func_byval_struct_non_leaf(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
; First element of %arg0: load, add 1, store back.
99 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
100 %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
101 %add = add nsw i32 %tmp, 1
102 store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
; First element of %arg1: load and add 2 before the call ...
103 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
104 %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
105 %add3 = add nsw i32 %tmp1, 2
106 call void @external_void_func_void()
; ... and store the result only after the call returns.
107 store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
; Volatile store of the constant 9 through a null addrspace(1) pointer.
108 store volatile i32 9, i32 addrspace(1)* null, align 4
112 ; GCN-LABEL: {{^}}call_void_func_byval_struct_func:
113 ; GCN: s_mov_b32 s34, s32
114 ; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
115 ; GCN-DAG: v_writelane_b32
117 ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
118 ; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
120 ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s34{{$}}
121 ; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s34 offset:16
123 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s34{{$}}
124 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s34 offset:4
125 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s34 offset:8
126 ; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s34 offset:12
128 ; GCN-NOT: s_add_u32 s32, s32, 0x800
131 ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
132 ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
133 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
134 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
136 ; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s34 offset:20
137 ; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s34 offset:24
138 ; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s34 offset:28
139 ; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16
141 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
142 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
143 ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
144 ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28
147 ; GCN-NOT: v_readlane_b32 s32
148 ; GCN: v_readlane_b32
149 ; GCN-NOT: v_readlane_b32 s32
151 ; GCN-NOT: s_sub_u32 s32, s32, 0x800
153 ; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
154 ; GCN: v_readlane_b32 s34, v
; Non-kernel caller: allocates two %struct.ByValStruct objects (align 4) in
; addrspace(5), writes 9 and 13 into their first elements with volatile
; stores, and passes both byval to @void_func_byval_struct.
; NOTE(review): the lifetime markers use size 32 while the allocated type is
; { [4 x i32] } -- confirm the size is intended.
157 define void @call_void_func_byval_struct_func() #1 {
159 %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
160 %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
; Open the lifetime of both allocas (via i8 addrspace(5)* casts).
161 %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
162 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
163 %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
164 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
; Initialize element 0 of %arg0 with 9 and element 0 of %arg1 with 13.
165 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
166 store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
167 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
168 store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
; Pass both structs byval (align 4) to the callee.
169 call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
; Close the lifetimes in the reverse order of their start markers.
170 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
171 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
175 ; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel:
176 ; GCN: s_mov_b32 s33, s7
177 ; GCN-NOT: s_add_u32 s32, s32, 0x800
179 ; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
180 ; GCN: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8
181 ; GCN: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
182 ; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24
184 ; GCN-NOT: s_add_u32 s32, s32, 0x800
185 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
186 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
187 ; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}}
188 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
189 ; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20
193 ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
194 ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
195 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
196 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
198 ; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24
199 ; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28
200 ; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32
201 ; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36
203 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
204 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
205 ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
206 ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28
210 ; GCN-NOT: s_sub_u32 s32
; amdgpu_kernel caller: allocates two %struct.ByValStruct objects (align 4) in
; addrspace(5), writes 9 and 13 into their first elements with volatile
; stores, and passes both byval to @void_func_byval_struct.
212 define amdgpu_kernel void @call_void_func_byval_struct_kernel() #1 {
214 %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
215 %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
; Open the lifetime of both allocas (via i8 addrspace(5)* casts).
216 %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
217 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
218 %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
219 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
; Initialize element 0 of %arg0 with 9 and element 0 of %arg1 with 13.
220 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
221 store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
222 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
223 store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
; Pass both structs byval (align 4) to the callee.
224 call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
; Close the lifetimes in the reverse order of their start markers.
225 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
226 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
230 ; GCN-LABEL: {{^}}void_func_byval_struct_align8:
231 ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32{{$}}
233 ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
236 ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:16{{$}}
238 ; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:16{{$}}
; Align-8 variant of the two-byval-struct callee: both byval arguments are
; declared align 8 and the addrspace(5) loads/stores use align 8. It adds 1 to
; the first i32 of %arg0 and 2 to the first i32 of %arg1, then does a volatile
; store of 9 through the null addrspace(1) pointer (that store stays align 4).
240 define hidden void @void_func_byval_struct_align8(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 8 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 8 %arg1) #1 {
; First element of %arg0: load, add 1, store back.
242 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
243 %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 8
244 %add = add nsw i32 %tmp, 1
245 store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 8
; First element of %arg1: load, add 2, store back.
246 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
247 %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 8
248 %add3 = add nsw i32 %tmp1, 2
249 store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 8
; Volatile store of the constant 9 through a null addrspace(1) pointer.
250 store volatile i32 9, i32 addrspace(1)* null, align 4
254 ; Make sure the byval alignment is respected in the call frame setup
255 ; GCN-LABEL: {{^}}call_void_func_byval_struct_align8_kernel:
256 ; GCN: s_mov_b32 s33, s7
257 ; GCN-NOT: s_add_u32 s32, s32, 0x800
259 ; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
260 ; GCN: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8
261 ; GCN: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
262 ; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24
265 ; GCN-NOT: s_add_u32 s32, s32, 0x800
266 ; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}}
268 ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
269 ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
270 ; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
271 ; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20
273 ; GCN: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
274 ; GCN: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
275 ; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
276 ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
279 ; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24
280 ; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28
281 ; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32
282 ; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36
284 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
285 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
286 ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
287 ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28
291 ; GCN-NOT: s_sub_u32 s32
; amdgpu_kernel caller for the align-8 callee: allocates two
; %struct.ByValStruct objects with align 8 in addrspace(5), writes 9 and 13
; into their first elements, and passes both byval align 8 to
; @void_func_byval_struct_align8.
293 define amdgpu_kernel void @call_void_func_byval_struct_align8_kernel() #1 {
295 %arg0 = alloca %struct.ByValStruct, align 8, addrspace(5)
296 %arg1 = alloca %struct.ByValStruct, align 8, addrspace(5)
; Open the lifetime of both allocas (via i8 addrspace(5)* casts).
297 %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
298 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
299 %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
300 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
; Initialize element 0 of %arg0 with 9 and element 0 of %arg1 with 13.
301 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
302 store volatile i32 9, i32 addrspace(5)* %arrayidx, align 8
303 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
304 store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 8
; Pass both structs byval (align 8) to the callee.
305 call void @void_func_byval_struct_align8(%struct.ByValStruct addrspace(5)* byval nonnull align 8 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 8 %arg1)
; Close the lifetimes in the reverse order of their start markers.
306 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
307 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
311 ; GCN-LABEL: {{^}}call_void_func_byval_struct_align8_func:
312 ; GCN: s_mov_b32 s34, s32
313 ; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
314 ; GCN-DAG: v_writelane_b32
316 ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
317 ; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
319 ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s34{{$}}
320 ; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s34 offset:16
322 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s34{{$}}
323 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s34 offset:4
324 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s34 offset:8
325 ; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s34 offset:12
327 ; GCN-NOT: s_add_u32 s32, s32, 0x800
329 ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
330 ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
331 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
332 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
334 ; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s34 offset:20
335 ; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s34 offset:24
336 ; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s34 offset:28
337 ; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16
339 ; GCN: s_waitcnt vmcnt(0)
340 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
341 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
342 ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
343 ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28
346 ; GCN-NOT: v_readlane_b32 s32
347 ; GCN: v_readlane_b32
348 ; GCN-NOT: v_readlane_b32 s32
350 ; GCN-NOT: s_sub_u32 s32, s32, 0x800
352 ; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
353 ; GCN: v_readlane_b32 s34, v
355 ; GCN-NEXT: s_setpc_b64
; Non-kernel caller for the align-8 callee: allocates two %struct.ByValStruct
; objects with align 8 in addrspace(5), writes 9 and 13 into their first
; elements, and passes both byval align 8 to @void_func_byval_struct_align8.
; Unlike the other callers in this file it uses attribute group #0 (nounwind
; only) rather than #1.
356 define void @call_void_func_byval_struct_align8_func() #0 {
358 %arg0 = alloca %struct.ByValStruct, align 8, addrspace(5)
359 %arg1 = alloca %struct.ByValStruct, align 8, addrspace(5)
; Open the lifetime of both allocas (via i8 addrspace(5)* casts).
360 %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
361 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
362 %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
363 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
; Initialize element 0 of %arg0 with 9 and element 0 of %arg1 with 13.
364 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
365 store volatile i32 9, i32 addrspace(5)* %arrayidx, align 8
366 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
367 store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 8
; Pass both structs byval (align 8) to the callee.
368 call void @void_func_byval_struct_align8(%struct.ByValStruct addrspace(5)* byval nonnull align 8 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 8 %arg1)
; Close the lifetimes in the reverse order of their start markers.
369 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
370 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
374 ; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel_no_frame_pointer_elim:
; amdgpu_kernel caller compiled with attribute group #2, which sets
; "frame-pointer"="all". The body matches the other align-4 kernel caller:
; two allocas, stores of 9 and 13, and a byval call to
; @void_func_byval_struct. Only the label line precedes this function; no
; instruction-level expectations are attached to it.
375 define amdgpu_kernel void @call_void_func_byval_struct_kernel_no_frame_pointer_elim() #2 {
377 %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
378 %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
; Open the lifetime of both allocas (via i8 addrspace(5)* casts).
379 %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
380 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
381 %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
382 call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
; Initialize element 0 of %arg0 with 9 and element 0 of %arg1 with 13.
383 %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
384 store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
385 %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
386 store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
; Pass both structs byval (align 4) to the callee.
387 call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
; Close the lifetimes in the reverse order of their start markers.
388 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
389 call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
; External function with no body in this file; called from
; @void_func_byval_struct_non_leaf.
393 declare hidden void @external_void_func_void() #0

; Lifetime start/end intrinsics taking an i64 size and an i8 addrspace(5)
; pointer; used around the allocas in the caller functions.
395 declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #3
396 declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #3
; Attribute groups referenced by the definitions and declarations in this file.
; #0: nounwind only.
398 attributes #0 = { nounwind }
; #1: adds noinline and norecurse; used by most functions here.
399 attributes #1 = { noinline norecurse nounwind }
; #2: requests a frame pointer in all functions; used by the
; no_frame_pointer_elim kernel.
400 attributes #2 = { nounwind norecurse "frame-pointer"="all" }