; Three llc invocations feed the same checks: hawaii is checked with the GCN
; and CI prefixes, fiji with GCN and GFX89, gfx900 with GCN, GFX89 and GFX9.
; The fiji and gfx900 invocations additionally pass -mattr=-flat-for-global.
1 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
2 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
3 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
; i1 return type: the body loads an i1 through an undef addrspace(1) pointer.
; The visible checks expect an unsigned byte load into v0 and s_setpc_b64.
; NOTE(review): the ret and closing brace are not visible in this excerpt.
5 ; GCN-LABEL: {{^}}i1_func_void:
6 ; GCN: buffer_load_ubyte v0, off
8 ; GCN-NEXT: s_setpc_b64
9 define i1 @i1_func_void() #0 {
10 %val = load i1, i1 addrspace(1)* undef
; zeroext i1 return type: the checks expect the unsigned byte load into v0 to
; be followed directly by the vmcnt wait and s_setpc_b64, with no masking
; instruction between them.
15 ; GCN-LABEL: {{^}}i1_zeroext_func_void:
16 ; GCN: buffer_load_ubyte v0, off
17 ; GCN-NEXT: s_waitcnt vmcnt(0)
18 ; GCN-NEXT: s_setpc_b64
19 define zeroext i1 @i1_zeroext_func_void() #0 {
20 %val = load i1, i1 addrspace(1)* undef
; signext i1 return type: after the unsigned byte load and the vmcnt wait, the
; checks expect a v_bfe_i32 on v0 with offset 0 and width 1 before s_setpc_b64.
24 ; GCN-LABEL: {{^}}i1_signext_func_void:
25 ; GCN: buffer_load_ubyte v0, off
26 ; GCN-NEXT: s_waitcnt vmcnt(0)
27 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1{{$}}
28 ; GCN-NEXT: s_setpc_b64
29 define signext i1 @i1_signext_func_void() #0 {
30 %val = load i1, i1 addrspace(1)* undef
; i8 return type: loads i8 through an undef addrspace(1) pointer; the checks
; expect buffer_load_ubyte into v0, the vmcnt wait, then s_setpc_b64.
34 ; GCN-LABEL: {{^}}i8_func_void:
35 ; GCN: buffer_load_ubyte v0, off
36 ; GCN-NEXT: s_waitcnt vmcnt(0)
37 ; GCN-NEXT: s_setpc_b64
38 define i8 @i8_func_void() #0 {
39 %val = load i8, i8 addrspace(1)* undef
; zeroext i8 return type: the checks expect the unsigned byte load into v0
; with nothing but the vmcnt wait between it and s_setpc_b64.
43 ; GCN-LABEL: {{^}}i8_zeroext_func_void:
44 ; GCN: buffer_load_ubyte v0, off
45 ; GCN-NEXT: s_waitcnt vmcnt(0)
46 ; GCN-NEXT: s_setpc_b64
47 define zeroext i8 @i8_zeroext_func_void() #0 {
48 %val = load i8, i8 addrspace(1)* undef
; signext i8 return type: the checks expect the signed byte load
; (buffer_load_sbyte) into v0 and no separate extension instruction.
52 ; GCN-LABEL: {{^}}i8_signext_func_void:
53 ; GCN: buffer_load_sbyte v0, off
54 ; GCN-NEXT: s_waitcnt vmcnt(0)
55 ; GCN-NEXT: s_setpc_b64
56 define signext i8 @i8_signext_func_void() #0 {
57 %val = load i8, i8 addrspace(1)* undef
; i16 return type: loads i16 through an undef addrspace(1) pointer; the checks
; expect buffer_load_ushort into v0, the vmcnt wait, then s_setpc_b64.
61 ; GCN-LABEL: {{^}}i16_func_void:
62 ; GCN: buffer_load_ushort v0, off
63 ; GCN-NEXT: s_waitcnt vmcnt(0)
64 ; GCN-NEXT: s_setpc_b64
65 define i16 @i16_func_void() #0 {
66 %val = load i16, i16 addrspace(1)* undef
; zeroext i16 return type: the checks expect the unsigned short load into v0
; with nothing but the vmcnt wait between it and s_setpc_b64.
70 ; GCN-LABEL: {{^}}i16_zeroext_func_void:
71 ; GCN: buffer_load_ushort v0, off
72 ; GCN-NEXT: s_waitcnt vmcnt(0)
73 ; GCN-NEXT: s_setpc_b64
74 define zeroext i16 @i16_zeroext_func_void() #0 {
75 %val = load i16, i16 addrspace(1)* undef
; signext i16 return type: the checks expect the signed short load
; (buffer_load_sshort) into v0 and no separate extension instruction.
79 ; GCN-LABEL: {{^}}i16_signext_func_void:
80 ; GCN: buffer_load_sshort v0, off
81 ; GCN-NEXT: s_waitcnt vmcnt(0)
82 ; GCN-NEXT: s_setpc_b64
83 define signext i16 @i16_signext_func_void() #0 {
84 %val = load i16, i16 addrspace(1)* undef
; i32 return type: loads i32 through an undef addrspace(1) pointer; the checks
; expect a single buffer_load_dword into v0.
88 ; GCN-LABEL: {{^}}i32_func_void:
89 ; GCN: buffer_load_dword v0, off
90 ; GCN-NEXT: s_waitcnt vmcnt(0)
91 ; GCN-NEXT: s_setpc_b64
92 define i32 @i32_func_void() #0 {
93 %val = load i32, i32 addrspace(1)* undef
; i48 return type: the load is marked align 8. The checks expect a dword load
; into v0 immediately followed by an unsigned short load into v1.
97 ; GCN-LABEL: {{^}}i48_func_void:
98 ; GCN: buffer_load_dword v0, off
99 ; GCN-NEXT: buffer_load_ushort v1, off
100 ; GCN-NEXT: s_waitcnt vmcnt(0)
101 ; GCN-NEXT: s_setpc_b64
102 define i48 @i48_func_void() #0 {
103 %val = load i48, i48 addrspace(1)* undef, align 8
; zeroext i48 return type: the checks expect a dword load into v0 and an
; unsigned short load into v1, with no extra masking before s_setpc_b64.
107 ; GCN-LABEL: {{^}}i48_zeroext_func_void:
108 ; GCN: buffer_load_dword v0, off
109 ; GCN-NEXT: buffer_load_ushort v1, off
110 ; GCN-NEXT: s_waitcnt vmcnt(0)
111 ; GCN-NEXT: s_setpc_b64
112 define zeroext i48 @i48_zeroext_func_void() #0 {
113 %val = load i48, i48 addrspace(1)* undef, align 8
; signext i48 return type: the checks expect a dword load into v0 and a signed
; short load (buffer_load_sshort) into v1 for the upper 16 bits.
117 ; GCN-LABEL: {{^}}i48_signext_func_void:
118 ; GCN: buffer_load_dword v0, off
119 ; GCN-NEXT: buffer_load_sshort v1, off
120 ; GCN-NEXT: s_waitcnt vmcnt(0)
121 ; GCN-NEXT: s_setpc_b64
122 define signext i48 @i48_signext_func_void() #0 {
123 %val = load i48, i48 addrspace(1)* undef, align 8
; i63 return type. Unlike the load-based cases, this function takes an i63
; argument despite the _void suffix in its name.
; NOTE(review): the function body and the check line that precedes the
; s_setpc_b64 check are not visible in this excerpt.
127 ; GCN-LABEL: {{^}}i63_func_void:
129 ; GCN-NEXT: s_setpc_b64
130 define i63 @i63_func_void(i63 %val) #0 {
; zeroext i63 return type with an i63 argument: the checks expect v1 to be
; ANDed with 0x7fffffff right before s_setpc_b64.
; NOTE(review): the function body is not visible in this excerpt.
134 ; GCN-LABEL: {{^}}i63_zeroext_func_void:
136 ; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
137 ; GCN-NEXT: s_setpc_b64
138 define zeroext i63 @i63_zeroext_func_void(i63 %val) #0 {
; signext i63 return type with an i63 argument: the checks expect a 64-bit
; shift left by 1 followed by an arithmetic shift right by 1 on v[0:1]. The CI
; prefix checks v_lshl_b64 / v_ashr_i64; the GFX89 prefix checks the
; operand-reversed forms v_lshlrev_b64 / v_ashrrev_i64.
; NOTE(review): the function body is not visible in this excerpt.
142 ; GCN-LABEL: {{^}}i63_signext_func_void:
144 ; CI-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
145 ; CI-NEXT: v_ashr_i64 v[0:1], v[0:1], 1
147 ; GFX89-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
148 ; GFX89-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
150 ; GCN-NEXT: s_setpc_b64
151 define signext i63 @i63_signext_func_void(i63 %val) #0 {
; i64 return type: the checks expect one buffer_load_dwordx2 into v[0:1].
155 ; GCN-LABEL: {{^}}i64_func_void:
156 ; GCN: buffer_load_dwordx2 v[0:1], off
157 ; GCN-NEXT: s_waitcnt vmcnt(0)
158 ; GCN-NEXT: s_setpc_b64
159 define i64 @i64_func_void() #0 {
160 %val = load i64, i64 addrspace(1)* undef
; i65 return type: the checks expect a dwordx2 load into v[0:1] and an
; unsigned byte load into v2, in either order (DAG checks).
164 ; GCN-LABEL: {{^}}i65_func_void:
165 ; GCN-DAG: buffer_load_dwordx2 v[0:1], off
166 ; GCN-DAG: buffer_load_ubyte v2, off
167 ; GCN: s_waitcnt vmcnt(0)
168 ; GCN-NEXT: s_setpc_b64
169 define i65 @i65_func_void() #0 {
170 %val = load i65, i65 addrspace(1)* undef
; float return type: the checks expect a dword load into v0. This is the only
; scalar case in this excerpt whose check also pins the trailing s[4:7], 0
; operands of the load.
174 ; GCN-LABEL: {{^}}f32_func_void:
175 ; GCN: buffer_load_dword v0, off, s[4:7], 0
176 ; GCN-NEXT: s_waitcnt vmcnt(0)
177 ; GCN-NEXT: s_setpc_b64
178 define float @f32_func_void() #0 {
179 %val = load float, float addrspace(1)* undef
; double return type: the checks expect one buffer_load_dwordx2 into v[0:1].
183 ; GCN-LABEL: {{^}}f64_func_void:
184 ; GCN: buffer_load_dwordx2 v[0:1], off
185 ; GCN-NEXT: s_waitcnt vmcnt(0)
186 ; GCN-NEXT: s_setpc_b64
187 define double @f64_func_void() #0 {
188 %val = load double, double addrspace(1)* undef
; <2 x double> return: loads the vector through an undef addrspace(1) pointer
; and returns it; the checks expect one buffer_load_dwordx4 into v[0:3].
192 ; GCN-LABEL: {{^}}v2f64_func_void:
193 ; GCN: buffer_load_dwordx4 v[0:3], off
194 ; GCN-NEXT: s_waitcnt vmcnt(0)
195 ; GCN-NEXT: s_setpc_b64
196 define <2 x double> @v2f64_func_void() #0 {
197 %val = load <2 x double>, <2 x double> addrspace(1)* undef
198 ret <2 x double> %val
; <2 x i32> return type: the checks expect one buffer_load_dwordx2 into v[0:1].
201 ; GCN-LABEL: {{^}}v2i32_func_void:
202 ; GCN: buffer_load_dwordx2 v[0:1], off
203 ; GCN-NEXT: s_waitcnt vmcnt(0)
204 ; GCN-NEXT: s_setpc_b64
205 define <2 x i32> @v2i32_func_void() #0 {
206 %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
; <3 x i32> return type: the checks expect one buffer_load_dwordx3 into v[0:2]
; under the common GCN prefix, i.e. for all three llc configurations.
210 ; GCN-LABEL: {{^}}v3i32_func_void:
211 ; GCN: buffer_load_dwordx3 v[0:2], off
212 ; GCN-NEXT: s_waitcnt vmcnt(0)
213 ; GCN-NEXT: s_setpc_b64
214 define <3 x i32> @v3i32_func_void() #0 {
215 %val = load <3 x i32>, <3 x i32> addrspace(1)* undef
; <4 x i32> return type: the checks expect one buffer_load_dwordx4 into v[0:3].
219 ; GCN-LABEL: {{^}}v4i32_func_void:
220 ; GCN: buffer_load_dwordx4 v[0:3], off
221 ; GCN-NEXT: s_waitcnt vmcnt(0)
222 ; GCN-NEXT: s_setpc_b64
223 define <4 x i32> @v4i32_func_void() #0 {
224 %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
; <5 x i32> return type, loaded with a volatile load: the checks expect a
; dwordx4 load into v[0:3] and a dword load into v4, in either order.
228 ; GCN-LABEL: {{^}}v5i32_func_void:
229 ; GCN-DAG: buffer_load_dword v4, off
230 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
231 ; GCN: s_waitcnt vmcnt(0)
232 ; GCN-NEXT: s_setpc_b64
233 define <5 x i32> @v5i32_func_void() #0 {
234 %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef
; <8 x i32> return type: the addrspace(1) pointer is itself loaded (volatile)
; from an undef addrspace(4) pointer, then the vector is loaded through it.
; The checks expect two dwordx4 loads covering v[0:7], in either order.
238 ; GCN-LABEL: {{^}}v8i32_func_void:
239 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
240 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
241 ; GCN: s_waitcnt vmcnt(0)
242 ; GCN-NEXT: s_setpc_b64
243 define <8 x i32> @v8i32_func_void() #0 {
244 %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
245 %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
; <16 x i32> return type, loaded through a pointer that is itself loaded
; (volatile) from an undef addrspace(4) pointer. The checks expect four
; dwordx4 loads covering v[0:15], in any order.
249 ; GCN-LABEL: {{^}}v16i32_func_void:
250 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
251 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
252 ; GCN-DAG: buffer_load_dwordx4 v[8:11], off
253 ; GCN-DAG: buffer_load_dwordx4 v[12:15], off
254 ; GCN: s_waitcnt vmcnt(0)
255 ; GCN-NEXT: s_setpc_b64
256 define <16 x i32> @v16i32_func_void() #0 {
257 %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
258 %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
; <32 x i32> return type, loaded through a pointer that is itself loaded
; (volatile) from an undef addrspace(4) pointer. The checks expect eight
; dwordx4 loads covering v[0:31], in any order.
262 ; GCN-LABEL: {{^}}v32i32_func_void:
263 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
264 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
265 ; GCN-DAG: buffer_load_dwordx4 v[8:11], off
266 ; GCN-DAG: buffer_load_dwordx4 v[12:15], off
267 ; GCN-DAG: buffer_load_dwordx4 v[16:19], off
268 ; GCN-DAG: buffer_load_dwordx4 v[20:23], off
269 ; GCN-DAG: buffer_load_dwordx4 v[24:27], off
270 ; GCN-DAG: buffer_load_dwordx4 v[28:31], off
271 ; GCN: s_waitcnt vmcnt(0)
272 ; GCN-NEXT: s_setpc_b64
273 define <32 x i32> @v32i32_func_void() #0 {
274 %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
275 %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
; <2 x i64> return type: the checks expect one buffer_load_dwordx4 into v[0:3].
279 ; GCN-LABEL: {{^}}v2i64_func_void:
280 ; GCN: buffer_load_dwordx4 v[0:3], off
281 ; GCN-NEXT: s_waitcnt vmcnt(0)
282 ; GCN-NEXT: s_setpc_b64
283 define <2 x i64> @v2i64_func_void() #0 {
284 %val = load <2 x i64>, <2 x i64> addrspace(1)* undef
; <3 x i64> return type, loaded through a volatile-loaded pointer: the checks
; expect a dwordx4 load into v[0:3] and a dwordx2 load into v[4:5], in either
; order.
288 ; GCN-LABEL: {{^}}v3i64_func_void:
289 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
290 ; GCN-DAG: buffer_load_dwordx2 v[4:5], off
291 ; GCN: s_waitcnt vmcnt(0)
292 ; GCN-NEXT: s_setpc_b64
293 define <3 x i64> @v3i64_func_void() #0 {
294 %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef
295 %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr
; <4 x i64> return type, loaded through a volatile-loaded pointer: the checks
; expect two dwordx4 loads into v[0:3] and v[4:7]. These are plain ordered
; checks rather than DAG checks, so the v[0:3] load must be emitted first and
; the vmcnt wait must directly follow the v[4:7] load.
299 ; GCN-LABEL: {{^}}v4i64_func_void:
300 ; GCN: buffer_load_dwordx4 v[0:3], off
301 ; GCN: buffer_load_dwordx4 v[4:7], off
302 ; GCN-NEXT: s_waitcnt vmcnt(0)
303 ; GCN-NEXT: s_setpc_b64
304 define <4 x i64> @v4i64_func_void() #0 {
305 %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef
306 %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr
; <5 x i64> return type, loaded through a volatile-loaded pointer: the checks
; expect three dwordx4 loads into v[0:3], v[4:7] and v[8:11], in any order.
310 ; GCN-LABEL: {{^}}v5i64_func_void:
311 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
312 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
313 ; GCN-DAG: buffer_load_dwordx4 v[8:11], off
314 ; GCN: s_waitcnt vmcnt(0)
315 ; GCN-NEXT: s_setpc_b64
316 define <5 x i64> @v5i64_func_void() #0 {
317 %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef
318 %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr
; <8 x i64> return type, loaded through a volatile-loaded pointer: the checks
; expect four dwordx4 loads covering v[0:15], in any order.
322 ; GCN-LABEL: {{^}}v8i64_func_void:
323 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
324 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
325 ; GCN-DAG: buffer_load_dwordx4 v[8:11], off
326 ; GCN-DAG: buffer_load_dwordx4 v[12:15], off
327 ; GCN: s_waitcnt vmcnt(0)
328 ; GCN-NEXT: s_setpc_b64
329 define <8 x i64> @v8i64_func_void() #0 {
330 %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef
331 %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr
; <16 x i64> return type, loaded through a volatile-loaded pointer: the checks
; expect eight dwordx4 loads covering v[0:31], in any order.
335 ; GCN-LABEL: {{^}}v16i64_func_void:
336 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
337 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
338 ; GCN-DAG: buffer_load_dwordx4 v[8:11], off
339 ; GCN-DAG: buffer_load_dwordx4 v[12:15], off
340 ; GCN-DAG: buffer_load_dwordx4 v[16:19], off
341 ; GCN-DAG: buffer_load_dwordx4 v[20:23], off
342 ; GCN-DAG: buffer_load_dwordx4 v[24:27], off
343 ; GCN-DAG: buffer_load_dwordx4 v[28:31], off
344 ; GCN: s_waitcnt vmcnt(0)
345 ; GCN-NEXT: s_setpc_b64
346 define <16 x i64> @v16i64_func_void() #0 {
347 %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef
348 %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr
; <2 x i16> return type. Only the label is checked for every configuration;
; the body checks use the GFX9 prefix (gfx900 only) and expect a single dword
; load into v0.
352 ; GCN-LABEL: {{^}}v2i16_func_void:
353 ; GFX9: buffer_load_dword v0, off
354 ; GFX9-NEXT: s_waitcnt vmcnt(0)
355 ; GFX9-NEXT: s_setpc_b64
356 define <2 x i16> @v2i16_func_void() #0 {
357 %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
; <3 x i16> return type. The body is checked for gfx900 only (GFX9 prefix):
; a dwordx2 load whose destination register pair is matched by a regex rather
; than pinned to v[0:1].
361 ; GCN-LABEL: {{^}}v3i16_func_void:
362 ; GFX9: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off
363 ; GFX9-NEXT: s_waitcnt vmcnt(0)
364 ; GFX9-NEXT: s_setpc_b64
365 define <3 x i16> @v3i16_func_void() #0 {
366 %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
; <4 x i16> return type. The body is checked for gfx900 only (GFX9 prefix):
; one dwordx2 load into v[0:1].
370 ; GCN-LABEL: {{^}}v4i16_func_void:
371 ; GFX9: buffer_load_dwordx2 v[0:1], off
372 ; GFX9-NEXT: s_waitcnt vmcnt(0)
373 ; GFX9-NEXT: s_setpc_b64
374 define <4 x i16> @v4i16_func_void() #0 {
375 %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
; <4 x half> return type. The body is checked for gfx900 only (GFX9 prefix):
; one dwordx2 load into v[0:1].
379 ; GCN-LABEL: {{^}}v4f16_func_void:
380 ; GFX9: buffer_load_dwordx2 v[0:1], off
381 ; GFX9-NEXT: s_waitcnt vmcnt(0)
382 ; GFX9-NEXT: s_setpc_b64
383 define <4 x half> @v4f16_func_void() #0 {
384 %val = load <4 x half>, <4 x half> addrspace(1)* undef
; <5 x i16> return type, loaded through a volatile-loaded pointer. The body is
; checked for gfx900 only: a buffer dwordx2 load into v[0:1] plus a
; global_load_short_d16 into v2, which is the buffer/global mix the first
; FIXME below refers to.
; NOTE(review): a check line between the two loads is not visible in this
; excerpt.
388 ; FIXME: Mixing buffer and global
389 ; FIXME: Should not scalarize
390 ; GCN-LABEL: {{^}}v5i16_func_void:
391 ; GFX9: buffer_load_dwordx2 v[0:1]
393 ; GFX9-NEXT: global_load_short_d16 v2
394 ; GFX9-NEXT: s_waitcnt
395 ; GFX9-NEXT: s_setpc_b64
396 define <5 x i16> @v5i16_func_void() #0 {
397 %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef
398 %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr
; <8 x i16> return type, loaded through a volatile-loaded pointer. The body is
; checked for gfx900 only: one dwordx4 load into v[0:3].
402 ; GCN-LABEL: {{^}}v8i16_func_void:
403 ; GFX9-DAG: buffer_load_dwordx4 v[0:3], off
404 ; GFX9: s_waitcnt vmcnt(0)
405 ; GFX9-NEXT: s_setpc_b64
406 define <8 x i16> @v8i16_func_void() #0 {
407 %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef
408 %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
; <16 x i16> return type, loaded through a volatile-loaded pointer. The body
; is checked for gfx900 only: dwordx4 loads into v[0:3] and then v[4:7], in
; that order.
412 ; GCN-LABEL: {{^}}v16i16_func_void:
413 ; GFX9: buffer_load_dwordx4 v[0:3], off
414 ; GFX9: buffer_load_dwordx4 v[4:7], off
415 ; GFX9: s_waitcnt vmcnt(0)
416 ; GFX9-NEXT: s_setpc_b64
417 define <16 x i16> @v16i16_func_void() #0 {
418 %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef
419 %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr
; <16 x i8> return type, loaded through a volatile-loaded pointer.
; NOTE(review): only the label check is visible in this excerpt; any body
; checks for this function are not shown.
424 ; GCN-LABEL: {{^}}v16i8_func_void:
429 define <16 x i8> @v16i8_func_void() #0 {
430 %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
431 %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
; <4 x i8> return type, loaded through a volatile-loaded pointer. The checks
; expect one dword load into v0, then right shifts of v0 by 8, 16 and 24 into
; v1, v2 and v3 (in any order).
436 ; GCN-LABEL: {{^}}v4i8_func_void:
437 ; GCN: buffer_load_dword v0
438 ; GCN-DAG: v_lshrrev_b32_e32 v1, 8, v0
439 ; GCN-DAG: v_lshrrev_b32_e32 v2, 16, v0
440 ; GCN-DAG: v_lshrrev_b32_e32 v3, 24, v0
442 define <4 x i8> @v4i8_func_void() #0 {
443 %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef
444 %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
; {i8, i32} struct return type: the checks expect the i8 field as an unsigned
; byte load into v0 and the i32 field as a dword load into v1, in either order.
448 ; GCN-LABEL: {{^}}struct_i8_i32_func_void:
449 ; GCN-DAG: buffer_load_dword v1
450 ; GCN-DAG: buffer_load_ubyte v0
451 ; GCN: s_waitcnt vmcnt(0)
452 ; GCN-NEXT: s_setpc_b64
453 define {i8, i32} @struct_i8_i32_func_void() #0 {
454 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef
; Void function with an explicit sret({ i8, i32 }) argument in addrspace(5).
; It loads an i8 and an i32 with volatile loads and stores them to fields 0
; and 1 of %arg0. The checks expect both stores to address through v0 with
; offen and s[0:3]: the byte store with no offset, the dword store at offset 4.
458 ; GCN-LABEL: {{^}}void_func_sret_struct_i8_i32:
459 ; GCN: buffer_load_ubyte [[VAL0:v[0-9]+]]
460 ; GCN: buffer_load_dword [[VAL1:v[0-9]+]]
461 ; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], 0 offen{{$}}
462 ; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], 0 offen offset:4{{$}}
463 define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %arg0) #0 {
464 %val0 = load volatile i8, i8 addrspace(1)* undef
465 %val1 = load volatile i32, i32 addrspace(1)* undef
466 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
467 %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
468 store i8 %val0, i8 addrspace(5)* %gep0
469 store i32 %val1, i32 addrspace(5)* %gep1
; <33 x i32> return type, loaded through a volatile-loaded pointer. The body
; is checked for gfx900 only: 33 dword stores addressed through v0 (offen,
; s[0:3]) at byte offsets 0 through 128 in steps of 4, in any order, then the
; vmcnt wait and s_setpc_b64.
473 ; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
474 ; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
475 ; AssertZext inserted. Not using it introduces the spills.
477 ; GCN-LABEL: {{^}}v33i32_func_void:
478 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
479 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:4{{$}}
480 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:8{{$}}
481 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:12{{$}}
482 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:16{{$}}
483 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:20{{$}}
484 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:24{{$}}
485 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:28{{$}}
486 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:32{{$}}
487 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:36{{$}}
488 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:40{{$}}
489 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:44{{$}}
490 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:48{{$}}
491 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:52{{$}}
492 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:56{{$}}
493 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:60{{$}}
494 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:64{{$}}
495 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:68{{$}}
496 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:72{{$}}
497 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:76{{$}}
498 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:80{{$}}
499 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:84{{$}}
500 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:88{{$}}
501 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:92{{$}}
502 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:96{{$}}
503 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:100{{$}}
504 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:104{{$}}
505 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:108{{$}}
506 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:112{{$}}
507 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:116{{$}}
508 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:120{{$}}
509 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:124{{$}}
510 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
511 ; GFX9: s_waitcnt vmcnt(0)
512 ; GFX9-NEXT: s_setpc_b64
513 define <33 x i32> @v33i32_func_void() #0 {
514 %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
515 %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr
; { <32 x i32>, i32 } struct return, loaded through a volatile-loaded pointer
; and returned. The body is checked for gfx900 only: 33 dword stores addressed
; through v0 (offen, s[0:3]) at byte offsets 0 through 128 in steps of 4, in
; any order.
519 ; GCN-LABEL: {{^}}struct_v32i32_i32_func_void:
520 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
521 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:4{{$}}
522 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:8{{$}}
523 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:12{{$}}
524 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:16{{$}}
525 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:20{{$}}
526 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:24{{$}}
527 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:28{{$}}
528 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:32{{$}}
529 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:36{{$}}
530 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:40{{$}}
531 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:44{{$}}
532 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:48{{$}}
533 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:52{{$}}
534 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:56{{$}}
535 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:60{{$}}
536 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:64{{$}}
537 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:68{{$}}
538 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:72{{$}}
539 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:76{{$}}
540 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:80{{$}}
541 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:84{{$}}
542 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:88{{$}}
543 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:92{{$}}
544 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:96{{$}}
545 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:100{{$}}
546 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:104{{$}}
547 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:108{{$}}
548 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:112{{$}}
549 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:116{{$}}
550 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:120{{$}}
551 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:124{{$}}
552 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
553 ; GFX9: s_waitcnt vmcnt(0)
554 ; GFX9-NEXT: s_setpc_b64
555 define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
556 %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef
557 %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr
558 ret { <32 x i32>, i32 }%val
; { i32, <32 x i32> } struct return, loaded through a volatile-loaded pointer
; and returned. The body is checked for gfx900 only: one dword store at
; offset 0, then 32 dword stores at byte offsets 128 through 252 in steps of
; 4, all addressed through v0 (offen, s[0:3]) and matched in any order.
; NOTE(review): the gap between offset 0 and 128 is presumably padding for
; the vector member's alignment -- confirm against the datalayout.
561 ; GCN-LABEL: {{^}}struct_i32_v32i32_func_void:
562 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
563 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
564 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:132{{$}}
565 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:136{{$}}
566 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:140{{$}}
567 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:144{{$}}
568 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:148{{$}}
569 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:152{{$}}
570 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:156{{$}}
571 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:160{{$}}
572 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:164{{$}}
573 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:168{{$}}
574 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:172{{$}}
575 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:176{{$}}
576 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:180{{$}}
577 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:184{{$}}
578 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:188{{$}}
579 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:192{{$}}
580 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:196{{$}}
581 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:200{{$}}
582 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:204{{$}}
583 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:208{{$}}
584 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:212{{$}}
585 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:216{{$}}
586 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:220{{$}}
587 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:224{{$}}
588 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:228{{$}}
589 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:232{{$}}
590 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:236{{$}}
591 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:240{{$}}
592 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:244{{$}}
593 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:248{{$}}
594 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:252{{$}}
595 ; GFX9: s_waitcnt vmcnt(0)
596 ; GFX9-NEXT: s_setpc_b64
597 define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
598 %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
599 %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr
600 ret { i32, <32 x i32> }%val
; Builds a { <3 x i32>, i32 } from four volatile i32 loads through undef
; addrspace(3) pointers and returns it. The checks expect the four
; ds_read_b32 results in v0, v1, v2 and v3, so the trailing i32 follows the
; three vector elements with no register skipped.
603 ; Make sure the last struct component is returned in v3, not v4.
604 ; GCN-LABEL: {{^}}v3i32_struct_func_void_wasted_reg:
605 ; GCN: ds_read_b32 v0,
606 ; GCN: ds_read_b32 v1,
607 ; GCN: ds_read_b32 v2,
608 ; GCN: ds_read_b32 v3,
609 define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
610 %load0 = load volatile i32, i32 addrspace(3)* undef
611 %load1 = load volatile i32, i32 addrspace(3)* undef
612 %load2 = load volatile i32, i32 addrspace(3)* undef
613 %load3 = load volatile i32, i32 addrspace(3)* undef
615 %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0
616 %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1
617 %insert.2 = insertelement <3 x i32> %insert.1, i32 %load2, i32 2
618 %insert.3 = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %insert.2, 0
619 %insert.4 = insertvalue { <3 x i32>, i32 } %insert.3, i32 %load3, 1
620 ret { <3 x i32>, i32 } %insert.4
; Float-vector variant of the wasted-register case: builds a
; { <3 x float>, i32 } from three volatile float loads and one volatile i32
; load through undef addrspace(3) pointers and returns it. The checks expect
; the four ds_read_b32 results in v0 through v3, so the trailing i32 lands in
; v3.
623 ; GCN-LABEL: {{^}}v3f32_struct_func_void_wasted_reg:
624 ; GCN: ds_read_b32 v0,
625 ; GCN: ds_read_b32 v1,
626 ; GCN: ds_read_b32 v2,
627 ; GCN: ds_read_b32 v3,
628 define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
629 %load0 = load volatile float, float addrspace(3)* undef
630 %load1 = load volatile float, float addrspace(3)* undef
631 %load2 = load volatile float, float addrspace(3)* undef
632 %load3 = load volatile i32, i32 addrspace(3)* undef
634 %insert.0 = insertelement <3 x float> undef, float %load0, i32 0
635 %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1
636 %insert.2 = insertelement <3 x float> %insert.1, float %load2, i32 2
637 %insert.3 = insertvalue { <3 x float>, i32 } undef, <3 x float> %insert.2, 0
638 %insert.4 = insertvalue { <3 x float>, i32 } %insert.3, i32 %load3, 1
639 ret { <3 x float>, i32 } %insert.4
; Converts the addrspace(5) sret pointer to an i32 and stores it, shifted
; right by 16, 17 and 18, with three volatile stores to an undef addrspace(3)
; pointer. The checks expect a real shift only for the 16-bit case; the other
; two stores are expected to write a register materialized as constant 0.
; NOTE(review): the terminator and closing brace are not visible in this
; excerpt.
642 ; GCN-LABEL: {{^}}void_func_sret_max_known_zero_bits:
643 ; GCN: v_lshrrev_b32_e32 [[LSHR16:v[0-9]+]], 16, v0
644 ; GCN: ds_write_b32 {{v[0-9]+}}, [[LSHR16]]
646 ; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0
647 ; GCN: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
648 ; GCN-NEXT: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
649 define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) #0 {
650 %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32
652 %lshr0 = lshr i32 %arg0.int, 16
653 %lshr1 = lshr i32 %arg0.int, 17
654 %lshr2 = lshr i32 %arg0.int, 18
656 store volatile i32 %lshr0, i32 addrspace(3)* undef
657 store volatile i32 %lshr1, i32 addrspace(3)* undef
658 store volatile i32 %lshr2, i32 addrspace(3)* undef
; Attribute group #0 (nounwind), referenced by every define visible in this
; excerpt.
662 attributes #0 = { nounwind }