1 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
2 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
3 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
; Plain i1 return of a value loaded from an undef addrspace(1) pointer.
; The sibling zeroext/signext i1 tests expect s_waitcnt vmcnt(0) directly after
; the byte load, so the consecutive-line check chain here must include the wait
; before s_setpc_b64 as well.
5 ; GCN-LABEL: {{^}}i1_func_void:
6 ; GCN: buffer_load_ubyte v0, off
7 ; GCN-NEXT: s_waitcnt vmcnt(0)
8 ; GCN-NEXT: s_setpc_b64
9 define i1 @i1_func_void() #0 {
10 %val = load i1, i1 addrspace(1)* undef
; zeroext i1 return of a value loaded from an undef addrspace(1) pointer.
; Expects one buffer_load_ubyte into v0, then s_waitcnt and s_setpc_b64 on the
; immediately following lines (no extra instruction in between).
15 ; GCN-LABEL: {{^}}i1_zeroext_func_void:
16 ; GCN: buffer_load_ubyte v0, off
17 ; GCN-NEXT: s_waitcnt vmcnt(0)
18 ; GCN-NEXT: s_setpc_b64
19 define zeroext i1 @i1_zeroext_func_void() #0 {
20 %val = load i1, i1 addrspace(1)* undef
; signext i1 return. After the byte load and wait, a v_bfe_i32 of v0 with
; operands 0, 1 is expected before s_setpc_b64 (presumably the sign extension
; of bit 0).
24 ; GCN-LABEL: {{^}}i1_signext_func_void:
25 ; GCN: buffer_load_ubyte v0, off
26 ; GCN-NEXT: s_waitcnt vmcnt(0)
27 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1{{$}}
28 ; GCN-NEXT: s_setpc_b64
29 define signext i1 @i1_signext_func_void() #0 {
30 %val = load i1, i1 addrspace(1)* undef
; Plain i8 return: expects a single buffer_load_ubyte into v0, followed on
; consecutive lines by s_waitcnt and s_setpc_b64.
34 ; GCN-LABEL: {{^}}i8_func_void:
35 ; GCN: buffer_load_ubyte v0, off
36 ; GCN-NEXT: s_waitcnt vmcnt(0)
37 ; GCN-NEXT: s_setpc_b64
38 define i8 @i8_func_void() #0 {
39 %val = load i8, i8 addrspace(1)* undef
; zeroext i8 return: buffer_load_ubyte into v0 is expected with no separate
; extension instruction before s_waitcnt and s_setpc_b64.
43 ; GCN-LABEL: {{^}}i8_zeroext_func_void:
44 ; GCN: buffer_load_ubyte v0, off
45 ; GCN-NEXT: s_waitcnt vmcnt(0)
46 ; GCN-NEXT: s_setpc_b64
47 define zeroext i8 @i8_zeroext_func_void() #0 {
48 %val = load i8, i8 addrspace(1)* undef
; signext i8 return: expects buffer_load_sbyte (not ubyte) into v0, with no
; separate extension instruction before s_waitcnt and s_setpc_b64.
52 ; GCN-LABEL: {{^}}i8_signext_func_void:
53 ; GCN: buffer_load_sbyte v0, off
54 ; GCN-NEXT: s_waitcnt vmcnt(0)
55 ; GCN-NEXT: s_setpc_b64
56 define signext i8 @i8_signext_func_void() #0 {
57 %val = load i8, i8 addrspace(1)* undef
; Plain i16 return: expects a single buffer_load_ushort into v0, followed on
; consecutive lines by s_waitcnt and s_setpc_b64.
61 ; GCN-LABEL: {{^}}i16_func_void:
62 ; GCN: buffer_load_ushort v0, off
63 ; GCN-NEXT: s_waitcnt vmcnt(0)
64 ; GCN-NEXT: s_setpc_b64
65 define i16 @i16_func_void() #0 {
66 %val = load i16, i16 addrspace(1)* undef
; zeroext i16 return: buffer_load_ushort into v0 is expected with no separate
; extension instruction before s_waitcnt and s_setpc_b64.
70 ; GCN-LABEL: {{^}}i16_zeroext_func_void:
71 ; GCN: buffer_load_ushort v0, off
72 ; GCN-NEXT: s_waitcnt vmcnt(0)
73 ; GCN-NEXT: s_setpc_b64
74 define zeroext i16 @i16_zeroext_func_void() #0 {
75 %val = load i16, i16 addrspace(1)* undef
; signext i16 return: expects buffer_load_sshort (not ushort) into v0, with no
; separate extension instruction before s_waitcnt and s_setpc_b64.
79 ; GCN-LABEL: {{^}}i16_signext_func_void:
80 ; GCN: buffer_load_sshort v0, off
81 ; GCN-NEXT: s_waitcnt vmcnt(0)
82 ; GCN-NEXT: s_setpc_b64
83 define signext i16 @i16_signext_func_void() #0 {
84 %val = load i16, i16 addrspace(1)* undef
; i32 return: expects a single buffer_load_dword into v0, then s_waitcnt and
; s_setpc_b64 on consecutive lines.
88 ; GCN-LABEL: {{^}}i32_func_void:
89 ; GCN: buffer_load_dword v0, off
90 ; GCN-NEXT: s_waitcnt vmcnt(0)
91 ; GCN-NEXT: s_setpc_b64
92 define i32 @i32_func_void() #0 {
93 %val = load i32, i32 addrspace(1)* undef
; i48 return, loaded with align 8: expects a dword load into v0 immediately
; followed by a ushort load into v1, then s_waitcnt and s_setpc_b64.
97 ; GCN-LABEL: {{^}}i48_func_void:
98 ; GCN: buffer_load_dword v0, off
99 ; GCN-NEXT: buffer_load_ushort v1, off
100 ; GCN-NEXT: s_waitcnt vmcnt(0)
101 ; GCN-NEXT: s_setpc_b64
102 define i48 @i48_func_void() #0 {
103 %val = load i48, i48 addrspace(1)* undef, align 8
; i64 return: expects one buffer_load_dwordx2 into v[0:1], then s_waitcnt and
; s_setpc_b64 on consecutive lines.
107 ; GCN-LABEL: {{^}}i64_func_void:
108 ; GCN: buffer_load_dwordx2 v[0:1], off
109 ; GCN-NEXT: s_waitcnt vmcnt(0)
110 ; GCN-NEXT: s_setpc_b64
111 define i64 @i64_func_void() #0 {
112 %val = load i64, i64 addrspace(1)* undef
; i65 return: expects a dwordx2 load into v[0:1] and a ubyte load into v2.
; The two loads are -DAG checks, so either order is accepted.
116 ; GCN-LABEL: {{^}}i65_func_void:
117 ; GCN-DAG: buffer_load_dwordx2 v[0:1], off
118 ; GCN-DAG: buffer_load_ubyte v2, off
119 ; GCN: s_waitcnt vmcnt(0)
120 ; GCN-NEXT: s_setpc_b64
121 define i65 @i65_func_void() #0 {
122 %val = load i65, i65 addrspace(1)* undef
; float return: expects one buffer_load_dword into v0. Unlike the integer
; tests, this check also pins the s[4:7] operand and the trailing 0.
126 ; GCN-LABEL: {{^}}f32_func_void:
127 ; GCN: buffer_load_dword v0, off, s[4:7], 0
128 ; GCN-NEXT: s_waitcnt vmcnt(0)
129 ; GCN-NEXT: s_setpc_b64
130 define float @f32_func_void() #0 {
131 %val = load float, float addrspace(1)* undef
; double return: expects one buffer_load_dwordx2 into v[0:1], then s_waitcnt
; and s_setpc_b64 on consecutive lines.
135 ; GCN-LABEL: {{^}}f64_func_void:
136 ; GCN: buffer_load_dwordx2 v[0:1], off
137 ; GCN-NEXT: s_waitcnt vmcnt(0)
138 ; GCN-NEXT: s_setpc_b64
139 define double @f64_func_void() #0 {
140 %val = load double, double addrspace(1)* undef
; <2 x double> return: expects one buffer_load_dwordx4 into v[0:3], then
; s_waitcnt and s_setpc_b64 on consecutive lines.
144 ; GCN-LABEL: {{^}}v2f64_func_void:
145 ; GCN: buffer_load_dwordx4 v[0:3], off
146 ; GCN-NEXT: s_waitcnt vmcnt(0)
147 ; GCN-NEXT: s_setpc_b64
148 define <2 x double> @v2f64_func_void() #0 {
149 %val = load <2 x double>, <2 x double> addrspace(1)* undef
150 ret <2 x double> %val
; <2 x i32> return: expects one buffer_load_dwordx2 into v[0:1], then
; s_waitcnt and s_setpc_b64 on consecutive lines.
153 ; GCN-LABEL: {{^}}v2i32_func_void:
154 ; GCN: buffer_load_dwordx2 v[0:1], off
155 ; GCN-NEXT: s_waitcnt vmcnt(0)
156 ; GCN-NEXT: s_setpc_b64
157 define <2 x i32> @v2i32_func_void() #0 {
158 %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
; <3 x i32> return: expects one buffer_load_dwordx3 into v[0:2], then
; s_waitcnt and s_setpc_b64 on consecutive lines.
162 ; GCN-LABEL: {{^}}v3i32_func_void:
163 ; GCN: buffer_load_dwordx3 v[0:2], off
164 ; GCN-NEXT: s_waitcnt vmcnt(0)
165 ; GCN-NEXT: s_setpc_b64
166 define <3 x i32> @v3i32_func_void() #0 {
167 %val = load <3 x i32>, <3 x i32> addrspace(1)* undef
; <4 x i32> return: expects one buffer_load_dwordx4 into v[0:3], then
; s_waitcnt and s_setpc_b64 on consecutive lines.
171 ; GCN-LABEL: {{^}}v4i32_func_void:
172 ; GCN: buffer_load_dwordx4 v[0:3], off
173 ; GCN-NEXT: s_waitcnt vmcnt(0)
174 ; GCN-NEXT: s_setpc_b64
175 define <4 x i32> @v4i32_func_void() #0 {
176 %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
; <5 x i32> return from a volatile load: expects a dwordx4 load into v[0:3]
; and a dword load into v4, in either order (-DAG checks).
180 ; GCN-LABEL: {{^}}v5i32_func_void:
181 ; GCN-DAG: buffer_load_dword v4, off
182 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
183 ; GCN: s_waitcnt vmcnt(0)
184 ; GCN-NEXT: s_setpc_b64
185 define <5 x i32> @v5i32_func_void() #0 {
186 %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef
; <8 x i32> return. The data pointer is itself loaded (volatile) from an undef
; addrspace(4) pointer. Expects two dwordx4 loads covering v[0:7], any order.
190 ; GCN-LABEL: {{^}}v8i32_func_void:
191 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
192 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
193 ; GCN: s_waitcnt vmcnt(0)
194 ; GCN-NEXT: s_setpc_b64
195 define <8 x i32> @v8i32_func_void() #0 {
196 %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
197 %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
; <16 x i32> return through a volatile-loaded pointer: expects four dwordx4
; loads covering v[0:15], in any order.
201 ; GCN-LABEL: {{^}}v16i32_func_void:
202 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
203 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
204 ; GCN-DAG: buffer_load_dwordx4 v[8:11], off
205 ; GCN-DAG: buffer_load_dwordx4 v[12:15], off
206 ; GCN: s_waitcnt vmcnt(0)
207 ; GCN-NEXT: s_setpc_b64
208 define <16 x i32> @v16i32_func_void() #0 {
209 %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
210 %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
; <32 x i32> return through a volatile-loaded pointer: expects eight dwordx4
; loads covering v[0:31], in any order.
214 ; GCN-LABEL: {{^}}v32i32_func_void:
215 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
216 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
217 ; GCN-DAG: buffer_load_dwordx4 v[8:11], off
218 ; GCN-DAG: buffer_load_dwordx4 v[12:15], off
219 ; GCN-DAG: buffer_load_dwordx4 v[16:19], off
220 ; GCN-DAG: buffer_load_dwordx4 v[20:23], off
221 ; GCN-DAG: buffer_load_dwordx4 v[24:27], off
222 ; GCN-DAG: buffer_load_dwordx4 v[28:31], off
223 ; GCN: s_waitcnt vmcnt(0)
224 ; GCN-NEXT: s_setpc_b64
225 define <32 x i32> @v32i32_func_void() #0 {
226 %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
227 %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
; <2 x i64> return: expects one buffer_load_dwordx4 into v[0:3], then
; s_waitcnt and s_setpc_b64 on consecutive lines.
231 ; GCN-LABEL: {{^}}v2i64_func_void:
232 ; GCN: buffer_load_dwordx4 v[0:3], off
233 ; GCN-NEXT: s_waitcnt vmcnt(0)
234 ; GCN-NEXT: s_setpc_b64
235 define <2 x i64> @v2i64_func_void() #0 {
236 %val = load <2 x i64>, <2 x i64> addrspace(1)* undef
; <3 x i64> return through a volatile-loaded pointer: the checks expect two
; dwordx4 loads, into v[0:3] and v[4:7], in either order.
240 ; GCN-LABEL: {{^}}v3i64_func_void:
241 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
242 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
243 ; GCN: s_waitcnt vmcnt(0)
244 ; GCN-NEXT: s_setpc_b64
245 define <3 x i64> @v3i64_func_void() #0 {
246 %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef
247 %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr
; <4 x i64> return through a volatile-loaded pointer: expects dwordx4 loads
; into v[0:3] and then v[4:7]. These are ordered checks, unlike the -DAG
; checks used for the neighbouring vector tests.
251 ; GCN-LABEL: {{^}}v4i64_func_void:
252 ; GCN: buffer_load_dwordx4 v[0:3], off
253 ; GCN: buffer_load_dwordx4 v[4:7], off
254 ; GCN-NEXT: s_waitcnt vmcnt(0)
255 ; GCN-NEXT: s_setpc_b64
256 define <4 x i64> @v4i64_func_void() #0 {
257 %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef
258 %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr
; <5 x i64> return through a volatile-loaded pointer: the checks expect three
; dwordx4 loads, into v[0:3], v[4:7] and v[8:11], in any order.
262 ; GCN-LABEL: {{^}}v5i64_func_void:
263 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
264 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
265 ; GCN-DAG: buffer_load_dwordx4 v[8:11], off
266 ; GCN: s_waitcnt vmcnt(0)
267 ; GCN-NEXT: s_setpc_b64
268 define <5 x i64> @v5i64_func_void() #0 {
269 %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef
270 %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr
; <8 x i64> return through a volatile-loaded pointer: expects four dwordx4
; loads covering v[0:15], in any order.
274 ; GCN-LABEL: {{^}}v8i64_func_void:
275 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
276 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
277 ; GCN-DAG: buffer_load_dwordx4 v[8:11], off
278 ; GCN-DAG: buffer_load_dwordx4 v[12:15], off
279 ; GCN: s_waitcnt vmcnt(0)
280 ; GCN-NEXT: s_setpc_b64
281 define <8 x i64> @v8i64_func_void() #0 {
282 %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef
283 %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr
; <16 x i64> return through a volatile-loaded pointer: expects eight dwordx4
; loads covering v[0:31], in any order.
287 ; GCN-LABEL: {{^}}v16i64_func_void:
288 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
289 ; GCN-DAG: buffer_load_dwordx4 v[4:7], off
290 ; GCN-DAG: buffer_load_dwordx4 v[8:11], off
291 ; GCN-DAG: buffer_load_dwordx4 v[12:15], off
292 ; GCN-DAG: buffer_load_dwordx4 v[16:19], off
293 ; GCN-DAG: buffer_load_dwordx4 v[20:23], off
294 ; GCN-DAG: buffer_load_dwordx4 v[24:27], off
295 ; GCN-DAG: buffer_load_dwordx4 v[28:31], off
296 ; GCN: s_waitcnt vmcnt(0)
297 ; GCN-NEXT: s_setpc_b64
298 define <16 x i64> @v16i64_func_void() #0 {
299 %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef
300 %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr
; <2 x i16> return. Instruction checks exist only for the gfx900 run, which
; expects a single dword load into v0.
304 ; GCN-LABEL: {{^}}v2i16_func_void:
305 ; GFX9: buffer_load_dword v0, off
306 ; GFX9-NEXT: s_waitcnt vmcnt(0)
307 ; GFX9-NEXT: s_setpc_b64
308 define <2 x i16> @v2i16_func_void() #0 {
309 %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
; <3 x i16> return. Instruction checks exist only for the gfx900 run, which
; expects a dwordx2 load. The destination register range is matched by regex,
; so it is not pinned to particular VGPRs.
313 ; GCN-LABEL: {{^}}v3i16_func_void:
314 ; GFX9: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off
315 ; GFX9-NEXT: s_waitcnt vmcnt(0)
316 ; GFX9-NEXT: s_setpc_b64
317 define <3 x i16> @v3i16_func_void() #0 {
318 %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
; <4 x i16> return. Instruction checks exist only for the gfx900 run, which
; expects a single dwordx2 load into v[0:1].
322 ; GCN-LABEL: {{^}}v4i16_func_void:
323 ; GFX9: buffer_load_dwordx2 v[0:1], off
324 ; GFX9-NEXT: s_waitcnt vmcnt(0)
325 ; GFX9-NEXT: s_setpc_b64
326 define <4 x i16> @v4i16_func_void() #0 {
327 %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
; <4 x half> return. Instruction checks exist only for the gfx900 run, which
; expects a single dwordx2 load into v[0:1] (same shape as the <4 x i16> test).
331 ; GCN-LABEL: {{^}}v4f16_func_void:
332 ; GFX9: buffer_load_dwordx2 v[0:1], off
333 ; GFX9-NEXT: s_waitcnt vmcnt(0)
334 ; GFX9-NEXT: s_setpc_b64
335 define <4 x half> @v4f16_func_void() #0 {
336 %val = load <4 x half>, <4 x half> addrspace(1)* undef
340 ; FIXME: Mixing buffer and global
341 ; FIXME: Should not scalarize
; <5 x i16> return through a volatile-loaded pointer. The gfx900 checks record
; the current output: a buffer dwordx2 load into v[0:1] immediately followed
; by a global_load_short_d16 into v2, which is what the FIXMEs above refer to.
342 ; GCN-LABEL: {{^}}v5i16_func_void:
343 ; GFX9: buffer_load_dwordx2 v[0:1]
344 ; GFX9-NEXT: global_load_short_d16 v2
345 ; GFX9-NEXT: s_waitcnt
346 ; GFX9-NEXT: s_setpc_b64
347 define <5 x i16> @v5i16_func_void() #0 {
348 %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef
349 %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr
; <8 x i16> return through a volatile-loaded pointer. Instruction checks exist
; only for the gfx900 run, which expects one dwordx4 load into v[0:3].
353 ; GCN-LABEL: {{^}}v8i16_func_void:
354 ; GFX9-DAG: buffer_load_dwordx4 v[0:3], off
355 ; GFX9: s_waitcnt vmcnt(0)
356 ; GFX9-NEXT: s_setpc_b64
357 define <8 x i16> @v8i16_func_void() #0 {
358 %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef
359 %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
; <16 x i16> return through a volatile-loaded pointer. Instruction checks exist
; only for the gfx900 run, which expects dwordx4 loads into v[0:3] and then
; v[4:7] (ordered checks).
363 ; GCN-LABEL: {{^}}v16i16_func_void:
364 ; GFX9: buffer_load_dwordx4 v[0:3], off
365 ; GFX9: buffer_load_dwordx4 v[4:7], off
366 ; GFX9: s_waitcnt vmcnt(0)
367 ; GFX9-NEXT: s_setpc_b64
368 define <16 x i16> @v16i16_func_void() #0 {
369 %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef
370 %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr
; <16 x i8> return through a volatile-loaded pointer. Only the function label
; is checked here; no instruction-level checks are visible for this function.
; NOTE(review): confirm whether register checks were intended after the label.
375 ; GCN-LABEL: {{^}}v16i8_func_void:
380 define <16 x i8> @v16i8_func_void() #0 {
381 %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
382 %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
; <4 x i8> return through a volatile-loaded pointer. Expects one dword load
; into v0, then right shifts of v0 by 8, 16 and 24 into v1, v2 and v3 (any
; order), apparently placing one byte element in each of v0..v3.
387 ; GCN-LABEL: {{^}}v4i8_func_void:
388 ; GCN: buffer_load_dword v0
389 ; GCN-DAG: v_lshrrev_b32_e32 v1, 8, v0
390 ; GCN-DAG: v_lshrrev_b32_e32 v2, 16, v0
391 ; GCN-DAG: v_lshrrev_b32_e32 v3, 24, v0
393 define <4 x i8> @v4i8_func_void() #0 {
394 %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef
395 %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
; {i8, i32} struct return: expects the i8 field as a ubyte load into v0 and
; the i32 field as a dword load into v1, in either order (-DAG checks).
399 ; GCN-LABEL: {{^}}struct_i8_i32_func_void:
400 ; GCN-DAG: buffer_load_dword v1
401 ; GCN-DAG: buffer_load_ubyte v0
402 ; GCN: s_waitcnt vmcnt(0)
403 ; GCN-NEXT: s_setpc_b64
404 define {i8, i32} @struct_i8_i32_func_void() #0 {
405 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef
; Explicit sret variant: the { i8, i32 } result is written through the
; addrspace(5) sret argument. Expects the two loaded values to be stored with
; v0 as the offen address (presumably the sret pointer), s[0:3] and s33: the
; byte with no offset and the dword at offset 4.
409 ; GCN-LABEL: {{^}}void_func_sret_struct_i8_i32:
410 ; GCN: buffer_load_ubyte [[VAL0:v[0-9]+]]
411 ; GCN: buffer_load_dword [[VAL1:v[0-9]+]]
412 ; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], s33 offen{{$}}
413 ; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], s33 offen offset:4{{$}}
414 define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret %arg0) #0 {
415 %val0 = load volatile i8, i8 addrspace(1)* undef
416 %val1 = load volatile i32, i32 addrspace(1)* undef
417 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
418 %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
419 store i8 %val0, i8 addrspace(5)* %gep0
420 store i32 %val1, i32 addrspace(5)* %gep1
424 ; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
425 ; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
426 ; AssertZext inserted. Not using it introduces the spills.
; <33 x i32> return. Instruction checks exist only for the gfx900 run: 33
; dword stores addressed through v0 at offsets 0 through 128 in steps of 4,
; i.e. the value is written to memory rather than left in VGPRs (presumably
; through an implicit sret-style pointer passed in v0).
428 ; GCN-LABEL: {{^}}v33i32_func_void:
429 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen{{$}}
430 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:4{{$}}
431 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:8{{$}}
432 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:12{{$}}
433 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:16{{$}}
434 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:20{{$}}
435 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:24{{$}}
436 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:28{{$}}
437 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:32{{$}}
438 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:36{{$}}
439 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:40{{$}}
440 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:44{{$}}
441 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:48{{$}}
442 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:52{{$}}
443 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:56{{$}}
444 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:60{{$}}
445 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:64{{$}}
446 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:68{{$}}
447 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:72{{$}}
448 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:76{{$}}
449 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:80{{$}}
450 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:84{{$}}
451 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:88{{$}}
452 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:92{{$}}
453 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:96{{$}}
454 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:100{{$}}
455 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:104{{$}}
456 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:108{{$}}
457 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:112{{$}}
458 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:116{{$}}
459 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:120{{$}}
460 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:124{{$}}
461 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:128{{$}}
462 ; GFX9: s_waitcnt vmcnt(0)
463 ; GFX9-NEXT: s_setpc_b64
464 define <33 x i32> @v33i32_func_void() #0 {
465 %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
466 %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr
; { <32 x i32>, i32 } struct return. Instruction checks exist only for the
; gfx900 run: 33 dword stores addressed through v0 at offsets 0 through 128 in
; steps of 4, the same memory layout as the <33 x i32> test.
470 ; GCN-LABEL: {{^}}struct_v32i32_i32_func_void:
471 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen{{$}}
472 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:4{{$}}
473 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:8{{$}}
474 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:12{{$}}
475 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:16{{$}}
476 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:20{{$}}
477 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:24{{$}}
478 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:28{{$}}
479 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:32{{$}}
480 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:36{{$}}
481 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:40{{$}}
482 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:44{{$}}
483 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:48{{$}}
484 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:52{{$}}
485 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:56{{$}}
486 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:60{{$}}
487 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:64{{$}}
488 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:68{{$}}
489 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:72{{$}}
490 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:76{{$}}
491 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:80{{$}}
492 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:84{{$}}
493 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:88{{$}}
494 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:92{{$}}
495 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:96{{$}}
496 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:100{{$}}
497 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:104{{$}}
498 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:108{{$}}
499 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:112{{$}}
500 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:116{{$}}
501 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:120{{$}}
502 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:124{{$}}
503 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:128{{$}}
504 ; GFX9: s_waitcnt vmcnt(0)
505 ; GFX9-NEXT: s_setpc_b64
506 define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
507 %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef
508 %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr
509 ret { <32 x i32>, i32 }%val
; { i32, <32 x i32> } struct return. Instruction checks exist only for the
; gfx900 run: one dword store at offset 0 (the leading i32) and 32 dword
; stores at offsets 128 through 252, all addressed through v0. The gap up to
; offset 128 is presumably padding for the vector's alignment.
512 ; GCN-LABEL: {{^}}struct_i32_v32i32_func_void:
513 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen{{$}}
514 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:128{{$}}
515 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:132{{$}}
516 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:136{{$}}
517 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:140{{$}}
518 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:144{{$}}
519 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:148{{$}}
520 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:152{{$}}
521 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:156{{$}}
522 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:160{{$}}
523 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:164{{$}}
524 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:168{{$}}
525 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:172{{$}}
526 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:176{{$}}
527 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:180{{$}}
528 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:184{{$}}
529 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:188{{$}}
530 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:192{{$}}
531 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:196{{$}}
532 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:200{{$}}
533 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:204{{$}}
534 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:208{{$}}
535 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:212{{$}}
536 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:216{{$}}
537 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:220{{$}}
538 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:224{{$}}
539 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:228{{$}}
540 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:232{{$}}
541 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:236{{$}}
542 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:240{{$}}
543 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:244{{$}}
544 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:248{{$}}
545 ; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:252{{$}}
546 ; GFX9: s_waitcnt vmcnt(0)
547 ; GFX9-NEXT: s_setpc_b64
548 define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
549 %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
550 %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr
551 ret { i32, <32 x i32> }%val
554 ; Make sure the last struct component is returned in v3, not v4.
; Builds { <3 x i32>, i32 } from four volatile addrspace(3) loads. The checks
; expect four ds_read_b32 in order into v0, v1, v2 and v3, so no register is
; skipped after the 3-element vector.
555 ; GCN-LABEL: {{^}}v3i32_struct_func_void_wasted_reg:
556 ; GCN: ds_read_b32 v0,
557 ; GCN: ds_read_b32 v1,
558 ; GCN: ds_read_b32 v2,
559 ; GCN: ds_read_b32 v3,
560 define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
561 %load0 = load volatile i32, i32 addrspace(3)* undef
562 %load1 = load volatile i32, i32 addrspace(3)* undef
563 %load2 = load volatile i32, i32 addrspace(3)* undef
564 %load3 = load volatile i32, i32 addrspace(3)* undef
566 %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0
567 %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1
568 %insert.2 = insertelement <3 x i32> %insert.1, i32 %load2, i32 2
569 %insert.3 = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %insert.2, 0
570 %insert.4 = insertvalue { <3 x i32>, i32 } %insert.3, i32 %load3, 1
571 ret { <3 x i32>, i32 } %insert.4
; Float counterpart of the <3 x i32> wasted-register test: builds
; { <3 x float>, i32 } from four volatile addrspace(3) loads and expects four
; ds_read_b32 in order into v0, v1, v2 and v3 (the trailing i32 lands in v3).
574 ; GCN-LABEL: {{^}}v3f32_struct_func_void_wasted_reg:
575 ; GCN: ds_read_b32 v0,
576 ; GCN: ds_read_b32 v1,
577 ; GCN: ds_read_b32 v2,
578 ; GCN: ds_read_b32 v3,
579 define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
580 %load0 = load volatile float, float addrspace(3)* undef
581 %load1 = load volatile float, float addrspace(3)* undef
582 %load2 = load volatile float, float addrspace(3)* undef
583 %load3 = load volatile i32, i32 addrspace(3)* undef
585 %insert.0 = insertelement <3 x float> undef, float %load0, i32 0
586 %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1
587 %insert.2 = insertelement <3 x float> %insert.1, float %load2, i32 2
588 %insert.3 = insertvalue { <3 x float>, i32 } undef, <3 x float> %insert.2, 0
589 %insert.4 = insertvalue { <3 x float>, i32 } %insert.3, i32 %load3, 1
590 ret { <3 x float>, i32 } %insert.4
; Converts the addrspace(5) sret pointer to i32, shifts it right by 16, 17 and
; 18, and stores each result with a volatile addrspace(3) store. The checks
; expect a real shift of v0 only for the shift by 16; the other two stores are
; expected to write a register holding constant 0 (presumably because the
; pointer's upper bits are known to be zero).
593 ; GCN-LABEL: {{^}}void_func_sret_max_known_zero_bits:
594 ; GCN: v_lshrrev_b32_e32 [[LSHR16:v[0-9]+]], 16, v0
595 ; GCN: ds_write_b32 {{v[0-9]+}}, [[LSHR16]]
597 ; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0
598 ; GCN: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
599 ; GCN-NEXT: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
600 define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret %arg0) #0 {
601 %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32
603 %lshr0 = lshr i32 %arg0.int, 16
604 %lshr1 = lshr i32 %arg0.int, 17
605 %lshr2 = lshr i32 %arg0.int, 18
607 store volatile i32 %lshr0, i32 addrspace(3)* undef
608 store volatile i32 %lshr1, i32 addrspace(3)* undef
609 store volatile i32 %lshr2, i32 addrspace(3)* undef
613 attributes #0 = { nounwind }