; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-code-object-v3 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,CIVI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-code-object-v3 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,GFX9 %s
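
; Test that the special SGPR inputs (dispatch pointer, queue pointer,
; kernarg segment pointer, dispatch ID, and workgroup IDs) are forwarded
; from kernels into callable functions in the SGPRs the callees expect.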

; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_dispatch_ptr() #1 {
  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}
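
; The kernel receives the dispatch pointer in s[4:5]; the checks below
; verify it is copied into s[6:7], where @use_dispatch_ptr reads it.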

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr:
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
  call void @use_dispatch_ptr()
  ret void
}

; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_queue_ptr() #1 {
  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr:
; GCN: enable_sgpr_queue_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
  call void @use_queue_ptr()
  ret void
}
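
; A local-to-flat addrspacecast needs the shared-memory aperture. On CI/VI
; it is loaded from the queue pointer, so the queue pointer must be passed
; through; on GFX9 the aperture comes from s_getreg instead.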

; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
; CIVI: flat_load_dword v[[HI:[0-9]+]], v[0:1]
; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_queue_ptr_addrspacecast() #1 {
  %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
  store volatile i32 0, i32* %asc
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast:
; CIVI: enable_sgpr_queue_ptr = 1
; CIVI: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
  call void @use_queue_ptr_addrspacecast()
  ret void
}

; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_kernarg_segment_ptr() #1 {
  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
  call void @use_kernarg_segment_ptr()
  ret void
}

; GCN-LABEL: {{^}}use_dispatch_id:
define void @use_dispatch_id() #1 {
  %id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %id)
  ret void
}

; There is no kernarg segment, so that there is a mov to check. With the
; kernarg pointer enabled, it happens to end up in the right place anyway.

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id:
; GCN: enable_sgpr_dispatch_id = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
  call void @use_dispatch_id()
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_x:
define void @use_workgroup_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}
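
; Make sure the workgroup ID input still works when the function also
; touches the stack.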

; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:4
define void @use_stack_workgroup_id_x() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_y:
define void @use_workgroup_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_z:
define void @use_workgroup_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xy:
define void @use_workgroup_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xyz:
define void @use_workgroup_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  call void asm sideeffect "; use $0", "s"(i32 %val2)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xz:
define void @use_workgroup_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_yz:
define void @use_workgroup_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}
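
; Each kernel below enables only the workgroup ID inputs its callee
; actually uses (note the = 0 lines for the unused IDs) and copies them
; into the SGPRs the callee reads them from.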

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s33, s7
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s32, s33
define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s33, s8
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN: s_mov_b32 s32, s33
define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s33, s8
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s32, s33
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
  call void @use_workgroup_id_xy()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s9
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s32, s33
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
  call void @use_workgroup_id_xyz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s32, s33
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
  call void @use_workgroup_id_xz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s9
; GCN: s_mov_b32 s6, s7
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s7, s8
; GCN: s_mov_b32 s32, s33
define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
  call void @use_workgroup_id_yz()
  ret void
}

; The argument is already in the right place.
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
define void @func_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
define void @func_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
define void @func_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}
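
; Check that the workgroup ID is still passed in an SGPR when the function
; also takes an ordinary VGPR argument in v0.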

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
define void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
define void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
define void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-DAG: s_mov_b32 s33, s7
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
  call void @other_arg_use_workgroup_id_x(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-DAG: s_mov_b32 s33, s8
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN-DAG: s_mov_b32 s32, s33
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
  call void @other_arg_use_workgroup_id_y(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7

; GCN: s_mov_b32 s32, s33
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
  call void @other_arg_use_workgroup_id_z(i32 555)
  ret void
}
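
; Use every special SGPR input at once, plus a stack object.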

; GCN-LABEL: {{^}}use_every_sgpr_input:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s8
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s9
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s10
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s11
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: ; use s[12:13]
; GCN: ; use s14
; GCN: ; use s15
; GCN: ; use s16
define void @use_every_sgpr_input() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1
; GCN: enable_sgpr_workgroup_info = 0

; GCN: enable_sgpr_private_segment_buffer = 1
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: enable_sgpr_queue_ptr = 1
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; GCN: enable_sgpr_dispatch_id = 1
; GCN: enable_sgpr_flat_scratch_init = 1

; GCN: s_mov_b32 s33, s17
; GCN: s_mov_b64 s[12:13], s[10:11]
; GCN: s_mov_b64 s[10:11], s[8:9]
; GCN: s_mov_b64 s[8:9], s[6:7]
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s32, s33
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
define void @func_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}
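
; A function that uses every input itself and then makes a call must still
; forward the workgroup IDs into the SGPRs its callee reads them from.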

; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
; GCN-DAG: s_mov_b32 s6, s14
; GCN-DAG: s_mov_b32 s7, s15
; GCN-DAG: s_mov_b32 s8, s16
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  call void @use_workgroup_id_xyz()
  ret void
}
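
; Here the workgroup IDs are also used after the call, so before the call
; they must be copied into SGPRs that survive it ([[SAVE_X]]/[[SAVE_Y]]/
; [[SAVE_Z]] below) and read back from there afterwards.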

; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
; GCN: s_mov_b32 s5, s32

; GCN-DAG: s_add_u32 s32, s32, 0x400

; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s14
; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-68-9][0-9]*]], s15
; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-79][0-9]*]], s16
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[6:7]
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[8:9]
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[10:11]

; GCN-DAG: s_mov_b32 s6, s14
; GCN-DAG: s_mov_b32 s7, s15
; GCN-DAG: s_mov_b32 s8, s16

; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[6:7]
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[8:9]
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[10:11]

; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
; GCN-DAG: v_mov_b32_e32 v[[LO1:[0-9]+]], s[[LO_X]]
; GCN-DAG: v_mov_b32_e32 v[[HI1:[0-9]+]], s[[HI_X]]
; GCN-DAG: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO1]]:[[HI1]]{{\]}}
; GCN-DAG: v_mov_b32_e32 v[[LO2:[0-9]+]], s[[LO_Y]]
; GCN-DAG: v_mov_b32_e32 v[[HI2:[0-9]+]], s[[HI_Y]]
; GCN-DAG: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO2]]:[[HI2]]{{\]}}
; GCN-DAG: v_mov_b32_e32 v[[LO3:[0-9]+]], s[[LO_Z]]
; GCN-DAG: v_mov_b32_e32 v[[HI3:[0-9]+]], s[[HI_Z]]
; GCN-DAG: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO3]]:[[HI3]]{{\]}}

; GCN: ; use [[SAVE_X]]
; GCN: ; use [[SAVE_Y]]
; GCN: ; use [[SAVE_Z]]
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  call void @use_workgroup_id_xyz()

  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)
  ret void
}

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }