1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-code-object-v3 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,CIVI %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-code-object-v3 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,GFX9 %s
4 ; GCN-LABEL: {{^}}use_dispatch_ptr:
5 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s4
6 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s5
7 ; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; Callee that reads through the dispatch packet pointer; the CHECK lines above
; expect the caller to have passed it in s[4:5].
8 define hidden void @use_dispatch_ptr() #1 {
9 %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
10 %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
; volatile keeps the load from being optimized away so there is something to match
11 %value = load volatile i32, i32 addrspace(4)* %header_ptr
15 ; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr:
16 ; GCN: enable_sgpr_dispatch_ptr = 1
; Kernel calling @use_dispatch_ptr: the dispatch-ptr user SGPR must be enabled
; for the kernel even though only the callee uses it (see CHECK above).
20 define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
21 call void @use_dispatch_ptr()
25 ; GCN-LABEL: {{^}}use_queue_ptr:
26 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s4
27 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s5
28 ; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; Callee that reads through the queue pointer, expected in s[4:5] per the
; CHECK lines above.
29 define hidden void @use_queue_ptr() #1 {
30 %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
31 %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
; volatile load so the access survives optimization
32 %value = load volatile i32, i32 addrspace(4)* %header_ptr
36 ; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr:
37 ; GCN: enable_sgpr_queue_ptr = 1
; Kernel calling @use_queue_ptr: queue-ptr user SGPR must be enabled (CHECK above).
41 define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
42 call void @use_queue_ptr()
46 ; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
47 ; CIVI: flat_load_dword v[[HI:[0-9]+]], v[0:1]
48 ; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
49 ; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
50 ; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
51 ; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
52 ; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}
; Casting a local (addrspace(3)) pointer to flat needs the shared-memory
; aperture: loaded from the queue ptr on CI/VI, read via s_getreg on GFX9
; (see the CIVI/GFX9 CHECK lines above).
53 define hidden void @use_queue_ptr_addrspacecast() #1 {
54 %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
55 store volatile i32 0, i32* %asc
59 ; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast:
60 ; CIVI: enable_sgpr_queue_ptr = 1
; Kernel calling the addrspacecast user: queue ptr is only required on CI/VI,
; where the aperture comes from it (CIVI CHECK above).
64 define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
65 call void @use_queue_ptr_addrspacecast()
69 ; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
70 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s4
71 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s5
72 ; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; Callee that reads through the kernarg segment pointer, expected in s[4:5]
; per the CHECK lines above.
73 define hidden void @use_kernarg_segment_ptr() #1 {
74 %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
75 %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
; volatile load so the access survives optimization
76 %value = load volatile i32, i32 addrspace(4)* %header_ptr
80 ; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
81 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
; Kernel calling @use_kernarg_segment_ptr: kernarg-segment-ptr SGPR enabled (CHECK above).
86 define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
87 call void @use_kernarg_segment_ptr()
91 ; GCN-LABEL: {{^}}use_dispatch_id:
; Callee consuming the 64-bit dispatch id via inline asm so it must be
; materialized in SGPRs.
93 define hidden void @use_dispatch_id() #1 {
94 %id = call i64 @llvm.amdgcn.dispatch.id()
95 call void asm sideeffect "; use $0", "s"(i64 %id)
99 ; No kernarg segment so that there is a mov to check. With kernarg
100 ; pointer enabled, it happens to end up in the right place anyway.
102 ; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id:
103 ; GCN: enable_sgpr_dispatch_id = 1
; Kernel calling @use_dispatch_id: dispatch-id SGPR enabled (CHECK above).
; Deliberately has no kernel arguments (see comment above).
107 define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
108 call void @use_dispatch_id()
112 ; GCN-LABEL: {{^}}use_workgroup_id_x:
; Callee consuming workgroup id x via inline asm ("s" constraint forces an SGPR).
115 define hidden void @use_workgroup_id_x() #1 {
116 %val = call i32 @llvm.amdgcn.workgroup.id.x()
117 call void asm sideeffect "; use $0", "s"(i32 %val)
121 ; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
124 ; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
; Like @use_workgroup_id_x but with a private (addrspace(5)) stack object, so
; a scratch store is also emitted (CHECK above).
127 define hidden void @use_stack_workgroup_id_x() #1 {
128 %alloca = alloca i32, addrspace(5)
129 store volatile i32 0, i32 addrspace(5)* %alloca
130 %val = call i32 @llvm.amdgcn.workgroup.id.x()
131 call void asm sideeffect "; use $0", "s"(i32 %val)
135 ; GCN-LABEL: {{^}}use_workgroup_id_y:
; Callee consuming workgroup id y via inline asm.
138 define hidden void @use_workgroup_id_y() #1 {
139 %val = call i32 @llvm.amdgcn.workgroup.id.y()
140 call void asm sideeffect "; use $0", "s"(i32 %val)
144 ; GCN-LABEL: {{^}}use_workgroup_id_z:
; Callee consuming workgroup id z via inline asm.
147 define hidden void @use_workgroup_id_z() #1 {
148 %val = call i32 @llvm.amdgcn.workgroup.id.z()
149 call void asm sideeffect "; use $0", "s"(i32 %val)
153 ; GCN-LABEL: {{^}}use_workgroup_id_xy:
; Callee consuming workgroup ids x and y together.
156 define hidden void @use_workgroup_id_xy() #1 {
157 %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
158 %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
159 call void asm sideeffect "; use $0", "s"(i32 %val0)
160 call void asm sideeffect "; use $0", "s"(i32 %val1)
164 ; GCN-LABEL: {{^}}use_workgroup_id_xyz:
; Callee consuming all three workgroup ids; also reused by the
; func_use_every_sgpr_input_* tests further down.
168 define hidden void @use_workgroup_id_xyz() #1 {
169 %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
170 %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
171 %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
172 call void asm sideeffect "; use $0", "s"(i32 %val0)
173 call void asm sideeffect "; use $0", "s"(i32 %val1)
174 call void asm sideeffect "; use $0", "s"(i32 %val2)
178 ; GCN-LABEL: {{^}}use_workgroup_id_xz:
; Callee consuming workgroup ids x and z (y unused).
181 define hidden void @use_workgroup_id_xz() #1 {
182 %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
183 %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
184 call void asm sideeffect "; use $0", "s"(i32 %val0)
185 call void asm sideeffect "; use $0", "s"(i32 %val1)
189 ; GCN-LABEL: {{^}}use_workgroup_id_yz:
; Callee consuming workgroup ids y and z (x unused by the body, though the
; kernel test below still enables x).
192 define hidden void @use_workgroup_id_yz() #1 {
193 %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
194 %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
195 call void asm sideeffect "; use $0", "s"(i32 %val0)
196 call void asm sideeffect "; use $0", "s"(i32 %val1)
200 ; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
201 ; GCN: enable_sgpr_workgroup_id_x = 1
202 ; GCN: enable_sgpr_workgroup_id_y = 0
203 ; GCN: enable_sgpr_workgroup_id_z = 0
206 ; GCN: s_mov_b32 s4, s6
207 ; GCN-NEXT: s_getpc_b64 s[6:7]
208 ; GCN-NEXT: s_add_u32 s6, s6, use_workgroup_id_x@rel32@lo+4
209 ; GCN-NEXT: s_addc_u32 s7, s7, use_workgroup_id_x@rel32@hi+4
210 ; GCN: s_mov_b32 s32, s33
; Kernel caller: only workgroup id x is enabled; it is copied into the
; callee's argument register (s4) before the call (CHECKs above).
213 define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
214 call void @use_workgroup_id_x()
218 ; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y:
219 ; GCN: enable_sgpr_workgroup_id_x = 1
220 ; GCN: enable_sgpr_workgroup_id_y = 1
221 ; GCN: enable_sgpr_workgroup_id_z = 0
223 ; GCN: s_mov_b32 s33, s8
224 ; GCN-DAG: s_mov_b32 s4, s7
225 ; GCN: s_mov_b32 s32, s33
; Kernel caller: y enabled (x is always enabled); y arrives in s7 and is
; moved to s4 for the callee (CHECKs above).
227 define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
228 call void @use_workgroup_id_y()
232 ; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z:
233 ; GCN: enable_sgpr_workgroup_id_x = 1
234 ; GCN: enable_sgpr_workgroup_id_y = 0
235 ; GCN: enable_sgpr_workgroup_id_z = 1
236 ; GCN: s_mov_b32 s33, s8
237 ; GCN: s_mov_b32 s4, s7
; Kernel caller: z enabled, y disabled; z arrives in s7 and is moved to s4
; for the callee (CHECKs above).
240 define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
241 call void @use_workgroup_id_z()
245 ; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy:
246 ; GCN: enable_sgpr_workgroup_id_x = 1
247 ; GCN: enable_sgpr_workgroup_id_y = 1
248 ; GCN: enable_sgpr_workgroup_id_z = 0
250 ; GCN: s_mov_b32 s33, s8
252 ; GCN: s_mov_b32 s5, s7
253 ; GCN: s_mov_b32 s4, s6
254 ; GCN: s_mov_b32 s32, s33
; Kernel caller: x and y enabled; both ids shuffled down into s4/s5 (CHECKs above).
256 define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
257 call void @use_workgroup_id_xy()
261 ; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz:
262 ; GCN: enable_sgpr_workgroup_id_x = 1
263 ; GCN: enable_sgpr_workgroup_id_y = 1
264 ; GCN: enable_sgpr_workgroup_id_z = 1
266 ; GCN: s_mov_b32 s33, s9
268 ; GCN: s_mov_b32 s4, s6
269 ; GCN: s_mov_b32 s5, s7
270 ; GCN: s_mov_b32 s6, s8
272 ; GCN: s_mov_b32 s32, s33
; Kernel caller: all three ids enabled; s6/s7/s8 shuffled down into s4/s5/s6
; (CHECKs above).
274 define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
275 call void @use_workgroup_id_xyz()
279 ; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz:
280 ; GCN: enable_sgpr_workgroup_id_x = 1
281 ; GCN: enable_sgpr_workgroup_id_y = 0
282 ; GCN: enable_sgpr_workgroup_id_z = 1
284 ; GCN: s_mov_b32 s33, s8
285 ; GCN: s_mov_b32 s5, s7
286 ; GCN: s_mov_b32 s4, s6
288 ; GCN: s_mov_b32 s32, s33
; Kernel caller: x and z enabled, y disabled (CHECKs above).
291 define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
292 call void @use_workgroup_id_xz()
296 ; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz:
297 ; GCN: enable_sgpr_workgroup_id_x = 1
298 ; GCN: enable_sgpr_workgroup_id_y = 1
299 ; GCN: enable_sgpr_workgroup_id_z = 1
301 ; GCN: s_mov_b32 s33, s9
302 ; GCN: s_mov_b32 s4, s7
303 ; GCN: s_mov_b32 s5, s8
304 ; GCN: s_mov_b32 s32, s33
; Kernel caller: y and z enabled (x is always on); ids moved to s4/s5 (CHECKs above).
306 define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
307 call void @use_workgroup_id_yz()
311 ; Argument is in right place already
312 ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
314 ; GCN: v_readlane_b32 s4, v32, 0
; Function-to-function call: the id argument is already in the right register,
; so no extra copy is expected (see comment and CHECK above).
315 define hidden void @func_indirect_use_workgroup_id_x() #1 {
316 call void @use_workgroup_id_x()
320 ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
322 ; GCN: v_readlane_b32 s4, v32, 0
; Function-to-function call forwarding workgroup id y unchanged.
323 define hidden void @func_indirect_use_workgroup_id_y() #1 {
324 call void @use_workgroup_id_y()
328 ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
330 ; GCN: v_readlane_b32 s4, v32, 0
; Function-to-function call forwarding workgroup id z unchanged.
331 define hidden void @func_indirect_use_workgroup_id_z() #1 {
332 call void @use_workgroup_id_z()
336 ; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
337 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; Callee with a normal VGPR argument (%arg0 in v0) in addition to the
; workgroup id x SGPR argument.
339 define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
340 %val = call i32 @llvm.amdgcn.workgroup.id.x()
; volatile store to undef keeps %arg0 live so v0 usage is observable
341 store volatile i32 %arg0, i32 addrspace(1)* undef
342 call void asm sideeffect "; use $0", "s"(i32 %val)
346 ; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
347 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; Callee with a VGPR argument plus workgroup id y.
349 define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
350 %val = call i32 @llvm.amdgcn.workgroup.id.y()
351 store volatile i32 %arg0, i32 addrspace(1)* undef
352 call void asm sideeffect "; use $0", "s"(i32 %val)
356 ; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
357 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; Callee with a VGPR argument plus workgroup id z.
359 define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
360 %val = call i32 @llvm.amdgcn.workgroup.id.z()
361 store volatile i32 %arg0, i32 addrspace(1)* undef
362 call void asm sideeffect "; use $0", "s"(i32 %val)
366 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x:
367 ; GCN: enable_sgpr_workgroup_id_x = 1
368 ; GCN: enable_sgpr_workgroup_id_y = 0
369 ; GCN: enable_sgpr_workgroup_id_z = 0
371 ; GCN-DAG: s_mov_b32 s33, s7
372 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b
373 ; GCN-DAG: s_mov_b32 s4, s6
374 ; GCN-DAG: s_mov_b32 s32, s33
; Kernel passing both a VGPR argument (555 = 0x22b in v0) and workgroup id x
; (moved to s4) to the callee (CHECKs above).
377 define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
378 call void @other_arg_use_workgroup_id_x(i32 555)
382 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y:
383 ; GCN: enable_sgpr_workgroup_id_x = 1
384 ; GCN: enable_sgpr_workgroup_id_y = 1
385 ; GCN: enable_sgpr_workgroup_id_z = 0
387 ; GCN-DAG: s_mov_b32 s33, s8
388 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b
389 ; GCN-DAG: s_mov_b32 s4, s7
391 ; GCN-DAG: s_mov_b32 s32, s33
; Kernel passing a VGPR argument plus workgroup id y (s7 -> s4, CHECKs above).
393 define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
394 call void @other_arg_use_workgroup_id_y(i32 555)
398 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z:
399 ; GCN: enable_sgpr_workgroup_id_x = 1
400 ; GCN: enable_sgpr_workgroup_id_y = 0
401 ; GCN: enable_sgpr_workgroup_id_z = 1
403 ; GCN-DAG: s_mov_b32 s33, s8
404 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b
406 ; GCN: s_mov_b32 s32, s33
; Kernel passing a VGPR argument plus workgroup id z (CHECKs above).
408 define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
409 call void @other_arg_use_workgroup_id_z(i32 555)
413 ; GCN-LABEL: {{^}}use_every_sgpr_input:
414 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
415 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s4
416 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s5
417 ; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
418 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
419 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
420 ; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
421 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s8
422 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s9
423 ; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
424 ; GCN: ; use s[10:11]
; Callee exercising every implicit SGPR input at once: dispatch ptr, queue
; ptr, kernarg segment ptr, dispatch id, and all three workgroup ids, plus a
; stack object. The CHECK lines above pin which SGPR pair each pointer
; arrives in (s[4:5], s[6:7], s[8:9], dispatch id in s[10:11]).
428 define hidden void @use_every_sgpr_input() #1 {
429 %alloca = alloca i32, align 4, addrspace(5)
430 store volatile i32 0, i32 addrspace(5)* %alloca
432 %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
433 %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
434 %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
436 %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
437 %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
438 %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
440 %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
441 %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
442 %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
444 %val3 = call i64 @llvm.amdgcn.dispatch.id()
445 call void asm sideeffect "; use $0", "s"(i64 %val3)
447 %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
448 call void asm sideeffect "; use $0", "s"(i32 %val4)
450 %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
451 call void asm sideeffect "; use $0", "s"(i32 %val5)
453 %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
454 call void asm sideeffect "; use $0", "s"(i32 %val6)
459 ; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
460 ; GCN: enable_sgpr_workgroup_id_x = 1
461 ; GCN: enable_sgpr_workgroup_id_y = 1
462 ; GCN: enable_sgpr_workgroup_id_z = 1
463 ; GCN: enable_sgpr_workgroup_info = 0
465 ; GCN: enable_sgpr_private_segment_buffer = 1
466 ; GCN: enable_sgpr_dispatch_ptr = 1
467 ; GCN: enable_sgpr_queue_ptr = 1
468 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
469 ; GCN: enable_sgpr_dispatch_id = 1
470 ; GCN: enable_sgpr_flat_scratch_init = 1
472 ; GCN: s_mov_b32 s33, s17
473 ; GCN: s_mov_b32 s12, s14
474 ; GCN: s_mov_b32 s13, s15
475 ; GCN: s_mov_b32 s14, s16
476 ; GCN: s_mov_b32 s32, s33
; Kernel caller: every user-SGPR feature bit must be enabled so all inputs
; can be forwarded to the callee (CHECKs above).
478 define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 {
479 call void @use_every_sgpr_input()
483 ; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
496 ; GCN: s_or_saveexec_b64 s[16:17], -1
; Function-to-function call where all implicit inputs are forwarded unchanged.
497 define hidden void @func_indirect_use_every_sgpr_input() #1 {
498 call void @use_every_sgpr_input()
502 ; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
503 ; GCN: s_mov_b32 s4, s12
504 ; GCN: s_mov_b32 s5, s13
505 ; GCN: s_mov_b32 s6, s14
506 ; GCN: ; use s[10:11]
; Function that uses every implicit SGPR input itself AND calls
; @use_workgroup_id_xyz, so the workgroup ids must be copied into the
; callee's argument registers (s12/s13/s14 -> s4/s5/s6, CHECKs above).
512 define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
513 %alloca = alloca i32, align 4, addrspace(5)
514 store volatile i32 0, i32 addrspace(5)* %alloca
516 %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
517 %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
518 %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
520 %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
521 %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
522 %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
524 %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
525 %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
526 %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
528 %val3 = call i64 @llvm.amdgcn.dispatch.id()
529 call void asm sideeffect "; use $0", "s"(i64 %val3)
531 %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
532 call void asm sideeffect "; use $0", "s"(i32 %val4)
534 %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
535 call void asm sideeffect "; use $0", "s"(i32 %val5)
537 %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
538 call void asm sideeffect "; use $0", "s"(i32 %val6)
; call AFTER the uses: ids are still live in their incoming registers here
540 call void @use_workgroup_id_xyz()
544 ; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
545 ; GCN-DAG: s_mov_b32 s34, s32
546 ; GCN-DAG: s_add_u32 s32, s32, 0x400
547 ; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[4:5]
548 ; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[6:7]
551 ; GCN: s_mov_b32 s4, s12
552 ; GCN: s_mov_b32 s5, s13
553 ; GCN: s_mov_b32 s6, s14
555 ; GCN: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[8:9]
557 ; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s12
558 ; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-57-9][0-9]*]], s13
559 ; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-68-9][0-9]*]], s14
565 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34{{$}}
566 ; GCN-DAG: v_mov_b32_e32 v[[LO1:[0-9]+]], s[[LO_X]]
567 ; GCN-DAG: v_mov_b32_e32 v[[HI1:[0-9]+]], s[[HI_X]]
568 ; GCN-DAG: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO1]]:[[HI1]]{{\]}}
569 ; GCN-DAG: v_mov_b32_e32 v[[LO2:[0-9]+]], s[[LO_Y]]
570 ; GCN-DAG: v_mov_b32_e32 v[[HI2:[0-9]+]], s[[HI_Y]]
571 ; GCN-DAG: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO2]]:[[HI2]]{{\]}}
572 ; GCN-DAG: v_mov_b32_e32 v[[LO3:[0-9]+]], s[[LO_Z]]
573 ; GCN-DAG: v_mov_b32_e32 v[[HI3:[0-9]+]], s[[HI_Z]]
574 ; GCN-DAG: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO3]]:[[HI3]]{{\]}}
576 ; GCN: ; use [[SAVE_X]]
577 ; GCN: ; use [[SAVE_Y]]
578 ; GCN: ; use [[SAVE_Z]]
; Same as the previous test but the call comes FIRST, so the implicit inputs
; are live across the call and must be saved into other SGPRs (the
; LO_X/HI_X..SAVE_Z copies matched by the CHECK lines above) and read back
; afterwards.
579 define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
580 %alloca = alloca i32, align 4, addrspace(5)
; call before any use forces the inputs to be preserved across it
581 call void @use_workgroup_id_xyz()
583 store volatile i32 0, i32 addrspace(5)* %alloca
585 %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
586 %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
587 %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
589 %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
590 %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
591 %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
593 %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
594 %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
595 %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
597 %val3 = call i64 @llvm.amdgcn.dispatch.id()
598 call void asm sideeffect "; use $0", "s"(i64 %val3)
600 %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
601 call void asm sideeffect "; use $0", "s"(i32 %val4)
603 %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
604 call void asm sideeffect "; use $0", "s"(i32 %val5)
606 %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
607 call void asm sideeffect "; use $0", "s"(i32 %val6)
; Intrinsic declarations. #0 = readnone speculatable (pure queries);
; #1 = noinline so calls are not folded away and argument passing is tested.
612 declare i32 @llvm.amdgcn.workgroup.id.x() #0
613 declare i32 @llvm.amdgcn.workgroup.id.y() #0
614 declare i32 @llvm.amdgcn.workgroup.id.z() #0
615 declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
616 declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
617 declare i64 @llvm.amdgcn.dispatch.id() #0
618 declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
620 attributes #0 = { nounwind readnone speculatable }
621 attributes #1 = { nounwind noinline }