1 ; RUN: opt -mcpu=kaveri -passes=amdgpu-attributor < %s | llc -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,FIXEDABI %s
3 target triple = "amdgcn-amd-amdhsa"
5 ; GCN-LABEL: {{^}}use_workitem_id_x:
7 ; FIXEDABI: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
8 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
10 ; GCN-NEXT: s_setpc_b64
11 define void @use_workitem_id_x() #1 {
12 %val = call i32 @llvm.amdgcn.workitem.id.x()
13 store volatile i32 %val, ptr addrspace(1) undef
17 ; GCN-LABEL: {{^}}use_workitem_id_y:
19 ; FIXEDABI: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
20 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
22 ; GCN-NEXT: s_setpc_b64
23 define void @use_workitem_id_y() #1 {
24 %val = call i32 @llvm.amdgcn.workitem.id.y()
25 store volatile i32 %val, ptr addrspace(1) undef
29 ; GCN-LABEL: {{^}}use_workitem_id_z:
31 ; FIXEDABI: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
32 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
34 ; GCN-NEXT: s_setpc_b64
35 define void @use_workitem_id_z() #1 {
36 %val = call i32 @llvm.amdgcn.workitem.id.z()
37 store volatile i32 %val, ptr addrspace(1) undef
41 ; GCN-LABEL: {{^}}use_workitem_id_xy:
43 ; FIXEDABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
44 ; FIXEDABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
46 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDX]]
47 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDY]]
49 ; GCN-NEXT: s_setpc_b64
50 define void @use_workitem_id_xy() #1 {
51 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
52 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
53 store volatile i32 %val0, ptr addrspace(1) undef
54 store volatile i32 %val1, ptr addrspace(1) undef
58 ; GCN-LABEL: {{^}}use_workitem_id_xyz:
61 ; FIXEDABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
62 ; FIXEDABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
63 ; FIXEDABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
66 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDX]]
67 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDY]]
68 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDZ]]
70 ; GCN-NEXT: s_setpc_b64
71 define void @use_workitem_id_xyz() #1 {
72 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
73 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
74 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
75 store volatile i32 %val0, ptr addrspace(1) undef
76 store volatile i32 %val1, ptr addrspace(1) undef
77 store volatile i32 %val2, ptr addrspace(1) undef
81 ; GCN-LABEL: {{^}}use_workitem_id_xz:
83 ; FIXEDABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
84 ; FIXEDABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
86 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDX]]
87 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDZ]]
89 ; GCN-NEXT: s_setpc_b64
90 define void @use_workitem_id_xz() #1 {
91 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
92 %val1 = call i32 @llvm.amdgcn.workitem.id.z()
93 store volatile i32 %val0, ptr addrspace(1) undef
94 store volatile i32 %val1, ptr addrspace(1) undef
98 ; GCN-LABEL: {{^}}use_workitem_id_yz:
100 ; FIXEDABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
101 ; FIXEDABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
103 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDY]]
104 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDZ]]
105 ; GCN-NEXT: s_waitcnt
106 ; GCN-NEXT: s_setpc_b64
107 define void @use_workitem_id_yz() #1 {
108 %val0 = call i32 @llvm.amdgcn.workitem.id.y()
109 %val1 = call i32 @llvm.amdgcn.workitem.id.z()
110 store volatile i32 %val0, ptr addrspace(1) undef
111 store volatile i32 %val1, ptr addrspace(1) undef
115 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
119 ; FIXEDABI: v_mov_b32_e32 v31, v0{{$}}
125 ; GCN: .amdhsa_system_vgpr_workitem_id 0
126 define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
127 call void @use_workitem_id_x()
131 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
136 ; FIXEDABI: v_lshlrev_b32_e32 v31, 10, v1
143 ; GCN: .amdhsa_system_vgpr_workitem_id 1
144 define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
145 call void @use_workitem_id_y()
149 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
153 ; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2
159 ; GCN: .amdhsa_system_vgpr_workitem_id 2
160 define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
161 call void @use_workitem_id_z()
165 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xy:
169 ; FIXEDABI: v_lshlrev_b32_e32 v1, 10, v1
170 ; FIXEDABI-NEXT: v_or_b32_e32 v31, v0, v1
176 define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
177 call void @use_workitem_id_xy()
181 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xz:
185 ; FIXEDABI: v_lshlrev_b32_e32 v1, 20, v2
186 ; FIXEDABI-NEXT: v_or_b32_e32 v31, v0, v1
192 define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
193 call void @use_workitem_id_xz()
197 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_yz:
201 ; FIXEDABI: v_lshlrev_b32_e32 v0, 20, v2
202 ; FIXEDABI-NEXT: v_lshlrev_b32_e32 v1, 10, v1
203 ; FIXEDABI-NEXT: v_or_b32_e32 v31, v1, v0
209 define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
210 call void @use_workitem_id_yz()
214 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xyz:
215 ; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
216 ; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
217 ; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
218 ; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
221 define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 {
222 call void @use_workitem_id_xyz()
226 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_x:
230 define void @func_indirect_use_workitem_id_x() #1 {
231 call void @use_workitem_id_x()
235 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_y:
239 define void @func_indirect_use_workitem_id_y() #1 {
240 call void @use_workitem_id_y()
244 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_z:
248 define void @func_indirect_use_workitem_id_z() #1 {
249 call void @use_workitem_id_z()
253 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_x:
255 ; FIXEDABI-DAG: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
257 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
258 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
259 define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
260 %val = call i32 @llvm.amdgcn.workitem.id.x()
261 store volatile i32 %arg0, ptr addrspace(1) undef
262 store volatile i32 %val, ptr addrspace(1) undef
266 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_y:
268 ; FIXEDABI-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
269 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
270 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
271 define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
272 %val = call i32 @llvm.amdgcn.workitem.id.y()
273 store volatile i32 %arg0, ptr addrspace(1) undef
274 store volatile i32 %val, ptr addrspace(1) undef
278 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_z:
280 ; FIXEDABI-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
281 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
282 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
283 define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
284 %val = call i32 @llvm.amdgcn.workitem.id.z()
285 store volatile i32 %arg0, ptr addrspace(1) undef
286 store volatile i32 %val, ptr addrspace(1) undef
291 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:
294 ; FIXEDABI: v_mov_b32_e32 v31, v0
295 ; FIXEDABI: v_mov_b32_e32 v0, 0x22b
299 ; GCN: .amdhsa_system_vgpr_workitem_id 0
300 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
301 call void @other_arg_use_workitem_id_x(i32 555)
306 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:
311 ; FIXEDABI: v_lshlrev_b32_e32 v31, 10, v1
312 ; FIXEDABI: v_mov_b32_e32 v0, 0x22b
314 ; GCN: .amdhsa_system_vgpr_workitem_id 1
315 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
316 call void @other_arg_use_workitem_id_y(i32 555)
320 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:
325 ; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2
326 ; FIXEDABI: v_mov_b32_e32 v0, 0x22b
328 ; GCN: .amdhsa_system_vgpr_workitem_id 2
329 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
330 call void @other_arg_use_workitem_id_z(i32 555)
334 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
335 ; FIXEDABI: v_and_b32_e32 v31, 0x3ff, v31
336 ; FIXEDABI: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
337 define void @too_many_args_use_workitem_id_x(
338 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
339 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
340 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
341 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
342 %val = call i32 @llvm.amdgcn.workitem.id.x()
343 store volatile i32 %val, ptr addrspace(1) undef
345 store volatile i32 %arg0, ptr addrspace(1) undef
346 store volatile i32 %arg1, ptr addrspace(1) undef
347 store volatile i32 %arg2, ptr addrspace(1) undef
348 store volatile i32 %arg3, ptr addrspace(1) undef
349 store volatile i32 %arg4, ptr addrspace(1) undef
350 store volatile i32 %arg5, ptr addrspace(1) undef
351 store volatile i32 %arg6, ptr addrspace(1) undef
352 store volatile i32 %arg7, ptr addrspace(1) undef
354 store volatile i32 %arg8, ptr addrspace(1) undef
355 store volatile i32 %arg9, ptr addrspace(1) undef
356 store volatile i32 %arg10, ptr addrspace(1) undef
357 store volatile i32 %arg11, ptr addrspace(1) undef
358 store volatile i32 %arg12, ptr addrspace(1) undef
359 store volatile i32 %arg13, ptr addrspace(1) undef
360 store volatile i32 %arg14, ptr addrspace(1) undef
361 store volatile i32 %arg15, ptr addrspace(1) undef
363 store volatile i32 %arg16, ptr addrspace(1) undef
364 store volatile i32 %arg17, ptr addrspace(1) undef
365 store volatile i32 %arg18, ptr addrspace(1) undef
366 store volatile i32 %arg19, ptr addrspace(1) undef
367 store volatile i32 %arg20, ptr addrspace(1) undef
368 store volatile i32 %arg21, ptr addrspace(1) undef
369 store volatile i32 %arg22, ptr addrspace(1) undef
370 store volatile i32 %arg23, ptr addrspace(1) undef
372 store volatile i32 %arg24, ptr addrspace(1) undef
373 store volatile i32 %arg25, ptr addrspace(1) undef
374 store volatile i32 %arg26, ptr addrspace(1) undef
375 store volatile i32 %arg27, ptr addrspace(1) undef
376 store volatile i32 %arg28, ptr addrspace(1) undef
377 store volatile i32 %arg29, ptr addrspace(1) undef
378 store volatile i32 %arg30, ptr addrspace(1) undef
379 store volatile i32 %arg31, ptr addrspace(1) undef
384 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:
389 ; FIXEDABI-DAG: s_mov_b32 s32, 0
390 ; FIXEDABI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x140{{$}}
391 ; FIXEDABI-DAG: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
392 ; FIXEDABI-DAG: v_mov_b32_e32 v31, v0
394 ; FIXEDABI: s_swappc_b64
396 ; GCN: .amdhsa_system_vgpr_workitem_id 0
397 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
398 call void @too_many_args_use_workitem_id_x(
399 i32 10, i32 20, i32 30, i32 40,
400 i32 50, i32 60, i32 70, i32 80,
401 i32 90, i32 100, i32 110, i32 120,
402 i32 130, i32 140, i32 150, i32 160,
403 i32 170, i32 180, i32 190, i32 200,
404 i32 210, i32 220, i32 230, i32 240,
405 i32 250, i32 260, i32 270, i32 280,
406 i32 290, i32 300, i32 310, i32 320)
410 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
412 ; Touching the workitem id register is not necessary.
414 ; FIXEDABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x140{{$}}
416 ; FIXEDABI: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
420 define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
421 store volatile i32 %arg0, ptr addrspace(1) undef
422 call void @too_many_args_use_workitem_id_x(
423 i32 10, i32 20, i32 30, i32 40,
424 i32 50, i32 60, i32 70, i32 80,
425 i32 90, i32 100, i32 110, i32 120,
426 i32 130, i32 140, i32 150, i32 160,
427 i32 170, i32 180, i32 190, i32 200,
428 i32 210, i32 220, i32 230, i32 240,
429 i32 250, i32 260, i32 270, i32 280,
430 i32 290, i32 300, i32 310, i32 320)
434 ; Requires loading and storing to stack slot.
435 ; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
436 ; GCN-DAG: s_addk_i32 s32, 0x400{{$}}
437 ; GCN-DAG: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
438 ; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}}
440 ; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}}
444 ; GCN: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
445 ; GCN: s_addk_i32 s32, 0xfc00{{$}}
447 define void @too_many_args_call_too_many_args_use_workitem_id_x(
448 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
449 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
450 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
451 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
452 call void @too_many_args_use_workitem_id_x(
453 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
454 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
455 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
456 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31)
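460 ; var abi stack layout (stale: the FIXEDABI checks below take workitem ID x from v31 and load the byval from s32 offset:4):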
461 ; frame[0] = byval arg32
462 ; frame[1] = stack passed workitem ID x
463 ; frame[2] = VGPR spill slot
465 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
467 ; FIXEDABI: v_and_b32_e32 v31, 0x3ff, v31
468 ; FIXEDABI-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31
470 ; FIXEDABI: buffer_load_dword v31, off, s[0:3], s32{{$}}
471 ; FIXEDABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
472 ; FIXEDABI: buffer_load_dword v0, off, s[0:3], s32 offset:4 glc{{$}}
473 ; FIXEDABI: s_setpc_b64
474 define void @too_many_args_use_workitem_id_x_byval(
475 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
476 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
477 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
478 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, ptr addrspace(5) byval(i32) %arg32) #1 {
479 %val = call i32 @llvm.amdgcn.workitem.id.x()
480 store volatile i32 %val, ptr addrspace(1) undef
482 store volatile i32 %arg0, ptr addrspace(1) undef
483 store volatile i32 %arg1, ptr addrspace(1) undef
484 store volatile i32 %arg2, ptr addrspace(1) undef
485 store volatile i32 %arg3, ptr addrspace(1) undef
486 store volatile i32 %arg4, ptr addrspace(1) undef
487 store volatile i32 %arg5, ptr addrspace(1) undef
488 store volatile i32 %arg6, ptr addrspace(1) undef
489 store volatile i32 %arg7, ptr addrspace(1) undef
491 store volatile i32 %arg8, ptr addrspace(1) undef
492 store volatile i32 %arg9, ptr addrspace(1) undef
493 store volatile i32 %arg10, ptr addrspace(1) undef
494 store volatile i32 %arg11, ptr addrspace(1) undef
495 store volatile i32 %arg12, ptr addrspace(1) undef
496 store volatile i32 %arg13, ptr addrspace(1) undef
497 store volatile i32 %arg14, ptr addrspace(1) undef
498 store volatile i32 %arg15, ptr addrspace(1) undef
500 store volatile i32 %arg16, ptr addrspace(1) undef
501 store volatile i32 %arg17, ptr addrspace(1) undef
502 store volatile i32 %arg18, ptr addrspace(1) undef
503 store volatile i32 %arg19, ptr addrspace(1) undef
504 store volatile i32 %arg20, ptr addrspace(1) undef
505 store volatile i32 %arg21, ptr addrspace(1) undef
506 store volatile i32 %arg22, ptr addrspace(1) undef
507 store volatile i32 %arg23, ptr addrspace(1) undef
509 store volatile i32 %arg24, ptr addrspace(1) undef
510 store volatile i32 %arg25, ptr addrspace(1) undef
511 store volatile i32 %arg26, ptr addrspace(1) undef
512 store volatile i32 %arg27, ptr addrspace(1) undef
513 store volatile i32 %arg28, ptr addrspace(1) undef
514 store volatile i32 %arg29, ptr addrspace(1) undef
515 store volatile i32 %arg30, ptr addrspace(1) undef
516 store volatile i32 %arg31, ptr addrspace(1) undef
517 %private = load volatile i32, ptr addrspace(5) %arg32
521 ; var abi stack layout (stale: the FIXEDABI checks below pass the workitem ID in v31):
524 ; sp[2] = stack passed workitem ID x
526 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
531 ; FIXEDABI: v_mov_b32_e32 v31, v0
532 ; FIXEDABI: v_mov_b32_e32 [[K0:v[0-9]+]], 0x3e7
533 ; FIXEDABI: s_movk_i32 s32, 0x400{{$}}
534 ; FIXEDABI: buffer_store_dword [[K0]], off, s[0:3], 0{{$}}
535 ; FIXEDABI: v_mov_b32_e32 [[K1:v[0-9]+]], 0x140
537 ; FIXEDABI: buffer_store_dword [[K1]], off, s[0:3], s32{{$}}
539 ; FIXME: Why this reload?
540 ; FIXEDABI: buffer_load_dword [[RELOAD:v[0-9]+]], off, s[0:3], 0{{$}}
543 ; FIXEDABI: buffer_store_dword [[RELOAD]], off, s[0:3], s32
544 ; FIXEDABI: s_swappc_b64
545 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
546 %alloca = alloca i32, align 4, addrspace(5)
547 store volatile i32 999, ptr addrspace(5) %alloca
548 call void @too_many_args_use_workitem_id_x_byval(
549 i32 10, i32 20, i32 30, i32 40,
550 i32 50, i32 60, i32 70, i32 80,
551 i32 90, i32 100, i32 110, i32 120,
552 i32 130, i32 140, i32 150, i32 160,
553 i32 170, i32 180, i32 190, i32 200,
554 i32 210, i32 220, i32 230, i32 240,
555 i32 250, i32 260, i32 270, i32 280,
556 i32 290, i32 300, i32 310, i32 320,
557 ptr addrspace(5) byval(i32) %alloca)
561 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
563 ; FIXEDABI: v_mov_b32_e32 [[K0:v[0-9]+]], 0x3e7{{$}}
564 ; FIXEDABI: buffer_store_dword [[K0]], off, s[0:3], s33{{$}}
565 ; FIXEDABI: v_mov_b32_e32 [[K1:v[0-9]+]], 0x140{{$}}
566 ; FIXEDABI: buffer_store_dword [[K1]], off, s[0:3], s32{{$}}
567 ; FIXEDABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
570 ; FIXEDABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
572 ; FIXEDABI: s_swappc_b64
573 define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
574 %alloca = alloca i32, align 4, addrspace(5)
575 store volatile i32 999, ptr addrspace(5) %alloca
576 call void @too_many_args_use_workitem_id_x_byval(
577 i32 10, i32 20, i32 30, i32 40,
578 i32 50, i32 60, i32 70, i32 80,
579 i32 90, i32 100, i32 110, i32 120,
580 i32 130, i32 140, i32 150, i32 160,
581 i32 170, i32 180, i32 190, i32 200,
582 i32 210, i32 220, i32 230, i32 240,
583 i32 250, i32 260, i32 270, i32 280,
584 i32 290, i32 300, i32 310, i32 320,
585 ptr addrspace(5) byval(i32) %alloca)
589 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
590 ; FIXEDABI: v_and_b32_e32 [[AND_X:v[0-9]+]], 0x3ff, v31
591 ; FIXEDABI-NOT: buffer_load_dword
592 ; FIXEDABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[AND_X]]
593 ; FIXEDABI-NOT: buffer_load_dword
594 ; FIXEDABI: v_bfe_u32 [[BFE_Y:v[0-9]+]], v31, 10, 10
595 ; FIXEDABI-NEXT: v_bfe_u32 [[BFE_Z:v[0-9]+]], v31, 20, 10
596 ; FIXEDABI-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[BFE_Y]]
597 ; FIXEDABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[BFE_Z]]
599 define void @too_many_args_use_workitem_id_xyz(
600 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
601 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
602 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
603 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
604 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
605 store volatile i32 %val0, ptr addrspace(1) undef
606 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
607 store volatile i32 %val1, ptr addrspace(1) undef
608 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
609 store volatile i32 %val2, ptr addrspace(1) undef
611 store volatile i32 %arg0, ptr addrspace(1) undef
612 store volatile i32 %arg1, ptr addrspace(1) undef
613 store volatile i32 %arg2, ptr addrspace(1) undef
614 store volatile i32 %arg3, ptr addrspace(1) undef
615 store volatile i32 %arg4, ptr addrspace(1) undef
616 store volatile i32 %arg5, ptr addrspace(1) undef
617 store volatile i32 %arg6, ptr addrspace(1) undef
618 store volatile i32 %arg7, ptr addrspace(1) undef
620 store volatile i32 %arg8, ptr addrspace(1) undef
621 store volatile i32 %arg9, ptr addrspace(1) undef
622 store volatile i32 %arg10, ptr addrspace(1) undef
623 store volatile i32 %arg11, ptr addrspace(1) undef
624 store volatile i32 %arg12, ptr addrspace(1) undef
625 store volatile i32 %arg13, ptr addrspace(1) undef
626 store volatile i32 %arg14, ptr addrspace(1) undef
627 store volatile i32 %arg15, ptr addrspace(1) undef
629 store volatile i32 %arg16, ptr addrspace(1) undef
630 store volatile i32 %arg17, ptr addrspace(1) undef
631 store volatile i32 %arg18, ptr addrspace(1) undef
632 store volatile i32 %arg19, ptr addrspace(1) undef
633 store volatile i32 %arg20, ptr addrspace(1) undef
634 store volatile i32 %arg21, ptr addrspace(1) undef
635 store volatile i32 %arg22, ptr addrspace(1) undef
636 store volatile i32 %arg23, ptr addrspace(1) undef
638 store volatile i32 %arg24, ptr addrspace(1) undef
639 store volatile i32 %arg25, ptr addrspace(1) undef
640 store volatile i32 %arg26, ptr addrspace(1) undef
641 store volatile i32 %arg27, ptr addrspace(1) undef
642 store volatile i32 %arg28, ptr addrspace(1) undef
643 store volatile i32 %arg29, ptr addrspace(1) undef
644 store volatile i32 %arg30, ptr addrspace(1) undef
645 store volatile i32 %arg31, ptr addrspace(1) undef
650 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:
652 ; GCN-DAG: s_mov_b32 s32, 0
654 ; GCN-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
655 ; FIXEDABI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x140
656 ; GCN-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
657 ; GCN-DAG: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
659 ; FIXEDABI: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
660 ; FIXEDABI-DAG: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
664 ; GCN: .amdhsa_system_vgpr_workitem_id 2
665 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
666 call void @too_many_args_use_workitem_id_xyz(
667 i32 10, i32 20, i32 30, i32 40,
668 i32 50, i32 60, i32 70, i32 80,
669 i32 90, i32 100, i32 110, i32 120,
670 i32 130, i32 140, i32 150, i32 160,
671 i32 170, i32 180, i32 190, i32 200,
672 i32 210, i32 220, i32 230, i32 240,
673 i32 250, i32 260, i32 270, i32 280,
674 i32 290, i32 300, i32 310, i32 320)
678 ; Var abi (stale; the checks below unpack X, Y and Z from v31 and expect ScratchSize: 0): workitem ID X in register, yz on stack
679 ; v31 = workitem ID X
680 ; frame[0] = workitem { Z, Y, X }
682 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz:
683 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
684 ; GCN-DAG: {{flat|global}}_store_dword v[0:1], [[IDX]]
685 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
686 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDY]]
687 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
688 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDZ]]
690 ; GCN: ScratchSize: 0
691 define void @too_many_args_use_workitem_id_x_stack_yz(
692 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
693 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
694 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
695 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) #1 {
696 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
697 store volatile i32 %val0, ptr addrspace(1) undef
698 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
699 store volatile i32 %val1, ptr addrspace(1) undef
700 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
701 store volatile i32 %val2, ptr addrspace(1) undef
703 store volatile i32 %arg0, ptr addrspace(1) undef
704 store volatile i32 %arg1, ptr addrspace(1) undef
705 store volatile i32 %arg2, ptr addrspace(1) undef
706 store volatile i32 %arg3, ptr addrspace(1) undef
707 store volatile i32 %arg4, ptr addrspace(1) undef
708 store volatile i32 %arg5, ptr addrspace(1) undef
709 store volatile i32 %arg6, ptr addrspace(1) undef
710 store volatile i32 %arg7, ptr addrspace(1) undef
712 store volatile i32 %arg8, ptr addrspace(1) undef
713 store volatile i32 %arg9, ptr addrspace(1) undef
714 store volatile i32 %arg10, ptr addrspace(1) undef
715 store volatile i32 %arg11, ptr addrspace(1) undef
716 store volatile i32 %arg12, ptr addrspace(1) undef
717 store volatile i32 %arg13, ptr addrspace(1) undef
718 store volatile i32 %arg14, ptr addrspace(1) undef
719 store volatile i32 %arg15, ptr addrspace(1) undef
721 store volatile i32 %arg16, ptr addrspace(1) undef
722 store volatile i32 %arg17, ptr addrspace(1) undef
723 store volatile i32 %arg18, ptr addrspace(1) undef
724 store volatile i32 %arg19, ptr addrspace(1) undef
725 store volatile i32 %arg20, ptr addrspace(1) undef
726 store volatile i32 %arg21, ptr addrspace(1) undef
727 store volatile i32 %arg22, ptr addrspace(1) undef
728 store volatile i32 %arg23, ptr addrspace(1) undef
730 store volatile i32 %arg24, ptr addrspace(1) undef
731 store volatile i32 %arg25, ptr addrspace(1) undef
732 store volatile i32 %arg26, ptr addrspace(1) undef
733 store volatile i32 %arg27, ptr addrspace(1) undef
734 store volatile i32 %arg28, ptr addrspace(1) undef
735 store volatile i32 %arg29, ptr addrspace(1) undef
736 store volatile i32 %arg30, ptr addrspace(1) undef
741 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:
744 ; GCN-DAG: v_lshlrev_b32_e32 v1, 10, v1
745 ; GCN-DAG: v_or_b32_e32 v0, v0, v1
746 ; GCN-DAG: v_lshlrev_b32_e32 v2, 20, v2
747 ; GCN-DAG: v_or_b32_e32 v31, v0, v2
749 ; GCN: s_mov_b32 s32, 0
752 ; GCN: .amdhsa_system_vgpr_workitem_id 2
753 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
754 call void @too_many_args_use_workitem_id_x_stack_yz(
755 i32 10, i32 20, i32 30, i32 40,
756 i32 50, i32 60, i32 70, i32 80,
757 i32 90, i32 100, i32 110, i32 120,
758 i32 130, i32 140, i32 150, i32 160,
759 i32 170, i32 180, i32 190, i32 200,
760 i32 210, i32 220, i32 230, i32 240,
761 i32 250, i32 260, i32 270, i32 280,
762 i32 290, i32 300, i32 310)
766 declare hidden void @extern_hint(i32) #2 ; external callee declared with attribute group #2 (the amdgpu-no-workitem-id-{x,y,z} attributes)
768 ; Workitem IDs should not be passed due to the attribute
769 ; GCN-LABEL: {{^}}kern_call_no_workitem_id_hints:
772 ; GCN: v_mov_b32_e32 v0, 9
776 define amdgpu_kernel void @kern_call_no_workitem_id_hints() #2 {
777 call void @extern_hint(i32 9)
781 ; GCN-LABEL: {{^}}func_call_no_workitem_id_hints:
784 ; GCN: v_mov_b32_e32 v0, 9
788 define void @func_call_no_workitem_id_hints() #2 {
789 call void @extern_hint(i32 9)
793 declare hidden void @extern_nohint(i32) ; external callee with no attribute group on the declaration itself
795 ; Check that the hint is respected on the callsite, not the function
797 ; GCN-LABEL: {{^}}kern_callsite_workitem_id_hints:
800 ; GCN: v_mov_b32_e32 v0, 9
804 define amdgpu_kernel void @kern_callsite_workitem_id_hints() #2 {
805 call void @extern_nohint(i32 9) #2
809 declare i32 @llvm.amdgcn.workitem.id.x() #0 ; workitem ID intrinsics called by the test functions in this file
810 declare i32 @llvm.amdgcn.workitem.id.y() #0
811 declare i32 @llvm.amdgcn.workitem.id.z() #0
813 attributes #0 = { nounwind readnone speculatable } ; attached to the three intrinsic declarations above
814 attributes #1 = { nounwind noinline } ; attached to the test functions and kernels that read or forward workitem IDs
815 attributes #2 = { nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } ; attached to @extern_hint, the *_workitem_id_hints tests and the hinted call site
817 !llvm.module.flags = !{!0}
818 !0 = !{i32 1, !"amdhsa_code_object_version", i32 400} ; module flag carrying the value 400 under the key amdhsa_code_object_version