; This file is run in two configurations:
;   - kaveri: workitem IDs arrive in separate VGPRs (v0, v1, v2) and kernels
;     must pack them into v31 before calling a function.
;   - gfx90a: workitem IDs already arrive packed in v0.
; llc has to target the same CPU as the preceding opt invocation; otherwise the
; per-target check prefixes would be matched against the wrong target's output.
1 ; RUN: opt -passes=amdgpu-attributor -mcpu=kaveri < %s | llc -mcpu=kaveri -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX7,UNPACKED-TID %s
2 ; RUN: opt -passes=amdgpu-attributor -mcpu=gfx90a -mattr=-xnack < %s | llc -mcpu=gfx90a -mattr=-xnack -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,PACKED-TID %s
3 target triple = "amdgcn-amd-amdhsa"
; Callee that reads workitem ID X. The expected code masks the low 10 bits of
; v31 (0x3ff) and stores the result with a volatile store.
4 ; GCN-LABEL: {{^}}use_workitem_id_x:
8 ; GCN: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
9 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
11 ; GCN-NEXT: s_setpc_b64
12 define void @use_workitem_id_x() #1 {
13 %val = call i32 @llvm.amdgcn.workitem.id.x()
14 store volatile i32 %val, ptr addrspace(1) undef
; Callee that reads workitem ID Y. The expected code extracts the 10-bit field
; at bit offset 10 of v31 and stores it.
18 ; GCN-LABEL: {{^}}use_workitem_id_y:
20 ; GCN: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
21 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
23 ; GCN-NEXT: s_setpc_b64
24 define void @use_workitem_id_y() #1 {
25 %val = call i32 @llvm.amdgcn.workitem.id.y()
26 store volatile i32 %val, ptr addrspace(1) undef
; Callee that reads workitem ID Z. The expected code extracts the 10-bit field
; at bit offset 20 of v31 and stores it.
30 ; GCN-LABEL: {{^}}use_workitem_id_z:
32 ; GCN: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
33 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
35 ; GCN-NEXT: s_setpc_b64
36 define void @use_workitem_id_z() #1 {
37 %val = call i32 @llvm.amdgcn.workitem.id.z()
38 store volatile i32 %val, ptr addrspace(1) undef
; Callee that reads workitem IDs X and Y. Both are expected to be unpacked from
; v31 (X: low 10 bits, Y: bits 10..19) and stored; the relative order of the
; extracts and stores is not constrained.
42 ; GCN-LABEL: {{^}}use_workitem_id_xy:
44 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
45 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
46 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
47 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
49 ; GCN-NEXT: s_setpc_b64
50 define void @use_workitem_id_xy() #1 {
51 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
52 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
53 store volatile i32 %val0, ptr addrspace(1) undef
54 store volatile i32 %val1, ptr addrspace(1) undef
; Callee that reads all three workitem IDs. Each is expected to be unpacked
; from v31 (X: mask 0x3ff, Y: offset 10, Z: offset 20) and stored.
58 ; GCN-LABEL: {{^}}use_workitem_id_xyz:
60 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
61 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
62 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
63 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
64 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
65 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
67 ; GCN-NEXT: s_setpc_b64
68 define void @use_workitem_id_xyz() #1 {
69 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
70 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
71 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
72 store volatile i32 %val0, ptr addrspace(1) undef
73 store volatile i32 %val1, ptr addrspace(1) undef
74 store volatile i32 %val2, ptr addrspace(1) undef
; Callee that reads workitem IDs X and Z (skipping Y). Both are expected to be
; unpacked from v31 and stored.
78 ; GCN-LABEL: {{^}}use_workitem_id_xz:
80 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
81 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
82 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
83 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
85 ; GCN-NEXT: s_setpc_b64
86 define void @use_workitem_id_xz() #1 {
87 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
88 %val1 = call i32 @llvm.amdgcn.workitem.id.z()
89 store volatile i32 %val0, ptr addrspace(1) undef
90 store volatile i32 %val1, ptr addrspace(1) undef
; Callee that reads workitem IDs Y and Z (no X). Both are expected to be
; extracted from v31 with bitfield extracts and stored, followed by a wait and
; the return.
94 ; GCN-LABEL: {{^}}use_workitem_id_yz:
96 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
97 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
98 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
99 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
100 ; GCN-NEXT: s_waitcnt
101 ; GCN-NEXT: s_setpc_b64
102 define void @use_workitem_id_yz() #1 {
103 %val0 = call i32 @llvm.amdgcn.workitem.id.y()
104 %val1 = call i32 @llvm.amdgcn.workitem.id.z()
105 store volatile i32 %val0, ptr addrspace(1) undef
106 store volatile i32 %val1, ptr addrspace(1) undef
; Kernel that calls use_workitem_id_x. On both targets v0 is expected to be
; copied into v31 for the callee, and the kernel descriptor is expected to
; carry .amdhsa_system_vgpr_workitem_id 0.
110 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
111 ; GCN: v_mov_b32_e32 v31, v0
115 ; GCN: .amdhsa_system_vgpr_workitem_id 0
116 define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
117 call void @use_workitem_id_x()
; Kernel that calls use_workitem_id_y. With packed IDs v0 is forwarded as-is;
; with unpacked IDs v1 (Y) is shifted left by 10 into v31. The descriptor is
; expected to carry .amdhsa_system_vgpr_workitem_id 1.
121 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
126 ; PACKED-TID: v_mov_b32_e32 v31, v0
127 ; UNPACKED-TID: v_lshlrev_b32_e32 v31, 10, v1
132 ; GCN: .amdhsa_system_vgpr_workitem_id 1
133 define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
134 call void @use_workitem_id_y()
; Kernel that calls use_workitem_id_z. With packed IDs v0 is forwarded as-is;
; with unpacked IDs v2 (Z) is shifted left by 20 into v31. The descriptor is
; expected to carry .amdhsa_system_vgpr_workitem_id 2.
138 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
143 ; PACKED-TID: v_mov_b32_e32 v31, v0
144 ; UNPACKED-TID: v_lshlrev_b32_e32 v31, 20, v2
149 ; GCN: .amdhsa_system_vgpr_workitem_id 2
150 define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
151 call void @use_workitem_id_z()
; Kernel that calls use_workitem_id_xy. With unpacked IDs, Y (v1) is shifted
; left by 10 and OR-ed with X (v0) into v31; with packed IDs v0 is copied.
155 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xy:
158 ; PACKED-TID: v_mov_b32_e32 v31, v0
159 ; UNPACKED-TID: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
160 ; UNPACKED-TID: v_or_b32_e32 v31, v0, [[IDY]]
164 define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
165 call void @use_workitem_id_xy()
; Kernel that calls use_workitem_id_xz. With unpacked IDs, Z (v2) is shifted
; left by 20 and OR-ed with X (v0) into v31; with packed IDs v0 is copied.
169 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xz:
173 ; PACKED-TID: v_mov_b32_e32 v31, v0
174 ; UNPACKED-TID: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
175 ; UNPACKED-TID: v_or_b32_e32 v31, v0, [[IDZ]]
179 define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
180 call void @use_workitem_id_xz()
; Kernel that calls use_workitem_id_yz. With unpacked IDs, Y (v1) is shifted by
; 10 and Z (v2) by 20 (in either order), then OR-ed together into v31; with
; packed IDs v0 is copied.
184 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_yz:
187 ; PACKED-TID: v_mov_b32_e32 v31, v0
188 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
189 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
190 ; UNPACKED-TID: v_or_b32_e32 v31, [[IDY]], [[IDZ]]
194 define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
195 call void @use_workitem_id_yz()
; Kernel that calls use_workitem_id_xyz. With unpacked IDs, Y and Z are shifted
; into position and OR-ed with X in two steps, the final result landing in v31;
; with packed IDs v0 is copied.
199 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xyz:
204 ; PACKED-TID: v_mov_b32_e32 v31, v0
206 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
207 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
208 ; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, [[IDY]]
209 ; UNPACKED-TID-DAG: v_or_b32_e32 v31, v0, [[IDZ]]
214 define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 {
215 call void @use_workitem_id_xyz()
; Non-kernel function that forwards to use_workitem_id_x.
219 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_x:
223 define void @func_indirect_use_workitem_id_x() #1 {
224 call void @use_workitem_id_x()
; Non-kernel function that forwards to use_workitem_id_y.
228 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_y:
232 define void @func_indirect_use_workitem_id_y() #1 {
233 call void @use_workitem_id_y()
; Non-kernel function that forwards to use_workitem_id_z.
237 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_z:
241 define void @func_indirect_use_workitem_id_z() #1 {
242 call void @use_workitem_id_z()
; Callee with one ordinary i32 argument plus a use of workitem ID X. The
; argument is expected in v0 and the ID is expected to be masked out of v31;
; both values are stored.
246 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_x:
248 ; GCN-DAG: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
249 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
250 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
251 define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
252 %val = call i32 @llvm.amdgcn.workitem.id.x()
253 store volatile i32 %arg0, ptr addrspace(1) undef
254 store volatile i32 %val, ptr addrspace(1) undef
; Callee with one ordinary i32 argument plus a use of workitem ID Y. The
; argument is expected in v0 and the ID in bits 10..19 of v31.
258 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_y:
260 ; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
261 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
262 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
263 define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
264 %val = call i32 @llvm.amdgcn.workitem.id.y()
265 store volatile i32 %arg0, ptr addrspace(1) undef
266 store volatile i32 %val, ptr addrspace(1) undef
; Callee with one ordinary i32 argument plus a use of workitem ID Z. The
; argument is expected in v0 and the ID in bits 20..29 of v31.
270 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_z:
272 ; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
273 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
274 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
275 define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
276 %val = call i32 @llvm.amdgcn.workitem.id.z()
277 store volatile i32 %arg0, ptr addrspace(1) undef
278 store volatile i32 %val, ptr addrspace(1) undef
; Kernel that calls other_arg_use_workitem_id_x with the constant 555 (0x22b).
; v0 must be copied to v31 before v0 is overwritten with the argument value.
283 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:
285 ; GCN: v_mov_b32_e32 v31, v0
286 ; GCN: v_mov_b32_e32 v0, 0x22b
289 ; GCN: .amdhsa_system_vgpr_workitem_id 0
290 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
291 call void @other_arg_use_workitem_id_x(i32 555)
; Kernel that calls other_arg_use_workitem_id_y with the constant 555 (0x22b).
; v31 is set up first (shifted v1 when unpacked, copy of v0 when packed), then
; v0 receives the argument value.
296 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:
298 ; UNPACKED-TID: v_lshlrev_b32_e32 v31, 10, v1
299 ; PACKED-TID: v_mov_b32_e32 v31, v0
301 ; GCN: v_mov_b32_e32 v0, 0x22b
306 ; GCN: .amdhsa_system_vgpr_workitem_id 1
307 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
308 call void @other_arg_use_workitem_id_y(i32 555)
; Kernel that calls other_arg_use_workitem_id_z with the constant 555 (0x22b).
; The v31 setup and the argument move may appear in either order here.
312 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:
314 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b
315 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v31, 20, v2
316 ; PACKED-TID-DAG: v_mov_b32_e32 v31, v0
320 ; GCN: .amdhsa_system_vgpr_workitem_id 2
321 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
322 call void @other_arg_use_workitem_id_z(i32 555)
; Callee with 32 i32 arguments that also reads workitem ID X. The checks expect
; the ID to be masked out of v31 in place and %arg31 to be loaded from the
; stack at s32 before being stored.
326 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
327 ; GCN-DAG: v_and_b32_e32 v31, 0x3ff, v31
328 ; GCN-DAG: buffer_load_dword [[LOAD_ARG31:v[0-9]+]], off, s[0:3], s32{{$}}
329 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[LOAD_ARG31]]
330 ; GCN-NEXT: s_waitcnt
331 ; GCN-NEXT: s_setpc_b64
332 define void @too_many_args_use_workitem_id_x(
333 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
334 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
335 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
336 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
337 %val = call i32 @llvm.amdgcn.workitem.id.x()
338 store volatile i32 %val, ptr addrspace(1) undef
; Store every argument so that none of them is dead.
340 store volatile i32 %arg0, ptr addrspace(1) undef
341 store volatile i32 %arg1, ptr addrspace(1) undef
342 store volatile i32 %arg2, ptr addrspace(1) undef
343 store volatile i32 %arg3, ptr addrspace(1) undef
344 store volatile i32 %arg4, ptr addrspace(1) undef
345 store volatile i32 %arg5, ptr addrspace(1) undef
346 store volatile i32 %arg6, ptr addrspace(1) undef
347 store volatile i32 %arg7, ptr addrspace(1) undef
349 store volatile i32 %arg8, ptr addrspace(1) undef
350 store volatile i32 %arg9, ptr addrspace(1) undef
351 store volatile i32 %arg10, ptr addrspace(1) undef
352 store volatile i32 %arg11, ptr addrspace(1) undef
353 store volatile i32 %arg12, ptr addrspace(1) undef
354 store volatile i32 %arg13, ptr addrspace(1) undef
355 store volatile i32 %arg14, ptr addrspace(1) undef
356 store volatile i32 %arg15, ptr addrspace(1) undef
358 store volatile i32 %arg16, ptr addrspace(1) undef
359 store volatile i32 %arg17, ptr addrspace(1) undef
360 store volatile i32 %arg18, ptr addrspace(1) undef
361 store volatile i32 %arg19, ptr addrspace(1) undef
362 store volatile i32 %arg20, ptr addrspace(1) undef
363 store volatile i32 %arg21, ptr addrspace(1) undef
364 store volatile i32 %arg22, ptr addrspace(1) undef
365 store volatile i32 %arg23, ptr addrspace(1) undef
367 store volatile i32 %arg24, ptr addrspace(1) undef
368 store volatile i32 %arg25, ptr addrspace(1) undef
369 store volatile i32 %arg26, ptr addrspace(1) undef
370 store volatile i32 %arg27, ptr addrspace(1) undef
371 store volatile i32 %arg28, ptr addrspace(1) undef
372 store volatile i32 %arg29, ptr addrspace(1) undef
373 store volatile i32 %arg30, ptr addrspace(1) undef
374 store volatile i32 %arg31, ptr addrspace(1) undef
; Kernel that calls too_many_args_use_workitem_id_x with 32 constants. The
; checks expect the stack pointer s32 to start at 0, a dword to be stored to
; the outgoing argument slot at s32, and v0 to be copied into v31.
379 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:
381 ; GCN: s_mov_b32 s32, 0
382 ; GCN: buffer_store_dword v1, off, s[0:3], s32{{$}}
383 ; GCN: v_mov_b32_e32 v31, v0
386 ; GCN: .amdhsa_system_vgpr_workitem_id 0
387 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
388 call void @too_many_args_use_workitem_id_x(
389 i32 10, i32 20, i32 30, i32 40,
390 i32 50, i32 60, i32 70, i32 80,
391 i32 90, i32 100, i32 110, i32 120,
392 i32 130, i32 140, i32 150, i32 160,
393 i32 170, i32 180, i32 190, i32 200,
394 i32 210, i32 220, i32 230, i32 240,
395 i32 250, i32 260, i32 270, i32 280,
396 i32 290, i32 300, i32 310, i32 320)
; Non-kernel caller of too_many_args_use_workitem_id_x. The checks expect s33
; to be set from s32 and a dword to be stored to the outgoing argument slot at
; s32.
400 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
402 ; GCN: s_mov_b32 s33, s32
403 ; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
407 define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
408 store volatile i32 %arg0, ptr addrspace(1) undef
409 call void @too_many_args_use_workitem_id_x(
410 i32 10, i32 20, i32 30, i32 40,
411 i32 50, i32 60, i32 70, i32 80,
412 i32 90, i32 100, i32 110, i32 120,
413 i32 130, i32 140, i32 150, i32 160,
414 i32 170, i32 180, i32 190, i32 200,
415 i32 210, i32 220, i32 230, i32 240,
416 i32 250, i32 260, i32 270, i32 280,
417 i32 290, i32 300, i32 310, i32 320)
421 ; Requires loading and storing to stack slot.
; A function with 32 i32 arguments forwards all of them to
; too_many_args_use_workitem_id_x. The stack-passed incoming argument is
; expected to be reloaded from the caller's frame (s33) and stored to the
; outgoing slot (s32); v40 is expected to be spilled and reloaded around the
; call.
422 ; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
423 ; GCN-DAG: s_addk_i32 s32, 0x400{{$}}
424 ; GCN-DAG: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
425 ; GCN-DAG: buffer_load_dword [[TMP_REG:v[0-9]+]], off, s[0:3], s33{{$}}
427 ; GCN: buffer_store_dword [[TMP_REG]], off, s[0:3], s32{{$}}
431 ; GCN: s_mov_b32 s32, s33
432 ; GCN: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
434 define void @too_many_args_call_too_many_args_use_workitem_id_x(
435 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
436 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
437 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
438 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
439 call void @too_many_args_use_workitem_id_x(
440 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
441 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
442 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
443 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31)
448 ; frame[0] = stack passed arg31
449 ; frame[1] = byval arg32
; Callee with 32 i32 arguments, a byval i32 in private memory, and a use of
; workitem ID X. The checks expect the ID to be masked out of v31, %arg31 to be
; loaded from s32 (offset 0), and the byval object to be loaded from s32
; offset 4.
451 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
452 ; GCN-DAG: v_and_b32_e32 v31, 0x3ff, v31
453 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31
454 ; GCN-DAG: buffer_load_dword [[LOAD_ARG31:v[0-9]+]], off, s[0:3], s32{{$}}
455 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[LOAD_ARG31]]
456 ; GCN: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4 glc
457 ; GCN-NEXT: s_waitcnt
458 ; GCN-NEXT: s_setpc_b64
459 define void @too_many_args_use_workitem_id_x_byval(
460 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
461 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
462 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
463 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, ptr addrspace(5) byval(i32) %arg32) #1 {
464 %val = call i32 @llvm.amdgcn.workitem.id.x()
465 store volatile i32 %val, ptr addrspace(1) undef
; Store every i32 argument so that none of them is dead.
467 store volatile i32 %arg0, ptr addrspace(1) undef
468 store volatile i32 %arg1, ptr addrspace(1) undef
469 store volatile i32 %arg2, ptr addrspace(1) undef
470 store volatile i32 %arg3, ptr addrspace(1) undef
471 store volatile i32 %arg4, ptr addrspace(1) undef
472 store volatile i32 %arg5, ptr addrspace(1) undef
473 store volatile i32 %arg6, ptr addrspace(1) undef
474 store volatile i32 %arg7, ptr addrspace(1) undef
476 store volatile i32 %arg8, ptr addrspace(1) undef
477 store volatile i32 %arg9, ptr addrspace(1) undef
478 store volatile i32 %arg10, ptr addrspace(1) undef
479 store volatile i32 %arg11, ptr addrspace(1) undef
480 store volatile i32 %arg12, ptr addrspace(1) undef
481 store volatile i32 %arg13, ptr addrspace(1) undef
482 store volatile i32 %arg14, ptr addrspace(1) undef
483 store volatile i32 %arg15, ptr addrspace(1) undef
485 store volatile i32 %arg16, ptr addrspace(1) undef
486 store volatile i32 %arg17, ptr addrspace(1) undef
487 store volatile i32 %arg18, ptr addrspace(1) undef
488 store volatile i32 %arg19, ptr addrspace(1) undef
489 store volatile i32 %arg20, ptr addrspace(1) undef
490 store volatile i32 %arg21, ptr addrspace(1) undef
491 store volatile i32 %arg22, ptr addrspace(1) undef
492 store volatile i32 %arg23, ptr addrspace(1) undef
494 store volatile i32 %arg24, ptr addrspace(1) undef
495 store volatile i32 %arg25, ptr addrspace(1) undef
496 store volatile i32 %arg26, ptr addrspace(1) undef
497 store volatile i32 %arg27, ptr addrspace(1) undef
498 store volatile i32 %arg28, ptr addrspace(1) undef
499 store volatile i32 %arg29, ptr addrspace(1) undef
500 store volatile i32 %arg30, ptr addrspace(1) undef
501 store volatile i32 %arg31, ptr addrspace(1) undef
; Read the byval object so its stack slot is actually used.
502 %private = load volatile i32, ptr addrspace(5) %arg32
506 ; sp[0] = stack passed %arg31
; sp[1] = copy of the byval object
; Kernel that initializes a private i32 to 999 (0x3e7) and passes it byval
; after 32 constant i32 arguments. The last constant, 320 (0x140), goes to the
; outgoing stack slot at s32; the byval value is reloaded from the local object
; and copied to s32 offset 4.
509 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
511 ; Local stack object initialize. Offset 0 is the emergency spill slot.
512 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
513 ; GCN-DAG: s_movk_i32 s32, 0x400
514 ; GCN: buffer_store_dword [[K]], off, s[0:3], 0
516 ; Pass %arg31 on stack
; The store must use the register that was just loaded with 0x140, so the
; variable is referenced here rather than captured again.
517 ; GCN: v_mov_b32_e32 [[K1:v[0-9]+]], 0x140{{$}}
518 ; GCN: buffer_store_dword [[K1]], off, s[0:3], s32{{$}}
520 ; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0
521 ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
522 ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
525 ; GCN: .amdhsa_system_vgpr_workitem_id 0
526 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
527 %alloca = alloca i32, align 4, addrspace(5)
528 store volatile i32 999, ptr addrspace(5) %alloca
529 call void @too_many_args_use_workitem_id_x_byval(
530 i32 10, i32 20, i32 30, i32 40,
531 i32 50, i32 60, i32 70, i32 80,
532 i32 90, i32 100, i32 110, i32 120,
533 i32 130, i32 140, i32 150, i32 160,
534 i32 170, i32 180, i32 190, i32 200,
535 i32 210, i32 220, i32 230, i32 240,
536 i32 250, i32 260, i32 270, i32 280,
537 i32 290, i32 300, i32 310, i32 320,
538 ptr addrspace(5) byval(i32) %alloca)
; Non-kernel caller that initializes a private i32 to 999 (0x3e7) and passes it
; byval after 32 constant i32 arguments. The local object lives at s33; its
; value is reloaded and copied to the outgoing byval slot at s32 offset 4.
; The store of the constant is expected to be identical on both targets, so a
; single common-prefix check covers it.
542 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
543 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
544 ; GCN: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
546 ; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
547 ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
548 ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
550 define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
551 %alloca = alloca i32, align 4, addrspace(5)
552 store volatile i32 999, ptr addrspace(5) %alloca
553 call void @too_many_args_use_workitem_id_x_byval(
554 i32 10, i32 20, i32 30, i32 40,
555 i32 50, i32 60, i32 70, i32 80,
556 i32 90, i32 100, i32 110, i32 120,
557 i32 130, i32 140, i32 150, i32 160,
558 i32 170, i32 180, i32 190, i32 200,
559 i32 210, i32 220, i32 230, i32 240,
560 i32 250, i32 260, i32 270, i32 280,
561 i32 290, i32 300, i32 310, i32 320,
562 ptr addrspace(5) byval(i32) %alloca)
; Callee with 32 i32 arguments that reads all three workitem IDs. The IDs are
; expected to be unpacked from v31 and %arg31 to be loaded from the stack at
; s32. Instruction order and register choice differ between the two targets,
; so most checks are per-target.
566 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
567 ; GFX90A: buffer_load_dword [[LOAD_ARG31:v[0-9]+]], off, s[0:3], s32{{$}}
568 ; GFX90A: v_and_b32_e32 [[ID_X:v[0-9]+]], 0x3ff, v31
569 ; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, [[ID_X]], off{{$}}
570 ; GFX90A: v_bfe_u32 [[ID_Y:v[0-9]+]], v31, 10, 10
571 ; GFX90A: v_bfe_u32 [[ID_Z:v[0-9]+]], v31, 20, 10
572 ; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, [[ID_Y]], off{{$}}
573 ; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, [[ID_Z]], off{{$}}
; NOTE(review): the Z extract below previously used a prefix that neither
; configuration enables, so it was never verified. Confirm it passes on kaveri.
575 ; GFX7: v_and_b32_e32 v32, 0x3ff, v31
576 ; GFX7: v_bfe_u32 v32, v31, 10, 10
577 ; GFX7: v_bfe_u32 v31, v31, 20, 10
578 ; GFX7: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32{{$}}
579 ; GFX7: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v31{{$}}
580 ; GFX7: buffer_load_dword [[LOAD_ARG31:v[0-9]+]], off, s[0:3], s32{{$}}
582 ; GFX7: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, [[LOAD_ARG31]]
583 ; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, [[LOAD_ARG31]]
585 ; GCN-NEXT: s_waitcnt
586 ; GCN-NEXT: s_setpc_b64
587 define void @too_many_args_use_workitem_id_xyz(
588 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
589 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
590 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
591 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
592 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
593 store volatile i32 %val0, ptr addrspace(1) undef
594 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
595 store volatile i32 %val1, ptr addrspace(1) undef
596 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
597 store volatile i32 %val2, ptr addrspace(1) undef
; Store every argument so that none of them is dead.
599 store volatile i32 %arg0, ptr addrspace(1) undef
600 store volatile i32 %arg1, ptr addrspace(1) undef
601 store volatile i32 %arg2, ptr addrspace(1) undef
602 store volatile i32 %arg3, ptr addrspace(1) undef
603 store volatile i32 %arg4, ptr addrspace(1) undef
604 store volatile i32 %arg5, ptr addrspace(1) undef
605 store volatile i32 %arg6, ptr addrspace(1) undef
606 store volatile i32 %arg7, ptr addrspace(1) undef
608 store volatile i32 %arg8, ptr addrspace(1) undef
609 store volatile i32 %arg9, ptr addrspace(1) undef
610 store volatile i32 %arg10, ptr addrspace(1) undef
611 store volatile i32 %arg11, ptr addrspace(1) undef
612 store volatile i32 %arg12, ptr addrspace(1) undef
613 store volatile i32 %arg13, ptr addrspace(1) undef
614 store volatile i32 %arg14, ptr addrspace(1) undef
615 store volatile i32 %arg15, ptr addrspace(1) undef
617 store volatile i32 %arg16, ptr addrspace(1) undef
618 store volatile i32 %arg17, ptr addrspace(1) undef
619 store volatile i32 %arg18, ptr addrspace(1) undef
620 store volatile i32 %arg19, ptr addrspace(1) undef
621 store volatile i32 %arg20, ptr addrspace(1) undef
622 store volatile i32 %arg21, ptr addrspace(1) undef
623 store volatile i32 %arg22, ptr addrspace(1) undef
624 store volatile i32 %arg23, ptr addrspace(1) undef
626 store volatile i32 %arg24, ptr addrspace(1) undef
627 store volatile i32 %arg25, ptr addrspace(1) undef
628 store volatile i32 %arg26, ptr addrspace(1) undef
629 store volatile i32 %arg27, ptr addrspace(1) undef
630 store volatile i32 %arg28, ptr addrspace(1) undef
631 store volatile i32 %arg29, ptr addrspace(1) undef
632 store volatile i32 %arg30, ptr addrspace(1) undef
633 store volatile i32 %arg31, ptr addrspace(1) undef
638 ; v31 = packed workitem ID { Z, Y, X }; sp[0] = stack passed %arg31
; Kernel that calls too_many_args_use_workitem_id_xyz with 32 constants. With
; unpacked IDs, Y and Z are shifted into place and OR-ed with X into v31. The
; last constant, 320 (0x140), is stored to the outgoing stack slot at s32.
640 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:
642 ; GCN-DAG: s_mov_b32 s32, 0
644 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v1, 10, v1
645 ; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, v1
646 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v2, 20, v2
647 ; UNPACKED-TID-DAG: v_or_b32_e32 v31, v0, v2
651 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x140
652 ; GCN-DAG: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
655 ; GCN: .amdhsa_system_vgpr_workitem_id 2
656 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
657 call void @too_many_args_use_workitem_id_xyz(
658 i32 10, i32 20, i32 30, i32 40,
659 i32 50, i32 60, i32 70, i32 80,
660 i32 90, i32 100, i32 110, i32 120,
661 i32 130, i32 140, i32 150, i32 160,
662 i32 170, i32 180, i32 190, i32 200,
663 i32 210, i32 220, i32 230, i32 240,
664 i32 250, i32 260, i32 270, i32 280,
665 i32 290, i32 300, i32 310, i32 320)
669 ; Workitem IDs X, Y and Z are all read from the packed register.
670 ; v31 = workitem ID { Z, Y, X }
671 ; Only 31 i32 arguments are passed, and no scratch is expected to be used.
; The checks expect the three IDs to be unpacked from v31 and stored, followed
; by 31 further stores (one per argument) and a reported ScratchSize of 0.
673 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz:
674 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
675 ; GCN-DAG: {{flat|global}}_store_dword v[0:1], [[IDX]]
676 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
677 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDY]]
678 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
679 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDZ]]
681 ; GCN-COUNT-31: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}
682 ; GCN-NEXT: s_waitcnt
684 ; GCN: ScratchSize: 0
685 define void @too_many_args_use_workitem_id_x_stack_yz(
686 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
687 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
688 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
689 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) #1 {
690 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
691 store volatile i32 %val0, ptr addrspace(1) undef
692 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
693 store volatile i32 %val1, ptr addrspace(1) undef
694 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
695 store volatile i32 %val2, ptr addrspace(1) undef
; Store every argument so that none of them is dead.
697 store volatile i32 %arg0, ptr addrspace(1) undef
698 store volatile i32 %arg1, ptr addrspace(1) undef
699 store volatile i32 %arg2, ptr addrspace(1) undef
700 store volatile i32 %arg3, ptr addrspace(1) undef
701 store volatile i32 %arg4, ptr addrspace(1) undef
702 store volatile i32 %arg5, ptr addrspace(1) undef
703 store volatile i32 %arg6, ptr addrspace(1) undef
704 store volatile i32 %arg7, ptr addrspace(1) undef
706 store volatile i32 %arg8, ptr addrspace(1) undef
707 store volatile i32 %arg9, ptr addrspace(1) undef
708 store volatile i32 %arg10, ptr addrspace(1) undef
709 store volatile i32 %arg11, ptr addrspace(1) undef
710 store volatile i32 %arg12, ptr addrspace(1) undef
711 store volatile i32 %arg13, ptr addrspace(1) undef
712 store volatile i32 %arg14, ptr addrspace(1) undef
713 store volatile i32 %arg15, ptr addrspace(1) undef
715 store volatile i32 %arg16, ptr addrspace(1) undef
716 store volatile i32 %arg17, ptr addrspace(1) undef
717 store volatile i32 %arg18, ptr addrspace(1) undef
718 store volatile i32 %arg19, ptr addrspace(1) undef
719 store volatile i32 %arg20, ptr addrspace(1) undef
720 store volatile i32 %arg21, ptr addrspace(1) undef
721 store volatile i32 %arg22, ptr addrspace(1) undef
722 store volatile i32 %arg23, ptr addrspace(1) undef
724 store volatile i32 %arg24, ptr addrspace(1) undef
725 store volatile i32 %arg25, ptr addrspace(1) undef
726 store volatile i32 %arg26, ptr addrspace(1) undef
727 store volatile i32 %arg27, ptr addrspace(1) undef
728 store volatile i32 %arg28, ptr addrspace(1) undef
729 store volatile i32 %arg29, ptr addrspace(1) undef
730 store volatile i32 %arg30, ptr addrspace(1) undef
; Kernel that calls too_many_args_use_workitem_id_x_stack_yz with 31 constants.
; With unpacked IDs, Y and Z are shifted into place and OR-ed with X into v31;
; with packed IDs v0 is copied. The stack pointer s32 is expected to start at 0.
735 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:
738 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v1, 10, v1
739 ; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, v1
740 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v2, 20, v2
741 ; UNPACKED-TID-DAG: v_or_b32_e32 v31, v0, v2
742 ; PACKED-TID: v_mov_b32_e32 v31, v0
744 ; GCN: s_mov_b32 s32, 0
747 ; GCN: .amdhsa_system_vgpr_workitem_id 2
748 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
749 call void @too_many_args_use_workitem_id_x_stack_yz(
750 i32 10, i32 20, i32 30, i32 40,
751 i32 50, i32 60, i32 70, i32 80,
752 i32 90, i32 100, i32 110, i32 120,
753 i32 130, i32 140, i32 150, i32 160,
754 i32 170, i32 180, i32 190, i32 200,
755 i32 210, i32 220, i32 230, i32 240,
756 i32 250, i32 260, i32 270, i32 280,
757 i32 290, i32 300, i32 310)
; Workitem ID intrinsics used by the callees in this file.
761 declare i32 @llvm.amdgcn.workitem.id.x() #0
762 declare i32 @llvm.amdgcn.workitem.id.y() #0
763 declare i32 @llvm.amdgcn.workitem.id.z() #0
; Attribute group 0 is attached to the intrinsic declarations and group 1 to
; every function and kernel defined in this file. Both set the flat work-group
; size range to 1,512; group 1 additionally marks its functions noinline.
765 attributes #0 = { nounwind readnone speculatable "amdgpu-flat-work-group-size"="1,512" }
766 attributes #1 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }