1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX7,UNPACKED-TID %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,PACKED-TID %s
4 ; GCN-LABEL: {{^}}use_workitem_id_x:
6 ; GCN: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
7 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
9 ; GCN-NEXT: s_setpc_b64
; Reads workitem ID X through the intrinsic and volatile-stores it to an
; undef global (addrspace(1)) pointer.
10 define void @use_workitem_id_x() #1 {
11 %val = call i32 @llvm.amdgcn.workitem.id.x()
12 store volatile i32 %val, ptr addrspace(1) undef
16 ; GCN-LABEL: {{^}}use_workitem_id_y:
18 ; GCN: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
19 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
21 ; GCN-NEXT: s_setpc_b64
; Reads workitem ID Y through the intrinsic and volatile-stores it to an
; undef global (addrspace(1)) pointer.
22 define void @use_workitem_id_y() #1 {
23 %val = call i32 @llvm.amdgcn.workitem.id.y()
24 store volatile i32 %val, ptr addrspace(1) undef
28 ; GCN-LABEL: {{^}}use_workitem_id_z:
30 ; GCN: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
31 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
33 ; GCN-NEXT: s_setpc_b64
; Reads workitem ID Z through the intrinsic and volatile-stores it to an
; undef global (addrspace(1)) pointer.
34 define void @use_workitem_id_z() #1 {
35 %val = call i32 @llvm.amdgcn.workitem.id.z()
36 store volatile i32 %val, ptr addrspace(1) undef
40 ; GCN-LABEL: {{^}}use_workitem_id_xy:
42 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
43 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
44 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDX]]
45 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDY]]
47 ; GCN-NEXT: s_setpc_b64
; Reads workitem IDs X and Y, then volatile-stores each to an undef global
; pointer (X first, then Y).
48 define void @use_workitem_id_xy() #1 {
49 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
50 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
51 store volatile i32 %val0, ptr addrspace(1) undef
52 store volatile i32 %val1, ptr addrspace(1) undef
56 ; GCN-LABEL: {{^}}use_workitem_id_xyz:
58 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
59 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
60 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
61 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDX]]
62 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDY]]
63 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDZ]]
65 ; GCN-NEXT: s_setpc_b64
; Reads workitem IDs X, Y and Z, then volatile-stores each to an undef global
; pointer in X, Y, Z order.
66 define void @use_workitem_id_xyz() #1 {
67 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
68 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
69 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
70 store volatile i32 %val0, ptr addrspace(1) undef
71 store volatile i32 %val1, ptr addrspace(1) undef
72 store volatile i32 %val2, ptr addrspace(1) undef
76 ; GCN-LABEL: {{^}}use_workitem_id_xz:
78 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
79 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
80 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDX]]
81 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDZ]]
83 ; GCN-NEXT: s_setpc_b64
; Reads workitem IDs X and Z (Y is not used), then volatile-stores each to an
; undef global pointer (X first, then Z).
84 define void @use_workitem_id_xz() #1 {
85 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
86 %val1 = call i32 @llvm.amdgcn.workitem.id.z()
87 store volatile i32 %val0, ptr addrspace(1) undef
88 store volatile i32 %val1, ptr addrspace(1) undef
92 ; GCN-LABEL: {{^}}use_workitem_id_yz:
94 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
95 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
96 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDY]]
97 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDZ]]
99 ; GCN-NEXT: s_setpc_b64
; Reads workitem IDs Y and Z (X is not used), then volatile-stores each to an
; undef global pointer (Y first, then Z).
100 define void @use_workitem_id_yz() #1 {
101 %val0 = call i32 @llvm.amdgcn.workitem.id.y()
102 %val1 = call i32 @llvm.amdgcn.workitem.id.z()
103 store volatile i32 %val0, ptr addrspace(1) undef
104 store volatile i32 %val1, ptr addrspace(1) undef
108 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
109 ; GCN: v_mov_b32_e32 v31, v0
113 ; GCN: .amdhsa_system_vgpr_workitem_id 0
; Kernel whose only visible action is a call to @use_workitem_id_x; the ID is
; read by the callee, not by the kernel itself.
114 define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
115 call void @use_workitem_id_x()
119 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
124 ; PACKED-TID: v_mov_b32_e32 v31, v0
125 ; UNPACKED-TID: v_lshlrev_b32_e32 v31, 10, v1
130 ; GCN: .amdhsa_system_vgpr_workitem_id 1
; Kernel whose only visible action is a call to @use_workitem_id_y; the ID is
; read by the callee, not by the kernel itself.
131 define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
132 call void @use_workitem_id_y()
136 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
141 ; PACKED-TID: v_mov_b32_e32 v31, v0
142 ; UNPACKED-TID: v_lshlrev_b32_e32 v31, 20, v2
147 ; GCN: .amdhsa_system_vgpr_workitem_id 2
; Kernel whose only visible action is a call to @use_workitem_id_z; the ID is
; read by the callee, not by the kernel itself.
148 define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
149 call void @use_workitem_id_z()
153 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xy:
156 ; PACKED-TID: v_mov_b32_e32 v31, v0
157 ; UNPACKED-TID: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
158 ; UNPACKED-TID: v_or_b32_e32 v31, v0, [[IDY]]
; Kernel whose only visible action is a call to @use_workitem_id_xy, so the
; callee needs both the X and Y IDs.
162 define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
163 call void @use_workitem_id_xy()
167 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xz:
171 ; PACKED-TID: v_mov_b32_e32 v31, v0
172 ; UNPACKED-TID: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
173 ; UNPACKED-TID: v_or_b32_e32 v31, v0, [[IDZ]]
; Kernel whose only visible action is a call to @use_workitem_id_xz, so the
; callee needs the X and Z IDs.
177 define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
178 call void @use_workitem_id_xz()
182 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_yz:
185 ; PACKED-TID: v_mov_b32_e32 v31, v0
186 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
187 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
188 ; UNPACKED-TID: v_or_b32_e32 v31, [[IDY]], [[IDZ]]
; Kernel whose only visible action is a call to @use_workitem_id_yz, so the
; callee needs the Y and Z IDs.
192 define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
193 call void @use_workitem_id_yz()
197 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xyz:
202 ; PACKED-TID: v_mov_b32_e32 v31, v0
204 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
205 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
206 ; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, [[IDY]]
207 ; UNPACKED-TID-DAG: v_or_b32_e32 v31, v0, [[IDZ]]
; Kernel whose only visible action is a call to @use_workitem_id_xyz, so the
; callee needs all three IDs.
212 define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 {
213 call void @use_workitem_id_xyz()
217 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_x:
; Non-kernel function whose only visible action is a call to
; @use_workitem_id_x.
221 define void @func_indirect_use_workitem_id_x() #1 {
222 call void @use_workitem_id_x()
226 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_y:
; Non-kernel function whose only visible action is a call to
; @use_workitem_id_y.
230 define void @func_indirect_use_workitem_id_y() #1 {
231 call void @use_workitem_id_y()
235 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_z:
; Non-kernel function whose only visible action is a call to
; @use_workitem_id_z.
239 define void @func_indirect_use_workitem_id_z() #1 {
240 call void @use_workitem_id_z()
244 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_x:
246 ; GCN-DAG: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
247 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
248 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
; Takes one explicit i32 argument in addition to reading workitem ID X.
; Volatile-stores the argument first, then the ID, to an undef global pointer.
249 define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
250 %val = call i32 @llvm.amdgcn.workitem.id.x()
251 store volatile i32 %arg0, ptr addrspace(1) undef
252 store volatile i32 %val, ptr addrspace(1) undef
256 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_y:
258 ; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
259 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
260 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
; Takes one explicit i32 argument in addition to reading workitem ID Y.
; Volatile-stores the argument first, then the ID, to an undef global pointer.
261 define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
262 %val = call i32 @llvm.amdgcn.workitem.id.y()
263 store volatile i32 %arg0, ptr addrspace(1) undef
264 store volatile i32 %val, ptr addrspace(1) undef
268 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_z:
270 ; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
271 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
272 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
; Takes one explicit i32 argument in addition to reading workitem ID Z.
; Volatile-stores the argument first, then the ID, to an undef global pointer.
273 define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
274 %val = call i32 @llvm.amdgcn.workitem.id.z()
275 store volatile i32 %arg0, ptr addrspace(1) undef
276 store volatile i32 %val, ptr addrspace(1) undef
281 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:
283 ; GCN: v_mov_b32_e32 v31, v0
284 ; GCN: v_mov_b32_e32 v0, 0x22b
287 ; GCN: .amdhsa_system_vgpr_workitem_id 0
; Kernel that calls @other_arg_use_workitem_id_x with the constant 555 (0x22b)
; as the explicit argument.
288 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
289 call void @other_arg_use_workitem_id_x(i32 555)
294 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:
296 ; UNPACKED-TID: v_lshlrev_b32_e32 v31, 10, v1
297 ; PACKED-TID: v_mov_b32_e32 v31, v0
299 ; GCN: v_mov_b32_e32 v0, 0x22b
304 ; GCN: .amdhsa_system_vgpr_workitem_id 1
; Kernel that calls @other_arg_use_workitem_id_y with the constant 555 (0x22b)
; as the explicit argument.
305 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
306 call void @other_arg_use_workitem_id_y(i32 555)
310 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:
312 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b
313 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v31, 20, v2
314 ; PACKED-TID-DAG: v_mov_b32_e32 v31, v0
318 ; GCN: .amdhsa_system_vgpr_workitem_id 2
; Kernel that calls @other_arg_use_workitem_id_z with the constant 555 (0x22b)
; as the explicit argument.
319 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
320 call void @other_arg_use_workitem_id_z(i32 555)
324 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
325 ; GCN-DAG: v_and_b32_e32 v31, 0x3ff, v31
326 ; GCN-DAG: buffer_load_dword [[LOAD_ARG31:v[0-9]+]], off, s[0:3], s32{{$}}
327 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[LOAD_ARG31]]
328 ; GCN-NEXT: s_waitcnt
329 ; GCN-NEXT: s_setpc_b64
; Takes 32 explicit i32 arguments (%arg0..%arg31) and also reads workitem ID X.
; Volatile-stores the ID first, then every argument in order, to an undef
; global pointer. The directives above expect the last argument to be loaded
; from the stack at s32.
330 define void @too_many_args_use_workitem_id_x(
331 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
332 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
333 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
334 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
335 %val = call i32 @llvm.amdgcn.workitem.id.x()
336 store volatile i32 %val, ptr addrspace(1) undef
338 store volatile i32 %arg0, ptr addrspace(1) undef
339 store volatile i32 %arg1, ptr addrspace(1) undef
340 store volatile i32 %arg2, ptr addrspace(1) undef
341 store volatile i32 %arg3, ptr addrspace(1) undef
342 store volatile i32 %arg4, ptr addrspace(1) undef
343 store volatile i32 %arg5, ptr addrspace(1) undef
344 store volatile i32 %arg6, ptr addrspace(1) undef
345 store volatile i32 %arg7, ptr addrspace(1) undef
347 store volatile i32 %arg8, ptr addrspace(1) undef
348 store volatile i32 %arg9, ptr addrspace(1) undef
349 store volatile i32 %arg10, ptr addrspace(1) undef
350 store volatile i32 %arg11, ptr addrspace(1) undef
351 store volatile i32 %arg12, ptr addrspace(1) undef
352 store volatile i32 %arg13, ptr addrspace(1) undef
353 store volatile i32 %arg14, ptr addrspace(1) undef
354 store volatile i32 %arg15, ptr addrspace(1) undef
356 store volatile i32 %arg16, ptr addrspace(1) undef
357 store volatile i32 %arg17, ptr addrspace(1) undef
358 store volatile i32 %arg18, ptr addrspace(1) undef
359 store volatile i32 %arg19, ptr addrspace(1) undef
360 store volatile i32 %arg20, ptr addrspace(1) undef
361 store volatile i32 %arg21, ptr addrspace(1) undef
362 store volatile i32 %arg22, ptr addrspace(1) undef
363 store volatile i32 %arg23, ptr addrspace(1) undef
365 store volatile i32 %arg24, ptr addrspace(1) undef
366 store volatile i32 %arg25, ptr addrspace(1) undef
367 store volatile i32 %arg26, ptr addrspace(1) undef
368 store volatile i32 %arg27, ptr addrspace(1) undef
369 store volatile i32 %arg28, ptr addrspace(1) undef
370 store volatile i32 %arg29, ptr addrspace(1) undef
371 store volatile i32 %arg30, ptr addrspace(1) undef
372 store volatile i32 %arg31, ptr addrspace(1) undef
377 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:
379 ; GCN: s_mov_b32 s32, 0
380 ; GCN: buffer_store_dword v1, off, s[0:3], s32{{$}}
381 ; GCN: v_mov_b32_e32 v31, v0
384 ; GCN: .amdhsa_system_vgpr_workitem_id 0
; Kernel that calls @too_many_args_use_workitem_id_x with 32 constant
; arguments (10, 20, ..., 320).
385 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
386 call void @too_many_args_use_workitem_id_x(
387 i32 10, i32 20, i32 30, i32 40,
388 i32 50, i32 60, i32 70, i32 80,
389 i32 90, i32 100, i32 110, i32 120,
390 i32 130, i32 140, i32 150, i32 160,
391 i32 170, i32 180, i32 190, i32 200,
392 i32 210, i32 220, i32 230, i32 240,
393 i32 250, i32 260, i32 270, i32 280,
394 i32 290, i32 300, i32 310, i32 320)
398 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
400 ; GCN: s_mov_b32 s33, s32
401 ; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
; Non-kernel caller: volatile-stores its own argument, then calls
; @too_many_args_use_workitem_id_x with 32 constant arguments (10, 20, ..., 320).
405 define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
406 store volatile i32 %arg0, ptr addrspace(1) undef
407 call void @too_many_args_use_workitem_id_x(
408 i32 10, i32 20, i32 30, i32 40,
409 i32 50, i32 60, i32 70, i32 80,
410 i32 90, i32 100, i32 110, i32 120,
411 i32 130, i32 140, i32 150, i32 160,
412 i32 170, i32 180, i32 190, i32 200,
413 i32 210, i32 220, i32 230, i32 240,
414 i32 250, i32 260, i32 270, i32 280,
415 i32 290, i32 300, i32 310, i32 320)
419 ; Requires loading and storing to stack slot.
420 ; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
421 ; GCN-DAG: s_addk_i32 s32, 0x400{{$}}
422 ; GCN-DAG: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
423 ; GCN-DAG: buffer_load_dword [[TMP_REG:v[0-9]+]], off, s[0:3], s33{{$}}
425 ; GCN: buffer_store_dword [[TMP_REG]], off, s[0:3], s32{{$}}
429 ; GCN: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
430 ; GCN: s_addk_i32 s32, 0xfc00{{$}}
; Receives 32 i32 arguments and forwards all of them, unchanged and in the
; same order, to @too_many_args_use_workitem_id_x.
432 define void @too_many_args_call_too_many_args_use_workitem_id_x(
433 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
434 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
435 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
436 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
437 call void @too_many_args_use_workitem_id_x(
438 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
439 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
440 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
441 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31)
446 ; frame[0] = stack passed arg31
447 ; frame[1] = byval arg32
449 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
450 ; GCN-DAG: v_and_b32_e32 v31, 0x3ff, v31
451 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31
452 ; GCN-DAG: buffer_load_dword [[LOAD_ARG31:v[0-9]+]], off, s[0:3], s32{{$}}
453 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[LOAD_ARG31]]
454 ; GCN: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4 glc
455 ; GCN-NEXT: s_waitcnt
456 ; GCN-NEXT: s_setpc_b64
; Takes 32 explicit i32 arguments plus a byval(i32) pointer in addrspace(5),
; and also reads workitem ID X. Volatile-stores the ID, then every i32
; argument in order, and finally volatile-loads the byval value (the loaded
; value is not used in the lines visible here).
457 define void @too_many_args_use_workitem_id_x_byval(
458 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
459 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
460 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
461 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, ptr addrspace(5) byval(i32) %arg32) #1 {
462 %val = call i32 @llvm.amdgcn.workitem.id.x()
463 store volatile i32 %val, ptr addrspace(1) undef
465 store volatile i32 %arg0, ptr addrspace(1) undef
466 store volatile i32 %arg1, ptr addrspace(1) undef
467 store volatile i32 %arg2, ptr addrspace(1) undef
468 store volatile i32 %arg3, ptr addrspace(1) undef
469 store volatile i32 %arg4, ptr addrspace(1) undef
470 store volatile i32 %arg5, ptr addrspace(1) undef
471 store volatile i32 %arg6, ptr addrspace(1) undef
472 store volatile i32 %arg7, ptr addrspace(1) undef
474 store volatile i32 %arg8, ptr addrspace(1) undef
475 store volatile i32 %arg9, ptr addrspace(1) undef
476 store volatile i32 %arg10, ptr addrspace(1) undef
477 store volatile i32 %arg11, ptr addrspace(1) undef
478 store volatile i32 %arg12, ptr addrspace(1) undef
479 store volatile i32 %arg13, ptr addrspace(1) undef
480 store volatile i32 %arg14, ptr addrspace(1) undef
481 store volatile i32 %arg15, ptr addrspace(1) undef
483 store volatile i32 %arg16, ptr addrspace(1) undef
484 store volatile i32 %arg17, ptr addrspace(1) undef
485 store volatile i32 %arg18, ptr addrspace(1) undef
486 store volatile i32 %arg19, ptr addrspace(1) undef
487 store volatile i32 %arg20, ptr addrspace(1) undef
488 store volatile i32 %arg21, ptr addrspace(1) undef
489 store volatile i32 %arg22, ptr addrspace(1) undef
490 store volatile i32 %arg23, ptr addrspace(1) undef
492 store volatile i32 %arg24, ptr addrspace(1) undef
493 store volatile i32 %arg25, ptr addrspace(1) undef
494 store volatile i32 %arg26, ptr addrspace(1) undef
495 store volatile i32 %arg27, ptr addrspace(1) undef
496 store volatile i32 %arg28, ptr addrspace(1) undef
497 store volatile i32 %arg29, ptr addrspace(1) undef
498 store volatile i32 %arg30, ptr addrspace(1) undef
499 store volatile i32 %arg31, ptr addrspace(1) undef
500 %private = load volatile i32, ptr addrspace(5) %arg32
504 ; sp[0] = stack passed %arg31
507 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
509 ; Local stack object initialize. Offset 0 is the emergency spill slot.
510 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
511 ; GCN-DAG: s_movk_i32 s32, 0x400
512 ; GCN: buffer_store_dword [[K]], off, s[0:3], 0 offset:4
514 ; Pass %arg31 on stack
515 ; GCN: v_mov_b32_e32 [[K1:v[0-9]+]], 0x140{{$}}
516 ; GCN: buffer_store_dword [[K1]], off, s[0:3], s32{{$}}
518 ; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0 offset:4
519 ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
520 ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
523 ; GCN: .amdhsa_system_vgpr_workitem_id 0
; Kernel that allocates a private (addrspace(5)) i32, volatile-stores 999
; (0x3e7) into it, and passes it byval after 32 constant i32 arguments
; (10, 20, ..., 320) to @too_many_args_use_workitem_id_x_byval.
524 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
525 %alloca = alloca i32, align 4, addrspace(5)
526 store volatile i32 999, ptr addrspace(5) %alloca
527 call void @too_many_args_use_workitem_id_x_byval(
528 i32 10, i32 20, i32 30, i32 40,
529 i32 50, i32 60, i32 70, i32 80,
530 i32 90, i32 100, i32 110, i32 120,
531 i32 130, i32 140, i32 150, i32 160,
532 i32 170, i32 180, i32 190, i32 200,
533 i32 210, i32 220, i32 230, i32 240,
534 i32 250, i32 260, i32 270, i32 280,
535 i32 290, i32 300, i32 310, i32 320,
536 ptr addrspace(5) byval(i32) %alloca)
540 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
541 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
542 ; GFX7: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
543 ; GFX90A: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
544 ; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
545 ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
546 ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; Non-kernel variant of the byval caller: allocates a private (addrspace(5))
; i32, volatile-stores 999 (0x3e7) into it, and passes it byval after 32
; constant i32 arguments to @too_many_args_use_workitem_id_x_byval.
548 define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
549 %alloca = alloca i32, align 4, addrspace(5)
550 store volatile i32 999, ptr addrspace(5) %alloca
551 call void @too_many_args_use_workitem_id_x_byval(
552 i32 10, i32 20, i32 30, i32 40,
553 i32 50, i32 60, i32 70, i32 80,
554 i32 90, i32 100, i32 110, i32 120,
555 i32 130, i32 140, i32 150, i32 160,
556 i32 170, i32 180, i32 190, i32 200,
557 i32 210, i32 220, i32 230, i32 240,
558 i32 250, i32 260, i32 270, i32 280,
559 i32 290, i32 300, i32 310, i32 320,
560 ptr addrspace(5) byval(i32) %alloca)
564 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
565 ; GFX90A: buffer_load_dword [[LOAD_ARG31:v[0-9]+]], off, s[0:3], s32{{$}}
566 ; GFX90A: v_and_b32_e32 [[ID_X:v[0-9]+]], 0x3ff, v31
567 ; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, [[ID_X]], off{{$}}
568 ; GFX90A: v_bfe_u32 [[ID_Y:v[0-9]+]], v31, 10, 10
569 ; GFX90A: v_bfe_u32 [[ID_Z:v[0-9]+]], v31, 20, 10
570 ; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, [[ID_Y]], off{{$}}
571 ; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, [[ID_Z]], off{{$}}
573 ; GFX7: v_and_b32_e32 v32, 0x3ff, v31
574 ; GFX7: v_bfe_u32 v32, v31, 10, 10
575 ; GFX7: v_bfe_u32 v31, v31, 20, 10
576 ; GFX7: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32{{$}}
577 ; GFX7: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v31{{$}}
578 ; GFX7: buffer_load_dword [[LOAD_ARG31:v[0-9]+]], off, s[0:3], s32{{$}}
580 ; GFX7: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, [[LOAD_ARG31]]
581 ; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, [[LOAD_ARG31]]
583 ; GCN-NEXT: s_waitcnt
584 ; GCN-NEXT: s_setpc_b64
; Takes 32 explicit i32 arguments and reads workitem IDs X, Y and Z.
; Each ID is volatile-stored right after it is read (X, Y, Z order), followed
; by every argument in order, all to an undef global pointer.
585 define void @too_many_args_use_workitem_id_xyz(
586 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
587 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
588 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
589 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
590 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
591 store volatile i32 %val0, ptr addrspace(1) undef
592 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
593 store volatile i32 %val1, ptr addrspace(1) undef
594 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
595 store volatile i32 %val2, ptr addrspace(1) undef
597 store volatile i32 %arg0, ptr addrspace(1) undef
598 store volatile i32 %arg1, ptr addrspace(1) undef
599 store volatile i32 %arg2, ptr addrspace(1) undef
600 store volatile i32 %arg3, ptr addrspace(1) undef
601 store volatile i32 %arg4, ptr addrspace(1) undef
602 store volatile i32 %arg5, ptr addrspace(1) undef
603 store volatile i32 %arg6, ptr addrspace(1) undef
604 store volatile i32 %arg7, ptr addrspace(1) undef
606 store volatile i32 %arg8, ptr addrspace(1) undef
607 store volatile i32 %arg9, ptr addrspace(1) undef
608 store volatile i32 %arg10, ptr addrspace(1) undef
609 store volatile i32 %arg11, ptr addrspace(1) undef
610 store volatile i32 %arg12, ptr addrspace(1) undef
611 store volatile i32 %arg13, ptr addrspace(1) undef
612 store volatile i32 %arg14, ptr addrspace(1) undef
613 store volatile i32 %arg15, ptr addrspace(1) undef
615 store volatile i32 %arg16, ptr addrspace(1) undef
616 store volatile i32 %arg17, ptr addrspace(1) undef
617 store volatile i32 %arg18, ptr addrspace(1) undef
618 store volatile i32 %arg19, ptr addrspace(1) undef
619 store volatile i32 %arg20, ptr addrspace(1) undef
620 store volatile i32 %arg21, ptr addrspace(1) undef
621 store volatile i32 %arg22, ptr addrspace(1) undef
622 store volatile i32 %arg23, ptr addrspace(1) undef
624 store volatile i32 %arg24, ptr addrspace(1) undef
625 store volatile i32 %arg25, ptr addrspace(1) undef
626 store volatile i32 %arg26, ptr addrspace(1) undef
627 store volatile i32 %arg27, ptr addrspace(1) undef
628 store volatile i32 %arg28, ptr addrspace(1) undef
629 store volatile i32 %arg29, ptr addrspace(1) undef
630 store volatile i32 %arg30, ptr addrspace(1) undef
631 store volatile i32 %arg31, ptr addrspace(1) undef
636 ; v31 = packed ID { Z, Y, X }; frame[0] = stack passed %arg31
638 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:
640 ; GCN-DAG: s_mov_b32 s32, 0
642 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v1, 10, v1
643 ; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, v1
644 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v2, 20, v2
645 ; UNPACKED-TID-DAG: v_or_b32_e32 v31, v0, v2
649 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x140
650 ; GCN-DAG: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
653 ; GCN: .amdhsa_system_vgpr_workitem_id 2
; Kernel that calls @too_many_args_use_workitem_id_xyz with 32 constant
; arguments (10, 20, ..., 320).
654 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
655 call void @too_many_args_use_workitem_id_xyz(
656 i32 10, i32 20, i32 30, i32 40,
657 i32 50, i32 60, i32 70, i32 80,
658 i32 90, i32 100, i32 110, i32 120,
659 i32 130, i32 140, i32 150, i32 160,
660 i32 170, i32 180, i32 190, i32 200,
661 i32 210, i32 220, i32 230, i32 240,
662 i32 250, i32 260, i32 270, i32 280,
663 i32 290, i32 300, i32 310, i32 320)
667 ; workitem IDs X, Y and Z are all read from v31; nothing is passed on the stack
668 ; v31 = packed workitem ID { Z, Y, X }
669 ; ScratchSize is expected to be 0
671 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz:
672 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
673 ; GCN-DAG: {{flat|global}}_store_dword v[0:1], [[IDX]]
674 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
675 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDY]]
676 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
677 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDZ]]
679 ; GCN-COUNT-31: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}
680 ; GCN-NEXT: s_waitcnt
682 ; GCN: ScratchSize: 0
; Takes 31 explicit i32 arguments (%arg0..%arg30, one fewer than the other
; too_many_args functions) and reads workitem IDs X, Y and Z. Each ID is
; volatile-stored right after it is read, followed by every argument in order.
683 define void @too_many_args_use_workitem_id_x_stack_yz(
684 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
685 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
686 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
687 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) #1 {
688 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
689 store volatile i32 %val0, ptr addrspace(1) undef
690 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
691 store volatile i32 %val1, ptr addrspace(1) undef
692 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
693 store volatile i32 %val2, ptr addrspace(1) undef
695 store volatile i32 %arg0, ptr addrspace(1) undef
696 store volatile i32 %arg1, ptr addrspace(1) undef
697 store volatile i32 %arg2, ptr addrspace(1) undef
698 store volatile i32 %arg3, ptr addrspace(1) undef
699 store volatile i32 %arg4, ptr addrspace(1) undef
700 store volatile i32 %arg5, ptr addrspace(1) undef
701 store volatile i32 %arg6, ptr addrspace(1) undef
702 store volatile i32 %arg7, ptr addrspace(1) undef
704 store volatile i32 %arg8, ptr addrspace(1) undef
705 store volatile i32 %arg9, ptr addrspace(1) undef
706 store volatile i32 %arg10, ptr addrspace(1) undef
707 store volatile i32 %arg11, ptr addrspace(1) undef
708 store volatile i32 %arg12, ptr addrspace(1) undef
709 store volatile i32 %arg13, ptr addrspace(1) undef
710 store volatile i32 %arg14, ptr addrspace(1) undef
711 store volatile i32 %arg15, ptr addrspace(1) undef
713 store volatile i32 %arg16, ptr addrspace(1) undef
714 store volatile i32 %arg17, ptr addrspace(1) undef
715 store volatile i32 %arg18, ptr addrspace(1) undef
716 store volatile i32 %arg19, ptr addrspace(1) undef
717 store volatile i32 %arg20, ptr addrspace(1) undef
718 store volatile i32 %arg21, ptr addrspace(1) undef
719 store volatile i32 %arg22, ptr addrspace(1) undef
720 store volatile i32 %arg23, ptr addrspace(1) undef
722 store volatile i32 %arg24, ptr addrspace(1) undef
723 store volatile i32 %arg25, ptr addrspace(1) undef
724 store volatile i32 %arg26, ptr addrspace(1) undef
725 store volatile i32 %arg27, ptr addrspace(1) undef
726 store volatile i32 %arg28, ptr addrspace(1) undef
727 store volatile i32 %arg29, ptr addrspace(1) undef
728 store volatile i32 %arg30, ptr addrspace(1) undef
733 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:
736 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v1, 10, v1
737 ; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, v1
738 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v2, 20, v2
739 ; UNPACKED-TID-DAG: v_or_b32_e32 v31, v0, v2
740 ; PACKED-TID: v_mov_b32_e32 v31, v0
742 ; GCN: s_mov_b32 s32, 0
745 ; GCN: .amdhsa_system_vgpr_workitem_id 2
; Kernel that calls @too_many_args_use_workitem_id_x_stack_yz with 31 constant
; arguments (10, 20, ..., 310).
746 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
747 call void @too_many_args_use_workitem_id_x_stack_yz(
748 i32 10, i32 20, i32 30, i32 40,
749 i32 50, i32 60, i32 70, i32 80,
750 i32 90, i32 100, i32 110, i32 120,
751 i32 130, i32 140, i32 150, i32 160,
752 i32 170, i32 180, i32 190, i32 200,
753 i32 210, i32 220, i32 230, i32 240,
754 i32 250, i32 260, i32 270, i32 280,
755 i32 290, i32 300, i32 310)
; Workitem ID intrinsics used by every test function above; each returns i32.
759 declare i32 @llvm.amdgcn.workitem.id.x() #0
760 declare i32 @llvm.amdgcn.workitem.id.y() #0
761 declare i32 @llvm.amdgcn.workitem.id.z() #0
; #0 is applied to the intrinsic declarations; #1 is applied to every function
; defined in this test (noinline is part of #1). Both set the flat work-group
; size range to 1,512.
763 attributes #0 = { nounwind readnone speculatable "amdgpu-flat-work-group-size"="1,512" }
764 attributes #1 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }