1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,FIXEDABI %s
3 ; GCN-LABEL: {{^}}use_workitem_id_x:
5 ; FIXEDABI: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
6 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
8 ; GCN-NEXT: s_setpc_b64
; Callee that reads the workitem ID X intrinsic and volatile-stores it through
; an undef global (addrspace(1)) pointer. The checks above expect the value to
; be masked out of v31 with 0x3ff before the store.
9 define void @use_workitem_id_x() #1 {
10 %val = call i32 @llvm.amdgcn.workitem.id.x()
11 store volatile i32 %val, ptr addrspace(1) undef
15 ; GCN-LABEL: {{^}}use_workitem_id_y:
17 ; FIXEDABI: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
18 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
20 ; GCN-NEXT: s_setpc_b64
; Callee that reads the workitem ID Y intrinsic and volatile-stores it through
; an undef global (addrspace(1)) pointer. The checks above expect a v_bfe_u32
; of v31 with operands 10, 10 to produce the stored value.
21 define void @use_workitem_id_y() #1 {
22 %val = call i32 @llvm.amdgcn.workitem.id.y()
23 store volatile i32 %val, ptr addrspace(1) undef
27 ; GCN-LABEL: {{^}}use_workitem_id_z:
29 ; FIXEDABI: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
30 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
32 ; GCN-NEXT: s_setpc_b64
; Callee that reads the workitem ID Z intrinsic and volatile-stores it through
; an undef global (addrspace(1)) pointer. The checks above expect a v_bfe_u32
; of v31 with operands 20, 10 to produce the stored value.
33 define void @use_workitem_id_z() #1 {
34 %val = call i32 @llvm.amdgcn.workitem.id.z()
35 store volatile i32 %val, ptr addrspace(1) undef
39 ; GCN-LABEL: {{^}}use_workitem_id_xy:
41 ; FIXEDABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
42 ; FIXEDABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
44 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
45 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
47 ; GCN-NEXT: s_setpc_b64
; Callee that reads workitem IDs X and Y and volatile-stores each of them
; through an undef global (addrspace(1)) pointer. The checks above expect both
; values to be derived from v31 (mask 0x3ff for X, v_bfe_u32 10, 10 for Y).
48 define void @use_workitem_id_xy() #1 {
49 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
50 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
51 store volatile i32 %val0, ptr addrspace(1) undef
52 store volatile i32 %val1, ptr addrspace(1) undef
56 ; GCN-LABEL: {{^}}use_workitem_id_xyz:
59 ; FIXEDABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
60 ; FIXEDABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
61 ; FIXEDABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
64 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
65 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
66 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
68 ; GCN-NEXT: s_setpc_b64
; Callee that reads workitem IDs X, Y and Z and volatile-stores each of them
; through an undef global (addrspace(1)) pointer. The checks above expect all
; three values to be derived from v31 (mask 0x3ff, v_bfe_u32 10, 10 and
; v_bfe_u32 20, 10 respectively).
69 define void @use_workitem_id_xyz() #1 {
70 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
71 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
72 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
73 store volatile i32 %val0, ptr addrspace(1) undef
74 store volatile i32 %val1, ptr addrspace(1) undef
75 store volatile i32 %val2, ptr addrspace(1) undef
79 ; GCN-LABEL: {{^}}use_workitem_id_xz:
81 ; FIXEDABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
82 ; FIXEDABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
84 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
85 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
87 ; GCN-NEXT: s_setpc_b64
; Callee that reads workitem IDs X and Z (Y is deliberately not used) and
; volatile-stores each of them through an undef global (addrspace(1)) pointer.
; The checks above expect both values to be derived from v31.
88 define void @use_workitem_id_xz() #1 {
89 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
90 %val1 = call i32 @llvm.amdgcn.workitem.id.z()
91 store volatile i32 %val0, ptr addrspace(1) undef
92 store volatile i32 %val1, ptr addrspace(1) undef
96 ; GCN-LABEL: {{^}}use_workitem_id_yz:
98 ; FIXEDABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
99 ; FIXEDABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
101 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
102 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
103 ; GCN-NEXT: s_waitcnt
104 ; GCN-NEXT: s_setpc_b64
; Callee that reads workitem IDs Y and Z (X is deliberately not used) and
; volatile-stores each of them through an undef global (addrspace(1)) pointer.
; The checks above expect both values to be extracted from v31 with v_bfe_u32.
105 define void @use_workitem_id_yz() #1 {
106 %val0 = call i32 @llvm.amdgcn.workitem.id.y()
107 %val1 = call i32 @llvm.amdgcn.workitem.id.z()
108 store volatile i32 %val0, ptr addrspace(1) undef
109 store volatile i32 %val1, ptr addrspace(1) undef
113 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
117 ; FIXEDABI: v_mov_b32_e32 v31, v0{{$}}
123 ; GCN: .amdhsa_system_vgpr_workitem_id 0
; Kernel entry point whose only action is calling @use_workitem_id_x. The
; checks above expect v0 to be copied into v31 and the kernel descriptor to
; contain ".amdhsa_system_vgpr_workitem_id 0".
124 define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
125 call void @use_workitem_id_x()
129 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
134 ; FIXEDABI: v_lshlrev_b32_e32 v31, 10, v1
141 ; GCN: .amdhsa_system_vgpr_workitem_id 1
; Kernel entry point whose only action is calling @use_workitem_id_y. The
; checks above expect v31 to be formed as v1 shifted left by 10 and the kernel
; descriptor to contain ".amdhsa_system_vgpr_workitem_id 1".
142 define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
143 call void @use_workitem_id_y()
147 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
151 ; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2
157 ; GCN: .amdhsa_system_vgpr_workitem_id 2
; Kernel entry point whose only action is calling @use_workitem_id_z. The
; checks above expect v31 to be formed as v2 shifted left by 20 and the kernel
; descriptor to contain ".amdhsa_system_vgpr_workitem_id 2".
158 define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
159 call void @use_workitem_id_z()
163 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xy:
167 ; FIXEDABI: v_lshlrev_b32_e32 v1, 10, v1
168 ; FIXEDABI-NEXT: v_or_b32_e32 v31, v0, v1
; Kernel entry point whose only action is calling @use_workitem_id_xy. The
; checks above expect v1 to be shifted left by 10 and OR'd with v0 into v31.
174 define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
175 call void @use_workitem_id_xy()
179 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xz:
183 ; FIXEDABI: v_lshlrev_b32_e32 v1, 20, v2
184 ; FIXEDABI-NEXT: v_or_b32_e32 v31, v0, v1
; Kernel entry point whose only action is calling @use_workitem_id_xz. The
; checks above expect v2 to be shifted left by 20 and OR'd with v0 into v31.
190 define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
191 call void @use_workitem_id_xz()
195 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_yz:
199 ; FIXEDABI: v_lshlrev_b32_e32 v0, 20, v2
200 ; FIXEDABI-NEXT: v_lshlrev_b32_e32 v1, 10, v1
201 ; FIXEDABI-NEXT: v_or_b32_e32 v31, v1, v0
; Kernel entry point whose only action is calling @use_workitem_id_yz. The
; checks above expect v2 shifted left by 20 and v1 shifted left by 10 to be
; OR'd together into v31.
207 define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
208 call void @use_workitem_id_yz()
212 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xyz:
213 ; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
214 ; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
215 ; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
216 ; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
; Kernel entry point whose only action is calling @use_workitem_id_xyz. The
; checks above expect v1 shifted left by 10 and v2 shifted left by 20 to be
; OR'd with v0, with the final result written to v31.
219 define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 {
220 call void @use_workitem_id_xyz()
224 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_x:
; Non-kernel function that simply forwards to @use_workitem_id_x. Only the
; function label is checked for this case.
228 define void @func_indirect_use_workitem_id_x() #1 {
229 call void @use_workitem_id_x()
233 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_y:
; Non-kernel function that simply forwards to @use_workitem_id_y. Only the
; function label is checked for this case.
237 define void @func_indirect_use_workitem_id_y() #1 {
238 call void @use_workitem_id_y()
242 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_z:
; Non-kernel function that simply forwards to @use_workitem_id_z. Only the
; function label is checked for this case.
246 define void @func_indirect_use_workitem_id_z() #1 {
247 call void @use_workitem_id_z()
251 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_x:
253 ; FIXEDABI-DAG: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
255 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
256 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
; Callee that takes one ordinary i32 argument in addition to using workitem
; ID X. Both %arg0 and the ID are volatile-stored through an undef global
; pointer. The checks above expect %arg0 to be stored from v0 and the ID to be
; masked out of v31.
257 define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
258 %val = call i32 @llvm.amdgcn.workitem.id.x()
259 store volatile i32 %arg0, ptr addrspace(1) undef
260 store volatile i32 %val, ptr addrspace(1) undef
264 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_y:
266 ; FIXEDABI-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
267 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
268 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
; Callee that takes one ordinary i32 argument in addition to using workitem
; ID Y. Both %arg0 and the ID are volatile-stored through an undef global
; pointer. The checks above expect %arg0 to be stored from v0 and the ID to be
; extracted from v31 with v_bfe_u32 10, 10.
269 define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
270 %val = call i32 @llvm.amdgcn.workitem.id.y()
271 store volatile i32 %arg0, ptr addrspace(1) undef
272 store volatile i32 %val, ptr addrspace(1) undef
276 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_z:
278 ; FIXEDABI-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
279 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
280 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
; Callee that takes one ordinary i32 argument in addition to using workitem
; ID Z. Both %arg0 and the ID are volatile-stored through an undef global
; pointer. The checks above expect %arg0 to be stored from v0 and the ID to be
; extracted from v31 with v_bfe_u32 20, 10.
281 define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
282 %val = call i32 @llvm.amdgcn.workitem.id.z()
283 store volatile i32 %arg0, ptr addrspace(1) undef
284 store volatile i32 %val, ptr addrspace(1) undef
289 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:
292 ; FIXEDABI: v_mov_b32_e32 v31, v0
293 ; FIXEDABI: v_mov_b32_e32 v0, 0x22b
297 ; GCN: .amdhsa_system_vgpr_workitem_id 0
; Kernel that calls @other_arg_use_workitem_id_x with the constant 555
; (0x22b). The checks above expect v0 to be copied into v31 and the constant
; to be materialized in v0 afterwards.
298 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
299 call void @other_arg_use_workitem_id_x(i32 555)
304 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:
309 ; FIXEDABI: v_lshlrev_b32_e32 v31, 10, v1
310 ; FIXEDABI: v_mov_b32_e32 v0, 0x22b
312 ; GCN: .amdhsa_system_vgpr_workitem_id 1
; Kernel that calls @other_arg_use_workitem_id_y with the constant 555
; (0x22b). The checks above expect v31 to be formed as v1 shifted left by 10
; and the constant to be materialized in v0.
313 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
314 call void @other_arg_use_workitem_id_y(i32 555)
318 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:
323 ; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2
324 ; FIXEDABI: v_mov_b32_e32 v0, 0x22b
326 ; GCN: .amdhsa_system_vgpr_workitem_id 2
; Kernel that calls @other_arg_use_workitem_id_z with the constant 555
; (0x22b). The checks above expect v31 to be formed as v2 shifted left by 20
; and the constant to be materialized in v0.
327 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
328 call void @other_arg_use_workitem_id_z(i32 555)
332 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
333 ; FIXEDABI: v_and_b32_e32 v31, 0x3ff, v31
334 ; FIXEDABI: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
; Callee with 32 i32 arguments that also uses workitem ID X. The ID is
; volatile-stored first, followed by every argument in order, so all of them
; stay live. The checks above expect the ID to be masked out of v31 and one
; dword to be loaded from the stack at s32 (presumably %arg31, since v31 is
; occupied by the packed workitem IDs -- confirm against the calling
; convention).
335 define void @too_many_args_use_workitem_id_x(
336 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
337 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
338 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
339 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
340 %val = call i32 @llvm.amdgcn.workitem.id.x()
341 store volatile i32 %val, ptr addrspace(1) undef
; Store each incoming argument, in declaration order.
343 store volatile i32 %arg0, ptr addrspace(1) undef
344 store volatile i32 %arg1, ptr addrspace(1) undef
345 store volatile i32 %arg2, ptr addrspace(1) undef
346 store volatile i32 %arg3, ptr addrspace(1) undef
347 store volatile i32 %arg4, ptr addrspace(1) undef
348 store volatile i32 %arg5, ptr addrspace(1) undef
349 store volatile i32 %arg6, ptr addrspace(1) undef
350 store volatile i32 %arg7, ptr addrspace(1) undef
352 store volatile i32 %arg8, ptr addrspace(1) undef
353 store volatile i32 %arg9, ptr addrspace(1) undef
354 store volatile i32 %arg10, ptr addrspace(1) undef
355 store volatile i32 %arg11, ptr addrspace(1) undef
356 store volatile i32 %arg12, ptr addrspace(1) undef
357 store volatile i32 %arg13, ptr addrspace(1) undef
358 store volatile i32 %arg14, ptr addrspace(1) undef
359 store volatile i32 %arg15, ptr addrspace(1) undef
361 store volatile i32 %arg16, ptr addrspace(1) undef
362 store volatile i32 %arg17, ptr addrspace(1) undef
363 store volatile i32 %arg18, ptr addrspace(1) undef
364 store volatile i32 %arg19, ptr addrspace(1) undef
365 store volatile i32 %arg20, ptr addrspace(1) undef
366 store volatile i32 %arg21, ptr addrspace(1) undef
367 store volatile i32 %arg22, ptr addrspace(1) undef
368 store volatile i32 %arg23, ptr addrspace(1) undef
370 store volatile i32 %arg24, ptr addrspace(1) undef
371 store volatile i32 %arg25, ptr addrspace(1) undef
372 store volatile i32 %arg26, ptr addrspace(1) undef
373 store volatile i32 %arg27, ptr addrspace(1) undef
374 store volatile i32 %arg28, ptr addrspace(1) undef
375 store volatile i32 %arg29, ptr addrspace(1) undef
376 store volatile i32 %arg30, ptr addrspace(1) undef
377 store volatile i32 %arg31, ptr addrspace(1) undef
382 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:
387 ; FIXEDABI-DAG: s_mov_b32 s32, 0
388 ; FIXEDABI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x140{{$}}
389 ; FIXEDABI-DAG: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
390 ; FIXEDABI-DAG: v_mov_b32_e32 v31, v0
392 ; FIXEDABI: s_swappc_b64
394 ; GCN: .amdhsa_system_vgpr_workitem_id 0
; Kernel that calls @too_many_args_use_workitem_id_x with the constants
; 10, 20, ..., 320. The checks above expect s32 to be initialized to 0, the
; last constant (0x140 = 320) to be stored to the stack at s32, v0 to be
; copied into v31, and the call to be emitted as s_swappc_b64.
395 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
396 call void @too_many_args_use_workitem_id_x(
397 i32 10, i32 20, i32 30, i32 40,
398 i32 50, i32 60, i32 70, i32 80,
399 i32 90, i32 100, i32 110, i32 120,
400 i32 130, i32 140, i32 150, i32 160,
401 i32 170, i32 180, i32 190, i32 200,
402 i32 210, i32 220, i32 230, i32 240,
403 i32 250, i32 260, i32 270, i32 280,
404 i32 290, i32 300, i32 310, i32 320)
408 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
410 ; Touching the workitem id register is not necessary.
412 ; FIXEDABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x140{{$}}
414 ; FIXEDABI: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
; Non-kernel caller: volatile-stores its own %arg0, then calls
; @too_many_args_use_workitem_id_x with the constants 10, 20, ..., 320. The
; checks above expect the last constant (0x140 = 320) to be stored to the
; stack at s32.
418 define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
419 store volatile i32 %arg0, ptr addrspace(1) undef
420 call void @too_many_args_use_workitem_id_x(
421 i32 10, i32 20, i32 30, i32 40,
422 i32 50, i32 60, i32 70, i32 80,
423 i32 90, i32 100, i32 110, i32 120,
424 i32 130, i32 140, i32 150, i32 160,
425 i32 170, i32 180, i32 190, i32 200,
426 i32 210, i32 220, i32 230, i32 240,
427 i32 250, i32 260, i32 270, i32 280,
428 i32 290, i32 300, i32 310, i32 320)
432 ; Requires loading and storing to stack slot.
433 ; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
434 ; GCN-DAG: s_addk_i32 s32, 0x400{{$}}
435 ; GCN-DAG: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
436 ; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}}
438 ; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}}
442 ; GCN: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
443 ; GCN: s_addk_i32 s32, 0xfc00{{$}}
; Function with 32 i32 arguments that forwards all of them, unchanged and in
; order, to @too_many_args_use_workitem_id_x. The checks above expect the
; stack-passed dword to be loaded from s33 into v32 and re-stored at s32 for
; the callee, v40 to be spilled and reloaded at s33 offset 4, and s32 to be
; adjusted by 0x400 and later by 0xfc00.
445 define void @too_many_args_call_too_many_args_use_workitem_id_x(
446 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
447 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
448 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
449 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
450 call void @too_many_args_use_workitem_id_x(
451 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
452 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
453 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
454 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31)
458 ; var abi stack layout (legacy note; the run line of this file enables only the fixed-ABI checks):
459 ; frame[0] = byval arg32
460 ; frame[1] = stack passed workitem ID x
461 ; frame[2] = VGPR spill slot
463 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
465 ; FIXEDABI: v_and_b32_e32 v31, 0x3ff, v31
466 ; FIXEDABI-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31
468 ; FIXEDABI: buffer_load_dword v31, off, s[0:3], s32{{$}}
469 ; FIXEDABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
470 ; FIXEDABI: buffer_load_dword v0, off, s[0:3], s32 offset:4 glc{{$}}
471 ; FIXEDABI: s_setpc_b64
; Callee with 32 i32 arguments plus a byval(i32) pointer in the private
; address space (addrspace(5)), which also uses workitem ID X. It
; volatile-stores the ID, then every i32 argument in order, and finally does a
; volatile load through the byval pointer (the loaded value is not used). The
; checks above expect a dword load from s32 into v31 and a glc dword load from
; s32 offset 4.
472 define void @too_many_args_use_workitem_id_x_byval(
473 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
474 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
475 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
476 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, ptr addrspace(5) byval(i32) %arg32) #1 {
477 %val = call i32 @llvm.amdgcn.workitem.id.x()
478 store volatile i32 %val, ptr addrspace(1) undef
; Store each incoming i32 argument, in declaration order.
480 store volatile i32 %arg0, ptr addrspace(1) undef
481 store volatile i32 %arg1, ptr addrspace(1) undef
482 store volatile i32 %arg2, ptr addrspace(1) undef
483 store volatile i32 %arg3, ptr addrspace(1) undef
484 store volatile i32 %arg4, ptr addrspace(1) undef
485 store volatile i32 %arg5, ptr addrspace(1) undef
486 store volatile i32 %arg6, ptr addrspace(1) undef
487 store volatile i32 %arg7, ptr addrspace(1) undef
489 store volatile i32 %arg8, ptr addrspace(1) undef
490 store volatile i32 %arg9, ptr addrspace(1) undef
491 store volatile i32 %arg10, ptr addrspace(1) undef
492 store volatile i32 %arg11, ptr addrspace(1) undef
493 store volatile i32 %arg12, ptr addrspace(1) undef
494 store volatile i32 %arg13, ptr addrspace(1) undef
495 store volatile i32 %arg14, ptr addrspace(1) undef
496 store volatile i32 %arg15, ptr addrspace(1) undef
498 store volatile i32 %arg16, ptr addrspace(1) undef
499 store volatile i32 %arg17, ptr addrspace(1) undef
500 store volatile i32 %arg18, ptr addrspace(1) undef
501 store volatile i32 %arg19, ptr addrspace(1) undef
502 store volatile i32 %arg20, ptr addrspace(1) undef
503 store volatile i32 %arg21, ptr addrspace(1) undef
504 store volatile i32 %arg22, ptr addrspace(1) undef
505 store volatile i32 %arg23, ptr addrspace(1) undef
507 store volatile i32 %arg24, ptr addrspace(1) undef
508 store volatile i32 %arg25, ptr addrspace(1) undef
509 store volatile i32 %arg26, ptr addrspace(1) undef
510 store volatile i32 %arg27, ptr addrspace(1) undef
511 store volatile i32 %arg28, ptr addrspace(1) undef
512 store volatile i32 %arg29, ptr addrspace(1) undef
513 store volatile i32 %arg30, ptr addrspace(1) undef
514 store volatile i32 %arg31, ptr addrspace(1) undef
; Volatile read of the byval slot; the result is intentionally unused.
515 %private = load volatile i32, ptr addrspace(5) %arg32
519 ; var abi stack layout (legacy note; the run line of this file enables only the fixed-ABI checks):
522 ; sp[2] = stack passed workitem ID x
524 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
529 ; FIXEDABI: v_mov_b32_e32 v31, v0
530 ; FIXEDABI: v_mov_b32_e32 [[K0:v[0-9]+]], 0x3e7
531 ; FIXEDABI: s_movk_i32 s32, 0x400{{$}}
532 ; FIXEDABI: buffer_store_dword [[K0]], off, s[0:3], 0 offset:4{{$}}
533 ; FIXEDABI: v_mov_b32_e32 [[K1:v[0-9]+]], 0x140
535 ; FIXEDABI: buffer_store_dword [[K1]], off, s[0:3], s32{{$}}
537 ; FIXME: Why this reload?
538 ; FIXEDABI: buffer_load_dword [[RELOAD:v[0-9]+]], off, s[0:3], 0 offset:4{{$}}
541 ; FIXEDABI: buffer_store_dword [[RELOAD]], off, s[0:3], s32 offset:4
542 ; FIXEDABI: s_swappc_b64
; Kernel that allocates an i32 in private memory (addrspace(5)),
; volatile-stores 999 (0x3e7) into it, and passes it byval after the constants
; 10, 20, ..., 320 to @too_many_args_use_workitem_id_x_byval. The checks above
; expect 0x140 (320) to be stored at s32 and the byval value to be reloaded
; from scratch offset 4 and stored at s32 offset 4 before the call.
543 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
544 %alloca = alloca i32, align 4, addrspace(5)
545 store volatile i32 999, ptr addrspace(5) %alloca
546 call void @too_many_args_use_workitem_id_x_byval(
547 i32 10, i32 20, i32 30, i32 40,
548 i32 50, i32 60, i32 70, i32 80,
549 i32 90, i32 100, i32 110, i32 120,
550 i32 130, i32 140, i32 150, i32 160,
551 i32 170, i32 180, i32 190, i32 200,
552 i32 210, i32 220, i32 230, i32 240,
553 i32 250, i32 260, i32 270, i32 280,
554 i32 290, i32 300, i32 310, i32 320,
555 ptr addrspace(5) byval(i32) %alloca)
559 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
561 ; FIXEDABI: v_mov_b32_e32 [[K0:v[0-9]+]], 0x3e7{{$}}
562 ; FIXEDABI: buffer_store_dword [[K0]], off, s[0:3], s33{{$}}
563 ; FIXEDABI: v_mov_b32_e32 [[K1:v[0-9]+]], 0x140{{$}}
564 ; FIXEDABI: buffer_store_dword [[K1]], off, s[0:3], s32{{$}}
565 ; FIXEDABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
568 ; FIXEDABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
570 ; FIXEDABI: s_swappc_b64
; Non-kernel variant of the byval caller: allocates an i32 in private memory
; (addrspace(5)), volatile-stores 999 (0x3e7) into it, and passes it byval
; after the constants 10, 20, ..., 320 to
; @too_many_args_use_workitem_id_x_byval. The checks above expect the local to
; live at s33, 0x140 (320) to be stored at s32, and the byval value to be
; reloaded from s33 and stored at s32 offset 4 before the call.
571 define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
572 %alloca = alloca i32, align 4, addrspace(5)
573 store volatile i32 999, ptr addrspace(5) %alloca
574 call void @too_many_args_use_workitem_id_x_byval(
575 i32 10, i32 20, i32 30, i32 40,
576 i32 50, i32 60, i32 70, i32 80,
577 i32 90, i32 100, i32 110, i32 120,
578 i32 130, i32 140, i32 150, i32 160,
579 i32 170, i32 180, i32 190, i32 200,
580 i32 210, i32 220, i32 230, i32 240,
581 i32 250, i32 260, i32 270, i32 280,
582 i32 290, i32 300, i32 310, i32 320,
583 ptr addrspace(5) byval(i32) %alloca)
587 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
588 ; FIXEDABI: v_and_b32_e32 [[AND_X:v[0-9]+]], 0x3ff, v31
589 ; FIXEDABI-NOT: buffer_load_dword
590 ; FIXEDABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[AND_X]]
591 ; FIXEDABI-NOT: buffer_load_dword
592 ; FIXEDABI: v_bfe_u32 [[BFE_Y:v[0-9]+]], v31, 10, 10
593 ; FIXEDABI-NEXT: v_bfe_u32 [[BFE_Z:v[0-9]+]], v31, 20, 10
594 ; FIXEDABI-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[BFE_Y]]
595 ; FIXEDABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[BFE_Z]]
; Callee with 32 i32 arguments that uses all three workitem IDs. Each ID is
; read and volatile-stored in turn (X, then Y, then Z), followed by every
; argument in order. The checks above expect all three IDs to be derived from
; v31 and no buffer_load_dword to appear around the store of the X value.
597 define void @too_many_args_use_workitem_id_xyz(
598 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
599 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
600 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
601 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
602 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
603 store volatile i32 %val0, ptr addrspace(1) undef
604 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
605 store volatile i32 %val1, ptr addrspace(1) undef
606 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
607 store volatile i32 %val2, ptr addrspace(1) undef
; Store each incoming argument, in declaration order.
609 store volatile i32 %arg0, ptr addrspace(1) undef
610 store volatile i32 %arg1, ptr addrspace(1) undef
611 store volatile i32 %arg2, ptr addrspace(1) undef
612 store volatile i32 %arg3, ptr addrspace(1) undef
613 store volatile i32 %arg4, ptr addrspace(1) undef
614 store volatile i32 %arg5, ptr addrspace(1) undef
615 store volatile i32 %arg6, ptr addrspace(1) undef
616 store volatile i32 %arg7, ptr addrspace(1) undef
618 store volatile i32 %arg8, ptr addrspace(1) undef
619 store volatile i32 %arg9, ptr addrspace(1) undef
620 store volatile i32 %arg10, ptr addrspace(1) undef
621 store volatile i32 %arg11, ptr addrspace(1) undef
622 store volatile i32 %arg12, ptr addrspace(1) undef
623 store volatile i32 %arg13, ptr addrspace(1) undef
624 store volatile i32 %arg14, ptr addrspace(1) undef
625 store volatile i32 %arg15, ptr addrspace(1) undef
627 store volatile i32 %arg16, ptr addrspace(1) undef
628 store volatile i32 %arg17, ptr addrspace(1) undef
629 store volatile i32 %arg18, ptr addrspace(1) undef
630 store volatile i32 %arg19, ptr addrspace(1) undef
631 store volatile i32 %arg20, ptr addrspace(1) undef
632 store volatile i32 %arg21, ptr addrspace(1) undef
633 store volatile i32 %arg22, ptr addrspace(1) undef
634 store volatile i32 %arg23, ptr addrspace(1) undef
636 store volatile i32 %arg24, ptr addrspace(1) undef
637 store volatile i32 %arg25, ptr addrspace(1) undef
638 store volatile i32 %arg26, ptr addrspace(1) undef
639 store volatile i32 %arg27, ptr addrspace(1) undef
640 store volatile i32 %arg28, ptr addrspace(1) undef
641 store volatile i32 %arg29, ptr addrspace(1) undef
642 store volatile i32 %arg30, ptr addrspace(1) undef
643 store volatile i32 %arg31, ptr addrspace(1) undef
648 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:
650 ; GCN-DAG: s_mov_b32 s32, 0
652 ; GCN-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
653 ; FIXEDABI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x140
654 ; GCN-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
655 ; GCN-DAG: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
657 ; FIXEDABI: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
658 ; FIXEDABI-DAG: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
662 ; GCN: .amdhsa_system_vgpr_workitem_id 2
; Kernel that calls @too_many_args_use_workitem_id_xyz with the constants
; 10, 20, ..., 320. The checks above expect v1 shifted left by 10 and v2
; shifted left by 20 to be OR'd with v0 into v31, 0x140 (320) to be stored at
; s32, and the kernel descriptor to contain
; ".amdhsa_system_vgpr_workitem_id 2".
663 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
664 call void @too_many_args_use_workitem_id_xyz(
665 i32 10, i32 20, i32 30, i32 40,
666 i32 50, i32 60, i32 70, i32 80,
667 i32 90, i32 100, i32 110, i32 120,
668 i32 130, i32 140, i32 150, i32 160,
669 i32 170, i32 180, i32 190, i32 200,
670 i32 210, i32 220, i32 230, i32 240,
671 i32 250, i32 260, i32 270, i32 280,
672 i32 290, i32 300, i32 310, i32 320)
676 ; Var abi (legacy note; the checks below expect X, Y and Z all to come from v31): workitem ID X in register, yz on stack
677 ; v31 = workitem ID X
678 ; frame[0] = workitem { Z, Y, X }
680 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz:
681 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
682 ; GCN-DAG: {{flat|global}}_store_dword v[0:1], [[IDX]]
683 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
684 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDY]]
685 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
686 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDZ]]
688 ; GCN: ScratchSize: 0
; Callee with 31 i32 arguments (%arg0 through %arg30) that uses all three
; workitem IDs. Each ID is read and volatile-stored in turn (X, then Y, then
; Z), followed by every argument in order. The checks above expect all three
; IDs to be derived from v31 and the reported scratch size to be 0.
689 define void @too_many_args_use_workitem_id_x_stack_yz(
690 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
691 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
692 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
693 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) #1 {
694 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
695 store volatile i32 %val0, ptr addrspace(1) undef
696 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
697 store volatile i32 %val1, ptr addrspace(1) undef
698 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
699 store volatile i32 %val2, ptr addrspace(1) undef
; Store each incoming argument, in declaration order.
701 store volatile i32 %arg0, ptr addrspace(1) undef
702 store volatile i32 %arg1, ptr addrspace(1) undef
703 store volatile i32 %arg2, ptr addrspace(1) undef
704 store volatile i32 %arg3, ptr addrspace(1) undef
705 store volatile i32 %arg4, ptr addrspace(1) undef
706 store volatile i32 %arg5, ptr addrspace(1) undef
707 store volatile i32 %arg6, ptr addrspace(1) undef
708 store volatile i32 %arg7, ptr addrspace(1) undef
710 store volatile i32 %arg8, ptr addrspace(1) undef
711 store volatile i32 %arg9, ptr addrspace(1) undef
712 store volatile i32 %arg10, ptr addrspace(1) undef
713 store volatile i32 %arg11, ptr addrspace(1) undef
714 store volatile i32 %arg12, ptr addrspace(1) undef
715 store volatile i32 %arg13, ptr addrspace(1) undef
716 store volatile i32 %arg14, ptr addrspace(1) undef
717 store volatile i32 %arg15, ptr addrspace(1) undef
719 store volatile i32 %arg16, ptr addrspace(1) undef
720 store volatile i32 %arg17, ptr addrspace(1) undef
721 store volatile i32 %arg18, ptr addrspace(1) undef
722 store volatile i32 %arg19, ptr addrspace(1) undef
723 store volatile i32 %arg20, ptr addrspace(1) undef
724 store volatile i32 %arg21, ptr addrspace(1) undef
725 store volatile i32 %arg22, ptr addrspace(1) undef
726 store volatile i32 %arg23, ptr addrspace(1) undef
728 store volatile i32 %arg24, ptr addrspace(1) undef
729 store volatile i32 %arg25, ptr addrspace(1) undef
730 store volatile i32 %arg26, ptr addrspace(1) undef
731 store volatile i32 %arg27, ptr addrspace(1) undef
732 store volatile i32 %arg28, ptr addrspace(1) undef
733 store volatile i32 %arg29, ptr addrspace(1) undef
734 store volatile i32 %arg30, ptr addrspace(1) undef
739 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:
742 ; GCN-DAG: v_lshlrev_b32_e32 v1, 10, v1
743 ; GCN-DAG: v_or_b32_e32 v0, v0, v1
744 ; GCN-DAG: v_lshlrev_b32_e32 v2, 20, v2
745 ; GCN-DAG: v_or_b32_e32 v31, v0, v2
747 ; GCN: s_mov_b32 s32, 0
750 ; GCN: .amdhsa_system_vgpr_workitem_id 2
; Kernel that calls @too_many_args_use_workitem_id_x_stack_yz with the 31
; constants 10, 20, ..., 310. The checks above expect v1 shifted left by 10
; and v2 shifted left by 20 to be OR'd with v0 into v31, s32 to be
; initialized to 0, and the kernel descriptor to contain
; ".amdhsa_system_vgpr_workitem_id 2".
751 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
752 call void @too_many_args_use_workitem_id_x_stack_yz(
753 i32 10, i32 20, i32 30, i32 40,
754 i32 50, i32 60, i32 70, i32 80,
755 i32 90, i32 100, i32 110, i32 120,
756 i32 130, i32 140, i32 150, i32 160,
757 i32 170, i32 180, i32 190, i32 200,
758 i32 210, i32 220, i32 230, i32 240,
759 i32 250, i32 260, i32 270, i32 280,
760 i32 290, i32 300, i32 310)
; External callee with hidden visibility whose declaration carries attribute
; group #2 (the "amdgpu-no-workitem-id-*" attributes).
764 declare hidden void @extern_hint(i32) #2
766 ; Workitem IDs should not be passed due to the attribute
767 ; GCN-LABEL: {{^}}kern_call_no_workitem_id_hints:
770 ; GCN: v_mov_b32_e32 v0, 9
; Kernel carrying attribute group #2 that calls @extern_hint with the
; constant 9. The checks above expect the constant to be materialized in v0.
774 define amdgpu_kernel void @kern_call_no_workitem_id_hints() #2 {
775 call void @extern_hint(i32 9)
779 ; GCN-LABEL: {{^}}func_call_no_workitem_id_hints:
782 ; GCN: v_mov_b32_e32 v0, 9
; Non-kernel function carrying attribute group #2 that calls @extern_hint
; with the constant 9. The checks above expect the constant to be
; materialized in v0.
786 define void @func_call_no_workitem_id_hints() #2 {
787 call void @extern_hint(i32 9)
; External callee with hidden visibility declared without any attribute
; group, unlike @extern_hint.
791 declare hidden void @extern_nohint(i32)
793 ; Check that the hint is respected on the callsite, not the function
795 ; GCN-LABEL: {{^}}kern_callsite_workitem_id_hints:
798 ; GCN: v_mov_b32_e32 v0, 9
; Kernel carrying attribute group #2 that calls @extern_nohint with the
; constant 9. Here attribute group #2 is attached to the call site itself,
; because the callee declaration has no attributes.
802 define amdgpu_kernel void @kern_callsite_workitem_id_hints() #2 {
803 call void @extern_nohint(i32 9) #2
; Declarations of the three workitem ID intrinsics used throughout this file.
807 declare i32 @llvm.amdgcn.workitem.id.x() #0
808 declare i32 @llvm.amdgcn.workitem.id.y() #0
809 declare i32 @llvm.amdgcn.workitem.id.z() #0
; Attribute groups:
;   #0 - applied to the intrinsic declarations above.
;   #1 - applied to the test functions and kernels; noinline keeps each call
;        visible as a real call in the generated code.
;   #2 - states that workitem IDs X, Y and Z are not needed; used by the
;        "hints" tests.
811 attributes #0 = { nounwind readnone speculatable }
812 attributes #1 = { nounwind noinline }
813 attributes #2 = { nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
; Module flag setting "amdgpu_code_object_version" to 400.
815 !llvm.module.flags = !{!0}
816 !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}