1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-code-object-v3 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s
; Callee that reads only the X workitem ID and stores it to an undef global
; pointer. X is expected to be masked out of the low 10 bits of v0 (0x3ff).
; The store-address pattern accepts a multi-digit first register index, the
; same way the other_arg_* checks in this file do.
3 ; GCN-LABEL: {{^}}use_workitem_id_x:
5 ; GCN: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v0
6 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
8 ; GCN-NEXT: s_setpc_b64
9 define void @use_workitem_id_x() #1 {
10 %val = call i32 @llvm.amdgcn.workitem.id.x()
11 store volatile i32 %val, i32 addrspace(1)* undef
; Callee that reads only the Y workitem ID and stores it to an undef global
; pointer. Y is expected to be extracted from v0 with v_bfe_u32 (offset 10,
; width 10). The store-address pattern accepts a multi-digit first register
; index, the same way the other_arg_* checks in this file do.
15 ; GCN-LABEL: {{^}}use_workitem_id_y:
17 ; GCN: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10
18 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
20 ; GCN-NEXT: s_setpc_b64
21 define void @use_workitem_id_y() #1 {
22 %val = call i32 @llvm.amdgcn.workitem.id.y()
23 store volatile i32 %val, i32 addrspace(1)* undef
; Callee that reads only the Z workitem ID and stores it to an undef global
; pointer. Z is expected to be extracted from v0 with v_bfe_u32 (offset 20,
; width 10). The store-address pattern accepts a multi-digit first register
; index, the same way the other_arg_* checks in this file do.
27 ; GCN-LABEL: {{^}}use_workitem_id_z:
29 ; GCN: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10
30 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
32 ; GCN-NEXT: s_setpc_b64
33 define void @use_workitem_id_z() #1 {
34 %val = call i32 @llvm.amdgcn.workitem.id.z()
35 store volatile i32 %val, i32 addrspace(1)* undef
; Callee that reads the X and Y workitem IDs and stores each to an undef
; global pointer. Both are expected to be unpacked from v0: X with a 0x3ff
; mask, Y with v_bfe_u32 (offset 10, width 10). The unpack and store checks
; are order-independent. The store-address patterns accept a multi-digit
; first register index, the same way the other_arg_* checks in this file do.
39 ; GCN-LABEL: {{^}}use_workitem_id_xy:
41 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0
42 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10
43 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDX]]
44 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDY]]
46 ; GCN-NEXT: s_setpc_b64
47 define void @use_workitem_id_xy() #1 {
48 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
49 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
50 store volatile i32 %val0, i32 addrspace(1)* undef
51 store volatile i32 %val1, i32 addrspace(1)* undef
; Callee that reads all three workitem IDs and stores each to an undef global
; pointer. All are expected to be unpacked from v0: X with a 0x3ff mask, Y and
; Z with v_bfe_u32 at offsets 10 and 20 (width 10). The unpack and store
; checks are order-independent. The store-address patterns accept a
; multi-digit first register index, the same way the other_arg_* checks in
; this file do.
55 ; GCN-LABEL: {{^}}use_workitem_id_xyz:
57 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0
58 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10
59 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10
60 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDX]]
61 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDY]]
62 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDZ]]
64 ; GCN-NEXT: s_setpc_b64
65 define void @use_workitem_id_xyz() #1 {
66 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
67 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
68 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
69 store volatile i32 %val0, i32 addrspace(1)* undef
70 store volatile i32 %val1, i32 addrspace(1)* undef
71 store volatile i32 %val2, i32 addrspace(1)* undef
; Callee that reads the X and Z workitem IDs and stores each to an undef
; global pointer. Both are expected to be unpacked from v0: X with a 0x3ff
; mask, Z with v_bfe_u32 (offset 20, width 10). The unpack and store checks
; are order-independent. The store-address patterns accept a multi-digit
; first register index, the same way the other_arg_* checks in this file do.
75 ; GCN-LABEL: {{^}}use_workitem_id_xz:
77 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0
78 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10
79 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDX]]
80 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDZ]]
82 ; GCN-NEXT: s_setpc_b64
83 define void @use_workitem_id_xz() #1 {
84 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
85 %val1 = call i32 @llvm.amdgcn.workitem.id.z()
86 store volatile i32 %val0, i32 addrspace(1)* undef
87 store volatile i32 %val1, i32 addrspace(1)* undef
; Callee that reads the Y and Z workitem IDs and stores each to an undef
; global pointer. Both are expected to be extracted from v0 with v_bfe_u32
; (offsets 10 and 20, width 10). The unpack and store checks are
; order-independent. The store-address patterns accept a multi-digit first
; register index, the same way the other_arg_* checks in this file do.
91 ; GCN-LABEL: {{^}}use_workitem_id_yz:
93 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10
94 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10
95 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDY]]
96 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDZ]]
98 ; GCN-NEXT: s_setpc_b64
99 define void @use_workitem_id_yz() #1 {
100 %val0 = call i32 @llvm.amdgcn.workitem.id.y()
101 %val1 = call i32 @llvm.amdgcn.workitem.id.z()
102 store volatile i32 %val0, i32 addrspace(1)* undef
103 store volatile i32 %val1, i32 addrspace(1)* undef
; Kernel that calls use_workitem_id_x. The only visible check is that the
; output reports enable_vgpr_workitem_id = 0.
107 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
108 ; GCN: enable_vgpr_workitem_id = 0
113 define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
114 call void @use_workitem_id_x()
; Kernel that calls use_workitem_id_y. Expects enable_vgpr_workitem_id = 1 and
; v1 shifted left by 10 into v0, which is the bit position the callee's
; v_bfe_u32 (v0, 10, 10) reads Y from.
118 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
119 ; GCN: enable_vgpr_workitem_id = 1
123 ; GCN: v_lshlrev_b32_e32 v0, 10, v1
127 define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
128 call void @use_workitem_id_y()
; Kernel that calls use_workitem_id_z. Expects enable_vgpr_workitem_id = 2 and
; v2 shifted left by 20 into v0, which is the bit position the callee's
; v_bfe_u32 (v0, 20, 10) reads Z from.
132 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
133 ; GCN: enable_vgpr_workitem_id = 2
137 ; GCN: v_lshlrev_b32_e32 v0, 20, v2
141 define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
142 call void @use_workitem_id_z()
; Kernel that calls use_workitem_id_xy. Expects v1 to be shifted left by 10
; and OR'ed into v0, so v0 carries both X and Y for the callee.
146 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xy:
149 ; GCN: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
150 ; GCN: v_or_b32_e32 v0, v0, [[IDY]]
154 define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
155 call void @use_workitem_id_xy()
; Kernel that calls use_workitem_id_xz. Expects v2 to be shifted left by 20
; and OR'ed into v0, so v0 carries both X and Z for the callee.
159 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xz:
162 ; GCN: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
163 ; GCN: v_or_b32_e32 v0, v0, [[IDZ]]
167 define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
168 call void @use_workitem_id_xz()
; Kernel that calls use_workitem_id_yz. Expects v1 shifted left by 10 and v2
; shifted left by 20 (in either order), then OR'ed together into v0.
172 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_yz:
175 ; GCN-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
176 ; GCN-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
177 ; GCN: v_or_b32_e32 v0, [[IDY]], [[IDZ]]
181 define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
182 call void @use_workitem_id_yz()
; Kernel that calls use_workitem_id_xyz. Expects v1 shifted left by 10 and v2
; shifted left by 20, each OR'ed into v0; all four checks are
; order-independent.
186 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xyz:
190 ; GCN-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
191 ; GCN-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
192 ; GCN-DAG: v_or_b32_e32 v0, v0, [[IDY]]
193 ; GCN-DAG: v_or_b32_e32 v0, v0, [[IDZ]]
198 define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 {
199 call void @use_workitem_id_xyz()
; Non-kernel function that calls use_workitem_id_x. Only the function label
; is checked in the lines visible here.
203 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_x:
207 define void @func_indirect_use_workitem_id_x() #1 {
208 call void @use_workitem_id_x()
; Non-kernel function that calls use_workitem_id_y. Only the function label
; is checked in the lines visible here.
212 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_y:
216 define void @func_indirect_use_workitem_id_y() #1 {
217 call void @use_workitem_id_y()
; Non-kernel function that calls use_workitem_id_z. Only the function label
; is checked in the lines visible here.
221 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_z:
225 define void @func_indirect_use_workitem_id_z() #1 {
226 call void @use_workitem_id_z()
; Callee with one explicit i32 argument that also reads the X workitem ID.
; The checks expect the explicit argument in v0 (stored as-is) and X to be
; masked out of v1 with 0x3ff.
230 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_x:
232 ; GCN-DAG: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v1
233 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
234 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
235 define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
236 %val = call i32 @llvm.amdgcn.workitem.id.x()
237 store volatile i32 %arg0, i32 addrspace(1)* undef
238 store volatile i32 %val, i32 addrspace(1)* undef
; Callee with one explicit i32 argument that also reads the Y workitem ID.
; The checks expect the explicit argument in v0 (stored as-is) and Y to be
; extracted from v1 with v_bfe_u32 (offset 10, width 10).
242 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_y:
244 ; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v1, 10, 10
245 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
246 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
247 define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
248 %val = call i32 @llvm.amdgcn.workitem.id.y()
249 store volatile i32 %arg0, i32 addrspace(1)* undef
250 store volatile i32 %val, i32 addrspace(1)* undef
; Callee with one explicit i32 argument that also reads the Z workitem ID.
; The checks expect the explicit argument in v0 (stored as-is) and Z to be
; extracted from v1 with v_bfe_u32 (offset 20, width 10).
254 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_z:
256 ; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v1, 20, 10
257 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
258 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
259 define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
260 %val = call i32 @llvm.amdgcn.workitem.id.z()
261 store volatile i32 %arg0, i32 addrspace(1)* undef
262 store volatile i32 %val, i32 addrspace(1)* undef
; Kernel that calls other_arg_use_workitem_id_x with the constant 555 (0x22b).
; Expects enable_vgpr_workitem_id = 0, v0 copied to v1 first, and then v0
; overwritten with the constant.
267 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:
268 ; GCN: enable_vgpr_workitem_id = 0
270 ; GCN: v_mov_b32_e32 v1, v0
271 ; GCN: v_mov_b32_e32 v0, 0x22b
273 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
274 call void @other_arg_use_workitem_id_x(i32 555)
; Kernel that calls other_arg_use_workitem_id_y with the constant 555 (0x22b).
; Expects enable_vgpr_workitem_id = 1, v1 shifted left by 10 in place, and v0
; set to the constant.
279 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:
280 ; GCN: enable_vgpr_workitem_id = 1
282 ; GCN: v_lshlrev_b32_e32 v1, 10, v1
284 ; GCN: v_mov_b32_e32 v0, 0x22b
288 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
289 call void @other_arg_use_workitem_id_y(i32 555)
; Kernel that calls other_arg_use_workitem_id_z with the constant 555 (0x22b).
; Expects enable_vgpr_workitem_id = 2, v0 set to the constant, and v2 shifted
; left by 20 into v1 (the last two in either order).
293 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:
294 ; GCN: enable_vgpr_workitem_id = 2
296 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b
297 ; GCN-DAG: v_lshlrev_b32_e32 v1, 20, v2
300 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
301 call void @other_arg_use_workitem_id_z(i32 555)
; Callee with 32 explicit i32 arguments that also reads the X workitem ID.
; The checks expect the ID value to be loaded from the stack at s32 (offset 0)
; into v32 and masked with 0x3ff, with v32 itself spilled to and reloaded from
; s32 offset:4 around that use.
305 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
306 ; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
307 ; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}}
308 ; GCN: v_and_b32_e32 v32, 0x3ff, v32
309 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
311 ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
312 ; GCN-NEXT: s_waitcnt
313 ; GCN-NEXT: s_setpc_b64
314 define void @too_many_args_use_workitem_id_x(
315 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
316 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
317 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
318 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
319 %val = call i32 @llvm.amdgcn.workitem.id.x()
320 store volatile i32 %val, i32 addrspace(1)* undef
; Volatile-store every explicit argument so that each one is used.
322 store volatile i32 %arg0, i32 addrspace(1)* undef
323 store volatile i32 %arg1, i32 addrspace(1)* undef
324 store volatile i32 %arg2, i32 addrspace(1)* undef
325 store volatile i32 %arg3, i32 addrspace(1)* undef
326 store volatile i32 %arg4, i32 addrspace(1)* undef
327 store volatile i32 %arg5, i32 addrspace(1)* undef
328 store volatile i32 %arg6, i32 addrspace(1)* undef
329 store volatile i32 %arg7, i32 addrspace(1)* undef
331 store volatile i32 %arg8, i32 addrspace(1)* undef
332 store volatile i32 %arg9, i32 addrspace(1)* undef
333 store volatile i32 %arg10, i32 addrspace(1)* undef
334 store volatile i32 %arg11, i32 addrspace(1)* undef
335 store volatile i32 %arg12, i32 addrspace(1)* undef
336 store volatile i32 %arg13, i32 addrspace(1)* undef
337 store volatile i32 %arg14, i32 addrspace(1)* undef
338 store volatile i32 %arg15, i32 addrspace(1)* undef
340 store volatile i32 %arg16, i32 addrspace(1)* undef
341 store volatile i32 %arg17, i32 addrspace(1)* undef
342 store volatile i32 %arg18, i32 addrspace(1)* undef
343 store volatile i32 %arg19, i32 addrspace(1)* undef
344 store volatile i32 %arg20, i32 addrspace(1)* undef
345 store volatile i32 %arg21, i32 addrspace(1)* undef
346 store volatile i32 %arg22, i32 addrspace(1)* undef
347 store volatile i32 %arg23, i32 addrspace(1)* undef
349 store volatile i32 %arg24, i32 addrspace(1)* undef
350 store volatile i32 %arg25, i32 addrspace(1)* undef
351 store volatile i32 %arg26, i32 addrspace(1)* undef
352 store volatile i32 %arg27, i32 addrspace(1)* undef
353 store volatile i32 %arg28, i32 addrspace(1)* undef
354 store volatile i32 %arg29, i32 addrspace(1)* undef
355 store volatile i32 %arg30, i32 addrspace(1)* undef
356 store volatile i32 %arg31, i32 addrspace(1)* undef
; Kernel that calls too_many_args_use_workitem_id_x with 32 constant
; arguments. Expects enable_vgpr_workitem_id = 0, s33 copied from s7, s32
; copied from s33, and v0 stored to the stack at s32 (offset 0), the slot the
; callee loads the ID from.
361 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:
362 ; GCN: enable_vgpr_workitem_id = 0
364 ; GCN: s_mov_b32 s33, s7
365 ; GCN: s_mov_b32 s32, s33
366 ; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
368 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
369 call void @too_many_args_use_workitem_id_x(
370 i32 10, i32 20, i32 30, i32 40,
371 i32 50, i32 60, i32 70, i32 80,
372 i32 90, i32 100, i32 110, i32 120,
373 i32 130, i32 140, i32 150, i32 160,
374 i32 170, i32 180, i32 190, i32 200,
375 i32 210, i32 220, i32 230, i32 240,
376 i32 250, i32 260, i32 270, i32 280,
377 i32 290, i32 300, i32 310, i32 320)
; Non-kernel function with one explicit i32 argument that calls
; too_many_args_use_workitem_id_x with 32 constant arguments. Expects s34 to
; be copied from s32 and v1 to be stored to the stack at s32 (offset 0).
; NOTE(review): v1 is presumably the incoming ID register, as in the
; other_arg_* tests where the explicit argument occupies v0 - confirm.
381 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
382 ; GCN: s_mov_b32 s34, s32
383 ; GCN: buffer_store_dword v1, off, s[0:3], s32{{$}}
385 define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
386 store volatile i32 %arg0, i32 addrspace(1)* undef
387 call void @too_many_args_use_workitem_id_x(
388 i32 10, i32 20, i32 30, i32 40,
389 i32 50, i32 60, i32 70, i32 80,
390 i32 90, i32 100, i32 110, i32 120,
391 i32 130, i32 140, i32 150, i32 160,
392 i32 170, i32 180, i32 190, i32 200,
393 i32 210, i32 220, i32 230, i32 240,
394 i32 250, i32 260, i32 270, i32 280,
395 i32 290, i32 300, i32 310, i32 320)
; Function with 32 explicit i32 arguments that forwards all of them to
; too_many_args_use_workitem_id_x. The checks expect the incoming stack value
; at s34 (offset 0) to be loaded into v32 and stored again at s32 (offset 0)
; for the outgoing call, v32 to be spilled/reloaded at s34 offset:4, and s32
; to be adjusted by 0x400 up and back down.
399 ; Requires loading and storing to stack slot.
400 ; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
401 ; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}}
402 ; GCN-DAG: buffer_store_dword v32, off, s[0:3], s34 offset:4 ; 4-byte Folded Spill
403 ; GCN: buffer_load_dword v32, off, s[0:3], s34{{$}}
405 ; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}}
409 ; GCN: buffer_load_dword v32, off, s[0:3], s34 offset:4 ; 4-byte Folded Reload
410 ; GCN: s_sub_u32 s32, s32, 0x400{{$}}
412 define void @too_many_args_call_too_many_args_use_workitem_id_x(
413 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
414 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
415 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
416 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
417 call void @too_many_args_use_workitem_id_x(
418 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
419 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
420 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
421 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31)
; Callee with 32 explicit i32 arguments plus a byval private pointer that
; also reads the X workitem ID. Frame layout expected by the checks below
; (4-byte slots relative to s32):
426 ; frame[0] = byval arg32
427 ; frame[1] = stack passed workitem ID x
428 ; frame[2] = VGPR spill slot
430 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
431 ; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
432 ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:4
433 ; GCN-NEXT: s_waitcnt
434 ; GCN-NEXT: v_and_b32_e32 v32, 0x3ff, v32
435 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
436 ; GCN: buffer_load_dword v0, off, s[0:3], s32{{$}}
437 ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
439 define void @too_many_args_use_workitem_id_x_byval(
440 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
441 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
442 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
443 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, i32 addrspace(5)* byval %arg32) #1 {
444 %val = call i32 @llvm.amdgcn.workitem.id.x()
445 store volatile i32 %val, i32 addrspace(1)* undef
; Volatile-store every explicit i32 argument so that each one is used.
447 store volatile i32 %arg0, i32 addrspace(1)* undef
448 store volatile i32 %arg1, i32 addrspace(1)* undef
449 store volatile i32 %arg2, i32 addrspace(1)* undef
450 store volatile i32 %arg3, i32 addrspace(1)* undef
451 store volatile i32 %arg4, i32 addrspace(1)* undef
452 store volatile i32 %arg5, i32 addrspace(1)* undef
453 store volatile i32 %arg6, i32 addrspace(1)* undef
454 store volatile i32 %arg7, i32 addrspace(1)* undef
456 store volatile i32 %arg8, i32 addrspace(1)* undef
457 store volatile i32 %arg9, i32 addrspace(1)* undef
458 store volatile i32 %arg10, i32 addrspace(1)* undef
459 store volatile i32 %arg11, i32 addrspace(1)* undef
460 store volatile i32 %arg12, i32 addrspace(1)* undef
461 store volatile i32 %arg13, i32 addrspace(1)* undef
462 store volatile i32 %arg14, i32 addrspace(1)* undef
463 store volatile i32 %arg15, i32 addrspace(1)* undef
465 store volatile i32 %arg16, i32 addrspace(1)* undef
466 store volatile i32 %arg17, i32 addrspace(1)* undef
467 store volatile i32 %arg18, i32 addrspace(1)* undef
468 store volatile i32 %arg19, i32 addrspace(1)* undef
469 store volatile i32 %arg20, i32 addrspace(1)* undef
470 store volatile i32 %arg21, i32 addrspace(1)* undef
471 store volatile i32 %arg22, i32 addrspace(1)* undef
472 store volatile i32 %arg23, i32 addrspace(1)* undef
474 store volatile i32 %arg24, i32 addrspace(1)* undef
475 store volatile i32 %arg25, i32 addrspace(1)* undef
476 store volatile i32 %arg26, i32 addrspace(1)* undef
477 store volatile i32 %arg27, i32 addrspace(1)* undef
478 store volatile i32 %arg28, i32 addrspace(1)* undef
479 store volatile i32 %arg29, i32 addrspace(1)* undef
480 store volatile i32 %arg30, i32 addrspace(1)* undef
481 store volatile i32 %arg31, i32 addrspace(1)* undef
; Volatile load through the byval pointer; checked above as the load from s32.
482 %private = load volatile i32, i32 addrspace(5)* %arg32
; Kernel that stores 999 (0x3e7) to a private alloca and passes that alloca
; byval, after 32 constant arguments, to too_many_args_use_workitem_id_x_byval.
; The checks expect the constant to be written at s33 offset:4 and read back,
; s32 set to s33 + 0x400, and then the outgoing slots filled as follows:
488 ; sp[0] = byval copy, sp[1] (s32 offset:4) = stack passed workitem ID x
490 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
491 ; GCN: enable_vgpr_workitem_id = 0
492 ; GCN-DAG: s_mov_b32 s33, s7
493 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
494 ; GCN: buffer_store_dword [[K]], off, s[0:3], s33 offset:4
495 ; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33 offset:4
496 ; GCN: s_add_u32 s32, s33, 0x400{{$}}
499 ; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4
501 ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
502 ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
504 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
505 %alloca = alloca i32, align 4, addrspace(5)
506 store volatile i32 999, i32 addrspace(5)* %alloca
507 call void @too_many_args_use_workitem_id_x_byval(
508 i32 10, i32 20, i32 30, i32 40,
509 i32 50, i32 60, i32 70, i32 80,
510 i32 90, i32 100, i32 110, i32 120,
511 i32 130, i32 140, i32 150, i32 160,
512 i32 170, i32 180, i32 190, i32 200,
513 i32 210, i32 220, i32 230, i32 240,
514 i32 250, i32 260, i32 270, i32 280,
515 i32 290, i32 300, i32 310, i32 320,
516 i32 addrspace(5)* %alloca)
; Non-kernel function that stores 999 (0x3e7) to a private alloca and passes
; that alloca byval, after 32 constant arguments, to
; too_many_args_use_workitem_id_x_byval. The checks expect the constant to be
; written at s34 (offset 0) and read back, v0 to be stored at s32 offset:4,
; and the reloaded byval value to be stored at s32 (offset 0).
520 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
521 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
522 ; GCN: buffer_store_dword [[K]], off, s[0:3], s34{{$}}
523 ; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s34{{$}}
524 ; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4
525 ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
526 ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
528 define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
529 %alloca = alloca i32, align 4, addrspace(5)
530 store volatile i32 999, i32 addrspace(5)* %alloca
531 call void @too_many_args_use_workitem_id_x_byval(
532 i32 10, i32 20, i32 30, i32 40,
533 i32 50, i32 60, i32 70, i32 80,
534 i32 90, i32 100, i32 110, i32 120,
535 i32 130, i32 140, i32 150, i32 160,
536 i32 170, i32 180, i32 190, i32 200,
537 i32 210, i32 220, i32 230, i32 240,
538 i32 250, i32 260, i32 270, i32 280,
539 i32 290, i32 300, i32 310, i32 320,
540 i32 addrspace(5)* %alloca)
; Callee with 32 explicit i32 arguments that reads all three workitem IDs.
; The checks expect v32 and v33 to be spilled (s32 offset:8 and offset:4), a
; single load from s32 (offset 0) into v32, and X/Y/Z all unpacked from v32
; with no further buffer loads in between.
544 ; Only one stack load should be emitted for all 3 values.
545 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
546 ; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
547 ; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
548 ; GCN-NOT: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
549 ; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}}
550 ; GCN-NOT: buffer_load_dword
552 ; GCN: v_and_b32_e32 [[AND_X:v[0-9]+]], 0x3ff, v32
553 ; GCN-NOT: buffer_load_dword
554 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[AND_X]]
555 ; GCN-NOT: buffer_load_dword
556 ; GCN: v_bfe_u32 [[BFE_Y:v[0-9]+]], v32, 10, 10
557 ; GCN-NEXT: v_bfe_u32 [[BFE_Z:v[0-9]+]], v32, 20, 10
558 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[BFE_Y]]
559 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[BFE_Z]]
561 ; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
562 ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
563 ; GCN-NEXT: s_waitcnt
564 ; GCN-NEXT: s_setpc_b64
565 define void @too_many_args_use_workitem_id_xyz(
566 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
567 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
568 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
569 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
570 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
571 store volatile i32 %val0, i32 addrspace(1)* undef
572 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
573 store volatile i32 %val1, i32 addrspace(1)* undef
574 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
575 store volatile i32 %val2, i32 addrspace(1)* undef
; Volatile-store every explicit argument so that each one is used.
577 store volatile i32 %arg0, i32 addrspace(1)* undef
578 store volatile i32 %arg1, i32 addrspace(1)* undef
579 store volatile i32 %arg2, i32 addrspace(1)* undef
580 store volatile i32 %arg3, i32 addrspace(1)* undef
581 store volatile i32 %arg4, i32 addrspace(1)* undef
582 store volatile i32 %arg5, i32 addrspace(1)* undef
583 store volatile i32 %arg6, i32 addrspace(1)* undef
584 store volatile i32 %arg7, i32 addrspace(1)* undef
586 store volatile i32 %arg8, i32 addrspace(1)* undef
587 store volatile i32 %arg9, i32 addrspace(1)* undef
588 store volatile i32 %arg10, i32 addrspace(1)* undef
589 store volatile i32 %arg11, i32 addrspace(1)* undef
590 store volatile i32 %arg12, i32 addrspace(1)* undef
591 store volatile i32 %arg13, i32 addrspace(1)* undef
592 store volatile i32 %arg14, i32 addrspace(1)* undef
593 store volatile i32 %arg15, i32 addrspace(1)* undef
595 store volatile i32 %arg16, i32 addrspace(1)* undef
596 store volatile i32 %arg17, i32 addrspace(1)* undef
597 store volatile i32 %arg18, i32 addrspace(1)* undef
598 store volatile i32 %arg19, i32 addrspace(1)* undef
599 store volatile i32 %arg20, i32 addrspace(1)* undef
600 store volatile i32 %arg21, i32 addrspace(1)* undef
601 store volatile i32 %arg22, i32 addrspace(1)* undef
602 store volatile i32 %arg23, i32 addrspace(1)* undef
604 store volatile i32 %arg24, i32 addrspace(1)* undef
605 store volatile i32 %arg25, i32 addrspace(1)* undef
606 store volatile i32 %arg26, i32 addrspace(1)* undef
607 store volatile i32 %arg27, i32 addrspace(1)* undef
608 store volatile i32 %arg28, i32 addrspace(1)* undef
609 store volatile i32 %arg29, i32 addrspace(1)* undef
610 store volatile i32 %arg30, i32 addrspace(1)* undef
611 store volatile i32 %arg31, i32 addrspace(1)* undef
; Kernel that calls too_many_args_use_workitem_id_xyz with 32 constant
; arguments. Expects enable_vgpr_workitem_id = 2, v1 shifted left by 10 and
; v2 shifted left by 20, both OR'ed into v0, and v0 then stored to the stack
; at s32 (offset 0).
616 ; frame[0] = ID { Z, Y, X }
618 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:
619 ; GCN: enable_vgpr_workitem_id = 2
621 ; GCN-DAG: s_mov_b32 s33, s7
622 ; GCN-DAG: s_mov_b32 s32, s33
624 ; GCN-DAG: v_lshlrev_b32_e32 v1, 10, v1
625 ; GCN-DAG: v_or_b32_e32 v0, v0, v1
626 ; GCN-DAG: v_lshlrev_b32_e32 v2, 20, v2
627 ; GCN-DAG: v_or_b32_e32 v0, v0, v2
628 ; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
630 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
631 call void @too_many_args_use_workitem_id_xyz(
632 i32 10, i32 20, i32 30, i32 40,
633 i32 50, i32 60, i32 70, i32 80,
634 i32 90, i32 100, i32 110, i32 120,
635 i32 130, i32 140, i32 150, i32 160,
636 i32 170, i32 180, i32 190, i32 200,
637 i32 210, i32 220, i32 230, i32 240,
638 i32 250, i32 260, i32 270, i32 280,
639 i32 290, i32 300, i32 310, i32 320)
; Callee with 31 explicit i32 arguments (%arg0..%arg30) that reads all three
; workitem IDs.
643 ; The checks below unpack X, Y and Z all from v31; no stack load of the IDs is checked.
644 ; v31 = packed workitem ID { Z, Y, X }
645 ; NOTE(review): the function name suggests Y/Z were once passed in frame[0]; the visible checks do not show that - confirm.
647 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz:
648 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
649 ; GCN-DAG: {{flat|global}}_store_dword v[0:1], [[IDX]]
650 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
651 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDY]]
652 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
653 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDZ]]
656 ; GCN-NEXT: s_setpc_b64
657 ; GCN: ScratchSize: 8
658 define void @too_many_args_use_workitem_id_x_stack_yz(
659 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
660 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
661 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
662 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) #1 {
663 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
664 store volatile i32 %val0, i32 addrspace(1)* undef
665 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
666 store volatile i32 %val1, i32 addrspace(1)* undef
667 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
668 store volatile i32 %val2, i32 addrspace(1)* undef
; Volatile-store every explicit argument so that each one is used.
670 store volatile i32 %arg0, i32 addrspace(1)* undef
671 store volatile i32 %arg1, i32 addrspace(1)* undef
672 store volatile i32 %arg2, i32 addrspace(1)* undef
673 store volatile i32 %arg3, i32 addrspace(1)* undef
674 store volatile i32 %arg4, i32 addrspace(1)* undef
675 store volatile i32 %arg5, i32 addrspace(1)* undef
676 store volatile i32 %arg6, i32 addrspace(1)* undef
677 store volatile i32 %arg7, i32 addrspace(1)* undef
679 store volatile i32 %arg8, i32 addrspace(1)* undef
680 store volatile i32 %arg9, i32 addrspace(1)* undef
681 store volatile i32 %arg10, i32 addrspace(1)* undef
682 store volatile i32 %arg11, i32 addrspace(1)* undef
683 store volatile i32 %arg12, i32 addrspace(1)* undef
684 store volatile i32 %arg13, i32 addrspace(1)* undef
685 store volatile i32 %arg14, i32 addrspace(1)* undef
686 store volatile i32 %arg15, i32 addrspace(1)* undef
688 store volatile i32 %arg16, i32 addrspace(1)* undef
689 store volatile i32 %arg17, i32 addrspace(1)* undef
690 store volatile i32 %arg18, i32 addrspace(1)* undef
691 store volatile i32 %arg19, i32 addrspace(1)* undef
692 store volatile i32 %arg20, i32 addrspace(1)* undef
693 store volatile i32 %arg21, i32 addrspace(1)* undef
694 store volatile i32 %arg22, i32 addrspace(1)* undef
695 store volatile i32 %arg23, i32 addrspace(1)* undef
697 store volatile i32 %arg24, i32 addrspace(1)* undef
698 store volatile i32 %arg25, i32 addrspace(1)* undef
699 store volatile i32 %arg26, i32 addrspace(1)* undef
700 store volatile i32 %arg27, i32 addrspace(1)* undef
701 store volatile i32 %arg28, i32 addrspace(1)* undef
702 store volatile i32 %arg29, i32 addrspace(1)* undef
703 store volatile i32 %arg30, i32 addrspace(1)* undef
; Kernel that calls too_many_args_use_workitem_id_x_stack_yz with 31 constant
; arguments. Expects enable_vgpr_workitem_id = 2, s33 copied from s7, v1 and
; v2 shifted left by 10 and 20 and combined with v0 into v31, and s32 copied
; from s33.
708 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:
709 ; GCN: enable_vgpr_workitem_id = 2
711 ; GCN: s_mov_b32 s33, s7
714 ; GCN-DAG: v_lshlrev_b32_e32 v1, 10, v1
715 ; GCN-DAG: v_or_b32_e32 v0, v0, v1
716 ; GCN-DAG: v_lshlrev_b32_e32 v2, 20, v2
717 ; GCN-DAG: v_or_b32_e32 v31, v0, v2
719 ; GCN: s_mov_b32 s32, s33
721 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
722 call void @too_many_args_use_workitem_id_x_stack_yz(
723 i32 10, i32 20, i32 30, i32 40,
724 i32 50, i32 60, i32 70, i32 80,
725 i32 90, i32 100, i32 110, i32 120,
726 i32 130, i32 140, i32 150, i32 160,
727 i32 170, i32 180, i32 190, i32 200,
728 i32 210, i32 220, i32 230, i32 240,
729 i32 250, i32 260, i32 270, i32 280,
730 i32 290, i32 300, i32 310)
734 declare i32 @llvm.amdgcn.workitem.id.x() #0
735 declare i32 @llvm.amdgcn.workitem.id.y() #0
736 declare i32 @llvm.amdgcn.workitem.id.z() #0
738 attributes #0 = { nounwind readnone speculatable }
739 attributes #1 = { nounwind noinline }