1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX7,UNPACKED-TID %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,PACKED-TID %s
; Callee that reads workitem ID X through the intrinsic and writes it with a
; volatile store to an undef addrspace(1) pointer.
; Expected code: mask v0 with 0x3ff to get the ID, store it, then return via
; s_setpc_b64.
4 ; GCN-LABEL: {{^}}use_workitem_id_x:
6 ; GCN: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v0
7 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
9 ; GCN-NEXT: s_setpc_b64
10 define void @use_workitem_id_x() #1 {
11 %val = call i32 @llvm.amdgcn.workitem.id.x()
12 store volatile i32 %val, i32 addrspace(1)* undef
; Callee that reads workitem ID Y and writes it with a volatile store.
; Expected code: a 10-bit unsigned bitfield extract of v0 starting at bit 10,
; followed by the store and the return.
16 ; GCN-LABEL: {{^}}use_workitem_id_y:
18 ; GCN: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10
19 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
21 ; GCN-NEXT: s_setpc_b64
22 define void @use_workitem_id_y() #1 {
23 %val = call i32 @llvm.amdgcn.workitem.id.y()
24 store volatile i32 %val, i32 addrspace(1)* undef
; Callee that reads workitem ID Z and writes it with a volatile store.
; Expected code: a 10-bit unsigned bitfield extract of v0 starting at bit 20,
; followed by the store and the return.
28 ; GCN-LABEL: {{^}}use_workitem_id_z:
30 ; GCN: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10
31 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
33 ; GCN-NEXT: s_setpc_b64
34 define void @use_workitem_id_z() #1 {
35 %val = call i32 @llvm.amdgcn.workitem.id.z()
36 store volatile i32 %val, i32 addrspace(1)* undef
; Callee that reads workitem IDs X and Y and stores each with a volatile store.
; The extracts (mask 0x3ff for X, bits 10..19 of v0 for Y) and the two stores
; are matched order-independently; the return must directly follow the last
; matched line.
40 ; GCN-LABEL: {{^}}use_workitem_id_xy:
42 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0
43 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10
44 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
45 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
47 ; GCN-NEXT: s_setpc_b64
48 define void @use_workitem_id_xy() #1 {
49 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
50 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
51 store volatile i32 %val0, i32 addrspace(1)* undef
52 store volatile i32 %val1, i32 addrspace(1)* undef
; Callee that reads all three workitem IDs and stores each with a volatile
; store. All three values are expected to be unpacked from v0: X via mask
; 0x3ff, Y from bits 10..19, Z from bits 20..29. Extracts and stores are
; matched order-independently.
56 ; GCN-LABEL: {{^}}use_workitem_id_xyz:
58 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0
59 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10
60 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10
61 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
62 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
63 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
65 ; GCN-NEXT: s_setpc_b64
66 define void @use_workitem_id_xyz() #1 {
67 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
68 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
69 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
70 store volatile i32 %val0, i32 addrspace(1)* undef
71 store volatile i32 %val1, i32 addrspace(1)* undef
72 store volatile i32 %val2, i32 addrspace(1)* undef
; Callee that reads workitem IDs X and Z (Y unused) and stores each with a
; volatile store. X is expected from the low 10 bits of v0, Z from bits
; 20..29 of v0.
76 ; GCN-LABEL: {{^}}use_workitem_id_xz:
78 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0
79 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10
80 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
81 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
83 ; GCN-NEXT: s_setpc_b64
84 define void @use_workitem_id_xz() #1 {
85 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
86 %val1 = call i32 @llvm.amdgcn.workitem.id.z()
87 store volatile i32 %val0, i32 addrspace(1)* undef
88 store volatile i32 %val1, i32 addrspace(1)* undef
; Callee that reads workitem IDs Y and Z (X unused) and stores each with a
; volatile store. Y is expected from bits 10..19 of v0, Z from bits 20..29.
92 ; GCN-LABEL: {{^}}use_workitem_id_yz:
94 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10
95 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10
96 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
97 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
99 ; GCN-NEXT: s_setpc_b64
100 define void @use_workitem_id_yz() #1 {
101 %val0 = call i32 @llvm.amdgcn.workitem.id.y()
102 %val1 = call i32 @llvm.amdgcn.workitem.id.z()
103 store volatile i32 %val0, i32 addrspace(1)* undef
104 store volatile i32 %val1, i32 addrspace(1)* undef
; Kernel whose only visible action is a call to use_workitem_id_x.
; The check requires the .amdhsa_system_vgpr_workitem_id directive to be 0.
108 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
114 ; GCN: .amdhsa_system_vgpr_workitem_id 0
115 define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
116 call void @use_workitem_id_x()
; Kernel that calls use_workitem_id_y.
; In the UNPACKED-TID run, v1 is expected to be shifted left by 10 into v0,
; after which neither v0 nor v1 may appear again before the next matched line.
; Both runs require .amdhsa_system_vgpr_workitem_id to be 1.
120 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
124 ; UNPACKED-TID: v_lshlrev_b32_e32 v0, 10, v1
125 ; UNPACKED-TID-NOT: v0
126 ; UNPACKED-TID-NOT: v1
129 ; GCN: .amdhsa_system_vgpr_workitem_id 1
130 define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
131 call void @use_workitem_id_y()
; Kernel that calls use_workitem_id_z.
; In the UNPACKED-TID run, v2 is expected to be shifted left by 20 into v0,
; after which neither v0 nor v1 may appear again before the next matched line.
; Both runs require .amdhsa_system_vgpr_workitem_id to be 2.
135 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
139 ; UNPACKED-TID: v_lshlrev_b32_e32 v0, 20, v2
140 ; UNPACKED-TID-NOT: v0
141 ; UNPACKED-TID-NOT: v1
144 ; GCN: .amdhsa_system_vgpr_workitem_id 2
145 define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
146 call void @use_workitem_id_z()
; Kernel that calls use_workitem_id_xy.
; In the UNPACKED-TID run, v0 and v1 must not be mentioned before the packing
; sequence: v1 shifted left by 10 into a temporary, then OR-ed into v0.
150 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xy:
151 ; UNPACKED-TID-NOT: v0
152 ; UNPACKED-TID-NOT: v1
153 ; UNPACKED-TID: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
154 ; UNPACKED-TID: v_or_b32_e32 v0, v0, [[IDY]]
158 define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
159 call void @use_workitem_id_xy()
; Kernel that calls use_workitem_id_xz.
; In the UNPACKED-TID run, v0 and v2 must not be mentioned before the packing
; sequence: v2 shifted left by 20 into a temporary, then OR-ed into v0.
163 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xz:
164 ; UNPACKED-TID-NOT: v0
165 ; UNPACKED-TID-NOT: v2
166 ; UNPACKED-TID: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
167 ; UNPACKED-TID: v_or_b32_e32 v0, v0, [[IDZ]]
171 define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
172 call void @use_workitem_id_xz()
; Kernel that calls use_workitem_id_yz.
; In the UNPACKED-TID run, v1 and v2 must not be mentioned before the packing
; sequence: v1 shifted by 10 and v2 shifted by 20 (in either order), then the
; two temporaries OR-ed together into v0.
176 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_yz:
177 ; UNPACKED-TID-NOT: v1
178 ; UNPACKED-TID-NOT: v2
179 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
180 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
181 ; UNPACKED-TID: v_or_b32_e32 v0, [[IDY]], [[IDZ]]
185 define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
186 call void @use_workitem_id_yz()
; Kernel that calls use_workitem_id_xyz.
; In the UNPACKED-TID run, v0, v1 and v2 must not be mentioned before the
; packing sequence: v1 shifted by 10, v2 shifted by 20, and both temporaries
; OR-ed into v0. The four instructions are matched order-independently.
190 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xyz:
191 ; UNPACKED-TID-NOT: v0
192 ; UNPACKED-TID-NOT: v1
193 ; UNPACKED-TID-NOT: v2
194 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
195 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
196 ; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, [[IDY]]
197 ; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, [[IDZ]]
202 define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 {
203 call void @use_workitem_id_xyz()
; Non-kernel function that calls use_workitem_id_x.
; Only the function label is checked in the lines visible here.
207 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_x:
211 define void @func_indirect_use_workitem_id_x() #1 {
212 call void @use_workitem_id_x()
; Non-kernel function that calls use_workitem_id_y.
; Only the function label is checked in the lines visible here.
216 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_y:
220 define void @func_indirect_use_workitem_id_y() #1 {
221 call void @use_workitem_id_y()
; Non-kernel function that calls use_workitem_id_z.
; Only the function label is checked in the lines visible here.
225 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_z:
229 define void @func_indirect_use_workitem_id_z() #1 {
230 call void @use_workitem_id_z()
; Callee with one explicit i32 argument that also reads workitem ID X.
; The explicit argument is expected in v0 (stored as-is), so the workitem ID
; is expected to be masked out of v1 instead.
234 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_x:
236 ; GCN-DAG: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v1
237 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
238 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
239 define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
240 %val = call i32 @llvm.amdgcn.workitem.id.x()
241 store volatile i32 %arg0, i32 addrspace(1)* undef
242 store volatile i32 %val, i32 addrspace(1)* undef
; Callee with one explicit i32 argument that also reads workitem ID Y.
; The explicit argument is expected in v0; the ID is expected to be extracted
; from bits 10..19 of v1.
246 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_y:
248 ; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v1, 10, 10
249 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
250 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
251 define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
252 %val = call i32 @llvm.amdgcn.workitem.id.y()
253 store volatile i32 %arg0, i32 addrspace(1)* undef
254 store volatile i32 %val, i32 addrspace(1)* undef
; Callee with one explicit i32 argument that also reads workitem ID Z.
; The explicit argument is expected in v0; the ID is expected to be extracted
; from bits 20..29 of v1.
258 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_z:
260 ; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v1, 20, 10
261 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
262 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
263 define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
264 %val = call i32 @llvm.amdgcn.workitem.id.z()
265 store volatile i32 %arg0, i32 addrspace(1)* undef
266 store volatile i32 %val, i32 addrspace(1)* undef
; Kernel that calls other_arg_use_workitem_id_x with the constant 555.
; Expected code first copies v0 into v1, then loads 0x22b (= 555) into v0 as
; the explicit argument. .amdhsa_system_vgpr_workitem_id must be 0.
271 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:
273 ; GCN: v_mov_b32_e32 v1, v0
274 ; GCN: v_mov_b32_e32 v0, 0x22b
277 ; GCN: .amdhsa_system_vgpr_workitem_id 0
278 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
279 call void @other_arg_use_workitem_id_x(i32 555)
; Kernel that calls other_arg_use_workitem_id_y with the constant 555.
; The UNPACKED-TID run expects v1 to be shifted left by 10 in place; the
; PACKED-TID run expects a plain copy of v0 into v1. Both then load 0x22b
; (= 555) into v0 and require .amdhsa_system_vgpr_workitem_id to be 1.
284 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:
286 ; UNPACKED-TID: v_lshlrev_b32_e32 v1, 10, v1
287 ; PACKED-TID: v_mov_b32_e32 v1, v0
289 ; GCN: v_mov_b32_e32 v0, 0x22b
294 ; GCN: .amdhsa_system_vgpr_workitem_id 1
295 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
296 call void @other_arg_use_workitem_id_y(i32 555)
; Kernel that calls other_arg_use_workitem_id_z with the constant 555.
; The UNPACKED-TID run expects v2 shifted left by 20 into v1; the PACKED-TID
; run expects a plain copy of v0 into v1. The load of 0x22b (= 555) into v0
; may appear in any order relative to those. .amdhsa_system_vgpr_workitem_id
; must be 2.
300 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:
302 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b
303 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v1, 20, v2
304 ; PACKED-TID-DAG: v_mov_b32_e32 v1, v0
308 ; GCN: .amdhsa_system_vgpr_workitem_id 2
309 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
310 call void @other_arg_use_workitem_id_z(i32 555)
; Callee with 32 explicit i32 arguments that also reads workitem ID X.
; The checks expect the packed ID to be loaded from the stack at s32 (no
; offset) into v32, masked with 0x3ff, and stored. The body then stores every
; explicit argument with a volatile store so all of them stay live.
314 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
315 ; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}}
316 ; GCN: v_and_b32_e32 v32, 0x3ff, v32
317 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
319 define void @too_many_args_use_workitem_id_x(
320 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
321 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
322 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
323 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
324 %val = call i32 @llvm.amdgcn.workitem.id.x()
325 store volatile i32 %val, i32 addrspace(1)* undef
327 store volatile i32 %arg0, i32 addrspace(1)* undef
328 store volatile i32 %arg1, i32 addrspace(1)* undef
329 store volatile i32 %arg2, i32 addrspace(1)* undef
330 store volatile i32 %arg3, i32 addrspace(1)* undef
331 store volatile i32 %arg4, i32 addrspace(1)* undef
332 store volatile i32 %arg5, i32 addrspace(1)* undef
333 store volatile i32 %arg6, i32 addrspace(1)* undef
334 store volatile i32 %arg7, i32 addrspace(1)* undef
336 store volatile i32 %arg8, i32 addrspace(1)* undef
337 store volatile i32 %arg9, i32 addrspace(1)* undef
338 store volatile i32 %arg10, i32 addrspace(1)* undef
339 store volatile i32 %arg11, i32 addrspace(1)* undef
340 store volatile i32 %arg12, i32 addrspace(1)* undef
341 store volatile i32 %arg13, i32 addrspace(1)* undef
342 store volatile i32 %arg14, i32 addrspace(1)* undef
343 store volatile i32 %arg15, i32 addrspace(1)* undef
345 store volatile i32 %arg16, i32 addrspace(1)* undef
346 store volatile i32 %arg17, i32 addrspace(1)* undef
347 store volatile i32 %arg18, i32 addrspace(1)* undef
348 store volatile i32 %arg19, i32 addrspace(1)* undef
349 store volatile i32 %arg20, i32 addrspace(1)* undef
350 store volatile i32 %arg21, i32 addrspace(1)* undef
351 store volatile i32 %arg22, i32 addrspace(1)* undef
352 store volatile i32 %arg23, i32 addrspace(1)* undef
354 store volatile i32 %arg24, i32 addrspace(1)* undef
355 store volatile i32 %arg25, i32 addrspace(1)* undef
356 store volatile i32 %arg26, i32 addrspace(1)* undef
357 store volatile i32 %arg27, i32 addrspace(1)* undef
358 store volatile i32 %arg28, i32 addrspace(1)* undef
359 store volatile i32 %arg29, i32 addrspace(1)* undef
360 store volatile i32 %arg30, i32 addrspace(1)* undef
361 store volatile i32 %arg31, i32 addrspace(1)* undef
; Kernel that calls too_many_args_use_workitem_id_x with 32 constant
; arguments. The checks expect s32 to be initialised to 0 and v0 to be stored
; to the stack at s32 (no offset). .amdhsa_system_vgpr_workitem_id must be 0.
366 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:
368 ; GCN: s_mov_b32 s32, 0
369 ; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
372 ; GCN: .amdhsa_system_vgpr_workitem_id 0
373 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
374 call void @too_many_args_use_workitem_id_x(
375 i32 10, i32 20, i32 30, i32 40,
376 i32 50, i32 60, i32 70, i32 80,
377 i32 90, i32 100, i32 110, i32 120,
378 i32 130, i32 140, i32 150, i32 160,
379 i32 170, i32 180, i32 190, i32 200,
380 i32 210, i32 220, i32 230, i32 240,
381 i32 250, i32 260, i32 270, i32 280,
382 i32 290, i32 300, i32 310, i32 320)
; Non-kernel function with one explicit i32 argument that stores it and then
; calls too_many_args_use_workitem_id_x with 32 constant arguments.
; The checks expect s32 to be copied into s33 and v1 to be stored to the
; stack at s32 (no offset).
386 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
387 ; GCN: s_mov_b32 s33, s32
388 ; GCN: buffer_store_dword v1, off, s[0:3], s32{{$}}
390 define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
391 store volatile i32 %arg0, i32 addrspace(1)* undef
392 call void @too_many_args_use_workitem_id_x(
393 i32 10, i32 20, i32 30, i32 40,
394 i32 50, i32 60, i32 70, i32 80,
395 i32 90, i32 100, i32 110, i32 120,
396 i32 130, i32 140, i32 150, i32 160,
397 i32 170, i32 180, i32 190, i32 200,
398 i32 210, i32 220, i32 230, i32 240,
399 i32 250, i32 260, i32 270, i32 280,
400 i32 290, i32 300, i32 310, i32 320)
; Function with 32 explicit i32 arguments that forwards all of them to
; too_many_args_use_workitem_id_x.
; The checks expect: s32 bumped by 0x400, v40 spilled at s32 offset 4, a dword
; reloaded from s33 into v32 and stored again at s32 for the callee, then s32
; adjusted by 0xfc00 and v40 reloaded.
404 ; Requires loading and storing to stack slot.
405 ; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
406 ; GCN-DAG: s_addk_i32 s32, 0x400{{$}}
407 ; GCN-DAG: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
408 ; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}}
410 ; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}}
414 ; GCN: s_addk_i32 s32, 0xfc00{{$}}
415 ; GCN: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
417 define void @too_many_args_call_too_many_args_use_workitem_id_x(
418 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
419 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
420 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
421 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
422 call void @too_many_args_use_workitem_id_x(
423 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
424 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
425 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
426 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31)
; Callee with 32 explicit i32 arguments plus a byval i32 in addrspace(5),
; which also reads workitem ID X.
; The checks expect the packed ID to be loaded from s32 offset 4 into v32 and
; the byval value to be loaded (glc) from s32 with no offset, matching the
; frame[0]/frame[1] layout noted below.
431 ; frame[0] = byval arg32
432 ; frame[1] = stack passed workitem ID x
433 ; frame[2] = VGPR spill slot
435 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
436 ; GFX7: buffer_load_dword v32, off, s[0:3], s32 offset:4
437 ; GFX90A: buffer_load_dword v32, off, s[0:3], s32 offset:4
439 ; GFX7: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
440 ; GFX90A: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32,
441 ; GFX7: buffer_load_dword v0, off, s[0:3], s32 glc{{$}}
442 ; GFX90A: buffer_load_dword v0, off, s[0:3], s32 glc{{$}}
444 define void @too_many_args_use_workitem_id_x_byval(
445 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
446 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
447 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
448 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, i32 addrspace(5)* byval(i32) %arg32) #1 {
449 %val = call i32 @llvm.amdgcn.workitem.id.x()
450 store volatile i32 %val, i32 addrspace(1)* undef
452 store volatile i32 %arg0, i32 addrspace(1)* undef
453 store volatile i32 %arg1, i32 addrspace(1)* undef
454 store volatile i32 %arg2, i32 addrspace(1)* undef
455 store volatile i32 %arg3, i32 addrspace(1)* undef
456 store volatile i32 %arg4, i32 addrspace(1)* undef
457 store volatile i32 %arg5, i32 addrspace(1)* undef
458 store volatile i32 %arg6, i32 addrspace(1)* undef
459 store volatile i32 %arg7, i32 addrspace(1)* undef
461 store volatile i32 %arg8, i32 addrspace(1)* undef
462 store volatile i32 %arg9, i32 addrspace(1)* undef
463 store volatile i32 %arg10, i32 addrspace(1)* undef
464 store volatile i32 %arg11, i32 addrspace(1)* undef
465 store volatile i32 %arg12, i32 addrspace(1)* undef
466 store volatile i32 %arg13, i32 addrspace(1)* undef
467 store volatile i32 %arg14, i32 addrspace(1)* undef
468 store volatile i32 %arg15, i32 addrspace(1)* undef
470 store volatile i32 %arg16, i32 addrspace(1)* undef
471 store volatile i32 %arg17, i32 addrspace(1)* undef
472 store volatile i32 %arg18, i32 addrspace(1)* undef
473 store volatile i32 %arg19, i32 addrspace(1)* undef
474 store volatile i32 %arg20, i32 addrspace(1)* undef
475 store volatile i32 %arg21, i32 addrspace(1)* undef
476 store volatile i32 %arg22, i32 addrspace(1)* undef
477 store volatile i32 %arg23, i32 addrspace(1)* undef
479 store volatile i32 %arg24, i32 addrspace(1)* undef
480 store volatile i32 %arg25, i32 addrspace(1)* undef
481 store volatile i32 %arg26, i32 addrspace(1)* undef
482 store volatile i32 %arg27, i32 addrspace(1)* undef
483 store volatile i32 %arg28, i32 addrspace(1)* undef
484 store volatile i32 %arg29, i32 addrspace(1)* undef
485 store volatile i32 %arg30, i32 addrspace(1)* undef
486 store volatile i32 %arg31, i32 addrspace(1)* undef
487 %private = load volatile i32, i32 addrspace(5)* %arg32
; Kernel that stores 999 into a private alloca and passes it byval, after 32
; constant arguments, to too_many_args_use_workitem_id_x_byval.
; The checks expect 0x3e7 (= 999) to be written to and reloaded from scratch
; offset 4, s32 to be set to 0x400, v0 to be stored at s32 offset 4, and the
; reloaded byval value to be stored at s32 with no offset.
; NOTE(review): the checks place v0 at s32 offset 4, which does not obviously
; correspond to the "sp[2]" slot named in the comment below - confirm.
493 ; sp[2] = stack passed workitem ID x
495 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
496 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
498 ; GCN: buffer_store_dword [[K]], off, s[0:3], 0 offset:4
499 ; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0 offset:4
500 ; GCN: s_movk_i32 s32, 0x400
501 ; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4
503 ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
504 ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
507 ; GCN: .amdhsa_system_vgpr_workitem_id 0
508 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
509 %alloca = alloca i32, align 4, addrspace(5)
510 store volatile i32 999, i32 addrspace(5)* %alloca
511 call void @too_many_args_use_workitem_id_x_byval(
512 i32 10, i32 20, i32 30, i32 40,
513 i32 50, i32 60, i32 70, i32 80,
514 i32 90, i32 100, i32 110, i32 120,
515 i32 130, i32 140, i32 150, i32 160,
516 i32 170, i32 180, i32 190, i32 200,
517 i32 210, i32 220, i32 230, i32 240,
518 i32 250, i32 260, i32 270, i32 280,
519 i32 290, i32 300, i32 310, i32 320,
520 i32 addrspace(5)* byval(i32) %alloca)
; Non-kernel counterpart of the byval kernel test: stores 999 into a private
; alloca and passes it byval, after 32 constant arguments, to
; too_many_args_use_workitem_id_x_byval.
; The checks expect 0x3e7 (= 999) to be written to and reloaded from the slot
; at s33, and the reloaded value to be stored at s32 for the callee.
524 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
525 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
526 ; GFX7: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
527 ; GFX90A: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
528 ; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
529 ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
530 ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
532 define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
533 %alloca = alloca i32, align 4, addrspace(5)
534 store volatile i32 999, i32 addrspace(5)* %alloca
535 call void @too_many_args_use_workitem_id_x_byval(
536 i32 10, i32 20, i32 30, i32 40,
537 i32 50, i32 60, i32 70, i32 80,
538 i32 90, i32 100, i32 110, i32 120,
539 i32 130, i32 140, i32 150, i32 160,
540 i32 170, i32 180, i32 190, i32 200,
541 i32 210, i32 220, i32 230, i32 240,
542 i32 250, i32 260, i32 270, i32 280,
543 i32 290, i32 300, i32 310, i32 320,
544 i32 addrspace(5)* byval(i32) %alloca)
; Callee with 32 explicit i32 arguments that also reads workitem IDs X, Y and
; Z. The checks expect the packed ID dword to be loaded from the stack at s32
; into v32 and then unpacked (mask 0x3ff, bits 10..19, bits 20..29), followed
; by the three ID stores and 32 further stores for the explicit arguments.
; The two Z-extract lines previously used prefixes that no run line enables,
; so FileCheck never evaluated them; they now use the per-target prefixes.
548 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
549 ; GFX90A: buffer_load_dword v32, off, s[0:3], s32{{$}}
550 ; GFX90A: v_and_b32_e32 v33, 0x3ff, v32
551 ; GFX90A: v_bfe_u32 v34, v32, 10, 10
552 ; GFX90A: v_bfe_u32 v32, v32, 20, 10
553 ; GFX7: buffer_load_dword v32, off, s[0:3], s32{{$}}
554 ; GFX7: v_and_b32_e32 v33, 0x3ff, v32
555 ; GFX7: v_bfe_u32 v33, v32, 10, 10
556 ; GFX7: v_bfe_u32 v32, v32, 20, 10
557 ; GFX7: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v33{{$}}
558 ; GFX7: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32{{$}}
559 ; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, v33, off{{$}}
560 ; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, v34, off{{$}}
561 ; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, v32, off{{$}}
563 ; GFX7-COUNT-32: flat_store_dword v{{\[[0-9]+:[0-9]+]}}
564 ; GFX90A-COUNT-32: global_store_dword v{{\[[0-9]+:[0-9]+]}}
565 ; GCN-NEXT: s_waitcnt
566 ; GCN-NEXT: s_setpc_b64
567 define void @too_many_args_use_workitem_id_xyz(
568 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
569 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
570 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
571 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
572 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
573 store volatile i32 %val0, i32 addrspace(1)* undef
574 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
575 store volatile i32 %val1, i32 addrspace(1)* undef
576 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
577 store volatile i32 %val2, i32 addrspace(1)* undef
579 store volatile i32 %arg0, i32 addrspace(1)* undef
580 store volatile i32 %arg1, i32 addrspace(1)* undef
581 store volatile i32 %arg2, i32 addrspace(1)* undef
582 store volatile i32 %arg3, i32 addrspace(1)* undef
583 store volatile i32 %arg4, i32 addrspace(1)* undef
584 store volatile i32 %arg5, i32 addrspace(1)* undef
585 store volatile i32 %arg6, i32 addrspace(1)* undef
586 store volatile i32 %arg7, i32 addrspace(1)* undef
588 store volatile i32 %arg8, i32 addrspace(1)* undef
589 store volatile i32 %arg9, i32 addrspace(1)* undef
590 store volatile i32 %arg10, i32 addrspace(1)* undef
591 store volatile i32 %arg11, i32 addrspace(1)* undef
592 store volatile i32 %arg12, i32 addrspace(1)* undef
593 store volatile i32 %arg13, i32 addrspace(1)* undef
594 store volatile i32 %arg14, i32 addrspace(1)* undef
595 store volatile i32 %arg15, i32 addrspace(1)* undef
597 store volatile i32 %arg16, i32 addrspace(1)* undef
598 store volatile i32 %arg17, i32 addrspace(1)* undef
599 store volatile i32 %arg18, i32 addrspace(1)* undef
600 store volatile i32 %arg19, i32 addrspace(1)* undef
601 store volatile i32 %arg20, i32 addrspace(1)* undef
602 store volatile i32 %arg21, i32 addrspace(1)* undef
603 store volatile i32 %arg22, i32 addrspace(1)* undef
604 store volatile i32 %arg23, i32 addrspace(1)* undef
606 store volatile i32 %arg24, i32 addrspace(1)* undef
607 store volatile i32 %arg25, i32 addrspace(1)* undef
608 store volatile i32 %arg26, i32 addrspace(1)* undef
609 store volatile i32 %arg27, i32 addrspace(1)* undef
610 store volatile i32 %arg28, i32 addrspace(1)* undef
611 store volatile i32 %arg29, i32 addrspace(1)* undef
612 store volatile i32 %arg30, i32 addrspace(1)* undef
613 store volatile i32 %arg31, i32 addrspace(1)* undef
; Kernel that calls too_many_args_use_workitem_id_xyz with 32 constant
; arguments. In the UNPACKED-TID run, v1 (shifted by 10) and v2 (shifted by
; 20) are expected to be OR-ed into v0. Both runs then expect v0 to be stored
; to the stack at s32 (no offset) and .amdhsa_system_vgpr_workitem_id to be 2.
618 ; frame[0] = ID { Z, Y, X }
620 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:
622 ; GCN-DAG: s_mov_b32 s32, 0
624 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v1, 10, v1
625 ; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, v1
626 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v2, 20, v2
627 ; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, v2
631 ; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
634 ; GCN: .amdhsa_system_vgpr_workitem_id 2
635 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
636 call void @too_many_args_use_workitem_id_xyz(
637 i32 10, i32 20, i32 30, i32 40,
638 i32 50, i32 60, i32 70, i32 80,
639 i32 90, i32 100, i32 110, i32 120,
640 i32 130, i32 140, i32 150, i32 160,
641 i32 170, i32 180, i32 190, i32 200,
642 i32 210, i32 220, i32 230, i32 240,
643 i32 250, i32 260, i32 270, i32 280,
644 i32 290, i32 300, i32 310, i32 320)
; Callee with 31 explicit i32 arguments that reads workitem IDs X, Y and Z.
; The checks expect all three IDs to be unpacked from v31 (mask 0x3ff, bits
; 10..19, bits 20..29), 31 further stores for the explicit arguments, and a
; reported scratch size of 0.
; NOTE(review): the three comment lines below describe Y/Z as stack-passed,
; but the checks read every ID from v31 and expect no scratch - the comment
; looks stale; confirm before relying on it.
648 ; workitem ID X in register, yz on stack
649 ; v31 = workitem ID X
650 ; frame[0] = workitem { Z, Y, X }
652 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz:
653 ; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
654 ; GCN-DAG: {{flat|global}}_store_dword v[0:1], [[IDX]]
655 ; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
656 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDY]]
657 ; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
658 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDZ]]
660 ; GCN-COUNT-31: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}
661 ; GCN-NEXT: s_waitcnt
663 ; GCN: ScratchSize: 0
664 define void @too_many_args_use_workitem_id_x_stack_yz(
665 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
666 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
667 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
668 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) #1 {
669 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
670 store volatile i32 %val0, i32 addrspace(1)* undef
671 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
672 store volatile i32 %val1, i32 addrspace(1)* undef
673 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
674 store volatile i32 %val2, i32 addrspace(1)* undef
676 store volatile i32 %arg0, i32 addrspace(1)* undef
677 store volatile i32 %arg1, i32 addrspace(1)* undef
678 store volatile i32 %arg2, i32 addrspace(1)* undef
679 store volatile i32 %arg3, i32 addrspace(1)* undef
680 store volatile i32 %arg4, i32 addrspace(1)* undef
681 store volatile i32 %arg5, i32 addrspace(1)* undef
682 store volatile i32 %arg6, i32 addrspace(1)* undef
683 store volatile i32 %arg7, i32 addrspace(1)* undef
685 store volatile i32 %arg8, i32 addrspace(1)* undef
686 store volatile i32 %arg9, i32 addrspace(1)* undef
687 store volatile i32 %arg10, i32 addrspace(1)* undef
688 store volatile i32 %arg11, i32 addrspace(1)* undef
689 store volatile i32 %arg12, i32 addrspace(1)* undef
690 store volatile i32 %arg13, i32 addrspace(1)* undef
691 store volatile i32 %arg14, i32 addrspace(1)* undef
692 store volatile i32 %arg15, i32 addrspace(1)* undef
694 store volatile i32 %arg16, i32 addrspace(1)* undef
695 store volatile i32 %arg17, i32 addrspace(1)* undef
696 store volatile i32 %arg18, i32 addrspace(1)* undef
697 store volatile i32 %arg19, i32 addrspace(1)* undef
698 store volatile i32 %arg20, i32 addrspace(1)* undef
699 store volatile i32 %arg21, i32 addrspace(1)* undef
700 store volatile i32 %arg22, i32 addrspace(1)* undef
701 store volatile i32 %arg23, i32 addrspace(1)* undef
703 store volatile i32 %arg24, i32 addrspace(1)* undef
704 store volatile i32 %arg25, i32 addrspace(1)* undef
705 store volatile i32 %arg26, i32 addrspace(1)* undef
706 store volatile i32 %arg27, i32 addrspace(1)* undef
707 store volatile i32 %arg28, i32 addrspace(1)* undef
708 store volatile i32 %arg29, i32 addrspace(1)* undef
709 store volatile i32 %arg30, i32 addrspace(1)* undef
; Kernel that calls too_many_args_use_workitem_id_x_stack_yz with 31 constant
; arguments. The UNPACKED-TID run expects v1 (shifted by 10) and v2 (shifted
; by 20) to be combined with v0, with the final OR writing v31; the
; PACKED-TID run expects a plain copy of v0 into v31. Both runs expect s32 to
; be set to 0 and .amdhsa_system_vgpr_workitem_id to be 2.
714 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:
717 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v1, 10, v1
718 ; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, v1
719 ; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v2, 20, v2
720 ; UNPACKED-TID-DAG: v_or_b32_e32 v31, v0, v2
721 ; PACKED-TID: v_mov_b32_e32 v31, v0
723 ; GCN: s_mov_b32 s32, 0
726 ; GCN: .amdhsa_system_vgpr_workitem_id 2
727 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
728 call void @too_many_args_use_workitem_id_x_stack_yz(
729 i32 10, i32 20, i32 30, i32 40,
730 i32 50, i32 60, i32 70, i32 80,
731 i32 90, i32 100, i32 110, i32 120,
732 i32 130, i32 140, i32 150, i32 160,
733 i32 170, i32 180, i32 190, i32 200,
734 i32 210, i32 220, i32 230, i32 240,
735 i32 250, i32 260, i32 270, i32 280,
736 i32 290, i32 300, i32 310)
; Declarations of the three workitem-ID intrinsics used by the tests above,
; followed by the attribute groups they and the test functions reference.
; Group #0 is attached to the intrinsic declarations; group #1 (which adds
; noinline) is attached to every function defined in this file. Both groups
; set the flat work-group size range to 1,512.
740 declare i32 @llvm.amdgcn.workitem.id.x() #0
741 declare i32 @llvm.amdgcn.workitem.id.y() #0
742 declare i32 @llvm.amdgcn.workitem.id.z() #0
744 attributes #0 = { nounwind readnone speculatable "amdgpu-flat-work-group-size"="1,512" }
745 attributes #1 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }