1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-code-object-v3 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; Callee that reads workitem ID x through the intrinsic and writes it out
; with a volatile store to an undef global pointer. The FileCheck lines
; expect the ID to be stored straight from v0.
; The address-register pattern accepts multi-digit register numbers, like
; the other_arg_use_workitem_id_* tests, so the test does not depend on
; which VGPR pair the allocator picks for the address.
3 ; GCN-LABEL: {{^}}use_workitem_id_x:
5 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
7 ; GCN-NEXT: s_setpc_b64
8 define void @use_workitem_id_x() #1 {
9 %val = call i32 @llvm.amdgcn.workitem.id.x()
10 store volatile i32 %val, i32 addrspace(1)* undef
; Callee that reads only workitem ID y and writes it out with a volatile
; store. The FileCheck lines expect the value to be stored from v0.
; The address-register pattern accepts multi-digit register numbers, like
; the other_arg_use_workitem_id_* tests.
14 ; GCN-LABEL: {{^}}use_workitem_id_y:
16 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
18 ; GCN-NEXT: s_setpc_b64
19 define void @use_workitem_id_y() #1 {
20 %val = call i32 @llvm.amdgcn.workitem.id.y()
21 store volatile i32 %val, i32 addrspace(1)* undef
; Callee that reads only workitem ID z and writes it out with a volatile
; store. The FileCheck lines expect the value to be stored from v0.
; The address-register pattern accepts multi-digit register numbers, like
; the other_arg_use_workitem_id_* tests.
25 ; GCN-LABEL: {{^}}use_workitem_id_z:
27 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
29 ; GCN-NEXT: s_setpc_b64
30 define void @use_workitem_id_z() #1 {
31 %val = call i32 @llvm.amdgcn.workitem.id.z()
32 store volatile i32 %val, i32 addrspace(1)* undef
; Callee that reads workitem IDs x and y and stores each with a volatile
; store. The FileCheck lines expect x to be stored from v0 and y from v1,
; in that order.
; The address-register pattern accepts multi-digit register numbers, like
; the other_arg_use_workitem_id_* tests.
36 ; GCN-LABEL: {{^}}use_workitem_id_xy:
38 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
39 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
41 ; GCN-NEXT: s_setpc_b64
42 define void @use_workitem_id_xy() #1 {
43 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
44 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
45 store volatile i32 %val0, i32 addrspace(1)* undef
46 store volatile i32 %val1, i32 addrspace(1)* undef
; Callee that reads all three workitem IDs and stores each with a volatile
; store. The FileCheck lines expect x, y and z to be stored from v0, v1
; and v2 respectively, in that order.
; The address-register pattern accepts multi-digit register numbers, like
; the other_arg_use_workitem_id_* tests.
50 ; GCN-LABEL: {{^}}use_workitem_id_xyz:
52 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
53 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
54 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v2
56 ; GCN-NEXT: s_setpc_b64
57 define void @use_workitem_id_xyz() #1 {
58 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
59 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
60 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
61 store volatile i32 %val0, i32 addrspace(1)* undef
62 store volatile i32 %val1, i32 addrspace(1)* undef
63 store volatile i32 %val2, i32 addrspace(1)* undef
; Callee that reads workitem IDs x and z (y is not used) and stores each
; with a volatile store. The FileCheck lines expect x to be stored from v0
; and z from v1.
; The address-register pattern accepts multi-digit register numbers, like
; the other_arg_use_workitem_id_* tests.
67 ; GCN-LABEL: {{^}}use_workitem_id_xz:
69 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
70 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
72 ; GCN-NEXT: s_setpc_b64
73 define void @use_workitem_id_xz() #1 {
74 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
75 %val1 = call i32 @llvm.amdgcn.workitem.id.z()
76 store volatile i32 %val0, i32 addrspace(1)* undef
77 store volatile i32 %val1, i32 addrspace(1)* undef
; Callee that reads workitem IDs y and z (x is not used) and stores each
; with a volatile store. The FileCheck lines expect y to be stored from v0
; and z from v1.
; The address-register pattern accepts multi-digit register numbers, like
; the other_arg_use_workitem_id_* tests.
81 ; GCN-LABEL: {{^}}use_workitem_id_yz:
83 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
84 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
86 ; GCN-NEXT: s_setpc_b64
87 define void @use_workitem_id_yz() #1 {
88 %val0 = call i32 @llvm.amdgcn.workitem.id.y()
89 %val1 = call i32 @llvm.amdgcn.workitem.id.z()
90 store volatile i32 %val0, i32 addrspace(1)* undef
91 store volatile i32 %val1, i32 addrspace(1)* undef
; Kernel that never reads a workitem ID itself but calls a function that
; uses workitem ID x. The FileCheck line expects the kernel's
; enable_vgpr_workitem_id setting to be 0.
95 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
96 ; GCN: enable_vgpr_workitem_id = 0
101 define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
102 call void @use_workitem_id_x()
; Kernel that calls a function using only workitem ID y. The FileCheck
; lines expect enable_vgpr_workitem_id to be 1 and a copy of v1 into v0.
; NOTE(review): presumably v1 holds ID y on kernel entry and v0 is where
; the callee expects it (use_workitem_id_y stores from v0) - confirm.
106 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
107 ; GCN: enable_vgpr_workitem_id = 1
111 ; GCN: v_mov_b32_e32 v0, v1
115 define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
116 call void @use_workitem_id_y()
; Kernel that calls a function using only workitem ID z. The FileCheck
; lines expect enable_vgpr_workitem_id to be 2 and a copy of v2 into v0.
; NOTE(review): presumably v2 holds ID z on kernel entry and v0 is where
; the callee expects it (use_workitem_id_z stores from v0) - confirm.
120 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
121 ; GCN: enable_vgpr_workitem_id = 2
125 ; GCN: v_mov_b32_e32 v0, v2
129 define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
130 call void @use_workitem_id_z()
; Non-kernel function that calls use_workitem_id_x without reading any
; workitem ID itself. Only the label is checked in the lines visible here.
134 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_x:
138 define void @func_indirect_use_workitem_id_x() #1 {
139 call void @use_workitem_id_x()
; Non-kernel function that calls use_workitem_id_y without reading any
; workitem ID itself. Only the label is checked in the lines visible here.
143 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_y:
147 define void @func_indirect_use_workitem_id_y() #1 {
148 call void @use_workitem_id_y()
; Non-kernel function that calls use_workitem_id_z without reading any
; workitem ID itself. Only the label is checked in the lines visible here.
152 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_z:
156 define void @func_indirect_use_workitem_id_z() #1 {
157 call void @use_workitem_id_z()
; Callee with one explicit i32 argument that also reads workitem ID x.
; It stores %arg0 first, then the ID. The FileCheck lines expect the first
; store to come from v0 and the second from v1.
161 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_x:
163 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
164 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
165 define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
166 %val = call i32 @llvm.amdgcn.workitem.id.x()
167 store volatile i32 %arg0, i32 addrspace(1)* undef
168 store volatile i32 %val, i32 addrspace(1)* undef
; Callee with one explicit i32 argument that also reads workitem ID y.
; It stores %arg0 first, then the ID. The FileCheck lines expect the first
; store to come from v0 and the second from v1.
172 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_y:
174 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
175 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
176 define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
177 %val = call i32 @llvm.amdgcn.workitem.id.y()
178 store volatile i32 %arg0, i32 addrspace(1)* undef
179 store volatile i32 %val, i32 addrspace(1)* undef
; Callee with one explicit i32 argument that also reads workitem ID z.
; It stores %arg0 first, then the ID. The FileCheck lines expect the first
; store to come from v0 and the second from v1.
183 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_z:
185 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
186 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
187 define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
188 %val = call i32 @llvm.amdgcn.workitem.id.z()
189 store volatile i32 %arg0, i32 addrspace(1)* undef
190 store volatile i32 %val, i32 addrspace(1)* undef
; Kernel that passes the constant 555 (0x22b) to a callee which also uses
; workitem ID x. The FileCheck lines expect enable_vgpr_workitem_id to be
; 0, v0 to be copied into v1, and then v0 to be loaded with 0x22b.
195 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:
196 ; GCN: enable_vgpr_workitem_id = 0
198 ; GCN: v_mov_b32_e32 v1, v0
199 ; GCN: v_mov_b32_e32 v0, 0x22b
201 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
202 call void @other_arg_use_workitem_id_x(i32 555)
; Kernel that passes the constant 555 (0x22b) to a callee which also uses
; workitem ID y. The FileCheck lines expect enable_vgpr_workitem_id to be
; 1 and v0 to be loaded with 0x22b.
207 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:
208 ; GCN: enable_vgpr_workitem_id = 1
211 ; GCN: v_mov_b32_e32 v0, 0x22b
215 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
216 call void @other_arg_use_workitem_id_y(i32 555)
; Kernel that passes the constant 555 (0x22b) to a callee which also uses
; workitem ID z. The FileCheck lines expect enable_vgpr_workitem_id to be
; 2, and (in either order) v0 loaded with 0x22b and v2 copied into v1.
220 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:
221 ; GCN: enable_vgpr_workitem_id = 2
223 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b
224 ; GCN-DAG: v_mov_b32_e32 v1, v2
227 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
228 call void @other_arg_use_workitem_id_z(i32 555)
; Callee with 32 explicit i32 arguments that also reads workitem ID x.
; It stores the ID first and then every argument, all with volatile stores.
; The FileCheck lines expect:
;   - s32 to be copied into s5,
;   - v32 to be spilled at s5 offset 8 and reloaded from there before return,
;   - the stored ID to be loaded into v32 from s5 offset 4.
; NOTE(review): presumably the ID is passed on the stack here because the
; 32 explicit arguments take all argument VGPRs - confirm against the
; calling convention.
232 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
233 ; GCN: s_mov_b32 s5, s32
234 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
235 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4{{$}}
236 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
238 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Reload
239 ; GCN-NEXT: s_waitcnt
240 ; GCN-NEXT: s_setpc_b64
241 define void @too_many_args_use_workitem_id_x(
242 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
243 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
244 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
245 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
246 %val = call i32 @llvm.amdgcn.workitem.id.x()
247 store volatile i32 %val, i32 addrspace(1)* undef
249 store volatile i32 %arg0, i32 addrspace(1)* undef
250 store volatile i32 %arg1, i32 addrspace(1)* undef
251 store volatile i32 %arg2, i32 addrspace(1)* undef
252 store volatile i32 %arg3, i32 addrspace(1)* undef
253 store volatile i32 %arg4, i32 addrspace(1)* undef
254 store volatile i32 %arg5, i32 addrspace(1)* undef
255 store volatile i32 %arg6, i32 addrspace(1)* undef
256 store volatile i32 %arg7, i32 addrspace(1)* undef
258 store volatile i32 %arg8, i32 addrspace(1)* undef
259 store volatile i32 %arg9, i32 addrspace(1)* undef
260 store volatile i32 %arg10, i32 addrspace(1)* undef
261 store volatile i32 %arg11, i32 addrspace(1)* undef
262 store volatile i32 %arg12, i32 addrspace(1)* undef
263 store volatile i32 %arg13, i32 addrspace(1)* undef
264 store volatile i32 %arg14, i32 addrspace(1)* undef
265 store volatile i32 %arg15, i32 addrspace(1)* undef
267 store volatile i32 %arg16, i32 addrspace(1)* undef
268 store volatile i32 %arg17, i32 addrspace(1)* undef
269 store volatile i32 %arg18, i32 addrspace(1)* undef
270 store volatile i32 %arg19, i32 addrspace(1)* undef
271 store volatile i32 %arg20, i32 addrspace(1)* undef
272 store volatile i32 %arg21, i32 addrspace(1)* undef
273 store volatile i32 %arg22, i32 addrspace(1)* undef
274 store volatile i32 %arg23, i32 addrspace(1)* undef
276 store volatile i32 %arg24, i32 addrspace(1)* undef
277 store volatile i32 %arg25, i32 addrspace(1)* undef
278 store volatile i32 %arg26, i32 addrspace(1)* undef
279 store volatile i32 %arg27, i32 addrspace(1)* undef
280 store volatile i32 %arg28, i32 addrspace(1)* undef
281 store volatile i32 %arg29, i32 addrspace(1)* undef
282 store volatile i32 %arg30, i32 addrspace(1)* undef
283 store volatile i32 %arg31, i32 addrspace(1)* undef
; Kernel that calls too_many_args_use_workitem_id_x with 32 constant
; arguments. The FileCheck lines expect enable_vgpr_workitem_id to be 0,
; s33 set from s7, s32 set from s33, v0 stored to the stack at s32 offset 4,
; and s4 set from s33.
288 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:
289 ; GCN: enable_vgpr_workitem_id = 0
291 ; GCN: s_mov_b32 s33, s7
292 ; GCN: s_mov_b32 s32, s33
293 ; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4
294 ; GCN: s_mov_b32 s4, s33
296 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
297 call void @too_many_args_use_workitem_id_x(
298 i32 10, i32 20, i32 30, i32 40,
299 i32 50, i32 60, i32 70, i32 80,
300 i32 90, i32 100, i32 110, i32 120,
301 i32 130, i32 140, i32 150, i32 160,
302 i32 170, i32 180, i32 190, i32 200,
303 i32 210, i32 220, i32 230, i32 240,
304 i32 250, i32 260, i32 270, i32 280,
305 i32 290, i32 300, i32 310, i32 320)
; Non-kernel function with one i32 argument that stores the argument and
; then calls too_many_args_use_workitem_id_x with 32 constant arguments.
; The FileCheck lines expect s32 to be copied into s5 and v1 to be stored
; to the stack relative to s32 (the offset value itself is not checked).
; NOTE(review): presumably v1 carries the incoming workitem ID x because
; v0 holds %arg0 - confirm.
309 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
310 ; GCN: s_mov_b32 s5, s32
311 ; GCN: buffer_store_dword v1, off, s[0:3], s32 offset:
313 define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
314 store volatile i32 %arg0, i32 addrspace(1)* undef
315 call void @too_many_args_use_workitem_id_x(
316 i32 10, i32 20, i32 30, i32 40,
317 i32 50, i32 60, i32 70, i32 80,
318 i32 90, i32 100, i32 110, i32 120,
319 i32 130, i32 140, i32 150, i32 160,
320 i32 170, i32 180, i32 190, i32 200,
321 i32 210, i32 220, i32 230, i32 240,
322 i32 250, i32 260, i32 270, i32 280,
323 i32 290, i32 300, i32 310, i32 320)
; Function with 32 i32 arguments that forwards all of them unchanged to
; too_many_args_use_workitem_id_x. The FileCheck lines expect v32 to be
; spilled at s5 offset 8, s32 to be advanced by 0x400, a value to be loaded
; from s5 offset 4 and stored again at s32 offset 4 for the callee, and
; finally v32 to be reloaded and s32 moved back by 0x400.
327 ; Requires loading and storing to stack slot.
328 ; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
329 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
330 ; GCN: s_add_u32 s32, s32, 0x400{{$}}
331 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
333 ; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4{{$}}
337 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Reload
338 ; GCN: s_sub_u32 s32, s32, 0x400{{$}}
340 define void @too_many_args_call_too_many_args_use_workitem_id_x(
341 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
342 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
343 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
344 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
345 call void @too_many_args_use_workitem_id_x(
346 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
347 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
348 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
349 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31)
; Callee with 32 i32 arguments plus a byval private (addrspace 5) pointer
; that also reads workitem ID x. It stores the ID, then every i32 argument,
; and finally does a volatile load through the byval pointer.
; The frame layout listed below uses 4-byte slots, which matches the
; checked offsets: byval at offset 4, workitem ID x at offset 8, and the
; v32 spill at offset 12.
354 ; frame[0] = emergency stack slot
355 ; frame[1] = byval arg32
356 ; frame[2] = stack passed workitem ID x
357 ; frame[3] = VGPR spill slot
359 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
360 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:12 ; 4-byte Folded Spill
361 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8
362 ; GCN-NEXT: s_waitcnt
363 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
364 ; GCN: buffer_load_dword v0, off, s[0:3], s5 offset:4
365 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:12 ; 4-byte Folded Reload
367 define void @too_many_args_use_workitem_id_x_byval(
368 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
369 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
370 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
371 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, i32 addrspace(5)* byval %arg32) #1 {
372 %val = call i32 @llvm.amdgcn.workitem.id.x()
373 store volatile i32 %val, i32 addrspace(1)* undef
375 store volatile i32 %arg0, i32 addrspace(1)* undef
376 store volatile i32 %arg1, i32 addrspace(1)* undef
377 store volatile i32 %arg2, i32 addrspace(1)* undef
378 store volatile i32 %arg3, i32 addrspace(1)* undef
379 store volatile i32 %arg4, i32 addrspace(1)* undef
380 store volatile i32 %arg5, i32 addrspace(1)* undef
381 store volatile i32 %arg6, i32 addrspace(1)* undef
382 store volatile i32 %arg7, i32 addrspace(1)* undef
384 store volatile i32 %arg8, i32 addrspace(1)* undef
385 store volatile i32 %arg9, i32 addrspace(1)* undef
386 store volatile i32 %arg10, i32 addrspace(1)* undef
387 store volatile i32 %arg11, i32 addrspace(1)* undef
388 store volatile i32 %arg12, i32 addrspace(1)* undef
389 store volatile i32 %arg13, i32 addrspace(1)* undef
390 store volatile i32 %arg14, i32 addrspace(1)* undef
391 store volatile i32 %arg15, i32 addrspace(1)* undef
393 store volatile i32 %arg16, i32 addrspace(1)* undef
394 store volatile i32 %arg17, i32 addrspace(1)* undef
395 store volatile i32 %arg18, i32 addrspace(1)* undef
396 store volatile i32 %arg19, i32 addrspace(1)* undef
397 store volatile i32 %arg20, i32 addrspace(1)* undef
398 store volatile i32 %arg21, i32 addrspace(1)* undef
399 store volatile i32 %arg22, i32 addrspace(1)* undef
400 store volatile i32 %arg23, i32 addrspace(1)* undef
402 store volatile i32 %arg24, i32 addrspace(1)* undef
403 store volatile i32 %arg25, i32 addrspace(1)* undef
404 store volatile i32 %arg26, i32 addrspace(1)* undef
405 store volatile i32 %arg27, i32 addrspace(1)* undef
406 store volatile i32 %arg28, i32 addrspace(1)* undef
407 store volatile i32 %arg29, i32 addrspace(1)* undef
408 store volatile i32 %arg30, i32 addrspace(1)* undef
409 store volatile i32 %arg31, i32 addrspace(1)* undef
410 %private = load volatile i32, i32 addrspace(5)* %arg32
; Kernel that writes 999 (0x3e7) into a private alloca and passes it as
; the byval argument, after 32 constant i32 arguments, to
; too_many_args_use_workitem_id_x_byval. The FileCheck lines expect
; enable_vgpr_workitem_id to be 0, s33 set from s7, s32 set to s33 + 0x400,
; 0x3e7 stored at s33 offset 4, v0 stored at s32 offset 8, and the alloca
; value reloaded from s33 offset 4 and stored at s32 offset 4.
; NOTE(review): the "sp[3]" comment below does not line up with the checked
; store of v0 at s32 offset 8, which would be slot 2 with 4-byte slots (the
; callee's own comment lists the ID at frame[2]) - confirm which is stale.
414 ; frame[0] = emergency stack slot
417 ; sp[0] = callee emergency stack slot reservation
420 ; sp[3] = stack passed workitem ID x
422 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
423 ; GCN: enable_vgpr_workitem_id = 0
425 ; GCN: s_mov_b32 s33, s7
426 ; GCN: s_add_u32 s32, s33, 0x400{{$}}
429 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
430 ; GCN: buffer_store_dword [[K]], off, s[0:3], s33 offset:4
431 ; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:8
433 ; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33 offset:4
434 ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
435 ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
437 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
438 %alloca = alloca i32, align 4, addrspace(5)
439 store volatile i32 999, i32 addrspace(5)* %alloca
440 call void @too_many_args_use_workitem_id_x_byval(
441 i32 10, i32 20, i32 30, i32 40,
442 i32 50, i32 60, i32 70, i32 80,
443 i32 90, i32 100, i32 110, i32 120,
444 i32 130, i32 140, i32 150, i32 160,
445 i32 170, i32 180, i32 190, i32 200,
446 i32 210, i32 220, i32 230, i32 240,
447 i32 250, i32 260, i32 270, i32 280,
448 i32 290, i32 300, i32 310, i32 320,
449 i32 addrspace(5)* %alloca)
; Non-kernel counterpart of the byval kernel test: writes 999 (0x3e7) into
; a private alloca and passes it as the byval argument after 32 constant
; i32 arguments. The FileCheck lines expect 0x3e7 stored at s5 offset 4,
; v0 stored at s32 offset 8, and the alloca value reloaded from s5 offset 4
; and stored at s32 offset 4.
453 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
454 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
455 ; GCN: buffer_store_dword [[K]], off, s[0:3], s5 offset:4
456 ; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:8
458 ; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s5 offset:4
459 ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
460 ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
462 define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
463 %alloca = alloca i32, align 4, addrspace(5)
464 store volatile i32 999, i32 addrspace(5)* %alloca
465 call void @too_many_args_use_workitem_id_x_byval(
466 i32 10, i32 20, i32 30, i32 40,
467 i32 50, i32 60, i32 70, i32 80,
468 i32 90, i32 100, i32 110, i32 120,
469 i32 130, i32 140, i32 150, i32 160,
470 i32 170, i32 180, i32 190, i32 200,
471 i32 210, i32 220, i32 230, i32 240,
472 i32 250, i32 260, i32 270, i32 280,
473 i32 290, i32 300, i32 310, i32 320,
474 i32 addrspace(5)* %alloca)
; Callee with 32 explicit i32 arguments that reads all three workitem IDs.
; Each ID is stored right after it is read, followed by volatile stores of
; every argument. The FileCheck lines expect:
;   - s32 to be copied into s5,
;   - v32 to be spilled at s5 offset 16 and reloaded from there before return,
;   - three loads into v32 from s5 offsets 4, 8 and 12, each followed by a
;     store of v32 (matching the x, y, z store order in the IR).
478 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
479 ; GCN: s_mov_b32 s5, s32
480 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Spill
481 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4{{$}}
482 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
483 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8{{$}}
484 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
485 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:12{{$}}
486 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
488 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Reload
489 ; GCN-NEXT: s_waitcnt
490 ; GCN-NEXT: s_setpc_b64
491 define void @too_many_args_use_workitem_id_xyz(
492 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
493 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
494 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
495 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
496 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
497 store volatile i32 %val0, i32 addrspace(1)* undef
498 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
499 store volatile i32 %val1, i32 addrspace(1)* undef
500 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
501 store volatile i32 %val2, i32 addrspace(1)* undef
503 store volatile i32 %arg0, i32 addrspace(1)* undef
504 store volatile i32 %arg1, i32 addrspace(1)* undef
505 store volatile i32 %arg2, i32 addrspace(1)* undef
506 store volatile i32 %arg3, i32 addrspace(1)* undef
507 store volatile i32 %arg4, i32 addrspace(1)* undef
508 store volatile i32 %arg5, i32 addrspace(1)* undef
509 store volatile i32 %arg6, i32 addrspace(1)* undef
510 store volatile i32 %arg7, i32 addrspace(1)* undef
512 store volatile i32 %arg8, i32 addrspace(1)* undef
513 store volatile i32 %arg9, i32 addrspace(1)* undef
514 store volatile i32 %arg10, i32 addrspace(1)* undef
515 store volatile i32 %arg11, i32 addrspace(1)* undef
516 store volatile i32 %arg12, i32 addrspace(1)* undef
517 store volatile i32 %arg13, i32 addrspace(1)* undef
518 store volatile i32 %arg14, i32 addrspace(1)* undef
519 store volatile i32 %arg15, i32 addrspace(1)* undef
521 store volatile i32 %arg16, i32 addrspace(1)* undef
522 store volatile i32 %arg17, i32 addrspace(1)* undef
523 store volatile i32 %arg18, i32 addrspace(1)* undef
524 store volatile i32 %arg19, i32 addrspace(1)* undef
525 store volatile i32 %arg20, i32 addrspace(1)* undef
526 store volatile i32 %arg21, i32 addrspace(1)* undef
527 store volatile i32 %arg22, i32 addrspace(1)* undef
528 store volatile i32 %arg23, i32 addrspace(1)* undef
530 store volatile i32 %arg24, i32 addrspace(1)* undef
531 store volatile i32 %arg25, i32 addrspace(1)* undef
532 store volatile i32 %arg26, i32 addrspace(1)* undef
533 store volatile i32 %arg27, i32 addrspace(1)* undef
534 store volatile i32 %arg28, i32 addrspace(1)* undef
535 store volatile i32 %arg29, i32 addrspace(1)* undef
536 store volatile i32 %arg30, i32 addrspace(1)* undef
537 store volatile i32 %arg31, i32 addrspace(1)* undef
; Kernel that calls too_many_args_use_workitem_id_xyz with 32 constant
; arguments. The FileCheck lines expect enable_vgpr_workitem_id to be 2,
; s33 set from s7, s32 set from s33, and (in any order) v0, v1 and v2 stored
; to the stack at s32 offsets 4, 8 and 12.
542 ; frame[0] = callee emergency stack slot
547 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:
548 ; GCN: enable_vgpr_workitem_id = 2
550 ; GCN: s_mov_b32 s33, s7
551 ; GCN: s_mov_b32 s32, s33
553 ; GCN-DAG: buffer_store_dword v0, off, s[0:3], s32 offset:4
554 ; GCN-DAG: buffer_store_dword v1, off, s[0:3], s32 offset:8
555 ; GCN-DAG: buffer_store_dword v2, off, s[0:3], s32 offset:12
557 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
558 call void @too_many_args_use_workitem_id_xyz(
559 i32 10, i32 20, i32 30, i32 40,
560 i32 50, i32 60, i32 70, i32 80,
561 i32 90, i32 100, i32 110, i32 120,
562 i32 130, i32 140, i32 150, i32 160,
563 i32 170, i32 180, i32 190, i32 200,
564 i32 210, i32 220, i32 230, i32 240,
565 i32 250, i32 260, i32 270, i32 280,
566 i32 290, i32 300, i32 310, i32 320)
; Callee with 31 explicit i32 arguments (%arg0 .. %arg30) that reads all
; three workitem IDs. Each ID is stored right after it is read, followed by
; volatile stores of every argument. The FileCheck lines expect the first
; stored value to come from v31, the next two to be loaded into v31 from s5
; offsets 4 and 8, and a reported ScratchSize of 12.
570 ; workitem ID X in register, yz on stack
571 ; v31 = workitem ID X
572 ; frame[0] = emergency slot
573 ; frame[1] = workitem Y
574 ; frame[2] = workitem Z
576 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz:
577 ; GCN: s_mov_b32 s5, s32
578 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v31
579 ; GCN: buffer_load_dword v31, off, s[0:3], s5 offset:4{{$}}
580 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v31
581 ; GCN: buffer_load_dword v31, off, s[0:3], s5 offset:8{{$}}
582 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v31
585 ; GCN-NEXT: s_setpc_b64
586 ; GCN: ScratchSize: 12
587 define void @too_many_args_use_workitem_id_x_stack_yz(
588 i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
589 i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
590 i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
591 i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) #1 {
592 %val0 = call i32 @llvm.amdgcn.workitem.id.x()
593 store volatile i32 %val0, i32 addrspace(1)* undef
594 %val1 = call i32 @llvm.amdgcn.workitem.id.y()
595 store volatile i32 %val1, i32 addrspace(1)* undef
596 %val2 = call i32 @llvm.amdgcn.workitem.id.z()
597 store volatile i32 %val2, i32 addrspace(1)* undef
599 store volatile i32 %arg0, i32 addrspace(1)* undef
600 store volatile i32 %arg1, i32 addrspace(1)* undef
601 store volatile i32 %arg2, i32 addrspace(1)* undef
602 store volatile i32 %arg3, i32 addrspace(1)* undef
603 store volatile i32 %arg4, i32 addrspace(1)* undef
604 store volatile i32 %arg5, i32 addrspace(1)* undef
605 store volatile i32 %arg6, i32 addrspace(1)* undef
606 store volatile i32 %arg7, i32 addrspace(1)* undef
608 store volatile i32 %arg8, i32 addrspace(1)* undef
609 store volatile i32 %arg9, i32 addrspace(1)* undef
610 store volatile i32 %arg10, i32 addrspace(1)* undef
611 store volatile i32 %arg11, i32 addrspace(1)* undef
612 store volatile i32 %arg12, i32 addrspace(1)* undef
613 store volatile i32 %arg13, i32 addrspace(1)* undef
614 store volatile i32 %arg14, i32 addrspace(1)* undef
615 store volatile i32 %arg15, i32 addrspace(1)* undef
617 store volatile i32 %arg16, i32 addrspace(1)* undef
618 store volatile i32 %arg17, i32 addrspace(1)* undef
619 store volatile i32 %arg18, i32 addrspace(1)* undef
620 store volatile i32 %arg19, i32 addrspace(1)* undef
621 store volatile i32 %arg20, i32 addrspace(1)* undef
622 store volatile i32 %arg21, i32 addrspace(1)* undef
623 store volatile i32 %arg22, i32 addrspace(1)* undef
624 store volatile i32 %arg23, i32 addrspace(1)* undef
626 store volatile i32 %arg24, i32 addrspace(1)* undef
627 store volatile i32 %arg25, i32 addrspace(1)* undef
628 store volatile i32 %arg26, i32 addrspace(1)* undef
629 store volatile i32 %arg27, i32 addrspace(1)* undef
630 store volatile i32 %arg28, i32 addrspace(1)* undef
631 store volatile i32 %arg29, i32 addrspace(1)* undef
632 store volatile i32 %arg30, i32 addrspace(1)* undef
; Kernel that calls too_many_args_use_workitem_id_x_stack_yz with 31
; constant arguments. The FileCheck lines expect enable_vgpr_workitem_id to
; be 2, s33 set from s7, s32 set from s33, and (in any order) v0 copied into
; v31, v1 stored at s32 offset 4 and v2 stored at s32 offset 8.
637 ; frame[0] = callee emergency stack slot
641 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:
642 ; GCN: enable_vgpr_workitem_id = 2
644 ; GCN: s_mov_b32 s33, s7
645 ; GCN: s_mov_b32 s32, s33
647 ; GCN-DAG: v_mov_b32_e32 v31, v0
648 ; GCN-DAG: buffer_store_dword v1, off, s[0:3], s32 offset:4
649 ; GCN-DAG: buffer_store_dword v2, off, s[0:3], s32 offset:8
651 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
652 call void @too_many_args_use_workitem_id_x_stack_yz(
653 i32 10, i32 20, i32 30, i32 40,
654 i32 50, i32 60, i32 70, i32 80,
655 i32 90, i32 100, i32 110, i32 120,
656 i32 130, i32 140, i32 150, i32 160,
657 i32 170, i32 180, i32 190, i32 200,
658 i32 210, i32 220, i32 230, i32 240,
659 i32 250, i32 260, i32 270, i32 280,
660 i32 290, i32 300, i32 310)
; Declarations of the three workitem ID intrinsics read by the tests.
; They share attribute group #0.
664 declare i32 @llvm.amdgcn.workitem.id.x() #0
665 declare i32 @llvm.amdgcn.workitem.id.y() #0
666 declare i32 @llvm.amdgcn.workitem.id.z() #0
; Attribute group #0 is attached to the intrinsic declarations; group #1 is
; attached to every function defined in this test. noinline keeps the
; callees as real calls.
668 attributes #0 = { nounwind readnone speculatable }
669 attributes #1 = { nounwind noinline }