1 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,WORKAROUND %s
2 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,WORKAROUND %s
4 ; Does not apply to wave64
5 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize64 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,NOWORKAROUND %s
6 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize64 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,NOWORKAROUND %s
8 ; Does not apply to gfx1101
9 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1101 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,NOWORKAROUND %s
10 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1101 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,NOWORKAROUND %s
12 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1102 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,WORKAROUND %s
13 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1102 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,WORKAROUND %s
15 ; Does not apply to gfx1103
16 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1103 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,NOWORKAROUND %s
17 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1103 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,NOWORKAROUND %s
19 ; There aren't any stack objects, but we still enable the
20 ; private_segment_wavefront_offset to get to 16, and the workgroup ID is in
21 ; s15 when the workaround applies.
23 ; private_segment_buffer + workgroup_id_x = 5, + 11 padding
25 ; GCN-LABEL: {{^}}minimal_kernel_inputs:
26 ; WORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s15
27 ; NOWORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s0
28 ; GCN-NEXT: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V]], off
30 ; GCN: .amdhsa_kernel minimal_kernel_inputs
31 ; WORKAROUND: .amdhsa_user_sgpr_count 15
32 ; NOWORKAROUND: .amdhsa_user_sgpr_count 0
33 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
34 ; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0
35 ; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
36 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
37 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
38 ; GCN-NEXT: .amdhsa_wavefront_size32
39 ; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
40 ; GCN-NEXT: .amdhsa_enable_private_segment 0
41 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
42 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
43 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
44 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_info 0
45 ; GCN-NEXT: .amdhsa_system_vgpr_workitem_id 0
46 ; WORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 15
47 ; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 0
; Kernel that needs no implicit inputs except workgroup ID x (everything else
; is disabled via the #0 "amdgpu-no-*" attributes). Without the workaround the
; user SGPR count is 0; with it, 15 user SGPRs are still reserved (see the
; WORKAROUND/NOWORKAROUND checks above).
48 define amdgpu_kernel void @minimal_kernel_inputs() #0 {
; Workgroup ID x is the only system SGPR input this kernel reads.
49 %id = call i32 @llvm.amdgcn.workgroup.id.x()
; Volatile store to an undef global pointer keeps the value live in codegen.
50 store volatile i32 %id, ptr addrspace(1) undef
54 ; GCN-LABEL: {{^}}minimal_kernel_inputs_with_stack:
55 ; WORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s15
56 ; NOWORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s0
57 ; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V]], off
59 ; GCN: .amdhsa_kernel minimal_kernel_inputs_with_stack
60 ; WORKAROUND: .amdhsa_user_sgpr_count 15
61 ; NOWORKAROUND: .amdhsa_user_sgpr_count 0
62 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
63 ; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0
64 ; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
65 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
66 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
67 ; GCN-NEXT: .amdhsa_wavefront_size32
68 ; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
69 ; GCN-NEXT: .amdhsa_enable_private_segment 1
70 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
71 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
72 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
73 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_info 0
74 ; GCN-NEXT: .amdhsa_system_vgpr_workitem_id 0
75 ; WORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 15
76 ; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 0
; Same as @minimal_kernel_inputs but with a private (addrspace(5)) stack
; object, which enables the private segment
; (.amdhsa_enable_private_segment 1 in the checks above).
77 define amdgpu_kernel void @minimal_kernel_inputs_with_stack() #0 {
; Stack object forces scratch/private-segment setup.
78 %alloca = alloca i32, addrspace(5)
79 %id = call i32 @llvm.amdgcn.workgroup.id.x()
80 store volatile i32 %id, ptr addrspace(1) undef
; Volatile store keeps the alloca from being optimized away.
81 store volatile i32 0, ptr addrspace(5) %alloca
85 ; GCN-LABEL: {{^}}queue_ptr:
86 ; GCN: global_load_u8 v{{[0-9]+}},
88 ; WORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s15
89 ; NOWORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s4
90 ; GCN-NEXT: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V]], off
92 ; GCN: .amdhsa_kernel queue_ptr
93 ; WORKAROUND: .amdhsa_user_sgpr_count 15
94 ; NOWORKAROUND: .amdhsa_user_sgpr_count 4
95 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
96 ; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 1
97 ; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
98 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
99 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
100 ; GCN-NEXT: .amdhsa_wavefront_size32
101 ; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
102 ; GCN-NEXT: .amdhsa_enable_private_segment 0
103 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
104 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
105 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
106 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_info 0
107 ; GCN-NEXT: .amdhsa_system_vgpr_workitem_id 0
108 ; WORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 15
109 ; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 4
; Kernel requesting the queue pointer user SGPR (attribute set #1 leaves
; queue-ptr and implicitarg-ptr enabled). NOWORKAROUND expects 4 user SGPRs;
; WORKAROUND still pads the count up to 15 (see checks above).
110 define amdgpu_kernel void @queue_ptr() #1 {
111 %queue.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
; Volatile load keeps the queue-pointer use live.
112 %load = load volatile i8, ptr addrspace(4) %queue.ptr
113 %id = call i32 @llvm.amdgcn.workgroup.id.x()
114 store volatile i32 %id, ptr addrspace(1) undef
118 ; GCN-LABEL: {{^}}all_inputs:
119 ; WORKAROUND: v_mov_b32_e32 [[V_X:v[0-9]+]], s13
120 ; WORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s14
121 ; WORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s15
123 ; NOWORKAROUND: v_mov_b32_e32 [[V_X:v[0-9]+]], s8
124 ; NOWORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s9
125 ; NOWORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s10
127 ; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1]
128 ; GCN: global_load_u8 v{{[0-9]+}},
129 ; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[4:5]
131 ; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_LO:[0-9]+]], s6
132 ; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_HI:[0-9]+]], s7
134 ; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_X]], off
135 ; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_Y]], off
136 ; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_Z]], off
137 ; GCN: global_store_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[DISPATCH_LO]]:[[DISPATCH_HI]]{{\]}}, off
139 ; GCN: .amdhsa_kernel all_inputs
140 ; WORKAROUND: .amdhsa_user_sgpr_count 13
141 ; NOWORKAROUND: .amdhsa_user_sgpr_count 8
142 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
143 ; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 1
144 ; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
145 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 1
146 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
147 ; GCN-NEXT: .amdhsa_wavefront_size32
148 ; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
149 ; GCN-NEXT: .amdhsa_enable_private_segment 1
150 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
151 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
152 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
153 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_info 0
154 ; GCN-NEXT: .amdhsa_system_vgpr_workitem_id 0
155 ; WORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 13
156 ; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 8
; Kernel that uses (nearly) every implicit input: dispatch ptr, queue ptr,
; implicitarg ptr, dispatch id, workgroup IDs x/y/z, plus a private stack
; object. NOWORKAROUND expects 8 user SGPRs; WORKAROUND pads to 13 so that
; the workgroup IDs land in s13-s15 (see checks above).
157 define amdgpu_kernel void @all_inputs() #2 {
; Private stack object -> .amdhsa_enable_private_segment 1.
158 %alloca = alloca i32, addrspace(5)
159 store volatile i32 0, ptr addrspace(5) %alloca
; Each volatile load below keeps the corresponding user-SGPR input live.
161 %dispatch.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
162 %load.dispatch = load volatile i8, ptr addrspace(4) %dispatch.ptr
164 %queue.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr()
165 %load.queue = load volatile i8, ptr addrspace(4) %queue.ptr
167 %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
168 %load.implicitarg = load volatile i8, ptr addrspace(4) %implicitarg.ptr
; Volatile stores keep all three workgroup IDs and the dispatch id live.
170 %id.x = call i32 @llvm.amdgcn.workgroup.id.x()
171 store volatile i32 %id.x, ptr addrspace(1) undef
173 %id.y = call i32 @llvm.amdgcn.workgroup.id.y()
174 store volatile i32 %id.y, ptr addrspace(1) undef
176 %id.z = call i32 @llvm.amdgcn.workgroup.id.z()
177 store volatile i32 %id.z, ptr addrspace(1) undef
179 %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
180 store volatile i64 %dispatch.id, ptr addrspace(1) undef
; Declarations of the AMDGPU implicit-input intrinsics exercised above.
185 declare i32 @llvm.amdgcn.workgroup.id.x() #3
186 declare i32 @llvm.amdgcn.workgroup.id.y() #3
187 declare i32 @llvm.amdgcn.workgroup.id.z() #3
188 declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #3
189 declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #3
190 declare align 4 ptr addrspace(4) @llvm.amdgcn.queue.ptr() #3
; Declared for completeness; not called by any kernel in this file.
191 declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #3
192 declare i64 @llvm.amdgcn.dispatch.id() #3
; #0: disables every implicit kernel input except workgroup-id-x -> minimal
; user SGPR requirements.
194 attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
; #1: like #0 but leaves queue-ptr and implicitarg-ptr enabled (used by
; @queue_ptr).
195 attributes #1 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
; #2: disables only lds-kernel-id, multigrid-sync-arg, and the workitem IDs,
; so @all_inputs still gets dispatch/queue/implicitarg/dispatch-id inputs.
196 attributes #2 = { "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
; #3: standard side-effect-free attributes for the intrinsic declarations.
197 attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; Pin the HSA code object ABI version (v5, i.e. 500) so the emitted
; .amdhsa_kernel metadata checked above is stable across defaults.
199 !llvm.module.flags = !{!0}
200 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}