1 ; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
2 ; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; Loads one dword through the pointer returned by llvm.amdgcn.dispatch.ptr.
; The check expects the scalar load to address the register pair s[4:5].
4 ; GCN-LABEL: {{^}}use_dispatch_ptr:
5 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
6 define hidden void @use_dispatch_ptr() #1 {
7 %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
; View the i8 pointer as an i32 pointer so that a single dword is loaded.
8 %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
; The result is otherwise unused; the load is volatile, presumably so that it
; is not optimized away.
9 %value = load volatile i32, i32 addrspace(4)* %header_ptr
; Loads one dword through the pointer returned by llvm.amdgcn.queue.ptr.
; The check expects the scalar load to address the register pair s[6:7].
13 ; GCN-LABEL: {{^}}use_queue_ptr:
14 ; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
15 define hidden void @use_queue_ptr() #1 {
16 %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
; View the i8 pointer as an i32 pointer so that a single dword is loaded.
17 %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
18 %value = load volatile i32, i32 addrspace(4)* %header_ptr
; Loads one dword through the pointer returned by
; llvm.amdgcn.kernarg.segment.ptr. Unlike the other pointer tests, the checks
; expect the base address to be materialized as the constant 0 (s_mov_b64) and
; then used by the scalar load with offset 0x0, rather than an incoming SGPR
; pair being read.
22 ; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
23 ; GCN: s_mov_b64 [[PTR:s\[[0-9]+:[0-9]+\]]], 0
24 ; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0
25 define hidden void @use_kernarg_segment_ptr() #1 {
26 %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
27 %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
28 %value = load volatile i32, i32 addrspace(4)* %header_ptr
; Loads one dword through the pointer returned by llvm.amdgcn.implicitarg.ptr.
; The check expects the scalar load to address the register pair s[8:9].
32 ; GCN-LABEL: {{^}}use_implicitarg_ptr:
33 ; GCN: s_load_dword s{{[0-9]+}}, s[8:9]
34 define hidden void @use_implicitarg_ptr() #1 {
35 %implicit.arg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
; View the i8 pointer as an i32 pointer so that a single dword is loaded.
36 %header_ptr = bitcast i8 addrspace(4)* %implicit.arg.ptr to i32 addrspace(4)*
37 %value = load volatile i32, i32 addrspace(4)* %header_ptr
; Reads the 64-bit dispatch id and hands it to inline asm.
41 ; GCN-LABEL: {{^}}use_dispatch_id:
43 define hidden void @use_dispatch_id() #1 {
44 %id = call i64 @llvm.amdgcn.dispatch.id()
; The "s" constraint requests a scalar register operand; the asm text is only
; a comment, so the chosen register shows up in the output as "; use <reg>".
45 call void asm sideeffect "; use $0", "s"(i64 %id)
; Reads the workgroup id in X and passes it to inline asm as a scalar
; register operand ("s" constraint).
48 ; GCN-LABEL: {{^}}use_workgroup_id_x:
51 define hidden void @use_workgroup_id_x() #1 {
52 %val = call i32 @llvm.amdgcn.workgroup.id.x()
53 call void asm sideeffect "; use $0", "s"(i32 %val)
; Same as use_workgroup_id_x, but the function also owns a stack object.
; The check expects the store to that object to be a buffer_store_dword of v0
; through s[0:3] with s32 as the last operand on the line (no trailing offset).
57 ; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
60 ; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
63 define hidden void @use_stack_workgroup_id_x() #1 {
; Private (addrspace 5) stack slot; the volatile store forces a real access.
64 %alloca = alloca i32, addrspace(5)
65 store volatile i32 0, i32 addrspace(5)* %alloca
66 %val = call i32 @llvm.amdgcn.workgroup.id.x()
67 call void asm sideeffect "; use $0", "s"(i32 %val)
; Reads the workgroup id in Y and passes it to inline asm as a scalar
; register operand ("s" constraint).
71 ; GCN-LABEL: {{^}}use_workgroup_id_y:
74 define hidden void @use_workgroup_id_y() #1 {
75 %val = call i32 @llvm.amdgcn.workgroup.id.y()
76 call void asm sideeffect "; use $0", "s"(i32 %val)
; Reads the workgroup id in Z and passes it to inline asm as a scalar
; register operand ("s" constraint).
80 ; GCN-LABEL: {{^}}use_workgroup_id_z:
83 define hidden void @use_workgroup_id_z() #1 {
84 %val = call i32 @llvm.amdgcn.workgroup.id.z()
85 call void asm sideeffect "; use $0", "s"(i32 %val)
; Reads the workgroup ids in X and Y and passes each one to its own inline
; asm statement as a scalar register operand, X first and then Y.
89 ; GCN-LABEL: {{^}}use_workgroup_id_xy:
92 define hidden void @use_workgroup_id_xy() #1 {
93 %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
94 %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
95 call void asm sideeffect "; use $0", "s"(i32 %val0)
96 call void asm sideeffect "; use $0", "s"(i32 %val1)
; Reads the workgroup ids in X, Y and Z and passes each one to its own inline
; asm statement as a scalar register operand, in X, Y, Z order.
100 ; GCN-LABEL: {{^}}use_workgroup_id_xyz:
104 define hidden void @use_workgroup_id_xyz() #1 {
105 %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
106 %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
107 %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
108 call void asm sideeffect "; use $0", "s"(i32 %val0)
109 call void asm sideeffect "; use $0", "s"(i32 %val1)
110 call void asm sideeffect "; use $0", "s"(i32 %val2)
; Reads the workgroup ids in X and Z (Y is not requested) and passes each one
; to its own inline asm statement as a scalar register operand.
114 ; GCN-LABEL: {{^}}use_workgroup_id_xz:
117 define hidden void @use_workgroup_id_xz() #1 {
118 %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
119 %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
120 call void asm sideeffect "; use $0", "s"(i32 %val0)
121 call void asm sideeffect "; use $0", "s"(i32 %val1)
; Reads the workgroup ids in Y and Z (X is not requested) and passes each one
; to its own inline asm statement as a scalar register operand.
125 ; GCN-LABEL: {{^}}use_workgroup_id_yz:
128 define hidden void @use_workgroup_id_yz() #1 {
129 %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
130 %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
131 call void asm sideeffect "; use $0", "s"(i32 %val0)
132 call void asm sideeffect "; use $0", "s"(i32 %val1)
136 ; The argument is already in the right place.
; Non-kernel function whose only visible work is calling use_workgroup_id_x.
; The check expects a v_readlane_b32 from lane 0 of v40 into s4.
; NOTE(review): that looks like an SGPR being restored from a VGPR spill lane
; around the call; confirm against the full expected output.
137 ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
141 ; GCN: v_readlane_b32 s4, v40, 0
142 define hidden void @func_indirect_use_workgroup_id_x() #1 {
143 call void @use_workgroup_id_x()
; Non-kernel function whose only visible work is calling use_workgroup_id_y.
; The check expects a v_readlane_b32 from lane 0 of v40 into s4.
147 ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
149 ; GCN: v_readlane_b32 s4, v40, 0
150 define hidden void @func_indirect_use_workgroup_id_y() #1 {
151 call void @use_workgroup_id_y()
; Non-kernel function whose only visible work is calling use_workgroup_id_z.
; The check expects a v_readlane_b32 from lane 0 of v40 into s4.
155 ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
157 ; GCN: v_readlane_b32 s4, v40, 0
158 define hidden void @func_indirect_use_workgroup_id_z() #1 {
159 call void @use_workgroup_id_z()
; Uses the workgroup id in X in a function that also takes an ordinary i32
; argument. The checks expect %arg0 to be stored from v0: as a flat store
; under the CIVI prefix (kaveri run) and as a global store under the GFX9
; prefix (gfx900 run).
163 ; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
164 ; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
165 ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
167 define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
168 %val = call i32 @llvm.amdgcn.workgroup.id.x()
; The destination address is undef; only the stored value register is checked.
169 store volatile i32 %arg0, i32 addrspace(1)* undef
170 call void asm sideeffect "; use $0", "s"(i32 %val)
; Uses the workgroup id in Y in a function that also takes an ordinary i32
; argument. The checks expect %arg0 to be stored from v0: as a flat store
; under the CIVI prefix (kaveri run) and as a global store under the GFX9
; prefix (gfx900 run).
174 ; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
175 ; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
176 ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
178 define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
179 %val = call i32 @llvm.amdgcn.workgroup.id.y()
; The destination address is undef; only the stored value register is checked.
180 store volatile i32 %arg0, i32 addrspace(1)* undef
181 call void asm sideeffect "; use $0", "s"(i32 %val)
; Uses the workgroup id in Z in a function that also takes an ordinary i32
; argument. The checks expect %arg0 to be stored from v0: as a flat store
; under the CIVI prefix (kaveri run) and as a global store under the GFX9
; prefix (gfx900 run).
185 ; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
186 ; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
187 ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
189 define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
190 %val = call i32 @llvm.amdgcn.workgroup.id.z()
; The destination address is undef; only the stored value register is checked.
191 store volatile i32 %arg0, i32 addrspace(1)* undef
192 call void asm sideeffect "; use $0", "s"(i32 %val)
; Touches a stack slot and then reads the dispatch pointer, queue pointer,
; implicit argument pointer, dispatch id and all three workgroup ids in one
; function. The checks expect, in order: a buffer store addressed by s32 with
; no trailing offset, scalar loads from s[4:5], s[6:7] and s[8:9], and an
; inline-asm use of the pair s[10:11].
196 ; GCN-LABEL: {{^}}use_every_sgpr_input:
197 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
198 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
199 ; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
200 ; GCN: s_load_dword s{{[0-9]+}}, s[8:9]
201 ; GCN: ; use s[10:11]
205 define hidden void @use_every_sgpr_input() #1 {
; Private (addrspace 5) stack slot with a volatile store, matching the
; buffer store check.
206 %alloca = alloca i32, align 4, addrspace(5)
207 store volatile i32 0, i32 addrspace(5)* %alloca
; Dispatch pointer: one volatile dword load.
209 %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
210 %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
211 %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
; Queue pointer: one volatile dword load.
213 %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
214 %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
215 %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
; Implicit argument pointer: one volatile dword load.
217 %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
218 %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
219 %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc
; Dispatch id: the only 64-bit asm operand in this function.
221 %val3 = call i64 @llvm.amdgcn.dispatch.id()
222 call void asm sideeffect "; use $0", "s"(i64 %val3)
; Workgroup ids in X, Y and Z, each used by its own asm statement.
224 %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
225 call void asm sideeffect "; use $0", "s"(i32 %val4)
227 %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
228 call void asm sideeffect "; use $0", "s"(i32 %val5)
230 %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
231 call void asm sideeffect "; use $0", "s"(i32 %val6)
; Kernel entry point (amdgpu_kernel) that calls use_every_sgpr_input.
; The checks expect three scalar moves before the call (s14 into s12, s15
; into s13, s16 into s14), s32 being set to 0, and then the listed .amdhsa_*
; directives with exactly the values shown.
236 ; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
237 ; GCN: s_mov_b32 s12, s14
238 ; GCN: s_mov_b32 s13, s15
239 ; GCN: s_mov_b32 s14, s16
240 ; GCN: s_mov_b32 s32, 0
243 ; GCN: .amdhsa_user_sgpr_private_segment_buffer 1
244 ; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
245 ; GCN: .amdhsa_user_sgpr_queue_ptr 1
246 ; GCN: .amdhsa_user_sgpr_kernarg_segment_ptr 1
247 ; GCN: .amdhsa_user_sgpr_dispatch_id 1
248 ; GCN: .amdhsa_user_sgpr_flat_scratch_init 1
249 ; GCN: .amdhsa_user_sgpr_private_segment_size 0
250 ; GCN: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
251 ; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
252 ; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
253 ; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
254 ; GCN: .amdhsa_system_sgpr_workgroup_info 0
255 ; GCN: .amdhsa_system_vgpr_workitem_id 2
256 define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 {
257 call void @use_every_sgpr_input()
; Non-kernel function whose only visible work is calling
; use_every_sgpr_input. The check expects an s_or_saveexec_b64 that writes
; the pair s[16:17].
261 ; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
275 ; GCN: s_or_saveexec_b64 s[16:17], -1
276 define hidden void @func_indirect_use_every_sgpr_input() #1 {
277 call void @use_every_sgpr_input()
; Performs the same special-input reads as use_every_sgpr_input (stack slot,
; dispatch pointer, queue pointer, implicit argument pointer, dispatch id and
; the three workgroup ids) and then calls use_workgroup_id_xyz. The check
; expects an inline-asm use of the pair s[10:11]; the dispatch id is the only
; 64-bit asm operand in this function.
281 ; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
285 ; GCN: ; use s[10:11]
291 define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
; Private (addrspace 5) stack slot with a volatile store.
292 %alloca = alloca i32, align 4, addrspace(5)
293 store volatile i32 0, i32 addrspace(5)* %alloca
; Dispatch pointer: one volatile dword load.
295 %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
296 %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
297 %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
; Queue pointer: one volatile dword load.
299 %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
300 %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
301 %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
; Implicit argument pointer: one volatile dword load. The value names follow
; the intrinsic that is actually called.
303 %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
304 %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
305 %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc
; Dispatch id, passed to inline asm as a 64-bit scalar operand.
307 %val3 = call i64 @llvm.amdgcn.dispatch.id()
308 call void asm sideeffect "; use $0", "s"(i64 %val3)
; Workgroup ids in X, Y and Z, each used by its own asm statement.
310 %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
311 call void asm sideeffect "; use $0", "s"(i32 %val4)
313 %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
314 call void asm sideeffect "; use $0", "s"(i32 %val5)
316 %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
317 call void asm sideeffect "; use $0", "s"(i32 %val6)
; The callee reads the workgroup ids again after this function has used them.
319 call void @use_workgroup_id_xyz()
; Declarations of the AMDGPU intrinsics called by the functions in this file.
; All of them use attribute group #0, and the pointer-returning ones yield a
; noalias i8 pointer in address space 4.
323 declare i32 @llvm.amdgcn.workgroup.id.x() #0
324 declare i32 @llvm.amdgcn.workgroup.id.y() #0
325 declare i32 @llvm.amdgcn.workgroup.id.z() #0
326 declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
327 declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
328 declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
329 declare i64 @llvm.amdgcn.dispatch.id() #0
330 declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
; Group #0 is attached to the intrinsic declarations and to some intrinsic
; call sites. Group #1 is attached to every function defined in this file;
; it includes noinline, presumably so that callees stay as real calls.
332 attributes #0 = { nounwind readnone speculatable }
333 attributes #1 = { nounwind noinline }