1 ; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV5 %s
2 ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV5 %s
3 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV4 %s
4 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=GCN,MESA %s
; Kernel with no explicit arguments, using attribute group #0 (which sets no
; "amdgpu-implicitarg-num-bytes"). It performs a volatile i32 load through the
; pointer returned by llvm.amdgcn.implicitarg.ptr. The HSA checks expect that
; load to go through s[4:5] at offset 0x0, with a kernarg size of 56 under the
; COV4 prefix and 256 under the COV5 prefix. The MESA checks expect a 16-byte
; kernarg segment.
6 ; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
8 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
9 ; MESA: kernarg_segment_byte_size = 16
10 ; MESA: kernarg_segment_alignment = 4
12 ; HSA: s_load_dword s0, s[4:5], 0x0
14 ; COV4: .amdhsa_kernarg_size 56
15 ; COV5: .amdhsa_kernarg_size 256
16 define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
17 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
18 %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Same body as the argument-less kernel, but with attribute group #3, which sets
; "amdgpu-implicitarg-num-bytes"="0". The HSA checks expect the load to go
; through a 64-bit SGPR pair that was first set to 0, and a kernarg size of 0
; under both the COV4 and COV5 prefixes. The MESA checks still expect the load
; through s[4:5] and a 16-byte kernarg segment.
22 ; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty_0implicit:
23 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
24 ; MESA: kernarg_segment_byte_size = 16
25 ; MESA: kernarg_segment_alignment = 4
27 ; HSA: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0{{$}}
28 ; HSA: s_load_dword s0, [[NULL]], 0x0
30 ; MESA: s_load_dword s0, s[4:5], 0x0
32 ; COV4: .amdhsa_kernarg_size 0
33 ; COV5: .amdhsa_kernarg_size 0
34 define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 {
35 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
36 %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Argument-less kernel using attribute group #1, which sets
; "amdgpu-implicitarg-num-bytes"="48". The HSA checks expect the implicitarg
; load through s[4:5] at offset 0x0 and a kernarg size of 48 (a single HSA
; check, so the same value applies to every HSA code object version tested).
40 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
42 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
43 ; MESA: kernarg_segment_byte_size = 16
44 ; MESA: kernarg_segment_alignment = 4
46 ; HSA: s_load_dword s0, s[4:5], 0x0
48 ; HSA: .amdhsa_kernarg_size 48
49 define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
50 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
51 %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Kernel taking one 112-byte explicit argument ([112 x i8]) with attribute group
; #0. The HSA checks expect the implicitarg load through s[4:5] at offset 0x1c
; (0x1c * 4 = 112, so presumably a dword-scaled offset just past the explicit
; argument -- TODO confirm). Expected kernarg sizes are 168 under the COV4
; prefix and 368 under the COV5 prefix, i.e. 112 plus the 56 / 256 expected for
; the argument-less kernel. The MESA checks expect a 128-byte kernarg segment.
55 ; GCN-LABEL: {{^}}kernel_implicitarg_ptr:
57 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
58 ; MESA: kernarg_segment_byte_size = 128
59 ; MESA: kernarg_segment_alignment = 4
61 ; HSA: s_load_dword s0, s[4:5], 0x1c
63 ; COV4: .amdhsa_kernarg_size 168
64 ; COV5: .amdhsa_kernarg_size 368
65 define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
66 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
67 %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Kernel taking one 112-byte explicit argument ([112 x i8]) with attribute group
; #1 ("amdgpu-implicitarg-num-bytes"="48"). The HSA checks expect the
; implicitarg load through s[4:5] at offset 0x1c and a kernarg size of 160
; (112 + 48). The MESA checks expect a 128-byte kernarg segment.
71 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
73 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
74 ; MESA: kernarg_segment_byte_size = 128
75 ; MESA: kernarg_segment_alignment = 4
77 ; HSA: s_load_dword s0, s[4:5], 0x1c
79 ; HSA: .amdhsa_kernarg_size 160
80 define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
81 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
82 %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Non-kernel function (plain "define void", attribute group #0) that loads an
; i32 through llvm.amdgcn.implicitarg.ptr. The checks, shared by all targets
; tested, expect the load to go through s[8:9] at offset 0x0 and the function to
; return with s_setpc_b64.
86 ; GCN-LABEL: {{^}}func_implicitarg_ptr:
88 ; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
90 ; GCN-NEXT: s_setpc_b64
91 define void @func_implicitarg_ptr() #0 {
92 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
93 %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Non-kernel function with the same body as func_implicitarg_ptr. Despite the
; "opencl_" name it uses attribute group #0, not #1. The checks expect the load
; through s[8:9] at offset 0x0, followed on consecutive lines by s_waitcnt and
; s_setpc_b64.
97 ; GCN-LABEL: {{^}}opencl_func_implicitarg_ptr:
99 ; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
100 ; GCN-NEXT: s_waitcnt
101 ; GCN-NEXT: s_setpc_b64
102 define void @opencl_func_implicitarg_ptr() #0 {
103 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
104 %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Argument-less kernel (attribute group #0) that calls func_implicitarg_ptr. The
; checks expect s[4:5] to be copied into s[8:9] -- the register pair the callee's
; checks load from -- and the same kernarg sizes as the argument-less kernel
; that loads directly (56 under the COV4 prefix, 256 under the COV5 prefix).
108 ; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty:
110 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
111 ; MESA: kernarg_segment_byte_size = 16
112 ; MESA: kernarg_segment_alignment = 4
114 ; GCN: s_mov_b64 s[8:9], s[4:5]
117 ; COV4: .amdhsa_kernarg_size 56
118 ; COV5: .amdhsa_kernarg_size 256
119 define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
120 call void @func_implicitarg_ptr()
; Argument-less kernel with attribute group #3
; ("amdgpu-implicitarg-num-bytes"="0") that calls func_implicitarg_ptr. The HSA
; checks expect s[8:9] to be set to 0 and a kernarg size of 0. The MESA checks
; instead expect s[8:9] to be copied from s[4:5].
124 ; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty_implicit0:
125 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
126 ; MESA: kernarg_segment_byte_size = 16
127 ; MESA: kernarg_segment_alignment = 4
129 ; HSA: s_mov_b64 s[8:9], 0{{$}}
130 ; MESA: s_mov_b64 s[8:9], s[4:5]{{$}}
133 ; HSA: .amdhsa_kernarg_size 0
134 define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty_implicit0() #3 {
135 call void @func_implicitarg_ptr()
; Argument-less kernel with attribute group #1
; ("amdgpu-implicitarg-num-bytes"="48") that calls func_implicitarg_ptr. The
; checks expect s[4:5] to be copied into s[8:9] on all targets tested, and the
; HSA checks expect a kernarg size of 48.
139 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
140 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
141 ; MESA: kernarg_segment_byte_size = 16
142 ; GCN: s_mov_b64 s[8:9], s[4:5]
147 ; HSA: .amdhsa_kernarg_size 48
148 define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
149 call void @func_implicitarg_ptr()
; Kernel taking one 112-byte explicit argument ([112 x i8]) with attribute group
; #0 that calls func_implicitarg_ptr. The checks expect s[8:9] to be formed by a
; 64-bit add of 0x70 (= 112) to s[4:5]: s_add_u32 on the low half, then
; s_addc_u32 with 0 on the high half. Expected kernarg sizes are 168 under the
; COV4 prefix and 368 under the COV5 prefix.
153 ; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func:
154 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
155 ; MESA: kernarg_segment_byte_size = 128
156 ; MESA: kernarg_segment_alignment = 4
158 ; HSA: s_add_u32 s8, s4, 0x70
159 ; MESA: s_add_u32 s8, s4, 0x70
161 ; GCN: s_addc_u32 s9, s5, 0{{$}}
164 ; COV4: .amdhsa_kernarg_size 168
165 ; COV5: .amdhsa_kernarg_size 368
166 define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
167 call void @func_implicitarg_ptr()
; Kernel taking one 112-byte explicit argument ([112 x i8]) with attribute group
; #1 ("amdgpu-implicitarg-num-bytes"="48") that calls func_implicitarg_ptr. The
; checks expect s[8:9] = s[4:5] + 0x70 (= 112) via s_add_u32 / s_addc_u32, and
; the HSA checks expect a kernarg size of 160 (112 + 48).
171 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
172 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
173 ; MESA: kernarg_segment_byte_size = 128
174 ; MESA: kernarg_segment_alignment = 4
176 ; GCN: s_add_u32 s8, s4, 0x70
177 ; GCN: s_addc_u32 s9, s5, 0{{$}}
180 ; HSA: .amdhsa_kernarg_size 160
181 define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) #1 {
182 call void @func_implicitarg_ptr()
; Non-kernel function (attribute group #0) that calls func_implicitarg_ptr. The
; check shown here only requires that the function returns through
; s_setpc_b64 s[30:31].
186 ; GCN-LABEL: {{^}}func_call_implicitarg_ptr_func:
191 ; GCN: s_setpc_b64 s[30:31]
192 define void @func_call_implicitarg_ptr_func() #0 {
193 call void @func_implicitarg_ptr()
; Non-kernel function that calls func_implicitarg_ptr. Despite the "opencl_"
; name it uses attribute group #0. The check shown here only requires that the
; function returns through s_setpc_b64 s[30:31].
197 ; GCN-LABEL: {{^}}opencl_func_call_implicitarg_ptr_func:
202 ; GCN: s_setpc_b64 s[30:31]
203 define void @opencl_func_call_implicitarg_ptr_func() #0 {
204 call void @func_implicitarg_ptr()
; Non-kernel function (attribute group #0) that loads an i32 through both
; llvm.amdgcn.kernarg.segment.ptr and llvm.amdgcn.implicitarg.ptr. The checks
; expect one load through an SGPR pair that was set to 0 (the two -DAG lines may
; match in either order), then a load through s[8:9] at offset 0x0, then an
; s_waitcnt lgkmcnt(0).
; NOTE(review): presumably the zero-based load is the kernarg.segment.ptr load
; and the s[8:9] load is the implicitarg.ptr load -- confirm.
208 ; GCN-LABEL: {{^}}func_kernarg_implicitarg_ptr:
210 ; GCN-DAG: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0
211 ; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
212 ; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
213 ; GCN: s_waitcnt lgkmcnt(0)
214 define void @func_kernarg_implicitarg_ptr() #0 {
215 %kernarg.segment.ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
216 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
217 %load0 = load volatile i32, ptr addrspace(4) %kernarg.segment.ptr
218 %load1 = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Non-kernel function with the same body and the same expectations as
; func_kernarg_implicitarg_ptr. Despite the "opencl_" name it uses attribute
; group #0. The checks expect one load through an SGPR pair set to 0 (the -DAG
; lines may match in either order), then a load through s[8:9] at offset 0x0,
; then an s_waitcnt lgkmcnt(0).
222 ; GCN-LABEL: {{^}}opencl_func_kernarg_implicitarg_ptr:
224 ; GCN-DAG: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0
225 ; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
226 ; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
227 ; GCN: s_waitcnt lgkmcnt(0)
228 define void @opencl_func_kernarg_implicitarg_ptr() #0 {
229 %kernarg.segment.ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
230 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
231 %load0 = load volatile i32, ptr addrspace(4) %kernarg.segment.ptr
232 %load1 = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Kernel taking one 112-byte explicit argument ([112 x i8]) with attribute group
; #0 that calls func_kernarg_implicitarg_ptr. The checks expect s[8:9] to be
; formed as s[4:5] + 0x70 (= 112) via s_add_u32 / s_addc_u32.
236 ; GCN-LABEL: {{^}}kernel_call_kernarg_implicitarg_ptr_func:
237 ; GCN: s_add_u32 s8, s4, 0x70
238 ; GCN: s_addc_u32 s9, s5, 0
240 define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8]) #0 {
241 call void @func_kernarg_implicitarg_ptr()
; Kernel taking (<16 x i32>, i32) -- 64 + 4 = 68 bytes of explicit arguments --
; with attribute group #1 ("amdgpu-implicitarg-num-bytes"="48"). The MESA checks
; expect a kernarg segment of 84 bytes (68 + 16) with alignment value 6. The HSA
; checks expect a kernarg size of 120.
; NOTE(review): 120 = 72 + 48, so presumably the 68 explicit bytes are rounded
; up to a multiple of 8 rather than to the 64-byte alignment of the vector
; argument, as the function name suggests -- confirm.
245 ; GCN-LABEL: {{^}}kernel_implicitarg_no_struct_align_padding:
246 ; MESA: kernarg_segment_byte_size = 84
247 ; MESA: kernarg_segment_alignment = 6
249 ; HSA: .amdhsa_kernarg_size 120
250 define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>, i32) #1 {
251 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
252 %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Checks on the amdhsa.kernels metadata, applied only to the HSA invocations.
; Kernels are listed in the order they are defined above. For each one, the
; expected .kernarg_segment_align and the .kernarg_segment_size on the line
; right after it are written before the label check naming that kernel. Where
; the size differs by code object version, separate COV5 and COV4 lines give
; the two values; otherwise a single HSA line covers every version tested.
256 ; HSA-LABEL: amdhsa.kernels:
257 ; HSA: .kernarg_segment_align: 8
258 ; COV5-NEXT: .kernarg_segment_size: 256
259 ; COV4-NEXT: .kernarg_segment_size: 56
260 ; HSA-LABEL: .name: kernel_implicitarg_ptr_empty
262 ; HSA: .kernarg_segment_align: 4
263 ; HSA-NEXT: .kernarg_segment_size: 0
264 ; HSA-LABEL: .name: kernel_implicitarg_ptr_empty_0implicit
266 ; HSA: .kernarg_segment_align: 8
267 ; HSA-NEXT: .kernarg_segment_size: 48
268 ; HSA-LABEL: .name: opencl_kernel_implicitarg_ptr_empty
270 ; HSA: .kernarg_segment_align: 8
271 ; COV5-NEXT: .kernarg_segment_size: 368
272 ; COV4-NEXT: .kernarg_segment_size: 168
273 ; HSA-LABEL: .name: kernel_implicitarg_ptr
275 ; HSA: .kernarg_segment_align: 8
276 ; HSA-NEXT: .kernarg_segment_size: 160
277 ; HSA-LABEL: .name: opencl_kernel_implicitarg_ptr
279 ; HSA: .kernarg_segment_align: 8
280 ; COV5-NEXT: .kernarg_segment_size: 256
281 ; COV4-NEXT: .kernarg_segment_size: 56
282 ; HSA-LABEL: .name: kernel_call_implicitarg_ptr_func_empty
284 ; HSA: .kernarg_segment_align: 4
285 ; HSA-NEXT: .kernarg_segment_size: 0
286 ; HSA-LABEL: .name: kernel_call_implicitarg_ptr_func_empty_implicit0
288 ; HSA: .kernarg_segment_align: 8
289 ; HSA-NEXT: .kernarg_segment_size: 48
290 ; HSA-LABEL: .name: opencl_kernel_call_implicitarg_ptr_func_empty
292 ; HSA: .kernarg_segment_align: 8
293 ; COV5-NEXT: .kernarg_segment_size: 368
294 ; COV4-NEXT: .kernarg_segment_size: 168
295 ; HSA-LABEL: .name: kernel_call_implicitarg_ptr_func
297 ; HSA: .kernarg_segment_align: 8
298 ; HSA-NEXT: .kernarg_segment_size: 160
299 ; HSA-LABEL: .name: opencl_kernel_call_implicitarg_ptr_func
301 ; HSA: .kernarg_segment_align: 8
302 ; COV5-NEXT: .kernarg_segment_size: 368
303 ; COV4-NEXT: .kernarg_segment_size: 168
304 ; HSA-LABEL: .name: kernel_call_kernarg_implicitarg_ptr_func
306 ; HSA: .kernarg_segment_align: 64
307 ; HSA-NEXT: .kernarg_segment_size: 120
308 ; HSA-LABEL: .name: kernel_implicitarg_no_struct_align_padding
; Intrinsics used by the test functions. Both return a pointer in address
; space 4 and carry attribute group #2.
310 declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
311 declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #2
; Attribute groups:
;   #0 - no "amdgpu-implicitarg-num-bytes" attribute.
;   #1 - "amdgpu-implicitarg-num-bytes"="48" (used by most "opencl_" kernels).
;   #2 - attributes for the intrinsic declarations.
;   #3 - "amdgpu-implicitarg-num-bytes"="0".
; Groups #0, #1 and #3 all also set "amdgpu-no-dispatch-ptr" and
; "amdgpu-no-queue-ptr".
313 attributes #0 = { nounwind noinline "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" }
314 attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" }
315 attributes #2 = { nounwind readnone speculatable }
316 attributes #3 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" }
; Module flag selecting the AMDHSA code object version. The i32 operand of !0 is
; a placeholder token, not valid IR on its own: the sed step in each lit command
; line at the top of the file replaces it with 400, 500 or 600 before the module
; reaches llc.
318 !llvm.module.flags = !{!0}
319 !0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}