1 ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV5 %s
2 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV4 %s
3 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=GCN,MESA %s
; Kernel with no explicit arguments, using attribute group #0 (which does not
; set "amdgpu-implicitarg-num-bytes"). It performs a volatile i32 load through
; the pointer returned by llvm.amdgcn.implicitarg.ptr.
; Expected on amdhsa: the load uses s[4:5] at offset 0, and the kernarg size
; is 56 for code object v4 and 256 for code object v5.
; Expected on mesa3d: the kernarg segment pointer SGPR is enabled and the
; kernarg segment byte size is 16.
5 ; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
7 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
8 ; MESA: kernarg_segment_byte_size = 16
9 ; MESA: kernarg_segment_alignment = 4
11 ; HSA: s_load_dword s0, s[4:5], 0x0
13 ; COV4: .amdhsa_kernarg_size 56
14 ; COV5: .amdhsa_kernarg_size 256
15 define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
16 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
17 %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Same body as the empty kernel above, but with attribute group #3, which sets
; "amdgpu-implicitarg-num-bytes" to "0".
; Expected on amdhsa: the load goes through an SGPR pair that was set to 0 by
; s_mov_b64, and the kernarg size is 0 for both code object versions.
; Expected on mesa3d: the load still uses s[4:5] and the kernarg segment byte
; size stays at 16.
21 ; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty_0implicit:
22 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
23 ; MESA: kernarg_segment_byte_size = 16
24 ; MESA: kernarg_segment_alignment = 4
26 ; HSA: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0{{$}}
27 ; HSA: s_load_dword s0, [[NULL]], 0x0
29 ; MESA: s_load_dword s0, s[4:5], 0x0
31 ; COV4: .amdhsa_kernarg_size 0
32 ; COV5: .amdhsa_kernarg_size 0
33 define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 {
34 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
35 %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Kernel with no explicit arguments and attribute group #1, which sets
; "amdgpu-implicitarg-num-bytes" to "48". It loads a volatile i32 through the
; implicit argument pointer.
; Expected on amdhsa: the load uses s[4:5] at offset 0 and the kernarg size is
; 48; this check is shared by both code object versions.
; Expected on mesa3d: kernarg segment byte size 16.
39 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
41 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
42 ; MESA: kernarg_segment_byte_size = 16
43 ; MESA: kernarg_segment_alignment = 4
45 ; HSA: s_load_dword s0, s[4:5], 0x0
47 ; HSA: .amdhsa_kernarg_size 48
48 define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
49 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
50 %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Kernel with one unnamed [112 x i8] explicit argument and attribute group #0.
; It loads a volatile i32 through the implicit argument pointer.
; Expected on amdhsa: the load uses s[4:5] with offset 0x1c (28; presumably a
; dword offset, i.e. 28 * 4 = 112 bytes past the kernarg base - confirm), and
; the kernarg size is 168 for code object v4 and 368 for v5, which is 112 more
; than the sizes checked for the kernel without explicit arguments.
; Expected on mesa3d: kernarg segment byte size 128.
54 ; GCN-LABEL: {{^}}kernel_implicitarg_ptr:
56 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
57 ; MESA: kernarg_segment_byte_size = 128
58 ; MESA: kernarg_segment_alignment = 4
60 ; HSA: s_load_dword s0, s[4:5], 0x1c
62 ; COV4: .amdhsa_kernarg_size 168
63 ; COV5: .amdhsa_kernarg_size 368
64 define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
65 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
66 %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Kernel with one unnamed [112 x i8] explicit argument and attribute group #1
; (48 implicit argument bytes). It loads a volatile i32 through the implicit
; argument pointer.
; Expected on amdhsa: the load uses s[4:5] with offset 0x1c and the kernarg
; size is 160 (112 + 48) for both code object versions.
; Expected on mesa3d: kernarg segment byte size 128.
70 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
72 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
73 ; MESA: kernarg_segment_byte_size = 128
74 ; MESA: kernarg_segment_alignment = 4
76 ; HSA: s_load_dword s0, s[4:5], 0x1c
78 ; HSA: .amdhsa_kernarg_size 160
79 define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
80 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
81 %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Non-kernel function (attribute group #0) that loads a volatile i32 through
; the implicit argument pointer.
; Expected in every run: the load uses s[8:9] as its base at offset 0, and the
; function returns with s_setpc_b64.
85 ; GCN-LABEL: {{^}}func_implicitarg_ptr:
87 ; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
89 ; GCN-NEXT: s_setpc_b64
90 define void @func_implicitarg_ptr() #0 {
91 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
92 %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Non-kernel function that loads a volatile i32 through the implicit argument
; pointer; the checks are the same as for func_implicitarg_ptr (load based on
; s[8:9] at offset 0, then s_setpc_b64).
; NOTE(review): despite the opencl_ prefix this function uses attribute group
; #0 rather than #1 - confirm that is intended.
96 ; GCN-LABEL: {{^}}opencl_func_implicitarg_ptr:
98 ; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
100 ; GCN-NEXT: s_setpc_b64
101 define void @opencl_func_implicitarg_ptr() #0 {
102 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
103 %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Kernel with no explicit arguments (attribute group #0) that calls
; @func_implicitarg_ptr.
; Expected in every run: s[4:5] is copied into s[8:9], the register pair the
; callee's checks use as the load base.
; Expected on amdhsa: kernarg size 56 for code object v4 and 256 for v5.
; Expected on mesa3d: kernarg segment byte size 16.
107 ; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty:
109 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
110 ; MESA: kernarg_segment_byte_size = 16
111 ; MESA: kernarg_segment_alignment = 4
113 ; GCN: s_mov_b64 s[8:9], s[4:5]
116 ; COV4: .amdhsa_kernarg_size 56
117 ; COV5: .amdhsa_kernarg_size 256
118 define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
119 call void @func_implicitarg_ptr()
; Kernel with no explicit arguments and attribute group #3 (implicit argument
; byte count "0") that calls @func_implicitarg_ptr.
; Expected on amdhsa: s[8:9] is set to 0 before the call and the kernarg size
; is 0.
; Expected on mesa3d: s[8:9] is still copied from s[4:5] and the kernarg
; segment byte size is 16.
123 ; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty_implicit0:
124 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
125 ; MESA: kernarg_segment_byte_size = 16
126 ; MESA: kernarg_segment_alignment = 4
128 ; HSA: s_mov_b64 s[8:9], 0{{$}}
129 ; MESA: s_mov_b64 s[8:9], s[4:5]{{$}}
132 ; HSA: .amdhsa_kernarg_size 0
133 define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty_implicit0() #3 {
134 call void @func_implicitarg_ptr()
; Kernel with no explicit arguments and attribute group #1 (48 implicit
; argument bytes) that calls @func_implicitarg_ptr.
; Expected in every run: s[4:5] is copied into s[8:9] for the callee.
; Expected on amdhsa: kernarg size 48 for both code object versions.
; Expected on mesa3d: kernarg segment byte size 16 and kernarg segment
; alignment 4, the same header values checked for
; opencl_kernel_implicitarg_ptr_empty, which has the same signature and
; attribute group.
138 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
139 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
140 ; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4
141 ; GCN: s_mov_b64 s[8:9], s[4:5]
146 ; HSA: .amdhsa_kernarg_size 48
147 define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
148 call void @func_implicitarg_ptr()
; Kernel with one unnamed [112 x i8] explicit argument (attribute group #0)
; that calls @func_implicitarg_ptr.
; Expected in every run: s[8:9] is formed by adding 0x70 (112) to s[4:5] as a
; 64-bit add (s_add_u32 on the low half, s_addc_u32 with 0 on the high half).
; The low-half add is checked with identical text under both the amdhsa and
; the mesa3d prefixes.
; Expected on amdhsa: kernarg size 168 for code object v4 and 368 for v5.
; Expected on mesa3d: kernarg segment byte size 128.
152 ; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func:
153 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
154 ; MESA: kernarg_segment_byte_size = 128
155 ; MESA: kernarg_segment_alignment = 4
157 ; HSA: s_add_u32 s8, s4, 0x70
158 ; MESA: s_add_u32 s8, s4, 0x70
160 ; GCN: s_addc_u32 s9, s5, 0{{$}}
163 ; COV4: .amdhsa_kernarg_size 168
164 ; COV5: .amdhsa_kernarg_size 368
165 define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
166 call void @func_implicitarg_ptr()
; Kernel with one unnamed [112 x i8] explicit argument and attribute group #1
; (48 implicit argument bytes) that calls @func_implicitarg_ptr.
; Expected in every run: s[8:9] = s[4:5] + 0x70 via s_add_u32 / s_addc_u32.
; Expected on amdhsa: kernarg size 160 (112 + 48).
; Expected on mesa3d: kernarg segment byte size 128.
170 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
171 ; MESA: enable_sgpr_kernarg_segment_ptr = 1
172 ; MESA: kernarg_segment_byte_size = 128
173 ; MESA: kernarg_segment_alignment = 4
175 ; GCN: s_add_u32 s8, s4, 0x70
176 ; GCN: s_addc_u32 s9, s5, 0{{$}}
179 ; HSA: .amdhsa_kernarg_size 160
180 define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) #1 {
181 call void @func_implicitarg_ptr()
; Non-kernel function (attribute group #0) that calls @func_implicitarg_ptr.
; The only check visible here is that it returns through s[30:31] with
; s_setpc_b64.
185 ; GCN-LABEL: {{^}}func_call_implicitarg_ptr_func:
190 ; GCN: s_setpc_b64 s[30:31]
191 define void @func_call_implicitarg_ptr_func() #0 {
192 call void @func_implicitarg_ptr()
; Non-kernel function that calls @func_implicitarg_ptr and is expected to
; return through s[30:31] with s_setpc_b64.
; NOTE(review): despite the opencl_ prefix this function uses attribute group
; #0 and calls the non-opencl callee - confirm that is intended.
196 ; GCN-LABEL: {{^}}opencl_func_call_implicitarg_ptr_func:
201 ; GCN: s_setpc_b64 s[30:31]
202 define void @opencl_func_call_implicitarg_ptr_func() #0 {
203 call void @func_implicitarg_ptr()
; Non-kernel function (attribute group #0) that performs two volatile i32
; loads: one through llvm.amdgcn.kernarg.segment.ptr and one through
; llvm.amdgcn.implicitarg.ptr.
; Expected in every run: one load whose base is an SGPR pair set to 0 (matched
; in any order with the s_mov_b64 that zeroes it), one load based on s[8:9],
; and then a wait on lgkmcnt(0).
; Presumably the zero-based load is the kernarg segment pointer load and the
; s[8:9] load is the implicit argument load, matching func_implicitarg_ptr -
; confirm.
207 ; GCN-LABEL: {{^}}func_kernarg_implicitarg_ptr:
209 ; GCN-DAG: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0
210 ; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
211 ; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
212 ; GCN: s_waitcnt lgkmcnt(0)
213 define void @func_kernarg_implicitarg_ptr() #0 {
214 %kernarg.segment.ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
215 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
216 %load0 = load volatile i32, ptr addrspace(4) %kernarg.segment.ptr
217 %load1 = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Non-kernel function with the same body and the same checks as
; func_kernarg_implicitarg_ptr: a load from a zeroed SGPR pair, a load based on
; s[8:9], then a wait on lgkmcnt(0).
; NOTE(review): despite the opencl_ prefix this function uses attribute group
; #0 rather than #1 - confirm that is intended.
221 ; GCN-LABEL: {{^}}opencl_func_kernarg_implicitarg_ptr:
223 ; GCN-DAG: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0
224 ; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
225 ; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
226 ; GCN: s_waitcnt lgkmcnt(0)
227 define void @opencl_func_kernarg_implicitarg_ptr() #0 {
228 %kernarg.segment.ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
229 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
230 %load0 = load volatile i32, ptr addrspace(4) %kernarg.segment.ptr
231 %load1 = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Kernel with one unnamed [112 x i8] explicit argument (attribute group #0)
; that calls @func_kernarg_implicitarg_ptr.
; Expected in every run: s[8:9] = s[4:5] + 0x70 (112) via s_add_u32 /
; s_addc_u32, the same setup checked for kernel_call_implicitarg_ptr_func.
235 ; GCN-LABEL: {{^}}kernel_call_kernarg_implicitarg_ptr_func:
236 ; GCN: s_add_u32 s8, s4, 0x70
237 ; GCN: s_addc_u32 s9, s5, 0
239 define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8]) #0 {
240 call void @func_kernarg_implicitarg_ptr()
; Kernel taking a <16 x i32> followed by an i32 (64 + 4 = 68 bytes of explicit
; arguments) with attribute group #1 (48 implicit argument bytes). It loads a
; volatile i32 through the implicit argument pointer.
; Expected on mesa3d: kernarg segment byte size 84 and alignment field 6.
; Expected on amdhsa: kernarg size 120.
; NOTE(review): 120 is presumably 68 rounded up to 72 plus the 48 implicit
; bytes, i.e. no padding out to the 64-byte vector alignment, as the kernel
; name suggests - confirm.
244 ; GCN-LABEL: {{^}}kernel_implicitarg_no_struct_align_padding:
245 ; MESA: kernarg_segment_byte_size = 84
246 ; MESA: kernarg_segment_alignment = 6
248 ; HSA: .amdhsa_kernarg_size 120
249 define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>, i32) #1 {
250 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
251 %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
; Checks on the amdhsa.kernels metadata, used only by the two amdhsa runs.
; Each group checks .kernarg_segment_align and then, on the next line,
; .kernarg_segment_size; the label check that follows names the kernel via its
; .name entry (presumably the kernel those two values belong to - confirm
; against the emitted metadata layout).
; Kernels using attribute group #0 have separate sizes for code object v5 and
; v4; kernels using #1 or #3 have a single size shared by both versions. The
; sizes repeat the .amdhsa_kernarg_size values checked next to each kernel
; definition.
255 ; HSA-LABEL: amdhsa.kernels:
256 ; HSA: .kernarg_segment_align: 8
257 ; COV5-NEXT: .kernarg_segment_size: 256
258 ; COV4-NEXT: .kernarg_segment_size: 56
259 ; HSA-LABEL: .name: kernel_implicitarg_ptr_empty
261 ; HSA: .kernarg_segment_align: 4
262 ; HSA-NEXT: .kernarg_segment_size: 0
263 ; HSA-LABEL: .name: kernel_implicitarg_ptr_empty_0implicit
265 ; HSA: .kernarg_segment_align: 8
266 ; HSA-NEXT: .kernarg_segment_size: 48
267 ; HSA-LABEL: .name: opencl_kernel_implicitarg_ptr_empty
269 ; HSA: .kernarg_segment_align: 8
270 ; COV5-NEXT: .kernarg_segment_size: 368
271 ; COV4-NEXT: .kernarg_segment_size: 168
272 ; HSA-LABEL: .name: kernel_implicitarg_ptr
274 ; HSA: .kernarg_segment_align: 8
275 ; HSA-NEXT: .kernarg_segment_size: 160
276 ; HSA-LABEL: .name: opencl_kernel_implicitarg_ptr
278 ; HSA: .kernarg_segment_align: 8
279 ; COV5-NEXT: .kernarg_segment_size: 256
280 ; COV4-NEXT: .kernarg_segment_size: 56
281 ; HSA-LABEL: .name: kernel_call_implicitarg_ptr_func_empty
283 ; HSA: .kernarg_segment_align: 4
284 ; HSA-NEXT: .kernarg_segment_size: 0
285 ; HSA-LABEL: .name: kernel_call_implicitarg_ptr_func_empty_implicit0
287 ; HSA: .kernarg_segment_align: 8
288 ; HSA-NEXT: .kernarg_segment_size: 48
289 ; HSA-LABEL: .name: opencl_kernel_call_implicitarg_ptr_func_empty
291 ; HSA: .kernarg_segment_align: 8
292 ; COV5-NEXT: .kernarg_segment_size: 368
293 ; COV4-NEXT: .kernarg_segment_size: 168
294 ; HSA-LABEL: .name: kernel_call_implicitarg_ptr_func
296 ; HSA: .kernarg_segment_align: 8
297 ; HSA-NEXT: .kernarg_segment_size: 160
298 ; HSA-LABEL: .name: opencl_kernel_call_implicitarg_ptr_func
300 ; HSA: .kernarg_segment_align: 8
301 ; COV5-NEXT: .kernarg_segment_size: 368
302 ; COV4-NEXT: .kernarg_segment_size: 168
303 ; HSA-LABEL: .name: kernel_call_kernarg_implicitarg_ptr_func
305 ; HSA: .kernarg_segment_align: 64
306 ; HSA-NEXT: .kernarg_segment_size: 120
307 ; HSA-LABEL: .name: kernel_implicitarg_no_struct_align_padding
; Declarations of the two intrinsics called by the functions in this file;
; both return a pointer in address space 4.
309 declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
310 declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #2
; Attribute groups:
;   #0 - baseline for kernels and functions, no implicit argument byte count.
;   #1 - sets "amdgpu-implicitarg-num-bytes" to "48".
;   #2 - attached to the intrinsic declarations.
;   #3 - sets "amdgpu-implicitarg-num-bytes" to "0".
312 attributes #0 = { nounwind noinline }
313 attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" }
314 attributes #2 = { nounwind readnone speculatable }
315 attributes #3 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" }
; Module flag carrying the code object version. The value is a placeholder
; token that the sed commands in the run lines replace with 500 or 400 before
; the file is passed to llc.
317 !llvm.module.flags = !{!0}
318 !0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}