1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA %s
2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s
; Test case: an amdgpu_kernel with no explicit arguments (attribute group #0)
; performs a volatile i32 load through the pointer returned by
; llvm.amdgcn.implicitarg.ptr. The checks expect the kernarg segment pointer
; SGPR to be enabled, a kernarg segment size of 0 under the HSA prefix and 16
; under the MESA prefix, and (HSA only) the load to read s[4:5] at offset 0x0.
4 ; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
5 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
7 ; HSA: kernarg_segment_byte_size = 0
8 ; MESA: kernarg_segment_byte_size = 16
10 ; HSA: s_load_dword s0, s[4:5], 0x0
11 define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
; Fetch the implicit-argument pointer, view it as an i32 pointer, and read one
; dword. The load is volatile, so it stays in the output although unused.
12 %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
13 %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
14 %load = load volatile i32, i32 addrspace(4)* %cast
; Test case: same body as kernel_implicitarg_ptr_empty, but the kernel uses
; attribute group #1, which sets "amdgpu-implicitarg-num-bytes"="48". The
; checks expect a kernarg segment size of 48 under the HSA prefix and 16 under
; the MESA prefix; the HSA load still reads s[4:5] at offset 0x0.
18 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
19 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
21 ; HSA: kernarg_segment_byte_size = 48
22 ; MESA: kernarg_segment_byte_size = 16
24 ; HSA: s_load_dword s0, s[4:5], 0x0
25 define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
; Volatile dword read through the implicit-argument pointer.
26 %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
27 %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
28 %load = load volatile i32, i32 addrspace(4)* %cast
; Test case: an amdgpu_kernel (attribute group #0) with one unnamed
; [112 x i8] argument reads a dword through the implicit-argument pointer.
; The checks expect a kernarg segment size of 112 under the HSA prefix and 128
; under the MESA prefix, and (HSA only) the load to use offset 0x1c from s[4:5].
; NOTE(review): 0x1c is 28 and 28 * 4 = 112, so the offset looks like a
; dword-scaled position just past the explicit argument - confirm.
32 ; GCN-LABEL: {{^}}kernel_implicitarg_ptr:
33 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
35 ; HSA: kernarg_segment_byte_size = 112
36 ; MESA: kernarg_segment_byte_size = 128
38 ; HSA: s_load_dword s0, s[4:5], 0x1c
39 define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
; Volatile dword read through the implicit-argument pointer.
40 %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
41 %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
42 %load = load volatile i32, i32 addrspace(4)* %cast
; Test case: same as kernel_implicitarg_ptr, but the kernel uses attribute
; group #1 ("amdgpu-implicitarg-num-bytes"="48"). The checks expect a kernarg
; segment size of 160 under the HSA prefix (112 + 48 = 160) and 128 under the
; MESA prefix; the HSA load offset stays at 0x1c.
46 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
47 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
49 ; HSA: kernarg_segment_byte_size = 160
50 ; MESA: kernarg_segment_byte_size = 128
52 ; HSA: s_load_dword s0, s[4:5], 0x1c
53 define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
; Volatile dword read through the implicit-argument pointer.
54 %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
55 %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
56 %load = load volatile i32, i32 addrspace(4)* %cast
; Test case: a non-kernel function (attribute group #0) reads a dword through
; the implicit-argument pointer. For both runs the checks expect an
; s_load_dword that addresses s[4:5] at offset 0x0, followed by s_setpc_b64.
60 ; GCN-LABEL: {{^}}func_implicitarg_ptr:
62 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
64 ; GCN-NEXT: s_setpc_b64
65 define void @func_implicitarg_ptr() #0 {
; Volatile dword read through the implicit-argument pointer.
66 %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
67 %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
68 %load = load volatile i32, i32 addrspace(4)* %cast
; Test case: non-kernel function with the same body and the same expected
; output as func_implicitarg_ptr (s_load_dword via s[4:5] at 0x0, then
; s_setpc_b64).
; NOTE(review): despite the "opencl" name this function uses attribute group
; #0, not #1 - confirm that is intended.
72 ; GCN-LABEL: {{^}}opencl_func_implicitarg_ptr:
74 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
76 ; GCN-NEXT: s_setpc_b64
77 define void @opencl_func_implicitarg_ptr() #0 {
; Volatile dword read through the implicit-argument pointer.
78 %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
79 %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
80 %load = load volatile i32, i32 addrspace(4)* %cast
; Test case: an argument-less amdgpu_kernel (attribute group #0) that only
; calls @func_implicitarg_ptr. The checks expect the kernarg segment pointer
; SGPR to be enabled and a kernarg segment size of 0 under the HSA prefix and
; 16 under the MESA prefix.
84 ; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty:
85 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
86 ; HSA: kernarg_segment_byte_size = 0
87 ; MESA: kernarg_segment_byte_size = 16
92 define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
; The callee is the function that reads through the implicit-argument pointer.
93 call void @func_implicitarg_ptr()
; Test case: an argument-less amdgpu_kernel with attribute group #1
; ("amdgpu-implicitarg-num-bytes"="48") that only calls @func_implicitarg_ptr.
; The checks expect a kernarg segment size of 48 under the HSA prefix and 16
; under the MESA prefix.
97 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
98 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
99 ; HSA: kernarg_segment_byte_size = 48
100 ; MESA: kernarg_segment_byte_size = 16
105 define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
; The callee is the function that reads through the implicit-argument pointer.
106 call void @func_implicitarg_ptr()
; Test case: an amdgpu_kernel (attribute group #0) with one unnamed
; [112 x i8] argument that only calls @func_implicitarg_ptr. Besides the
; kernarg segment sizes (112 under the HSA prefix, 128 under the MESA prefix),
; the checks expect s4 to be advanced by 0x70 (= 112) with s_add_u32 and the
; carry to be added into s5 with s_addc_u32. The {{$}} anchor requires the
; s_addc_u32 immediate to be exactly 0 at the end of the line.
110 ; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func:
111 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
112 ; HSA: kernarg_segment_byte_size = 112
113 ; MESA: kernarg_segment_byte_size = 128
115 ; HSA: s_add_u32 s4, s4, 0x70
116 ; MESA: s_add_u32 s4, s4, 0x70
118 ; GCN: s_addc_u32 s5, s5, 0{{$}}
120 define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
; The callee is the function that reads through the implicit-argument pointer.
121 call void @func_implicitarg_ptr()
; Test case: same as kernel_call_implicitarg_ptr_func, but the kernel uses
; attribute group #1 ("amdgpu-implicitarg-num-bytes"="48"). The checks expect
; a kernarg segment size of 160 under the HSA prefix (112 + 48 = 160) and 128
; under the MESA prefix, and for both runs the same s4/s5 adjustment by
; 0x70 (= 112).
125 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
126 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
127 ; HSA: kernarg_segment_byte_size = 160
128 ; MESA: kernarg_segment_byte_size = 128
130 ; GCN: s_add_u32 s4, s4, 0x70
131 ; GCN: s_addc_u32 s5, s5, 0{{$}}
133 define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) #1 {
; The callee is the function that reads through the implicit-argument pointer.
134 call void @func_implicitarg_ptr()
; Test case: a non-kernel function (attribute group #0) that only calls
; @func_implicitarg_ptr. Only the label check is visible for this case here.
138 ; GCN-LABEL: {{^}}func_call_implicitarg_ptr_func:
142 define void @func_call_implicitarg_ptr_func() #0 {
; The callee is the function that reads through the implicit-argument pointer.
143 call void @func_implicitarg_ptr()
; Test case: non-kernel function that only calls @func_implicitarg_ptr. Only
; the label check is visible for this case here.
; NOTE(review): despite the "opencl" name this function uses attribute group
; #0 and calls @func_implicitarg_ptr rather than @opencl_func_implicitarg_ptr
; - confirm that is intended.
147 ; GCN-LABEL: {{^}}opencl_func_call_implicitarg_ptr_func:
151 define void @opencl_func_call_implicitarg_ptr_func() #0 {
152 call void @func_implicitarg_ptr()
; Test case: a non-kernel function (attribute group #0) that reads one dword
; through llvm.amdgcn.kernarg.segment.ptr and one through
; llvm.amdgcn.implicitarg.ptr. The checks expect a 64-bit SGPR pair to be set
; to 0 and used as the base of one s_load_dword (offset 0x0), a second
; s_load_dword based on s[4:5] (offset 0x0), and then s_waitcnt lgkmcnt(0).
; The first two checks use the DAG form, so their relative order is not fixed.
; NOTE(review): the zeroed pair presumably stands in for the kernarg segment
; pointer, which would then be null inside a non-kernel function - confirm.
156 ; GCN-LABEL: {{^}}func_kernarg_implicitarg_ptr:
158 ; GCN-DAG: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0
159 ; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
160 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
161 ; GCN: s_waitcnt lgkmcnt(0)
162 define void @func_kernarg_implicitarg_ptr() #0 {
; Obtain both pointers and view each one as an i32 pointer.
163 %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
164 %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
165 %cast.kernarg.segment.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
166 %cast.implicitarg = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
; Volatile loads: kernarg segment pointer first, implicit-argument pointer second.
167 %load0 = load volatile i32, i32 addrspace(4)* %cast.kernarg.segment.ptr
168 %load1 = load volatile i32, i32 addrspace(4)* %cast.implicitarg
; Test case: non-kernel function with the same body and the same expected
; output as func_kernarg_implicitarg_ptr: one s_load_dword based on a 64-bit
; SGPR pair that was set to 0, one s_load_dword based on s[4:5], then
; s_waitcnt lgkmcnt(0).
; NOTE(review): despite the "opencl" name this function uses attribute group
; #0, not #1 - confirm that is intended.
172 ; GCN-LABEL: {{^}}opencl_func_kernarg_implicitarg_ptr:
174 ; GCN-DAG: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0
175 ; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
176 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
177 ; GCN: s_waitcnt lgkmcnt(0)
178 define void @opencl_func_kernarg_implicitarg_ptr() #0 {
; Obtain both pointers and view each one as an i32 pointer.
179 %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
180 %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
181 %cast.kernarg.segment.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
182 %cast.implicitarg = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
; Volatile loads: kernarg segment pointer first, implicit-argument pointer second.
183 %load0 = load volatile i32, i32 addrspace(4)* %cast.kernarg.segment.ptr
184 %load1 = load volatile i32, i32 addrspace(4)* %cast.implicitarg
; Test case: an amdgpu_kernel (attribute group #0) with one unnamed
; [112 x i8] argument that only calls @func_kernarg_implicitarg_ptr. The
; checks expect s4 to be advanced by 0x70 (= 112) with s_add_u32 and the carry
; to be added into s5 with s_addc_u32.
; The {{$}} anchor pins the s_addc_u32 immediate to exactly 0 at the end of
; the line, so an immediate that merely starts with 0 (for example 0x70)
; cannot satisfy the check. This matches the anchored form used by the
; kernel_call_implicitarg_ptr_func checks.
188 ; GCN-LABEL: {{^}}kernel_call_kernarg_implicitarg_ptr_func:
189 ; GCN: s_add_u32 s4, s4, 0x70
190 ; GCN: s_addc_u32 s5, s5, 0{{$}}
192 define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8]) #0 {
; The callee reads through both the kernarg segment pointer and the
; implicit-argument pointer.
193 call void @func_kernarg_implicitarg_ptr()
; Test case: an amdgpu_kernel with attribute group #1
; ("amdgpu-implicitarg-num-bytes"="48") and explicit arguments <16 x i32> and
; i32 reads a dword through the implicit-argument pointer. The checks expect a
; kernarg segment size of 120 under the HSA prefix and 84 under the MESA
; prefix, and kernarg_segment_alignment = 6 for both runs.
; NOTE(review): the explicit arguments occupy 64 + 4 = 68 bytes. 120 would be
; 68 rounded up to 72 plus the 48 implicit bytes, and 84 would be 68 + 16,
; i.e. no padding out to the 64-byte vector alignment - confirm against the
; backend's kernarg layout rules.
197 ; GCN-LABEL: {{^}}kernel_implicitarg_no_struct_align_padding:
198 ; HSA: kernarg_segment_byte_size = 120
199 ; MESA: kernarg_segment_byte_size = 84
200 ; GCN: kernarg_segment_alignment = 6
201 define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>, i32) #1 {
; Volatile dword read through the implicit-argument pointer.
202 %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
203 %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
204 %load = load volatile i32, i32 addrspace(4)* %cast
; Intrinsics used by the test cases in this file. Both take no operands and
; return an i8 pointer in address space 4.
208 declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #2
209 declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2
; Attribute groups:
;   #0 - plain functions and kernels; noinline keeps callees as real calls.
;   #1 - same as #0 plus "amdgpu-implicitarg-num-bytes"="48"; used by the
;        opencl_kernel_* kernels and kernel_implicitarg_no_struct_align_padding.
;   #2 - attributes of the two intrinsic declarations.
211 attributes #0 = { nounwind noinline }
212 attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" }
213 attributes #2 = { nounwind readnone speculatable }