1 ; RUN: llc -global-isel -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,ALL %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-unknown -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,ALL %s
; @test -- kernel with one explicit argument (%out). It reads the i32 located
; 10 elements past the pointer returned by llvm.amdgcn.kernarg.segment.ptr and
; stores that value to %out. The checks below expect the read to be selected
; as an s_load_dword with immediate offset 0xa (= 10), based on s[4:5] or
; s[0:1] depending on the target configuration.
5 ; ALL-LABEL: {{^}}test:
6 ; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
7 ; HSA: kernarg_segment_byte_size = 8
8 ; HSA: kernarg_segment_alignment = 4
10 ; CO-V2: s_load_dword s{{[0-9]+}}, s[4:5], 0xa
12 ; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa
13 define amdgpu_kernel void @test(i32 addrspace(1)* %out) #1 {
; The intrinsic yields an i8 pointer; it is reinterpreted as i32* so the GEP
; below indexes in 4-byte elements.
14 %kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
15 %header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
; Element index 10 -- the source of the 0xa offset in the expected loads.
16 %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
17 %value = load i32, i32 addrspace(4)* %gep
18 store i32 %value, i32 addrspace(1)* %out
; @test_implicit -- same shape as a kernarg-pointer read, but the base pointer
; comes from llvm.amdgcn.implicitarg.ptr. The kernel loads the i32 at element
; index 10 from that pointer and stores it to %out. It uses attribute group #1.
22 ; ALL-LABEL: {{^}}test_implicit:
23 ; HSA: kernarg_segment_byte_size = 8
24 ; OS-MESA3D: kernarg_segment_byte_size = 24
25 ; CO-V2: kernarg_segment_alignment = 4
27 ; Expected offset in 4-byte units: 10 (GEP index) + 9 (36 prepended implicit bytes) + 2 (out pointer) = 21 = 0x15
29 ; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x15
30 define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
; i8 pointer from the intrinsic, reinterpreted as i32* for 4-byte indexing.
31 %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
32 %header.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
; Element index 10 -- the first term of the offset arithmetic noted above.
33 %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
34 %value = load i32, i32 addrspace(4)* %gep
35 store i32 %value, i32 addrspace(1)* %out
; @test_implicit_alignment -- kernel with two explicit arguments: a pointer
; (%out) and a <2 x i8> (%in, otherwise unused in the visible body). It loads
; an i32 directly from the pointer returned by llvm.amdgcn.implicitarg.ptr
; (no GEP) and stores it to %out. The checks capture the scalar register
; written by s_load_dword, then expect it to be copied to a vector register
; and written with flat_store_dword. Each target configuration expects a
; different load offset (0xc, 0x4 or 0x3).
39 ; ALL-LABEL: {{^}}test_implicit_alignment:
40 ; HSA: kernarg_segment_byte_size = 12
41 ; OS-MESA3D: kernarg_segment_byte_size = 28
42 ; CO-V2: kernarg_segment_alignment = 4
45 ; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
46 ; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
47 ; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
48 ; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
49 ; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
50 define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x i8> %in) #1 {
; i8 pointer from the intrinsic, reinterpreted as i32* so a full i32 is read
; from its very first element.
51 %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
52 %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
53 %val = load i32, i32 addrspace(4)* %arg.ptr
54 store i32 %val, i32 addrspace(1)* %out
; @opencl_test_implicit_alignment -- same body and explicit arguments as a
; plain implicitarg read (pointer %out plus <2 x i8> %in), but built with
; attribute group #2, which sets "amdgpu-implicitarg-num-bytes" to 48. The
; kernel loads an i32 directly from the pointer returned by
; llvm.amdgcn.implicitarg.ptr and stores it to %out.
; The label pattern ends with ':' so it matches only this kernel's label and
; not the longer *_no_explicit_kernargs names that share the same prefix.
58 ; ALL-LABEL: {{^}}opencl_test_implicit_alignment:
59 ; HSA: kernarg_segment_byte_size = 64
60 ; OS-MESA3D: kernarg_segment_byte_size = 28
61 ; CO-V2: kernarg_segment_alignment = 4
64 ; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
65 ; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
66 ; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
67 ; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
68 ; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
69 define amdgpu_kernel void @opencl_test_implicit_alignment(i32 addrspace(1)* %out, <2 x i8> %in) #2 {
; i8 pointer from the intrinsic, reinterpreted as i32* so a full i32 is read
; from its very first element.
70 %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
71 %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
72 %val = load i32, i32 addrspace(4)* %arg.ptr
73 store i32 %val, i32 addrspace(1)* %out
; @test_no_kernargs -- kernel with no explicit arguments that still reads
; through llvm.amdgcn.kernarg.segment.ptr. The checks expect the kernarg
; segment pointer SGPR to be disabled and the segment size to be 0, and, for
; the amdhsa configuration, the load address to be materialised as the
; constant 40 (element index 10 of a 4-byte type) with s_mov_b64.
77 ; ALL-LABEL: {{^}}test_no_kernargs:
78 ; CO-V2: enable_sgpr_kernarg_segment_ptr = 0
79 ; CO-V2: kernarg_segment_byte_size = 0
80 ; CO-V2: kernarg_segment_alignment = 4
82 ; HSA: s_mov_b64 [[OFFSET_NULL:s\[[0-9]+:[0-9]+\]]], 40{{$}}
83 ; HSA: s_load_dword s{{[0-9]+}}, [[OFFSET_NULL]]
84 define amdgpu_kernel void @test_no_kernargs() #1 {
85 %kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
86 %header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
; Element index 10 of i32, i.e. byte offset 40 as expected by the s_mov_b64 check.
87 %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
88 %value = load i32, i32 addrspace(4)* %gep
; There is no output argument, so the value is written with a volatile store
; to an undef address.
89 store volatile i32 %value, i32 addrspace(1)* undef
; @opencl_test_implicit_alignment_no_explicit_kernargs -- kernel with no
; explicit arguments, built with attribute group #2
; ("amdgpu-implicitarg-num-bytes"="48"). Only the reported kernarg segment
; size and alignment are checked; the body performs a volatile load through
; llvm.amdgcn.implicitarg.ptr and a volatile store to a null pointer.
93 ; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs:
94 ; HSA: kernarg_segment_byte_size = 48
95 ; OS-MESA3D: kernarg_segment_byte_size = 16
96 ; CO-V2: kernarg_segment_alignment = 4
97 define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 {
98 %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
99 %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
; Both memory operations are marked volatile.
100 %val = load volatile i32, i32 addrspace(4)* %arg.ptr
101 store volatile i32 %val, i32 addrspace(1)* null
; @opencl_test_implicit_alignment_no_explicit_kernargs_round_up -- kernel with
; no explicit arguments, built with attribute group #3
; ("amdgpu-implicitarg-num-bytes"="38"). The amdhsa configuration expects a
; reported kernarg segment size of 40, i.e. larger than the 38 requested
; bytes. The body performs a volatile load through
; llvm.amdgcn.implicitarg.ptr and a volatile store to a null pointer.
105 ; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs_round_up:
106 ; HSA: kernarg_segment_byte_size = 40
107 ; OS-MESA3D: kernarg_segment_byte_size = 16
108 ; CO-V2: kernarg_segment_alignment = 4
109 define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() #3 {
110 %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
111 %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
; Both memory operations are marked volatile.
112 %val = load volatile i32, i32 addrspace(4)* %arg.ptr
113 store volatile i32 %val, i32 addrspace(1)* null
; @func_kernarg_segment_ptr -- an ordinary function (declared without the
; amdgpu_kernel keyword) that returns the result of
; llvm.amdgcn.kernarg.segment.ptr. The checks expect both v0 and v1 to be
; set to the constant 0.
117 ; ALL-LABEL: {{^}}func_kernarg_segment_ptr:
118 ; ALL: v_mov_b32_e32 v0, 0{{$}}
119 ; ALL: v_mov_b32_e32 v1, 0{{$}}
120 define i8 addrspace(4)* @func_kernarg_segment_ptr() {
121 %ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
122 ret i8 addrspace(4)* %ptr
; Declarations of the two intrinsics exercised in this file; both take no
; arguments and return an i8 pointer in addrspace(4).
125 declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
126 declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
; Attribute groups. #0 is applied to the intrinsic declarations. #1, #2 and #3
; are applied to the kernels and differ only in the value of
; "amdgpu-implicitarg-num-bytes" (0, 48 and 38 respectively).
128 attributes #0 = { nounwind readnone }
129 attributes #1 = { nounwind "amdgpu-implicitarg-num-bytes"="0" }
130 attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
131 attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" }