1 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-attributor -o %t.bc %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj < %t.bc | llvm-readelf --notes - | FileCheck %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %t.bc | FileCheck --check-prefix=CHECK %s
; External callee declared without any attribute group; test_kernel20 and
; test_kernel21 call it.
5 declare void @function1()
; External callee declared with attribute group #0 ("amdgpu-no-heap-ptr");
; test_kernel22 calls it.
7 declare void @function2() #0
; Helper for test_kernel71: writes the incoming addrspace(4) pointer value
; itself (not the data behind it) to the addrspace(1) location %sink.
9 ; Function Attrs: noinline
10 define void @function3(ptr addrspace(4) %argptr, ptr addrspace(1) %sink) #2 {
11 store ptr addrspace(4) %argptr, ptr addrspace(1) %sink, align 8
; Helper for test_kernel60: stores the i64 value %arg through %a.
15 ; Function Attrs: noinline
16 define void @function4(i64 %arg, ptr %a) #2 {
17 store i64 %arg, ptr %a
; Helper for test_kernel61: loads an i64 from 64 bytes past %ptr and stores it
; through %sink. test_kernel61 passes implicitarg.ptr + 32, so the load lands
; at implicit-argument offset 96 (32 + 64).
21 ; Function Attrs: noinline
22 define void @function5(ptr addrspace(4) %ptr, ptr %sink) #2 {
23 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 64
24 %x = load i64, ptr addrspace(4) %gep
25 store i64 %x, ptr %sink
; Intrinsic called by every kernel from test_kernel30 onward; it returns the
; addrspace(4) pointer (declared align 4) that those kernels offset into when
; accessing implicit arguments.
29 ; Function Attrs: nounwind readnone speculatable willreturn
30 declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
32 ; CHECK: amdhsa.kernels:
; Baseline: the visible body only stores a constant byte through %a, with no
; call and no implicit-argument access. The directives below expect
; hidden_heap_v1 to be absent ahead of this kernel's .name entry.
34 ; CHECK-NOT: hidden_heap_v1
35 ; CHECK-LABEL: .name: test_kernel10
36 define amdgpu_kernel void @test_kernel10(ptr %a) {
37 store i8 3, ptr %a, align 1
41 ; Call to an extern function
; The callee @function1 is only declared and carries no attribute group;
; hidden_heap_v1 is expected to be present for this kernel.
44 ; CHECK: hidden_heap_v1
45 ; CHECK-LABEL: .name: test_kernel20
46 define amdgpu_kernel void @test_kernel20(ptr %a) {
47 call void @function1()
48 store i8 3, ptr %a, align 1
52 ; Explicit attribute on kernel
; Same visible body as test_kernel20, but the kernel carries attribute group
; #0 ("amdgpu-no-heap-ptr"); hidden_heap_v1 is expected to be absent.
55 ; CHECK-NOT: hidden_heap_v1
56 ; CHECK-LABEL: .name: test_kernel21
57 define amdgpu_kernel void @test_kernel21(ptr %a) #0 {
58 call void @function1()
59 store i8 3, ptr %a, align 1
63 ; Explicit attribute on extern callee
; The callee @function2 is declared with attribute group #0
; ("amdgpu-no-heap-ptr"); hidden_heap_v1 is expected to be absent.
66 ; CHECK-NOT: hidden_heap_v1
67 ; CHECK-LABEL: .name: test_kernel22
68 define amdgpu_kernel void @test_kernel22(ptr %a) {
69 call void @function2()
70 store i8 3, ptr %a, align 1
74 ; Access more bytes than the pointer size
; The 16-byte (i128) load at offset 88 spans bytes 88..103, which overlaps
; offsets 96..103 that the byte-granular tests (test_kernel44..47) treat as the
; heap pointer.
77 ; CHECK: hidden_heap_v1
78 ; CHECK-LABEL: .name: test_kernel30
79 define amdgpu_kernel void @test_kernel30(ptr %a) {
80 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
81 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 88
82 %x = load i128, ptr addrspace(4) %gep
87 ; Typical load of heap buffer pointer
; Loads 8 bytes (i64) at implicit-argument offset 96; hidden_heap_v1 is
; expected to be present.
90 ; CHECK: hidden_heap_v1
91 ; CHECK-LABEL: .name: test_kernel40
92 define amdgpu_kernel void @test_kernel40(ptr %a) {
93 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
94 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 96
95 %x = load i64, ptr addrspace(4) %gep
100 ; Typical usage, overridden by explicit attribute on kernel
; Same i64 load at offset 96 as test_kernel40; the only visible difference is
; attribute group #0 ("amdgpu-no-heap-ptr") on the kernel, and hidden_heap_v1
; is expected to be absent.
103 ; CHECK-NOT: hidden_heap_v1
104 ; CHECK-LABEL: .name: test_kernel41
105 define amdgpu_kernel void @test_kernel41(ptr %a) #0 {
106 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
107 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 96
108 %x = load i64, ptr addrspace(4) %gep
113 ; Access to implicit arg before the heap pointer
; The i64 load at offset 88 covers bytes 88..95 and stops right before
; offset 96; hidden_heap_v1 is expected to be absent.
116 ; CHECK-NOT: hidden_heap_v1
117 ; CHECK-LABEL: .name: test_kernel42
118 define amdgpu_kernel void @test_kernel42(ptr %a) {
119 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
120 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 88
121 %x = load i64, ptr addrspace(4) %gep
126 ; Access to implicit arg after the heap pointer
; The i64 load starts at offset 104, immediately past the 96..103 range;
; hidden_heap_v1 is expected to be absent.
129 ; CHECK-NOT: hidden_heap_v1
130 ; CHECK-LABEL: .name: test_kernel43
131 define amdgpu_kernel void @test_kernel43(ptr %a) {
132 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
133 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 104
134 %x = load i64, ptr addrspace(4) %gep
139 ; Accessing a byte just before the heap pointer
; Single-byte load at offset 95, the last byte below offset 96;
; hidden_heap_v1 is expected to be absent.
142 ; CHECK-NOT: hidden_heap_v1
143 ; CHECK-LABEL: .name: test_kernel44
144 define amdgpu_kernel void @test_kernel44(ptr %a) {
145 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
146 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 95
147 %x = load i8, ptr addrspace(4) %gep, align 1
148 store i8 %x, ptr %a, align 1
152 ; Accessing the first byte of the heap pointer
; Single-byte load at offset 96, one byte past the offset used by
; test_kernel44; hidden_heap_v1 is expected to be present.
155 ; CHECK: hidden_heap_v1
156 ; CHECK-LABEL: .name: test_kernel45
157 define amdgpu_kernel void @test_kernel45(ptr %a) {
158 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
159 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 96
160 %x = load i8, ptr addrspace(4) %gep, align 1
161 store i8 %x, ptr %a, align 1
165 ; Accessing the last byte of the heap pointer
; Single-byte load at offset 103, one byte below the offset used by
; test_kernel47; hidden_heap_v1 is expected to be present.
168 ; CHECK: hidden_heap_v1
169 ; CHECK-LABEL: .name: test_kernel46
170 define amdgpu_kernel void @test_kernel46(ptr %a) {
171 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
172 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 103
173 %x = load i8, ptr addrspace(4) %gep, align 1
174 store i8 %x, ptr %a, align 1
178 ; Accessing a byte just after the heap pointer
; Single-byte load at offset 104; hidden_heap_v1 is expected to be absent.
181 ; CHECK-NOT: hidden_heap_v1
182 ; CHECK-LABEL: .name: test_kernel47
183 define amdgpu_kernel void @test_kernel47(ptr %a) {
184 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
185 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 104
186 %x = load i8, ptr addrspace(4) %gep, align 1
187 store i8 %x, ptr %a, align 1
191 ; Access with an unknown offset
; The GEP index is the kernel argument %b rather than a constant, so the byte
; being loaded is not fixed in the IR; hidden_heap_v1 is expected to be present.
194 ; CHECK: hidden_heap_v1
195 ; CHECK-LABEL: .name: test_kernel50
196 define amdgpu_kernel void @test_kernel50(ptr %a, i32 %b) {
197 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
198 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 %b
199 %x = load i8, ptr addrspace(4) %gep, align 1
200 store i8 %x, ptr %a, align 1
204 ; Multiple geps reaching the heap pointer argument.
; The two chained offsets add up to 96 (16 + 80), the same offset that
; test_kernel45 loads from directly.
207 ; CHECK: hidden_heap_v1
208 ; CHECK-LABEL: .name: test_kernel51
209 define amdgpu_kernel void @test_kernel51(ptr %a) {
210 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
211 %gep1 = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
212 %gep2 = getelementptr inbounds i8, ptr addrspace(4) %gep1, i64 80
213 %x = load i8, ptr addrspace(4) %gep2, align 1
214 store i8 %x, ptr %a, align 1
218 ; Multiple geps not reaching the heap pointer argument.
; The two chained offsets add up to 32 (16 + 16), well short of offset 96;
; hidden_heap_v1 is expected to be absent.
221 ; CHECK-NOT: hidden_heap_v1
222 ; CHECK-LABEL: .name: test_kernel52
223 define amdgpu_kernel void @test_kernel52(ptr %a) {
224 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
225 %gep1 = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
226 %gep2 = getelementptr inbounds i8, ptr addrspace(4) %gep1, i64 16
227 %x = load i8, ptr addrspace(4) %gep2, align 1
228 store i8 %x, ptr %a, align 1
232 ; Heap pointer used inside a function call
; The kernel itself loads the i64 at offset 96 and passes the loaded value
; (not the address) to the noinline helper @function4.
235 ; CHECK: hidden_heap_v1
236 ; CHECK-LABEL: .name: test_kernel60
237 define amdgpu_kernel void @test_kernel60(ptr %a) #2 {
238 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
239 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 96
240 %x = load i64, ptr addrspace(4) %gep
241 call void @function4(i64 %x, ptr %a)
245 ; Heap pointer retrieved inside a function call; chain of geps
; The kernel passes implicitarg.ptr + 32 to @function5, which adds a further
; 64 before its i64 load, so the access lands at offset 96 (32 + 64).
248 ; CHECK: hidden_heap_v1
249 ; CHECK-LABEL: .name: test_kernel61
250 define amdgpu_kernel void @test_kernel61(ptr %a) #2 {
251 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
252 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 32
253 call void @function5(ptr addrspace(4) %gep, ptr %a)
; The derived pointer (implicitarg.ptr + 42) is never loaded from here;
; instead the pointer value itself is stored to %sink inside the kernel.
; hidden_heap_v1 is expected to be present.
260 ; CHECK: hidden_heap_v1
261 ; CHECK-LABEL: .name: test_kernel70
262 define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) #2 {
263 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
264 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
265 store ptr addrspace(4) %gep, ptr addrspace(1) %sink, align 8
269 ; Pointer captured inside function call
; The derived pointer (implicitarg.ptr + 42) is handed to the noinline helper
; @function3, which stores the pointer value to %sink.
272 ; CHECK: hidden_heap_v1
273 ; CHECK-LABEL: .name: test_kernel71
274 define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) #2 {
275 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
276 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
277 call void @function3(ptr addrspace(4) %gep, ptr addrspace(1) %sink)
281 ; Ineffective pointer capture
; Same derived pointer as test_kernel70, but it is stored through an undef
; address instead of a kernel-provided sink; hidden_heap_v1 is expected to be
; absent.
284 ; CHECK-NOT: hidden_heap_v1
285 ; CHECK-LABEL: .name: test_kernel72
286 define amdgpu_kernel void @test_kernel72() #2 {
287 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
288 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
289 store ptr addrspace(4) %gep, ptr addrspace(1) undef, align 8
; #0: the "explicit attribute" named in the test comments; applied to
; @function2, test_kernel21 and test_kernel41.
293 attributes #0 = { "amdgpu-no-heap-ptr" }
; #1: attributes on the @llvm.amdgcn.implicitarg.ptr declaration.
294 attributes #1 = { nounwind readnone speculatable willreturn }
; #2: noinline; applied to @function3/4/5 and to test_kernel60, 61, 70, 71, 72.
295 attributes #2 = { noinline }
; Module flag setting amdhsa_code_object_version to 500 for this test module.
297 !llvm.module.flags = !{!0}
298 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}