1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s
; External functions with no body in this file. @function1 is declared with no
; attributes; @function2 carries attribute group #0 ("amdgpu-no-queue-ptr").
4 declare void @function1()
6 declare void @function2() #0
; Helper: stores the incoming addrspace(4) pointer %argptr to %sink.
; test_kernel71 calls it with a pointer derived from the implicit argument
; pointer.
8 ; Function Attrs: noinline
9 define void @function3(i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink) #2 {
10 store i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink, align 8
; Helper: stores the i64 %arg through %a. test_kernel60 passes it a value that
; the kernel itself loaded from implicit-argument offset 200.
14 ; Function Attrs: noinline
15 define void @function4(i64 %arg, i64* %a) #2 {
16 store i64 %arg, i64* %a
; Helper: loads an i64 from %ptr + 168 and stores it to %sink. test_kernel61
; passes %ptr = implicit argument pointer + 32, so the load lands at offset
; 32 + 168 = 200.
20 ; Function Attrs: noinline
21 define void @function5(i8 addrspace(4)* %ptr, i64* %sink) #2 {
22 %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 168
23 %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
24 %x = load i64, i64 addrspace(4)* %cast
25 store i64 %x, i64* %sink
; Intrinsic returning an addrspace(4) i8 pointer; the kernels below apply byte
; offsets to its result to reach individual implicit arguments.
29 ; Function Attrs: nounwind readnone speculatable willreturn
30 declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1
32 ; CHECK: amdhsa.kernels:
; Baseline: the kernel only stores a constant through %a; it makes no calls and
; never reads the implicit argument pointer.
34 ; CHECK-NOT: hidden_queue_ptr
35 ; CHECK-LABEL: .name: test_kernel10
36 define amdgpu_kernel void @test_kernel10(i8* %a) {
37 store i8 3, i8* %a, align 1
41 ; Call to an extern function: @function1 is declared without attributes and has no visible body, so the queue pointer is expected to be kept.
44 ; CHECK: hidden_queue_ptr
45 ; CHECK-LABEL: .name: test_kernel20
46 define amdgpu_kernel void @test_kernel20(i8* %a) {
47 call void @function1()
48 store i8 3, i8* %a, align 1
52 ; Explicit attribute on kernel: #0 ("amdgpu-no-queue-ptr") is set on the kernel even though it calls the unattributed extern @function1.
55 ; CHECK-NOT: hidden_queue_ptr
56 ; CHECK-LABEL: .name: test_kernel21
57 define amdgpu_kernel void @test_kernel21(i8* %a) #0 {
58 call void @function1()
59 store i8 3, i8* %a, align 1
63 ; Explicit attribute on extern callee: @function2 is declared with #0 ("amdgpu-no-queue-ptr"); the kernel itself has no attributes.
66 ; CHECK-NOT: hidden_queue_ptr
67 ; CHECK-LABEL: .name: test_kernel22
68 define amdgpu_kernel void @test_kernel22(i8* %a) {
69 call void @function2()
70 store i8 3, i8* %a, align 1
74 ; Access more bytes than the pointer size: a 16-byte i128 load at offset 192 covers bytes 192-207, which include the queue pointer bytes starting at offset 200.
77 ; CHECK: hidden_queue_ptr
78 ; CHECK-LABEL: .name: test_kernel30
79 define amdgpu_kernel void @test_kernel30(i128* %a) {
80 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
81 %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 192
82 %cast = bitcast i8 addrspace(4)* %gep to i128 addrspace(4)*
83 %x = load i128, i128 addrspace(4)* %cast
84 store i128 %x, i128* %a
88 ; Typical load of queue pointer: an 8-byte load at implicit-argument offset 200.
; The loaded value is stored through %a so the load has a user and is not
; trivially dead, matching the sibling kernels 41-43.
91 ; CHECK: hidden_queue_ptr
92 ; CHECK-LABEL: .name: test_kernel40
93 define amdgpu_kernel void @test_kernel40(i64* %a) {
94 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
95 %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 200
96 %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
97 %x = load i64, i64 addrspace(4)* %cast
98 store i64 %x, i64* %a
102 ; Typical usage, overridden by explicit attribute on kernel: same 8-byte load at offset 200 as test_kernel40, but the kernel carries #0 ("amdgpu-no-queue-ptr").
105 ; CHECK-NOT: hidden_queue_ptr
106 ; CHECK-LABEL: .name: test_kernel41
107 define amdgpu_kernel void @test_kernel41(i64* %a) #0 {
108 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
109 %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 200
110 %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
111 %x = load i64, i64 addrspace(4)* %cast
112 store i64 %x, i64* %a
116 ; Access to implicit arg before the queue pointer: an 8-byte load at offset 192 reads bytes 192-199 and stops short of offset 200.
119 ; CHECK-NOT: hidden_queue_ptr
120 ; CHECK-LABEL: .name: test_kernel42
121 define amdgpu_kernel void @test_kernel42(i64* %a) {
122 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
123 %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 192
124 %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
125 %x = load i64, i64 addrspace(4)* %cast
126 store i64 %x, i64* %a
130 ; Access to implicit arg after the queue pointer: an 8-byte load at offset 208 starts just past the queue pointer bytes 200-207.
133 ; CHECK-NOT: hidden_queue_ptr
134 ; CHECK-LABEL: .name: test_kernel43
135 define amdgpu_kernel void @test_kernel43(i64* %a) {
136 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
137 %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 208
138 %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
139 %x = load i64, i64 addrspace(4)* %cast
140 store i64 %x, i64* %a
144 ; Accessing a byte just before the queue pointer: single-byte load at offset 199.
147 ; CHECK-NOT: hidden_queue_ptr
148 ; CHECK-LABEL: .name: test_kernel44
149 define amdgpu_kernel void @test_kernel44(i8* %a) {
150 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
151 %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 199
152 %x = load i8, i8 addrspace(4)* %gep, align 1
153 store i8 %x, i8* %a, align 1
157 ; Accessing a byte inside the queue pointer: single-byte load at offset 200, the first byte of the queue pointer.
160 ; CHECK: hidden_queue_ptr
161 ; CHECK-LABEL: .name: test_kernel45
162 define amdgpu_kernel void @test_kernel45(i8* %a) {
163 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
164 %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 200
165 %x = load i8, i8 addrspace(4)* %gep, align 1
166 store i8 %x, i8* %a, align 1
170 ; Accessing a byte inside the queue pointer: single-byte load at offset 207, the last byte of the queue pointer.
173 ; CHECK: hidden_queue_ptr
174 ; CHECK-LABEL: .name: test_kernel46
175 define amdgpu_kernel void @test_kernel46(i8* %a) {
176 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
177 %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 207
178 %x = load i8, i8 addrspace(4)* %gep, align 1
179 store i8 %x, i8* %a, align 1
183 ; Accessing a byte just after the queue pointer: single-byte load at offset 208.
186 ; CHECK-NOT: hidden_queue_ptr
187 ; CHECK-LABEL: .name: test_kernel47
188 define amdgpu_kernel void @test_kernel47(i8* %a) {
189 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
190 %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 208
191 %x = load i8, i8 addrspace(4)* %gep, align 1
192 store i8 %x, i8* %a, align 1
196 ; Access with an unknown offset: the GEP index is the kernel argument %b rather than a constant, so the accessed byte could be any implicit argument.
199 ; CHECK: hidden_queue_ptr
200 ; CHECK-LABEL: .name: test_kernel50
201 define amdgpu_kernel void @test_kernel50(i8* %a, i32 %b) {
202 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
203 %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 %b
204 %x = load i8, i8 addrspace(4)* %gep, align 1
205 store i8 %x, i8* %a, align 1
209 ; Multiple geps reaching the queue pointer argument: the two constant offsets add up to 16 + 184 = 200.
212 ; CHECK: hidden_queue_ptr
213 ; CHECK-LABEL: .name: test_kernel51
214 define amdgpu_kernel void @test_kernel51(i8* %a) {
215 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
216 %gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
217 %gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 184
218 %x = load i8, i8 addrspace(4)* %gep2, align 1
219 store i8 %x, i8* %a, align 1
223 ; Multiple geps not reaching the queue pointer argument: the two constant offsets add up to 16 + 16 = 32, well before offset 200.
226 ; CHECK-NOT: hidden_queue_ptr
227 ; CHECK-LABEL: .name: test_kernel52
228 define amdgpu_kernel void @test_kernel52(i8* %a) {
229 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
230 %gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
231 %gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 16
232 %x = load i8, i8 addrspace(4)* %gep2, align 1
233 store i8 %x, i8* %a, align 1
237 ; Queue pointer used inside a function call: the kernel itself loads 8 bytes at offset 200 and passes the loaded value to the noinline helper @function4.
240 ; CHECK: hidden_queue_ptr
241 ; CHECK-LABEL: .name: test_kernel60
242 define amdgpu_kernel void @test_kernel60(i64* %a) #2 {
243 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
244 %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 200
245 %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
246 %x = load i64, i64 addrspace(4)* %cast
247 call void @function4(i64 %x, i64* %a)
251 ; Queue pointer retrieved inside a function call; chain of geps: the kernel adds 32 and the noinline helper @function5 adds 168 before loading (32 + 168 = 200).
254 ; CHECK: hidden_queue_ptr
255 ; CHECK-LABEL: .name: test_kernel61
256 define amdgpu_kernel void @test_kernel61(i64* %a) #2 {
257 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
258 %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 32
259 call void @function5(i8 addrspace(4)* %gep, i64* %a)
; Pointer captured directly in the kernel: a pointer derived from the implicit
; argument pointer (offset 42) is stored to %sink instead of being loaded from.
; NOTE(review): presumably the queue pointer is kept because accesses made
; through the escaped pointer cannot be tracked -- confirm.
266 ; CHECK: hidden_queue_ptr
267 ; CHECK-LABEL: .name: test_kernel70
268 define amdgpu_kernel void @test_kernel70(i8 addrspace(4)* addrspace(1)* %sink) #2 {
269 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
270 %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
271 store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink, align 8
275 ; Pointer captured inside function call: the derived pointer (offset 42) is passed to the noinline helper @function3, which stores it to %sink.
278 ; CHECK: hidden_queue_ptr
279 ; CHECK-LABEL: .name: test_kernel71
280 define amdgpu_kernel void @test_kernel71(i8 addrspace(4)* addrspace(1)* %sink) #2 {
281 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
282 %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
283 call void @function3(i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink)
287 ; Ineffective pointer capture: the derived pointer (offset 42) is stored to an undef address rather than to a real sink.
290 ; CHECK-NOT: hidden_queue_ptr
291 ; CHECK-LABEL: .name: test_kernel72
292 define amdgpu_kernel void @test_kernel72() #2 {
293 %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
294 %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
295 store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* undef, align 8
; Attribute groups:
;   #0 - used on @function2, test_kernel21 and test_kernel41.
;   #1 - used on the @llvm.amdgcn.implicitarg.ptr declaration.
;   #2 - used on the helpers @function3/4/5 and on test_kernel60 through
;        test_kernel72.
299 attributes #0 = { "amdgpu-no-queue-ptr" }
300 attributes #1 = { nounwind readnone speculatable willreturn }
301 attributes #2 = { noinline }