1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=CHECK %s
4 declare void @function1()
6 declare void @function2() #0
8 ; Function Attrs: noinline
9 define void @function3(ptr addrspace(4) %argptr, ptr addrspace(1) %sink) #2 {
10 store ptr addrspace(4) %argptr, ptr addrspace(1) %sink, align 8
14 ; Function Attrs: noinline
15 define void @function4(i64 %arg, ptr %a) #2 {
16 store i64 %arg, ptr %a
20 ; Function Attrs: noinline
21 define void @function5(ptr addrspace(4) %ptr, ptr %sink) #2 {
22 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 72
23 %x = load i64, ptr addrspace(4) %gep
24 store i64 %x, ptr %sink
28 ; Function Attrs: nounwind readnone speculatable willreturn
29 declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
31 ; CHECK: amdhsa.kernels:
33 ; CHECK-NOT: hidden_multigrid_sync_arg
34 ; CHECK-LABEL: .name: test_kernel10
35 define amdgpu_kernel void @test_kernel10(ptr %a) {
36 store i8 3, ptr %a, align 1
40 ; Call to an extern function
43 ; CHECK: hidden_multigrid_sync_arg
44 ; CHECK-LABEL: .name: test_kernel20
45 define amdgpu_kernel void @test_kernel20(ptr %a) {
46 call void @function1()
47 store i8 3, ptr %a, align 1
51 ; Explicit attribute on kernel
54 ; CHECK-NOT: hidden_multigrid_sync_arg
55 ; CHECK-LABEL: .name: test_kernel21
56 define amdgpu_kernel void @test_kernel21(ptr %a) #0 {
57 call void @function1()
58 store i8 3, ptr %a, align 1
62 ; Explicit attribute on extern callee
65 ; CHECK-NOT: hidden_multigrid_sync_arg
66 ; CHECK-LABEL: .name: test_kernel22
67 define amdgpu_kernel void @test_kernel22(ptr %a) {
68 call void @function2()
69 store i8 3, ptr %a, align 1
73 ; Wide load that starts before the multigrid sync arg pointer and overlaps it
76 ; CHECK: hidden_multigrid_sync_arg
77 ; CHECK-LABEL: .name: test_kernel30
78 define amdgpu_kernel void @test_kernel30(ptr %a) {
79 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
80 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 80
81 %x = load i128, ptr addrspace(4) %gep
86 ; Typical load of multigrid sync arg pointer
89 ; CHECK: hidden_multigrid_sync_arg
90 ; CHECK-LABEL: .name: test_kernel40
91 define amdgpu_kernel void @test_kernel40(ptr %a) {
92 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
93 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 88
94 %x = load i64, ptr addrspace(4) %gep
99 ; Typical usage, overridden by explicit attribute on kernel
102 ; CHECK-NOT: hidden_multigrid_sync_arg
103 ; CHECK-LABEL: .name: test_kernel41
104 define amdgpu_kernel void @test_kernel41(ptr %a) #0 {
105 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
106 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 88
107 %x = load i64, ptr addrspace(4) %gep
112 ; Access to implicit arg before the multigrid sync arg pointer
115 ; CHECK-NOT: hidden_multigrid_sync_arg
116 ; CHECK-LABEL: .name: test_kernel42
117 define amdgpu_kernel void @test_kernel42(ptr %a) {
118 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
119 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 80
120 %x = load i64, ptr addrspace(4) %gep
125 ; Access to implicit arg after the multigrid sync arg pointer
128 ; CHECK-NOT: hidden_multigrid_sync_arg
129 ; CHECK-LABEL: .name: test_kernel43
130 define amdgpu_kernel void @test_kernel43(ptr %a) {
131 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
132 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 96
133 %x = load i64, ptr addrspace(4) %gep
138 ; Accessing a byte just before the multigrid sync arg pointer.
141 ; CHECK-NOT: hidden_multigrid_sync_arg
142 ; CHECK-LABEL: .name: test_kernel44
143 define amdgpu_kernel void @test_kernel44(ptr %a) {
144 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
145 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 87
146 %x = load i8, ptr addrspace(4) %gep, align 1
147 store i8 %x, ptr %a, align 1
151 ; Accessing the first byte of the multigrid sync arg pointer.
154 ; CHECK: hidden_multigrid_sync_arg
155 ; CHECK-LABEL: .name: test_kernel45
156 define amdgpu_kernel void @test_kernel45(ptr %a) {
157 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
158 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 88
159 %x = load i8, ptr addrspace(4) %gep, align 1
160 store i8 %x, ptr %a, align 1
164 ; Accessing the last byte of the multigrid sync arg pointer
167 ; CHECK: hidden_multigrid_sync_arg
168 ; CHECK-LABEL: .name: test_kernel46
169 define amdgpu_kernel void @test_kernel46(ptr %a) {
170 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
171 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 95
172 %x = load i8, ptr addrspace(4) %gep, align 1
173 store i8 %x, ptr %a, align 1
177 ; Accessing a byte just after the multigrid sync arg pointer
180 ; CHECK-NOT: hidden_multigrid_sync_arg
181 ; CHECK-LABEL: .name: test_kernel47
182 define amdgpu_kernel void @test_kernel47(ptr %a) {
183 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
184 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 96
185 %x = load i8, ptr addrspace(4) %gep, align 1
186 store i8 %x, ptr %a, align 1
190 ; Access with an unknown offset
193 ; CHECK: hidden_multigrid_sync_arg
194 ; CHECK-LABEL: .name: test_kernel50
195 define amdgpu_kernel void @test_kernel50(ptr %a, i32 %b) {
196 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
197 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 %b
198 %x = load i8, ptr addrspace(4) %gep, align 1
199 store i8 %x, ptr %a, align 1
203 ; Multiple geps reaching the multigrid sync arg pointer argument
206 ; CHECK: hidden_multigrid_sync_arg
207 ; CHECK-LABEL: .name: test_kernel51
208 define amdgpu_kernel void @test_kernel51(ptr %a) {
209 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
210 %gep1 = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
211 %gep2 = getelementptr inbounds i8, ptr addrspace(4) %gep1, i64 72
212 %x = load i8, ptr addrspace(4) %gep2, align 1
213 store i8 %x, ptr %a, align 1
217 ; Multiple geps not reaching the multigrid sync arg pointer argument
220 ; CHECK-NOT: hidden_multigrid_sync_arg
221 ; CHECK-LABEL: .name: test_kernel52
222 define amdgpu_kernel void @test_kernel52(ptr %a) {
223 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
224 %gep1 = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
225 %gep2 = getelementptr inbounds i8, ptr addrspace(4) %gep1, i64 16
226 %x = load i8, ptr addrspace(4) %gep2, align 1
227 store i8 %x, ptr %a, align 1
231 ; Multigrid sync arg pointer used inside a function call
234 ; CHECK: hidden_multigrid_sync_arg
235 ; CHECK-LABEL: .name: test_kernel60
236 define amdgpu_kernel void @test_kernel60(ptr %a) #2 {
237 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
238 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 88
239 %x = load i64, ptr addrspace(4) %gep
240 call void @function4(i64 %x, ptr %a)
244 ; Multigrid sync arg pointer retrieved inside a function call; chain of geps
247 ; CHECK: hidden_multigrid_sync_arg
248 ; CHECK-LABEL: .name: test_kernel61
249 define amdgpu_kernel void @test_kernel61(ptr %a) #2 {
250 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
251 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
252 call void @function5(ptr addrspace(4) %gep, ptr %a)
259 ; CHECK: hidden_multigrid_sync_arg
260 ; CHECK-LABEL: .name: test_kernel70
261 define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) #2 {
262 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
263 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
264 store ptr addrspace(4) %gep, ptr addrspace(1) %sink, align 8
268 ; Pointer captured inside function call
271 ; CHECK: hidden_multigrid_sync_arg
272 ; CHECK-LABEL: .name: test_kernel71
273 define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) #2 {
274 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
275 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
276 call void @function3(ptr addrspace(4) %gep, ptr addrspace(1) %sink)
280 ; Ineffective pointer capture
283 ; CHECK-NOT: hidden_multigrid_sync_arg
284 ; CHECK-LABEL: .name: test_kernel72
285 define amdgpu_kernel void @test_kernel72() #2 {
286 %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
287 %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
288 store ptr addrspace(4) %gep, ptr addrspace(1) undef, align 8
292 attributes #0 = { "amdgpu-no-multigrid-sync-arg" }
293 attributes #1 = { nounwind readnone speculatable willreturn }
294 attributes #2 = { noinline }
296 !llvm.module.flags = !{!0}
297 !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}