1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
4 ; Not reached by a non-kernel function and therefore not changed by this pass
; Zero-length external addrspace(3) array of double. In this file it is only
; referenced from @kernel_only, and the CHECK lines expect the declaration to
; appear unchanged in the output.
5 @dynamic_kernel_only = external addrspace(3) global [0 x double]
7 ; dynamic_shared8 is used directly by a kernel so remains in the output;
8 ; the other three are only used by functions and will be replaced with
9 ; table lookups and dropped from the IR
; Zero-length external addrspace(3) arrays with element types i8, i16, i32 and
; i64 and explicit alignments 1, 2, 4 and 8. Each one is accessed by the
; correspondingly numbered @use_sharedN function below.
10 @dynamic_shared1 = external addrspace(3) global [0 x i8], align 1
11 @dynamic_shared2 = external addrspace(3) global [0 x i16], align 2
12 @dynamic_shared4 = external addrspace(3) global [0 x i32], align 4
13 @dynamic_shared8 = external addrspace(3) global [0 x i64], align 8
15 ; CHECK: %llvm.amdgcn.module.lds.t = type { i32 }
16 ; CHECK: @dynamic_kernel_only = external addrspace(3) global [0 x double]
17 ; CHECK: @dynamic_shared8 = external addrspace(3) global [0 x i64], align 8
18 ; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t poison, align 4, !absolute_symbol !0
19 ; CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
21 ; Alignment of these must be the maximum of the alignment of the reachable symbols
22 ; CHECK: @llvm.amdgcn.expect_align1.dynlds = external addrspace(3) global [0 x i8], align 1, !absolute_symbol !0
23 ; CHECK: @llvm.amdgcn.expect_align2.dynlds = external addrspace(3) global [0 x i8], align 2, !absolute_symbol !0
24 ; CHECK: @llvm.amdgcn.expect_align4.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol !1
25 ; CHECK: @llvm.amdgcn.expect_align8.dynlds = external addrspace(3) global [0 x i8], align 8, !absolute_symbol !0
27 ; Align 4, with the symbol at address range [4, 5), because module.lds (a single i32 at address 0) is also reachable
28 ; CHECK: @llvm.amdgcn.expect_max_of_2_and_4.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol !1
30 ; Builds a lookup table out of the newly created (suffixed .dynlds) variables in kernel.id order
31 ; CHECK: @llvm.amdgcn.dynlds.offset.table = internal addrspace(4) constant [5 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align1.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align2.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align4.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align8.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_max_of_2_and_4.dynlds to i32)]
; Kernel that stores 3.14 to element 0 of @dynamic_kernel_only directly.
; The CHECK lines expect the same getelementptr and store in the output, still
; referencing @dynamic_kernel_only, with the store alignment spelled out as 8.
35 define amdgpu_kernel void @kernel_only() {
36 ; CHECK-LABEL: @kernel_only() {
37 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_kernel_only, i32 0, i32 0
38 ; CHECK-NEXT: store double 3.140000e+00, ptr addrspace(3) [[ARRAYIDX]], align 8
39 ; CHECK-NEXT: ret void
41 %arrayidx = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_kernel_only, i32 0, i32 0
42 store double 3.140000e+00, ptr addrspace(3) %arrayidx
46 ; The accesses from functions are rewritten to go through the llvm.amdgcn.dynlds.offset.table
; Non-kernel function that stores i8 0 to element 1 of @dynamic_shared1.
; Expected output: the direct reference to @dynamic_shared1 is gone. Instead,
; the result of llvm.amdgcn.lds.kernel.id() indexes
; @llvm.amdgcn.dynlds.offset.table, the loaded i32 is converted with inttoptr
; to an addrspace(3) pointer, and the original getelementptr/store use that
; pointer. This function carries no attribute group (unlike the other
; @use_sharedN functions, which are marked #0).
47 define void @use_shared1() {
48 ; CHECK-LABEL: @use_shared1() {
49 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
50 ; CHECK-NEXT: [[DYNAMIC_SHARED1:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
51 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED1]], align 4
52 ; CHECK-NEXT: [[DYNAMIC_SHARED11:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
53 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr addrspace(3) [[DYNAMIC_SHARED11]], i32 0, i32 1
54 ; CHECK-NEXT: store i8 0, ptr addrspace(3) [[ARRAYIDX]], align 1
55 ; CHECK-NEXT: ret void
57 %arrayidx = getelementptr inbounds [0 x i8], ptr addrspace(3) @dynamic_shared1, i32 0, i32 1
58 store i8 0, ptr addrspace(3) %arrayidx
; Non-kernel function (attribute group #0, i.e. noinline) that stores i16 1 to
; element 3 of @dynamic_shared2.
; Expected output: the base pointer is obtained by loading an i32 from
; @llvm.amdgcn.dynlds.offset.table at the index returned by
; llvm.amdgcn.lds.kernel.id() and converting it with inttoptr; the
; getelementptr and store (align 2) then operate on that pointer.
62 define void @use_shared2() #0 {
63 ; CHECK-LABEL: @use_shared2() #0 {
64 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
65 ; CHECK-NEXT: [[DYNAMIC_SHARED2:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
66 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED2]], align 4
67 ; CHECK-NEXT: [[DYNAMIC_SHARED21:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
68 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i16], ptr addrspace(3) [[DYNAMIC_SHARED21]], i32 0, i32 3
69 ; CHECK-NEXT: store i16 1, ptr addrspace(3) [[ARRAYIDX]], align 2
70 ; CHECK-NEXT: ret void
72 %arrayidx = getelementptr inbounds [0 x i16], ptr addrspace(3) @dynamic_shared2, i32 0, i32 3
73 store i16 1, ptr addrspace(3) %arrayidx
77 ; Include a normal variable so that the new variables aren't all at the same absolute_symbol
; Statically sized i32 in addrspace(3), stored to by @use_shared4. The CHECK
; lines for @use_shared4 expect that store to target @llvm.amdgcn.module.lds
; instead of @static_shared.
78 @static_shared = addrspace(3) global i32 poison
; Non-kernel function (attribute group #0, i.e. noinline) that stores i32 4 to
; @static_shared and then i32 2 to element 5 of @dynamic_shared4.
; Expected output:
;   - the llvm.amdgcn.lds.kernel.id() call is emitted first, ahead of the
;     static store;
;   - the store to @static_shared is redirected to @llvm.amdgcn.module.lds;
;   - the @dynamic_shared4 access goes through a pointer loaded from
;     @llvm.amdgcn.dynlds.offset.table and converted with inttoptr.
79 define void @use_shared4() #0 {
80 ; CHECK-LABEL: @use_shared4() #0 {
81 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
82 ; CHECK-NEXT: store i32 4, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4
83 ; CHECK-NEXT: [[DYNAMIC_SHARED4:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
84 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED4]], align 4
85 ; CHECK-NEXT: [[DYNAMIC_SHARED41:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
86 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr addrspace(3) [[DYNAMIC_SHARED41]], i32 0, i32 5
87 ; CHECK-NEXT: store i32 2, ptr addrspace(3) [[ARRAYIDX]], align 4
88 ; CHECK-NEXT: ret void
90 store i32 4, ptr addrspace(3) @static_shared
91 %arrayidx = getelementptr inbounds [0 x i32], ptr addrspace(3) @dynamic_shared4, i32 0, i32 5
92 store i32 2, ptr addrspace(3) %arrayidx
; Non-kernel function (attribute group #0, i.e. noinline) that stores i64 3 to
; element 7 of @dynamic_shared8.
; Expected output: the access goes through a pointer loaded from
; @llvm.amdgcn.dynlds.offset.table (indexed by llvm.amdgcn.lds.kernel.id()) and
; converted with inttoptr.
; NOTE(review): the expected store carries "align 4" although the global is
; declared align 8; the input store has no explicit alignment, so this is
; presumably the i64 ABI alignment of the data layout in effect -- confirm
; before changing it by hand.
96 define void @use_shared8() #0 {
97 ; CHECK-LABEL: @use_shared8() #0 {
98 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
99 ; CHECK-NEXT: [[DYNAMIC_SHARED8:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
100 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED8]], align 4
101 ; CHECK-NEXT: [[DYNAMIC_SHARED81:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
102 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i64], ptr addrspace(3) [[DYNAMIC_SHARED81]], i32 0, i32 7
103 ; CHECK-NEXT: store i64 3, ptr addrspace(3) [[ARRAYIDX]], align 4
104 ; CHECK-NEXT: ret void
106 %arrayidx = getelementptr inbounds [0 x i64], ptr addrspace(3) @dynamic_shared8, i32 0, i32 7
107 store i64 3, ptr addrspace(3) %arrayidx
111 ; The kernels are annotated with kernel.id and llvm.donothing use of the corresponding variable
; Kernel that calls @use_shared1.
; Expected output: the kernel gains !llvm.amdgcn.lds.kernel.id metadata (!2,
; which the trailing CHECK lines define as i32 0) and a leading
; llvm.donothing call whose "ExplicitUse" operand bundle names
; @llvm.amdgcn.expect_align1.dynlds; the call to @use_shared1 is kept.
112 define amdgpu_kernel void @expect_align1() {
113 ; CHECK-LABEL: @expect_align1() !llvm.amdgcn.lds.kernel.id !2
114 ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align1.dynlds) ]
115 ; CHECK-NEXT: call void @use_shared1()
116 ; CHECK-NEXT: ret void
118 call void @use_shared1()
; Kernel that calls @use_shared2.
; Expected output: the kernel gains !llvm.amdgcn.lds.kernel.id metadata (!3,
; which the trailing CHECK lines define as i32 1) and a leading
; llvm.donothing call whose "ExplicitUse" operand bundle names
; @llvm.amdgcn.expect_align2.dynlds; the call to @use_shared2 is kept.
122 define amdgpu_kernel void @expect_align2() {
123 ; CHECK-LABEL: @expect_align2() !llvm.amdgcn.lds.kernel.id !3
124 ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align2.dynlds) ]
125 ; CHECK-NEXT: call void @use_shared2()
126 ; CHECK-NEXT: ret void
128 call void @use_shared2()
; Kernel that calls @use_shared4, which touches both @static_shared and
; @dynamic_shared4.
; Expected output: the kernel gains attribute group #1 (checked below as
; "amdgpu-lds-size"="4,4"), kernel.id metadata !4 (i32 2), and two leading
; llvm.donothing calls -- one naming @llvm.amdgcn.expect_align4.dynlds and one
; naming @llvm.amdgcn.module.lds -- ahead of the original call.
132 define amdgpu_kernel void @expect_align4() {
133 ; CHECK-LABEL: @expect_align4() #1 !llvm.amdgcn.lds.kernel.id !4 {
134 ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align4.dynlds) ]
135 ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
136 ; CHECK-NEXT: call void @use_shared4()
137 ; CHECK-NEXT: ret void
139 call void @use_shared4()
143 ; This kernel also uses dynamic_shared8 directly.
; Kernel that stores i64 3 to element 9 of @dynamic_shared8 itself and then
; calls @use_shared8.
; Expected output: the kernel's own getelementptr/store still reference
; @dynamic_shared8 directly (no table lookup); the kernel gains kernel.id
; metadata !5 (i32 3) and a leading llvm.donothing call naming
; @llvm.amdgcn.expect_align8.dynlds.
144 define amdgpu_kernel void @expect_align8() {
145 ; CHECK-LABEL: @expect_align8() !llvm.amdgcn.lds.kernel.id !5 {
146 ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align8.dynlds) ]
147 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i64], ptr addrspace(3) @dynamic_shared8, i32 0, i32 9
148 ; CHECK-NEXT: store i64 3, ptr addrspace(3) [[ARRAYIDX]], align 4
149 ; CHECK-NEXT: call void @use_shared8()
150 ; CHECK-NEXT: ret void
152 %arrayidx = getelementptr inbounds [0 x i64], ptr addrspace(3) @dynamic_shared8, i32 0, i32 9
153 store i64 3, ptr addrspace(3) %arrayidx
154 call void @use_shared8()
158 ; Note: use_shared4 uses module.lds so this will allocate at offset 4
; Kernel that calls both @use_shared2 (align-2 variable) and @use_shared4
; (align-4 variable plus @static_shared).
; Expected output: attribute group #1, kernel.id metadata !6 (i32 4), and two
; leading llvm.donothing calls naming
; @llvm.amdgcn.expect_max_of_2_and_4.dynlds and @llvm.amdgcn.module.lds, ahead
; of the two original calls in their original order. The matching .dynlds
; global is checked near the top of the file with align 4.
159 define amdgpu_kernel void @expect_max_of_2_and_4() {
160 ; CHECK-LABEL: @expect_max_of_2_and_4() #1 !llvm.amdgcn.lds.kernel.id !6 {
161 ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_max_of_2_and_4.dynlds) ]
162 ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
163 ; CHECK-NEXT: call void @use_shared2()
164 ; CHECK-NEXT: call void @use_shared4()
165 ; CHECK-NEXT: ret void
167 call void @use_shared2()
168 call void @use_shared4()
; Attribute group #0 marks @use_shared2, @use_shared4 and @use_shared8 as
; noinline; @use_shared1 is declared without it.
173 attributes #0 = { noinline }
175 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
176 ; CHECK: declare void @llvm.donothing() #2
178 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
179 ; CHECK: declare i32 @llvm.amdgcn.lds.kernel.id() #3
181 ; CHECK: attributes #0 = { noinline }
182 ; CHECK: attributes #1 = { "amdgpu-lds-size"="4,4" }
183 ; CHECK: attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
184 ; CHECK: attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
186 ; CHECK: !0 = !{i64 0, i64 1}
187 ; CHECK: !1 = !{i64 4, i64 5}
188 ; CHECK: !2 = !{i32 0}
189 ; CHECK: !3 = !{i32 1}
190 ; CHECK: !4 = !{i32 2}
191 ; CHECK: !5 = !{i32 3}
192 ; CHECK: !6 = !{i32 4}