1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2 ; RUN: opt -S -mtriple=amdgcn--amdhsa -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=table | FileCheck -check-prefix=OPT %s
3 ; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s --amdgpu-lower-module-lds-strategy=table | FileCheck -check-prefix=GCN %s
5 ; Opt checks from utils/update_test_checks.py, llc checks from utils/update_llc_test_checks.py, both modified.
7 ; Define four variables and four non-kernel functions which access exactly one variable each
8 @v0 = addrspace(3) global float poison
9 @v1 = addrspace(3) global i16 poison, align 16
10 @v2 = addrspace(3) global i64 poison
11 @v3 = addrspace(3) global i8 poison
12 @unused = addrspace(3) global i16 poison
14 ; OPT: %llvm.amdgcn.kernel.kernel_no_table.lds.t = type { i64 }
15 ; OPT: %llvm.amdgcn.kernel.k01.lds.t = type { i16, [2 x i8], float }
16 ; OPT: %llvm.amdgcn.kernel.k23.lds.t = type { i64, i8 }
17 ; OPT: %llvm.amdgcn.kernel.k123.lds.t = type { i16, i8, [5 x i8], i64 }
19 ; OPT: @llvm.amdgcn.kernel.kernel_no_table.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_no_table.lds.t poison, align 8, !absolute_symbol !0
20 ; OPT: @llvm.amdgcn.kernel.k01.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k01.lds.t poison, align 16, !absolute_symbol !0
21 ; OPT: @llvm.amdgcn.kernel.k23.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k23.lds.t poison, align 8, !absolute_symbol !0
22 ; OPT: @llvm.amdgcn.kernel.k123.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k123.lds.t poison, align 16, !absolute_symbol !0
24 ; Salient parts of the IR lookup table check:
25 ; It has (top level) size 3 as there are 3 kernels that call functions which use lds
26 ; The next level down has type [4 x i32] as there are 4 variables accessed by functions which use lds
27 ; The kernel naming pattern and the structs being named after the functions help verify placement of poison
28 ; The remainder are constant expressions into the variable instances checked above
30 ; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [4 x i32]] [[4 x i32] [i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k01.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, i32 0, i32 2) to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds to i32), i32 poison, i32 poison], [4 x i32] [i32 poison, i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 3) to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1) to i32)], [4 x i32] [i32 poison, i32 poison, i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k23.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1) to i32)]]
34 ; OPT-LABEL: define void @f0() {
35 ; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
36 ; OPT-NEXT: [[V02:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
37 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V02]], align 4
38 ; OPT-NEXT: [[V03:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
39 ; OPT-NEXT: [[LD:%.*]] = load float, ptr addrspace(3) [[V03]], align 4
40 ; OPT-NEXT: [[MUL:%.*]] = fmul float [[LD]], 2.000000e+00
41 ; OPT-NEXT: [[V0:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
42 ; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V0]], align 4
43 ; OPT-NEXT: [[V01:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
44 ; OPT-NEXT: store float [[MUL]], ptr addrspace(3) [[V01]], align 4
49 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50 ; GCN-NEXT: s_mov_b32 s4, s15
51 ; GCN-NEXT: s_ashr_i32 s5, s15, 31
52 ; GCN-NEXT: s_getpc_b64 s[6:7]
53 ; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table@rel32@lo+4
54 ; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table@rel32@hi+12
55 ; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 4
56 ; GCN-NEXT: s_add_u32 s4, s4, s6
57 ; GCN-NEXT: s_addc_u32 s5, s5, s7
58 ; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
59 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
60 ; GCN-NEXT: v_mov_b32_e32 v0, s4
61 ; GCN-NEXT: s_mov_b32 m0, -1
62 ; GCN-NEXT: ds_read_b32 v1, v0
63 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
64 ; GCN-NEXT: v_add_f32_e32 v1, v1, v1
65 ; GCN-NEXT: ds_write_b32 v0, v1
66 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
67 ; GCN-NEXT: s_setpc_b64 s[30:31]
68 %ld = load float, ptr addrspace(3) @v0
69 %mul = fmul float %ld, 2.
70 store float %mul, ptr addrspace(3) @v0
75 ; OPT-LABEL: define void @f1() {
76 ; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
77 ; OPT-NEXT: [[V12:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
78 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V12]], align 4
79 ; OPT-NEXT: [[V13:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
80 ; OPT-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[V13]], align 2
81 ; OPT-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
82 ; OPT-NEXT: [[V1:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
83 ; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V1]], align 4
84 ; OPT-NEXT: [[V11:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
85 ; OPT-NEXT: store i16 [[MUL]], ptr addrspace(3) [[V11]], align 2
90 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91 ; GCN-NEXT: s_mov_b32 s4, s15
92 ; GCN-NEXT: s_ashr_i32 s5, s15, 31
93 ; GCN-NEXT: s_getpc_b64 s[6:7]
94 ; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table@rel32@lo+8
95 ; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table@rel32@hi+16
96 ; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 4
97 ; GCN-NEXT: s_add_u32 s4, s4, s6
98 ; GCN-NEXT: s_addc_u32 s5, s5, s7
99 ; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
100 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
101 ; GCN-NEXT: v_mov_b32_e32 v0, s4
102 ; GCN-NEXT: s_mov_b32 m0, -1
103 ; GCN-NEXT: ds_read_u16 v1, v0
104 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
105 ; GCN-NEXT: v_mul_lo_u32 v1, v1, 3
106 ; GCN-NEXT: ds_write_b16 v0, v1
107 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
108 ; GCN-NEXT: s_setpc_b64 s[30:31]
109 %ld = load i16, ptr addrspace(3) @v1
110 %mul = mul i16 %ld, 3
111 store i16 %mul, ptr addrspace(3) @v1
116 ; OPT-LABEL: define void @f2() {
117 ; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
118 ; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
119 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V22]], align 4
120 ; OPT-NEXT: [[V23:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
121 ; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) [[V23]], align 8
122 ; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 4
123 ; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
124 ; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V2]], align 4
125 ; OPT-NEXT: [[V21:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
126 ; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) [[V21]], align 8
131 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132 ; GCN-NEXT: s_mov_b32 s4, s15
133 ; GCN-NEXT: s_ashr_i32 s5, s15, 31
134 ; GCN-NEXT: s_getpc_b64 s[6:7]
135 ; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table@rel32@lo+12
136 ; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table@rel32@hi+20
137 ; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 4
138 ; GCN-NEXT: s_add_u32 s4, s4, s6
139 ; GCN-NEXT: s_addc_u32 s5, s5, s7
140 ; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
141 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
142 ; GCN-NEXT: v_mov_b32_e32 v2, s4
143 ; GCN-NEXT: s_mov_b32 m0, -1
144 ; GCN-NEXT: ds_read_b64 v[0:1], v2
145 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
146 ; GCN-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
147 ; GCN-NEXT: ds_write_b64 v2, v[0:1]
148 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
149 ; GCN-NEXT: s_setpc_b64 s[30:31]
150 %ld = load i64, ptr addrspace(3) @v2
151 %mul = mul i64 %ld, 4
152 store i64 %mul, ptr addrspace(3) @v2
157 ; OPT-LABEL: define void @f3() {
158 ; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
159 ; OPT-NEXT: [[V32:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3
160 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V32]], align 4
161 ; OPT-NEXT: [[V33:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
162 ; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) [[V33]], align 1
163 ; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 5
164 ; OPT-NEXT: [[V3:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3
165 ; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V3]], align 4
166 ; OPT-NEXT: [[V31:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
167 ; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) [[V31]], align 1
172 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173 ; GCN-NEXT: s_mov_b32 s4, s15
174 ; GCN-NEXT: s_ashr_i32 s5, s15, 31
175 ; GCN-NEXT: s_getpc_b64 s[6:7]
176 ; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table@rel32@lo+16
177 ; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table@rel32@hi+24
178 ; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 4
179 ; GCN-NEXT: s_add_u32 s4, s4, s6
180 ; GCN-NEXT: s_addc_u32 s5, s5, s7
181 ; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
182 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
183 ; GCN-NEXT: v_mov_b32_e32 v0, s4
184 ; GCN-NEXT: s_mov_b32 m0, -1
185 ; GCN-NEXT: ds_read_u8 v1, v0
186 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
187 ; GCN-NEXT: v_mul_lo_u32 v1, v1, 5
188 ; GCN-NEXT: ds_write_b8 v0, v1
189 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
190 ; GCN-NEXT: s_setpc_b64 s[30:31]
191 %ld = load i8, ptr addrspace(3) @v3
193 store i8 %mul, ptr addrspace(3) @v3
197 ; Doesn't access any LDS variable via a function, so it won't be in the lookup table
198 define amdgpu_kernel void @kernel_no_table() {
199 ; OPT-LABEL: define amdgpu_kernel void @kernel_no_table(
200 ; OPT-SAME: ) #[[ATTR0:[0-9]+]] {
201 ; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) @llvm.amdgcn.kernel.kernel_no_table.lds, align 8
202 ; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 8
203 ; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.kernel_no_table.lds, align 8
206 ; GCN-LABEL: kernel_no_table:
208 ; GCN-NEXT: v_mov_b32_e32 v2, 0
209 ; GCN-NEXT: s_mov_b32 m0, -1
210 ; GCN-NEXT: ds_read_b64 v[0:1], v2
211 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
212 ; GCN-NEXT: v_lshl_b64 v[0:1], v[0:1], 3
213 ; GCN-NEXT: ds_write_b64 v2, v[0:1]
215 %ld = load i64, ptr addrspace(3) @v2
216 %mul = mul i64 %ld, 8
217 store i64 %mul, ptr addrspace(3) @v2
221 ; Access two variables, will allocate those two
222 define amdgpu_kernel void @k01() {
223 ; OPT-LABEL: define amdgpu_kernel void @k01(
224 ; OPT-SAME: ) #[[ATTR0]] !llvm.amdgcn.lds.kernel.id !1 {
225 ; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds) ], !alias.scope !2, !noalias !5
226 ; OPT-NEXT: call void @f0()
227 ; OPT-NEXT: call void @f1()
232 ; GCN-NEXT: s_mov_b32 s32, 0
233 ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s7
234 ; GCN-NEXT: s_add_i32 s6, s6, s9
235 ; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
236 ; GCN-NEXT: s_add_u32 s0, s0, s9
237 ; GCN-NEXT: s_addc_u32 s1, s1, 0
238 ; GCN-NEXT: s_mov_b64 s[8:9], s[4:5]
239 ; GCN-NEXT: s_getpc_b64 s[4:5]
240 ; GCN-NEXT: s_add_u32 s4, s4, f0@gotpcrel32@lo+4
241 ; GCN-NEXT: s_addc_u32 s5, s5, f0@gotpcrel32@hi+12
242 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
243 ; GCN-NEXT: s_mov_b32 s15, 0
244 ; GCN-NEXT: s_mov_b64 s[6:7], s[8:9]
245 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
246 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
247 ; GCN-NEXT: s_getpc_b64 s[4:5]
248 ; GCN-NEXT: s_add_u32 s4, s4, f1@gotpcrel32@lo+4
249 ; GCN-NEXT: s_addc_u32 s5, s5, f1@gotpcrel32@hi+12
250 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
251 ; GCN-NEXT: s_mov_b64 s[6:7], s[8:9]
252 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
253 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
255 ; GCN: .amdhsa_group_segment_fixed_size 8
261 define amdgpu_kernel void @k23() {
262 ; OPT-LABEL: define amdgpu_kernel void @k23(
263 ; OPT-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id !7 {
264 ; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ], !alias.scope !8, !noalias !11
265 ; OPT-NEXT: call void @f2()
266 ; OPT-NEXT: call void @f3()
271 ; GCN-NEXT: s_mov_b32 s32, 0
272 ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s7
273 ; GCN-NEXT: s_add_i32 s6, s6, s9
274 ; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
275 ; GCN-NEXT: s_add_u32 s0, s0, s9
276 ; GCN-NEXT: s_addc_u32 s1, s1, 0
277 ; GCN-NEXT: s_mov_b64 s[8:9], s[4:5]
278 ; GCN-NEXT: s_getpc_b64 s[4:5]
279 ; GCN-NEXT: s_add_u32 s4, s4, f2@gotpcrel32@lo+4
280 ; GCN-NEXT: s_addc_u32 s5, s5, f2@gotpcrel32@hi+12
281 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
282 ; GCN-NEXT: s_mov_b32 s15, 2
283 ; GCN-NEXT: s_mov_b64 s[6:7], s[8:9]
284 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
285 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
286 ; GCN-NEXT: s_getpc_b64 s[4:5]
287 ; GCN-NEXT: s_add_u32 s4, s4, f3@gotpcrel32@lo+4
288 ; GCN-NEXT: s_addc_u32 s5, s5, f3@gotpcrel32@hi+12
289 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
290 ; GCN-NEXT: s_mov_b64 s[6:7], s[8:9]
291 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
292 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
294 ; GCN: .amdhsa_group_segment_fixed_size 16
300 ; Access and allocate three variables
301 define amdgpu_kernel void @k123() {
302 ; OPT-LABEL: define amdgpu_kernel void @k123(
303 ; OPT-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id !13 {
304 ; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ], !alias.scope !14, !noalias !17
305 ; OPT-NEXT: call void @f1()
306 ; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !20, !noalias !21
307 ; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 8
308 ; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !20, !noalias !21
309 ; OPT-NEXT: call void @f2()
314 ; GCN-NEXT: s_mov_b32 s32, 0
315 ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s7
316 ; GCN-NEXT: s_add_i32 s6, s6, s9
317 ; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
318 ; GCN-NEXT: s_add_u32 s0, s0, s9
319 ; GCN-NEXT: s_addc_u32 s1, s1, 0
320 ; GCN-NEXT: s_mov_b64 s[8:9], s[4:5]
321 ; GCN-NEXT: s_getpc_b64 s[4:5]
322 ; GCN-NEXT: s_add_u32 s4, s4, f1@gotpcrel32@lo+4
323 ; GCN-NEXT: s_addc_u32 s5, s5, f1@gotpcrel32@hi+12
324 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
325 ; GCN-NEXT: s_mov_b32 s15, 1
326 ; GCN-NEXT: s_mov_b64 s[6:7], s[8:9]
327 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
328 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
329 ; GCN-NEXT: v_mov_b32_e32 v0, 0
330 ; GCN-NEXT: s_mov_b32 m0, -1
331 ; GCN-NEXT: ds_read_u8 v1, v0 offset:2
332 ; GCN-NEXT: s_getpc_b64 s[4:5]
333 ; GCN-NEXT: s_add_u32 s4, s4, f2@gotpcrel32@lo+4
334 ; GCN-NEXT: s_addc_u32 s5, s5, f2@gotpcrel32@hi+12
335 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
336 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
337 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v1
338 ; GCN-NEXT: ds_write_b8 v0, v1 offset:2
339 ; GCN-NEXT: s_mov_b64 s[6:7], s[8:9]
340 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
342 ; GCN: .amdhsa_group_segment_fixed_size 16
344 %ld = load i8, ptr addrspace(3) @v3
346 store i8 %mul, ptr addrspace(3) @v3
352 ; OPT: declare i32 @llvm.amdgcn.lds.kernel.id()
354 ; OPT: attributes #0 = { "amdgpu-lds-size"="8" }
355 ; OPT: attributes #1 = { "amdgpu-lds-size"="16" }
363 ; Table size is number-kernels * number-variables * sizeof(uint32_t) = 3 * 4 * 4 = 48 bytes
364 ; GCN: .type llvm.amdgcn.lds.offset.table,@object
365 ; GCN-NEXT: .section .data.rel.ro,#alloc,#write
366 ; GCN-NEXT: .p2align 4, 0x0
367 ; GCN-NEXT: llvm.amdgcn.lds.offset.table:
368 ; GCN-NEXT: .long 0+4
374 ; GCN-NEXT: .long 0+8
375 ; GCN-NEXT: .long 0+2
379 ; GCN-NEXT: .long 0+8
380 ; GCN-NEXT: .size llvm.amdgcn.lds.offset.table, 48