; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefix=CHECK
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefix=CHECK
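
; The atomic buffer load must be executed on every iteration, so the
; buffer_load stays inside the .LBB0_1 loop instead of being hoisted.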
define amdgpu_kernel void @struct_atomic_buffer_load_i32(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_i32:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB0_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 0 idxen glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB0_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32> %addr, i32 %index, i32 0, i32 0, i32 1)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}
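
; Same pattern with a constant vindex: the index 15 is materialized into v1
; outside the loop.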
define amdgpu_kernel void @struct_atomic_buffer_load_i32_const_idx(<4 x i32> %addr) {
; CHECK-LABEL: struct_atomic_buffer_load_i32_const_idx:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:  .LBB1_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 0 idxen glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB1_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32> %addr, i32 15, i32 0, i32 0, i32 1)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}
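
; Zero voffset variant: no offset: modifier is emitted on the buffer_load.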
define amdgpu_kernel void @struct_atomic_buffer_load_i32_off(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_i32_off:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB2_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 0 idxen glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB2_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32> %addr, i32 %index, i32 0, i32 0, i32 1)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}
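
; voffset and soffset of 4 each: the load carries both an soffset operand of 4
; and an offset:4 immediate.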
define amdgpu_kernel void @struct_atomic_buffer_load_i32_soff(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_i32_soff:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB3_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 4 idxen offset:4 glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB3_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32> %addr, i32 %index, i32 4, i32 4, i32 1)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}
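
; Cache policy 4 sets the DLC bit, so the load is emitted with dlc rather
; than glc.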
define amdgpu_kernel void @struct_atomic_buffer_load_i32_dlc(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_i32_dlc:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB4_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 0 idxen offset:4 dlc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB4_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 4)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}
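
; The plain (non-atomic) struct.buffer.load is loop-invariant: it is hoisted
; above .LBB5_1 and only the compare and exec-mask update remain in the loop.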
define amdgpu_kernel void @struct_nonatomic_buffer_load_i32(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_nonatomic_buffer_load_i32:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_dual_mov_b32 v1, s6 :: v_dual_and_b32 v0, 0x3ff, v0
; CHECK-NEXT:    buffer_load_b32 v1, v1, s[0:3], 0 idxen offset:4 glc
; CHECK-NEXT:    s_mov_b32 s0, 0
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v1, v0
; CHECK-NEXT:  .LBB5_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_and_b32 s1, exec_lo, vcc_lo
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; CHECK-NEXT:    s_or_b32 s0, s1, s0
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
; CHECK-NEXT:    s_cbranch_execnz .LBB5_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 1)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}
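
; 64-bit payload: the workitem id is zero-extended and the loop compares with
; v_cmp_ne_u64.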
define amdgpu_kernel void @struct_atomic_buffer_load_i64(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_i64:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v2, s6
; CHECK-NEXT:  .LBB6_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b64 v[3:4], v2, s[0:3], 0 idxen offset:4 glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc_lo, v[3:4], v[0:1]
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB6_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %id.zext = zext i32 %id to i64
  br label %bb1
bb1:
  %load = call i64 @llvm.amdgcn.struct.atomic.buffer.load.i64(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 1)
  %cmp = icmp eq i64 %load, %id.zext
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}
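
; <2 x i16> payload bitcast to i32: a single buffer_load_b32 feeds the compare.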
define amdgpu_kernel void @struct_atomic_buffer_load_v2i16(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_v2i16:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB7_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 0 idxen glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB7_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call <2 x i16> @llvm.amdgcn.struct.atomic.buffer.load.v2i16(<4 x i32> %addr, i32 %index, i32 0, i32 0, i32 1)
  %bitcast = bitcast <2 x i16> %load to i32
  %cmp = icmp eq i32 %bitcast, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}
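
; <4 x i16> payload: elements 0 and 2 are repacked with v_and_b32/v_lshl_or_b32
; before the 32-bit compare.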
define amdgpu_kernel void @struct_atomic_buffer_load_v4i16(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_v4i16:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB8_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b64 v[2:3], v1, s[0:3], 0 idxen offset:4 glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; CHECK-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB8_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call <4 x i16> @llvm.amdgcn.struct.atomic.buffer.load.v4i16(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 1)
  %shortened = shufflevector <4 x i16> %load, <4 x i16> poison, <2 x i32> <i32 0, i32 2>
  %bitcast = bitcast <2 x i16> %shortened to i32
  %cmp = icmp eq i32 %bitcast, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}
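
; <4 x i32> payload: only element 3 (v5) feeds the compare.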
define amdgpu_kernel void @struct_atomic_buffer_load_v4i32(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_v4i32:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB9_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b128 v[2:5], v1, s[0:3], 0 idxen offset:4 glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v5, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB9_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call <4 x i32> @llvm.amdgcn.struct.atomic.buffer.load.v4i32(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 1)
  %extracted = extractelement <4 x i32> %load, i32 3
  %cmp = icmp eq i32 %extracted, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}
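
; ptr payload: the loaded pointer is dereferenced with a flat_load_b32 on each
; iteration.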
define amdgpu_kernel void @struct_atomic_buffer_load_ptr(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_ptr:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB10_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b64 v[2:3], v1, s[0:3], 0 idxen offset:4 glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    flat_load_b32 v2, v[2:3]
; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB10_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call ptr @llvm.amdgcn.struct.atomic.buffer.load.ptr(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 1)
  %elem = load i32, ptr %load
  %cmp = icmp eq i32 %elem, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

declare i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32>, i32, i32, i32, i32 immarg)
declare i64 @llvm.amdgcn.struct.atomic.buffer.load.i64(<4 x i32>, i32, i32, i32, i32 immarg)
declare <2 x i16> @llvm.amdgcn.struct.atomic.buffer.load.v2i16(<4 x i32>, i32, i32, i32, i32 immarg)
declare <4 x i16> @llvm.amdgcn.struct.atomic.buffer.load.v4i16(<4 x i32>, i32, i32, i32, i32 immarg)
declare <4 x i32> @llvm.amdgcn.struct.atomic.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32 immarg)
declare ptr @llvm.amdgcn.struct.atomic.buffer.load.ptr(<4 x i32>, i32, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32>, i32, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.workitem.id.x()