1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,SI
3 ;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,VI
; Three v4f32 loads with zero index and offsets whose last intrinsic operand is
; 0, 1 and 2; the checks expect no modifier, glc and slc respectively.
; The result aggregate is seeded with poison (not the deprecated undef); all
; three fields are overwritten before the return.
5 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(ptr addrspace(8) inreg) {
6 ; CHECK-LABEL: buffer_load:
7 ; CHECK: ; %bb.0: ; %main_body
8 ; CHECK-NEXT: v_mov_b32_e32 v8, 0
9 ; CHECK-NEXT: buffer_load_dwordx4 v[0:3], v8, s[0:3], 0 idxen
10 ; CHECK-NEXT: buffer_load_dwordx4 v[4:7], v8, s[0:3], 0 idxen glc
11 ; CHECK-NEXT: buffer_load_dwordx4 v[8:11], v8, s[0:3], 0 idxen slc
12 ; CHECK-NEXT: s_waitcnt vmcnt(0)
13 ; CHECK-NEXT: ; return to shader part epilog
15 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 0)
16 %data_glc = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 1)
17 %data_slc = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 2)
18 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %data, 0
19 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
20 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
21 ret {<4 x float>, <4 x float>, <4 x float>} %r2
; Constant 40 in the third intrinsic operand; the checks expect it to appear as
; the instruction's immediate field (offset:40) with a zero index in v0.
24 define amdgpu_ps <4 x float> @buffer_load_immoffs(ptr addrspace(8) inreg) {
25 ; CHECK-LABEL: buffer_load_immoffs:
26 ; CHECK: ; %bb.0: ; %main_body
27 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
28 ; CHECK-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 idxen offset:40
29 ; CHECK-NEXT: s_waitcnt vmcnt(0)
30 ; CHECK-NEXT: ; return to shader part epilog
32 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 40, i32 0, i32 0)
; Constant third operand (4) and constant fourth operand (8188 = 0x1ffc); the
; checks expect offset:4 as the immediate and 0x1ffc materialized in s4 as the
; scalar offset operand.
36 define amdgpu_ps <4 x float> @buffer_load_immoffs_large(ptr addrspace(8) inreg) {
37 ; CHECK-LABEL: buffer_load_immoffs_large:
38 ; CHECK: ; %bb.0: ; %main_body
39 ; CHECK-NEXT: s_movk_i32 s4, 0x1ffc
40 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
41 ; CHECK-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], s4 idxen offset:4
42 ; CHECK-NEXT: s_waitcnt vmcnt(0)
43 ; CHECK-NEXT: ; return to shader part epilog
45 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 4, i32 8188, i32 0)
; Only the second intrinsic operand is a variable; the checks expect the idxen
; form with that value taken directly from v0.
49 define amdgpu_ps <4 x float> @buffer_load_idx(ptr addrspace(8) inreg, i32) {
50 ; CHECK-LABEL: buffer_load_idx:
51 ; CHECK: ; %bb.0: ; %main_body
52 ; CHECK-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 idxen
53 ; CHECK-NEXT: s_waitcnt vmcnt(0)
54 ; CHECK-NEXT: ; return to shader part epilog
56 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %1, i32 0, i32 0, i32 0)
; Only the third intrinsic operand is a variable (second operand is constant 0).
; The checks expect the idxen offen form on the pair v[0:1], with zero copied
; into v0 and the incoming value moved to v1.
60 define amdgpu_ps <4 x float> @buffer_load_ofs(ptr addrspace(8) inreg, i32) {
61 ; CHECK-LABEL: buffer_load_ofs:
62 ; CHECK: ; %bb.0: ; %main_body
63 ; CHECK-NEXT: s_mov_b32 s4, 0
64 ; CHECK-NEXT: v_mov_b32_e32 v1, v0
65 ; CHECK-NEXT: v_mov_b32_e32 v0, s4
66 ; CHECK-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 idxen offen
67 ; CHECK-NEXT: s_waitcnt vmcnt(0)
68 ; CHECK-NEXT: ; return to shader part epilog
70 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 %ofs, i32 0, i32 0)
; Variable third operand (%ofs) with constant 0 as the second operand; the
; checks expect the register pair v[0:1] plus a folded immediate, i.e.
; idxen offen offset:60.
74 define amdgpu_ps <4 x float> @buffer_load_ofs_imm(ptr addrspace(8) inreg, i32) {
75 ; CHECK-LABEL: buffer_load_ofs_imm:
76 ; CHECK: ; %bb.0: ; %main_body
77 ; CHECK-NEXT: s_mov_b32 s4, 0
78 ; CHECK-NEXT: v_mov_b32_e32 v1, v0
79 ; CHECK-NEXT: v_mov_b32_e32 v0, s4
80 ; CHECK-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
81 ; CHECK-NEXT: s_waitcnt vmcnt(0)
82 ; CHECK-NEXT: ; return to shader part epilog
85 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 %ofs, i32 0, i32 0)
; Second and third intrinsic operands are both variables, passed in argument
; order; the checks expect v[0:1] to be used as-is with idxen offen.
89 define amdgpu_ps <4 x float> @buffer_load_both(ptr addrspace(8) inreg, i32, i32) {
90 ; CHECK-LABEL: buffer_load_both:
91 ; CHECK: ; %bb.0: ; %main_body
92 ; CHECK-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 idxen offen
93 ; CHECK-NEXT: s_waitcnt vmcnt(0)
94 ; CHECK-NEXT: ; return to shader part epilog
96 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %1, i32 %2, i32 0, i32 0)
; Same as buffer_load_both but with the two variable operands swapped (%2 then
; %1); the checks expect one copy (v0 -> v2) so that v[1:2] holds the pair in
; the order the instruction needs.
100 define amdgpu_ps <4 x float> @buffer_load_both_reversed(ptr addrspace(8) inreg, i32, i32) {
101 ; CHECK-LABEL: buffer_load_both_reversed:
102 ; CHECK: ; %bb.0: ; %main_body
103 ; CHECK-NEXT: v_mov_b32_e32 v2, v0
104 ; CHECK-NEXT: buffer_load_dwordx4 v[0:3], v[1:2], s[0:3], 0 idxen offen
105 ; CHECK-NEXT: s_waitcnt vmcnt(0)
106 ; CHECK-NEXT: ; return to shader part epilog
108 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %2, i32 %1, i32 0, i32 0)
109 ret <4 x float> %data
; Scalar float result with variable %idx and %ofs; the checks expect a single
; buffer_load_dword using v[0:1] with idxen offen.
112 define amdgpu_ps float @buffer_load_x1(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs) {
113 ; CHECK-LABEL: buffer_load_x1:
114 ; CHECK: ; %bb.0: ; %main_body
115 ; CHECK-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 idxen offen
116 ; CHECK-NEXT: s_waitcnt vmcnt(0)
117 ; CHECK-NEXT: ; return to shader part epilog
119 %data = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0)
; <2 x float> result with variable %idx and %ofs; the checks expect
; buffer_load_dwordx2 using v[0:1] with idxen offen.
123 define amdgpu_ps <2 x float> @buffer_load_x2(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs) {
124 ; CHECK-LABEL: buffer_load_x2:
125 ; CHECK: ; %bb.0: ; %main_body
126 ; CHECK-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 idxen offen
127 ; CHECK-NEXT: s_waitcnt vmcnt(0)
128 ; CHECK-NEXT: ; return to shader part epilog
130 %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0)
131 ret <2 x float> %data
; The third intrinsic operand is %ofs + (-16). The checks expect the negative
; constant to stay as an explicit vector add (v_add_i32 for verde, v_add_u32
; for tonga) and no offset: immediate on the load.
134 define amdgpu_ps <4 x float> @buffer_load_negative_offset(ptr addrspace(8) inreg, i32 %ofs) {
135 ; SI-LABEL: buffer_load_negative_offset:
136 ; SI: ; %bb.0: ; %main_body
137 ; SI-NEXT: s_mov_b32 s4, 0
138 ; SI-NEXT: v_add_i32_e32 v1, vcc, -16, v0
139 ; SI-NEXT: v_mov_b32_e32 v0, s4
140 ; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 idxen offen
141 ; SI-NEXT: s_waitcnt vmcnt(0)
142 ; SI-NEXT: ; return to shader part epilog
144 ; VI-LABEL: buffer_load_negative_offset:
145 ; VI: ; %bb.0: ; %main_body
146 ; VI-NEXT: s_mov_b32 s4, 0
147 ; VI-NEXT: v_add_u32_e32 v1, vcc, -16, v0
148 ; VI-NEXT: v_mov_b32_e32 v0, s4
149 ; VI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 idxen offen
150 ; VI-NEXT: s_waitcnt vmcnt(0)
151 ; VI-NEXT: ; return to shader part epilog
153 %ofs.1 = add i32 %ofs, -16
154 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 %ofs.1, i32 0, i32 0)
155 ret <4 x float> %data
158 ; SI does not merge the two DS stores into a single ds_write2_b32 (unlike VI) because of its signed offset bug.
; In the IR the buffer load sits between two LDS stores (%lds and %lds + 4
; floats). The tonga checks expect the two stores to be combined into one
; ds_write2_b32 despite the intervening load; the verde checks expect two
; separate ds_write_b32 instructions with an explicit address add.
159 define amdgpu_ps float @buffer_load_mmo(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %lds) {
160 ; SI-LABEL: buffer_load_mmo:
161 ; SI: ; %bb.0: ; %entry
162 ; SI-NEXT: v_mov_b32_e32 v2, 0
163 ; SI-NEXT: buffer_load_dword v1, v2, s[0:3], 0 idxen
164 ; SI-NEXT: s_mov_b32 m0, -1
165 ; SI-NEXT: ds_write_b32 v0, v2
166 ; SI-NEXT: v_add_i32_e32 v0, vcc, 16, v0
167 ; SI-NEXT: ds_write_b32 v0, v2
168 ; SI-NEXT: s_waitcnt vmcnt(0)
169 ; SI-NEXT: v_mov_b32_e32 v0, v1
170 ; SI-NEXT: s_waitcnt lgkmcnt(0)
171 ; SI-NEXT: ; return to shader part epilog
173 ; VI-LABEL: buffer_load_mmo:
174 ; VI: ; %bb.0: ; %entry
175 ; VI-NEXT: v_mov_b32_e32 v2, 0
176 ; VI-NEXT: buffer_load_dword v1, v2, s[0:3], 0 idxen
177 ; VI-NEXT: s_mov_b32 m0, -1
178 ; VI-NEXT: ds_write2_b32 v0, v2, v2 offset1:4
179 ; VI-NEXT: s_waitcnt vmcnt(0)
180 ; VI-NEXT: v_mov_b32_e32 v0, v1
181 ; VI-NEXT: s_waitcnt lgkmcnt(0)
182 ; VI-NEXT: ; return to shader part epilog
184 store float 0.0, ptr addrspace(3) %lds
185 %val = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
186 %tmp2 = getelementptr float, ptr addrspace(3) %lds, i32 4
187 store float 0.0, ptr addrspace(3) %tmp2
; Integer-typed variants (v4i32, v2i32, i32) whose last intrinsic operand is
; 0, 1 and 2; the checks expect dwordx4, dwordx2 glc and dword slc. Results are
; bitcast to float types for the return value.
; The result aggregate is seeded with poison (not the deprecated undef); all
; three fields are overwritten before the return.
191 define amdgpu_ps {<4 x float>, <2 x float>, float} @buffer_load_int(ptr addrspace(8) inreg) {
192 ; CHECK-LABEL: buffer_load_int:
193 ; CHECK: ; %bb.0: ; %main_body
194 ; CHECK-NEXT: v_mov_b32_e32 v6, 0
195 ; CHECK-NEXT: buffer_load_dwordx4 v[0:3], v6, s[0:3], 0 idxen
196 ; CHECK-NEXT: buffer_load_dwordx2 v[4:5], v6, s[0:3], 0 idxen glc
197 ; CHECK-NEXT: buffer_load_dword v6, v6, s[0:3], 0 idxen slc
198 ; CHECK-NEXT: s_waitcnt vmcnt(0)
199 ; CHECK-NEXT: ; return to shader part epilog
201 %data = call <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v4i32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 0)
202 %data_glc = call <2 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v2i32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 1)
203 %data_slc = call i32 @llvm.amdgcn.struct.ptr.buffer.load.i32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 2)
204 %fdata = bitcast <4 x i32> %data to <4 x float>
205 %fdata_glc = bitcast <2 x i32> %data_glc to <2 x float>
206 %fdata_slc = bitcast i32 %data_slc to float
207 %r0 = insertvalue {<4 x float>, <2 x float>, float} poison, <4 x float> %fdata, 0
208 %r1 = insertvalue {<4 x float>, <2 x float>, float} %r0, <2 x float> %fdata_glc, 1
209 %r2 = insertvalue {<4 x float>, <2 x float>, float} %r1, float %fdata_slc, 2
210 ret {<4 x float>, <2 x float>, float} %r2
; i8 load that is zero-extended and converted with uitofp; the checks expect
; buffer_load_ubyte followed by v_cvt_f32_ubyte0.
213 define amdgpu_ps float @struct_ptr_buffer_load_ubyte(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs) {
214 ; CHECK-LABEL: struct_ptr_buffer_load_ubyte:
215 ; CHECK: ; %bb.0: ; %main_body
216 ; CHECK-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 idxen offen
217 ; CHECK-NEXT: s_waitcnt vmcnt(0)
218 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
219 ; CHECK-NEXT: ; return to shader part epilog
221 %tmp = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0)
222 %tmp2 = zext i8 %tmp to i32
223 %val = uitofp i32 %tmp2 to float
; i16 load that is zero-extended and converted with uitofp; the checks expect
; buffer_load_ushort followed by v_cvt_f32_u32.
227 define amdgpu_ps float @struct_ptr_buffer_load_ushort(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs) {
228 ; CHECK-LABEL: struct_ptr_buffer_load_ushort:
229 ; CHECK: ; %bb.0: ; %main_body
230 ; CHECK-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 idxen offen
231 ; CHECK-NEXT: s_waitcnt vmcnt(0)
232 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v0
233 ; CHECK-NEXT: ; return to shader part epilog
235 %tmp = call i16 @llvm.amdgcn.struct.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0)
236 %tmp2 = zext i16 %tmp to i32
237 %val = uitofp i32 %tmp2 to float
; i8 load that is sign-extended and converted with sitofp; the checks expect
; buffer_load_sbyte followed by v_cvt_f32_i32.
241 define amdgpu_ps float @struct_ptr_buffer_load_sbyte(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs) {
242 ; CHECK-LABEL: struct_ptr_buffer_load_sbyte:
243 ; CHECK: ; %bb.0: ; %main_body
244 ; CHECK-NEXT: buffer_load_sbyte v0, v[0:1], s[0:3], 0 idxen offen
245 ; CHECK-NEXT: s_waitcnt vmcnt(0)
246 ; CHECK-NEXT: v_cvt_f32_i32_e32 v0, v0
247 ; CHECK-NEXT: ; return to shader part epilog
249 %tmp = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0)
250 %tmp2 = sext i8 %tmp to i32
251 %val = sitofp i32 %tmp2 to float
; i16 load that is sign-extended and converted with sitofp; the checks expect
; buffer_load_sshort followed by v_cvt_f32_i32.
255 define amdgpu_ps float @struct_ptr_buffer_load_sshort(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs) {
256 ; CHECK-LABEL: struct_ptr_buffer_load_sshort:
257 ; CHECK: ; %bb.0: ; %main_body
258 ; CHECK-NEXT: buffer_load_sshort v0, v[0:1], s[0:3], 0 idxen offen
259 ; CHECK-NEXT: s_waitcnt vmcnt(0)
260 ; CHECK-NEXT: v_cvt_f32_i32_e32 v0, v0
261 ; CHECK-NEXT: ; return to shader part epilog
263 %tmp = call i16 @llvm.amdgcn.struct.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0)
264 %tmp2 = sext i16 %tmp to i32
265 %val = sitofp i32 %tmp2 to float
; half load indexed by %idx and stored to LDS; the checks expect
; buffer_load_ushort (idxen) followed by ds_write_b16.
269 define amdgpu_ps void @struct_ptr_buffer_load_f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
270 ; CHECK-LABEL: struct_ptr_buffer_load_f16:
271 ; CHECK: ; %bb.0: ; %main_body
272 ; CHECK-NEXT: buffer_load_ushort v1, v1, s[0:3], 0 idxen
273 ; CHECK-NEXT: s_mov_b32 m0, -1
274 ; CHECK-NEXT: s_waitcnt vmcnt(0)
275 ; CHECK-NEXT: ds_write_b16 v0, v1
276 ; CHECK-NEXT: s_endpgm
278 %val = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) %rsrc, i32 %idx, i32 0, i32 0, i32 0)
279 store half %val, ptr addrspace(3) %ptr
; <2 x half> load indexed by %idx and stored to LDS; the checks expect
; buffer_load_dword (idxen) followed by ds_write_b32.
283 define amdgpu_ps void @struct_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
284 ; CHECK-LABEL: struct_ptr_buffer_load_v2f16:
285 ; CHECK: ; %bb.0: ; %main_body
286 ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen
287 ; CHECK-NEXT: s_mov_b32 m0, -1
288 ; CHECK-NEXT: s_waitcnt vmcnt(0)
289 ; CHECK-NEXT: ds_write_b32 v0, v1
290 ; CHECK-NEXT: s_endpgm
292 %val = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %idx, i32 0, i32 0, i32 0)
293 store <2 x half> %val, ptr addrspace(3) %ptr
; <4 x half> load indexed by %idx and stored to LDS; the checks expect
; buffer_load_dwordx2 (idxen) followed by ds_write_b64.
297 define amdgpu_ps void @struct_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
298 ; CHECK-LABEL: struct_ptr_buffer_load_v4f16:
299 ; CHECK: ; %bb.0: ; %main_body
300 ; CHECK-NEXT: buffer_load_dwordx2 v[1:2], v1, s[0:3], 0 idxen
301 ; CHECK-NEXT: s_mov_b32 m0, -1
302 ; CHECK-NEXT: s_waitcnt vmcnt(0)
303 ; CHECK-NEXT: ds_write_b64 v0, v[1:2]
304 ; CHECK-NEXT: s_endpgm
306 %val = call <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %idx, i32 0, i32 0, i32 0)
307 store <4 x half> %val, ptr addrspace(3) %ptr
; i16 load indexed by %idx and stored to LDS without extension; the checks
; expect buffer_load_ushort (idxen) followed by ds_write_b16.
311 define amdgpu_ps void @struct_ptr_buffer_load_i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
312 ; CHECK-LABEL: struct_ptr_buffer_load_i16:
313 ; CHECK: ; %bb.0: ; %main_body
314 ; CHECK-NEXT: buffer_load_ushort v1, v1, s[0:3], 0 idxen
315 ; CHECK-NEXT: s_mov_b32 m0, -1
316 ; CHECK-NEXT: s_waitcnt vmcnt(0)
317 ; CHECK-NEXT: ds_write_b16 v0, v1
318 ; CHECK-NEXT: s_endpgm
320 %val = call i16 @llvm.amdgcn.struct.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 %idx, i32 0, i32 0, i32 0)
321 store i16 %val, ptr addrspace(3) %ptr
; <2 x i16> load indexed by %idx and stored to LDS; the checks expect
; buffer_load_dword (idxen) followed by ds_write_b32.
325 define amdgpu_ps void @struct_ptr_buffer_load_v2i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
326 ; CHECK-LABEL: struct_ptr_buffer_load_v2i16:
327 ; CHECK: ; %bb.0: ; %main_body
328 ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen
329 ; CHECK-NEXT: s_mov_b32 m0, -1
330 ; CHECK-NEXT: s_waitcnt vmcnt(0)
331 ; CHECK-NEXT: ds_write_b32 v0, v1
332 ; CHECK-NEXT: s_endpgm
334 %val = call <2 x i16> @llvm.amdgcn.struct.ptr.buffer.load.v2i16(ptr addrspace(8) %rsrc, i32 %idx, i32 0, i32 0, i32 0)
335 store <2 x i16> %val, ptr addrspace(3) %ptr
; <4 x i16> load indexed by %idx and stored to LDS; the checks expect
; buffer_load_dwordx2 (idxen) followed by ds_write_b64.
339 define amdgpu_ps void @struct_ptr_buffer_load_v4i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
340 ; CHECK-LABEL: struct_ptr_buffer_load_v4i16:
341 ; CHECK: ; %bb.0: ; %main_body
342 ; CHECK-NEXT: buffer_load_dwordx2 v[1:2], v1, s[0:3], 0 idxen
343 ; CHECK-NEXT: s_mov_b32 m0, -1
344 ; CHECK-NEXT: s_waitcnt vmcnt(0)
345 ; CHECK-NEXT: ds_write_b64 v0, v[1:2]
346 ; CHECK-NEXT: s_endpgm
348 %val = call <4 x i16> @llvm.amdgcn.struct.ptr.buffer.load.v4i16(ptr addrspace(8) %rsrc, i32 %idx, i32 0, i32 0, i32 0)
349 store <4 x i16> %val, ptr addrspace(3) %ptr
353 declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32) #0
354 declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) #0
355 declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #0
356 declare i32 @llvm.amdgcn.struct.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32, i32) #0
357 declare <2 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v2i32(ptr addrspace(8), i32, i32, i32, i32) #0
358 declare <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32) #0
359 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
360 declare i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32, i32) #0
362 declare half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32, i32) #0
363 declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32) #0
364 declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32) #0
366 declare i16 @llvm.amdgcn.struct.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32, i32) #0
367 declare <2 x i16> @llvm.amdgcn.struct.ptr.buffer.load.v2i16(ptr addrspace(8), i32, i32, i32, i32) #0
368 declare <4 x i16> @llvm.amdgcn.struct.ptr.buffer.load.v4i16(ptr addrspace(8), i32, i32, i32, i32) #0
370 attributes #0 = { nounwind readonly }