1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s --check-prefixes=PREGFX10
3 ;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefixes=PREGFX10
4 ;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX10
5 ;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11
; Three v4f32 raw pointer buffer loads from the same resource whose last i32
; operand is 0, 1 and 2. The expected output prints no modifier, glc and slc
; respectively; the gfx1010 and gfx1100 runs also group the loads in an s_clause.
7 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(ptr addrspace(8) inreg) {
8 ; PREGFX10-LABEL: buffer_load:
9 ; PREGFX10: ; %bb.0: ; %main_body
10 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
11 ; PREGFX10-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
12 ; PREGFX10-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc
13 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
14 ; PREGFX10-NEXT: ; return to shader part epilog
16 ; GFX10-LABEL: buffer_load:
17 ; GFX10: ; %bb.0: ; %main_body
18 ; GFX10-NEXT: s_clause 0x2
19 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
20 ; GFX10-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
21 ; GFX10-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc
22 ; GFX10-NEXT: s_waitcnt vmcnt(0)
23 ; GFX10-NEXT: ; return to shader part epilog
25 ; GFX11-LABEL: buffer_load:
26 ; GFX11: ; %bb.0: ; %main_body
27 ; GFX11-NEXT: s_clause 0x2
28 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
29 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 glc
30 ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[0:3], 0 slc
31 ; GFX11-NEXT: s_waitcnt vmcnt(0)
32 ; GFX11-NEXT: ; return to shader part epilog
34 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0)
35 %data_glc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 1)
36 %data_slc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 2)
37 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
38 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
39 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
40 ret {<4 x float>, <4 x float>, <4 x float>} %r2
; Same three loads as a plain buffer load test, but with bit 2 (value 4) set in
; the last i32 operand (4, 5, 6). The gfx1010 and gfx1100 expectations append
; dlc to each load; the verde/tonga expectations print only glc/slc, no dlc.
43 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load_dlc(ptr addrspace(8) inreg) {
44 ; PREGFX10-LABEL: buffer_load_dlc:
45 ; PREGFX10: ; %bb.0: ; %main_body
46 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
47 ; PREGFX10-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
48 ; PREGFX10-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc
49 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
50 ; PREGFX10-NEXT: ; return to shader part epilog
52 ; GFX10-LABEL: buffer_load_dlc:
53 ; GFX10: ; %bb.0: ; %main_body
54 ; GFX10-NEXT: s_clause 0x2
55 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 dlc
56 ; GFX10-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc dlc
57 ; GFX10-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc dlc
58 ; GFX10-NEXT: s_waitcnt vmcnt(0)
59 ; GFX10-NEXT: ; return to shader part epilog
61 ; GFX11-LABEL: buffer_load_dlc:
62 ; GFX11: ; %bb.0: ; %main_body
63 ; GFX11-NEXT: s_clause 0x2
64 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 dlc
65 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 glc dlc
66 ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[0:3], 0 slc dlc
67 ; GFX11-NEXT: s_waitcnt vmcnt(0)
68 ; GFX11-NEXT: ; return to shader part epilog
70 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 4)
71 %data_glc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 5)
72 %data_slc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 6)
73 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
74 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
75 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
76 ret {<4 x float>, <4 x float>, <4 x float>} %r2
; Three loads whose last i32 operand has the sign bit set (-2147483648, i.e.
; 0x80000000) combined with 0, 1 and 2. Every expected load carries glc on
; verde/tonga and glc dlc on gfx1010/gfx1100; the third additionally keeps slc.
; Presumably the sign bit marks the access as volatile (per the function name)
; - confirm against the intrinsic documentation.
79 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load_volatile(ptr addrspace(8) inreg) {
80 ; PREGFX10-LABEL: buffer_load_volatile:
81 ; PREGFX10: ; %bb.0: ; %main_body
82 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc
83 ; PREGFX10-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
84 ; PREGFX10-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 glc slc
85 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
86 ; PREGFX10-NEXT: ; return to shader part epilog
88 ; GFX10-LABEL: buffer_load_volatile:
89 ; GFX10: ; %bb.0: ; %main_body
90 ; GFX10-NEXT: s_clause 0x2
91 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc dlc
92 ; GFX10-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc dlc
93 ; GFX10-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 glc slc dlc
94 ; GFX10-NEXT: s_waitcnt vmcnt(0)
95 ; GFX10-NEXT: ; return to shader part epilog
97 ; GFX11-LABEL: buffer_load_volatile:
98 ; GFX11: ; %bb.0: ; %main_body
99 ; GFX11-NEXT: s_clause 0x2
100 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 glc dlc
101 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 glc dlc
102 ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[0:3], 0 glc slc dlc
103 ; GFX11-NEXT: s_waitcnt vmcnt(0)
104 ; GFX11-NEXT: ; return to shader part epilog
106 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 -2147483648)
107 %data_glc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 -2147483647)
108 %data_slc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 -2147483646)
109 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
110 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
111 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
112 ret {<4 x float>, <4 x float>, <4 x float>} %r2
; A constant 40 in the first i32 operand is expected to become the instruction's
; immediate field (offset 40) with no VGPR offset ("off") on every target.
115 define amdgpu_ps <4 x float> @buffer_load_immoffs(ptr addrspace(8) inreg) {
116 ; PREGFX10-LABEL: buffer_load_immoffs:
117 ; PREGFX10: ; %bb.0: ; %main_body
118 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:40
119 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
120 ; PREGFX10-NEXT: ; return to shader part epilog
122 ; GFX10-LABEL: buffer_load_immoffs:
123 ; GFX10: ; %bb.0: ; %main_body
124 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:40
125 ; GFX10-NEXT: s_waitcnt vmcnt(0)
126 ; GFX10-NEXT: ; return to shader part epilog
128 ; GFX11-LABEL: buffer_load_immoffs:
129 ; GFX11: ; %bb.0: ; %main_body
130 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 offset:40
131 ; GFX11-NEXT: s_waitcnt vmcnt(0)
132 ; GFX11-NEXT: ; return to shader part epilog
134 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 40, i32 0, i32 0)
135 ret <4 x float> %data
; First i32 operand is 4 and second i32 operand is 8188 (0x1ffc). The expected
; output keeps 4 as the immediate offset and materializes 0x1ffc in s4, which
; is then used as the scalar offset operand of the load.
138 define amdgpu_ps <4 x float> @buffer_load_immoffs_large(ptr addrspace(8) inreg) {
139 ; PREGFX10-LABEL: buffer_load_immoffs_large:
140 ; PREGFX10: ; %bb.0: ; %main_body
141 ; PREGFX10-NEXT: s_movk_i32 s4, 0x1ffc
142 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], s4 offset:4
143 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
144 ; PREGFX10-NEXT: ; return to shader part epilog
146 ; GFX10-LABEL: buffer_load_immoffs_large:
147 ; GFX10: ; %bb.0: ; %main_body
148 ; GFX10-NEXT: s_movk_i32 s4, 0x1ffc
149 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], s4 offset:4
150 ; GFX10-NEXT: s_waitcnt vmcnt(0)
151 ; GFX10-NEXT: ; return to shader part epilog
153 ; GFX11-LABEL: buffer_load_immoffs_large:
154 ; GFX11: ; %bb.0: ; %main_body
155 ; GFX11-NEXT: s_movk_i32 s4, 0x1ffc
156 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], s4 offset:4
157 ; GFX11-NEXT: s_waitcnt vmcnt(0)
158 ; GFX11-NEXT: ; return to shader part epilog
160 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 4, i32 8188, i32 0)
161 ret <4 x float> %data
; The first i32 operand is the function's non-inreg i32 argument. The expected
; load takes it from v0 and uses the offen addressing form with no immediate.
164 define amdgpu_ps <4 x float> @buffer_load_ofs(ptr addrspace(8) inreg, i32) {
165 ; PREGFX10-LABEL: buffer_load_ofs:
166 ; PREGFX10: ; %bb.0: ; %main_body
167 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
168 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
169 ; PREGFX10-NEXT: ; return to shader part epilog
171 ; GFX10-LABEL: buffer_load_ofs:
172 ; GFX10: ; %bb.0: ; %main_body
173 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
174 ; GFX10-NEXT: s_waitcnt vmcnt(0)
175 ; GFX10-NEXT: ; return to shader part epilog
177 ; GFX11-LABEL: buffer_load_ofs:
178 ; GFX11: ; %bb.0: ; %main_body
179 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen
180 ; GFX11-NEXT: s_waitcnt vmcnt(0)
181 ; GFX11-NEXT: ; return to shader part epilog
183 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %1, i32 0, i32 0)
184 ret <4 x float> %data
; The offset is the i32 argument plus the constant 60. The expected output has
; no separate add instruction - the 60 is folded into the immediate field while
; the variable part stays in v0 (offen).
187 define amdgpu_ps <4 x float> @buffer_load_ofs_imm(ptr addrspace(8) inreg, i32) {
188 ; PREGFX10-LABEL: buffer_load_ofs_imm:
189 ; PREGFX10: ; %bb.0: ; %main_body
190 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:60
191 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
192 ; PREGFX10-NEXT: ; return to shader part epilog
194 ; GFX10-LABEL: buffer_load_ofs_imm:
195 ; GFX10: ; %bb.0: ; %main_body
196 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:60
197 ; GFX10-NEXT: s_waitcnt vmcnt(0)
198 ; GFX10-NEXT: ; return to shader part epilog
200 ; GFX11-LABEL: buffer_load_ofs_imm:
201 ; GFX11: ; %bb.0: ; %main_body
202 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
203 ; GFX11-NEXT: s_waitcnt vmcnt(0)
204 ; GFX11-NEXT: ; return to shader part epilog
206 %ofs = add i32 %1, 60
207 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %ofs, i32 0, i32 0)
208 ret <4 x float> %data
; Constant offset 4092 is expected to be encoded entirely in the immediate
; field (offset 4092) with no VGPR offset on all tested targets.
211 define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(ptr addrspace(8) inreg) {
212 ; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
213 ; PREGFX10: ; %bb.0: ; %main_body
214 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4092
215 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
216 ; PREGFX10-NEXT: ; return to shader part epilog
218 ; GFX10-LABEL: buffer_load_voffset_large_12bit:
219 ; GFX10: ; %bb.0: ; %main_body
220 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4092
221 ; GFX10-NEXT: s_waitcnt vmcnt(0)
222 ; GFX10-NEXT: ; return to shader part epilog
224 ; GFX11-LABEL: buffer_load_voffset_large_12bit:
225 ; GFX11: ; %bb.0: ; %main_body
226 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4092
227 ; GFX11-NEXT: s_waitcnt vmcnt(0)
228 ; GFX11-NEXT: ; return to shader part epilog
230 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 4092, i32 0, i32 0)
231 ret <4 x float> %data
; Constant offset 8188 (0x1000 + 4092) is expected to be split - 0x1000 is
; moved into v0 and used with offen, and the remaining 4092 goes into the
; immediate field.
234 define amdgpu_ps <4 x float> @buffer_load_voffset_large_13bit(ptr addrspace(8) inreg) {
235 ; PREGFX10-LABEL: buffer_load_voffset_large_13bit:
236 ; PREGFX10: ; %bb.0: ; %main_body
237 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0x1000
238 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
239 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
240 ; PREGFX10-NEXT: ; return to shader part epilog
242 ; GFX10-LABEL: buffer_load_voffset_large_13bit:
243 ; GFX10: ; %bb.0: ; %main_body
244 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x1000
245 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
246 ; GFX10-NEXT: s_waitcnt vmcnt(0)
247 ; GFX10-NEXT: ; return to shader part epilog
249 ; GFX11-LABEL: buffer_load_voffset_large_13bit:
250 ; GFX11: ; %bb.0: ; %main_body
251 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x1000
252 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
253 ; GFX11-NEXT: s_waitcnt vmcnt(0)
254 ; GFX11-NEXT: ; return to shader part epilog
256 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 8188, i32 0, i32 0)
257 ret <4 x float> %data
; Constant offset 65532 (0xf000 + 4092) is expected to be split - 0xf000 is
; moved into v0 and used with offen, and the remaining 4092 goes into the
; immediate field.
260 define amdgpu_ps <4 x float> @buffer_load_voffset_large_16bit(ptr addrspace(8) inreg) {
261 ; PREGFX10-LABEL: buffer_load_voffset_large_16bit:
262 ; PREGFX10: ; %bb.0: ; %main_body
263 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0xf000
264 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
265 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
266 ; PREGFX10-NEXT: ; return to shader part epilog
268 ; GFX10-LABEL: buffer_load_voffset_large_16bit:
269 ; GFX10: ; %bb.0: ; %main_body
270 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xf000
271 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
272 ; GFX10-NEXT: s_waitcnt vmcnt(0)
273 ; GFX10-NEXT: ; return to shader part epilog
275 ; GFX11-LABEL: buffer_load_voffset_large_16bit:
276 ; GFX11: ; %bb.0: ; %main_body
277 ; GFX11-NEXT: v_mov_b32_e32 v0, 0xf000
278 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
279 ; GFX11-NEXT: s_waitcnt vmcnt(0)
280 ; GFX11-NEXT: ; return to shader part epilog
282 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 65532, i32 0, i32 0)
283 ret <4 x float> %data
; Constant offset 8388604 (0x7ff000 + 4092) is expected to be split - 0x7ff000
; is moved into v0 and used with offen, and the remaining 4092 goes into the
; immediate field.
286 define amdgpu_ps <4 x float> @buffer_load_voffset_large_23bit(ptr addrspace(8) inreg) {
287 ; PREGFX10-LABEL: buffer_load_voffset_large_23bit:
288 ; PREGFX10: ; %bb.0: ; %main_body
289 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0x7ff000
290 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
291 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
292 ; PREGFX10-NEXT: ; return to shader part epilog
294 ; GFX10-LABEL: buffer_load_voffset_large_23bit:
295 ; GFX10: ; %bb.0: ; %main_body
296 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7ff000
297 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
298 ; GFX10-NEXT: s_waitcnt vmcnt(0)
299 ; GFX10-NEXT: ; return to shader part epilog
301 ; GFX11-LABEL: buffer_load_voffset_large_23bit:
302 ; GFX11: ; %bb.0: ; %main_body
303 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x7ff000
304 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
305 ; GFX11-NEXT: s_waitcnt vmcnt(0)
306 ; GFX11-NEXT: ; return to shader part epilog
308 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 8388604, i32 0, i32 0)
309 ret <4 x float> %data
; Constant offset 16777212 (0xfff000 + 4092) is expected to be split - 0xfff000
; is moved into v0 and used with offen, and the remaining 4092 goes into the
; immediate field.
312 define amdgpu_ps <4 x float> @buffer_load_voffset_large_24bit(ptr addrspace(8) inreg) {
313 ; PREGFX10-LABEL: buffer_load_voffset_large_24bit:
314 ; PREGFX10: ; %bb.0: ; %main_body
315 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0xfff000
316 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
317 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
318 ; PREGFX10-NEXT: ; return to shader part epilog
320 ; GFX10-LABEL: buffer_load_voffset_large_24bit:
321 ; GFX10: ; %bb.0: ; %main_body
322 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xfff000
323 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
324 ; GFX10-NEXT: s_waitcnt vmcnt(0)
325 ; GFX10-NEXT: ; return to shader part epilog
327 ; GFX11-LABEL: buffer_load_voffset_large_24bit:
328 ; GFX11: ; %bb.0: ; %main_body
329 ; GFX11-NEXT: v_mov_b32_e32 v0, 0xfff000
330 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
331 ; GFX11-NEXT: s_waitcnt vmcnt(0)
332 ; GFX11-NEXT: ; return to shader part epilog
334 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 16777212, i32 0, i32 0)
335 ret <4 x float> %data
; Scalar f32 variant of the load with a VGPR offset. The expected instruction
; is the single-dword form (buffer_load_dword, or buffer_load_b32 on gfx1100).
339 define amdgpu_ps float @buffer_load_x1(ptr addrspace(8) inreg %rsrc, i32 %ofs) {
340 ; PREGFX10-LABEL: buffer_load_x1:
341 ; PREGFX10: ; %bb.0: ; %main_body
342 ; PREGFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
343 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
344 ; PREGFX10-NEXT: ; return to shader part epilog
346 ; GFX10-LABEL: buffer_load_x1:
347 ; GFX10: ; %bb.0: ; %main_body
348 ; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
349 ; GFX10-NEXT: s_waitcnt vmcnt(0)
350 ; GFX10-NEXT: ; return to shader part epilog
352 ; GFX11-LABEL: buffer_load_x1:
353 ; GFX11: ; %bb.0: ; %main_body
354 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
355 ; GFX11-NEXT: s_waitcnt vmcnt(0)
356 ; GFX11-NEXT: ; return to shader part epilog
358 %data = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 0, i32 0)
; v2f32 variant of the load with a VGPR offset. The expected instruction is the
; two-dword form (buffer_load_dwordx2, or buffer_load_b64 on gfx1100).
362 define amdgpu_ps <2 x float> @buffer_load_x2(ptr addrspace(8) inreg %rsrc, i32 %ofs) {
363 ; PREGFX10-LABEL: buffer_load_x2:
364 ; PREGFX10: ; %bb.0: ; %main_body
365 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
366 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
367 ; PREGFX10-NEXT: ; return to shader part epilog
369 ; GFX10-LABEL: buffer_load_x2:
370 ; GFX10: ; %bb.0: ; %main_body
371 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
372 ; GFX10-NEXT: s_waitcnt vmcnt(0)
373 ; GFX10-NEXT: ; return to shader part epilog
375 ; GFX11-LABEL: buffer_load_x2:
376 ; GFX11: ; %bb.0: ; %main_body
377 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen
378 ; GFX11-NEXT: s_waitcnt vmcnt(0)
379 ; GFX11-NEXT: ; return to shader part epilog
381 %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 0, i32 0)
382 ret <2 x float> %data
; The offset is the i32 argument plus -16. Unlike the positive-constant case,
; the expected output keeps an explicit v_add_nc_u32 and the load has no
; immediate offset.
; NOTE(review) - this function has no check lines for the verde/tonga runs;
; presumably their outputs differ and the shared prefix was dropped by the
; update script. Confirm, and consider per-target prefixes to restore coverage.
385 define amdgpu_ps <4 x float> @buffer_load_negative_offset(ptr addrspace(8) inreg, i32 %ofs) {
386 ; GFX10-LABEL: buffer_load_negative_offset:
387 ; GFX10: ; %bb.0: ; %main_body
388 ; GFX10-NEXT: v_add_nc_u32_e32 v0, -16, v0
389 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
390 ; GFX10-NEXT: s_waitcnt vmcnt(0)
391 ; GFX10-NEXT: ; return to shader part epilog
393 ; GFX11-LABEL: buffer_load_negative_offset:
394 ; GFX11: ; %bb.0: ; %main_body
395 ; GFX11-NEXT: v_add_nc_u32_e32 v0, -16, v0
396 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen
397 ; GFX11-NEXT: s_waitcnt vmcnt(0)
398 ; GFX11-NEXT: ; return to shader part epilog
400 %ofs.1 = add i32 %ofs, -16
401 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %ofs.1, i32 0, i32 0)
402 ret <4 x float> %data
; Two float stores of 0.0 to LDS (at %lds and at %lds + 4 floats) surround a
; buffer load. The expected output pairs both stores into one two-address DS
; write (offset1 4) placed after the buffer load has been issued.
; NOTE(review) - this function has no check lines for the verde/tonga runs;
; presumably their outputs differ and the shared prefix was dropped by the
; update script. Confirm before relying on pre-gfx10 coverage here.
405 define amdgpu_ps float @buffer_load_mmo(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %lds) {
406 ; GFX10-LABEL: buffer_load_mmo:
407 ; GFX10: ; %bb.0: ; %entry
408 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0
409 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
410 ; GFX10-NEXT: ds_write2_b32 v0, v2, v2 offset1:4
411 ; GFX10-NEXT: s_waitcnt vmcnt(0)
412 ; GFX10-NEXT: v_mov_b32_e32 v0, v1
413 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
414 ; GFX10-NEXT: ; return to shader part epilog
416 ; GFX11-LABEL: buffer_load_mmo:
417 ; GFX11: ; %bb.0: ; %entry
418 ; GFX11-NEXT: buffer_load_b32 v1, off, s[0:3], 0
419 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
420 ; GFX11-NEXT: ds_store_2addr_b32 v0, v2, v2 offset1:4
421 ; GFX11-NEXT: s_waitcnt vmcnt(0)
422 ; GFX11-NEXT: v_mov_b32_e32 v0, v1
423 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
424 ; GFX11-NEXT: ; return to shader part epilog
426 store float 0.0, ptr addrspace(3) %lds
427 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
428 %tmp2 = getelementptr float, ptr addrspace(3) %lds, i32 4
429 store float 0.0, ptr addrspace(3) %tmp2
; Six scalar f32 loads from the same resource with VGPR-based offsets feed two
; exports. The expected output combines them into one four-dword load at
; immediate offset 4 and one two-dword load at immediate offset 28, both offen.
433 define amdgpu_ps void @buffer_load_x1_offen_merged_and(ptr addrspace(8) inreg %rsrc, i32 %a) {
434 ; PREGFX10-LABEL: buffer_load_x1_offen_merged_and:
435 ; PREGFX10: ; %bb.0: ; %main_body
436 ; PREGFX10-NEXT: buffer_load_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
437 ; PREGFX10-NEXT: buffer_load_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
438 ; PREGFX10-NEXT: s_waitcnt vmcnt(1)
439 ; PREGFX10-NEXT: exp mrt0 v1, v2, v3, v4 done vm
440 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
441 ; PREGFX10-NEXT: exp mrt0 v5, v6, v0, v0 done vm
442 ; PREGFX10-NEXT: s_endpgm
444 ; GFX10-LABEL: buffer_load_x1_offen_merged_and:
445 ; GFX10: ; %bb.0: ; %main_body
446 ; GFX10-NEXT: s_clause 0x1
447 ; GFX10-NEXT: buffer_load_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
448 ; GFX10-NEXT: buffer_load_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
449 ; GFX10-NEXT: s_waitcnt vmcnt(1)
450 ; GFX10-NEXT: exp mrt0 v1, v2, v3, v4 done vm
451 ; GFX10-NEXT: s_waitcnt vmcnt(0)
452 ; GFX10-NEXT: exp mrt0 v5, v6, v0, v0 done vm
453 ; GFX10-NEXT: s_endpgm
455 ; GFX11-LABEL: buffer_load_x1_offen_merged_and:
456 ; GFX11: ; %bb.0: ; %main_body
457 ; GFX11-NEXT: s_clause 0x1
458 ; GFX11-NEXT: buffer_load_b128 v[1:4], v0, s[0:3], 0 offen offset:4
459 ; GFX11-NEXT: buffer_load_b64 v[5:6], v0, s[0:3], 0 offen offset:28
460 ; GFX11-NEXT: s_waitcnt vmcnt(1)
461 ; GFX11-NEXT: exp mrt0 v1, v2, v3, v4 done
462 ; GFX11-NEXT: s_waitcnt vmcnt(0)
463 ; GFX11-NEXT: exp mrt0 v5, v6, v0, v0 done
464 ; GFX11-NEXT: s_endpgm
472 %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
473 %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
474 %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 0)
475 %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 0)
476 %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 0)
477 %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 0)
478 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
479 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
; Six scalar f32 loads whose offsets derive from %inp. The expected output
; shifts the input left by 6, then issues one four-dword load at immediate
; offset 4 and one two-dword load at immediate offset 28 from that base.
; Presumably the constant parts are attached with `or` rather than `add` (per
; the function name) - confirm against the full function body.
483 define amdgpu_ps void @buffer_load_x1_offen_merged_or(ptr addrspace(8) inreg %rsrc, i32 %inp) {
484 ; PREGFX10-LABEL: buffer_load_x1_offen_merged_or:
485 ; PREGFX10: ; %bb.0: ; %main_body
486 ; PREGFX10-NEXT: v_lshlrev_b32_e32 v4, 6, v0
487 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v4, s[0:3], 0 offen offset:4
488 ; PREGFX10-NEXT: buffer_load_dwordx2 v[4:5], v4, s[0:3], 0 offen offset:28
489 ; PREGFX10-NEXT: s_waitcnt vmcnt(1)
490 ; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
491 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
492 ; PREGFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
493 ; PREGFX10-NEXT: s_endpgm
495 ; GFX10-LABEL: buffer_load_x1_offen_merged_or:
496 ; GFX10: ; %bb.0: ; %main_body
497 ; GFX10-NEXT: v_lshlrev_b32_e32 v6, 6, v0
498 ; GFX10-NEXT: s_clause 0x1
499 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v6, s[0:3], 0 offen offset:4
500 ; GFX10-NEXT: buffer_load_dwordx2 v[4:5], v6, s[0:3], 0 offen offset:28
501 ; GFX10-NEXT: s_waitcnt vmcnt(1)
502 ; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
503 ; GFX10-NEXT: s_waitcnt vmcnt(0)
504 ; GFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
505 ; GFX10-NEXT: s_endpgm
507 ; GFX11-LABEL: buffer_load_x1_offen_merged_or:
508 ; GFX11: ; %bb.0: ; %main_body
509 ; GFX11-NEXT: v_lshlrev_b32_e32 v4, 6, v0
510 ; GFX11-NEXT: s_clause 0x1
511 ; GFX11-NEXT: buffer_load_b128 v[0:3], v4, s[0:3], 0 offen offset:4
512 ; GFX11-NEXT: buffer_load_b64 v[4:5], v4, s[0:3], 0 offen offset:28
513 ; GFX11-NEXT: s_waitcnt vmcnt(1)
514 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
515 ; GFX11-NEXT: s_waitcnt vmcnt(0)
516 ; GFX11-NEXT: exp mrt0 v4, v5, v0, v0 done
517 ; GFX11-NEXT: s_endpgm
526 %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
527 %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
528 %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 0)
529 %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 0)
530 %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 0)
531 %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 0)
532 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
533 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
; Six scalar f32 loads in three pairs whose last i32 operand is 0, 1 and 3.
; The expected output has three two-dword loads (immediate offsets 4, 12, 28)
; carrying no modifier, glc, and glc slc respectively - that is, loads are only
; combined with a neighbour that has the same last operand.
537 define amdgpu_ps void @buffer_load_x1_offen_merged_glc_slc(ptr addrspace(8) inreg %rsrc, i32 %a) {
538 ; PREGFX10-LABEL: buffer_load_x1_offen_merged_glc_slc:
539 ; PREGFX10: ; %bb.0: ; %main_body
540 ; PREGFX10-NEXT: buffer_load_dwordx2 v[1:2], v0, s[0:3], 0 offen offset:4
541 ; PREGFX10-NEXT: buffer_load_dwordx2 v[3:4], v0, s[0:3], 0 offen offset:12 glc
542 ; PREGFX10-NEXT: buffer_load_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
543 ; PREGFX10-NEXT: s_waitcnt vmcnt(1)
544 ; PREGFX10-NEXT: exp mrt0 v1, v2, v3, v4 done vm
545 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
546 ; PREGFX10-NEXT: exp mrt0 v5, v6, v0, v0 done vm
547 ; PREGFX10-NEXT: s_endpgm
549 ; GFX10-LABEL: buffer_load_x1_offen_merged_glc_slc:
550 ; GFX10: ; %bb.0: ; %main_body
551 ; GFX10-NEXT: s_clause 0x2
552 ; GFX10-NEXT: buffer_load_dwordx2 v[1:2], v0, s[0:3], 0 offen offset:4
553 ; GFX10-NEXT: buffer_load_dwordx2 v[3:4], v0, s[0:3], 0 offen offset:12 glc
554 ; GFX10-NEXT: buffer_load_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
555 ; GFX10-NEXT: s_waitcnt vmcnt(1)
556 ; GFX10-NEXT: exp mrt0 v1, v2, v3, v4 done vm
557 ; GFX10-NEXT: s_waitcnt vmcnt(0)
558 ; GFX10-NEXT: exp mrt0 v5, v6, v0, v0 done vm
559 ; GFX10-NEXT: s_endpgm
561 ; GFX11-LABEL: buffer_load_x1_offen_merged_glc_slc:
562 ; GFX11: ; %bb.0: ; %main_body
563 ; GFX11-NEXT: s_clause 0x2
564 ; GFX11-NEXT: buffer_load_b64 v[1:2], v0, s[0:3], 0 offen offset:4
565 ; GFX11-NEXT: buffer_load_b64 v[3:4], v0, s[0:3], 0 offen offset:12 glc
566 ; GFX11-NEXT: buffer_load_b64 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
567 ; GFX11-NEXT: s_waitcnt vmcnt(1)
568 ; GFX11-NEXT: exp mrt0 v1, v2, v3, v4 done
569 ; GFX11-NEXT: s_waitcnt vmcnt(0)
570 ; GFX11-NEXT: exp mrt0 v5, v6, v0, v0 done
571 ; GFX11-NEXT: s_endpgm
579 %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
580 %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
581 %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 1)
582 %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 1)
583 %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 3)
584 %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 3)
585 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
586 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
; Two v2f32 loads from the same resource with VGPR-based offsets feed a single
; export. The expected output combines them into one four-dword load at
; immediate offset 4 (offen).
590 define amdgpu_ps void @buffer_load_x2_offen_merged_and(ptr addrspace(8) inreg %rsrc, i32 %a) {
591 ; PREGFX10-LABEL: buffer_load_x2_offen_merged_and:
592 ; PREGFX10: ; %bb.0: ; %main_body
593 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4
594 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
595 ; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
596 ; PREGFX10-NEXT: s_endpgm
598 ; GFX10-LABEL: buffer_load_x2_offen_merged_and:
599 ; GFX10: ; %bb.0: ; %main_body
600 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4
601 ; GFX10-NEXT: s_waitcnt vmcnt(0)
602 ; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
603 ; GFX10-NEXT: s_endpgm
605 ; GFX11-LABEL: buffer_load_x2_offen_merged_and:
606 ; GFX11: ; %bb.0: ; %main_body
607 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4
608 ; GFX11-NEXT: s_waitcnt vmcnt(0)
609 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
610 ; GFX11-NEXT: s_endpgm
614 %vr1 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
615 %vr2 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
616 %r1 = extractelement <2 x float> %vr1, i32 0
617 %r2 = extractelement <2 x float> %vr1, i32 1
618 %r3 = extractelement <2 x float> %vr2, i32 0
619 %r4 = extractelement <2 x float> %vr2, i32 1
620 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
; Two v2f32 loads whose offsets derive from %inp. The expected output shifts
; the input left by 4 and then issues one four-dword load at immediate offset 4.
; Presumably the constant parts are attached with `or` rather than `add` (per
; the function name) - confirm against the full function body.
624 define amdgpu_ps void @buffer_load_x2_offen_merged_or(ptr addrspace(8) inreg %rsrc, i32 %inp) {
625 ; PREGFX10-LABEL: buffer_load_x2_offen_merged_or:
626 ; PREGFX10: ; %bb.0: ; %main_body
627 ; PREGFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
628 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4
629 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
630 ; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
631 ; PREGFX10-NEXT: s_endpgm
633 ; GFX10-LABEL: buffer_load_x2_offen_merged_or:
634 ; GFX10: ; %bb.0: ; %main_body
635 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
636 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4
637 ; GFX10-NEXT: s_waitcnt vmcnt(0)
638 ; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
639 ; GFX10-NEXT: s_endpgm
641 ; GFX11-LABEL: buffer_load_x2_offen_merged_or:
642 ; GFX11: ; %bb.0: ; %main_body
643 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
644 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4
645 ; GFX11-NEXT: s_waitcnt vmcnt(0)
646 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
647 ; GFX11-NEXT: s_endpgm
652 %vr1 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
653 %vr2 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
654 %r1 = extractelement <2 x float> %vr1, i32 0
655 %r2 = extractelement <2 x float> %vr1, i32 1
656 %r3 = extractelement <2 x float> %vr2, i32 0
657 %r4 = extractelement <2 x float> %vr2, i32 1
658 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
; Six scalar f32 loads at constant offsets 4, 8, 12, 16 and 28, 32. The
; expected output combines the first four into one four-dword load at immediate
; offset 4 and the last two into one two-dword load at immediate offset 28,
; with no VGPR offset ("off").
662 define amdgpu_ps void @buffer_load_x1_offset_merged(ptr addrspace(8) inreg %rsrc) {
663 ; PREGFX10-LABEL: buffer_load_x1_offset_merged:
664 ; PREGFX10: ; %bb.0: ; %main_body
665 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
666 ; PREGFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
667 ; PREGFX10-NEXT: s_waitcnt vmcnt(1)
668 ; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
669 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
670 ; PREGFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
671 ; PREGFX10-NEXT: s_endpgm
673 ; GFX10-LABEL: buffer_load_x1_offset_merged:
674 ; GFX10: ; %bb.0: ; %main_body
675 ; GFX10-NEXT: s_clause 0x1
676 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
677 ; GFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
678 ; GFX10-NEXT: s_waitcnt vmcnt(1)
679 ; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
680 ; GFX10-NEXT: s_waitcnt vmcnt(0)
681 ; GFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
682 ; GFX10-NEXT: s_endpgm
684 ; GFX11-LABEL: buffer_load_x1_offset_merged:
685 ; GFX11: ; %bb.0: ; %main_body
686 ; GFX11-NEXT: s_clause 0x1
687 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4
688 ; GFX11-NEXT: buffer_load_b64 v[4:5], off, s[0:3], 0 offset:28
689 ; GFX11-NEXT: s_waitcnt vmcnt(1)
690 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
691 ; GFX11-NEXT: s_waitcnt vmcnt(0)
692 ; GFX11-NEXT: exp mrt0 v4, v5, v0, v0 done
693 ; GFX11-NEXT: s_endpgm
695 %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
696 %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 8, i32 0, i32 0)
697 %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
698 %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 16, i32 0, i32 0)
699 %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 28, i32 0, i32 0)
700 %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 32, i32 0, i32 0)
701 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
702 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
; Two <2 x float> raw buffer loads at constant offsets 4 and 12 feed one export.
; Every check prefix expects a single 128-bit load at offset:4, i.e. the two
; adjacent loads are combined into one.
706 define amdgpu_ps void @buffer_load_x2_offset_merged(ptr addrspace(8) inreg %rsrc) {
707 ; PREGFX10-LABEL: buffer_load_x2_offset_merged:
708 ; PREGFX10: ; %bb.0: ; %main_body
709 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
710 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
711 ; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
712 ; PREGFX10-NEXT: s_endpgm
714 ; GFX10-LABEL: buffer_load_x2_offset_merged:
715 ; GFX10: ; %bb.0: ; %main_body
716 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
717 ; GFX10-NEXT: s_waitcnt vmcnt(0)
718 ; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
719 ; GFX10-NEXT: s_endpgm
721 ; GFX11-LABEL: buffer_load_x2_offset_merged:
722 ; GFX11: ; %bb.0: ; %main_body
723 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4
724 ; GFX11-NEXT: s_waitcnt vmcnt(0)
725 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
726 ; GFX11-NEXT: s_endpgm
728 %vr1 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
729 %vr2 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
730 %r1 = extractelement <2 x float> %vr1, i32 0
731 %r2 = extractelement <2 x float> %vr1, i32 1
732 %r3 = extractelement <2 x float> %vr2, i32 0
733 %r4 = extractelement <2 x float> %vr2, i32 1
734 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
; Integer-typed raw buffer loads (<4 x i32>, <2 x i32>, i32) issued with the
; last intrinsic argument set to 0, 1 and 2. The checks expect no modifier,
; glc and slc on the respective loads. Results are bitcast to the matching
; float types and returned as one struct.
; The struct is seeded with poison (undef is deprecated); all three fields are
; overwritten by the insertvalue chain, so the generated code is unaffected.
738 define amdgpu_ps {<4 x float>, <2 x float>, float} @buffer_load_int(ptr addrspace(8) inreg) {
739 ; PREGFX10-LABEL: buffer_load_int:
740 ; PREGFX10: ; %bb.0: ; %main_body
741 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
742 ; PREGFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 glc
743 ; PREGFX10-NEXT: buffer_load_dword v6, off, s[0:3], 0 slc
744 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
745 ; PREGFX10-NEXT: ; return to shader part epilog
747 ; GFX10-LABEL: buffer_load_int:
748 ; GFX10: ; %bb.0: ; %main_body
749 ; GFX10-NEXT: s_clause 0x2
750 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
751 ; GFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 glc
752 ; GFX10-NEXT: buffer_load_dword v6, off, s[0:3], 0 slc
753 ; GFX10-NEXT: s_waitcnt vmcnt(0)
754 ; GFX10-NEXT: ; return to shader part epilog
756 ; GFX11-LABEL: buffer_load_int:
757 ; GFX11: ; %bb.0: ; %main_body
758 ; GFX11-NEXT: s_clause 0x2
759 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
760 ; GFX11-NEXT: buffer_load_b64 v[4:5], off, s[0:3], 0 glc
761 ; GFX11-NEXT: buffer_load_b32 v6, off, s[0:3], 0 slc
762 ; GFX11-NEXT: s_waitcnt vmcnt(0)
763 ; GFX11-NEXT: ; return to shader part epilog
765 %data = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) %0, i32 0, i32 0, i32 0)
766 %data_glc = call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) %0, i32 0, i32 0, i32 1)
767 %data_slc = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %0, i32 0, i32 0, i32 2)
768 %fdata = bitcast <4 x i32> %data to <4 x float>
769 %fdata_glc = bitcast <2 x i32> %data_glc to <2 x float>
770 %fdata_slc = bitcast i32 %data_slc to float
771 %r0 = insertvalue {<4 x float>, <2 x float>, float} poison, <4 x float> %fdata, 0
772 %r1 = insertvalue {<4 x float>, <2 x float>, float} %r0, <2 x float> %fdata_glc, 1
773 %r2 = insertvalue {<4 x float>, <2 x float>, float} %r1, float %fdata_slc, 2
774 ret {<4 x float>, <2 x float>, float} %r2
; i8 raw buffer load that is zero-extended to i32 and converted with uitofp.
; The checks expect an unsigned byte load (buffer_load_ubyte, or buffer_load_u8
; on GFX11) followed by v_cvt_f32_ubyte0_e32.
777 define amdgpu_ps float @raw_ptr_buffer_load_ubyte(ptr addrspace(8) inreg %rsrc) {
778 ; PREGFX10-LABEL: raw_ptr_buffer_load_ubyte:
779 ; PREGFX10: ; %bb.0: ; %main_body
780 ; PREGFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
781 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
782 ; PREGFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
783 ; PREGFX10-NEXT: ; return to shader part epilog
785 ; GFX10-LABEL: raw_ptr_buffer_load_ubyte:
786 ; GFX10: ; %bb.0: ; %main_body
787 ; GFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
788 ; GFX10-NEXT: s_waitcnt vmcnt(0)
789 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
790 ; GFX10-NEXT: ; return to shader part epilog
792 ; GFX11-LABEL: raw_ptr_buffer_load_ubyte:
793 ; GFX11: ; %bb.0: ; %main_body
794 ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0
795 ; GFX11-NEXT: s_waitcnt vmcnt(0)
796 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
797 ; GFX11-NEXT: ; return to shader part epilog
799 %tmp = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
800 %tmp2 = zext i8 %tmp to i32
801 %val = uitofp i32 %tmp2 to float
; i16 raw buffer load that is zero-extended to i32 and converted with uitofp.
; The checks expect an unsigned 16-bit load (buffer_load_ushort, or
; buffer_load_u16 on GFX11) followed by v_cvt_f32_u32_e32.
805 define amdgpu_ps float @raw_ptr_buffer_load_i16(ptr addrspace(8) inreg %rsrc) {
806 ; PREGFX10-LABEL: raw_ptr_buffer_load_i16:
807 ; PREGFX10: ; %bb.0: ; %main_body
808 ; PREGFX10-NEXT: buffer_load_ushort v0, off, s[0:3], 0
809 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
810 ; PREGFX10-NEXT: v_cvt_f32_u32_e32 v0, v0
811 ; PREGFX10-NEXT: ; return to shader part epilog
813 ; GFX10-LABEL: raw_ptr_buffer_load_i16:
814 ; GFX10: ; %bb.0: ; %main_body
815 ; GFX10-NEXT: buffer_load_ushort v0, off, s[0:3], 0
816 ; GFX10-NEXT: s_waitcnt vmcnt(0)
817 ; GFX10-NEXT: v_cvt_f32_u32_e32 v0, v0
818 ; GFX10-NEXT: ; return to shader part epilog
820 ; GFX11-LABEL: raw_ptr_buffer_load_i16:
821 ; GFX11: ; %bb.0: ; %main_body
822 ; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0
823 ; GFX11-NEXT: s_waitcnt vmcnt(0)
824 ; GFX11-NEXT: v_cvt_f32_u32_e32 v0, v0
825 ; GFX11-NEXT: ; return to shader part epilog
827 %tmp = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
828 %tmp2 = zext i16 %tmp to i32
829 %val = uitofp i32 %tmp2 to float
; i8 raw buffer load that is sign-extended to i32 and converted with sitofp.
; The checks expect a signed byte load (buffer_load_sbyte, or buffer_load_i8 on
; GFX11) followed by v_cvt_f32_i32_e32.
833 define amdgpu_ps float @raw_ptr_buffer_load_sbyte(ptr addrspace(8) inreg %rsrc) {
834 ; PREGFX10-LABEL: raw_ptr_buffer_load_sbyte:
835 ; PREGFX10: ; %bb.0: ; %main_body
836 ; PREGFX10-NEXT: buffer_load_sbyte v0, off, s[0:3], 0
837 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
838 ; PREGFX10-NEXT: v_cvt_f32_i32_e32 v0, v0
839 ; PREGFX10-NEXT: ; return to shader part epilog
841 ; GFX10-LABEL: raw_ptr_buffer_load_sbyte:
842 ; GFX10: ; %bb.0: ; %main_body
843 ; GFX10-NEXT: buffer_load_sbyte v0, off, s[0:3], 0
844 ; GFX10-NEXT: s_waitcnt vmcnt(0)
845 ; GFX10-NEXT: v_cvt_f32_i32_e32 v0, v0
846 ; GFX10-NEXT: ; return to shader part epilog
848 ; GFX11-LABEL: raw_ptr_buffer_load_sbyte:
849 ; GFX11: ; %bb.0: ; %main_body
850 ; GFX11-NEXT: buffer_load_i8 v0, off, s[0:3], 0
851 ; GFX11-NEXT: s_waitcnt vmcnt(0)
852 ; GFX11-NEXT: v_cvt_f32_i32_e32 v0, v0
853 ; GFX11-NEXT: ; return to shader part epilog
855 %tmp = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
856 %tmp2 = sext i8 %tmp to i32
857 %val = sitofp i32 %tmp2 to float
; i16 raw buffer load that is sign-extended to i32 and converted with sitofp.
; The checks expect a signed 16-bit load (buffer_load_sshort, or
; buffer_load_i16 on GFX11) followed by v_cvt_f32_i32_e32.
861 define amdgpu_ps float @raw_ptr_buffer_load_sshort(ptr addrspace(8) inreg %rsrc) {
862 ; PREGFX10-LABEL: raw_ptr_buffer_load_sshort:
863 ; PREGFX10: ; %bb.0: ; %main_body
864 ; PREGFX10-NEXT: buffer_load_sshort v0, off, s[0:3], 0
865 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
866 ; PREGFX10-NEXT: v_cvt_f32_i32_e32 v0, v0
867 ; PREGFX10-NEXT: ; return to shader part epilog
869 ; GFX10-LABEL: raw_ptr_buffer_load_sshort:
870 ; GFX10: ; %bb.0: ; %main_body
871 ; GFX10-NEXT: buffer_load_sshort v0, off, s[0:3], 0
872 ; GFX10-NEXT: s_waitcnt vmcnt(0)
873 ; GFX10-NEXT: v_cvt_f32_i32_e32 v0, v0
874 ; GFX10-NEXT: ; return to shader part epilog
876 ; GFX11-LABEL: raw_ptr_buffer_load_sshort:
877 ; GFX11: ; %bb.0: ; %main_body
878 ; GFX11-NEXT: buffer_load_i16 v0, off, s[0:3], 0
879 ; GFX11-NEXT: s_waitcnt vmcnt(0)
880 ; GFX11-NEXT: v_cvt_f32_i32_e32 v0, v0
881 ; GFX11-NEXT: ; return to shader part epilog
883 %tmp = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
884 %tmp2 = sext i16 %tmp to i32
885 %val = sitofp i32 %tmp2 to float
; half raw buffer load whose result is stored through an addrspace(3) pointer.
; The checks expect a 16-bit load (buffer_load_ushort / buffer_load_u16) and a
; 16-bit DS store; the pre-GFX10 checks additionally expect m0 to be set to -1
; before the DS store.
889 define amdgpu_ps void @raw_ptr_buffer_load_f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
890 ; PREGFX10-LABEL: raw_ptr_buffer_load_f16:
891 ; PREGFX10: ; %bb.0: ; %main_body
892 ; PREGFX10-NEXT: buffer_load_ushort v1, off, s[0:3], 0
893 ; PREGFX10-NEXT: s_mov_b32 m0, -1
894 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
895 ; PREGFX10-NEXT: ds_write_b16 v0, v1
896 ; PREGFX10-NEXT: s_endpgm
898 ; GFX10-LABEL: raw_ptr_buffer_load_f16:
899 ; GFX10: ; %bb.0: ; %main_body
900 ; GFX10-NEXT: buffer_load_ushort v1, off, s[0:3], 0
901 ; GFX10-NEXT: s_waitcnt vmcnt(0)
902 ; GFX10-NEXT: ds_write_b16 v0, v1
903 ; GFX10-NEXT: s_endpgm
905 ; GFX11-LABEL: raw_ptr_buffer_load_f16:
906 ; GFX11: ; %bb.0: ; %main_body
907 ; GFX11-NEXT: buffer_load_u16 v1, off, s[0:3], 0
908 ; GFX11-NEXT: s_waitcnt vmcnt(0)
909 ; GFX11-NEXT: ds_store_b16 v0, v1
910 ; GFX11-NEXT: s_endpgm
912 %val = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
913 store half %val, ptr addrspace(3) %ptr
; <2 x half> raw buffer load stored through an addrspace(3) pointer. The checks
; expect a single 32-bit load and a 32-bit DS store; the pre-GFX10 checks also
; expect m0 to be set to -1 before the DS store.
917 define amdgpu_ps void @raw_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
918 ; PREGFX10-LABEL: raw_ptr_buffer_load_v2f16:
919 ; PREGFX10: ; %bb.0: ; %main_body
920 ; PREGFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0
921 ; PREGFX10-NEXT: s_mov_b32 m0, -1
922 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
923 ; PREGFX10-NEXT: ds_write_b32 v0, v1
924 ; PREGFX10-NEXT: s_endpgm
926 ; GFX10-LABEL: raw_ptr_buffer_load_v2f16:
927 ; GFX10: ; %bb.0: ; %main_body
928 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0
929 ; GFX10-NEXT: s_waitcnt vmcnt(0)
930 ; GFX10-NEXT: ds_write_b32 v0, v1
931 ; GFX10-NEXT: s_endpgm
933 ; GFX11-LABEL: raw_ptr_buffer_load_v2f16:
934 ; GFX11: ; %bb.0: ; %main_body
935 ; GFX11-NEXT: buffer_load_b32 v1, off, s[0:3], 0
936 ; GFX11-NEXT: s_waitcnt vmcnt(0)
937 ; GFX11-NEXT: ds_store_b32 v0, v1
938 ; GFX11-NEXT: s_endpgm
940 %val = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
941 store <2 x half> %val, ptr addrspace(3) %ptr
; <4 x half> raw buffer load stored through an addrspace(3) pointer. The checks
; expect a single 64-bit load and a 64-bit DS store; the pre-GFX10 checks also
; expect m0 to be set to -1 before the DS store.
945 define amdgpu_ps void @raw_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
946 ; PREGFX10-LABEL: raw_ptr_buffer_load_v4f16:
948 ; PREGFX10-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0
949 ; PREGFX10-NEXT: s_mov_b32 m0, -1
950 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
951 ; PREGFX10-NEXT: ds_write_b64 v0, v[1:2]
952 ; PREGFX10-NEXT: s_endpgm
954 ; GFX10-LABEL: raw_ptr_buffer_load_v4f16:
956 ; GFX10-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0
957 ; GFX10-NEXT: s_waitcnt vmcnt(0)
958 ; GFX10-NEXT: ds_write_b64 v0, v[1:2]
959 ; GFX10-NEXT: s_endpgm
961 ; GFX11-LABEL: raw_ptr_buffer_load_v4f16:
963 ; GFX11-NEXT: buffer_load_b64 v[1:2], off, s[0:3], 0
964 ; GFX11-NEXT: s_waitcnt vmcnt(0)
965 ; GFX11-NEXT: ds_store_b64 v0, v[1:2]
966 ; GFX11-NEXT: s_endpgm
967 %val = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
968 store <4 x half> %val, ptr addrspace(3) %ptr
973 ; define amdgpu_ps void @raw_ptr_buffer_load_v6f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
974 ; %val = call <6 x half> @llvm.amdgcn.raw.ptr.buffer.load.v6f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
975 ; store <6 x half> %val, ptr addrspace(3) %ptr
; <8 x half> raw buffer load stored through an addrspace(3) pointer. The GFX10
; and GFX11 checks expect a single 128-bit load and a 128-bit DS store.
; NOTE(review): no pre-GFX10 check lines are present for this function;
; presumably the two llc invocations sharing that prefix produce different
; output so the update script emitted none - confirm, and consider giving
; them distinct prefixes so this case is covered.
979 define amdgpu_ps void @raw_ptr_buffer_load_v8f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
980 ; GFX10-LABEL: raw_ptr_buffer_load_v8f16:
982 ; GFX10-NEXT: buffer_load_dwordx4 v[1:4], off, s[0:3], 0
983 ; GFX10-NEXT: s_waitcnt vmcnt(0)
984 ; GFX10-NEXT: ds_write_b128 v0, v[1:4]
985 ; GFX10-NEXT: s_endpgm
987 ; GFX11-LABEL: raw_ptr_buffer_load_v8f16:
989 ; GFX11-NEXT: buffer_load_b128 v[1:4], off, s[0:3], 0
990 ; GFX11-NEXT: s_waitcnt vmcnt(0)
991 ; GFX11-NEXT: ds_store_b128 v0, v[1:4]
992 ; GFX11-NEXT: s_endpgm
993 %val = call <8 x half> @llvm.amdgcn.raw.ptr.buffer.load.v8f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
994 store <8 x half> %val, ptr addrspace(3) %ptr
; <2 x i16> raw buffer load stored through an addrspace(3) pointer. The checks
; expect a single 32-bit load and a 32-bit DS store; the pre-GFX10 checks also
; expect m0 to be set to -1 before the DS store.
998 define amdgpu_ps void @raw_ptr_buffer_load_v2i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
999 ; PREGFX10-LABEL: raw_ptr_buffer_load_v2i16:
1000 ; PREGFX10: ; %bb.0: ; %main_body
1001 ; PREGFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0
1002 ; PREGFX10-NEXT: s_mov_b32 m0, -1
1003 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1004 ; PREGFX10-NEXT: ds_write_b32 v0, v1
1005 ; PREGFX10-NEXT: s_endpgm
1007 ; GFX10-LABEL: raw_ptr_buffer_load_v2i16:
1008 ; GFX10: ; %bb.0: ; %main_body
1009 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0
1010 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1011 ; GFX10-NEXT: ds_write_b32 v0, v1
1012 ; GFX10-NEXT: s_endpgm
1014 ; GFX11-LABEL: raw_ptr_buffer_load_v2i16:
1015 ; GFX11: ; %bb.0: ; %main_body
1016 ; GFX11-NEXT: buffer_load_b32 v1, off, s[0:3], 0
1017 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1018 ; GFX11-NEXT: ds_store_b32 v0, v1
1019 ; GFX11-NEXT: s_endpgm
1021 %val = call <2 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v2i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
1022 store <2 x i16> %val, ptr addrspace(3) %ptr
; <4 x i16> raw buffer load stored through an addrspace(3) pointer. The checks
; expect a single 64-bit load and a 64-bit DS store; the pre-GFX10 checks also
; expect m0 to be set to -1 before the DS store.
1026 define amdgpu_ps void @raw_ptr_buffer_load_v4i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
1027 ; PREGFX10-LABEL: raw_ptr_buffer_load_v4i16:
1028 ; PREGFX10: ; %bb.0:
1029 ; PREGFX10-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0
1030 ; PREGFX10-NEXT: s_mov_b32 m0, -1
1031 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1032 ; PREGFX10-NEXT: ds_write_b64 v0, v[1:2]
1033 ; PREGFX10-NEXT: s_endpgm
1035 ; GFX10-LABEL: raw_ptr_buffer_load_v4i16:
1037 ; GFX10-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0
1038 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1039 ; GFX10-NEXT: ds_write_b64 v0, v[1:2]
1040 ; GFX10-NEXT: s_endpgm
1042 ; GFX11-LABEL: raw_ptr_buffer_load_v4i16:
1044 ; GFX11-NEXT: buffer_load_b64 v[1:2], off, s[0:3], 0
1045 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1046 ; GFX11-NEXT: ds_store_b64 v0, v[1:2]
1047 ; GFX11-NEXT: s_endpgm
1048 %val = call <4 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v4i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
1049 store <4 x i16> %val, ptr addrspace(3) %ptr
1054 ; define amdgpu_ps void @raw_ptr_buffer_load_v6i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
1055 ; %val = call <6 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v6i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
1056 ; store <6 x i16> %val, ptr addrspace(3) %ptr
; <8 x i16> raw buffer load stored through an addrspace(3) pointer. The GFX10
; and GFX11 checks expect a single 128-bit load and a 128-bit DS store.
; NOTE(review): no pre-GFX10 check lines are present for this function;
; presumably the two llc invocations sharing that prefix produce different
; output so the update script emitted none - confirm, and consider giving
; them distinct prefixes so this case is covered.
1060 define amdgpu_ps void @raw_ptr_buffer_load_v8i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
1061 ; GFX10-LABEL: raw_ptr_buffer_load_v8i16:
1063 ; GFX10-NEXT: buffer_load_dwordx4 v[1:4], off, s[0:3], 0
1064 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1065 ; GFX10-NEXT: ds_write_b128 v0, v[1:4]
1066 ; GFX10-NEXT: s_endpgm
1068 ; GFX11-LABEL: raw_ptr_buffer_load_v8i16:
1070 ; GFX11-NEXT: buffer_load_b128 v[1:4], off, s[0:3], 0
1071 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1072 ; GFX11-NEXT: ds_store_b128 v0, v[1:4]
1073 ; GFX11-NEXT: s_endpgm
1074 %val = call <8 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v8i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
1075 store <8 x i16> %val, ptr addrspace(3) %ptr
; Six scalar float raw buffer loads at constant offsets 4, 8, 12, 16 and 28, 32
; feed two exports. The checks expect the first four to become one 128-bit
; load at offset:4 and the last two one 64-bit load at offset:28.
; The unused export components are passed as poison (undef is deprecated); the
; checks already treat those lanes as don't-care registers.
1079 define amdgpu_ps void @raw_ptr_buffer_load_x1_offset_merged(ptr addrspace(8) inreg %rsrc) {
1080 ; PREGFX10-LABEL: raw_ptr_buffer_load_x1_offset_merged:
1081 ; PREGFX10: ; %bb.0: ; %main_body
1082 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
1083 ; PREGFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
1084 ; PREGFX10-NEXT: s_waitcnt vmcnt(1)
1085 ; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
1086 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1087 ; PREGFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
1088 ; PREGFX10-NEXT: s_endpgm
1090 ; GFX10-LABEL: raw_ptr_buffer_load_x1_offset_merged:
1091 ; GFX10: ; %bb.0: ; %main_body
1092 ; GFX10-NEXT: s_clause 0x1
1093 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
1094 ; GFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
1095 ; GFX10-NEXT: s_waitcnt vmcnt(1)
1096 ; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
1097 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1098 ; GFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
1099 ; GFX10-NEXT: s_endpgm
1101 ; GFX11-LABEL: raw_ptr_buffer_load_x1_offset_merged:
1102 ; GFX11: ; %bb.0: ; %main_body
1103 ; GFX11-NEXT: s_clause 0x1
1104 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4
1105 ; GFX11-NEXT: buffer_load_b64 v[4:5], off, s[0:3], 0 offset:28
1106 ; GFX11-NEXT: s_waitcnt vmcnt(1)
1107 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
1108 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1109 ; GFX11-NEXT: exp mrt0 v4, v5, v0, v0 done
1110 ; GFX11-NEXT: s_endpgm
1112 %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
1113 %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 8, i32 0, i32 0)
1114 %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
1115 %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 16, i32 0, i32 0)
1116 %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 28, i32 0, i32 0)
1117 %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 32, i32 0, i32 0)
1118 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
1119 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float poison, float poison, i1 true, i1 true)
; Same six scalar float loads as the merged variant (offsets 4, 8, 12, 16 and
; 28, 32) but with the last intrinsic argument set to 8, the "swizzled" case
; named by this test. The checks expect six separate 32-bit loads, i.e. no
; combining of adjacent loads.
; The unused export components are passed as poison (undef is deprecated); the
; checks already treat those lanes as don't-care registers.
1123 define amdgpu_ps void @raw_ptr_buffer_load_x1_offset_swizzled_not_merged(ptr addrspace(8) inreg %rsrc) {
1124 ; PREGFX10-LABEL: raw_ptr_buffer_load_x1_offset_swizzled_not_merged:
1125 ; PREGFX10: ; %bb.0: ; %main_body
1126 ; PREGFX10-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4
1127 ; PREGFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:8
1128 ; PREGFX10-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:12
1129 ; PREGFX10-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:16
1130 ; PREGFX10-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:28
1131 ; PREGFX10-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:32
1132 ; PREGFX10-NEXT: s_waitcnt vmcnt(2)
1133 ; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
1134 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1135 ; PREGFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
1136 ; PREGFX10-NEXT: s_endpgm
1138 ; GFX10-LABEL: raw_ptr_buffer_load_x1_offset_swizzled_not_merged:
1139 ; GFX10: ; %bb.0: ; %main_body
1140 ; GFX10-NEXT: s_clause 0x5
1141 ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4
1142 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:8
1143 ; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:12
1144 ; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:16
1145 ; GFX10-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:28
1146 ; GFX10-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:32
1147 ; GFX10-NEXT: s_waitcnt vmcnt(2)
1148 ; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
1149 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1150 ; GFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
1151 ; GFX10-NEXT: s_endpgm
1153 ; GFX11-LABEL: raw_ptr_buffer_load_x1_offset_swizzled_not_merged:
1154 ; GFX11: ; %bb.0: ; %main_body
1155 ; GFX11-NEXT: s_clause 0x5
1156 ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 offset:4
1157 ; GFX11-NEXT: buffer_load_b32 v1, off, s[0:3], 0 offset:8
1158 ; GFX11-NEXT: buffer_load_b32 v2, off, s[0:3], 0 offset:12
1159 ; GFX11-NEXT: buffer_load_b32 v3, off, s[0:3], 0 offset:16
1160 ; GFX11-NEXT: buffer_load_b32 v4, off, s[0:3], 0 offset:28
1161 ; GFX11-NEXT: buffer_load_b32 v5, off, s[0:3], 0 offset:32
1162 ; GFX11-NEXT: s_waitcnt vmcnt(2)
1163 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
1164 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1165 ; GFX11-NEXT: exp mrt0 v4, v5, v0, v0 done
1166 ; GFX11-NEXT: s_endpgm
1168 %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 8)
1169 %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 8, i32 0, i32 8)
1170 %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 12, i32 0, i32 8)
1171 %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 16, i32 0, i32 8)
1172 %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 28, i32 0, i32 8)
1173 %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 32, i32 0, i32 8)
1174 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
1175 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float poison, float poison, i1 true, i1 true)
; double raw buffer load whose offset operand is %voffset + 60. The checks
; expect the constant to be folded into the instruction as "offen offset:60" on
; a single 64-bit load.
1179 define double @buffer_load_f64__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1180 ; PREGFX10-LABEL: buffer_load_f64__voffset_add:
1181 ; PREGFX10: ; %bb.0:
1182 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1183 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1184 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1185 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1187 ; GFX10-LABEL: buffer_load_f64__voffset_add:
1189 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1190 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1191 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1192 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1194 ; GFX11-LABEL: buffer_load_f64__voffset_add:
1196 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1197 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1198 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1199 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1200 %voffset.add = add i32 %voffset, 60
1201 %data = call double @llvm.amdgcn.raw.ptr.buffer.load.f64(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
; <2 x double> raw buffer load whose offset operand is %voffset + 60. The
; checks expect the constant to be folded into the instruction as
; "offen offset:60" on a single 128-bit load.
1205 define <2 x double> @buffer_load_v2f64__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1206 ; PREGFX10-LABEL: buffer_load_v2f64__voffset_add:
1207 ; PREGFX10: ; %bb.0:
1208 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1209 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1210 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1211 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1213 ; GFX10-LABEL: buffer_load_v2f64__voffset_add:
1215 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1216 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1217 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1218 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1220 ; GFX11-LABEL: buffer_load_v2f64__voffset_add:
1222 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1223 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1224 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1225 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1226 %voffset.add = add i32 %voffset, 60
1227 %data = call <2 x double> @llvm.amdgcn.raw.ptr.buffer.load.v2f64(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1228 ret <2 x double> %data
; i64 raw buffer load whose offset operand is %voffset + 60. The checks expect
; the constant to be folded into the instruction as "offen offset:60" on a
; single 64-bit load.
1231 define i64 @buffer_load_i64__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1232 ; PREGFX10-LABEL: buffer_load_i64__voffset_add:
1233 ; PREGFX10: ; %bb.0:
1234 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1235 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1236 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1237 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1239 ; GFX10-LABEL: buffer_load_i64__voffset_add:
1241 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1242 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1243 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1244 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1246 ; GFX11-LABEL: buffer_load_i64__voffset_add:
1248 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1249 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1250 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1251 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1252 %voffset.add = add i32 %voffset, 60
1253 %data = call i64 @llvm.amdgcn.raw.ptr.buffer.load.i64(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
; <2 x i64> raw buffer load whose offset operand is %voffset + 60. The checks
; expect the constant to be folded into the instruction as "offen offset:60" on
; a single 128-bit load.
1257 define <2 x i64> @buffer_load_v2i64__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1258 ; PREGFX10-LABEL: buffer_load_v2i64__voffset_add:
1259 ; PREGFX10: ; %bb.0:
1260 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1261 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1262 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1263 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1265 ; GFX10-LABEL: buffer_load_v2i64__voffset_add:
1267 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1268 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1269 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1270 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1272 ; GFX11-LABEL: buffer_load_v2i64__voffset_add:
1274 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1275 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1276 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1277 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1278 %voffset.add = add i32 %voffset, 60
1279 %data = call <2 x i64> @llvm.amdgcn.raw.ptr.buffer.load.v2i64(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
; Raw buffer load returning a default-address-space pointer, with offset
; operand %voffset + 60. The checks expect the constant to be folded as
; "offen offset:60" on a single 64-bit load.
1283 define ptr @buffer_load_p0__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1284 ; PREGFX10-LABEL: buffer_load_p0__voffset_add:
1285 ; PREGFX10: ; %bb.0:
1286 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1287 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1288 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1289 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1291 ; GFX10-LABEL: buffer_load_p0__voffset_add:
1293 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1294 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1295 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1296 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1298 ; GFX11-LABEL: buffer_load_p0__voffset_add:
1300 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1301 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1302 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1303 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1304 %voffset.add = add i32 %voffset, 60
1305 %data = call ptr @llvm.amdgcn.raw.ptr.buffer.load.p0(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
; Raw buffer load returning <2 x ptr>, with offset operand %voffset + 60. The
; checks expect the constant to be folded as "offen offset:60" on a single
; 128-bit load.
; The intrinsic is spelled with the .v2p0 suffix so that the overload suffix
; matches the <2 x ptr> return type, in line with the .v2f64 / .v2i64 tests.
1309 define <2 x ptr> @buffer_load_v2p0__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1310 ; PREGFX10-LABEL: buffer_load_v2p0__voffset_add:
1311 ; PREGFX10: ; %bb.0:
1312 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1313 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1314 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1315 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1317 ; GFX10-LABEL: buffer_load_v2p0__voffset_add:
1319 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1320 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1321 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1322 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1324 ; GFX11-LABEL: buffer_load_v2p0__voffset_add:
1326 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1327 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1328 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1329 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1330 %voffset.add = add i32 %voffset, 60
1331 %data = call <2 x ptr> @llvm.amdgcn.raw.ptr.buffer.load.v2p0(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
; Raw buffer load returning a ptr addrspace(1), with offset operand
; %voffset + 60. The checks expect the constant to be folded as
; "offen offset:60" on a single 64-bit load.
1335 define ptr addrspace(1) @buffer_load_p1__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1336 ; PREGFX10-LABEL: buffer_load_p1__voffset_add:
1337 ; PREGFX10: ; %bb.0:
1338 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1339 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1340 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1341 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1343 ; GFX10-LABEL: buffer_load_p1__voffset_add:
1345 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1346 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1347 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1348 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1350 ; GFX11-LABEL: buffer_load_p1__voffset_add:
1352 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1353 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1354 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1355 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1356 %voffset.add = add i32 %voffset, 60
1357 %data = call ptr addrspace(1) @llvm.amdgcn.raw.ptr.buffer.load.p1(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1358 ret ptr addrspace(1) %data
; Raw buffer load returning <2 x ptr addrspace(1)>, with offset operand
; %voffset + 60. The checks expect the constant to be folded as
; "offen offset:60" on a single 128-bit load.
; The intrinsic is spelled with the .v2p1 suffix so that the overload suffix
; matches the vector-of-pointer return type.
1361 define <2 x ptr addrspace(1)> @buffer_load_v2p1__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1362 ; PREGFX10-LABEL: buffer_load_v2p1__voffset_add:
1363 ; PREGFX10: ; %bb.0:
1364 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1365 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1366 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1367 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1369 ; GFX10-LABEL: buffer_load_v2p1__voffset_add:
1371 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1372 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1373 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1374 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1376 ; GFX11-LABEL: buffer_load_v2p1__voffset_add:
1378 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1379 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1380 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1381 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1382 %voffset.add = add i32 %voffset, 60
1383 %data = call <2 x ptr addrspace(1)> @llvm.amdgcn.raw.ptr.buffer.load.v2p1(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1384 ret <2 x ptr addrspace(1)> %data
; Raw buffer load returning a ptr addrspace(4), with offset operand
; %voffset + 60. The checks expect the constant to be folded as
; "offen offset:60" on a single 64-bit load.
1387 define ptr addrspace(4) @buffer_load_p4__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1388 ; PREGFX10-LABEL: buffer_load_p4__voffset_add:
1389 ; PREGFX10: ; %bb.0:
1390 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1391 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1392 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1393 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1395 ; GFX10-LABEL: buffer_load_p4__voffset_add:
1397 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1398 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1399 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1400 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1402 ; GFX11-LABEL: buffer_load_p4__voffset_add:
1404 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1405 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1406 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1407 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1408 %voffset.add = add i32 %voffset, 60
1409 %data = call ptr addrspace(4) @llvm.amdgcn.raw.ptr.buffer.load.p4(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1410 ret ptr addrspace(4) %data
; Raw buffer load returning <2 x ptr addrspace(4)>, with offset operand
; %voffset + 60. The checks expect the constant to be folded as
; "offen offset:60" on a single 128-bit load.
; The intrinsic is spelled with the .v2p4 suffix so that the overload suffix
; matches the vector-of-pointer return type.
1413 define <2 x ptr addrspace(4)> @buffer_load_v2p4__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1414 ; PREGFX10-LABEL: buffer_load_v2p4__voffset_add:
1415 ; PREGFX10: ; %bb.0:
1416 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1417 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1418 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1419 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1421 ; GFX10-LABEL: buffer_load_v2p4__voffset_add:
1423 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1424 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1425 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1426 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1428 ; GFX11-LABEL: buffer_load_v2p4__voffset_add:
1430 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1431 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1432 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1433 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1434 %voffset.add = add i32 %voffset, 60
1435 %data = call <2 x ptr addrspace(4)> @llvm.amdgcn.raw.ptr.buffer.load.v2p4(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1436 ret <2 x ptr addrspace(4)> %data
; Raw buffer load returning a ptr addrspace(999), with offset operand
; %voffset + 60. The checks expect the constant to be folded as
; "offen offset:60" on a single 64-bit load.
1439 define ptr addrspace(999) @buffer_load_p999__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1440 ; PREGFX10-LABEL: buffer_load_p999__voffset_add:
1441 ; PREGFX10: ; %bb.0:
1442 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1443 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1444 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1445 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1447 ; GFX10-LABEL: buffer_load_p999__voffset_add:
1449 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1450 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1451 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1452 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1454 ; GFX11-LABEL: buffer_load_p999__voffset_add:
1456 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1457 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1458 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1459 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1460 %voffset.add = add i32 %voffset, 60
1461 %data = call ptr addrspace(999) @llvm.amdgcn.raw.ptr.buffer.load.p999(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1462 ret ptr addrspace(999) %data
; Loads a <2 x ptr addrspace(999)> through the raw pointer-buffer load
; intrinsic with the constant 60 added to %voffset in IR. The assertions expect
; the add to be folded into the immediate field (offen offset:60) of one
; four-dword load. The intrinsic is spelled with the vector suffix (v2p999)
; that matches its return type, like the other vector-of-pointer tests here.
1465 define <2 x ptr addrspace(999)> @buffer_load_v2p999__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1466 ; PREGFX10-LABEL: buffer_load_v2p999__voffset_add:
1467 ; PREGFX10: ; %bb.0:
1468 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1469 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1470 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1471 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1473 ; GFX10-LABEL: buffer_load_v2p999__voffset_add:
1475 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1476 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1477 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1478 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1480 ; GFX11-LABEL: buffer_load_v2p999__voffset_add:
1482 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1483 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1484 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1485 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1486 %voffset.add = add i32 %voffset, 60
1487 %data = call <2 x ptr addrspace(999)> @llvm.amdgcn.raw.ptr.buffer.load.v2p999(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1488 ret <2 x ptr addrspace(999)> %data
; Loads a single ptr addrspace(2) through the raw pointer-buffer load intrinsic
; with the constant 60 added to %voffset in IR. The assertions expect the add to
; be folded into the immediate field (offen offset:60) of one single-dword load.
1491 define ptr addrspace(2) @buffer_load_p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1492 ; PREGFX10-LABEL: buffer_load_p2__voffset_add:
1493 ; PREGFX10: ; %bb.0:
1494 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1495 ; PREGFX10-NEXT: buffer_load_dword v0, v0, s[16:19], 0 offen offset:60
1496 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1497 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1499 ; GFX10-LABEL: buffer_load_p2__voffset_add:
1501 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1502 ; GFX10-NEXT: buffer_load_dword v0, v0, s[16:19], 0 offen offset:60
1503 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1504 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1506 ; GFX11-LABEL: buffer_load_p2__voffset_add:
1508 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1509 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
1510 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1511 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1512 %voffset.add = add i32 %voffset, 60
1513 %data = call ptr addrspace(2) @llvm.amdgcn.raw.ptr.buffer.load.p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1514 ret ptr addrspace(2) %data
; Loads a <2 x ptr addrspace(2)> through the raw pointer-buffer load intrinsic
; with the constant 60 added to %voffset in IR. The assertions expect the add to
; be folded into the immediate field (offen offset:60) of one two-dword load.
1517 define <2 x ptr addrspace(2)> @buffer_load_v2p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1518 ; PREGFX10-LABEL: buffer_load_v2p2__voffset_add:
1519 ; PREGFX10: ; %bb.0:
1520 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1521 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1522 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1523 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1525 ; GFX10-LABEL: buffer_load_v2p2__voffset_add:
1527 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1528 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1529 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1530 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1532 ; GFX11-LABEL: buffer_load_v2p2__voffset_add:
1534 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1535 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1536 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1537 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1538 %voffset.add = add i32 %voffset, 60
1539 %data = call <2 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v2p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1540 ret <2 x ptr addrspace(2)> %data
; Loads a <3 x ptr addrspace(2)> through the raw pointer-buffer load intrinsic
; with the constant 60 added to %voffset in IR. The assertions expect the add to
; be folded into the immediate field (offen offset:60) of one three-dword load.
; Review note - this function has no assertions for the prefix shared by the
; verde and tonga RUN lines; presumably those two targets emit different code
; here, so the update script dropped the conflicting checks. Confirm.
1543 define <3 x ptr addrspace(2)> @buffer_load_v3p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1544 ; GFX10-LABEL: buffer_load_v3p2__voffset_add:
1546 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1547 ; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[16:19], 0 offen offset:60
1548 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1549 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1551 ; GFX11-LABEL: buffer_load_v3p2__voffset_add:
1553 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1554 ; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
1555 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1556 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1557 %voffset.add = add i32 %voffset, 60
1558 %data = call <3 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v3p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1559 ret <3 x ptr addrspace(2)> %data
; Loads a <4 x ptr addrspace(2)> through the raw pointer-buffer load intrinsic
; with the constant 60 added to %voffset in IR. The assertions expect the add to
; be folded into the immediate field (offen offset:60) of one four-dword load.
1562 define <4 x ptr addrspace(2)> @buffer_load_v4p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1563 ; PREGFX10-LABEL: buffer_load_v4p2__voffset_add:
1564 ; PREGFX10: ; %bb.0:
1565 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1566 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1567 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1568 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1570 ; GFX10-LABEL: buffer_load_v4p2__voffset_add:
1572 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1573 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1574 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1575 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1577 ; GFX11-LABEL: buffer_load_v4p2__voffset_add:
1579 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1580 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1581 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1582 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1583 %voffset.add = add i32 %voffset, 60
1584 %data = call <4 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v4p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1585 ret <4 x ptr addrspace(2)> %data
; Loads a single ptr addrspace(3) through the raw pointer-buffer load intrinsic
; with the constant 60 added to %voffset in IR. The assertions expect the add to
; be folded into the immediate field (offen offset:60) of one single-dword load.
1588 define ptr addrspace(3) @buffer_load_p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1589 ; PREGFX10-LABEL: buffer_load_p3__voffset_add:
1590 ; PREGFX10: ; %bb.0:
1591 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1592 ; PREGFX10-NEXT: buffer_load_dword v0, v0, s[16:19], 0 offen offset:60
1593 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1594 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1596 ; GFX10-LABEL: buffer_load_p3__voffset_add:
1598 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1599 ; GFX10-NEXT: buffer_load_dword v0, v0, s[16:19], 0 offen offset:60
1600 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1601 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1603 ; GFX11-LABEL: buffer_load_p3__voffset_add:
1605 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1606 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
1607 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1608 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1609 %voffset.add = add i32 %voffset, 60
1610 %data = call ptr addrspace(3) @llvm.amdgcn.raw.ptr.buffer.load.p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1611 ret ptr addrspace(3) %data
; Loads a <2 x ptr addrspace(3)> through the raw pointer-buffer load intrinsic
; with the constant 60 added to %voffset in IR. The assertions expect the add to
; be folded into the immediate field (offen offset:60) of one two-dword load.
1614 define <2 x ptr addrspace(3)> @buffer_load_v2p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1615 ; PREGFX10-LABEL: buffer_load_v2p3__voffset_add:
1616 ; PREGFX10: ; %bb.0:
1617 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1618 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1619 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1620 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1622 ; GFX10-LABEL: buffer_load_v2p3__voffset_add:
1624 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1625 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1626 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1627 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1629 ; GFX11-LABEL: buffer_load_v2p3__voffset_add:
1631 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1632 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1633 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1634 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1635 %voffset.add = add i32 %voffset, 60
1636 %data = call <2 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v2p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1637 ret <2 x ptr addrspace(3)> %data
; Loads a <3 x ptr addrspace(3)> through the raw pointer-buffer load intrinsic
; with the constant 60 added to %voffset in IR. The assertions expect the add to
; be folded into the immediate field (offen offset:60) of one three-dword load.
; Review note - this function has no assertions for the prefix shared by the
; verde and tonga RUN lines; presumably those two targets emit different code
; here, so the update script dropped the conflicting checks. Confirm.
1640 define <3 x ptr addrspace(3)> @buffer_load_v3p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1641 ; GFX10-LABEL: buffer_load_v3p3__voffset_add:
1643 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1644 ; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[16:19], 0 offen offset:60
1645 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1646 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1648 ; GFX11-LABEL: buffer_load_v3p3__voffset_add:
1650 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1651 ; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
1652 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1653 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1654 %voffset.add = add i32 %voffset, 60
1655 %data = call <3 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v3p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1656 ret <3 x ptr addrspace(3)> %data
; Loads a <4 x ptr addrspace(3)> through the raw pointer-buffer load intrinsic
; with the constant 60 added to %voffset in IR. The assertions expect the add to
; be folded into the immediate field (offen offset:60) of one four-dword load.
1659 define <4 x ptr addrspace(3)> @buffer_load_v4p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1660 ; PREGFX10-LABEL: buffer_load_v4p3__voffset_add:
1661 ; PREGFX10: ; %bb.0:
1662 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1663 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1664 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1665 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1667 ; GFX10-LABEL: buffer_load_v4p3__voffset_add:
1669 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1670 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1671 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1672 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1674 ; GFX11-LABEL: buffer_load_v4p3__voffset_add:
1676 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1677 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1678 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1679 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1680 %voffset.add = add i32 %voffset, 60
1681 %data = call <4 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v4p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1682 ret <4 x ptr addrspace(3)> %data
; Loads a single ptr addrspace(5) through the raw pointer-buffer load intrinsic
; with the constant 60 added to %voffset in IR. The assertions expect the add to
; be folded into the immediate field (offen offset:60) of one single-dword load.
1685 define ptr addrspace(5) @buffer_load_p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1686 ; PREGFX10-LABEL: buffer_load_p5__voffset_add:
1687 ; PREGFX10: ; %bb.0:
1688 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1689 ; PREGFX10-NEXT: buffer_load_dword v0, v0, s[16:19], 0 offen offset:60
1690 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1691 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1693 ; GFX10-LABEL: buffer_load_p5__voffset_add:
1695 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1696 ; GFX10-NEXT: buffer_load_dword v0, v0, s[16:19], 0 offen offset:60
1697 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1698 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1700 ; GFX11-LABEL: buffer_load_p5__voffset_add:
1702 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1703 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
1704 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1705 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1706 %voffset.add = add i32 %voffset, 60
1707 %data = call ptr addrspace(5) @llvm.amdgcn.raw.ptr.buffer.load.p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1708 ret ptr addrspace(5) %data
; Loads a <2 x ptr addrspace(5)> through the raw pointer-buffer load intrinsic
; with the constant 60 added to %voffset in IR. The assertions expect the add to
; be folded into the immediate field (offen offset:60) of one two-dword load.
1711 define <2 x ptr addrspace(5)> @buffer_load_v2p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1712 ; PREGFX10-LABEL: buffer_load_v2p5__voffset_add:
1713 ; PREGFX10: ; %bb.0:
1714 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1715 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1716 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1717 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1719 ; GFX10-LABEL: buffer_load_v2p5__voffset_add:
1721 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1722 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1723 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1724 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1726 ; GFX11-LABEL: buffer_load_v2p5__voffset_add:
1728 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1729 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1730 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1731 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1732 %voffset.add = add i32 %voffset, 60
1733 %data = call <2 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v2p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1734 ret <2 x ptr addrspace(5)> %data
; Loads a <3 x ptr addrspace(5)> through the raw pointer-buffer load intrinsic
; with the constant 60 added to %voffset in IR. The assertions expect the add to
; be folded into the immediate field (offen offset:60) of one three-dword load.
; Review note - this function has no assertions for the prefix shared by the
; verde and tonga RUN lines; presumably those two targets emit different code
; here, so the update script dropped the conflicting checks. Confirm.
1737 define <3 x ptr addrspace(5)> @buffer_load_v3p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1738 ; GFX10-LABEL: buffer_load_v3p5__voffset_add:
1740 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1741 ; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[16:19], 0 offen offset:60
1742 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1743 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1745 ; GFX11-LABEL: buffer_load_v3p5__voffset_add:
1747 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1748 ; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
1749 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1750 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1751 %voffset.add = add i32 %voffset, 60
1752 %data = call <3 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v3p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1753 ret <3 x ptr addrspace(5)> %data
; Loads a <4 x ptr addrspace(5)> through the raw pointer-buffer load intrinsic
; with the constant 60 added to %voffset in IR. The assertions expect the add to
; be folded into the immediate field (offen offset:60) of one four-dword load.
1756 define <4 x ptr addrspace(5)> @buffer_load_v4p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1757 ; PREGFX10-LABEL: buffer_load_v4p5__voffset_add:
1758 ; PREGFX10: ; %bb.0:
1759 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1760 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1761 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1762 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1764 ; GFX10-LABEL: buffer_load_v4p5__voffset_add:
1766 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1767 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1768 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1769 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1771 ; GFX11-LABEL: buffer_load_v4p5__voffset_add:
1773 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1774 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1775 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1776 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1777 %voffset.add = add i32 %voffset, 60
1778 %data = call <4 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v4p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1779 ret <4 x ptr addrspace(5)> %data
; Loads a single ptr addrspace(6) through the raw pointer-buffer load intrinsic
; with the constant 60 added to %voffset in IR. The assertions expect the add to
; be folded into the immediate field (offen offset:60) of one single-dword load.
1782 define ptr addrspace(6) @buffer_load_p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1783 ; PREGFX10-LABEL: buffer_load_p6__voffset_add:
1784 ; PREGFX10: ; %bb.0:
1785 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1786 ; PREGFX10-NEXT: buffer_load_dword v0, v0, s[16:19], 0 offen offset:60
1787 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1788 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1790 ; GFX10-LABEL: buffer_load_p6__voffset_add:
1792 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1793 ; GFX10-NEXT: buffer_load_dword v0, v0, s[16:19], 0 offen offset:60
1794 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1795 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1797 ; GFX11-LABEL: buffer_load_p6__voffset_add:
1799 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1800 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
1801 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1802 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1803 %voffset.add = add i32 %voffset, 60
1804 %data = call ptr addrspace(6) @llvm.amdgcn.raw.ptr.buffer.load.p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1805 ret ptr addrspace(6) %data
; Loads a <2 x ptr addrspace(6)> through the raw pointer-buffer load intrinsic
; with the constant 60 added to %voffset in IR. The assertions expect the add to
; be folded into the immediate field (offen offset:60) of one two-dword load.
1808 define <2 x ptr addrspace(6)> @buffer_load_v2p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1809 ; PREGFX10-LABEL: buffer_load_v2p6__voffset_add:
1810 ; PREGFX10: ; %bb.0:
1811 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1812 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1813 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1814 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1816 ; GFX10-LABEL: buffer_load_v2p6__voffset_add:
1818 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1819 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1820 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1821 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1823 ; GFX11-LABEL: buffer_load_v2p6__voffset_add:
1825 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1826 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1827 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1828 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1829 %voffset.add = add i32 %voffset, 60
1830 %data = call <2 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v2p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1831 ret <2 x ptr addrspace(6)> %data
; Loads a <3 x ptr addrspace(6)> through the raw pointer-buffer load intrinsic
; with the constant 60 added to %voffset in IR. The assertions expect the add to
; be folded into the immediate field (offen offset:60) of one three-dword load.
; Review note - this function has no assertions for the prefix shared by the
; verde and tonga RUN lines; presumably those two targets emit different code
; here, so the update script dropped the conflicting checks. Confirm.
1834 define <3 x ptr addrspace(6)> @buffer_load_v3p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1835 ; GFX10-LABEL: buffer_load_v3p6__voffset_add:
1837 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1838 ; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[16:19], 0 offen offset:60
1839 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1840 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1842 ; GFX11-LABEL: buffer_load_v3p6__voffset_add:
1844 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1845 ; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
1846 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1847 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1848 %voffset.add = add i32 %voffset, 60
1849 %data = call <3 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v3p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1850 ret <3 x ptr addrspace(6)> %data
; Loads a <4 x ptr addrspace(6)> through the raw pointer-buffer load intrinsic
; with the constant 60 added to %voffset in IR. The assertions expect the add to
; be folded into the immediate field (offen offset:60) of one four-dword load.
1853 define <4 x ptr addrspace(6)> @buffer_load_v4p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1854 ; PREGFX10-LABEL: buffer_load_v4p6__voffset_add:
1855 ; PREGFX10: ; %bb.0:
1856 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1857 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1858 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1859 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1861 ; GFX10-LABEL: buffer_load_v4p6__voffset_add:
1863 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1864 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1865 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1866 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1868 ; GFX11-LABEL: buffer_load_v4p6__voffset_add:
1870 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1871 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1872 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1873 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1874 %voffset.add = add i32 %voffset, 60
1875 %data = call <4 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v4p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1876 ret <4 x ptr addrspace(6)> %data
; Declarations of the float, integer and half buffer-load overloads, plus
; llvm.amdgcn.exp.f32. Every declaration carries attribute group #0, which is
; defined on the last line as nounwind readonly.
; Review note - none of the pointer-returning overloads called by the
; *__voffset_add tests is declared in this list; presumably the IR parser
; declares intrinsics implicitly. Confirm before adding or removing entries.
1879 declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #0
1880 declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32) #0
1881 declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #0
1882 declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32) #0
1883 declare <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8), i32, i32, i32) #0
1884 declare <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32) #0
1885 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
1886 declare i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32) #0
1887 declare i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32) #0
1888 declare <2 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v2i16(ptr addrspace(8), i32, i32, i32) #0
1889 declare <4 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v4i16(ptr addrspace(8), i32, i32, i32) #0
1890 declare half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32) #0
1891 declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32) #0
1892 declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32) #0
1893 attributes #0 = { nounwind readonly }