1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s --check-prefixes=PREGFX10
3 ;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefixes=PREGFX10
4 ;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX10
5 ;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11
; Issues three <4 x float> raw ptr buffer loads from the same resource that
; differ only in the last intrinsic argument (0, 1, 2). The expected output
; shows no cache modifier, glc, and slc respectively on every target.
7 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(ptr addrspace(8) inreg) {
8 ; PREGFX10-LABEL: buffer_load:
9 ; PREGFX10: ; %bb.0: ; %main_body
10 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
11 ; PREGFX10-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
12 ; PREGFX10-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc
13 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
14 ; PREGFX10-NEXT: ; return to shader part epilog
16 ; GFX10-LABEL: buffer_load:
17 ; GFX10: ; %bb.0: ; %main_body
18 ; GFX10-NEXT: s_clause 0x2
19 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
20 ; GFX10-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
21 ; GFX10-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc
22 ; GFX10-NEXT: s_waitcnt vmcnt(0)
23 ; GFX10-NEXT: ; return to shader part epilog
25 ; GFX11-LABEL: buffer_load:
26 ; GFX11: ; %bb.0: ; %main_body
27 ; GFX11-NEXT: s_clause 0x2
28 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
29 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 glc
30 ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[0:3], 0 slc
31 ; GFX11-NEXT: s_waitcnt vmcnt(0)
32 ; GFX11-NEXT: ; return to shader part epilog
34 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0)
35 %data_glc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 1)
36 %data_slc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 2)
37 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
38 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
39 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
40 ret {<4 x float>, <4 x float>, <4 x float>} %r2
; Same three loads as a plain buffer load test, but with the last intrinsic
; argument set to 4, 5 and 6. The gfx10 and gfx11 expectations add a dlc
; modifier to every load; the pre-gfx10 expectations show no dlc at all.
43 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load_dlc(ptr addrspace(8) inreg) {
44 ; PREGFX10-LABEL: buffer_load_dlc:
45 ; PREGFX10: ; %bb.0: ; %main_body
46 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
47 ; PREGFX10-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
48 ; PREGFX10-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc
49 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
50 ; PREGFX10-NEXT: ; return to shader part epilog
52 ; GFX10-LABEL: buffer_load_dlc:
53 ; GFX10: ; %bb.0: ; %main_body
54 ; GFX10-NEXT: s_clause 0x2
55 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 dlc
56 ; GFX10-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc dlc
57 ; GFX10-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc dlc
58 ; GFX10-NEXT: s_waitcnt vmcnt(0)
59 ; GFX10-NEXT: ; return to shader part epilog
61 ; GFX11-LABEL: buffer_load_dlc:
62 ; GFX11: ; %bb.0: ; %main_body
63 ; GFX11-NEXT: s_clause 0x2
64 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 dlc
65 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 glc dlc
66 ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[0:3], 0 slc dlc
67 ; GFX11-NEXT: s_waitcnt vmcnt(0)
68 ; GFX11-NEXT: ; return to shader part epilog
70 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 4)
71 %data_glc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 5)
72 %data_slc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 6)
73 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
74 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
75 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
76 ret {<4 x float>, <4 x float>, <4 x float>} %r2
; Three loads whose last intrinsic argument has the sign bit set
; (-2147483648, plus 1, plus 2). Every expected load carries glc on the
; pre-gfx10 targets and glc dlc on gfx10 and gfx11; the third also keeps slc.
79 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load_volatile(ptr addrspace(8) inreg) {
80 ; PREGFX10-LABEL: buffer_load_volatile:
81 ; PREGFX10: ; %bb.0: ; %main_body
82 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc
83 ; PREGFX10-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
84 ; PREGFX10-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 glc slc
85 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
86 ; PREGFX10-NEXT: ; return to shader part epilog
88 ; GFX10-LABEL: buffer_load_volatile:
89 ; GFX10: ; %bb.0: ; %main_body
90 ; GFX10-NEXT: s_clause 0x2
91 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc dlc
92 ; GFX10-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc dlc
93 ; GFX10-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 glc slc dlc
94 ; GFX10-NEXT: s_waitcnt vmcnt(0)
95 ; GFX10-NEXT: ; return to shader part epilog
97 ; GFX11-LABEL: buffer_load_volatile:
98 ; GFX11: ; %bb.0: ; %main_body
99 ; GFX11-NEXT: s_clause 0x2
100 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 glc dlc
101 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 glc dlc
102 ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[0:3], 0 glc slc dlc
103 ; GFX11-NEXT: s_waitcnt vmcnt(0)
104 ; GFX11-NEXT: ; return to shader part epilog
106 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 -2147483648)
107 %data_glc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 -2147483647)
108 %data_slc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 -2147483646)
109 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
110 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
111 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
112 ret {<4 x float>, <4 x float>, <4 x float>} %r2
; Passes the constant 40 as the second intrinsic argument. The expected load
; uses no VGPR address ("off") and encodes the constant as offset:40.
115 define amdgpu_ps <4 x float> @buffer_load_immoffs(ptr addrspace(8) inreg) {
116 ; PREGFX10-LABEL: buffer_load_immoffs:
117 ; PREGFX10: ; %bb.0: ; %main_body
118 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:40
119 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
120 ; PREGFX10-NEXT: ; return to shader part epilog
122 ; GFX10-LABEL: buffer_load_immoffs:
123 ; GFX10: ; %bb.0: ; %main_body
124 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:40
125 ; GFX10-NEXT: s_waitcnt vmcnt(0)
126 ; GFX10-NEXT: ; return to shader part epilog
128 ; GFX11-LABEL: buffer_load_immoffs:
129 ; GFX11: ; %bb.0: ; %main_body
130 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 offset:40
131 ; GFX11-NEXT: s_waitcnt vmcnt(0)
132 ; GFX11-NEXT: ; return to shader part epilog
134 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 40, i32 0, i32 0)
135 ret <4 x float> %data
; Passes constants 4 and 8188 as the second and third intrinsic arguments.
; The expected code materializes 8188 (0x1ffc) into s4, uses s4 as the scalar
; offset operand of the load, and encodes the 4 as offset:4.
138 define amdgpu_ps <4 x float> @buffer_load_immoffs_large(ptr addrspace(8) inreg) {
139 ; PREGFX10-LABEL: buffer_load_immoffs_large:
140 ; PREGFX10: ; %bb.0: ; %main_body
141 ; PREGFX10-NEXT: s_movk_i32 s4, 0x1ffc
142 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], s4 offset:4
143 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
144 ; PREGFX10-NEXT: ; return to shader part epilog
146 ; GFX10-LABEL: buffer_load_immoffs_large:
147 ; GFX10: ; %bb.0: ; %main_body
148 ; GFX10-NEXT: s_movk_i32 s4, 0x1ffc
149 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], s4 offset:4
150 ; GFX10-NEXT: s_waitcnt vmcnt(0)
151 ; GFX10-NEXT: ; return to shader part epilog
153 ; GFX11-LABEL: buffer_load_immoffs_large:
154 ; GFX11: ; %bb.0: ; %main_body
155 ; GFX11-NEXT: s_movk_i32 s4, 0x1ffc
156 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], s4 offset:4
157 ; GFX11-NEXT: s_waitcnt vmcnt(0)
158 ; GFX11-NEXT: ; return to shader part epilog
160 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 4, i32 8188, i32 0)
161 ret <4 x float> %data
; Passes the function's non-inreg i32 argument as the second intrinsic
; argument. The expected load addresses through v0 with the offen modifier
; and has no immediate offset.
164 define amdgpu_ps <4 x float> @buffer_load_ofs(ptr addrspace(8) inreg, i32) {
165 ; PREGFX10-LABEL: buffer_load_ofs:
166 ; PREGFX10: ; %bb.0: ; %main_body
167 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
168 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
169 ; PREGFX10-NEXT: ; return to shader part epilog
171 ; GFX10-LABEL: buffer_load_ofs:
172 ; GFX10: ; %bb.0: ; %main_body
173 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
174 ; GFX10-NEXT: s_waitcnt vmcnt(0)
175 ; GFX10-NEXT: ; return to shader part epilog
177 ; GFX11-LABEL: buffer_load_ofs:
178 ; GFX11: ; %bb.0: ; %main_body
179 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen
180 ; GFX11-NEXT: s_waitcnt vmcnt(0)
181 ; GFX11-NEXT: ; return to shader part epilog
183 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %1, i32 0, i32 0)
184 ret <4 x float> %data
; Adds the constant 60 to the i32 argument before passing it as the second
; intrinsic argument. The expected load keeps v0 as the offen address and
; folds the add into offset:60, with no separate add instruction.
187 define amdgpu_ps <4 x float> @buffer_load_ofs_imm(ptr addrspace(8) inreg, i32) {
188 ; PREGFX10-LABEL: buffer_load_ofs_imm:
189 ; PREGFX10: ; %bb.0: ; %main_body
190 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:60
191 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
192 ; PREGFX10-NEXT: ; return to shader part epilog
194 ; GFX10-LABEL: buffer_load_ofs_imm:
195 ; GFX10: ; %bb.0: ; %main_body
196 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:60
197 ; GFX10-NEXT: s_waitcnt vmcnt(0)
198 ; GFX10-NEXT: ; return to shader part epilog
200 ; GFX11-LABEL: buffer_load_ofs_imm:
201 ; GFX11: ; %bb.0: ; %main_body
202 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
203 ; GFX11-NEXT: s_waitcnt vmcnt(0)
204 ; GFX11-NEXT: ; return to shader part epilog
206 %ofs = add i32 %1, 60
207 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %ofs, i32 0, i32 0)
208 ret <4 x float> %data
; Passes the constant 4092 as the second intrinsic argument. The expected
; load encodes it entirely as offset:4092 and needs no VGPR address.
211 define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(ptr addrspace(8) inreg) {
212 ; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
213 ; PREGFX10: ; %bb.0: ; %main_body
214 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4092
215 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
216 ; PREGFX10-NEXT: ; return to shader part epilog
218 ; GFX10-LABEL: buffer_load_voffset_large_12bit:
219 ; GFX10: ; %bb.0: ; %main_body
220 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4092
221 ; GFX10-NEXT: s_waitcnt vmcnt(0)
222 ; GFX10-NEXT: ; return to shader part epilog
224 ; GFX11-LABEL: buffer_load_voffset_large_12bit:
225 ; GFX11: ; %bb.0: ; %main_body
226 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4092
227 ; GFX11-NEXT: s_waitcnt vmcnt(0)
228 ; GFX11-NEXT: ; return to shader part epilog
230 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 4092, i32 0, i32 0)
231 ret <4 x float> %data
; Passes the constant 8188 as the second intrinsic argument. The expected
; code splits it into 0x1000 moved into v0 (used with offen) plus an
; immediate offset:4092.
234 define amdgpu_ps <4 x float> @buffer_load_voffset_large_13bit(ptr addrspace(8) inreg) {
235 ; PREGFX10-LABEL: buffer_load_voffset_large_13bit:
236 ; PREGFX10: ; %bb.0: ; %main_body
237 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0x1000
238 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
239 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
240 ; PREGFX10-NEXT: ; return to shader part epilog
242 ; GFX10-LABEL: buffer_load_voffset_large_13bit:
243 ; GFX10: ; %bb.0: ; %main_body
244 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x1000
245 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
246 ; GFX10-NEXT: s_waitcnt vmcnt(0)
247 ; GFX10-NEXT: ; return to shader part epilog
249 ; GFX11-LABEL: buffer_load_voffset_large_13bit:
250 ; GFX11: ; %bb.0: ; %main_body
251 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x1000
252 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
253 ; GFX11-NEXT: s_waitcnt vmcnt(0)
254 ; GFX11-NEXT: ; return to shader part epilog
256 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 8188, i32 0, i32 0)
257 ret <4 x float> %data
; Passes the constant 65532 as the second intrinsic argument. The expected
; code splits it into 0xf000 moved into v0 (used with offen) plus an
; immediate offset:4092.
260 define amdgpu_ps <4 x float> @buffer_load_voffset_large_16bit(ptr addrspace(8) inreg) {
261 ; PREGFX10-LABEL: buffer_load_voffset_large_16bit:
262 ; PREGFX10: ; %bb.0: ; %main_body
263 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0xf000
264 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
265 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
266 ; PREGFX10-NEXT: ; return to shader part epilog
268 ; GFX10-LABEL: buffer_load_voffset_large_16bit:
269 ; GFX10: ; %bb.0: ; %main_body
270 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xf000
271 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
272 ; GFX10-NEXT: s_waitcnt vmcnt(0)
273 ; GFX10-NEXT: ; return to shader part epilog
275 ; GFX11-LABEL: buffer_load_voffset_large_16bit:
276 ; GFX11: ; %bb.0: ; %main_body
277 ; GFX11-NEXT: v_mov_b32_e32 v0, 0xf000
278 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
279 ; GFX11-NEXT: s_waitcnt vmcnt(0)
280 ; GFX11-NEXT: ; return to shader part epilog
282 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 65532, i32 0, i32 0)
283 ret <4 x float> %data
; Passes the constant 8388604 as the second intrinsic argument. The expected
; code splits it into 0x7ff000 moved into v0 (used with offen) plus an
; immediate offset:4092.
286 define amdgpu_ps <4 x float> @buffer_load_voffset_large_23bit(ptr addrspace(8) inreg) {
287 ; PREGFX10-LABEL: buffer_load_voffset_large_23bit:
288 ; PREGFX10: ; %bb.0: ; %main_body
289 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0x7ff000
290 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
291 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
292 ; PREGFX10-NEXT: ; return to shader part epilog
294 ; GFX10-LABEL: buffer_load_voffset_large_23bit:
295 ; GFX10: ; %bb.0: ; %main_body
296 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7ff000
297 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
298 ; GFX10-NEXT: s_waitcnt vmcnt(0)
299 ; GFX10-NEXT: ; return to shader part epilog
301 ; GFX11-LABEL: buffer_load_voffset_large_23bit:
302 ; GFX11: ; %bb.0: ; %main_body
303 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x7ff000
304 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
305 ; GFX11-NEXT: s_waitcnt vmcnt(0)
306 ; GFX11-NEXT: ; return to shader part epilog
308 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 8388604, i32 0, i32 0)
309 ret <4 x float> %data
; Passes the constant 16777212 as the second intrinsic argument. The expected
; code splits it into 0xfff000 moved into v0 (used with offen) plus an
; immediate offset:4092.
312 define amdgpu_ps <4 x float> @buffer_load_voffset_large_24bit(ptr addrspace(8) inreg) {
313 ; PREGFX10-LABEL: buffer_load_voffset_large_24bit:
314 ; PREGFX10: ; %bb.0: ; %main_body
315 ; PREGFX10-NEXT: v_mov_b32_e32 v0, 0xfff000
316 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
317 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
318 ; PREGFX10-NEXT: ; return to shader part epilog
320 ; GFX10-LABEL: buffer_load_voffset_large_24bit:
321 ; GFX10: ; %bb.0: ; %main_body
322 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xfff000
323 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
324 ; GFX10-NEXT: s_waitcnt vmcnt(0)
325 ; GFX10-NEXT: ; return to shader part epilog
327 ; GFX11-LABEL: buffer_load_voffset_large_24bit:
328 ; GFX11: ; %bb.0: ; %main_body
329 ; GFX11-NEXT: v_mov_b32_e32 v0, 0xfff000
330 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
331 ; GFX11-NEXT: s_waitcnt vmcnt(0)
332 ; GFX11-NEXT: ; return to shader part epilog
334 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 16777212, i32 0, i32 0)
335 ret <4 x float> %data
; Loads a single float through the f32 variant of the intrinsic with %ofs as
; the second argument. The expected instruction is a one-dword load
; (buffer_load_dword, or buffer_load_b32 on gfx11) using offen.
339 define amdgpu_ps float @buffer_load_x1(ptr addrspace(8) inreg %rsrc, i32 %ofs) {
340 ; PREGFX10-LABEL: buffer_load_x1:
341 ; PREGFX10: ; %bb.0: ; %main_body
342 ; PREGFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
343 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
344 ; PREGFX10-NEXT: ; return to shader part epilog
346 ; GFX10-LABEL: buffer_load_x1:
347 ; GFX10: ; %bb.0: ; %main_body
348 ; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
349 ; GFX10-NEXT: s_waitcnt vmcnt(0)
350 ; GFX10-NEXT: ; return to shader part epilog
352 ; GFX11-LABEL: buffer_load_x1:
353 ; GFX11: ; %bb.0: ; %main_body
354 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
355 ; GFX11-NEXT: s_waitcnt vmcnt(0)
356 ; GFX11-NEXT: ; return to shader part epilog
358 %data = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 0, i32 0)
; Loads a <2 x float> through the v2f32 variant of the intrinsic with %ofs as
; the second argument. The expected instruction is a two-dword load
; (buffer_load_dwordx2, or buffer_load_b64 on gfx11) using offen.
362 define amdgpu_ps <2 x float> @buffer_load_x2(ptr addrspace(8) inreg %rsrc, i32 %ofs) {
363 ; PREGFX10-LABEL: buffer_load_x2:
364 ; PREGFX10: ; %bb.0: ; %main_body
365 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
366 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
367 ; PREGFX10-NEXT: ; return to shader part epilog
369 ; GFX10-LABEL: buffer_load_x2:
370 ; GFX10: ; %bb.0: ; %main_body
371 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
372 ; GFX10-NEXT: s_waitcnt vmcnt(0)
373 ; GFX10-NEXT: ; return to shader part epilog
375 ; GFX11-LABEL: buffer_load_x2:
376 ; GFX11: ; %bb.0: ; %main_body
377 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen
378 ; GFX11-NEXT: s_waitcnt vmcnt(0)
379 ; GFX11-NEXT: ; return to shader part epilog
381 %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 0, i32 0)
382 ret <2 x float> %data
; Adds -16 to %ofs before passing it as the second intrinsic argument. The
; expected code keeps the add as a separate v_add_nc_u32 and the load has no
; immediate offset. Only gfx10 and gfx11 expectations are present here.
385 define amdgpu_ps <4 x float> @buffer_load_negative_offset(ptr addrspace(8) inreg, i32 %ofs) {
386 ; GFX10-LABEL: buffer_load_negative_offset:
387 ; GFX10: ; %bb.0: ; %main_body
388 ; GFX10-NEXT: v_add_nc_u32_e32 v0, -16, v0
389 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
390 ; GFX10-NEXT: s_waitcnt vmcnt(0)
391 ; GFX10-NEXT: ; return to shader part epilog
393 ; GFX11-LABEL: buffer_load_negative_offset:
394 ; GFX11: ; %bb.0: ; %main_body
395 ; GFX11-NEXT: v_add_nc_u32_e32 v0, -16, v0
396 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen
397 ; GFX11-NEXT: s_waitcnt vmcnt(0)
398 ; GFX11-NEXT: ; return to shader part epilog
400 %ofs.1 = add i32 %ofs, -16
401 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %ofs.1, i32 0, i32 0)
402 ret <4 x float> %data
; Stores 0.0 to %lds, performs a buffer load, then stores 0.0 to the float
; four elements past %lds. The expected code emits the buffer load first and
; a single two-address LDS store (offset1:4) covering both stores.
; Only gfx10 and gfx11 expectations are present here.
405 define amdgpu_ps float @buffer_load_mmo(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %lds) {
406 ; GFX10-LABEL: buffer_load_mmo:
407 ; GFX10: ; %bb.0: ; %entry
408 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0
409 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
410 ; GFX10-NEXT: ds_write2_b32 v0, v2, v2 offset1:4
411 ; GFX10-NEXT: s_waitcnt vmcnt(0)
412 ; GFX10-NEXT: v_mov_b32_e32 v0, v1
413 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
414 ; GFX10-NEXT: ; return to shader part epilog
416 ; GFX11-LABEL: buffer_load_mmo:
417 ; GFX11: ; %bb.0: ; %entry
418 ; GFX11-NEXT: buffer_load_b32 v1, off, s[0:3], 0
419 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
420 ; GFX11-NEXT: ds_store_2addr_b32 v0, v2, v2 offset1:4
421 ; GFX11-NEXT: s_waitcnt vmcnt(0)
422 ; GFX11-NEXT: v_mov_b32_e32 v0, v1
423 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
424 ; GFX11-NEXT: ; return to shader part epilog
426 store float 0.0, ptr addrspace(3) %lds
427 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
428 %tmp2 = getelementptr float, ptr addrspace(3) %lds, i32 4
429 store float 0.0, ptr addrspace(3) %tmp2
; Six single-float loads at offsets %a1..%a6, each fed to one of two exports.
; The expected code merges them into one four-dword load at offset:4 and one
; two-dword load at offset:28, both addressed through v0 with offen.
; NOTE(review): the definitions of %a1..%a6 are not visible in this view.
433 define amdgpu_ps void @buffer_load_x1_offen_merged_and(ptr addrspace(8) inreg %rsrc, i32 %a) {
434 ; PREGFX10-LABEL: buffer_load_x1_offen_merged_and:
435 ; PREGFX10: ; %bb.0: ; %main_body
436 ; PREGFX10-NEXT: buffer_load_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
437 ; PREGFX10-NEXT: buffer_load_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
438 ; PREGFX10-NEXT: s_waitcnt vmcnt(1)
439 ; PREGFX10-NEXT: exp mrt0 v1, v2, v3, v4 done vm
440 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
441 ; PREGFX10-NEXT: exp mrt0 v5, v6, v0, v0 done vm
442 ; PREGFX10-NEXT: s_endpgm
444 ; GFX10-LABEL: buffer_load_x1_offen_merged_and:
445 ; GFX10: ; %bb.0: ; %main_body
446 ; GFX10-NEXT: s_clause 0x1
447 ; GFX10-NEXT: buffer_load_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
448 ; GFX10-NEXT: buffer_load_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
449 ; GFX10-NEXT: s_waitcnt vmcnt(1)
450 ; GFX10-NEXT: exp mrt0 v1, v2, v3, v4 done vm
451 ; GFX10-NEXT: s_waitcnt vmcnt(0)
452 ; GFX10-NEXT: exp mrt0 v5, v6, v0, v0 done vm
453 ; GFX10-NEXT: s_endpgm
455 ; GFX11-LABEL: buffer_load_x1_offen_merged_and:
456 ; GFX11: ; %bb.0: ; %main_body
457 ; GFX11-NEXT: s_clause 0x1
458 ; GFX11-NEXT: buffer_load_b128 v[1:4], v0, s[0:3], 0 offen offset:4
459 ; GFX11-NEXT: buffer_load_b64 v[5:6], v0, s[0:3], 0 offen offset:28
460 ; GFX11-NEXT: s_waitcnt vmcnt(1)
461 ; GFX11-NEXT: exp mrt0 v1, v2, v3, v4 done
462 ; GFX11-NEXT: s_waitcnt vmcnt(0)
463 ; GFX11-NEXT: exp mrt0 v5, v6, v0, v0 done
464 ; GFX11-NEXT: s_endpgm
472 %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
473 %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
474 %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 0)
475 %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 0)
476 %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 0)
477 %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 0)
478 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
479 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
; Six single-float loads at offsets %a1..%a6, each fed to one of two exports.
; The expected code shifts the input left by 6 once, then merges the loads
; into one four-dword load at offset:4 and one two-dword load at offset:28.
; NOTE(review): the definitions of %a1..%a6 are not visible in this view.
483 define amdgpu_ps void @buffer_load_x1_offen_merged_or(ptr addrspace(8) inreg %rsrc, i32 %inp) {
484 ; PREGFX10-LABEL: buffer_load_x1_offen_merged_or:
485 ; PREGFX10: ; %bb.0: ; %main_body
486 ; PREGFX10-NEXT: v_lshlrev_b32_e32 v4, 6, v0
487 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v4, s[0:3], 0 offen offset:4
488 ; PREGFX10-NEXT: buffer_load_dwordx2 v[4:5], v4, s[0:3], 0 offen offset:28
489 ; PREGFX10-NEXT: s_waitcnt vmcnt(1)
490 ; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
491 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
492 ; PREGFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
493 ; PREGFX10-NEXT: s_endpgm
495 ; GFX10-LABEL: buffer_load_x1_offen_merged_or:
496 ; GFX10: ; %bb.0: ; %main_body
497 ; GFX10-NEXT: v_lshlrev_b32_e32 v6, 6, v0
498 ; GFX10-NEXT: s_clause 0x1
499 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v6, s[0:3], 0 offen offset:4
500 ; GFX10-NEXT: buffer_load_dwordx2 v[4:5], v6, s[0:3], 0 offen offset:28
501 ; GFX10-NEXT: s_waitcnt vmcnt(1)
502 ; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
503 ; GFX10-NEXT: s_waitcnt vmcnt(0)
504 ; GFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
505 ; GFX10-NEXT: s_endpgm
507 ; GFX11-LABEL: buffer_load_x1_offen_merged_or:
508 ; GFX11: ; %bb.0: ; %main_body
509 ; GFX11-NEXT: v_lshlrev_b32_e32 v4, 6, v0
510 ; GFX11-NEXT: s_clause 0x1
511 ; GFX11-NEXT: buffer_load_b128 v[0:3], v4, s[0:3], 0 offen offset:4
512 ; GFX11-NEXT: buffer_load_b64 v[4:5], v4, s[0:3], 0 offen offset:28
513 ; GFX11-NEXT: s_waitcnt vmcnt(1)
514 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
515 ; GFX11-NEXT: s_waitcnt vmcnt(0)
516 ; GFX11-NEXT: exp mrt0 v4, v5, v0, v0 done
517 ; GFX11-NEXT: s_endpgm
526 %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
527 %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
528 %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 0)
529 %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 0)
530 %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 0)
531 %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 0)
532 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
533 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
; Six single-float loads in three pairs whose last intrinsic argument is
; 0, 1 and 3. The expected code merges each pair into its own two-dword load
; (offsets 4, 12, 28) carrying no modifier, glc, and glc slc respectively.
; NOTE(review): the definitions of %a1..%a6 are not visible in this view.
537 define amdgpu_ps void @buffer_load_x1_offen_merged_glc_slc(ptr addrspace(8) inreg %rsrc, i32 %a) {
538 ; PREGFX10-LABEL: buffer_load_x1_offen_merged_glc_slc:
539 ; PREGFX10: ; %bb.0: ; %main_body
540 ; PREGFX10-NEXT: buffer_load_dwordx2 v[1:2], v0, s[0:3], 0 offen offset:4
541 ; PREGFX10-NEXT: buffer_load_dwordx2 v[3:4], v0, s[0:3], 0 offen offset:12 glc
542 ; PREGFX10-NEXT: buffer_load_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
543 ; PREGFX10-NEXT: s_waitcnt vmcnt(1)
544 ; PREGFX10-NEXT: exp mrt0 v1, v2, v3, v4 done vm
545 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
546 ; PREGFX10-NEXT: exp mrt0 v5, v6, v0, v0 done vm
547 ; PREGFX10-NEXT: s_endpgm
549 ; GFX10-LABEL: buffer_load_x1_offen_merged_glc_slc:
550 ; GFX10: ; %bb.0: ; %main_body
551 ; GFX10-NEXT: s_clause 0x2
552 ; GFX10-NEXT: buffer_load_dwordx2 v[1:2], v0, s[0:3], 0 offen offset:4
553 ; GFX10-NEXT: buffer_load_dwordx2 v[3:4], v0, s[0:3], 0 offen offset:12 glc
554 ; GFX10-NEXT: buffer_load_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
555 ; GFX10-NEXT: s_waitcnt vmcnt(1)
556 ; GFX10-NEXT: exp mrt0 v1, v2, v3, v4 done vm
557 ; GFX10-NEXT: s_waitcnt vmcnt(0)
558 ; GFX10-NEXT: exp mrt0 v5, v6, v0, v0 done vm
559 ; GFX10-NEXT: s_endpgm
561 ; GFX11-LABEL: buffer_load_x1_offen_merged_glc_slc:
562 ; GFX11: ; %bb.0: ; %main_body
563 ; GFX11-NEXT: s_clause 0x2
564 ; GFX11-NEXT: buffer_load_b64 v[1:2], v0, s[0:3], 0 offen offset:4
565 ; GFX11-NEXT: buffer_load_b64 v[3:4], v0, s[0:3], 0 offen offset:12 glc
566 ; GFX11-NEXT: buffer_load_b64 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
567 ; GFX11-NEXT: s_waitcnt vmcnt(1)
568 ; GFX11-NEXT: exp mrt0 v1, v2, v3, v4 done
569 ; GFX11-NEXT: s_waitcnt vmcnt(0)
570 ; GFX11-NEXT: exp mrt0 v5, v6, v0, v0 done
571 ; GFX11-NEXT: s_endpgm
579 %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
580 %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
581 %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 1)
582 %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 1)
583 %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 3)
584 %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 3)
585 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
586 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
; Two <2 x float> loads at offsets %a1 and %a2 whose four elements feed one
; export. The expected code merges them into a single four-dword load at
; offset:4, addressed through v0 with offen.
; NOTE(review): the definitions of %a1 and %a2 are not visible in this view.
590 define amdgpu_ps void @buffer_load_x2_offen_merged_and(ptr addrspace(8) inreg %rsrc, i32 %a) {
591 ; PREGFX10-LABEL: buffer_load_x2_offen_merged_and:
592 ; PREGFX10: ; %bb.0: ; %main_body
593 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4
594 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
595 ; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
596 ; PREGFX10-NEXT: s_endpgm
598 ; GFX10-LABEL: buffer_load_x2_offen_merged_and:
599 ; GFX10: ; %bb.0: ; %main_body
600 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4
601 ; GFX10-NEXT: s_waitcnt vmcnt(0)
602 ; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
603 ; GFX10-NEXT: s_endpgm
605 ; GFX11-LABEL: buffer_load_x2_offen_merged_and:
606 ; GFX11: ; %bb.0: ; %main_body
607 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4
608 ; GFX11-NEXT: s_waitcnt vmcnt(0)
609 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
610 ; GFX11-NEXT: s_endpgm
614 %vr1 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
615 %vr2 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
616 %r1 = extractelement <2 x float> %vr1, i32 0
617 %r2 = extractelement <2 x float> %vr1, i32 1
618 %r3 = extractelement <2 x float> %vr2, i32 0
619 %r4 = extractelement <2 x float> %vr2, i32 1
620 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
; Two <2 x float> loads at offsets %a1 and %a2 whose four elements feed one
; export. The expected code shifts the input left by 4, then merges the
; loads into a single four-dword load at offset:4 with offen.
; NOTE(review): the definitions of %a1 and %a2 are not visible in this view.
624 define amdgpu_ps void @buffer_load_x2_offen_merged_or(ptr addrspace(8) inreg %rsrc, i32 %inp) {
625 ; PREGFX10-LABEL: buffer_load_x2_offen_merged_or:
626 ; PREGFX10: ; %bb.0: ; %main_body
627 ; PREGFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
628 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4
629 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
630 ; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
631 ; PREGFX10-NEXT: s_endpgm
633 ; GFX10-LABEL: buffer_load_x2_offen_merged_or:
634 ; GFX10: ; %bb.0: ; %main_body
635 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
636 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4
637 ; GFX10-NEXT: s_waitcnt vmcnt(0)
638 ; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
639 ; GFX10-NEXT: s_endpgm
641 ; GFX11-LABEL: buffer_load_x2_offen_merged_or:
642 ; GFX11: ; %bb.0: ; %main_body
643 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
644 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4
645 ; GFX11-NEXT: s_waitcnt vmcnt(0)
646 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
647 ; GFX11-NEXT: s_endpgm
652 %vr1 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
653 %vr2 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
654 %r1 = extractelement <2 x float> %vr1, i32 0
655 %r2 = extractelement <2 x float> %vr1, i32 1
656 %r3 = extractelement <2 x float> %vr2, i32 0
657 %r4 = extractelement <2 x float> %vr2, i32 1
658 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
; Six single-float loads at constant offsets 4, 8, 12, 16, 28 and 32. The
; expected code merges the first four into one four-dword load at offset:4
; and the last two into one two-dword load at offset:28, with no VGPR address.
662 define amdgpu_ps void @buffer_load_x1_offset_merged(ptr addrspace(8) inreg %rsrc) {
663 ; PREGFX10-LABEL: buffer_load_x1_offset_merged:
664 ; PREGFX10: ; %bb.0: ; %main_body
665 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
666 ; PREGFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
667 ; PREGFX10-NEXT: s_waitcnt vmcnt(1)
668 ; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
669 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
670 ; PREGFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
671 ; PREGFX10-NEXT: s_endpgm
673 ; GFX10-LABEL: buffer_load_x1_offset_merged:
674 ; GFX10: ; %bb.0: ; %main_body
675 ; GFX10-NEXT: s_clause 0x1
676 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
677 ; GFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
678 ; GFX10-NEXT: s_waitcnt vmcnt(1)
679 ; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
680 ; GFX10-NEXT: s_waitcnt vmcnt(0)
681 ; GFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
682 ; GFX10-NEXT: s_endpgm
684 ; GFX11-LABEL: buffer_load_x1_offset_merged:
685 ; GFX11: ; %bb.0: ; %main_body
686 ; GFX11-NEXT: s_clause 0x1
687 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4
688 ; GFX11-NEXT: buffer_load_b64 v[4:5], off, s[0:3], 0 offset:28
689 ; GFX11-NEXT: s_waitcnt vmcnt(1)
690 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
691 ; GFX11-NEXT: s_waitcnt vmcnt(0)
692 ; GFX11-NEXT: exp mrt0 v4, v5, v0, v0 done
693 ; GFX11-NEXT: s_endpgm
695 %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
696 %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 8, i32 0, i32 0)
697 %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
698 %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 16, i32 0, i32 0)
699 %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 28, i32 0, i32 0)
700 %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 32, i32 0, i32 0)
701 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
702 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
; Two <2 x float> raw buffer loads from the same resource at byte offsets 4 and
; 12 feed a single export. The assertions below expect one 128-bit buffer load
; at offset:4 on every RUN target.
706 define amdgpu_ps void @buffer_load_x2_offset_merged(ptr addrspace(8) inreg %rsrc) {
707 ; PREGFX10-LABEL: buffer_load_x2_offset_merged:
708 ; PREGFX10: ; %bb.0: ; %main_body
709 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
710 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
711 ; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
712 ; PREGFX10-NEXT: s_endpgm
714 ; GFX10-LABEL: buffer_load_x2_offset_merged:
715 ; GFX10: ; %bb.0: ; %main_body
716 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
717 ; GFX10-NEXT: s_waitcnt vmcnt(0)
718 ; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
719 ; GFX10-NEXT: s_endpgm
721 ; GFX11-LABEL: buffer_load_x2_offset_merged:
722 ; GFX11: ; %bb.0: ; %main_body
723 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4
724 ; GFX11-NEXT: s_waitcnt vmcnt(0)
725 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
726 ; GFX11-NEXT: s_endpgm
728 %vr1 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
729 %vr2 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
730 %r1 = extractelement <2 x float> %vr1, i32 0
731 %r2 = extractelement <2 x float> %vr1, i32 1
732 %r3 = extractelement <2 x float> %vr2, i32 0
733 %r4 = extractelement <2 x float> %vr2, i32 1
734 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
; Integer-typed raw buffer loads (<4 x i32>, <2 x i32>, i32) issued with last
; operand 0, 1 and 2, then bitcast to float types and returned in a struct.
; The assertions expect 128-, 64- and 32-bit loads carrying no modifier, glc
; and slc respectively.
738 define amdgpu_ps {<4 x float>, <2 x float>, float} @buffer_load_int(ptr addrspace(8) inreg) {
739 ; PREGFX10-LABEL: buffer_load_int:
740 ; PREGFX10: ; %bb.0: ; %main_body
741 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
742 ; PREGFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 glc
743 ; PREGFX10-NEXT: buffer_load_dword v6, off, s[0:3], 0 slc
744 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
745 ; PREGFX10-NEXT: ; return to shader part epilog
747 ; GFX10-LABEL: buffer_load_int:
748 ; GFX10: ; %bb.0: ; %main_body
749 ; GFX10-NEXT: s_clause 0x2
750 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
751 ; GFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 glc
752 ; GFX10-NEXT: buffer_load_dword v6, off, s[0:3], 0 slc
753 ; GFX10-NEXT: s_waitcnt vmcnt(0)
754 ; GFX10-NEXT: ; return to shader part epilog
756 ; GFX11-LABEL: buffer_load_int:
757 ; GFX11: ; %bb.0: ; %main_body
758 ; GFX11-NEXT: s_clause 0x2
759 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
760 ; GFX11-NEXT: buffer_load_b64 v[4:5], off, s[0:3], 0 glc
761 ; GFX11-NEXT: buffer_load_b32 v6, off, s[0:3], 0 slc
762 ; GFX11-NEXT: s_waitcnt vmcnt(0)
763 ; GFX11-NEXT: ; return to shader part epilog
765 %data = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) %0, i32 0, i32 0, i32 0)
766 %data_glc = call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) %0, i32 0, i32 0, i32 1)
767 %data_slc = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %0, i32 0, i32 0, i32 2)
768 %fdata = bitcast <4 x i32> %data to <4 x float>
769 %fdata_glc = bitcast <2 x i32> %data_glc to <2 x float>
770 %fdata_slc = bitcast i32 %data_slc to float
771 %r0 = insertvalue {<4 x float>, <2 x float>, float} undef, <4 x float> %fdata, 0
772 %r1 = insertvalue {<4 x float>, <2 x float>, float} %r0, <2 x float> %fdata_glc, 1
773 %r2 = insertvalue {<4 x float>, <2 x float>, float} %r1, float %fdata_slc, 2
774 ret {<4 x float>, <2 x float>, float} %r2
; i8 raw buffer load that is zero-extended to i32 and converted with uitofp.
; The assertions expect an unsigned byte load followed by v_cvt_f32_ubyte0.
777 define amdgpu_ps float @raw_ptr_buffer_load_ubyte(ptr addrspace(8) inreg %rsrc) {
778 ; PREGFX10-LABEL: raw_ptr_buffer_load_ubyte:
779 ; PREGFX10: ; %bb.0: ; %main_body
780 ; PREGFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
781 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
782 ; PREGFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
783 ; PREGFX10-NEXT: ; return to shader part epilog
785 ; GFX10-LABEL: raw_ptr_buffer_load_ubyte:
786 ; GFX10: ; %bb.0: ; %main_body
787 ; GFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
788 ; GFX10-NEXT: s_waitcnt vmcnt(0)
789 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
790 ; GFX10-NEXT: ; return to shader part epilog
792 ; GFX11-LABEL: raw_ptr_buffer_load_ubyte:
793 ; GFX11: ; %bb.0: ; %main_body
794 ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0
795 ; GFX11-NEXT: s_waitcnt vmcnt(0)
796 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
797 ; GFX11-NEXT: ; return to shader part epilog
799 %tmp = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
800 %tmp2 = zext i8 %tmp to i32
801 %val = uitofp i32 %tmp2 to float
; i16 raw buffer load that is zero-extended to i32 and converted with uitofp.
; The assertions expect an unsigned 16-bit load followed by v_cvt_f32_u32.
805 define amdgpu_ps float @raw_ptr_buffer_load_i16(ptr addrspace(8) inreg %rsrc) {
806 ; PREGFX10-LABEL: raw_ptr_buffer_load_i16:
807 ; PREGFX10: ; %bb.0: ; %main_body
808 ; PREGFX10-NEXT: buffer_load_ushort v0, off, s[0:3], 0
809 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
810 ; PREGFX10-NEXT: v_cvt_f32_u32_e32 v0, v0
811 ; PREGFX10-NEXT: ; return to shader part epilog
813 ; GFX10-LABEL: raw_ptr_buffer_load_i16:
814 ; GFX10: ; %bb.0: ; %main_body
815 ; GFX10-NEXT: buffer_load_ushort v0, off, s[0:3], 0
816 ; GFX10-NEXT: s_waitcnt vmcnt(0)
817 ; GFX10-NEXT: v_cvt_f32_u32_e32 v0, v0
818 ; GFX10-NEXT: ; return to shader part epilog
820 ; GFX11-LABEL: raw_ptr_buffer_load_i16:
821 ; GFX11: ; %bb.0: ; %main_body
822 ; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0
823 ; GFX11-NEXT: s_waitcnt vmcnt(0)
824 ; GFX11-NEXT: v_cvt_f32_u32_e32 v0, v0
825 ; GFX11-NEXT: ; return to shader part epilog
827 %tmp = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
828 %tmp2 = zext i16 %tmp to i32
829 %val = uitofp i32 %tmp2 to float
; i8 raw buffer load that is sign-extended to i32 and converted with sitofp.
; The assertions expect a signed byte load followed by v_cvt_f32_i32.
833 define amdgpu_ps float @raw_ptr_buffer_load_sbyte(ptr addrspace(8) inreg %rsrc) {
834 ; PREGFX10-LABEL: raw_ptr_buffer_load_sbyte:
835 ; PREGFX10: ; %bb.0: ; %main_body
836 ; PREGFX10-NEXT: buffer_load_sbyte v0, off, s[0:3], 0
837 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
838 ; PREGFX10-NEXT: v_cvt_f32_i32_e32 v0, v0
839 ; PREGFX10-NEXT: ; return to shader part epilog
841 ; GFX10-LABEL: raw_ptr_buffer_load_sbyte:
842 ; GFX10: ; %bb.0: ; %main_body
843 ; GFX10-NEXT: buffer_load_sbyte v0, off, s[0:3], 0
844 ; GFX10-NEXT: s_waitcnt vmcnt(0)
845 ; GFX10-NEXT: v_cvt_f32_i32_e32 v0, v0
846 ; GFX10-NEXT: ; return to shader part epilog
848 ; GFX11-LABEL: raw_ptr_buffer_load_sbyte:
849 ; GFX11: ; %bb.0: ; %main_body
850 ; GFX11-NEXT: buffer_load_i8 v0, off, s[0:3], 0
851 ; GFX11-NEXT: s_waitcnt vmcnt(0)
852 ; GFX11-NEXT: v_cvt_f32_i32_e32 v0, v0
853 ; GFX11-NEXT: ; return to shader part epilog
855 %tmp = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
856 %tmp2 = sext i8 %tmp to i32
857 %val = sitofp i32 %tmp2 to float
; i16 raw buffer load that is sign-extended to i32 and converted with sitofp.
; The assertions expect a signed 16-bit load followed by v_cvt_f32_i32.
861 define amdgpu_ps float @raw_ptr_buffer_load_sshort(ptr addrspace(8) inreg %rsrc) {
862 ; PREGFX10-LABEL: raw_ptr_buffer_load_sshort:
863 ; PREGFX10: ; %bb.0: ; %main_body
864 ; PREGFX10-NEXT: buffer_load_sshort v0, off, s[0:3], 0
865 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
866 ; PREGFX10-NEXT: v_cvt_f32_i32_e32 v0, v0
867 ; PREGFX10-NEXT: ; return to shader part epilog
869 ; GFX10-LABEL: raw_ptr_buffer_load_sshort:
870 ; GFX10: ; %bb.0: ; %main_body
871 ; GFX10-NEXT: buffer_load_sshort v0, off, s[0:3], 0
872 ; GFX10-NEXT: s_waitcnt vmcnt(0)
873 ; GFX10-NEXT: v_cvt_f32_i32_e32 v0, v0
874 ; GFX10-NEXT: ; return to shader part epilog
876 ; GFX11-LABEL: raw_ptr_buffer_load_sshort:
877 ; GFX11: ; %bb.0: ; %main_body
878 ; GFX11-NEXT: buffer_load_i16 v0, off, s[0:3], 0
879 ; GFX11-NEXT: s_waitcnt vmcnt(0)
880 ; GFX11-NEXT: v_cvt_f32_i32_e32 v0, v0
881 ; GFX11-NEXT: ; return to shader part epilog
883 %tmp = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
884 %tmp2 = sext i16 %tmp to i32
885 %val = sitofp i32 %tmp2 to float
; half raw buffer load whose result is stored through the addrspace(3) pointer
; argument. The assertions expect a 16-bit unsigned buffer load and a 16-bit DS
; store; the verde/tonga assertions additionally expect m0 to be set to -1
; before the store.
889 define amdgpu_ps void @raw_ptr_buffer_load_f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
890 ; PREGFX10-LABEL: raw_ptr_buffer_load_f16:
891 ; PREGFX10: ; %bb.0: ; %main_body
892 ; PREGFX10-NEXT: buffer_load_ushort v1, off, s[0:3], 0
893 ; PREGFX10-NEXT: s_mov_b32 m0, -1
894 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
895 ; PREGFX10-NEXT: ds_write_b16 v0, v1
896 ; PREGFX10-NEXT: s_endpgm
898 ; GFX10-LABEL: raw_ptr_buffer_load_f16:
899 ; GFX10: ; %bb.0: ; %main_body
900 ; GFX10-NEXT: buffer_load_ushort v1, off, s[0:3], 0
901 ; GFX10-NEXT: s_waitcnt vmcnt(0)
902 ; GFX10-NEXT: ds_write_b16 v0, v1
903 ; GFX10-NEXT: s_endpgm
905 ; GFX11-LABEL: raw_ptr_buffer_load_f16:
906 ; GFX11: ; %bb.0: ; %main_body
907 ; GFX11-NEXT: buffer_load_u16 v1, off, s[0:3], 0
908 ; GFX11-NEXT: s_waitcnt vmcnt(0)
909 ; GFX11-NEXT: ds_store_b16 v0, v1
910 ; GFX11-NEXT: s_endpgm
912 %val = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
913 store half %val, ptr addrspace(3) %ptr
; <2 x half> raw buffer load whose result is stored through the addrspace(3)
; pointer argument. The assertions expect a single 32-bit buffer load and a
; 32-bit DS store; the verde/tonga assertions also expect m0 to be set to -1.
917 define amdgpu_ps void @raw_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
918 ; PREGFX10-LABEL: raw_ptr_buffer_load_v2f16:
919 ; PREGFX10: ; %bb.0: ; %main_body
920 ; PREGFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0
921 ; PREGFX10-NEXT: s_mov_b32 m0, -1
922 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
923 ; PREGFX10-NEXT: ds_write_b32 v0, v1
924 ; PREGFX10-NEXT: s_endpgm
926 ; GFX10-LABEL: raw_ptr_buffer_load_v2f16:
927 ; GFX10: ; %bb.0: ; %main_body
928 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0
929 ; GFX10-NEXT: s_waitcnt vmcnt(0)
930 ; GFX10-NEXT: ds_write_b32 v0, v1
931 ; GFX10-NEXT: s_endpgm
933 ; GFX11-LABEL: raw_ptr_buffer_load_v2f16:
934 ; GFX11: ; %bb.0: ; %main_body
935 ; GFX11-NEXT: buffer_load_b32 v1, off, s[0:3], 0
936 ; GFX11-NEXT: s_waitcnt vmcnt(0)
937 ; GFX11-NEXT: ds_store_b32 v0, v1
938 ; GFX11-NEXT: s_endpgm
940 %val = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
941 store <2 x half> %val, ptr addrspace(3) %ptr
; <4 x half> raw buffer load whose result is stored through the addrspace(3)
; pointer argument. The assertions expect a single 64-bit buffer load and a
; 64-bit DS store; the verde/tonga assertions also expect m0 to be set to -1.
945 define amdgpu_ps void @raw_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
946 ; PREGFX10-LABEL: raw_ptr_buffer_load_v4f16:
948 ; PREGFX10-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0
949 ; PREGFX10-NEXT: s_mov_b32 m0, -1
950 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
951 ; PREGFX10-NEXT: ds_write_b64 v0, v[1:2]
952 ; PREGFX10-NEXT: s_endpgm
954 ; GFX10-LABEL: raw_ptr_buffer_load_v4f16:
956 ; GFX10-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0
957 ; GFX10-NEXT: s_waitcnt vmcnt(0)
958 ; GFX10-NEXT: ds_write_b64 v0, v[1:2]
959 ; GFX10-NEXT: s_endpgm
961 ; GFX11-LABEL: raw_ptr_buffer_load_v4f16:
963 ; GFX11-NEXT: buffer_load_b64 v[1:2], off, s[0:3], 0
964 ; GFX11-NEXT: s_waitcnt vmcnt(0)
965 ; GFX11-NEXT: ds_store_b64 v0, v[1:2]
966 ; GFX11-NEXT: s_endpgm
967 %val = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
968 store <4 x half> %val, ptr addrspace(3) %ptr
973 ; define amdgpu_ps void @raw_ptr_buffer_load_v6f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
974 ; %val = call <6 x half> @llvm.amdgcn.raw.ptr.buffer.load.v6f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
975 ; store <6 x half> %val, ptr addrspace(3) %ptr
; <8 x half> raw buffer load whose result is stored through the addrspace(3)
; pointer argument. The assertions expect a single 128-bit buffer load and a
; 128-bit DS store on gfx1010 and gfx1100.
; NOTE(review): no verde/tonga assertions are present for this function;
; presumably the two RUN lines sharing that prefix produce different output, so
; the update script emitted none - confirm.
979 define amdgpu_ps void @raw_ptr_buffer_load_v8f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
980 ; GFX10-LABEL: raw_ptr_buffer_load_v8f16:
982 ; GFX10-NEXT: buffer_load_dwordx4 v[1:4], off, s[0:3], 0
983 ; GFX10-NEXT: s_waitcnt vmcnt(0)
984 ; GFX10-NEXT: ds_write_b128 v0, v[1:4]
985 ; GFX10-NEXT: s_endpgm
987 ; GFX11-LABEL: raw_ptr_buffer_load_v8f16:
989 ; GFX11-NEXT: buffer_load_b128 v[1:4], off, s[0:3], 0
990 ; GFX11-NEXT: s_waitcnt vmcnt(0)
991 ; GFX11-NEXT: ds_store_b128 v0, v[1:4]
992 ; GFX11-NEXT: s_endpgm
993 %val = call <8 x half> @llvm.amdgcn.raw.ptr.buffer.load.v8f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
994 store <8 x half> %val, ptr addrspace(3) %ptr
; <2 x i16> raw buffer load whose result is stored through the addrspace(3)
; pointer argument. The assertions expect a single 32-bit buffer load and a
; 32-bit DS store; the verde/tonga assertions also expect m0 to be set to -1.
998 define amdgpu_ps void @raw_ptr_buffer_load_v2i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
999 ; PREGFX10-LABEL: raw_ptr_buffer_load_v2i16:
1000 ; PREGFX10: ; %bb.0: ; %main_body
1001 ; PREGFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0
1002 ; PREGFX10-NEXT: s_mov_b32 m0, -1
1003 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1004 ; PREGFX10-NEXT: ds_write_b32 v0, v1
1005 ; PREGFX10-NEXT: s_endpgm
1007 ; GFX10-LABEL: raw_ptr_buffer_load_v2i16:
1008 ; GFX10: ; %bb.0: ; %main_body
1009 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0
1010 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1011 ; GFX10-NEXT: ds_write_b32 v0, v1
1012 ; GFX10-NEXT: s_endpgm
1014 ; GFX11-LABEL: raw_ptr_buffer_load_v2i16:
1015 ; GFX11: ; %bb.0: ; %main_body
1016 ; GFX11-NEXT: buffer_load_b32 v1, off, s[0:3], 0
1017 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1018 ; GFX11-NEXT: ds_store_b32 v0, v1
1019 ; GFX11-NEXT: s_endpgm
1021 %val = call <2 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v2i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
1022 store <2 x i16> %val, ptr addrspace(3) %ptr
; <4 x i16> raw buffer load whose result is stored through the addrspace(3)
; pointer argument. The assertions expect a single 64-bit buffer load and a
; 64-bit DS store; the verde/tonga assertions also expect m0 to be set to -1.
1026 define amdgpu_ps void @raw_ptr_buffer_load_v4i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
1027 ; PREGFX10-LABEL: raw_ptr_buffer_load_v4i16:
1028 ; PREGFX10: ; %bb.0:
1029 ; PREGFX10-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0
1030 ; PREGFX10-NEXT: s_mov_b32 m0, -1
1031 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1032 ; PREGFX10-NEXT: ds_write_b64 v0, v[1:2]
1033 ; PREGFX10-NEXT: s_endpgm
1035 ; GFX10-LABEL: raw_ptr_buffer_load_v4i16:
1037 ; GFX10-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0
1038 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1039 ; GFX10-NEXT: ds_write_b64 v0, v[1:2]
1040 ; GFX10-NEXT: s_endpgm
1042 ; GFX11-LABEL: raw_ptr_buffer_load_v4i16:
1044 ; GFX11-NEXT: buffer_load_b64 v[1:2], off, s[0:3], 0
1045 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1046 ; GFX11-NEXT: ds_store_b64 v0, v[1:2]
1047 ; GFX11-NEXT: s_endpgm
1048 %val = call <4 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v4i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
1049 store <4 x i16> %val, ptr addrspace(3) %ptr
1054 ; define amdgpu_ps void @raw_ptr_buffer_load_v6i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
1055 ; %val = call <6 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v6i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
1056 ; store <6 x i16> %val, ptr addrspace(3) %ptr
; <8 x i16> raw buffer load whose result is stored through the addrspace(3)
; pointer argument. The assertions expect a single 128-bit buffer load and a
; 128-bit DS store on gfx1010 and gfx1100.
; NOTE(review): no verde/tonga assertions are present for this function;
; presumably the two RUN lines sharing that prefix produce different output, so
; the update script emitted none - confirm.
1060 define amdgpu_ps void @raw_ptr_buffer_load_v8i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
1061 ; GFX10-LABEL: raw_ptr_buffer_load_v8i16:
1063 ; GFX10-NEXT: buffer_load_dwordx4 v[1:4], off, s[0:3], 0
1064 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1065 ; GFX10-NEXT: ds_write_b128 v0, v[1:4]
1066 ; GFX10-NEXT: s_endpgm
1068 ; GFX11-LABEL: raw_ptr_buffer_load_v8i16:
1070 ; GFX11-NEXT: buffer_load_b128 v[1:4], off, s[0:3], 0
1071 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1072 ; GFX11-NEXT: ds_store_b128 v0, v[1:4]
1073 ; GFX11-NEXT: s_endpgm
1074 %val = call <8 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v8i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
1075 store <8 x i16> %val, ptr addrspace(3) %ptr
; Six scalar float raw buffer loads from one resource at byte offsets 4, 8, 12,
; 16, 28 and 32 (last operand 0), consumed by two exports. The assertions
; expect them to be combined into one 128-bit load at offset:4 and one 64-bit
; load at offset:28.
1079 define amdgpu_ps void @raw_ptr_buffer_load_x1_offset_merged(ptr addrspace(8) inreg %rsrc) {
1080 ; PREGFX10-LABEL: raw_ptr_buffer_load_x1_offset_merged:
1081 ; PREGFX10: ; %bb.0: ; %main_body
1082 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
1083 ; PREGFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
1084 ; PREGFX10-NEXT: s_waitcnt vmcnt(1)
1085 ; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
1086 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1087 ; PREGFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
1088 ; PREGFX10-NEXT: s_endpgm
1090 ; GFX10-LABEL: raw_ptr_buffer_load_x1_offset_merged:
1091 ; GFX10: ; %bb.0: ; %main_body
1092 ; GFX10-NEXT: s_clause 0x1
1093 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
1094 ; GFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
1095 ; GFX10-NEXT: s_waitcnt vmcnt(1)
1096 ; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
1097 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1098 ; GFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
1099 ; GFX10-NEXT: s_endpgm
1101 ; GFX11-LABEL: raw_ptr_buffer_load_x1_offset_merged:
1102 ; GFX11: ; %bb.0: ; %main_body
1103 ; GFX11-NEXT: s_clause 0x1
1104 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4
1105 ; GFX11-NEXT: buffer_load_b64 v[4:5], off, s[0:3], 0 offset:28
1106 ; GFX11-NEXT: s_waitcnt vmcnt(1)
1107 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
1108 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1109 ; GFX11-NEXT: exp mrt0 v4, v5, v0, v0 done
1110 ; GFX11-NEXT: s_endpgm
1112 %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
1113 %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 8, i32 0, i32 0)
1114 %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
1115 %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 16, i32 0, i32 0)
1116 %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 28, i32 0, i32 0)
1117 %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 32, i32 0, i32 0)
1118 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
1119 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
; Same six scalar float loads (byte offsets 4, 8, 12, 16, 28, 32) as the merged
; variant, but issued with last operand 8. The assertions expect six separate
; 32-bit loads, i.e. no combining into wider loads.
; NOTE(review): the value 8 is presumably the swizzle bit of the aux operand,
; going by the function name - confirm against the intrinsic documentation.
1123 define amdgpu_ps void @raw_ptr_buffer_load_x1_offset_swizzled_not_merged(ptr addrspace(8) inreg %rsrc) {
1124 ; PREGFX10-LABEL: raw_ptr_buffer_load_x1_offset_swizzled_not_merged:
1125 ; PREGFX10: ; %bb.0: ; %main_body
1126 ; PREGFX10-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4
1127 ; PREGFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:8
1128 ; PREGFX10-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:12
1129 ; PREGFX10-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:16
1130 ; PREGFX10-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:28
1131 ; PREGFX10-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:32
1132 ; PREGFX10-NEXT: s_waitcnt vmcnt(2)
1133 ; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
1134 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1135 ; PREGFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
1136 ; PREGFX10-NEXT: s_endpgm
1138 ; GFX10-LABEL: raw_ptr_buffer_load_x1_offset_swizzled_not_merged:
1139 ; GFX10: ; %bb.0: ; %main_body
1140 ; GFX10-NEXT: s_clause 0x5
1141 ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4
1142 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:8
1143 ; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:12
1144 ; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:16
1145 ; GFX10-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:28
1146 ; GFX10-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:32
1147 ; GFX10-NEXT: s_waitcnt vmcnt(2)
1148 ; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
1149 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1150 ; GFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
1151 ; GFX10-NEXT: s_endpgm
1153 ; GFX11-LABEL: raw_ptr_buffer_load_x1_offset_swizzled_not_merged:
1154 ; GFX11: ; %bb.0: ; %main_body
1155 ; GFX11-NEXT: s_clause 0x5
1156 ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 offset:4
1157 ; GFX11-NEXT: buffer_load_b32 v1, off, s[0:3], 0 offset:8
1158 ; GFX11-NEXT: buffer_load_b32 v2, off, s[0:3], 0 offset:12
1159 ; GFX11-NEXT: buffer_load_b32 v3, off, s[0:3], 0 offset:16
1160 ; GFX11-NEXT: buffer_load_b32 v4, off, s[0:3], 0 offset:28
1161 ; GFX11-NEXT: buffer_load_b32 v5, off, s[0:3], 0 offset:32
1162 ; GFX11-NEXT: s_waitcnt vmcnt(2)
1163 ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
1164 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1165 ; GFX11-NEXT: exp mrt0 v4, v5, v0, v0 done
1166 ; GFX11-NEXT: s_endpgm
1168 %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 8)
1169 %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 8, i32 0, i32 8)
1170 %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 12, i32 0, i32 8)
1171 %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 16, i32 0, i32 8)
1172 %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 28, i32 0, i32 8)
1173 %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 32, i32 0, i32 8)
1174 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
1175 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
; double raw buffer load whose offset operand is %voffset + 60. The assertions
; expect the constant to be folded into the instruction as `offen offset:60`
; on a 64-bit load. The verde/tonga and gfx1010 assertions first copy s6, s7,
; s16 and s17 into s[8:11] for the resource; gfx1100 uses s[0:3] directly.
1179 define double @buffer_load_f64__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1180 ; PREGFX10-LABEL: buffer_load_f64__voffset_add:
1181 ; PREGFX10: ; %bb.0:
1182 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1183 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1184 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1185 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1186 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1187 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1188 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1189 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1191 ; GFX10-LABEL: buffer_load_f64__voffset_add:
1193 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1194 ; GFX10-NEXT: s_mov_b32 s11, s17
1195 ; GFX10-NEXT: s_mov_b32 s10, s16
1196 ; GFX10-NEXT: s_mov_b32 s9, s7
1197 ; GFX10-NEXT: s_mov_b32 s8, s6
1198 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1199 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1200 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1202 ; GFX11-LABEL: buffer_load_f64__voffset_add:
1204 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1205 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1206 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1207 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1208 %voffset.add = add i32 %voffset, 60
1209 %data = call double @llvm.amdgcn.raw.ptr.buffer.load.f64(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
; <2 x double> raw buffer load whose offset operand is %voffset + 60. The
; assertions expect the constant to be folded into the instruction as
; `offen offset:60` on a 128-bit load. The verde/tonga and gfx1010 assertions
; first copy s6, s7, s16 and s17 into s[8:11]; gfx1100 uses s[0:3] directly.
1213 define <2 x double> @buffer_load_v2f64__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1214 ; PREGFX10-LABEL: buffer_load_v2f64__voffset_add:
1215 ; PREGFX10: ; %bb.0:
1216 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1217 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1218 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1219 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1220 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1221 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1222 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1223 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1225 ; GFX10-LABEL: buffer_load_v2f64__voffset_add:
1227 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1228 ; GFX10-NEXT: s_mov_b32 s11, s17
1229 ; GFX10-NEXT: s_mov_b32 s10, s16
1230 ; GFX10-NEXT: s_mov_b32 s9, s7
1231 ; GFX10-NEXT: s_mov_b32 s8, s6
1232 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1233 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1234 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1236 ; GFX11-LABEL: buffer_load_v2f64__voffset_add:
1238 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1239 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1240 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1241 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1242 %voffset.add = add i32 %voffset, 60
1243 %data = call <2 x double> @llvm.amdgcn.raw.ptr.buffer.load.v2f64(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1244 ret <2 x double> %data
; i64 raw buffer load whose offset operand is %voffset + 60. The assertions
; expect the constant to be folded into the instruction as `offen offset:60`
; on a 64-bit load. The verde/tonga and gfx1010 assertions first copy s6, s7,
; s16 and s17 into s[8:11] for the resource; gfx1100 uses s[0:3] directly.
1247 define i64 @buffer_load_i64__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1248 ; PREGFX10-LABEL: buffer_load_i64__voffset_add:
1249 ; PREGFX10: ; %bb.0:
1250 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1251 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1252 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1253 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1254 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1255 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1256 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1257 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1259 ; GFX10-LABEL: buffer_load_i64__voffset_add:
1261 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1262 ; GFX10-NEXT: s_mov_b32 s11, s17
1263 ; GFX10-NEXT: s_mov_b32 s10, s16
1264 ; GFX10-NEXT: s_mov_b32 s9, s7
1265 ; GFX10-NEXT: s_mov_b32 s8, s6
1266 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1267 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1268 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1270 ; GFX11-LABEL: buffer_load_i64__voffset_add:
1272 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1273 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1274 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1275 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1276 %voffset.add = add i32 %voffset, 60
1277 %data = call i64 @llvm.amdgcn.raw.ptr.buffer.load.i64(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
; <2 x i64> raw buffer load whose offset operand is %voffset + 60. The
; assertions expect the constant to be folded into the instruction as
; `offen offset:60` on a 128-bit load. The verde/tonga and gfx1010 assertions
; first copy s6, s7, s16 and s17 into s[8:11]; gfx1100 uses s[0:3] directly.
1281 define <2 x i64> @buffer_load_v2i64__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1282 ; PREGFX10-LABEL: buffer_load_v2i64__voffset_add:
1283 ; PREGFX10: ; %bb.0:
1284 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1285 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1286 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1287 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1288 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1289 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1290 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1291 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1293 ; GFX10-LABEL: buffer_load_v2i64__voffset_add:
1295 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1296 ; GFX10-NEXT: s_mov_b32 s11, s17
1297 ; GFX10-NEXT: s_mov_b32 s10, s16
1298 ; GFX10-NEXT: s_mov_b32 s9, s7
1299 ; GFX10-NEXT: s_mov_b32 s8, s6
1300 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1301 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1302 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1304 ; GFX11-LABEL: buffer_load_v2i64__voffset_add:
1306 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1307 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1308 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1309 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1310 %voffset.add = add i32 %voffset, 60
1311 %data = call <2 x i64> @llvm.amdgcn.raw.ptr.buffer.load.v2i64(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
; Raw buffer load returning a `ptr` (default address space) whose offset
; operand is %voffset + 60. The assertions expect a 64-bit load with the
; constant folded in as `offen offset:60`. The verde/tonga and gfx1010
; assertions first copy s6, s7, s16 and s17 into s[8:11]; gfx1100 uses s[0:3].
1315 define ptr @buffer_load_p0__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1316 ; PREGFX10-LABEL: buffer_load_p0__voffset_add:
1317 ; PREGFX10: ; %bb.0:
1318 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1319 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1320 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1321 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1322 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1323 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1324 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1325 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1327 ; GFX10-LABEL: buffer_load_p0__voffset_add:
1329 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1330 ; GFX10-NEXT: s_mov_b32 s11, s17
1331 ; GFX10-NEXT: s_mov_b32 s10, s16
1332 ; GFX10-NEXT: s_mov_b32 s9, s7
1333 ; GFX10-NEXT: s_mov_b32 s8, s6
1334 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1335 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1336 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1338 ; GFX11-LABEL: buffer_load_p0__voffset_add:
1340 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1341 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1342 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1343 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1344 %voffset.add = add i32 %voffset, 60
1345 %data = call ptr @llvm.amdgcn.raw.ptr.buffer.load.p0(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
; Raw buffer load returning `<2 x ptr>` whose offset operand is %voffset + 60.
; The assertions expect a 128-bit load with the constant folded in as
; `offen offset:60`. The verde/tonga and gfx1010 assertions first copy s6, s7,
; s16 and s17 into s[8:11]; gfx1100 uses s[0:3] directly.
; The intrinsic is spelled with the `.v2p0` suffix so the mangled name matches
; the <2 x ptr> return type instead of reusing the scalar `.p0` name.
1349 define <2 x ptr> @buffer_load_v2p0__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1350 ; PREGFX10-LABEL: buffer_load_v2p0__voffset_add:
1351 ; PREGFX10: ; %bb.0:
1352 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1353 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1354 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1355 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1356 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1357 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1358 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1359 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1361 ; GFX10-LABEL: buffer_load_v2p0__voffset_add:
1363 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1364 ; GFX10-NEXT: s_mov_b32 s11, s17
1365 ; GFX10-NEXT: s_mov_b32 s10, s16
1366 ; GFX10-NEXT: s_mov_b32 s9, s7
1367 ; GFX10-NEXT: s_mov_b32 s8, s6
1368 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1369 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1370 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1372 ; GFX11-LABEL: buffer_load_v2p0__voffset_add:
1374 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1375 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1376 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1377 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1378 %voffset.add = add i32 %voffset, 60
1379 %data = call <2 x ptr> @llvm.amdgcn.raw.ptr.buffer.load.v2p0(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
; Raw buffer load returning `ptr addrspace(1)` whose offset operand is
; %voffset + 60. The assertions expect a 64-bit load with the constant folded
; in as `offen offset:60`. The verde/tonga and gfx1010 assertions first copy
; s6, s7, s16 and s17 into s[8:11]; gfx1100 uses s[0:3] directly.
1383 define ptr addrspace(1) @buffer_load_p1__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1384 ; PREGFX10-LABEL: buffer_load_p1__voffset_add:
1385 ; PREGFX10: ; %bb.0:
1386 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1387 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1388 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1389 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1390 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1391 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1392 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1393 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1395 ; GFX10-LABEL: buffer_load_p1__voffset_add:
1397 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1398 ; GFX10-NEXT: s_mov_b32 s11, s17
1399 ; GFX10-NEXT: s_mov_b32 s10, s16
1400 ; GFX10-NEXT: s_mov_b32 s9, s7
1401 ; GFX10-NEXT: s_mov_b32 s8, s6
1402 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1403 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1404 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1406 ; GFX11-LABEL: buffer_load_p1__voffset_add:
1408 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1409 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1410 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1411 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1412 %voffset.add = add i32 %voffset, 60
1413 %data = call ptr addrspace(1) @llvm.amdgcn.raw.ptr.buffer.load.p1(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1414 ret ptr addrspace(1) %data
; Raw buffer load returning `<2 x ptr addrspace(1)>` whose offset operand is
; %voffset + 60. The assertions expect a 128-bit load with the constant folded
; in as `offen offset:60`. The verde/tonga and gfx1010 assertions first copy
; s6, s7, s16 and s17 into s[8:11]; gfx1100 uses s[0:3] directly.
; The intrinsic is spelled with the `.v2p1` suffix so the mangled name matches
; the vector return type instead of reusing the scalar `.p1` name.
1417 define <2 x ptr addrspace(1)> @buffer_load_v2p1__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1418 ; PREGFX10-LABEL: buffer_load_v2p1__voffset_add:
1419 ; PREGFX10: ; %bb.0:
1420 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1421 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1422 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1423 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1424 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1425 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1426 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1427 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1429 ; GFX10-LABEL: buffer_load_v2p1__voffset_add:
1431 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1432 ; GFX10-NEXT: s_mov_b32 s11, s17
1433 ; GFX10-NEXT: s_mov_b32 s10, s16
1434 ; GFX10-NEXT: s_mov_b32 s9, s7
1435 ; GFX10-NEXT: s_mov_b32 s8, s6
1436 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1437 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1438 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1440 ; GFX11-LABEL: buffer_load_v2p1__voffset_add:
1442 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1443 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1444 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1445 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1446 %voffset.add = add i32 %voffset, 60
1447 %data = call <2 x ptr addrspace(1)> @llvm.amdgcn.raw.ptr.buffer.load.v2p1(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1448 ret <2 x ptr addrspace(1)> %data
; Raw buffer load returning `ptr addrspace(4)` whose offset operand is
; %voffset + 60. The assertions expect a 64-bit load with the constant folded
; in as `offen offset:60`. The verde/tonga and gfx1010 assertions first copy
; s6, s7, s16 and s17 into s[8:11]; gfx1100 uses s[0:3] directly.
1451 define ptr addrspace(4) @buffer_load_p4__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1452 ; PREGFX10-LABEL: buffer_load_p4__voffset_add:
1453 ; PREGFX10: ; %bb.0:
1454 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1455 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1456 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1457 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1458 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1459 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1460 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1461 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1463 ; GFX10-LABEL: buffer_load_p4__voffset_add:
1465 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1466 ; GFX10-NEXT: s_mov_b32 s11, s17
1467 ; GFX10-NEXT: s_mov_b32 s10, s16
1468 ; GFX10-NEXT: s_mov_b32 s9, s7
1469 ; GFX10-NEXT: s_mov_b32 s8, s6
1470 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1471 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1472 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1474 ; GFX11-LABEL: buffer_load_p4__voffset_add:
1476 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1477 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1478 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1479 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1480 %voffset.add = add i32 %voffset, 60
1481 %data = call ptr addrspace(4) @llvm.amdgcn.raw.ptr.buffer.load.p4(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1482 ret ptr addrspace(4) %data
; Loads a <2 x ptr addrspace(4)> via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single four-dword load.
; The intrinsic is spelled with the .v2p4 suffix so the name matches the
; vector return type instead of reusing the scalar .p4 spelling.
1485 define <2 x ptr addrspace(4)> @buffer_load_v2p4__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1486 ; PREGFX10-LABEL: buffer_load_v2p4__voffset_add:
1487 ; PREGFX10: ; %bb.0:
1488 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1489 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1490 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1491 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1492 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1493 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1494 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1495 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1497 ; GFX10-LABEL: buffer_load_v2p4__voffset_add:
1499 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1500 ; GFX10-NEXT: s_mov_b32 s11, s17
1501 ; GFX10-NEXT: s_mov_b32 s10, s16
1502 ; GFX10-NEXT: s_mov_b32 s9, s7
1503 ; GFX10-NEXT: s_mov_b32 s8, s6
1504 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1505 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1506 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1508 ; GFX11-LABEL: buffer_load_v2p4__voffset_add:
1510 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1511 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1512 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1513 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1514 %voffset.add = add i32 %voffset, 60
1515 %data = call <2 x ptr addrspace(4)> @llvm.amdgcn.raw.ptr.buffer.load.v2p4(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1516 ret <2 x ptr addrspace(4)> %data
; Loads a single ptr addrspace(999) via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single two-dword load.
1519 define ptr addrspace(999) @buffer_load_p999__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1520 ; PREGFX10-LABEL: buffer_load_p999__voffset_add:
1521 ; PREGFX10: ; %bb.0:
1522 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1523 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1524 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1525 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1526 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1527 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1528 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1529 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1531 ; GFX10-LABEL: buffer_load_p999__voffset_add:
1533 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1534 ; GFX10-NEXT: s_mov_b32 s11, s17
1535 ; GFX10-NEXT: s_mov_b32 s10, s16
1536 ; GFX10-NEXT: s_mov_b32 s9, s7
1537 ; GFX10-NEXT: s_mov_b32 s8, s6
1538 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1539 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1540 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1542 ; GFX11-LABEL: buffer_load_p999__voffset_add:
1544 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1545 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1546 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1547 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1548 %voffset.add = add i32 %voffset, 60
1549 %data = call ptr addrspace(999) @llvm.amdgcn.raw.ptr.buffer.load.p999(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1550 ret ptr addrspace(999) %data
; Loads a <2 x ptr addrspace(999)> via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single four-dword load.
; The intrinsic is spelled with the .v2p999 suffix so the name matches the
; vector return type instead of reusing the scalar .p999 spelling.
1553 define <2 x ptr addrspace(999)> @buffer_load_v2p999__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1554 ; PREGFX10-LABEL: buffer_load_v2p999__voffset_add:
1555 ; PREGFX10: ; %bb.0:
1556 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1557 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1558 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1559 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1560 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1561 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1562 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1563 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1565 ; GFX10-LABEL: buffer_load_v2p999__voffset_add:
1567 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1568 ; GFX10-NEXT: s_mov_b32 s11, s17
1569 ; GFX10-NEXT: s_mov_b32 s10, s16
1570 ; GFX10-NEXT: s_mov_b32 s9, s7
1571 ; GFX10-NEXT: s_mov_b32 s8, s6
1572 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1573 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1574 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1576 ; GFX11-LABEL: buffer_load_v2p999__voffset_add:
1578 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1579 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1580 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1581 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1582 %voffset.add = add i32 %voffset, 60
1583 %data = call <2 x ptr addrspace(999)> @llvm.amdgcn.raw.ptr.buffer.load.v2p999(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1584 ret <2 x ptr addrspace(999)> %data
; Loads a single ptr addrspace(2) via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single one-dword load.
1587 define ptr addrspace(2) @buffer_load_p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1588 ; PREGFX10-LABEL: buffer_load_p2__voffset_add:
1589 ; PREGFX10: ; %bb.0:
1590 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1591 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1592 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1593 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1594 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1595 ; PREGFX10-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen offset:60
1596 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1597 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1599 ; GFX10-LABEL: buffer_load_p2__voffset_add:
1601 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1602 ; GFX10-NEXT: s_mov_b32 s11, s17
1603 ; GFX10-NEXT: s_mov_b32 s10, s16
1604 ; GFX10-NEXT: s_mov_b32 s9, s7
1605 ; GFX10-NEXT: s_mov_b32 s8, s6
1606 ; GFX10-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen offset:60
1607 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1608 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1610 ; GFX11-LABEL: buffer_load_p2__voffset_add:
1612 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1613 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
1614 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1615 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1616 %voffset.add = add i32 %voffset, 60
1617 %data = call ptr addrspace(2) @llvm.amdgcn.raw.ptr.buffer.load.p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1618 ret ptr addrspace(2) %data
; Loads a <2 x ptr addrspace(2)> via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single two-dword load.
1621 define <2 x ptr addrspace(2)> @buffer_load_v2p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1622 ; PREGFX10-LABEL: buffer_load_v2p2__voffset_add:
1623 ; PREGFX10: ; %bb.0:
1624 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1625 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1626 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1627 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1628 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1629 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1630 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1631 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1633 ; GFX10-LABEL: buffer_load_v2p2__voffset_add:
1635 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1636 ; GFX10-NEXT: s_mov_b32 s11, s17
1637 ; GFX10-NEXT: s_mov_b32 s10, s16
1638 ; GFX10-NEXT: s_mov_b32 s9, s7
1639 ; GFX10-NEXT: s_mov_b32 s8, s6
1640 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1641 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1642 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1644 ; GFX11-LABEL: buffer_load_v2p2__voffset_add:
1646 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1647 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1648 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1649 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1650 %voffset.add = add i32 %voffset, 60
1651 %data = call <2 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v2p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1652 ret <2 x ptr addrspace(2)> %data
; Loads a <3 x ptr addrspace(2)> via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single three-dword load.
; Only the gfx1010 and gfx1100 runs carry checks for this function; the
; prefix shared by the verde and tonga RUN lines has none.
; NOTE(review): presumably those two targets emit different code for a
; three-dword load, so the autogenerated checks could not be shared - confirm.
1655 define <3 x ptr addrspace(2)> @buffer_load_v3p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1656 ; GFX10-LABEL: buffer_load_v3p2__voffset_add:
1658 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1659 ; GFX10-NEXT: s_mov_b32 s11, s17
1660 ; GFX10-NEXT: s_mov_b32 s10, s16
1661 ; GFX10-NEXT: s_mov_b32 s9, s7
1662 ; GFX10-NEXT: s_mov_b32 s8, s6
1663 ; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[8:11], 0 offen offset:60
1664 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1665 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1667 ; GFX11-LABEL: buffer_load_v3p2__voffset_add:
1669 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1670 ; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
1671 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1672 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1673 %voffset.add = add i32 %voffset, 60
1674 %data = call <3 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v3p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1675 ret <3 x ptr addrspace(2)> %data
; Loads a <4 x ptr addrspace(2)> via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single four-dword load.
1678 define <4 x ptr addrspace(2)> @buffer_load_v4p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1679 ; PREGFX10-LABEL: buffer_load_v4p2__voffset_add:
1680 ; PREGFX10: ; %bb.0:
1681 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1682 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1683 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1684 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1685 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1686 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1687 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1688 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1690 ; GFX10-LABEL: buffer_load_v4p2__voffset_add:
1692 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1693 ; GFX10-NEXT: s_mov_b32 s11, s17
1694 ; GFX10-NEXT: s_mov_b32 s10, s16
1695 ; GFX10-NEXT: s_mov_b32 s9, s7
1696 ; GFX10-NEXT: s_mov_b32 s8, s6
1697 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1698 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1699 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1701 ; GFX11-LABEL: buffer_load_v4p2__voffset_add:
1703 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1704 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1705 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1706 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1707 %voffset.add = add i32 %voffset, 60
1708 %data = call <4 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v4p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1709 ret <4 x ptr addrspace(2)> %data
; Loads a single ptr addrspace(3) via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single one-dword load.
1712 define ptr addrspace(3) @buffer_load_p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1713 ; PREGFX10-LABEL: buffer_load_p3__voffset_add:
1714 ; PREGFX10: ; %bb.0:
1715 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1716 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1717 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1718 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1719 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1720 ; PREGFX10-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen offset:60
1721 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1722 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1724 ; GFX10-LABEL: buffer_load_p3__voffset_add:
1726 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1727 ; GFX10-NEXT: s_mov_b32 s11, s17
1728 ; GFX10-NEXT: s_mov_b32 s10, s16
1729 ; GFX10-NEXT: s_mov_b32 s9, s7
1730 ; GFX10-NEXT: s_mov_b32 s8, s6
1731 ; GFX10-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen offset:60
1732 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1733 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1735 ; GFX11-LABEL: buffer_load_p3__voffset_add:
1737 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1738 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
1739 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1740 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1741 %voffset.add = add i32 %voffset, 60
1742 %data = call ptr addrspace(3) @llvm.amdgcn.raw.ptr.buffer.load.p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1743 ret ptr addrspace(3) %data
; Loads a <2 x ptr addrspace(3)> via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single two-dword load.
1746 define <2 x ptr addrspace(3)> @buffer_load_v2p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1747 ; PREGFX10-LABEL: buffer_load_v2p3__voffset_add:
1748 ; PREGFX10: ; %bb.0:
1749 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1750 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1751 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1752 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1753 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1754 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1755 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1756 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1758 ; GFX10-LABEL: buffer_load_v2p3__voffset_add:
1760 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1761 ; GFX10-NEXT: s_mov_b32 s11, s17
1762 ; GFX10-NEXT: s_mov_b32 s10, s16
1763 ; GFX10-NEXT: s_mov_b32 s9, s7
1764 ; GFX10-NEXT: s_mov_b32 s8, s6
1765 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1766 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1767 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1769 ; GFX11-LABEL: buffer_load_v2p3__voffset_add:
1771 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1772 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1773 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1774 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1775 %voffset.add = add i32 %voffset, 60
1776 %data = call <2 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v2p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1777 ret <2 x ptr addrspace(3)> %data
; Loads a <3 x ptr addrspace(3)> via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single three-dword load.
; Only the gfx1010 and gfx1100 runs carry checks for this function; the
; prefix shared by the verde and tonga RUN lines has none.
; NOTE(review): presumably those two targets emit different code for a
; three-dword load, so the autogenerated checks could not be shared - confirm.
1780 define <3 x ptr addrspace(3)> @buffer_load_v3p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1781 ; GFX10-LABEL: buffer_load_v3p3__voffset_add:
1783 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1784 ; GFX10-NEXT: s_mov_b32 s11, s17
1785 ; GFX10-NEXT: s_mov_b32 s10, s16
1786 ; GFX10-NEXT: s_mov_b32 s9, s7
1787 ; GFX10-NEXT: s_mov_b32 s8, s6
1788 ; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[8:11], 0 offen offset:60
1789 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1790 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1792 ; GFX11-LABEL: buffer_load_v3p3__voffset_add:
1794 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1795 ; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
1796 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1797 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1798 %voffset.add = add i32 %voffset, 60
1799 %data = call <3 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v3p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1800 ret <3 x ptr addrspace(3)> %data
; Loads a <4 x ptr addrspace(3)> via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single four-dword load.
1803 define <4 x ptr addrspace(3)> @buffer_load_v4p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1804 ; PREGFX10-LABEL: buffer_load_v4p3__voffset_add:
1805 ; PREGFX10: ; %bb.0:
1806 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1807 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1808 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1809 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1810 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1811 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1812 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1813 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1815 ; GFX10-LABEL: buffer_load_v4p3__voffset_add:
1817 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1818 ; GFX10-NEXT: s_mov_b32 s11, s17
1819 ; GFX10-NEXT: s_mov_b32 s10, s16
1820 ; GFX10-NEXT: s_mov_b32 s9, s7
1821 ; GFX10-NEXT: s_mov_b32 s8, s6
1822 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1823 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1824 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1826 ; GFX11-LABEL: buffer_load_v4p3__voffset_add:
1828 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1829 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1830 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1831 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1832 %voffset.add = add i32 %voffset, 60
1833 %data = call <4 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v4p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1834 ret <4 x ptr addrspace(3)> %data
; Loads a single ptr addrspace(5) via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single one-dword load.
1837 define ptr addrspace(5) @buffer_load_p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1838 ; PREGFX10-LABEL: buffer_load_p5__voffset_add:
1839 ; PREGFX10: ; %bb.0:
1840 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1841 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1842 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1843 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1844 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1845 ; PREGFX10-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen offset:60
1846 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1847 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1849 ; GFX10-LABEL: buffer_load_p5__voffset_add:
1851 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1852 ; GFX10-NEXT: s_mov_b32 s11, s17
1853 ; GFX10-NEXT: s_mov_b32 s10, s16
1854 ; GFX10-NEXT: s_mov_b32 s9, s7
1855 ; GFX10-NEXT: s_mov_b32 s8, s6
1856 ; GFX10-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen offset:60
1857 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1858 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1860 ; GFX11-LABEL: buffer_load_p5__voffset_add:
1862 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1863 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
1864 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1865 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1866 %voffset.add = add i32 %voffset, 60
1867 %data = call ptr addrspace(5) @llvm.amdgcn.raw.ptr.buffer.load.p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1868 ret ptr addrspace(5) %data
; Loads a <2 x ptr addrspace(5)> via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single two-dword load.
1871 define <2 x ptr addrspace(5)> @buffer_load_v2p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1872 ; PREGFX10-LABEL: buffer_load_v2p5__voffset_add:
1873 ; PREGFX10: ; %bb.0:
1874 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1875 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1876 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1877 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1878 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1879 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1880 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1881 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1883 ; GFX10-LABEL: buffer_load_v2p5__voffset_add:
1885 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1886 ; GFX10-NEXT: s_mov_b32 s11, s17
1887 ; GFX10-NEXT: s_mov_b32 s10, s16
1888 ; GFX10-NEXT: s_mov_b32 s9, s7
1889 ; GFX10-NEXT: s_mov_b32 s8, s6
1890 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
1891 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1892 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1894 ; GFX11-LABEL: buffer_load_v2p5__voffset_add:
1896 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1897 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1898 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1899 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1900 %voffset.add = add i32 %voffset, 60
1901 %data = call <2 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v2p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1902 ret <2 x ptr addrspace(5)> %data
; Loads a <3 x ptr addrspace(5)> via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single three-dword load.
; Only the gfx1010 and gfx1100 runs carry checks for this function; the
; prefix shared by the verde and tonga RUN lines has none.
; NOTE(review): presumably those two targets emit different code for a
; three-dword load, so the autogenerated checks could not be shared - confirm.
1905 define <3 x ptr addrspace(5)> @buffer_load_v3p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1906 ; GFX10-LABEL: buffer_load_v3p5__voffset_add:
1908 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1909 ; GFX10-NEXT: s_mov_b32 s11, s17
1910 ; GFX10-NEXT: s_mov_b32 s10, s16
1911 ; GFX10-NEXT: s_mov_b32 s9, s7
1912 ; GFX10-NEXT: s_mov_b32 s8, s6
1913 ; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[8:11], 0 offen offset:60
1914 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1915 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1917 ; GFX11-LABEL: buffer_load_v3p5__voffset_add:
1919 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1920 ; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
1921 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1922 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1923 %voffset.add = add i32 %voffset, 60
1924 %data = call <3 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v3p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1925 ret <3 x ptr addrspace(5)> %data
; Loads a <4 x ptr addrspace(5)> via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single four-dword load.
1928 define <4 x ptr addrspace(5)> @buffer_load_v4p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1929 ; PREGFX10-LABEL: buffer_load_v4p5__voffset_add:
1930 ; PREGFX10: ; %bb.0:
1931 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1932 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1933 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1934 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1935 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1936 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1937 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1938 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1940 ; GFX10-LABEL: buffer_load_v4p5__voffset_add:
1942 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1943 ; GFX10-NEXT: s_mov_b32 s11, s17
1944 ; GFX10-NEXT: s_mov_b32 s10, s16
1945 ; GFX10-NEXT: s_mov_b32 s9, s7
1946 ; GFX10-NEXT: s_mov_b32 s8, s6
1947 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
1948 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1949 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1951 ; GFX11-LABEL: buffer_load_v4p5__voffset_add:
1953 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1954 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1955 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1956 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1957 %voffset.add = add i32 %voffset, 60
1958 %data = call <4 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v4p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1959 ret <4 x ptr addrspace(5)> %data
; Loads a single ptr addrspace(6) via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single one-dword load.
1962 define ptr addrspace(6) @buffer_load_p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1963 ; PREGFX10-LABEL: buffer_load_p6__voffset_add:
1964 ; PREGFX10: ; %bb.0:
1965 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1966 ; PREGFX10-NEXT: s_mov_b32 s11, s17
1967 ; PREGFX10-NEXT: s_mov_b32 s10, s16
1968 ; PREGFX10-NEXT: s_mov_b32 s9, s7
1969 ; PREGFX10-NEXT: s_mov_b32 s8, s6
1970 ; PREGFX10-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen offset:60
1971 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1972 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1974 ; GFX10-LABEL: buffer_load_p6__voffset_add:
1976 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1977 ; GFX10-NEXT: s_mov_b32 s11, s17
1978 ; GFX10-NEXT: s_mov_b32 s10, s16
1979 ; GFX10-NEXT: s_mov_b32 s9, s7
1980 ; GFX10-NEXT: s_mov_b32 s8, s6
1981 ; GFX10-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen offset:60
1982 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1983 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1985 ; GFX11-LABEL: buffer_load_p6__voffset_add:
1987 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1988 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
1989 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1990 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1991 %voffset.add = add i32 %voffset, 60
1992 %data = call ptr addrspace(6) @llvm.amdgcn.raw.ptr.buffer.load.p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1993 ret ptr addrspace(6) %data
; Loads a <2 x ptr addrspace(6)> via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single two-dword load.
1996 define <2 x ptr addrspace(6)> @buffer_load_v2p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1997 ; PREGFX10-LABEL: buffer_load_v2p6__voffset_add:
1998 ; PREGFX10: ; %bb.0:
1999 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2000 ; PREGFX10-NEXT: s_mov_b32 s11, s17
2001 ; PREGFX10-NEXT: s_mov_b32 s10, s16
2002 ; PREGFX10-NEXT: s_mov_b32 s9, s7
2003 ; PREGFX10-NEXT: s_mov_b32 s8, s6
2004 ; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
2005 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
2006 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
2008 ; GFX10-LABEL: buffer_load_v2p6__voffset_add:
2010 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2011 ; GFX10-NEXT: s_mov_b32 s11, s17
2012 ; GFX10-NEXT: s_mov_b32 s10, s16
2013 ; GFX10-NEXT: s_mov_b32 s9, s7
2014 ; GFX10-NEXT: s_mov_b32 s8, s6
2015 ; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[8:11], 0 offen offset:60
2016 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2017 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2019 ; GFX11-LABEL: buffer_load_v2p6__voffset_add:
2021 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2022 ; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
2023 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2024 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2025 %voffset.add = add i32 %voffset, 60
2026 %data = call <2 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v2p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
2027 ret <2 x ptr addrspace(6)> %data
; Loads a <3 x ptr addrspace(6)> via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single three-dword load.
; Only the gfx1010 and gfx1100 runs carry checks for this function; the
; prefix shared by the verde and tonga RUN lines has none.
; NOTE(review): presumably those two targets emit different code for a
; three-dword load, so the autogenerated checks could not be shared - confirm.
2030 define <3 x ptr addrspace(6)> @buffer_load_v3p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
2031 ; GFX10-LABEL: buffer_load_v3p6__voffset_add:
2033 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2034 ; GFX10-NEXT: s_mov_b32 s11, s17
2035 ; GFX10-NEXT: s_mov_b32 s10, s16
2036 ; GFX10-NEXT: s_mov_b32 s9, s7
2037 ; GFX10-NEXT: s_mov_b32 s8, s6
2038 ; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[8:11], 0 offen offset:60
2039 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2040 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2042 ; GFX11-LABEL: buffer_load_v3p6__voffset_add:
2044 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2045 ; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
2046 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2047 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2048 %voffset.add = add i32 %voffset, 60
2049 %data = call <3 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v3p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
2050 ret <3 x ptr addrspace(6)> %data
; Loads a <4 x ptr addrspace(6)> via the raw buffer-load intrinsic with
; voffset = %voffset + 60. The checks expect the constant to be folded into
; the instruction as "offen offset:60" and a single four-dword load.
2053 define <4 x ptr addrspace(6)> @buffer_load_v4p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
2054 ; PREGFX10-LABEL: buffer_load_v4p6__voffset_add:
2055 ; PREGFX10: ; %bb.0:
2056 ; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2057 ; PREGFX10-NEXT: s_mov_b32 s11, s17
2058 ; PREGFX10-NEXT: s_mov_b32 s10, s16
2059 ; PREGFX10-NEXT: s_mov_b32 s9, s7
2060 ; PREGFX10-NEXT: s_mov_b32 s8, s6
2061 ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
2062 ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
2063 ; PREGFX10-NEXT: s_setpc_b64 s[30:31]
2065 ; GFX10-LABEL: buffer_load_v4p6__voffset_add:
2067 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2068 ; GFX10-NEXT: s_mov_b32 s11, s17
2069 ; GFX10-NEXT: s_mov_b32 s10, s16
2070 ; GFX10-NEXT: s_mov_b32 s9, s7
2071 ; GFX10-NEXT: s_mov_b32 s8, s6
2072 ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[8:11], 0 offen offset:60
2073 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2074 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2076 ; GFX11-LABEL: buffer_load_v4p6__voffset_add:
2078 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2079 ; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
2080 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2081 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2082 %voffset.add = add i32 %voffset, 60
2083 %data = call <4 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v4p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
2084 ret <4 x ptr addrspace(6)> %data
; Explicit declarations for the float, integer and half overloads of
; llvm.amdgcn.raw.ptr.buffer.load, plus llvm.amdgcn.exp.f32. Every
; declaration references attribute group #0 (nounwind readonly).
; This list contains no declarations for the pointer-returning overloads
; (.p1, .v2p2, ...) called by the *__voffset_add tests.
; NOTE(review): llvm.amdgcn.exp.f32 returns void yet shares the readonly
; attribute group with the loads - confirm this is intended.
2087 declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #0
2088 declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32) #0
2089 declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #0
2090 declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32) #0
2091 declare <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8), i32, i32, i32) #0
2092 declare <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32) #0
2093 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
2094 declare i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32) #0
2095 declare i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32) #0
2096 declare <2 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v2i16(ptr addrspace(8), i32, i32, i32) #0
2097 declare <4 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v4i16(ptr addrspace(8), i32, i32, i32) #0
2098 declare half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32) #0
2099 declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32) #0
2100 declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32) #0
2101 attributes #0 = { nounwind readonly }