1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mcpu=tahiti -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX67,GFX6
3 ; RUN: llc -mcpu=hawaii -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX67,GFX7
4 ; RUN: llc -mcpu=fiji -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX8
5 ; RUN: llc -mcpu=gfx900 -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX910,GFX9
6 ; RUN: llc -mcpu=gfx1010 -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX910,GFX10
7 ; RUN: llc -mcpu=gfx1100 -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX11
8 ; RUN: llc -mcpu=gfx1200 -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12
; i8 data plus an extra i32 result from the struct-returning raw buffer load.
; Every check block below expects a byte-sized buffer load carrying the `tfe`
; modifier with a two-register destination (v4, v5) that is zeroed before the
; load; v4 is then stored as the byte and v5 as the dword.
10 define amdgpu_ps void @raw_buffer_load_i8_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
11 ; GFX67-LABEL: raw_buffer_load_i8_tfe:
13 ; GFX67-NEXT: v_mov_b32_e32 v4, 0
14 ; GFX67-NEXT: v_mov_b32_e32 v5, v4
15 ; GFX67-NEXT: buffer_load_ubyte v[4:5], off, s[0:3], 0 tfe
16 ; GFX67-NEXT: s_mov_b32 s2, 0
17 ; GFX67-NEXT: s_mov_b32 s3, 0xf000
18 ; GFX67-NEXT: s_mov_b32 s0, s2
19 ; GFX67-NEXT: s_mov_b32 s1, s2
20 ; GFX67-NEXT: s_waitcnt vmcnt(0)
21 ; GFX67-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64
22 ; GFX67-NEXT: buffer_store_dword v5, v[2:3], s[0:3], 0 addr64
23 ; GFX67-NEXT: s_endpgm
25 ; GFX8-LABEL: raw_buffer_load_i8_tfe:
27 ; GFX8-NEXT: v_mov_b32_e32 v4, 0
28 ; GFX8-NEXT: v_mov_b32_e32 v5, v4
29 ; GFX8-NEXT: buffer_load_ubyte v[4:5], off, s[0:3], 0 tfe
30 ; GFX8-NEXT: s_waitcnt vmcnt(0)
31 ; GFX8-NEXT: flat_store_byte v[0:1], v4
32 ; GFX8-NEXT: flat_store_dword v[2:3], v5
35 ; GFX910-LABEL: raw_buffer_load_i8_tfe:
37 ; GFX910-NEXT: v_mov_b32_e32 v4, 0
38 ; GFX910-NEXT: v_mov_b32_e32 v5, v4
39 ; GFX910-NEXT: buffer_load_ubyte v[4:5], off, s[0:3], 0 tfe
40 ; GFX910-NEXT: s_waitcnt vmcnt(0)
41 ; GFX910-NEXT: global_store_byte v[0:1], v4, off
42 ; GFX910-NEXT: global_store_dword v[2:3], v5, off
43 ; GFX910-NEXT: s_endpgm
45 ; GFX11-LABEL: raw_buffer_load_i8_tfe:
47 ; GFX11-NEXT: v_mov_b32_e32 v4, 0
48 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
49 ; GFX11-NEXT: v_mov_b32_e32 v5, v4
50 ; GFX11-NEXT: buffer_load_u8 v[4:5], off, s[0:3], 0 tfe
51 ; GFX11-NEXT: s_waitcnt vmcnt(0)
52 ; GFX11-NEXT: global_store_b8 v[0:1], v4, off
53 ; GFX11-NEXT: global_store_b32 v[2:3], v5, off
55 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
56 ; GFX11-NEXT: s_endpgm
58 ; GFX12-LABEL: raw_buffer_load_i8_tfe:
60 ; GFX12-NEXT: v_mov_b32_e32 v4, 0
61 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
62 ; GFX12-NEXT: v_mov_b32_e32 v5, v4
63 ; GFX12-NEXT: buffer_load_u8 v[4:5], off, s[0:3], null tfe
64 ; GFX12-NEXT: s_wait_loadcnt 0x0
65 ; GFX12-NEXT: global_store_b8 v[0:1], v4, off
66 ; GFX12-NEXT: global_store_b32 v[2:3], v5, off
68 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
69 ; GFX12-NEXT: s_endpgm
; Element 0 of the returned struct is the loaded i8, element 1 is the i32 that
; this test names %tfe; each one is written to its own addrspace(1) pointer.
70 %res = call { i8, i32 } @llvm.amdgcn.raw.buffer.load.sl_i8i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
71 %data = extractvalue { i8, i32 } %res, 0
72 store i8 %data, ptr addrspace(1) %data_addr
73 %tfe = extractvalue { i8, i32 } %res, 1
74 store i32 %tfe, ptr addrspace(1) %tfe_addr
; i16 data plus an extra i32 result. The checks expect a 16-bit buffer load
; (ushort / u16 spelling depending on the target) with the `tfe` modifier into
; the zero-initialised pair v[4:5], followed by a short store of v4 and a
; dword store of v5.
78 define amdgpu_ps void @raw_buffer_load_i16_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
79 ; GFX67-LABEL: raw_buffer_load_i16_tfe:
81 ; GFX67-NEXT: v_mov_b32_e32 v4, 0
82 ; GFX67-NEXT: v_mov_b32_e32 v5, v4
83 ; GFX67-NEXT: buffer_load_ushort v[4:5], off, s[0:3], 0 tfe
84 ; GFX67-NEXT: s_mov_b32 s2, 0
85 ; GFX67-NEXT: s_mov_b32 s3, 0xf000
86 ; GFX67-NEXT: s_mov_b32 s0, s2
87 ; GFX67-NEXT: s_mov_b32 s1, s2
88 ; GFX67-NEXT: s_waitcnt vmcnt(0)
89 ; GFX67-NEXT: buffer_store_short v4, v[0:1], s[0:3], 0 addr64
90 ; GFX67-NEXT: buffer_store_dword v5, v[2:3], s[0:3], 0 addr64
91 ; GFX67-NEXT: s_endpgm
93 ; GFX8-LABEL: raw_buffer_load_i16_tfe:
95 ; GFX8-NEXT: v_mov_b32_e32 v4, 0
96 ; GFX8-NEXT: v_mov_b32_e32 v5, v4
97 ; GFX8-NEXT: buffer_load_ushort v[4:5], off, s[0:3], 0 tfe
98 ; GFX8-NEXT: s_waitcnt vmcnt(0)
99 ; GFX8-NEXT: flat_store_short v[0:1], v4
100 ; GFX8-NEXT: flat_store_dword v[2:3], v5
101 ; GFX8-NEXT: s_endpgm
103 ; GFX910-LABEL: raw_buffer_load_i16_tfe:
105 ; GFX910-NEXT: v_mov_b32_e32 v4, 0
106 ; GFX910-NEXT: v_mov_b32_e32 v5, v4
107 ; GFX910-NEXT: buffer_load_ushort v[4:5], off, s[0:3], 0 tfe
108 ; GFX910-NEXT: s_waitcnt vmcnt(0)
109 ; GFX910-NEXT: global_store_short v[0:1], v4, off
110 ; GFX910-NEXT: global_store_dword v[2:3], v5, off
111 ; GFX910-NEXT: s_endpgm
113 ; GFX11-LABEL: raw_buffer_load_i16_tfe:
115 ; GFX11-NEXT: v_mov_b32_e32 v4, 0
116 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
117 ; GFX11-NEXT: v_mov_b32_e32 v5, v4
118 ; GFX11-NEXT: buffer_load_u16 v[4:5], off, s[0:3], 0 tfe
119 ; GFX11-NEXT: s_waitcnt vmcnt(0)
120 ; GFX11-NEXT: global_store_b16 v[0:1], v4, off
121 ; GFX11-NEXT: global_store_b32 v[2:3], v5, off
122 ; GFX11-NEXT: s_nop 0
123 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
124 ; GFX11-NEXT: s_endpgm
126 ; GFX12-LABEL: raw_buffer_load_i16_tfe:
128 ; GFX12-NEXT: v_mov_b32_e32 v4, 0
129 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
130 ; GFX12-NEXT: v_mov_b32_e32 v5, v4
131 ; GFX12-NEXT: buffer_load_u16 v[4:5], off, s[0:3], null tfe
132 ; GFX12-NEXT: s_wait_loadcnt 0x0
133 ; GFX12-NEXT: global_store_b16 v[0:1], v4, off
134 ; GFX12-NEXT: global_store_b32 v[2:3], v5, off
135 ; GFX12-NEXT: s_nop 0
136 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
137 ; GFX12-NEXT: s_endpgm
; Split the { i16, i32 } result: element 0 goes to %data_addr, element 1
; (named %tfe here) goes to %tfe_addr.
138 %res = call { i16, i32 } @llvm.amdgcn.raw.buffer.load.sl_i16i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
139 %data = extractvalue { i16, i32 } %res, 0
140 store i16 %data, ptr addrspace(1) %data_addr
141 %tfe = extractvalue { i16, i32 } %res, 1
142 store i32 %tfe, ptr addrspace(1) %tfe_addr
; half data plus an extra i32 result. The expected instructions match the i16
; test in this file: a 16-bit buffer load with `tfe` into the zeroed pair
; v[4:5], then a short store of v4 and a dword store of v5.
146 define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
147 ; GFX67-LABEL: raw_buffer_load_f16_tfe:
149 ; GFX67-NEXT: v_mov_b32_e32 v4, 0
150 ; GFX67-NEXT: v_mov_b32_e32 v5, v4
151 ; GFX67-NEXT: buffer_load_ushort v[4:5], off, s[0:3], 0 tfe
152 ; GFX67-NEXT: s_mov_b32 s2, 0
153 ; GFX67-NEXT: s_mov_b32 s3, 0xf000
154 ; GFX67-NEXT: s_mov_b32 s0, s2
155 ; GFX67-NEXT: s_mov_b32 s1, s2
156 ; GFX67-NEXT: s_waitcnt vmcnt(0)
157 ; GFX67-NEXT: buffer_store_short v4, v[0:1], s[0:3], 0 addr64
158 ; GFX67-NEXT: buffer_store_dword v5, v[2:3], s[0:3], 0 addr64
159 ; GFX67-NEXT: s_endpgm
161 ; GFX8-LABEL: raw_buffer_load_f16_tfe:
163 ; GFX8-NEXT: v_mov_b32_e32 v4, 0
164 ; GFX8-NEXT: v_mov_b32_e32 v5, v4
165 ; GFX8-NEXT: buffer_load_ushort v[4:5], off, s[0:3], 0 tfe
166 ; GFX8-NEXT: s_waitcnt vmcnt(0)
167 ; GFX8-NEXT: flat_store_short v[0:1], v4
168 ; GFX8-NEXT: flat_store_dword v[2:3], v5
169 ; GFX8-NEXT: s_endpgm
171 ; GFX910-LABEL: raw_buffer_load_f16_tfe:
173 ; GFX910-NEXT: v_mov_b32_e32 v4, 0
174 ; GFX910-NEXT: v_mov_b32_e32 v5, v4
175 ; GFX910-NEXT: buffer_load_ushort v[4:5], off, s[0:3], 0 tfe
176 ; GFX910-NEXT: s_waitcnt vmcnt(0)
177 ; GFX910-NEXT: global_store_short v[0:1], v4, off
178 ; GFX910-NEXT: global_store_dword v[2:3], v5, off
179 ; GFX910-NEXT: s_endpgm
181 ; GFX11-LABEL: raw_buffer_load_f16_tfe:
183 ; GFX11-NEXT: v_mov_b32_e32 v4, 0
184 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
185 ; GFX11-NEXT: v_mov_b32_e32 v5, v4
186 ; GFX11-NEXT: buffer_load_u16 v[4:5], off, s[0:3], 0 tfe
187 ; GFX11-NEXT: s_waitcnt vmcnt(0)
188 ; GFX11-NEXT: global_store_b16 v[0:1], v4, off
189 ; GFX11-NEXT: global_store_b32 v[2:3], v5, off
190 ; GFX11-NEXT: s_nop 0
191 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
192 ; GFX11-NEXT: s_endpgm
194 ; GFX12-LABEL: raw_buffer_load_f16_tfe:
196 ; GFX12-NEXT: v_mov_b32_e32 v4, 0
197 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
198 ; GFX12-NEXT: v_mov_b32_e32 v5, v4
199 ; GFX12-NEXT: buffer_load_u16 v[4:5], off, s[0:3], null tfe
200 ; GFX12-NEXT: s_wait_loadcnt 0x0
201 ; GFX12-NEXT: global_store_b16 v[0:1], v4, off
202 ; GFX12-NEXT: global_store_b32 v[2:3], v5, off
203 ; GFX12-NEXT: s_nop 0
204 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
205 ; GFX12-NEXT: s_endpgm
; Split the { half, i32 } result: element 0 goes to %data_addr, element 1
; (named %tfe here) goes to %tfe_addr.
206 %res = call { half, i32 } @llvm.amdgcn.raw.buffer.load.sl_f16i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
207 %data = extractvalue { half, i32 } %res, 0
208 store half %data, ptr addrspace(1) %data_addr
209 %tfe = extractvalue { half, i32 } %res, 1
210 store i32 %tfe, ptr addrspace(1) %tfe_addr
; i32 data plus an extra i32 result. The checks expect a single-dword buffer
; load (dword / b32 spelling depending on the target) with `tfe` into the
; zeroed pair v[4:5], then two dword stores: v4 as data, v5 as the second
; result.
214 define amdgpu_ps void @raw_buffer_load_i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
215 ; GFX67-LABEL: raw_buffer_load_i32_tfe:
217 ; GFX67-NEXT: v_mov_b32_e32 v4, 0
218 ; GFX67-NEXT: v_mov_b32_e32 v5, v4
219 ; GFX67-NEXT: buffer_load_dword v[4:5], off, s[0:3], 0 tfe
220 ; GFX67-NEXT: s_mov_b32 s2, 0
221 ; GFX67-NEXT: s_mov_b32 s3, 0xf000
222 ; GFX67-NEXT: s_mov_b32 s0, s2
223 ; GFX67-NEXT: s_mov_b32 s1, s2
224 ; GFX67-NEXT: s_waitcnt vmcnt(0)
225 ; GFX67-NEXT: buffer_store_dword v4, v[0:1], s[0:3], 0 addr64
226 ; GFX67-NEXT: buffer_store_dword v5, v[2:3], s[0:3], 0 addr64
227 ; GFX67-NEXT: s_endpgm
229 ; GFX8-LABEL: raw_buffer_load_i32_tfe:
231 ; GFX8-NEXT: v_mov_b32_e32 v4, 0
232 ; GFX8-NEXT: v_mov_b32_e32 v5, v4
233 ; GFX8-NEXT: buffer_load_dword v[4:5], off, s[0:3], 0 tfe
234 ; GFX8-NEXT: s_waitcnt vmcnt(0)
235 ; GFX8-NEXT: flat_store_dword v[0:1], v4
236 ; GFX8-NEXT: flat_store_dword v[2:3], v5
237 ; GFX8-NEXT: s_endpgm
239 ; GFX910-LABEL: raw_buffer_load_i32_tfe:
241 ; GFX910-NEXT: v_mov_b32_e32 v4, 0
242 ; GFX910-NEXT: v_mov_b32_e32 v5, v4
243 ; GFX910-NEXT: buffer_load_dword v[4:5], off, s[0:3], 0 tfe
244 ; GFX910-NEXT: s_waitcnt vmcnt(0)
245 ; GFX910-NEXT: global_store_dword v[0:1], v4, off
246 ; GFX910-NEXT: global_store_dword v[2:3], v5, off
247 ; GFX910-NEXT: s_endpgm
249 ; GFX11-LABEL: raw_buffer_load_i32_tfe:
251 ; GFX11-NEXT: v_mov_b32_e32 v4, 0
252 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
253 ; GFX11-NEXT: v_mov_b32_e32 v5, v4
254 ; GFX11-NEXT: buffer_load_b32 v[4:5], off, s[0:3], 0 tfe
255 ; GFX11-NEXT: s_waitcnt vmcnt(0)
256 ; GFX11-NEXT: global_store_b32 v[0:1], v4, off
257 ; GFX11-NEXT: global_store_b32 v[2:3], v5, off
258 ; GFX11-NEXT: s_nop 0
259 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
260 ; GFX11-NEXT: s_endpgm
262 ; GFX12-LABEL: raw_buffer_load_i32_tfe:
264 ; GFX12-NEXT: v_mov_b32_e32 v4, 0
265 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
266 ; GFX12-NEXT: v_mov_b32_e32 v5, v4
267 ; GFX12-NEXT: buffer_load_b32 v[4:5], off, s[0:3], null tfe
268 ; GFX12-NEXT: s_wait_loadcnt 0x0
269 ; GFX12-NEXT: global_store_b32 v[0:1], v4, off
270 ; GFX12-NEXT: global_store_b32 v[2:3], v5, off
271 ; GFX12-NEXT: s_nop 0
272 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
273 ; GFX12-NEXT: s_endpgm
; Split the { i32, i32 } result: element 0 goes to %data_addr, element 1
; (named %tfe here) goes to %tfe_addr.
274 %res = call { i32, i32 } @llvm.amdgcn.raw.buffer.load.sl_i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
275 %data = extractvalue { i32, i32 } %res, 0
276 store i32 %data, ptr addrspace(1) %data_addr
277 %tfe = extractvalue { i32, i32 } %res, 1
278 store i32 %tfe, ptr addrspace(1) %tfe_addr
; <2 x i32> data plus an extra i32 result. This is the first test in the file
; where the GFX6 and GFX7 expectations diverge, so each has its own check
; block instead of sharing one. From GFX7 onward the checks expect a
; two-dword load with `tfe` into three zeroed registers v[4:6], storing
; v[4:5] as data and v6 as the second result.
; NOTE(review): the GFX6 block expects a wider load (dwordx3 into four zeroed
; registers v[4:7]) yet still stores v6 as the second result. These lines are
; autogenerated; confirm against current llc output instead of hand-editing.
282 define amdgpu_ps void @raw_buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
283 ; GFX6-LABEL: raw_buffer_load_v2i32_tfe:
285 ; GFX6-NEXT: v_mov_b32_e32 v4, 0
286 ; GFX6-NEXT: v_mov_b32_e32 v5, v4
287 ; GFX6-NEXT: v_mov_b32_e32 v6, v4
288 ; GFX6-NEXT: v_mov_b32_e32 v7, v4
289 ; GFX6-NEXT: buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
290 ; GFX6-NEXT: s_mov_b32 s2, 0
291 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
292 ; GFX6-NEXT: s_mov_b32 s0, s2
293 ; GFX6-NEXT: s_mov_b32 s1, s2
294 ; GFX6-NEXT: s_waitcnt vmcnt(0)
295 ; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64
296 ; GFX6-NEXT: buffer_store_dword v6, v[2:3], s[0:3], 0 addr64
297 ; GFX6-NEXT: s_endpgm
299 ; GFX7-LABEL: raw_buffer_load_v2i32_tfe:
301 ; GFX7-NEXT: v_mov_b32_e32 v4, 0
302 ; GFX7-NEXT: v_mov_b32_e32 v5, v4
303 ; GFX7-NEXT: v_mov_b32_e32 v6, v4
304 ; GFX7-NEXT: buffer_load_dwordx2 v[4:6], off, s[0:3], 0 tfe
305 ; GFX7-NEXT: s_mov_b32 s2, 0
306 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
307 ; GFX7-NEXT: s_mov_b32 s0, s2
308 ; GFX7-NEXT: s_mov_b32 s1, s2
309 ; GFX7-NEXT: s_waitcnt vmcnt(0)
310 ; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64
311 ; GFX7-NEXT: buffer_store_dword v6, v[2:3], s[0:3], 0 addr64
312 ; GFX7-NEXT: s_endpgm
314 ; GFX8-LABEL: raw_buffer_load_v2i32_tfe:
316 ; GFX8-NEXT: v_mov_b32_e32 v4, 0
317 ; GFX8-NEXT: v_mov_b32_e32 v5, v4
318 ; GFX8-NEXT: v_mov_b32_e32 v6, v4
319 ; GFX8-NEXT: buffer_load_dwordx2 v[4:6], off, s[0:3], 0 tfe
320 ; GFX8-NEXT: s_waitcnt vmcnt(0)
321 ; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
322 ; GFX8-NEXT: flat_store_dword v[2:3], v6
323 ; GFX8-NEXT: s_endpgm
325 ; GFX910-LABEL: raw_buffer_load_v2i32_tfe:
327 ; GFX910-NEXT: v_mov_b32_e32 v4, 0
328 ; GFX910-NEXT: v_mov_b32_e32 v5, v4
329 ; GFX910-NEXT: v_mov_b32_e32 v6, v4
330 ; GFX910-NEXT: buffer_load_dwordx2 v[4:6], off, s[0:3], 0 tfe
331 ; GFX910-NEXT: s_waitcnt vmcnt(0)
332 ; GFX910-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
333 ; GFX910-NEXT: global_store_dword v[2:3], v6, off
334 ; GFX910-NEXT: s_endpgm
336 ; GFX11-LABEL: raw_buffer_load_v2i32_tfe:
338 ; GFX11-NEXT: v_mov_b32_e32 v4, 0
339 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
340 ; GFX11-NEXT: v_mov_b32_e32 v5, v4
341 ; GFX11-NEXT: v_mov_b32_e32 v6, v4
342 ; GFX11-NEXT: buffer_load_b64 v[4:6], off, s[0:3], 0 tfe
343 ; GFX11-NEXT: s_waitcnt vmcnt(0)
344 ; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off
345 ; GFX11-NEXT: global_store_b32 v[2:3], v6, off
346 ; GFX11-NEXT: s_nop 0
347 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
348 ; GFX11-NEXT: s_endpgm
350 ; GFX12-LABEL: raw_buffer_load_v2i32_tfe:
352 ; GFX12-NEXT: v_mov_b32_e32 v4, 0
353 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
354 ; GFX12-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
355 ; GFX12-NEXT: buffer_load_b64 v[4:6], off, s[0:3], null tfe
356 ; GFX12-NEXT: s_wait_loadcnt 0x0
357 ; GFX12-NEXT: global_store_b64 v[0:1], v[4:5], off
358 ; GFX12-NEXT: global_store_b32 v[2:3], v6, off
359 ; GFX12-NEXT: s_nop 0
360 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
361 ; GFX12-NEXT: s_endpgm
; Split the { <2 x i32>, i32 } result: the vector goes to %data_addr and the
; trailing i32 (named %tfe here) goes to %tfe_addr.
362 %res = call { <2 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v2i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
363 %data = extractvalue { <2 x i32>, i32 } %res, 0
364 store <2 x i32> %data, ptr addrspace(1) %data_addr
365 %tfe = extractvalue { <2 x i32>, i32 } %res, 1
366 store i32 %tfe, ptr addrspace(1) %tfe_addr
; <2 x float> data plus an extra i32 result. The expected instructions are the
; same as for the <2 x i32> test in this file, including separate check
; blocks for GFX6 and GFX7.
; NOTE(review): the GFX6 block expects dwordx3 into v[4:7] while storing v6 as
; the second result, unlike the dwordx2 / v[4:6] form on the other targets.
; These lines are autogenerated; confirm against current llc output.
370 define amdgpu_ps void @raw_buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
371 ; GFX6-LABEL: raw_buffer_load_v2f32_tfe:
373 ; GFX6-NEXT: v_mov_b32_e32 v4, 0
374 ; GFX6-NEXT: v_mov_b32_e32 v5, v4
375 ; GFX6-NEXT: v_mov_b32_e32 v6, v4
376 ; GFX6-NEXT: v_mov_b32_e32 v7, v4
377 ; GFX6-NEXT: buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
378 ; GFX6-NEXT: s_mov_b32 s2, 0
379 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
380 ; GFX6-NEXT: s_mov_b32 s0, s2
381 ; GFX6-NEXT: s_mov_b32 s1, s2
382 ; GFX6-NEXT: s_waitcnt vmcnt(0)
383 ; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64
384 ; GFX6-NEXT: buffer_store_dword v6, v[2:3], s[0:3], 0 addr64
385 ; GFX6-NEXT: s_endpgm
387 ; GFX7-LABEL: raw_buffer_load_v2f32_tfe:
389 ; GFX7-NEXT: v_mov_b32_e32 v4, 0
390 ; GFX7-NEXT: v_mov_b32_e32 v5, v4
391 ; GFX7-NEXT: v_mov_b32_e32 v6, v4
392 ; GFX7-NEXT: buffer_load_dwordx2 v[4:6], off, s[0:3], 0 tfe
393 ; GFX7-NEXT: s_mov_b32 s2, 0
394 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
395 ; GFX7-NEXT: s_mov_b32 s0, s2
396 ; GFX7-NEXT: s_mov_b32 s1, s2
397 ; GFX7-NEXT: s_waitcnt vmcnt(0)
398 ; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64
399 ; GFX7-NEXT: buffer_store_dword v6, v[2:3], s[0:3], 0 addr64
400 ; GFX7-NEXT: s_endpgm
402 ; GFX8-LABEL: raw_buffer_load_v2f32_tfe:
404 ; GFX8-NEXT: v_mov_b32_e32 v4, 0
405 ; GFX8-NEXT: v_mov_b32_e32 v5, v4
406 ; GFX8-NEXT: v_mov_b32_e32 v6, v4
407 ; GFX8-NEXT: buffer_load_dwordx2 v[4:6], off, s[0:3], 0 tfe
408 ; GFX8-NEXT: s_waitcnt vmcnt(0)
409 ; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
410 ; GFX8-NEXT: flat_store_dword v[2:3], v6
411 ; GFX8-NEXT: s_endpgm
413 ; GFX910-LABEL: raw_buffer_load_v2f32_tfe:
415 ; GFX910-NEXT: v_mov_b32_e32 v4, 0
416 ; GFX910-NEXT: v_mov_b32_e32 v5, v4
417 ; GFX910-NEXT: v_mov_b32_e32 v6, v4
418 ; GFX910-NEXT: buffer_load_dwordx2 v[4:6], off, s[0:3], 0 tfe
419 ; GFX910-NEXT: s_waitcnt vmcnt(0)
420 ; GFX910-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
421 ; GFX910-NEXT: global_store_dword v[2:3], v6, off
422 ; GFX910-NEXT: s_endpgm
424 ; GFX11-LABEL: raw_buffer_load_v2f32_tfe:
426 ; GFX11-NEXT: v_mov_b32_e32 v4, 0
427 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
428 ; GFX11-NEXT: v_mov_b32_e32 v5, v4
429 ; GFX11-NEXT: v_mov_b32_e32 v6, v4
430 ; GFX11-NEXT: buffer_load_b64 v[4:6], off, s[0:3], 0 tfe
431 ; GFX11-NEXT: s_waitcnt vmcnt(0)
432 ; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off
433 ; GFX11-NEXT: global_store_b32 v[2:3], v6, off
434 ; GFX11-NEXT: s_nop 0
435 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
436 ; GFX11-NEXT: s_endpgm
438 ; GFX12-LABEL: raw_buffer_load_v2f32_tfe:
440 ; GFX12-NEXT: v_mov_b32_e32 v4, 0
441 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
442 ; GFX12-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
443 ; GFX12-NEXT: buffer_load_b64 v[4:6], off, s[0:3], null tfe
444 ; GFX12-NEXT: s_wait_loadcnt 0x0
445 ; GFX12-NEXT: global_store_b64 v[0:1], v[4:5], off
446 ; GFX12-NEXT: global_store_b32 v[2:3], v6, off
447 ; GFX12-NEXT: s_nop 0
448 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
449 ; GFX12-NEXT: s_endpgm
; Split the { <2 x float>, i32 } result: the vector goes to %data_addr and the
; trailing i32 (named %tfe here) goes to %tfe_addr.
450 %res = call { <2 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v2f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
451 %data = extractvalue { <2 x float>, i32 } %res, 0
452 store <2 x float> %data, ptr addrspace(1) %data_addr
453 %tfe = extractvalue { <2 x float>, i32 } %res, 1
454 store i32 %tfe, ptr addrspace(1) %tfe_addr
; <3 x i32> data plus an extra i32 result. Every check block expects a
; three-dword load with `tfe` into four zeroed registers v[4:7], with v7
; stored as the second result. The GFX6 and GFX7 blocks differ only in how
; the data is stored: GFX6 expects a dword store at offset:8 plus a dwordx2
; store, while GFX7 expects a single dwordx3 store.
458 define amdgpu_ps void @raw_buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
459 ; GFX6-LABEL: raw_buffer_load_v3i32_tfe:
461 ; GFX6-NEXT: v_mov_b32_e32 v4, 0
462 ; GFX6-NEXT: v_mov_b32_e32 v5, v4
463 ; GFX6-NEXT: v_mov_b32_e32 v6, v4
464 ; GFX6-NEXT: v_mov_b32_e32 v7, v4
465 ; GFX6-NEXT: buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
466 ; GFX6-NEXT: s_mov_b32 s2, 0
467 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
468 ; GFX6-NEXT: s_mov_b32 s0, s2
469 ; GFX6-NEXT: s_mov_b32 s1, s2
470 ; GFX6-NEXT: s_waitcnt vmcnt(0)
471 ; GFX6-NEXT: buffer_store_dword v6, v[0:1], s[0:3], 0 addr64 offset:8
472 ; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64
473 ; GFX6-NEXT: buffer_store_dword v7, v[2:3], s[0:3], 0 addr64
474 ; GFX6-NEXT: s_endpgm
476 ; GFX7-LABEL: raw_buffer_load_v3i32_tfe:
478 ; GFX7-NEXT: v_mov_b32_e32 v4, 0
479 ; GFX7-NEXT: v_mov_b32_e32 v5, v4
480 ; GFX7-NEXT: v_mov_b32_e32 v6, v4
481 ; GFX7-NEXT: v_mov_b32_e32 v7, v4
482 ; GFX7-NEXT: buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
483 ; GFX7-NEXT: s_mov_b32 s2, 0
484 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
485 ; GFX7-NEXT: s_mov_b32 s0, s2
486 ; GFX7-NEXT: s_mov_b32 s1, s2
487 ; GFX7-NEXT: s_waitcnt vmcnt(0)
488 ; GFX7-NEXT: buffer_store_dwordx3 v[4:6], v[0:1], s[0:3], 0 addr64
489 ; GFX7-NEXT: buffer_store_dword v7, v[2:3], s[0:3], 0 addr64
490 ; GFX7-NEXT: s_endpgm
492 ; GFX8-LABEL: raw_buffer_load_v3i32_tfe:
494 ; GFX8-NEXT: v_mov_b32_e32 v4, 0
495 ; GFX8-NEXT: v_mov_b32_e32 v5, v4
496 ; GFX8-NEXT: v_mov_b32_e32 v6, v4
497 ; GFX8-NEXT: v_mov_b32_e32 v7, v4
498 ; GFX8-NEXT: buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
499 ; GFX8-NEXT: s_waitcnt vmcnt(0)
500 ; GFX8-NEXT: flat_store_dwordx3 v[0:1], v[4:6]
501 ; GFX8-NEXT: flat_store_dword v[2:3], v7
502 ; GFX8-NEXT: s_endpgm
504 ; GFX910-LABEL: raw_buffer_load_v3i32_tfe:
506 ; GFX910-NEXT: v_mov_b32_e32 v4, 0
507 ; GFX910-NEXT: v_mov_b32_e32 v5, v4
508 ; GFX910-NEXT: v_mov_b32_e32 v6, v4
509 ; GFX910-NEXT: v_mov_b32_e32 v7, v4
510 ; GFX910-NEXT: buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
511 ; GFX910-NEXT: s_waitcnt vmcnt(0)
512 ; GFX910-NEXT: global_store_dwordx3 v[0:1], v[4:6], off
513 ; GFX910-NEXT: global_store_dword v[2:3], v7, off
514 ; GFX910-NEXT: s_endpgm
516 ; GFX11-LABEL: raw_buffer_load_v3i32_tfe:
518 ; GFX11-NEXT: v_mov_b32_e32 v4, 0
519 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
520 ; GFX11-NEXT: v_mov_b32_e32 v5, v4
521 ; GFX11-NEXT: v_mov_b32_e32 v6, v4
522 ; GFX11-NEXT: v_mov_b32_e32 v7, v4
523 ; GFX11-NEXT: buffer_load_b96 v[4:7], off, s[0:3], 0 tfe
524 ; GFX11-NEXT: s_waitcnt vmcnt(0)
525 ; GFX11-NEXT: global_store_b96 v[0:1], v[4:6], off
526 ; GFX11-NEXT: global_store_b32 v[2:3], v7, off
527 ; GFX11-NEXT: s_nop 0
528 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
529 ; GFX11-NEXT: s_endpgm
531 ; GFX12-LABEL: raw_buffer_load_v3i32_tfe:
533 ; GFX12-NEXT: v_mov_b32_e32 v4, 0
534 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
535 ; GFX12-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
536 ; GFX12-NEXT: v_mov_b32_e32 v7, v4
537 ; GFX12-NEXT: buffer_load_b96 v[4:7], off, s[0:3], null tfe
538 ; GFX12-NEXT: s_wait_loadcnt 0x0
539 ; GFX12-NEXT: global_store_b96 v[0:1], v[4:6], off
540 ; GFX12-NEXT: global_store_b32 v[2:3], v7, off
541 ; GFX12-NEXT: s_nop 0
542 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
543 ; GFX12-NEXT: s_endpgm
; Split the { <3 x i32>, i32 } result: the vector goes to %data_addr and the
; trailing i32 (named %tfe here) goes to %tfe_addr.
544 %res = call { <3 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v3i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
545 %data = extractvalue { <3 x i32>, i32 } %res, 0
546 store <3 x i32> %data, ptr addrspace(1) %data_addr
547 %tfe = extractvalue { <3 x i32>, i32 } %res, 1
548 store i32 %tfe, ptr addrspace(1) %tfe_addr
; <3 x float> data plus an extra i32 result. The expected instructions are the
; same as for the <3 x i32> test in this file: a three-dword load with `tfe`
; into the zeroed range v[4:7], v7 stored as the second result, and the GFX6
; block splitting the data store (dword at offset:8 plus dwordx2) where GFX7
; uses one dwordx3 store.
552 define amdgpu_ps void @raw_buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
553 ; GFX6-LABEL: raw_buffer_load_v3f32_tfe:
555 ; GFX6-NEXT: v_mov_b32_e32 v4, 0
556 ; GFX6-NEXT: v_mov_b32_e32 v5, v4
557 ; GFX6-NEXT: v_mov_b32_e32 v6, v4
558 ; GFX6-NEXT: v_mov_b32_e32 v7, v4
559 ; GFX6-NEXT: buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
560 ; GFX6-NEXT: s_mov_b32 s2, 0
561 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
562 ; GFX6-NEXT: s_mov_b32 s0, s2
563 ; GFX6-NEXT: s_mov_b32 s1, s2
564 ; GFX6-NEXT: s_waitcnt vmcnt(0)
565 ; GFX6-NEXT: buffer_store_dword v6, v[0:1], s[0:3], 0 addr64 offset:8
566 ; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64
567 ; GFX6-NEXT: buffer_store_dword v7, v[2:3], s[0:3], 0 addr64
568 ; GFX6-NEXT: s_endpgm
570 ; GFX7-LABEL: raw_buffer_load_v3f32_tfe:
572 ; GFX7-NEXT: v_mov_b32_e32 v4, 0
573 ; GFX7-NEXT: v_mov_b32_e32 v5, v4
574 ; GFX7-NEXT: v_mov_b32_e32 v6, v4
575 ; GFX7-NEXT: v_mov_b32_e32 v7, v4
576 ; GFX7-NEXT: buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
577 ; GFX7-NEXT: s_mov_b32 s2, 0
578 ; GFX7-NEXT: s_mov_b32 s3, 0xf000
579 ; GFX7-NEXT: s_mov_b32 s0, s2
580 ; GFX7-NEXT: s_mov_b32 s1, s2
581 ; GFX7-NEXT: s_waitcnt vmcnt(0)
582 ; GFX7-NEXT: buffer_store_dwordx3 v[4:6], v[0:1], s[0:3], 0 addr64
583 ; GFX7-NEXT: buffer_store_dword v7, v[2:3], s[0:3], 0 addr64
584 ; GFX7-NEXT: s_endpgm
586 ; GFX8-LABEL: raw_buffer_load_v3f32_tfe:
588 ; GFX8-NEXT: v_mov_b32_e32 v4, 0
589 ; GFX8-NEXT: v_mov_b32_e32 v5, v4
590 ; GFX8-NEXT: v_mov_b32_e32 v6, v4
591 ; GFX8-NEXT: v_mov_b32_e32 v7, v4
592 ; GFX8-NEXT: buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
593 ; GFX8-NEXT: s_waitcnt vmcnt(0)
594 ; GFX8-NEXT: flat_store_dwordx3 v[0:1], v[4:6]
595 ; GFX8-NEXT: flat_store_dword v[2:3], v7
596 ; GFX8-NEXT: s_endpgm
598 ; GFX910-LABEL: raw_buffer_load_v3f32_tfe:
600 ; GFX910-NEXT: v_mov_b32_e32 v4, 0
601 ; GFX910-NEXT: v_mov_b32_e32 v5, v4
602 ; GFX910-NEXT: v_mov_b32_e32 v6, v4
603 ; GFX910-NEXT: v_mov_b32_e32 v7, v4
604 ; GFX910-NEXT: buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
605 ; GFX910-NEXT: s_waitcnt vmcnt(0)
606 ; GFX910-NEXT: global_store_dwordx3 v[0:1], v[4:6], off
607 ; GFX910-NEXT: global_store_dword v[2:3], v7, off
608 ; GFX910-NEXT: s_endpgm
610 ; GFX11-LABEL: raw_buffer_load_v3f32_tfe:
612 ; GFX11-NEXT: v_mov_b32_e32 v4, 0
613 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
614 ; GFX11-NEXT: v_mov_b32_e32 v5, v4
615 ; GFX11-NEXT: v_mov_b32_e32 v6, v4
616 ; GFX11-NEXT: v_mov_b32_e32 v7, v4
617 ; GFX11-NEXT: buffer_load_b96 v[4:7], off, s[0:3], 0 tfe
618 ; GFX11-NEXT: s_waitcnt vmcnt(0)
619 ; GFX11-NEXT: global_store_b96 v[0:1], v[4:6], off
620 ; GFX11-NEXT: global_store_b32 v[2:3], v7, off
621 ; GFX11-NEXT: s_nop 0
622 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
623 ; GFX11-NEXT: s_endpgm
625 ; GFX12-LABEL: raw_buffer_load_v3f32_tfe:
627 ; GFX12-NEXT: v_mov_b32_e32 v4, 0
628 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
629 ; GFX12-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
630 ; GFX12-NEXT: v_mov_b32_e32 v7, v4
631 ; GFX12-NEXT: buffer_load_b96 v[4:7], off, s[0:3], null tfe
632 ; GFX12-NEXT: s_wait_loadcnt 0x0
633 ; GFX12-NEXT: global_store_b96 v[0:1], v[4:6], off
634 ; GFX12-NEXT: global_store_b32 v[2:3], v7, off
635 ; GFX12-NEXT: s_nop 0
636 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
637 ; GFX12-NEXT: s_endpgm
; Split the { <3 x float>, i32 } result: the vector goes to %data_addr and the
; trailing i32 (named %tfe here) goes to %tfe_addr.
638 %res = call { <3 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v3f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
639 %data = extractvalue { <3 x float>, i32 } %res, 0
640 store <3 x float> %data, ptr addrspace(1) %data_addr
641 %tfe = extractvalue { <3 x float>, i32 } %res, 1
642 store i32 %tfe, ptr addrspace(1) %tfe_addr
; <4 x i32> data plus an extra i32 result. Every check block expects a
; four-dword load (dwordx4 / b128 spelling depending on the target) with
; `tfe` into five zeroed registers v[4:8]; v[4:7] is stored as data and v8 as
; the second result. GFX6 and GFX7 share one check block again here. The
; GFX12 block expects the zero copies to be paired into v_dual_mov_b32.
646 define amdgpu_ps void @raw_buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
647 ; GFX67-LABEL: raw_buffer_load_v4i32_tfe:
649 ; GFX67-NEXT: v_mov_b32_e32 v4, 0
650 ; GFX67-NEXT: v_mov_b32_e32 v5, v4
651 ; GFX67-NEXT: v_mov_b32_e32 v6, v4
652 ; GFX67-NEXT: v_mov_b32_e32 v7, v4
653 ; GFX67-NEXT: v_mov_b32_e32 v8, v4
654 ; GFX67-NEXT: buffer_load_dwordx4 v[4:8], off, s[0:3], 0 tfe
655 ; GFX67-NEXT: s_mov_b32 s2, 0
656 ; GFX67-NEXT: s_mov_b32 s3, 0xf000
657 ; GFX67-NEXT: s_mov_b32 s0, s2
658 ; GFX67-NEXT: s_mov_b32 s1, s2
659 ; GFX67-NEXT: s_waitcnt vmcnt(0)
660 ; GFX67-NEXT: buffer_store_dwordx4 v[4:7], v[0:1], s[0:3], 0 addr64
661 ; GFX67-NEXT: buffer_store_dword v8, v[2:3], s[0:3], 0 addr64
662 ; GFX67-NEXT: s_endpgm
664 ; GFX8-LABEL: raw_buffer_load_v4i32_tfe:
666 ; GFX8-NEXT: v_mov_b32_e32 v4, 0
667 ; GFX8-NEXT: v_mov_b32_e32 v5, v4
668 ; GFX8-NEXT: v_mov_b32_e32 v6, v4
669 ; GFX8-NEXT: v_mov_b32_e32 v7, v4
670 ; GFX8-NEXT: v_mov_b32_e32 v8, v4
671 ; GFX8-NEXT: buffer_load_dwordx4 v[4:8], off, s[0:3], 0 tfe
672 ; GFX8-NEXT: s_waitcnt vmcnt(0)
673 ; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
674 ; GFX8-NEXT: flat_store_dword v[2:3], v8
675 ; GFX8-NEXT: s_endpgm
677 ; GFX910-LABEL: raw_buffer_load_v4i32_tfe:
679 ; GFX910-NEXT: v_mov_b32_e32 v4, 0
680 ; GFX910-NEXT: v_mov_b32_e32 v5, v4
681 ; GFX910-NEXT: v_mov_b32_e32 v6, v4
682 ; GFX910-NEXT: v_mov_b32_e32 v7, v4
683 ; GFX910-NEXT: v_mov_b32_e32 v8, v4
684 ; GFX910-NEXT: buffer_load_dwordx4 v[4:8], off, s[0:3], 0 tfe
685 ; GFX910-NEXT: s_waitcnt vmcnt(0)
686 ; GFX910-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
687 ; GFX910-NEXT: global_store_dword v[2:3], v8, off
688 ; GFX910-NEXT: s_endpgm
690 ; GFX11-LABEL: raw_buffer_load_v4i32_tfe:
692 ; GFX11-NEXT: v_mov_b32_e32 v4, 0
693 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
694 ; GFX11-NEXT: v_mov_b32_e32 v5, v4
695 ; GFX11-NEXT: v_mov_b32_e32 v6, v4
696 ; GFX11-NEXT: v_mov_b32_e32 v7, v4
697 ; GFX11-NEXT: v_mov_b32_e32 v8, v4
698 ; GFX11-NEXT: buffer_load_b128 v[4:8], off, s[0:3], 0 tfe
699 ; GFX11-NEXT: s_waitcnt vmcnt(0)
700 ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
701 ; GFX11-NEXT: global_store_b32 v[2:3], v8, off
702 ; GFX11-NEXT: s_nop 0
703 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
704 ; GFX11-NEXT: s_endpgm
706 ; GFX12-LABEL: raw_buffer_load_v4i32_tfe:
708 ; GFX12-NEXT: v_mov_b32_e32 v4, 0
709 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
710 ; GFX12-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
711 ; GFX12-NEXT: v_dual_mov_b32 v7, v4 :: v_dual_mov_b32 v8, v4
712 ; GFX12-NEXT: buffer_load_b128 v[4:8], off, s[0:3], null tfe
713 ; GFX12-NEXT: s_wait_loadcnt 0x0
714 ; GFX12-NEXT: global_store_b128 v[0:1], v[4:7], off
715 ; GFX12-NEXT: global_store_b32 v[2:3], v8, off
716 ; GFX12-NEXT: s_nop 0
717 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
718 ; GFX12-NEXT: s_endpgm
; Split the { <4 x i32>, i32 } result: the vector goes to %data_addr and the
; trailing i32 (named %tfe here) goes to %tfe_addr.
719 %res = call { <4 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v4i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
720 %data = extractvalue { <4 x i32>, i32 } %res, 0
721 store <4 x i32> %data, ptr addrspace(1) %data_addr
722 %tfe = extractvalue { <4 x i32>, i32 } %res, 1
723 store i32 %tfe, ptr addrspace(1) %tfe_addr
; <4 x float> data plus an extra i32 result. The expected instructions are the
; same as for the <4 x i32> test in this file: a four-dword load with `tfe`
; into the zeroed range v[4:8], v[4:7] stored as data and v8 stored as the
; second result.
727 define amdgpu_ps void @raw_buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
728 ; GFX67-LABEL: raw_buffer_load_v4f32_tfe:
730 ; GFX67-NEXT: v_mov_b32_e32 v4, 0
731 ; GFX67-NEXT: v_mov_b32_e32 v5, v4
732 ; GFX67-NEXT: v_mov_b32_e32 v6, v4
733 ; GFX67-NEXT: v_mov_b32_e32 v7, v4
734 ; GFX67-NEXT: v_mov_b32_e32 v8, v4
735 ; GFX67-NEXT: buffer_load_dwordx4 v[4:8], off, s[0:3], 0 tfe
736 ; GFX67-NEXT: s_mov_b32 s2, 0
737 ; GFX67-NEXT: s_mov_b32 s3, 0xf000
738 ; GFX67-NEXT: s_mov_b32 s0, s2
739 ; GFX67-NEXT: s_mov_b32 s1, s2
740 ; GFX67-NEXT: s_waitcnt vmcnt(0)
741 ; GFX67-NEXT: buffer_store_dwordx4 v[4:7], v[0:1], s[0:3], 0 addr64
742 ; GFX67-NEXT: buffer_store_dword v8, v[2:3], s[0:3], 0 addr64
743 ; GFX67-NEXT: s_endpgm
745 ; GFX8-LABEL: raw_buffer_load_v4f32_tfe:
747 ; GFX8-NEXT: v_mov_b32_e32 v4, 0
748 ; GFX8-NEXT: v_mov_b32_e32 v5, v4
749 ; GFX8-NEXT: v_mov_b32_e32 v6, v4
750 ; GFX8-NEXT: v_mov_b32_e32 v7, v4
751 ; GFX8-NEXT: v_mov_b32_e32 v8, v4
752 ; GFX8-NEXT: buffer_load_dwordx4 v[4:8], off, s[0:3], 0 tfe
753 ; GFX8-NEXT: s_waitcnt vmcnt(0)
754 ; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
755 ; GFX8-NEXT: flat_store_dword v[2:3], v8
756 ; GFX8-NEXT: s_endpgm
758 ; GFX910-LABEL: raw_buffer_load_v4f32_tfe:
760 ; GFX910-NEXT: v_mov_b32_e32 v4, 0
761 ; GFX910-NEXT: v_mov_b32_e32 v5, v4
762 ; GFX910-NEXT: v_mov_b32_e32 v6, v4
763 ; GFX910-NEXT: v_mov_b32_e32 v7, v4
764 ; GFX910-NEXT: v_mov_b32_e32 v8, v4
765 ; GFX910-NEXT: buffer_load_dwordx4 v[4:8], off, s[0:3], 0 tfe
766 ; GFX910-NEXT: s_waitcnt vmcnt(0)
767 ; GFX910-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
768 ; GFX910-NEXT: global_store_dword v[2:3], v8, off
769 ; GFX910-NEXT: s_endpgm
771 ; GFX11-LABEL: raw_buffer_load_v4f32_tfe:
773 ; GFX11-NEXT: v_mov_b32_e32 v4, 0
774 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
775 ; GFX11-NEXT: v_mov_b32_e32 v5, v4
776 ; GFX11-NEXT: v_mov_b32_e32 v6, v4
777 ; GFX11-NEXT: v_mov_b32_e32 v7, v4
778 ; GFX11-NEXT: v_mov_b32_e32 v8, v4
779 ; GFX11-NEXT: buffer_load_b128 v[4:8], off, s[0:3], 0 tfe
780 ; GFX11-NEXT: s_waitcnt vmcnt(0)
781 ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
782 ; GFX11-NEXT: global_store_b32 v[2:3], v8, off
783 ; GFX11-NEXT: s_nop 0
784 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
785 ; GFX11-NEXT: s_endpgm
787 ; GFX12-LABEL: raw_buffer_load_v4f32_tfe:
789 ; GFX12-NEXT: v_mov_b32_e32 v4, 0
790 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
791 ; GFX12-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
792 ; GFX12-NEXT: v_dual_mov_b32 v7, v4 :: v_dual_mov_b32 v8, v4
793 ; GFX12-NEXT: buffer_load_b128 v[4:8], off, s[0:3], null tfe
794 ; GFX12-NEXT: s_wait_loadcnt 0x0
795 ; GFX12-NEXT: global_store_b128 v[0:1], v[4:7], off
796 ; GFX12-NEXT: global_store_b32 v[2:3], v8, off
797 ; GFX12-NEXT: s_nop 0
798 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
799 ; GFX12-NEXT: s_endpgm
; Split the { <4 x float>, i32 } result: the vector goes to %data_addr and the
; trailing i32 (named %tfe here) goes to %tfe_addr.
800 %res = call { <4 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v4f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
801 %data = extractvalue { <4 x float>, i32 } %res, 0
802 store <4 x float> %data, ptr addrspace(1) %data_addr
803 %tfe = extractvalue { <4 x float>, i32 } %res, 1
804 store i32 %tfe, ptr addrspace(1) %tfe_addr
; Declarations of the struct-returning llvm.amdgcn.raw.buffer.load overloads
; called by the tests above, one per data type. Each returns { data, i32 } and
; takes the <4 x i32> resource followed by three i32 operands, which every
; test in this file passes as 0.
808 declare { i8, i32 } @llvm.amdgcn.raw.buffer.load.sl_i8i32s(<4 x i32>, i32, i32, i32)
809 declare { i16, i32 } @llvm.amdgcn.raw.buffer.load.sl_i16i32s(<4 x i32>, i32, i32, i32)
810 declare { half, i32 } @llvm.amdgcn.raw.buffer.load.sl_f16i32s(<4 x i32>, i32, i32, i32)
811 declare { i32, i32 } @llvm.amdgcn.raw.buffer.load.sl_i32i32s(<4 x i32>, i32, i32, i32)
812 declare { <2 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v2i32i32s(<4 x i32>, i32, i32, i32)
813 declare { <2 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v2f32i32s(<4 x i32>, i32, i32, i32)
814 declare { <3 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v3i32i32s(<4 x i32>, i32, i32, i32)
815 declare { <3 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v3f32i32s(<4 x i32>, i32, i32, i32)
816 declare { <4 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v4i32i32s(<4 x i32>, i32, i32, i32)
817 declare { <4 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v4f32i32s(<4 x i32>, i32, i32, i32)
818 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: