; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mcpu=tahiti -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX67,GFX6
; RUN: llc -mcpu=hawaii -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX67,GFX7
; RUN: llc -mcpu=fiji -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX8
; RUN: llc -mcpu=gfx900 -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX910,GFX9
; RUN: llc -mcpu=gfx1010 -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX910,GFX10
; RUN: llc -mcpu=gfx1100 -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX11
; RUN: llc -mcpu=gfx1200 -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12
; Raw buffer load of an i8 through the struct-returning (tfe) overload.
; The intrinsic yields { i8, i32 }: element 0 is stored to %data_addr as the
; loaded byte and element 1 is stored to %tfe_addr as a full dword.
; Every run expects the two result VGPRs to be zero-initialized before a
; byte-sized buffer load carrying the `tfe` modifier is issued.
define amdgpu_ps void @raw_buffer_load_i8_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
; GFX67-LABEL: raw_buffer_load_i8_tfe:
; GFX67:       ; %bb.0:
; GFX67-NEXT:    v_mov_b32_e32 v4, 0
; GFX67-NEXT:    v_mov_b32_e32 v5, v4
; GFX67-NEXT:    buffer_load_ubyte v[4:5], off, s[0:3], 0 tfe
; GFX67-NEXT:    s_mov_b32 s2, 0
; GFX67-NEXT:    s_mov_b32 s3, 0xf000
; GFX67-NEXT:    s_mov_b32 s0, s2
; GFX67-NEXT:    s_mov_b32 s1, s2
; GFX67-NEXT:    s_waitcnt vmcnt(0)
; GFX67-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64
; GFX67-NEXT:    buffer_store_dword v5, v[2:3], s[0:3], 0 addr64
; GFX67-NEXT:    s_endpgm
;
; GFX8-LABEL: raw_buffer_load_i8_tfe:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v4, 0
; GFX8-NEXT:    v_mov_b32_e32 v5, v4
; GFX8-NEXT:    buffer_load_ubyte v[4:5], off, s[0:3], 0 tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_store_byte v[0:1], v4
; GFX8-NEXT:    flat_store_dword v[2:3], v5
; GFX8-NEXT:    s_endpgm
;
; GFX910-LABEL: raw_buffer_load_i8_tfe:
; GFX910:       ; %bb.0:
; GFX910-NEXT:    v_mov_b32_e32 v4, 0
; GFX910-NEXT:    v_mov_b32_e32 v5, v4
; GFX910-NEXT:    buffer_load_ubyte v[4:5], off, s[0:3], 0 tfe
; GFX910-NEXT:    s_waitcnt vmcnt(0)
; GFX910-NEXT:    global_store_byte v[0:1], v4, off
; GFX910-NEXT:    global_store_dword v[2:3], v5, off
; GFX910-NEXT:    s_endpgm
;
; GFX11-LABEL: raw_buffer_load_i8_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v4, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v5, v4
; GFX11-NEXT:    buffer_load_u8 v[4:5], off, s[0:3], 0 tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v4, off
; GFX11-NEXT:    global_store_b32 v[2:3], v5, off
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: raw_buffer_load_i8_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v4, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v5, v4
; GFX12-NEXT:    buffer_load_u8 v[4:5], off, s[0:3], null tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b8 v[0:1], v4, off
; GFX12-NEXT:    global_store_b32 v[2:3], v5, off
; GFX12-NEXT:    s_endpgm
  %res = call { i8, i32 } @llvm.amdgcn.raw.buffer.load.sl_i8i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
  %data = extractvalue { i8, i32 } %res, 0
  store i8 %data, ptr addrspace(1) %data_addr
  %tfe = extractvalue { i8, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) %tfe_addr
  ret void
}
; Raw buffer load of an i16 through the struct-returning (tfe) overload.
; Element 0 of the { i16, i32 } result is stored to %data_addr as a 16-bit
; value; element 1 is stored to %tfe_addr as a dword.
; Every run expects zero-initialized result VGPRs followed by a 16-bit buffer
; load carrying the `tfe` modifier.
define amdgpu_ps void @raw_buffer_load_i16_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
; GFX67-LABEL: raw_buffer_load_i16_tfe:
; GFX67:       ; %bb.0:
; GFX67-NEXT:    v_mov_b32_e32 v4, 0
; GFX67-NEXT:    v_mov_b32_e32 v5, v4
; GFX67-NEXT:    buffer_load_ushort v[4:5], off, s[0:3], 0 tfe
; GFX67-NEXT:    s_mov_b32 s2, 0
; GFX67-NEXT:    s_mov_b32 s3, 0xf000
; GFX67-NEXT:    s_mov_b32 s0, s2
; GFX67-NEXT:    s_mov_b32 s1, s2
; GFX67-NEXT:    s_waitcnt vmcnt(0)
; GFX67-NEXT:    buffer_store_short v4, v[0:1], s[0:3], 0 addr64
; GFX67-NEXT:    buffer_store_dword v5, v[2:3], s[0:3], 0 addr64
; GFX67-NEXT:    s_endpgm
;
; GFX8-LABEL: raw_buffer_load_i16_tfe:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v4, 0
; GFX8-NEXT:    v_mov_b32_e32 v5, v4
; GFX8-NEXT:    buffer_load_ushort v[4:5], off, s[0:3], 0 tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_store_short v[0:1], v4
; GFX8-NEXT:    flat_store_dword v[2:3], v5
; GFX8-NEXT:    s_endpgm
;
; GFX910-LABEL: raw_buffer_load_i16_tfe:
; GFX910:       ; %bb.0:
; GFX910-NEXT:    v_mov_b32_e32 v4, 0
; GFX910-NEXT:    v_mov_b32_e32 v5, v4
; GFX910-NEXT:    buffer_load_ushort v[4:5], off, s[0:3], 0 tfe
; GFX910-NEXT:    s_waitcnt vmcnt(0)
; GFX910-NEXT:    global_store_short v[0:1], v4, off
; GFX910-NEXT:    global_store_dword v[2:3], v5, off
; GFX910-NEXT:    s_endpgm
;
; GFX11-LABEL: raw_buffer_load_i16_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v4, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v5, v4
; GFX11-NEXT:    buffer_load_u16 v[4:5], off, s[0:3], 0 tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b16 v[0:1], v4, off
; GFX11-NEXT:    global_store_b32 v[2:3], v5, off
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: raw_buffer_load_i16_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v4, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v5, v4
; GFX12-NEXT:    buffer_load_u16 v[4:5], off, s[0:3], null tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b16 v[0:1], v4, off
; GFX12-NEXT:    global_store_b32 v[2:3], v5, off
; GFX12-NEXT:    s_endpgm
  %res = call { i16, i32 } @llvm.amdgcn.raw.buffer.load.sl_i16i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
  %data = extractvalue { i16, i32 } %res, 0
  store i16 %data, ptr addrspace(1) %data_addr
  %tfe = extractvalue { i16, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) %tfe_addr
  ret void
}
; Raw buffer load of a half through the struct-returning (tfe) overload.
; Element 0 of the { half, i32 } result is stored to %data_addr; element 1 is
; stored to %tfe_addr as a dword.
; The expected instruction sequences are the same as for the i16 variant:
; zero-initialized result VGPRs, then a 16-bit buffer load with `tfe`.
define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
; GFX67-LABEL: raw_buffer_load_f16_tfe:
; GFX67:       ; %bb.0:
; GFX67-NEXT:    v_mov_b32_e32 v4, 0
; GFX67-NEXT:    v_mov_b32_e32 v5, v4
; GFX67-NEXT:    buffer_load_ushort v[4:5], off, s[0:3], 0 tfe
; GFX67-NEXT:    s_mov_b32 s2, 0
; GFX67-NEXT:    s_mov_b32 s3, 0xf000
; GFX67-NEXT:    s_mov_b32 s0, s2
; GFX67-NEXT:    s_mov_b32 s1, s2
; GFX67-NEXT:    s_waitcnt vmcnt(0)
; GFX67-NEXT:    buffer_store_short v4, v[0:1], s[0:3], 0 addr64
; GFX67-NEXT:    buffer_store_dword v5, v[2:3], s[0:3], 0 addr64
; GFX67-NEXT:    s_endpgm
;
; GFX8-LABEL: raw_buffer_load_f16_tfe:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v4, 0
; GFX8-NEXT:    v_mov_b32_e32 v5, v4
; GFX8-NEXT:    buffer_load_ushort v[4:5], off, s[0:3], 0 tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_store_short v[0:1], v4
; GFX8-NEXT:    flat_store_dword v[2:3], v5
; GFX8-NEXT:    s_endpgm
;
; GFX910-LABEL: raw_buffer_load_f16_tfe:
; GFX910:       ; %bb.0:
; GFX910-NEXT:    v_mov_b32_e32 v4, 0
; GFX910-NEXT:    v_mov_b32_e32 v5, v4
; GFX910-NEXT:    buffer_load_ushort v[4:5], off, s[0:3], 0 tfe
; GFX910-NEXT:    s_waitcnt vmcnt(0)
; GFX910-NEXT:    global_store_short v[0:1], v4, off
; GFX910-NEXT:    global_store_dword v[2:3], v5, off
; GFX910-NEXT:    s_endpgm
;
; GFX11-LABEL: raw_buffer_load_f16_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v4, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v5, v4
; GFX11-NEXT:    buffer_load_u16 v[4:5], off, s[0:3], 0 tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b16 v[0:1], v4, off
; GFX11-NEXT:    global_store_b32 v[2:3], v5, off
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: raw_buffer_load_f16_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v4, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v5, v4
; GFX12-NEXT:    buffer_load_u16 v[4:5], off, s[0:3], null tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b16 v[0:1], v4, off
; GFX12-NEXT:    global_store_b32 v[2:3], v5, off
; GFX12-NEXT:    s_endpgm
  %res = call { half, i32 } @llvm.amdgcn.raw.buffer.load.sl_f16i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
  %data = extractvalue { half, i32 } %res, 0
  store half %data, ptr addrspace(1) %data_addr
  %tfe = extractvalue { half, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) %tfe_addr
  ret void
}
; Raw buffer load of an i32 through the struct-returning (tfe) overload.
; Element 0 of the { i32, i32 } result is stored to %data_addr and element 1
; to %tfe_addr, both as dwords.
; Every run expects two zero-initialized result VGPRs followed by a
; single-dword buffer load carrying the `tfe` modifier.
define amdgpu_ps void @raw_buffer_load_i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
; GFX67-LABEL: raw_buffer_load_i32_tfe:
; GFX67:       ; %bb.0:
; GFX67-NEXT:    v_mov_b32_e32 v4, 0
; GFX67-NEXT:    v_mov_b32_e32 v5, v4
; GFX67-NEXT:    buffer_load_dword v[4:5], off, s[0:3], 0 tfe
; GFX67-NEXT:    s_mov_b32 s2, 0
; GFX67-NEXT:    s_mov_b32 s3, 0xf000
; GFX67-NEXT:    s_mov_b32 s0, s2
; GFX67-NEXT:    s_mov_b32 s1, s2
; GFX67-NEXT:    s_waitcnt vmcnt(0)
; GFX67-NEXT:    buffer_store_dword v4, v[0:1], s[0:3], 0 addr64
; GFX67-NEXT:    buffer_store_dword v5, v[2:3], s[0:3], 0 addr64
; GFX67-NEXT:    s_endpgm
;
; GFX8-LABEL: raw_buffer_load_i32_tfe:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v4, 0
; GFX8-NEXT:    v_mov_b32_e32 v5, v4
; GFX8-NEXT:    buffer_load_dword v[4:5], off, s[0:3], 0 tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_store_dword v[0:1], v4
; GFX8-NEXT:    flat_store_dword v[2:3], v5
; GFX8-NEXT:    s_endpgm
;
; GFX910-LABEL: raw_buffer_load_i32_tfe:
; GFX910:       ; %bb.0:
; GFX910-NEXT:    v_mov_b32_e32 v4, 0
; GFX910-NEXT:    v_mov_b32_e32 v5, v4
; GFX910-NEXT:    buffer_load_dword v[4:5], off, s[0:3], 0 tfe
; GFX910-NEXT:    s_waitcnt vmcnt(0)
; GFX910-NEXT:    global_store_dword v[0:1], v4, off
; GFX910-NEXT:    global_store_dword v[2:3], v5, off
; GFX910-NEXT:    s_endpgm
;
; GFX11-LABEL: raw_buffer_load_i32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v4, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v5, v4
; GFX11-NEXT:    buffer_load_b32 v[4:5], off, s[0:3], 0 tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v[0:1], v4, off
; GFX11-NEXT:    global_store_b32 v[2:3], v5, off
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: raw_buffer_load_i32_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v4, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v5, v4
; GFX12-NEXT:    buffer_load_b32 v[4:5], off, s[0:3], null tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b32 v[0:1], v4, off
; GFX12-NEXT:    global_store_b32 v[2:3], v5, off
; GFX12-NEXT:    s_endpgm
  %res = call { i32, i32 } @llvm.amdgcn.raw.buffer.load.sl_i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
  %data = extractvalue { i32, i32 } %res, 0
  store i32 %data, ptr addrspace(1) %data_addr
  %tfe = extractvalue { i32, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) %tfe_addr
  ret void
}
; Raw buffer load of a <2 x i32> through the struct-returning (tfe) overload.
; Element 0 of the { <2 x i32>, i32 } result is stored to %data_addr and
; element 1 (a dword) to %tfe_addr.
; The tahiti run has its own check group here: it expects a dwordx3 load into
; four zero-initialized VGPRs, while the other runs expect a two-dword load
; into three zero-initialized VGPRs. All groups store the third register (v6)
; to %tfe_addr.
define amdgpu_ps void @raw_buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
; GFX6-LABEL: raw_buffer_load_v2i32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v4, 0
; GFX6-NEXT:    v_mov_b32_e32 v5, v4
; GFX6-NEXT:    v_mov_b32_e32 v6, v4
; GFX6-NEXT:    v_mov_b32_e32 v7, v4
; GFX6-NEXT:    buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    buffer_store_dword v6, v[2:3], s[0:3], 0 addr64
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_load_v2i32_tfe:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_mov_b32_e32 v4, 0
; GFX7-NEXT:    v_mov_b32_e32 v5, v4
; GFX7-NEXT:    v_mov_b32_e32 v6, v4
; GFX7-NEXT:    buffer_load_dwordx2 v[4:6], off, s[0:3], 0 tfe
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s2
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_store_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    buffer_store_dword v6, v[2:3], s[0:3], 0 addr64
; GFX7-NEXT:    s_endpgm
;
; GFX8-LABEL: raw_buffer_load_v2i32_tfe:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v4, 0
; GFX8-NEXT:    v_mov_b32_e32 v5, v4
; GFX8-NEXT:    v_mov_b32_e32 v6, v4
; GFX8-NEXT:    buffer_load_dwordx2 v[4:6], off, s[0:3], 0 tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[4:5]
; GFX8-NEXT:    flat_store_dword v[2:3], v6
; GFX8-NEXT:    s_endpgm
;
; GFX910-LABEL: raw_buffer_load_v2i32_tfe:
; GFX910:       ; %bb.0:
; GFX910-NEXT:    v_mov_b32_e32 v4, 0
; GFX910-NEXT:    v_mov_b32_e32 v5, v4
; GFX910-NEXT:    v_mov_b32_e32 v6, v4
; GFX910-NEXT:    buffer_load_dwordx2 v[4:6], off, s[0:3], 0 tfe
; GFX910-NEXT:    s_waitcnt vmcnt(0)
; GFX910-NEXT:    global_store_dwordx2 v[0:1], v[4:5], off
; GFX910-NEXT:    global_store_dword v[2:3], v6, off
; GFX910-NEXT:    s_endpgm
;
; GFX11-LABEL: raw_buffer_load_v2i32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v4, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v5, v4
; GFX11-NEXT:    v_mov_b32_e32 v6, v4
; GFX11-NEXT:    buffer_load_b64 v[4:6], off, s[0:3], 0 tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b64 v[0:1], v[4:5], off
; GFX11-NEXT:    global_store_b32 v[2:3], v6, off
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: raw_buffer_load_v2i32_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v4, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
; GFX12-NEXT:    buffer_load_b64 v[4:6], off, s[0:3], null tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b64 v[0:1], v[4:5], off
; GFX12-NEXT:    global_store_b32 v[2:3], v6, off
; GFX12-NEXT:    s_endpgm
  %res = call { <2 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v2i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
  %data = extractvalue { <2 x i32>, i32 } %res, 0
  store <2 x i32> %data, ptr addrspace(1) %data_addr
  %tfe = extractvalue { <2 x i32>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) %tfe_addr
  ret void
}
; Raw buffer load of a <2 x float> through the struct-returning (tfe)
; overload. Element 0 of the { <2 x float>, i32 } result is stored to
; %data_addr and element 1 (a dword) to %tfe_addr.
; The expected instruction sequences match the <2 x i32> variant, including
; the separate tahiti check group that expects a dwordx3 load into four VGPRs.
define amdgpu_ps void @raw_buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
; GFX6-LABEL: raw_buffer_load_v2f32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v4, 0
; GFX6-NEXT:    v_mov_b32_e32 v5, v4
; GFX6-NEXT:    v_mov_b32_e32 v6, v4
; GFX6-NEXT:    v_mov_b32_e32 v7, v4
; GFX6-NEXT:    buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    buffer_store_dword v6, v[2:3], s[0:3], 0 addr64
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_load_v2f32_tfe:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_mov_b32_e32 v4, 0
; GFX7-NEXT:    v_mov_b32_e32 v5, v4
; GFX7-NEXT:    v_mov_b32_e32 v6, v4
; GFX7-NEXT:    buffer_load_dwordx2 v[4:6], off, s[0:3], 0 tfe
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s2
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_store_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    buffer_store_dword v6, v[2:3], s[0:3], 0 addr64
; GFX7-NEXT:    s_endpgm
;
; GFX8-LABEL: raw_buffer_load_v2f32_tfe:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v4, 0
; GFX8-NEXT:    v_mov_b32_e32 v5, v4
; GFX8-NEXT:    v_mov_b32_e32 v6, v4
; GFX8-NEXT:    buffer_load_dwordx2 v[4:6], off, s[0:3], 0 tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[4:5]
; GFX8-NEXT:    flat_store_dword v[2:3], v6
; GFX8-NEXT:    s_endpgm
;
; GFX910-LABEL: raw_buffer_load_v2f32_tfe:
; GFX910:       ; %bb.0:
; GFX910-NEXT:    v_mov_b32_e32 v4, 0
; GFX910-NEXT:    v_mov_b32_e32 v5, v4
; GFX910-NEXT:    v_mov_b32_e32 v6, v4
; GFX910-NEXT:    buffer_load_dwordx2 v[4:6], off, s[0:3], 0 tfe
; GFX910-NEXT:    s_waitcnt vmcnt(0)
; GFX910-NEXT:    global_store_dwordx2 v[0:1], v[4:5], off
; GFX910-NEXT:    global_store_dword v[2:3], v6, off
; GFX910-NEXT:    s_endpgm
;
; GFX11-LABEL: raw_buffer_load_v2f32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v4, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v5, v4
; GFX11-NEXT:    v_mov_b32_e32 v6, v4
; GFX11-NEXT:    buffer_load_b64 v[4:6], off, s[0:3], 0 tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b64 v[0:1], v[4:5], off
; GFX11-NEXT:    global_store_b32 v[2:3], v6, off
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: raw_buffer_load_v2f32_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v4, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
; GFX12-NEXT:    buffer_load_b64 v[4:6], off, s[0:3], null tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b64 v[0:1], v[4:5], off
; GFX12-NEXT:    global_store_b32 v[2:3], v6, off
; GFX12-NEXT:    s_endpgm
  %res = call { <2 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v2f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
  %data = extractvalue { <2 x float>, i32 } %res, 0
  store <2 x float> %data, ptr addrspace(1) %data_addr
  %tfe = extractvalue { <2 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) %tfe_addr
  ret void
}
; Raw buffer load of a <3 x i32> through the struct-returning (tfe) overload.
; Element 0 of the { <3 x i32>, i32 } result is stored to %data_addr and
; element 1 (a dword) to %tfe_addr.
; Every run expects four zero-initialized VGPRs and a three-dword buffer load
; with `tfe`. The tahiti check group differs only in the data store, which is
; split into a dword store at offset:8 plus a dwordx2 store.
define amdgpu_ps void @raw_buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
; GFX6-LABEL: raw_buffer_load_v3i32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v4, 0
; GFX6-NEXT:    v_mov_b32_e32 v5, v4
; GFX6-NEXT:    v_mov_b32_e32 v6, v4
; GFX6-NEXT:    v_mov_b32_e32 v7, v4
; GFX6-NEXT:    buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v6, v[0:1], s[0:3], 0 addr64 offset:8
; GFX6-NEXT:    buffer_store_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    buffer_store_dword v7, v[2:3], s[0:3], 0 addr64
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_load_v3i32_tfe:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_mov_b32_e32 v4, 0
; GFX7-NEXT:    v_mov_b32_e32 v5, v4
; GFX7-NEXT:    v_mov_b32_e32 v6, v4
; GFX7-NEXT:    v_mov_b32_e32 v7, v4
; GFX7-NEXT:    buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s2
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_store_dwordx3 v[4:6], v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    buffer_store_dword v7, v[2:3], s[0:3], 0 addr64
; GFX7-NEXT:    s_endpgm
;
; GFX8-LABEL: raw_buffer_load_v3i32_tfe:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v4, 0
; GFX8-NEXT:    v_mov_b32_e32 v5, v4
; GFX8-NEXT:    v_mov_b32_e32 v6, v4
; GFX8-NEXT:    v_mov_b32_e32 v7, v4
; GFX8-NEXT:    buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_store_dwordx3 v[0:1], v[4:6]
; GFX8-NEXT:    flat_store_dword v[2:3], v7
; GFX8-NEXT:    s_endpgm
;
; GFX910-LABEL: raw_buffer_load_v3i32_tfe:
; GFX910:       ; %bb.0:
; GFX910-NEXT:    v_mov_b32_e32 v4, 0
; GFX910-NEXT:    v_mov_b32_e32 v5, v4
; GFX910-NEXT:    v_mov_b32_e32 v6, v4
; GFX910-NEXT:    v_mov_b32_e32 v7, v4
; GFX910-NEXT:    buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
; GFX910-NEXT:    s_waitcnt vmcnt(0)
; GFX910-NEXT:    global_store_dwordx3 v[0:1], v[4:6], off
; GFX910-NEXT:    global_store_dword v[2:3], v7, off
; GFX910-NEXT:    s_endpgm
;
; GFX11-LABEL: raw_buffer_load_v3i32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v4, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v5, v4
; GFX11-NEXT:    v_mov_b32_e32 v6, v4
; GFX11-NEXT:    v_mov_b32_e32 v7, v4
; GFX11-NEXT:    buffer_load_b96 v[4:7], off, s[0:3], 0 tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b96 v[0:1], v[4:6], off
; GFX11-NEXT:    global_store_b32 v[2:3], v7, off
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: raw_buffer_load_v3i32_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v4, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
; GFX12-NEXT:    v_mov_b32_e32 v7, v4
; GFX12-NEXT:    buffer_load_b96 v[4:7], off, s[0:3], null tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b96 v[0:1], v[4:6], off
; GFX12-NEXT:    global_store_b32 v[2:3], v7, off
; GFX12-NEXT:    s_endpgm
  %res = call { <3 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v3i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
  %data = extractvalue { <3 x i32>, i32 } %res, 0
  store <3 x i32> %data, ptr addrspace(1) %data_addr
  %tfe = extractvalue { <3 x i32>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) %tfe_addr
  ret void
}
; Raw buffer load of a <3 x float> through the struct-returning (tfe)
; overload. Element 0 of the { <3 x float>, i32 } result is stored to
; %data_addr and element 1 (a dword) to %tfe_addr.
; The expected instruction sequences match the <3 x i32> variant, including
; the tahiti check group's split data store (dword at offset:8 plus dwordx2).
define amdgpu_ps void @raw_buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
; GFX6-LABEL: raw_buffer_load_v3f32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v4, 0
; GFX6-NEXT:    v_mov_b32_e32 v5, v4
; GFX6-NEXT:    v_mov_b32_e32 v6, v4
; GFX6-NEXT:    v_mov_b32_e32 v7, v4
; GFX6-NEXT:    buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v6, v[0:1], s[0:3], 0 addr64 offset:8
; GFX6-NEXT:    buffer_store_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    buffer_store_dword v7, v[2:3], s[0:3], 0 addr64
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_load_v3f32_tfe:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_mov_b32_e32 v4, 0
; GFX7-NEXT:    v_mov_b32_e32 v5, v4
; GFX7-NEXT:    v_mov_b32_e32 v6, v4
; GFX7-NEXT:    v_mov_b32_e32 v7, v4
; GFX7-NEXT:    buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s2
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_store_dwordx3 v[4:6], v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    buffer_store_dword v7, v[2:3], s[0:3], 0 addr64
; GFX7-NEXT:    s_endpgm
;
; GFX8-LABEL: raw_buffer_load_v3f32_tfe:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v4, 0
; GFX8-NEXT:    v_mov_b32_e32 v5, v4
; GFX8-NEXT:    v_mov_b32_e32 v6, v4
; GFX8-NEXT:    v_mov_b32_e32 v7, v4
; GFX8-NEXT:    buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_store_dwordx3 v[0:1], v[4:6]
; GFX8-NEXT:    flat_store_dword v[2:3], v7
; GFX8-NEXT:    s_endpgm
;
; GFX910-LABEL: raw_buffer_load_v3f32_tfe:
; GFX910:       ; %bb.0:
; GFX910-NEXT:    v_mov_b32_e32 v4, 0
; GFX910-NEXT:    v_mov_b32_e32 v5, v4
; GFX910-NEXT:    v_mov_b32_e32 v6, v4
; GFX910-NEXT:    v_mov_b32_e32 v7, v4
; GFX910-NEXT:    buffer_load_dwordx3 v[4:7], off, s[0:3], 0 tfe
; GFX910-NEXT:    s_waitcnt vmcnt(0)
; GFX910-NEXT:    global_store_dwordx3 v[0:1], v[4:6], off
; GFX910-NEXT:    global_store_dword v[2:3], v7, off
; GFX910-NEXT:    s_endpgm
;
; GFX11-LABEL: raw_buffer_load_v3f32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v4, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v5, v4
; GFX11-NEXT:    v_mov_b32_e32 v6, v4
; GFX11-NEXT:    v_mov_b32_e32 v7, v4
; GFX11-NEXT:    buffer_load_b96 v[4:7], off, s[0:3], 0 tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b96 v[0:1], v[4:6], off
; GFX11-NEXT:    global_store_b32 v[2:3], v7, off
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: raw_buffer_load_v3f32_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v4, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
; GFX12-NEXT:    v_mov_b32_e32 v7, v4
; GFX12-NEXT:    buffer_load_b96 v[4:7], off, s[0:3], null tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b96 v[0:1], v[4:6], off
; GFX12-NEXT:    global_store_b32 v[2:3], v7, off
; GFX12-NEXT:    s_endpgm
  %res = call { <3 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v3f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
  %data = extractvalue { <3 x float>, i32 } %res, 0
  store <3 x float> %data, ptr addrspace(1) %data_addr
  %tfe = extractvalue { <3 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) %tfe_addr
  ret void
}
; Raw buffer load of a <4 x i32> through the struct-returning (tfe) overload.
; Element 0 of the { <4 x i32>, i32 } result is stored to %data_addr and
; element 1 (a dword) to %tfe_addr.
; Every run expects five zero-initialized VGPRs (v4..v8) followed by a
; four-dword buffer load with `tfe`; v[4:7] is stored as data and v8 as the
; extra dword.
define amdgpu_ps void @raw_buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
; GFX67-LABEL: raw_buffer_load_v4i32_tfe:
; GFX67:       ; %bb.0:
; GFX67-NEXT:    v_mov_b32_e32 v4, 0
; GFX67-NEXT:    v_mov_b32_e32 v5, v4
; GFX67-NEXT:    v_mov_b32_e32 v6, v4
; GFX67-NEXT:    v_mov_b32_e32 v7, v4
; GFX67-NEXT:    v_mov_b32_e32 v8, v4
; GFX67-NEXT:    buffer_load_dwordx4 v[4:8], off, s[0:3], 0 tfe
; GFX67-NEXT:    s_mov_b32 s2, 0
; GFX67-NEXT:    s_mov_b32 s3, 0xf000
; GFX67-NEXT:    s_mov_b32 s0, s2
; GFX67-NEXT:    s_mov_b32 s1, s2
; GFX67-NEXT:    s_waitcnt vmcnt(0)
; GFX67-NEXT:    buffer_store_dwordx4 v[4:7], v[0:1], s[0:3], 0 addr64
; GFX67-NEXT:    buffer_store_dword v8, v[2:3], s[0:3], 0 addr64
; GFX67-NEXT:    s_endpgm
;
; GFX8-LABEL: raw_buffer_load_v4i32_tfe:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v4, 0
; GFX8-NEXT:    v_mov_b32_e32 v5, v4
; GFX8-NEXT:    v_mov_b32_e32 v6, v4
; GFX8-NEXT:    v_mov_b32_e32 v7, v4
; GFX8-NEXT:    v_mov_b32_e32 v8, v4
; GFX8-NEXT:    buffer_load_dwordx4 v[4:8], off, s[0:3], 0 tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
; GFX8-NEXT:    flat_store_dword v[2:3], v8
; GFX8-NEXT:    s_endpgm
;
; GFX910-LABEL: raw_buffer_load_v4i32_tfe:
; GFX910:       ; %bb.0:
; GFX910-NEXT:    v_mov_b32_e32 v4, 0
; GFX910-NEXT:    v_mov_b32_e32 v5, v4
; GFX910-NEXT:    v_mov_b32_e32 v6, v4
; GFX910-NEXT:    v_mov_b32_e32 v7, v4
; GFX910-NEXT:    v_mov_b32_e32 v8, v4
; GFX910-NEXT:    buffer_load_dwordx4 v[4:8], off, s[0:3], 0 tfe
; GFX910-NEXT:    s_waitcnt vmcnt(0)
; GFX910-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
; GFX910-NEXT:    global_store_dword v[2:3], v8, off
; GFX910-NEXT:    s_endpgm
;
; GFX11-LABEL: raw_buffer_load_v4i32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v4, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v5, v4
; GFX11-NEXT:    v_mov_b32_e32 v6, v4
; GFX11-NEXT:    v_mov_b32_e32 v7, v4
; GFX11-NEXT:    v_mov_b32_e32 v8, v4
; GFX11-NEXT:    buffer_load_b128 v[4:8], off, s[0:3], 0 tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT:    global_store_b32 v[2:3], v8, off
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: raw_buffer_load_v4i32_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v4, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
; GFX12-NEXT:    v_dual_mov_b32 v7, v4 :: v_dual_mov_b32 v8, v4
; GFX12-NEXT:    buffer_load_b128 v[4:8], off, s[0:3], null tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b128 v[0:1], v[4:7], off
; GFX12-NEXT:    global_store_b32 v[2:3], v8, off
; GFX12-NEXT:    s_endpgm
  %res = call { <4 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v4i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
  %data = extractvalue { <4 x i32>, i32 } %res, 0
  store <4 x i32> %data, ptr addrspace(1) %data_addr
  %tfe = extractvalue { <4 x i32>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) %tfe_addr
  ret void
}
; Raw buffer load of a <4 x float> through the struct-returning (tfe)
; overload. Element 0 of the { <4 x float>, i32 } result is stored to
; %data_addr and element 1 (a dword) to %tfe_addr.
; The expected instruction sequences match the <4 x i32> variant: five
; zero-initialized VGPRs, then a four-dword buffer load with `tfe`.
define amdgpu_ps void @raw_buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
; GFX67-LABEL: raw_buffer_load_v4f32_tfe:
; GFX67:       ; %bb.0:
; GFX67-NEXT:    v_mov_b32_e32 v4, 0
; GFX67-NEXT:    v_mov_b32_e32 v5, v4
; GFX67-NEXT:    v_mov_b32_e32 v6, v4
; GFX67-NEXT:    v_mov_b32_e32 v7, v4
; GFX67-NEXT:    v_mov_b32_e32 v8, v4
; GFX67-NEXT:    buffer_load_dwordx4 v[4:8], off, s[0:3], 0 tfe
; GFX67-NEXT:    s_mov_b32 s2, 0
; GFX67-NEXT:    s_mov_b32 s3, 0xf000
; GFX67-NEXT:    s_mov_b32 s0, s2
; GFX67-NEXT:    s_mov_b32 s1, s2
; GFX67-NEXT:    s_waitcnt vmcnt(0)
; GFX67-NEXT:    buffer_store_dwordx4 v[4:7], v[0:1], s[0:3], 0 addr64
; GFX67-NEXT:    buffer_store_dword v8, v[2:3], s[0:3], 0 addr64
; GFX67-NEXT:    s_endpgm
;
; GFX8-LABEL: raw_buffer_load_v4f32_tfe:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v4, 0
; GFX8-NEXT:    v_mov_b32_e32 v5, v4
; GFX8-NEXT:    v_mov_b32_e32 v6, v4
; GFX8-NEXT:    v_mov_b32_e32 v7, v4
; GFX8-NEXT:    v_mov_b32_e32 v8, v4
; GFX8-NEXT:    buffer_load_dwordx4 v[4:8], off, s[0:3], 0 tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
; GFX8-NEXT:    flat_store_dword v[2:3], v8
; GFX8-NEXT:    s_endpgm
;
; GFX910-LABEL: raw_buffer_load_v4f32_tfe:
; GFX910:       ; %bb.0:
; GFX910-NEXT:    v_mov_b32_e32 v4, 0
; GFX910-NEXT:    v_mov_b32_e32 v5, v4
; GFX910-NEXT:    v_mov_b32_e32 v6, v4
; GFX910-NEXT:    v_mov_b32_e32 v7, v4
; GFX910-NEXT:    v_mov_b32_e32 v8, v4
; GFX910-NEXT:    buffer_load_dwordx4 v[4:8], off, s[0:3], 0 tfe
; GFX910-NEXT:    s_waitcnt vmcnt(0)
; GFX910-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
; GFX910-NEXT:    global_store_dword v[2:3], v8, off
; GFX910-NEXT:    s_endpgm
;
; GFX11-LABEL: raw_buffer_load_v4f32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v4, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v5, v4
; GFX11-NEXT:    v_mov_b32_e32 v6, v4
; GFX11-NEXT:    v_mov_b32_e32 v7, v4
; GFX11-NEXT:    v_mov_b32_e32 v8, v4
; GFX11-NEXT:    buffer_load_b128 v[4:8], off, s[0:3], 0 tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT:    global_store_b32 v[2:3], v8, off
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: raw_buffer_load_v4f32_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v4, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
; GFX12-NEXT:    v_dual_mov_b32 v7, v4 :: v_dual_mov_b32 v8, v4
; GFX12-NEXT:    buffer_load_b128 v[4:8], off, s[0:3], null tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b128 v[0:1], v[4:7], off
; GFX12-NEXT:    global_store_b32 v[2:3], v8, off
; GFX12-NEXT:    s_endpgm
  %res = call { <4 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v4f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
  %data = extractvalue { <4 x float>, i32 } %res, 0
  store <4 x float> %data, ptr addrspace(1) %data_addr
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) %tfe_addr
  ret void
}
; Declarations of the struct-returning raw.buffer.load overloads called by the
; tests above. Each returns { <data type>, i32 } and takes a <4 x i32> operand
; (the tests pass %rsrc) followed by three i32 operands (the tests pass 0 for
; all three). The `sl_<T>i32s` name suffix mirrors the struct return type.
declare { i8, i32 } @llvm.amdgcn.raw.buffer.load.sl_i8i32s(<4 x i32>, i32, i32, i32)
declare { i16, i32 } @llvm.amdgcn.raw.buffer.load.sl_i16i32s(<4 x i32>, i32, i32, i32)
declare { half, i32 } @llvm.amdgcn.raw.buffer.load.sl_f16i32s(<4 x i32>, i32, i32, i32)
declare { i32, i32 } @llvm.amdgcn.raw.buffer.load.sl_i32i32s(<4 x i32>, i32, i32, i32)
declare { <2 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v2i32i32s(<4 x i32>, i32, i32, i32)
declare { <2 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v2f32i32s(<4 x i32>, i32, i32, i32)
declare { <3 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v3i32i32s(<4 x i32>, i32, i32, i32)
declare { <3 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v3f32i32s(<4 x i32>, i32, i32, i32)
declare { <4 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v4i32i32s(<4 x i32>, i32, i32, i32)
declare { <4 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v4f32i32s(<4 x i32>, i32, i32, i32)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: