1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=GFX68,VERDE %s
3 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GFX68,GFX8 %s
4 ; RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefixes=GFX11 %s
; Three <4 x float> raw buffer stores to the same resource whose last i32
; operand is 0, 1 and 2. The expected assembly shows a plain store, a store
; with glc, and a store with slc, in that order.
6 define amdgpu_ps void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
7 ; GFX68-LABEL: buffer_store:
8 ; GFX68: ; %bb.0: ; %main_body
9 ; GFX68-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
10 ; GFX68-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 glc
11 ; GFX68-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 slc
12 ; GFX68-NEXT: s_endpgm
14 ; GFX11-LABEL: buffer_store:
15 ; GFX11: ; %bb.0: ; %main_body
16 ; GFX11-NEXT: s_clause 0x2
17 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
18 ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 glc
19 ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 slc
21 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
22 ; GFX11-NEXT: s_endpgm
24 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i32 0)
25 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i32 1)
26 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i32 2)
; A single <4 x float> store whose third intrinsic operand is the constant 42.
; The expected instruction carries it as an immediate (offset:42) with no
; address VGPR (off).
30 define amdgpu_ps void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
31 ; GFX68-LABEL: buffer_store_immoffs:
32 ; GFX68: ; %bb.0: ; %main_body
33 ; GFX68-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:42
34 ; GFX68-NEXT: s_endpgm
36 ; GFX11-LABEL: buffer_store_immoffs:
37 ; GFX11: ; %bb.0: ; %main_body
38 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 offset:42
40 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
41 ; GFX11-NEXT: s_endpgm
43 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 42, i32 0, i32 0)
; A single <4 x float> store whose third intrinsic operand is the i32 function
; argument %2. The expected instruction takes it from v4 and uses the offen
; addressing form.
47 define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float>, i32) {
48 ; GFX68-LABEL: buffer_store_ofs:
49 ; GFX68: ; %bb.0: ; %main_body
50 ; GFX68-NEXT: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
51 ; GFX68-NEXT: s_endpgm
53 ; GFX11-LABEL: buffer_store_ofs:
54 ; GFX11: ; %bb.0: ; %main_body
55 ; GFX11-NEXT: buffer_store_b128 v[0:3], v4, s[0:3], 0 offen
57 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
58 ; GFX11-NEXT: s_endpgm
60 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i32 0)
64 ; Ideally, the register allocator would avoid the wait here
; Store, load, then store the loaded value, each with a different VGPR offset.
; The expected code loads into v[0:3], the same registers the first store
; reads. The verde expectations include s_waitcnt expcnt(0) between that store
; and the load; all three targets expect s_waitcnt vmcnt(0) before the final
; store.
65 define amdgpu_ps void @buffer_store_wait(<4 x i32> inreg, <4 x float>, i32, i32, i32) {
66 ; VERDE-LABEL: buffer_store_wait:
67 ; VERDE: ; %bb.0: ; %main_body
68 ; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
69 ; VERDE-NEXT: s_waitcnt expcnt(0)
70 ; VERDE-NEXT: buffer_load_dwordx4 v[0:3], v5, s[0:3], 0 offen
71 ; VERDE-NEXT: s_waitcnt vmcnt(0)
72 ; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v6, s[0:3], 0 offen
73 ; VERDE-NEXT: s_endpgm
75 ; GFX8-LABEL: buffer_store_wait:
76 ; GFX8: ; %bb.0: ; %main_body
77 ; GFX8-NEXT: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
78 ; GFX8-NEXT: buffer_load_dwordx4 v[0:3], v5, s[0:3], 0 offen
79 ; GFX8-NEXT: s_waitcnt vmcnt(0)
80 ; GFX8-NEXT: buffer_store_dwordx4 v[0:3], v6, s[0:3], 0 offen
83 ; GFX11-LABEL: buffer_store_wait:
84 ; GFX11: ; %bb.0: ; %main_body
85 ; GFX11-NEXT: buffer_store_b128 v[0:3], v4, s[0:3], 0 offen
86 ; GFX11-NEXT: buffer_load_b128 v[0:3], v5, s[0:3], 0 offen
87 ; GFX11-NEXT: s_waitcnt vmcnt(0)
88 ; GFX11-NEXT: buffer_store_b128 v[0:3], v6, s[0:3], 0 offen
90 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
91 ; GFX11-NEXT: s_endpgm
93 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i32 0)
94 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 %3, i32 0, i32 0)
95 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %data, <4 x i32> %0, i32 %4, i32 0, i32 0)
; A single scalar float store with a VGPR offset. The expected instruction is
; the one-dword form (buffer_store_dword on verde/tonga, buffer_store_b32 on
; gfx1100).
99 define amdgpu_ps void @buffer_store_x1(<4 x i32> inreg %rsrc, float %data, i32 %offset) {
100 ; GFX68-LABEL: buffer_store_x1:
101 ; GFX68: ; %bb.0: ; %main_body
102 ; GFX68-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
103 ; GFX68-NEXT: s_endpgm
105 ; GFX11-LABEL: buffer_store_x1:
106 ; GFX11: ; %bb.0: ; %main_body
107 ; GFX11-NEXT: buffer_store_b32 v0, v1, s[0:3], 0 offen
108 ; GFX11-NEXT: s_nop 0
109 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
110 ; GFX11-NEXT: s_endpgm
112 call void @llvm.amdgcn.raw.buffer.store.f32(float %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
; A single <2 x float> store with a VGPR offset. The expected instruction is
; the two-dword form (buffer_store_dwordx2 on verde/tonga, buffer_store_b64 on
; gfx1100).
116 define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %offset) #0 {
117 ; GFX68-LABEL: buffer_store_x2:
118 ; GFX68: ; %bb.0: ; %main_body
119 ; GFX68-NEXT: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
120 ; GFX68-NEXT: s_endpgm
122 ; GFX11-LABEL: buffer_store_x2:
123 ; GFX11: ; %bb.0: ; %main_body
124 ; GFX11-NEXT: buffer_store_b64 v[0:1], v2, s[0:3], 0 offen
125 ; GFX11-NEXT: s_nop 0
126 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
127 ; GFX11-NEXT: s_endpgm
129 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
; Six scalar float stores at offsets %a1..%a6. The expected code merges them
; into one four-dword store at offen offset:4 and one two-dword store at offen
; offset:28, both addressed from v0.
; NOTE(review): the definitions of %a1..%a6 are not visible in this view;
; presumably they are derived from %a -- confirm against the full file.
133 define amdgpu_ps void @buffer_store_x1_offen_merged_and(<4 x i32> inreg %rsrc, i32 %a, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
134 ; GFX68-LABEL: buffer_store_x1_offen_merged_and:
136 ; GFX68-NEXT: buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
137 ; GFX68-NEXT: buffer_store_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
138 ; GFX68-NEXT: s_endpgm
140 ; GFX11-LABEL: buffer_store_x1_offen_merged_and:
142 ; GFX11-NEXT: s_clause 0x1
143 ; GFX11-NEXT: buffer_store_b128 v[1:4], v0, s[0:3], 0 offen offset:4
144 ; GFX11-NEXT: buffer_store_b64 v[5:6], v0, s[0:3], 0 offen offset:28
145 ; GFX11-NEXT: s_nop 0
146 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
147 ; GFX11-NEXT: s_endpgm
154 call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
155 call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
156 call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 %a3, i32 0, i32 0)
157 call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 %a4, i32 0, i32 0)
158 call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 %a5, i32 0, i32 0)
159 call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 %a6, i32 0, i32 0)
; Six scalar float stores at offsets %a1..%a6. The expected code shifts v0
; left by 6, then issues one four-dword store at offen offset:4 and one
; two-dword store at offen offset:28.
; NOTE(review): the definitions of %a1..%a6 are not visible in this view;
; going by the function name they are presumably formed with `or` on a shifted
; %inp -- confirm against the full file.
163 define amdgpu_ps void @buffer_store_x1_offen_merged_or(<4 x i32> inreg %rsrc, i32 %inp, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
164 ; GFX68-LABEL: buffer_store_x1_offen_merged_or:
166 ; GFX68-NEXT: v_lshlrev_b32_e32 v0, 6, v0
167 ; GFX68-NEXT: buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
168 ; GFX68-NEXT: buffer_store_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
169 ; GFX68-NEXT: s_endpgm
171 ; GFX11-LABEL: buffer_store_x1_offen_merged_or:
173 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 6, v0
174 ; GFX11-NEXT: s_clause 0x1
175 ; GFX11-NEXT: buffer_store_b128 v[1:4], v0, s[0:3], 0 offen offset:4
176 ; GFX11-NEXT: buffer_store_b64 v[5:6], v0, s[0:3], 0 offen offset:28
177 ; GFX11-NEXT: s_nop 0
178 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
179 ; GFX11-NEXT: s_endpgm
187 call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
188 call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
189 call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 %a3, i32 0, i32 0)
190 call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 %a4, i32 0, i32 0)
191 call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 %a5, i32 0, i32 0)
192 call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 %a6, i32 0, i32 0)
; Six scalar float stores whose last i32 operand is 0, 0, 1, 1, 3, 3. The
; expected code pairs stores that share that operand into three two-dword
; stores: plain at offset:4, glc at offset:12, and glc slc at offset:28.
; NOTE(review): the definitions of %a1..%a6 are not visible in this view --
; confirm against the full file.
196 define amdgpu_ps void @buffer_store_x1_offen_merged_glc_slc(<4 x i32> inreg %rsrc, i32 %a, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
197 ; GFX68-LABEL: buffer_store_x1_offen_merged_glc_slc:
199 ; GFX68-NEXT: buffer_store_dwordx2 v[1:2], v0, s[0:3], 0 offen offset:4
200 ; GFX68-NEXT: buffer_store_dwordx2 v[3:4], v0, s[0:3], 0 offen offset:12 glc
201 ; GFX68-NEXT: buffer_store_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
202 ; GFX68-NEXT: s_endpgm
204 ; GFX11-LABEL: buffer_store_x1_offen_merged_glc_slc:
206 ; GFX11-NEXT: s_clause 0x2
207 ; GFX11-NEXT: buffer_store_b64 v[1:2], v0, s[0:3], 0 offen offset:4
208 ; GFX11-NEXT: buffer_store_b64 v[3:4], v0, s[0:3], 0 offen offset:12 glc
209 ; GFX11-NEXT: buffer_store_b64 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
210 ; GFX11-NEXT: s_nop 0
211 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
212 ; GFX11-NEXT: s_endpgm
219 call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
220 call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
221 call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 %a3, i32 0, i32 1)
222 call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 %a4, i32 0, i32 1)
223 call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 %a5, i32 0, i32 3)
224 call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 %a6, i32 0, i32 3)
; Two <2 x float> stores at offsets %a1 and %a2. The expected code merges them
; into a single four-dword store at offen offset:4 addressed from v0.
; NOTE(review): the definitions of %a1 and %a2 are not visible in this view --
; confirm against the full file.
228 define amdgpu_ps void @buffer_store_x2_offen_merged_and(<4 x i32> inreg %rsrc, i32 %a, <2 x float> %v1, <2 x float> %v2) {
229 ; GFX68-LABEL: buffer_store_x2_offen_merged_and:
231 ; GFX68-NEXT: buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
232 ; GFX68-NEXT: s_endpgm
234 ; GFX11-LABEL: buffer_store_x2_offen_merged_and:
236 ; GFX11-NEXT: buffer_store_b128 v[1:4], v0, s[0:3], 0 offen offset:4
237 ; GFX11-NEXT: s_nop 0
238 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
239 ; GFX11-NEXT: s_endpgm
242 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
243 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
; Two <2 x float> stores at offsets %a1 and %a2. The expected code shifts v0
; left by 4 and then issues a single four-dword store at offen offset:4.
; NOTE(review): the definitions of %a1 and %a2 are not visible in this view --
; confirm against the full file.
247 define amdgpu_ps void @buffer_store_x2_offen_merged_or(<4 x i32> inreg %rsrc, i32 %inp, <2 x float> %v1, <2 x float> %v2) {
248 ; GFX68-LABEL: buffer_store_x2_offen_merged_or:
250 ; GFX68-NEXT: v_lshlrev_b32_e32 v0, 4, v0
251 ; GFX68-NEXT: buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
252 ; GFX68-NEXT: s_endpgm
254 ; GFX11-LABEL: buffer_store_x2_offen_merged_or:
256 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
257 ; GFX11-NEXT: buffer_store_b128 v[1:4], v0, s[0:3], 0 offen offset:4
258 ; GFX11-NEXT: s_nop 0
259 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
260 ; GFX11-NEXT: s_endpgm
264 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
265 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
; Six scalar float stores at constant offsets 4, 8, 12, 16, 28 and 32. The
; expected code merges the first four into one four-dword store at offset:4
; and the last two into one two-dword store at offset:28.
269 define amdgpu_ps void @buffer_store_x1_offset_merged(<4 x i32> inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
270 ; GFX68-LABEL: buffer_store_x1_offset_merged:
272 ; GFX68-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:4
273 ; GFX68-NEXT: buffer_store_dwordx2 v[4:5], off, s[0:3], 0 offset:28
274 ; GFX68-NEXT: s_endpgm
276 ; GFX11-LABEL: buffer_store_x1_offset_merged:
278 ; GFX11-NEXT: s_clause 0x1
279 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 offset:4
280 ; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0 offset:28
281 ; GFX11-NEXT: s_nop 0
282 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
283 ; GFX11-NEXT: s_endpgm
284 call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 0)
285 call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 8, i32 0, i32 0)
286 call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 12, i32 0, i32 0)
287 call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 16, i32 0, i32 0)
288 call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 28, i32 0, i32 0)
289 call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 32, i32 0, i32 0)
; Two <2 x float> stores at constant offsets 4 and 12. The expected code
; merges them into a single four-dword store at offset:4.
293 define amdgpu_ps void @buffer_store_x2_offset_merged(<4 x i32> inreg %rsrc, <2 x float> %v1,<2 x float> %v2) {
294 ; GFX68-LABEL: buffer_store_x2_offset_merged:
296 ; GFX68-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:4
297 ; GFX68-NEXT: s_endpgm
299 ; GFX11-LABEL: buffer_store_x2_offset_merged:
301 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 offset:4
302 ; GFX11-NEXT: s_nop 0
303 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
304 ; GFX11-NEXT: s_endpgm
305 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 0)
306 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v2, <4 x i32> %rsrc, i32 12, i32 0, i32 0)
; Integer variants of the store intrinsic: <4 x i32>, <2 x i32> and i32, with
; last operand 0, 1 and 2. The expected code is a four-dword plain store, a
; two-dword glc store and a one-dword slc store.
310 define amdgpu_ps void @buffer_store_int(<4 x i32> inreg, <4 x i32>, <2 x i32>, i32) {
311 ; GFX68-LABEL: buffer_store_int:
312 ; GFX68: ; %bb.0: ; %main_body
313 ; GFX68-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
314 ; GFX68-NEXT: buffer_store_dwordx2 v[4:5], off, s[0:3], 0 glc
315 ; GFX68-NEXT: buffer_store_dword v6, off, s[0:3], 0 slc
316 ; GFX68-NEXT: s_endpgm
318 ; GFX11-LABEL: buffer_store_int:
319 ; GFX11: ; %bb.0: ; %main_body
320 ; GFX11-NEXT: s_clause 0x2
321 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
322 ; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0 glc
323 ; GFX11-NEXT: buffer_store_b32 v6, off, s[0:3], 0 slc
324 ; GFX11-NEXT: s_nop 0
325 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
326 ; GFX11-NEXT: s_endpgm
328 call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> %1, <4 x i32> %0, i32 0, i32 0, i32 0)
329 call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> %2, <4 x i32> %0, i32 0, i32 0, i32 1)
330 call void @llvm.amdgcn.raw.buffer.store.i32(i32 %3, <4 x i32> %0, i32 0, i32 0, i32 2)
; Converts a float to i32 (fptoui), truncates to i8 and stores it. The
; expected code is a v_cvt_u32_f32 followed by a byte store
; (buffer_store_byte on verde/tonga, buffer_store_b8 on gfx1100).
334 define amdgpu_ps void @raw_buffer_store_byte(<4 x i32> inreg %rsrc, float %v1) {
335 ; GFX68-LABEL: raw_buffer_store_byte:
336 ; GFX68: ; %bb.0: ; %main_body
337 ; GFX68-NEXT: v_cvt_u32_f32_e32 v0, v0
338 ; GFX68-NEXT: buffer_store_byte v0, off, s[0:3], 0
339 ; GFX68-NEXT: s_endpgm
341 ; GFX11-LABEL: raw_buffer_store_byte:
342 ; GFX11: ; %bb.0: ; %main_body
343 ; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
344 ; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
345 ; GFX11-NEXT: s_nop 0
346 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
347 ; GFX11-NEXT: s_endpgm
349 %v2 = fptoui float %v1 to i32
350 %v3 = trunc i32 %v2 to i8
351 call void @llvm.amdgcn.raw.buffer.store.i8(i8 %v3, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
; Converts a float to i32 (fptoui), truncates to i16 and stores it. The
; expected code is a v_cvt_u32_f32 followed by a 16-bit store
; (buffer_store_short on verde/tonga, buffer_store_b16 on gfx1100).
355 define amdgpu_ps void @raw_buffer_store_short(<4 x i32> inreg %rsrc, float %v1) {
356 ; GFX68-LABEL: raw_buffer_store_short:
357 ; GFX68: ; %bb.0: ; %main_body
358 ; GFX68-NEXT: v_cvt_u32_f32_e32 v0, v0
359 ; GFX68-NEXT: buffer_store_short v0, off, s[0:3], 0
360 ; GFX68-NEXT: s_endpgm
362 ; GFX11-LABEL: raw_buffer_store_short:
363 ; GFX11: ; %bb.0: ; %main_body
364 ; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
365 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
366 ; GFX11-NEXT: s_nop 0
367 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
368 ; GFX11-NEXT: s_endpgm
370 %v2 = fptoui float %v1 to i32
371 %v3 = trunc i32 %v2 to i16
372 call void @llvm.amdgcn.raw.buffer.store.i16(i16 %v3, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
; Truncates an i32 to i16, bitcasts it to half and stores it through the f16
; variant of the intrinsic. The expected code is just a 16-bit store of v0
; with no preceding conversion instruction.
376 define amdgpu_ps void @raw_buffer_store_f16(<4 x i32> inreg %rsrc, i32 %v1) {
377 ; GFX68-LABEL: raw_buffer_store_f16:
378 ; GFX68: ; %bb.0: ; %main_body
379 ; GFX68-NEXT: buffer_store_short v0, off, s[0:3], 0
380 ; GFX68-NEXT: s_endpgm
382 ; GFX11-LABEL: raw_buffer_store_f16:
383 ; GFX11: ; %bb.0: ; %main_body
384 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
385 ; GFX11-NEXT: s_nop 0
386 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
387 ; GFX11-NEXT: s_endpgm
389 %trunc = trunc i32 %v1 to i16
390 %cast = bitcast i16 %trunc to half
391 call void @llvm.amdgcn.raw.buffer.store.f16(half %cast, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
; A single <2 x half> store with a VGPR offset. The verde expectations convert
; v0 and v1 with v_cvt_f16_f32, pack them with a shift and an or, and store
; one dword using v2 as the offset. The tonga and gfx1100 expectations store
; v0 directly with v1 as the offset.
395 define amdgpu_ps void @buffer_store_v2f16(<4 x i32> inreg %rsrc, <2 x half> %data, i32 %offset) {
396 ; VERDE-LABEL: buffer_store_v2f16:
397 ; VERDE: ; %bb.0: ; %main_body
398 ; VERDE-NEXT: v_cvt_f16_f32_e32 v1, v1
399 ; VERDE-NEXT: v_cvt_f16_f32_e32 v0, v0
400 ; VERDE-NEXT: v_lshlrev_b32_e32 v1, 16, v1
401 ; VERDE-NEXT: v_or_b32_e32 v0, v0, v1
402 ; VERDE-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen
403 ; VERDE-NEXT: s_endpgm
405 ; GFX8-LABEL: buffer_store_v2f16:
406 ; GFX8: ; %bb.0: ; %main_body
407 ; GFX8-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
408 ; GFX8-NEXT: s_endpgm
410 ; GFX11-LABEL: buffer_store_v2f16:
411 ; GFX11: ; %bb.0: ; %main_body
412 ; GFX11-NEXT: buffer_store_b32 v0, v1, s[0:3], 0 offen
413 ; GFX11-NEXT: s_nop 0
414 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
415 ; GFX11-NEXT: s_endpgm
417 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
; A single <4 x half> store with a VGPR offset. The verde expectations convert
; four registers with v_cvt_f16_f32, pack them pairwise into v[0:1] and store
; two dwords using v4 as the offset. The tonga and gfx1100 expectations store
; v[0:1] directly with v2 as the offset.
421 define amdgpu_ps void @buffer_store_v4f16(<4 x i32> inreg %rsrc, <4 x half> %data, i32 %offset) #0 {
422 ; VERDE-LABEL: buffer_store_v4f16:
423 ; VERDE: ; %bb.0: ; %main_body
424 ; VERDE-NEXT: v_cvt_f16_f32_e32 v3, v3
425 ; VERDE-NEXT: v_cvt_f16_f32_e32 v2, v2
426 ; VERDE-NEXT: v_cvt_f16_f32_e32 v5, v1
427 ; VERDE-NEXT: v_cvt_f16_f32_e32 v0, v0
428 ; VERDE-NEXT: v_lshlrev_b32_e32 v1, 16, v3
429 ; VERDE-NEXT: v_or_b32_e32 v1, v2, v1
430 ; VERDE-NEXT: v_lshlrev_b32_e32 v2, 16, v5
431 ; VERDE-NEXT: v_or_b32_e32 v0, v0, v2
432 ; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v4, s[0:3], 0 offen
433 ; VERDE-NEXT: s_endpgm
435 ; GFX8-LABEL: buffer_store_v4f16:
436 ; GFX8: ; %bb.0: ; %main_body
437 ; GFX8-NEXT: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
438 ; GFX8-NEXT: s_endpgm
440 ; GFX11-LABEL: buffer_store_v4f16:
441 ; GFX11: ; %bb.0: ; %main_body
442 ; GFX11-NEXT: buffer_store_b64 v[0:1], v2, s[0:3], 0 offen
443 ; GFX11-NEXT: s_nop 0
444 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
445 ; GFX11-NEXT: s_endpgm
447 call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
; Truncates an i32 argument to i16 and stores it through the i16 variant of
; the intrinsic. The expected code is just a 16-bit store of v0.
451 define amdgpu_ps void @raw_buffer_store_i16(<4 x i32> inreg %rsrc, i32 %v1) {
452 ; GFX68-LABEL: raw_buffer_store_i16:
453 ; GFX68: ; %bb.0: ; %main_body
454 ; GFX68-NEXT: buffer_store_short v0, off, s[0:3], 0
455 ; GFX68-NEXT: s_endpgm
457 ; GFX11-LABEL: raw_buffer_store_i16:
458 ; GFX11: ; %bb.0: ; %main_body
459 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
460 ; GFX11-NEXT: s_nop 0
461 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
462 ; GFX11-NEXT: s_endpgm
464 %trunc = trunc i32 %v1 to i16
465 call void @llvm.amdgcn.raw.buffer.store.i16(i16 %trunc, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
; A single <2 x i16> store with a VGPR offset. The verde expectations pack v0
; and v1 into one dword (shift by 16, mask with 0xffff, or) and store it using
; v2 as the offset. The tonga and gfx1100 expectations store v0 directly with
; v1 as the offset.
469 define amdgpu_ps void @buffer_store_v2i16(<4 x i32> inreg %rsrc, <2 x i16> %data, i32 %offset) {
470 ; VERDE-LABEL: buffer_store_v2i16:
471 ; VERDE: ; %bb.0: ; %main_body
472 ; VERDE-NEXT: v_lshlrev_b32_e32 v1, 16, v1
473 ; VERDE-NEXT: v_and_b32_e32 v0, 0xffff, v0
474 ; VERDE-NEXT: v_or_b32_e32 v0, v0, v1
475 ; VERDE-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen
476 ; VERDE-NEXT: s_endpgm
478 ; GFX8-LABEL: buffer_store_v2i16:
479 ; GFX8: ; %bb.0: ; %main_body
480 ; GFX8-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
481 ; GFX8-NEXT: s_endpgm
483 ; GFX11-LABEL: buffer_store_v2i16:
484 ; GFX11: ; %bb.0: ; %main_body
485 ; GFX11-NEXT: buffer_store_b32 v0, v1, s[0:3], 0 offen
486 ; GFX11-NEXT: s_nop 0
487 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
488 ; GFX11-NEXT: s_endpgm
490 call void @llvm.amdgcn.raw.buffer.store.v2i16(<2 x i16> %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
; A single <4 x i16> store with a VGPR offset. The verde expectations pack
; v0..v3 pairwise into v[1:2] (shift by 16, mask with 0xffff, or) and store
; two dwords using v4 as the offset. The tonga and gfx1100 expectations store
; v[0:1] directly with v2 as the offset.
494 define amdgpu_ps void @buffer_store_v4i16(<4 x i32> inreg %rsrc, <4 x i16> %data, i32 %offset) #0 {
495 ; VERDE-LABEL: buffer_store_v4i16:
496 ; VERDE: ; %bb.0: ; %main_body
497 ; VERDE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
498 ; VERDE-NEXT: v_and_b32_e32 v2, 0xffff, v2
499 ; VERDE-NEXT: v_lshlrev_b32_e32 v1, 16, v1
500 ; VERDE-NEXT: v_and_b32_e32 v0, 0xffff, v0
501 ; VERDE-NEXT: v_or_b32_e32 v2, v2, v3
502 ; VERDE-NEXT: v_or_b32_e32 v1, v0, v1
503 ; VERDE-NEXT: buffer_store_dwordx2 v[1:2], v4, s[0:3], 0 offen
504 ; VERDE-NEXT: s_endpgm
506 ; GFX8-LABEL: buffer_store_v4i16:
507 ; GFX8: ; %bb.0: ; %main_body
508 ; GFX8-NEXT: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
509 ; GFX8-NEXT: s_endpgm
511 ; GFX11-LABEL: buffer_store_v4i16:
512 ; GFX11: ; %bb.0: ; %main_body
513 ; GFX11-NEXT: buffer_store_b64 v[0:1], v2, s[0:3], 0 offen
514 ; GFX11-NEXT: s_nop 0
515 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
516 ; GFX11-NEXT: s_endpgm
518 call void @llvm.amdgcn.raw.buffer.store.v4i16(<4 x i16> %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
; Six scalar float stores at constant offsets 4, 8, 12, 16, 28 and 32 with
; last operand 0. The expected code merges them into one four-dword store at
; offset:4 and one two-dword store at offset:28.
; NOTE(review): the visible calls and expectations match those of
; @buffer_store_x1_offset_merged earlier in this file -- confirm whether the
; duplication is intentional.
522 define amdgpu_ps void @raw_buffer_store_x1_offset_merged(<4 x i32> inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
523 ; GFX68-LABEL: raw_buffer_store_x1_offset_merged:
525 ; GFX68-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:4
526 ; GFX68-NEXT: buffer_store_dwordx2 v[4:5], off, s[0:3], 0 offset:28
527 ; GFX68-NEXT: s_endpgm
529 ; GFX11-LABEL: raw_buffer_store_x1_offset_merged:
531 ; GFX11-NEXT: s_clause 0x1
532 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 offset:4
533 ; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0 offset:28
534 ; GFX11-NEXT: s_nop 0
535 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
536 ; GFX11-NEXT: s_endpgm
537 call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 0)
538 call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 8, i32 0, i32 0)
539 call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 12, i32 0, i32 0)
540 call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 16, i32 0, i32 0)
541 call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 28, i32 0, i32 0)
542 call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 32, i32 0, i32 0)
; Six scalar float stores at constant offsets 4, 8, 12, 16, 28 and 32, each
; with last operand 8. The expected code keeps six separate one-dword stores
; and merges nothing.
; NOTE(review): going by the function name, value 8 marks the access as
; swizzled -- confirm against the intrinsic documentation.
546 define amdgpu_ps void @raw_buffer_store_x1_offset_swizzled_not_merged(<4 x i32> inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
547 ; GFX68-LABEL: raw_buffer_store_x1_offset_swizzled_not_merged:
549 ; GFX68-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
550 ; GFX68-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:8
551 ; GFX68-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:12
552 ; GFX68-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:16
553 ; GFX68-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:28
554 ; GFX68-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:32
555 ; GFX68-NEXT: s_endpgm
557 ; GFX11-LABEL: raw_buffer_store_x1_offset_swizzled_not_merged:
559 ; GFX11-NEXT: s_clause 0x5
560 ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 offset:4
561 ; GFX11-NEXT: buffer_store_b32 v1, off, s[0:3], 0 offset:8
562 ; GFX11-NEXT: buffer_store_b32 v2, off, s[0:3], 0 offset:12
563 ; GFX11-NEXT: buffer_store_b32 v3, off, s[0:3], 0 offset:16
564 ; GFX11-NEXT: buffer_store_b32 v4, off, s[0:3], 0 offset:28
565 ; GFX11-NEXT: buffer_store_b32 v5, off, s[0:3], 0 offset:32
566 ; GFX11-NEXT: s_nop 0
567 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
568 ; GFX11-NEXT: s_endpgm
569 call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 8)
570 call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 8, i32 0, i32 8)
571 call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 12, i32 0, i32 8)
572 call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 16, i32 0, i32 8)
573 call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 28, i32 0, i32 8)
574 call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 32, i32 0, i32 8)
; Declarations of the raw buffer intrinsics called in this file, followed by
; the attribute groups they reference. Every store variant takes the value,
; a <4 x i32> resource and three i32 operands; the load takes the resource and
; three i32 operands.
; As used by the store calls in this file: a constant first i32 shows up as an
; immediate offset and a variable one as an offen VGPR; the second i32 is
; always 0; the third i32 of 1, 2 and 3 corresponds to glc, slc and glc slc in
; the expected assembly.
; NOTE(review): operand names and the full meaning of the last operand are
; not stated here -- confirm against the AMDGPU intrinsic documentation.
578 declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32) #0
579 declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32) #0
580 declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #0
581 declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) #0
582 declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32) #0
583 declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32) #0
584 declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1
585 declare void @llvm.amdgcn.raw.buffer.store.i8(i8, <4 x i32>, i32, i32, i32) #0
586 declare void @llvm.amdgcn.raw.buffer.store.f16(half, <4 x i32>, i32, i32, i32) #0
587 declare void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32) #0
588 declare void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32) #0
589 declare void @llvm.amdgcn.raw.buffer.store.i16(i16, <4 x i32>, i32, i32, i32) #0
590 declare void @llvm.amdgcn.raw.buffer.store.v2i16(<2 x i16>, <4 x i32>, i32, i32, i32) #0
591 declare void @llvm.amdgcn.raw.buffer.store.v4i16(<4 x i16>, <4 x i32>, i32, i32, i32) #0
593 attributes #0 = { nounwind }
594 attributes #1 = { nounwind readonly }