Bump version to 19.1.0-rc3
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / llvm.amdgcn.raw.tbuffer.store.d16.ll
bloba241bdeaff1a75e8b4305cf6393c065433a64284
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10-UNPACKED %s
3 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10-PACKED %s
4 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10-PACKED %s
5 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GFX10-PACKED %s
6 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX11-PACKED %s
7 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-SDAG %s
8 ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-GISEL %s
; Check-prefix map for the llc invocations above:
;   tonga             -> PREGFX10-UNPACKED
;   gfx810 and gfx900 -> PREGFX10-PACKED (both invocations share one prefix)
;   gfx1010           -> GFX10-PACKED
;   gfx1100           -> GFX11-PACKED
;   gfx1200           -> GFX12-PACKED, plus GFX12-PACKED-SDAG for the invocation
;                        without -global-isel and GFX12-PACKED-GISEL for the one
;                        with -global-isel.
; The assertions below are generated; regenerate them with the script named in
; the NOTE line instead of editing them by hand.
; Stores a single half through llvm.amdgcn.raw.tbuffer.store.f16 with the i32
; operands (0, 0, 33, 0). Every target is expected to load the data dword and
; the resource descriptor from the kernel arguments, copy the data into v0 and
; issue a one-register d16 "x" store. The mnemonic is
; tbuffer_store_format_d16_x up to gfx1010 and tbuffer_store_d16_format_x on
; gfx1100/gfx1200, and the printed format name differs per target.
; NOTE(review): 33 is presumably the buffer-format operand -- confirm against
; the intrinsic definition.
10 define amdgpu_kernel void @tbuffer_store_d16_x(<4 x i32> %rsrc, half %data) {
11 ; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_x:
12 ; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
13 ; PREGFX10-UNPACKED-NEXT:    s_load_dword s4, s[2:3], 0x34
14 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
15 ; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
16 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
17 ; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_x v0, off, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED]
18 ; PREGFX10-UNPACKED-NEXT:    s_endpgm
20 ; PREGFX10-PACKED-LABEL: tbuffer_store_d16_x:
21 ; PREGFX10-PACKED:       ; %bb.0: ; %main_body
22 ; PREGFX10-PACKED-NEXT:    s_load_dword s0, s[2:3], 0x34
23 ; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
24 ; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
25 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s0
26 ; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_x v0, off, s[4:7], 0 format:[BUF_NUM_FORMAT_USCALED]
27 ; PREGFX10-PACKED-NEXT:    s_endpgm
29 ; GFX10-PACKED-LABEL: tbuffer_store_d16_x:
30 ; GFX10-PACKED:       ; %bb.0: ; %main_body
31 ; GFX10-PACKED-NEXT:    s_clause 0x1
32 ; GFX10-PACKED-NEXT:    s_load_dword s0, s[2:3], 0x34
33 ; GFX10-PACKED-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
34 ; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
35 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s0
36 ; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_x v0, off, s[4:7], 0 format:[BUF_FMT_10_11_11_SSCALED]
37 ; GFX10-PACKED-NEXT:    s_endpgm
39 ; GFX11-PACKED-LABEL: tbuffer_store_d16_x:
40 ; GFX11-PACKED:       ; %bb.0: ; %main_body
41 ; GFX11-PACKED-NEXT:    s_clause 0x1
42 ; GFX11-PACKED-NEXT:    s_load_b32 s4, s[2:3], 0x34
43 ; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
44 ; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
45 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s4
46 ; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_x v0, off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
47 ; GFX11-PACKED-NEXT:    s_nop 0
48 ; GFX11-PACKED-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
49 ; GFX11-PACKED-NEXT:    s_endpgm
51 ; GFX12-PACKED-LABEL: tbuffer_store_d16_x:
52 ; GFX12-PACKED:       ; %bb.0: ; %main_body
53 ; GFX12-PACKED-NEXT:    s_clause 0x1
54 ; GFX12-PACKED-NEXT:    s_load_b32 s4, s[2:3], 0x34
55 ; GFX12-PACKED-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
56 ; GFX12-PACKED-NEXT:    s_wait_kmcnt 0x0
57 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, s4
58 ; GFX12-PACKED-NEXT:    tbuffer_store_d16_format_x v0, off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
59 ; GFX12-PACKED-NEXT:    s_nop 0
60 ; GFX12-PACKED-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
61 ; GFX12-PACKED-NEXT:    s_endpgm
62 main_body:
63   call void @llvm.amdgcn.raw.tbuffer.store.f16(half %data, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
64   ret void
; Stores a <2 x half> through llvm.amdgcn.raw.tbuffer.store.v2f16 with the i32
; operands (0, 0, 33, 0). The tonga expectations split the loaded dword into
; its low half (s_and_b32 with 0xffff) and high half (s_lshr_b32 by 16) and
; store the register pair v[0:1]. All other targets move the loaded dword
; unchanged into v0 and store that single register.
; NOTE(review): 33 is presumably the buffer-format operand -- confirm against
; the intrinsic definition.
67 define amdgpu_kernel void @tbuffer_store_d16_xy(<4 x i32> %rsrc, <2 x half> %data) {
68 ; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xy:
69 ; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
70 ; PREGFX10-UNPACKED-NEXT:    s_load_dword s4, s[2:3], 0x34
71 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
72 ; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
73 ; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s5, s4, 16
74 ; PREGFX10-UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
75 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
76 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s5
77 ; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_xy v[0:1], off, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED]
78 ; PREGFX10-UNPACKED-NEXT:    s_endpgm
80 ; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xy:
81 ; PREGFX10-PACKED:       ; %bb.0: ; %main_body
82 ; PREGFX10-PACKED-NEXT:    s_load_dword s0, s[2:3], 0x34
83 ; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
84 ; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
85 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s0
86 ; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_xy v0, off, s[4:7], 0 format:[BUF_NUM_FORMAT_USCALED]
87 ; PREGFX10-PACKED-NEXT:    s_endpgm
89 ; GFX10-PACKED-LABEL: tbuffer_store_d16_xy:
90 ; GFX10-PACKED:       ; %bb.0: ; %main_body
91 ; GFX10-PACKED-NEXT:    s_clause 0x1
92 ; GFX10-PACKED-NEXT:    s_load_dword s0, s[2:3], 0x34
93 ; GFX10-PACKED-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
94 ; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
95 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s0
96 ; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_xy v0, off, s[4:7], 0 format:[BUF_FMT_10_11_11_SSCALED]
97 ; GFX10-PACKED-NEXT:    s_endpgm
99 ; GFX11-PACKED-LABEL: tbuffer_store_d16_xy:
100 ; GFX11-PACKED:       ; %bb.0: ; %main_body
101 ; GFX11-PACKED-NEXT:    s_clause 0x1
102 ; GFX11-PACKED-NEXT:    s_load_b32 s4, s[2:3], 0x34
103 ; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
104 ; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
105 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s4
106 ; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xy v0, off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
107 ; GFX11-PACKED-NEXT:    s_nop 0
108 ; GFX11-PACKED-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
109 ; GFX11-PACKED-NEXT:    s_endpgm
111 ; GFX12-PACKED-LABEL: tbuffer_store_d16_xy:
112 ; GFX12-PACKED:       ; %bb.0: ; %main_body
113 ; GFX12-PACKED-NEXT:    s_clause 0x1
114 ; GFX12-PACKED-NEXT:    s_load_b32 s4, s[2:3], 0x34
115 ; GFX12-PACKED-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
116 ; GFX12-PACKED-NEXT:    s_wait_kmcnt 0x0
117 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, s4
118 ; GFX12-PACKED-NEXT:    tbuffer_store_d16_format_xy v0, off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
119 ; GFX12-PACKED-NEXT:    s_nop 0
120 ; GFX12-PACKED-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
121 ; GFX12-PACKED-NEXT:    s_endpgm
122 main_body:
123   call void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
124   ret void
; Takes a <4 x half> argument, keeps elements 0..2 via shufflevector, and
; stores the resulting <3 x half> through llvm.amdgcn.raw.tbuffer.store.v3f16
; with the i32 operands (0, 0, 33, 0).
;   * tonga: the three halves are separated with s_and_b32 / s_lshr_b32 and
;     stored from v[0:2].
;   * gfx810/gfx900, gfx1010, gfx1100: the second dword is masked with 0xffff
;     and the store uses v[0:1].
;   * gfx1200: the two selectors have separate expectations. The -SDAG checks
;     match the masked two-register "xyz" store above, while the -GISEL checks
;     expect s_pack_lh_b32_b16 on the first dword, no mask on the second, and
;     an "xyzw" store mnemonic.
; NOTE(review): 33 is presumably the buffer-format operand -- confirm against
; the intrinsic definition.
127 define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %data) {
128 ; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xyz:
129 ; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
130 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x34
131 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
132 ; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
133 ; PREGFX10-UNPACKED-NEXT:    s_and_b32 s5, s5, 0xffff
134 ; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s6, s4, 16
135 ; PREGFX10-UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
136 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
137 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s6
138 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
139 ; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_xyz v[0:2], off, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED]
140 ; PREGFX10-UNPACKED-NEXT:    s_endpgm
142 ; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xyz:
143 ; PREGFX10-PACKED:       ; %bb.0: ; %main_body
144 ; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x34
145 ; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
146 ; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
147 ; PREGFX10-PACKED-NEXT:    s_and_b32 s1, s1, 0xffff
148 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s0
149 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s1
150 ; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyz v[0:1], off, s[4:7], 0 format:[BUF_NUM_FORMAT_USCALED]
151 ; PREGFX10-PACKED-NEXT:    s_endpgm
153 ; GFX10-PACKED-LABEL: tbuffer_store_d16_xyz:
154 ; GFX10-PACKED:       ; %bb.0: ; %main_body
155 ; GFX10-PACKED-NEXT:    s_clause 0x1
156 ; GFX10-PACKED-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x34
157 ; GFX10-PACKED-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
158 ; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
159 ; GFX10-PACKED-NEXT:    s_and_b32 s1, s1, 0xffff
160 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s0
161 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s1
162 ; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyz v[0:1], off, s[4:7], 0 format:[BUF_FMT_10_11_11_SSCALED]
163 ; GFX10-PACKED-NEXT:    s_endpgm
165 ; GFX11-PACKED-LABEL: tbuffer_store_d16_xyz:
166 ; GFX11-PACKED:       ; %bb.0: ; %main_body
167 ; GFX11-PACKED-NEXT:    s_clause 0x1
168 ; GFX11-PACKED-NEXT:    s_load_b64 s[4:5], s[2:3], 0x34
169 ; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
170 ; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
171 ; GFX11-PACKED-NEXT:    s_and_b32 s5, s5, 0xffff
172 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s4
173 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s5
174 ; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
175 ; GFX11-PACKED-NEXT:    s_nop 0
176 ; GFX11-PACKED-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
177 ; GFX11-PACKED-NEXT:    s_endpgm
179 ; GFX12-PACKED-SDAG-LABEL: tbuffer_store_d16_xyz:
180 ; GFX12-PACKED-SDAG:       ; %bb.0: ; %main_body
181 ; GFX12-PACKED-SDAG-NEXT:    s_clause 0x1
182 ; GFX12-PACKED-SDAG-NEXT:    s_load_b64 s[4:5], s[2:3], 0x34
183 ; GFX12-PACKED-SDAG-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
184 ; GFX12-PACKED-SDAG-NEXT:    s_wait_kmcnt 0x0
185 ; GFX12-PACKED-SDAG-NEXT:    s_and_b32 s5, s5, 0xffff
186 ; GFX12-PACKED-SDAG-NEXT:    v_mov_b32_e32 v0, s4
187 ; GFX12-PACKED-SDAG-NEXT:    v_mov_b32_e32 v1, s5
188 ; GFX12-PACKED-SDAG-NEXT:    tbuffer_store_d16_format_xyz v[0:1], off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
189 ; GFX12-PACKED-SDAG-NEXT:    s_nop 0
190 ; GFX12-PACKED-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
191 ; GFX12-PACKED-SDAG-NEXT:    s_endpgm
193 ; GFX12-PACKED-GISEL-LABEL: tbuffer_store_d16_xyz:
194 ; GFX12-PACKED-GISEL:       ; %bb.0: ; %main_body
195 ; GFX12-PACKED-GISEL-NEXT:    s_clause 0x1
196 ; GFX12-PACKED-GISEL-NEXT:    s_load_b64 s[4:5], s[2:3], 0x34
197 ; GFX12-PACKED-GISEL-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
198 ; GFX12-PACKED-GISEL-NEXT:    s_wait_kmcnt 0x0
199 ; GFX12-PACKED-GISEL-NEXT:    s_pack_lh_b32_b16 s4, s4, s4
200 ; GFX12-PACKED-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
201 ; GFX12-PACKED-GISEL-NEXT:    v_mov_b32_e32 v0, s4
202 ; GFX12-PACKED-GISEL-NEXT:    v_mov_b32_e32 v1, s5
203 ; GFX12-PACKED-GISEL-NEXT:    tbuffer_store_d16_format_xyzw v[0:1], off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
204 ; GFX12-PACKED-GISEL-NEXT:    s_nop 0
205 ; GFX12-PACKED-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
206 ; GFX12-PACKED-GISEL-NEXT:    s_endpgm
207 main_body:
208   %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
209   call void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
210   ret void
; Stores a <4 x half> through llvm.amdgcn.raw.tbuffer.store.v4f16 with the i32
; operands (0, 0, 33, 0). The tonga expectations split both loaded dwords into
; low and high halves (s_and_b32 with 0xffff, s_lshr_b32 by 16) and store the
; four registers v[0:3]. All other targets move the two loaded dwords unchanged
; into v[0:1] and store that pair. Both gfx1200 invocations share the common
; GFX12-PACKED checks here.
; NOTE(review): 33 is presumably the buffer-format operand -- confirm against
; the intrinsic definition.
213 define amdgpu_kernel void @tbuffer_store_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data) {
214 ; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xyzw:
215 ; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
216 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x34
217 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
218 ; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
219 ; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s6, s5, 16
220 ; PREGFX10-UNPACKED-NEXT:    s_and_b32 s5, s5, 0xffff
221 ; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s7, s4, 16
222 ; PREGFX10-UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
223 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
224 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s7
225 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
226 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v3, s6
227 ; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_xyzw v[0:3], off, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED]
228 ; PREGFX10-UNPACKED-NEXT:    s_endpgm
230 ; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xyzw:
231 ; PREGFX10-PACKED:       ; %bb.0: ; %main_body
232 ; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x34
233 ; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
234 ; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
235 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s0
236 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s1
237 ; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyzw v[0:1], off, s[4:7], 0 format:[BUF_NUM_FORMAT_USCALED]
238 ; PREGFX10-PACKED-NEXT:    s_endpgm
240 ; GFX10-PACKED-LABEL: tbuffer_store_d16_xyzw:
241 ; GFX10-PACKED:       ; %bb.0: ; %main_body
242 ; GFX10-PACKED-NEXT:    s_clause 0x1
243 ; GFX10-PACKED-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x34
244 ; GFX10-PACKED-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
245 ; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
246 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s0
247 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s1
248 ; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyzw v[0:1], off, s[4:7], 0 format:[BUF_FMT_10_11_11_SSCALED]
249 ; GFX10-PACKED-NEXT:    s_endpgm
251 ; GFX11-PACKED-LABEL: tbuffer_store_d16_xyzw:
252 ; GFX11-PACKED:       ; %bb.0: ; %main_body
253 ; GFX11-PACKED-NEXT:    s_clause 0x1
254 ; GFX11-PACKED-NEXT:    s_load_b64 s[4:5], s[2:3], 0x34
255 ; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
256 ; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
257 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s4
258 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s5
259 ; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xyzw v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
260 ; GFX11-PACKED-NEXT:    s_nop 0
261 ; GFX11-PACKED-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
262 ; GFX11-PACKED-NEXT:    s_endpgm
264 ; GFX12-PACKED-LABEL: tbuffer_store_d16_xyzw:
265 ; GFX12-PACKED:       ; %bb.0: ; %main_body
266 ; GFX12-PACKED-NEXT:    s_clause 0x1
267 ; GFX12-PACKED-NEXT:    s_load_b64 s[4:5], s[2:3], 0x34
268 ; GFX12-PACKED-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
269 ; GFX12-PACKED-NEXT:    s_wait_kmcnt 0x0
270 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, s4
271 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v1, s5
272 ; GFX12-PACKED-NEXT:    tbuffer_store_d16_format_xyzw v[0:1], off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
273 ; GFX12-PACKED-NEXT:    s_nop 0
274 ; GFX12-PACKED-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
275 ; GFX12-PACKED-NEXT:    s_endpgm
276 main_body:
277   call void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
278   ret void
; Declarations of the intrinsic overloads called above, one per stored data
; type (half, <2 x half>, <3 x half>, <4 x half>). Each takes the data value,
; a <4 x i32> operand (passed as %rsrc by the callers), and four i32 operands.
281 declare void @llvm.amdgcn.raw.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32, i32)
282 declare void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32)
283 declare void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32)
284 declare void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32)