[TableGen] Fix validateOperandClass for non Physical Reg (#118146)
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / llvm.amdgcn.struct.tbuffer.store.d16.ll
blob17ebb1a835462de6a5c5547a83a252a5dd6c1b80
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Check-prefix scheme: one prefix per -mcpu family (gfx810 and gfx900 share
; PREGFX10-PACKED). gfx1200 is compiled once with the default selector and once
; with -global-isel; both runs share GFX12-PACKED and add a -SDAG / -GISEL
; prefix for the functions whose expected output differs between the two.
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=PREGFX10-UNPACKED %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefixes=PREGFX10-PACKED %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=PREGFX10-PACKED %s
5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-PACKED %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-PACKED %s
7 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-SDAG %s
9 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-GISEL %s
; tbuffer_store_d16_x: stores a single `half` through
; @llvm.amdgcn.struct.tbuffer.store.f16, indexed by %vindex.
; Every prefix expects the data in v0, the index in v1 and one d16 "x" tbuffer
; store with `idxen`. What differs per target is the mnemonic spelling
; (tbuffer_store_format_d16_x vs. tbuffer_store_d16_format_x), the name printed
; inside format:[...] for the immediate 33 (presumably the intrinsic's format
; operand), and the operand printed after the resource (0, or null on gfx1200).
11 define amdgpu_kernel void @tbuffer_store_d16_x(<4 x i32> %rsrc, half %data, i32 %vindex) {
12 ; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_x:
13 ; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
14 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
15 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
16 ; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
17 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
18 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s5
19 ; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_x v0, v1, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
20 ; PREGFX10-UNPACKED-NEXT:    s_endpgm
22 ; PREGFX10-PACKED-LABEL: tbuffer_store_d16_x:
23 ; PREGFX10-PACKED:       ; %bb.0: ; %main_body
24 ; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
25 ; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
26 ; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
27 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
28 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
29 ; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_x v0, v1, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
30 ; PREGFX10-PACKED-NEXT:    s_endpgm
32 ; GFX10-PACKED-LABEL: tbuffer_store_d16_x:
33 ; GFX10-PACKED:       ; %bb.0: ; %main_body
34 ; GFX10-PACKED-NEXT:    s_clause 0x1
35 ; GFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
36 ; GFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
37 ; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
38 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
39 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
40 ; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
41 ; GFX10-PACKED-NEXT:    s_endpgm
43 ; GFX11-PACKED-LABEL: tbuffer_store_d16_x:
44 ; GFX11-PACKED:       ; %bb.0: ; %main_body
45 ; GFX11-PACKED-NEXT:    s_clause 0x1
46 ; GFX11-PACKED-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
47 ; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
48 ; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
49 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s6
50 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s7
51 ; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
52 ; GFX11-PACKED-NEXT:    s_endpgm
54 ; GFX12-PACKED-LABEL: tbuffer_store_d16_x:
55 ; GFX12-PACKED:       ; %bb.0: ; %main_body
56 ; GFX12-PACKED-NEXT:    s_clause 0x1
57 ; GFX12-PACKED-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
58 ; GFX12-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
59 ; GFX12-PACKED-NEXT:    s_wait_kmcnt 0x0
60 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, s6
61 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v1, s7
62 ; GFX12-PACKED-NEXT:    tbuffer_store_d16_format_x v0, v1, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
63 ; GFX12-PACKED-NEXT:    s_endpgm
64 main_body:
65   call void @llvm.amdgcn.struct.tbuffer.store.f16(half %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
66   ret void
67 }
; tbuffer_store_d16_xy: stores a <2 x half> through
; @llvm.amdgcn.struct.tbuffer.store.v2f16, indexed by %vindex.
; PREGFX10-UNPACKED expects the loaded dword to be split into its low and high
; 16 bits (s_and_b32 0xffff / s_lshr_b32 16) and passed as the register pair
; v[0:1], with the index in v2. All packed prefixes expect the dword to be
; forwarded unchanged in v0, with the index in v1.
69 define amdgpu_kernel void @tbuffer_store_d16_xy(<4 x i32> %rsrc, <2 x half> %data, i32 %vindex) {
70 ; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xy:
71 ; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
72 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
73 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
74 ; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
75 ; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s6, s4, 16
76 ; PREGFX10-UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
77 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
78 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s6
79 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
80 ; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_xy v[0:1], v2, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
81 ; PREGFX10-UNPACKED-NEXT:    s_endpgm
83 ; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xy:
84 ; PREGFX10-PACKED:       ; %bb.0: ; %main_body
85 ; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
86 ; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
87 ; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
88 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
89 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
90 ; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_xy v0, v1, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
91 ; PREGFX10-PACKED-NEXT:    s_endpgm
93 ; GFX10-PACKED-LABEL: tbuffer_store_d16_xy:
94 ; GFX10-PACKED:       ; %bb.0: ; %main_body
95 ; GFX10-PACKED-NEXT:    s_clause 0x1
96 ; GFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
97 ; GFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
98 ; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
99 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
100 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
101 ; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_xy v0, v1, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
102 ; GFX10-PACKED-NEXT:    s_endpgm
104 ; GFX11-PACKED-LABEL: tbuffer_store_d16_xy:
105 ; GFX11-PACKED:       ; %bb.0: ; %main_body
106 ; GFX11-PACKED-NEXT:    s_clause 0x1
107 ; GFX11-PACKED-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
108 ; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
109 ; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
110 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s6
111 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s7
112 ; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xy v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
113 ; GFX11-PACKED-NEXT:    s_endpgm
115 ; GFX12-PACKED-LABEL: tbuffer_store_d16_xy:
116 ; GFX12-PACKED:       ; %bb.0: ; %main_body
117 ; GFX12-PACKED-NEXT:    s_clause 0x1
118 ; GFX12-PACKED-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
119 ; GFX12-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
120 ; GFX12-PACKED-NEXT:    s_wait_kmcnt 0x0
121 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, s6
122 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v1, s7
123 ; GFX12-PACKED-NEXT:    tbuffer_store_d16_format_xy v0, v1, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
124 ; GFX12-PACKED-NEXT:    s_endpgm
125 main_body:
126   call void @llvm.amdgcn.struct.tbuffer.store.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
127   ret void
128 }
; tbuffer_store_d16_xyz: stores a <3 x half> built from lanes 0..2 of the
; <4 x half> kernel argument, through @llvm.amdgcn.struct.tbuffer.store.v3f16.
; PREGFX10-UNPACKED expects one VGPR per element (v[0:2], index in v3). The
; packed prefixes expect two data dwords in v[0:1] with the second one masked
; to its low 16 bits (s_and_b32 ..., 0xffff) and the index in v2.
; gfx1200 needs separate -SDAG and -GISEL prefixes here: the GlobalISel checks
; expect s_pack_lh_b32_b16 plus the xyzw store form instead of the mask plus
; the xyz store form.
; The shuffle mask <0, 1, 2> never selects from the second vector operand, so
; it is spelled `poison` rather than the deprecated `undef`.
130 define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %vindex) {
131 ; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xyz:
132 ; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
133 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
134 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
135 ; PREGFX10-UNPACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
136 ; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
137 ; PREGFX10-UNPACKED-NEXT:    s_and_b32 s5, s5, 0xffff
138 ; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s7, s4, 16
139 ; PREGFX10-UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
140 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
141 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s7
142 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
143 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v3, s6
144 ; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_xyz v[0:2], v3, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
145 ; PREGFX10-UNPACKED-NEXT:    s_endpgm
147 ; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xyz:
148 ; PREGFX10-PACKED:       ; %bb.0: ; %main_body
149 ; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
150 ; PREGFX10-PACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
151 ; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
152 ; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
153 ; PREGFX10-PACKED-NEXT:    s_and_b32 s5, s5, 0xffff
154 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
155 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
156 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v2, s6
157 ; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
158 ; PREGFX10-PACKED-NEXT:    s_endpgm
160 ; GFX10-PACKED-LABEL: tbuffer_store_d16_xyz:
161 ; GFX10-PACKED:       ; %bb.0: ; %main_body
162 ; GFX10-PACKED-NEXT:    s_clause 0x2
163 ; GFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
164 ; GFX10-PACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
165 ; GFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
166 ; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
167 ; GFX10-PACKED-NEXT:    s_and_b32 s5, s5, 0xffff
168 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
169 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
170 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v2, s6
171 ; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
172 ; GFX10-PACKED-NEXT:    s_endpgm
174 ; GFX11-PACKED-LABEL: tbuffer_store_d16_xyz:
175 ; GFX11-PACKED:       ; %bb.0: ; %main_body
176 ; GFX11-PACKED-NEXT:    s_clause 0x2
177 ; GFX11-PACKED-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
178 ; GFX11-PACKED-NEXT:    s_load_b32 s8, s[4:5], 0x18
179 ; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
180 ; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
181 ; GFX11-PACKED-NEXT:    s_and_b32 s4, s7, 0xffff
182 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s6
183 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s4
184 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v2, s8
185 ; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xyz v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
186 ; GFX11-PACKED-NEXT:    s_endpgm
188 ; GFX12-PACKED-SDAG-LABEL: tbuffer_store_d16_xyz:
189 ; GFX12-PACKED-SDAG:       ; %bb.0: ; %main_body
190 ; GFX12-PACKED-SDAG-NEXT:    s_clause 0x1
191 ; GFX12-PACKED-SDAG-NEXT:    s_load_b96 s[8:10], s[4:5], 0x10
192 ; GFX12-PACKED-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
193 ; GFX12-PACKED-SDAG-NEXT:    s_wait_kmcnt 0x0
194 ; GFX12-PACKED-SDAG-NEXT:    s_and_b32 s4, s9, 0xffff
195 ; GFX12-PACKED-SDAG-NEXT:    v_mov_b32_e32 v0, s8
196 ; GFX12-PACKED-SDAG-NEXT:    v_mov_b32_e32 v1, s4
197 ; GFX12-PACKED-SDAG-NEXT:    v_mov_b32_e32 v2, s10
198 ; GFX12-PACKED-SDAG-NEXT:    tbuffer_store_d16_format_xyz v[0:1], v2, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
199 ; GFX12-PACKED-SDAG-NEXT:    s_endpgm
201 ; GFX12-PACKED-GISEL-LABEL: tbuffer_store_d16_xyz:
202 ; GFX12-PACKED-GISEL:       ; %bb.0: ; %main_body
203 ; GFX12-PACKED-GISEL-NEXT:    s_clause 0x1
204 ; GFX12-PACKED-GISEL-NEXT:    s_load_b96 s[8:10], s[4:5], 0x10
205 ; GFX12-PACKED-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
206 ; GFX12-PACKED-GISEL-NEXT:    s_wait_kmcnt 0x0
207 ; GFX12-PACKED-GISEL-NEXT:    s_pack_lh_b32_b16 s8, s8, s8
208 ; GFX12-PACKED-GISEL-NEXT:    v_mov_b32_e32 v2, s10
209 ; GFX12-PACKED-GISEL-NEXT:    v_mov_b32_e32 v0, s8
210 ; GFX12-PACKED-GISEL-NEXT:    v_mov_b32_e32 v1, s9
211 ; GFX12-PACKED-GISEL-NEXT:    tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
212 ; GFX12-PACKED-GISEL-NEXT:    s_endpgm
213 main_body:
214   %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
215   call void @llvm.amdgcn.struct.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
216   ret void
217 }
; tbuffer_store_d16_xyzw: stores a full <4 x half> through
; @llvm.amdgcn.struct.tbuffer.store.v4f16, indexed by %vindex.
; PREGFX10-UNPACKED expects both loaded dwords to be split into low/high
; halves (s_and_b32 0xffff / s_lshr_b32 16) and passed as v[0:3], with the
; index in v4. All packed prefixes expect the two dwords forwarded unchanged in
; v[0:1], with the index in v2.
219 define amdgpu_kernel void @tbuffer_store_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data, i32 %vindex) {
220 ; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xyzw:
221 ; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
222 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
223 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
224 ; PREGFX10-UNPACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
225 ; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
226 ; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s7, s5, 16
227 ; PREGFX10-UNPACKED-NEXT:    s_and_b32 s5, s5, 0xffff
228 ; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s8, s4, 16
229 ; PREGFX10-UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
230 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
231 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s8
232 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
233 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v3, s7
234 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v4, s6
235 ; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
236 ; PREGFX10-UNPACKED-NEXT:    s_endpgm
238 ; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xyzw:
239 ; PREGFX10-PACKED:       ; %bb.0: ; %main_body
240 ; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
241 ; PREGFX10-PACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
242 ; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
243 ; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
244 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
245 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
246 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v2, s6
247 ; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyzw v[0:1], v2, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
248 ; PREGFX10-PACKED-NEXT:    s_endpgm
250 ; GFX10-PACKED-LABEL: tbuffer_store_d16_xyzw:
251 ; GFX10-PACKED:       ; %bb.0: ; %main_body
252 ; GFX10-PACKED-NEXT:    s_clause 0x2
253 ; GFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
254 ; GFX10-PACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
255 ; GFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
256 ; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
257 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
258 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
259 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v2, s6
260 ; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyzw v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
261 ; GFX10-PACKED-NEXT:    s_endpgm
263 ; GFX11-PACKED-LABEL: tbuffer_store_d16_xyzw:
264 ; GFX11-PACKED:       ; %bb.0: ; %main_body
265 ; GFX11-PACKED-NEXT:    s_clause 0x2
266 ; GFX11-PACKED-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
267 ; GFX11-PACKED-NEXT:    s_load_b32 s8, s[4:5], 0x18
268 ; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
269 ; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
270 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s6
271 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s7
272 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v2, s8
273 ; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
274 ; GFX11-PACKED-NEXT:    s_endpgm
276 ; GFX12-PACKED-LABEL: tbuffer_store_d16_xyzw:
277 ; GFX12-PACKED:       ; %bb.0: ; %main_body
278 ; GFX12-PACKED-NEXT:    s_clause 0x1
279 ; GFX12-PACKED-NEXT:    s_load_b96 s[8:10], s[4:5], 0x10
280 ; GFX12-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
281 ; GFX12-PACKED-NEXT:    s_wait_kmcnt 0x0
282 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, s8
283 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v1, s9
284 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v2, s10
285 ; GFX12-PACKED-NEXT:    tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
286 ; GFX12-PACKED-NEXT:    s_endpgm
287 main_body:
288   call void @llvm.amdgcn.struct.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
289   ret void
290 }
; Declarations of the struct.tbuffer.store overloads called by the kernels
; above, one per payload type: half, <2 x half>, <3 x half> and <4 x half>.
292 declare void @llvm.amdgcn.struct.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32, i32, i32)
293 declare void @llvm.amdgcn.struct.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32, i32)
294 declare void @llvm.amdgcn.struct.tbuffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32, i32)
295 declare void @llvm.amdgcn.struct.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32, i32)