Bump version to 19.1.0-rc3
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / llvm.amdgcn.struct.tbuffer.store.d16.ll
blob1da076c65239906f80ac5963401f923a85b8c97b
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=PREGFX10-UNPACKED %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefixes=PREGFX10-PACKED %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=PREGFX10-PACKED %s
5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-PACKED %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-PACKED %s
7 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-SDAG %s
9 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-GISEL %s
;
; Check-prefix layout used by the RUN lines above:
;   PREGFX10-UNPACKED   -mcpu=tonga
;   PREGFX10-PACKED     -mcpu=gfx810 and -mcpu=gfx900 (shared checks)
;   GFX10-PACKED        -mcpu=gfx1010
;   GFX11-PACKED        -mcpu=gfx1100
;   GFX12-PACKED        -mcpu=gfx1200, common to both gfx1200 runs
;   GFX12-PACKED-SDAG   -mcpu=gfx1200 without -global-isel
;   GFX12-PACKED-GISEL  -mcpu=gfx1200 with -global-isel
; Each configuration is listed exactly once; the gfx1200 run without
; -global-isel used to appear twice with identical flags and prefixes.
; Stores a single half value through llvm.amdgcn.struct.tbuffer.store.f16,
; passing %data, %rsrc and %vindex followed by the literal operands 0, 0, 33, 0.
; Every prefix expects exactly one d16 "x" tbuffer store using idxen, with the
; data in v0 and the index in v1. The checks differ per target in three ways:
;   - the mnemonic is tbuffer_store_format_d16_x up to GFX10 and
;     tbuffer_store_d16_format_x for GFX11/GFX12;
;   - the format operand is printed as BUF_NUM_FORMAT_USCALED,
;     BUF_FMT_10_11_11_SSCALED or BUF_FMT_10_10_10_2_SNORM;
;   - GFX12 prints "null" where the other targets print "0" after the resource.
; NOTE(review): the i32 33 operand is presumably the combined format immediate
; that yields the format names above -- confirm against the intrinsic definition.
11 define amdgpu_kernel void @tbuffer_store_d16_x(<4 x i32> %rsrc, half %data, i32 %vindex) {
12 ; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_x:
13 ; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
14 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x10
15 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
16 ; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
17 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
18 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s5
19 ; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_x v0, v1, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
20 ; PREGFX10-UNPACKED-NEXT:    s_endpgm
22 ; PREGFX10-PACKED-LABEL: tbuffer_store_d16_x:
23 ; PREGFX10-PACKED:       ; %bb.0: ; %main_body
24 ; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x10
25 ; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
26 ; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
27 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
28 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
29 ; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_x v0, v1, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
30 ; PREGFX10-PACKED-NEXT:    s_endpgm
32 ; GFX10-PACKED-LABEL: tbuffer_store_d16_x:
33 ; GFX10-PACKED:       ; %bb.0: ; %main_body
34 ; GFX10-PACKED-NEXT:    s_clause 0x1
35 ; GFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x10
36 ; GFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
37 ; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
38 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
39 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
40 ; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
41 ; GFX10-PACKED-NEXT:    s_endpgm
43 ; GFX11-PACKED-LABEL: tbuffer_store_d16_x:
44 ; GFX11-PACKED:       ; %bb.0: ; %main_body
45 ; GFX11-PACKED-NEXT:    s_clause 0x1
46 ; GFX11-PACKED-NEXT:    s_load_b64 s[4:5], s[2:3], 0x10
47 ; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[2:3], 0x0
48 ; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
49 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s4
50 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s5
51 ; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
52 ; GFX11-PACKED-NEXT:    s_nop 0
53 ; GFX11-PACKED-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
54 ; GFX11-PACKED-NEXT:    s_endpgm
56 ; GFX12-PACKED-LABEL: tbuffer_store_d16_x:
57 ; GFX12-PACKED:       ; %bb.0: ; %main_body
58 ; GFX12-PACKED-NEXT:    s_clause 0x1
59 ; GFX12-PACKED-NEXT:    s_load_b64 s[4:5], s[2:3], 0x10
60 ; GFX12-PACKED-NEXT:    s_load_b128 s[0:3], s[2:3], 0x0
61 ; GFX12-PACKED-NEXT:    s_wait_kmcnt 0x0
62 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, s4
63 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v1, s5
64 ; GFX12-PACKED-NEXT:    tbuffer_store_d16_format_x v0, v1, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
65 ; GFX12-PACKED-NEXT:    s_nop 0
66 ; GFX12-PACKED-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
67 ; GFX12-PACKED-NEXT:    s_endpgm
68 main_body:
69   call void @llvm.amdgcn.struct.tbuffer.store.f16(half %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
70   ret void
; Stores a <2 x half> value through llvm.amdgcn.struct.tbuffer.store.v2f16,
; passing %data, %rsrc and %vindex followed by the literal operands 0, 0, 33, 0.
; The PREGFX10-UNPACKED checks expect the two halves to be separated into their
; own registers (s_lshr_b32 by 16 for the high half, s_and_b32 with 0xffff for
; the low half) and stored from the register pair v[0:1]. All PACKED prefixes
; expect the data to be stored from the single register v0.
; NOTE(review): the i32 33 operand is presumably the combined format immediate
; printed in the format:[...] operand -- confirm against the intrinsic definition.
73 define amdgpu_kernel void @tbuffer_store_d16_xy(<4 x i32> %rsrc, <2 x half> %data, i32 %vindex) {
74 ; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xy:
75 ; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
76 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x10
77 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
78 ; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
79 ; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s6, s4, 16
80 ; PREGFX10-UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
81 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
82 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s6
83 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
84 ; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_xy v[0:1], v2, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
85 ; PREGFX10-UNPACKED-NEXT:    s_endpgm
87 ; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xy:
88 ; PREGFX10-PACKED:       ; %bb.0: ; %main_body
89 ; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x10
90 ; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
91 ; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
92 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
93 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
94 ; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_xy v0, v1, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
95 ; PREGFX10-PACKED-NEXT:    s_endpgm
97 ; GFX10-PACKED-LABEL: tbuffer_store_d16_xy:
98 ; GFX10-PACKED:       ; %bb.0: ; %main_body
99 ; GFX10-PACKED-NEXT:    s_clause 0x1
100 ; GFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x10
101 ; GFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
102 ; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
103 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
104 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
105 ; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_xy v0, v1, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
106 ; GFX10-PACKED-NEXT:    s_endpgm
108 ; GFX11-PACKED-LABEL: tbuffer_store_d16_xy:
109 ; GFX11-PACKED:       ; %bb.0: ; %main_body
110 ; GFX11-PACKED-NEXT:    s_clause 0x1
111 ; GFX11-PACKED-NEXT:    s_load_b64 s[4:5], s[2:3], 0x10
112 ; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[2:3], 0x0
113 ; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
114 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s4
115 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s5
116 ; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xy v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
117 ; GFX11-PACKED-NEXT:    s_nop 0
118 ; GFX11-PACKED-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
119 ; GFX11-PACKED-NEXT:    s_endpgm
121 ; GFX12-PACKED-LABEL: tbuffer_store_d16_xy:
122 ; GFX12-PACKED:       ; %bb.0: ; %main_body
123 ; GFX12-PACKED-NEXT:    s_clause 0x1
124 ; GFX12-PACKED-NEXT:    s_load_b64 s[4:5], s[2:3], 0x10
125 ; GFX12-PACKED-NEXT:    s_load_b128 s[0:3], s[2:3], 0x0
126 ; GFX12-PACKED-NEXT:    s_wait_kmcnt 0x0
127 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, s4
128 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v1, s5
129 ; GFX12-PACKED-NEXT:    tbuffer_store_d16_format_xy v0, v1, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
130 ; GFX12-PACKED-NEXT:    s_nop 0
131 ; GFX12-PACKED-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
132 ; GFX12-PACKED-NEXT:    s_endpgm
133 main_body:
134   call void @llvm.amdgcn.struct.tbuffer.store.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
135   ret void
; Stores the first three lanes of a <4 x half> kernel argument through
; llvm.amdgcn.struct.tbuffer.store.v3f16, passing the extracted <3 x half>,
; %rsrc and %vindex followed by the literal operands 0, 0, 33, 0.
; Expected data registers: v[0:2] for PREGFX10-UNPACKED and v[0:1] for every
; packed prefix. The GFX12 checks are split by prefix for this function: the
; SDAG checks mask s5 with 0xffff and expect the _xyz store, while the GISEL
; checks expect s_pack_lh_b32_b16 followed by the _xyzw form of the store.
; NOTE(review): the i32 33 operand is presumably the combined format immediate
; printed in the format:[...] operand -- confirm against the intrinsic definition.
138 define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %vindex) {
139 ; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xyz:
140 ; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
141 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x10
142 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
143 ; PREGFX10-UNPACKED-NEXT:    s_load_dword s6, s[6:7], 0x18
144 ; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
145 ; PREGFX10-UNPACKED-NEXT:    s_and_b32 s5, s5, 0xffff
146 ; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s7, s4, 16
147 ; PREGFX10-UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
148 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
149 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s7
150 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
151 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v3, s6
152 ; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_xyz v[0:2], v3, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
153 ; PREGFX10-UNPACKED-NEXT:    s_endpgm
155 ; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xyz:
156 ; PREGFX10-PACKED:       ; %bb.0: ; %main_body
157 ; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x10
158 ; PREGFX10-PACKED-NEXT:    s_load_dword s8, s[6:7], 0x18
159 ; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
160 ; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
161 ; PREGFX10-PACKED-NEXT:    s_and_b32 s5, s5, 0xffff
162 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
163 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
164 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v2, s8
165 ; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
166 ; PREGFX10-PACKED-NEXT:    s_endpgm
168 ; GFX10-PACKED-LABEL: tbuffer_store_d16_xyz:
169 ; GFX10-PACKED:       ; %bb.0: ; %main_body
170 ; GFX10-PACKED-NEXT:    s_clause 0x2
171 ; GFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x10
172 ; GFX10-PACKED-NEXT:    s_load_dword s8, s[6:7], 0x18
173 ; GFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
174 ; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
175 ; GFX10-PACKED-NEXT:    s_and_b32 s5, s5, 0xffff
176 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
177 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
178 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v2, s8
179 ; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
180 ; GFX10-PACKED-NEXT:    s_endpgm
182 ; GFX11-PACKED-LABEL: tbuffer_store_d16_xyz:
183 ; GFX11-PACKED:       ; %bb.0: ; %main_body
184 ; GFX11-PACKED-NEXT:    s_clause 0x2
185 ; GFX11-PACKED-NEXT:    s_load_b64 s[4:5], s[2:3], 0x10
186 ; GFX11-PACKED-NEXT:    s_load_b32 s6, s[2:3], 0x18
187 ; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[2:3], 0x0
188 ; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
189 ; GFX11-PACKED-NEXT:    s_and_b32 s5, s5, 0xffff
190 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s4
191 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s5
192 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v2, s6
193 ; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xyz v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
194 ; GFX11-PACKED-NEXT:    s_nop 0
195 ; GFX11-PACKED-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
196 ; GFX11-PACKED-NEXT:    s_endpgm
198 ; GFX12-PACKED-SDAG-LABEL: tbuffer_store_d16_xyz:
199 ; GFX12-PACKED-SDAG:       ; %bb.0: ; %main_body
200 ; GFX12-PACKED-SDAG-NEXT:    s_clause 0x1
201 ; GFX12-PACKED-SDAG-NEXT:    s_load_b96 s[4:6], s[2:3], 0x10
202 ; GFX12-PACKED-SDAG-NEXT:    s_load_b128 s[0:3], s[2:3], 0x0
203 ; GFX12-PACKED-SDAG-NEXT:    s_wait_kmcnt 0x0
204 ; GFX12-PACKED-SDAG-NEXT:    s_and_b32 s5, s5, 0xffff
205 ; GFX12-PACKED-SDAG-NEXT:    v_mov_b32_e32 v0, s4
206 ; GFX12-PACKED-SDAG-NEXT:    v_mov_b32_e32 v1, s5
207 ; GFX12-PACKED-SDAG-NEXT:    v_mov_b32_e32 v2, s6
208 ; GFX12-PACKED-SDAG-NEXT:    tbuffer_store_d16_format_xyz v[0:1], v2, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
209 ; GFX12-PACKED-SDAG-NEXT:    s_nop 0
210 ; GFX12-PACKED-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
211 ; GFX12-PACKED-SDAG-NEXT:    s_endpgm
213 ; GFX12-PACKED-GISEL-LABEL: tbuffer_store_d16_xyz:
214 ; GFX12-PACKED-GISEL:       ; %bb.0: ; %main_body
215 ; GFX12-PACKED-GISEL-NEXT:    s_clause 0x1
216 ; GFX12-PACKED-GISEL-NEXT:    s_load_b96 s[4:6], s[2:3], 0x10
217 ; GFX12-PACKED-GISEL-NEXT:    s_load_b128 s[0:3], s[2:3], 0x0
218 ; GFX12-PACKED-GISEL-NEXT:    s_wait_kmcnt 0x0
219 ; GFX12-PACKED-GISEL-NEXT:    s_pack_lh_b32_b16 s4, s4, s4
220 ; GFX12-PACKED-GISEL-NEXT:    v_mov_b32_e32 v2, s6
221 ; GFX12-PACKED-GISEL-NEXT:    v_mov_b32_e32 v0, s4
222 ; GFX12-PACKED-GISEL-NEXT:    v_mov_b32_e32 v1, s5
223 ; GFX12-PACKED-GISEL-NEXT:    tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
224 ; GFX12-PACKED-GISEL-NEXT:    s_nop 0
225 ; GFX12-PACKED-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
226 ; GFX12-PACKED-GISEL-NEXT:    s_endpgm
227 main_body:
; The mask <0, 1, 2> only selects lanes of %data, so the second shufflevector
; operand is never read; it is spelled poison instead of the deprecated undef.
228   %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
229   call void @llvm.amdgcn.struct.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
230   ret void
; Stores a full <4 x half> value through llvm.amdgcn.struct.tbuffer.store.v4f16,
; passing %data, %rsrc and %vindex followed by the literal operands 0, 0, 33, 0.
; The PREGFX10-UNPACKED checks expect every half in its own register (two
; s_lshr_b32 / s_and_b32 pairs, data stored from v[0:3]); the packed prefixes
; expect the data to be stored from v[0:1]. All GFX12 RUN lines share the
; GFX12-PACKED checks for this function, which load the data and %vindex with a
; single s_load_b96.
; NOTE(review): the i32 33 operand is presumably the combined format immediate
; printed in the format:[...] operand -- confirm against the intrinsic definition.
233 define amdgpu_kernel void @tbuffer_store_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data, i32 %vindex) {
234 ; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xyzw:
235 ; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
236 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x10
237 ; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
238 ; PREGFX10-UNPACKED-NEXT:    s_load_dword s6, s[6:7], 0x18
239 ; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
240 ; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s7, s5, 16
241 ; PREGFX10-UNPACKED-NEXT:    s_and_b32 s5, s5, 0xffff
242 ; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s8, s4, 16
243 ; PREGFX10-UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
244 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
245 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s8
246 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
247 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v3, s7
248 ; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v4, s6
249 ; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
250 ; PREGFX10-UNPACKED-NEXT:    s_endpgm
252 ; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xyzw:
253 ; PREGFX10-PACKED:       ; %bb.0: ; %main_body
254 ; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x10
255 ; PREGFX10-PACKED-NEXT:    s_load_dword s8, s[6:7], 0x18
256 ; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
257 ; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
258 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
259 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
260 ; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v2, s8
261 ; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyzw v[0:1], v2, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
262 ; PREGFX10-PACKED-NEXT:    s_endpgm
264 ; GFX10-PACKED-LABEL: tbuffer_store_d16_xyzw:
265 ; GFX10-PACKED:       ; %bb.0: ; %main_body
266 ; GFX10-PACKED-NEXT:    s_clause 0x2
267 ; GFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x10
268 ; GFX10-PACKED-NEXT:    s_load_dword s8, s[6:7], 0x18
269 ; GFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
270 ; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
271 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
272 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
273 ; GFX10-PACKED-NEXT:    v_mov_b32_e32 v2, s8
274 ; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyzw v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
275 ; GFX10-PACKED-NEXT:    s_endpgm
277 ; GFX11-PACKED-LABEL: tbuffer_store_d16_xyzw:
278 ; GFX11-PACKED:       ; %bb.0: ; %main_body
279 ; GFX11-PACKED-NEXT:    s_clause 0x2
280 ; GFX11-PACKED-NEXT:    s_load_b64 s[4:5], s[2:3], 0x10
281 ; GFX11-PACKED-NEXT:    s_load_b32 s6, s[2:3], 0x18
282 ; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[2:3], 0x0
283 ; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
284 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s4
285 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s5
286 ; GFX11-PACKED-NEXT:    v_mov_b32_e32 v2, s6
287 ; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
288 ; GFX11-PACKED-NEXT:    s_nop 0
289 ; GFX11-PACKED-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
290 ; GFX11-PACKED-NEXT:    s_endpgm
292 ; GFX12-PACKED-LABEL: tbuffer_store_d16_xyzw:
293 ; GFX12-PACKED:       ; %bb.0: ; %main_body
294 ; GFX12-PACKED-NEXT:    s_clause 0x1
295 ; GFX12-PACKED-NEXT:    s_load_b96 s[4:6], s[2:3], 0x10
296 ; GFX12-PACKED-NEXT:    s_load_b128 s[0:3], s[2:3], 0x0
297 ; GFX12-PACKED-NEXT:    s_wait_kmcnt 0x0
298 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, s4
299 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v1, s5
300 ; GFX12-PACKED-NEXT:    v_mov_b32_e32 v2, s6
301 ; GFX12-PACKED-NEXT:    tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
302 ; GFX12-PACKED-NEXT:    s_nop 0
303 ; GFX12-PACKED-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
304 ; GFX12-PACKED-NEXT:    s_endpgm
305 main_body:
306   call void @llvm.amdgcn.struct.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
307   ret void
; Declarations of the llvm.amdgcn.struct.tbuffer.store overloads called by the
; test functions in this file, one per stored data type: half, <2 x half>,
; <3 x half> and <4 x half>. Each overload takes the data value, a <4 x i32>
; operand and five i32 operands, and returns void.
310 declare void @llvm.amdgcn.struct.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32, i32, i32)
311 declare void @llvm.amdgcn.struct.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32, i32)
312 declare void @llvm.amdgcn.struct.tbuffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32, i32)
313 declare void @llvm.amdgcn.struct.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32, i32)