1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
; 1D image store of <4 x half> data with an i16 x coordinate and dmask 1.
; Only element 0 of %coords is used. Every target is expected to emit one
; image_store with `dmask:0x1` and the `a16 d16` modifiers, data in v[1:2],
; the coordinate in v0 and the resource in s[0:7]; the gfx10 and gfx11 runs
; additionally expect `dim:SQ_RSRC_IMG_1D`.
; The gfx9 `s_endpgm` check and the gfx11 `s_nop 0` check below mirror the
; epilogue expected by the 2D and 3D variants in this file; without the
; `s_nop 0` line the gfx11 NEXT chain cannot match that epilogue.
6 define amdgpu_ps void @store_f16_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
7 ; GFX9-LABEL: store_f16_1d:
8 ; GFX9: ; %bb.0: ; %main_body
9 ; GFX9-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x1 unorm a16 d16
; GFX9-NEXT: s_endpgm
12 ; GFX10-LABEL: store_f16_1d:
13 ; GFX10: ; %bb.0: ; %main_body
14 ; GFX10-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16 d16
15 ; GFX10-NEXT: s_endpgm
17 ; GFX11-LABEL: store_f16_1d:
18 ; GFX11: ; %bb.0: ; %main_body
19 ; GFX11-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16 d16
; GFX11-NEXT: s_nop 0
21 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
22 ; GFX11-NEXT: s_endpgm
; The <2 x i32> payload is reinterpreted as four halves before being stored.
24 %x = extractelement <2 x i16> %coords, i32 0
25 %bitcast = bitcast <2 x i32> %val to <4 x half>
26 call void @llvm.amdgcn.image.store.1d.v4f16.i16(<4 x half> %bitcast, i32 1, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
; 1D image store of <4 x half> data with an i16 x coordinate and dmask 3.
; Only element 0 of %coords is used. Every target is expected to emit one
; image_store with `dmask:0x3` and the `a16 d16` modifiers, data in v[1:2],
; the coordinate in v0 and the resource in s[0:7]; the gfx10 and gfx11 runs
; additionally expect `dim:SQ_RSRC_IMG_1D`.
; The gfx9 `s_endpgm` check and the gfx11 `s_nop 0` check below mirror the
; epilogue expected by the 2D and 3D variants in this file; without the
; `s_nop 0` line the gfx11 NEXT chain cannot match that epilogue.
30 define amdgpu_ps void @store_v2f16_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
31 ; GFX9-LABEL: store_v2f16_1d:
32 ; GFX9: ; %bb.0: ; %main_body
33 ; GFX9-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x3 unorm a16 d16
; GFX9-NEXT: s_endpgm
36 ; GFX10-LABEL: store_v2f16_1d:
37 ; GFX10: ; %bb.0: ; %main_body
38 ; GFX10-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm a16 d16
39 ; GFX10-NEXT: s_endpgm
41 ; GFX11-LABEL: store_v2f16_1d:
42 ; GFX11: ; %bb.0: ; %main_body
43 ; GFX11-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm a16 d16
; GFX11-NEXT: s_nop 0
45 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
46 ; GFX11-NEXT: s_endpgm
; The <2 x i32> payload is reinterpreted as four halves before being stored.
48 %x = extractelement <2 x i16> %coords, i32 0
49 %bitcast = bitcast <2 x i32> %val to <4 x half>
50 call void @llvm.amdgcn.image.store.1d.v4f16.i16(<4 x half> %bitcast, i32 3, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
; 1D image store of <4 x half> data with an i16 x coordinate and dmask 7.
; Only element 0 of %coords is used. Every target is expected to emit one
; image_store with `dmask:0x7` and the `a16 d16` modifiers, data in v[1:2],
; the coordinate in v0 and the resource in s[0:7]; the gfx10 and gfx11 runs
; additionally expect `dim:SQ_RSRC_IMG_1D`.
; The gfx9 `s_endpgm` check and the gfx11 `s_nop 0` check below mirror the
; epilogue expected by the 2D and 3D variants in this file; without the
; `s_nop 0` line the gfx11 NEXT chain cannot match that epilogue.
54 define amdgpu_ps void @store_v3f16_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
55 ; GFX9-LABEL: store_v3f16_1d:
56 ; GFX9: ; %bb.0: ; %main_body
57 ; GFX9-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x7 unorm a16 d16
; GFX9-NEXT: s_endpgm
60 ; GFX10-LABEL: store_v3f16_1d:
61 ; GFX10: ; %bb.0: ; %main_body
62 ; GFX10-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm a16 d16
63 ; GFX10-NEXT: s_endpgm
65 ; GFX11-LABEL: store_v3f16_1d:
66 ; GFX11: ; %bb.0: ; %main_body
67 ; GFX11-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm a16 d16
; GFX11-NEXT: s_nop 0
69 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
70 ; GFX11-NEXT: s_endpgm
; The <2 x i32> payload is reinterpreted as four halves before being stored.
72 %x = extractelement <2 x i16> %coords, i32 0
73 %bitcast = bitcast <2 x i32> %val to <4 x half>
74 call void @llvm.amdgcn.image.store.1d.v4f16.i16(<4 x half> %bitcast, i32 7, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
; 1D image store of <4 x half> data with an i16 x coordinate and dmask 15.
; Only element 0 of %coords is used. Every target is expected to emit one
; image_store with `dmask:0xf` and the `a16 d16` modifiers, data in v[1:2],
; the coordinate in v0 and the resource in s[0:7]; the gfx10 and gfx11 runs
; additionally expect `dim:SQ_RSRC_IMG_1D`.
; The gfx9 `s_endpgm` check and the gfx11 `s_nop 0` check below mirror the
; epilogue expected by the 2D and 3D variants in this file; without the
; `s_nop 0` line the gfx11 NEXT chain cannot match that epilogue.
78 define amdgpu_ps void @store_v4f16_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
79 ; GFX9-LABEL: store_v4f16_1d:
80 ; GFX9: ; %bb.0: ; %main_body
81 ; GFX9-NEXT: image_store v[1:2], v0, s[0:7] dmask:0xf unorm a16 d16
; GFX9-NEXT: s_endpgm
84 ; GFX10-LABEL: store_v4f16_1d:
85 ; GFX10: ; %bb.0: ; %main_body
86 ; GFX10-NEXT: image_store v[1:2], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 d16
87 ; GFX10-NEXT: s_endpgm
89 ; GFX11-LABEL: store_v4f16_1d:
90 ; GFX11: ; %bb.0: ; %main_body
91 ; GFX11-NEXT: image_store v[1:2], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 d16
; GFX11-NEXT: s_nop 0
93 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
94 ; GFX11-NEXT: s_endpgm
; The <2 x i32> payload is reinterpreted as four halves before being stored.
96 %x = extractelement <2 x i16> %coords, i32 0
97 %bitcast = bitcast <2 x i32> %val to <4 x half>
98 call void @llvm.amdgcn.image.store.1d.v4f16.i16(<4 x half> %bitcast, i32 15, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
; 2D image store of <4 x half> data with i16 x/y coordinates and dmask 1.
; x and y are elements 0 and 1 of %coords. Every target is expected to emit
; one image_store with `dmask:0x1` and the `a16 d16` modifiers, data in
; v[1:2], the address operand in the single register v0 and the resource in
; s[0:7]; the gfx10 and gfx11 runs additionally expect `dim:SQ_RSRC_IMG_2D`.
102 define amdgpu_ps void @store_f16_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
103 ; GFX9-LABEL: store_f16_2d:
104 ; GFX9: ; %bb.0: ; %main_body
105 ; GFX9-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x1 unorm a16 d16
106 ; GFX9-NEXT: s_endpgm
108 ; GFX10-LABEL: store_f16_2d:
109 ; GFX10: ; %bb.0: ; %main_body
110 ; GFX10-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm a16 d16
111 ; GFX10-NEXT: s_endpgm
113 ; GFX11-LABEL: store_f16_2d:
114 ; GFX11: ; %bb.0: ; %main_body
115 ; GFX11-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm a16 d16
116 ; GFX11-NEXT: s_nop 0
117 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
118 ; GFX11-NEXT: s_endpgm
; The <2 x i32> payload is reinterpreted as four halves before being stored.
120 %x = extractelement <2 x i16> %coords, i32 0
121 %y = extractelement <2 x i16> %coords, i32 1
122 %bitcast = bitcast <2 x i32> %val to <4 x half>
123 call void @llvm.amdgcn.image.store.2d.v4f16.i16(<4 x half> %bitcast, i32 1, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
; 2D image store of <4 x half> data with i16 x/y coordinates and dmask 3.
; x and y are elements 0 and 1 of %coords. Every target is expected to emit
; one image_store with `dmask:0x3` and the `a16 d16` modifiers, data in
; v[1:2], the address operand in the single register v0 and the resource in
; s[0:7]; the gfx10 and gfx11 runs additionally expect `dim:SQ_RSRC_IMG_2D`.
127 define amdgpu_ps void @store_v2f16_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
128 ; GFX9-LABEL: store_v2f16_2d:
129 ; GFX9: ; %bb.0: ; %main_body
130 ; GFX9-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x3 unorm a16 d16
131 ; GFX9-NEXT: s_endpgm
133 ; GFX10-LABEL: store_v2f16_2d:
134 ; GFX10: ; %bb.0: ; %main_body
135 ; GFX10-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm a16 d16
136 ; GFX10-NEXT: s_endpgm
138 ; GFX11-LABEL: store_v2f16_2d:
139 ; GFX11: ; %bb.0: ; %main_body
140 ; GFX11-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm a16 d16
141 ; GFX11-NEXT: s_nop 0
142 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
143 ; GFX11-NEXT: s_endpgm
; The <2 x i32> payload is reinterpreted as four halves before being stored.
145 %x = extractelement <2 x i16> %coords, i32 0
146 %y = extractelement <2 x i16> %coords, i32 1
147 %bitcast = bitcast <2 x i32> %val to <4 x half>
148 call void @llvm.amdgcn.image.store.2d.v4f16.i16(<4 x half> %bitcast, i32 3, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
; 2D image store of <4 x half> data with i16 x/y coordinates and dmask 7.
; x and y are elements 0 and 1 of %coords. Every target is expected to emit
; one image_store with `dmask:0x7` and the `a16 d16` modifiers, data in
; v[1:2], the address operand in the single register v0 and the resource in
; s[0:7]; the gfx10 and gfx11 runs additionally expect `dim:SQ_RSRC_IMG_2D`.
152 define amdgpu_ps void @store_v3f16_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
153 ; GFX9-LABEL: store_v3f16_2d:
154 ; GFX9: ; %bb.0: ; %main_body
155 ; GFX9-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x7 unorm a16 d16
156 ; GFX9-NEXT: s_endpgm
158 ; GFX10-LABEL: store_v3f16_2d:
159 ; GFX10: ; %bb.0: ; %main_body
160 ; GFX10-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm a16 d16
161 ; GFX10-NEXT: s_endpgm
163 ; GFX11-LABEL: store_v3f16_2d:
164 ; GFX11: ; %bb.0: ; %main_body
165 ; GFX11-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm a16 d16
166 ; GFX11-NEXT: s_nop 0
167 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
168 ; GFX11-NEXT: s_endpgm
; The <2 x i32> payload is reinterpreted as four halves before being stored.
170 %x = extractelement <2 x i16> %coords, i32 0
171 %y = extractelement <2 x i16> %coords, i32 1
172 %bitcast = bitcast <2 x i32> %val to <4 x half>
173 call void @llvm.amdgcn.image.store.2d.v4f16.i16(<4 x half> %bitcast, i32 7, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
; 2D image store of <4 x half> data with i16 x/y coordinates and dmask 15.
; x and y are elements 0 and 1 of %coords. Every target is expected to emit
; one image_store with `dmask:0xf` and the `a16 d16` modifiers, data in
; v[1:2], the address operand in the single register v0 and the resource in
; s[0:7]; the gfx10 and gfx11 runs additionally expect `dim:SQ_RSRC_IMG_2D`.
177 define amdgpu_ps void @store_v4f16_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
178 ; GFX9-LABEL: store_v4f16_2d:
179 ; GFX9: ; %bb.0: ; %main_body
180 ; GFX9-NEXT: image_store v[1:2], v0, s[0:7] dmask:0xf unorm a16 d16
181 ; GFX9-NEXT: s_endpgm
183 ; GFX10-LABEL: store_v4f16_2d:
184 ; GFX10: ; %bb.0: ; %main_body
185 ; GFX10-NEXT: image_store v[1:2], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 d16
186 ; GFX10-NEXT: s_endpgm
188 ; GFX11-LABEL: store_v4f16_2d:
189 ; GFX11: ; %bb.0: ; %main_body
190 ; GFX11-NEXT: image_store v[1:2], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 d16
191 ; GFX11-NEXT: s_nop 0
192 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
193 ; GFX11-NEXT: s_endpgm
; The <2 x i32> payload is reinterpreted as four halves before being stored.
195 %x = extractelement <2 x i16> %coords, i32 0
196 %y = extractelement <2 x i16> %coords, i32 1
197 %bitcast = bitcast <2 x i32> %val to <4 x half>
198 call void @llvm.amdgcn.image.store.2d.v4f16.i16(<4 x half> %bitcast, i32 15, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
; 3D image store of <4 x half> data with i16 x/y/z coordinates and dmask 1.
; x and y are elements 0 and 1 of %coords_lo; z is element 0 of %coords_hi
; (element 1 of %coords_hi is not read). Every target is expected to emit one
; image_store with `dmask:0x1` and the `a16 d16` modifiers, data in v[2:3],
; the address operand in v[0:1] and the resource in s[0:7]; the gfx10 and
; gfx11 runs additionally expect `dim:SQ_RSRC_IMG_3D`.
202 define amdgpu_ps void @store_f16_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) {
203 ; GFX9-LABEL: store_f16_3d:
204 ; GFX9: ; %bb.0: ; %main_body
205 ; GFX9-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x1 unorm a16 d16
206 ; GFX9-NEXT: s_endpgm
208 ; GFX10-LABEL: store_f16_3d:
209 ; GFX10: ; %bb.0: ; %main_body
210 ; GFX10-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm a16 d16
211 ; GFX10-NEXT: s_endpgm
213 ; GFX11-LABEL: store_f16_3d:
214 ; GFX11: ; %bb.0: ; %main_body
215 ; GFX11-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm a16 d16
216 ; GFX11-NEXT: s_nop 0
217 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
218 ; GFX11-NEXT: s_endpgm
; The <2 x i32> payload is reinterpreted as four halves before being stored.
220 %x = extractelement <2 x i16> %coords_lo, i32 0
221 %y = extractelement <2 x i16> %coords_lo, i32 1
222 %z = extractelement <2 x i16> %coords_hi, i32 0
223 %bitcast = bitcast <2 x i32> %val to <4 x half>
224 call void @llvm.amdgcn.image.store.3d.v4f16.i16(<4 x half> %bitcast, i32 1, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
; 3D image store of <4 x half> data with i16 x/y/z coordinates and dmask 3.
; x and y are elements 0 and 1 of %coords_lo; z is element 0 of %coords_hi
; (element 1 of %coords_hi is not read). Every target is expected to emit one
; image_store with `dmask:0x3` and the `a16 d16` modifiers, data in v[2:3],
; the address operand in v[0:1] and the resource in s[0:7]; the gfx10 and
; gfx11 runs additionally expect `dim:SQ_RSRC_IMG_3D`.
228 define amdgpu_ps void @store_v2f16_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) {
229 ; GFX9-LABEL: store_v2f16_3d:
230 ; GFX9: ; %bb.0: ; %main_body
231 ; GFX9-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm a16 d16
232 ; GFX9-NEXT: s_endpgm
234 ; GFX10-LABEL: store_v2f16_3d:
235 ; GFX10: ; %bb.0: ; %main_body
236 ; GFX10-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm a16 d16
237 ; GFX10-NEXT: s_endpgm
239 ; GFX11-LABEL: store_v2f16_3d:
240 ; GFX11: ; %bb.0: ; %main_body
241 ; GFX11-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm a16 d16
242 ; GFX11-NEXT: s_nop 0
243 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
244 ; GFX11-NEXT: s_endpgm
; The <2 x i32> payload is reinterpreted as four halves before being stored.
246 %x = extractelement <2 x i16> %coords_lo, i32 0
247 %y = extractelement <2 x i16> %coords_lo, i32 1
248 %z = extractelement <2 x i16> %coords_hi, i32 0
249 %bitcast = bitcast <2 x i32> %val to <4 x half>
250 call void @llvm.amdgcn.image.store.3d.v4f16.i16(<4 x half> %bitcast, i32 3, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
; 3D image store of <4 x half> data with i16 x/y/z coordinates and dmask 7.
; x and y are elements 0 and 1 of %coords_lo; z is element 0 of %coords_hi
; (element 1 of %coords_hi is not read). Every target is expected to emit one
; image_store with `dmask:0x7` and the `a16 d16` modifiers, data in v[2:3],
; the address operand in v[0:1] and the resource in s[0:7]; the gfx10 and
; gfx11 runs additionally expect `dim:SQ_RSRC_IMG_3D`.
254 define amdgpu_ps void @store_v3f16_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) {
255 ; GFX9-LABEL: store_v3f16_3d:
256 ; GFX9: ; %bb.0: ; %main_body
257 ; GFX9-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x7 unorm a16 d16
258 ; GFX9-NEXT: s_endpgm
260 ; GFX10-LABEL: store_v3f16_3d:
261 ; GFX10: ; %bb.0: ; %main_body
262 ; GFX10-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm a16 d16
263 ; GFX10-NEXT: s_endpgm
265 ; GFX11-LABEL: store_v3f16_3d:
266 ; GFX11: ; %bb.0: ; %main_body
267 ; GFX11-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm a16 d16
268 ; GFX11-NEXT: s_nop 0
269 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
270 ; GFX11-NEXT: s_endpgm
; The <2 x i32> payload is reinterpreted as four halves before being stored.
272 %x = extractelement <2 x i16> %coords_lo, i32 0
273 %y = extractelement <2 x i16> %coords_lo, i32 1
274 %z = extractelement <2 x i16> %coords_hi, i32 0
275 %bitcast = bitcast <2 x i32> %val to <4 x half>
276 call void @llvm.amdgcn.image.store.3d.v4f16.i16(<4 x half> %bitcast, i32 7, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
; 3D image store of <4 x half> data with i16 x/y/z coordinates and dmask 15.
; x and y are elements 0 and 1 of %coords_lo; z is element 0 of %coords_hi
; (element 1 of %coords_hi is not read). Every target is expected to emit one
; image_store with `dmask:0xf` and the `a16 d16` modifiers, data in v[2:3],
; the address operand in v[0:1] and the resource in s[0:7]; the gfx10 and
; gfx11 runs additionally expect `dim:SQ_RSRC_IMG_3D`.
280 define amdgpu_ps void @store_v4f16_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) {
281 ; GFX9-LABEL: store_v4f16_3d:
282 ; GFX9: ; %bb.0: ; %main_body
283 ; GFX9-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm a16 d16
284 ; GFX9-NEXT: s_endpgm
286 ; GFX10-LABEL: store_v4f16_3d:
287 ; GFX10: ; %bb.0: ; %main_body
288 ; GFX10-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 d16
289 ; GFX10-NEXT: s_endpgm
291 ; GFX11-LABEL: store_v4f16_3d:
292 ; GFX11: ; %bb.0: ; %main_body
293 ; GFX11-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 d16
294 ; GFX11-NEXT: s_nop 0
295 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
296 ; GFX11-NEXT: s_endpgm
; The <2 x i32> payload is reinterpreted as four halves before being stored.
298 %x = extractelement <2 x i16> %coords_lo, i32 0
299 %y = extractelement <2 x i16> %coords_lo, i32 1
300 %z = extractelement <2 x i16> %coords_hi, i32 0
301 %bitcast = bitcast <2 x i32> %val to <4 x half>
302 call void @llvm.amdgcn.image.store.3d.v4f16.i16(<4 x half> %bitcast, i32 15, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
; Image-store intrinsics exercised by the tests above, one per dimensionality.
; As used by the callers in this file the operands are: the <4 x half> data,
; the i32 mask value that shows up as `dmask:` in the checks, one i16 per
; coordinate (1, 2 or 3 of them), the <8 x i32> resource, and two trailing i32
; operands that every visible caller passes as 0.
306 declare void @llvm.amdgcn.image.store.1d.v4f16.i16(<4 x half>, i32, i16, <8 x i32>, i32, i32) #2
307 declare void @llvm.amdgcn.image.store.2d.v4f16.i16(<4 x half>, i32, i16, i16, <8 x i32>, i32, i32) #2
308 declare void @llvm.amdgcn.image.store.3d.v4f16.i16(<4 x half>, i32, i16, i16, i16, <8 x i32>, i32, i32) #2
; Attribute groups. Neither #0 nor #1 is referenced by any line visible in
; this part of the file.
; NOTE(review): the intrinsic declarations above use attribute group #2, which
; is not defined in the visible lines - confirm it is defined further down.
310 attributes #0 = { nounwind }
311 attributes #1 = { nounwind readonly }