1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
; 1D image store with 16-bit coordinates and a data mask of 1.
; The i32 1 operand of the intrinsic call shows up as dmask:0x1 in the expected
; assembly. The single i16 coordinate is element 0 of %coords; the expected
; output addresses it through v0 and passes the data in v[1:4].
; The gfx1100 checks expect "s_nop 0" ahead of the VGPR-dealloc s_sendmsg, the
; same sequence the 2d/3d tests in this file expect.
define amdgpu_ps void @store_f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
; GFX9-LABEL: store_f32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x1 unorm a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_f32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_f32_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %val, i32 1, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
; 1D image store with 16-bit coordinates and a data mask of 3.
; The i32 3 operand of the intrinsic call shows up as dmask:0x3 in the expected
; assembly. The single i16 coordinate is element 0 of %coords; the expected
; output addresses it through v0 and passes the data in v[1:4].
; The gfx1100 checks expect "s_nop 0" ahead of the VGPR-dealloc s_sendmsg, the
; same sequence the 2d/3d tests in this file expect.
define amdgpu_ps void @store_v2f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
; GFX9-LABEL: store_v2f32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x3 unorm a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_v2f32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_v2f32_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %val, i32 3, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
; 1D image store with 16-bit coordinates and a data mask of 7.
; The i32 7 operand of the intrinsic call shows up as dmask:0x7 in the expected
; assembly. The single i16 coordinate is element 0 of %coords; the expected
; output addresses it through v0 and passes the data in v[1:4].
; The gfx1100 checks expect "s_nop 0" ahead of the VGPR-dealloc s_sendmsg, the
; same sequence the 2d/3d tests in this file expect.
define amdgpu_ps void @store_v3f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
; GFX9-LABEL: store_v3f32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x7 unorm a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_v3f32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_v3f32_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %val, i32 7, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
; 1D image store with 16-bit coordinates and a data mask of 15.
; The i32 15 operand of the intrinsic call shows up as dmask:0xf in the expected
; assembly. The single i16 coordinate is element 0 of %coords; the expected
; output addresses it through v0 and passes the data in v[1:4].
; The gfx1100 checks expect "s_nop 0" ahead of the VGPR-dealloc s_sendmsg, the
; same sequence the 2d/3d tests in this file expect.
define amdgpu_ps void @store_v4f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
; GFX9-LABEL: store_v4f32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0xf unorm a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_v4f32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_v4f32_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %val, i32 15, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
; 2D image store with 16-bit coordinates and a data mask of 1.
; The i32 1 operand of the intrinsic call shows up as dmask:0x1 in the expected
; assembly. Both i16 coordinates come from the one <2 x i16> %coords argument
; (x = element 0, y = element 1); the expected output addresses them through
; the single register v0 and passes the data in v[1:4].
define amdgpu_ps void @store_f32_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
; GFX9-LABEL: store_f32_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x1 unorm a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_f32_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_f32_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords, i32 0
  %y = extractelement <2 x i16> %coords, i32 1
  call void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float> %val, i32 1, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
; 2D image store with 16-bit coordinates and a data mask of 3.
; The i32 3 operand of the intrinsic call shows up as dmask:0x3 in the expected
; assembly. Both i16 coordinates come from the one <2 x i16> %coords argument
; (x = element 0, y = element 1); the expected output addresses them through
; the single register v0 and passes the data in v[1:4].
define amdgpu_ps void @store_v2f32_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
; GFX9-LABEL: store_v2f32_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x3 unorm a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_v2f32_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_v2f32_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords, i32 0
  %y = extractelement <2 x i16> %coords, i32 1
  call void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float> %val, i32 3, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
; 2D image store with 16-bit coordinates and a data mask of 7.
; The i32 7 operand of the intrinsic call shows up as dmask:0x7 in the expected
; assembly. Both i16 coordinates come from the one <2 x i16> %coords argument
; (x = element 0, y = element 1); the expected output addresses them through
; the single register v0 and passes the data in v[1:4].
define amdgpu_ps void @store_v3f32_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
; GFX9-LABEL: store_v3f32_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x7 unorm a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_v3f32_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_v3f32_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords, i32 0
  %y = extractelement <2 x i16> %coords, i32 1
  call void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float> %val, i32 7, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
; 2D image store with 16-bit coordinates and a data mask of 15.
; The i32 15 operand of the intrinsic call shows up as dmask:0xf in the expected
; assembly. Both i16 coordinates come from the one <2 x i16> %coords argument
; (x = element 0, y = element 1); the expected output addresses them through
; the single register v0 and passes the data in v[1:4].
define amdgpu_ps void @store_v4f32_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
; GFX9-LABEL: store_v4f32_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0xf unorm a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_v4f32_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_v4f32_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords, i32 0
  %y = extractelement <2 x i16> %coords, i32 1
  call void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float> %val, i32 15, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
; 3D image store with 16-bit coordinates and a data mask of 1.
; The i32 1 operand of the intrinsic call shows up as dmask:0x1 in the expected
; assembly. x and y are elements 0 and 1 of %coords_lo, z is element 0 of
; %coords_hi; the expected output addresses them through v[0:1] and passes the
; data in v[2:5].
define amdgpu_ps void @store_f32_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) {
; GFX9-LABEL: store_f32_3d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0x1 unorm a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_f32_3d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_f32_3d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %z = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float> %val, i32 1, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
; 3D image store with 16-bit coordinates and a data mask of 3.
; The i32 3 operand of the intrinsic call shows up as dmask:0x3 in the expected
; assembly. x and y are elements 0 and 1 of %coords_lo, z is element 0 of
; %coords_hi; the expected output addresses them through v[0:1] and passes the
; data in v[2:5].
define amdgpu_ps void @store_v2f32_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) {
; GFX9-LABEL: store_v2f32_3d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0x3 unorm a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_v2f32_3d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_v2f32_3d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %z = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float> %val, i32 3, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
; 3D image store with 16-bit coordinates and a data mask of 7.
; The i32 7 operand of the intrinsic call shows up as dmask:0x7 in the expected
; assembly. x and y are elements 0 and 1 of %coords_lo, z is element 0 of
; %coords_hi; the expected output addresses them through v[0:1] and passes the
; data in v[2:5].
define amdgpu_ps void @store_v3f32_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) {
; GFX9-LABEL: store_v3f32_3d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0x7 unorm a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_v3f32_3d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_v3f32_3d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %z = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float> %val, i32 7, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
; 3D image store with 16-bit coordinates and a data mask of 15.
; The i32 15 operand of the intrinsic call shows up as dmask:0xf in the expected
; assembly. x and y are elements 0 and 1 of %coords_lo, z is element 0 of
; %coords_hi; the expected output addresses them through v[0:1] and passes the
; data in v[2:5].
define amdgpu_ps void @store_v4f32_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) {
; GFX9-LABEL: store_v4f32_3d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0xf unorm a16
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: store_v4f32_3d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: store_v4f32_3d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
main_body:
  %x = extractelement <2 x i16> %coords_lo, i32 0
  %y = extractelement <2 x i16> %coords_lo, i32 1
  %z = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float> %val, i32 15, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
; Image-store intrinsics called by the tests in this file, one per image
; dimensionality (1d, 2d, 3d). Each takes the <4 x float> data, an i32 data
; mask, one i16 per coordinate, the <8 x i32> resource, and two trailing i32
; operands that every call in this file passes as 0.
; NOTE(review): the declarations use attribute group #2, which is not defined
; among the groups visible here - confirm it is defined further down the file.
declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32, i16, <8 x i32>, i32, i32) #2
declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #2
declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #2
; Attribute groups.
; NOTE(review): neither #0 nor #1 is referenced by any definition or
; declaration visible in this section - verify whether they are still needed.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }