1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; load_1d: 1D image load addressed by one i16 coordinate (element 0 of
; %coords). Both targets are expected to select image_load with the a16
; modifier and a single address VGPR (v0). The GFX9 label check below anchors
; the GFX9 assertions to this function, matching every other test in the file.
5 define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
6 ; GFX9-LABEL: load_1d:
7 ; GFX9: ; %bb.0: ; %main_body
8 ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
9 ; GFX9-NEXT: s_waitcnt vmcnt(0)
10 ; GFX9-NEXT: ; return to shader part epilog
12 ; GFX10-LABEL: load_1d:
13 ; GFX10: ; %bb.0: ; %main_body
14 ; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
15 ; GFX10-NEXT: s_waitcnt vmcnt(0)
16 ; GFX10-NEXT: ; return to shader part epilog
18 %s = extractelement <2 x i16> %coords, i32 0
19 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
; load_2d: 2D image load with i16 coordinates (s, t) taken from the two
; elements of %coords. The checks expect one address VGPR (v0) and the a16
; modifier on both targets.
23 define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
24 ; GFX9-LABEL: load_2d:
25 ; GFX9: ; %bb.0: ; %main_body
26 ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
27 ; GFX9-NEXT: s_waitcnt vmcnt(0)
28 ; GFX9-NEXT: ; return to shader part epilog
30 ; GFX10-LABEL: load_2d:
31 ; GFX10: ; %bb.0: ; %main_body
32 ; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
33 ; GFX10-NEXT: s_waitcnt vmcnt(0)
34 ; GFX10-NEXT: ; return to shader part epilog
36 %s = extractelement <2 x i16> %coords, i32 0
37 %t = extractelement <2 x i16> %coords, i32 1
38 %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
; load_3d: 3D image load with i16 coordinates (s, t, r); s and t come from
; %coords_lo, r from element 0 of %coords_hi. The checks expect a two-VGPR
; address (v[0:1]) and the a16 modifier.
42 define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
43 ; GFX9-LABEL: load_3d:
44 ; GFX9: ; %bb.0: ; %main_body
45 ; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
46 ; GFX9-NEXT: s_waitcnt vmcnt(0)
47 ; GFX9-NEXT: ; return to shader part epilog
49 ; GFX10-LABEL: load_3d:
50 ; GFX10: ; %bb.0: ; %main_body
51 ; GFX10-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
52 ; GFX10-NEXT: s_waitcnt vmcnt(0)
53 ; GFX10-NEXT: ; return to shader part epilog
55 %s = extractelement <2 x i16> %coords_lo, i32 0
56 %t = extractelement <2 x i16> %coords_lo, i32 1
57 %r = extractelement <2 x i16> %coords_hi, i32 0
58 %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
; load_cube: cube image load with i16 operands (s, t, slice). The checks
; expect a two-VGPR address (v[0:1]) with a16; the GFX9 output additionally
; carries the da modifier, while GFX10 encodes the cube dimension in dim.
62 define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
63 ; GFX9-LABEL: load_cube:
64 ; GFX9: ; %bb.0: ; %main_body
65 ; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
66 ; GFX9-NEXT: s_waitcnt vmcnt(0)
67 ; GFX9-NEXT: ; return to shader part epilog
69 ; GFX10-LABEL: load_cube:
70 ; GFX10: ; %bb.0: ; %main_body
71 ; GFX10-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
72 ; GFX10-NEXT: s_waitcnt vmcnt(0)
73 ; GFX10-NEXT: ; return to shader part epilog
75 %s = extractelement <2 x i16> %coords_lo, i32 0
76 %t = extractelement <2 x i16> %coords_lo, i32 1
77 %slice = extractelement <2 x i16> %coords_hi, i32 0
78 %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
; load_1darray: 1D-array image load with i16 operands (s, slice) from the two
; elements of %coords. The checks expect one address VGPR (v0) with a16; the
; GFX9 output also carries da.
82 define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
83 ; GFX9-LABEL: load_1darray:
84 ; GFX9: ; %bb.0: ; %main_body
85 ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 da
86 ; GFX9-NEXT: s_waitcnt vmcnt(0)
87 ; GFX9-NEXT: ; return to shader part epilog
89 ; GFX10-LABEL: load_1darray:
90 ; GFX10: ; %bb.0: ; %main_body
91 ; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
92 ; GFX10-NEXT: s_waitcnt vmcnt(0)
93 ; GFX10-NEXT: ; return to shader part epilog
95 %s = extractelement <2 x i16> %coords, i32 0
96 %slice = extractelement <2 x i16> %coords, i32 1
97 %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 15, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
; load_2darray: 2D-array image load with i16 operands (s, t, slice). The
; checks expect a two-VGPR address (v[0:1]) with a16; the GFX9 output also
; carries da.
101 define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
102 ; GFX9-LABEL: load_2darray:
103 ; GFX9: ; %bb.0: ; %main_body
104 ; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
105 ; GFX9-NEXT: s_waitcnt vmcnt(0)
106 ; GFX9-NEXT: ; return to shader part epilog
108 ; GFX10-LABEL: load_2darray:
109 ; GFX10: ; %bb.0: ; %main_body
110 ; GFX10-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
111 ; GFX10-NEXT: s_waitcnt vmcnt(0)
112 ; GFX10-NEXT: ; return to shader part epilog
114 %s = extractelement <2 x i16> %coords_lo, i32 0
115 %t = extractelement <2 x i16> %coords_lo, i32 1
116 %slice = extractelement <2 x i16> %coords_hi, i32 0
117 %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
; load_2dmsaa: 2D MSAA image load with i16 operands (s, t, fragid). The checks
; expect a two-VGPR address (v[0:1]) with a16 and no da on GFX9.
121 define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
122 ; GFX9-LABEL: load_2dmsaa:
123 ; GFX9: ; %bb.0: ; %main_body
124 ; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
125 ; GFX9-NEXT: s_waitcnt vmcnt(0)
126 ; GFX9-NEXT: ; return to shader part epilog
128 ; GFX10-LABEL: load_2dmsaa:
129 ; GFX10: ; %bb.0: ; %main_body
130 ; GFX10-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16
131 ; GFX10-NEXT: s_waitcnt vmcnt(0)
132 ; GFX10-NEXT: ; return to shader part epilog
134 %s = extractelement <2 x i16> %coords_lo, i32 0
135 %t = extractelement <2 x i16> %coords_lo, i32 1
136 %fragid = extractelement <2 x i16> %coords_hi, i32 0
137 %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
; load_2darraymsaa: 2D-array MSAA image load with four i16 operands
; (s, t, slice, fragid) that fill both <2 x i16> arguments. The checks expect
; a two-VGPR address (v[0:1]) with a16; the GFX9 output also carries da.
141 define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
142 ; GFX9-LABEL: load_2darraymsaa:
143 ; GFX9: ; %bb.0: ; %main_body
144 ; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
145 ; GFX9-NEXT: s_waitcnt vmcnt(0)
146 ; GFX9-NEXT: ; return to shader part epilog
148 ; GFX10-LABEL: load_2darraymsaa:
149 ; GFX10: ; %bb.0: ; %main_body
150 ; GFX10-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16
151 ; GFX10-NEXT: s_waitcnt vmcnt(0)
152 ; GFX10-NEXT: ; return to shader part epilog
154 %s = extractelement <2 x i16> %coords_lo, i32 0
155 %t = extractelement <2 x i16> %coords_lo, i32 1
156 %slice = extractelement <2 x i16> %coords_hi, i32 0
157 %fragid = extractelement <2 x i16> %coords_hi, i32 1
158 %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
; load_mip_1d: 1D image load with an explicit mip operand; (s, mip) are the two
; i16 elements of %coords. The checks expect image_load_mip with one address
; VGPR (v0) and a16.
162 define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
163 ; GFX9-LABEL: load_mip_1d:
164 ; GFX9: ; %bb.0: ; %main_body
165 ; GFX9-NEXT: image_load_mip v[0:3], v0, s[0:7] dmask:0xf unorm a16
166 ; GFX9-NEXT: s_waitcnt vmcnt(0)
167 ; GFX9-NEXT: ; return to shader part epilog
169 ; GFX10-LABEL: load_mip_1d:
170 ; GFX10: ; %bb.0: ; %main_body
171 ; GFX10-NEXT: image_load_mip v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
172 ; GFX10-NEXT: s_waitcnt vmcnt(0)
173 ; GFX10-NEXT: ; return to shader part epilog
175 %s = extractelement <2 x i16> %coords, i32 0
176 %mip = extractelement <2 x i16> %coords, i32 1
177 %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 %s, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; load_mip_2d: 2D image load with an explicit mip operand (s, t, mip as i16).
; The checks expect image_load_mip with a two-VGPR address (v[0:1]) and a16.
181 define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
182 ; GFX9-LABEL: load_mip_2d:
183 ; GFX9: ; %bb.0: ; %main_body
184 ; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
185 ; GFX9-NEXT: s_waitcnt vmcnt(0)
186 ; GFX9-NEXT: ; return to shader part epilog
188 ; GFX10-LABEL: load_mip_2d:
189 ; GFX10: ; %bb.0: ; %main_body
190 ; GFX10-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
191 ; GFX10-NEXT: s_waitcnt vmcnt(0)
192 ; GFX10-NEXT: ; return to shader part epilog
194 %s = extractelement <2 x i16> %coords_lo, i32 0
195 %t = extractelement <2 x i16> %coords_lo, i32 1
196 %mip = extractelement <2 x i16> %coords_hi, i32 0
197 %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; load_mip_3d: 3D image load with an explicit mip operand; the four i16
; operands (s, t, r, mip) fill both <2 x i16> arguments. The checks expect
; image_load_mip with a two-VGPR address (v[0:1]) and a16.
201 define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
202 ; GFX9-LABEL: load_mip_3d:
203 ; GFX9: ; %bb.0: ; %main_body
204 ; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
205 ; GFX9-NEXT: s_waitcnt vmcnt(0)
206 ; GFX9-NEXT: ; return to shader part epilog
208 ; GFX10-LABEL: load_mip_3d:
209 ; GFX10: ; %bb.0: ; %main_body
210 ; GFX10-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
211 ; GFX10-NEXT: s_waitcnt vmcnt(0)
212 ; GFX10-NEXT: ; return to shader part epilog
214 %s = extractelement <2 x i16> %coords_lo, i32 0
215 %t = extractelement <2 x i16> %coords_lo, i32 1
216 %r = extractelement <2 x i16> %coords_hi, i32 0
217 %mip = extractelement <2 x i16> %coords_hi, i32 1
218 %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; load_mip_cube: cube image load with an explicit mip operand
; (s, t, slice, mip as i16). The checks expect image_load_mip with a two-VGPR
; address (v[0:1]) and a16; the GFX9 output also carries da.
222 define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
223 ; GFX9-LABEL: load_mip_cube:
224 ; GFX9: ; %bb.0: ; %main_body
225 ; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
226 ; GFX9-NEXT: s_waitcnt vmcnt(0)
227 ; GFX9-NEXT: ; return to shader part epilog
229 ; GFX10-LABEL: load_mip_cube:
230 ; GFX10: ; %bb.0: ; %main_body
231 ; GFX10-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
232 ; GFX10-NEXT: s_waitcnt vmcnt(0)
233 ; GFX10-NEXT: ; return to shader part epilog
235 %s = extractelement <2 x i16> %coords_lo, i32 0
236 %t = extractelement <2 x i16> %coords_lo, i32 1
237 %slice = extractelement <2 x i16> %coords_hi, i32 0
238 %mip = extractelement <2 x i16> %coords_hi, i32 1
239 %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; load_mip_1darray: 1D-array image load with an explicit mip operand
; (s, slice, mip as i16). The checks expect image_load_mip with a two-VGPR
; address (v[0:1]) and a16; the GFX9 output also carries da.
243 define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
244 ; GFX9-LABEL: load_mip_1darray:
245 ; GFX9: ; %bb.0: ; %main_body
246 ; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
247 ; GFX9-NEXT: s_waitcnt vmcnt(0)
248 ; GFX9-NEXT: ; return to shader part epilog
250 ; GFX10-LABEL: load_mip_1darray:
251 ; GFX10: ; %bb.0: ; %main_body
252 ; GFX10-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
253 ; GFX10-NEXT: s_waitcnt vmcnt(0)
254 ; GFX10-NEXT: ; return to shader part epilog
256 %s = extractelement <2 x i16> %coords_lo, i32 0
257 %slice = extractelement <2 x i16> %coords_lo, i32 1
258 %mip = extractelement <2 x i16> %coords_hi, i32 0
259 %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 15, i16 %s, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; load_mip_2darray: 2D-array image load with an explicit mip operand
; (s, t, slice, mip as i16). The checks expect image_load_mip with a two-VGPR
; address (v[0:1]) and a16; the GFX9 output also carries da.
263 define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
264 ; GFX9-LABEL: load_mip_2darray:
265 ; GFX9: ; %bb.0: ; %main_body
266 ; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
267 ; GFX9-NEXT: s_waitcnt vmcnt(0)
268 ; GFX9-NEXT: ; return to shader part epilog
270 ; GFX10-LABEL: load_mip_2darray:
271 ; GFX10: ; %bb.0: ; %main_body
272 ; GFX10-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
273 ; GFX10-NEXT: s_waitcnt vmcnt(0)
274 ; GFX10-NEXT: ; return to shader part epilog
276 %s = extractelement <2 x i16> %coords_lo, i32 0
277 %t = extractelement <2 x i16> %coords_lo, i32 1
278 %slice = extractelement <2 x i16> %coords_hi, i32 0
279 %mip = extractelement <2 x i16> %coords_hi, i32 1
280 %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; store_1d: 1D image store of a <4 x float> value addressed by one i16
; coordinate. The checks expect the data in v[0:3], the address in v4, and the
; a16 modifier.
284 define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
285 ; GFX9-LABEL: store_1d:
286 ; GFX9: ; %bb.0: ; %main_body
287 ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16
288 ; GFX9-NEXT: s_endpgm
290 ; GFX10-LABEL: store_1d:
291 ; GFX10: ; %bb.0: ; %main_body
292 ; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
293 ; GFX10-NEXT: s_endpgm
295 %s = extractelement <2 x i16> %coords, i32 0
296 call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
; store_2d: 2D image store with i16 coordinates (s, t) from %coords. The
; checks expect the data in v[0:3], a single address VGPR (v4), and a16.
300 define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
301 ; GFX9-LABEL: store_2d:
302 ; GFX9: ; %bb.0: ; %main_body
303 ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16
304 ; GFX9-NEXT: s_endpgm
306 ; GFX10-LABEL: store_2d:
307 ; GFX10: ; %bb.0: ; %main_body
308 ; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
309 ; GFX10-NEXT: s_endpgm
311 %s = extractelement <2 x i16> %coords, i32 0
312 %t = extractelement <2 x i16> %coords, i32 1
313 call void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
; store_3d: 3D image store with i16 coordinates (s, t, r). The checks expect
; the data in v[0:3], a two-VGPR address (v[4:5]), and a16.
317 define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
318 ; GFX9-LABEL: store_3d:
319 ; GFX9: ; %bb.0: ; %main_body
320 ; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16
321 ; GFX9-NEXT: s_endpgm
323 ; GFX10-LABEL: store_3d:
324 ; GFX10: ; %bb.0: ; %main_body
325 ; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
326 ; GFX10-NEXT: s_endpgm
328 %s = extractelement <2 x i16> %coords_lo, i32 0
329 %t = extractelement <2 x i16> %coords_lo, i32 1
330 %r = extractelement <2 x i16> %coords_hi, i32 0
331 call void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
; store_cube: cube image store with i16 operands (s, t, slice). The checks
; expect the data in v[0:3], a two-VGPR address (v[4:5]), and a16; the GFX9
; output also carries da.
335 define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
336 ; GFX9-LABEL: store_cube:
337 ; GFX9: ; %bb.0: ; %main_body
338 ; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
339 ; GFX9-NEXT: s_endpgm
341 ; GFX10-LABEL: store_cube:
342 ; GFX10: ; %bb.0: ; %main_body
343 ; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
344 ; GFX10-NEXT: s_endpgm
346 %s = extractelement <2 x i16> %coords_lo, i32 0
347 %t = extractelement <2 x i16> %coords_lo, i32 1
348 %slice = extractelement <2 x i16> %coords_hi, i32 0
349 call void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
; store_1darray: 1D-array image store with i16 operands (s, slice) from
; %coords. The checks expect the data in v[0:3], one address VGPR (v4), and
; a16; the GFX9 output also carries da.
353 define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
354 ; GFX9-LABEL: store_1darray:
355 ; GFX9: ; %bb.0: ; %main_body
356 ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16 da
357 ; GFX9-NEXT: s_endpgm
359 ; GFX10-LABEL: store_1darray:
360 ; GFX10: ; %bb.0: ; %main_body
361 ; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
362 ; GFX10-NEXT: s_endpgm
364 %s = extractelement <2 x i16> %coords, i32 0
365 %slice = extractelement <2 x i16> %coords, i32 1
366 call void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
; store_2darray: 2D-array image store with i16 operands (s, t, slice). The
; checks expect the data in v[0:3], a two-VGPR address (v[4:5]), and a16; the
; GFX9 output also carries da.
370 define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
371 ; GFX9-LABEL: store_2darray:
372 ; GFX9: ; %bb.0: ; %main_body
373 ; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
374 ; GFX9-NEXT: s_endpgm
376 ; GFX10-LABEL: store_2darray:
377 ; GFX10: ; %bb.0: ; %main_body
378 ; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
379 ; GFX10-NEXT: s_endpgm
381 %s = extractelement <2 x i16> %coords_lo, i32 0
382 %t = extractelement <2 x i16> %coords_lo, i32 1
383 %slice = extractelement <2 x i16> %coords_hi, i32 0
384 call void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
; store_2dmsaa: 2D MSAA image store with i16 operands (s, t, fragid). The
; checks expect the data in v[0:3], a two-VGPR address (v[4:5]), and a16, with
; no da on GFX9.
388 define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
389 ; GFX9-LABEL: store_2dmsaa:
390 ; GFX9: ; %bb.0: ; %main_body
391 ; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16
392 ; GFX9-NEXT: s_endpgm
394 ; GFX10-LABEL: store_2dmsaa:
395 ; GFX10: ; %bb.0: ; %main_body
396 ; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16
397 ; GFX10-NEXT: s_endpgm
399 %s = extractelement <2 x i16> %coords_lo, i32 0
400 %t = extractelement <2 x i16> %coords_lo, i32 1
401 %fragid = extractelement <2 x i16> %coords_hi, i32 0
402 call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
; store_2darraymsaa: 2D-array MSAA image store with four i16 operands
; (s, t, slice, fragid). The checks expect the data in v[0:3], a two-VGPR
; address (v[4:5]), and a16; the GFX9 output also carries da.
406 define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
407 ; GFX9-LABEL: store_2darraymsaa:
408 ; GFX9: ; %bb.0: ; %main_body
409 ; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
410 ; GFX9-NEXT: s_endpgm
412 ; GFX10-LABEL: store_2darraymsaa:
413 ; GFX10: ; %bb.0: ; %main_body
414 ; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16
415 ; GFX10-NEXT: s_endpgm
417 %s = extractelement <2 x i16> %coords_lo, i32 0
418 %t = extractelement <2 x i16> %coords_lo, i32 1
419 %slice = extractelement <2 x i16> %coords_hi, i32 0
420 %fragid = extractelement <2 x i16> %coords_hi, i32 1
421 call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
; store_mip_1d: 1D image store with an explicit mip operand; (s, mip) are the
; two i16 elements of %coords. The checks expect image_store_mip with the data
; in v[0:3], one address VGPR (v4), and a16.
425 define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
426 ; GFX9-LABEL: store_mip_1d:
427 ; GFX9: ; %bb.0: ; %main_body
428 ; GFX9-NEXT: image_store_mip v[0:3], v4, s[0:7] dmask:0xf unorm a16
429 ; GFX9-NEXT: s_endpgm
431 ; GFX10-LABEL: store_mip_1d:
432 ; GFX10: ; %bb.0: ; %main_body
433 ; GFX10-NEXT: image_store_mip v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
434 ; GFX10-NEXT: s_endpgm
436 %s = extractelement <2 x i16> %coords, i32 0
437 %mip = extractelement <2 x i16> %coords, i32 1
438 call void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; store_mip_2d: 2D image store with an explicit mip operand (s, t, mip as
; i16). The checks expect image_store_mip with the data in v[0:3], a two-VGPR
; address (v[4:5]), and a16.
442 define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
443 ; GFX9-LABEL: store_mip_2d:
444 ; GFX9: ; %bb.0: ; %main_body
445 ; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16
446 ; GFX9-NEXT: s_endpgm
448 ; GFX10-LABEL: store_mip_2d:
449 ; GFX10: ; %bb.0: ; %main_body
450 ; GFX10-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
451 ; GFX10-NEXT: s_endpgm
453 %s = extractelement <2 x i16> %coords_lo, i32 0
454 %t = extractelement <2 x i16> %coords_lo, i32 1
455 %mip = extractelement <2 x i16> %coords_hi, i32 0
456 call void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; store_mip_3d: 3D image store with an explicit mip operand (s, t, r, mip as
; i16). The checks expect image_store_mip with the data in v[0:3], a two-VGPR
; address (v[4:5]), and a16.
460 define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
461 ; GFX9-LABEL: store_mip_3d:
462 ; GFX9: ; %bb.0: ; %main_body
463 ; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16
464 ; GFX9-NEXT: s_endpgm
466 ; GFX10-LABEL: store_mip_3d:
467 ; GFX10: ; %bb.0: ; %main_body
468 ; GFX10-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
469 ; GFX10-NEXT: s_endpgm
471 %s = extractelement <2 x i16> %coords_lo, i32 0
472 %t = extractelement <2 x i16> %coords_lo, i32 1
473 %r = extractelement <2 x i16> %coords_hi, i32 0
474 %mip = extractelement <2 x i16> %coords_hi, i32 1
475 call void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %r, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; store_mip_cube: cube image store with an explicit mip operand
; (s, t, slice, mip as i16). The checks expect image_store_mip with the data in
; v[0:3], a two-VGPR address (v[4:5]), and a16; the GFX9 output also carries da.
479 define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
480 ; GFX9-LABEL: store_mip_cube:
481 ; GFX9: ; %bb.0: ; %main_body
482 ; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
483 ; GFX9-NEXT: s_endpgm
485 ; GFX10-LABEL: store_mip_cube:
486 ; GFX10: ; %bb.0: ; %main_body
487 ; GFX10-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
488 ; GFX10-NEXT: s_endpgm
490 %s = extractelement <2 x i16> %coords_lo, i32 0
491 %t = extractelement <2 x i16> %coords_lo, i32 1
492 %slice = extractelement <2 x i16> %coords_hi, i32 0
493 %mip = extractelement <2 x i16> %coords_hi, i32 1
494 call void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; store_mip_1darray: 1D-array image store with an explicit mip operand
; (s, slice, mip as i16). The checks expect image_store_mip with the data in
; v[0:3], a two-VGPR address (v[4:5]), and a16; the GFX9 output also carries da.
498 define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
499 ; GFX9-LABEL: store_mip_1darray:
500 ; GFX9: ; %bb.0: ; %main_body
501 ; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
502 ; GFX9-NEXT: s_endpgm
504 ; GFX10-LABEL: store_mip_1darray:
505 ; GFX10: ; %bb.0: ; %main_body
506 ; GFX10-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
507 ; GFX10-NEXT: s_endpgm
509 %s = extractelement <2 x i16> %coords_lo, i32 0
510 %slice = extractelement <2 x i16> %coords_lo, i32 1
511 %mip = extractelement <2 x i16> %coords_hi, i32 0
512 call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; store_mip_2darray: 2D-array image store with an explicit mip operand
; (s, t, slice, mip as i16). The checks expect image_store_mip with the data in
; v[0:3], a two-VGPR address (v[4:5]), and a16; the GFX9 output also carries da.
516 define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
517 ; GFX9-LABEL: store_mip_2darray:
518 ; GFX9: ; %bb.0: ; %main_body
519 ; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
520 ; GFX9-NEXT: s_endpgm
522 ; GFX10-LABEL: store_mip_2darray:
523 ; GFX10: ; %bb.0: ; %main_body
524 ; GFX10-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
525 ; GFX10-NEXT: s_endpgm
527 %s = extractelement <2 x i16> %coords_lo, i32 0
528 %t = extractelement <2 x i16> %coords_lo, i32 1
529 %slice = extractelement <2 x i16> %coords_hi, i32 0
530 %mip = extractelement <2 x i16> %coords_hi, i32 1
531 call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; getresinfo_1d: resource-info query on a 1D image with an i16 mip operand
; (element 0 of %coords). The checks expect image_get_resinfo with one address
; VGPR (v0) and a16.
535 define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
536 ; GFX9-LABEL: getresinfo_1d:
537 ; GFX9: ; %bb.0: ; %main_body
538 ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16
539 ; GFX9-NEXT: s_waitcnt vmcnt(0)
540 ; GFX9-NEXT: ; return to shader part epilog
542 ; GFX10-LABEL: getresinfo_1d:
543 ; GFX10: ; %bb.0: ; %main_body
544 ; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
545 ; GFX10-NEXT: s_waitcnt vmcnt(0)
546 ; GFX10-NEXT: ; return to shader part epilog
548 %mip = extractelement <2 x i16> %coords, i32 0
549 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; getresinfo_2d: resource-info query on a 2D image with an i16 mip operand.
; The checks expect image_get_resinfo with one address VGPR (v0) and a16.
553 define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
554 ; GFX9-LABEL: getresinfo_2d:
555 ; GFX9: ; %bb.0: ; %main_body
556 ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16
557 ; GFX9-NEXT: s_waitcnt vmcnt(0)
558 ; GFX9-NEXT: ; return to shader part epilog
560 ; GFX10-LABEL: getresinfo_2d:
561 ; GFX10: ; %bb.0: ; %main_body
562 ; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
563 ; GFX10-NEXT: s_waitcnt vmcnt(0)
564 ; GFX10-NEXT: ; return to shader part epilog
566 %mip = extractelement <2 x i16> %coords, i32 0
567 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; getresinfo_3d: resource-info query on a 3D image with an i16 mip operand.
; The checks expect image_get_resinfo with one address VGPR (v0) and a16.
571 define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
572 ; GFX9-LABEL: getresinfo_3d:
573 ; GFX9: ; %bb.0: ; %main_body
574 ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16
575 ; GFX9-NEXT: s_waitcnt vmcnt(0)
576 ; GFX9-NEXT: ; return to shader part epilog
578 ; GFX10-LABEL: getresinfo_3d:
579 ; GFX10: ; %bb.0: ; %main_body
580 ; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
581 ; GFX10-NEXT: s_waitcnt vmcnt(0)
582 ; GFX10-NEXT: ; return to shader part epilog
584 %mip = extractelement <2 x i16> %coords, i32 0
585 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; getresinfo_cube: resource-info query on a cube image with an i16 mip
; operand. The checks expect image_get_resinfo with one address VGPR (v0) and
; a16; the GFX9 output also carries da.
589 define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
590 ; GFX9-LABEL: getresinfo_cube:
591 ; GFX9: ; %bb.0: ; %main_body
592 ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da
593 ; GFX9-NEXT: s_waitcnt vmcnt(0)
594 ; GFX9-NEXT: ; return to shader part epilog
596 ; GFX10-LABEL: getresinfo_cube:
597 ; GFX10: ; %bb.0: ; %main_body
598 ; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
599 ; GFX10-NEXT: s_waitcnt vmcnt(0)
600 ; GFX10-NEXT: ; return to shader part epilog
602 %mip = extractelement <2 x i16> %coords, i32 0
603 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; getresinfo_1darray: resource-info query on a 1D-array image with an i16 mip
; operand. The checks expect image_get_resinfo with one address VGPR (v0) and
; a16; the GFX9 output also carries da.
607 define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
608 ; GFX9-LABEL: getresinfo_1darray:
609 ; GFX9: ; %bb.0: ; %main_body
610 ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da
611 ; GFX9-NEXT: s_waitcnt vmcnt(0)
612 ; GFX9-NEXT: ; return to shader part epilog
614 ; GFX10-LABEL: getresinfo_1darray:
615 ; GFX10: ; %bb.0: ; %main_body
616 ; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
617 ; GFX10-NEXT: s_waitcnt vmcnt(0)
618 ; GFX10-NEXT: ; return to shader part epilog
620 %mip = extractelement <2 x i16> %coords, i32 0
621 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; getresinfo_2darray: resource-info query on a 2D-array image with an i16 mip
; operand. The checks expect image_get_resinfo with one address VGPR (v0) and
; a16; the GFX9 output also carries da.
625 define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
626 ; GFX9-LABEL: getresinfo_2darray:
627 ; GFX9: ; %bb.0: ; %main_body
628 ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da
629 ; GFX9-NEXT: s_waitcnt vmcnt(0)
630 ; GFX9-NEXT: ; return to shader part epilog
632 ; GFX10-LABEL: getresinfo_2darray:
633 ; GFX10: ; %bb.0: ; %main_body
634 ; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
635 ; GFX10-NEXT: s_waitcnt vmcnt(0)
636 ; GFX10-NEXT: ; return to shader part epilog
638 %mip = extractelement <2 x i16> %coords, i32 0
639 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; getresinfo_2dmsaa: resource-info query on a 2D MSAA image with an i16 mip
; operand. The checks expect image_get_resinfo with one address VGPR (v0) and
; a16, with no da on GFX9.
643 define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
644 ; GFX9-LABEL: getresinfo_2dmsaa:
645 ; GFX9: ; %bb.0: ; %main_body
646 ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16
647 ; GFX9-NEXT: s_waitcnt vmcnt(0)
648 ; GFX9-NEXT: ; return to shader part epilog
650 ; GFX10-LABEL: getresinfo_2dmsaa:
651 ; GFX10: ; %bb.0: ; %main_body
652 ; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16
653 ; GFX10-NEXT: s_waitcnt vmcnt(0)
654 ; GFX10-NEXT: ; return to shader part epilog
656 %mip = extractelement <2 x i16> %coords, i32 0
657 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; getresinfo_2darraymsaa: resource-info query on a 2D-array MSAA image with an
; i16 mip operand. The checks expect image_get_resinfo with one address VGPR
; (v0) and a16; the GFX9 output also carries da.
661 define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
662 ; GFX9-LABEL: getresinfo_2darraymsaa:
663 ; GFX9: ; %bb.0: ; %main_body
664 ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da
665 ; GFX9-NEXT: s_waitcnt vmcnt(0)
666 ; GFX9-NEXT: ; return to shader part epilog
668 ; GFX10-LABEL: getresinfo_2darraymsaa:
669 ; GFX10: ; %bb.0: ; %main_body
670 ; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16
671 ; GFX10-NEXT: s_waitcnt vmcnt(0)
672 ; GFX10-NEXT: ; return to shader part epilog
674 %mip = extractelement <2 x i16> %coords, i32 0
675 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; load_1d_V1: 1D a16 image load returning a single float, with dmask operand 8.
; The checks expect a one-VGPR destination (v0) and dmask:0x8.
679 define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
680 ; GFX9-LABEL: load_1d_V1:
681 ; GFX9: ; %bb.0: ; %main_body
682 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm a16
683 ; GFX9-NEXT: s_waitcnt vmcnt(0)
684 ; GFX9-NEXT: ; return to shader part epilog
686 ; GFX10-LABEL: load_1d_V1:
687 ; GFX10: ; %bb.0: ; %main_body
688 ; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm a16
689 ; GFX10-NEXT: s_waitcnt vmcnt(0)
690 ; GFX10-NEXT: ; return to shader part epilog
692 %s = extractelement <2 x i16> %coords, i32 0
693 %v = call float @llvm.amdgcn.image.load.1d.f32.i16(i32 8, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
; load_1d_V2: 1D a16 image load returning <2 x float>, with dmask operand 9.
; The checks expect a two-VGPR destination (v[0:1]) and dmask:0x9.
697 define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
698 ; GFX9-LABEL: load_1d_V2:
699 ; GFX9: ; %bb.0: ; %main_body
700 ; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm a16
701 ; GFX9-NEXT: s_waitcnt vmcnt(0)
702 ; GFX9-NEXT: ; return to shader part epilog
704 ; GFX10-LABEL: load_1d_V2:
705 ; GFX10: ; %bb.0: ; %main_body
706 ; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm a16
707 ; GFX10-NEXT: s_waitcnt vmcnt(0)
708 ; GFX10-NEXT: ; return to shader part epilog
710 %s = extractelement <2 x i16> %coords, i32 0
711 %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 9, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
; store_1d_V1: 1D a16 image store of a single float, with dmask operand 2. The
; checks expect the data in v0, the address in v1, and dmask:0x2.
715 define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16> %coords) {
716 ; GFX9-LABEL: store_1d_V1:
717 ; GFX9: ; %bb.0: ; %main_body
718 ; GFX9-NEXT: image_store v0, v1, s[0:7] dmask:0x2 unorm a16
719 ; GFX9-NEXT: s_endpgm
721 ; GFX10-LABEL: store_1d_V1:
722 ; GFX10: ; %bb.0: ; %main_body
723 ; GFX10-NEXT: image_store v0, v1, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm a16
724 ; GFX10-NEXT: s_endpgm
726 %s = extractelement <2 x i16> %coords, i32 0
727 call void @llvm.amdgcn.image.store.1d.f32.i16(float %vdata, i32 2, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
; store_1d_V2: 1D a16 image store of <2 x float>, with dmask operand 12. The
; checks expect the data in v[0:1], the address in v2, and dmask:0xc.
731 define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 x i16> %coords) {
732 ; GFX9-LABEL: store_1d_V2:
733 ; GFX9: ; %bb.0: ; %main_body
734 ; GFX9-NEXT: image_store v[0:1], v2, s[0:7] dmask:0xc unorm a16
735 ; GFX9-NEXT: s_endpgm
737 ; GFX10-LABEL: store_1d_V2:
738 ; GFX10: ; %bb.0: ; %main_body
739 ; GFX10-NEXT: image_store v[0:1], v2, s[0:7] dmask:0xc dim:SQ_RSRC_IMG_1D unorm a16
740 ; GFX10-NEXT: s_endpgm
742 %s = extractelement <2 x i16> %coords, i32 0
743 call void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float> %vdata, i32 12, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
; load_1d_glc: same 1D a16 load as load_1d, but the final i32 operand of the
; intrinsic is 1; the checks expect the glc modifier on the instruction.
747 define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
748 ; GFX9-LABEL: load_1d_glc:
749 ; GFX9: ; %bb.0: ; %main_body
750 ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc a16
751 ; GFX9-NEXT: s_waitcnt vmcnt(0)
752 ; GFX9-NEXT: ; return to shader part epilog
754 ; GFX10-LABEL: load_1d_glc:
755 ; GFX10: ; %bb.0: ; %main_body
756 ; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16
757 ; GFX10-NEXT: s_waitcnt vmcnt(0)
758 ; GFX10-NEXT: ; return to shader part epilog
760 %s = extractelement <2 x i16> %coords, i32 0
761 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
; load_1d_slc: 1D image load of <4 x float> with one i16 coordinate, where the
; final i32 operand of the intrinsic call is 2.
; The expected image_load carries the slc modifier (and not glc) in addition
; to a16 on both llc configurations.
; NOTE(review): the final i32 is presumably the cache-policy operand - confirm
; against the intrinsic definition.
765 define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
766 ; GFX9-LABEL: load_1d_slc:
767 ; GFX9: ; %bb.0: ; %main_body
768 ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc a16
769 ; GFX9-NEXT: s_waitcnt vmcnt(0)
770 ; GFX9-NEXT: ; return to shader part epilog
772 ; GFX10-LABEL: load_1d_slc:
773 ; GFX10: ; %bb.0: ; %main_body
774 ; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc a16
775 ; GFX10-NEXT: s_waitcnt vmcnt(0)
776 ; GFX10-NEXT: ; return to shader part epilog
; Only element 0 of %coords is used as the coordinate.
778 %s = extractelement <2 x i16> %coords, i32 0
779 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
; load_1d_glc_slc: 1D image load of <4 x float> with one i16 coordinate, where
; the final i32 operand of the intrinsic call is 3.
; The expected image_load carries both the glc and slc modifiers, in that
; order, followed by a16 on both llc configurations.
; NOTE(review): the final i32 is presumably the cache-policy operand - confirm
; against the intrinsic definition.
783 define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
784 ; GFX9-LABEL: load_1d_glc_slc:
785 ; GFX9: ; %bb.0: ; %main_body
786 ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc a16
787 ; GFX9-NEXT: s_waitcnt vmcnt(0)
788 ; GFX9-NEXT: ; return to shader part epilog
790 ; GFX10-LABEL: load_1d_glc_slc:
791 ; GFX10: ; %bb.0: ; %main_body
792 ; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc a16
793 ; GFX10-NEXT: s_waitcnt vmcnt(0)
794 ; GFX10-NEXT: ; return to shader part epilog
; Only element 0 of %coords is used as the coordinate.
796 %s = extractelement <2 x i16> %coords, i32 0
797 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
; store_1d_glc: 1D image store of <4 x float> with one i16 coordinate, where
; the final i32 operand of the intrinsic call is 1.
; The expected image_store carries the glc modifier (and not slc) in addition
; to a16; the data operand is v[0:3] and the address operand is v4.
; NOTE(review): the final i32 is presumably the cache-policy operand - confirm
; against the intrinsic definition.
801 define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
802 ; GFX9-LABEL: store_1d_glc:
803 ; GFX9: ; %bb.0: ; %main_body
804 ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc a16
805 ; GFX9-NEXT: s_endpgm
807 ; GFX10-LABEL: store_1d_glc:
808 ; GFX10: ; %bb.0: ; %main_body
809 ; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16
810 ; GFX10-NEXT: s_endpgm
; Only element 0 of %coords is used as the coordinate.
812 %s = extractelement <2 x i16> %coords, i32 0
813 call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
; store_1d_slc: 1D image store of <4 x float> with one i16 coordinate, where
; the final i32 operand of the intrinsic call is 2.
; The expected image_store carries the slc modifier (and not glc) in addition
; to a16; the data operand is v[0:3] and the address operand is v4.
; NOTE(review): the final i32 is presumably the cache-policy operand - confirm
; against the intrinsic definition.
817 define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
818 ; GFX9-LABEL: store_1d_slc:
819 ; GFX9: ; %bb.0: ; %main_body
820 ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc a16
821 ; GFX9-NEXT: s_endpgm
823 ; GFX10-LABEL: store_1d_slc:
824 ; GFX10: ; %bb.0: ; %main_body
825 ; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc a16
826 ; GFX10-NEXT: s_endpgm
; Only element 0 of %coords is used as the coordinate.
828 %s = extractelement <2 x i16> %coords, i32 0
829 call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
; store_1d_glc_slc: 1D image store of <4 x float> with one i16 coordinate,
; where the final i32 operand of the intrinsic call is 3.
; The expected image_store carries both the glc and slc modifiers, in that
; order, followed by a16; the data operand is v[0:3] and the address is v4.
; NOTE(review): the final i32 is presumably the cache-policy operand - confirm
; against the intrinsic definition.
833 define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
834 ; GFX9-LABEL: store_1d_glc_slc:
835 ; GFX9: ; %bb.0: ; %main_body
836 ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc a16
837 ; GFX9-NEXT: s_endpgm
839 ; GFX10-LABEL: store_1d_glc_slc:
840 ; GFX10: ; %bb.0: ; %main_body
841 ; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc a16
842 ; GFX10-NEXT: s_endpgm
; Only element 0 of %coords is used as the coordinate.
844 %s = extractelement <2 x i16> %coords, i32 0
845 call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
; getresinfo_dmask0: calls llvm.amdgcn.image.getresinfo.1d.v4f32.i16 with a
; first operand of i32 0 (the position that appears as dmask in the other
; tests of this file).
; The expected output for both llc configurations contains no image
; instruction at all: the entry block is followed directly by the
; return-to-epilog comment.
; %vdata is declared but not referenced by the visible instructions.
849 define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 {
850 ; GFX9-LABEL: getresinfo_dmask0:
851 ; GFX9: ; %bb.0: ; %main_body
852 ; GFX9-NEXT: ; return to shader part epilog
854 ; GFX10-LABEL: getresinfo_dmask0:
855 ; GFX10: ; %bb.0: ; %main_body
856 ; GFX10-NEXT: ; return to shader part epilog
; Element 0 of %coords is passed as the single i16 operand of getresinfo.
858 %mip = extractelement <2 x i16> %coords, i32 0
859 %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 0, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
; Declarations of the <4 x float> image load intrinsics with i16 address
; operands, one per image dimension, followed by their load.mip counterparts.
; Operand layout: a leading i32, a dimension-dependent number of i16 operands,
; the <8 x i32> resource, then two trailing i32 operands.
; Each load.mip variant takes one more i16 operand than the plain load of the
; same dimension.
; All of them use attribute group #1.
863 declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #1
864 declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
865 declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
866 declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
867 declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
868 declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
869 declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
870 declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
872 declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
873 declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
874 declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
875 declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
876 declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
877 declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
; Declarations of the <4 x float> image store intrinsics with i16 address
; operands, one per image dimension, followed by their store.mip counterparts.
; Operand layout: the <4 x float> data, an i32, a dimension-dependent number
; of i16 operands, the <8 x i32> resource, then two trailing i32 operands.
; Each store.mip variant takes one more i16 operand than the plain store of
; the same dimension.
; All of them return void and use attribute group #0.
879 declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32, i16, <8 x i32>, i32, i32) #0
880 declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
881 declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
882 declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
883 declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
884 declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
885 declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
886 declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
888 declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
889 declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
890 declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
891 declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
892 declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
893 declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
; Declarations of the <4 x float> getresinfo intrinsics, one per image
; dimension. Unlike the load/store declarations, every variant takes exactly
; one i16 operand regardless of dimension, so all eight signatures are
; identical apart from the name.
; All of them use attribute group #2.
895 declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
896 declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
897 declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
898 declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
899 declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
900 declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
901 declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
902 declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
; Declarations of the narrower-data variants: loads returning float or
; <2 x float>, and stores taking float or <2 x float>.
; The operand layout matches the <4 x float> declarations; only the data type
; differs. Loads use attribute group #1 and stores use attribute group #0.
904 declare float @llvm.amdgcn.image.load.1d.f32.i16(i32, i16, <8 x i32>, i32, i32) #1
905 declare float @llvm.amdgcn.image.load.2d.f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
906 declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32, i16, <8 x i32>, i32, i32) #1
907 declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32, i16, <8 x i32>, i32, i32) #0
908 declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32, i16, <8 x i32>, i32, i32) #0
; Attribute groups referenced by the declarations and definitions above:
;   #0 - image store declarations and the getresinfo_dmask0 definition
;   #1 - image load declarations
;   #2 - image getresinfo declarations
910 attributes #0 = { nounwind }
911 attributes #1 = { nounwind readonly }
912 attributes #2 = { nounwind readnone }