1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-UNPACKED %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-PACKED %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS %s
; load_1d_f16_x: d16 image load returning a single half, with the intrinsic's
; first operand set to 1 (appears as dmask:0x1 in the expected output).
; Every prefix expects s2..s9 to be copied into s[0:7] (the register range the
; image_load reads), one `image_load v0, v0, s[0:7] ... d16`, then
; s_waitcnt vmcnt(0). GFX10PLUS additionally expects dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
8 define amdgpu_ps half @load_1d_f16_x(<8 x i32> inreg %rsrc, i32 %s) {
9 ; GFX8-UNPACKED-LABEL: load_1d_f16_x:
10 ; GFX8-UNPACKED: ; %bb.0:
11 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
12 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
13 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
14 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
15 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
16 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
17 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
18 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
19 ; GFX8-UNPACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x1 unorm d16
20 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
21 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
23 ; GFX8-PACKED-LABEL: load_1d_f16_x:
24 ; GFX8-PACKED: ; %bb.0:
25 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
26 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
27 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
28 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
29 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
30 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
31 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
32 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
33 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x1 unorm d16
34 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
35 ; GFX8-PACKED-NEXT: ; return to shader part epilog
37 ; GFX9-LABEL: load_1d_f16_x:
38 ; GFX9: ; %bb.0:
39 ; GFX9-NEXT: s_mov_b32 s0, s2
40 ; GFX9-NEXT: s_mov_b32 s1, s3
41 ; GFX9-NEXT: s_mov_b32 s2, s4
42 ; GFX9-NEXT: s_mov_b32 s3, s5
43 ; GFX9-NEXT: s_mov_b32 s4, s6
44 ; GFX9-NEXT: s_mov_b32 s5, s7
45 ; GFX9-NEXT: s_mov_b32 s6, s8
46 ; GFX9-NEXT: s_mov_b32 s7, s9
47 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x1 unorm d16
48 ; GFX9-NEXT: s_waitcnt vmcnt(0)
49 ; GFX9-NEXT: ; return to shader part epilog
51 ; GFX10PLUS-LABEL: load_1d_f16_x:
52 ; GFX10PLUS: ; %bb.0:
53 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
54 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
55 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
56 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
57 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
58 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
59 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
60 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
61 ; GFX10PLUS-NEXT: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm d16
62 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
63 ; GFX10PLUS-NEXT: ; return to shader part epilog
64 %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
65 ret half %v
66 }
; load_1d_f16_y: d16 image load returning a single half, with the intrinsic's
; first operand set to 2 (appears as dmask:0x2 in the expected output).
; Every prefix expects s2..s9 copied into s[0:7], one image_load into v0 with
; the d16 flag, then s_waitcnt vmcnt(0). GFX10PLUS adds dim:SQ_RSRC_IMG_1D.
68 define amdgpu_ps half @load_1d_f16_y(<8 x i32> inreg %rsrc, i32 %s) {
69 ; GFX8-UNPACKED-LABEL: load_1d_f16_y:
70 ; GFX8-UNPACKED: ; %bb.0:
71 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
72 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
73 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
74 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
75 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
76 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
77 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
78 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
79 ; GFX8-UNPACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x2 unorm d16
80 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
81 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
83 ; GFX8-PACKED-LABEL: load_1d_f16_y:
84 ; GFX8-PACKED: ; %bb.0:
85 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
86 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
87 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
88 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
89 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
90 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
91 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
92 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
93 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x2 unorm d16
94 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
95 ; GFX8-PACKED-NEXT: ; return to shader part epilog
97 ; GFX9-LABEL: load_1d_f16_y:
98 ; GFX9: ; %bb.0:
99 ; GFX9-NEXT: s_mov_b32 s0, s2
100 ; GFX9-NEXT: s_mov_b32 s1, s3
101 ; GFX9-NEXT: s_mov_b32 s2, s4
102 ; GFX9-NEXT: s_mov_b32 s3, s5
103 ; GFX9-NEXT: s_mov_b32 s4, s6
104 ; GFX9-NEXT: s_mov_b32 s5, s7
105 ; GFX9-NEXT: s_mov_b32 s6, s8
106 ; GFX9-NEXT: s_mov_b32 s7, s9
107 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x2 unorm d16
108 ; GFX9-NEXT: s_waitcnt vmcnt(0)
109 ; GFX9-NEXT: ; return to shader part epilog
111 ; GFX10PLUS-LABEL: load_1d_f16_y:
112 ; GFX10PLUS: ; %bb.0:
113 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
114 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
115 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
116 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
117 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
118 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
119 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
120 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
121 ; GFX10PLUS-NEXT: image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm d16
122 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
123 ; GFX10PLUS-NEXT: ; return to shader part epilog
124 %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
125 ret half %v
126 }
; load_1d_f16_z: d16 image load returning a single half, with the intrinsic's
; first operand set to 4 (appears as dmask:0x4 in the expected output).
; Every prefix expects s2..s9 copied into s[0:7], one image_load into v0 with
; the d16 flag, then s_waitcnt vmcnt(0). GFX10PLUS adds dim:SQ_RSRC_IMG_1D.
128 define amdgpu_ps half @load_1d_f16_z(<8 x i32> inreg %rsrc, i32 %s) {
129 ; GFX8-UNPACKED-LABEL: load_1d_f16_z:
130 ; GFX8-UNPACKED: ; %bb.0:
131 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
132 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
133 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
134 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
135 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
136 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
137 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
138 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
139 ; GFX8-UNPACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x4 unorm d16
140 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
141 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
143 ; GFX8-PACKED-LABEL: load_1d_f16_z:
144 ; GFX8-PACKED: ; %bb.0:
145 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
146 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
147 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
148 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
149 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
150 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
151 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
152 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
153 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x4 unorm d16
154 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
155 ; GFX8-PACKED-NEXT: ; return to shader part epilog
157 ; GFX9-LABEL: load_1d_f16_z:
158 ; GFX9: ; %bb.0:
159 ; GFX9-NEXT: s_mov_b32 s0, s2
160 ; GFX9-NEXT: s_mov_b32 s1, s3
161 ; GFX9-NEXT: s_mov_b32 s2, s4
162 ; GFX9-NEXT: s_mov_b32 s3, s5
163 ; GFX9-NEXT: s_mov_b32 s4, s6
164 ; GFX9-NEXT: s_mov_b32 s5, s7
165 ; GFX9-NEXT: s_mov_b32 s6, s8
166 ; GFX9-NEXT: s_mov_b32 s7, s9
167 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x4 unorm d16
168 ; GFX9-NEXT: s_waitcnt vmcnt(0)
169 ; GFX9-NEXT: ; return to shader part epilog
171 ; GFX10PLUS-LABEL: load_1d_f16_z:
172 ; GFX10PLUS: ; %bb.0:
173 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
174 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
175 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
176 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
177 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
178 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
179 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
180 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
181 ; GFX10PLUS-NEXT: image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm d16
182 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
183 ; GFX10PLUS-NEXT: ; return to shader part epilog
184 %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 4, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
185 ret half %v
186 }
; load_1d_f16_w: d16 image load returning a single half, with the intrinsic's
; first operand set to 8 (appears as dmask:0x8 in the expected output).
; Every prefix expects s2..s9 copied into s[0:7], one image_load into v0 with
; the d16 flag, then s_waitcnt vmcnt(0). GFX10PLUS adds dim:SQ_RSRC_IMG_1D.
188 define amdgpu_ps half @load_1d_f16_w(<8 x i32> inreg %rsrc, i32 %s) {
189 ; GFX8-UNPACKED-LABEL: load_1d_f16_w:
190 ; GFX8-UNPACKED: ; %bb.0:
191 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
192 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
193 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
194 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
195 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
196 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
197 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
198 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
199 ; GFX8-UNPACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm d16
200 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
201 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
203 ; GFX8-PACKED-LABEL: load_1d_f16_w:
204 ; GFX8-PACKED: ; %bb.0:
205 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
206 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
207 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
208 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
209 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
210 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
211 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
212 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
213 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm d16
214 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
215 ; GFX8-PACKED-NEXT: ; return to shader part epilog
217 ; GFX9-LABEL: load_1d_f16_w:
218 ; GFX9: ; %bb.0:
219 ; GFX9-NEXT: s_mov_b32 s0, s2
220 ; GFX9-NEXT: s_mov_b32 s1, s3
221 ; GFX9-NEXT: s_mov_b32 s2, s4
222 ; GFX9-NEXT: s_mov_b32 s3, s5
223 ; GFX9-NEXT: s_mov_b32 s4, s6
224 ; GFX9-NEXT: s_mov_b32 s5, s7
225 ; GFX9-NEXT: s_mov_b32 s6, s8
226 ; GFX9-NEXT: s_mov_b32 s7, s9
227 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm d16
228 ; GFX9-NEXT: s_waitcnt vmcnt(0)
229 ; GFX9-NEXT: ; return to shader part epilog
231 ; GFX10PLUS-LABEL: load_1d_f16_w:
232 ; GFX10PLUS: ; %bb.0:
233 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
234 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
235 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
236 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
237 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
238 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
239 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
240 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
241 ; GFX10PLUS-NEXT: image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm d16
242 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
243 ; GFX10PLUS-NEXT: ; return to shader part epilog
244 %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
245 ret half %v
246 }
; load_1d_v2f16_xy: d16 image load returning <2 x half>, with the intrinsic's
; first operand set to 3 (appears as dmask:0x3 in the expected output).
; GFX8-UNPACKED expects the load to fill v[0:1] and then a mask/shift/or
; sequence that combines the two registers into v0. The other prefixes expect
; the load to write v0 directly with no follow-up ALU instructions.
248 define amdgpu_ps <2 x half> @load_1d_v2f16_xy(<8 x i32> inreg %rsrc, i32 %s) {
249 ; GFX8-UNPACKED-LABEL: load_1d_v2f16_xy:
250 ; GFX8-UNPACKED: ; %bb.0:
251 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
252 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
253 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
254 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
255 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
256 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
257 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
258 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
259 ; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm d16
260 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
261 ; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1
262 ; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
263 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
264 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
266 ; GFX8-PACKED-LABEL: load_1d_v2f16_xy:
267 ; GFX8-PACKED: ; %bb.0:
268 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
269 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
270 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
271 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
272 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
273 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
274 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
275 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
276 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x3 unorm d16
277 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
278 ; GFX8-PACKED-NEXT: ; return to shader part epilog
280 ; GFX9-LABEL: load_1d_v2f16_xy:
281 ; GFX9: ; %bb.0:
282 ; GFX9-NEXT: s_mov_b32 s0, s2
283 ; GFX9-NEXT: s_mov_b32 s1, s3
284 ; GFX9-NEXT: s_mov_b32 s2, s4
285 ; GFX9-NEXT: s_mov_b32 s3, s5
286 ; GFX9-NEXT: s_mov_b32 s4, s6
287 ; GFX9-NEXT: s_mov_b32 s5, s7
288 ; GFX9-NEXT: s_mov_b32 s6, s8
289 ; GFX9-NEXT: s_mov_b32 s7, s9
290 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x3 unorm d16
291 ; GFX9-NEXT: s_waitcnt vmcnt(0)
292 ; GFX9-NEXT: ; return to shader part epilog
294 ; GFX10PLUS-LABEL: load_1d_v2f16_xy:
295 ; GFX10PLUS: ; %bb.0:
296 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
297 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
298 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
299 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
300 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
301 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
302 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
303 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
304 ; GFX10PLUS-NEXT: image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm d16
305 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
306 ; GFX10PLUS-NEXT: ; return to shader part epilog
307 %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
308 ret <2 x half> %v
309 }
; load_1d_v2f16_xz: d16 image load returning <2 x half>, with the intrinsic's
; first operand set to 5 (appears as dmask:0x5 in the expected output).
; GFX8-UNPACKED expects the load to fill v[0:1] and then a mask/shift/or
; sequence that combines the two registers into v0. The other prefixes expect
; the load to write v0 directly with no follow-up ALU instructions.
311 define amdgpu_ps <2 x half> @load_1d_v2f16_xz(<8 x i32> inreg %rsrc, i32 %s) {
312 ; GFX8-UNPACKED-LABEL: load_1d_v2f16_xz:
313 ; GFX8-UNPACKED: ; %bb.0:
314 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
315 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
316 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
317 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
318 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
319 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
320 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
321 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
322 ; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x5 unorm d16
323 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
324 ; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1
325 ; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
326 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
327 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
329 ; GFX8-PACKED-LABEL: load_1d_v2f16_xz:
330 ; GFX8-PACKED: ; %bb.0:
331 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
332 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
333 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
334 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
335 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
336 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
337 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
338 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
339 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x5 unorm d16
340 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
341 ; GFX8-PACKED-NEXT: ; return to shader part epilog
343 ; GFX9-LABEL: load_1d_v2f16_xz:
344 ; GFX9: ; %bb.0:
345 ; GFX9-NEXT: s_mov_b32 s0, s2
346 ; GFX9-NEXT: s_mov_b32 s1, s3
347 ; GFX9-NEXT: s_mov_b32 s2, s4
348 ; GFX9-NEXT: s_mov_b32 s3, s5
349 ; GFX9-NEXT: s_mov_b32 s4, s6
350 ; GFX9-NEXT: s_mov_b32 s5, s7
351 ; GFX9-NEXT: s_mov_b32 s6, s8
352 ; GFX9-NEXT: s_mov_b32 s7, s9
353 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x5 unorm d16
354 ; GFX9-NEXT: s_waitcnt vmcnt(0)
355 ; GFX9-NEXT: ; return to shader part epilog
357 ; GFX10PLUS-LABEL: load_1d_v2f16_xz:
358 ; GFX10PLUS: ; %bb.0:
359 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
360 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
361 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
362 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
363 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
364 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
365 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
366 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
367 ; GFX10PLUS-NEXT: image_load v0, v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm d16
368 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
369 ; GFX10PLUS-NEXT: ; return to shader part epilog
370 %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 5, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
371 ret <2 x half> %v
372 }
; load_1d_v2f16_xw: d16 image load returning <2 x half>, with the intrinsic's
; first operand set to 9 (appears as dmask:0x9 in the expected output).
; GFX8-UNPACKED expects the load to fill v[0:1] and then a mask/shift/or
; sequence that combines the two registers into v0. The other prefixes expect
; the load to write v0 directly with no follow-up ALU instructions.
374 define amdgpu_ps <2 x half> @load_1d_v2f16_xw(<8 x i32> inreg %rsrc, i32 %s) {
375 ; GFX8-UNPACKED-LABEL: load_1d_v2f16_xw:
376 ; GFX8-UNPACKED: ; %bb.0:
377 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
378 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
379 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
380 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
381 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
382 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
383 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
384 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
385 ; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm d16
386 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
387 ; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1
388 ; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
389 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
390 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
392 ; GFX8-PACKED-LABEL: load_1d_v2f16_xw:
393 ; GFX8-PACKED: ; %bb.0:
394 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
395 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
396 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
397 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
398 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
399 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
400 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
401 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
402 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x9 unorm d16
403 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
404 ; GFX8-PACKED-NEXT: ; return to shader part epilog
406 ; GFX9-LABEL: load_1d_v2f16_xw:
407 ; GFX9: ; %bb.0:
408 ; GFX9-NEXT: s_mov_b32 s0, s2
409 ; GFX9-NEXT: s_mov_b32 s1, s3
410 ; GFX9-NEXT: s_mov_b32 s2, s4
411 ; GFX9-NEXT: s_mov_b32 s3, s5
412 ; GFX9-NEXT: s_mov_b32 s4, s6
413 ; GFX9-NEXT: s_mov_b32 s5, s7
414 ; GFX9-NEXT: s_mov_b32 s6, s8
415 ; GFX9-NEXT: s_mov_b32 s7, s9
416 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x9 unorm d16
417 ; GFX9-NEXT: s_waitcnt vmcnt(0)
418 ; GFX9-NEXT: ; return to shader part epilog
420 ; GFX10PLUS-LABEL: load_1d_v2f16_xw:
421 ; GFX10PLUS: ; %bb.0:
422 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
423 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
424 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
425 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
426 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
427 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
428 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
429 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
430 ; GFX10PLUS-NEXT: image_load v0, v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm d16
431 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
432 ; GFX10PLUS-NEXT: ; return to shader part epilog
433 %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
434 ret <2 x half> %v
435 }
; load_1d_v2f16_yz: d16 image load returning <2 x half>, with the intrinsic's
; first operand set to 6 (appears as dmask:0x6 in the expected output).
; GFX8-UNPACKED expects the load to fill v[0:1] and then a mask/shift/or
; sequence that combines the two registers into v0. The other prefixes expect
; the load to write v0 directly with no follow-up ALU instructions.
437 define amdgpu_ps <2 x half> @load_1d_v2f16_yz(<8 x i32> inreg %rsrc, i32 %s) {
438 ; GFX8-UNPACKED-LABEL: load_1d_v2f16_yz:
439 ; GFX8-UNPACKED: ; %bb.0:
440 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
441 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
442 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
443 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
444 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
445 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
446 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
447 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
448 ; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x6 unorm d16
449 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
450 ; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1
451 ; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
452 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
453 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
455 ; GFX8-PACKED-LABEL: load_1d_v2f16_yz:
456 ; GFX8-PACKED: ; %bb.0:
457 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
458 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
459 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
460 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
461 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
462 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
463 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
464 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
465 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x6 unorm d16
466 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
467 ; GFX8-PACKED-NEXT: ; return to shader part epilog
469 ; GFX9-LABEL: load_1d_v2f16_yz:
470 ; GFX9: ; %bb.0:
471 ; GFX9-NEXT: s_mov_b32 s0, s2
472 ; GFX9-NEXT: s_mov_b32 s1, s3
473 ; GFX9-NEXT: s_mov_b32 s2, s4
474 ; GFX9-NEXT: s_mov_b32 s3, s5
475 ; GFX9-NEXT: s_mov_b32 s4, s6
476 ; GFX9-NEXT: s_mov_b32 s5, s7
477 ; GFX9-NEXT: s_mov_b32 s6, s8
478 ; GFX9-NEXT: s_mov_b32 s7, s9
479 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x6 unorm d16
480 ; GFX9-NEXT: s_waitcnt vmcnt(0)
481 ; GFX9-NEXT: ; return to shader part epilog
483 ; GFX10PLUS-LABEL: load_1d_v2f16_yz:
484 ; GFX10PLUS: ; %bb.0:
485 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
486 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
487 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
488 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
489 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
490 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
491 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
492 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
493 ; GFX10PLUS-NEXT: image_load v0, v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm d16
494 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
495 ; GFX10PLUS-NEXT: ; return to shader part epilog
496 %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
497 ret <2 x half> %v
498 }
; load_1d_v3f16_xyz: d16 image load returning <3 x half>, with the intrinsic's
; first operand set to 7 (appears as dmask:0x7 in the expected output).
; Expected shapes differ per prefix:
;   GFX8-UNPACKED   - load fills v[0:2]; two v_and, a shift and an SDWA or
;                     produce the values left in v0 and v1.
;   GFX8-PACKED     - load fills v[0:1]; one v_and on v1 follows.
;   GFX9, GFX10PLUS - load fills v[0:1]; no follow-up ALU instructions.
500 define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
501 ; GFX8-UNPACKED-LABEL: load_1d_v3f16_xyz:
502 ; GFX8-UNPACKED: ; %bb.0:
503 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
504 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
505 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
506 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
507 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
508 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
509 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
510 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
511 ; GFX8-UNPACKED-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm d16
512 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
513 ; GFX8-UNPACKED-NEXT: v_and_b32_e32 v3, 0xffff, v1
514 ; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v2
515 ; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v3
516 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
517 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
519 ; GFX8-PACKED-LABEL: load_1d_v3f16_xyz:
520 ; GFX8-PACKED: ; %bb.0:
521 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
522 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
523 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
524 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
525 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
526 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
527 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
528 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
529 ; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
530 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
531 ; GFX8-PACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1
532 ; GFX8-PACKED-NEXT: ; return to shader part epilog
534 ; GFX9-LABEL: load_1d_v3f16_xyz:
535 ; GFX9: ; %bb.0:
536 ; GFX9-NEXT: s_mov_b32 s0, s2
537 ; GFX9-NEXT: s_mov_b32 s1, s3
538 ; GFX9-NEXT: s_mov_b32 s2, s4
539 ; GFX9-NEXT: s_mov_b32 s3, s5
540 ; GFX9-NEXT: s_mov_b32 s4, s6
541 ; GFX9-NEXT: s_mov_b32 s5, s7
542 ; GFX9-NEXT: s_mov_b32 s6, s8
543 ; GFX9-NEXT: s_mov_b32 s7, s9
544 ; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
545 ; GFX9-NEXT: s_waitcnt vmcnt(0)
546 ; GFX9-NEXT: ; return to shader part epilog
548 ; GFX10PLUS-LABEL: load_1d_v3f16_xyz:
549 ; GFX10PLUS: ; %bb.0:
550 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
551 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
552 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
553 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
554 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
555 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
556 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
557 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
558 ; GFX10PLUS-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16
559 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
560 ; GFX10PLUS-NEXT: ; return to shader part epilog
561 %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
562 ret <3 x half> %v
563 }
; load_1d_v4f16_xyzw: d16 image load returning <4 x half>, with the intrinsic's
; first operand set to 15 (appears as dmask:0xf in the expected output).
; GFX8-UNPACKED expects the load to fill v[0:3], then mask/shift/or sequences
; that combine v0 with v1 into v0 and v2 with v3 into v1. The other prefixes
; expect the load to fill v[0:1] with no follow-up ALU instructions.
565 define amdgpu_ps <4 x half> @load_1d_v4f16_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
566 ; GFX8-UNPACKED-LABEL: load_1d_v4f16_xyzw:
567 ; GFX8-UNPACKED: ; %bb.0:
568 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
569 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
570 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
571 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
572 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
573 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
574 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
575 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
576 ; GFX8-UNPACKED-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm d16
577 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
578 ; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1
579 ; GFX8-UNPACKED-NEXT: v_and_b32_e32 v3, 0xffff, v3
580 ; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
581 ; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v3, 16, v3
582 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
583 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
584 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
586 ; GFX8-PACKED-LABEL: load_1d_v4f16_xyzw:
587 ; GFX8-PACKED: ; %bb.0:
588 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
589 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
590 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
591 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
592 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
593 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
594 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
595 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
596 ; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
597 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
598 ; GFX8-PACKED-NEXT: ; return to shader part epilog
600 ; GFX9-LABEL: load_1d_v4f16_xyzw:
601 ; GFX9: ; %bb.0:
602 ; GFX9-NEXT: s_mov_b32 s0, s2
603 ; GFX9-NEXT: s_mov_b32 s1, s3
604 ; GFX9-NEXT: s_mov_b32 s2, s4
605 ; GFX9-NEXT: s_mov_b32 s3, s5
606 ; GFX9-NEXT: s_mov_b32 s4, s6
607 ; GFX9-NEXT: s_mov_b32 s5, s7
608 ; GFX9-NEXT: s_mov_b32 s6, s8
609 ; GFX9-NEXT: s_mov_b32 s7, s9
610 ; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
611 ; GFX9-NEXT: s_waitcnt vmcnt(0)
612 ; GFX9-NEXT: ; return to shader part epilog
614 ; GFX10PLUS-LABEL: load_1d_v4f16_xyzw:
615 ; GFX10PLUS: ; %bb.0:
616 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
617 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
618 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
619 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
620 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
621 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
622 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
623 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
624 ; GFX10PLUS-NEXT: image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm d16
625 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
626 ; GFX10PLUS-NEXT: ; return to shader part epilog
627 %v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
628 ret <4 x half> %v
629 }
; load_1d_f16_tfe_dmask_x: d16 image load whose intrinsic returns { half, i32 }.
; The first operand is 1 (dmask:0x1) and the operand after %rsrc is 1, which
; coincides with the `tfe` flag on the expected image_load.
; The function returns only the i32 struct member (index 1), bitcast to float.
; Every prefix expects v1 to be zeroed and copied to v2 before the load, the
; load to write v[1:2], and v2 to be moved to v0 as the return value.
631 define amdgpu_ps float @load_1d_f16_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) {
632 ; GFX8-UNPACKED-LABEL: load_1d_f16_tfe_dmask_x:
633 ; GFX8-UNPACKED: ; %bb.0:
634 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0
635 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
636 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
637 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
638 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
639 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
640 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
641 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
642 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
643 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1
644 ; GFX8-UNPACKED-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16
645 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
646 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v0, v2
647 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
649 ; GFX8-PACKED-LABEL: load_1d_f16_tfe_dmask_x:
650 ; GFX8-PACKED: ; %bb.0:
651 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v1, 0
652 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
653 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
654 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
655 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
656 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
657 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
658 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
659 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
660 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, v1
661 ; GFX8-PACKED-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16
662 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
663 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v2
664 ; GFX8-PACKED-NEXT: ; return to shader part epilog
666 ; GFX9-LABEL: load_1d_f16_tfe_dmask_x:
667 ; GFX9: ; %bb.0:
668 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
669 ; GFX9-NEXT: s_mov_b32 s0, s2
670 ; GFX9-NEXT: s_mov_b32 s1, s3
671 ; GFX9-NEXT: s_mov_b32 s2, s4
672 ; GFX9-NEXT: s_mov_b32 s3, s5
673 ; GFX9-NEXT: s_mov_b32 s4, s6
674 ; GFX9-NEXT: s_mov_b32 s5, s7
675 ; GFX9-NEXT: s_mov_b32 s6, s8
676 ; GFX9-NEXT: s_mov_b32 s7, s9
677 ; GFX9-NEXT: v_mov_b32_e32 v2, v1
678 ; GFX9-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16
679 ; GFX9-NEXT: s_waitcnt vmcnt(0)
680 ; GFX9-NEXT: v_mov_b32_e32 v0, v2
681 ; GFX9-NEXT: ; return to shader part epilog
683 ; GFX10PLUS-LABEL: load_1d_f16_tfe_dmask_x:
684 ; GFX10PLUS: ; %bb.0:
685 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, 0
686 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
687 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
688 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
689 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
690 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
691 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
692 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
693 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
694 ; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v1
695 ; GFX10PLUS-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
696 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
697 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, v2
698 ; GFX10PLUS-NEXT: ; return to shader part epilog
699 %v = call { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
700 %v.err = extractvalue { half, i32 } %v, 1
701 %vv = bitcast i32 %v.err to float
702 ret float %vv
703 }
; load_1d_v2f16_tfe_dmask_xy: d16 image load whose intrinsic returns
; { <2 x half>, i32 }. The first operand is 3 (dmask:0x3) and the operand after
; %rsrc is 1, which coincides with the `tfe` flag on the expected image_load.
; The function returns only the i32 struct member (index 1), bitcast to float.
; GFX8-UNPACKED expects v1..v3 zero-initialised, the load to write v[1:3], and
; v3 moved to v0. The other prefixes expect v1..v2 zero-initialised, the load
; to write v[1:2], and v2 moved to v0.
705 define amdgpu_ps float @load_1d_v2f16_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s) {
706 ; GFX8-UNPACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
707 ; GFX8-UNPACKED: ; %bb.0:
708 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0
709 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
710 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
711 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
712 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
713 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
714 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
715 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
716 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
717 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1
718 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v3, v1
719 ; GFX8-UNPACKED-NEXT: image_load v[1:3], v0, s[0:7] dmask:0x3 unorm tfe d16
720 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
721 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v0, v3
722 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
724 ; GFX8-PACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
725 ; GFX8-PACKED: ; %bb.0:
726 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v1, 0
727 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
728 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
729 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
730 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
731 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
732 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
733 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
734 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
735 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, v1
736 ; GFX8-PACKED-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x3 unorm tfe d16
737 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
738 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v2
739 ; GFX8-PACKED-NEXT: ; return to shader part epilog
741 ; GFX9-LABEL: load_1d_v2f16_tfe_dmask_xy:
742 ; GFX9: ; %bb.0:
743 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
744 ; GFX9-NEXT: s_mov_b32 s0, s2
745 ; GFX9-NEXT: s_mov_b32 s1, s3
746 ; GFX9-NEXT: s_mov_b32 s2, s4
747 ; GFX9-NEXT: s_mov_b32 s3, s5
748 ; GFX9-NEXT: s_mov_b32 s4, s6
749 ; GFX9-NEXT: s_mov_b32 s5, s7
750 ; GFX9-NEXT: s_mov_b32 s6, s8
751 ; GFX9-NEXT: s_mov_b32 s7, s9
752 ; GFX9-NEXT: v_mov_b32_e32 v2, v1
753 ; GFX9-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x3 unorm tfe d16
754 ; GFX9-NEXT: s_waitcnt vmcnt(0)
755 ; GFX9-NEXT: v_mov_b32_e32 v0, v2
756 ; GFX9-NEXT: ; return to shader part epilog
758 ; GFX10PLUS-LABEL: load_1d_v2f16_tfe_dmask_xy:
759 ; GFX10PLUS: ; %bb.0:
760 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, 0
761 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
762 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
763 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
764 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
765 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
766 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
767 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
768 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
769 ; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v1
770 ; GFX10PLUS-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe d16
771 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
772 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, v2
773 ; GFX10PLUS-NEXT: ; return to shader part epilog
774 %v = call { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
775 %v.err = extractvalue { <2 x half>, i32 } %v, 1
776 %vv = bitcast i32 %v.err to float
777 ret float %vv
778 }
; Test: 1D D16 image load through the struct-returning intrinsic
; @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32, called with 7 as its first
; argument. Only element 1 of the returned { <3 x half>, i32 } is used
; (%v.err), which is then bitcast to float as %vv.
; The generated assertions below expect, for every run configuration:
;   - the destination registers to be written with 0 (v_mov_b32) before the load,
;   - an image_load with dmask:0x7, tfe and d16,
;   - the last register of the destination range to be copied into v0.
; The GFX8-UNPACKED run expects a four-register destination v[1:4]; the other
; runs expect a three-register destination v[1:3].
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
780 define amdgpu_ps float @load_1d_v3f16_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
781 ; GFX8-UNPACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
782 ; GFX8-UNPACKED: ; %bb.0:
783 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0
784 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
785 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
786 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
787 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
788 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
789 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
790 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
791 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
792 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1
793 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v3, v1
794 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v4, v1
795 ; GFX8-UNPACKED-NEXT: image_load v[1:4], v0, s[0:7] dmask:0x7 unorm tfe d16
796 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
797 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v0, v4
798 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
800 ; GFX8-PACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
801 ; GFX8-PACKED: ; %bb.0:
802 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v1, 0
803 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
804 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
805 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
806 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
807 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
808 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
809 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
810 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
811 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, v1
812 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v3, v1
813 ; GFX8-PACKED-NEXT: image_load v[1:3], v0, s[0:7] dmask:0x7 unorm tfe d16
814 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
815 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v3
816 ; GFX8-PACKED-NEXT: ; return to shader part epilog
818 ; GFX9-LABEL: load_1d_v3f16_tfe_dmask_xyz:
820 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
821 ; GFX9-NEXT: s_mov_b32 s0, s2
822 ; GFX9-NEXT: s_mov_b32 s1, s3
823 ; GFX9-NEXT: s_mov_b32 s2, s4
824 ; GFX9-NEXT: s_mov_b32 s3, s5
825 ; GFX9-NEXT: s_mov_b32 s4, s6
826 ; GFX9-NEXT: s_mov_b32 s5, s7
827 ; GFX9-NEXT: s_mov_b32 s6, s8
828 ; GFX9-NEXT: s_mov_b32 s7, s9
829 ; GFX9-NEXT: v_mov_b32_e32 v2, v1
830 ; GFX9-NEXT: v_mov_b32_e32 v3, v1
831 ; GFX9-NEXT: image_load v[1:3], v0, s[0:7] dmask:0x7 unorm tfe d16
832 ; GFX9-NEXT: s_waitcnt vmcnt(0)
833 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
834 ; GFX9-NEXT: ; return to shader part epilog
836 ; GFX10PLUS-LABEL: load_1d_v3f16_tfe_dmask_xyz:
837 ; GFX10PLUS: ; %bb.0:
838 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, 0
839 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
840 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
841 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
842 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
843 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
844 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
845 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
846 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
847 ; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v1
848 ; GFX10PLUS-NEXT: v_mov_b32_e32 v3, v1
849 ; GFX10PLUS-NEXT: image_load v[1:3], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe d16
850 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
851 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, v3
852 ; GFX10PLUS-NEXT: ; return to shader part epilog
853 %v = call { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
854 %v.err = extractvalue { <3 x half>, i32 } %v, 1
855 %vv = bitcast i32 %v.err to float
; Test: 1D D16 image load through the struct-returning intrinsic
; @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32. Only element 1 of the returned
; { <4 x half>, i32 } is used (%v.err), which is then bitcast to float as %vv.
; The generated assertions below expect, for every run configuration, the
; destination registers to be written with 0 before the load, an image_load
; with tfe and d16 into v[1:2], and v2 copied into v0 afterwards.
; NOTE(review): the function name says "dmask_xyzw", but the first intrinsic
; argument is 16 and every run configuration expects dmask:0x10 with only a
; two-register destination. The sibling tests in this file pass 3 for "xy"
; and 7 for "xyz", so 15 (0xf) was presumably intended here; as written the
; test does not appear to cover a four-component load. Confirm the intent,
; and if the argument is changed, regenerate the assertions with
; utils/update_llc_test_checks.py instead of editing them by hand.
859 define amdgpu_ps float @load_1d_v4f16_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
860 ; GFX8-UNPACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
861 ; GFX8-UNPACKED: ; %bb.0:
862 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0
863 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
864 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
865 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
866 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
867 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
868 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
869 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
870 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
871 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1
872 ; GFX8-UNPACKED-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe d16
873 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
874 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v0, v2
875 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
877 ; GFX8-PACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
878 ; GFX8-PACKED: ; %bb.0:
879 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v1, 0
880 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
881 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
882 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
883 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
884 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
885 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
886 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
887 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
888 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, v1
889 ; GFX8-PACKED-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe d16
890 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
891 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v2
892 ; GFX8-PACKED-NEXT: ; return to shader part epilog
894 ; GFX9-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
896 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
897 ; GFX9-NEXT: s_mov_b32 s0, s2
898 ; GFX9-NEXT: s_mov_b32 s1, s3
899 ; GFX9-NEXT: s_mov_b32 s2, s4
900 ; GFX9-NEXT: s_mov_b32 s3, s5
901 ; GFX9-NEXT: s_mov_b32 s4, s6
902 ; GFX9-NEXT: s_mov_b32 s5, s7
903 ; GFX9-NEXT: s_mov_b32 s6, s8
904 ; GFX9-NEXT: s_mov_b32 s7, s9
905 ; GFX9-NEXT: v_mov_b32_e32 v2, v1
906 ; GFX9-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe d16
907 ; GFX9-NEXT: s_waitcnt vmcnt(0)
908 ; GFX9-NEXT: v_mov_b32_e32 v0, v2
909 ; GFX9-NEXT: ; return to shader part epilog
911 ; GFX10PLUS-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
912 ; GFX10PLUS: ; %bb.0:
913 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, 0
914 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
915 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
916 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
917 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
918 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6
919 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7
920 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8
921 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9
922 ; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v1
923 ; GFX10PLUS-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D unorm tfe d16
924 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
925 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, v2
926 ; GFX10PLUS-NEXT: ; return to shader part epilog
927 %v = call { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 16, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
928 %v.err = extractvalue { <4 x half>, i32 } %v, 1
929 %vv = bitcast i32 %v.err to float
; Declarations of the 1D image-load intrinsics, one per half-typed result
; width (half, <2 x half>, <3 x half>, <4 x half>). All share the same
; parameter list; the first and the last two i32 parameters are immarg, so
; call sites must pass constants there.
933 declare half @llvm.amdgcn.image.load.1d.half.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
934 declare <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
935 declare <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
936 declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
; Struct-returning forms of the same loads: each result pairs the data value
; with an additional i32. The tests above that call these read that i32 with
; extractvalue index 1 and name it %v.err.
938 declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
939 declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
940 declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
941 declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
; Attribute group applied to every declaration above.
943 attributes #0 = { nounwind readonly }