1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-UNPACKED %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-PACKED %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; 1D image load returning a single half, with the intrinsic's first operand
; (the dmask) set to 1 -- the `_x` case per the function name.
; Every check prefix expects eight s_mov_b32 copies moving s2..s9 down to
; s0..s7, one d16 image_load using s[0:7] with dmask:0x1, and a wait on vmcnt.
; The GFX10 expectation additionally carries dim:SQ_RSRC_IMG_1D.
7 define amdgpu_ps half @load_1d_f16_x(<8 x i32> inreg %rsrc, i32 %s) {
8 ; GFX8-UNPACKED-LABEL: load_1d_f16_x:
9 ; GFX8-UNPACKED: ; %bb.0:
10 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
11 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
12 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
13 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
14 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
15 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
16 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
17 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
18 ; GFX8-UNPACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x1 unorm d16
19 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
20 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
22 ; GFX8-PACKED-LABEL: load_1d_f16_x:
23 ; GFX8-PACKED: ; %bb.0:
24 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
25 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
26 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
27 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
28 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
29 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
30 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
31 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
32 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x1 unorm d16
33 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
34 ; GFX8-PACKED-NEXT: ; return to shader part epilog
36 ; GFX9-LABEL: load_1d_f16_x:
38 ; GFX9-NEXT: s_mov_b32 s0, s2
39 ; GFX9-NEXT: s_mov_b32 s1, s3
40 ; GFX9-NEXT: s_mov_b32 s2, s4
41 ; GFX9-NEXT: s_mov_b32 s3, s5
42 ; GFX9-NEXT: s_mov_b32 s4, s6
43 ; GFX9-NEXT: s_mov_b32 s5, s7
44 ; GFX9-NEXT: s_mov_b32 s6, s8
45 ; GFX9-NEXT: s_mov_b32 s7, s9
46 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x1 unorm d16
47 ; GFX9-NEXT: s_waitcnt vmcnt(0)
48 ; GFX9-NEXT: ; return to shader part epilog
50 ; GFX10-LABEL: load_1d_f16_x:
52 ; GFX10-NEXT: s_mov_b32 s0, s2
53 ; GFX10-NEXT: s_mov_b32 s1, s3
54 ; GFX10-NEXT: s_mov_b32 s2, s4
55 ; GFX10-NEXT: s_mov_b32 s3, s5
56 ; GFX10-NEXT: s_mov_b32 s4, s6
57 ; GFX10-NEXT: s_mov_b32 s5, s7
58 ; GFX10-NEXT: s_mov_b32 s6, s8
59 ; GFX10-NEXT: s_mov_b32 s7, s9
60 ; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm d16
61 ; GFX10-NEXT: s_waitcnt vmcnt(0)
62 ; GFX10-NEXT: ; return to shader part epilog
63 %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; 1D image load returning a single half, with the intrinsic's first operand
; (the dmask) set to 2 -- the `_y` case per the function name.
; Every check prefix expects eight s_mov_b32 copies moving s2..s9 down to
; s0..s7, one d16 image_load using s[0:7] with dmask:0x2, and a wait on vmcnt.
; The GFX10 expectation additionally carries dim:SQ_RSRC_IMG_1D.
67 define amdgpu_ps half @load_1d_f16_y(<8 x i32> inreg %rsrc, i32 %s) {
68 ; GFX8-UNPACKED-LABEL: load_1d_f16_y:
69 ; GFX8-UNPACKED: ; %bb.0:
70 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
71 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
72 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
73 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
74 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
75 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
76 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
77 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
78 ; GFX8-UNPACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x2 unorm d16
79 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
80 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
82 ; GFX8-PACKED-LABEL: load_1d_f16_y:
83 ; GFX8-PACKED: ; %bb.0:
84 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
85 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
86 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
87 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
88 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
89 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
90 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
91 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
92 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x2 unorm d16
93 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
94 ; GFX8-PACKED-NEXT: ; return to shader part epilog
96 ; GFX9-LABEL: load_1d_f16_y:
98 ; GFX9-NEXT: s_mov_b32 s0, s2
99 ; GFX9-NEXT: s_mov_b32 s1, s3
100 ; GFX9-NEXT: s_mov_b32 s2, s4
101 ; GFX9-NEXT: s_mov_b32 s3, s5
102 ; GFX9-NEXT: s_mov_b32 s4, s6
103 ; GFX9-NEXT: s_mov_b32 s5, s7
104 ; GFX9-NEXT: s_mov_b32 s6, s8
105 ; GFX9-NEXT: s_mov_b32 s7, s9
106 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x2 unorm d16
107 ; GFX9-NEXT: s_waitcnt vmcnt(0)
108 ; GFX9-NEXT: ; return to shader part epilog
110 ; GFX10-LABEL: load_1d_f16_y:
112 ; GFX10-NEXT: s_mov_b32 s0, s2
113 ; GFX10-NEXT: s_mov_b32 s1, s3
114 ; GFX10-NEXT: s_mov_b32 s2, s4
115 ; GFX10-NEXT: s_mov_b32 s3, s5
116 ; GFX10-NEXT: s_mov_b32 s4, s6
117 ; GFX10-NEXT: s_mov_b32 s5, s7
118 ; GFX10-NEXT: s_mov_b32 s6, s8
119 ; GFX10-NEXT: s_mov_b32 s7, s9
120 ; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm d16
121 ; GFX10-NEXT: s_waitcnt vmcnt(0)
122 ; GFX10-NEXT: ; return to shader part epilog
123 %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; 1D image load returning a single half, with the intrinsic's first operand
; (the dmask) set to 4 -- the `_z` case per the function name.
; Every check prefix expects eight s_mov_b32 copies moving s2..s9 down to
; s0..s7, one d16 image_load using s[0:7] with dmask:0x4, and a wait on vmcnt.
; The GFX10 expectation additionally carries dim:SQ_RSRC_IMG_1D.
127 define amdgpu_ps half @load_1d_f16_z(<8 x i32> inreg %rsrc, i32 %s) {
128 ; GFX8-UNPACKED-LABEL: load_1d_f16_z:
129 ; GFX8-UNPACKED: ; %bb.0:
130 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
131 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
132 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
133 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
134 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
135 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
136 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
137 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
138 ; GFX8-UNPACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x4 unorm d16
139 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
140 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
142 ; GFX8-PACKED-LABEL: load_1d_f16_z:
143 ; GFX8-PACKED: ; %bb.0:
144 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
145 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
146 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
147 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
148 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
149 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
150 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
151 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
152 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x4 unorm d16
153 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
154 ; GFX8-PACKED-NEXT: ; return to shader part epilog
156 ; GFX9-LABEL: load_1d_f16_z:
158 ; GFX9-NEXT: s_mov_b32 s0, s2
159 ; GFX9-NEXT: s_mov_b32 s1, s3
160 ; GFX9-NEXT: s_mov_b32 s2, s4
161 ; GFX9-NEXT: s_mov_b32 s3, s5
162 ; GFX9-NEXT: s_mov_b32 s4, s6
163 ; GFX9-NEXT: s_mov_b32 s5, s7
164 ; GFX9-NEXT: s_mov_b32 s6, s8
165 ; GFX9-NEXT: s_mov_b32 s7, s9
166 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x4 unorm d16
167 ; GFX9-NEXT: s_waitcnt vmcnt(0)
168 ; GFX9-NEXT: ; return to shader part epilog
170 ; GFX10-LABEL: load_1d_f16_z:
172 ; GFX10-NEXT: s_mov_b32 s0, s2
173 ; GFX10-NEXT: s_mov_b32 s1, s3
174 ; GFX10-NEXT: s_mov_b32 s2, s4
175 ; GFX10-NEXT: s_mov_b32 s3, s5
176 ; GFX10-NEXT: s_mov_b32 s4, s6
177 ; GFX10-NEXT: s_mov_b32 s5, s7
178 ; GFX10-NEXT: s_mov_b32 s6, s8
179 ; GFX10-NEXT: s_mov_b32 s7, s9
180 ; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm d16
181 ; GFX10-NEXT: s_waitcnt vmcnt(0)
182 ; GFX10-NEXT: ; return to shader part epilog
183 %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 4, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; 1D image load returning a single half, with the intrinsic's first operand
; (the dmask) set to 8 -- the `_w` case per the function name.
; Every check prefix expects eight s_mov_b32 copies moving s2..s9 down to
; s0..s7, one d16 image_load using s[0:7] with dmask:0x8, and a wait on vmcnt.
; The GFX10 expectation additionally carries dim:SQ_RSRC_IMG_1D.
187 define amdgpu_ps half @load_1d_f16_w(<8 x i32> inreg %rsrc, i32 %s) {
188 ; GFX8-UNPACKED-LABEL: load_1d_f16_w:
189 ; GFX8-UNPACKED: ; %bb.0:
190 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
191 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
192 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
193 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
194 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
195 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
196 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
197 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
198 ; GFX8-UNPACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm d16
199 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
200 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
202 ; GFX8-PACKED-LABEL: load_1d_f16_w:
203 ; GFX8-PACKED: ; %bb.0:
204 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
205 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
206 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
207 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
208 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
209 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
210 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
211 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
212 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm d16
213 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
214 ; GFX8-PACKED-NEXT: ; return to shader part epilog
216 ; GFX9-LABEL: load_1d_f16_w:
218 ; GFX9-NEXT: s_mov_b32 s0, s2
219 ; GFX9-NEXT: s_mov_b32 s1, s3
220 ; GFX9-NEXT: s_mov_b32 s2, s4
221 ; GFX9-NEXT: s_mov_b32 s3, s5
222 ; GFX9-NEXT: s_mov_b32 s4, s6
223 ; GFX9-NEXT: s_mov_b32 s5, s7
224 ; GFX9-NEXT: s_mov_b32 s6, s8
225 ; GFX9-NEXT: s_mov_b32 s7, s9
226 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm d16
227 ; GFX9-NEXT: s_waitcnt vmcnt(0)
228 ; GFX9-NEXT: ; return to shader part epilog
230 ; GFX10-LABEL: load_1d_f16_w:
232 ; GFX10-NEXT: s_mov_b32 s0, s2
233 ; GFX10-NEXT: s_mov_b32 s1, s3
234 ; GFX10-NEXT: s_mov_b32 s2, s4
235 ; GFX10-NEXT: s_mov_b32 s3, s5
236 ; GFX10-NEXT: s_mov_b32 s4, s6
237 ; GFX10-NEXT: s_mov_b32 s5, s7
238 ; GFX10-NEXT: s_mov_b32 s6, s8
239 ; GFX10-NEXT: s_mov_b32 s7, s9
240 ; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm d16
241 ; GFX10-NEXT: s_waitcnt vmcnt(0)
242 ; GFX10-NEXT: ; return to shader part epilog
243 %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; 1D image load returning <2 x half>, with the intrinsic's first operand
; (the dmask) set to 3 -- the `_xy` case per the function name.
; The GFX8-UNPACKED expectation loads into the register pair v[0:1] and then
; combines the two results into v0 with v_and_b32 / v_lshlrev_b32 / v_or_b32_sdwa.
; The other three prefixes expect a single-register load into v0 with no
; follow-up ALU work; the GFX10 form also carries dim:SQ_RSRC_IMG_1D.
247 define amdgpu_ps <2 x half> @load_1d_v2f16_xy(<8 x i32> inreg %rsrc, i32 %s) {
248 ; GFX8-UNPACKED-LABEL: load_1d_v2f16_xy:
249 ; GFX8-UNPACKED: ; %bb.0:
250 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
251 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
252 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
253 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
254 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
255 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
256 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
257 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
258 ; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm d16
259 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
260 ; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1
261 ; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
262 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
263 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
265 ; GFX8-PACKED-LABEL: load_1d_v2f16_xy:
266 ; GFX8-PACKED: ; %bb.0:
267 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
268 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
269 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
270 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
271 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
272 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
273 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
274 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
275 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x3 unorm d16
276 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
277 ; GFX8-PACKED-NEXT: ; return to shader part epilog
279 ; GFX9-LABEL: load_1d_v2f16_xy:
281 ; GFX9-NEXT: s_mov_b32 s0, s2
282 ; GFX9-NEXT: s_mov_b32 s1, s3
283 ; GFX9-NEXT: s_mov_b32 s2, s4
284 ; GFX9-NEXT: s_mov_b32 s3, s5
285 ; GFX9-NEXT: s_mov_b32 s4, s6
286 ; GFX9-NEXT: s_mov_b32 s5, s7
287 ; GFX9-NEXT: s_mov_b32 s6, s8
288 ; GFX9-NEXT: s_mov_b32 s7, s9
289 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x3 unorm d16
290 ; GFX9-NEXT: s_waitcnt vmcnt(0)
291 ; GFX9-NEXT: ; return to shader part epilog
293 ; GFX10-LABEL: load_1d_v2f16_xy:
295 ; GFX10-NEXT: s_mov_b32 s0, s2
296 ; GFX10-NEXT: s_mov_b32 s1, s3
297 ; GFX10-NEXT: s_mov_b32 s2, s4
298 ; GFX10-NEXT: s_mov_b32 s3, s5
299 ; GFX10-NEXT: s_mov_b32 s4, s6
300 ; GFX10-NEXT: s_mov_b32 s5, s7
301 ; GFX10-NEXT: s_mov_b32 s6, s8
302 ; GFX10-NEXT: s_mov_b32 s7, s9
303 ; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm d16
304 ; GFX10-NEXT: s_waitcnt vmcnt(0)
305 ; GFX10-NEXT: ; return to shader part epilog
306 %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; 1D image load returning <2 x half>, with the intrinsic's first operand
; (the dmask) set to 5 -- the `_xz` case per the function name.
; The GFX8-UNPACKED expectation loads into the register pair v[0:1] and then
; combines the two results into v0 with v_and_b32 / v_lshlrev_b32 / v_or_b32_sdwa.
; The other three prefixes expect a single-register load into v0 with no
; follow-up ALU work; the GFX10 form also carries dim:SQ_RSRC_IMG_1D.
310 define amdgpu_ps <2 x half> @load_1d_v2f16_xz(<8 x i32> inreg %rsrc, i32 %s) {
311 ; GFX8-UNPACKED-LABEL: load_1d_v2f16_xz:
312 ; GFX8-UNPACKED: ; %bb.0:
313 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
314 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
315 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
316 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
317 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
318 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
319 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
320 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
321 ; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x5 unorm d16
322 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
323 ; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1
324 ; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
325 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
326 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
328 ; GFX8-PACKED-LABEL: load_1d_v2f16_xz:
329 ; GFX8-PACKED: ; %bb.0:
330 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
331 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
332 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
333 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
334 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
335 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
336 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
337 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
338 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x5 unorm d16
339 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
340 ; GFX8-PACKED-NEXT: ; return to shader part epilog
342 ; GFX9-LABEL: load_1d_v2f16_xz:
344 ; GFX9-NEXT: s_mov_b32 s0, s2
345 ; GFX9-NEXT: s_mov_b32 s1, s3
346 ; GFX9-NEXT: s_mov_b32 s2, s4
347 ; GFX9-NEXT: s_mov_b32 s3, s5
348 ; GFX9-NEXT: s_mov_b32 s4, s6
349 ; GFX9-NEXT: s_mov_b32 s5, s7
350 ; GFX9-NEXT: s_mov_b32 s6, s8
351 ; GFX9-NEXT: s_mov_b32 s7, s9
352 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x5 unorm d16
353 ; GFX9-NEXT: s_waitcnt vmcnt(0)
354 ; GFX9-NEXT: ; return to shader part epilog
356 ; GFX10-LABEL: load_1d_v2f16_xz:
358 ; GFX10-NEXT: s_mov_b32 s0, s2
359 ; GFX10-NEXT: s_mov_b32 s1, s3
360 ; GFX10-NEXT: s_mov_b32 s2, s4
361 ; GFX10-NEXT: s_mov_b32 s3, s5
362 ; GFX10-NEXT: s_mov_b32 s4, s6
363 ; GFX10-NEXT: s_mov_b32 s5, s7
364 ; GFX10-NEXT: s_mov_b32 s6, s8
365 ; GFX10-NEXT: s_mov_b32 s7, s9
366 ; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm d16
367 ; GFX10-NEXT: s_waitcnt vmcnt(0)
368 ; GFX10-NEXT: ; return to shader part epilog
369 %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 5, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; 1D image load returning <2 x half>, with the intrinsic's first operand
; (the dmask) set to 9 -- the `_xw` case per the function name.
; The GFX8-UNPACKED expectation loads into the register pair v[0:1] and then
; combines the two results into v0 with v_and_b32 / v_lshlrev_b32 / v_or_b32_sdwa.
; The other three prefixes expect a single-register load into v0 with no
; follow-up ALU work; the GFX10 form also carries dim:SQ_RSRC_IMG_1D.
373 define amdgpu_ps <2 x half> @load_1d_v2f16_xw(<8 x i32> inreg %rsrc, i32 %s) {
374 ; GFX8-UNPACKED-LABEL: load_1d_v2f16_xw:
375 ; GFX8-UNPACKED: ; %bb.0:
376 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
377 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
378 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
379 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
380 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
381 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
382 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
383 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
384 ; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm d16
385 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
386 ; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1
387 ; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
388 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
389 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
391 ; GFX8-PACKED-LABEL: load_1d_v2f16_xw:
392 ; GFX8-PACKED: ; %bb.0:
393 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
394 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
395 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
396 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
397 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
398 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
399 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
400 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
401 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x9 unorm d16
402 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
403 ; GFX8-PACKED-NEXT: ; return to shader part epilog
405 ; GFX9-LABEL: load_1d_v2f16_xw:
407 ; GFX9-NEXT: s_mov_b32 s0, s2
408 ; GFX9-NEXT: s_mov_b32 s1, s3
409 ; GFX9-NEXT: s_mov_b32 s2, s4
410 ; GFX9-NEXT: s_mov_b32 s3, s5
411 ; GFX9-NEXT: s_mov_b32 s4, s6
412 ; GFX9-NEXT: s_mov_b32 s5, s7
413 ; GFX9-NEXT: s_mov_b32 s6, s8
414 ; GFX9-NEXT: s_mov_b32 s7, s9
415 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x9 unorm d16
416 ; GFX9-NEXT: s_waitcnt vmcnt(0)
417 ; GFX9-NEXT: ; return to shader part epilog
419 ; GFX10-LABEL: load_1d_v2f16_xw:
421 ; GFX10-NEXT: s_mov_b32 s0, s2
422 ; GFX10-NEXT: s_mov_b32 s1, s3
423 ; GFX10-NEXT: s_mov_b32 s2, s4
424 ; GFX10-NEXT: s_mov_b32 s3, s5
425 ; GFX10-NEXT: s_mov_b32 s4, s6
426 ; GFX10-NEXT: s_mov_b32 s5, s7
427 ; GFX10-NEXT: s_mov_b32 s6, s8
428 ; GFX10-NEXT: s_mov_b32 s7, s9
429 ; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm d16
430 ; GFX10-NEXT: s_waitcnt vmcnt(0)
431 ; GFX10-NEXT: ; return to shader part epilog
432 %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; 1D image load returning <2 x half>, with the intrinsic's first operand
; (the dmask) set to 6 -- the `_yz` case per the function name.
; The GFX8-UNPACKED expectation loads into the register pair v[0:1] and then
; combines the two results into v0 with v_and_b32 / v_lshlrev_b32 / v_or_b32_sdwa.
; The other three prefixes expect a single-register load into v0 with no
; follow-up ALU work; the GFX10 form also carries dim:SQ_RSRC_IMG_1D.
436 define amdgpu_ps <2 x half> @load_1d_v2f16_yz(<8 x i32> inreg %rsrc, i32 %s) {
437 ; GFX8-UNPACKED-LABEL: load_1d_v2f16_yz:
438 ; GFX8-UNPACKED: ; %bb.0:
439 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
440 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
441 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
442 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
443 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
444 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
445 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
446 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
447 ; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x6 unorm d16
448 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
449 ; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1
450 ; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
451 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
452 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
454 ; GFX8-PACKED-LABEL: load_1d_v2f16_yz:
455 ; GFX8-PACKED: ; %bb.0:
456 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
457 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
458 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
459 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
460 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
461 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
462 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
463 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
464 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x6 unorm d16
465 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
466 ; GFX8-PACKED-NEXT: ; return to shader part epilog
468 ; GFX9-LABEL: load_1d_v2f16_yz:
470 ; GFX9-NEXT: s_mov_b32 s0, s2
471 ; GFX9-NEXT: s_mov_b32 s1, s3
472 ; GFX9-NEXT: s_mov_b32 s2, s4
473 ; GFX9-NEXT: s_mov_b32 s3, s5
474 ; GFX9-NEXT: s_mov_b32 s4, s6
475 ; GFX9-NEXT: s_mov_b32 s5, s7
476 ; GFX9-NEXT: s_mov_b32 s6, s8
477 ; GFX9-NEXT: s_mov_b32 s7, s9
478 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x6 unorm d16
479 ; GFX9-NEXT: s_waitcnt vmcnt(0)
480 ; GFX9-NEXT: ; return to shader part epilog
482 ; GFX10-LABEL: load_1d_v2f16_yz:
484 ; GFX10-NEXT: s_mov_b32 s0, s2
485 ; GFX10-NEXT: s_mov_b32 s1, s3
486 ; GFX10-NEXT: s_mov_b32 s2, s4
487 ; GFX10-NEXT: s_mov_b32 s3, s5
488 ; GFX10-NEXT: s_mov_b32 s4, s6
489 ; GFX10-NEXT: s_mov_b32 s5, s7
490 ; GFX10-NEXT: s_mov_b32 s6, s8
491 ; GFX10-NEXT: s_mov_b32 s7, s9
492 ; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm d16
493 ; GFX10-NEXT: s_waitcnt vmcnt(0)
494 ; GFX10-NEXT: ; return to shader part epilog
495 %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; 1D image load returning <3 x half>, with the intrinsic's first operand
; (the dmask) set to 7 -- the `_xyz` case per the function name.
; The GFX8-UNPACKED expectation loads three registers v[0:2], masks v1 and v2
; with 0xffff (held in s0), and ORs the shifted v1 value into v0.
; The other three prefixes expect a two-register load into v[0:1] followed by
; a shift/mask sequence that rebuilds v0 (and masks v1).
; The GFX9 and GFX10 expectations also contain `s_lshl_b32 s0, s0, 16`, whose
; result feeds the v_and_or_b32 that produces v1.
; NOTE(review): s0 at that point still holds the first copied resource dword;
; presumably this stands in for an undefined high half -- confirm before
; relying on it.
499 define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
500 ; GFX8-UNPACKED-LABEL: load_1d_v3f16_xyz:
501 ; GFX8-UNPACKED: ; %bb.0:
502 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
503 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
504 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
505 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
506 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
507 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
508 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
509 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
510 ; GFX8-UNPACKED-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm d16
511 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, 0xffff
512 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
513 ; GFX8-UNPACKED-NEXT: v_and_b32_e32 v3, s0, v1
514 ; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, s0, v2
515 ; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v3
516 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
517 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
519 ; GFX8-PACKED-LABEL: load_1d_v3f16_xyz:
520 ; GFX8-PACKED: ; %bb.0:
521 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
522 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
523 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
524 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
525 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
526 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
527 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
528 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
529 ; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
530 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
531 ; GFX8-PACKED-NEXT: v_lshrrev_b32_e32 v2, 16, v0
532 ; GFX8-PACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v2
533 ; GFX8-PACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1
534 ; GFX8-PACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
535 ; GFX8-PACKED-NEXT: ; return to shader part epilog
537 ; GFX9-LABEL: load_1d_v3f16_xyz:
539 ; GFX9-NEXT: s_mov_b32 s0, s2
540 ; GFX9-NEXT: s_mov_b32 s1, s3
541 ; GFX9-NEXT: s_mov_b32 s2, s4
542 ; GFX9-NEXT: s_mov_b32 s3, s5
543 ; GFX9-NEXT: s_mov_b32 s4, s6
544 ; GFX9-NEXT: s_mov_b32 s5, s7
545 ; GFX9-NEXT: s_mov_b32 s6, s8
546 ; GFX9-NEXT: s_mov_b32 s7, s9
547 ; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
548 ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
549 ; GFX9-NEXT: s_lshl_b32 s0, s0, 16
550 ; GFX9-NEXT: s_waitcnt vmcnt(0)
551 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0
552 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
553 ; GFX9-NEXT: v_and_or_b32 v1, v1, v2, s0
554 ; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v3
555 ; GFX9-NEXT: ; return to shader part epilog
557 ; GFX10-LABEL: load_1d_v3f16_xyz:
559 ; GFX10-NEXT: s_mov_b32 s0, s2
560 ; GFX10-NEXT: s_mov_b32 s1, s3
561 ; GFX10-NEXT: s_mov_b32 s2, s4
562 ; GFX10-NEXT: s_mov_b32 s3, s5
563 ; GFX10-NEXT: s_mov_b32 s4, s6
564 ; GFX10-NEXT: s_mov_b32 s5, s7
565 ; GFX10-NEXT: s_mov_b32 s6, s8
566 ; GFX10-NEXT: s_mov_b32 s7, s9
567 ; GFX10-NEXT: v_mov_b32_e32 v3, 0xffff
568 ; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16
569 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
570 ; GFX10-NEXT: s_lshl_b32 s0, s0, 16
571 ; GFX10-NEXT: s_waitcnt vmcnt(0)
572 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
573 ; GFX10-NEXT: v_and_or_b32 v1, v1, v3, s0
574 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
575 ; GFX10-NEXT: v_and_or_b32 v0, v0, v3, v2
576 ; GFX10-NEXT: ; return to shader part epilog
577 %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; 1D image load returning <4 x half>, with the intrinsic's first operand
; (the dmask) set to 15 -- the `_xyzw` case per the function name; the
; expected instructions print it as dmask:0xf.
; The GFX8-UNPACKED expectation loads four registers v[0:3] and then combines
; them pairwise into v0 and v1 (mask with 0xffff held in s0, shift left by 16,
; v_or_b32_sdwa).
; The other three prefixes expect a two-register load into v[0:1] with no
; follow-up ALU work; the GFX10 form also carries dim:SQ_RSRC_IMG_1D.
581 define amdgpu_ps <4 x half> @load_1d_v4f16_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
582 ; GFX8-UNPACKED-LABEL: load_1d_v4f16_xyzw:
583 ; GFX8-UNPACKED: ; %bb.0:
584 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
585 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
586 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
587 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
588 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
589 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
590 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
591 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
592 ; GFX8-UNPACKED-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm d16
593 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, 0xffff
594 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
595 ; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, s0, v1
596 ; GFX8-UNPACKED-NEXT: v_and_b32_e32 v3, s0, v3
597 ; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
598 ; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v3, 16, v3
599 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
600 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
601 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
603 ; GFX8-PACKED-LABEL: load_1d_v4f16_xyzw:
604 ; GFX8-PACKED: ; %bb.0:
605 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
606 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
607 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
608 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
609 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
610 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
611 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
612 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
613 ; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
614 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
615 ; GFX8-PACKED-NEXT: ; return to shader part epilog
617 ; GFX9-LABEL: load_1d_v4f16_xyzw:
619 ; GFX9-NEXT: s_mov_b32 s0, s2
620 ; GFX9-NEXT: s_mov_b32 s1, s3
621 ; GFX9-NEXT: s_mov_b32 s2, s4
622 ; GFX9-NEXT: s_mov_b32 s3, s5
623 ; GFX9-NEXT: s_mov_b32 s4, s6
624 ; GFX9-NEXT: s_mov_b32 s5, s7
625 ; GFX9-NEXT: s_mov_b32 s6, s8
626 ; GFX9-NEXT: s_mov_b32 s7, s9
627 ; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
628 ; GFX9-NEXT: s_waitcnt vmcnt(0)
629 ; GFX9-NEXT: ; return to shader part epilog
631 ; GFX10-LABEL: load_1d_v4f16_xyzw:
633 ; GFX10-NEXT: s_mov_b32 s0, s2
634 ; GFX10-NEXT: s_mov_b32 s1, s3
635 ; GFX10-NEXT: s_mov_b32 s2, s4
636 ; GFX10-NEXT: s_mov_b32 s3, s5
637 ; GFX10-NEXT: s_mov_b32 s4, s6
638 ; GFX10-NEXT: s_mov_b32 s5, s7
639 ; GFX10-NEXT: s_mov_b32 s6, s8
640 ; GFX10-NEXT: s_mov_b32 s7, s9
641 ; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm d16
642 ; GFX10-NEXT: s_waitcnt vmcnt(0)
643 ; GFX10-NEXT: ; return to shader part epilog
644 %v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; 1D image load through the struct-returning intrinsic { half, i32 }, with
; dmask operand 1 and the operand following %rsrc set to 1 (it is 0 in the
; plain load_1d_* tests); the expected image_load carries the `tfe` modifier.
; The IR extracts struct member 1 as %v.err and bitcasts it to float, so only
; the i32 member is consumed here.
; All four prefixes expect v1 and v2 to be zeroed before the load, the load to
; write the pair v[1:2], and v2 to be copied into v0 afterwards.
648 define amdgpu_ps float @load_1d_f16_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) {
649 ; GFX8-UNPACKED-LABEL: load_1d_f16_tfe_dmask_x:
650 ; GFX8-UNPACKED: ; %bb.0:
651 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0
652 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
653 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
654 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
655 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
656 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
657 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
658 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
659 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
660 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1
661 ; GFX8-UNPACKED-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16
662 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
663 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v0, v2
664 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
666 ; GFX8-PACKED-LABEL: load_1d_f16_tfe_dmask_x:
667 ; GFX8-PACKED: ; %bb.0:
668 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v1, 0
669 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
670 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
671 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
672 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
673 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
674 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
675 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
676 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
677 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, v1
678 ; GFX8-PACKED-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16
679 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
680 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v2
681 ; GFX8-PACKED-NEXT: ; return to shader part epilog
683 ; GFX9-LABEL: load_1d_f16_tfe_dmask_x:
685 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
686 ; GFX9-NEXT: s_mov_b32 s0, s2
687 ; GFX9-NEXT: s_mov_b32 s1, s3
688 ; GFX9-NEXT: s_mov_b32 s2, s4
689 ; GFX9-NEXT: s_mov_b32 s3, s5
690 ; GFX9-NEXT: s_mov_b32 s4, s6
691 ; GFX9-NEXT: s_mov_b32 s5, s7
692 ; GFX9-NEXT: s_mov_b32 s6, s8
693 ; GFX9-NEXT: s_mov_b32 s7, s9
694 ; GFX9-NEXT: v_mov_b32_e32 v2, v1
695 ; GFX9-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16
696 ; GFX9-NEXT: s_waitcnt vmcnt(0)
697 ; GFX9-NEXT: v_mov_b32_e32 v0, v2
698 ; GFX9-NEXT: ; return to shader part epilog
700 ; GFX10-LABEL: load_1d_f16_tfe_dmask_x:
702 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
703 ; GFX10-NEXT: s_mov_b32 s0, s2
704 ; GFX10-NEXT: s_mov_b32 s1, s3
705 ; GFX10-NEXT: s_mov_b32 s2, s4
706 ; GFX10-NEXT: s_mov_b32 s3, s5
707 ; GFX10-NEXT: s_mov_b32 s4, s6
708 ; GFX10-NEXT: s_mov_b32 s5, s7
709 ; GFX10-NEXT: s_mov_b32 s6, s8
710 ; GFX10-NEXT: s_mov_b32 s7, s9
711 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
712 ; GFX10-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
713 ; GFX10-NEXT: s_waitcnt vmcnt(0)
714 ; GFX10-NEXT: v_mov_b32_e32 v0, v2
715 ; GFX10-NEXT: ; return to shader part epilog
716 %v = call { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
717 %v.err = extractvalue { half, i32 } %v, 1
718 %vv = bitcast i32 %v.err to float
; 1D image load through the struct-returning intrinsic { <2 x half>, i32 },
; with dmask operand 3 and the operand following %rsrc set to 1; the expected
; image_load carries the `tfe` modifier.
; The IR extracts struct member 1 as %v.err and bitcasts it to float, so only
; the i32 member is consumed here.
; The GFX8-UNPACKED expectation zeroes v1..v3, loads into v[1:3], and copies v3
; into v0. The other three prefixes zero v1..v2, load into v[1:2], and copy v2
; into v0.
722 define amdgpu_ps float @load_1d_v2f16_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s) {
723 ; GFX8-UNPACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
724 ; GFX8-UNPACKED: ; %bb.0:
725 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0
726 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
727 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
728 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
729 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
730 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
731 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
732 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
733 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
734 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1
735 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v3, v1
736 ; GFX8-UNPACKED-NEXT: image_load v[1:3], v0, s[0:7] dmask:0x3 unorm tfe d16
737 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
738 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v0, v3
739 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
741 ; GFX8-PACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
742 ; GFX8-PACKED: ; %bb.0:
743 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v1, 0
744 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
745 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
746 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
747 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
748 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
749 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
750 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
751 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
752 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, v1
753 ; GFX8-PACKED-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x3 unorm tfe d16
754 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
755 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v2
756 ; GFX8-PACKED-NEXT: ; return to shader part epilog
758 ; GFX9-LABEL: load_1d_v2f16_tfe_dmask_xy:
760 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
761 ; GFX9-NEXT: s_mov_b32 s0, s2
762 ; GFX9-NEXT: s_mov_b32 s1, s3
763 ; GFX9-NEXT: s_mov_b32 s2, s4
764 ; GFX9-NEXT: s_mov_b32 s3, s5
765 ; GFX9-NEXT: s_mov_b32 s4, s6
766 ; GFX9-NEXT: s_mov_b32 s5, s7
767 ; GFX9-NEXT: s_mov_b32 s6, s8
768 ; GFX9-NEXT: s_mov_b32 s7, s9
769 ; GFX9-NEXT: v_mov_b32_e32 v2, v1
770 ; GFX9-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x3 unorm tfe d16
771 ; GFX9-NEXT: s_waitcnt vmcnt(0)
772 ; GFX9-NEXT: v_mov_b32_e32 v0, v2
773 ; GFX9-NEXT: ; return to shader part epilog
775 ; GFX10-LABEL: load_1d_v2f16_tfe_dmask_xy:
777 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
778 ; GFX10-NEXT: s_mov_b32 s0, s2
779 ; GFX10-NEXT: s_mov_b32 s1, s3
780 ; GFX10-NEXT: s_mov_b32 s2, s4
781 ; GFX10-NEXT: s_mov_b32 s3, s5
782 ; GFX10-NEXT: s_mov_b32 s4, s6
783 ; GFX10-NEXT: s_mov_b32 s5, s7
784 ; GFX10-NEXT: s_mov_b32 s6, s8
785 ; GFX10-NEXT: s_mov_b32 s7, s9
786 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
787 ; GFX10-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe d16
788 ; GFX10-NEXT: s_waitcnt vmcnt(0)
789 ; GFX10-NEXT: v_mov_b32_e32 v0, v2
790 ; GFX10-NEXT: ; return to shader part epilog
791 %v = call { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
792 %v.err = extractvalue { <2 x half>, i32 } %v, 1
793 %vv = bitcast i32 %v.err to float
; Test: 1D d16 image load through the struct-returning intrinsic
; { <3 x half>, i32 } with a dmask operand of 7 (printed as 0x7 in the
; expected image_load). Only struct element 1 (the i32) is extracted and
; bitcast to float; the <3 x half> data is not used by the visible IR.
;
; What the expected code shows:
;   * the eight inreg %rsrc dwords are copied from s2..s9 down to s0..s7;
;   * every destination VGPR is zero-filled before the load (v1 = 0, then
;     copies of v1);
;   * the image_load carries both the tfe and d16 modifiers;
;   * the last register of the destination range is moved into v0 afterwards.
; The destination range is v[1:4] for the tonga run and v[1:3] for the gfx810,
; gfx900 and gfx1010 runs.
;
; The assertions are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
797 define amdgpu_ps float @load_1d_v3f16_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
798 ; GFX8-UNPACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
799 ; GFX8-UNPACKED: ; %bb.0:
800 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0
801 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
802 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
803 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
804 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
805 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
806 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
807 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
808 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
809 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1
810 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v3, v1
811 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v4, v1
812 ; GFX8-UNPACKED-NEXT: image_load v[1:4], v0, s[0:7] dmask:0x7 unorm tfe d16
813 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
814 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v0, v4
815 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
817 ; GFX8-PACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
818 ; GFX8-PACKED: ; %bb.0:
819 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v1, 0
820 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
821 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
822 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
823 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
824 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
825 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
826 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
827 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
828 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, v1
829 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v3, v1
830 ; GFX8-PACKED-NEXT: image_load v[1:3], v0, s[0:7] dmask:0x7 unorm tfe d16
831 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
832 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v3
833 ; GFX8-PACKED-NEXT: ; return to shader part epilog
835 ; GFX9-LABEL: load_1d_v3f16_tfe_dmask_xyz:
837 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
838 ; GFX9-NEXT: s_mov_b32 s0, s2
839 ; GFX9-NEXT: s_mov_b32 s1, s3
840 ; GFX9-NEXT: s_mov_b32 s2, s4
841 ; GFX9-NEXT: s_mov_b32 s3, s5
842 ; GFX9-NEXT: s_mov_b32 s4, s6
843 ; GFX9-NEXT: s_mov_b32 s5, s7
844 ; GFX9-NEXT: s_mov_b32 s6, s8
845 ; GFX9-NEXT: s_mov_b32 s7, s9
846 ; GFX9-NEXT: v_mov_b32_e32 v2, v1
847 ; GFX9-NEXT: v_mov_b32_e32 v3, v1
848 ; GFX9-NEXT: image_load v[1:3], v0, s[0:7] dmask:0x7 unorm tfe d16
849 ; GFX9-NEXT: s_waitcnt vmcnt(0)
850 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
851 ; GFX9-NEXT: ; return to shader part epilog
853 ; GFX10-LABEL: load_1d_v3f16_tfe_dmask_xyz:
855 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
856 ; GFX10-NEXT: s_mov_b32 s0, s2
857 ; GFX10-NEXT: s_mov_b32 s1, s3
858 ; GFX10-NEXT: s_mov_b32 s2, s4
859 ; GFX10-NEXT: s_mov_b32 s3, s5
860 ; GFX10-NEXT: s_mov_b32 s4, s6
861 ; GFX10-NEXT: s_mov_b32 s5, s7
862 ; GFX10-NEXT: s_mov_b32 s6, s8
863 ; GFX10-NEXT: s_mov_b32 s7, s9
864 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
865 ; GFX10-NEXT: v_mov_b32_e32 v3, v1
866 ; GFX10-NEXT: image_load v[1:3], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe d16
867 ; GFX10-NEXT: s_waitcnt vmcnt(0)
868 ; GFX10-NEXT: v_mov_b32_e32 v0, v3
869 ; GFX10-NEXT: ; return to shader part epilog
870 %v = call { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
871 %v.err = extractvalue { <3 x half>, i32 } %v, 1
872 %vv = bitcast i32 %v.err to float
; Test: 1D d16 image load through the struct-returning intrinsic
; { <4 x half>, i32 }. Only struct element 1 (the i32) is extracted and
; bitcast to float; the <4 x half> data is not used by the visible IR.
;
; What the expected code shows, identically for all four runs:
;   * the eight inreg %rsrc dwords are copied from s2..s9 down to s0..s7;
;   * v1 and v2 are zero-filled before the load;
;   * the image_load writes v[1:2] with the tfe and d16 modifiers;
;   * v2 is moved into v0 afterwards.
;
; NOTE(review): the dmask operand passed to the intrinsic is 16 (printed as
; 0x10 in the expected image_load), although the test name says "xyzw". The
; neighbouring tests pass 3 for "xy" and 7 for "xyz", so 15 looks like the
; intended value - TODO confirm whether 16 is deliberate (e.g. to exercise an
; out-of-range mask). The two-register destination range, narrower than in the
; xyz test, is presumably a consequence of that value. If the operand is
; changed, regenerate the assertions with utils/update_llc_test_checks.py
; instead of editing them by hand.
876 define amdgpu_ps float @load_1d_v4f16_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
877 ; GFX8-UNPACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
878 ; GFX8-UNPACKED: ; %bb.0:
879 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0
880 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
881 ; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
882 ; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
883 ; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
884 ; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
885 ; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
886 ; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
887 ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
888 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1
889 ; GFX8-UNPACKED-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe d16
890 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
891 ; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v0, v2
892 ; GFX8-UNPACKED-NEXT: ; return to shader part epilog
894 ; GFX8-PACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
895 ; GFX8-PACKED: ; %bb.0:
896 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v1, 0
897 ; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
898 ; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
899 ; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
900 ; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
901 ; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
902 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
903 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
904 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
905 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, v1
906 ; GFX8-PACKED-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe d16
907 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
908 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v2
909 ; GFX8-PACKED-NEXT: ; return to shader part epilog
911 ; GFX9-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
913 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
914 ; GFX9-NEXT: s_mov_b32 s0, s2
915 ; GFX9-NEXT: s_mov_b32 s1, s3
916 ; GFX9-NEXT: s_mov_b32 s2, s4
917 ; GFX9-NEXT: s_mov_b32 s3, s5
918 ; GFX9-NEXT: s_mov_b32 s4, s6
919 ; GFX9-NEXT: s_mov_b32 s5, s7
920 ; GFX9-NEXT: s_mov_b32 s6, s8
921 ; GFX9-NEXT: s_mov_b32 s7, s9
922 ; GFX9-NEXT: v_mov_b32_e32 v2, v1
923 ; GFX9-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe d16
924 ; GFX9-NEXT: s_waitcnt vmcnt(0)
925 ; GFX9-NEXT: v_mov_b32_e32 v0, v2
926 ; GFX9-NEXT: ; return to shader part epilog
928 ; GFX10-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
930 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
931 ; GFX10-NEXT: s_mov_b32 s0, s2
932 ; GFX10-NEXT: s_mov_b32 s1, s3
933 ; GFX10-NEXT: s_mov_b32 s2, s4
934 ; GFX10-NEXT: s_mov_b32 s3, s5
935 ; GFX10-NEXT: s_mov_b32 s4, s6
936 ; GFX10-NEXT: s_mov_b32 s5, s7
937 ; GFX10-NEXT: s_mov_b32 s6, s8
938 ; GFX10-NEXT: s_mov_b32 s7, s9
939 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
940 ; GFX10-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D unorm tfe d16
941 ; GFX10-NEXT: s_waitcnt vmcnt(0)
942 ; GFX10-NEXT: v_mov_b32_e32 v0, v2
943 ; GFX10-NEXT: ; return to shader part epilog
944 %v = call { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 16, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
945 %v.err = extractvalue { <4 x half>, i32 } %v, 1
946 %vv = bitcast i32 %v.err to float
; Declarations of the 1D image-load intrinsic overloads referenced by this
; file. All share the operand list (i32 immarg, i32, <8 x i32>, i32 immarg,
; i32 immarg); in the tests above the first operand is the value that shows up
; as dmask in the expected image_load, the second is the i32 coordinate %s and
; the third is the <8 x i32> %rsrc.
;
; Overloads returning only the half-precision data (scalar, 2, 3 and 4 lanes):
950 declare half @llvm.amdgcn.image.load.1d.half.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
951 declare <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
952 declare <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
953 declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
; Overloads returning a { data, i32 } struct. The *_tfe_* tests above call
; these and read struct element 1 (named %v.err there).
955 declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
956 declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
957 declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
958 declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
; Attribute group applied to every declaration above.
960 attributes #0 = { nounwind readonly }