; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-UNPACKED %s
; Scalar half (d16) 1D image load that returns a { half, i32 } pair, called with
; a dmask immediate of 0 and a fourth immediate operand of 1 (presumably the TFE
; bit of texfailctrl -- the expected instruction carries the tfe modifier).
; All four targets are still expected to emit dmask:0x1 and to zero the two
; result VGPRs v[1:2] before the load. Both struct fields are written out with
; volatile stores to an undef addrspace(1) pointer.
define amdgpu_ps void @load_1d_f16_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) {
; GFX9-LABEL: load_1d_f16_tfe_dmask0:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b32 s11, s9
; GFX9-NEXT: s_mov_b32 s10, s8
; GFX9-NEXT: s_mov_b32 s9, s7
; GFX9-NEXT: s_mov_b32 s8, s6
; GFX9-NEXT: s_mov_b32 s7, s5
; GFX9-NEXT: s_mov_b32 s6, s4
; GFX9-NEXT: s_mov_b32 s5, s3
; GFX9-NEXT: s_mov_b32 s4, s2
; GFX9-NEXT: v_mov_b32_e32 v2, v1
; GFX9-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x1 unorm tfe d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_short v[0:1], v1, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: load_1d_f16_tfe_dmask0:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_mov_b32 s11, s9
; GFX10-NEXT: s_mov_b32 s10, s8
; GFX10-NEXT: s_mov_b32 s9, s7
; GFX10-NEXT: s_mov_b32 s8, s6
; GFX10-NEXT: s_mov_b32 s7, s5
; GFX10-NEXT: s_mov_b32 s6, s4
; GFX10-NEXT: s_mov_b32 s5, s3
; GFX10-NEXT: s_mov_b32 s4, s2
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_short v[0:1], v1, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: load_1d_f16_tfe_dmask0:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_mov_b32 s11, s9
; GFX11-NEXT: s_mov_b32 s10, s8
; GFX11-NEXT: s_mov_b32 s9, s7
; GFX11-NEXT: s_mov_b32 s8, s6
; GFX11-NEXT: s_mov_b32 s7, s5
; GFX11-NEXT: s_mov_b32 s6, s4
; GFX11-NEXT: s_mov_b32 s5, s3
; GFX11-NEXT: s_mov_b32 s4, s2
; GFX11-NEXT: v_mov_b32_e32 v2, v1
; GFX11-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b16 v[0:1], v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
;
; GFX8-UNPACKED-LABEL: load_1d_f16_tfe_dmask0:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT: s_mov_b32 s11, s9
; GFX8-UNPACKED-NEXT: s_mov_b32 s10, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s9, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s8, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s2
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x1 unorm tfe d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: flat_store_short v[0:1], v1
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: flat_store_dword v[0:1], v2
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: s_endpgm
  %v = call { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 0, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.data = extractvalue { half, i32 } %v, 0
  %v.err = extractvalue { half, i32 } %v, 1
  store volatile half %v.data, ptr addrspace(1) undef
  store volatile i32 %v.err, ptr addrspace(1) undef
  ret void
}

; Scalar half (d16) 1D image load returning { half, i32 }, called with a dmask
; immediate of 1 and a fourth immediate operand of 1 (presumably the TFE bit of
; texfailctrl -- the expected instruction carries the tfe modifier).
; The expected code zeroes v[1:2], issues image_load with dmask:0x1, then stores
; v1 as a 16-bit value and v2 as a 32-bit value. Both struct fields are written
; with volatile stores to an undef addrspace(1) pointer.
define amdgpu_ps void @load_1d_f16_tfe_dmask1(<8 x i32> inreg %rsrc, i32 %s) {
; GFX9-LABEL: load_1d_f16_tfe_dmask1:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b32 s11, s9
; GFX9-NEXT: s_mov_b32 s10, s8
; GFX9-NEXT: s_mov_b32 s9, s7
; GFX9-NEXT: s_mov_b32 s8, s6
; GFX9-NEXT: s_mov_b32 s7, s5
; GFX9-NEXT: s_mov_b32 s6, s4
; GFX9-NEXT: s_mov_b32 s5, s3
; GFX9-NEXT: s_mov_b32 s4, s2
; GFX9-NEXT: v_mov_b32_e32 v2, v1
; GFX9-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x1 unorm tfe d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_short v[0:1], v1, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: load_1d_f16_tfe_dmask1:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_mov_b32 s11, s9
; GFX10-NEXT: s_mov_b32 s10, s8
; GFX10-NEXT: s_mov_b32 s9, s7
; GFX10-NEXT: s_mov_b32 s8, s6
; GFX10-NEXT: s_mov_b32 s7, s5
; GFX10-NEXT: s_mov_b32 s6, s4
; GFX10-NEXT: s_mov_b32 s5, s3
; GFX10-NEXT: s_mov_b32 s4, s2
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_short v[0:1], v1, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: load_1d_f16_tfe_dmask1:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_mov_b32 s11, s9
; GFX11-NEXT: s_mov_b32 s10, s8
; GFX11-NEXT: s_mov_b32 s9, s7
; GFX11-NEXT: s_mov_b32 s8, s6
; GFX11-NEXT: s_mov_b32 s7, s5
; GFX11-NEXT: s_mov_b32 s6, s4
; GFX11-NEXT: s_mov_b32 s5, s3
; GFX11-NEXT: s_mov_b32 s4, s2
; GFX11-NEXT: v_mov_b32_e32 v2, v1
; GFX11-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b16 v[0:1], v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
;
; GFX8-UNPACKED-LABEL: load_1d_f16_tfe_dmask1:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT: s_mov_b32 s11, s9
; GFX8-UNPACKED-NEXT: s_mov_b32 s10, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s9, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s8, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s2
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x1 unorm tfe d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: flat_store_short v[0:1], v1
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: flat_store_dword v[0:1], v2
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: s_endpgm
  %v = call { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.data = extractvalue { half, i32 } %v, 0
  %v.err = extractvalue { half, i32 } %v, 1
  store volatile half %v.data, ptr addrspace(1) undef
  store volatile i32 %v.err, ptr addrspace(1) undef
  ret void
}

; <2 x half> (d16) 1D image load returning { <2 x half>, i32 }, called with a
; dmask immediate of 0 and a fourth immediate operand of 1 (presumably the TFE
; bit of texfailctrl -- the expected instruction carries the tfe modifier).
; Every target is still expected to emit dmask:0x1 with two zero-initialised
; result VGPRs v[1:2]; v1 and v2 are then each stored as a 32-bit value.
; Both struct fields are written with volatile stores to an undef addrspace(1)
; pointer.
define amdgpu_ps void @load_1d_v2f16_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) {
; GFX9-LABEL: load_1d_v2f16_tfe_dmask0:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b32 s11, s9
; GFX9-NEXT: s_mov_b32 s10, s8
; GFX9-NEXT: s_mov_b32 s9, s7
; GFX9-NEXT: s_mov_b32 s8, s6
; GFX9-NEXT: s_mov_b32 s7, s5
; GFX9-NEXT: s_mov_b32 s6, s4
; GFX9-NEXT: s_mov_b32 s5, s3
; GFX9-NEXT: s_mov_b32 s4, s2
; GFX9-NEXT: v_mov_b32_e32 v2, v1
; GFX9-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x1 unorm tfe d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v1, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: load_1d_v2f16_tfe_dmask0:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_mov_b32 s11, s9
; GFX10-NEXT: s_mov_b32 s10, s8
; GFX10-NEXT: s_mov_b32 s9, s7
; GFX10-NEXT: s_mov_b32 s8, s6
; GFX10-NEXT: s_mov_b32 s7, s5
; GFX10-NEXT: s_mov_b32 s6, s4
; GFX10-NEXT: s_mov_b32 s5, s3
; GFX10-NEXT: s_mov_b32 s4, s2
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v[0:1], v1, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: load_1d_v2f16_tfe_dmask0:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_mov_b32 s11, s9
; GFX11-NEXT: s_mov_b32 s10, s8
; GFX11-NEXT: s_mov_b32 s9, s7
; GFX11-NEXT: s_mov_b32 s8, s6
; GFX11-NEXT: s_mov_b32 s7, s5
; GFX11-NEXT: s_mov_b32 s6, s4
; GFX11-NEXT: s_mov_b32 s5, s3
; GFX11-NEXT: s_mov_b32 s4, s2
; GFX11-NEXT: v_mov_b32_e32 v2, v1
; GFX11-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
;
; GFX8-UNPACKED-LABEL: load_1d_v2f16_tfe_dmask0:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT: s_mov_b32 s11, s9
; GFX8-UNPACKED-NEXT: s_mov_b32 s10, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s9, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s8, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s2
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x1 unorm tfe d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: flat_store_dword v[0:1], v1
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: flat_store_dword v[0:1], v2
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: s_endpgm
  %v = call { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 0, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.data = extractvalue { <2 x half>, i32 } %v, 0
  %v.err = extractvalue { <2 x half>, i32 } %v, 1
  store volatile <2 x half> %v.data, ptr addrspace(1) undef
  store volatile i32 %v.err, ptr addrspace(1) undef
  ret void
}

; <2 x half> (d16) 1D image load returning { <2 x half>, i32 }, called with a
; dmask immediate of 1 and a fourth immediate operand of 1 (presumably the TFE
; bit of texfailctrl -- the expected instruction carries the tfe modifier).
; Only one channel is requested, so every target is expected to use two
; zero-initialised result VGPRs v[1:2] with dmask:0x1 and to store v1 and v2 as
; 32-bit values. Both struct fields are written with volatile stores to an undef
; addrspace(1) pointer.
define amdgpu_ps void @load_1d_v2f16_tfe_dmask1(<8 x i32> inreg %rsrc, i32 %s) {
; GFX9-LABEL: load_1d_v2f16_tfe_dmask1:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b32 s11, s9
; GFX9-NEXT: s_mov_b32 s10, s8
; GFX9-NEXT: s_mov_b32 s9, s7
; GFX9-NEXT: s_mov_b32 s8, s6
; GFX9-NEXT: s_mov_b32 s7, s5
; GFX9-NEXT: s_mov_b32 s6, s4
; GFX9-NEXT: s_mov_b32 s5, s3
; GFX9-NEXT: s_mov_b32 s4, s2
; GFX9-NEXT: v_mov_b32_e32 v2, v1
; GFX9-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x1 unorm tfe d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v1, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: load_1d_v2f16_tfe_dmask1:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_mov_b32 s11, s9
; GFX10-NEXT: s_mov_b32 s10, s8
; GFX10-NEXT: s_mov_b32 s9, s7
; GFX10-NEXT: s_mov_b32 s8, s6
; GFX10-NEXT: s_mov_b32 s7, s5
; GFX10-NEXT: s_mov_b32 s6, s4
; GFX10-NEXT: s_mov_b32 s5, s3
; GFX10-NEXT: s_mov_b32 s4, s2
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v[0:1], v1, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: load_1d_v2f16_tfe_dmask1:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_mov_b32 s11, s9
; GFX11-NEXT: s_mov_b32 s10, s8
; GFX11-NEXT: s_mov_b32 s9, s7
; GFX11-NEXT: s_mov_b32 s8, s6
; GFX11-NEXT: s_mov_b32 s7, s5
; GFX11-NEXT: s_mov_b32 s6, s4
; GFX11-NEXT: s_mov_b32 s5, s3
; GFX11-NEXT: s_mov_b32 s4, s2
; GFX11-NEXT: v_mov_b32_e32 v2, v1
; GFX11-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
;
; GFX8-UNPACKED-LABEL: load_1d_v2f16_tfe_dmask1:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT: s_mov_b32 s11, s9
; GFX8-UNPACKED-NEXT: s_mov_b32 s10, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s9, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s8, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s2
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x1 unorm tfe d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: flat_store_dword v[0:1], v1
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: flat_store_dword v[0:1], v2
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: s_endpgm
  %v = call { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.data = extractvalue { <2 x half>, i32 } %v, 0
  %v.err = extractvalue { <2 x half>, i32 } %v, 1
  store volatile <2 x half> %v.data, ptr addrspace(1) undef
  store volatile i32 %v.err, ptr addrspace(1) undef
  ret void
}

; <2 x half> (d16) 1D image load returning { <2 x half>, i32 }, called with a
; dmask immediate of 3 and a fourth immediate operand of 1 (presumably the TFE
; bit of texfailctrl -- the expected instruction carries the tfe modifier).
; On gfx900/gfx1010/gfx1100 the expected load writes two VGPRs v[1:2] and v1 is
; stored directly as a 32-bit value. On tonga the expected load writes three
; VGPRs v[1:3]; v1 and v2 are combined with v_lshlrev_b32 + v_or_b32_sdwa into
; one dword before the store, and v3 is stored as the second value.
; Both struct fields are written with volatile stores to an undef addrspace(1)
; pointer.
define amdgpu_ps void @load_1d_v2f16_tfe_dmask3(<8 x i32> inreg %rsrc, i32 %s) {
; GFX9-LABEL: load_1d_v2f16_tfe_dmask3:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b32 s11, s9
; GFX9-NEXT: s_mov_b32 s10, s8
; GFX9-NEXT: s_mov_b32 s9, s7
; GFX9-NEXT: s_mov_b32 s8, s6
; GFX9-NEXT: s_mov_b32 s7, s5
; GFX9-NEXT: s_mov_b32 s6, s4
; GFX9-NEXT: s_mov_b32 s5, s3
; GFX9-NEXT: s_mov_b32 s4, s2
; GFX9-NEXT: v_mov_b32_e32 v2, v1
; GFX9-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x3 unorm tfe d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v1, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: load_1d_v2f16_tfe_dmask3:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_mov_b32 s11, s9
; GFX10-NEXT: s_mov_b32 s10, s8
; GFX10-NEXT: s_mov_b32 s9, s7
; GFX10-NEXT: s_mov_b32 s8, s6
; GFX10-NEXT: s_mov_b32 s7, s5
; GFX10-NEXT: s_mov_b32 s6, s4
; GFX10-NEXT: s_mov_b32 s5, s3
; GFX10-NEXT: s_mov_b32 s4, s2
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v[0:1], v1, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: load_1d_v2f16_tfe_dmask3:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_mov_b32 s11, s9
; GFX11-NEXT: s_mov_b32 s10, s8
; GFX11-NEXT: s_mov_b32 s9, s7
; GFX11-NEXT: s_mov_b32 s8, s6
; GFX11-NEXT: s_mov_b32 s7, s5
; GFX11-NEXT: s_mov_b32 s6, s4
; GFX11-NEXT: s_mov_b32 s5, s3
; GFX11-NEXT: s_mov_b32 s4, s2
; GFX11-NEXT: v_mov_b32_e32 v2, v1
; GFX11-NEXT: image_load v[1:2], v0, s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
;
; GFX8-UNPACKED-LABEL: load_1d_v2f16_tfe_dmask3:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT: s_mov_b32 s11, s9
; GFX8-UNPACKED-NEXT: s_mov_b32 s10, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s9, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s8, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s2
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v3, v1
; GFX8-UNPACKED-NEXT: image_load v[1:3], v0, s[4:11] dmask:0x3 unorm tfe d16
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v0, 16, v2
; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT: flat_store_dword v[0:1], v0
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: flat_store_dword v[0:1], v3
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: s_endpgm
  %v = call { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.data = extractvalue { <2 x half>, i32 } %v, 0
  %v.err = extractvalue { <2 x half>, i32 } %v, 1
  store volatile <2 x half> %v.data, ptr addrspace(1) undef
  store volatile i32 %v.err, ptr addrspace(1) undef
  ret void
}

; <3 x half> (d16) 1D image load returning { <3 x half>, i32 }, called with a
; dmask immediate of 7 and a fourth immediate operand of 1 (presumably the TFE
; bit of texfailctrl -- the expected instruction carries the tfe modifier).
; On gfx900/gfx1010/gfx1100 the expected load writes three VGPRs v[1:3]; the
; data is stored as a 16-bit value from v2 plus a 32-bit value from v1, and v3
; is stored last. On tonga the expected load writes four VGPRs v[1:4]; v1 and v2
; are merged with v_perm_b32 (selector 0x1000504) before the dword store, v3
; provides the 16-bit store and v4 is stored last.
; Both struct fields are written with volatile stores to an undef addrspace(1)
; pointer.
define amdgpu_ps void @load_1d_v3f16_tfe_dmask7(<8 x i32> inreg %rsrc, i32 %s) {
; GFX9-LABEL: load_1d_v3f16_tfe_dmask7:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b32 s11, s9
; GFX9-NEXT: s_mov_b32 s10, s8
; GFX9-NEXT: s_mov_b32 s9, s7
; GFX9-NEXT: s_mov_b32 s8, s6
; GFX9-NEXT: s_mov_b32 s7, s5
; GFX9-NEXT: s_mov_b32 s6, s4
; GFX9-NEXT: s_mov_b32 s5, s3
; GFX9-NEXT: s_mov_b32 s4, s2
; GFX9-NEXT: v_mov_b32_e32 v2, v1
; GFX9-NEXT: v_mov_b32_e32 v3, v1
; GFX9-NEXT: image_load v[1:3], v0, s[4:11] dmask:0x7 unorm tfe d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_short v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v1, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v3, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: load_1d_v3f16_tfe_dmask7:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_mov_b32 s11, s9
; GFX10-NEXT: s_mov_b32 s10, s8
; GFX10-NEXT: s_mov_b32 s9, s7
; GFX10-NEXT: s_mov_b32 s8, s6
; GFX10-NEXT: s_mov_b32 s7, s5
; GFX10-NEXT: s_mov_b32 s6, s4
; GFX10-NEXT: s_mov_b32 s5, s3
; GFX10-NEXT: s_mov_b32 s4, s2
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: v_mov_b32_e32 v3, v1
; GFX10-NEXT: image_load v[1:3], v0, s[4:11] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_short v[0:1], v2, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dword v[0:1], v1, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dword v[0:1], v3, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: load_1d_v3f16_tfe_dmask7:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_mov_b32 s11, s9
; GFX11-NEXT: s_mov_b32 s10, s8
; GFX11-NEXT: s_mov_b32 s9, s7
; GFX11-NEXT: s_mov_b32 s8, s6
; GFX11-NEXT: s_mov_b32 s7, s5
; GFX11-NEXT: s_mov_b32 s6, s4
; GFX11-NEXT: s_mov_b32 s5, s3
; GFX11-NEXT: s_mov_b32 s4, s2
; GFX11-NEXT: v_mov_b32_e32 v2, v1
; GFX11-NEXT: v_mov_b32_e32 v3, v1
; GFX11-NEXT: image_load v[1:3], v0, s[4:11] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b16 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
;
; GFX8-UNPACKED-LABEL: load_1d_v3f16_tfe_dmask7:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT: s_mov_b32 s11, s9
; GFX8-UNPACKED-NEXT: s_mov_b32 s10, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s9, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s8, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s2
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v3, v1
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v4, v1
; GFX8-UNPACKED-NEXT: image_load v[1:4], v0, s[4:11] dmask:0x7 unorm tfe d16
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, 0x1000504
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: v_perm_b32 v0, v1, v2, s0
; GFX8-UNPACKED-NEXT: flat_store_short v[0:1], v3
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: flat_store_dword v[0:1], v0
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: flat_store_dword v[0:1], v4
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: s_endpgm
  %v = call { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.data = extractvalue { <3 x half>, i32 } %v, 0
  %v.err = extractvalue { <3 x half>, i32 } %v, 1
  store volatile <3 x half> %v.data, ptr addrspace(1) undef
  store volatile i32 %v.err, ptr addrspace(1) undef
  ret void
}

; <4 x half> (d16) 1D image load returning { <4 x half>, i32 }, called with a
; dmask immediate of 15 and a fourth immediate operand of 1 (presumably the TFE
; bit of texfailctrl -- the expected instruction carries the tfe modifier).
; On gfx900/gfx1010/gfx1100 the expected load writes three VGPRs v[1:3]; v[1:2]
; is stored as one 64-bit value and v3 as a 32-bit value. On tonga the expected
; load writes five VGPRs v[1:5]; two v_perm_b32 instructions (selector
; 0x1000504) merge v1/v2 and v3/v4 into v[2:3] for the 64-bit store, and v5 is
; stored last.
; Both struct fields are written with volatile stores to an undef addrspace(1)
; pointer.
define amdgpu_ps void @load_1d_v4f16_tfe_dmask15(<8 x i32> inreg %rsrc, i32 %s) {
; GFX9-LABEL: load_1d_v4f16_tfe_dmask15:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b32 s11, s9
; GFX9-NEXT: s_mov_b32 s10, s8
; GFX9-NEXT: s_mov_b32 s9, s7
; GFX9-NEXT: s_mov_b32 s8, s6
; GFX9-NEXT: s_mov_b32 s7, s5
; GFX9-NEXT: s_mov_b32 s6, s4
; GFX9-NEXT: s_mov_b32 s5, s3
; GFX9-NEXT: s_mov_b32 s4, s2
; GFX9-NEXT: v_mov_b32_e32 v2, v1
; GFX9-NEXT: v_mov_b32_e32 v3, v1
; GFX9-NEXT: image_load v[1:3], v0, s[4:11] dmask:0xf unorm tfe d16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[1:2], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v3, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: load_1d_v4f16_tfe_dmask15:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_mov_b32 s11, s9
; GFX10-NEXT: s_mov_b32 s10, s8
; GFX10-NEXT: s_mov_b32 s9, s7
; GFX10-NEXT: s_mov_b32 s8, s6
; GFX10-NEXT: s_mov_b32 s7, s5
; GFX10-NEXT: s_mov_b32 s6, s4
; GFX10-NEXT: s_mov_b32 s5, s3
; GFX10-NEXT: s_mov_b32 s4, s2
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: v_mov_b32_e32 v3, v1
; GFX10-NEXT: image_load v[1:3], v0, s[4:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx2 v[0:1], v[1:2], off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dword v[0:1], v3, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: load_1d_v4f16_tfe_dmask15:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_mov_b32 s11, s9
; GFX11-NEXT: s_mov_b32 s10, s8
; GFX11-NEXT: s_mov_b32 s9, s7
; GFX11-NEXT: s_mov_b32 s8, s6
; GFX11-NEXT: s_mov_b32 s7, s5
; GFX11-NEXT: s_mov_b32 s6, s4
; GFX11-NEXT: s_mov_b32 s5, s3
; GFX11-NEXT: s_mov_b32 s4, s2
; GFX11-NEXT: v_mov_b32_e32 v2, v1
; GFX11-NEXT: v_mov_b32_e32 v3, v1
; GFX11-NEXT: image_load v[1:3], v0, s[4:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b64 v[0:1], v[1:2], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
;
; GFX8-UNPACKED-LABEL: load_1d_v4f16_tfe_dmask15:
; GFX8-UNPACKED: ; %bb.0:
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT: s_mov_b32 s11, s9
; GFX8-UNPACKED-NEXT: s_mov_b32 s10, s8
; GFX8-UNPACKED-NEXT: s_mov_b32 s9, s7
; GFX8-UNPACKED-NEXT: s_mov_b32 s8, s6
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s5
; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s4
; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s3
; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s2
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v3, v1
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v4, v1
; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v5, v1
; GFX8-UNPACKED-NEXT: image_load v[1:5], v0, s[4:11] dmask:0xf unorm tfe d16
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, 0x1000504
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: v_perm_b32 v3, v3, v4, s0
; GFX8-UNPACKED-NEXT: v_perm_b32 v2, v1, v2, s0
; GFX8-UNPACKED-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: flat_store_dword v[0:1], v5
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT: s_endpgm
  %v = call { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.data = extractvalue { <4 x half>, i32 } %v, 0
  %v.err = extractvalue { <4 x half>, i32 } %v, 1
  store volatile <4 x half> %v.data, ptr addrspace(1) undef
  store volatile i32 %v.err, ptr addrspace(1) undef
  ret void
}

; Declarations of the 1D image-load intrinsic overloads used above. Each one
; returns a two-element struct whose first member is the half-typed data
; (scalar, <2 x half>, <3 x half> or <4 x half>) and whose second member is an
; i32. The first and the last two i32 operands are immediates (immarg); the
; remaining operands are an i32 and the <8 x i32> value the tests pass as %rsrc.
declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0

; Attribute group shared by all of the intrinsic declarations above.
attributes #0 = { nounwind readonly }