1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX68 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GFX68 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=-enable-prt-strict-null -verify-machineinstrs < %s | FileCheck -check-prefix=NOPRT %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
7 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
; Single-float 1D image load with the intrinsic's first operand set to 1.
; For every check prefix the expected assembly is eight s_mov_b32 copies that
; fill s[0:7], followed by one image_load into v0 carrying dmask:0x1 and a
; wait on the load counter.
9 define amdgpu_ps float @load_1d_f32_x(<8 x i32> inreg %rsrc, i32 %s) {
10 ; GFX68-LABEL: load_1d_f32_x:
12 ; GFX68-NEXT: s_mov_b32 s0, s2
13 ; GFX68-NEXT: s_mov_b32 s1, s3
14 ; GFX68-NEXT: s_mov_b32 s2, s4
15 ; GFX68-NEXT: s_mov_b32 s3, s5
16 ; GFX68-NEXT: s_mov_b32 s4, s6
17 ; GFX68-NEXT: s_mov_b32 s5, s7
18 ; GFX68-NEXT: s_mov_b32 s6, s8
19 ; GFX68-NEXT: s_mov_b32 s7, s9
20 ; GFX68-NEXT: image_load v0, v0, s[0:7] dmask:0x1 unorm
21 ; GFX68-NEXT: s_waitcnt vmcnt(0)
22 ; GFX68-NEXT: ; return to shader part epilog
24 ; GFX10-LABEL: load_1d_f32_x:
26 ; GFX10-NEXT: s_mov_b32 s0, s2
27 ; GFX10-NEXT: s_mov_b32 s1, s3
28 ; GFX10-NEXT: s_mov_b32 s2, s4
29 ; GFX10-NEXT: s_mov_b32 s3, s5
30 ; GFX10-NEXT: s_mov_b32 s4, s6
31 ; GFX10-NEXT: s_mov_b32 s5, s7
32 ; GFX10-NEXT: s_mov_b32 s6, s8
33 ; GFX10-NEXT: s_mov_b32 s7, s9
34 ; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
35 ; GFX10-NEXT: s_waitcnt vmcnt(0)
36 ; GFX10-NEXT: ; return to shader part epilog
38 ; NOPRT-LABEL: load_1d_f32_x:
40 ; NOPRT-NEXT: s_mov_b32 s0, s2
41 ; NOPRT-NEXT: s_mov_b32 s1, s3
42 ; NOPRT-NEXT: s_mov_b32 s2, s4
43 ; NOPRT-NEXT: s_mov_b32 s3, s5
44 ; NOPRT-NEXT: s_mov_b32 s4, s6
45 ; NOPRT-NEXT: s_mov_b32 s5, s7
46 ; NOPRT-NEXT: s_mov_b32 s6, s8
47 ; NOPRT-NEXT: s_mov_b32 s7, s9
48 ; NOPRT-NEXT: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
49 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
50 ; NOPRT-NEXT: ; return to shader part epilog
52 ; GFX12-LABEL: load_1d_f32_x:
54 ; GFX12-NEXT: s_mov_b32 s0, s2
55 ; GFX12-NEXT: s_mov_b32 s1, s3
56 ; GFX12-NEXT: s_mov_b32 s2, s4
57 ; GFX12-NEXT: s_mov_b32 s3, s5
58 ; GFX12-NEXT: s_mov_b32 s4, s6
59 ; GFX12-NEXT: s_mov_b32 s5, s7
60 ; GFX12-NEXT: s_mov_b32 s6, s8
61 ; GFX12-NEXT: s_mov_b32 s7, s9
62 ; GFX12-NEXT: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
63 ; GFX12-NEXT: s_wait_loadcnt 0x0
64 ; GFX12-NEXT: ; return to shader part epilog
65 %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Single-float 1D image load with the intrinsic's first operand set to 2.
; For every check prefix the expected assembly is eight s_mov_b32 copies that
; fill s[0:7], followed by one image_load into v0 carrying dmask:0x2.
69 define amdgpu_ps float @load_1d_f32_y(<8 x i32> inreg %rsrc, i32 %s) {
70 ; GFX68-LABEL: load_1d_f32_y:
72 ; GFX68-NEXT: s_mov_b32 s0, s2
73 ; GFX68-NEXT: s_mov_b32 s1, s3
74 ; GFX68-NEXT: s_mov_b32 s2, s4
75 ; GFX68-NEXT: s_mov_b32 s3, s5
76 ; GFX68-NEXT: s_mov_b32 s4, s6
77 ; GFX68-NEXT: s_mov_b32 s5, s7
78 ; GFX68-NEXT: s_mov_b32 s6, s8
79 ; GFX68-NEXT: s_mov_b32 s7, s9
80 ; GFX68-NEXT: image_load v0, v0, s[0:7] dmask:0x2 unorm
81 ; GFX68-NEXT: s_waitcnt vmcnt(0)
82 ; GFX68-NEXT: ; return to shader part epilog
84 ; GFX10-LABEL: load_1d_f32_y:
86 ; GFX10-NEXT: s_mov_b32 s0, s2
87 ; GFX10-NEXT: s_mov_b32 s1, s3
88 ; GFX10-NEXT: s_mov_b32 s2, s4
89 ; GFX10-NEXT: s_mov_b32 s3, s5
90 ; GFX10-NEXT: s_mov_b32 s4, s6
91 ; GFX10-NEXT: s_mov_b32 s5, s7
92 ; GFX10-NEXT: s_mov_b32 s6, s8
93 ; GFX10-NEXT: s_mov_b32 s7, s9
94 ; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm
95 ; GFX10-NEXT: s_waitcnt vmcnt(0)
96 ; GFX10-NEXT: ; return to shader part epilog
98 ; NOPRT-LABEL: load_1d_f32_y:
100 ; NOPRT-NEXT: s_mov_b32 s0, s2
101 ; NOPRT-NEXT: s_mov_b32 s1, s3
102 ; NOPRT-NEXT: s_mov_b32 s2, s4
103 ; NOPRT-NEXT: s_mov_b32 s3, s5
104 ; NOPRT-NEXT: s_mov_b32 s4, s6
105 ; NOPRT-NEXT: s_mov_b32 s5, s7
106 ; NOPRT-NEXT: s_mov_b32 s6, s8
107 ; NOPRT-NEXT: s_mov_b32 s7, s9
108 ; NOPRT-NEXT: image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm
109 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
110 ; NOPRT-NEXT: ; return to shader part epilog
112 ; GFX12-LABEL: load_1d_f32_y:
114 ; GFX12-NEXT: s_mov_b32 s0, s2
115 ; GFX12-NEXT: s_mov_b32 s1, s3
116 ; GFX12-NEXT: s_mov_b32 s2, s4
117 ; GFX12-NEXT: s_mov_b32 s3, s5
118 ; GFX12-NEXT: s_mov_b32 s4, s6
119 ; GFX12-NEXT: s_mov_b32 s5, s7
120 ; GFX12-NEXT: s_mov_b32 s6, s8
121 ; GFX12-NEXT: s_mov_b32 s7, s9
122 ; GFX12-NEXT: image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D
123 ; GFX12-NEXT: s_wait_loadcnt 0x0
124 ; GFX12-NEXT: ; return to shader part epilog
125 %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Single-float 1D image load with the intrinsic's first operand set to 4.
; For every check prefix the expected assembly is eight s_mov_b32 copies that
; fill s[0:7], followed by one image_load into v0 carrying dmask:0x4.
129 define amdgpu_ps float @load_1d_f32_z(<8 x i32> inreg %rsrc, i32 %s) {
130 ; GFX68-LABEL: load_1d_f32_z:
132 ; GFX68-NEXT: s_mov_b32 s0, s2
133 ; GFX68-NEXT: s_mov_b32 s1, s3
134 ; GFX68-NEXT: s_mov_b32 s2, s4
135 ; GFX68-NEXT: s_mov_b32 s3, s5
136 ; GFX68-NEXT: s_mov_b32 s4, s6
137 ; GFX68-NEXT: s_mov_b32 s5, s7
138 ; GFX68-NEXT: s_mov_b32 s6, s8
139 ; GFX68-NEXT: s_mov_b32 s7, s9
140 ; GFX68-NEXT: image_load v0, v0, s[0:7] dmask:0x4 unorm
141 ; GFX68-NEXT: s_waitcnt vmcnt(0)
142 ; GFX68-NEXT: ; return to shader part epilog
144 ; GFX10-LABEL: load_1d_f32_z:
146 ; GFX10-NEXT: s_mov_b32 s0, s2
147 ; GFX10-NEXT: s_mov_b32 s1, s3
148 ; GFX10-NEXT: s_mov_b32 s2, s4
149 ; GFX10-NEXT: s_mov_b32 s3, s5
150 ; GFX10-NEXT: s_mov_b32 s4, s6
151 ; GFX10-NEXT: s_mov_b32 s5, s7
152 ; GFX10-NEXT: s_mov_b32 s6, s8
153 ; GFX10-NEXT: s_mov_b32 s7, s9
154 ; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm
155 ; GFX10-NEXT: s_waitcnt vmcnt(0)
156 ; GFX10-NEXT: ; return to shader part epilog
158 ; NOPRT-LABEL: load_1d_f32_z:
160 ; NOPRT-NEXT: s_mov_b32 s0, s2
161 ; NOPRT-NEXT: s_mov_b32 s1, s3
162 ; NOPRT-NEXT: s_mov_b32 s2, s4
163 ; NOPRT-NEXT: s_mov_b32 s3, s5
164 ; NOPRT-NEXT: s_mov_b32 s4, s6
165 ; NOPRT-NEXT: s_mov_b32 s5, s7
166 ; NOPRT-NEXT: s_mov_b32 s6, s8
167 ; NOPRT-NEXT: s_mov_b32 s7, s9
168 ; NOPRT-NEXT: image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm
169 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
170 ; NOPRT-NEXT: ; return to shader part epilog
172 ; GFX12-LABEL: load_1d_f32_z:
174 ; GFX12-NEXT: s_mov_b32 s0, s2
175 ; GFX12-NEXT: s_mov_b32 s1, s3
176 ; GFX12-NEXT: s_mov_b32 s2, s4
177 ; GFX12-NEXT: s_mov_b32 s3, s5
178 ; GFX12-NEXT: s_mov_b32 s4, s6
179 ; GFX12-NEXT: s_mov_b32 s5, s7
180 ; GFX12-NEXT: s_mov_b32 s6, s8
181 ; GFX12-NEXT: s_mov_b32 s7, s9
182 ; GFX12-NEXT: image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D
183 ; GFX12-NEXT: s_wait_loadcnt 0x0
184 ; GFX12-NEXT: ; return to shader part epilog
185 %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 4, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Single-float 1D image load with the intrinsic's first operand set to 8.
; For every check prefix the expected assembly is eight s_mov_b32 copies that
; fill s[0:7], followed by one image_load into v0 carrying dmask:0x8.
189 define amdgpu_ps float @load_1d_f32_w(<8 x i32> inreg %rsrc, i32 %s) {
190 ; GFX68-LABEL: load_1d_f32_w:
192 ; GFX68-NEXT: s_mov_b32 s0, s2
193 ; GFX68-NEXT: s_mov_b32 s1, s3
194 ; GFX68-NEXT: s_mov_b32 s2, s4
195 ; GFX68-NEXT: s_mov_b32 s3, s5
196 ; GFX68-NEXT: s_mov_b32 s4, s6
197 ; GFX68-NEXT: s_mov_b32 s5, s7
198 ; GFX68-NEXT: s_mov_b32 s6, s8
199 ; GFX68-NEXT: s_mov_b32 s7, s9
200 ; GFX68-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm
201 ; GFX68-NEXT: s_waitcnt vmcnt(0)
202 ; GFX68-NEXT: ; return to shader part epilog
204 ; GFX10-LABEL: load_1d_f32_w:
206 ; GFX10-NEXT: s_mov_b32 s0, s2
207 ; GFX10-NEXT: s_mov_b32 s1, s3
208 ; GFX10-NEXT: s_mov_b32 s2, s4
209 ; GFX10-NEXT: s_mov_b32 s3, s5
210 ; GFX10-NEXT: s_mov_b32 s4, s6
211 ; GFX10-NEXT: s_mov_b32 s5, s7
212 ; GFX10-NEXT: s_mov_b32 s6, s8
213 ; GFX10-NEXT: s_mov_b32 s7, s9
214 ; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm
215 ; GFX10-NEXT: s_waitcnt vmcnt(0)
216 ; GFX10-NEXT: ; return to shader part epilog
218 ; NOPRT-LABEL: load_1d_f32_w:
220 ; NOPRT-NEXT: s_mov_b32 s0, s2
221 ; NOPRT-NEXT: s_mov_b32 s1, s3
222 ; NOPRT-NEXT: s_mov_b32 s2, s4
223 ; NOPRT-NEXT: s_mov_b32 s3, s5
224 ; NOPRT-NEXT: s_mov_b32 s4, s6
225 ; NOPRT-NEXT: s_mov_b32 s5, s7
226 ; NOPRT-NEXT: s_mov_b32 s6, s8
227 ; NOPRT-NEXT: s_mov_b32 s7, s9
228 ; NOPRT-NEXT: image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm
229 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
230 ; NOPRT-NEXT: ; return to shader part epilog
232 ; GFX12-LABEL: load_1d_f32_w:
234 ; GFX12-NEXT: s_mov_b32 s0, s2
235 ; GFX12-NEXT: s_mov_b32 s1, s3
236 ; GFX12-NEXT: s_mov_b32 s2, s4
237 ; GFX12-NEXT: s_mov_b32 s3, s5
238 ; GFX12-NEXT: s_mov_b32 s4, s6
239 ; GFX12-NEXT: s_mov_b32 s5, s7
240 ; GFX12-NEXT: s_mov_b32 s6, s8
241 ; GFX12-NEXT: s_mov_b32 s7, s9
242 ; GFX12-NEXT: image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D
243 ; GFX12-NEXT: s_wait_loadcnt 0x0
244 ; GFX12-NEXT: ; return to shader part epilog
245 %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Two-float (<2 x float>) 1D image load with the intrinsic's first operand set
; to 3. For every check prefix the expected assembly is eight s_mov_b32 copies
; that fill s[0:7], followed by one image_load into v[0:1] carrying dmask:0x3.
249 define amdgpu_ps <2 x float> @load_1d_v2f32_xy(<8 x i32> inreg %rsrc, i32 %s) {
250 ; GFX68-LABEL: load_1d_v2f32_xy:
252 ; GFX68-NEXT: s_mov_b32 s0, s2
253 ; GFX68-NEXT: s_mov_b32 s1, s3
254 ; GFX68-NEXT: s_mov_b32 s2, s4
255 ; GFX68-NEXT: s_mov_b32 s3, s5
256 ; GFX68-NEXT: s_mov_b32 s4, s6
257 ; GFX68-NEXT: s_mov_b32 s5, s7
258 ; GFX68-NEXT: s_mov_b32 s6, s8
259 ; GFX68-NEXT: s_mov_b32 s7, s9
260 ; GFX68-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm
261 ; GFX68-NEXT: s_waitcnt vmcnt(0)
262 ; GFX68-NEXT: ; return to shader part epilog
264 ; GFX10-LABEL: load_1d_v2f32_xy:
266 ; GFX10-NEXT: s_mov_b32 s0, s2
267 ; GFX10-NEXT: s_mov_b32 s1, s3
268 ; GFX10-NEXT: s_mov_b32 s2, s4
269 ; GFX10-NEXT: s_mov_b32 s3, s5
270 ; GFX10-NEXT: s_mov_b32 s4, s6
271 ; GFX10-NEXT: s_mov_b32 s5, s7
272 ; GFX10-NEXT: s_mov_b32 s6, s8
273 ; GFX10-NEXT: s_mov_b32 s7, s9
274 ; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
275 ; GFX10-NEXT: s_waitcnt vmcnt(0)
276 ; GFX10-NEXT: ; return to shader part epilog
278 ; NOPRT-LABEL: load_1d_v2f32_xy:
280 ; NOPRT-NEXT: s_mov_b32 s0, s2
281 ; NOPRT-NEXT: s_mov_b32 s1, s3
282 ; NOPRT-NEXT: s_mov_b32 s2, s4
283 ; NOPRT-NEXT: s_mov_b32 s3, s5
284 ; NOPRT-NEXT: s_mov_b32 s4, s6
285 ; NOPRT-NEXT: s_mov_b32 s5, s7
286 ; NOPRT-NEXT: s_mov_b32 s6, s8
287 ; NOPRT-NEXT: s_mov_b32 s7, s9
288 ; NOPRT-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
289 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
290 ; NOPRT-NEXT: ; return to shader part epilog
292 ; GFX12-LABEL: load_1d_v2f32_xy:
294 ; GFX12-NEXT: s_mov_b32 s0, s2
295 ; GFX12-NEXT: s_mov_b32 s1, s3
296 ; GFX12-NEXT: s_mov_b32 s2, s4
297 ; GFX12-NEXT: s_mov_b32 s3, s5
298 ; GFX12-NEXT: s_mov_b32 s4, s6
299 ; GFX12-NEXT: s_mov_b32 s5, s7
300 ; GFX12-NEXT: s_mov_b32 s6, s8
301 ; GFX12-NEXT: s_mov_b32 s7, s9
302 ; GFX12-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
303 ; GFX12-NEXT: s_wait_loadcnt 0x0
304 ; GFX12-NEXT: ; return to shader part epilog
305 %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Two-float (<2 x float>) 1D image load with the intrinsic's first operand set
; to 5. For every check prefix the expected assembly is eight s_mov_b32 copies
; that fill s[0:7], followed by one image_load into v[0:1] carrying dmask:0x5.
309 define amdgpu_ps <2 x float> @load_1d_v2f32_xz(<8 x i32> inreg %rsrc, i32 %s) {
310 ; GFX68-LABEL: load_1d_v2f32_xz:
312 ; GFX68-NEXT: s_mov_b32 s0, s2
313 ; GFX68-NEXT: s_mov_b32 s1, s3
314 ; GFX68-NEXT: s_mov_b32 s2, s4
315 ; GFX68-NEXT: s_mov_b32 s3, s5
316 ; GFX68-NEXT: s_mov_b32 s4, s6
317 ; GFX68-NEXT: s_mov_b32 s5, s7
318 ; GFX68-NEXT: s_mov_b32 s6, s8
319 ; GFX68-NEXT: s_mov_b32 s7, s9
320 ; GFX68-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x5 unorm
321 ; GFX68-NEXT: s_waitcnt vmcnt(0)
322 ; GFX68-NEXT: ; return to shader part epilog
324 ; GFX10-LABEL: load_1d_v2f32_xz:
326 ; GFX10-NEXT: s_mov_b32 s0, s2
327 ; GFX10-NEXT: s_mov_b32 s1, s3
328 ; GFX10-NEXT: s_mov_b32 s2, s4
329 ; GFX10-NEXT: s_mov_b32 s3, s5
330 ; GFX10-NEXT: s_mov_b32 s4, s6
331 ; GFX10-NEXT: s_mov_b32 s5, s7
332 ; GFX10-NEXT: s_mov_b32 s6, s8
333 ; GFX10-NEXT: s_mov_b32 s7, s9
334 ; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm
335 ; GFX10-NEXT: s_waitcnt vmcnt(0)
336 ; GFX10-NEXT: ; return to shader part epilog
338 ; NOPRT-LABEL: load_1d_v2f32_xz:
340 ; NOPRT-NEXT: s_mov_b32 s0, s2
341 ; NOPRT-NEXT: s_mov_b32 s1, s3
342 ; NOPRT-NEXT: s_mov_b32 s2, s4
343 ; NOPRT-NEXT: s_mov_b32 s3, s5
344 ; NOPRT-NEXT: s_mov_b32 s4, s6
345 ; NOPRT-NEXT: s_mov_b32 s5, s7
346 ; NOPRT-NEXT: s_mov_b32 s6, s8
347 ; NOPRT-NEXT: s_mov_b32 s7, s9
348 ; NOPRT-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm
349 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
350 ; NOPRT-NEXT: ; return to shader part epilog
352 ; GFX12-LABEL: load_1d_v2f32_xz:
354 ; GFX12-NEXT: s_mov_b32 s0, s2
355 ; GFX12-NEXT: s_mov_b32 s1, s3
356 ; GFX12-NEXT: s_mov_b32 s2, s4
357 ; GFX12-NEXT: s_mov_b32 s3, s5
358 ; GFX12-NEXT: s_mov_b32 s4, s6
359 ; GFX12-NEXT: s_mov_b32 s5, s7
360 ; GFX12-NEXT: s_mov_b32 s6, s8
361 ; GFX12-NEXT: s_mov_b32 s7, s9
362 ; GFX12-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D
363 ; GFX12-NEXT: s_wait_loadcnt 0x0
364 ; GFX12-NEXT: ; return to shader part epilog
365 %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 5, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Two-float (<2 x float>) 1D image load with the intrinsic's first operand set
; to 9. For every check prefix the expected assembly is eight s_mov_b32 copies
; that fill s[0:7], followed by one image_load into v[0:1] carrying dmask:0x9.
369 define amdgpu_ps <2 x float> @load_1d_v2f32_xw(<8 x i32> inreg %rsrc, i32 %s) {
370 ; GFX68-LABEL: load_1d_v2f32_xw:
372 ; GFX68-NEXT: s_mov_b32 s0, s2
373 ; GFX68-NEXT: s_mov_b32 s1, s3
374 ; GFX68-NEXT: s_mov_b32 s2, s4
375 ; GFX68-NEXT: s_mov_b32 s3, s5
376 ; GFX68-NEXT: s_mov_b32 s4, s6
377 ; GFX68-NEXT: s_mov_b32 s5, s7
378 ; GFX68-NEXT: s_mov_b32 s6, s8
379 ; GFX68-NEXT: s_mov_b32 s7, s9
380 ; GFX68-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm
381 ; GFX68-NEXT: s_waitcnt vmcnt(0)
382 ; GFX68-NEXT: ; return to shader part epilog
384 ; GFX10-LABEL: load_1d_v2f32_xw:
386 ; GFX10-NEXT: s_mov_b32 s0, s2
387 ; GFX10-NEXT: s_mov_b32 s1, s3
388 ; GFX10-NEXT: s_mov_b32 s2, s4
389 ; GFX10-NEXT: s_mov_b32 s3, s5
390 ; GFX10-NEXT: s_mov_b32 s4, s6
391 ; GFX10-NEXT: s_mov_b32 s5, s7
392 ; GFX10-NEXT: s_mov_b32 s6, s8
393 ; GFX10-NEXT: s_mov_b32 s7, s9
394 ; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm
395 ; GFX10-NEXT: s_waitcnt vmcnt(0)
396 ; GFX10-NEXT: ; return to shader part epilog
398 ; NOPRT-LABEL: load_1d_v2f32_xw:
400 ; NOPRT-NEXT: s_mov_b32 s0, s2
401 ; NOPRT-NEXT: s_mov_b32 s1, s3
402 ; NOPRT-NEXT: s_mov_b32 s2, s4
403 ; NOPRT-NEXT: s_mov_b32 s3, s5
404 ; NOPRT-NEXT: s_mov_b32 s4, s6
405 ; NOPRT-NEXT: s_mov_b32 s5, s7
406 ; NOPRT-NEXT: s_mov_b32 s6, s8
407 ; NOPRT-NEXT: s_mov_b32 s7, s9
408 ; NOPRT-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm
409 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
410 ; NOPRT-NEXT: ; return to shader part epilog
412 ; GFX12-LABEL: load_1d_v2f32_xw:
414 ; GFX12-NEXT: s_mov_b32 s0, s2
415 ; GFX12-NEXT: s_mov_b32 s1, s3
416 ; GFX12-NEXT: s_mov_b32 s2, s4
417 ; GFX12-NEXT: s_mov_b32 s3, s5
418 ; GFX12-NEXT: s_mov_b32 s4, s6
419 ; GFX12-NEXT: s_mov_b32 s5, s7
420 ; GFX12-NEXT: s_mov_b32 s6, s8
421 ; GFX12-NEXT: s_mov_b32 s7, s9
422 ; GFX12-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D
423 ; GFX12-NEXT: s_wait_loadcnt 0x0
424 ; GFX12-NEXT: ; return to shader part epilog
425 %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Two-float (<2 x float>) 1D image load with the intrinsic's first operand set
; to 6. For every check prefix the expected assembly is eight s_mov_b32 copies
; that fill s[0:7], followed by one image_load into v[0:1] carrying dmask:0x6.
429 define amdgpu_ps <2 x float> @load_1d_v2f32_yz(<8 x i32> inreg %rsrc, i32 %s) {
430 ; GFX68-LABEL: load_1d_v2f32_yz:
432 ; GFX68-NEXT: s_mov_b32 s0, s2
433 ; GFX68-NEXT: s_mov_b32 s1, s3
434 ; GFX68-NEXT: s_mov_b32 s2, s4
435 ; GFX68-NEXT: s_mov_b32 s3, s5
436 ; GFX68-NEXT: s_mov_b32 s4, s6
437 ; GFX68-NEXT: s_mov_b32 s5, s7
438 ; GFX68-NEXT: s_mov_b32 s6, s8
439 ; GFX68-NEXT: s_mov_b32 s7, s9
440 ; GFX68-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x6 unorm
441 ; GFX68-NEXT: s_waitcnt vmcnt(0)
442 ; GFX68-NEXT: ; return to shader part epilog
444 ; GFX10-LABEL: load_1d_v2f32_yz:
446 ; GFX10-NEXT: s_mov_b32 s0, s2
447 ; GFX10-NEXT: s_mov_b32 s1, s3
448 ; GFX10-NEXT: s_mov_b32 s2, s4
449 ; GFX10-NEXT: s_mov_b32 s3, s5
450 ; GFX10-NEXT: s_mov_b32 s4, s6
451 ; GFX10-NEXT: s_mov_b32 s5, s7
452 ; GFX10-NEXT: s_mov_b32 s6, s8
453 ; GFX10-NEXT: s_mov_b32 s7, s9
454 ; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm
455 ; GFX10-NEXT: s_waitcnt vmcnt(0)
456 ; GFX10-NEXT: ; return to shader part epilog
458 ; NOPRT-LABEL: load_1d_v2f32_yz:
460 ; NOPRT-NEXT: s_mov_b32 s0, s2
461 ; NOPRT-NEXT: s_mov_b32 s1, s3
462 ; NOPRT-NEXT: s_mov_b32 s2, s4
463 ; NOPRT-NEXT: s_mov_b32 s3, s5
464 ; NOPRT-NEXT: s_mov_b32 s4, s6
465 ; NOPRT-NEXT: s_mov_b32 s5, s7
466 ; NOPRT-NEXT: s_mov_b32 s6, s8
467 ; NOPRT-NEXT: s_mov_b32 s7, s9
468 ; NOPRT-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm
469 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
470 ; NOPRT-NEXT: ; return to shader part epilog
472 ; GFX12-LABEL: load_1d_v2f32_yz:
474 ; GFX12-NEXT: s_mov_b32 s0, s2
475 ; GFX12-NEXT: s_mov_b32 s1, s3
476 ; GFX12-NEXT: s_mov_b32 s2, s4
477 ; GFX12-NEXT: s_mov_b32 s3, s5
478 ; GFX12-NEXT: s_mov_b32 s4, s6
479 ; GFX12-NEXT: s_mov_b32 s5, s7
480 ; GFX12-NEXT: s_mov_b32 s6, s8
481 ; GFX12-NEXT: s_mov_b32 s7, s9
482 ; GFX12-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D
483 ; GFX12-NEXT: s_wait_loadcnt 0x0
484 ; GFX12-NEXT: ; return to shader part epilog
485 %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Three-float (<3 x float>) 1D image load with the intrinsic's first operand
; set to 7. For every check prefix the expected assembly is eight s_mov_b32
; copies that fill s[0:7], followed by one image_load into v[0:2] carrying
; dmask:0x7.
489 define amdgpu_ps <3 x float> @load_1d_v3f32_xyz(<8 x i32> inreg %rsrc, i32 %s) {
490 ; GFX68-LABEL: load_1d_v3f32_xyz:
492 ; GFX68-NEXT: s_mov_b32 s0, s2
493 ; GFX68-NEXT: s_mov_b32 s1, s3
494 ; GFX68-NEXT: s_mov_b32 s2, s4
495 ; GFX68-NEXT: s_mov_b32 s3, s5
496 ; GFX68-NEXT: s_mov_b32 s4, s6
497 ; GFX68-NEXT: s_mov_b32 s5, s7
498 ; GFX68-NEXT: s_mov_b32 s6, s8
499 ; GFX68-NEXT: s_mov_b32 s7, s9
500 ; GFX68-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm
501 ; GFX68-NEXT: s_waitcnt vmcnt(0)
502 ; GFX68-NEXT: ; return to shader part epilog
504 ; GFX10-LABEL: load_1d_v3f32_xyz:
506 ; GFX10-NEXT: s_mov_b32 s0, s2
507 ; GFX10-NEXT: s_mov_b32 s1, s3
508 ; GFX10-NEXT: s_mov_b32 s2, s4
509 ; GFX10-NEXT: s_mov_b32 s3, s5
510 ; GFX10-NEXT: s_mov_b32 s4, s6
511 ; GFX10-NEXT: s_mov_b32 s5, s7
512 ; GFX10-NEXT: s_mov_b32 s6, s8
513 ; GFX10-NEXT: s_mov_b32 s7, s9
514 ; GFX10-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm
515 ; GFX10-NEXT: s_waitcnt vmcnt(0)
516 ; GFX10-NEXT: ; return to shader part epilog
518 ; NOPRT-LABEL: load_1d_v3f32_xyz:
520 ; NOPRT-NEXT: s_mov_b32 s0, s2
521 ; NOPRT-NEXT: s_mov_b32 s1, s3
522 ; NOPRT-NEXT: s_mov_b32 s2, s4
523 ; NOPRT-NEXT: s_mov_b32 s3, s5
524 ; NOPRT-NEXT: s_mov_b32 s4, s6
525 ; NOPRT-NEXT: s_mov_b32 s5, s7
526 ; NOPRT-NEXT: s_mov_b32 s6, s8
527 ; NOPRT-NEXT: s_mov_b32 s7, s9
528 ; NOPRT-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm
529 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
530 ; NOPRT-NEXT: ; return to shader part epilog
532 ; GFX12-LABEL: load_1d_v3f32_xyz:
534 ; GFX12-NEXT: s_mov_b32 s0, s2
535 ; GFX12-NEXT: s_mov_b32 s1, s3
536 ; GFX12-NEXT: s_mov_b32 s2, s4
537 ; GFX12-NEXT: s_mov_b32 s3, s5
538 ; GFX12-NEXT: s_mov_b32 s4, s6
539 ; GFX12-NEXT: s_mov_b32 s5, s7
540 ; GFX12-NEXT: s_mov_b32 s6, s8
541 ; GFX12-NEXT: s_mov_b32 s7, s9
542 ; GFX12-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D
543 ; GFX12-NEXT: s_wait_loadcnt 0x0
544 ; GFX12-NEXT: ; return to shader part epilog
545 %v = call <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; Four-float (<4 x float>) 1D image load with the intrinsic's first operand set
; to 15. For every check prefix the expected assembly is eight s_mov_b32 copies
; that fill s[0:7], followed by one image_load into v[0:3] carrying dmask:0xf.
549 define amdgpu_ps <4 x float> @load_1d_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
550 ; GFX68-LABEL: load_1d_v4f32_xyzw:
552 ; GFX68-NEXT: s_mov_b32 s0, s2
553 ; GFX68-NEXT: s_mov_b32 s1, s3
554 ; GFX68-NEXT: s_mov_b32 s2, s4
555 ; GFX68-NEXT: s_mov_b32 s3, s5
556 ; GFX68-NEXT: s_mov_b32 s4, s6
557 ; GFX68-NEXT: s_mov_b32 s5, s7
558 ; GFX68-NEXT: s_mov_b32 s6, s8
559 ; GFX68-NEXT: s_mov_b32 s7, s9
560 ; GFX68-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm
561 ; GFX68-NEXT: s_waitcnt vmcnt(0)
562 ; GFX68-NEXT: ; return to shader part epilog
564 ; GFX10-LABEL: load_1d_v4f32_xyzw:
566 ; GFX10-NEXT: s_mov_b32 s0, s2
567 ; GFX10-NEXT: s_mov_b32 s1, s3
568 ; GFX10-NEXT: s_mov_b32 s2, s4
569 ; GFX10-NEXT: s_mov_b32 s3, s5
570 ; GFX10-NEXT: s_mov_b32 s4, s6
571 ; GFX10-NEXT: s_mov_b32 s5, s7
572 ; GFX10-NEXT: s_mov_b32 s6, s8
573 ; GFX10-NEXT: s_mov_b32 s7, s9
574 ; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
575 ; GFX10-NEXT: s_waitcnt vmcnt(0)
576 ; GFX10-NEXT: ; return to shader part epilog
578 ; NOPRT-LABEL: load_1d_v4f32_xyzw:
580 ; NOPRT-NEXT: s_mov_b32 s0, s2
581 ; NOPRT-NEXT: s_mov_b32 s1, s3
582 ; NOPRT-NEXT: s_mov_b32 s2, s4
583 ; NOPRT-NEXT: s_mov_b32 s3, s5
584 ; NOPRT-NEXT: s_mov_b32 s4, s6
585 ; NOPRT-NEXT: s_mov_b32 s5, s7
586 ; NOPRT-NEXT: s_mov_b32 s6, s8
587 ; NOPRT-NEXT: s_mov_b32 s7, s9
588 ; NOPRT-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
589 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
590 ; NOPRT-NEXT: ; return to shader part epilog
592 ; GFX12-LABEL: load_1d_v4f32_xyzw:
594 ; GFX12-NEXT: s_mov_b32 s0, s2
595 ; GFX12-NEXT: s_mov_b32 s1, s3
596 ; GFX12-NEXT: s_mov_b32 s2, s4
597 ; GFX12-NEXT: s_mov_b32 s3, s5
598 ; GFX12-NEXT: s_mov_b32 s4, s6
599 ; GFX12-NEXT: s_mov_b32 s5, s7
600 ; GFX12-NEXT: s_mov_b32 s6, s8
601 ; GFX12-NEXT: s_mov_b32 s7, s9
602 ; GFX12-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
603 ; GFX12-NEXT: s_wait_loadcnt 0x0
604 ; GFX12-NEXT: ; return to shader part epilog
605 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; 1D image load returning { float, i32 } with the first intrinsic operand set
; to 1 and the second-to-last operand set to 1; the expected image_load carries
; the tfe modifier. Only struct member 1 (the i32) is extracted and bitcast to
; float.
; In the runs without extra -mattr options the checks expect both destination
; registers v[1:2] to be zeroed before the load. In the run that passes
; -mattr=-enable-prt-strict-null only v1, the last register of v[0:1], is
; zeroed.
609 define amdgpu_ps float @load_1d_f32_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) {
610 ; GFX68-LABEL: load_1d_f32_tfe_dmask_x:
612 ; GFX68-NEXT: v_mov_b32_e32 v1, 0
613 ; GFX68-NEXT: s_mov_b32 s0, s2
614 ; GFX68-NEXT: s_mov_b32 s1, s3
615 ; GFX68-NEXT: s_mov_b32 s2, s4
616 ; GFX68-NEXT: s_mov_b32 s3, s5
617 ; GFX68-NEXT: s_mov_b32 s4, s6
618 ; GFX68-NEXT: s_mov_b32 s5, s7
619 ; GFX68-NEXT: s_mov_b32 s6, s8
620 ; GFX68-NEXT: s_mov_b32 s7, s9
621 ; GFX68-NEXT: v_mov_b32_e32 v2, v1
622 ; GFX68-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe
623 ; GFX68-NEXT: s_waitcnt vmcnt(0)
624 ; GFX68-NEXT: v_mov_b32_e32 v0, v2
625 ; GFX68-NEXT: ; return to shader part epilog
627 ; GFX10-LABEL: load_1d_f32_tfe_dmask_x:
629 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
630 ; GFX10-NEXT: s_mov_b32 s0, s2
631 ; GFX10-NEXT: s_mov_b32 s1, s3
632 ; GFX10-NEXT: s_mov_b32 s2, s4
633 ; GFX10-NEXT: s_mov_b32 s3, s5
634 ; GFX10-NEXT: s_mov_b32 s4, s6
635 ; GFX10-NEXT: s_mov_b32 s5, s7
636 ; GFX10-NEXT: s_mov_b32 s6, s8
637 ; GFX10-NEXT: s_mov_b32 s7, s9
638 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
639 ; GFX10-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
640 ; GFX10-NEXT: s_waitcnt vmcnt(0)
641 ; GFX10-NEXT: v_mov_b32_e32 v0, v2
642 ; GFX10-NEXT: ; return to shader part epilog
644 ; NOPRT-LABEL: load_1d_f32_tfe_dmask_x:
646 ; NOPRT-NEXT: s_mov_b32 s0, s2
647 ; NOPRT-NEXT: s_mov_b32 s1, s3
648 ; NOPRT-NEXT: s_mov_b32 s2, s4
649 ; NOPRT-NEXT: s_mov_b32 s3, s5
650 ; NOPRT-NEXT: s_mov_b32 s4, s6
651 ; NOPRT-NEXT: s_mov_b32 s5, s7
652 ; NOPRT-NEXT: s_mov_b32 s6, s8
653 ; NOPRT-NEXT: s_mov_b32 s7, s9
654 ; NOPRT-NEXT: v_mov_b32_e32 v1, 0
655 ; NOPRT-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
656 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
657 ; NOPRT-NEXT: v_mov_b32_e32 v0, v1
658 ; NOPRT-NEXT: ; return to shader part epilog
660 ; GFX12-LABEL: load_1d_f32_tfe_dmask_x:
662 ; GFX12-NEXT: v_mov_b32_e32 v1, 0
663 ; GFX12-NEXT: s_mov_b32 s0, s2
664 ; GFX12-NEXT: s_mov_b32 s1, s3
665 ; GFX12-NEXT: s_mov_b32 s2, s4
666 ; GFX12-NEXT: s_mov_b32 s3, s5
667 ; GFX12-NEXT: s_mov_b32 s4, s6
668 ; GFX12-NEXT: s_mov_b32 s5, s7
669 ; GFX12-NEXT: s_mov_b32 s6, s8
670 ; GFX12-NEXT: s_mov_b32 s7, s9
671 ; GFX12-NEXT: v_mov_b32_e32 v2, v1
672 ; GFX12-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
673 ; GFX12-NEXT: s_wait_loadcnt 0x0
674 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
675 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
676 ; GFX12-NEXT: ; return to shader part epilog
677 %v = call { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
678 %v.err = extractvalue { float, i32 } %v, 1
679 %vv = bitcast i32 %v.err to float
; 1D image load returning { <2 x float>, i32 } with the first intrinsic operand
; set to 3 and the second-to-last operand set to 1; the expected image_load
; carries dmask:0x3 and the tfe modifier. Only struct member 1 (the i32) is
; extracted and bitcast to float.
; In the runs without extra -mattr options the checks expect all three
; destination registers v[1:3] to be zeroed before the load. In the run that
; passes -mattr=-enable-prt-strict-null only v2, the last register of v[0:2],
; is zeroed.
683 define amdgpu_ps float @load_1d_v2f32_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s) {
684 ; GFX68-LABEL: load_1d_v2f32_tfe_dmask_xy:
686 ; GFX68-NEXT: v_mov_b32_e32 v1, 0
687 ; GFX68-NEXT: s_mov_b32 s0, s2
688 ; GFX68-NEXT: s_mov_b32 s1, s3
689 ; GFX68-NEXT: s_mov_b32 s2, s4
690 ; GFX68-NEXT: s_mov_b32 s3, s5
691 ; GFX68-NEXT: s_mov_b32 s4, s6
692 ; GFX68-NEXT: s_mov_b32 s5, s7
693 ; GFX68-NEXT: s_mov_b32 s6, s8
694 ; GFX68-NEXT: s_mov_b32 s7, s9
695 ; GFX68-NEXT: v_mov_b32_e32 v2, v1
696 ; GFX68-NEXT: v_mov_b32_e32 v3, v1
697 ; GFX68-NEXT: image_load v[1:3], v0, s[0:7] dmask:0x3 unorm tfe
698 ; GFX68-NEXT: s_waitcnt vmcnt(0)
699 ; GFX68-NEXT: v_mov_b32_e32 v0, v3
700 ; GFX68-NEXT: ; return to shader part epilog
702 ; GFX10-LABEL: load_1d_v2f32_tfe_dmask_xy:
704 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
705 ; GFX10-NEXT: s_mov_b32 s0, s2
706 ; GFX10-NEXT: s_mov_b32 s1, s3
707 ; GFX10-NEXT: s_mov_b32 s2, s4
708 ; GFX10-NEXT: s_mov_b32 s3, s5
709 ; GFX10-NEXT: s_mov_b32 s4, s6
710 ; GFX10-NEXT: s_mov_b32 s5, s7
711 ; GFX10-NEXT: s_mov_b32 s6, s8
712 ; GFX10-NEXT: s_mov_b32 s7, s9
713 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
714 ; GFX10-NEXT: v_mov_b32_e32 v3, v1
715 ; GFX10-NEXT: image_load v[1:3], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe
716 ; GFX10-NEXT: s_waitcnt vmcnt(0)
717 ; GFX10-NEXT: v_mov_b32_e32 v0, v3
718 ; GFX10-NEXT: ; return to shader part epilog
720 ; NOPRT-LABEL: load_1d_v2f32_tfe_dmask_xy:
722 ; NOPRT-NEXT: s_mov_b32 s0, s2
723 ; NOPRT-NEXT: s_mov_b32 s1, s3
724 ; NOPRT-NEXT: s_mov_b32 s2, s4
725 ; NOPRT-NEXT: s_mov_b32 s3, s5
726 ; NOPRT-NEXT: s_mov_b32 s4, s6
727 ; NOPRT-NEXT: s_mov_b32 s5, s7
728 ; NOPRT-NEXT: s_mov_b32 s6, s8
729 ; NOPRT-NEXT: s_mov_b32 s7, s9
730 ; NOPRT-NEXT: v_mov_b32_e32 v2, 0
731 ; NOPRT-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe
732 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
733 ; NOPRT-NEXT: v_mov_b32_e32 v0, v2
734 ; NOPRT-NEXT: ; return to shader part epilog
736 ; GFX12-LABEL: load_1d_v2f32_tfe_dmask_xy:
738 ; GFX12-NEXT: v_mov_b32_e32 v1, 0
739 ; GFX12-NEXT: s_mov_b32 s0, s2
740 ; GFX12-NEXT: s_mov_b32 s1, s3
741 ; GFX12-NEXT: s_mov_b32 s2, s4
742 ; GFX12-NEXT: s_mov_b32 s3, s5
743 ; GFX12-NEXT: s_mov_b32 s4, s6
744 ; GFX12-NEXT: s_mov_b32 s5, s7
745 ; GFX12-NEXT: s_mov_b32 s6, s8
746 ; GFX12-NEXT: s_mov_b32 s7, s9
747 ; GFX12-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v3, v1
748 ; GFX12-NEXT: image_load v[1:3], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
749 ; GFX12-NEXT: s_wait_loadcnt 0x0
750 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
751 ; GFX12-NEXT: v_mov_b32_e32 v0, v3
752 ; GFX12-NEXT: ; return to shader part epilog
753 %v = call { <2 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f32i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
754 %v.err = extractvalue { <2 x float>, i32 } %v, 1
755 %vv = bitcast i32 %v.err to float
; 1D image load returning { <3 x float>, i32 } with the first intrinsic operand
; set to 7 and the second-to-last operand set to 1; the expected image_load
; carries dmask:0x7 and the tfe modifier. Only struct member 1 (the i32) is
; extracted and bitcast to float.
; In the runs without extra -mattr options the checks expect all four
; destination registers v[1:4] to be zeroed before the load. In the run that
; passes -mattr=-enable-prt-strict-null only v3, the last register of v[0:3],
; is zeroed.
759 define amdgpu_ps float @load_1d_v3f32_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
760 ; GFX68-LABEL: load_1d_v3f32_tfe_dmask_xyz:
762 ; GFX68-NEXT: v_mov_b32_e32 v1, 0
763 ; GFX68-NEXT: s_mov_b32 s0, s2
764 ; GFX68-NEXT: s_mov_b32 s1, s3
765 ; GFX68-NEXT: s_mov_b32 s2, s4
766 ; GFX68-NEXT: s_mov_b32 s3, s5
767 ; GFX68-NEXT: s_mov_b32 s4, s6
768 ; GFX68-NEXT: s_mov_b32 s5, s7
769 ; GFX68-NEXT: s_mov_b32 s6, s8
770 ; GFX68-NEXT: s_mov_b32 s7, s9
771 ; GFX68-NEXT: v_mov_b32_e32 v2, v1
772 ; GFX68-NEXT: v_mov_b32_e32 v3, v1
773 ; GFX68-NEXT: v_mov_b32_e32 v4, v1
774 ; GFX68-NEXT: image_load v[1:4], v0, s[0:7] dmask:0x7 unorm tfe
775 ; GFX68-NEXT: s_waitcnt vmcnt(0)
776 ; GFX68-NEXT: v_mov_b32_e32 v0, v4
777 ; GFX68-NEXT: ; return to shader part epilog
779 ; GFX10-LABEL: load_1d_v3f32_tfe_dmask_xyz:
781 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
782 ; GFX10-NEXT: s_mov_b32 s0, s2
783 ; GFX10-NEXT: s_mov_b32 s1, s3
784 ; GFX10-NEXT: s_mov_b32 s2, s4
785 ; GFX10-NEXT: s_mov_b32 s3, s5
786 ; GFX10-NEXT: s_mov_b32 s4, s6
787 ; GFX10-NEXT: s_mov_b32 s5, s7
788 ; GFX10-NEXT: s_mov_b32 s6, s8
789 ; GFX10-NEXT: s_mov_b32 s7, s9
790 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
791 ; GFX10-NEXT: v_mov_b32_e32 v3, v1
792 ; GFX10-NEXT: v_mov_b32_e32 v4, v1
793 ; GFX10-NEXT: image_load v[1:4], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe
794 ; GFX10-NEXT: s_waitcnt vmcnt(0)
795 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
796 ; GFX10-NEXT: ; return to shader part epilog
798 ; NOPRT-LABEL: load_1d_v3f32_tfe_dmask_xyz:
800 ; NOPRT-NEXT: s_mov_b32 s0, s2
801 ; NOPRT-NEXT: s_mov_b32 s1, s3
802 ; NOPRT-NEXT: s_mov_b32 s2, s4
803 ; NOPRT-NEXT: s_mov_b32 s3, s5
804 ; NOPRT-NEXT: s_mov_b32 s4, s6
805 ; NOPRT-NEXT: s_mov_b32 s5, s7
806 ; NOPRT-NEXT: s_mov_b32 s6, s8
807 ; NOPRT-NEXT: s_mov_b32 s7, s9
808 ; NOPRT-NEXT: v_mov_b32_e32 v3, 0
809 ; NOPRT-NEXT: image_load v[0:3], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe
810 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
811 ; NOPRT-NEXT: v_mov_b32_e32 v0, v3
812 ; NOPRT-NEXT: ; return to shader part epilog
814 ; GFX12-LABEL: load_1d_v3f32_tfe_dmask_xyz:
816 ; GFX12-NEXT: v_mov_b32_e32 v1, 0
817 ; GFX12-NEXT: s_mov_b32 s0, s2
818 ; GFX12-NEXT: s_mov_b32 s1, s3
819 ; GFX12-NEXT: s_mov_b32 s2, s4
820 ; GFX12-NEXT: s_mov_b32 s3, s5
821 ; GFX12-NEXT: s_mov_b32 s4, s6
822 ; GFX12-NEXT: s_mov_b32 s5, s7
823 ; GFX12-NEXT: s_mov_b32 s6, s8
824 ; GFX12-NEXT: s_mov_b32 s7, s9
825 ; GFX12-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v3, v1
826 ; GFX12-NEXT: v_mov_b32_e32 v4, v1
827 ; GFX12-NEXT: image_load v[1:4], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D tfe
828 ; GFX12-NEXT: s_wait_loadcnt 0x0
829 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
830 ; GFX12-NEXT: v_mov_b32_e32 v0, v4
831 ; GFX12-NEXT: ; return to shader part epilog
832 %v = call { <3 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f32i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
833 %v.err = extractvalue { <3 x float>, i32 } %v, 1
834 %vv = bitcast i32 %v.err to float
; 1D image load returning { <4 x float>, i32 } with the second-to-last
; intrinsic operand set to 1; the expected image_load carries the tfe modifier.
; Only struct member 1 (the i32) is extracted and bitcast to float.
; The first intrinsic operand here is 16, and every check prefix expects
; dmask:0x10 with a two-register destination (v[1:2], or v[0:1] in the run
; that passes -mattr=-enable-prt-strict-null).
; Reviewer question - the function name says xyzw, and load_1d_v4f32_xyzw
; passes 15 (dmask:0xf) for that case, so 16 looks like it may be a typo for
; 15. TODO confirm the intent; changing the operand requires regenerating the
; check lines with utils/update_llc_test_checks.py.
838 define amdgpu_ps float @load_1d_v4f32_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
839 ; GFX68-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
841 ; GFX68-NEXT: v_mov_b32_e32 v1, 0
842 ; GFX68-NEXT: s_mov_b32 s0, s2
843 ; GFX68-NEXT: s_mov_b32 s1, s3
844 ; GFX68-NEXT: s_mov_b32 s2, s4
845 ; GFX68-NEXT: s_mov_b32 s3, s5
846 ; GFX68-NEXT: s_mov_b32 s4, s6
847 ; GFX68-NEXT: s_mov_b32 s5, s7
848 ; GFX68-NEXT: s_mov_b32 s6, s8
849 ; GFX68-NEXT: s_mov_b32 s7, s9
850 ; GFX68-NEXT: v_mov_b32_e32 v2, v1
851 ; GFX68-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe
852 ; GFX68-NEXT: s_waitcnt vmcnt(0)
853 ; GFX68-NEXT: v_mov_b32_e32 v0, v2
854 ; GFX68-NEXT: ; return to shader part epilog
856 ; GFX10-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
858 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
859 ; GFX10-NEXT: s_mov_b32 s0, s2
860 ; GFX10-NEXT: s_mov_b32 s1, s3
861 ; GFX10-NEXT: s_mov_b32 s2, s4
862 ; GFX10-NEXT: s_mov_b32 s3, s5
863 ; GFX10-NEXT: s_mov_b32 s4, s6
864 ; GFX10-NEXT: s_mov_b32 s5, s7
865 ; GFX10-NEXT: s_mov_b32 s6, s8
866 ; GFX10-NEXT: s_mov_b32 s7, s9
867 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
868 ; GFX10-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D unorm tfe
869 ; GFX10-NEXT: s_waitcnt vmcnt(0)
870 ; GFX10-NEXT: v_mov_b32_e32 v0, v2
871 ; GFX10-NEXT: ; return to shader part epilog
873 ; NOPRT-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
875 ; NOPRT-NEXT: s_mov_b32 s0, s2
876 ; NOPRT-NEXT: s_mov_b32 s1, s3
877 ; NOPRT-NEXT: s_mov_b32 s2, s4
878 ; NOPRT-NEXT: s_mov_b32 s3, s5
879 ; NOPRT-NEXT: s_mov_b32 s4, s6
880 ; NOPRT-NEXT: s_mov_b32 s5, s7
881 ; NOPRT-NEXT: s_mov_b32 s6, s8
882 ; NOPRT-NEXT: s_mov_b32 s7, s9
883 ; NOPRT-NEXT: v_mov_b32_e32 v1, 0
884 ; NOPRT-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D unorm tfe
885 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
886 ; NOPRT-NEXT: v_mov_b32_e32 v0, v1
887 ; NOPRT-NEXT: ; return to shader part epilog
889 ; GFX12-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
891 ; GFX12-NEXT: v_mov_b32_e32 v1, 0
892 ; GFX12-NEXT: s_mov_b32 s0, s2
893 ; GFX12-NEXT: s_mov_b32 s1, s3
894 ; GFX12-NEXT: s_mov_b32 s2, s4
895 ; GFX12-NEXT: s_mov_b32 s3, s5
896 ; GFX12-NEXT: s_mov_b32 s4, s6
897 ; GFX12-NEXT: s_mov_b32 s5, s7
898 ; GFX12-NEXT: s_mov_b32 s6, s8
899 ; GFX12-NEXT: s_mov_b32 s7, s9
900 ; GFX12-NEXT: v_mov_b32_e32 v2, v1
901 ; GFX12-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D tfe
902 ; GFX12-NEXT: s_wait_loadcnt 0x0
903 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
904 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
905 ; GFX12-NEXT: ; return to shader part epilog
906 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 16, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
907 %v.err = extractvalue { <4 x float>, i32 } %v, 1
908 %vv = bitcast i32 %v.err to float
; Test: 1D image load through the overload returning { float, i32 }, with 0
; passed as the first (immarg) operand and 1 as the fourth.
; Only the i32 member (struct index 1) is extracted; it is then bitcast to
; float (%vv).
; Although the IR immediate is 0, every expected image_load below prints
; dmask:0x1 with a two-register destination and the tfe modifier.
; The run that disables enable-prt-strict-null zeroes only v1 and loads into
; v[0:1]; the other runs zero both v1 and v2 and load into v[1:2].
912 define amdgpu_ps float @load_1d_f32_tfe_dmask_0(<8 x i32> inreg %rsrc, i32 %s) {
913 ; GFX68-LABEL: load_1d_f32_tfe_dmask_0:
915 ; GFX68-NEXT: v_mov_b32_e32 v1, 0
916 ; GFX68-NEXT: s_mov_b32 s0, s2
917 ; GFX68-NEXT: s_mov_b32 s1, s3
918 ; GFX68-NEXT: s_mov_b32 s2, s4
919 ; GFX68-NEXT: s_mov_b32 s3, s5
920 ; GFX68-NEXT: s_mov_b32 s4, s6
921 ; GFX68-NEXT: s_mov_b32 s5, s7
922 ; GFX68-NEXT: s_mov_b32 s6, s8
923 ; GFX68-NEXT: s_mov_b32 s7, s9
924 ; GFX68-NEXT: v_mov_b32_e32 v2, v1
925 ; GFX68-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe
926 ; GFX68-NEXT: s_waitcnt vmcnt(0)
927 ; GFX68-NEXT: v_mov_b32_e32 v0, v2
928 ; GFX68-NEXT: ; return to shader part epilog
930 ; GFX10-LABEL: load_1d_f32_tfe_dmask_0:
932 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
933 ; GFX10-NEXT: s_mov_b32 s0, s2
934 ; GFX10-NEXT: s_mov_b32 s1, s3
935 ; GFX10-NEXT: s_mov_b32 s2, s4
936 ; GFX10-NEXT: s_mov_b32 s3, s5
937 ; GFX10-NEXT: s_mov_b32 s4, s6
938 ; GFX10-NEXT: s_mov_b32 s5, s7
939 ; GFX10-NEXT: s_mov_b32 s6, s8
940 ; GFX10-NEXT: s_mov_b32 s7, s9
941 ; GFX10-NEXT: v_mov_b32_e32 v2, v1
942 ; GFX10-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
943 ; GFX10-NEXT: s_waitcnt vmcnt(0)
944 ; GFX10-NEXT: v_mov_b32_e32 v0, v2
945 ; GFX10-NEXT: ; return to shader part epilog
947 ; NOPRT-LABEL: load_1d_f32_tfe_dmask_0:
949 ; NOPRT-NEXT: s_mov_b32 s0, s2
950 ; NOPRT-NEXT: s_mov_b32 s1, s3
951 ; NOPRT-NEXT: s_mov_b32 s2, s4
952 ; NOPRT-NEXT: s_mov_b32 s3, s5
953 ; NOPRT-NEXT: s_mov_b32 s4, s6
954 ; NOPRT-NEXT: s_mov_b32 s5, s7
955 ; NOPRT-NEXT: s_mov_b32 s6, s8
956 ; NOPRT-NEXT: s_mov_b32 s7, s9
957 ; NOPRT-NEXT: v_mov_b32_e32 v1, 0
958 ; NOPRT-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
959 ; NOPRT-NEXT: s_waitcnt vmcnt(0)
960 ; NOPRT-NEXT: v_mov_b32_e32 v0, v1
961 ; NOPRT-NEXT: ; return to shader part epilog
963 ; GFX12-LABEL: load_1d_f32_tfe_dmask_0:
965 ; GFX12-NEXT: v_mov_b32_e32 v1, 0
966 ; GFX12-NEXT: s_mov_b32 s0, s2
967 ; GFX12-NEXT: s_mov_b32 s1, s3
968 ; GFX12-NEXT: s_mov_b32 s2, s4
969 ; GFX12-NEXT: s_mov_b32 s3, s5
970 ; GFX12-NEXT: s_mov_b32 s4, s6
971 ; GFX12-NEXT: s_mov_b32 s5, s7
972 ; GFX12-NEXT: s_mov_b32 s6, s8
973 ; GFX12-NEXT: s_mov_b32 s7, s9
974 ; GFX12-NEXT: v_mov_b32_e32 v2, v1
975 ; GFX12-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
976 ; GFX12-NEXT: s_wait_loadcnt 0x0
977 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
978 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
979 ; GFX12-NEXT: ; return to shader part epilog
980 %v = call { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 0, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
981 %v.err = extractvalue { float, i32 } %v, 1
982 %vv = bitcast i32 %v.err to float
; Declarations of the llvm.amdgcn.image.load.1d overloads. The first four
; return float or a 2/3/4-element float vector; the last four return a struct
; pairing that same value type with an extra i32.
; All eight share one operand list (i32 immarg, i32, <8 x i32>, i32 immarg,
; i32 immarg) and attribute group #0, defined on the final line as
; nounwind readonly.
986 declare float @llvm.amdgcn.image.load.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
987 declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
988 declare <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
989 declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
991 declare { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
992 declare { <2 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
993 declare { <3 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
994 declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
996 attributes #0 = { nounwind readonly }