1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10NSA %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - %s | FileCheck -check-prefix=GFX10NSA %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -o - %s | FileCheck -check-prefix=GFX12 %s
; A16 gather4 on a 2D image: both coordinates (%s, %t) are half values.
; Expected code on every target: save exec, switch to whole-quad mode with
; s_wqm, copy the inreg descriptors down to s[0:7] (rsrc) and s[8:11] (samp),
; pack %s into the low and %t into the high 16 bits of one VGPR
; (v_and_b32 + v_lshl_or_b32), AND exec with the saved mask, then issue
; image_gather4 with the a16 modifier and dmask:0x1.
7 define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
8 ; GFX9-LABEL: gather4_2d:
9 ; GFX9: ; %bb.0: ; %main_body
10 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
11 ; GFX9-NEXT: s_mov_b32 s0, s2
12 ; GFX9-NEXT: s_wqm_b64 exec, exec
13 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
14 ; GFX9-NEXT: s_mov_b32 s1, s3
15 ; GFX9-NEXT: s_mov_b32 s2, s4
16 ; GFX9-NEXT: s_mov_b32 s3, s5
17 ; GFX9-NEXT: s_mov_b32 s4, s6
18 ; GFX9-NEXT: s_mov_b32 s5, s7
19 ; GFX9-NEXT: s_mov_b32 s6, s8
20 ; GFX9-NEXT: s_mov_b32 s7, s9
21 ; GFX9-NEXT: s_mov_b32 s8, s10
22 ; GFX9-NEXT: s_mov_b32 s9, s11
23 ; GFX9-NEXT: s_mov_b32 s10, s12
24 ; GFX9-NEXT: s_mov_b32 s11, s13
25 ; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
26 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
27 ; GFX9-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
28 ; GFX9-NEXT: s_waitcnt vmcnt(0)
29 ; GFX9-NEXT: ; return to shader part epilog
31 ; GFX10NSA-LABEL: gather4_2d:
32 ; GFX10NSA: ; %bb.0: ; %main_body
33 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
34 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
35 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
36 ; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
37 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
38 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
39 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
40 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
41 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
42 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
43 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
44 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
45 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
46 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
47 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
48 ; GFX10NSA-NEXT: v_lshl_or_b32 v0, v1, 16, v0
49 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
50 ; GFX10NSA-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
51 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
52 ; GFX10NSA-NEXT: ; return to shader part epilog
54 ; GFX12-LABEL: gather4_2d:
55 ; GFX12: ; %bb.0: ; %main_body
56 ; GFX12-NEXT: s_mov_b32 s14, exec_lo
57 ; GFX12-NEXT: s_mov_b32 s0, s2
58 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
59 ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
60 ; GFX12-NEXT: s_mov_b32 s1, s3
61 ; GFX12-NEXT: s_mov_b32 s2, s4
62 ; GFX12-NEXT: s_mov_b32 s3, s5
63 ; GFX12-NEXT: s_mov_b32 s4, s6
64 ; GFX12-NEXT: s_mov_b32 s5, s7
65 ; GFX12-NEXT: s_mov_b32 s6, s8
66 ; GFX12-NEXT: s_mov_b32 s7, s9
67 ; GFX12-NEXT: s_mov_b32 s8, s10
68 ; GFX12-NEXT: s_mov_b32 s9, s11
69 ; GFX12-NEXT: s_mov_b32 s10, s12
70 ; GFX12-NEXT: s_mov_b32 s11, s13
71 ; GFX12-NEXT: v_lshl_or_b32 v0, v1, 16, v0
72 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s14
73 ; GFX12-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
74 ; GFX12-NEXT: s_wait_samplecnt 0x0
75 ; GFX12-NEXT: ; return to shader part epilog
77 %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; A16 gather4 on a cube image with three half operands (%s, %t, %face).
; %s/%t are packed into one dword; %face stays alone in v2.
; GFX9 and GFX10NSA write the packed pair to v1 so the address is the
; contiguous range v[1:2]; GFX12 packs in place and passes the register
; list [v0, v2]. GFX9 marks the access with "da", the newer targets with
; dim:SQ_RSRC_IMG_CUBE.
81 define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
82 ; GFX9-LABEL: gather4_cube:
83 ; GFX9: ; %bb.0: ; %main_body
84 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
85 ; GFX9-NEXT: s_mov_b32 s0, s2
86 ; GFX9-NEXT: s_wqm_b64 exec, exec
87 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
88 ; GFX9-NEXT: s_mov_b32 s1, s3
89 ; GFX9-NEXT: s_mov_b32 s2, s4
90 ; GFX9-NEXT: s_mov_b32 s3, s5
91 ; GFX9-NEXT: s_mov_b32 s4, s6
92 ; GFX9-NEXT: s_mov_b32 s5, s7
93 ; GFX9-NEXT: s_mov_b32 s6, s8
94 ; GFX9-NEXT: s_mov_b32 s7, s9
95 ; GFX9-NEXT: s_mov_b32 s8, s10
96 ; GFX9-NEXT: s_mov_b32 s9, s11
97 ; GFX9-NEXT: s_mov_b32 s10, s12
98 ; GFX9-NEXT: s_mov_b32 s11, s13
99 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
100 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
101 ; GFX9-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
102 ; GFX9-NEXT: s_waitcnt vmcnt(0)
103 ; GFX9-NEXT: ; return to shader part epilog
105 ; GFX10NSA-LABEL: gather4_cube:
106 ; GFX10NSA: ; %bb.0: ; %main_body
107 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
108 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
109 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
110 ; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
111 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
112 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
113 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
114 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
115 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
116 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
117 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
118 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
119 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
120 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
121 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
122 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v1, 16, v0
123 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
124 ; GFX10NSA-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
125 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
126 ; GFX10NSA-NEXT: ; return to shader part epilog
128 ; GFX12-LABEL: gather4_cube:
129 ; GFX12: ; %bb.0: ; %main_body
130 ; GFX12-NEXT: s_mov_b32 s14, exec_lo
131 ; GFX12-NEXT: s_mov_b32 s0, s2
132 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
133 ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
134 ; GFX12-NEXT: s_mov_b32 s1, s3
135 ; GFX12-NEXT: s_mov_b32 s2, s4
136 ; GFX12-NEXT: s_mov_b32 s3, s5
137 ; GFX12-NEXT: s_mov_b32 s4, s6
138 ; GFX12-NEXT: s_mov_b32 s5, s7
139 ; GFX12-NEXT: s_mov_b32 s6, s8
140 ; GFX12-NEXT: s_mov_b32 s7, s9
141 ; GFX12-NEXT: s_mov_b32 s8, s10
142 ; GFX12-NEXT: s_mov_b32 s9, s11
143 ; GFX12-NEXT: s_mov_b32 s10, s12
144 ; GFX12-NEXT: s_mov_b32 s11, s13
145 ; GFX12-NEXT: v_lshl_or_b32 v0, v1, 16, v0
146 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s14
147 ; GFX12-NEXT: image_gather4 v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
148 ; GFX12-NEXT: s_wait_samplecnt 0x0
149 ; GFX12-NEXT: ; return to shader part epilog
151 %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; A16 gather4 on a 2D array image with three half operands (%s, %t, %slice).
; %s/%t share one dword; %slice stays alone in v2.
; GFX9 and GFX10NSA use the contiguous address range v[1:2] (packed pair in
; v1); GFX12 uses the register list [v0, v2]. GFX9 marks the array access
; with "da", the newer targets with dim:SQ_RSRC_IMG_2D_ARRAY.
155 define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
156 ; GFX9-LABEL: gather4_2darray:
157 ; GFX9: ; %bb.0: ; %main_body
158 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
159 ; GFX9-NEXT: s_mov_b32 s0, s2
160 ; GFX9-NEXT: s_wqm_b64 exec, exec
161 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
162 ; GFX9-NEXT: s_mov_b32 s1, s3
163 ; GFX9-NEXT: s_mov_b32 s2, s4
164 ; GFX9-NEXT: s_mov_b32 s3, s5
165 ; GFX9-NEXT: s_mov_b32 s4, s6
166 ; GFX9-NEXT: s_mov_b32 s5, s7
167 ; GFX9-NEXT: s_mov_b32 s6, s8
168 ; GFX9-NEXT: s_mov_b32 s7, s9
169 ; GFX9-NEXT: s_mov_b32 s8, s10
170 ; GFX9-NEXT: s_mov_b32 s9, s11
171 ; GFX9-NEXT: s_mov_b32 s10, s12
172 ; GFX9-NEXT: s_mov_b32 s11, s13
173 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
174 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
175 ; GFX9-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
176 ; GFX9-NEXT: s_waitcnt vmcnt(0)
177 ; GFX9-NEXT: ; return to shader part epilog
179 ; GFX10NSA-LABEL: gather4_2darray:
180 ; GFX10NSA: ; %bb.0: ; %main_body
181 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
182 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
183 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
184 ; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
185 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
186 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
187 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
188 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
189 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
190 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
191 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
192 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
193 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
194 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
195 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
196 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v1, 16, v0
197 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
198 ; GFX10NSA-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
199 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
200 ; GFX10NSA-NEXT: ; return to shader part epilog
202 ; GFX12-LABEL: gather4_2darray:
203 ; GFX12: ; %bb.0: ; %main_body
204 ; GFX12-NEXT: s_mov_b32 s14, exec_lo
205 ; GFX12-NEXT: s_mov_b32 s0, s2
206 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
207 ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
208 ; GFX12-NEXT: s_mov_b32 s1, s3
209 ; GFX12-NEXT: s_mov_b32 s2, s4
210 ; GFX12-NEXT: s_mov_b32 s3, s5
211 ; GFX12-NEXT: s_mov_b32 s4, s6
212 ; GFX12-NEXT: s_mov_b32 s5, s7
213 ; GFX12-NEXT: s_mov_b32 s6, s8
214 ; GFX12-NEXT: s_mov_b32 s7, s9
215 ; GFX12-NEXT: s_mov_b32 s8, s10
216 ; GFX12-NEXT: s_mov_b32 s9, s11
217 ; GFX12-NEXT: s_mov_b32 s10, s12
218 ; GFX12-NEXT: s_mov_b32 s11, s13
219 ; GFX12-NEXT: v_lshl_or_b32 v0, v1, 16, v0
220 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s14
221 ; GFX12-NEXT: image_gather4 v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
222 ; GFX12-NEXT: s_wait_samplecnt 0x0
223 ; GFX12-NEXT: ; return to shader part epilog
225 %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; A16 gather4 with depth compare (gather4_c) on a 2D image.
; %zcompare is a full float and keeps its own dword in v0; only the half
; coordinates %s (v1) and %t (v2) are packed together, into v1.
; The address operand is v[0:1] on GFX9 and GFX10NSA, and the register list
; [v0, v1] on GFX12.
229 define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
230 ; GFX9-LABEL: gather4_c_2d:
231 ; GFX9: ; %bb.0: ; %main_body
232 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
233 ; GFX9-NEXT: s_mov_b32 s0, s2
234 ; GFX9-NEXT: s_wqm_b64 exec, exec
235 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
236 ; GFX9-NEXT: s_mov_b32 s1, s3
237 ; GFX9-NEXT: s_mov_b32 s2, s4
238 ; GFX9-NEXT: s_mov_b32 s3, s5
239 ; GFX9-NEXT: s_mov_b32 s4, s6
240 ; GFX9-NEXT: s_mov_b32 s5, s7
241 ; GFX9-NEXT: s_mov_b32 s6, s8
242 ; GFX9-NEXT: s_mov_b32 s7, s9
243 ; GFX9-NEXT: s_mov_b32 s8, s10
244 ; GFX9-NEXT: s_mov_b32 s9, s11
245 ; GFX9-NEXT: s_mov_b32 s10, s12
246 ; GFX9-NEXT: s_mov_b32 s11, s13
247 ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
248 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
249 ; GFX9-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
250 ; GFX9-NEXT: s_waitcnt vmcnt(0)
251 ; GFX9-NEXT: ; return to shader part epilog
253 ; GFX10NSA-LABEL: gather4_c_2d:
254 ; GFX10NSA: ; %bb.0: ; %main_body
255 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
256 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
257 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
258 ; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
259 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
260 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
261 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
262 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
263 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
264 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
265 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
266 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
267 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
268 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
269 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
270 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
271 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
272 ; GFX10NSA-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
273 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
274 ; GFX10NSA-NEXT: ; return to shader part epilog
276 ; GFX12-LABEL: gather4_c_2d:
277 ; GFX12: ; %bb.0: ; %main_body
278 ; GFX12-NEXT: s_mov_b32 s14, exec_lo
279 ; GFX12-NEXT: s_mov_b32 s0, s2
280 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
281 ; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v1
282 ; GFX12-NEXT: s_mov_b32 s1, s3
283 ; GFX12-NEXT: s_mov_b32 s2, s4
284 ; GFX12-NEXT: s_mov_b32 s3, s5
285 ; GFX12-NEXT: s_mov_b32 s4, s6
286 ; GFX12-NEXT: s_mov_b32 s5, s7
287 ; GFX12-NEXT: s_mov_b32 s6, s8
288 ; GFX12-NEXT: s_mov_b32 s7, s9
289 ; GFX12-NEXT: s_mov_b32 s8, s10
290 ; GFX12-NEXT: s_mov_b32 s9, s11
291 ; GFX12-NEXT: s_mov_b32 s10, s12
292 ; GFX12-NEXT: s_mov_b32 s11, s13
293 ; GFX12-NEXT: v_lshl_or_b32 v1, v2, 16, v1
294 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s14
295 ; GFX12-NEXT: image_gather4_c v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
296 ; GFX12-NEXT: s_wait_samplecnt 0x0
297 ; GFX12-NEXT: ; return to shader part epilog
299 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; A16 gather4 with LOD clamp (gather4_cl) on a 2D image.
; %s/%t are packed into one dword; the half %clamp is not packed with them
; and stays in v2.
; GFX9 and GFX10NSA place the packed pair in v1 and address v[1:2]; GFX12
; packs into v0 and passes the register list [v0, v2].
303 define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
304 ; GFX9-LABEL: gather4_cl_2d:
305 ; GFX9: ; %bb.0: ; %main_body
306 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
307 ; GFX9-NEXT: s_mov_b32 s0, s2
308 ; GFX9-NEXT: s_wqm_b64 exec, exec
309 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
310 ; GFX9-NEXT: s_mov_b32 s1, s3
311 ; GFX9-NEXT: s_mov_b32 s2, s4
312 ; GFX9-NEXT: s_mov_b32 s3, s5
313 ; GFX9-NEXT: s_mov_b32 s4, s6
314 ; GFX9-NEXT: s_mov_b32 s5, s7
315 ; GFX9-NEXT: s_mov_b32 s6, s8
316 ; GFX9-NEXT: s_mov_b32 s7, s9
317 ; GFX9-NEXT: s_mov_b32 s8, s10
318 ; GFX9-NEXT: s_mov_b32 s9, s11
319 ; GFX9-NEXT: s_mov_b32 s10, s12
320 ; GFX9-NEXT: s_mov_b32 s11, s13
321 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
322 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
323 ; GFX9-NEXT: image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
324 ; GFX9-NEXT: s_waitcnt vmcnt(0)
325 ; GFX9-NEXT: ; return to shader part epilog
327 ; GFX10NSA-LABEL: gather4_cl_2d:
328 ; GFX10NSA: ; %bb.0: ; %main_body
329 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
330 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
331 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
332 ; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
333 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
334 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
335 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
336 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
337 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
338 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
339 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
340 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
341 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
342 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
343 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
344 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v1, 16, v0
345 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
346 ; GFX10NSA-NEXT: image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
347 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
348 ; GFX10NSA-NEXT: ; return to shader part epilog
350 ; GFX12-LABEL: gather4_cl_2d:
351 ; GFX12: ; %bb.0: ; %main_body
352 ; GFX12-NEXT: s_mov_b32 s14, exec_lo
353 ; GFX12-NEXT: s_mov_b32 s0, s2
354 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
355 ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
356 ; GFX12-NEXT: s_mov_b32 s1, s3
357 ; GFX12-NEXT: s_mov_b32 s2, s4
358 ; GFX12-NEXT: s_mov_b32 s3, s5
359 ; GFX12-NEXT: s_mov_b32 s4, s6
360 ; GFX12-NEXT: s_mov_b32 s5, s7
361 ; GFX12-NEXT: s_mov_b32 s6, s8
362 ; GFX12-NEXT: s_mov_b32 s7, s9
363 ; GFX12-NEXT: s_mov_b32 s8, s10
364 ; GFX12-NEXT: s_mov_b32 s9, s11
365 ; GFX12-NEXT: s_mov_b32 s10, s12
366 ; GFX12-NEXT: s_mov_b32 s11, s13
367 ; GFX12-NEXT: v_lshl_or_b32 v0, v1, 16, v0
368 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s14
369 ; GFX12-NEXT: image_gather4_cl v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
370 ; GFX12-NEXT: s_wait_samplecnt 0x0
371 ; GFX12-NEXT: ; return to shader part epilog
373 %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; A16 gather4 with depth compare and LOD clamp (gather4_c_cl) on a 2D image.
; Incoming VGPRs: %zcompare in v0, %s in v1, %t in v2, %clamp in v3.
; GFX9 forms the contiguous address range v[1:3]: it copies %s to v4, moves
; %zcompare from v0 to v1, and writes the packed %s/%t pair to v2 (two extra
; v_mov_b32). GFX10NSA and GFX12 pack %s/%t in place into v1 and pass the
; non-contiguous register list [v0, v1, v3], so no moves are expected.
377 define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
378 ; GFX9-LABEL: gather4_c_cl_2d:
379 ; GFX9: ; %bb.0: ; %main_body
380 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
381 ; GFX9-NEXT: s_mov_b32 s0, s2
382 ; GFX9-NEXT: s_wqm_b64 exec, exec
383 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
384 ; GFX9-NEXT: v_mov_b32_e32 v1, v0
385 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v4
386 ; GFX9-NEXT: s_mov_b32 s1, s3
387 ; GFX9-NEXT: s_mov_b32 s2, s4
388 ; GFX9-NEXT: s_mov_b32 s3, s5
389 ; GFX9-NEXT: s_mov_b32 s4, s6
390 ; GFX9-NEXT: s_mov_b32 s5, s7
391 ; GFX9-NEXT: s_mov_b32 s6, s8
392 ; GFX9-NEXT: s_mov_b32 s7, s9
393 ; GFX9-NEXT: s_mov_b32 s8, s10
394 ; GFX9-NEXT: s_mov_b32 s9, s11
395 ; GFX9-NEXT: s_mov_b32 s10, s12
396 ; GFX9-NEXT: s_mov_b32 s11, s13
397 ; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v0
398 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
399 ; GFX9-NEXT: image_gather4_c_cl v[0:3], v[1:3], s[0:7], s[8:11] dmask:0x1 a16
400 ; GFX9-NEXT: s_waitcnt vmcnt(0)
401 ; GFX9-NEXT: ; return to shader part epilog
403 ; GFX10NSA-LABEL: gather4_c_cl_2d:
404 ; GFX10NSA: ; %bb.0: ; %main_body
405 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
406 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
407 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
408 ; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
409 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
410 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
411 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
412 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
413 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
414 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
415 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
416 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
417 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
418 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
419 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
420 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
421 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
422 ; GFX10NSA-NEXT: image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
423 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
424 ; GFX10NSA-NEXT: ; return to shader part epilog
426 ; GFX12-LABEL: gather4_c_cl_2d:
427 ; GFX12: ; %bb.0: ; %main_body
428 ; GFX12-NEXT: s_mov_b32 s14, exec_lo
429 ; GFX12-NEXT: s_mov_b32 s0, s2
430 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
431 ; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v1
432 ; GFX12-NEXT: s_mov_b32 s1, s3
433 ; GFX12-NEXT: s_mov_b32 s2, s4
434 ; GFX12-NEXT: s_mov_b32 s3, s5
435 ; GFX12-NEXT: s_mov_b32 s4, s6
436 ; GFX12-NEXT: s_mov_b32 s5, s7
437 ; GFX12-NEXT: s_mov_b32 s6, s8
438 ; GFX12-NEXT: s_mov_b32 s7, s9
439 ; GFX12-NEXT: s_mov_b32 s8, s10
440 ; GFX12-NEXT: s_mov_b32 s9, s11
441 ; GFX12-NEXT: s_mov_b32 s10, s12
442 ; GFX12-NEXT: s_mov_b32 s11, s13
443 ; GFX12-NEXT: v_lshl_or_b32 v1, v2, 16, v1
444 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s14
445 ; GFX12-NEXT: image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
446 ; GFX12-NEXT: s_wait_samplecnt 0x0
447 ; GFX12-NEXT: ; return to shader part epilog
449 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; A16 gather4 with LOD bias (gather4_b) on a 2D image; the bias is a half as
; well, hence the doubled .f16.f16 suffix on the intrinsic name.
; %bias keeps its own dword in v0; %s (v1) and %t (v2) are packed into v1.
; The address operand is v[0:1] on GFX9 and GFX10NSA, and the register list
; [v0, v1] on GFX12.
453 define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) {
454 ; GFX9-LABEL: gather4_b_2d:
455 ; GFX9: ; %bb.0: ; %main_body
456 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
457 ; GFX9-NEXT: s_mov_b32 s0, s2
458 ; GFX9-NEXT: s_wqm_b64 exec, exec
459 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
460 ; GFX9-NEXT: s_mov_b32 s1, s3
461 ; GFX9-NEXT: s_mov_b32 s2, s4
462 ; GFX9-NEXT: s_mov_b32 s3, s5
463 ; GFX9-NEXT: s_mov_b32 s4, s6
464 ; GFX9-NEXT: s_mov_b32 s5, s7
465 ; GFX9-NEXT: s_mov_b32 s6, s8
466 ; GFX9-NEXT: s_mov_b32 s7, s9
467 ; GFX9-NEXT: s_mov_b32 s8, s10
468 ; GFX9-NEXT: s_mov_b32 s9, s11
469 ; GFX9-NEXT: s_mov_b32 s10, s12
470 ; GFX9-NEXT: s_mov_b32 s11, s13
471 ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
472 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
473 ; GFX9-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
474 ; GFX9-NEXT: s_waitcnt vmcnt(0)
475 ; GFX9-NEXT: ; return to shader part epilog
477 ; GFX10NSA-LABEL: gather4_b_2d:
478 ; GFX10NSA: ; %bb.0: ; %main_body
479 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
480 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
481 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
482 ; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
483 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
484 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
485 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
486 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
487 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
488 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
489 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
490 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
491 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
492 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
493 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
494 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
495 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
496 ; GFX10NSA-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
497 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
498 ; GFX10NSA-NEXT: ; return to shader part epilog
500 ; GFX12-LABEL: gather4_b_2d:
501 ; GFX12: ; %bb.0: ; %main_body
502 ; GFX12-NEXT: s_mov_b32 s14, exec_lo
503 ; GFX12-NEXT: s_mov_b32 s0, s2
504 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
505 ; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v1
506 ; GFX12-NEXT: s_mov_b32 s1, s3
507 ; GFX12-NEXT: s_mov_b32 s2, s4
508 ; GFX12-NEXT: s_mov_b32 s3, s5
509 ; GFX12-NEXT: s_mov_b32 s4, s6
510 ; GFX12-NEXT: s_mov_b32 s5, s7
511 ; GFX12-NEXT: s_mov_b32 s6, s8
512 ; GFX12-NEXT: s_mov_b32 s7, s9
513 ; GFX12-NEXT: s_mov_b32 s8, s10
514 ; GFX12-NEXT: s_mov_b32 s9, s11
515 ; GFX12-NEXT: s_mov_b32 s10, s12
516 ; GFX12-NEXT: s_mov_b32 s11, s13
517 ; GFX12-NEXT: v_lshl_or_b32 v1, v2, 16, v1
518 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s14
519 ; GFX12-NEXT: image_gather4_b v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
520 ; GFX12-NEXT: s_wait_samplecnt 0x0
521 ; GFX12-NEXT: ; return to shader part epilog
523 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32 1, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; A16 gather4 with LOD bias and depth compare (gather4_c_b) on a 2D image.
; Incoming VGPRs: %bias (half) in v0, %zcompare (float) in v1, %s in v2,
; %t in v3. Only %s/%t are packed (into v2); bias and zcompare each keep a
; full dword, so the operands are already in order with no extra moves.
; The address operand is v[0:2] on GFX9 and GFX10NSA, and the register list
; [v0, v1, v2] on GFX12.
527 define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) {
528 ; GFX9-LABEL: gather4_c_b_2d:
529 ; GFX9: ; %bb.0: ; %main_body
530 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
531 ; GFX9-NEXT: s_mov_b32 s0, s2
532 ; GFX9-NEXT: s_wqm_b64 exec, exec
533 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
534 ; GFX9-NEXT: s_mov_b32 s1, s3
535 ; GFX9-NEXT: s_mov_b32 s2, s4
536 ; GFX9-NEXT: s_mov_b32 s3, s5
537 ; GFX9-NEXT: s_mov_b32 s4, s6
538 ; GFX9-NEXT: s_mov_b32 s5, s7
539 ; GFX9-NEXT: s_mov_b32 s6, s8
540 ; GFX9-NEXT: s_mov_b32 s7, s9
541 ; GFX9-NEXT: s_mov_b32 s8, s10
542 ; GFX9-NEXT: s_mov_b32 s9, s11
543 ; GFX9-NEXT: s_mov_b32 s10, s12
544 ; GFX9-NEXT: s_mov_b32 s11, s13
545 ; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2
546 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
547 ; GFX9-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
548 ; GFX9-NEXT: s_waitcnt vmcnt(0)
549 ; GFX9-NEXT: ; return to shader part epilog
551 ; GFX10NSA-LABEL: gather4_c_b_2d:
552 ; GFX10NSA: ; %bb.0: ; %main_body
553 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
554 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
555 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
556 ; GFX10NSA-NEXT: v_and_b32_e32 v2, 0xffff, v2
557 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
558 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
559 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
560 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
561 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
562 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
563 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
564 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
565 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
566 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
567 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
568 ; GFX10NSA-NEXT: v_lshl_or_b32 v2, v3, 16, v2
569 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
570 ; GFX10NSA-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
571 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
572 ; GFX10NSA-NEXT: ; return to shader part epilog
574 ; GFX12-LABEL: gather4_c_b_2d:
575 ; GFX12: ; %bb.0: ; %main_body
576 ; GFX12-NEXT: s_mov_b32 s14, exec_lo
577 ; GFX12-NEXT: s_mov_b32 s0, s2
578 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
579 ; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2
580 ; GFX12-NEXT: s_mov_b32 s1, s3
581 ; GFX12-NEXT: s_mov_b32 s2, s4
582 ; GFX12-NEXT: s_mov_b32 s3, s5
583 ; GFX12-NEXT: s_mov_b32 s4, s6
584 ; GFX12-NEXT: s_mov_b32 s5, s7
585 ; GFX12-NEXT: s_mov_b32 s6, s8
586 ; GFX12-NEXT: s_mov_b32 s7, s9
587 ; GFX12-NEXT: s_mov_b32 s8, s10
588 ; GFX12-NEXT: s_mov_b32 s9, s11
589 ; GFX12-NEXT: s_mov_b32 s10, s12
590 ; GFX12-NEXT: s_mov_b32 s11, s13
591 ; GFX12-NEXT: v_lshl_or_b32 v2, v3, 16, v2
592 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s14
593 ; GFX12-NEXT: image_gather4_c_b v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
594 ; GFX12-NEXT: s_wait_samplecnt 0x0
595 ; GFX12-NEXT: ; return to shader part epilog
597 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32 1, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; A16 gather4 with LOD bias and LOD clamp (gather4_b_cl) on a 2D image.
; Incoming VGPRs: %bias in v0, %s in v1, %t in v2, %clamp in v3 (all half).
; GFX9 forms the contiguous address range v[1:3]: it copies %s to v4, moves
; %bias from v0 to v1, and writes the packed %s/%t pair to v2.
; GFX10NSA and GFX12 pack %s/%t in place into v1 and pass the
; non-contiguous register list [v0, v1, v3] instead.
601 define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) {
602 ; GFX9-LABEL: gather4_b_cl_2d:
603 ; GFX9: ; %bb.0: ; %main_body
604 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
605 ; GFX9-NEXT: s_mov_b32 s0, s2
606 ; GFX9-NEXT: s_wqm_b64 exec, exec
607 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
608 ; GFX9-NEXT: v_mov_b32_e32 v1, v0
609 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v4
610 ; GFX9-NEXT: s_mov_b32 s1, s3
611 ; GFX9-NEXT: s_mov_b32 s2, s4
612 ; GFX9-NEXT: s_mov_b32 s3, s5
613 ; GFX9-NEXT: s_mov_b32 s4, s6
614 ; GFX9-NEXT: s_mov_b32 s5, s7
615 ; GFX9-NEXT: s_mov_b32 s6, s8
616 ; GFX9-NEXT: s_mov_b32 s7, s9
617 ; GFX9-NEXT: s_mov_b32 s8, s10
618 ; GFX9-NEXT: s_mov_b32 s9, s11
619 ; GFX9-NEXT: s_mov_b32 s10, s12
620 ; GFX9-NEXT: s_mov_b32 s11, s13
621 ; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v0
622 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
623 ; GFX9-NEXT: image_gather4_b_cl v[0:3], v[1:3], s[0:7], s[8:11] dmask:0x1 a16
624 ; GFX9-NEXT: s_waitcnt vmcnt(0)
625 ; GFX9-NEXT: ; return to shader part epilog
627 ; GFX10NSA-LABEL: gather4_b_cl_2d:
628 ; GFX10NSA: ; %bb.0: ; %main_body
629 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
630 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
631 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
632 ; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
633 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
634 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
635 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
636 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
637 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
638 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
639 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
640 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
641 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
642 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
643 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
644 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
645 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
646 ; GFX10NSA-NEXT: image_gather4_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
647 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
648 ; GFX10NSA-NEXT: ; return to shader part epilog
650 ; GFX12-LABEL: gather4_b_cl_2d:
651 ; GFX12: ; %bb.0: ; %main_body
652 ; GFX12-NEXT: s_mov_b32 s14, exec_lo
653 ; GFX12-NEXT: s_mov_b32 s0, s2
654 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
655 ; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v1
656 ; GFX12-NEXT: s_mov_b32 s1, s3
657 ; GFX12-NEXT: s_mov_b32 s2, s4
658 ; GFX12-NEXT: s_mov_b32 s3, s5
659 ; GFX12-NEXT: s_mov_b32 s4, s6
660 ; GFX12-NEXT: s_mov_b32 s5, s7
661 ; GFX12-NEXT: s_mov_b32 s6, s8
662 ; GFX12-NEXT: s_mov_b32 s7, s9
663 ; GFX12-NEXT: s_mov_b32 s8, s10
664 ; GFX12-NEXT: s_mov_b32 s9, s11
665 ; GFX12-NEXT: s_mov_b32 s10, s12
666 ; GFX12-NEXT: s_mov_b32 s11, s13
667 ; GFX12-NEXT: v_lshl_or_b32 v1, v2, 16, v1
668 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s14
669 ; GFX12-NEXT: image_gather4_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
670 ; GFX12-NEXT: s_wait_samplecnt 0x0
671 ; GFX12-NEXT: ; return to shader part epilog
673 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32 1, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; A16 gather4 with LOD bias, depth compare and LOD clamp (gather4_c_b_cl)
; on a 2D image: the variant with the most address operands in this view.
; Incoming VGPRs: %bias in v0, %zcompare in v1, %s in v2, %t in v3,
; %clamp in v4.
; GFX9 forms the contiguous range v[0:3]: %t is copied to v5 so that %clamp
; can be moved from v4 down to v3, and %s/%t are packed into v2.
; GFX10NSA and GFX12 pack %s/%t in place into v2 and pass the register list
; [v0, v1, v2, v4], so no v_mov_b32 is expected there.
677 define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) {
678 ; GFX9-LABEL: gather4_c_b_cl_2d:
679 ; GFX9: ; %bb.0: ; %main_body
680 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
681 ; GFX9-NEXT: s_mov_b32 s0, s2
682 ; GFX9-NEXT: s_wqm_b64 exec, exec
683 ; GFX9-NEXT: v_mov_b32_e32 v5, v3
684 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
685 ; GFX9-NEXT: s_mov_b32 s1, s3
686 ; GFX9-NEXT: s_mov_b32 s2, s4
687 ; GFX9-NEXT: s_mov_b32 s3, s5
688 ; GFX9-NEXT: s_mov_b32 s4, s6
689 ; GFX9-NEXT: s_mov_b32 s5, s7
690 ; GFX9-NEXT: s_mov_b32 s6, s8
691 ; GFX9-NEXT: s_mov_b32 s7, s9
692 ; GFX9-NEXT: s_mov_b32 s8, s10
693 ; GFX9-NEXT: s_mov_b32 s9, s11
694 ; GFX9-NEXT: s_mov_b32 s10, s12
695 ; GFX9-NEXT: s_mov_b32 s11, s13
696 ; GFX9-NEXT: v_mov_b32_e32 v3, v4
697 ; GFX9-NEXT: v_lshl_or_b32 v2, v5, 16, v2
698 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
699 ; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16
700 ; GFX9-NEXT: s_waitcnt vmcnt(0)
701 ; GFX9-NEXT: ; return to shader part epilog
703 ; GFX10NSA-LABEL: gather4_c_b_cl_2d:
704 ; GFX10NSA: ; %bb.0: ; %main_body
705 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
706 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
707 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
708 ; GFX10NSA-NEXT: v_and_b32_e32 v2, 0xffff, v2
709 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
710 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
711 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
712 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
713 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
714 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
715 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
716 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
717 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
718 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
719 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
720 ; GFX10NSA-NEXT: v_lshl_or_b32 v2, v3, 16, v2
721 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
722 ; GFX10NSA-NEXT: image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
723 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
724 ; GFX10NSA-NEXT: ; return to shader part epilog
726 ; GFX12-LABEL: gather4_c_b_cl_2d:
727 ; GFX12: ; %bb.0: ; %main_body
728 ; GFX12-NEXT: s_mov_b32 s14, exec_lo
729 ; GFX12-NEXT: s_mov_b32 s0, s2
730 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
731 ; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2
732 ; GFX12-NEXT: s_mov_b32 s1, s3
733 ; GFX12-NEXT: s_mov_b32 s2, s4
734 ; GFX12-NEXT: s_mov_b32 s3, s5
735 ; GFX12-NEXT: s_mov_b32 s4, s6
736 ; GFX12-NEXT: s_mov_b32 s5, s7
737 ; GFX12-NEXT: s_mov_b32 s6, s8
738 ; GFX12-NEXT: s_mov_b32 s7, s9
739 ; GFX12-NEXT: s_mov_b32 s8, s10
740 ; GFX12-NEXT: s_mov_b32 s9, s11
741 ; GFX12-NEXT: s_mov_b32 s10, s12
742 ; GFX12-NEXT: s_mov_b32 s11, s13
743 ; GFX12-NEXT: v_lshl_or_b32 v2, v3, 16, v2
744 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s14
745 ; GFX12-NEXT: image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
746 ; GFX12-NEXT: s_wait_samplecnt 0x0
747 ; GFX12-NEXT: ; return to shader part epilog
749 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32 1, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; A16 gather4 with explicit LOD (gather4_l) on a 2D image.
; The expected output contains no exec save, s_wqm or exec restore: only
; the descriptor copies and the %s/%t packing precede the image instruction.
; %lod (half) is not packed with the coordinates and stays in v2. GFX9 and
; GFX10NSA address v[1:2]; GFX12 passes the register list [v0, v2].
; NOTE(review): the gfx1100 and gfx1200 RUN lines of this file pass no
; -amdgpu-enable-delay-alu=0. If llc emits s_delay_alu between the dependent
; v_and_b32 / v_lshl_or_b32 pair on those targets, the NEXT checks below
; would not match - confirm by running the test.
753 define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
754 ; GFX9-LABEL: gather4_l_2d:
755 ; GFX9: ; %bb.0: ; %main_body
756 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
757 ; GFX9-NEXT: s_mov_b32 s0, s2
758 ; GFX9-NEXT: s_mov_b32 s1, s3
759 ; GFX9-NEXT: s_mov_b32 s2, s4
760 ; GFX9-NEXT: s_mov_b32 s3, s5
761 ; GFX9-NEXT: s_mov_b32 s4, s6
762 ; GFX9-NEXT: s_mov_b32 s5, s7
763 ; GFX9-NEXT: s_mov_b32 s6, s8
764 ; GFX9-NEXT: s_mov_b32 s7, s9
765 ; GFX9-NEXT: s_mov_b32 s8, s10
766 ; GFX9-NEXT: s_mov_b32 s9, s11
767 ; GFX9-NEXT: s_mov_b32 s10, s12
768 ; GFX9-NEXT: s_mov_b32 s11, s13
769 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
770 ; GFX9-NEXT: image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
771 ; GFX9-NEXT: s_waitcnt vmcnt(0)
772 ; GFX9-NEXT: ; return to shader part epilog
774 ; GFX10NSA-LABEL: gather4_l_2d:
775 ; GFX10NSA: ; %bb.0: ; %main_body
776 ; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
777 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
778 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
779 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
780 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
781 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v1, 16, v0
782 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
783 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
784 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
785 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
786 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
787 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
788 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
789 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
790 ; GFX10NSA-NEXT: image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
791 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
792 ; GFX10NSA-NEXT: ; return to shader part epilog
794 ; GFX12-LABEL: gather4_l_2d:
795 ; GFX12: ; %bb.0: ; %main_body
796 ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
797 ; GFX12-NEXT: s_mov_b32 s0, s2
798 ; GFX12-NEXT: s_mov_b32 s1, s3
799 ; GFX12-NEXT: s_mov_b32 s2, s4
800 ; GFX12-NEXT: s_mov_b32 s3, s5
801 ; GFX12-NEXT: v_lshl_or_b32 v0, v1, 16, v0
802 ; GFX12-NEXT: s_mov_b32 s4, s6
803 ; GFX12-NEXT: s_mov_b32 s5, s7
804 ; GFX12-NEXT: s_mov_b32 s6, s8
805 ; GFX12-NEXT: s_mov_b32 s7, s9
806 ; GFX12-NEXT: s_mov_b32 s8, s10
807 ; GFX12-NEXT: s_mov_b32 s9, s11
808 ; GFX12-NEXT: s_mov_b32 s10, s12
809 ; GFX12-NEXT: s_mov_b32 s11, s13
810 ; GFX12-NEXT: image_gather4_l v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
811 ; GFX12-NEXT: s_wait_samplecnt 0x0
812 ; GFX12-NEXT: ; return to shader part epilog
814 %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_c_l_2d: calls llvm.amdgcn.image.gather4.c.l.2d with a leading i32 1,
; a 32-bit float %zcompare and half-typed %s, %t and %lod. All three check
; prefixes expect one image_gather4_c_l with dmask:0x1 and the a16 modifier,
; after a v_and_b32 (mask 0xffff) / v_lshl_or_b32 (shift 16) pair has merged
; two 16-bit values into one VGPR (presumably %s and %t; confirm against the
; argument-to-VGPR assignment). The GFX9 run expects two extra v_mov_b32 copies
; so that the address ends up in the contiguous range v[1:3]; the GFX10NSA and
; GFX12 runs expect a bracketed [v0, v1, v3] list and need no copies.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script rather than editing them by hand.
818 define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
819 ; GFX9-LABEL: gather4_c_l_2d:
820 ; GFX9: ; %bb.0: ; %main_body
821 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
822 ; GFX9-NEXT: v_mov_b32_e32 v1, v0
823 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v4
824 ; GFX9-NEXT: s_mov_b32 s0, s2
825 ; GFX9-NEXT: s_mov_b32 s1, s3
826 ; GFX9-NEXT: s_mov_b32 s2, s4
827 ; GFX9-NEXT: s_mov_b32 s3, s5
828 ; GFX9-NEXT: s_mov_b32 s4, s6
829 ; GFX9-NEXT: s_mov_b32 s5, s7
830 ; GFX9-NEXT: s_mov_b32 s6, s8
831 ; GFX9-NEXT: s_mov_b32 s7, s9
832 ; GFX9-NEXT: s_mov_b32 s8, s10
833 ; GFX9-NEXT: s_mov_b32 s9, s11
834 ; GFX9-NEXT: s_mov_b32 s10, s12
835 ; GFX9-NEXT: s_mov_b32 s11, s13
836 ; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v0
837 ; GFX9-NEXT: image_gather4_c_l v[0:3], v[1:3], s[0:7], s[8:11] dmask:0x1 a16
838 ; GFX9-NEXT: s_waitcnt vmcnt(0)
839 ; GFX9-NEXT: ; return to shader part epilog
841 ; GFX10NSA-LABEL: gather4_c_l_2d:
842 ; GFX10NSA: ; %bb.0: ; %main_body
843 ; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
844 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
845 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
846 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
847 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
848 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
849 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
850 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
851 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
852 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
853 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
854 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
855 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
856 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
857 ; GFX10NSA-NEXT: image_gather4_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
858 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
859 ; GFX10NSA-NEXT: ; return to shader part epilog
861 ; GFX12-LABEL: gather4_c_l_2d:
862 ; GFX12: ; %bb.0: ; %main_body
863 ; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v1
864 ; GFX12-NEXT: s_mov_b32 s0, s2
865 ; GFX12-NEXT: s_mov_b32 s1, s3
866 ; GFX12-NEXT: s_mov_b32 s2, s4
867 ; GFX12-NEXT: s_mov_b32 s3, s5
868 ; GFX12-NEXT: v_lshl_or_b32 v1, v2, 16, v1
869 ; GFX12-NEXT: s_mov_b32 s4, s6
870 ; GFX12-NEXT: s_mov_b32 s5, s7
871 ; GFX12-NEXT: s_mov_b32 s6, s8
872 ; GFX12-NEXT: s_mov_b32 s7, s9
873 ; GFX12-NEXT: s_mov_b32 s8, s10
874 ; GFX12-NEXT: s_mov_b32 s9, s11
875 ; GFX12-NEXT: s_mov_b32 s10, s12
876 ; GFX12-NEXT: s_mov_b32 s11, s13
877 ; GFX12-NEXT: image_gather4_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
878 ; GFX12-NEXT: s_wait_samplecnt 0x0
879 ; GFX12-NEXT: ; return to shader part epilog
881 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_lz_2d: calls llvm.amdgcn.image.gather4.lz.2d with a leading i32 1 and
; half-typed %s and %t only. All three check prefixes expect one
; image_gather4_lz with dmask:0x1 and the a16 modifier whose whole address is
; the single VGPR v0, produced by a v_and_b32 (mask 0xffff) / v_lshl_or_b32
; (shift 16) pair that merges v0 and v1 (presumably %s and %t; confirm against
; the argument-to-VGPR assignment). The GFX12 run expects s_wait_samplecnt in
; place of s_waitcnt vmcnt(0).
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script rather than editing them by hand.
885 define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
886 ; GFX9-LABEL: gather4_lz_2d:
887 ; GFX9: ; %bb.0: ; %main_body
888 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
889 ; GFX9-NEXT: s_mov_b32 s0, s2
890 ; GFX9-NEXT: s_mov_b32 s1, s3
891 ; GFX9-NEXT: s_mov_b32 s2, s4
892 ; GFX9-NEXT: s_mov_b32 s3, s5
893 ; GFX9-NEXT: s_mov_b32 s4, s6
894 ; GFX9-NEXT: s_mov_b32 s5, s7
895 ; GFX9-NEXT: s_mov_b32 s6, s8
896 ; GFX9-NEXT: s_mov_b32 s7, s9
897 ; GFX9-NEXT: s_mov_b32 s8, s10
898 ; GFX9-NEXT: s_mov_b32 s9, s11
899 ; GFX9-NEXT: s_mov_b32 s10, s12
900 ; GFX9-NEXT: s_mov_b32 s11, s13
901 ; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
902 ; GFX9-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
903 ; GFX9-NEXT: s_waitcnt vmcnt(0)
904 ; GFX9-NEXT: ; return to shader part epilog
906 ; GFX10NSA-LABEL: gather4_lz_2d:
907 ; GFX10NSA: ; %bb.0: ; %main_body
908 ; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
909 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
910 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
911 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
912 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
913 ; GFX10NSA-NEXT: v_lshl_or_b32 v0, v1, 16, v0
914 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
915 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
916 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
917 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
918 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
919 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
920 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
921 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
922 ; GFX10NSA-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
923 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
924 ; GFX10NSA-NEXT: ; return to shader part epilog
926 ; GFX12-LABEL: gather4_lz_2d:
927 ; GFX12: ; %bb.0: ; %main_body
928 ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
929 ; GFX12-NEXT: s_mov_b32 s0, s2
930 ; GFX12-NEXT: s_mov_b32 s1, s3
931 ; GFX12-NEXT: s_mov_b32 s2, s4
932 ; GFX12-NEXT: s_mov_b32 s3, s5
933 ; GFX12-NEXT: v_lshl_or_b32 v0, v1, 16, v0
934 ; GFX12-NEXT: s_mov_b32 s4, s6
935 ; GFX12-NEXT: s_mov_b32 s5, s7
936 ; GFX12-NEXT: s_mov_b32 s6, s8
937 ; GFX12-NEXT: s_mov_b32 s7, s9
938 ; GFX12-NEXT: s_mov_b32 s8, s10
939 ; GFX12-NEXT: s_mov_b32 s9, s11
940 ; GFX12-NEXT: s_mov_b32 s10, s12
941 ; GFX12-NEXT: s_mov_b32 s11, s13
942 ; GFX12-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
943 ; GFX12-NEXT: s_wait_samplecnt 0x0
944 ; GFX12-NEXT: ; return to shader part epilog
946 %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_c_lz_2d: calls llvm.amdgcn.image.gather4.c.lz.2d with a leading i32 1,
; a 32-bit float %zcompare and half-typed %s and %t. All three check prefixes
; expect one image_gather4_c_lz with dmask:0x1 and the a16 modifier, after a
; v_and_b32 (mask 0xffff) / v_lshl_or_b32 (shift 16) pair has merged v1 and v2
; into v1 (presumably %s and %t; confirm against the argument-to-VGPR
; assignment). The GFX9 and GFX10NSA runs expect the address as the range
; v[0:1]; the GFX12 run expects the bracketed list [v0, v1] and
; s_wait_samplecnt in place of s_waitcnt vmcnt(0).
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script rather than editing them by hand.
950 define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
951 ; GFX9-LABEL: gather4_c_lz_2d:
952 ; GFX9: ; %bb.0: ; %main_body
953 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
954 ; GFX9-NEXT: s_mov_b32 s0, s2
955 ; GFX9-NEXT: s_mov_b32 s1, s3
956 ; GFX9-NEXT: s_mov_b32 s2, s4
957 ; GFX9-NEXT: s_mov_b32 s3, s5
958 ; GFX9-NEXT: s_mov_b32 s4, s6
959 ; GFX9-NEXT: s_mov_b32 s5, s7
960 ; GFX9-NEXT: s_mov_b32 s6, s8
961 ; GFX9-NEXT: s_mov_b32 s7, s9
962 ; GFX9-NEXT: s_mov_b32 s8, s10
963 ; GFX9-NEXT: s_mov_b32 s9, s11
964 ; GFX9-NEXT: s_mov_b32 s10, s12
965 ; GFX9-NEXT: s_mov_b32 s11, s13
966 ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
967 ; GFX9-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
968 ; GFX9-NEXT: s_waitcnt vmcnt(0)
969 ; GFX9-NEXT: ; return to shader part epilog
971 ; GFX10NSA-LABEL: gather4_c_lz_2d:
972 ; GFX10NSA: ; %bb.0: ; %main_body
973 ; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
974 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
975 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
976 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
977 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
978 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
979 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
980 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
981 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
982 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
983 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
984 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
985 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
986 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
987 ; GFX10NSA-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
988 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
989 ; GFX10NSA-NEXT: ; return to shader part epilog
991 ; GFX12-LABEL: gather4_c_lz_2d:
992 ; GFX12: ; %bb.0: ; %main_body
993 ; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v1
994 ; GFX12-NEXT: s_mov_b32 s0, s2
995 ; GFX12-NEXT: s_mov_b32 s1, s3
996 ; GFX12-NEXT: s_mov_b32 s2, s4
997 ; GFX12-NEXT: s_mov_b32 s3, s5
998 ; GFX12-NEXT: v_lshl_or_b32 v1, v2, 16, v1
999 ; GFX12-NEXT: s_mov_b32 s4, s6
1000 ; GFX12-NEXT: s_mov_b32 s5, s7
1001 ; GFX12-NEXT: s_mov_b32 s6, s8
1002 ; GFX12-NEXT: s_mov_b32 s7, s9
1003 ; GFX12-NEXT: s_mov_b32 s8, s10
1004 ; GFX12-NEXT: s_mov_b32 s9, s11
1005 ; GFX12-NEXT: s_mov_b32 s10, s12
1006 ; GFX12-NEXT: s_mov_b32 s11, s13
1007 ; GFX12-NEXT: image_gather4_c_lz v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
1008 ; GFX12-NEXT: s_wait_samplecnt 0x0
1009 ; GFX12-NEXT: ; return to shader part epilog
1011 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; Declarations of the llvm.amdgcn.image.gather4.* intrinsics with half-typed
; coordinate operands (presumably one per test function in this file; only some
; of the call sites are visible from here). Every declaration has the same
; shape: a leading i32 immarg, the half/float address operands, the <8 x i32>
; and <4 x i32> operands (passed as %rsrc and %samp at the call sites), and
; three trailing immargs (i1, i32, i32). All of them return <4 x float> and
; share attribute group #0, which is defined on the last line.
1015 declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1016 declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1017 declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1018 declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1019 declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1020 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32 immarg, half, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1021 declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32 immarg, half, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1022 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32 immarg, half, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1023 declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1024 declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1025 declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1026 declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1027 declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1028 declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1030 attributes #0 = { nounwind readonly }