1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10NSA %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -o - %s | FileCheck -check-prefix=GFX10NSA %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -o - %s | FileCheck -check-prefix=GFX12 %s
; Baseline 2D gather4: two float coordinates (%s, %t) and a leading i32 1
; operand, which the assertions below show as dmask:0x1.
; Every prefix expects s_wqm on exec followed by an s_and of exec with the
; saved mask before the image_gather4, plus SGPR copies that shift the inreg
; descriptor arguments down by two registers (s2..s13 -> s0..s11).
; GFX6 prints no dim modifier; the GFX10NSA and GFX12 prefixes expect
; dim:SQ_RSRC_IMG_2D, and GFX12 prints the address as a register list.
7 define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
8 ; GFX6-LABEL: gather4_2d:
9 ; GFX6: ; %bb.0: ; %main_body
10 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
11 ; GFX6-NEXT: s_mov_b32 s0, s2
12 ; GFX6-NEXT: s_mov_b32 s1, s3
13 ; GFX6-NEXT: s_mov_b32 s2, s4
14 ; GFX6-NEXT: s_mov_b32 s3, s5
15 ; GFX6-NEXT: s_mov_b32 s4, s6
16 ; GFX6-NEXT: s_mov_b32 s5, s7
17 ; GFX6-NEXT: s_mov_b32 s6, s8
18 ; GFX6-NEXT: s_mov_b32 s7, s9
19 ; GFX6-NEXT: s_mov_b32 s8, s10
20 ; GFX6-NEXT: s_mov_b32 s9, s11
21 ; GFX6-NEXT: s_mov_b32 s10, s12
22 ; GFX6-NEXT: s_mov_b32 s11, s13
23 ; GFX6-NEXT: s_wqm_b64 exec, exec
24 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
25 ; GFX6-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1
26 ; GFX6-NEXT: s_waitcnt vmcnt(0)
27 ; GFX6-NEXT: ; return to shader part epilog
29 ; GFX10NSA-LABEL: gather4_2d:
30 ; GFX10NSA: ; %bb.0: ; %main_body
31 ; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo
32 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
33 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
34 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1
35 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
36 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
37 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
38 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
39 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
40 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
41 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
42 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
43 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
44 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
45 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
46 ; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
47 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
48 ; GFX10NSA-NEXT: ; return to shader part epilog
50 ; GFX12-LABEL: gather4_2d:
51 ; GFX12: ; %bb.0: ; %main_body
52 ; GFX12-NEXT: s_mov_b32 s1, exec_lo
53 ; GFX12-NEXT: s_mov_b32 s0, s2
54 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
55 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s1
56 ; GFX12-NEXT: s_mov_b32 s1, s3
57 ; GFX12-NEXT: s_mov_b32 s2, s4
58 ; GFX12-NEXT: s_mov_b32 s3, s5
59 ; GFX12-NEXT: s_mov_b32 s4, s6
60 ; GFX12-NEXT: s_mov_b32 s5, s7
61 ; GFX12-NEXT: s_mov_b32 s6, s8
62 ; GFX12-NEXT: s_mov_b32 s7, s9
63 ; GFX12-NEXT: s_mov_b32 s8, s10
64 ; GFX12-NEXT: s_mov_b32 s9, s11
65 ; GFX12-NEXT: s_mov_b32 s10, s12
66 ; GFX12-NEXT: s_mov_b32 s11, s13
67 ; GFX12-NEXT: image_gather4 v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
68 ; GFX12-NEXT: s_wait_samplecnt 0x0
69 ; GFX12-NEXT: ; return to shader part epilog
71 %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; TFE variant of gather4_2d: the intrinsic returns { <4 x float>, i32 } and is
; called with i32 1 as its second-to-last operand (i32 0 in gather4_2d).
; NOTE(review): presumably that operand is the texfailctrl word with the TFE
; bit set -- confirm against the intrinsic definition.
; The assertions expect the `tfe` modifier, a five-register destination
; v[0:4] whose registers are all set to zero by v_mov before the image
; instruction, and the coordinates copied out of v0/v1 into v5/v6 first.
; Only element 0 of the returned struct is extracted (%r).
75 define amdgpu_ps <4 x float> @gather4_2d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
76 ; GFX6-LABEL: gather4_2d_tfe:
77 ; GFX6: ; %bb.0: ; %main_body
78 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
79 ; GFX6-NEXT: s_mov_b32 s0, s2
80 ; GFX6-NEXT: s_mov_b32 s1, s3
81 ; GFX6-NEXT: s_mov_b32 s2, s4
82 ; GFX6-NEXT: s_mov_b32 s3, s5
83 ; GFX6-NEXT: s_mov_b32 s4, s6
84 ; GFX6-NEXT: s_mov_b32 s5, s7
85 ; GFX6-NEXT: s_mov_b32 s6, s8
86 ; GFX6-NEXT: s_mov_b32 s7, s9
87 ; GFX6-NEXT: s_mov_b32 s8, s10
88 ; GFX6-NEXT: s_mov_b32 s9, s11
89 ; GFX6-NEXT: s_mov_b32 s10, s12
90 ; GFX6-NEXT: s_mov_b32 s11, s13
91 ; GFX6-NEXT: s_wqm_b64 exec, exec
92 ; GFX6-NEXT: v_mov_b32_e32 v5, v0
93 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
94 ; GFX6-NEXT: v_mov_b32_e32 v6, v1
95 ; GFX6-NEXT: v_mov_b32_e32 v1, v0
96 ; GFX6-NEXT: v_mov_b32_e32 v2, v0
97 ; GFX6-NEXT: v_mov_b32_e32 v3, v0
98 ; GFX6-NEXT: v_mov_b32_e32 v4, v0
99 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
100 ; GFX6-NEXT: image_gather4 v[0:4], v[5:6], s[0:7], s[8:11] dmask:0x1 tfe
101 ; GFX6-NEXT: s_waitcnt vmcnt(0)
102 ; GFX6-NEXT: ; return to shader part epilog
104 ; GFX10NSA-LABEL: gather4_2d_tfe:
105 ; GFX10NSA: ; %bb.0: ; %main_body
106 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
107 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
108 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
109 ; GFX10NSA-NEXT: v_mov_b32_e32 v5, v0
110 ; GFX10NSA-NEXT: v_mov_b32_e32 v0, 0
111 ; GFX10NSA-NEXT: v_mov_b32_e32 v6, v1
112 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
113 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
114 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
115 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
116 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
117 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
118 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
119 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
120 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
121 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
122 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
123 ; GFX10NSA-NEXT: v_mov_b32_e32 v1, v0
124 ; GFX10NSA-NEXT: v_mov_b32_e32 v2, v0
125 ; GFX10NSA-NEXT: v_mov_b32_e32 v3, v0
126 ; GFX10NSA-NEXT: v_mov_b32_e32 v4, v0
127 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
128 ; GFX10NSA-NEXT: image_gather4 v[0:4], v[5:6], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
129 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
130 ; GFX10NSA-NEXT: ; return to shader part epilog
132 ; GFX12-LABEL: gather4_2d_tfe:
133 ; GFX12: ; %bb.0: ; %main_body
134 ; GFX12-NEXT: s_mov_b32 s14, exec_lo
135 ; GFX12-NEXT: s_mov_b32 s0, s2
136 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
137 ; GFX12-NEXT: v_mov_b32_e32 v5, v0
138 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
139 ; GFX12-NEXT: v_mov_b32_e32 v6, v1
140 ; GFX12-NEXT: s_mov_b32 s1, s3
141 ; GFX12-NEXT: s_mov_b32 s2, s4
142 ; GFX12-NEXT: s_mov_b32 s3, s5
143 ; GFX12-NEXT: s_mov_b32 s4, s6
144 ; GFX12-NEXT: s_mov_b32 s5, s7
145 ; GFX12-NEXT: s_mov_b32 s6, s8
146 ; GFX12-NEXT: s_mov_b32 s7, s9
147 ; GFX12-NEXT: s_mov_b32 s8, s10
148 ; GFX12-NEXT: s_mov_b32 s9, s11
149 ; GFX12-NEXT: s_mov_b32 s10, s12
150 ; GFX12-NEXT: s_mov_b32 s11, s13
151 ; GFX12-NEXT: v_mov_b32_e32 v1, v0
152 ; GFX12-NEXT: v_mov_b32_e32 v2, v0
153 ; GFX12-NEXT: v_mov_b32_e32 v3, v0
154 ; GFX12-NEXT: v_mov_b32_e32 v4, v0
155 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s14
156 ; GFX12-NEXT: image_gather4 v[0:4], [v5, v6], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
157 ; GFX12-NEXT: s_wait_samplecnt 0x0
158 ; GFX12-NEXT: ; return to shader part epilog
160 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.gather4.2d.sl_v4f32i32s.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
161 %r = extractvalue { <4 x float>, i32 } %v, 0
; Cube gather4: three float coordinates (%s, %t, %face), so the address
; operand grows to three VGPRs (v[0:2], or [v0, v1, v2] on GFX12).
; The GFX6 assertion expects the `da` modifier on the image instruction;
; the GFX10NSA and GFX12 assertions expect dim:SQ_RSRC_IMG_CUBE instead.
165 define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
166 ; GFX6-LABEL: gather4_cube:
167 ; GFX6: ; %bb.0: ; %main_body
168 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
169 ; GFX6-NEXT: s_mov_b32 s0, s2
170 ; GFX6-NEXT: s_mov_b32 s1, s3
171 ; GFX6-NEXT: s_mov_b32 s2, s4
172 ; GFX6-NEXT: s_mov_b32 s3, s5
173 ; GFX6-NEXT: s_mov_b32 s4, s6
174 ; GFX6-NEXT: s_mov_b32 s5, s7
175 ; GFX6-NEXT: s_mov_b32 s6, s8
176 ; GFX6-NEXT: s_mov_b32 s7, s9
177 ; GFX6-NEXT: s_mov_b32 s8, s10
178 ; GFX6-NEXT: s_mov_b32 s9, s11
179 ; GFX6-NEXT: s_mov_b32 s10, s12
180 ; GFX6-NEXT: s_mov_b32 s11, s13
181 ; GFX6-NEXT: s_wqm_b64 exec, exec
182 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
183 ; GFX6-NEXT: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 da
184 ; GFX6-NEXT: s_waitcnt vmcnt(0)
185 ; GFX6-NEXT: ; return to shader part epilog
187 ; GFX10NSA-LABEL: gather4_cube:
188 ; GFX10NSA: ; %bb.0: ; %main_body
189 ; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo
190 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
191 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
192 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1
193 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
194 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
195 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
196 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
197 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
198 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
199 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
200 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
201 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
202 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
203 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
204 ; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE
205 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
206 ; GFX10NSA-NEXT: ; return to shader part epilog
208 ; GFX12-LABEL: gather4_cube:
209 ; GFX12: ; %bb.0: ; %main_body
210 ; GFX12-NEXT: s_mov_b32 s1, exec_lo
211 ; GFX12-NEXT: s_mov_b32 s0, s2
212 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
213 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s1
214 ; GFX12-NEXT: s_mov_b32 s1, s3
215 ; GFX12-NEXT: s_mov_b32 s2, s4
216 ; GFX12-NEXT: s_mov_b32 s3, s5
217 ; GFX12-NEXT: s_mov_b32 s4, s6
218 ; GFX12-NEXT: s_mov_b32 s5, s7
219 ; GFX12-NEXT: s_mov_b32 s6, s8
220 ; GFX12-NEXT: s_mov_b32 s7, s9
221 ; GFX12-NEXT: s_mov_b32 s8, s10
222 ; GFX12-NEXT: s_mov_b32 s9, s11
223 ; GFX12-NEXT: s_mov_b32 s10, s12
224 ; GFX12-NEXT: s_mov_b32 s11, s13
225 ; GFX12-NEXT: image_gather4 v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE
226 ; GFX12-NEXT: s_wait_samplecnt 0x0
227 ; GFX12-NEXT: ; return to shader part epilog
229 %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 1, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; 2D-array gather4: three float coordinates (%s, %t, %slice) in a
; three-VGPR address operand.
; The GFX6 assertion expects the `da` modifier, the same spelling the cube
; test expects; the GFX10NSA and GFX12 assertions distinguish the two cases
; by expecting dim:SQ_RSRC_IMG_2D_ARRAY here.
233 define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
234 ; GFX6-LABEL: gather4_2darray:
235 ; GFX6: ; %bb.0: ; %main_body
236 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
237 ; GFX6-NEXT: s_mov_b32 s0, s2
238 ; GFX6-NEXT: s_mov_b32 s1, s3
239 ; GFX6-NEXT: s_mov_b32 s2, s4
240 ; GFX6-NEXT: s_mov_b32 s3, s5
241 ; GFX6-NEXT: s_mov_b32 s4, s6
242 ; GFX6-NEXT: s_mov_b32 s5, s7
243 ; GFX6-NEXT: s_mov_b32 s6, s8
244 ; GFX6-NEXT: s_mov_b32 s7, s9
245 ; GFX6-NEXT: s_mov_b32 s8, s10
246 ; GFX6-NEXT: s_mov_b32 s9, s11
247 ; GFX6-NEXT: s_mov_b32 s10, s12
248 ; GFX6-NEXT: s_mov_b32 s11, s13
249 ; GFX6-NEXT: s_wqm_b64 exec, exec
250 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
251 ; GFX6-NEXT: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 da
252 ; GFX6-NEXT: s_waitcnt vmcnt(0)
253 ; GFX6-NEXT: ; return to shader part epilog
255 ; GFX10NSA-LABEL: gather4_2darray:
256 ; GFX10NSA: ; %bb.0: ; %main_body
257 ; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo
258 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
259 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
260 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1
261 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
262 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
263 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
264 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
265 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
266 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
267 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
268 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
269 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
270 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
271 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
272 ; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
273 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
274 ; GFX10NSA-NEXT: ; return to shader part epilog
276 ; GFX12-LABEL: gather4_2darray:
277 ; GFX12: ; %bb.0: ; %main_body
278 ; GFX12-NEXT: s_mov_b32 s1, exec_lo
279 ; GFX12-NEXT: s_mov_b32 s0, s2
280 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
281 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s1
282 ; GFX12-NEXT: s_mov_b32 s1, s3
283 ; GFX12-NEXT: s_mov_b32 s2, s4
284 ; GFX12-NEXT: s_mov_b32 s3, s5
285 ; GFX12-NEXT: s_mov_b32 s4, s6
286 ; GFX12-NEXT: s_mov_b32 s5, s7
287 ; GFX12-NEXT: s_mov_b32 s6, s8
288 ; GFX12-NEXT: s_mov_b32 s7, s9
289 ; GFX12-NEXT: s_mov_b32 s8, s10
290 ; GFX12-NEXT: s_mov_b32 s9, s11
291 ; GFX12-NEXT: s_mov_b32 s10, s12
292 ; GFX12-NEXT: s_mov_b32 s11, s13
293 ; GFX12-NEXT: image_gather4 v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
294 ; GFX12-NEXT: s_wait_samplecnt 0x0
295 ; GFX12-NEXT: ; return to shader part epilog
297 %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 1, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4.c on a 2D image: the intrinsic takes %zcompare ahead of the
; coordinates (%s, %t), giving three address VGPRs.
; The assertions expect the image_gather4_c opcode with the arguments used in
; their incoming registers v0..v2, i.e. no VGPR copies before the instruction.
301 define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
302 ; GFX6-LABEL: gather4_c_2d:
303 ; GFX6: ; %bb.0: ; %main_body
304 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
305 ; GFX6-NEXT: s_mov_b32 s0, s2
306 ; GFX6-NEXT: s_mov_b32 s1, s3
307 ; GFX6-NEXT: s_mov_b32 s2, s4
308 ; GFX6-NEXT: s_mov_b32 s3, s5
309 ; GFX6-NEXT: s_mov_b32 s4, s6
310 ; GFX6-NEXT: s_mov_b32 s5, s7
311 ; GFX6-NEXT: s_mov_b32 s6, s8
312 ; GFX6-NEXT: s_mov_b32 s7, s9
313 ; GFX6-NEXT: s_mov_b32 s8, s10
314 ; GFX6-NEXT: s_mov_b32 s9, s11
315 ; GFX6-NEXT: s_mov_b32 s10, s12
316 ; GFX6-NEXT: s_mov_b32 s11, s13
317 ; GFX6-NEXT: s_wqm_b64 exec, exec
318 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
319 ; GFX6-NEXT: image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
320 ; GFX6-NEXT: s_waitcnt vmcnt(0)
321 ; GFX6-NEXT: ; return to shader part epilog
323 ; GFX10NSA-LABEL: gather4_c_2d:
324 ; GFX10NSA: ; %bb.0: ; %main_body
325 ; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo
326 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
327 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
328 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1
329 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
330 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
331 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
332 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
333 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
334 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
335 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
336 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
337 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
338 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
339 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
340 ; GFX10NSA-NEXT: image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
341 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
342 ; GFX10NSA-NEXT: ; return to shader part epilog
344 ; GFX12-LABEL: gather4_c_2d:
345 ; GFX12: ; %bb.0: ; %main_body
346 ; GFX12-NEXT: s_mov_b32 s1, exec_lo
347 ; GFX12-NEXT: s_mov_b32 s0, s2
348 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
349 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s1
350 ; GFX12-NEXT: s_mov_b32 s1, s3
351 ; GFX12-NEXT: s_mov_b32 s2, s4
352 ; GFX12-NEXT: s_mov_b32 s3, s5
353 ; GFX12-NEXT: s_mov_b32 s4, s6
354 ; GFX12-NEXT: s_mov_b32 s5, s7
355 ; GFX12-NEXT: s_mov_b32 s6, s8
356 ; GFX12-NEXT: s_mov_b32 s7, s9
357 ; GFX12-NEXT: s_mov_b32 s8, s10
358 ; GFX12-NEXT: s_mov_b32 s9, s11
359 ; GFX12-NEXT: s_mov_b32 s10, s12
360 ; GFX12-NEXT: s_mov_b32 s11, s13
361 ; GFX12-NEXT: image_gather4_c v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
362 ; GFX12-NEXT: s_wait_samplecnt 0x0
363 ; GFX12-NEXT: ; return to shader part epilog
365 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4.cl on a 2D image: the intrinsic takes %clamp after the coordinates
; (%s, %t), giving three address VGPRs.
; The assertions expect the image_gather4_cl opcode with the arguments used
; in their incoming registers v0..v2.
369 define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
370 ; GFX6-LABEL: gather4_cl_2d:
371 ; GFX6: ; %bb.0: ; %main_body
372 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
373 ; GFX6-NEXT: s_mov_b32 s0, s2
374 ; GFX6-NEXT: s_mov_b32 s1, s3
375 ; GFX6-NEXT: s_mov_b32 s2, s4
376 ; GFX6-NEXT: s_mov_b32 s3, s5
377 ; GFX6-NEXT: s_mov_b32 s4, s6
378 ; GFX6-NEXT: s_mov_b32 s5, s7
379 ; GFX6-NEXT: s_mov_b32 s6, s8
380 ; GFX6-NEXT: s_mov_b32 s7, s9
381 ; GFX6-NEXT: s_mov_b32 s8, s10
382 ; GFX6-NEXT: s_mov_b32 s9, s11
383 ; GFX6-NEXT: s_mov_b32 s10, s12
384 ; GFX6-NEXT: s_mov_b32 s11, s13
385 ; GFX6-NEXT: s_wqm_b64 exec, exec
386 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
387 ; GFX6-NEXT: image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
388 ; GFX6-NEXT: s_waitcnt vmcnt(0)
389 ; GFX6-NEXT: ; return to shader part epilog
391 ; GFX10NSA-LABEL: gather4_cl_2d:
392 ; GFX10NSA: ; %bb.0: ; %main_body
393 ; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo
394 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
395 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
396 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1
397 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
398 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
399 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
400 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
401 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
402 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
403 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
404 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
405 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
406 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
407 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
408 ; GFX10NSA-NEXT: image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
409 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
410 ; GFX10NSA-NEXT: ; return to shader part epilog
412 ; GFX12-LABEL: gather4_cl_2d:
413 ; GFX12: ; %bb.0: ; %main_body
414 ; GFX12-NEXT: s_mov_b32 s1, exec_lo
415 ; GFX12-NEXT: s_mov_b32 s0, s2
416 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
417 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s1
418 ; GFX12-NEXT: s_mov_b32 s1, s3
419 ; GFX12-NEXT: s_mov_b32 s2, s4
420 ; GFX12-NEXT: s_mov_b32 s3, s5
421 ; GFX12-NEXT: s_mov_b32 s4, s6
422 ; GFX12-NEXT: s_mov_b32 s5, s7
423 ; GFX12-NEXT: s_mov_b32 s6, s8
424 ; GFX12-NEXT: s_mov_b32 s7, s9
425 ; GFX12-NEXT: s_mov_b32 s8, s10
426 ; GFX12-NEXT: s_mov_b32 s9, s11
427 ; GFX12-NEXT: s_mov_b32 s10, s12
428 ; GFX12-NEXT: s_mov_b32 s11, s13
429 ; GFX12-NEXT: image_gather4_cl v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
430 ; GFX12-NEXT: s_wait_samplecnt 0x0
431 ; GFX12-NEXT: ; return to shader part epilog
433 %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 1, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4.c.cl on a 2D image: %zcompare precedes and %clamp follows the
; coordinates (%s, %t), giving four address VGPRs.
; The assertions expect the image_gather4_c_cl opcode with address v[0:3]
; (printed as [v0, v1, v2, v3] on GFX12).
437 define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
438 ; GFX6-LABEL: gather4_c_cl_2d:
439 ; GFX6: ; %bb.0: ; %main_body
440 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
441 ; GFX6-NEXT: s_mov_b32 s0, s2
442 ; GFX6-NEXT: s_mov_b32 s1, s3
443 ; GFX6-NEXT: s_mov_b32 s2, s4
444 ; GFX6-NEXT: s_mov_b32 s3, s5
445 ; GFX6-NEXT: s_mov_b32 s4, s6
446 ; GFX6-NEXT: s_mov_b32 s5, s7
447 ; GFX6-NEXT: s_mov_b32 s6, s8
448 ; GFX6-NEXT: s_mov_b32 s7, s9
449 ; GFX6-NEXT: s_mov_b32 s8, s10
450 ; GFX6-NEXT: s_mov_b32 s9, s11
451 ; GFX6-NEXT: s_mov_b32 s10, s12
452 ; GFX6-NEXT: s_mov_b32 s11, s13
453 ; GFX6-NEXT: s_wqm_b64 exec, exec
454 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
455 ; GFX6-NEXT: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
456 ; GFX6-NEXT: s_waitcnt vmcnt(0)
457 ; GFX6-NEXT: ; return to shader part epilog
459 ; GFX10NSA-LABEL: gather4_c_cl_2d:
460 ; GFX10NSA: ; %bb.0: ; %main_body
461 ; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo
462 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
463 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
464 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1
465 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
466 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
467 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
468 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
469 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
470 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
471 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
472 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
473 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
474 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
475 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
476 ; GFX10NSA-NEXT: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
477 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
478 ; GFX10NSA-NEXT: ; return to shader part epilog
480 ; GFX12-LABEL: gather4_c_cl_2d:
481 ; GFX12: ; %bb.0: ; %main_body
482 ; GFX12-NEXT: s_mov_b32 s1, exec_lo
483 ; GFX12-NEXT: s_mov_b32 s0, s2
484 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
485 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s1
486 ; GFX12-NEXT: s_mov_b32 s1, s3
487 ; GFX12-NEXT: s_mov_b32 s2, s4
488 ; GFX12-NEXT: s_mov_b32 s3, s5
489 ; GFX12-NEXT: s_mov_b32 s4, s6
490 ; GFX12-NEXT: s_mov_b32 s5, s7
491 ; GFX12-NEXT: s_mov_b32 s6, s8
492 ; GFX12-NEXT: s_mov_b32 s7, s9
493 ; GFX12-NEXT: s_mov_b32 s8, s10
494 ; GFX12-NEXT: s_mov_b32 s9, s11
495 ; GFX12-NEXT: s_mov_b32 s10, s12
496 ; GFX12-NEXT: s_mov_b32 s11, s13
497 ; GFX12-NEXT: image_gather4_c_cl v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
498 ; GFX12-NEXT: s_wait_samplecnt 0x0
499 ; GFX12-NEXT: ; return to shader part epilog
501 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4.b on a 2D image: %bias precedes the coordinates (%s, %t), giving
; three address VGPRs. The mangled intrinsic name carries an extra .f32
; suffix (v4f32.f32.f32) compared with the non-bias variants in this file.
; The assertions expect the image_gather4_b opcode with address v[0:2].
505 define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
506 ; GFX6-LABEL: gather4_b_2d:
507 ; GFX6: ; %bb.0: ; %main_body
508 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
509 ; GFX6-NEXT: s_mov_b32 s0, s2
510 ; GFX6-NEXT: s_mov_b32 s1, s3
511 ; GFX6-NEXT: s_mov_b32 s2, s4
512 ; GFX6-NEXT: s_mov_b32 s3, s5
513 ; GFX6-NEXT: s_mov_b32 s4, s6
514 ; GFX6-NEXT: s_mov_b32 s5, s7
515 ; GFX6-NEXT: s_mov_b32 s6, s8
516 ; GFX6-NEXT: s_mov_b32 s7, s9
517 ; GFX6-NEXT: s_mov_b32 s8, s10
518 ; GFX6-NEXT: s_mov_b32 s9, s11
519 ; GFX6-NEXT: s_mov_b32 s10, s12
520 ; GFX6-NEXT: s_mov_b32 s11, s13
521 ; GFX6-NEXT: s_wqm_b64 exec, exec
522 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
523 ; GFX6-NEXT: image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
524 ; GFX6-NEXT: s_waitcnt vmcnt(0)
525 ; GFX6-NEXT: ; return to shader part epilog
527 ; GFX10NSA-LABEL: gather4_b_2d:
528 ; GFX10NSA: ; %bb.0: ; %main_body
529 ; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo
530 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
531 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
532 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1
533 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
534 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
535 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
536 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
537 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
538 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
539 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
540 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
541 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
542 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
543 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
544 ; GFX10NSA-NEXT: image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
545 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
546 ; GFX10NSA-NEXT: ; return to shader part epilog
548 ; GFX12-LABEL: gather4_b_2d:
549 ; GFX12: ; %bb.0: ; %main_body
550 ; GFX12-NEXT: s_mov_b32 s1, exec_lo
551 ; GFX12-NEXT: s_mov_b32 s0, s2
552 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
553 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s1
554 ; GFX12-NEXT: s_mov_b32 s1, s3
555 ; GFX12-NEXT: s_mov_b32 s2, s4
556 ; GFX12-NEXT: s_mov_b32 s3, s5
557 ; GFX12-NEXT: s_mov_b32 s4, s6
558 ; GFX12-NEXT: s_mov_b32 s5, s7
559 ; GFX12-NEXT: s_mov_b32 s6, s8
560 ; GFX12-NEXT: s_mov_b32 s7, s9
561 ; GFX12-NEXT: s_mov_b32 s8, s10
562 ; GFX12-NEXT: s_mov_b32 s9, s11
563 ; GFX12-NEXT: s_mov_b32 s10, s12
564 ; GFX12-NEXT: s_mov_b32 s11, s13
565 ; GFX12-NEXT: image_gather4_b v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
566 ; GFX12-NEXT: s_wait_samplecnt 0x0
567 ; GFX12-NEXT: ; return to shader part epilog
569 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4.c.b on a 2D image: operand order is %bias, %zcompare, then the
; coordinates (%s, %t), giving four address VGPRs.
; The assertions expect the image_gather4_c_b opcode with address v[0:3]
; (printed as [v0, v1, v2, v3] on GFX12).
573 define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
574 ; GFX6-LABEL: gather4_c_b_2d:
575 ; GFX6: ; %bb.0: ; %main_body
576 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
577 ; GFX6-NEXT: s_mov_b32 s0, s2
578 ; GFX6-NEXT: s_mov_b32 s1, s3
579 ; GFX6-NEXT: s_mov_b32 s2, s4
580 ; GFX6-NEXT: s_mov_b32 s3, s5
581 ; GFX6-NEXT: s_mov_b32 s4, s6
582 ; GFX6-NEXT: s_mov_b32 s5, s7
583 ; GFX6-NEXT: s_mov_b32 s6, s8
584 ; GFX6-NEXT: s_mov_b32 s7, s9
585 ; GFX6-NEXT: s_mov_b32 s8, s10
586 ; GFX6-NEXT: s_mov_b32 s9, s11
587 ; GFX6-NEXT: s_mov_b32 s10, s12
588 ; GFX6-NEXT: s_mov_b32 s11, s13
589 ; GFX6-NEXT: s_wqm_b64 exec, exec
590 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
591 ; GFX6-NEXT: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
592 ; GFX6-NEXT: s_waitcnt vmcnt(0)
593 ; GFX6-NEXT: ; return to shader part epilog
595 ; GFX10NSA-LABEL: gather4_c_b_2d:
596 ; GFX10NSA: ; %bb.0: ; %main_body
597 ; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo
598 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
599 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
600 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1
601 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
602 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
603 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
604 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
605 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
606 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
607 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
608 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
609 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
610 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
611 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
612 ; GFX10NSA-NEXT: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
613 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
614 ; GFX10NSA-NEXT: ; return to shader part epilog
616 ; GFX12-LABEL: gather4_c_b_2d:
617 ; GFX12: ; %bb.0: ; %main_body
618 ; GFX12-NEXT: s_mov_b32 s1, exec_lo
619 ; GFX12-NEXT: s_mov_b32 s0, s2
620 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
621 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s1
622 ; GFX12-NEXT: s_mov_b32 s1, s3
623 ; GFX12-NEXT: s_mov_b32 s2, s4
624 ; GFX12-NEXT: s_mov_b32 s3, s5
625 ; GFX12-NEXT: s_mov_b32 s4, s6
626 ; GFX12-NEXT: s_mov_b32 s5, s7
627 ; GFX12-NEXT: s_mov_b32 s6, s8
628 ; GFX12-NEXT: s_mov_b32 s7, s9
629 ; GFX12-NEXT: s_mov_b32 s8, s10
630 ; GFX12-NEXT: s_mov_b32 s9, s11
631 ; GFX12-NEXT: s_mov_b32 s10, s12
632 ; GFX12-NEXT: s_mov_b32 s11, s13
633 ; GFX12-NEXT: image_gather4_c_b v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
634 ; GFX12-NEXT: s_wait_samplecnt 0x0
635 ; GFX12-NEXT: ; return to shader part epilog
637 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4.b.cl on a 2D image: operand order is %bias, the coordinates
; (%s, %t), then %clamp, giving four address VGPRs.
; The assertions expect the image_gather4_b_cl opcode with address v[0:3]
; (printed as [v0, v1, v2, v3] on GFX12).
641 define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
642 ; GFX6-LABEL: gather4_b_cl_2d:
643 ; GFX6: ; %bb.0: ; %main_body
644 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
645 ; GFX6-NEXT: s_mov_b32 s0, s2
646 ; GFX6-NEXT: s_mov_b32 s1, s3
647 ; GFX6-NEXT: s_mov_b32 s2, s4
648 ; GFX6-NEXT: s_mov_b32 s3, s5
649 ; GFX6-NEXT: s_mov_b32 s4, s6
650 ; GFX6-NEXT: s_mov_b32 s5, s7
651 ; GFX6-NEXT: s_mov_b32 s6, s8
652 ; GFX6-NEXT: s_mov_b32 s7, s9
653 ; GFX6-NEXT: s_mov_b32 s8, s10
654 ; GFX6-NEXT: s_mov_b32 s9, s11
655 ; GFX6-NEXT: s_mov_b32 s10, s12
656 ; GFX6-NEXT: s_mov_b32 s11, s13
657 ; GFX6-NEXT: s_wqm_b64 exec, exec
658 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
659 ; GFX6-NEXT: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
660 ; GFX6-NEXT: s_waitcnt vmcnt(0)
661 ; GFX6-NEXT: ; return to shader part epilog
663 ; GFX10NSA-LABEL: gather4_b_cl_2d:
664 ; GFX10NSA: ; %bb.0: ; %main_body
665 ; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo
666 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
667 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
668 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1
669 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
670 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
671 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
672 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
673 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
674 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
675 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
676 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
677 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
678 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
679 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
680 ; GFX10NSA-NEXT: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
681 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
682 ; GFX10NSA-NEXT: ; return to shader part epilog
684 ; GFX12-LABEL: gather4_b_cl_2d:
685 ; GFX12: ; %bb.0: ; %main_body
686 ; GFX12-NEXT: s_mov_b32 s1, exec_lo
687 ; GFX12-NEXT: s_mov_b32 s0, s2
688 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
689 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s1
690 ; GFX12-NEXT: s_mov_b32 s1, s3
691 ; GFX12-NEXT: s_mov_b32 s2, s4
692 ; GFX12-NEXT: s_mov_b32 s3, s5
693 ; GFX12-NEXT: s_mov_b32 s4, s6
694 ; GFX12-NEXT: s_mov_b32 s5, s7
695 ; GFX12-NEXT: s_mov_b32 s6, s8
696 ; GFX12-NEXT: s_mov_b32 s7, s9
697 ; GFX12-NEXT: s_mov_b32 s8, s10
698 ; GFX12-NEXT: s_mov_b32 s9, s11
699 ; GFX12-NEXT: s_mov_b32 s10, s12
700 ; GFX12-NEXT: s_mov_b32 s11, s13
701 ; GFX12-NEXT: image_gather4_b_cl v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
702 ; GFX12-NEXT: s_wait_samplecnt 0x0
703 ; GFX12-NEXT: ; return to shader part epilog
705 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4.c.b.cl on a 2D image: operand order is %bias, %zcompare, the
; coordinates (%s, %t), then %clamp, giving five address VGPRs.
; GFX6 and GFX10NSA assertions expect one contiguous range v[0:4]. The GFX12
; assertion expects a four-entry register list whose last entry is the range
; v[3:4], unlike the four-operand tests where every entry is a single VGPR.
709 define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
710 ; GFX6-LABEL: gather4_c_b_cl_2d:
711 ; GFX6: ; %bb.0: ; %main_body
712 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
713 ; GFX6-NEXT: s_mov_b32 s0, s2
714 ; GFX6-NEXT: s_mov_b32 s1, s3
715 ; GFX6-NEXT: s_mov_b32 s2, s4
716 ; GFX6-NEXT: s_mov_b32 s3, s5
717 ; GFX6-NEXT: s_mov_b32 s4, s6
718 ; GFX6-NEXT: s_mov_b32 s5, s7
719 ; GFX6-NEXT: s_mov_b32 s6, s8
720 ; GFX6-NEXT: s_mov_b32 s7, s9
721 ; GFX6-NEXT: s_mov_b32 s8, s10
722 ; GFX6-NEXT: s_mov_b32 s9, s11
723 ; GFX6-NEXT: s_mov_b32 s10, s12
724 ; GFX6-NEXT: s_mov_b32 s11, s13
725 ; GFX6-NEXT: s_wqm_b64 exec, exec
726 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
727 ; GFX6-NEXT: image_gather4_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
728 ; GFX6-NEXT: s_waitcnt vmcnt(0)
729 ; GFX6-NEXT: ; return to shader part epilog
731 ; GFX10NSA-LABEL: gather4_c_b_cl_2d:
732 ; GFX10NSA: ; %bb.0: ; %main_body
733 ; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo
734 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
735 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
736 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1
737 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
738 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
739 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
740 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
741 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
742 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
743 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
744 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
745 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
746 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
747 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
748 ; GFX10NSA-NEXT: image_gather4_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
749 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
750 ; GFX10NSA-NEXT: ; return to shader part epilog
752 ; GFX12-LABEL: gather4_c_b_cl_2d:
753 ; GFX12: ; %bb.0: ; %main_body
754 ; GFX12-NEXT: s_mov_b32 s1, exec_lo
755 ; GFX12-NEXT: s_mov_b32 s0, s2
756 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
757 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s1
758 ; GFX12-NEXT: s_mov_b32 s1, s3
759 ; GFX12-NEXT: s_mov_b32 s2, s4
760 ; GFX12-NEXT: s_mov_b32 s3, s5
761 ; GFX12-NEXT: s_mov_b32 s4, s6
762 ; GFX12-NEXT: s_mov_b32 s5, s7
763 ; GFX12-NEXT: s_mov_b32 s6, s8
764 ; GFX12-NEXT: s_mov_b32 s7, s9
765 ; GFX12-NEXT: s_mov_b32 s8, s10
766 ; GFX12-NEXT: s_mov_b32 s9, s11
767 ; GFX12-NEXT: s_mov_b32 s10, s12
768 ; GFX12-NEXT: s_mov_b32 s11, s13
769 ; GFX12-NEXT: image_gather4_c_b_cl v[0:3], [v0, v1, v2, v[3:4]], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
770 ; GFX12-NEXT: s_wait_samplecnt 0x0
771 ; GFX12-NEXT: ; return to shader part epilog
773 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_l_2d: image gather4 with an explicit LOD operand on a 2D image.
; Calls llvm.amdgcn.image.gather4.l.2d with dmask immediate 1 and the address
; operands (%s, %t, %lod). For every run line the expected code copies the
; inreg %rsrc/%samp values from s[2:13] down to s[0:11] and then issues one
; image_gather4_l. Unlike the plain gather4 tests in this file, no s_wqm or
; exec save/restore is expected here (presumably because an explicit LOD needs
; no implicit derivatives; confirm against the backend). Under the GFX12 prefix
; the address is printed as a register list and the wait is s_wait_samplecnt
; rather than s_waitcnt vmcnt(0).
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
777 define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
778 ; GFX6-LABEL: gather4_l_2d:
779 ; GFX6: ; %bb.0: ; %main_body
780 ; GFX6-NEXT: s_mov_b32 s0, s2
781 ; GFX6-NEXT: s_mov_b32 s1, s3
782 ; GFX6-NEXT: s_mov_b32 s2, s4
783 ; GFX6-NEXT: s_mov_b32 s3, s5
784 ; GFX6-NEXT: s_mov_b32 s4, s6
785 ; GFX6-NEXT: s_mov_b32 s5, s7
786 ; GFX6-NEXT: s_mov_b32 s6, s8
787 ; GFX6-NEXT: s_mov_b32 s7, s9
788 ; GFX6-NEXT: s_mov_b32 s8, s10
789 ; GFX6-NEXT: s_mov_b32 s9, s11
790 ; GFX6-NEXT: s_mov_b32 s10, s12
791 ; GFX6-NEXT: s_mov_b32 s11, s13
792 ; GFX6-NEXT: image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
793 ; GFX6-NEXT: s_waitcnt vmcnt(0)
794 ; GFX6-NEXT: ; return to shader part epilog
796 ; GFX10NSA-LABEL: gather4_l_2d:
797 ; GFX10NSA: ; %bb.0: ; %main_body
798 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
799 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
800 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
801 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
802 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
803 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
804 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
805 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
806 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
807 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
808 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
809 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
810 ; GFX10NSA-NEXT: image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
811 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
812 ; GFX10NSA-NEXT: ; return to shader part epilog
814 ; GFX12-LABEL: gather4_l_2d:
815 ; GFX12: ; %bb.0: ; %main_body
816 ; GFX12-NEXT: s_mov_b32 s0, s2
817 ; GFX12-NEXT: s_mov_b32 s1, s3
818 ; GFX12-NEXT: s_mov_b32 s2, s4
819 ; GFX12-NEXT: s_mov_b32 s3, s5
820 ; GFX12-NEXT: s_mov_b32 s4, s6
821 ; GFX12-NEXT: s_mov_b32 s5, s7
822 ; GFX12-NEXT: s_mov_b32 s6, s8
823 ; GFX12-NEXT: s_mov_b32 s7, s9
824 ; GFX12-NEXT: s_mov_b32 s8, s10
825 ; GFX12-NEXT: s_mov_b32 s9, s11
826 ; GFX12-NEXT: s_mov_b32 s10, s12
827 ; GFX12-NEXT: s_mov_b32 s11, s13
828 ; GFX12-NEXT: image_gather4_l v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
829 ; GFX12-NEXT: s_wait_samplecnt 0x0
830 ; GFX12-NEXT: ; return to shader part epilog
832 %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 1, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_c_l_2d: depth-compare variant of the explicit-LOD gather4 on a 2D image.
; Calls llvm.amdgcn.image.gather4.c.l.2d with dmask immediate 1 and the address
; operands (%zcompare, %s, %t, %lod), which occupy four VGPRs (v[0:3]) in the
; expected image_gather4_c_l. The inreg %rsrc/%samp values are copied from
; s[2:13] down to s[0:11] first. No s_wqm or exec save/restore is expected.
; Under the GFX12 prefix the address is printed as the list [v0, v1, v2, v3]
; and the wait is s_wait_samplecnt rather than s_waitcnt vmcnt(0).
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
836 define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
837 ; GFX6-LABEL: gather4_c_l_2d:
838 ; GFX6: ; %bb.0: ; %main_body
839 ; GFX6-NEXT: s_mov_b32 s0, s2
840 ; GFX6-NEXT: s_mov_b32 s1, s3
841 ; GFX6-NEXT: s_mov_b32 s2, s4
842 ; GFX6-NEXT: s_mov_b32 s3, s5
843 ; GFX6-NEXT: s_mov_b32 s4, s6
844 ; GFX6-NEXT: s_mov_b32 s5, s7
845 ; GFX6-NEXT: s_mov_b32 s6, s8
846 ; GFX6-NEXT: s_mov_b32 s7, s9
847 ; GFX6-NEXT: s_mov_b32 s8, s10
848 ; GFX6-NEXT: s_mov_b32 s9, s11
849 ; GFX6-NEXT: s_mov_b32 s10, s12
850 ; GFX6-NEXT: s_mov_b32 s11, s13
851 ; GFX6-NEXT: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
852 ; GFX6-NEXT: s_waitcnt vmcnt(0)
853 ; GFX6-NEXT: ; return to shader part epilog
855 ; GFX10NSA-LABEL: gather4_c_l_2d:
856 ; GFX10NSA: ; %bb.0: ; %main_body
857 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
858 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
859 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
860 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
861 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
862 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
863 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
864 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
865 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
866 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
867 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
868 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
869 ; GFX10NSA-NEXT: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
870 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
871 ; GFX10NSA-NEXT: ; return to shader part epilog
873 ; GFX12-LABEL: gather4_c_l_2d:
874 ; GFX12: ; %bb.0: ; %main_body
875 ; GFX12-NEXT: s_mov_b32 s0, s2
876 ; GFX12-NEXT: s_mov_b32 s1, s3
877 ; GFX12-NEXT: s_mov_b32 s2, s4
878 ; GFX12-NEXT: s_mov_b32 s3, s5
879 ; GFX12-NEXT: s_mov_b32 s4, s6
880 ; GFX12-NEXT: s_mov_b32 s5, s7
881 ; GFX12-NEXT: s_mov_b32 s6, s8
882 ; GFX12-NEXT: s_mov_b32 s7, s9
883 ; GFX12-NEXT: s_mov_b32 s8, s10
884 ; GFX12-NEXT: s_mov_b32 s9, s11
885 ; GFX12-NEXT: s_mov_b32 s10, s12
886 ; GFX12-NEXT: s_mov_b32 s11, s13
887 ; GFX12-NEXT: image_gather4_c_l v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
888 ; GFX12-NEXT: s_wait_samplecnt 0x0
889 ; GFX12-NEXT: ; return to shader part epilog
891 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_lz_2d: the "lz" form of image gather4 on a 2D image.
; Calls llvm.amdgcn.image.gather4.lz.2d with dmask immediate 1 and only the
; coordinates (%s, %t) as address operands, so the expected image_gather4_lz
; reads two VGPRs (v[0:1]). The inreg %rsrc/%samp values are copied from
; s[2:13] down to s[0:11] first. No s_wqm or exec save/restore is expected,
; in contrast to the plain gather4 tests in this file.
; Under the GFX12 prefix the address is printed as the list [v0, v1] and the
; wait is s_wait_samplecnt rather than s_waitcnt vmcnt(0).
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
895 define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
896 ; GFX6-LABEL: gather4_lz_2d:
897 ; GFX6: ; %bb.0: ; %main_body
898 ; GFX6-NEXT: s_mov_b32 s0, s2
899 ; GFX6-NEXT: s_mov_b32 s1, s3
900 ; GFX6-NEXT: s_mov_b32 s2, s4
901 ; GFX6-NEXT: s_mov_b32 s3, s5
902 ; GFX6-NEXT: s_mov_b32 s4, s6
903 ; GFX6-NEXT: s_mov_b32 s5, s7
904 ; GFX6-NEXT: s_mov_b32 s6, s8
905 ; GFX6-NEXT: s_mov_b32 s7, s9
906 ; GFX6-NEXT: s_mov_b32 s8, s10
907 ; GFX6-NEXT: s_mov_b32 s9, s11
908 ; GFX6-NEXT: s_mov_b32 s10, s12
909 ; GFX6-NEXT: s_mov_b32 s11, s13
910 ; GFX6-NEXT: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1
911 ; GFX6-NEXT: s_waitcnt vmcnt(0)
912 ; GFX6-NEXT: ; return to shader part epilog
914 ; GFX10NSA-LABEL: gather4_lz_2d:
915 ; GFX10NSA: ; %bb.0: ; %main_body
916 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
917 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
918 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
919 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
920 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
921 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
922 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
923 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
924 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
925 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
926 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
927 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
928 ; GFX10NSA-NEXT: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
929 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
930 ; GFX10NSA-NEXT: ; return to shader part epilog
932 ; GFX12-LABEL: gather4_lz_2d:
933 ; GFX12: ; %bb.0: ; %main_body
934 ; GFX12-NEXT: s_mov_b32 s0, s2
935 ; GFX12-NEXT: s_mov_b32 s1, s3
936 ; GFX12-NEXT: s_mov_b32 s2, s4
937 ; GFX12-NEXT: s_mov_b32 s3, s5
938 ; GFX12-NEXT: s_mov_b32 s4, s6
939 ; GFX12-NEXT: s_mov_b32 s5, s7
940 ; GFX12-NEXT: s_mov_b32 s6, s8
941 ; GFX12-NEXT: s_mov_b32 s7, s9
942 ; GFX12-NEXT: s_mov_b32 s8, s10
943 ; GFX12-NEXT: s_mov_b32 s9, s11
944 ; GFX12-NEXT: s_mov_b32 s10, s12
945 ; GFX12-NEXT: s_mov_b32 s11, s13
946 ; GFX12-NEXT: image_gather4_lz v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
947 ; GFX12-NEXT: s_wait_samplecnt 0x0
948 ; GFX12-NEXT: ; return to shader part epilog
950 %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_c_lz_2d: depth-compare variant of the "lz" gather4 on a 2D image.
; Calls llvm.amdgcn.image.gather4.c.lz.2d with dmask immediate 1 and the
; address operands (%zcompare, %s, %t), so the expected image_gather4_c_lz
; reads three VGPRs (v[0:2]). The inreg %rsrc/%samp values are copied from
; s[2:13] down to s[0:11] first. No s_wqm or exec save/restore is expected.
; Under the GFX12 prefix the address is printed as the list [v0, v1, v2] and
; the wait is s_wait_samplecnt rather than s_waitcnt vmcnt(0).
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
954 define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
955 ; GFX6-LABEL: gather4_c_lz_2d:
956 ; GFX6: ; %bb.0: ; %main_body
957 ; GFX6-NEXT: s_mov_b32 s0, s2
958 ; GFX6-NEXT: s_mov_b32 s1, s3
959 ; GFX6-NEXT: s_mov_b32 s2, s4
960 ; GFX6-NEXT: s_mov_b32 s3, s5
961 ; GFX6-NEXT: s_mov_b32 s4, s6
962 ; GFX6-NEXT: s_mov_b32 s5, s7
963 ; GFX6-NEXT: s_mov_b32 s6, s8
964 ; GFX6-NEXT: s_mov_b32 s7, s9
965 ; GFX6-NEXT: s_mov_b32 s8, s10
966 ; GFX6-NEXT: s_mov_b32 s9, s11
967 ; GFX6-NEXT: s_mov_b32 s10, s12
968 ; GFX6-NEXT: s_mov_b32 s11, s13
969 ; GFX6-NEXT: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
970 ; GFX6-NEXT: s_waitcnt vmcnt(0)
971 ; GFX6-NEXT: ; return to shader part epilog
973 ; GFX10NSA-LABEL: gather4_c_lz_2d:
974 ; GFX10NSA: ; %bb.0: ; %main_body
975 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
976 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
977 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
978 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
979 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
980 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
981 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
982 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
983 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
984 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
985 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
986 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
987 ; GFX10NSA-NEXT: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
988 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
989 ; GFX10NSA-NEXT: ; return to shader part epilog
991 ; GFX12-LABEL: gather4_c_lz_2d:
992 ; GFX12: ; %bb.0: ; %main_body
993 ; GFX12-NEXT: s_mov_b32 s0, s2
994 ; GFX12-NEXT: s_mov_b32 s1, s3
995 ; GFX12-NEXT: s_mov_b32 s2, s4
996 ; GFX12-NEXT: s_mov_b32 s3, s5
997 ; GFX12-NEXT: s_mov_b32 s4, s6
998 ; GFX12-NEXT: s_mov_b32 s5, s7
999 ; GFX12-NEXT: s_mov_b32 s6, s8
1000 ; GFX12-NEXT: s_mov_b32 s7, s9
1001 ; GFX12-NEXT: s_mov_b32 s8, s10
1002 ; GFX12-NEXT: s_mov_b32 s9, s11
1003 ; GFX12-NEXT: s_mov_b32 s10, s12
1004 ; GFX12-NEXT: s_mov_b32 s11, s13
1005 ; GFX12-NEXT: image_gather4_c_lz v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
1006 ; GFX12-NEXT: s_wait_samplecnt 0x0
1007 ; GFX12-NEXT: ; return to shader part epilog
1009 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_2d_dmask_2: plain 2D image gather4 with the dmask immediate set to 2.
; Calls llvm.amdgcn.image.gather4.2d with i32 2 as the first operand; the
; expected image_gather4 carries dmask:0x2 for every run line.
; Unlike the l/lz variants in this file, the expected code saves exec, runs
; s_wqm on it and then ANDs the saved value back before the image instruction.
; The GFX6 checks do this on the 64-bit exec with s[14:15]; the GFX10NSA and
; GFX12 checks do it on exec_lo with s1. The inreg %rsrc/%samp values are
; copied from s[2:13] down to s[0:11].
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1013 define amdgpu_ps <4 x float> @gather4_2d_dmask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
1014 ; GFX6-LABEL: gather4_2d_dmask_2:
1015 ; GFX6: ; %bb.0: ; %main_body
1016 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
1017 ; GFX6-NEXT: s_mov_b32 s0, s2
1018 ; GFX6-NEXT: s_mov_b32 s1, s3
1019 ; GFX6-NEXT: s_mov_b32 s2, s4
1020 ; GFX6-NEXT: s_mov_b32 s3, s5
1021 ; GFX6-NEXT: s_mov_b32 s4, s6
1022 ; GFX6-NEXT: s_mov_b32 s5, s7
1023 ; GFX6-NEXT: s_mov_b32 s6, s8
1024 ; GFX6-NEXT: s_mov_b32 s7, s9
1025 ; GFX6-NEXT: s_mov_b32 s8, s10
1026 ; GFX6-NEXT: s_mov_b32 s9, s11
1027 ; GFX6-NEXT: s_mov_b32 s10, s12
1028 ; GFX6-NEXT: s_mov_b32 s11, s13
1029 ; GFX6-NEXT: s_wqm_b64 exec, exec
1030 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
1031 ; GFX6-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2
1032 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1033 ; GFX6-NEXT: ; return to shader part epilog
1035 ; GFX10NSA-LABEL: gather4_2d_dmask_2:
1036 ; GFX10NSA: ; %bb.0: ; %main_body
1037 ; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo
1038 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
1039 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
1040 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1
1041 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
1042 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
1043 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
1044 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
1045 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
1046 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
1047 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
1048 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
1049 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
1050 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
1051 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
1052 ; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_2D
1053 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
1054 ; GFX10NSA-NEXT: ; return to shader part epilog
1056 ; GFX12-LABEL: gather4_2d_dmask_2:
1057 ; GFX12: ; %bb.0: ; %main_body
1058 ; GFX12-NEXT: s_mov_b32 s1, exec_lo
1059 ; GFX12-NEXT: s_mov_b32 s0, s2
1060 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
1061 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s1
1062 ; GFX12-NEXT: s_mov_b32 s1, s3
1063 ; GFX12-NEXT: s_mov_b32 s2, s4
1064 ; GFX12-NEXT: s_mov_b32 s3, s5
1065 ; GFX12-NEXT: s_mov_b32 s4, s6
1066 ; GFX12-NEXT: s_mov_b32 s5, s7
1067 ; GFX12-NEXT: s_mov_b32 s6, s8
1068 ; GFX12-NEXT: s_mov_b32 s7, s9
1069 ; GFX12-NEXT: s_mov_b32 s8, s10
1070 ; GFX12-NEXT: s_mov_b32 s9, s11
1071 ; GFX12-NEXT: s_mov_b32 s10, s12
1072 ; GFX12-NEXT: s_mov_b32 s11, s13
1073 ; GFX12-NEXT: image_gather4 v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_2D
1074 ; GFX12-NEXT: s_wait_samplecnt 0x0
1075 ; GFX12-NEXT: ; return to shader part epilog
1077 %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 2, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_2d_dmask_4: plain 2D image gather4 with the dmask immediate set to 4.
; Calls llvm.amdgcn.image.gather4.2d with i32 4 as the first operand; the
; expected image_gather4 carries dmask:0x4 for every run line.
; The expected code saves exec, runs s_wqm on it and then ANDs the saved value
; back before the image instruction. The GFX6 checks do this on the 64-bit
; exec with s[14:15]; the GFX10NSA and GFX12 checks do it on exec_lo with s1.
; The inreg %rsrc/%samp values are copied from s[2:13] down to s[0:11].
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1081 define amdgpu_ps <4 x float> @gather4_2d_dmask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
1082 ; GFX6-LABEL: gather4_2d_dmask_4:
1083 ; GFX6: ; %bb.0: ; %main_body
1084 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
1085 ; GFX6-NEXT: s_mov_b32 s0, s2
1086 ; GFX6-NEXT: s_mov_b32 s1, s3
1087 ; GFX6-NEXT: s_mov_b32 s2, s4
1088 ; GFX6-NEXT: s_mov_b32 s3, s5
1089 ; GFX6-NEXT: s_mov_b32 s4, s6
1090 ; GFX6-NEXT: s_mov_b32 s5, s7
1091 ; GFX6-NEXT: s_mov_b32 s6, s8
1092 ; GFX6-NEXT: s_mov_b32 s7, s9
1093 ; GFX6-NEXT: s_mov_b32 s8, s10
1094 ; GFX6-NEXT: s_mov_b32 s9, s11
1095 ; GFX6-NEXT: s_mov_b32 s10, s12
1096 ; GFX6-NEXT: s_mov_b32 s11, s13
1097 ; GFX6-NEXT: s_wqm_b64 exec, exec
1098 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
1099 ; GFX6-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4
1100 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1101 ; GFX6-NEXT: ; return to shader part epilog
1103 ; GFX10NSA-LABEL: gather4_2d_dmask_4:
1104 ; GFX10NSA: ; %bb.0: ; %main_body
1105 ; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo
1106 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
1107 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
1108 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1
1109 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
1110 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
1111 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
1112 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
1113 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
1114 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
1115 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
1116 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
1117 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
1118 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
1119 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
1120 ; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D
1121 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
1122 ; GFX10NSA-NEXT: ; return to shader part epilog
1124 ; GFX12-LABEL: gather4_2d_dmask_4:
1125 ; GFX12: ; %bb.0: ; %main_body
1126 ; GFX12-NEXT: s_mov_b32 s1, exec_lo
1127 ; GFX12-NEXT: s_mov_b32 s0, s2
1128 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
1129 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s1
1130 ; GFX12-NEXT: s_mov_b32 s1, s3
1131 ; GFX12-NEXT: s_mov_b32 s2, s4
1132 ; GFX12-NEXT: s_mov_b32 s3, s5
1133 ; GFX12-NEXT: s_mov_b32 s4, s6
1134 ; GFX12-NEXT: s_mov_b32 s5, s7
1135 ; GFX12-NEXT: s_mov_b32 s6, s8
1136 ; GFX12-NEXT: s_mov_b32 s7, s9
1137 ; GFX12-NEXT: s_mov_b32 s8, s10
1138 ; GFX12-NEXT: s_mov_b32 s9, s11
1139 ; GFX12-NEXT: s_mov_b32 s10, s12
1140 ; GFX12-NEXT: s_mov_b32 s11, s13
1141 ; GFX12-NEXT: image_gather4 v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D
1142 ; GFX12-NEXT: s_wait_samplecnt 0x0
1143 ; GFX12-NEXT: ; return to shader part epilog
1145 %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 4, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_2d_dmask_8: plain 2D image gather4 with the dmask immediate set to 8.
; Calls llvm.amdgcn.image.gather4.2d with i32 8 as the first operand; the
; expected image_gather4 carries dmask:0x8 for every run line.
; The expected code saves exec, runs s_wqm on it and then ANDs the saved value
; back before the image instruction. The GFX6 checks do this on the 64-bit
; exec with s[14:15]; the GFX10NSA and GFX12 checks do it on exec_lo with s1.
; The inreg %rsrc/%samp values are copied from s[2:13] down to s[0:11].
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1149 define amdgpu_ps <4 x float> @gather4_2d_dmask_8(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
1150 ; GFX6-LABEL: gather4_2d_dmask_8:
1151 ; GFX6: ; %bb.0: ; %main_body
1152 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
1153 ; GFX6-NEXT: s_mov_b32 s0, s2
1154 ; GFX6-NEXT: s_mov_b32 s1, s3
1155 ; GFX6-NEXT: s_mov_b32 s2, s4
1156 ; GFX6-NEXT: s_mov_b32 s3, s5
1157 ; GFX6-NEXT: s_mov_b32 s4, s6
1158 ; GFX6-NEXT: s_mov_b32 s5, s7
1159 ; GFX6-NEXT: s_mov_b32 s6, s8
1160 ; GFX6-NEXT: s_mov_b32 s7, s9
1161 ; GFX6-NEXT: s_mov_b32 s8, s10
1162 ; GFX6-NEXT: s_mov_b32 s9, s11
1163 ; GFX6-NEXT: s_mov_b32 s10, s12
1164 ; GFX6-NEXT: s_mov_b32 s11, s13
1165 ; GFX6-NEXT: s_wqm_b64 exec, exec
1166 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
1167 ; GFX6-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8
1168 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1169 ; GFX6-NEXT: ; return to shader part epilog
1171 ; GFX10NSA-LABEL: gather4_2d_dmask_8:
1172 ; GFX10NSA: ; %bb.0: ; %main_body
1173 ; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo
1174 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
1175 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
1176 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1
1177 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
1178 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
1179 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
1180 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
1181 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
1182 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
1183 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
1184 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
1185 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
1186 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
1187 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
1188 ; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_2D
1189 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
1190 ; GFX10NSA-NEXT: ; return to shader part epilog
1192 ; GFX12-LABEL: gather4_2d_dmask_8:
1193 ; GFX12: ; %bb.0: ; %main_body
1194 ; GFX12-NEXT: s_mov_b32 s1, exec_lo
1195 ; GFX12-NEXT: s_mov_b32 s0, s2
1196 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
1197 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s1
1198 ; GFX12-NEXT: s_mov_b32 s1, s3
1199 ; GFX12-NEXT: s_mov_b32 s2, s4
1200 ; GFX12-NEXT: s_mov_b32 s3, s5
1201 ; GFX12-NEXT: s_mov_b32 s4, s6
1202 ; GFX12-NEXT: s_mov_b32 s5, s7
1203 ; GFX12-NEXT: s_mov_b32 s6, s8
1204 ; GFX12-NEXT: s_mov_b32 s7, s9
1205 ; GFX12-NEXT: s_mov_b32 s8, s10
1206 ; GFX12-NEXT: s_mov_b32 s9, s11
1207 ; GFX12-NEXT: s_mov_b32 s10, s12
1208 ; GFX12-NEXT: s_mov_b32 s11, s13
1209 ; GFX12-NEXT: image_gather4 v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_2D
1210 ; GFX12-NEXT: s_wait_samplecnt 0x0
1211 ; GFX12-NEXT: ; return to shader part epilog
1213 %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 8, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; Declarations of the llvm.amdgcn.image.gather4.* intrinsic variants
; (presumably one per variant exercised by a test function in this file).
; Operand layout shared by every declaration: a leading i32 immarg (the test
; functions pass 1, 2, 4 or 8 and it shows up as the dmask operand in the
; expected assembly), then the float address operands specific to the variant,
; then <8 x i32> and <4 x i32> (the callers pass %rsrc and %samp), and finally
; i1/i32/i32 immargs (the callers visible here pass false, 0, 0).
; The .sl_v4f32i32s form returns { <4 x float>, i32 } instead of a plain vector.
1217 declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1218 declare { <4 x float>, i32 } @llvm.amdgcn.image.gather4.2d.sl_v4f32i32s.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1219 declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1220 declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1221 declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1222 declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1223 declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1224 declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1225 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1226 declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1227 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1228 declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1229 declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1230 declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
1231 declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
; Attribute group applied to every intrinsic declaration above.
1233 attributes #0 = { nounwind readonly }