1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10NSA %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - %s | FileCheck -check-prefix=GFX10NSA %s
; gather4_2d - plain image gather4 on a 2D image with two half coordinates.
; Expected code on both targets: the two 16-bit coordinates are packed into one
; VGPR (mask with 0xffff, then v_lshl_or_b32 by 16) and passed as the single
; address register of image_gather4 with the a16 modifier. The descriptor SGPRs
; are copied down from s[2:13] to s[0:11], and the image instruction is preceded
; by an s_wqm on exec followed by an s_and that restores the saved exec mask.
6 define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
7 ; GFX9-LABEL: gather4_2d:
8 ; GFX9: ; %bb.0: ; %main_body
9 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
10 ; GFX9-NEXT: s_mov_b32 s0, s2
11 ; GFX9-NEXT: s_wqm_b64 exec, exec
12 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
13 ; GFX9-NEXT: s_mov_b32 s1, s3
14 ; GFX9-NEXT: s_mov_b32 s2, s4
15 ; GFX9-NEXT: s_mov_b32 s3, s5
16 ; GFX9-NEXT: s_mov_b32 s4, s6
17 ; GFX9-NEXT: s_mov_b32 s5, s7
18 ; GFX9-NEXT: s_mov_b32 s6, s8
19 ; GFX9-NEXT: s_mov_b32 s7, s9
20 ; GFX9-NEXT: s_mov_b32 s8, s10
21 ; GFX9-NEXT: s_mov_b32 s9, s11
22 ; GFX9-NEXT: s_mov_b32 s10, s12
23 ; GFX9-NEXT: s_mov_b32 s11, s13
24 ; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
25 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
26 ; GFX9-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
27 ; GFX9-NEXT: s_waitcnt vmcnt(0)
28 ; GFX9-NEXT: ; return to shader part epilog
30 ; GFX10NSA-LABEL: gather4_2d:
31 ; GFX10NSA: ; %bb.0: ; %main_body
32 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
33 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
34 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
35 ; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
36 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
37 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
38 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
39 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
40 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
41 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
42 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
43 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
44 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
45 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
46 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
47 ; GFX10NSA-NEXT: v_lshl_or_b32 v0, v1, 16, v0
48 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
49 ; GFX10NSA-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
50 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
51 ; GFX10NSA-NEXT: ; return to shader part epilog
53 %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_cube - image gather4 on a cube image with three half operands
; (%s, %t, %face). Expected code: the first two halves are packed into v1
; (mask + v_lshl_or_b32 by 16) while v2 is passed through untouched, giving the
; address range v[1:2]. The gfx900 checks expect the "da" modifier on the image
; instruction; the gfx1010/gfx1100 checks expect dim:SQ_RSRC_IMG_CUBE instead.
57 define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
58 ; GFX9-LABEL: gather4_cube:
59 ; GFX9: ; %bb.0: ; %main_body
60 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
61 ; GFX9-NEXT: s_mov_b32 s0, s2
62 ; GFX9-NEXT: s_wqm_b64 exec, exec
63 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
64 ; GFX9-NEXT: s_mov_b32 s1, s3
65 ; GFX9-NEXT: s_mov_b32 s2, s4
66 ; GFX9-NEXT: s_mov_b32 s3, s5
67 ; GFX9-NEXT: s_mov_b32 s4, s6
68 ; GFX9-NEXT: s_mov_b32 s5, s7
69 ; GFX9-NEXT: s_mov_b32 s6, s8
70 ; GFX9-NEXT: s_mov_b32 s7, s9
71 ; GFX9-NEXT: s_mov_b32 s8, s10
72 ; GFX9-NEXT: s_mov_b32 s9, s11
73 ; GFX9-NEXT: s_mov_b32 s10, s12
74 ; GFX9-NEXT: s_mov_b32 s11, s13
75 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
76 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
77 ; GFX9-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
78 ; GFX9-NEXT: s_waitcnt vmcnt(0)
79 ; GFX9-NEXT: ; return to shader part epilog
81 ; GFX10NSA-LABEL: gather4_cube:
82 ; GFX10NSA: ; %bb.0: ; %main_body
83 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
84 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
85 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
86 ; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
87 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
88 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
89 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
90 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
91 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
92 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
93 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
94 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
95 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
96 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
97 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
98 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v1, 16, v0
99 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
100 ; GFX10NSA-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
101 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
102 ; GFX10NSA-NEXT: ; return to shader part epilog
104 %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_2darray - image gather4 on a 2D array image with three half operands
; (%s, %t, %slice). Expected code mirrors the cube case: two halves are packed
; into v1, v2 is passed through, and the address range is v[1:2]. The gfx900
; checks expect the "da" modifier; the gfx1010/gfx1100 checks expect
; dim:SQ_RSRC_IMG_2D_ARRAY.
108 define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
109 ; GFX9-LABEL: gather4_2darray:
110 ; GFX9: ; %bb.0: ; %main_body
111 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
112 ; GFX9-NEXT: s_mov_b32 s0, s2
113 ; GFX9-NEXT: s_wqm_b64 exec, exec
114 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
115 ; GFX9-NEXT: s_mov_b32 s1, s3
116 ; GFX9-NEXT: s_mov_b32 s2, s4
117 ; GFX9-NEXT: s_mov_b32 s3, s5
118 ; GFX9-NEXT: s_mov_b32 s4, s6
119 ; GFX9-NEXT: s_mov_b32 s5, s7
120 ; GFX9-NEXT: s_mov_b32 s6, s8
121 ; GFX9-NEXT: s_mov_b32 s7, s9
122 ; GFX9-NEXT: s_mov_b32 s8, s10
123 ; GFX9-NEXT: s_mov_b32 s9, s11
124 ; GFX9-NEXT: s_mov_b32 s10, s12
125 ; GFX9-NEXT: s_mov_b32 s11, s13
126 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
127 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
128 ; GFX9-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
129 ; GFX9-NEXT: s_waitcnt vmcnt(0)
130 ; GFX9-NEXT: ; return to shader part epilog
132 ; GFX10NSA-LABEL: gather4_2darray:
133 ; GFX10NSA: ; %bb.0: ; %main_body
134 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
135 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
136 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
137 ; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
138 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
139 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
140 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
141 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
142 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
143 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
144 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
145 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
146 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
147 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
148 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
149 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v1, 16, v0
150 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
151 ; GFX10NSA-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
152 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
153 ; GFX10NSA-NEXT: ; return to shader part epilog
155 %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_c_2d - gather4 with a float %zcompare operand plus two half
; coordinates. Expected code: v0 is left untouched, the two halves arriving in
; v1/v2 are packed into v1, and image_gather4_c is issued with the contiguous
; address range v[0:1] and the a16 modifier, inside the s_wqm / s_and exec
; bracket.
159 define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
160 ; GFX9-LABEL: gather4_c_2d:
161 ; GFX9: ; %bb.0: ; %main_body
162 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
163 ; GFX9-NEXT: s_mov_b32 s0, s2
164 ; GFX9-NEXT: s_wqm_b64 exec, exec
165 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
166 ; GFX9-NEXT: s_mov_b32 s1, s3
167 ; GFX9-NEXT: s_mov_b32 s2, s4
168 ; GFX9-NEXT: s_mov_b32 s3, s5
169 ; GFX9-NEXT: s_mov_b32 s4, s6
170 ; GFX9-NEXT: s_mov_b32 s5, s7
171 ; GFX9-NEXT: s_mov_b32 s6, s8
172 ; GFX9-NEXT: s_mov_b32 s7, s9
173 ; GFX9-NEXT: s_mov_b32 s8, s10
174 ; GFX9-NEXT: s_mov_b32 s9, s11
175 ; GFX9-NEXT: s_mov_b32 s10, s12
176 ; GFX9-NEXT: s_mov_b32 s11, s13
177 ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
178 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
179 ; GFX9-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
180 ; GFX9-NEXT: s_waitcnt vmcnt(0)
181 ; GFX9-NEXT: ; return to shader part epilog
183 ; GFX10NSA-LABEL: gather4_c_2d:
184 ; GFX10NSA: ; %bb.0: ; %main_body
185 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
186 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
187 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
188 ; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
189 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
190 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
191 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
192 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
193 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
194 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
195 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
196 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
197 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
198 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
199 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
200 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
201 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
202 ; GFX10NSA-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
203 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
204 ; GFX10NSA-NEXT: ; return to shader part epilog
206 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_cl_2d - gather4 with two half coordinates and a trailing half %clamp.
; Expected code: the first two halves are packed into v1, the third operand
; stays alone in v2 (it is not paired with anything), and image_gather4_cl is
; issued with address range v[1:2] and the a16 modifier, inside the
; s_wqm / s_and exec bracket.
210 define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
211 ; GFX9-LABEL: gather4_cl_2d:
212 ; GFX9: ; %bb.0: ; %main_body
213 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
214 ; GFX9-NEXT: s_mov_b32 s0, s2
215 ; GFX9-NEXT: s_wqm_b64 exec, exec
216 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
217 ; GFX9-NEXT: s_mov_b32 s1, s3
218 ; GFX9-NEXT: s_mov_b32 s2, s4
219 ; GFX9-NEXT: s_mov_b32 s3, s5
220 ; GFX9-NEXT: s_mov_b32 s4, s6
221 ; GFX9-NEXT: s_mov_b32 s5, s7
222 ; GFX9-NEXT: s_mov_b32 s6, s8
223 ; GFX9-NEXT: s_mov_b32 s7, s9
224 ; GFX9-NEXT: s_mov_b32 s8, s10
225 ; GFX9-NEXT: s_mov_b32 s9, s11
226 ; GFX9-NEXT: s_mov_b32 s10, s12
227 ; GFX9-NEXT: s_mov_b32 s11, s13
228 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
229 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
230 ; GFX9-NEXT: image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
231 ; GFX9-NEXT: s_waitcnt vmcnt(0)
232 ; GFX9-NEXT: ; return to shader part epilog
234 ; GFX10NSA-LABEL: gather4_cl_2d:
235 ; GFX10NSA: ; %bb.0: ; %main_body
236 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
237 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
238 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
239 ; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
240 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
241 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
242 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
243 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
244 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
245 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
246 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
247 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
248 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
249 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
250 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
251 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v1, 16, v0
252 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
253 ; GFX10NSA-NEXT: image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
254 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
255 ; GFX10NSA-NEXT: ; return to shader part epilog
257 %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_c_cl_2d - gather4 with float %zcompare, two half coordinates and a
; half %clamp. The two targets are expected to differ in how the address
; operands are formed:
;  * gfx900 checks: extra v_mov_b32 copies shuffle the inputs so the three
;    address dwords end up in the contiguous range v[1:3].
;  * gfx1010/gfx1100 checks: no copies; the instruction takes the bracketed,
;    non-contiguous register list [v0, v1, v3] after packing two halves into v1.
261 define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
262 ; GFX9-LABEL: gather4_c_cl_2d:
263 ; GFX9: ; %bb.0: ; %main_body
264 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
265 ; GFX9-NEXT: s_mov_b32 s0, s2
266 ; GFX9-NEXT: s_wqm_b64 exec, exec
267 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
268 ; GFX9-NEXT: v_mov_b32_e32 v1, v0
269 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v4
270 ; GFX9-NEXT: s_mov_b32 s1, s3
271 ; GFX9-NEXT: s_mov_b32 s2, s4
272 ; GFX9-NEXT: s_mov_b32 s3, s5
273 ; GFX9-NEXT: s_mov_b32 s4, s6
274 ; GFX9-NEXT: s_mov_b32 s5, s7
275 ; GFX9-NEXT: s_mov_b32 s6, s8
276 ; GFX9-NEXT: s_mov_b32 s7, s9
277 ; GFX9-NEXT: s_mov_b32 s8, s10
278 ; GFX9-NEXT: s_mov_b32 s9, s11
279 ; GFX9-NEXT: s_mov_b32 s10, s12
280 ; GFX9-NEXT: s_mov_b32 s11, s13
281 ; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v0
282 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
283 ; GFX9-NEXT: image_gather4_c_cl v[0:3], v[1:3], s[0:7], s[8:11] dmask:0x1 a16
284 ; GFX9-NEXT: s_waitcnt vmcnt(0)
285 ; GFX9-NEXT: ; return to shader part epilog
287 ; GFX10NSA-LABEL: gather4_c_cl_2d:
288 ; GFX10NSA: ; %bb.0: ; %main_body
289 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
290 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
291 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
292 ; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
293 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
294 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
295 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
296 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
297 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
298 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
299 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
300 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
301 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
302 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
303 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
304 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
305 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
306 ; GFX10NSA-NEXT: image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
307 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
308 ; GFX10NSA-NEXT: ; return to shader part epilog
310 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_b_2d - gather4 with a half %bias followed by two half coordinates
; (intrinsic mangled with .f16.f16). Expected code: v0 is passed through in its
; own dword (it is not packed together with a coordinate), the halves arriving
; in v1/v2 are packed into v1, and image_gather4_b takes the contiguous range
; v[0:1] with the a16 modifier, inside the s_wqm / s_and exec bracket.
314 define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) {
315 ; GFX9-LABEL: gather4_b_2d:
316 ; GFX9: ; %bb.0: ; %main_body
317 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
318 ; GFX9-NEXT: s_mov_b32 s0, s2
319 ; GFX9-NEXT: s_wqm_b64 exec, exec
320 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
321 ; GFX9-NEXT: s_mov_b32 s1, s3
322 ; GFX9-NEXT: s_mov_b32 s2, s4
323 ; GFX9-NEXT: s_mov_b32 s3, s5
324 ; GFX9-NEXT: s_mov_b32 s4, s6
325 ; GFX9-NEXT: s_mov_b32 s5, s7
326 ; GFX9-NEXT: s_mov_b32 s6, s8
327 ; GFX9-NEXT: s_mov_b32 s7, s9
328 ; GFX9-NEXT: s_mov_b32 s8, s10
329 ; GFX9-NEXT: s_mov_b32 s9, s11
330 ; GFX9-NEXT: s_mov_b32 s10, s12
331 ; GFX9-NEXT: s_mov_b32 s11, s13
332 ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
333 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
334 ; GFX9-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
335 ; GFX9-NEXT: s_waitcnt vmcnt(0)
336 ; GFX9-NEXT: ; return to shader part epilog
338 ; GFX10NSA-LABEL: gather4_b_2d:
339 ; GFX10NSA: ; %bb.0: ; %main_body
340 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
341 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
342 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
343 ; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
344 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
345 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
346 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
347 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
348 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
349 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
350 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
351 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
352 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
353 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
354 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
355 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
356 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
357 ; GFX10NSA-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
358 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
359 ; GFX10NSA-NEXT: ; return to shader part epilog
361 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32 1, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_c_b_2d - gather4 with half %bias, float %zcompare and two half
; coordinates. Expected code: v0 and v1 are passed through unchanged, the
; halves arriving in v2/v3 are packed into v2, and image_gather4_c_b takes the
; contiguous range v[0:2] with the a16 modifier on both targets, inside the
; s_wqm / s_and exec bracket.
365 define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) {
366 ; GFX9-LABEL: gather4_c_b_2d:
367 ; GFX9: ; %bb.0: ; %main_body
368 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
369 ; GFX9-NEXT: s_mov_b32 s0, s2
370 ; GFX9-NEXT: s_wqm_b64 exec, exec
371 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
372 ; GFX9-NEXT: s_mov_b32 s1, s3
373 ; GFX9-NEXT: s_mov_b32 s2, s4
374 ; GFX9-NEXT: s_mov_b32 s3, s5
375 ; GFX9-NEXT: s_mov_b32 s4, s6
376 ; GFX9-NEXT: s_mov_b32 s5, s7
377 ; GFX9-NEXT: s_mov_b32 s6, s8
378 ; GFX9-NEXT: s_mov_b32 s7, s9
379 ; GFX9-NEXT: s_mov_b32 s8, s10
380 ; GFX9-NEXT: s_mov_b32 s9, s11
381 ; GFX9-NEXT: s_mov_b32 s10, s12
382 ; GFX9-NEXT: s_mov_b32 s11, s13
383 ; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2
384 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
385 ; GFX9-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
386 ; GFX9-NEXT: s_waitcnt vmcnt(0)
387 ; GFX9-NEXT: ; return to shader part epilog
389 ; GFX10NSA-LABEL: gather4_c_b_2d:
390 ; GFX10NSA: ; %bb.0: ; %main_body
391 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
392 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
393 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
394 ; GFX10NSA-NEXT: v_and_b32_e32 v2, 0xffff, v2
395 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
396 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
397 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
398 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
399 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
400 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
401 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
402 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
403 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
404 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
405 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
406 ; GFX10NSA-NEXT: v_lshl_or_b32 v2, v3, 16, v2
407 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
408 ; GFX10NSA-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
409 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
410 ; GFX10NSA-NEXT: ; return to shader part epilog
412 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32 1, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_b_cl_2d - gather4 with half %bias, two half coordinates and a half
; %clamp. The two targets are expected to differ in how the address operands
; are formed:
;  * gfx900 checks: v_mov_b32 copies rearrange the inputs so the address is the
;    contiguous range v[1:3].
;  * gfx1010/gfx1100 checks: no copies; two halves are packed into v1 and the
;    instruction takes the bracketed, non-contiguous list [v0, v1, v3].
416 define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) {
417 ; GFX9-LABEL: gather4_b_cl_2d:
418 ; GFX9: ; %bb.0: ; %main_body
419 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
420 ; GFX9-NEXT: s_mov_b32 s0, s2
421 ; GFX9-NEXT: s_wqm_b64 exec, exec
422 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
423 ; GFX9-NEXT: v_mov_b32_e32 v1, v0
424 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v4
425 ; GFX9-NEXT: s_mov_b32 s1, s3
426 ; GFX9-NEXT: s_mov_b32 s2, s4
427 ; GFX9-NEXT: s_mov_b32 s3, s5
428 ; GFX9-NEXT: s_mov_b32 s4, s6
429 ; GFX9-NEXT: s_mov_b32 s5, s7
430 ; GFX9-NEXT: s_mov_b32 s6, s8
431 ; GFX9-NEXT: s_mov_b32 s7, s9
432 ; GFX9-NEXT: s_mov_b32 s8, s10
433 ; GFX9-NEXT: s_mov_b32 s9, s11
434 ; GFX9-NEXT: s_mov_b32 s10, s12
435 ; GFX9-NEXT: s_mov_b32 s11, s13
436 ; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v0
437 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
438 ; GFX9-NEXT: image_gather4_b_cl v[0:3], v[1:3], s[0:7], s[8:11] dmask:0x1 a16
439 ; GFX9-NEXT: s_waitcnt vmcnt(0)
440 ; GFX9-NEXT: ; return to shader part epilog
442 ; GFX10NSA-LABEL: gather4_b_cl_2d:
443 ; GFX10NSA: ; %bb.0: ; %main_body
444 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
445 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
446 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
447 ; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
448 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
449 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
450 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
451 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
452 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
453 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
454 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
455 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
456 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
457 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
458 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
459 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
460 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
461 ; GFX10NSA-NEXT: image_gather4_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
462 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
463 ; GFX10NSA-NEXT: ; return to shader part epilog
465 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32 1, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_c_b_cl_2d - gather4 with half %bias, float %zcompare, two half
; coordinates and a half %clamp (the widest operand list in this file).
;  * gfx900 checks: v3 is saved to v5, v4 is copied into v3, and the packed
;    pair is built in v2 from v5 and v2, so the address is the contiguous
;    range v[0:3].
;  * gfx1010/gfx1100 checks: no copies; the pair is packed into v2 and the
;    instruction takes the bracketed list [v0, v1, v2, v4].
469 define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) {
470 ; GFX9-LABEL: gather4_c_b_cl_2d:
471 ; GFX9: ; %bb.0: ; %main_body
472 ; GFX9-NEXT: s_mov_b64 s[14:15], exec
473 ; GFX9-NEXT: s_mov_b32 s0, s2
474 ; GFX9-NEXT: s_wqm_b64 exec, exec
475 ; GFX9-NEXT: v_mov_b32_e32 v5, v3
476 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
477 ; GFX9-NEXT: s_mov_b32 s1, s3
478 ; GFX9-NEXT: s_mov_b32 s2, s4
479 ; GFX9-NEXT: s_mov_b32 s3, s5
480 ; GFX9-NEXT: s_mov_b32 s4, s6
481 ; GFX9-NEXT: s_mov_b32 s5, s7
482 ; GFX9-NEXT: s_mov_b32 s6, s8
483 ; GFX9-NEXT: s_mov_b32 s7, s9
484 ; GFX9-NEXT: s_mov_b32 s8, s10
485 ; GFX9-NEXT: s_mov_b32 s9, s11
486 ; GFX9-NEXT: s_mov_b32 s10, s12
487 ; GFX9-NEXT: s_mov_b32 s11, s13
488 ; GFX9-NEXT: v_mov_b32_e32 v3, v4
489 ; GFX9-NEXT: v_lshl_or_b32 v2, v5, 16, v2
490 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
491 ; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16
492 ; GFX9-NEXT: s_waitcnt vmcnt(0)
493 ; GFX9-NEXT: ; return to shader part epilog
495 ; GFX10NSA-LABEL: gather4_c_b_cl_2d:
496 ; GFX10NSA: ; %bb.0: ; %main_body
497 ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
498 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
499 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
500 ; GFX10NSA-NEXT: v_and_b32_e32 v2, 0xffff, v2
501 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
502 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
503 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
504 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
505 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
506 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
507 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
508 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
509 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
510 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
511 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
512 ; GFX10NSA-NEXT: v_lshl_or_b32 v2, v3, 16, v2
513 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
514 ; GFX10NSA-NEXT: image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
515 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
516 ; GFX10NSA-NEXT: ; return to shader part epilog
518 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32 1, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_l_2d - gather4 with two half coordinates and a half %lod.
; Unlike the non-LOD variants in this file, the expected output contains no
; s_wqm and no save/restore of exec. Expected code: two halves are packed into
; v1, v2 is passed through, and image_gather4_l takes v[1:2] with the a16
; modifier.
522 define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
523 ; GFX9-LABEL: gather4_l_2d:
524 ; GFX9: ; %bb.0: ; %main_body
525 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
526 ; GFX9-NEXT: s_mov_b32 s0, s2
527 ; GFX9-NEXT: s_mov_b32 s1, s3
528 ; GFX9-NEXT: s_mov_b32 s2, s4
529 ; GFX9-NEXT: s_mov_b32 s3, s5
530 ; GFX9-NEXT: s_mov_b32 s4, s6
531 ; GFX9-NEXT: s_mov_b32 s5, s7
532 ; GFX9-NEXT: s_mov_b32 s6, s8
533 ; GFX9-NEXT: s_mov_b32 s7, s9
534 ; GFX9-NEXT: s_mov_b32 s8, s10
535 ; GFX9-NEXT: s_mov_b32 s9, s11
536 ; GFX9-NEXT: s_mov_b32 s10, s12
537 ; GFX9-NEXT: s_mov_b32 s11, s13
538 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
539 ; GFX9-NEXT: image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
540 ; GFX9-NEXT: s_waitcnt vmcnt(0)
541 ; GFX9-NEXT: ; return to shader part epilog
543 ; GFX10NSA-LABEL: gather4_l_2d:
544 ; GFX10NSA: ; %bb.0: ; %main_body
545 ; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
546 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
547 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
548 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
549 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
550 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v1, 16, v0
551 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
552 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
553 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
554 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
555 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
556 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
557 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
558 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
559 ; GFX10NSA-NEXT: image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
560 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
561 ; GFX10NSA-NEXT: ; return to shader part epilog
563 %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_c_l_2d - gather4 with float %zcompare, two half coordinates and a
; half %lod. The expected output contains no s_wqm / exec save-restore.
;  * gfx900 checks: v_mov_b32 copies rearrange the inputs so the address is the
;    contiguous range v[1:3].
;  * gfx1010/gfx1100 checks: no copies; two halves are packed into v1 and the
;    instruction takes the bracketed, non-contiguous list [v0, v1, v3].
567 define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
568 ; GFX9-LABEL: gather4_c_l_2d:
569 ; GFX9: ; %bb.0: ; %main_body
570 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
571 ; GFX9-NEXT: v_mov_b32_e32 v1, v0
572 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v4
573 ; GFX9-NEXT: s_mov_b32 s0, s2
574 ; GFX9-NEXT: s_mov_b32 s1, s3
575 ; GFX9-NEXT: s_mov_b32 s2, s4
576 ; GFX9-NEXT: s_mov_b32 s3, s5
577 ; GFX9-NEXT: s_mov_b32 s4, s6
578 ; GFX9-NEXT: s_mov_b32 s5, s7
579 ; GFX9-NEXT: s_mov_b32 s6, s8
580 ; GFX9-NEXT: s_mov_b32 s7, s9
581 ; GFX9-NEXT: s_mov_b32 s8, s10
582 ; GFX9-NEXT: s_mov_b32 s9, s11
583 ; GFX9-NEXT: s_mov_b32 s10, s12
584 ; GFX9-NEXT: s_mov_b32 s11, s13
585 ; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v0
586 ; GFX9-NEXT: image_gather4_c_l v[0:3], v[1:3], s[0:7], s[8:11] dmask:0x1 a16
587 ; GFX9-NEXT: s_waitcnt vmcnt(0)
588 ; GFX9-NEXT: ; return to shader part epilog
590 ; GFX10NSA-LABEL: gather4_c_l_2d:
591 ; GFX10NSA: ; %bb.0: ; %main_body
592 ; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
593 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
594 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
595 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
596 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
597 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
598 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
599 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
600 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
601 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
602 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
603 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
604 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
605 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
606 ; GFX10NSA-NEXT: image_gather4_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
607 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
608 ; GFX10NSA-NEXT: ; return to shader part epilog
610 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_lz_2d - the "lz" gather4 variant with two half coordinates and no
; extra operands. The expected output contains no s_wqm / exec save-restore.
; Expected code: the two halves are packed into v0 and image_gather4_lz takes
; that single VGPR as its address with the a16 modifier.
614 define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
615 ; GFX9-LABEL: gather4_lz_2d:
616 ; GFX9: ; %bb.0: ; %main_body
617 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
618 ; GFX9-NEXT: s_mov_b32 s0, s2
619 ; GFX9-NEXT: s_mov_b32 s1, s3
620 ; GFX9-NEXT: s_mov_b32 s2, s4
621 ; GFX9-NEXT: s_mov_b32 s3, s5
622 ; GFX9-NEXT: s_mov_b32 s4, s6
623 ; GFX9-NEXT: s_mov_b32 s5, s7
624 ; GFX9-NEXT: s_mov_b32 s6, s8
625 ; GFX9-NEXT: s_mov_b32 s7, s9
626 ; GFX9-NEXT: s_mov_b32 s8, s10
627 ; GFX9-NEXT: s_mov_b32 s9, s11
628 ; GFX9-NEXT: s_mov_b32 s10, s12
629 ; GFX9-NEXT: s_mov_b32 s11, s13
630 ; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
631 ; GFX9-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
632 ; GFX9-NEXT: s_waitcnt vmcnt(0)
633 ; GFX9-NEXT: ; return to shader part epilog
635 ; GFX10NSA-LABEL: gather4_lz_2d:
636 ; GFX10NSA: ; %bb.0: ; %main_body
637 ; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
638 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
639 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
640 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
641 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
642 ; GFX10NSA-NEXT: v_lshl_or_b32 v0, v1, 16, v0
643 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
644 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
645 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
646 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
647 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
648 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
649 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
650 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
651 ; GFX10NSA-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
652 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
653 ; GFX10NSA-NEXT: ; return to shader part epilog
655 %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_c_lz_2d - the "lz" gather4 variant with float %zcompare and two half
; coordinates. The expected output contains no s_wqm / exec save-restore.
; Expected code: v0 is passed through, the halves arriving in v1/v2 are packed
; into v1, and image_gather4_c_lz takes the contiguous range v[0:1] with the
; a16 modifier on both targets.
659 define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
660 ; GFX9-LABEL: gather4_c_lz_2d:
661 ; GFX9: ; %bb.0: ; %main_body
662 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
663 ; GFX9-NEXT: s_mov_b32 s0, s2
664 ; GFX9-NEXT: s_mov_b32 s1, s3
665 ; GFX9-NEXT: s_mov_b32 s2, s4
666 ; GFX9-NEXT: s_mov_b32 s3, s5
667 ; GFX9-NEXT: s_mov_b32 s4, s6
668 ; GFX9-NEXT: s_mov_b32 s5, s7
669 ; GFX9-NEXT: s_mov_b32 s6, s8
670 ; GFX9-NEXT: s_mov_b32 s7, s9
671 ; GFX9-NEXT: s_mov_b32 s8, s10
672 ; GFX9-NEXT: s_mov_b32 s9, s11
673 ; GFX9-NEXT: s_mov_b32 s10, s12
674 ; GFX9-NEXT: s_mov_b32 s11, s13
675 ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
676 ; GFX9-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
677 ; GFX9-NEXT: s_waitcnt vmcnt(0)
678 ; GFX9-NEXT: ; return to shader part epilog
680 ; GFX10NSA-LABEL: gather4_c_lz_2d:
681 ; GFX10NSA: ; %bb.0: ; %main_body
682 ; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
683 ; GFX10NSA-NEXT: s_mov_b32 s0, s2
684 ; GFX10NSA-NEXT: s_mov_b32 s1, s3
685 ; GFX10NSA-NEXT: s_mov_b32 s2, s4
686 ; GFX10NSA-NEXT: s_mov_b32 s3, s5
687 ; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
688 ; GFX10NSA-NEXT: s_mov_b32 s4, s6
689 ; GFX10NSA-NEXT: s_mov_b32 s5, s7
690 ; GFX10NSA-NEXT: s_mov_b32 s6, s8
691 ; GFX10NSA-NEXT: s_mov_b32 s7, s9
692 ; GFX10NSA-NEXT: s_mov_b32 s8, s10
693 ; GFX10NSA-NEXT: s_mov_b32 s9, s11
694 ; GFX10NSA-NEXT: s_mov_b32 s10, s12
695 ; GFX10NSA-NEXT: s_mov_b32 s11, s13
696 ; GFX10NSA-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
697 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
698 ; GFX10NSA-NEXT: ; return to shader part epilog
700 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; Declarations of the fourteen gather4 image intrinsics called by the test
; functions in this file. Every one returns <4 x float>, takes an i32 immarg
; first, then its half/float operands, then the <8 x i32> and <4 x i32>
; descriptors, and ends with three immarg operands (i1, i32, i32). Variants
; with a bias operand carry the extra ".f16" suffix in the mangled name.
; All declarations share attribute group #0 (nounwind readonly).
704 declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
705 declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
706 declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
707 declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
708 declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
709 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32 immarg, half, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
710 declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32 immarg, half, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
711 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32 immarg, half, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
712 declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
713 declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
714 declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
715 declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
716 declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
717 declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
719 attributes #0 = { nounwind readonly }