1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
; gather4_o_2d: calls llvm.amdgcn.image.gather4.o.2d with the immediate 1 as
; its first operand (printed as dmask:0x1 in the expected output) and forwards
; %offset, %s and %t unchanged. Expected code for both run lines copies the
; inreg resource/sampler SGPRs from s2..s13 down to s0..s11, saves exec, applies
; s_wqm and then ANDs exec with the saved mask before issuing image_gather4_o
; with a three-register address (v[0:2]). The GFX10 output additionally prints
; dim:SQ_RSRC_IMG_2D. The assertions below are autogenerated; regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
5 define amdgpu_ps <4 x float> @gather4_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
6 ; GFX6-LABEL: gather4_o_2d:
7 ; GFX6: ; %bb.0: ; %main_body
8 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
9 ; GFX6-NEXT: s_mov_b32 s0, s2
10 ; GFX6-NEXT: s_mov_b32 s1, s3
11 ; GFX6-NEXT: s_mov_b32 s2, s4
12 ; GFX6-NEXT: s_mov_b32 s3, s5
13 ; GFX6-NEXT: s_mov_b32 s4, s6
14 ; GFX6-NEXT: s_mov_b32 s5, s7
15 ; GFX6-NEXT: s_mov_b32 s6, s8
16 ; GFX6-NEXT: s_mov_b32 s7, s9
17 ; GFX6-NEXT: s_mov_b32 s8, s10
18 ; GFX6-NEXT: s_mov_b32 s9, s11
19 ; GFX6-NEXT: s_mov_b32 s10, s12
20 ; GFX6-NEXT: s_mov_b32 s11, s13
21 ; GFX6-NEXT: s_wqm_b64 exec, exec
22 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
23 ; GFX6-NEXT: image_gather4_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
24 ; GFX6-NEXT: s_waitcnt vmcnt(0)
25 ; GFX6-NEXT: ; return to shader part epilog
27 ; GFX10-LABEL: gather4_o_2d:
28 ; GFX10: ; %bb.0: ; %main_body
29 ; GFX10-NEXT: s_mov_b32 s1, exec_lo
30 ; GFX10-NEXT: s_mov_b32 s0, s2
31 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
32 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
33 ; GFX10-NEXT: s_mov_b32 s1, s3
34 ; GFX10-NEXT: s_mov_b32 s2, s4
35 ; GFX10-NEXT: s_mov_b32 s3, s5
36 ; GFX10-NEXT: s_mov_b32 s4, s6
37 ; GFX10-NEXT: s_mov_b32 s5, s7
38 ; GFX10-NEXT: s_mov_b32 s6, s8
39 ; GFX10-NEXT: s_mov_b32 s7, s9
40 ; GFX10-NEXT: s_mov_b32 s8, s10
41 ; GFX10-NEXT: s_mov_b32 s9, s11
42 ; GFX10-NEXT: s_mov_b32 s10, s12
43 ; GFX10-NEXT: s_mov_b32 s11, s13
44 ; GFX10-NEXT: image_gather4_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
45 ; GFX10-NEXT: s_waitcnt vmcnt(0)
46 ; GFX10-NEXT: ; return to shader part epilog
48 %v = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_c_o_2d: calls llvm.amdgcn.image.gather4.c.o.2d, passing %offset,
; %zcompare, %s and %t in that order ahead of the resource and sampler.
; Expected code for both run lines shifts the inreg SGPRs from s2..s13 to
; s0..s11 and brackets exec with an s_wqm / s_and pair (restoring the mask that
; was saved first) before image_gather4_c_o, whose address operand is four
; registers wide (v[0:3]). The assertions are autogenerated by
; utils/update_llc_test_checks.py.
52 define amdgpu_ps <4 x float> @gather4_c_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
53 ; GFX6-LABEL: gather4_c_o_2d:
54 ; GFX6: ; %bb.0: ; %main_body
55 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
56 ; GFX6-NEXT: s_mov_b32 s0, s2
57 ; GFX6-NEXT: s_mov_b32 s1, s3
58 ; GFX6-NEXT: s_mov_b32 s2, s4
59 ; GFX6-NEXT: s_mov_b32 s3, s5
60 ; GFX6-NEXT: s_mov_b32 s4, s6
61 ; GFX6-NEXT: s_mov_b32 s5, s7
62 ; GFX6-NEXT: s_mov_b32 s6, s8
63 ; GFX6-NEXT: s_mov_b32 s7, s9
64 ; GFX6-NEXT: s_mov_b32 s8, s10
65 ; GFX6-NEXT: s_mov_b32 s9, s11
66 ; GFX6-NEXT: s_mov_b32 s10, s12
67 ; GFX6-NEXT: s_mov_b32 s11, s13
68 ; GFX6-NEXT: s_wqm_b64 exec, exec
69 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
70 ; GFX6-NEXT: image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
71 ; GFX6-NEXT: s_waitcnt vmcnt(0)
72 ; GFX6-NEXT: ; return to shader part epilog
74 ; GFX10-LABEL: gather4_c_o_2d:
75 ; GFX10: ; %bb.0: ; %main_body
76 ; GFX10-NEXT: s_mov_b32 s1, exec_lo
77 ; GFX10-NEXT: s_mov_b32 s0, s2
78 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
79 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
80 ; GFX10-NEXT: s_mov_b32 s1, s3
81 ; GFX10-NEXT: s_mov_b32 s2, s4
82 ; GFX10-NEXT: s_mov_b32 s3, s5
83 ; GFX10-NEXT: s_mov_b32 s4, s6
84 ; GFX10-NEXT: s_mov_b32 s5, s7
85 ; GFX10-NEXT: s_mov_b32 s6, s8
86 ; GFX10-NEXT: s_mov_b32 s7, s9
87 ; GFX10-NEXT: s_mov_b32 s8, s10
88 ; GFX10-NEXT: s_mov_b32 s9, s11
89 ; GFX10-NEXT: s_mov_b32 s10, s12
90 ; GFX10-NEXT: s_mov_b32 s11, s13
91 ; GFX10-NEXT: image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
92 ; GFX10-NEXT: s_waitcnt vmcnt(0)
93 ; GFX10-NEXT: ; return to shader part epilog
95 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_cl_o_2d: calls llvm.amdgcn.image.gather4.cl.o.2d, passing %offset,
; %s, %t and %clamp in that order ahead of the resource and sampler.
; Expected code for both run lines shifts the inreg SGPRs from s2..s13 to
; s0..s11 and brackets exec with an s_wqm / s_and pair before
; image_gather4_cl_o, whose address operand is four registers wide (v[0:3]).
; The assertions are autogenerated by utils/update_llc_test_checks.py.
99 define amdgpu_ps <4 x float> @gather4_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %clamp) {
100 ; GFX6-LABEL: gather4_cl_o_2d:
101 ; GFX6: ; %bb.0: ; %main_body
102 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
103 ; GFX6-NEXT: s_mov_b32 s0, s2
104 ; GFX6-NEXT: s_mov_b32 s1, s3
105 ; GFX6-NEXT: s_mov_b32 s2, s4
106 ; GFX6-NEXT: s_mov_b32 s3, s5
107 ; GFX6-NEXT: s_mov_b32 s4, s6
108 ; GFX6-NEXT: s_mov_b32 s5, s7
109 ; GFX6-NEXT: s_mov_b32 s6, s8
110 ; GFX6-NEXT: s_mov_b32 s7, s9
111 ; GFX6-NEXT: s_mov_b32 s8, s10
112 ; GFX6-NEXT: s_mov_b32 s9, s11
113 ; GFX6-NEXT: s_mov_b32 s10, s12
114 ; GFX6-NEXT: s_mov_b32 s11, s13
115 ; GFX6-NEXT: s_wqm_b64 exec, exec
116 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
117 ; GFX6-NEXT: image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
118 ; GFX6-NEXT: s_waitcnt vmcnt(0)
119 ; GFX6-NEXT: ; return to shader part epilog
121 ; GFX10-LABEL: gather4_cl_o_2d:
122 ; GFX10: ; %bb.0: ; %main_body
123 ; GFX10-NEXT: s_mov_b32 s1, exec_lo
124 ; GFX10-NEXT: s_mov_b32 s0, s2
125 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
126 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
127 ; GFX10-NEXT: s_mov_b32 s1, s3
128 ; GFX10-NEXT: s_mov_b32 s2, s4
129 ; GFX10-NEXT: s_mov_b32 s3, s5
130 ; GFX10-NEXT: s_mov_b32 s4, s6
131 ; GFX10-NEXT: s_mov_b32 s5, s7
132 ; GFX10-NEXT: s_mov_b32 s6, s8
133 ; GFX10-NEXT: s_mov_b32 s7, s9
134 ; GFX10-NEXT: s_mov_b32 s8, s10
135 ; GFX10-NEXT: s_mov_b32 s9, s11
136 ; GFX10-NEXT: s_mov_b32 s10, s12
137 ; GFX10-NEXT: s_mov_b32 s11, s13
138 ; GFX10-NEXT: image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
139 ; GFX10-NEXT: s_waitcnt vmcnt(0)
140 ; GFX10-NEXT: ; return to shader part epilog
142 %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_c_cl_o_2d: calls llvm.amdgcn.image.gather4.c.cl.o.2d, passing
; %offset, %zcompare, %s, %t and %clamp in that order ahead of the resource and
; sampler. Expected code for both run lines shifts the inreg SGPRs from s2..s13
; to s0..s11 and brackets exec with an s_wqm / s_and pair before
; image_gather4_c_cl_o, whose address operand is five registers wide (v[0:4]).
; The assertions are autogenerated by utils/update_llc_test_checks.py.
146 define amdgpu_ps <4 x float> @gather4_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %clamp) {
147 ; GFX6-LABEL: gather4_c_cl_o_2d:
148 ; GFX6: ; %bb.0: ; %main_body
149 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
150 ; GFX6-NEXT: s_mov_b32 s0, s2
151 ; GFX6-NEXT: s_mov_b32 s1, s3
152 ; GFX6-NEXT: s_mov_b32 s2, s4
153 ; GFX6-NEXT: s_mov_b32 s3, s5
154 ; GFX6-NEXT: s_mov_b32 s4, s6
155 ; GFX6-NEXT: s_mov_b32 s5, s7
156 ; GFX6-NEXT: s_mov_b32 s6, s8
157 ; GFX6-NEXT: s_mov_b32 s7, s9
158 ; GFX6-NEXT: s_mov_b32 s8, s10
159 ; GFX6-NEXT: s_mov_b32 s9, s11
160 ; GFX6-NEXT: s_mov_b32 s10, s12
161 ; GFX6-NEXT: s_mov_b32 s11, s13
162 ; GFX6-NEXT: s_wqm_b64 exec, exec
163 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
164 ; GFX6-NEXT: image_gather4_c_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
165 ; GFX6-NEXT: s_waitcnt vmcnt(0)
166 ; GFX6-NEXT: ; return to shader part epilog
168 ; GFX10-LABEL: gather4_c_cl_o_2d:
169 ; GFX10: ; %bb.0: ; %main_body
170 ; GFX10-NEXT: s_mov_b32 s1, exec_lo
171 ; GFX10-NEXT: s_mov_b32 s0, s2
172 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
173 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
174 ; GFX10-NEXT: s_mov_b32 s1, s3
175 ; GFX10-NEXT: s_mov_b32 s2, s4
176 ; GFX10-NEXT: s_mov_b32 s3, s5
177 ; GFX10-NEXT: s_mov_b32 s4, s6
178 ; GFX10-NEXT: s_mov_b32 s5, s7
179 ; GFX10-NEXT: s_mov_b32 s6, s8
180 ; GFX10-NEXT: s_mov_b32 s7, s9
181 ; GFX10-NEXT: s_mov_b32 s8, s10
182 ; GFX10-NEXT: s_mov_b32 s9, s11
183 ; GFX10-NEXT: s_mov_b32 s10, s12
184 ; GFX10-NEXT: s_mov_b32 s11, s13
185 ; GFX10-NEXT: image_gather4_c_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
186 ; GFX10-NEXT: s_waitcnt vmcnt(0)
187 ; GFX10-NEXT: ; return to shader part epilog
189 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_b_o_2d: calls llvm.amdgcn.image.gather4.b.o.2d (mangled with a
; .f32.f32 suffix), passing %offset, %bias, %s and %t in that order ahead of
; the resource and sampler. Expected code for both run lines shifts the inreg
; SGPRs from s2..s13 to s0..s11 and brackets exec with an s_wqm / s_and pair
; before image_gather4_b_o, whose address operand is four registers wide
; (v[0:3]). The assertions are autogenerated by
; utils/update_llc_test_checks.py.
193 define amdgpu_ps <4 x float> @gather4_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t) {
194 ; GFX6-LABEL: gather4_b_o_2d:
195 ; GFX6: ; %bb.0: ; %main_body
196 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
197 ; GFX6-NEXT: s_mov_b32 s0, s2
198 ; GFX6-NEXT: s_mov_b32 s1, s3
199 ; GFX6-NEXT: s_mov_b32 s2, s4
200 ; GFX6-NEXT: s_mov_b32 s3, s5
201 ; GFX6-NEXT: s_mov_b32 s4, s6
202 ; GFX6-NEXT: s_mov_b32 s5, s7
203 ; GFX6-NEXT: s_mov_b32 s6, s8
204 ; GFX6-NEXT: s_mov_b32 s7, s9
205 ; GFX6-NEXT: s_mov_b32 s8, s10
206 ; GFX6-NEXT: s_mov_b32 s9, s11
207 ; GFX6-NEXT: s_mov_b32 s10, s12
208 ; GFX6-NEXT: s_mov_b32 s11, s13
209 ; GFX6-NEXT: s_wqm_b64 exec, exec
210 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
211 ; GFX6-NEXT: image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
212 ; GFX6-NEXT: s_waitcnt vmcnt(0)
213 ; GFX6-NEXT: ; return to shader part epilog
215 ; GFX10-LABEL: gather4_b_o_2d:
216 ; GFX10: ; %bb.0: ; %main_body
217 ; GFX10-NEXT: s_mov_b32 s1, exec_lo
218 ; GFX10-NEXT: s_mov_b32 s0, s2
219 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
220 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
221 ; GFX10-NEXT: s_mov_b32 s1, s3
222 ; GFX10-NEXT: s_mov_b32 s2, s4
223 ; GFX10-NEXT: s_mov_b32 s3, s5
224 ; GFX10-NEXT: s_mov_b32 s4, s6
225 ; GFX10-NEXT: s_mov_b32 s5, s7
226 ; GFX10-NEXT: s_mov_b32 s6, s8
227 ; GFX10-NEXT: s_mov_b32 s7, s9
228 ; GFX10-NEXT: s_mov_b32 s8, s10
229 ; GFX10-NEXT: s_mov_b32 s9, s11
230 ; GFX10-NEXT: s_mov_b32 s10, s12
231 ; GFX10-NEXT: s_mov_b32 s11, s13
232 ; GFX10-NEXT: image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
233 ; GFX10-NEXT: s_waitcnt vmcnt(0)
234 ; GFX10-NEXT: ; return to shader part epilog
236 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_c_b_o_2d: calls llvm.amdgcn.image.gather4.c.b.o.2d (mangled with a
; .f32.f32 suffix), passing %offset, %bias, %zcompare, %s and %t in that order
; ahead of the resource and sampler. Expected code for both run lines shifts
; the inreg SGPRs from s2..s13 to s0..s11 and brackets exec with an
; s_wqm / s_and pair before image_gather4_c_b_o, whose address operand is five
; registers wide (v[0:4]). The assertions are autogenerated by
; utils/update_llc_test_checks.py.
240 define amdgpu_ps <4 x float> @gather4_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t) {
241 ; GFX6-LABEL: gather4_c_b_o_2d:
242 ; GFX6: ; %bb.0: ; %main_body
243 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
244 ; GFX6-NEXT: s_mov_b32 s0, s2
245 ; GFX6-NEXT: s_mov_b32 s1, s3
246 ; GFX6-NEXT: s_mov_b32 s2, s4
247 ; GFX6-NEXT: s_mov_b32 s3, s5
248 ; GFX6-NEXT: s_mov_b32 s4, s6
249 ; GFX6-NEXT: s_mov_b32 s5, s7
250 ; GFX6-NEXT: s_mov_b32 s6, s8
251 ; GFX6-NEXT: s_mov_b32 s7, s9
252 ; GFX6-NEXT: s_mov_b32 s8, s10
253 ; GFX6-NEXT: s_mov_b32 s9, s11
254 ; GFX6-NEXT: s_mov_b32 s10, s12
255 ; GFX6-NEXT: s_mov_b32 s11, s13
256 ; GFX6-NEXT: s_wqm_b64 exec, exec
257 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
258 ; GFX6-NEXT: image_gather4_c_b_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
259 ; GFX6-NEXT: s_waitcnt vmcnt(0)
260 ; GFX6-NEXT: ; return to shader part epilog
262 ; GFX10-LABEL: gather4_c_b_o_2d:
263 ; GFX10: ; %bb.0: ; %main_body
264 ; GFX10-NEXT: s_mov_b32 s1, exec_lo
265 ; GFX10-NEXT: s_mov_b32 s0, s2
266 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
267 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
268 ; GFX10-NEXT: s_mov_b32 s1, s3
269 ; GFX10-NEXT: s_mov_b32 s2, s4
270 ; GFX10-NEXT: s_mov_b32 s3, s5
271 ; GFX10-NEXT: s_mov_b32 s4, s6
272 ; GFX10-NEXT: s_mov_b32 s5, s7
273 ; GFX10-NEXT: s_mov_b32 s6, s8
274 ; GFX10-NEXT: s_mov_b32 s7, s9
275 ; GFX10-NEXT: s_mov_b32 s8, s10
276 ; GFX10-NEXT: s_mov_b32 s9, s11
277 ; GFX10-NEXT: s_mov_b32 s10, s12
278 ; GFX10-NEXT: s_mov_b32 s11, s13
279 ; GFX10-NEXT: image_gather4_c_b_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
280 ; GFX10-NEXT: s_waitcnt vmcnt(0)
281 ; GFX10-NEXT: ; return to shader part epilog
283 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_b_cl_o_2d: calls llvm.amdgcn.image.gather4.b.cl.o.2d (mangled with a
; .f32.f32 suffix), passing %offset, %bias, %s, %t and %clamp in that order
; ahead of the resource and sampler. Expected code for both run lines only
; shifts the inreg SGPRs from s2..s13 to s0..s11 and then issues
; image_gather4_b_cl_o with a five-register address (v[0:4]); exec is not
; touched.
; NOTE(review): the other bias variants in this file (gather4_b_o_2d,
; gather4_c_b_o_2d, gather4_c_b_cl_o_2d) all expect an s_wqm / s_and exec
; sequence, while this one does not. The assertions are autogenerated from
; actual llc output, so this reflects current codegen - confirm against the
; backend's instruction definition whether the difference is intended.
287 define amdgpu_ps <4 x float> @gather4_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t, float %clamp) {
288 ; GFX6-LABEL: gather4_b_cl_o_2d:
289 ; GFX6: ; %bb.0: ; %main_body
290 ; GFX6-NEXT: s_mov_b32 s0, s2
291 ; GFX6-NEXT: s_mov_b32 s1, s3
292 ; GFX6-NEXT: s_mov_b32 s2, s4
293 ; GFX6-NEXT: s_mov_b32 s3, s5
294 ; GFX6-NEXT: s_mov_b32 s4, s6
295 ; GFX6-NEXT: s_mov_b32 s5, s7
296 ; GFX6-NEXT: s_mov_b32 s6, s8
297 ; GFX6-NEXT: s_mov_b32 s7, s9
298 ; GFX6-NEXT: s_mov_b32 s8, s10
299 ; GFX6-NEXT: s_mov_b32 s9, s11
300 ; GFX6-NEXT: s_mov_b32 s10, s12
301 ; GFX6-NEXT: s_mov_b32 s11, s13
302 ; GFX6-NEXT: image_gather4_b_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
303 ; GFX6-NEXT: s_waitcnt vmcnt(0)
304 ; GFX6-NEXT: ; return to shader part epilog
306 ; GFX10-LABEL: gather4_b_cl_o_2d:
307 ; GFX10: ; %bb.0: ; %main_body
308 ; GFX10-NEXT: s_mov_b32 s0, s2
309 ; GFX10-NEXT: s_mov_b32 s1, s3
310 ; GFX10-NEXT: s_mov_b32 s2, s4
311 ; GFX10-NEXT: s_mov_b32 s3, s5
312 ; GFX10-NEXT: s_mov_b32 s4, s6
313 ; GFX10-NEXT: s_mov_b32 s5, s7
314 ; GFX10-NEXT: s_mov_b32 s6, s8
315 ; GFX10-NEXT: s_mov_b32 s7, s9
316 ; GFX10-NEXT: s_mov_b32 s8, s10
317 ; GFX10-NEXT: s_mov_b32 s9, s11
318 ; GFX10-NEXT: s_mov_b32 s10, s12
319 ; GFX10-NEXT: s_mov_b32 s11, s13
320 ; GFX10-NEXT: image_gather4_b_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
321 ; GFX10-NEXT: s_waitcnt vmcnt(0)
322 ; GFX10-NEXT: ; return to shader part epilog
324 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_c_b_cl_o_2d: calls llvm.amdgcn.image.gather4.c.b.cl.o.2d (mangled
; with a .f32.f32 suffix), passing %offset, %bias, %zcompare, %s, %t and %clamp
; in that order ahead of the resource and sampler. Expected code for both run
; lines shifts the inreg SGPRs from s2..s13 to s0..s11 and brackets exec with
; an s_wqm / s_and pair before image_gather4_c_b_cl_o, whose address operand is
; six registers wide (v[0:5]). The assertions are autogenerated by
; utils/update_llc_test_checks.py.
328 define amdgpu_ps <4 x float> @gather4_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) {
329 ; GFX6-LABEL: gather4_c_b_cl_o_2d:
330 ; GFX6: ; %bb.0: ; %main_body
331 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
332 ; GFX6-NEXT: s_mov_b32 s0, s2
333 ; GFX6-NEXT: s_mov_b32 s1, s3
334 ; GFX6-NEXT: s_mov_b32 s2, s4
335 ; GFX6-NEXT: s_mov_b32 s3, s5
336 ; GFX6-NEXT: s_mov_b32 s4, s6
337 ; GFX6-NEXT: s_mov_b32 s5, s7
338 ; GFX6-NEXT: s_mov_b32 s6, s8
339 ; GFX6-NEXT: s_mov_b32 s7, s9
340 ; GFX6-NEXT: s_mov_b32 s8, s10
341 ; GFX6-NEXT: s_mov_b32 s9, s11
342 ; GFX6-NEXT: s_mov_b32 s10, s12
343 ; GFX6-NEXT: s_mov_b32 s11, s13
344 ; GFX6-NEXT: s_wqm_b64 exec, exec
345 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
346 ; GFX6-NEXT: image_gather4_c_b_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0x1
347 ; GFX6-NEXT: s_waitcnt vmcnt(0)
348 ; GFX6-NEXT: ; return to shader part epilog
350 ; GFX10-LABEL: gather4_c_b_cl_o_2d:
351 ; GFX10: ; %bb.0: ; %main_body
352 ; GFX10-NEXT: s_mov_b32 s1, exec_lo
353 ; GFX10-NEXT: s_mov_b32 s0, s2
354 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
355 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
356 ; GFX10-NEXT: s_mov_b32 s1, s3
357 ; GFX10-NEXT: s_mov_b32 s2, s4
358 ; GFX10-NEXT: s_mov_b32 s3, s5
359 ; GFX10-NEXT: s_mov_b32 s4, s6
360 ; GFX10-NEXT: s_mov_b32 s5, s7
361 ; GFX10-NEXT: s_mov_b32 s6, s8
362 ; GFX10-NEXT: s_mov_b32 s7, s9
363 ; GFX10-NEXT: s_mov_b32 s8, s10
364 ; GFX10-NEXT: s_mov_b32 s9, s11
365 ; GFX10-NEXT: s_mov_b32 s10, s12
366 ; GFX10-NEXT: s_mov_b32 s11, s13
367 ; GFX10-NEXT: image_gather4_c_b_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
368 ; GFX10-NEXT: s_waitcnt vmcnt(0)
369 ; GFX10-NEXT: ; return to shader part epilog
371 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_l_o_2d: calls llvm.amdgcn.image.gather4.l.o.2d, passing %offset, %s,
; %t and %lod in that order ahead of the resource and sampler. Expected code
; for both run lines only shifts the inreg SGPRs from s2..s13 to s0..s11 and
; then issues image_gather4_l_o with a four-register address (v[0:3]); no
; s_wqm / exec manipulation is expected. The assertions are autogenerated by
; utils/update_llc_test_checks.py.
375 define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
376 ; GFX6-LABEL: gather4_l_o_2d:
377 ; GFX6: ; %bb.0: ; %main_body
378 ; GFX6-NEXT: s_mov_b32 s0, s2
379 ; GFX6-NEXT: s_mov_b32 s1, s3
380 ; GFX6-NEXT: s_mov_b32 s2, s4
381 ; GFX6-NEXT: s_mov_b32 s3, s5
382 ; GFX6-NEXT: s_mov_b32 s4, s6
383 ; GFX6-NEXT: s_mov_b32 s5, s7
384 ; GFX6-NEXT: s_mov_b32 s6, s8
385 ; GFX6-NEXT: s_mov_b32 s7, s9
386 ; GFX6-NEXT: s_mov_b32 s8, s10
387 ; GFX6-NEXT: s_mov_b32 s9, s11
388 ; GFX6-NEXT: s_mov_b32 s10, s12
389 ; GFX6-NEXT: s_mov_b32 s11, s13
390 ; GFX6-NEXT: image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
391 ; GFX6-NEXT: s_waitcnt vmcnt(0)
392 ; GFX6-NEXT: ; return to shader part epilog
394 ; GFX10-LABEL: gather4_l_o_2d:
395 ; GFX10: ; %bb.0: ; %main_body
396 ; GFX10-NEXT: s_mov_b32 s0, s2
397 ; GFX10-NEXT: s_mov_b32 s1, s3
398 ; GFX10-NEXT: s_mov_b32 s2, s4
399 ; GFX10-NEXT: s_mov_b32 s3, s5
400 ; GFX10-NEXT: s_mov_b32 s4, s6
401 ; GFX10-NEXT: s_mov_b32 s5, s7
402 ; GFX10-NEXT: s_mov_b32 s6, s8
403 ; GFX10-NEXT: s_mov_b32 s7, s9
404 ; GFX10-NEXT: s_mov_b32 s8, s10
405 ; GFX10-NEXT: s_mov_b32 s9, s11
406 ; GFX10-NEXT: s_mov_b32 s10, s12
407 ; GFX10-NEXT: s_mov_b32 s11, s13
408 ; GFX10-NEXT: image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
409 ; GFX10-NEXT: s_waitcnt vmcnt(0)
410 ; GFX10-NEXT: ; return to shader part epilog
412 %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_c_l_o_2d: calls llvm.amdgcn.image.gather4.c.l.o.2d, passing %offset,
; %zcompare, %s, %t and %lod in that order ahead of the resource and sampler.
; Expected code for both run lines only shifts the inreg SGPRs from s2..s13 to
; s0..s11 and then issues image_gather4_c_l_o with a five-register address
; (v[0:4]); no s_wqm / exec manipulation is expected. The assertions are
; autogenerated by utils/update_llc_test_checks.py.
416 define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
417 ; GFX6-LABEL: gather4_c_l_o_2d:
418 ; GFX6: ; %bb.0: ; %main_body
419 ; GFX6-NEXT: s_mov_b32 s0, s2
420 ; GFX6-NEXT: s_mov_b32 s1, s3
421 ; GFX6-NEXT: s_mov_b32 s2, s4
422 ; GFX6-NEXT: s_mov_b32 s3, s5
423 ; GFX6-NEXT: s_mov_b32 s4, s6
424 ; GFX6-NEXT: s_mov_b32 s5, s7
425 ; GFX6-NEXT: s_mov_b32 s6, s8
426 ; GFX6-NEXT: s_mov_b32 s7, s9
427 ; GFX6-NEXT: s_mov_b32 s8, s10
428 ; GFX6-NEXT: s_mov_b32 s9, s11
429 ; GFX6-NEXT: s_mov_b32 s10, s12
430 ; GFX6-NEXT: s_mov_b32 s11, s13
431 ; GFX6-NEXT: image_gather4_c_l_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
432 ; GFX6-NEXT: s_waitcnt vmcnt(0)
433 ; GFX6-NEXT: ; return to shader part epilog
435 ; GFX10-LABEL: gather4_c_l_o_2d:
436 ; GFX10: ; %bb.0: ; %main_body
437 ; GFX10-NEXT: s_mov_b32 s0, s2
438 ; GFX10-NEXT: s_mov_b32 s1, s3
439 ; GFX10-NEXT: s_mov_b32 s2, s4
440 ; GFX10-NEXT: s_mov_b32 s3, s5
441 ; GFX10-NEXT: s_mov_b32 s4, s6
442 ; GFX10-NEXT: s_mov_b32 s5, s7
443 ; GFX10-NEXT: s_mov_b32 s6, s8
444 ; GFX10-NEXT: s_mov_b32 s7, s9
445 ; GFX10-NEXT: s_mov_b32 s8, s10
446 ; GFX10-NEXT: s_mov_b32 s9, s11
447 ; GFX10-NEXT: s_mov_b32 s10, s12
448 ; GFX10-NEXT: s_mov_b32 s11, s13
449 ; GFX10-NEXT: image_gather4_c_l_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
450 ; GFX10-NEXT: s_waitcnt vmcnt(0)
451 ; GFX10-NEXT: ; return to shader part epilog
453 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_lz_o_2d: calls llvm.amdgcn.image.gather4.lz.o.2d, passing %offset,
; %s and %t in that order ahead of the resource and sampler. Expected code for
; both run lines only shifts the inreg SGPRs from s2..s13 to s0..s11 and then
; issues image_gather4_lz_o with a three-register address (v[0:2]); no
; s_wqm / exec manipulation is expected. The assertions are autogenerated by
; utils/update_llc_test_checks.py.
457 define amdgpu_ps <4 x float> @gather4_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
458 ; GFX6-LABEL: gather4_lz_o_2d:
459 ; GFX6: ; %bb.0: ; %main_body
460 ; GFX6-NEXT: s_mov_b32 s0, s2
461 ; GFX6-NEXT: s_mov_b32 s1, s3
462 ; GFX6-NEXT: s_mov_b32 s2, s4
463 ; GFX6-NEXT: s_mov_b32 s3, s5
464 ; GFX6-NEXT: s_mov_b32 s4, s6
465 ; GFX6-NEXT: s_mov_b32 s5, s7
466 ; GFX6-NEXT: s_mov_b32 s6, s8
467 ; GFX6-NEXT: s_mov_b32 s7, s9
468 ; GFX6-NEXT: s_mov_b32 s8, s10
469 ; GFX6-NEXT: s_mov_b32 s9, s11
470 ; GFX6-NEXT: s_mov_b32 s10, s12
471 ; GFX6-NEXT: s_mov_b32 s11, s13
472 ; GFX6-NEXT: image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
473 ; GFX6-NEXT: s_waitcnt vmcnt(0)
474 ; GFX6-NEXT: ; return to shader part epilog
476 ; GFX10-LABEL: gather4_lz_o_2d:
477 ; GFX10: ; %bb.0: ; %main_body
478 ; GFX10-NEXT: s_mov_b32 s0, s2
479 ; GFX10-NEXT: s_mov_b32 s1, s3
480 ; GFX10-NEXT: s_mov_b32 s2, s4
481 ; GFX10-NEXT: s_mov_b32 s3, s5
482 ; GFX10-NEXT: s_mov_b32 s4, s6
483 ; GFX10-NEXT: s_mov_b32 s5, s7
484 ; GFX10-NEXT: s_mov_b32 s6, s8
485 ; GFX10-NEXT: s_mov_b32 s7, s9
486 ; GFX10-NEXT: s_mov_b32 s8, s10
487 ; GFX10-NEXT: s_mov_b32 s9, s11
488 ; GFX10-NEXT: s_mov_b32 s10, s12
489 ; GFX10-NEXT: s_mov_b32 s11, s13
490 ; GFX10-NEXT: image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
491 ; GFX10-NEXT: s_waitcnt vmcnt(0)
492 ; GFX10-NEXT: ; return to shader part epilog
494 %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; gather4_c_lz_o_2d: calls llvm.amdgcn.image.gather4.c.lz.o.2d, passing
; %offset, %zcompare, %s and %t in that order ahead of the resource and
; sampler. Expected code for both run lines only shifts the inreg SGPRs from
; s2..s13 to s0..s11 and then issues image_gather4_c_lz_o with a four-register
; address (v[0:3]); no s_wqm / exec manipulation is expected. The assertions
; are autogenerated by utils/update_llc_test_checks.py.
498 define amdgpu_ps <4 x float> @gather4_c_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
499 ; GFX6-LABEL: gather4_c_lz_o_2d:
500 ; GFX6: ; %bb.0: ; %main_body
501 ; GFX6-NEXT: s_mov_b32 s0, s2
502 ; GFX6-NEXT: s_mov_b32 s1, s3
503 ; GFX6-NEXT: s_mov_b32 s2, s4
504 ; GFX6-NEXT: s_mov_b32 s3, s5
505 ; GFX6-NEXT: s_mov_b32 s4, s6
506 ; GFX6-NEXT: s_mov_b32 s5, s7
507 ; GFX6-NEXT: s_mov_b32 s6, s8
508 ; GFX6-NEXT: s_mov_b32 s7, s9
509 ; GFX6-NEXT: s_mov_b32 s8, s10
510 ; GFX6-NEXT: s_mov_b32 s9, s11
511 ; GFX6-NEXT: s_mov_b32 s10, s12
512 ; GFX6-NEXT: s_mov_b32 s11, s13
513 ; GFX6-NEXT: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
514 ; GFX6-NEXT: s_waitcnt vmcnt(0)
515 ; GFX6-NEXT: ; return to shader part epilog
517 ; GFX10-LABEL: gather4_c_lz_o_2d:
518 ; GFX10: ; %bb.0: ; %main_body
519 ; GFX10-NEXT: s_mov_b32 s0, s2
520 ; GFX10-NEXT: s_mov_b32 s1, s3
521 ; GFX10-NEXT: s_mov_b32 s2, s4
522 ; GFX10-NEXT: s_mov_b32 s3, s5
523 ; GFX10-NEXT: s_mov_b32 s4, s6
524 ; GFX10-NEXT: s_mov_b32 s5, s7
525 ; GFX10-NEXT: s_mov_b32 s6, s8
526 ; GFX10-NEXT: s_mov_b32 s7, s9
527 ; GFX10-NEXT: s_mov_b32 s8, s10
528 ; GFX10-NEXT: s_mov_b32 s9, s11
529 ; GFX10-NEXT: s_mov_b32 s10, s12
530 ; GFX10-NEXT: s_mov_b32 s11, s13
531 ; GFX10-NEXT: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
532 ; GFX10-NEXT: s_waitcnt vmcnt(0)
533 ; GFX10-NEXT: ; return to shader part epilog
535 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; Declarations for the twelve gather4 "o" 2D intrinsics called by the test
; functions in this file. Each takes a leading immarg i32, the i32 offset, the
; variant's float operands, the <8 x i32> resource and <4 x i32> sampler, and
; three trailing immarg operands (i1, i32, i32). The four bias (.b.) variants
; are mangled with an extra .f32 suffix - presumably the overloaded bias type;
; confirm against the intrinsic definitions. All share attribute group #0
; (nounwind readonly), defined on the last line.
539 declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
540 declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
541 declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
542 declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
543 declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
544 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
545 declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
546 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
547 declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
548 declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
549 declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
550 declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
552 attributes #0 = { nounwind readonly }