1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
4 ; RUN: not --crash llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - %s 2>&1 | FileCheck -check-prefix=GFX11-ERR %s
6 ; GFX11-ERR: LLVM ERROR: cannot select: {{.*}} = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.gather4
; Pixel-shader test for llvm.amdgcn.image.gather4.o.2d, called with %offset, %s
; and %t and a first (immarg) operand of 1.
; The GFX6 and GFX10 check lines both expect exec to be saved, followed by an
; s_wqm / s_and pair on exec ahead of the image_gather4_o instruction, which
; reads v[0:2] with dmask:0x1.
8 define amdgpu_ps <4 x float> @gather4_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
9 ; GFX6-LABEL: gather4_o_2d:
10 ; GFX6: ; %bb.0: ; %main_body
11 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
12 ; GFX6-NEXT: s_mov_b32 s0, s2
13 ; GFX6-NEXT: s_mov_b32 s1, s3
14 ; GFX6-NEXT: s_mov_b32 s2, s4
15 ; GFX6-NEXT: s_mov_b32 s3, s5
16 ; GFX6-NEXT: s_mov_b32 s4, s6
17 ; GFX6-NEXT: s_mov_b32 s5, s7
18 ; GFX6-NEXT: s_mov_b32 s6, s8
19 ; GFX6-NEXT: s_mov_b32 s7, s9
20 ; GFX6-NEXT: s_mov_b32 s8, s10
21 ; GFX6-NEXT: s_mov_b32 s9, s11
22 ; GFX6-NEXT: s_mov_b32 s10, s12
23 ; GFX6-NEXT: s_mov_b32 s11, s13
24 ; GFX6-NEXT: s_wqm_b64 exec, exec
25 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
26 ; GFX6-NEXT: image_gather4_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
27 ; GFX6-NEXT: s_waitcnt vmcnt(0)
28 ; GFX6-NEXT: ; return to shader part epilog
30 ; GFX10-LABEL: gather4_o_2d:
31 ; GFX10: ; %bb.0: ; %main_body
32 ; GFX10-NEXT: s_mov_b32 s1, exec_lo
33 ; GFX10-NEXT: s_mov_b32 s0, s2
34 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
35 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
36 ; GFX10-NEXT: s_mov_b32 s1, s3
37 ; GFX10-NEXT: s_mov_b32 s2, s4
38 ; GFX10-NEXT: s_mov_b32 s3, s5
39 ; GFX10-NEXT: s_mov_b32 s4, s6
40 ; GFX10-NEXT: s_mov_b32 s5, s7
41 ; GFX10-NEXT: s_mov_b32 s6, s8
42 ; GFX10-NEXT: s_mov_b32 s7, s9
43 ; GFX10-NEXT: s_mov_b32 s8, s10
44 ; GFX10-NEXT: s_mov_b32 s9, s11
45 ; GFX10-NEXT: s_mov_b32 s10, s12
46 ; GFX10-NEXT: s_mov_b32 s11, s13
47 ; GFX10-NEXT: image_gather4_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
48 ; GFX10-NEXT: s_waitcnt vmcnt(0)
49 ; GFX10-NEXT: ; return to shader part epilog
51 %v = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; Pixel-shader test for llvm.amdgcn.image.gather4.c.o.2d: same shape as the
; plain .o variant, with an additional %zcompare operand passed between %offset
; and the %s/%t coordinates.
; The GFX6 and GFX10 check lines both expect an s_wqm / s_and pair on exec ahead
; of image_gather4_c_o, which reads v[0:3] with dmask:0x1.
55 define amdgpu_ps <4 x float> @gather4_c_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
56 ; GFX6-LABEL: gather4_c_o_2d:
57 ; GFX6: ; %bb.0: ; %main_body
58 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
59 ; GFX6-NEXT: s_mov_b32 s0, s2
60 ; GFX6-NEXT: s_mov_b32 s1, s3
61 ; GFX6-NEXT: s_mov_b32 s2, s4
62 ; GFX6-NEXT: s_mov_b32 s3, s5
63 ; GFX6-NEXT: s_mov_b32 s4, s6
64 ; GFX6-NEXT: s_mov_b32 s5, s7
65 ; GFX6-NEXT: s_mov_b32 s6, s8
66 ; GFX6-NEXT: s_mov_b32 s7, s9
67 ; GFX6-NEXT: s_mov_b32 s8, s10
68 ; GFX6-NEXT: s_mov_b32 s9, s11
69 ; GFX6-NEXT: s_mov_b32 s10, s12
70 ; GFX6-NEXT: s_mov_b32 s11, s13
71 ; GFX6-NEXT: s_wqm_b64 exec, exec
72 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
73 ; GFX6-NEXT: image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
74 ; GFX6-NEXT: s_waitcnt vmcnt(0)
75 ; GFX6-NEXT: ; return to shader part epilog
77 ; GFX10-LABEL: gather4_c_o_2d:
78 ; GFX10: ; %bb.0: ; %main_body
79 ; GFX10-NEXT: s_mov_b32 s1, exec_lo
80 ; GFX10-NEXT: s_mov_b32 s0, s2
81 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
82 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
83 ; GFX10-NEXT: s_mov_b32 s1, s3
84 ; GFX10-NEXT: s_mov_b32 s2, s4
85 ; GFX10-NEXT: s_mov_b32 s3, s5
86 ; GFX10-NEXT: s_mov_b32 s4, s6
87 ; GFX10-NEXT: s_mov_b32 s5, s7
88 ; GFX10-NEXT: s_mov_b32 s6, s8
89 ; GFX10-NEXT: s_mov_b32 s7, s9
90 ; GFX10-NEXT: s_mov_b32 s8, s10
91 ; GFX10-NEXT: s_mov_b32 s9, s11
92 ; GFX10-NEXT: s_mov_b32 s10, s12
93 ; GFX10-NEXT: s_mov_b32 s11, s13
94 ; GFX10-NEXT: image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
95 ; GFX10-NEXT: s_waitcnt vmcnt(0)
96 ; GFX10-NEXT: ; return to shader part epilog
98 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; Pixel-shader test for llvm.amdgcn.image.gather4.cl.o.2d: %offset, %s, %t and
; a trailing %clamp operand.
; The GFX6 and GFX10 check lines both expect an s_wqm / s_and pair on exec ahead
; of image_gather4_cl_o, which reads v[0:3] with dmask:0x1.
102 define amdgpu_ps <4 x float> @gather4_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %clamp) {
103 ; GFX6-LABEL: gather4_cl_o_2d:
104 ; GFX6: ; %bb.0: ; %main_body
105 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
106 ; GFX6-NEXT: s_mov_b32 s0, s2
107 ; GFX6-NEXT: s_mov_b32 s1, s3
108 ; GFX6-NEXT: s_mov_b32 s2, s4
109 ; GFX6-NEXT: s_mov_b32 s3, s5
110 ; GFX6-NEXT: s_mov_b32 s4, s6
111 ; GFX6-NEXT: s_mov_b32 s5, s7
112 ; GFX6-NEXT: s_mov_b32 s6, s8
113 ; GFX6-NEXT: s_mov_b32 s7, s9
114 ; GFX6-NEXT: s_mov_b32 s8, s10
115 ; GFX6-NEXT: s_mov_b32 s9, s11
116 ; GFX6-NEXT: s_mov_b32 s10, s12
117 ; GFX6-NEXT: s_mov_b32 s11, s13
118 ; GFX6-NEXT: s_wqm_b64 exec, exec
119 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
120 ; GFX6-NEXT: image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
121 ; GFX6-NEXT: s_waitcnt vmcnt(0)
122 ; GFX6-NEXT: ; return to shader part epilog
124 ; GFX10-LABEL: gather4_cl_o_2d:
125 ; GFX10: ; %bb.0: ; %main_body
126 ; GFX10-NEXT: s_mov_b32 s1, exec_lo
127 ; GFX10-NEXT: s_mov_b32 s0, s2
128 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
129 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
130 ; GFX10-NEXT: s_mov_b32 s1, s3
131 ; GFX10-NEXT: s_mov_b32 s2, s4
132 ; GFX10-NEXT: s_mov_b32 s3, s5
133 ; GFX10-NEXT: s_mov_b32 s4, s6
134 ; GFX10-NEXT: s_mov_b32 s5, s7
135 ; GFX10-NEXT: s_mov_b32 s6, s8
136 ; GFX10-NEXT: s_mov_b32 s7, s9
137 ; GFX10-NEXT: s_mov_b32 s8, s10
138 ; GFX10-NEXT: s_mov_b32 s9, s11
139 ; GFX10-NEXT: s_mov_b32 s10, s12
140 ; GFX10-NEXT: s_mov_b32 s11, s13
141 ; GFX10-NEXT: image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
142 ; GFX10-NEXT: s_waitcnt vmcnt(0)
143 ; GFX10-NEXT: ; return to shader part epilog
145 %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; Pixel-shader test for llvm.amdgcn.image.gather4.c.cl.o.2d: combines the
; %zcompare operand (before the coordinates) with the %clamp operand (after).
; The GFX6 and GFX10 check lines both expect an s_wqm / s_and pair on exec ahead
; of image_gather4_c_cl_o, which reads v[0:4] with dmask:0x1.
149 define amdgpu_ps <4 x float> @gather4_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %clamp) {
150 ; GFX6-LABEL: gather4_c_cl_o_2d:
151 ; GFX6: ; %bb.0: ; %main_body
152 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
153 ; GFX6-NEXT: s_mov_b32 s0, s2
154 ; GFX6-NEXT: s_mov_b32 s1, s3
155 ; GFX6-NEXT: s_mov_b32 s2, s4
156 ; GFX6-NEXT: s_mov_b32 s3, s5
157 ; GFX6-NEXT: s_mov_b32 s4, s6
158 ; GFX6-NEXT: s_mov_b32 s5, s7
159 ; GFX6-NEXT: s_mov_b32 s6, s8
160 ; GFX6-NEXT: s_mov_b32 s7, s9
161 ; GFX6-NEXT: s_mov_b32 s8, s10
162 ; GFX6-NEXT: s_mov_b32 s9, s11
163 ; GFX6-NEXT: s_mov_b32 s10, s12
164 ; GFX6-NEXT: s_mov_b32 s11, s13
165 ; GFX6-NEXT: s_wqm_b64 exec, exec
166 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
167 ; GFX6-NEXT: image_gather4_c_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
168 ; GFX6-NEXT: s_waitcnt vmcnt(0)
169 ; GFX6-NEXT: ; return to shader part epilog
171 ; GFX10-LABEL: gather4_c_cl_o_2d:
172 ; GFX10: ; %bb.0: ; %main_body
173 ; GFX10-NEXT: s_mov_b32 s1, exec_lo
174 ; GFX10-NEXT: s_mov_b32 s0, s2
175 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
176 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
177 ; GFX10-NEXT: s_mov_b32 s1, s3
178 ; GFX10-NEXT: s_mov_b32 s2, s4
179 ; GFX10-NEXT: s_mov_b32 s3, s5
180 ; GFX10-NEXT: s_mov_b32 s4, s6
181 ; GFX10-NEXT: s_mov_b32 s5, s7
182 ; GFX10-NEXT: s_mov_b32 s6, s8
183 ; GFX10-NEXT: s_mov_b32 s7, s9
184 ; GFX10-NEXT: s_mov_b32 s8, s10
185 ; GFX10-NEXT: s_mov_b32 s9, s11
186 ; GFX10-NEXT: s_mov_b32 s10, s12
187 ; GFX10-NEXT: s_mov_b32 s11, s13
188 ; GFX10-NEXT: image_gather4_c_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
189 ; GFX10-NEXT: s_waitcnt vmcnt(0)
190 ; GFX10-NEXT: ; return to shader part epilog
192 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; Pixel-shader test for llvm.amdgcn.image.gather4.b.o.2d: %offset, then a %bias
; operand, then the %s/%t coordinates. The intrinsic name carries two .f32
; suffixes after v4f32, unlike the non-bias variants in this file.
; The GFX6 and GFX10 check lines both expect an s_wqm / s_and pair on exec ahead
; of image_gather4_b_o, which reads v[0:3] with dmask:0x1.
196 define amdgpu_ps <4 x float> @gather4_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t) {
197 ; GFX6-LABEL: gather4_b_o_2d:
198 ; GFX6: ; %bb.0: ; %main_body
199 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
200 ; GFX6-NEXT: s_mov_b32 s0, s2
201 ; GFX6-NEXT: s_mov_b32 s1, s3
202 ; GFX6-NEXT: s_mov_b32 s2, s4
203 ; GFX6-NEXT: s_mov_b32 s3, s5
204 ; GFX6-NEXT: s_mov_b32 s4, s6
205 ; GFX6-NEXT: s_mov_b32 s5, s7
206 ; GFX6-NEXT: s_mov_b32 s6, s8
207 ; GFX6-NEXT: s_mov_b32 s7, s9
208 ; GFX6-NEXT: s_mov_b32 s8, s10
209 ; GFX6-NEXT: s_mov_b32 s9, s11
210 ; GFX6-NEXT: s_mov_b32 s10, s12
211 ; GFX6-NEXT: s_mov_b32 s11, s13
212 ; GFX6-NEXT: s_wqm_b64 exec, exec
213 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
214 ; GFX6-NEXT: image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
215 ; GFX6-NEXT: s_waitcnt vmcnt(0)
216 ; GFX6-NEXT: ; return to shader part epilog
218 ; GFX10-LABEL: gather4_b_o_2d:
219 ; GFX10: ; %bb.0: ; %main_body
220 ; GFX10-NEXT: s_mov_b32 s1, exec_lo
221 ; GFX10-NEXT: s_mov_b32 s0, s2
222 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
223 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
224 ; GFX10-NEXT: s_mov_b32 s1, s3
225 ; GFX10-NEXT: s_mov_b32 s2, s4
226 ; GFX10-NEXT: s_mov_b32 s3, s5
227 ; GFX10-NEXT: s_mov_b32 s4, s6
228 ; GFX10-NEXT: s_mov_b32 s5, s7
229 ; GFX10-NEXT: s_mov_b32 s6, s8
230 ; GFX10-NEXT: s_mov_b32 s7, s9
231 ; GFX10-NEXT: s_mov_b32 s8, s10
232 ; GFX10-NEXT: s_mov_b32 s9, s11
233 ; GFX10-NEXT: s_mov_b32 s10, s12
234 ; GFX10-NEXT: s_mov_b32 s11, s13
235 ; GFX10-NEXT: image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
236 ; GFX10-NEXT: s_waitcnt vmcnt(0)
237 ; GFX10-NEXT: ; return to shader part epilog
239 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; Pixel-shader test for llvm.amdgcn.image.gather4.c.b.o.2d: %offset, %bias and
; %zcompare precede the %s/%t coordinates.
; The GFX6 and GFX10 check lines both expect an s_wqm / s_and pair on exec ahead
; of image_gather4_c_b_o, which reads v[0:4] with dmask:0x1.
243 define amdgpu_ps <4 x float> @gather4_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t) {
244 ; GFX6-LABEL: gather4_c_b_o_2d:
245 ; GFX6: ; %bb.0: ; %main_body
246 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
247 ; GFX6-NEXT: s_mov_b32 s0, s2
248 ; GFX6-NEXT: s_mov_b32 s1, s3
249 ; GFX6-NEXT: s_mov_b32 s2, s4
250 ; GFX6-NEXT: s_mov_b32 s3, s5
251 ; GFX6-NEXT: s_mov_b32 s4, s6
252 ; GFX6-NEXT: s_mov_b32 s5, s7
253 ; GFX6-NEXT: s_mov_b32 s6, s8
254 ; GFX6-NEXT: s_mov_b32 s7, s9
255 ; GFX6-NEXT: s_mov_b32 s8, s10
256 ; GFX6-NEXT: s_mov_b32 s9, s11
257 ; GFX6-NEXT: s_mov_b32 s10, s12
258 ; GFX6-NEXT: s_mov_b32 s11, s13
259 ; GFX6-NEXT: s_wqm_b64 exec, exec
260 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
261 ; GFX6-NEXT: image_gather4_c_b_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
262 ; GFX6-NEXT: s_waitcnt vmcnt(0)
263 ; GFX6-NEXT: ; return to shader part epilog
265 ; GFX10-LABEL: gather4_c_b_o_2d:
266 ; GFX10: ; %bb.0: ; %main_body
267 ; GFX10-NEXT: s_mov_b32 s1, exec_lo
268 ; GFX10-NEXT: s_mov_b32 s0, s2
269 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
270 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
271 ; GFX10-NEXT: s_mov_b32 s1, s3
272 ; GFX10-NEXT: s_mov_b32 s2, s4
273 ; GFX10-NEXT: s_mov_b32 s3, s5
274 ; GFX10-NEXT: s_mov_b32 s4, s6
275 ; GFX10-NEXT: s_mov_b32 s5, s7
276 ; GFX10-NEXT: s_mov_b32 s6, s8
277 ; GFX10-NEXT: s_mov_b32 s7, s9
278 ; GFX10-NEXT: s_mov_b32 s8, s10
279 ; GFX10-NEXT: s_mov_b32 s9, s11
280 ; GFX10-NEXT: s_mov_b32 s10, s12
281 ; GFX10-NEXT: s_mov_b32 s11, s13
282 ; GFX10-NEXT: image_gather4_c_b_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
283 ; GFX10-NEXT: s_waitcnt vmcnt(0)
284 ; GFX10-NEXT: ; return to shader part epilog
286 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; Pixel-shader test for llvm.amdgcn.image.gather4.b.cl.o.2d: %offset and %bias
; precede the %s/%t coordinates, with %clamp trailing.
; The GFX6 and GFX10 check lines expect image_gather4_b_cl_o to read v[0:4] with
; dmask:0x1 and contain no s_wqm / s_and sequence on exec.
; NOTE(review): gather4_b_o_2d, gather4_c_b_o_2d and gather4_c_b_cl_o_2d in this
; file all expect the s_wqm sequence; this bias variant does not. The checks are
; autogenerated, so this reflects current compiler output - confirm that the
; difference is intended before relying on it.
290 define amdgpu_ps <4 x float> @gather4_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t, float %clamp) {
291 ; GFX6-LABEL: gather4_b_cl_o_2d:
292 ; GFX6: ; %bb.0: ; %main_body
293 ; GFX6-NEXT: s_mov_b32 s0, s2
294 ; GFX6-NEXT: s_mov_b32 s1, s3
295 ; GFX6-NEXT: s_mov_b32 s2, s4
296 ; GFX6-NEXT: s_mov_b32 s3, s5
297 ; GFX6-NEXT: s_mov_b32 s4, s6
298 ; GFX6-NEXT: s_mov_b32 s5, s7
299 ; GFX6-NEXT: s_mov_b32 s6, s8
300 ; GFX6-NEXT: s_mov_b32 s7, s9
301 ; GFX6-NEXT: s_mov_b32 s8, s10
302 ; GFX6-NEXT: s_mov_b32 s9, s11
303 ; GFX6-NEXT: s_mov_b32 s10, s12
304 ; GFX6-NEXT: s_mov_b32 s11, s13
305 ; GFX6-NEXT: image_gather4_b_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
306 ; GFX6-NEXT: s_waitcnt vmcnt(0)
307 ; GFX6-NEXT: ; return to shader part epilog
309 ; GFX10-LABEL: gather4_b_cl_o_2d:
310 ; GFX10: ; %bb.0: ; %main_body
311 ; GFX10-NEXT: s_mov_b32 s0, s2
312 ; GFX10-NEXT: s_mov_b32 s1, s3
313 ; GFX10-NEXT: s_mov_b32 s2, s4
314 ; GFX10-NEXT: s_mov_b32 s3, s5
315 ; GFX10-NEXT: s_mov_b32 s4, s6
316 ; GFX10-NEXT: s_mov_b32 s5, s7
317 ; GFX10-NEXT: s_mov_b32 s6, s8
318 ; GFX10-NEXT: s_mov_b32 s7, s9
319 ; GFX10-NEXT: s_mov_b32 s8, s10
320 ; GFX10-NEXT: s_mov_b32 s9, s11
321 ; GFX10-NEXT: s_mov_b32 s10, s12
322 ; GFX10-NEXT: s_mov_b32 s11, s13
323 ; GFX10-NEXT: image_gather4_b_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
324 ; GFX10-NEXT: s_waitcnt vmcnt(0)
325 ; GFX10-NEXT: ; return to shader part epilog
327 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; Pixel-shader test for llvm.amdgcn.image.gather4.c.b.cl.o.2d: %offset, %bias
; and %zcompare precede the %s/%t coordinates, with %clamp trailing.
; The GFX6 and GFX10 check lines both expect an s_wqm / s_and pair on exec ahead
; of image_gather4_c_b_cl_o, which reads v[0:5] with dmask:0x1.
331 define amdgpu_ps <4 x float> @gather4_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) {
332 ; GFX6-LABEL: gather4_c_b_cl_o_2d:
333 ; GFX6: ; %bb.0: ; %main_body
334 ; GFX6-NEXT: s_mov_b64 s[14:15], exec
335 ; GFX6-NEXT: s_mov_b32 s0, s2
336 ; GFX6-NEXT: s_mov_b32 s1, s3
337 ; GFX6-NEXT: s_mov_b32 s2, s4
338 ; GFX6-NEXT: s_mov_b32 s3, s5
339 ; GFX6-NEXT: s_mov_b32 s4, s6
340 ; GFX6-NEXT: s_mov_b32 s5, s7
341 ; GFX6-NEXT: s_mov_b32 s6, s8
342 ; GFX6-NEXT: s_mov_b32 s7, s9
343 ; GFX6-NEXT: s_mov_b32 s8, s10
344 ; GFX6-NEXT: s_mov_b32 s9, s11
345 ; GFX6-NEXT: s_mov_b32 s10, s12
346 ; GFX6-NEXT: s_mov_b32 s11, s13
347 ; GFX6-NEXT: s_wqm_b64 exec, exec
348 ; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
349 ; GFX6-NEXT: image_gather4_c_b_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0x1
350 ; GFX6-NEXT: s_waitcnt vmcnt(0)
351 ; GFX6-NEXT: ; return to shader part epilog
353 ; GFX10-LABEL: gather4_c_b_cl_o_2d:
354 ; GFX10: ; %bb.0: ; %main_body
355 ; GFX10-NEXT: s_mov_b32 s1, exec_lo
356 ; GFX10-NEXT: s_mov_b32 s0, s2
357 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
358 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
359 ; GFX10-NEXT: s_mov_b32 s1, s3
360 ; GFX10-NEXT: s_mov_b32 s2, s4
361 ; GFX10-NEXT: s_mov_b32 s3, s5
362 ; GFX10-NEXT: s_mov_b32 s4, s6
363 ; GFX10-NEXT: s_mov_b32 s5, s7
364 ; GFX10-NEXT: s_mov_b32 s6, s8
365 ; GFX10-NEXT: s_mov_b32 s7, s9
366 ; GFX10-NEXT: s_mov_b32 s8, s10
367 ; GFX10-NEXT: s_mov_b32 s9, s11
368 ; GFX10-NEXT: s_mov_b32 s10, s12
369 ; GFX10-NEXT: s_mov_b32 s11, s13
370 ; GFX10-NEXT: image_gather4_c_b_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
371 ; GFX10-NEXT: s_waitcnt vmcnt(0)
372 ; GFX10-NEXT: ; return to shader part epilog
374 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; Pixel-shader test for llvm.amdgcn.image.gather4.l.o.2d: %offset, %s, %t and a
; trailing %lod operand.
; The GFX6 and GFX10 check lines expect image_gather4_l_o to read v[0:3] with
; dmask:0x1, and contain no s_wqm / s_and sequence on exec.
378 define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
379 ; GFX6-LABEL: gather4_l_o_2d:
380 ; GFX6: ; %bb.0: ; %main_body
381 ; GFX6-NEXT: s_mov_b32 s0, s2
382 ; GFX6-NEXT: s_mov_b32 s1, s3
383 ; GFX6-NEXT: s_mov_b32 s2, s4
384 ; GFX6-NEXT: s_mov_b32 s3, s5
385 ; GFX6-NEXT: s_mov_b32 s4, s6
386 ; GFX6-NEXT: s_mov_b32 s5, s7
387 ; GFX6-NEXT: s_mov_b32 s6, s8
388 ; GFX6-NEXT: s_mov_b32 s7, s9
389 ; GFX6-NEXT: s_mov_b32 s8, s10
390 ; GFX6-NEXT: s_mov_b32 s9, s11
391 ; GFX6-NEXT: s_mov_b32 s10, s12
392 ; GFX6-NEXT: s_mov_b32 s11, s13
393 ; GFX6-NEXT: image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
394 ; GFX6-NEXT: s_waitcnt vmcnt(0)
395 ; GFX6-NEXT: ; return to shader part epilog
397 ; GFX10-LABEL: gather4_l_o_2d:
398 ; GFX10: ; %bb.0: ; %main_body
399 ; GFX10-NEXT: s_mov_b32 s0, s2
400 ; GFX10-NEXT: s_mov_b32 s1, s3
401 ; GFX10-NEXT: s_mov_b32 s2, s4
402 ; GFX10-NEXT: s_mov_b32 s3, s5
403 ; GFX10-NEXT: s_mov_b32 s4, s6
404 ; GFX10-NEXT: s_mov_b32 s5, s7
405 ; GFX10-NEXT: s_mov_b32 s6, s8
406 ; GFX10-NEXT: s_mov_b32 s7, s9
407 ; GFX10-NEXT: s_mov_b32 s8, s10
408 ; GFX10-NEXT: s_mov_b32 s9, s11
409 ; GFX10-NEXT: s_mov_b32 s10, s12
410 ; GFX10-NEXT: s_mov_b32 s11, s13
411 ; GFX10-NEXT: image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
412 ; GFX10-NEXT: s_waitcnt vmcnt(0)
413 ; GFX10-NEXT: ; return to shader part epilog
415 %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; Pixel-shader test for llvm.amdgcn.image.gather4.c.l.o.2d: %offset and
; %zcompare precede the %s/%t coordinates, with %lod trailing.
; The GFX6 and GFX10 check lines expect image_gather4_c_l_o to read v[0:4] with
; dmask:0x1, and contain no s_wqm / s_and sequence on exec.
419 define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
420 ; GFX6-LABEL: gather4_c_l_o_2d:
421 ; GFX6: ; %bb.0: ; %main_body
422 ; GFX6-NEXT: s_mov_b32 s0, s2
423 ; GFX6-NEXT: s_mov_b32 s1, s3
424 ; GFX6-NEXT: s_mov_b32 s2, s4
425 ; GFX6-NEXT: s_mov_b32 s3, s5
426 ; GFX6-NEXT: s_mov_b32 s4, s6
427 ; GFX6-NEXT: s_mov_b32 s5, s7
428 ; GFX6-NEXT: s_mov_b32 s6, s8
429 ; GFX6-NEXT: s_mov_b32 s7, s9
430 ; GFX6-NEXT: s_mov_b32 s8, s10
431 ; GFX6-NEXT: s_mov_b32 s9, s11
432 ; GFX6-NEXT: s_mov_b32 s10, s12
433 ; GFX6-NEXT: s_mov_b32 s11, s13
434 ; GFX6-NEXT: image_gather4_c_l_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
435 ; GFX6-NEXT: s_waitcnt vmcnt(0)
436 ; GFX6-NEXT: ; return to shader part epilog
438 ; GFX10-LABEL: gather4_c_l_o_2d:
439 ; GFX10: ; %bb.0: ; %main_body
440 ; GFX10-NEXT: s_mov_b32 s0, s2
441 ; GFX10-NEXT: s_mov_b32 s1, s3
442 ; GFX10-NEXT: s_mov_b32 s2, s4
443 ; GFX10-NEXT: s_mov_b32 s3, s5
444 ; GFX10-NEXT: s_mov_b32 s4, s6
445 ; GFX10-NEXT: s_mov_b32 s5, s7
446 ; GFX10-NEXT: s_mov_b32 s6, s8
447 ; GFX10-NEXT: s_mov_b32 s7, s9
448 ; GFX10-NEXT: s_mov_b32 s8, s10
449 ; GFX10-NEXT: s_mov_b32 s9, s11
450 ; GFX10-NEXT: s_mov_b32 s10, s12
451 ; GFX10-NEXT: s_mov_b32 s11, s13
452 ; GFX10-NEXT: image_gather4_c_l_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
453 ; GFX10-NEXT: s_waitcnt vmcnt(0)
454 ; GFX10-NEXT: ; return to shader part epilog
456 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; Pixel-shader test for llvm.amdgcn.image.gather4.lz.o.2d: takes only %offset
; and the %s/%t coordinates, the same operand list as gather4_o_2d.
; The GFX6 and GFX10 check lines expect image_gather4_lz_o to read v[0:2] with
; dmask:0x1, and contain no s_wqm / s_and sequence on exec.
460 define amdgpu_ps <4 x float> @gather4_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
461 ; GFX6-LABEL: gather4_lz_o_2d:
462 ; GFX6: ; %bb.0: ; %main_body
463 ; GFX6-NEXT: s_mov_b32 s0, s2
464 ; GFX6-NEXT: s_mov_b32 s1, s3
465 ; GFX6-NEXT: s_mov_b32 s2, s4
466 ; GFX6-NEXT: s_mov_b32 s3, s5
467 ; GFX6-NEXT: s_mov_b32 s4, s6
468 ; GFX6-NEXT: s_mov_b32 s5, s7
469 ; GFX6-NEXT: s_mov_b32 s6, s8
470 ; GFX6-NEXT: s_mov_b32 s7, s9
471 ; GFX6-NEXT: s_mov_b32 s8, s10
472 ; GFX6-NEXT: s_mov_b32 s9, s11
473 ; GFX6-NEXT: s_mov_b32 s10, s12
474 ; GFX6-NEXT: s_mov_b32 s11, s13
475 ; GFX6-NEXT: image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
476 ; GFX6-NEXT: s_waitcnt vmcnt(0)
477 ; GFX6-NEXT: ; return to shader part epilog
479 ; GFX10-LABEL: gather4_lz_o_2d:
480 ; GFX10: ; %bb.0: ; %main_body
481 ; GFX10-NEXT: s_mov_b32 s0, s2
482 ; GFX10-NEXT: s_mov_b32 s1, s3
483 ; GFX10-NEXT: s_mov_b32 s2, s4
484 ; GFX10-NEXT: s_mov_b32 s3, s5
485 ; GFX10-NEXT: s_mov_b32 s4, s6
486 ; GFX10-NEXT: s_mov_b32 s5, s7
487 ; GFX10-NEXT: s_mov_b32 s6, s8
488 ; GFX10-NEXT: s_mov_b32 s7, s9
489 ; GFX10-NEXT: s_mov_b32 s8, s10
490 ; GFX10-NEXT: s_mov_b32 s9, s11
491 ; GFX10-NEXT: s_mov_b32 s10, s12
492 ; GFX10-NEXT: s_mov_b32 s11, s13
493 ; GFX10-NEXT: image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
494 ; GFX10-NEXT: s_waitcnt vmcnt(0)
495 ; GFX10-NEXT: ; return to shader part epilog
497 %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; Pixel-shader test for llvm.amdgcn.image.gather4.c.lz.o.2d: %offset and
; %zcompare precede the %s/%t coordinates.
; The GFX6 and GFX10 check lines expect image_gather4_c_lz_o to read v[0:3] with
; dmask:0x1, and contain no s_wqm / s_and sequence on exec.
501 define amdgpu_ps <4 x float> @gather4_c_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
502 ; GFX6-LABEL: gather4_c_lz_o_2d:
503 ; GFX6: ; %bb.0: ; %main_body
504 ; GFX6-NEXT: s_mov_b32 s0, s2
505 ; GFX6-NEXT: s_mov_b32 s1, s3
506 ; GFX6-NEXT: s_mov_b32 s2, s4
507 ; GFX6-NEXT: s_mov_b32 s3, s5
508 ; GFX6-NEXT: s_mov_b32 s4, s6
509 ; GFX6-NEXT: s_mov_b32 s5, s7
510 ; GFX6-NEXT: s_mov_b32 s6, s8
511 ; GFX6-NEXT: s_mov_b32 s7, s9
512 ; GFX6-NEXT: s_mov_b32 s8, s10
513 ; GFX6-NEXT: s_mov_b32 s9, s11
514 ; GFX6-NEXT: s_mov_b32 s10, s12
515 ; GFX6-NEXT: s_mov_b32 s11, s13
516 ; GFX6-NEXT: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
517 ; GFX6-NEXT: s_waitcnt vmcnt(0)
518 ; GFX6-NEXT: ; return to shader part epilog
520 ; GFX10-LABEL: gather4_c_lz_o_2d:
521 ; GFX10: ; %bb.0: ; %main_body
522 ; GFX10-NEXT: s_mov_b32 s0, s2
523 ; GFX10-NEXT: s_mov_b32 s1, s3
524 ; GFX10-NEXT: s_mov_b32 s2, s4
525 ; GFX10-NEXT: s_mov_b32 s3, s5
526 ; GFX10-NEXT: s_mov_b32 s4, s6
527 ; GFX10-NEXT: s_mov_b32 s5, s7
528 ; GFX10-NEXT: s_mov_b32 s6, s8
529 ; GFX10-NEXT: s_mov_b32 s7, s9
530 ; GFX10-NEXT: s_mov_b32 s8, s10
531 ; GFX10-NEXT: s_mov_b32 s9, s11
532 ; GFX10-NEXT: s_mov_b32 s10, s12
533 ; GFX10-NEXT: s_mov_b32 s11, s13
534 ; GFX10-NEXT: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
535 ; GFX10-NEXT: s_waitcnt vmcnt(0)
536 ; GFX10-NEXT: ; return to shader part epilog
538 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
; Declarations of the twelve image.gather4.*.o.2d intrinsics called by the test
; functions in this file. In every signature the leading i32 and the trailing
; i1/i32/i32 operands are marked immarg; the <8 x i32> and <4 x i32> operands
; receive %rsrc and %samp at the call sites. The bias (.b) variants carry an
; extra .f32 suffix in their mangled names.
; All declarations use attribute group #0, defined on the final line as
; nounwind readonly.
542 declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
543 declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
544 declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
545 declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
546 declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
547 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
548 declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
549 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
550 declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
551 declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
552 declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
553 declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
555 attributes #0 = { nounwind readonly }