1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; gather4_2d: calls image.gather4.2d with half-typed %s/%t coordinates and
; dmask 1. The expected code saves exec, applies s_wqm to it, merges the two
; coordinates into v0 with v_perm_b32 (selector 0x5040100), ANDs exec with the
; saved copy, and issues image_gather4 with the a16 modifier and a single
; address VGPR. GFX10 output additionally prints dim:SQ_RSRC_IMG_2D.
6 define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
7 ; GFX9-LABEL: gather4_2d:
8 ; GFX9: ; %bb.0: ; %main_body
9 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
10 ; GFX9-NEXT: s_wqm_b64 exec, exec
11 ; GFX9-NEXT: s_mov_b32 s14, 0x5040100
12 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s14
13 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
14 ; GFX9-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
15 ; GFX9-NEXT: s_waitcnt vmcnt(0)
16 ; GFX9-NEXT: ; return to shader part epilog
18 ; GFX10-LABEL: gather4_2d:
19 ; GFX10: ; %bb.0: ; %main_body
20 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
21 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
22 ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
23 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
24 ; GFX10-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
25 ; GFX10-NEXT: s_waitcnt vmcnt(0)
26 ; GFX10-NEXT: ; return to shader part epilog
28 %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4_cube: calls image.gather4.cube with three half arguments
; (%s, %t, %face). The expected code merges v0/v1 into v1 with v_perm_b32 and
; passes v[1:2] as the address, so v2 is used without further packing.
; GFX9 prints the "da" modifier; GFX10 prints dim:SQ_RSRC_IMG_CUBE instead.
32 define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
33 ; GFX9-LABEL: gather4_cube:
34 ; GFX9: ; %bb.0: ; %main_body
35 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
36 ; GFX9-NEXT: s_wqm_b64 exec, exec
37 ; GFX9-NEXT: s_mov_b32 s14, 0x5040100
38 ; GFX9-NEXT: v_perm_b32 v1, v1, v0, s14
39 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
40 ; GFX9-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
41 ; GFX9-NEXT: s_waitcnt vmcnt(0)
42 ; GFX9-NEXT: ; return to shader part epilog
44 ; GFX10-LABEL: gather4_cube:
45 ; GFX10: ; %bb.0: ; %main_body
46 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
47 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
48 ; GFX10-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
49 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
50 ; GFX10-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
51 ; GFX10-NEXT: s_waitcnt vmcnt(0)
52 ; GFX10-NEXT: ; return to shader part epilog
54 %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4_2darray: calls image.gather4.2darray with three half arguments
; (%s, %t, %slice). The expected instruction sequence matches the cube case:
; v0/v1 are merged into v1 with v_perm_b32 and v[1:2] is the address.
; GFX9 prints "da"; GFX10 prints dim:SQ_RSRC_IMG_2D_ARRAY.
58 define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
59 ; GFX9-LABEL: gather4_2darray:
60 ; GFX9: ; %bb.0: ; %main_body
61 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
62 ; GFX9-NEXT: s_wqm_b64 exec, exec
63 ; GFX9-NEXT: s_mov_b32 s14, 0x5040100
64 ; GFX9-NEXT: v_perm_b32 v1, v1, v0, s14
65 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
66 ; GFX9-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
67 ; GFX9-NEXT: s_waitcnt vmcnt(0)
68 ; GFX9-NEXT: ; return to shader part epilog
70 ; GFX10-LABEL: gather4_2darray:
71 ; GFX10: ; %bb.0: ; %main_body
72 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
73 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
74 ; GFX10-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
75 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
76 ; GFX10-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
77 ; GFX10-NEXT: s_waitcnt vmcnt(0)
78 ; GFX10-NEXT: ; return to shader part epilog
80 %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4_c_2d: calls image.gather4.c.2d with a float %zcompare followed by
; half %s/%t. The expected code merges v1/v2 into v1 with v_perm_b32 and
; passes v[0:1] as the address, leaving v0 untouched.
; NOTE(review): the intrinsic name ends in .f32 although %s/%t are half;
; presumably this relies on LLVM remangling intrinsic names - confirm.
84 define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
85 ; GFX9-LABEL: gather4_c_2d:
86 ; GFX9: ; %bb.0: ; %main_body
87 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
88 ; GFX9-NEXT: s_wqm_b64 exec, exec
89 ; GFX9-NEXT: s_mov_b32 s14, 0x5040100
90 ; GFX9-NEXT: v_perm_b32 v1, v2, v1, s14
91 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
92 ; GFX9-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
93 ; GFX9-NEXT: s_waitcnt vmcnt(0)
94 ; GFX9-NEXT: ; return to shader part epilog
96 ; GFX10-LABEL: gather4_c_2d:
97 ; GFX10: ; %bb.0: ; %main_body
98 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
99 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
100 ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
101 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
102 ; GFX10-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
103 ; GFX10-NEXT: s_waitcnt vmcnt(0)
104 ; GFX10-NEXT: ; return to shader part epilog
106 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4_cl_2d: calls image.gather4.cl.2d with half %s, %t and %clamp.
; The expected code merges v0/v1 into v1 with v_perm_b32 and passes v[1:2]
; as the address of image_gather4_cl, so v2 is used as-is.
110 define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
111 ; GFX9-LABEL: gather4_cl_2d:
112 ; GFX9: ; %bb.0: ; %main_body
113 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
114 ; GFX9-NEXT: s_wqm_b64 exec, exec
115 ; GFX9-NEXT: s_mov_b32 s14, 0x5040100
116 ; GFX9-NEXT: v_perm_b32 v1, v1, v0, s14
117 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
118 ; GFX9-NEXT: image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
119 ; GFX9-NEXT: s_waitcnt vmcnt(0)
120 ; GFX9-NEXT: ; return to shader part epilog
122 ; GFX10-LABEL: gather4_cl_2d:
123 ; GFX10: ; %bb.0: ; %main_body
124 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
125 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
126 ; GFX10-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
127 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
128 ; GFX10-NEXT: image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
129 ; GFX10-NEXT: s_waitcnt vmcnt(0)
130 ; GFX10-NEXT: ; return to shader part epilog
132 %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4_c_cl_2d: calls image.gather4.c.cl.2d with float %zcompare and half
; %s, %t, %clamp. The two targets are expected to differ in how the address
; is formed: GFX9 copies operands with v_mov_b32 so the address is the
; contiguous range v[3:5], while GFX10 merges v1/v2 in place and passes the
; bracketed register list [v0, v1, v3].
; NOTE(review): the intrinsic name ends in .f32 although %s/%t are half;
; presumably this relies on LLVM remangling intrinsic names - confirm.
136 define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
137 ; GFX9-LABEL: gather4_c_cl_2d:
138 ; GFX9: ; %bb.0: ; %main_body
139 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
140 ; GFX9-NEXT: s_wqm_b64 exec, exec
141 ; GFX9-NEXT: s_mov_b32 s14, 0x5040100
142 ; GFX9-NEXT: v_mov_b32_e32 v5, v3
143 ; GFX9-NEXT: v_mov_b32_e32 v3, v0
144 ; GFX9-NEXT: v_perm_b32 v4, v2, v1, s14
145 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
146 ; GFX9-NEXT: image_gather4_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
147 ; GFX9-NEXT: s_waitcnt vmcnt(0)
148 ; GFX9-NEXT: ; return to shader part epilog
150 ; GFX10-LABEL: gather4_c_cl_2d:
151 ; GFX10: ; %bb.0: ; %main_body
152 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
153 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
154 ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
155 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
156 ; GFX10-NEXT: image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
157 ; GFX10-NEXT: s_waitcnt vmcnt(0)
158 ; GFX10-NEXT: ; return to shader part epilog
160 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4_b_2d: calls image.gather4.b.2d with half %bias, %s and %t.
; The expected code merges v1/v2 into v1 with v_perm_b32 and passes v[0:1]
; as the address of image_gather4_b, so v0 is used as-is.
164 define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) {
165 ; GFX9-LABEL: gather4_b_2d:
166 ; GFX9: ; %bb.0: ; %main_body
167 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
168 ; GFX9-NEXT: s_wqm_b64 exec, exec
169 ; GFX9-NEXT: s_mov_b32 s14, 0x5040100
170 ; GFX9-NEXT: v_perm_b32 v1, v2, v1, s14
171 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
172 ; GFX9-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
173 ; GFX9-NEXT: s_waitcnt vmcnt(0)
174 ; GFX9-NEXT: ; return to shader part epilog
176 ; GFX10-LABEL: gather4_b_2d:
177 ; GFX10: ; %bb.0: ; %main_body
178 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
179 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
180 ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
181 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
182 ; GFX10-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
183 ; GFX10-NEXT: s_waitcnt vmcnt(0)
184 ; GFX10-NEXT: ; return to shader part epilog
186 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32 1, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4_c_b_2d: calls image.gather4.c.b.2d with half %bias, float
; %zcompare, and half %s/%t. The expected code merges v2/v3 into v2 with
; v_perm_b32 and passes v[0:2] as the address of image_gather4_c_b, so v0
; and v1 are used as-is.
190 define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) {
191 ; GFX9-LABEL: gather4_c_b_2d:
192 ; GFX9: ; %bb.0: ; %main_body
193 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
194 ; GFX9-NEXT: s_wqm_b64 exec, exec
195 ; GFX9-NEXT: s_mov_b32 s14, 0x5040100
196 ; GFX9-NEXT: v_perm_b32 v2, v3, v2, s14
197 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
198 ; GFX9-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
199 ; GFX9-NEXT: s_waitcnt vmcnt(0)
200 ; GFX9-NEXT: ; return to shader part epilog
202 ; GFX10-LABEL: gather4_c_b_2d:
203 ; GFX10: ; %bb.0: ; %main_body
204 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
205 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
206 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
207 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
208 ; GFX10-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
209 ; GFX10-NEXT: s_waitcnt vmcnt(0)
210 ; GFX10-NEXT: ; return to shader part epilog
212 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32 1, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4_b_cl_2d: calls image.gather4.b.cl.2d with half %bias, %s, %t and
; %clamp. GFX9 is expected to copy operands with v_mov_b32 so the address is
; the contiguous range v[3:5]; GFX10 merges v1/v2 in place and passes the
; bracketed register list [v0, v1, v3].
216 define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) {
217 ; GFX9-LABEL: gather4_b_cl_2d:
218 ; GFX9: ; %bb.0: ; %main_body
219 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
220 ; GFX9-NEXT: s_wqm_b64 exec, exec
221 ; GFX9-NEXT: s_mov_b32 s14, 0x5040100
222 ; GFX9-NEXT: v_mov_b32_e32 v5, v3
223 ; GFX9-NEXT: v_mov_b32_e32 v3, v0
224 ; GFX9-NEXT: v_perm_b32 v4, v2, v1, s14
225 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
226 ; GFX9-NEXT: image_gather4_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
227 ; GFX9-NEXT: s_waitcnt vmcnt(0)
228 ; GFX9-NEXT: ; return to shader part epilog
230 ; GFX10-LABEL: gather4_b_cl_2d:
231 ; GFX10: ; %bb.0: ; %main_body
232 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
233 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
234 ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
235 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
236 ; GFX10-NEXT: image_gather4_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
237 ; GFX10-NEXT: s_waitcnt vmcnt(0)
238 ; GFX10-NEXT: ; return to shader part epilog
240 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32 1, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4_c_b_cl_2d: calls image.gather4.c.b.cl.2d with half %bias, float
; %zcompare, and half %s, %t, %clamp. GFX9 is expected to copy operands with
; v_mov_b32 so the address is the contiguous range v[4:7]; GFX10 merges
; v2/v3 in place and passes the bracketed register list [v0, v1, v2, v4].
244 define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) {
245 ; GFX9-LABEL: gather4_c_b_cl_2d:
246 ; GFX9: ; %bb.0: ; %main_body
247 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
248 ; GFX9-NEXT: s_wqm_b64 exec, exec
249 ; GFX9-NEXT: s_mov_b32 s14, 0x5040100
250 ; GFX9-NEXT: v_mov_b32_e32 v7, v4
251 ; GFX9-NEXT: v_mov_b32_e32 v5, v1
252 ; GFX9-NEXT: v_mov_b32_e32 v4, v0
253 ; GFX9-NEXT: v_perm_b32 v6, v3, v2, s14
254 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
255 ; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0x1 a16
256 ; GFX9-NEXT: s_waitcnt vmcnt(0)
257 ; GFX9-NEXT: ; return to shader part epilog
259 ; GFX10-LABEL: gather4_c_b_cl_2d:
260 ; GFX10: ; %bb.0: ; %main_body
261 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
262 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
263 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
264 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
265 ; GFX10-NEXT: image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
266 ; GFX10-NEXT: s_waitcnt vmcnt(0)
267 ; GFX10-NEXT: ; return to shader part epilog
269 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32 1, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4_l_2d: calls image.gather4.l.2d with half %s, %t and %lod.
; Unlike the non-lod variants in this file, the expected output contains no
; s_wqm / exec save-and-restore; it is only the v_perm_b32 merge of v0/v1
; into v1 followed by image_gather4_l with address v[1:2].
273 define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
274 ; GFX9-LABEL: gather4_l_2d:
275 ; GFX9: ; %bb.0: ; %main_body
276 ; GFX9-NEXT: s_mov_b32 s12, 0x5040100
277 ; GFX9-NEXT: v_perm_b32 v1, v1, v0, s12
278 ; GFX9-NEXT: image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
279 ; GFX9-NEXT: s_waitcnt vmcnt(0)
280 ; GFX9-NEXT: ; return to shader part epilog
282 ; GFX10-LABEL: gather4_l_2d:
283 ; GFX10: ; %bb.0: ; %main_body
284 ; GFX10-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
285 ; GFX10-NEXT: image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
286 ; GFX10-NEXT: s_waitcnt vmcnt(0)
287 ; GFX10-NEXT: ; return to shader part epilog
289 %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4_c_l_2d: calls image.gather4.c.l.2d with float %zcompare and half
; %s, %t, %lod. No s_wqm / exec manipulation is expected. GFX9 copies
; operands with v_mov_b32 so the address is the contiguous range v[3:5];
; GFX10 merges v1/v2 in place and passes the register list [v0, v1, v3].
; NOTE(review): the intrinsic name ends in .f32 although %s/%t are half;
; presumably this relies on LLVM remangling intrinsic names - confirm.
293 define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
294 ; GFX9-LABEL: gather4_c_l_2d:
295 ; GFX9: ; %bb.0: ; %main_body
296 ; GFX9-NEXT: s_mov_b32 s12, 0x5040100
297 ; GFX9-NEXT: v_mov_b32_e32 v5, v3
298 ; GFX9-NEXT: v_mov_b32_e32 v3, v0
299 ; GFX9-NEXT: v_perm_b32 v4, v2, v1, s12
300 ; GFX9-NEXT: image_gather4_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
301 ; GFX9-NEXT: s_waitcnt vmcnt(0)
302 ; GFX9-NEXT: ; return to shader part epilog
304 ; GFX10-LABEL: gather4_c_l_2d:
305 ; GFX10: ; %bb.0: ; %main_body
306 ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
307 ; GFX10-NEXT: image_gather4_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
308 ; GFX10-NEXT: s_waitcnt vmcnt(0)
309 ; GFX10-NEXT: ; return to shader part epilog
311 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4_lz_2d: calls image.gather4.lz.2d with half %s/%t. No s_wqm / exec
; manipulation is expected; the output is the v_perm_b32 merge of v0/v1 into
; v0 followed by image_gather4_lz with a single address VGPR.
315 define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
316 ; GFX9-LABEL: gather4_lz_2d:
317 ; GFX9: ; %bb.0: ; %main_body
318 ; GFX9-NEXT: s_mov_b32 s12, 0x5040100
319 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s12
320 ; GFX9-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
321 ; GFX9-NEXT: s_waitcnt vmcnt(0)
322 ; GFX9-NEXT: ; return to shader part epilog
324 ; GFX10-LABEL: gather4_lz_2d:
325 ; GFX10: ; %bb.0: ; %main_body
326 ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
327 ; GFX10-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
328 ; GFX10-NEXT: s_waitcnt vmcnt(0)
329 ; GFX10-NEXT: ; return to shader part epilog
331 %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4_c_lz_2d: calls image.gather4.c.lz.2d with float %zcompare and half
; %s/%t. No s_wqm / exec manipulation is expected; the output is the
; v_perm_b32 merge of v1/v2 into v1 followed by image_gather4_c_lz with
; address v[0:1].
; NOTE(review): the intrinsic name ends in .f32 although %s/%t are half;
; presumably this relies on LLVM remangling intrinsic names - confirm.
335 define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
336 ; GFX9-LABEL: gather4_c_lz_2d:
337 ; GFX9: ; %bb.0: ; %main_body
338 ; GFX9-NEXT: s_mov_b32 s12, 0x5040100
339 ; GFX9-NEXT: v_perm_b32 v1, v2, v1, s12
340 ; GFX9-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
341 ; GFX9-NEXT: s_waitcnt vmcnt(0)
342 ; GFX9-NEXT: ; return to shader part epilog
344 ; GFX10-LABEL: gather4_c_lz_2d:
345 ; GFX10: ; %bb.0: ; %main_body
346 ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
347 ; GFX10-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
348 ; GFX10-NEXT: s_waitcnt vmcnt(0)
349 ; GFX10-NEXT: ; return to shader part epilog
351 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Declarations of the image.gather4 intrinsic variants called by the test
; functions in this file. Each takes the dmask (i32) first, then the
; variant-specific bias/zcompare/coordinate/clamp/lod operands, then the
; <8 x i32> resource, the <4 x i32> sampler, and a trailing i1, i32, i32.
; Every declaration is tagged with attribute group #1.
355 declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
356 declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
357 declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
359 declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
360 declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
361 declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
363 declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
364 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
365 declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
366 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
368 declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
369 declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
371 declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
372 declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; Attribute groups. In the visible part of this file only #1 is referenced
; (by the intrinsic declarations); #0 and #2 have no visible users.
; NOTE(review): confirm #0 and #2 are unused before removing them.
374 attributes #0 = { nounwind }
375 attributes #1 = { nounwind readonly }
376 attributes #2 = { nounwind readnone }