1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; gather4 on a 2D image with half coordinates (%s, %t).
; Both check prefixes expect the two halves to be packed into one VGPR
; (v_and_b32 + v_lshl_or_b32 into v0) and image_gather4 to carry the a16
; modifier. The packing is expected between s_wqm and the s_and that
; re-applies the saved exec mask.
6 define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
7 ; GFX9-LABEL: gather4_2d:
8 ; GFX9: ; %bb.0: ; %main_body
9 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
10 ; GFX9-NEXT: s_wqm_b64 exec, exec
11 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
12 ; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
13 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
14 ; GFX9-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
15 ; GFX9-NEXT: s_waitcnt vmcnt(0)
16 ; GFX9-NEXT: ; return to shader part epilog
18 ; GFX10-LABEL: gather4_2d:
19 ; GFX10: ; %bb.0: ; %main_body
20 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
21 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
22 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
23 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
24 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
25 ; GFX10-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
26 ; GFX10-NEXT: s_waitcnt vmcnt(0)
27 ; GFX10-NEXT: ; return to shader part epilog
29 %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4 on a cube image with half coordinates (%s, %t, %face).
; The checks expect %s/%t packed into v1 and a two-register address v[1:2].
; The gfx900 run expects the "da" modifier; the gfx10 prefix expects
; dim:SQ_RSRC_IMG_CUBE instead.
33 define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
34 ; GFX9-LABEL: gather4_cube:
35 ; GFX9: ; %bb.0: ; %main_body
36 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
37 ; GFX9-NEXT: s_wqm_b64 exec, exec
38 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
39 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
40 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
41 ; GFX9-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
42 ; GFX9-NEXT: s_waitcnt vmcnt(0)
43 ; GFX9-NEXT: ; return to shader part epilog
45 ; GFX10-LABEL: gather4_cube:
46 ; GFX10: ; %bb.0: ; %main_body
47 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
48 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
49 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
50 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v0
51 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
52 ; GFX10-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
53 ; GFX10-NEXT: s_waitcnt vmcnt(0)
54 ; GFX10-NEXT: ; return to shader part epilog
56 %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4 on a 2D array image with half coordinates (%s, %t, %slice).
; The checks expect %s/%t packed into v1 and a two-register address v[1:2].
; The gfx900 run expects the "da" modifier; the gfx10 prefix expects
; dim:SQ_RSRC_IMG_2D_ARRAY instead.
60 define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
61 ; GFX9-LABEL: gather4_2darray:
62 ; GFX9: ; %bb.0: ; %main_body
63 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
64 ; GFX9-NEXT: s_wqm_b64 exec, exec
65 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
66 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
67 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
68 ; GFX9-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
69 ; GFX9-NEXT: s_waitcnt vmcnt(0)
70 ; GFX9-NEXT: ; return to shader part epilog
72 ; GFX10-LABEL: gather4_2darray:
73 ; GFX10: ; %bb.0: ; %main_body
74 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
75 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
76 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
77 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v0
78 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
79 ; GFX10-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
80 ; GFX10-NEXT: s_waitcnt vmcnt(0)
81 ; GFX10-NEXT: ; return to shader part epilog
83 %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4 with depth compare on a 2D image: float %zcompare followed by half
; coordinates (%s, %t). The checks expect %s/%t packed into v1 and the
; address operand v[0:1] on image_gather4_c with a16.
; NOTE(review): the intrinsic name ends in .f32 although the coordinates are
; half; presumably the IR parser remangles the name on load - confirm.
87 define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
88 ; GFX9-LABEL: gather4_c_2d:
89 ; GFX9: ; %bb.0: ; %main_body
90 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
91 ; GFX9-NEXT: s_wqm_b64 exec, exec
92 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
93 ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
94 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
95 ; GFX9-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
96 ; GFX9-NEXT: s_waitcnt vmcnt(0)
97 ; GFX9-NEXT: ; return to shader part epilog
99 ; GFX10-LABEL: gather4_c_2d:
100 ; GFX10: ; %bb.0: ; %main_body
101 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
102 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
103 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
104 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
105 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
106 ; GFX10-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
107 ; GFX10-NEXT: s_waitcnt vmcnt(0)
108 ; GFX10-NEXT: ; return to shader part epilog
110 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4 with LOD clamp on a 2D image: half coordinates (%s, %t) plus half
; %clamp. The checks expect %s/%t packed into v1 and the address operand
; v[1:2] on image_gather4_cl with a16.
114 define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
115 ; GFX9-LABEL: gather4_cl_2d:
116 ; GFX9: ; %bb.0: ; %main_body
117 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
118 ; GFX9-NEXT: s_wqm_b64 exec, exec
119 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
120 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
121 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
122 ; GFX9-NEXT: image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
123 ; GFX9-NEXT: s_waitcnt vmcnt(0)
124 ; GFX9-NEXT: ; return to shader part epilog
126 ; GFX10-LABEL: gather4_cl_2d:
127 ; GFX10: ; %bb.0: ; %main_body
128 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
129 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
130 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
131 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v0
132 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
133 ; GFX10-NEXT: image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
134 ; GFX10-NEXT: s_waitcnt vmcnt(0)
135 ; GFX10-NEXT: ; return to shader part epilog
137 %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4 with depth compare and LOD clamp on a 2D image: float %zcompare,
; half coordinates (%s, %t) and half %clamp.
; The gfx900 run expects v_mov copies that build a contiguous address
; v[3:5]; the gfx10 prefix expects no copies and a bracketed register list
; [v0, v1, v3] as the address operand.
; NOTE(review): the intrinsic name ends in .f32 although the coordinates are
; half; presumably the IR parser remangles the name on load - confirm.
141 define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
142 ; GFX9-LABEL: gather4_c_cl_2d:
143 ; GFX9: ; %bb.0: ; %main_body
144 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
145 ; GFX9-NEXT: s_wqm_b64 exec, exec
146 ; GFX9-NEXT: v_mov_b32_e32 v5, v3
147 ; GFX9-NEXT: v_mov_b32_e32 v3, v0
148 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v1
149 ; GFX9-NEXT: v_lshl_or_b32 v4, v2, 16, v0
150 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
151 ; GFX9-NEXT: image_gather4_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
152 ; GFX9-NEXT: s_waitcnt vmcnt(0)
153 ; GFX9-NEXT: ; return to shader part epilog
155 ; GFX10-LABEL: gather4_c_cl_2d:
156 ; GFX10: ; %bb.0: ; %main_body
157 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
158 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
159 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
160 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
161 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
162 ; GFX10-NEXT: image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
163 ; GFX10-NEXT: s_waitcnt vmcnt(0)
164 ; GFX10-NEXT: ; return to shader part epilog
166 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4 with LOD bias on a 2D image: half %bias followed by half
; coordinates (%s, %t). The checks expect %s/%t packed into v1 and the
; address operand v[0:1] on image_gather4_b with a16.
170 define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) {
171 ; GFX9-LABEL: gather4_b_2d:
172 ; GFX9: ; %bb.0: ; %main_body
173 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
174 ; GFX9-NEXT: s_wqm_b64 exec, exec
175 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
176 ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
177 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
178 ; GFX9-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
179 ; GFX9-NEXT: s_waitcnt vmcnt(0)
180 ; GFX9-NEXT: ; return to shader part epilog
182 ; GFX10-LABEL: gather4_b_2d:
183 ; GFX10: ; %bb.0: ; %main_body
184 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
185 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
186 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
187 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
188 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
189 ; GFX10-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
190 ; GFX10-NEXT: s_waitcnt vmcnt(0)
191 ; GFX10-NEXT: ; return to shader part epilog
193 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32 1, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4 with LOD bias and depth compare on a 2D image: half %bias, float
; %zcompare, then half coordinates (%s, %t). The checks expect %s/%t packed
; into v2 and the address operand v[0:2] on image_gather4_c_b with a16.
197 define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) {
198 ; GFX9-LABEL: gather4_c_b_2d:
199 ; GFX9: ; %bb.0: ; %main_body
200 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
201 ; GFX9-NEXT: s_wqm_b64 exec, exec
202 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
203 ; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2
204 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
205 ; GFX9-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
206 ; GFX9-NEXT: s_waitcnt vmcnt(0)
207 ; GFX9-NEXT: ; return to shader part epilog
209 ; GFX10-LABEL: gather4_c_b_2d:
210 ; GFX10: ; %bb.0: ; %main_body
211 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
212 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
213 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
214 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
215 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
216 ; GFX10-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
217 ; GFX10-NEXT: s_waitcnt vmcnt(0)
218 ; GFX10-NEXT: ; return to shader part epilog
220 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32 1, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4 with LOD bias and LOD clamp on a 2D image: half %bias, half
; coordinates (%s, %t) and half %clamp.
; The gfx900 run expects v_mov copies that build a contiguous address
; v[3:5]; the gfx10 prefix expects no copies and a bracketed register list
; [v0, v1, v3] as the address operand.
224 define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) {
225 ; GFX9-LABEL: gather4_b_cl_2d:
226 ; GFX9: ; %bb.0: ; %main_body
227 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
228 ; GFX9-NEXT: s_wqm_b64 exec, exec
229 ; GFX9-NEXT: v_mov_b32_e32 v5, v3
230 ; GFX9-NEXT: v_mov_b32_e32 v3, v0
231 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v1
232 ; GFX9-NEXT: v_lshl_or_b32 v4, v2, 16, v0
233 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
234 ; GFX9-NEXT: image_gather4_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
235 ; GFX9-NEXT: s_waitcnt vmcnt(0)
236 ; GFX9-NEXT: ; return to shader part epilog
238 ; GFX10-LABEL: gather4_b_cl_2d:
239 ; GFX10: ; %bb.0: ; %main_body
240 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
241 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
242 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
243 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
244 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
245 ; GFX10-NEXT: image_gather4_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
246 ; GFX10-NEXT: s_waitcnt vmcnt(0)
247 ; GFX10-NEXT: ; return to shader part epilog
249 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32 1, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4 with LOD bias, depth compare and LOD clamp on a 2D image: half
; %bias, float %zcompare, half coordinates (%s, %t) and half %clamp.
; The gfx900 run expects v_mov copies that build a contiguous address
; v[4:7]; the gfx10 prefix expects no copies and a bracketed register list
; [v0, v1, v2, v4] as the address operand.
253 define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) {
254 ; GFX9-LABEL: gather4_c_b_cl_2d:
255 ; GFX9: ; %bb.0: ; %main_body
256 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
257 ; GFX9-NEXT: s_wqm_b64 exec, exec
258 ; GFX9-NEXT: v_mov_b32_e32 v7, v4
259 ; GFX9-NEXT: v_mov_b32_e32 v4, v0
260 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v2
261 ; GFX9-NEXT: v_mov_b32_e32 v5, v1
262 ; GFX9-NEXT: v_lshl_or_b32 v6, v3, 16, v0
263 ; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
264 ; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0x1 a16
265 ; GFX9-NEXT: s_waitcnt vmcnt(0)
266 ; GFX9-NEXT: ; return to shader part epilog
268 ; GFX10-LABEL: gather4_c_b_cl_2d:
269 ; GFX10: ; %bb.0: ; %main_body
270 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
271 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
272 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
273 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
274 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
275 ; GFX10-NEXT: image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
276 ; GFX10-NEXT: s_waitcnt vmcnt(0)
277 ; GFX10-NEXT: ; return to shader part epilog
279 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32 1, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4 with explicit LOD on a 2D image: half coordinates (%s, %t) plus
; half %lod. The checks expect %s/%t packed into v1 and the address operand
; v[1:2] on image_gather4_l with a16. No s_wqm / exec save-restore sequence
; is expected for this variant.
283 define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
284 ; GFX9-LABEL: gather4_l_2d:
285 ; GFX9: ; %bb.0: ; %main_body
286 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
287 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
288 ; GFX9-NEXT: image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
289 ; GFX9-NEXT: s_waitcnt vmcnt(0)
290 ; GFX9-NEXT: ; return to shader part epilog
292 ; GFX10-LABEL: gather4_l_2d:
293 ; GFX10: ; %bb.0: ; %main_body
294 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
295 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v0
296 ; GFX10-NEXT: image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
297 ; GFX10-NEXT: s_waitcnt vmcnt(0)
298 ; GFX10-NEXT: ; return to shader part epilog
300 %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4 with depth compare and explicit LOD on a 2D image: float
; %zcompare, half coordinates (%s, %t) and half %lod.
; The gfx900 run expects v_mov copies that build a contiguous address
; v[3:5]; the gfx10 prefix expects a bracketed register list [v0, v1, v3].
; No s_wqm / exec save-restore sequence is expected for this variant.
; NOTE(review): the intrinsic name ends in .f32 although the coordinates are
; half; presumably the IR parser remangles the name on load - confirm.
304 define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
305 ; GFX9-LABEL: gather4_c_l_2d:
306 ; GFX9: ; %bb.0: ; %main_body
307 ; GFX9-NEXT: v_mov_b32_e32 v5, v3
308 ; GFX9-NEXT: v_mov_b32_e32 v3, v0
309 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v1
310 ; GFX9-NEXT: v_lshl_or_b32 v4, v2, 16, v0
311 ; GFX9-NEXT: image_gather4_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
312 ; GFX9-NEXT: s_waitcnt vmcnt(0)
313 ; GFX9-NEXT: ; return to shader part epilog
315 ; GFX10-LABEL: gather4_c_l_2d:
316 ; GFX10: ; %bb.0: ; %main_body
317 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
318 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
319 ; GFX10-NEXT: image_gather4_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
320 ; GFX10-NEXT: s_waitcnt vmcnt(0)
321 ; GFX10-NEXT: ; return to shader part epilog
323 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4.lz on a 2D image with half coordinates (%s, %t).
; The checks expect %s/%t packed into v0 and a single-register address on
; image_gather4_lz with a16. No s_wqm / exec save-restore sequence is
; expected for this variant.
327 define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
328 ; GFX9-LABEL: gather4_lz_2d:
329 ; GFX9: ; %bb.0: ; %main_body
330 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
331 ; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
332 ; GFX9-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
333 ; GFX9-NEXT: s_waitcnt vmcnt(0)
334 ; GFX9-NEXT: ; return to shader part epilog
336 ; GFX10-LABEL: gather4_lz_2d:
337 ; GFX10: ; %bb.0: ; %main_body
338 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
339 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
340 ; GFX10-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
341 ; GFX10-NEXT: s_waitcnt vmcnt(0)
342 ; GFX10-NEXT: ; return to shader part epilog
344 %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; gather4.c.lz on a 2D image: float %zcompare followed by half coordinates
; (%s, %t). The checks expect %s/%t packed into v1 and the address operand
; v[0:1] on image_gather4_c_lz with a16. No s_wqm / exec save-restore
; sequence is expected for this variant.
; NOTE(review): the intrinsic name ends in .f32 although the coordinates are
; half; presumably the IR parser remangles the name on load - confirm.
348 define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
349 ; GFX9-LABEL: gather4_c_lz_2d:
350 ; GFX9: ; %bb.0: ; %main_body
351 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
352 ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
353 ; GFX9-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
354 ; GFX9-NEXT: s_waitcnt vmcnt(0)
355 ; GFX9-NEXT: ; return to shader part epilog
357 ; GFX10-LABEL: gather4_c_lz_2d:
358 ; GFX10: ; %bb.0: ; %main_body
359 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
360 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
361 ; GFX10-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
362 ; GFX10-NEXT: s_waitcnt vmcnt(0)
363 ; GFX10-NEXT: ; return to shader part epilog
365 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Declarations of the image gather4 intrinsics called by the test functions
; in this file. Every declaration uses attribute group #1.
; NOTE(review): the .c.2d, .c.cl.2d, .c.l.2d and .c.lz.2d names end in .f32
; while their coordinate parameters are half; presumably the IR parser
; remangles these names on load - confirm before relying on the spelling.
369 declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
370 declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
371 declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
373 declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
374 declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
375 declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
377 declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
378 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
379 declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
380 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
382 declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
383 declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
385 declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
386 declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; Attribute groups. Only #1 is referenced by the lines visible in this file.
; NOTE(review): #0 and #2 look unused here - confirm before removing.
388 attributes #0 = { nounwind }
389 attributes #1 = { nounwind readonly }
390 attributes #2 = { nounwind readnone }