1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
5 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; sample_1d - plain 1D image sample with all four channels requested.
; The expected code on every target saves exec, executes s_wqm, ANDs exec
; with the saved copy and then issues a single image_sample that reads the
; coordinate from v0 and writes v[0:3] with dmask:0xf. Only the GFX10PLUS
; checks expect an explicit dim:SQ_RSRC_IMG_1D operand.
7 define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
8 ; VERDE-LABEL: sample_1d:
9 ; VERDE: ; %bb.0: ; %main_body
10 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
11 ; VERDE-NEXT: s_wqm_b64 exec, exec
12 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
13 ; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
14 ; VERDE-NEXT: s_waitcnt vmcnt(0)
15 ; VERDE-NEXT: ; return to shader part epilog
17 ; GFX6789-LABEL: sample_1d:
18 ; GFX6789: ; %bb.0: ; %main_body
19 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
20 ; GFX6789-NEXT: s_wqm_b64 exec, exec
21 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
22 ; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
23 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
24 ; GFX6789-NEXT: ; return to shader part epilog
26 ; GFX10PLUS-LABEL: sample_1d:
27 ; GFX10PLUS: ; %bb.0: ; %main_body
28 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
29 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
30 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
31 ; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
32 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
33 ; GFX10PLUS-NEXT: ; return to shader part epilog
; First operand (i32 15) is the dmask; the trailing i1/i32/i32 operands are
; all zero here, and the expected instruction carries neither tfe nor lwe.
35 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_1d_tfe - 1D sample through the {<4 x float>, i32} return variant
; with the second-to-last i32 operand set to 1; the expected image_sample
; carries the tfe modifier.
; The destination is five registers wide (v[0:4]) and every one of them is
; expected to be written with 0 before the sample is issued. The i32 member
; of the result (v4) is stored to %out: buffer_store_dword in the VERDE
; checks, global_store_dword for GFX6789 and GFX10, global_store_b32 for
; GFX11. Because s[12:13] holds %out until that store, the exec copy lives
; in s[14:15] (s14 on the targets that use exec_lo).
; GFX10 and GFX11 are checked separately here; the GFX11 output pairs some
; of the zero-initialising moves into v_dual_mov_b32.
39 define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
40 ; VERDE-LABEL: sample_1d_tfe:
41 ; VERDE: ; %bb.0: ; %main_body
42 ; VERDE-NEXT: s_mov_b64 s[14:15], exec
43 ; VERDE-NEXT: s_wqm_b64 exec, exec
44 ; VERDE-NEXT: v_mov_b32_e32 v5, v0
45 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
46 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
47 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
48 ; VERDE-NEXT: v_mov_b32_e32 v3, v0
49 ; VERDE-NEXT: v_mov_b32_e32 v4, v0
50 ; VERDE-NEXT: s_and_b64 exec, exec, s[14:15]
51 ; VERDE-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
52 ; VERDE-NEXT: s_mov_b32 s15, 0xf000
53 ; VERDE-NEXT: s_mov_b32 s14, -1
54 ; VERDE-NEXT: s_waitcnt vmcnt(0)
55 ; VERDE-NEXT: buffer_store_dword v4, off, s[12:15], 0
56 ; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
57 ; VERDE-NEXT: ; return to shader part epilog
59 ; GFX6789-LABEL: sample_1d_tfe:
60 ; GFX6789: ; %bb.0: ; %main_body
61 ; GFX6789-NEXT: s_mov_b64 s[14:15], exec
62 ; GFX6789-NEXT: s_wqm_b64 exec, exec
63 ; GFX6789-NEXT: v_mov_b32_e32 v6, 0
64 ; GFX6789-NEXT: v_mov_b32_e32 v5, v0
65 ; GFX6789-NEXT: v_mov_b32_e32 v7, v6
66 ; GFX6789-NEXT: v_mov_b32_e32 v8, v6
67 ; GFX6789-NEXT: v_mov_b32_e32 v9, v6
68 ; GFX6789-NEXT: v_mov_b32_e32 v10, v6
69 ; GFX6789-NEXT: v_mov_b32_e32 v0, v6
70 ; GFX6789-NEXT: v_mov_b32_e32 v1, v7
71 ; GFX6789-NEXT: v_mov_b32_e32 v2, v8
72 ; GFX6789-NEXT: v_mov_b32_e32 v3, v9
73 ; GFX6789-NEXT: v_mov_b32_e32 v4, v10
74 ; GFX6789-NEXT: s_and_b64 exec, exec, s[14:15]
75 ; GFX6789-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
76 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
77 ; GFX6789-NEXT: global_store_dword v6, v4, s[12:13]
78 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
79 ; GFX6789-NEXT: ; return to shader part epilog
81 ; GFX10-LABEL: sample_1d_tfe:
82 ; GFX10: ; %bb.0: ; %main_body
83 ; GFX10-NEXT: s_mov_b32 s14, exec_lo
84 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
85 ; GFX10-NEXT: v_mov_b32_e32 v6, 0
86 ; GFX10-NEXT: v_mov_b32_e32 v5, v0
87 ; GFX10-NEXT: v_mov_b32_e32 v7, v6
88 ; GFX10-NEXT: v_mov_b32_e32 v8, v6
89 ; GFX10-NEXT: v_mov_b32_e32 v9, v6
90 ; GFX10-NEXT: v_mov_b32_e32 v10, v6
91 ; GFX10-NEXT: v_mov_b32_e32 v0, v6
92 ; GFX10-NEXT: v_mov_b32_e32 v1, v7
93 ; GFX10-NEXT: v_mov_b32_e32 v2, v8
94 ; GFX10-NEXT: v_mov_b32_e32 v3, v9
95 ; GFX10-NEXT: v_mov_b32_e32 v4, v10
96 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14
97 ; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
98 ; GFX10-NEXT: s_waitcnt vmcnt(0)
99 ; GFX10-NEXT: global_store_dword v6, v4, s[12:13]
100 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
101 ; GFX10-NEXT: ; return to shader part epilog
103 ; GFX11-LABEL: sample_1d_tfe:
104 ; GFX11: ; %bb.0: ; %main_body
105 ; GFX11-NEXT: s_mov_b32 s14, exec_lo
106 ; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
107 ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0
108 ; GFX11-NEXT: v_mov_b32_e32 v9, v6
109 ; GFX11-NEXT: v_mov_b32_e32 v10, v6
110 ; GFX11-NEXT: v_mov_b32_e32 v8, v6
111 ; GFX11-NEXT: v_mov_b32_e32 v7, v6
112 ; GFX11-NEXT: v_mov_b32_e32 v0, v6
113 ; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v1, v7
114 ; GFX11-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v10
115 ; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s14
116 ; GFX11-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
117 ; GFX11-NEXT: s_waitcnt vmcnt(0)
118 ; GFX11-NEXT: global_store_b32 v6, v4, s[12:13]
119 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
120 ; GFX11-NEXT: ; return to shader part epilog
; dmask 15 with the second-to-last operand equal to 1 (tfe in the checks).
122 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
; Member 0 is the sampled vector, member 1 is the extra i32 written to %out.
123 %v.vec = extractvalue {<4 x float>, i32} %v, 0
124 %v.err = extractvalue {<4 x float>, i32} %v, 1
125 store i32 %v.err, i32 addrspace(1)* %out, align 4
126 ret <4 x float> %v.vec
; sample_1d_tfe_adjust_writemask_1 - tfe sample where only vector element 0
; and the i32 member of the result are consumed.
; The call still passes dmask 15, but the expected image_sample is narrowed
; to dmask:0x1 with a two-register destination v[0:1], both zeroed before
; the sample.
; Review note - %out is not referenced by any instruction visible in this
; function; confirm whether the parameter is intentional.
129 define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
130 ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_1:
131 ; VERDE: ; %bb.0: ; %main_body
132 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
133 ; VERDE-NEXT: s_wqm_b64 exec, exec
134 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
135 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
136 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
137 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
138 ; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
139 ; VERDE-NEXT: s_waitcnt vmcnt(0)
140 ; VERDE-NEXT: ; return to shader part epilog
142 ; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_1:
143 ; GFX6789: ; %bb.0: ; %main_body
144 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
145 ; GFX6789-NEXT: s_wqm_b64 exec, exec
146 ; GFX6789-NEXT: v_mov_b32_e32 v2, v0
147 ; GFX6789-NEXT: v_mov_b32_e32 v0, 0
148 ; GFX6789-NEXT: v_mov_b32_e32 v1, v0
149 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
150 ; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
151 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
152 ; GFX6789-NEXT: ; return to shader part epilog
154 ; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_1:
155 ; GFX10PLUS: ; %bb.0: ; %main_body
156 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
157 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
158 ; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
159 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
160 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
161 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
162 ; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
163 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
164 ; GFX10PLUS-NEXT: ; return to shader part epilog
; Requested dmask is 15; only element 0 is extracted below.
166 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
167 %res.vec = extractvalue {<4 x float>,i32} %v, 0
168 %res.f = extractelement <4 x float> %res.vec, i32 0
169 %res.err = extractvalue {<4 x float>,i32} %v, 1
; The i32 member is bitcast to float so it can travel in the <2 x float> value.
170 %res.errf = bitcast i32 %res.err to float
171 %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
172 %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
; sample_1d_tfe_adjust_writemask_2 - tfe sample where only vector element 1
; and the i32 member of the result are consumed.
; The call passes dmask 15; the expected image_sample is narrowed to
; dmask:0x2 with destination v[0:1], both zeroed before the sample.
176 define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
177 ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_2:
178 ; VERDE: ; %bb.0: ; %main_body
179 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
180 ; VERDE-NEXT: s_wqm_b64 exec, exec
181 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
182 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
183 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
184 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
185 ; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
186 ; VERDE-NEXT: s_waitcnt vmcnt(0)
187 ; VERDE-NEXT: ; return to shader part epilog
189 ; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_2:
190 ; GFX6789: ; %bb.0: ; %main_body
191 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
192 ; GFX6789-NEXT: s_wqm_b64 exec, exec
193 ; GFX6789-NEXT: v_mov_b32_e32 v2, v0
194 ; GFX6789-NEXT: v_mov_b32_e32 v0, 0
195 ; GFX6789-NEXT: v_mov_b32_e32 v1, v0
196 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
197 ; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
198 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
199 ; GFX6789-NEXT: ; return to shader part epilog
201 ; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_2:
202 ; GFX10PLUS: ; %bb.0: ; %main_body
203 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
204 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
205 ; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
206 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
207 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
208 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
209 ; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe
210 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
211 ; GFX10PLUS-NEXT: ; return to shader part epilog
; Requested dmask is 15; only element 1 is extracted below.
213 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
214 %res.vec = extractvalue {<4 x float>,i32} %v, 0
215 %res.f = extractelement <4 x float> %res.vec, i32 1
216 %res.err = extractvalue {<4 x float>,i32} %v, 1
; The i32 member is bitcast to float so it can travel in the <2 x float> value.
217 %res.errf = bitcast i32 %res.err to float
218 %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
219 %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
; sample_1d_tfe_adjust_writemask_3 - tfe sample where only vector element 2
; and the i32 member of the result are consumed.
; The call passes dmask 15; the expected image_sample is narrowed to
; dmask:0x4 with destination v[0:1], both zeroed before the sample.
223 define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
224 ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_3:
225 ; VERDE: ; %bb.0: ; %main_body
226 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
227 ; VERDE-NEXT: s_wqm_b64 exec, exec
228 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
229 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
230 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
231 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
232 ; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
233 ; VERDE-NEXT: s_waitcnt vmcnt(0)
234 ; VERDE-NEXT: ; return to shader part epilog
236 ; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_3:
237 ; GFX6789: ; %bb.0: ; %main_body
238 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
239 ; GFX6789-NEXT: s_wqm_b64 exec, exec
240 ; GFX6789-NEXT: v_mov_b32_e32 v2, v0
241 ; GFX6789-NEXT: v_mov_b32_e32 v0, 0
242 ; GFX6789-NEXT: v_mov_b32_e32 v1, v0
243 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
244 ; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
245 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
246 ; GFX6789-NEXT: ; return to shader part epilog
248 ; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_3:
249 ; GFX10PLUS: ; %bb.0: ; %main_body
250 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
251 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
252 ; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
253 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
254 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
255 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
256 ; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe
257 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
258 ; GFX10PLUS-NEXT: ; return to shader part epilog
; Requested dmask is 15; only element 2 is extracted below.
260 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
261 %res.vec = extractvalue {<4 x float>,i32} %v, 0
262 %res.f = extractelement <4 x float> %res.vec, i32 2
263 %res.err = extractvalue {<4 x float>,i32} %v, 1
; The i32 member is bitcast to float so it can travel in the <2 x float> value.
264 %res.errf = bitcast i32 %res.err to float
265 %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
266 %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
; sample_1d_tfe_adjust_writemask_4 - tfe sample where only vector element 3
; and the i32 member of the result are consumed.
; The call passes dmask 15; the expected image_sample is narrowed to
; dmask:0x8 with destination v[0:1], both zeroed before the sample.
270 define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
271 ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_4:
272 ; VERDE: ; %bb.0: ; %main_body
273 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
274 ; VERDE-NEXT: s_wqm_b64 exec, exec
275 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
276 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
277 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
278 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
279 ; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
280 ; VERDE-NEXT: s_waitcnt vmcnt(0)
281 ; VERDE-NEXT: ; return to shader part epilog
283 ; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_4:
284 ; GFX6789: ; %bb.0: ; %main_body
285 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
286 ; GFX6789-NEXT: s_wqm_b64 exec, exec
287 ; GFX6789-NEXT: v_mov_b32_e32 v2, v0
288 ; GFX6789-NEXT: v_mov_b32_e32 v0, 0
289 ; GFX6789-NEXT: v_mov_b32_e32 v1, v0
290 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
291 ; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
292 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
293 ; GFX6789-NEXT: ; return to shader part epilog
295 ; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_4:
296 ; GFX10PLUS: ; %bb.0: ; %main_body
297 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
298 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
299 ; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
300 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
301 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
302 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
303 ; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe
304 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
305 ; GFX10PLUS-NEXT: ; return to shader part epilog
; Requested dmask is 15; only element 3 is extracted below.
307 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
308 %res.vec = extractvalue {<4 x float>,i32} %v, 0
309 %res.f = extractelement <4 x float> %res.vec, i32 3
310 %res.err = extractvalue {<4 x float>,i32} %v, 1
; The i32 member is bitcast to float so it can travel in the <2 x float> value.
311 %res.errf = bitcast i32 %res.err to float
312 %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
313 %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
; sample_1d_tfe_adjust_writemask_12 - tfe sample where vector elements 0
; and 1 plus the i32 member of the result are consumed.
; The call passes dmask 15; the expected image_sample is narrowed to
; dmask:0x3 with a three-register destination v[0:2], all zeroed before the
; sample. GFX10 and GFX11 are checked separately because the GFX11 output
; pairs the first two moves into one v_dual_mov_b32.
317 define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
318 ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_12:
319 ; VERDE: ; %bb.0: ; %main_body
320 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
321 ; VERDE-NEXT: s_wqm_b64 exec, exec
322 ; VERDE-NEXT: v_mov_b32_e32 v3, v0
323 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
324 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
325 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
326 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
327 ; VERDE-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
328 ; VERDE-NEXT: s_waitcnt vmcnt(0)
329 ; VERDE-NEXT: ; return to shader part epilog
331 ; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_12:
332 ; GFX6789: ; %bb.0: ; %main_body
333 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
334 ; GFX6789-NEXT: s_wqm_b64 exec, exec
335 ; GFX6789-NEXT: v_mov_b32_e32 v3, v0
336 ; GFX6789-NEXT: v_mov_b32_e32 v0, 0
337 ; GFX6789-NEXT: v_mov_b32_e32 v1, v0
338 ; GFX6789-NEXT: v_mov_b32_e32 v2, v0
339 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
340 ; GFX6789-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
341 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
342 ; GFX6789-NEXT: ; return to shader part epilog
344 ; GFX10-LABEL: sample_1d_tfe_adjust_writemask_12:
345 ; GFX10: ; %bb.0: ; %main_body
346 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
347 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
348 ; GFX10-NEXT: v_mov_b32_e32 v3, v0
349 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
350 ; GFX10-NEXT: v_mov_b32_e32 v1, v0
351 ; GFX10-NEXT: v_mov_b32_e32 v2, v0
352 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
353 ; GFX10-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
354 ; GFX10-NEXT: s_waitcnt vmcnt(0)
355 ; GFX10-NEXT: ; return to shader part epilog
357 ; GFX11-LABEL: sample_1d_tfe_adjust_writemask_12:
358 ; GFX11: ; %bb.0: ; %main_body
359 ; GFX11-NEXT: s_mov_b32 s12, exec_lo
360 ; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
361 ; GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0
362 ; GFX11-NEXT: v_mov_b32_e32 v1, v0
363 ; GFX11-NEXT: v_mov_b32_e32 v2, v0
364 ; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
365 ; GFX11-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
366 ; GFX11-NEXT: s_waitcnt vmcnt(0)
367 ; GFX11-NEXT: ; return to shader part epilog
; Requested dmask is 15; only elements 0 and 1 are extracted below.
369 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
370 %res.vec = extractvalue {<4 x float>,i32} %v, 0
371 %res.f1 = extractelement <4 x float> %res.vec, i32 0
372 %res.f2 = extractelement <4 x float> %res.vec, i32 1
373 %res.err = extractvalue {<4 x float>,i32} %v, 1
; The i32 member is bitcast to float and placed in lane 2; lane 3 stays undef.
374 %res.errf = bitcast i32 %res.err to float
375 %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
376 %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
377 %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
; sample_1d_tfe_adjust_writemask_24 - tfe sample where vector elements 1
; and 3 plus the i32 member of the result are consumed.
; The call passes dmask 15; the expected image_sample is narrowed to the
; non-contiguous mask dmask:0xa with a three-register destination v[0:2],
; all zeroed before the sample. GFX10 and GFX11 are checked separately
; because the GFX11 output pairs the first two moves into one
; v_dual_mov_b32.
381 define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
382 ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_24:
383 ; VERDE: ; %bb.0: ; %main_body
384 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
385 ; VERDE-NEXT: s_wqm_b64 exec, exec
386 ; VERDE-NEXT: v_mov_b32_e32 v3, v0
387 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
388 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
389 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
390 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
391 ; VERDE-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
392 ; VERDE-NEXT: s_waitcnt vmcnt(0)
393 ; VERDE-NEXT: ; return to shader part epilog
395 ; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_24:
396 ; GFX6789: ; %bb.0: ; %main_body
397 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
398 ; GFX6789-NEXT: s_wqm_b64 exec, exec
399 ; GFX6789-NEXT: v_mov_b32_e32 v3, v0
400 ; GFX6789-NEXT: v_mov_b32_e32 v0, 0
401 ; GFX6789-NEXT: v_mov_b32_e32 v1, v0
402 ; GFX6789-NEXT: v_mov_b32_e32 v2, v0
403 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
404 ; GFX6789-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
405 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
406 ; GFX6789-NEXT: ; return to shader part epilog
408 ; GFX10-LABEL: sample_1d_tfe_adjust_writemask_24:
409 ; GFX10: ; %bb.0: ; %main_body
410 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
411 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
412 ; GFX10-NEXT: v_mov_b32_e32 v3, v0
413 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
414 ; GFX10-NEXT: v_mov_b32_e32 v1, v0
415 ; GFX10-NEXT: v_mov_b32_e32 v2, v0
416 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
417 ; GFX10-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe
418 ; GFX10-NEXT: s_waitcnt vmcnt(0)
419 ; GFX10-NEXT: ; return to shader part epilog
421 ; GFX11-LABEL: sample_1d_tfe_adjust_writemask_24:
422 ; GFX11: ; %bb.0: ; %main_body
423 ; GFX11-NEXT: s_mov_b32 s12, exec_lo
424 ; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
425 ; GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0
426 ; GFX11-NEXT: v_mov_b32_e32 v1, v0
427 ; GFX11-NEXT: v_mov_b32_e32 v2, v0
428 ; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
429 ; GFX11-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe
430 ; GFX11-NEXT: s_waitcnt vmcnt(0)
431 ; GFX11-NEXT: ; return to shader part epilog
; Requested dmask is 15; only elements 1 and 3 are extracted below.
433 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
434 %res.vec = extractvalue {<4 x float>,i32} %v, 0
435 %res.f1 = extractelement <4 x float> %res.vec, i32 1
436 %res.f2 = extractelement <4 x float> %res.vec, i32 3
437 %res.err = extractvalue {<4 x float>,i32} %v, 1
; The i32 member is bitcast to float and placed in lane 2; lane 3 stays undef.
438 %res.errf = bitcast i32 %res.err to float
439 %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
440 %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
441 %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
; sample_1d_tfe_adjust_writemask_134 - tfe sample where vector elements 0,
; 2 and 3 plus the i32 member of the result are consumed.
; The call passes dmask 15; the expected image_sample is narrowed to
; dmask:0xd with a four-register destination v[0:3], all zeroed before the
; sample.
445 define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
446 ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_134:
447 ; VERDE: ; %bb.0: ; %main_body
448 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
449 ; VERDE-NEXT: s_wqm_b64 exec, exec
450 ; VERDE-NEXT: v_mov_b32_e32 v4, v0
451 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
452 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
453 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
454 ; VERDE-NEXT: v_mov_b32_e32 v3, v0
455 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
456 ; VERDE-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
457 ; VERDE-NEXT: s_waitcnt vmcnt(0)
458 ; VERDE-NEXT: ; return to shader part epilog
460 ; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_134:
461 ; GFX6789: ; %bb.0: ; %main_body
462 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
463 ; GFX6789-NEXT: s_wqm_b64 exec, exec
464 ; GFX6789-NEXT: v_mov_b32_e32 v4, v0
465 ; GFX6789-NEXT: v_mov_b32_e32 v0, 0
466 ; GFX6789-NEXT: v_mov_b32_e32 v1, v0
467 ; GFX6789-NEXT: v_mov_b32_e32 v2, v0
468 ; GFX6789-NEXT: v_mov_b32_e32 v3, v0
469 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
470 ; GFX6789-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
471 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
472 ; GFX6789-NEXT: ; return to shader part epilog
474 ; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_134:
475 ; GFX10PLUS: ; %bb.0: ; %main_body
476 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
477 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
478 ; GFX10PLUS-NEXT: v_mov_b32_e32 v4, v0
479 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
480 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
481 ; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
482 ; GFX10PLUS-NEXT: v_mov_b32_e32 v3, v0
483 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
484 ; GFX10PLUS-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe
485 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
486 ; GFX10PLUS-NEXT: ; return to shader part epilog
; Requested dmask is 15; only elements 0, 2 and 3 are extracted below.
488 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
489 %res.vec = extractvalue {<4 x float>,i32} %v, 0
490 %res.f1 = extractelement <4 x float> %res.vec, i32 0
491 %res.f2 = extractelement <4 x float> %res.vec, i32 2
492 %res.f3 = extractelement <4 x float> %res.vec, i32 3
493 %res.err = extractvalue {<4 x float>,i32} %v, 1
; The i32 member is bitcast to float and placed in the last lane.
494 %res.errf = bitcast i32 %res.err to float
495 %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
496 %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
497 %res.tmp3 = insertelement <4 x float> %res.tmp2, float %res.f3, i32 2
498 %res = insertelement <4 x float> %res.tmp3, float %res.errf, i32 3
; sample_1d_lwe - 1D sample through the {<4 x float>, i32} return variant
; with the second-to-last i32 operand set to 2; the expected image_sample
; carries the lwe modifier.
; The destination is five registers wide (v[0:4]) and every one of them is
; expected to be written with 0 before the sample is issued. The i32 member
; of the result (v4) is stored to %out: buffer_store_dword in the VERDE
; checks, global_store_dword for GFX6789 and GFX10, global_store_b32 for
; GFX11. Because s[12:13] holds %out until that store, the exec copy lives
; in s[14:15] (s14 on the targets that use exec_lo).
502 define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
503 ; VERDE-LABEL: sample_1d_lwe:
504 ; VERDE: ; %bb.0: ; %main_body
505 ; VERDE-NEXT: s_mov_b64 s[14:15], exec
506 ; VERDE-NEXT: s_wqm_b64 exec, exec
507 ; VERDE-NEXT: v_mov_b32_e32 v5, v0
508 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
509 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
510 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
511 ; VERDE-NEXT: v_mov_b32_e32 v3, v0
512 ; VERDE-NEXT: v_mov_b32_e32 v4, v0
513 ; VERDE-NEXT: s_and_b64 exec, exec, s[14:15]
514 ; VERDE-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
515 ; VERDE-NEXT: s_mov_b32 s15, 0xf000
516 ; VERDE-NEXT: s_mov_b32 s14, -1
517 ; VERDE-NEXT: s_waitcnt vmcnt(0)
518 ; VERDE-NEXT: buffer_store_dword v4, off, s[12:15], 0
519 ; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
520 ; VERDE-NEXT: ; return to shader part epilog
522 ; GFX6789-LABEL: sample_1d_lwe:
523 ; GFX6789: ; %bb.0: ; %main_body
524 ; GFX6789-NEXT: s_mov_b64 s[14:15], exec
525 ; GFX6789-NEXT: s_wqm_b64 exec, exec
526 ; GFX6789-NEXT: v_mov_b32_e32 v6, 0
527 ; GFX6789-NEXT: v_mov_b32_e32 v5, v0
528 ; GFX6789-NEXT: v_mov_b32_e32 v7, v6
529 ; GFX6789-NEXT: v_mov_b32_e32 v8, v6
530 ; GFX6789-NEXT: v_mov_b32_e32 v9, v6
531 ; GFX6789-NEXT: v_mov_b32_e32 v10, v6
532 ; GFX6789-NEXT: v_mov_b32_e32 v0, v6
533 ; GFX6789-NEXT: v_mov_b32_e32 v1, v7
534 ; GFX6789-NEXT: v_mov_b32_e32 v2, v8
535 ; GFX6789-NEXT: v_mov_b32_e32 v3, v9
536 ; GFX6789-NEXT: v_mov_b32_e32 v4, v10
537 ; GFX6789-NEXT: s_and_b64 exec, exec, s[14:15]
538 ; GFX6789-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
539 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
540 ; GFX6789-NEXT: global_store_dword v6, v4, s[12:13]
541 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
542 ; GFX6789-NEXT: ; return to shader part epilog
544 ; GFX10-LABEL: sample_1d_lwe:
545 ; GFX10: ; %bb.0: ; %main_body
546 ; GFX10-NEXT: s_mov_b32 s14, exec_lo
547 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
548 ; GFX10-NEXT: v_mov_b32_e32 v6, 0
549 ; GFX10-NEXT: v_mov_b32_e32 v5, v0
550 ; GFX10-NEXT: v_mov_b32_e32 v7, v6
551 ; GFX10-NEXT: v_mov_b32_e32 v8, v6
552 ; GFX10-NEXT: v_mov_b32_e32 v9, v6
553 ; GFX10-NEXT: v_mov_b32_e32 v10, v6
554 ; GFX10-NEXT: v_mov_b32_e32 v0, v6
555 ; GFX10-NEXT: v_mov_b32_e32 v1, v7
556 ; GFX10-NEXT: v_mov_b32_e32 v2, v8
557 ; GFX10-NEXT: v_mov_b32_e32 v3, v9
558 ; GFX10-NEXT: v_mov_b32_e32 v4, v10
559 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14
560 ; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
561 ; GFX10-NEXT: s_waitcnt vmcnt(0)
562 ; GFX10-NEXT: global_store_dword v6, v4, s[12:13]
563 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
564 ; GFX10-NEXT: ; return to shader part epilog
566 ; GFX11-LABEL: sample_1d_lwe:
567 ; GFX11: ; %bb.0: ; %main_body
568 ; GFX11-NEXT: s_mov_b32 s14, exec_lo
569 ; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
570 ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0
571 ; GFX11-NEXT: v_mov_b32_e32 v9, v6
572 ; GFX11-NEXT: v_mov_b32_e32 v10, v6
573 ; GFX11-NEXT: v_mov_b32_e32 v8, v6
574 ; GFX11-NEXT: v_mov_b32_e32 v7, v6
575 ; GFX11-NEXT: v_mov_b32_e32 v0, v6
576 ; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v1, v7
577 ; GFX11-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v10
578 ; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s14
579 ; GFX11-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
580 ; GFX11-NEXT: s_waitcnt vmcnt(0)
581 ; GFX11-NEXT: global_store_b32 v6, v4, s[12:13]
582 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
583 ; GFX11-NEXT: ; return to shader part epilog
; dmask 15 with the second-to-last operand equal to 2 (lwe in the checks).
585 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
; Member 0 is the sampled vector, member 1 is the extra i32 written to %out.
586 %v.vec = extractvalue {<4 x float>, i32} %v, 0
587 %v.err = extractvalue {<4 x float>, i32} %v, 1
588 store i32 %v.err, i32 addrspace(1)* %out, align 4
589 ret <4 x float> %v.vec
; sample_2d - plain 2D image sample with all four channels requested.
; The two coordinates %s and %t are expected in v[0:1]; the result is
; written to v[0:3] with dmask:0xf. Only the GFX10PLUS checks expect an
; explicit dim:SQ_RSRC_IMG_2D operand.
592 define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
593 ; VERDE-LABEL: sample_2d:
594 ; VERDE: ; %bb.0: ; %main_body
595 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
596 ; VERDE-NEXT: s_wqm_b64 exec, exec
597 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
598 ; VERDE-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
599 ; VERDE-NEXT: s_waitcnt vmcnt(0)
600 ; VERDE-NEXT: ; return to shader part epilog
602 ; GFX6789-LABEL: sample_2d:
603 ; GFX6789: ; %bb.0: ; %main_body
604 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
605 ; GFX6789-NEXT: s_wqm_b64 exec, exec
606 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
607 ; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
608 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
609 ; GFX6789-NEXT: ; return to shader part epilog
611 ; GFX10PLUS-LABEL: sample_2d:
612 ; GFX10PLUS: ; %bb.0: ; %main_body
613 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
614 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
615 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
616 ; GFX10PLUS-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
617 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
618 ; GFX10PLUS-NEXT: ; return to shader part epilog
; dmask 15, coordinates (%s, %t); trailing i1/i32/i32 operands are all zero.
620 %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_3d - plain 3D image sample with all four channels requested.
; The three coordinates %s, %t and %r are expected in v[0:2]; the result is
; written to v[0:3] with dmask:0xf. Only the GFX10PLUS checks expect an
; explicit dim:SQ_RSRC_IMG_3D operand.
624 define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
625 ; VERDE-LABEL: sample_3d:
626 ; VERDE: ; %bb.0: ; %main_body
627 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
628 ; VERDE-NEXT: s_wqm_b64 exec, exec
629 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
630 ; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
631 ; VERDE-NEXT: s_waitcnt vmcnt(0)
632 ; VERDE-NEXT: ; return to shader part epilog
634 ; GFX6789-LABEL: sample_3d:
635 ; GFX6789: ; %bb.0: ; %main_body
636 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
637 ; GFX6789-NEXT: s_wqm_b64 exec, exec
638 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
639 ; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
640 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
641 ; GFX6789-NEXT: ; return to shader part epilog
643 ; GFX10PLUS-LABEL: sample_3d:
644 ; GFX10PLUS: ; %bb.0: ; %main_body
645 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
646 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
647 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
648 ; GFX10PLUS-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
649 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
650 ; GFX10PLUS-NEXT: ; return to shader part epilog
; dmask 15, coordinates (%s, %t, %r); trailing i1/i32/i32 operands are all zero.
652 %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_cube - cube-map sample with all four channels requested.
; %s, %t and %face are expected in v[0:2]; the result is written to v[0:3]
; with dmask:0xf. The VERDE and GFX6789 checks expect the da modifier on
; the instruction, while the GFX10PLUS checks expect dim:SQ_RSRC_IMG_CUBE
; instead.
656 define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
657 ; VERDE-LABEL: sample_cube:
658 ; VERDE: ; %bb.0: ; %main_body
659 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
660 ; VERDE-NEXT: s_wqm_b64 exec, exec
661 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
662 ; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
663 ; VERDE-NEXT: s_waitcnt vmcnt(0)
664 ; VERDE-NEXT: ; return to shader part epilog
666 ; GFX6789-LABEL: sample_cube:
667 ; GFX6789: ; %bb.0: ; %main_body
668 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
669 ; GFX6789-NEXT: s_wqm_b64 exec, exec
670 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
671 ; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
672 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
673 ; GFX6789-NEXT: ; return to shader part epilog
675 ; GFX10PLUS-LABEL: sample_cube:
676 ; GFX10PLUS: ; %bb.0: ; %main_body
677 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
678 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
679 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
680 ; GFX10PLUS-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
681 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
682 ; GFX10PLUS-NEXT: ; return to shader part epilog
; dmask 15, coordinates (%s, %t, %face); trailing i1/i32/i32 operands are all zero.
684 %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_1darray - 1D-array sample with all four channels requested.
; %s and %slice are expected in v[0:1]; the result is written to v[0:3]
; with dmask:0xf. The VERDE and GFX6789 checks expect the da modifier on
; the instruction, while the GFX10PLUS checks expect
; dim:SQ_RSRC_IMG_1D_ARRAY instead.
688 define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
689 ; VERDE-LABEL: sample_1darray:
690 ; VERDE: ; %bb.0: ; %main_body
691 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
692 ; VERDE-NEXT: s_wqm_b64 exec, exec
693 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
694 ; VERDE-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
695 ; VERDE-NEXT: s_waitcnt vmcnt(0)
696 ; VERDE-NEXT: ; return to shader part epilog
698 ; GFX6789-LABEL: sample_1darray:
699 ; GFX6789: ; %bb.0: ; %main_body
700 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
701 ; GFX6789-NEXT: s_wqm_b64 exec, exec
702 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
703 ; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
704 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
705 ; GFX6789-NEXT: ; return to shader part epilog
707 ; GFX10PLUS-LABEL: sample_1darray:
708 ; GFX10PLUS: ; %bb.0: ; %main_body
709 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
710 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
711 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
712 ; GFX10PLUS-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
713 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
714 ; GFX10PLUS-NEXT: ; return to shader part epilog
; dmask 15, coordinates (%s, %slice); trailing i1/i32/i32 operands are all zero.
716 %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.2darray called with (%s, %t, %slice).
; The checks expect the exec save / s_wqm / s_and sequence and then a single
; image_sample reading v[0:2]; the pre-gfx10 check groups show the `da`
; modifier, the gfx10+ group shows dim SQ_RSRC_IMG_2D_ARRAY instead.
720 define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
721 ; VERDE-LABEL: sample_2darray:
722 ; VERDE: ; %bb.0: ; %main_body
723 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
724 ; VERDE-NEXT: s_wqm_b64 exec, exec
725 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
726 ; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
727 ; VERDE-NEXT: s_waitcnt vmcnt(0)
728 ; VERDE-NEXT: ; return to shader part epilog
730 ; GFX6789-LABEL: sample_2darray:
731 ; GFX6789: ; %bb.0: ; %main_body
732 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
733 ; GFX6789-NEXT: s_wqm_b64 exec, exec
734 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
735 ; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
736 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
737 ; GFX6789-NEXT: ; return to shader part epilog
739 ; GFX10PLUS-LABEL: sample_2darray:
740 ; GFX10PLUS: ; %bb.0: ; %main_body
741 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
742 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
743 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
744 ; GFX10PLUS-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
745 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
746 ; GFX10PLUS-NEXT: ; return to shader part epilog
748 %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.c.1d called with (%zcompare, %s).
; The checks expect the exec save / s_wqm / s_and sequence and then a single
; image_sample_c reading v[0:1]; the gfx10+ group also carries
; dim SQ_RSRC_IMG_1D.
752 define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
753 ; VERDE-LABEL: sample_c_1d:
754 ; VERDE: ; %bb.0: ; %main_body
755 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
756 ; VERDE-NEXT: s_wqm_b64 exec, exec
757 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
758 ; VERDE-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
759 ; VERDE-NEXT: s_waitcnt vmcnt(0)
760 ; VERDE-NEXT: ; return to shader part epilog
762 ; GFX6789-LABEL: sample_c_1d:
763 ; GFX6789: ; %bb.0: ; %main_body
764 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
765 ; GFX6789-NEXT: s_wqm_b64 exec, exec
766 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
767 ; GFX6789-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
768 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
769 ; GFX6789-NEXT: ; return to shader part epilog
771 ; GFX10PLUS-LABEL: sample_c_1d:
772 ; GFX10PLUS: ; %bb.0: ; %main_body
773 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
774 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
775 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
776 ; GFX10PLUS-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
777 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
778 ; GFX10PLUS-NEXT: ; return to shader part epilog
780 %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.c.2d called with (%zcompare, %s, %t).
; The checks expect the exec save / s_wqm / s_and sequence and then a single
; image_sample_c reading v[0:2]; the gfx10+ group also carries
; dim SQ_RSRC_IMG_2D.
784 define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
785 ; VERDE-LABEL: sample_c_2d:
786 ; VERDE: ; %bb.0: ; %main_body
787 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
788 ; VERDE-NEXT: s_wqm_b64 exec, exec
789 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
790 ; VERDE-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
791 ; VERDE-NEXT: s_waitcnt vmcnt(0)
792 ; VERDE-NEXT: ; return to shader part epilog
794 ; GFX6789-LABEL: sample_c_2d:
795 ; GFX6789: ; %bb.0: ; %main_body
796 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
797 ; GFX6789-NEXT: s_wqm_b64 exec, exec
798 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
799 ; GFX6789-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
800 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
801 ; GFX6789-NEXT: ; return to shader part epilog
803 ; GFX10PLUS-LABEL: sample_c_2d:
804 ; GFX10PLUS: ; %bb.0: ; %main_body
805 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
806 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
807 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
808 ; GFX10PLUS-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
809 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
810 ; GFX10PLUS-NEXT: ; return to shader part epilog
812 %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.cl.1d called with (%s, %clamp).
; The checks expect the exec save / s_wqm / s_and sequence and then a single
; image_sample_cl reading v[0:1]; the gfx10+ group also carries
; dim SQ_RSRC_IMG_1D.
816 define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) {
817 ; VERDE-LABEL: sample_cl_1d:
818 ; VERDE: ; %bb.0: ; %main_body
819 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
820 ; VERDE-NEXT: s_wqm_b64 exec, exec
821 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
822 ; VERDE-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
823 ; VERDE-NEXT: s_waitcnt vmcnt(0)
824 ; VERDE-NEXT: ; return to shader part epilog
826 ; GFX6789-LABEL: sample_cl_1d:
827 ; GFX6789: ; %bb.0: ; %main_body
828 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
829 ; GFX6789-NEXT: s_wqm_b64 exec, exec
830 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
831 ; GFX6789-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
832 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
833 ; GFX6789-NEXT: ; return to shader part epilog
835 ; GFX10PLUS-LABEL: sample_cl_1d:
836 ; GFX10PLUS: ; %bb.0: ; %main_body
837 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
838 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
839 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
840 ; GFX10PLUS-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
841 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
842 ; GFX10PLUS-NEXT: ; return to shader part epilog
844 %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.cl.2d called with (%s, %t, %clamp).
; The checks expect the exec save / s_wqm / s_and sequence and then a single
; image_sample_cl reading v[0:2]; the gfx10+ group also carries
; dim SQ_RSRC_IMG_2D.
848 define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
849 ; VERDE-LABEL: sample_cl_2d:
850 ; VERDE: ; %bb.0: ; %main_body
851 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
852 ; VERDE-NEXT: s_wqm_b64 exec, exec
853 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
854 ; VERDE-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
855 ; VERDE-NEXT: s_waitcnt vmcnt(0)
856 ; VERDE-NEXT: ; return to shader part epilog
858 ; GFX6789-LABEL: sample_cl_2d:
859 ; GFX6789: ; %bb.0: ; %main_body
860 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
861 ; GFX6789-NEXT: s_wqm_b64 exec, exec
862 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
863 ; GFX6789-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
864 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
865 ; GFX6789-NEXT: ; return to shader part epilog
867 ; GFX10PLUS-LABEL: sample_cl_2d:
868 ; GFX10PLUS: ; %bb.0: ; %main_body
869 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
870 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
871 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
872 ; GFX10PLUS-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
873 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
874 ; GFX10PLUS-NEXT: ; return to shader part epilog
876 %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.c.cl.1d called with (%zcompare, %s, %clamp).
; The checks expect the exec save / s_wqm / s_and sequence and then a single
; image_sample_c_cl reading v[0:2]; the gfx10+ group also carries
; dim SQ_RSRC_IMG_1D.
880 define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) {
881 ; VERDE-LABEL: sample_c_cl_1d:
882 ; VERDE: ; %bb.0: ; %main_body
883 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
884 ; VERDE-NEXT: s_wqm_b64 exec, exec
885 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
886 ; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
887 ; VERDE-NEXT: s_waitcnt vmcnt(0)
888 ; VERDE-NEXT: ; return to shader part epilog
890 ; GFX6789-LABEL: sample_c_cl_1d:
891 ; GFX6789: ; %bb.0: ; %main_body
892 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
893 ; GFX6789-NEXT: s_wqm_b64 exec, exec
894 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
895 ; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
896 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
897 ; GFX6789-NEXT: ; return to shader part epilog
899 ; GFX10PLUS-LABEL: sample_c_cl_1d:
900 ; GFX10PLUS: ; %bb.0: ; %main_body
901 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
902 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
903 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
904 ; GFX10PLUS-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
905 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
906 ; GFX10PLUS-NEXT: ; return to shader part epilog
908 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.c.cl.2d called with
; (%zcompare, %s, %t, %clamp).
; The checks expect the exec save / s_wqm / s_and sequence and then a single
; image_sample_c_cl reading v[0:3]; the gfx10+ group also carries
; dim SQ_RSRC_IMG_2D.
912 define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
913 ; VERDE-LABEL: sample_c_cl_2d:
914 ; VERDE: ; %bb.0: ; %main_body
915 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
916 ; VERDE-NEXT: s_wqm_b64 exec, exec
917 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
918 ; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
919 ; VERDE-NEXT: s_waitcnt vmcnt(0)
920 ; VERDE-NEXT: ; return to shader part epilog
922 ; GFX6789-LABEL: sample_c_cl_2d:
923 ; GFX6789: ; %bb.0: ; %main_body
924 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
925 ; GFX6789-NEXT: s_wqm_b64 exec, exec
926 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
927 ; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
928 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
929 ; GFX6789-NEXT: ; return to shader part epilog
931 ; GFX10PLUS-LABEL: sample_c_cl_2d:
932 ; GFX10PLUS: ; %bb.0: ; %main_body
933 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
934 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
935 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
936 ; GFX10PLUS-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
937 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
938 ; GFX10PLUS-NEXT: ; return to shader part epilog
940 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.b.1d called with (%bias, %s).
; The checks expect the exec save / s_wqm / s_and sequence and then a single
; image_sample_b reading v[0:1]; the gfx10+ group also carries
; dim SQ_RSRC_IMG_1D.
944 define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) {
945 ; VERDE-LABEL: sample_b_1d:
946 ; VERDE: ; %bb.0: ; %main_body
947 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
948 ; VERDE-NEXT: s_wqm_b64 exec, exec
949 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
950 ; VERDE-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
951 ; VERDE-NEXT: s_waitcnt vmcnt(0)
952 ; VERDE-NEXT: ; return to shader part epilog
954 ; GFX6789-LABEL: sample_b_1d:
955 ; GFX6789: ; %bb.0: ; %main_body
956 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
957 ; GFX6789-NEXT: s_wqm_b64 exec, exec
958 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
959 ; GFX6789-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
960 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
961 ; GFX6789-NEXT: ; return to shader part epilog
963 ; GFX10PLUS-LABEL: sample_b_1d:
964 ; GFX10PLUS: ; %bb.0: ; %main_body
965 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
966 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
967 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
968 ; GFX10PLUS-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
969 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
970 ; GFX10PLUS-NEXT: ; return to shader part epilog
972 %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.b.2d called with (%bias, %s, %t).
; The checks expect the exec save / s_wqm / s_and sequence and then a single
; image_sample_b reading v[0:2]; the gfx10+ group also carries
; dim SQ_RSRC_IMG_2D.
976 define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
977 ; VERDE-LABEL: sample_b_2d:
978 ; VERDE: ; %bb.0: ; %main_body
979 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
980 ; VERDE-NEXT: s_wqm_b64 exec, exec
981 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
982 ; VERDE-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
983 ; VERDE-NEXT: s_waitcnt vmcnt(0)
984 ; VERDE-NEXT: ; return to shader part epilog
986 ; GFX6789-LABEL: sample_b_2d:
987 ; GFX6789: ; %bb.0: ; %main_body
988 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
989 ; GFX6789-NEXT: s_wqm_b64 exec, exec
990 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
991 ; GFX6789-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
992 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
993 ; GFX6789-NEXT: ; return to shader part epilog
995 ; GFX10PLUS-LABEL: sample_b_2d:
996 ; GFX10PLUS: ; %bb.0: ; %main_body
997 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
998 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
999 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1000 ; GFX10PLUS-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1001 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1002 ; GFX10PLUS-NEXT: ; return to shader part epilog
1004 %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.c.b.1d called with (%bias, %zcompare, %s).
; The checks expect the exec save / s_wqm / s_and sequence and then a single
; image_sample_c_b reading v[0:2]; the gfx10+ group also carries
; dim SQ_RSRC_IMG_1D.
1008 define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) {
1009 ; VERDE-LABEL: sample_c_b_1d:
1010 ; VERDE: ; %bb.0: ; %main_body
1011 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1012 ; VERDE-NEXT: s_wqm_b64 exec, exec
1013 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1014 ; VERDE-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1015 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1016 ; VERDE-NEXT: ; return to shader part epilog
1018 ; GFX6789-LABEL: sample_c_b_1d:
1019 ; GFX6789: ; %bb.0: ; %main_body
1020 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1021 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1022 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1023 ; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1024 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1025 ; GFX6789-NEXT: ; return to shader part epilog
1027 ; GFX10PLUS-LABEL: sample_c_b_1d:
1028 ; GFX10PLUS: ; %bb.0: ; %main_body
1029 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1030 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1031 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1032 ; GFX10PLUS-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1033 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1034 ; GFX10PLUS-NEXT: ; return to shader part epilog
1036 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.c.b.2d called with
; (%bias, %zcompare, %s, %t).
; The checks expect the exec save / s_wqm / s_and sequence and then a single
; image_sample_c_b reading v[0:3]; the gfx10+ group also carries
; dim SQ_RSRC_IMG_2D.
1040 define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
1041 ; VERDE-LABEL: sample_c_b_2d:
1042 ; VERDE: ; %bb.0: ; %main_body
1043 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1044 ; VERDE-NEXT: s_wqm_b64 exec, exec
1045 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1046 ; VERDE-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1047 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1048 ; VERDE-NEXT: ; return to shader part epilog
1050 ; GFX6789-LABEL: sample_c_b_2d:
1051 ; GFX6789: ; %bb.0: ; %main_body
1052 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1053 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1054 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1055 ; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1056 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1057 ; GFX6789-NEXT: ; return to shader part epilog
1059 ; GFX10PLUS-LABEL: sample_c_b_2d:
1060 ; GFX10PLUS: ; %bb.0: ; %main_body
1061 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1062 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1063 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1064 ; GFX10PLUS-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1065 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1066 ; GFX10PLUS-NEXT: ; return to shader part epilog
1068 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.b.cl.1d called with (%bias, %s, %clamp).
; The checks expect the exec save / s_wqm / s_and sequence and then a single
; image_sample_b_cl reading v[0:2]; the gfx10+ group also carries
; dim SQ_RSRC_IMG_1D.
1072 define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) {
1073 ; VERDE-LABEL: sample_b_cl_1d:
1074 ; VERDE: ; %bb.0: ; %main_body
1075 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1076 ; VERDE-NEXT: s_wqm_b64 exec, exec
1077 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1078 ; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1079 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1080 ; VERDE-NEXT: ; return to shader part epilog
1082 ; GFX6789-LABEL: sample_b_cl_1d:
1083 ; GFX6789: ; %bb.0: ; %main_body
1084 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1085 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1086 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1087 ; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1088 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1089 ; GFX6789-NEXT: ; return to shader part epilog
1091 ; GFX10PLUS-LABEL: sample_b_cl_1d:
1092 ; GFX10PLUS: ; %bb.0: ; %main_body
1093 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1094 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1095 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1096 ; GFX10PLUS-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1097 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1098 ; GFX10PLUS-NEXT: ; return to shader part epilog
1100 %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.b.cl.2d called with
; (%bias, %s, %t, %clamp).
; The checks expect the exec save / s_wqm / s_and sequence and then a single
; image_sample_b_cl reading v[0:3]; the gfx10+ group also carries
; dim SQ_RSRC_IMG_2D.
1104 define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
1105 ; VERDE-LABEL: sample_b_cl_2d:
1106 ; VERDE: ; %bb.0: ; %main_body
1107 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1108 ; VERDE-NEXT: s_wqm_b64 exec, exec
1109 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1110 ; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1111 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1112 ; VERDE-NEXT: ; return to shader part epilog
1114 ; GFX6789-LABEL: sample_b_cl_2d:
1115 ; GFX6789: ; %bb.0: ; %main_body
1116 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1117 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1118 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1119 ; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1120 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1121 ; GFX6789-NEXT: ; return to shader part epilog
1123 ; GFX10PLUS-LABEL: sample_b_cl_2d:
1124 ; GFX10PLUS: ; %bb.0: ; %main_body
1125 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1126 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1127 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1128 ; GFX10PLUS-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1129 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1130 ; GFX10PLUS-NEXT: ; return to shader part epilog
1132 %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.c.b.cl.1d called with
; (%bias, %zcompare, %s, %clamp).
; The checks expect the exec save / s_wqm / s_and sequence and then a single
; image_sample_c_b_cl reading v[0:3]; the gfx10+ group also carries
; dim SQ_RSRC_IMG_1D.
1136 define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) {
1137 ; VERDE-LABEL: sample_c_b_cl_1d:
1138 ; VERDE: ; %bb.0: ; %main_body
1139 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1140 ; VERDE-NEXT: s_wqm_b64 exec, exec
1141 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1142 ; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1143 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1144 ; VERDE-NEXT: ; return to shader part epilog
1146 ; GFX6789-LABEL: sample_c_b_cl_1d:
1147 ; GFX6789: ; %bb.0: ; %main_body
1148 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1149 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1150 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1151 ; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1152 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1153 ; GFX6789-NEXT: ; return to shader part epilog
1155 ; GFX10PLUS-LABEL: sample_c_b_cl_1d:
1156 ; GFX10PLUS: ; %bb.0: ; %main_body
1157 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1158 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1159 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1160 ; GFX10PLUS-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1161 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1162 ; GFX10PLUS-NEXT: ; return to shader part epilog
1164 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.c.b.cl.2d called with
; (%bias, %zcompare, %s, %t, %clamp).
; The checks expect the exec save / s_wqm / s_and sequence and then a single
; image_sample_c_b_cl reading the five-register range v[0:4]; the gfx10+
; group also carries dim SQ_RSRC_IMG_2D.
1168 define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
1169 ; VERDE-LABEL: sample_c_b_cl_2d:
1170 ; VERDE: ; %bb.0: ; %main_body
1171 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1172 ; VERDE-NEXT: s_wqm_b64 exec, exec
1173 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1174 ; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
1175 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1176 ; VERDE-NEXT: ; return to shader part epilog
1178 ; GFX6789-LABEL: sample_c_b_cl_2d:
1179 ; GFX6789: ; %bb.0: ; %main_body
1180 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1181 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1182 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1183 ; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
1184 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1185 ; GFX6789-NEXT: ; return to shader part epilog
1187 ; GFX10PLUS-LABEL: sample_c_b_cl_2d:
1188 ; GFX10PLUS: ; %bb.0: ; %main_body
1189 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1190 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1191 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1192 ; GFX10PLUS-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1193 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1194 ; GFX10PLUS-NEXT: ; return to shader part epilog
1196 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.d.1d called with (%dsdh, %dsdv, %s).
; No exec / s_wqm setup appears in the expected output here: each check group
; starts directly with a single image_sample_d reading v[0:2]; the gfx10+
; group also carries dim SQ_RSRC_IMG_1D.
1200 define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
1201 ; VERDE-LABEL: sample_d_1d:
1202 ; VERDE: ; %bb.0: ; %main_body
1203 ; VERDE-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1204 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1205 ; VERDE-NEXT: ; return to shader part epilog
1207 ; GFX6789-LABEL: sample_d_1d:
1208 ; GFX6789: ; %bb.0: ; %main_body
1209 ; GFX6789-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1210 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1211 ; GFX6789-NEXT: ; return to shader part epilog
1213 ; GFX10PLUS-LABEL: sample_d_1d:
1214 ; GFX10PLUS: ; %bb.0: ; %main_body
1215 ; GFX10PLUS-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1216 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1217 ; GFX10PLUS-NEXT: ; return to shader part epilog
1219 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.d.2d called with
; (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t).
; No exec / s_wqm setup appears in the expected output here: each check group
; starts directly with a single image_sample_d reading v[0:5]; the gfx10+
; group also carries dim SQ_RSRC_IMG_2D.
1223 define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
1224 ; VERDE-LABEL: sample_d_2d:
1225 ; VERDE: ; %bb.0: ; %main_body
1226 ; VERDE-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf
1227 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1228 ; VERDE-NEXT: ; return to shader part epilog
1230 ; GFX6789-LABEL: sample_d_2d:
1231 ; GFX6789: ; %bb.0: ; %main_body
1232 ; GFX6789-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf
1233 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1234 ; GFX6789-NEXT: ; return to shader part epilog
1236 ; GFX10PLUS-LABEL: sample_d_2d:
1237 ; GFX10PLUS: ; %bb.0: ; %main_body
1238 ; GFX10PLUS-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1239 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1240 ; GFX10PLUS-NEXT: ; return to shader part epilog
1242 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.c.d.1d called with
; (%zcompare, %dsdh, %dsdv, %s).
; No exec / s_wqm setup appears in the expected output here: each check group
; starts directly with a single image_sample_c_d reading v[0:3]; the gfx10+
; group also carries dim SQ_RSRC_IMG_1D.
1246 define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
1247 ; VERDE-LABEL: sample_c_d_1d:
1248 ; VERDE: ; %bb.0: ; %main_body
1249 ; VERDE-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1250 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1251 ; VERDE-NEXT: ; return to shader part epilog
1253 ; GFX6789-LABEL: sample_c_d_1d:
1254 ; GFX6789: ; %bb.0: ; %main_body
1255 ; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1256 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1257 ; GFX6789-NEXT: ; return to shader part epilog
1259 ; GFX10PLUS-LABEL: sample_c_d_1d:
1260 ; GFX10PLUS: ; %bb.0: ; %main_body
1261 ; GFX10PLUS-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1262 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1263 ; GFX10PLUS-NEXT: ; return to shader part epilog
1265 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.c.d.2d called with
; (%zcompare, %dsdh, %dtdh, %dsdv, %dtdv, %s, %t).
; No exec / s_wqm setup appears in the expected output here: each check group
; starts directly with a single image_sample_c_d reading v[0:6]; the gfx10+
; group also carries dim SQ_RSRC_IMG_2D.
1269 define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
1270 ; VERDE-LABEL: sample_c_d_2d:
1271 ; VERDE: ; %bb.0: ; %main_body
1272 ; VERDE-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
1273 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1274 ; VERDE-NEXT: ; return to shader part epilog
1276 ; GFX6789-LABEL: sample_c_d_2d:
1277 ; GFX6789: ; %bb.0: ; %main_body
1278 ; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
1279 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1280 ; GFX6789-NEXT: ; return to shader part epilog
1282 ; GFX10PLUS-LABEL: sample_c_d_2d:
1283 ; GFX10PLUS: ; %bb.0: ; %main_body
1284 ; GFX10PLUS-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1285 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1286 ; GFX10PLUS-NEXT: ; return to shader part epilog
1288 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.d.cl.1d called with
; (%dsdh, %dsdv, %s, %clamp).
; No exec / s_wqm setup appears in the expected output here: each check group
; starts directly with a single image_sample_d_cl reading v[0:3]; the gfx10+
; group also carries dim SQ_RSRC_IMG_1D.
1292 define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
1293 ; VERDE-LABEL: sample_d_cl_1d:
1294 ; VERDE: ; %bb.0: ; %main_body
1295 ; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1296 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1297 ; VERDE-NEXT: ; return to shader part epilog
1299 ; GFX6789-LABEL: sample_d_cl_1d:
1300 ; GFX6789: ; %bb.0: ; %main_body
1301 ; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1302 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1303 ; GFX6789-NEXT: ; return to shader part epilog
1305 ; GFX10PLUS-LABEL: sample_d_cl_1d:
1306 ; GFX10PLUS: ; %bb.0: ; %main_body
1307 ; GFX10PLUS-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1308 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1309 ; GFX10PLUS-NEXT: ; return to shader part epilog
1311 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.d.cl.2d called with
; (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp).
; No exec / s_wqm setup appears in the expected output here: each check group
; starts directly with a single image_sample_d_cl reading v[0:6]; the gfx10+
; group also carries dim SQ_RSRC_IMG_2D.
1315 define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
1316 ; VERDE-LABEL: sample_d_cl_2d:
1317 ; VERDE: ; %bb.0: ; %main_body
1318 ; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
1319 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1320 ; VERDE-NEXT: ; return to shader part epilog
1322 ; GFX6789-LABEL: sample_d_cl_2d:
1323 ; GFX6789: ; %bb.0: ; %main_body
1324 ; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
1325 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1326 ; GFX6789-NEXT: ; return to shader part epilog
1328 ; GFX10PLUS-LABEL: sample_d_cl_2d:
1329 ; GFX10PLUS: ; %bb.0: ; %main_body
1330 ; GFX10PLUS-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1331 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1332 ; GFX10PLUS-NEXT: ; return to shader part epilog
1334 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.c.d.cl.1d called with
; (%zcompare, %dsdh, %dsdv, %s, %clamp).
; No exec / s_wqm setup appears in the expected output here: each check group
; starts directly with a single image_sample_c_d_cl reading v[0:4]; the
; gfx10+ group also carries dim SQ_RSRC_IMG_1D.
1338 define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
1339 ; VERDE-LABEL: sample_c_d_cl_1d:
1340 ; VERDE: ; %bb.0: ; %main_body
1341 ; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
1342 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1343 ; VERDE-NEXT: ; return to shader part epilog
1345 ; GFX6789-LABEL: sample_c_d_cl_1d:
1346 ; GFX6789: ; %bb.0: ; %main_body
1347 ; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
1348 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1349 ; GFX6789-NEXT: ; return to shader part epilog
1351 ; GFX10PLUS-LABEL: sample_c_d_cl_1d:
1352 ; GFX10PLUS: ; %bb.0: ; %main_body
1353 ; GFX10PLUS-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1354 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1355 ; GFX10PLUS-NEXT: ; return to shader part epilog
1357 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Covers @llvm.amdgcn.image.sample.c.d.cl.2d called with
; (%zcompare, %dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp).
; No exec / s_wqm setup appears in the expected output here: each check group
; starts directly with a single image_sample_c_d_cl reading v[0:7]; the
; gfx10+ group also carries dim SQ_RSRC_IMG_2D.
1361 define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
1362 ; VERDE-LABEL: sample_c_d_cl_2d:
1363 ; VERDE: ; %bb.0: ; %main_body
1364 ; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
1365 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1366 ; VERDE-NEXT: ; return to shader part epilog
1368 ; GFX6789-LABEL: sample_c_d_cl_2d:
1369 ; GFX6789: ; %bb.0: ; %main_body
1370 ; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
1371 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1372 ; GFX6789-NEXT: ; return to shader part epilog
1374 ; GFX10PLUS-LABEL: sample_c_d_cl_2d:
1375 ; GFX10PLUS: ; %bb.0: ; %main_body
1376 ; GFX10PLUS-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1377 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1378 ; GFX10PLUS-NEXT: ; return to shader part epilog
1380 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.l on a 1D image, all four channels (i32 15).
; %s and %lod are expected in v[0:1]. The expected output is a bare
; image_sample_l with no exec manipulation in front of it; gfx10+ additionally
; prints dim:SQ_RSRC_IMG_1D.
1384 define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
1385 ; VERDE-LABEL: sample_l_1d:
1386 ; VERDE: ; %bb.0: ; %main_body
1387 ; VERDE-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1388 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1389 ; VERDE-NEXT: ; return to shader part epilog
1391 ; GFX6789-LABEL: sample_l_1d:
1392 ; GFX6789: ; %bb.0: ; %main_body
1393 ; GFX6789-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1394 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1395 ; GFX6789-NEXT: ; return to shader part epilog
1397 ; GFX10PLUS-LABEL: sample_l_1d:
1398 ; GFX10PLUS: ; %bb.0: ; %main_body
1399 ; GFX10PLUS-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1400 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1401 ; GFX10PLUS-NEXT: ; return to shader part epilog
1403 %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.l on a 2D image, all four channels (i32 15).
; %s, %t and %lod are expected in v[0:2]. The expected output is a bare
; image_sample_l with no exec manipulation in front of it; gfx10+ additionally
; prints dim:SQ_RSRC_IMG_2D.
1407 define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
1408 ; VERDE-LABEL: sample_l_2d:
1409 ; VERDE: ; %bb.0: ; %main_body
1410 ; VERDE-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1411 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1412 ; VERDE-NEXT: ; return to shader part epilog
1414 ; GFX6789-LABEL: sample_l_2d:
1415 ; GFX6789: ; %bb.0: ; %main_body
1416 ; GFX6789-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1417 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1418 ; GFX6789-NEXT: ; return to shader part epilog
1420 ; GFX10PLUS-LABEL: sample_l_2d:
1421 ; GFX10PLUS: ; %bb.0: ; %main_body
1422 ; GFX10PLUS-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1423 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1424 ; GFX10PLUS-NEXT: ; return to shader part epilog
1426 %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.c.l on a 1D image, all four channels (i32 15).
; %zcompare, %s and %lod are expected in v[0:2]. The expected output is a bare
; image_sample_c_l with no exec manipulation in front of it; gfx10+ additionally
; prints dim:SQ_RSRC_IMG_1D.
1430 define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
1431 ; VERDE-LABEL: sample_c_l_1d:
1432 ; VERDE: ; %bb.0: ; %main_body
1433 ; VERDE-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1434 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1435 ; VERDE-NEXT: ; return to shader part epilog
1437 ; GFX6789-LABEL: sample_c_l_1d:
1438 ; GFX6789: ; %bb.0: ; %main_body
1439 ; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1440 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1441 ; GFX6789-NEXT: ; return to shader part epilog
1443 ; GFX10PLUS-LABEL: sample_c_l_1d:
1444 ; GFX10PLUS: ; %bb.0: ; %main_body
1445 ; GFX10PLUS-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1446 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1447 ; GFX10PLUS-NEXT: ; return to shader part epilog
1449 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.c.l on a 2D image, all four channels (i32 15).
; %zcompare, %s, %t and %lod are expected in v[0:3]. The expected output is a
; bare image_sample_c_l with no exec manipulation in front of it; gfx10+
; additionally prints dim:SQ_RSRC_IMG_2D.
1453 define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
1454 ; VERDE-LABEL: sample_c_l_2d:
1455 ; VERDE: ; %bb.0: ; %main_body
1456 ; VERDE-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1457 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1458 ; VERDE-NEXT: ; return to shader part epilog
1460 ; GFX6789-LABEL: sample_c_l_2d:
1461 ; GFX6789: ; %bb.0: ; %main_body
1462 ; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1463 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1464 ; GFX6789-NEXT: ; return to shader part epilog
1466 ; GFX10PLUS-LABEL: sample_c_l_2d:
1467 ; GFX10PLUS: ; %bb.0: ; %main_body
1468 ; GFX10PLUS-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1469 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1470 ; GFX10PLUS-NEXT: ; return to shader part epilog
1472 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.lz on a 1D image, all four channels (i32 15).
; The single coordinate %s is expected in v0. The expected output is a bare
; image_sample_lz with no exec manipulation in front of it; gfx10+ additionally
; prints dim:SQ_RSRC_IMG_1D.
1476 define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1477 ; VERDE-LABEL: sample_lz_1d:
1478 ; VERDE: ; %bb.0: ; %main_body
1479 ; VERDE-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
1480 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1481 ; VERDE-NEXT: ; return to shader part epilog
1483 ; GFX6789-LABEL: sample_lz_1d:
1484 ; GFX6789: ; %bb.0: ; %main_body
1485 ; GFX6789-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
1486 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1487 ; GFX6789-NEXT: ; return to shader part epilog
1489 ; GFX10PLUS-LABEL: sample_lz_1d:
1490 ; GFX10PLUS: ; %bb.0: ; %main_body
1491 ; GFX10PLUS-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1492 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1493 ; GFX10PLUS-NEXT: ; return to shader part epilog
1495 %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.lz on a 2D image, all four channels (i32 15).
; %s and %t are expected in v[0:1]. The expected output is a bare
; image_sample_lz with no exec manipulation in front of it; gfx10+ additionally
; prints dim:SQ_RSRC_IMG_2D.
1499 define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
1500 ; VERDE-LABEL: sample_lz_2d:
1501 ; VERDE: ; %bb.0: ; %main_body
1502 ; VERDE-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1503 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1504 ; VERDE-NEXT: ; return to shader part epilog
1506 ; GFX6789-LABEL: sample_lz_2d:
1507 ; GFX6789: ; %bb.0: ; %main_body
1508 ; GFX6789-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1509 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1510 ; GFX6789-NEXT: ; return to shader part epilog
1512 ; GFX10PLUS-LABEL: sample_lz_2d:
1513 ; GFX10PLUS: ; %bb.0: ; %main_body
1514 ; GFX10PLUS-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1515 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1516 ; GFX10PLUS-NEXT: ; return to shader part epilog
1518 %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.c.lz on a 1D image, all four channels (i32 15).
; %zcompare and %s are expected in v[0:1]. The expected output is a bare
; image_sample_c_lz with no exec manipulation in front of it; gfx10+
; additionally prints dim:SQ_RSRC_IMG_1D.
1522 define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
1523 ; VERDE-LABEL: sample_c_lz_1d:
1524 ; VERDE: ; %bb.0: ; %main_body
1525 ; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1526 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1527 ; VERDE-NEXT: ; return to shader part epilog
1529 ; GFX6789-LABEL: sample_c_lz_1d:
1530 ; GFX6789: ; %bb.0: ; %main_body
1531 ; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1532 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1533 ; GFX6789-NEXT: ; return to shader part epilog
1535 ; GFX10PLUS-LABEL: sample_c_lz_1d:
1536 ; GFX10PLUS: ; %bb.0: ; %main_body
1537 ; GFX10PLUS-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1538 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1539 ; GFX10PLUS-NEXT: ; return to shader part epilog
1541 %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.c.lz on a 2D image, all four channels (i32 15).
; %zcompare, %s and %t are expected in v[0:2]. The expected output is a bare
; image_sample_c_lz with no exec manipulation in front of it; gfx10+
; additionally prints dim:SQ_RSRC_IMG_2D.
1545 define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
1546 ; VERDE-LABEL: sample_c_lz_2d:
1547 ; VERDE: ; %bb.0: ; %main_body
1548 ; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1549 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1550 ; VERDE-NEXT: ; return to shader part epilog
1552 ; GFX6789-LABEL: sample_c_lz_2d:
1553 ; GFX6789: ; %bb.0: ; %main_body
1554 ; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1555 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1556 ; GFX6789-NEXT: ; return to shader part epilog
1558 ; GFX10PLUS-LABEL: sample_c_lz_2d:
1559 ; GFX10PLUS: ; %bb.0: ; %main_body
1560 ; GFX10PLUS-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1561 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1562 ; GFX10PLUS-NEXT: ; return to shader part epilog
1564 %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.c.d.o on a 2D array image returning a single
; float with dmask 4 (printed as dmask:0x4, result in v0 only).
; The call passes nine address operands (%offset, %zcompare, four derivative
; values, %s, %t, %slice), yet the expected instruction names the 16-register
; range v[0:15] as its address operand on every target.
; The array dimension is printed as the `da` modifier before gfx10 and as
; dim:SQ_RSRC_IMG_2D_ARRAY on gfx10+.
1568 define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
1569 ; VERDE-LABEL: sample_c_d_o_2darray_V1:
1570 ; VERDE: ; %bb.0: ; %main_body
1571 ; VERDE-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da
1572 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1573 ; VERDE-NEXT: ; return to shader part epilog
1575 ; GFX6789-LABEL: sample_c_d_o_2darray_V1:
1576 ; GFX6789: ; %bb.0: ; %main_body
1577 ; GFX6789-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da
1578 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1579 ; GFX6789-NEXT: ; return to shader part epilog
1581 ; GFX10PLUS-LABEL: sample_c_d_o_2darray_V1:
1582 ; GFX10PLUS: ; %bb.0: ; %main_body
1583 ; GFX10PLUS-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
1584 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1585 ; GFX10PLUS-NEXT: ; return to shader part epilog
1587 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
; Test: the {float, i32} form of llvm.amdgcn.image.sample.c.d.o on a 2D array
; image with dmask 4. The second-to-last i32 operand of the call is 1, and the
; expected instruction carries the `tfe` modifier.
; Expected shape of the output on every target:
;   - the two result registers v[9:10] are written with zero before the sample;
;   - after the wait, the first result dword (v9) is copied to v0;
;   - the second result dword (v10) is stored through the inreg pointer %out,
;     matching the IR store of the struct's i32 member.
; gfx11 uses its own check prefix here because it pairs two of the moves into
; a v_dual_mov_b32 and spells the store global_store_b32.
1591 define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, i32 addrspace(1)* inreg %out) {
1592 ; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe:
1593 ; VERDE: ; %bb.0: ; %main_body
1594 ; VERDE-NEXT: v_mov_b32_e32 v9, 0
1595 ; VERDE-NEXT: v_mov_b32_e32 v10, v9
1596 ; VERDE-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da
1597 ; VERDE-NEXT: s_mov_b32 s15, 0xf000
1598 ; VERDE-NEXT: s_mov_b32 s14, -1
1599 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1600 ; VERDE-NEXT: v_mov_b32_e32 v0, v9
1601 ; VERDE-NEXT: buffer_store_dword v10, off, s[12:15], 0
1602 ; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1603 ; VERDE-NEXT: ; return to shader part epilog
1605 ; GFX6789-LABEL: sample_c_d_o_2darray_V1_tfe:
1606 ; GFX6789: ; %bb.0: ; %main_body
1607 ; GFX6789-NEXT: v_mov_b32_e32 v11, 0
1608 ; GFX6789-NEXT: v_mov_b32_e32 v12, v11
1609 ; GFX6789-NEXT: v_mov_b32_e32 v9, v11
1610 ; GFX6789-NEXT: v_mov_b32_e32 v10, v12
1611 ; GFX6789-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da
1612 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1613 ; GFX6789-NEXT: v_mov_b32_e32 v0, v9
1614 ; GFX6789-NEXT: global_store_dword v11, v10, s[12:13]
1615 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1616 ; GFX6789-NEXT: ; return to shader part epilog
1618 ; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe:
1619 ; GFX10: ; %bb.0: ; %main_body
1620 ; GFX10-NEXT: v_mov_b32_e32 v11, 0
1621 ; GFX10-NEXT: v_mov_b32_e32 v12, v11
1622 ; GFX10-NEXT: v_mov_b32_e32 v9, v11
1623 ; GFX10-NEXT: v_mov_b32_e32 v10, v12
1624 ; GFX10-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
1625 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1626 ; GFX10-NEXT: v_mov_b32_e32 v0, v9
1627 ; GFX10-NEXT: global_store_dword v11, v10, s[12:13]
1628 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1629 ; GFX10-NEXT: ; return to shader part epilog
1631 ; GFX11-LABEL: sample_c_d_o_2darray_V1_tfe:
1632 ; GFX11: ; %bb.0: ; %main_body
1633 ; GFX11-NEXT: v_mov_b32_e32 v11, 0
1634 ; GFX11-NEXT: v_mov_b32_e32 v12, v11
1635 ; GFX11-NEXT: v_dual_mov_b32 v9, v11 :: v_dual_mov_b32 v10, v12
1636 ; GFX11-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
1637 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1638 ; GFX11-NEXT: v_mov_b32_e32 v0, v9
1639 ; GFX11-NEXT: global_store_b32 v11, v10, s[12:13]
1640 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1641 ; GFX11-NEXT: ; return to shader part epilog
1643 %v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
1644 %v.vec = extractvalue {float, i32} %v, 0
1645 %v.err = extractvalue {float, i32} %v, 1
1646 store i32 %v.err, i32 addrspace(1)* %out, align 4
; Test: llvm.amdgcn.image.sample.c.d.o on a 2D array image returning
; <2 x float> with dmask 6 (printed as dmask:0x6, result in v[0:1]).
; As in the single-channel variant, the expected address operand is the
; 16-register range v[0:15]. The array dimension is printed as `da` before
; gfx10 and as dim:SQ_RSRC_IMG_2D_ARRAY on gfx10+.
1650 define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
1651 ; VERDE-LABEL: sample_c_d_o_2darray_V2:
1652 ; VERDE: ; %bb.0: ; %main_body
1653 ; VERDE-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da
1654 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1655 ; VERDE-NEXT: ; return to shader part epilog
1657 ; GFX6789-LABEL: sample_c_d_o_2darray_V2:
1658 ; GFX6789: ; %bb.0: ; %main_body
1659 ; GFX6789-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da
1660 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1661 ; GFX6789-NEXT: ; return to shader part epilog
1663 ; GFX10PLUS-LABEL: sample_c_d_o_2darray_V2:
1664 ; GFX10PLUS: ; %bb.0: ; %main_body
1665 ; GFX10PLUS-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
1666 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1667 ; GFX10PLUS-NEXT: ; return to shader part epilog
1669 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: the {<2 x float>, i32} form of llvm.amdgcn.image.sample.c.d.o on a
; 2D array image with dmask 6. The second-to-last i32 operand of the call is 1,
; and the expected instruction carries the `tfe` modifier.
; Unlike the V1 tfe variant there is no %out pointer: the IR bitcasts the i32
; struct member to float and returns it in lane 2 of a <4 x float>, next to
; the two sampled floats in lanes 0 and 1 (lane 3 is never written).
; Expected shape of the output on every target: v[9:11] are written with zero
; before the sample, then copied to v0..v2 after the wait.
; gfx11 uses its own check prefix because it pairs two of the final moves into
; a v_dual_mov_b32.
1673 define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
1674 ; VERDE-LABEL: sample_c_d_o_2darray_V2_tfe:
1675 ; VERDE: ; %bb.0: ; %main_body
1676 ; VERDE-NEXT: v_mov_b32_e32 v9, 0
1677 ; VERDE-NEXT: v_mov_b32_e32 v10, v9
1678 ; VERDE-NEXT: v_mov_b32_e32 v11, v9
1679 ; VERDE-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da
1680 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1681 ; VERDE-NEXT: v_mov_b32_e32 v0, v9
1682 ; VERDE-NEXT: v_mov_b32_e32 v1, v10
1683 ; VERDE-NEXT: v_mov_b32_e32 v2, v11
1684 ; VERDE-NEXT: ; return to shader part epilog
1686 ; GFX6789-LABEL: sample_c_d_o_2darray_V2_tfe:
1687 ; GFX6789: ; %bb.0: ; %main_body
1688 ; GFX6789-NEXT: v_mov_b32_e32 v9, 0
1689 ; GFX6789-NEXT: v_mov_b32_e32 v10, v9
1690 ; GFX6789-NEXT: v_mov_b32_e32 v11, v9
1691 ; GFX6789-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da
1692 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1693 ; GFX6789-NEXT: v_mov_b32_e32 v0, v9
1694 ; GFX6789-NEXT: v_mov_b32_e32 v1, v10
1695 ; GFX6789-NEXT: v_mov_b32_e32 v2, v11
1696 ; GFX6789-NEXT: ; return to shader part epilog
1698 ; GFX10-LABEL: sample_c_d_o_2darray_V2_tfe:
1699 ; GFX10: ; %bb.0: ; %main_body
1700 ; GFX10-NEXT: v_mov_b32_e32 v9, 0
1701 ; GFX10-NEXT: v_mov_b32_e32 v10, v9
1702 ; GFX10-NEXT: v_mov_b32_e32 v11, v9
1703 ; GFX10-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
1704 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1705 ; GFX10-NEXT: v_mov_b32_e32 v0, v9
1706 ; GFX10-NEXT: v_mov_b32_e32 v1, v10
1707 ; GFX10-NEXT: v_mov_b32_e32 v2, v11
1708 ; GFX10-NEXT: ; return to shader part epilog
1710 ; GFX11-LABEL: sample_c_d_o_2darray_V2_tfe:
1711 ; GFX11: ; %bb.0: ; %main_body
1712 ; GFX11-NEXT: v_mov_b32_e32 v9, 0
1713 ; GFX11-NEXT: v_mov_b32_e32 v10, v9
1714 ; GFX11-NEXT: v_mov_b32_e32 v11, v9
1715 ; GFX11-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
1716 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1717 ; GFX11-NEXT: v_mov_b32_e32 v2, v11
1718 ; GFX11-NEXT: v_dual_mov_b32 v0, v9 :: v_dual_mov_b32 v1, v10
1719 ; GFX11-NEXT: ; return to shader part epilog
1721 %v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
1722 %v.vec = extractvalue {<2 x float>, i32} %v, 0
1723 %v.f1 = extractelement <2 x float> %v.vec, i32 0
1724 %v.f2 = extractelement <2 x float> %v.vec, i32 1
1725 %v.err = extractvalue {<2 x float>, i32} %v, 1
1726 %v.errf = bitcast i32 %v.err to float
1727 %res.0 = insertelement <4 x float> undef, float %v.f1, i32 0
1728 %res.1 = insertelement <4 x float> %res.0, float %v.f2, i32 1
1729 %res.2 = insertelement <4 x float> %res.1, float %v.errf, i32 2
1730 ret <4 x float> %res.2
; Test: llvm.amdgcn.image.sample.1d with the i1 operand set to 1; the expected
; instruction carries the `unorm` modifier.
; The expected prologue saves exec, applies s_wqm to it, and ANDs exec with the
; saved mask again before the sample (64-bit exec before gfx10, exec_lo on
; gfx10+).
1733 define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1734 ; VERDE-LABEL: sample_1d_unorm:
1735 ; VERDE: ; %bb.0: ; %main_body
1736 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1737 ; VERDE-NEXT: s_wqm_b64 exec, exec
1738 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1739 ; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
1740 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1741 ; VERDE-NEXT: ; return to shader part epilog
1743 ; GFX6789-LABEL: sample_1d_unorm:
1744 ; GFX6789: ; %bb.0: ; %main_body
1745 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1746 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1747 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1748 ; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
1749 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1750 ; GFX6789-NEXT: ; return to shader part epilog
1752 ; GFX10PLUS-LABEL: sample_1d_unorm:
1753 ; GFX10PLUS: ; %bb.0: ; %main_body
1754 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1755 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1756 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1757 ; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
1758 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1759 ; GFX10PLUS-NEXT: ; return to shader part epilog
1761 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.1d with the final i32 operand set to 1; the
; expected instruction carries the `glc` modifier.
; The expected prologue saves exec, applies s_wqm to it, and ANDs exec with the
; saved mask again before the sample.
1765 define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1766 ; VERDE-LABEL: sample_1d_glc:
1767 ; VERDE: ; %bb.0: ; %main_body
1768 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1769 ; VERDE-NEXT: s_wqm_b64 exec, exec
1770 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1771 ; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
1772 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1773 ; VERDE-NEXT: ; return to shader part epilog
1775 ; GFX6789-LABEL: sample_1d_glc:
1776 ; GFX6789: ; %bb.0: ; %main_body
1777 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1778 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1779 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1780 ; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
1781 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1782 ; GFX6789-NEXT: ; return to shader part epilog
1784 ; GFX10PLUS-LABEL: sample_1d_glc:
1785 ; GFX10PLUS: ; %bb.0: ; %main_body
1786 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1787 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1788 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1789 ; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc
1790 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1791 ; GFX10PLUS-NEXT: ; return to shader part epilog
1793 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1)
; Test: llvm.amdgcn.image.sample.1d with the final i32 operand set to 2; the
; expected instruction carries the `slc` modifier.
; The expected prologue saves exec, applies s_wqm to it, and ANDs exec with the
; saved mask again before the sample.
1797 define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1798 ; VERDE-LABEL: sample_1d_slc:
1799 ; VERDE: ; %bb.0: ; %main_body
1800 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1801 ; VERDE-NEXT: s_wqm_b64 exec, exec
1802 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1803 ; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
1804 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1805 ; VERDE-NEXT: ; return to shader part epilog
1807 ; GFX6789-LABEL: sample_1d_slc:
1808 ; GFX6789: ; %bb.0: ; %main_body
1809 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1810 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1811 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1812 ; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
1813 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1814 ; GFX6789-NEXT: ; return to shader part epilog
1816 ; GFX10PLUS-LABEL: sample_1d_slc:
1817 ; GFX10PLUS: ; %bb.0: ; %main_body
1818 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1819 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1820 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1821 ; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc
1822 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1823 ; GFX10PLUS-NEXT: ; return to shader part epilog
1825 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 2)
; Test: llvm.amdgcn.image.sample.1d with the final i32 operand set to 3; the
; expected instruction carries both the `glc` and `slc` modifiers.
; The expected prologue saves exec, applies s_wqm to it, and ANDs exec with the
; saved mask again before the sample.
1829 define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1830 ; VERDE-LABEL: sample_1d_glc_slc:
1831 ; VERDE: ; %bb.0: ; %main_body
1832 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1833 ; VERDE-NEXT: s_wqm_b64 exec, exec
1834 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1835 ; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
1836 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1837 ; VERDE-NEXT: ; return to shader part epilog
1839 ; GFX6789-LABEL: sample_1d_glc_slc:
1840 ; GFX6789: ; %bb.0: ; %main_body
1841 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1842 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1843 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1844 ; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
1845 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1846 ; GFX6789-NEXT: ; return to shader part epilog
1848 ; GFX10PLUS-LABEL: sample_1d_glc_slc:
1849 ; GFX10PLUS: ; %bb.0: ; %main_body
1850 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1851 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1852 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1853 ; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc
1854 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1855 ; GFX10PLUS-NEXT: ; return to shader part epilog
1857 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 3)
; Test: dmask narrowing when only element 0 of the sample result is used.
; The IR requests all four channels (i32 15) but extracts only element 0; the
; expected instruction is narrowed to dmask:0x1 with a single result register
; (v0).
1861 define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1862 ; VERDE-LABEL: adjust_writemask_sample_0:
1863 ; VERDE: ; %bb.0: ; %main_body
1864 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1865 ; VERDE-NEXT: s_wqm_b64 exec, exec
1866 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1867 ; VERDE-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
1868 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1869 ; VERDE-NEXT: ; return to shader part epilog
1871 ; GFX6789-LABEL: adjust_writemask_sample_0:
1872 ; GFX6789: ; %bb.0: ; %main_body
1873 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1874 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1875 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1876 ; GFX6789-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
1877 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1878 ; GFX6789-NEXT: ; return to shader part epilog
1880 ; GFX10PLUS-LABEL: adjust_writemask_sample_0:
1881 ; GFX10PLUS: ; %bb.0: ; %main_body
1882 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1883 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1884 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1885 ; GFX10PLUS-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
1886 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1887 ; GFX10PLUS-NEXT: ; return to shader part epilog
1889 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1890 %elt0 = extractelement <4 x float> %r, i32 0
; Test: dmask narrowing when only elements 0 and 1 of the sample result are
; used. The IR requests all four channels (i32 15) and shuffles out <0, 1>;
; the expected instruction is narrowed to dmask:0x3 with result v[0:1].
1894 define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1895 ; VERDE-LABEL: adjust_writemask_sample_01:
1896 ; VERDE: ; %bb.0: ; %main_body
1897 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1898 ; VERDE-NEXT: s_wqm_b64 exec, exec
1899 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1900 ; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
1901 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1902 ; VERDE-NEXT: ; return to shader part epilog
1904 ; GFX6789-LABEL: adjust_writemask_sample_01:
1905 ; GFX6789: ; %bb.0: ; %main_body
1906 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1907 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1908 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1909 ; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
1910 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1911 ; GFX6789-NEXT: ; return to shader part epilog
1913 ; GFX10PLUS-LABEL: adjust_writemask_sample_01:
1914 ; GFX10PLUS: ; %bb.0: ; %main_body
1915 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1916 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1917 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1918 ; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D
1919 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1920 ; GFX10PLUS-NEXT: ; return to shader part epilog
1922 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1923 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1924 ret <2 x float> %out
; Test: dmask narrowing when only elements 0, 1 and 2 of the sample result are
; used. The IR requests all four channels (i32 15) and shuffles out <0, 1, 2>;
; the expected instruction is narrowed to dmask:0x7 with result v[0:2].
1927 define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1928 ; VERDE-LABEL: adjust_writemask_sample_012:
1929 ; VERDE: ; %bb.0: ; %main_body
1930 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1931 ; VERDE-NEXT: s_wqm_b64 exec, exec
1932 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1933 ; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
1934 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1935 ; VERDE-NEXT: ; return to shader part epilog
1937 ; GFX6789-LABEL: adjust_writemask_sample_012:
1938 ; GFX6789: ; %bb.0: ; %main_body
1939 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1940 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1941 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1942 ; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
1943 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1944 ; GFX6789-NEXT: ; return to shader part epilog
1946 ; GFX10PLUS-LABEL: adjust_writemask_sample_012:
1947 ; GFX10PLUS: ; %bb.0: ; %main_body
1948 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1949 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1950 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1951 ; GFX10PLUS-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_1D
1952 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1953 ; GFX10PLUS-NEXT: ; return to shader part epilog
1955 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1956 %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
1957 ret <3 x float> %out
; Test: dmask narrowing when only elements 1 and 2 of the sample result are
; used. The IR requests all four channels (i32 15) and shuffles out <1, 2>;
; the expected instruction is narrowed to dmask:0x6, and the two remaining
; channels land in v[0:1].
1960 define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1961 ; VERDE-LABEL: adjust_writemask_sample_12:
1962 ; VERDE: ; %bb.0: ; %main_body
1963 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1964 ; VERDE-NEXT: s_wqm_b64 exec, exec
1965 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1966 ; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
1967 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1968 ; VERDE-NEXT: ; return to shader part epilog
1970 ; GFX6789-LABEL: adjust_writemask_sample_12:
1971 ; GFX6789: ; %bb.0: ; %main_body
1972 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1973 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1974 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1975 ; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
1976 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1977 ; GFX6789-NEXT: ; return to shader part epilog
1979 ; GFX10PLUS-LABEL: adjust_writemask_sample_12:
1980 ; GFX10PLUS: ; %bb.0: ; %main_body
1981 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1982 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1983 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1984 ; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
1985 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1986 ; GFX10PLUS-NEXT: ; return to shader part epilog
1988 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1989 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
1990 ret <2 x float> %out
; Test: dmask narrowing with a non-contiguous selection. The IR requests all
; four channels (i32 15) and shuffles out <0, 3>; the expected instruction is
; narrowed to dmask:0x9, and the two remaining channels land in v[0:1].
1993 define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1994 ; VERDE-LABEL: adjust_writemask_sample_03:
1995 ; VERDE: ; %bb.0: ; %main_body
1996 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1997 ; VERDE-NEXT: s_wqm_b64 exec, exec
1998 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1999 ; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
2000 ; VERDE-NEXT: s_waitcnt vmcnt(0)
2001 ; VERDE-NEXT: ; return to shader part epilog
2003 ; GFX6789-LABEL: adjust_writemask_sample_03:
2004 ; GFX6789: ; %bb.0: ; %main_body
2005 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
2006 ; GFX6789-NEXT: s_wqm_b64 exec, exec
2007 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
2008 ; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
2009 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
2010 ; GFX6789-NEXT: ; return to shader part epilog
2012 ; GFX10PLUS-LABEL: adjust_writemask_sample_03:
2013 ; GFX10PLUS: ; %bb.0: ; %main_body
2014 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
2015 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
2016 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
2017 ; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 dim:SQ_RSRC_IMG_1D
2018 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2019 ; GFX10PLUS-NEXT: ; return to shader part epilog
2021 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2022 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3>
2023 ret <2 x float> %out
; Test: dmask narrowing with a non-contiguous selection. The IR requests all
; four channels (i32 15) and shuffles out <1, 3>; the expected instruction is
; narrowed to dmask:0xa, and the two remaining channels land in v[0:1].
2026 define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2027 ; VERDE-LABEL: adjust_writemask_sample_13:
2028 ; VERDE: ; %bb.0: ; %main_body
2029 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
2030 ; VERDE-NEXT: s_wqm_b64 exec, exec
2031 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
2032 ; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
2033 ; VERDE-NEXT: s_waitcnt vmcnt(0)
2034 ; VERDE-NEXT: ; return to shader part epilog
2036 ; GFX6789-LABEL: adjust_writemask_sample_13:
2037 ; GFX6789: ; %bb.0: ; %main_body
2038 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
2039 ; GFX6789-NEXT: s_wqm_b64 exec, exec
2040 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
2041 ; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
2042 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
2043 ; GFX6789-NEXT: ; return to shader part epilog
2045 ; GFX10PLUS-LABEL: adjust_writemask_sample_13:
2046 ; GFX10PLUS: ; %bb.0: ; %main_body
2047 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
2048 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
2049 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
2050 ; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
2051 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2052 ; GFX10PLUS-NEXT: ; return to shader part epilog
2054 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2055 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3>
2056 ret <2 x float> %out
; Test: dmask narrowing when element 0 of the sample result is the only one
; dropped. The IR requests all four channels (i32 15) and shuffles out
; <1, 2, 3>; the expected instruction is narrowed to dmask:0xe, and the three
; remaining channels land in v[0:2].
2059 define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2060 ; VERDE-LABEL: adjust_writemask_sample_123:
2061 ; VERDE: ; %bb.0: ; %main_body
2062 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
2063 ; VERDE-NEXT: s_wqm_b64 exec, exec
2064 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
2065 ; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
2066 ; VERDE-NEXT: s_waitcnt vmcnt(0)
2067 ; VERDE-NEXT: ; return to shader part epilog
2069 ; GFX6789-LABEL: adjust_writemask_sample_123:
2070 ; GFX6789: ; %bb.0: ; %main_body
2071 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
2072 ; GFX6789-NEXT: s_wqm_b64 exec, exec
2073 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
2074 ; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
2075 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
2076 ; GFX6789-NEXT: ; return to shader part epilog
2078 ; GFX10PLUS-LABEL: adjust_writemask_sample_123:
2079 ; GFX10PLUS: ; %bb.0: ; %main_body
2080 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
2081 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
2082 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
2083 ; GFX10PLUS-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe dim:SQ_RSRC_IMG_1D
2084 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2085 ; GFX10PLUS-NEXT: ; return to shader part epilog
2087 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2088 %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
2089 ret <3 x float> %out
; Empty-mask test: the sample is requested with first operand 0, i.e. no
; result channel enabled. The checks on every target expect the block entry
; to be followed directly by the epilog marker, so no image_sample (and no
; exec/WQM setup or s_waitcnt) may be emitted.
2092 define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2093 ; VERDE-LABEL: adjust_writemask_sample_none_enabled:
2094 ; VERDE: ; %bb.0: ; %main_body
2095 ; VERDE-NEXT: ; return to shader part epilog
2097 ; GFX6789-LABEL: adjust_writemask_sample_none_enabled:
2098 ; GFX6789: ; %bb.0: ; %main_body
2099 ; GFX6789-NEXT: ; return to shader part epilog
2101 ; GFX10PLUS-LABEL: adjust_writemask_sample_none_enabled:
2102 ; GFX10PLUS: ; %bb.0: ; %main_body
2103 ; GFX10PLUS-NEXT: ; return to shader part epilog
2105 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Write-mask narrowing starting from a partial mask: the sample is requested
; with first operand 14 (0b1110, three channels enabled) and only the first
; two elements of the result (indices 0 and 1) are returned. The checks on
; every target expect the mask to shrink to dmask:0x6 with a two-register
; destination (v[0:1]).
2109 define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2110 ; VERDE-LABEL: adjust_writemask_sample_123_to_12:
2111 ; VERDE: ; %bb.0: ; %main_body
2112 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
2113 ; VERDE-NEXT: s_wqm_b64 exec, exec
2114 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
2115 ; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
2116 ; VERDE-NEXT: s_waitcnt vmcnt(0)
2117 ; VERDE-NEXT: ; return to shader part epilog
2119 ; GFX6789-LABEL: adjust_writemask_sample_123_to_12:
2120 ; GFX6789: ; %bb.0: ; %main_body
2121 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
2122 ; GFX6789-NEXT: s_wqm_b64 exec, exec
2123 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
2124 ; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
2125 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
2126 ; GFX6789-NEXT: ; return to shader part epilog
2128 ; GFX10PLUS-LABEL: adjust_writemask_sample_123_to_12:
2129 ; GFX10PLUS: ; %bb.0: ; %main_body
2130 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
2131 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
2132 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
2133 ; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
2134 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2135 ; GFX10PLUS-NEXT: ; return to shader part epilog
2137 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2138 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
2139 ret <2 x float> %out
; Write-mask narrowing starting from a non-contiguous mask: the sample is
; requested with first operand 11 (0b1011, three channels enabled) and only
; result elements 1 and 2 are returned. The checks on every target expect
; the mask to shrink to dmask:0xa with a two-register destination (v[0:1]).
2142 define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2143 ; VERDE-LABEL: adjust_writemask_sample_013_to_13:
2144 ; VERDE: ; %bb.0: ; %main_body
2145 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
2146 ; VERDE-NEXT: s_wqm_b64 exec, exec
2147 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
2148 ; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
2149 ; VERDE-NEXT: s_waitcnt vmcnt(0)
2150 ; VERDE-NEXT: ; return to shader part epilog
2152 ; GFX6789-LABEL: adjust_writemask_sample_013_to_13:
2153 ; GFX6789: ; %bb.0: ; %main_body
2154 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
2155 ; GFX6789-NEXT: s_wqm_b64 exec, exec
2156 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
2157 ; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
2158 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
2159 ; GFX6789-NEXT: ; return to shader part epilog
2161 ; GFX10PLUS-LABEL: adjust_writemask_sample_013_to_13:
2162 ; GFX10PLUS: ; %bb.0: ; %main_body
2163 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
2164 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
2165 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
2166 ; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
2167 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2168 ; GFX10PLUS-NEXT: ; return to shader part epilog
2170 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2171 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3>
2172 ret <2 x float> %out
; Declarations of the llvm.amdgcn.image.sample.* intrinsics. Every
; declaration below is tagged with attribute group #1.

; Plain image.sample, one declaration per dimension (1d, 2d, 3d, cube,
; 1darray, 2darray), plus a 1d variant returning {<4 x float>, i32}.
2175 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2176 declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2177 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2178 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2179 declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2180 declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2181 declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; image.sample.c, .cl and .c.cl, each in 1d and 2d.
2183 declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2184 declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2185 declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2186 declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2187 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2188 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; image.sample.b family (.b, .c.b, .b.cl, .c.b.cl), each in 1d and 2d.
2190 declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2191 declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2192 declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2193 declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2194 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2195 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2196 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2197 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; image.sample.d family (.d, .c.d, .d.cl, .c.d.cl), each in 1d and 2d.
2199 declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2200 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2201 declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2202 declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2203 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2204 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2205 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2206 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; image.sample.l and .c.l, each in 1d and 2d.
2208 declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2209 declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2210 declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2211 declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; image.sample.lz and .c.lz, each in 1d and 2d.
2213 declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2214 declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2215 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2216 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; image.sample.c.d.o.2darray in four return shapes: float, {float, i32},
; <2 x float> and {<2 x float>, i32}.
2218 declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2219 declare {float, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2220 declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2221 declare {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; Attribute groups referenced by number in this file. #1 (nounwind readonly)
; is the group attached to the llvm.amdgcn.image.sample.* declarations.
2223 attributes #0 = { nounwind }
2224 attributes #1 = { nounwind readonly }
2225 attributes #2 = { nounwind readnone }