1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
5 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; Baseline 1D sample: the intrinsic is called with mask 15 and both trailing
; i32 operands 0. Every target is expected to emit a single image_sample with
; dmask:0xf that writes v[0:3] from the one coordinate in v0, preceded by the
; s_wqm / s_and exec sequence. The GFX10PLUS checks additionally expect
; dim:SQ_RSRC_IMG_1D on the instruction.
7 define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
8 ; VERDE-LABEL: sample_1d:
9 ; VERDE: ; %bb.0: ; %main_body
10 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
11 ; VERDE-NEXT: s_wqm_b64 exec, exec
12 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
13 ; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
14 ; VERDE-NEXT: s_waitcnt vmcnt(0)
15 ; VERDE-NEXT: ; return to shader part epilog
17 ; GFX6789-LABEL: sample_1d:
18 ; GFX6789: ; %bb.0: ; %main_body
19 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
20 ; GFX6789-NEXT: s_wqm_b64 exec, exec
21 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
22 ; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
23 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
24 ; GFX6789-NEXT: ; return to shader part epilog
26 ; GFX10PLUS-LABEL: sample_1d:
27 ; GFX10PLUS: ; %bb.0: ; %main_body
28 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
29 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
30 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
31 ; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
32 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
33 ; GFX10PLUS-NEXT: ; return to shader part epilog
35 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; 1D sample with the second-to-last intrinsic operand set to 1, which appears
; as the `tfe` modifier in the expected output. The call returns
; {<4 x float>, i32}. The checks expect the five destination VGPRs v[0:4] to
; be zeroed before the sample, and the fifth dword (v4, the i32 member) to be
; stored to %out while v[0:3] is returned.
39 define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
40 ; VERDE-LABEL: sample_1d_tfe:
41 ; VERDE: ; %bb.0: ; %main_body
42 ; VERDE-NEXT: s_mov_b64 s[14:15], exec
43 ; VERDE-NEXT: s_wqm_b64 exec, exec
44 ; VERDE-NEXT: v_mov_b32_e32 v5, v0
45 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
46 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
47 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
48 ; VERDE-NEXT: v_mov_b32_e32 v3, v0
49 ; VERDE-NEXT: v_mov_b32_e32 v4, v0
50 ; VERDE-NEXT: s_and_b64 exec, exec, s[14:15]
51 ; VERDE-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
52 ; VERDE-NEXT: s_mov_b32 s15, 0xf000
53 ; VERDE-NEXT: s_mov_b32 s14, -1
54 ; VERDE-NEXT: s_waitcnt vmcnt(0)
55 ; VERDE-NEXT: buffer_store_dword v4, off, s[12:15], 0
56 ; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
57 ; VERDE-NEXT: ; return to shader part epilog
59 ; GFX6789-LABEL: sample_1d_tfe:
60 ; GFX6789: ; %bb.0: ; %main_body
61 ; GFX6789-NEXT: s_mov_b64 s[14:15], exec
62 ; GFX6789-NEXT: s_wqm_b64 exec, exec
63 ; GFX6789-NEXT: v_mov_b32_e32 v6, 0
64 ; GFX6789-NEXT: v_mov_b32_e32 v5, v0
65 ; GFX6789-NEXT: v_mov_b32_e32 v7, v6
66 ; GFX6789-NEXT: v_mov_b32_e32 v8, v6
67 ; GFX6789-NEXT: v_mov_b32_e32 v9, v6
68 ; GFX6789-NEXT: v_mov_b32_e32 v10, v6
69 ; GFX6789-NEXT: v_mov_b32_e32 v0, v6
70 ; GFX6789-NEXT: v_mov_b32_e32 v1, v7
71 ; GFX6789-NEXT: v_mov_b32_e32 v2, v8
72 ; GFX6789-NEXT: v_mov_b32_e32 v3, v9
73 ; GFX6789-NEXT: v_mov_b32_e32 v4, v10
74 ; GFX6789-NEXT: s_and_b64 exec, exec, s[14:15]
75 ; GFX6789-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
76 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
77 ; GFX6789-NEXT: global_store_dword v6, v4, s[12:13]
78 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
79 ; GFX6789-NEXT: ; return to shader part epilog
81 ; GFX10-LABEL: sample_1d_tfe:
82 ; GFX10: ; %bb.0: ; %main_body
83 ; GFX10-NEXT: s_mov_b32 s14, exec_lo
84 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
85 ; GFX10-NEXT: v_mov_b32_e32 v6, 0
86 ; GFX10-NEXT: v_mov_b32_e32 v5, v0
87 ; GFX10-NEXT: v_mov_b32_e32 v7, v6
88 ; GFX10-NEXT: v_mov_b32_e32 v8, v6
89 ; GFX10-NEXT: v_mov_b32_e32 v9, v6
90 ; GFX10-NEXT: v_mov_b32_e32 v10, v6
91 ; GFX10-NEXT: v_mov_b32_e32 v0, v6
92 ; GFX10-NEXT: v_mov_b32_e32 v1, v7
93 ; GFX10-NEXT: v_mov_b32_e32 v2, v8
94 ; GFX10-NEXT: v_mov_b32_e32 v3, v9
95 ; GFX10-NEXT: v_mov_b32_e32 v4, v10
96 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14
97 ; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
98 ; GFX10-NEXT: s_waitcnt vmcnt(0)
99 ; GFX10-NEXT: global_store_dword v6, v4, s[12:13]
100 ; GFX10-NEXT: ; return to shader part epilog
102 ; GFX11-LABEL: sample_1d_tfe:
103 ; GFX11: ; %bb.0: ; %main_body
104 ; GFX11-NEXT: s_mov_b32 s14, exec_lo
105 ; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
106 ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0
107 ; GFX11-NEXT: v_mov_b32_e32 v7, v6
108 ; GFX11-NEXT: v_mov_b32_e32 v8, v6
109 ; GFX11-NEXT: v_mov_b32_e32 v9, v6
110 ; GFX11-NEXT: v_mov_b32_e32 v10, v6
111 ; GFX11-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
112 ; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9
113 ; GFX11-NEXT: v_mov_b32_e32 v4, v10
114 ; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s14
115 ; GFX11-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
116 ; GFX11-NEXT: s_waitcnt vmcnt(0)
117 ; GFX11-NEXT: global_store_b32 v6, v4, s[12:13]
118 ; GFX11-NEXT: ; return to shader part epilog
120 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
121 %v.vec = extractvalue {<4 x float>, i32} %v, 0
122 %v.err = extractvalue {<4 x float>, i32} %v, 1
123 store i32 %v.err, ptr addrspace(1) %out, align 4
124 ret <4 x float> %v.vec
; Write-mask narrowing with TFE: the call requests mask 15, but only element 0
; of the vector and the i32 struct member are used, so the checks expect
; dmask:0x1 and a two-register destination v[0:1].
; NOTE(review): %out is not referenced by any visible instruction of this
; function, and the sibling adjust_writemask tests do not take it - confirm
; whether the parameter is intentional.
127 define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
128 ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_1:
129 ; VERDE: ; %bb.0: ; %main_body
130 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
131 ; VERDE-NEXT: s_wqm_b64 exec, exec
132 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
133 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
134 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
135 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
136 ; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
137 ; VERDE-NEXT: s_waitcnt vmcnt(0)
138 ; VERDE-NEXT: ; return to shader part epilog
140 ; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_1:
141 ; GFX6789: ; %bb.0: ; %main_body
142 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
143 ; GFX6789-NEXT: s_wqm_b64 exec, exec
144 ; GFX6789-NEXT: v_mov_b32_e32 v2, v0
145 ; GFX6789-NEXT: v_mov_b32_e32 v0, 0
146 ; GFX6789-NEXT: v_mov_b32_e32 v1, v0
147 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
148 ; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
149 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
150 ; GFX6789-NEXT: ; return to shader part epilog
152 ; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_1:
153 ; GFX10PLUS: ; %bb.0: ; %main_body
154 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
155 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
156 ; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
157 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
158 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
159 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
160 ; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
161 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
162 ; GFX10PLUS-NEXT: ; return to shader part epilog
164 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
165 %res.vec = extractvalue {<4 x float>,i32} %v, 0
166 %res.f = extractelement <4 x float> %res.vec, i32 0
167 %res.err = extractvalue {<4 x float>,i32} %v, 1
168 %res.errf = bitcast i32 %res.err to float
169 %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
170 %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
; Write-mask narrowing with TFE: the call requests mask 15, but only element 1
; of the vector and the i32 struct member are used, so the checks expect
; dmask:0x2 and a two-register destination v[0:1].
174 define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
175 ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_2:
176 ; VERDE: ; %bb.0: ; %main_body
177 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
178 ; VERDE-NEXT: s_wqm_b64 exec, exec
179 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
180 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
181 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
182 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
183 ; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
184 ; VERDE-NEXT: s_waitcnt vmcnt(0)
185 ; VERDE-NEXT: ; return to shader part epilog
187 ; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_2:
188 ; GFX6789: ; %bb.0: ; %main_body
189 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
190 ; GFX6789-NEXT: s_wqm_b64 exec, exec
191 ; GFX6789-NEXT: v_mov_b32_e32 v2, v0
192 ; GFX6789-NEXT: v_mov_b32_e32 v0, 0
193 ; GFX6789-NEXT: v_mov_b32_e32 v1, v0
194 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
195 ; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
196 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
197 ; GFX6789-NEXT: ; return to shader part epilog
199 ; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_2:
200 ; GFX10PLUS: ; %bb.0: ; %main_body
201 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
202 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
203 ; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
204 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
205 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
206 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
207 ; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe
208 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
209 ; GFX10PLUS-NEXT: ; return to shader part epilog
211 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
212 %res.vec = extractvalue {<4 x float>,i32} %v, 0
213 %res.f = extractelement <4 x float> %res.vec, i32 1
214 %res.err = extractvalue {<4 x float>,i32} %v, 1
215 %res.errf = bitcast i32 %res.err to float
216 %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
217 %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
; Write-mask narrowing with TFE: the call requests mask 15, but only element 2
; of the vector and the i32 struct member are used, so the checks expect
; dmask:0x4 and a two-register destination v[0:1].
221 define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
222 ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_3:
223 ; VERDE: ; %bb.0: ; %main_body
224 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
225 ; VERDE-NEXT: s_wqm_b64 exec, exec
226 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
227 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
228 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
229 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
230 ; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
231 ; VERDE-NEXT: s_waitcnt vmcnt(0)
232 ; VERDE-NEXT: ; return to shader part epilog
234 ; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_3:
235 ; GFX6789: ; %bb.0: ; %main_body
236 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
237 ; GFX6789-NEXT: s_wqm_b64 exec, exec
238 ; GFX6789-NEXT: v_mov_b32_e32 v2, v0
239 ; GFX6789-NEXT: v_mov_b32_e32 v0, 0
240 ; GFX6789-NEXT: v_mov_b32_e32 v1, v0
241 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
242 ; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
243 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
244 ; GFX6789-NEXT: ; return to shader part epilog
246 ; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_3:
247 ; GFX10PLUS: ; %bb.0: ; %main_body
248 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
249 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
250 ; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
251 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
252 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
253 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
254 ; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe
255 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
256 ; GFX10PLUS-NEXT: ; return to shader part epilog
258 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
259 %res.vec = extractvalue {<4 x float>,i32} %v, 0
260 %res.f = extractelement <4 x float> %res.vec, i32 2
261 %res.err = extractvalue {<4 x float>,i32} %v, 1
262 %res.errf = bitcast i32 %res.err to float
263 %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
264 %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
; Write-mask narrowing with TFE: the call requests mask 15, but only element 3
; of the vector and the i32 struct member are used, so the checks expect
; dmask:0x8 and a two-register destination v[0:1].
268 define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
269 ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_4:
270 ; VERDE: ; %bb.0: ; %main_body
271 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
272 ; VERDE-NEXT: s_wqm_b64 exec, exec
273 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
274 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
275 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
276 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
277 ; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
278 ; VERDE-NEXT: s_waitcnt vmcnt(0)
279 ; VERDE-NEXT: ; return to shader part epilog
281 ; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_4:
282 ; GFX6789: ; %bb.0: ; %main_body
283 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
284 ; GFX6789-NEXT: s_wqm_b64 exec, exec
285 ; GFX6789-NEXT: v_mov_b32_e32 v2, v0
286 ; GFX6789-NEXT: v_mov_b32_e32 v0, 0
287 ; GFX6789-NEXT: v_mov_b32_e32 v1, v0
288 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
289 ; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
290 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
291 ; GFX6789-NEXT: ; return to shader part epilog
293 ; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_4:
294 ; GFX10PLUS: ; %bb.0: ; %main_body
295 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
296 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
297 ; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
298 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
299 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
300 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
301 ; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe
302 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
303 ; GFX10PLUS-NEXT: ; return to shader part epilog
305 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
306 %res.vec = extractvalue {<4 x float>,i32} %v, 0
307 %res.f = extractelement <4 x float> %res.vec, i32 3
308 %res.err = extractvalue {<4 x float>,i32} %v, 1
309 %res.errf = bitcast i32 %res.err to float
310 %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
311 %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
; Write-mask narrowing with TFE: the call requests mask 15, but only elements
; 0 and 1 of the vector and the i32 struct member are used, so the checks
; expect dmask:0x3 and a three-register destination v[0:2].
315 define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
316 ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_12:
317 ; VERDE: ; %bb.0: ; %main_body
318 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
319 ; VERDE-NEXT: s_wqm_b64 exec, exec
320 ; VERDE-NEXT: v_mov_b32_e32 v3, v0
321 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
322 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
323 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
324 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
325 ; VERDE-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
326 ; VERDE-NEXT: s_waitcnt vmcnt(0)
327 ; VERDE-NEXT: ; return to shader part epilog
329 ; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_12:
330 ; GFX6789: ; %bb.0: ; %main_body
331 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
332 ; GFX6789-NEXT: s_wqm_b64 exec, exec
333 ; GFX6789-NEXT: v_mov_b32_e32 v3, v0
334 ; GFX6789-NEXT: v_mov_b32_e32 v0, 0
335 ; GFX6789-NEXT: v_mov_b32_e32 v1, v0
336 ; GFX6789-NEXT: v_mov_b32_e32 v2, v0
337 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
338 ; GFX6789-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
339 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
340 ; GFX6789-NEXT: ; return to shader part epilog
342 ; GFX10-LABEL: sample_1d_tfe_adjust_writemask_12:
343 ; GFX10: ; %bb.0: ; %main_body
344 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
345 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
346 ; GFX10-NEXT: v_mov_b32_e32 v3, v0
347 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
348 ; GFX10-NEXT: v_mov_b32_e32 v1, v0
349 ; GFX10-NEXT: v_mov_b32_e32 v2, v0
350 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
351 ; GFX10-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
352 ; GFX10-NEXT: s_waitcnt vmcnt(0)
353 ; GFX10-NEXT: ; return to shader part epilog
355 ; GFX11-LABEL: sample_1d_tfe_adjust_writemask_12:
356 ; GFX11: ; %bb.0: ; %main_body
357 ; GFX11-NEXT: s_mov_b32 s12, exec_lo
358 ; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
359 ; GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0
360 ; GFX11-NEXT: v_mov_b32_e32 v1, v0
361 ; GFX11-NEXT: v_mov_b32_e32 v2, v0
362 ; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
363 ; GFX11-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
364 ; GFX11-NEXT: s_waitcnt vmcnt(0)
365 ; GFX11-NEXT: ; return to shader part epilog
367 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
368 %res.vec = extractvalue {<4 x float>,i32} %v, 0
369 %res.f1 = extractelement <4 x float> %res.vec, i32 0
370 %res.f2 = extractelement <4 x float> %res.vec, i32 1
371 %res.err = extractvalue {<4 x float>,i32} %v, 1
372 %res.errf = bitcast i32 %res.err to float
373 %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
374 %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
375 %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
; Write-mask narrowing with TFE: the call requests mask 15, but only elements
; 1 and 3 of the vector and the i32 struct member are used, so the checks
; expect the non-contiguous dmask:0xa and a three-register destination v[0:2].
379 define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
380 ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_24:
381 ; VERDE: ; %bb.0: ; %main_body
382 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
383 ; VERDE-NEXT: s_wqm_b64 exec, exec
384 ; VERDE-NEXT: v_mov_b32_e32 v3, v0
385 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
386 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
387 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
388 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
389 ; VERDE-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
390 ; VERDE-NEXT: s_waitcnt vmcnt(0)
391 ; VERDE-NEXT: ; return to shader part epilog
393 ; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_24:
394 ; GFX6789: ; %bb.0: ; %main_body
395 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
396 ; GFX6789-NEXT: s_wqm_b64 exec, exec
397 ; GFX6789-NEXT: v_mov_b32_e32 v3, v0
398 ; GFX6789-NEXT: v_mov_b32_e32 v0, 0
399 ; GFX6789-NEXT: v_mov_b32_e32 v1, v0
400 ; GFX6789-NEXT: v_mov_b32_e32 v2, v0
401 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
402 ; GFX6789-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
403 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
404 ; GFX6789-NEXT: ; return to shader part epilog
406 ; GFX10-LABEL: sample_1d_tfe_adjust_writemask_24:
407 ; GFX10: ; %bb.0: ; %main_body
408 ; GFX10-NEXT: s_mov_b32 s12, exec_lo
409 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
410 ; GFX10-NEXT: v_mov_b32_e32 v3, v0
411 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
412 ; GFX10-NEXT: v_mov_b32_e32 v1, v0
413 ; GFX10-NEXT: v_mov_b32_e32 v2, v0
414 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
415 ; GFX10-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe
416 ; GFX10-NEXT: s_waitcnt vmcnt(0)
417 ; GFX10-NEXT: ; return to shader part epilog
419 ; GFX11-LABEL: sample_1d_tfe_adjust_writemask_24:
420 ; GFX11: ; %bb.0: ; %main_body
421 ; GFX11-NEXT: s_mov_b32 s12, exec_lo
422 ; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
423 ; GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0
424 ; GFX11-NEXT: v_mov_b32_e32 v1, v0
425 ; GFX11-NEXT: v_mov_b32_e32 v2, v0
426 ; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
427 ; GFX11-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe
428 ; GFX11-NEXT: s_waitcnt vmcnt(0)
429 ; GFX11-NEXT: ; return to shader part epilog
431 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
432 %res.vec = extractvalue {<4 x float>,i32} %v, 0
433 %res.f1 = extractelement <4 x float> %res.vec, i32 1
434 %res.f2 = extractelement <4 x float> %res.vec, i32 3
435 %res.err = extractvalue {<4 x float>,i32} %v, 1
436 %res.errf = bitcast i32 %res.err to float
437 %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
438 %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
439 %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
; Write-mask narrowing with TFE: the call requests mask 15, but only elements
; 0, 2 and 3 of the vector and the i32 struct member are used, so the checks
; expect dmask:0xd and a four-register destination v[0:3].
443 define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
444 ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_134:
445 ; VERDE: ; %bb.0: ; %main_body
446 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
447 ; VERDE-NEXT: s_wqm_b64 exec, exec
448 ; VERDE-NEXT: v_mov_b32_e32 v4, v0
449 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
450 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
451 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
452 ; VERDE-NEXT: v_mov_b32_e32 v3, v0
453 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
454 ; VERDE-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
455 ; VERDE-NEXT: s_waitcnt vmcnt(0)
456 ; VERDE-NEXT: ; return to shader part epilog
458 ; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_134:
459 ; GFX6789: ; %bb.0: ; %main_body
460 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
461 ; GFX6789-NEXT: s_wqm_b64 exec, exec
462 ; GFX6789-NEXT: v_mov_b32_e32 v4, v0
463 ; GFX6789-NEXT: v_mov_b32_e32 v0, 0
464 ; GFX6789-NEXT: v_mov_b32_e32 v1, v0
465 ; GFX6789-NEXT: v_mov_b32_e32 v2, v0
466 ; GFX6789-NEXT: v_mov_b32_e32 v3, v0
467 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
468 ; GFX6789-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
469 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
470 ; GFX6789-NEXT: ; return to shader part epilog
472 ; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_134:
473 ; GFX10PLUS: ; %bb.0: ; %main_body
474 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
475 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
476 ; GFX10PLUS-NEXT: v_mov_b32_e32 v4, v0
477 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
478 ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
479 ; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
480 ; GFX10PLUS-NEXT: v_mov_b32_e32 v3, v0
481 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
482 ; GFX10PLUS-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe
483 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
484 ; GFX10PLUS-NEXT: ; return to shader part epilog
486 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
487 %res.vec = extractvalue {<4 x float>,i32} %v, 0
488 %res.f1 = extractelement <4 x float> %res.vec, i32 0
489 %res.f2 = extractelement <4 x float> %res.vec, i32 2
490 %res.f3 = extractelement <4 x float> %res.vec, i32 3
491 %res.err = extractvalue {<4 x float>,i32} %v, 1
492 %res.errf = bitcast i32 %res.err to float
493 %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
494 %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
495 %res.tmp3 = insertelement <4 x float> %res.tmp2, float %res.f3, i32 2
496 %res = insertelement <4 x float> %res.tmp3, float %res.errf, i32 3
; 1D sample with the second-to-last intrinsic operand set to 2, which appears
; as the `lwe` modifier in the expected output. As in the `tfe` variant of
; this test, the checks expect v[0:4] to be zeroed before the sample and the
; fifth dword (v4, the i32 struct member) to be stored to %out.
500 define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
501 ; VERDE-LABEL: sample_1d_lwe:
502 ; VERDE: ; %bb.0: ; %main_body
503 ; VERDE-NEXT: s_mov_b64 s[14:15], exec
504 ; VERDE-NEXT: s_wqm_b64 exec, exec
505 ; VERDE-NEXT: v_mov_b32_e32 v5, v0
506 ; VERDE-NEXT: v_mov_b32_e32 v0, 0
507 ; VERDE-NEXT: v_mov_b32_e32 v1, v0
508 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
509 ; VERDE-NEXT: v_mov_b32_e32 v3, v0
510 ; VERDE-NEXT: v_mov_b32_e32 v4, v0
511 ; VERDE-NEXT: s_and_b64 exec, exec, s[14:15]
512 ; VERDE-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
513 ; VERDE-NEXT: s_mov_b32 s15, 0xf000
514 ; VERDE-NEXT: s_mov_b32 s14, -1
515 ; VERDE-NEXT: s_waitcnt vmcnt(0)
516 ; VERDE-NEXT: buffer_store_dword v4, off, s[12:15], 0
517 ; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
518 ; VERDE-NEXT: ; return to shader part epilog
520 ; GFX6789-LABEL: sample_1d_lwe:
521 ; GFX6789: ; %bb.0: ; %main_body
522 ; GFX6789-NEXT: s_mov_b64 s[14:15], exec
523 ; GFX6789-NEXT: s_wqm_b64 exec, exec
524 ; GFX6789-NEXT: v_mov_b32_e32 v6, 0
525 ; GFX6789-NEXT: v_mov_b32_e32 v5, v0
526 ; GFX6789-NEXT: v_mov_b32_e32 v7, v6
527 ; GFX6789-NEXT: v_mov_b32_e32 v8, v6
528 ; GFX6789-NEXT: v_mov_b32_e32 v9, v6
529 ; GFX6789-NEXT: v_mov_b32_e32 v10, v6
530 ; GFX6789-NEXT: v_mov_b32_e32 v0, v6
531 ; GFX6789-NEXT: v_mov_b32_e32 v1, v7
532 ; GFX6789-NEXT: v_mov_b32_e32 v2, v8
533 ; GFX6789-NEXT: v_mov_b32_e32 v3, v9
534 ; GFX6789-NEXT: v_mov_b32_e32 v4, v10
535 ; GFX6789-NEXT: s_and_b64 exec, exec, s[14:15]
536 ; GFX6789-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
537 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
538 ; GFX6789-NEXT: global_store_dword v6, v4, s[12:13]
539 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
540 ; GFX6789-NEXT: ; return to shader part epilog
542 ; GFX10-LABEL: sample_1d_lwe:
543 ; GFX10: ; %bb.0: ; %main_body
544 ; GFX10-NEXT: s_mov_b32 s14, exec_lo
545 ; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
546 ; GFX10-NEXT: v_mov_b32_e32 v6, 0
547 ; GFX10-NEXT: v_mov_b32_e32 v5, v0
548 ; GFX10-NEXT: v_mov_b32_e32 v7, v6
549 ; GFX10-NEXT: v_mov_b32_e32 v8, v6
550 ; GFX10-NEXT: v_mov_b32_e32 v9, v6
551 ; GFX10-NEXT: v_mov_b32_e32 v10, v6
552 ; GFX10-NEXT: v_mov_b32_e32 v0, v6
553 ; GFX10-NEXT: v_mov_b32_e32 v1, v7
554 ; GFX10-NEXT: v_mov_b32_e32 v2, v8
555 ; GFX10-NEXT: v_mov_b32_e32 v3, v9
556 ; GFX10-NEXT: v_mov_b32_e32 v4, v10
557 ; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14
558 ; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
559 ; GFX10-NEXT: s_waitcnt vmcnt(0)
560 ; GFX10-NEXT: global_store_dword v6, v4, s[12:13]
561 ; GFX10-NEXT: ; return to shader part epilog
563 ; GFX11-LABEL: sample_1d_lwe:
564 ; GFX11: ; %bb.0: ; %main_body
565 ; GFX11-NEXT: s_mov_b32 s14, exec_lo
566 ; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
567 ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0
568 ; GFX11-NEXT: v_mov_b32_e32 v7, v6
569 ; GFX11-NEXT: v_mov_b32_e32 v8, v6
570 ; GFX11-NEXT: v_mov_b32_e32 v9, v6
571 ; GFX11-NEXT: v_mov_b32_e32 v10, v6
572 ; GFX11-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
573 ; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9
574 ; GFX11-NEXT: v_mov_b32_e32 v4, v10
575 ; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s14
576 ; GFX11-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
577 ; GFX11-NEXT: s_waitcnt vmcnt(0)
578 ; GFX11-NEXT: global_store_b32 v6, v4, s[12:13]
579 ; GFX11-NEXT: ; return to shader part epilog
581 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
582 %v.vec = extractvalue {<4 x float>, i32} %v, 0
583 %v.err = extractvalue {<4 x float>, i32} %v, 1
584 store i32 %v.err, ptr addrspace(1) %out, align 4
585 ret <4 x float> %v.vec
; 2D sample taking two float coordinates (%s, %t): the checks expect the
; address operand to be the register pair v[0:1] with dmask:0xf, and the
; GFX10PLUS checks expect dim:SQ_RSRC_IMG_2D on the instruction.
588 define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
589 ; VERDE-LABEL: sample_2d:
590 ; VERDE: ; %bb.0: ; %main_body
591 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
592 ; VERDE-NEXT: s_wqm_b64 exec, exec
593 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
594 ; VERDE-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
595 ; VERDE-NEXT: s_waitcnt vmcnt(0)
596 ; VERDE-NEXT: ; return to shader part epilog
598 ; GFX6789-LABEL: sample_2d:
599 ; GFX6789: ; %bb.0: ; %main_body
600 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
601 ; GFX6789-NEXT: s_wqm_b64 exec, exec
602 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
603 ; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
604 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
605 ; GFX6789-NEXT: ; return to shader part epilog
607 ; GFX10PLUS-LABEL: sample_2d:
608 ; GFX10PLUS: ; %bb.0: ; %main_body
609 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
610 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
611 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
612 ; GFX10PLUS-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
613 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
614 ; GFX10PLUS-NEXT: ; return to shader part epilog
616 %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; 3D sample taking three float coordinates (%s, %t, %r): the checks expect the
; address operand to be v[0:2] with dmask:0xf, and the GFX10PLUS checks expect
; dim:SQ_RSRC_IMG_3D on the instruction.
620 define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
621 ; VERDE-LABEL: sample_3d:
622 ; VERDE: ; %bb.0: ; %main_body
623 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
624 ; VERDE-NEXT: s_wqm_b64 exec, exec
625 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
626 ; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
627 ; VERDE-NEXT: s_waitcnt vmcnt(0)
628 ; VERDE-NEXT: ; return to shader part epilog
630 ; GFX6789-LABEL: sample_3d:
631 ; GFX6789: ; %bb.0: ; %main_body
632 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
633 ; GFX6789-NEXT: s_wqm_b64 exec, exec
634 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
635 ; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
636 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
637 ; GFX6789-NEXT: ; return to shader part epilog
639 ; GFX10PLUS-LABEL: sample_3d:
640 ; GFX10PLUS: ; %bb.0: ; %main_body
641 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
642 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
643 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
644 ; GFX10PLUS-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
645 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
646 ; GFX10PLUS-NEXT: ; return to shader part epilog
648 %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Cube sample taking (%s, %t, %face): the checks expect the address operand to
; be v[0:2] with dmask:0xf. The VERDE and GFX6789 checks expect the `da`
; modifier, while the GFX10PLUS checks expect dim:SQ_RSRC_IMG_CUBE instead.
652 define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
653 ; VERDE-LABEL: sample_cube:
654 ; VERDE: ; %bb.0: ; %main_body
655 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
656 ; VERDE-NEXT: s_wqm_b64 exec, exec
657 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
658 ; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
659 ; VERDE-NEXT: s_waitcnt vmcnt(0)
660 ; VERDE-NEXT: ; return to shader part epilog
662 ; GFX6789-LABEL: sample_cube:
663 ; GFX6789: ; %bb.0: ; %main_body
664 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
665 ; GFX6789-NEXT: s_wqm_b64 exec, exec
666 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
667 ; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
668 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
669 ; GFX6789-NEXT: ; return to shader part epilog
671 ; GFX10PLUS-LABEL: sample_cube:
672 ; GFX10PLUS: ; %bb.0: ; %main_body
673 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
674 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
675 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
676 ; GFX10PLUS-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
677 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
678 ; GFX10PLUS-NEXT: ; return to shader part epilog
680 %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; 1D-array sample taking (%s, %slice): the checks expect the address operand
; to be v[0:1] with dmask:0xf. The VERDE and GFX6789 checks expect the `da`
; modifier, while the GFX10PLUS checks expect dim:SQ_RSRC_IMG_1D_ARRAY instead.
684 define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
685 ; VERDE-LABEL: sample_1darray:
686 ; VERDE: ; %bb.0: ; %main_body
687 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
688 ; VERDE-NEXT: s_wqm_b64 exec, exec
689 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
690 ; VERDE-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
691 ; VERDE-NEXT: s_waitcnt vmcnt(0)
692 ; VERDE-NEXT: ; return to shader part epilog
694 ; GFX6789-LABEL: sample_1darray:
695 ; GFX6789: ; %bb.0: ; %main_body
696 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
697 ; GFX6789-NEXT: s_wqm_b64 exec, exec
698 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
699 ; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
700 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
701 ; GFX6789-NEXT: ; return to shader part epilog
703 ; GFX10PLUS-LABEL: sample_1darray:
704 ; GFX10PLUS: ; %bb.0: ; %main_body
705 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
706 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
707 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
708 ; GFX10PLUS-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
709 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
710 ; GFX10PLUS-NEXT: ; return to shader part epilog
712 %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_2darray: calls llvm.amdgcn.image.sample.2darray with dmask 15 and
; three float address operands (%s, %t, %slice). The autogenerated checks
; expect an exec save / s_wqm / s_and sequence followed by a single
; image_sample reading v[0:2]. The verde and gfx900 runs print the "da"
; modifier; the gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_2D_ARRAY.
716 define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
717 ; VERDE-LABEL: sample_2darray:
718 ; VERDE: ; %bb.0: ; %main_body
719 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
720 ; VERDE-NEXT: s_wqm_b64 exec, exec
721 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
722 ; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
723 ; VERDE-NEXT: s_waitcnt vmcnt(0)
724 ; VERDE-NEXT: ; return to shader part epilog
726 ; GFX6789-LABEL: sample_2darray:
727 ; GFX6789: ; %bb.0: ; %main_body
728 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
729 ; GFX6789-NEXT: s_wqm_b64 exec, exec
730 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
731 ; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
732 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
733 ; GFX6789-NEXT: ; return to shader part epilog
735 ; GFX10PLUS-LABEL: sample_2darray:
736 ; GFX10PLUS: ; %bb.0: ; %main_body
737 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
738 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
739 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
740 ; GFX10PLUS-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
741 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
742 ; GFX10PLUS-NEXT: ; return to shader part epilog
744 %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_1d: calls llvm.amdgcn.image.sample.c.1d with dmask 15; %zcompare is
; passed ahead of the %s coordinate. The autogenerated checks expect an exec
; save / s_wqm / s_and sequence followed by a single image_sample_c reading
; v[0:1]; only the gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_1D.
748 define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
749 ; VERDE-LABEL: sample_c_1d:
750 ; VERDE: ; %bb.0: ; %main_body
751 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
752 ; VERDE-NEXT: s_wqm_b64 exec, exec
753 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
754 ; VERDE-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
755 ; VERDE-NEXT: s_waitcnt vmcnt(0)
756 ; VERDE-NEXT: ; return to shader part epilog
758 ; GFX6789-LABEL: sample_c_1d:
759 ; GFX6789: ; %bb.0: ; %main_body
760 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
761 ; GFX6789-NEXT: s_wqm_b64 exec, exec
762 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
763 ; GFX6789-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
764 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
765 ; GFX6789-NEXT: ; return to shader part epilog
767 ; GFX10PLUS-LABEL: sample_c_1d:
768 ; GFX10PLUS: ; %bb.0: ; %main_body
769 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
770 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
771 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
772 ; GFX10PLUS-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
773 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
774 ; GFX10PLUS-NEXT: ; return to shader part epilog
776 %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_2d: calls llvm.amdgcn.image.sample.c.2d with dmask 15; %zcompare is
; passed ahead of the %s/%t coordinates. The autogenerated checks expect an
; exec save / s_wqm / s_and sequence followed by a single image_sample_c
; reading v[0:2]; only the gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_2D.
780 define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
781 ; VERDE-LABEL: sample_c_2d:
782 ; VERDE: ; %bb.0: ; %main_body
783 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
784 ; VERDE-NEXT: s_wqm_b64 exec, exec
785 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
786 ; VERDE-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
787 ; VERDE-NEXT: s_waitcnt vmcnt(0)
788 ; VERDE-NEXT: ; return to shader part epilog
790 ; GFX6789-LABEL: sample_c_2d:
791 ; GFX6789: ; %bb.0: ; %main_body
792 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
793 ; GFX6789-NEXT: s_wqm_b64 exec, exec
794 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
795 ; GFX6789-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
796 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
797 ; GFX6789-NEXT: ; return to shader part epilog
799 ; GFX10PLUS-LABEL: sample_c_2d:
800 ; GFX10PLUS: ; %bb.0: ; %main_body
801 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
802 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
803 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
804 ; GFX10PLUS-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
805 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
806 ; GFX10PLUS-NEXT: ; return to shader part epilog
808 %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_cl_1d: calls llvm.amdgcn.image.sample.cl.1d with dmask 15; %clamp is
; passed after the %s coordinate. The autogenerated checks expect an exec
; save / s_wqm / s_and sequence followed by a single image_sample_cl reading
; v[0:1]; only the gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_1D.
812 define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) {
813 ; VERDE-LABEL: sample_cl_1d:
814 ; VERDE: ; %bb.0: ; %main_body
815 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
816 ; VERDE-NEXT: s_wqm_b64 exec, exec
817 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
818 ; VERDE-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
819 ; VERDE-NEXT: s_waitcnt vmcnt(0)
820 ; VERDE-NEXT: ; return to shader part epilog
822 ; GFX6789-LABEL: sample_cl_1d:
823 ; GFX6789: ; %bb.0: ; %main_body
824 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
825 ; GFX6789-NEXT: s_wqm_b64 exec, exec
826 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
827 ; GFX6789-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
828 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
829 ; GFX6789-NEXT: ; return to shader part epilog
831 ; GFX10PLUS-LABEL: sample_cl_1d:
832 ; GFX10PLUS: ; %bb.0: ; %main_body
833 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
834 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
835 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
836 ; GFX10PLUS-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
837 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
838 ; GFX10PLUS-NEXT: ; return to shader part epilog
840 %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_cl_2d: calls llvm.amdgcn.image.sample.cl.2d with dmask 15; %clamp is
; passed after the %s/%t coordinates. The autogenerated checks expect an exec
; save / s_wqm / s_and sequence followed by a single image_sample_cl reading
; v[0:2]; only the gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_2D.
844 define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
845 ; VERDE-LABEL: sample_cl_2d:
846 ; VERDE: ; %bb.0: ; %main_body
847 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
848 ; VERDE-NEXT: s_wqm_b64 exec, exec
849 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
850 ; VERDE-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
851 ; VERDE-NEXT: s_waitcnt vmcnt(0)
852 ; VERDE-NEXT: ; return to shader part epilog
854 ; GFX6789-LABEL: sample_cl_2d:
855 ; GFX6789: ; %bb.0: ; %main_body
856 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
857 ; GFX6789-NEXT: s_wqm_b64 exec, exec
858 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
859 ; GFX6789-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
860 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
861 ; GFX6789-NEXT: ; return to shader part epilog
863 ; GFX10PLUS-LABEL: sample_cl_2d:
864 ; GFX10PLUS: ; %bb.0: ; %main_body
865 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
866 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
867 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
868 ; GFX10PLUS-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
869 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
870 ; GFX10PLUS-NEXT: ; return to shader part epilog
872 %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_cl_1d: calls llvm.amdgcn.image.sample.c.cl.1d with dmask 15 and the
; operand order %zcompare, %s, %clamp. The autogenerated checks expect an exec
; save / s_wqm / s_and sequence followed by a single image_sample_c_cl reading
; v[0:2]; only the gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_1D.
876 define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) {
877 ; VERDE-LABEL: sample_c_cl_1d:
878 ; VERDE: ; %bb.0: ; %main_body
879 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
880 ; VERDE-NEXT: s_wqm_b64 exec, exec
881 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
882 ; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
883 ; VERDE-NEXT: s_waitcnt vmcnt(0)
884 ; VERDE-NEXT: ; return to shader part epilog
886 ; GFX6789-LABEL: sample_c_cl_1d:
887 ; GFX6789: ; %bb.0: ; %main_body
888 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
889 ; GFX6789-NEXT: s_wqm_b64 exec, exec
890 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
891 ; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
892 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
893 ; GFX6789-NEXT: ; return to shader part epilog
895 ; GFX10PLUS-LABEL: sample_c_cl_1d:
896 ; GFX10PLUS: ; %bb.0: ; %main_body
897 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
898 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
899 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
900 ; GFX10PLUS-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
901 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
902 ; GFX10PLUS-NEXT: ; return to shader part epilog
904 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_cl_2d: calls llvm.amdgcn.image.sample.c.cl.2d with dmask 15 and the
; operand order %zcompare, %s, %t, %clamp. The autogenerated checks expect an
; exec save / s_wqm / s_and sequence followed by a single image_sample_c_cl
; reading v[0:3]; only the gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_2D.
908 define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
909 ; VERDE-LABEL: sample_c_cl_2d:
910 ; VERDE: ; %bb.0: ; %main_body
911 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
912 ; VERDE-NEXT: s_wqm_b64 exec, exec
913 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
914 ; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
915 ; VERDE-NEXT: s_waitcnt vmcnt(0)
916 ; VERDE-NEXT: ; return to shader part epilog
918 ; GFX6789-LABEL: sample_c_cl_2d:
919 ; GFX6789: ; %bb.0: ; %main_body
920 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
921 ; GFX6789-NEXT: s_wqm_b64 exec, exec
922 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
923 ; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
924 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
925 ; GFX6789-NEXT: ; return to shader part epilog
927 ; GFX10PLUS-LABEL: sample_c_cl_2d:
928 ; GFX10PLUS: ; %bb.0: ; %main_body
929 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
930 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
931 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
932 ; GFX10PLUS-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
933 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
934 ; GFX10PLUS-NEXT: ; return to shader part epilog
936 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_b_1d: calls llvm.amdgcn.image.sample.b.1d with dmask 15; %bias is
; passed ahead of the %s coordinate. The autogenerated checks expect an exec
; save / s_wqm / s_and sequence followed by a single image_sample_b reading
; v[0:1]; only the gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_1D.
940 define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) {
941 ; VERDE-LABEL: sample_b_1d:
942 ; VERDE: ; %bb.0: ; %main_body
943 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
944 ; VERDE-NEXT: s_wqm_b64 exec, exec
945 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
946 ; VERDE-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
947 ; VERDE-NEXT: s_waitcnt vmcnt(0)
948 ; VERDE-NEXT: ; return to shader part epilog
950 ; GFX6789-LABEL: sample_b_1d:
951 ; GFX6789: ; %bb.0: ; %main_body
952 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
953 ; GFX6789-NEXT: s_wqm_b64 exec, exec
954 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
955 ; GFX6789-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
956 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
957 ; GFX6789-NEXT: ; return to shader part epilog
959 ; GFX10PLUS-LABEL: sample_b_1d:
960 ; GFX10PLUS: ; %bb.0: ; %main_body
961 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
962 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
963 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
964 ; GFX10PLUS-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
965 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
966 ; GFX10PLUS-NEXT: ; return to shader part epilog
968 %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_b_2d: calls llvm.amdgcn.image.sample.b.2d with dmask 15; %bias is
; passed ahead of the %s/%t coordinates. The autogenerated checks expect an
; exec save / s_wqm / s_and sequence followed by a single image_sample_b
; reading v[0:2]; only the gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_2D.
972 define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
973 ; VERDE-LABEL: sample_b_2d:
974 ; VERDE: ; %bb.0: ; %main_body
975 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
976 ; VERDE-NEXT: s_wqm_b64 exec, exec
977 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
978 ; VERDE-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
979 ; VERDE-NEXT: s_waitcnt vmcnt(0)
980 ; VERDE-NEXT: ; return to shader part epilog
982 ; GFX6789-LABEL: sample_b_2d:
983 ; GFX6789: ; %bb.0: ; %main_body
984 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
985 ; GFX6789-NEXT: s_wqm_b64 exec, exec
986 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
987 ; GFX6789-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
988 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
989 ; GFX6789-NEXT: ; return to shader part epilog
991 ; GFX10PLUS-LABEL: sample_b_2d:
992 ; GFX10PLUS: ; %bb.0: ; %main_body
993 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
994 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
995 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
996 ; GFX10PLUS-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
997 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
998 ; GFX10PLUS-NEXT: ; return to shader part epilog
1000 %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_b_1d: calls llvm.amdgcn.image.sample.c.b.1d with dmask 15 and the
; operand order %bias, %zcompare, %s. The autogenerated checks expect an exec
; save / s_wqm / s_and sequence followed by a single image_sample_c_b reading
; v[0:2]; only the gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_1D.
1004 define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) {
1005 ; VERDE-LABEL: sample_c_b_1d:
1006 ; VERDE: ; %bb.0: ; %main_body
1007 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1008 ; VERDE-NEXT: s_wqm_b64 exec, exec
1009 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1010 ; VERDE-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1011 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1012 ; VERDE-NEXT: ; return to shader part epilog
1014 ; GFX6789-LABEL: sample_c_b_1d:
1015 ; GFX6789: ; %bb.0: ; %main_body
1016 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1017 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1018 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1019 ; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1020 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1021 ; GFX6789-NEXT: ; return to shader part epilog
1023 ; GFX10PLUS-LABEL: sample_c_b_1d:
1024 ; GFX10PLUS: ; %bb.0: ; %main_body
1025 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1026 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1027 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1028 ; GFX10PLUS-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1029 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1030 ; GFX10PLUS-NEXT: ; return to shader part epilog
1032 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_b_2d: calls llvm.amdgcn.image.sample.c.b.2d with dmask 15 and the
; operand order %bias, %zcompare, %s, %t. The autogenerated checks expect an
; exec save / s_wqm / s_and sequence followed by a single image_sample_c_b
; reading v[0:3]; only the gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_2D.
1036 define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
1037 ; VERDE-LABEL: sample_c_b_2d:
1038 ; VERDE: ; %bb.0: ; %main_body
1039 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1040 ; VERDE-NEXT: s_wqm_b64 exec, exec
1041 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1042 ; VERDE-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1043 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1044 ; VERDE-NEXT: ; return to shader part epilog
1046 ; GFX6789-LABEL: sample_c_b_2d:
1047 ; GFX6789: ; %bb.0: ; %main_body
1048 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1049 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1050 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1051 ; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1052 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1053 ; GFX6789-NEXT: ; return to shader part epilog
1055 ; GFX10PLUS-LABEL: sample_c_b_2d:
1056 ; GFX10PLUS: ; %bb.0: ; %main_body
1057 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1058 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1059 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1060 ; GFX10PLUS-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1061 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1062 ; GFX10PLUS-NEXT: ; return to shader part epilog
1064 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_b_cl_1d: calls llvm.amdgcn.image.sample.b.cl.1d with dmask 15 and the
; operand order %bias, %s, %clamp. The autogenerated checks expect an exec
; save / s_wqm / s_and sequence followed by a single image_sample_b_cl reading
; v[0:2]; only the gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_1D.
1068 define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) {
1069 ; VERDE-LABEL: sample_b_cl_1d:
1070 ; VERDE: ; %bb.0: ; %main_body
1071 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1072 ; VERDE-NEXT: s_wqm_b64 exec, exec
1073 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1074 ; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1075 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1076 ; VERDE-NEXT: ; return to shader part epilog
1078 ; GFX6789-LABEL: sample_b_cl_1d:
1079 ; GFX6789: ; %bb.0: ; %main_body
1080 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1081 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1082 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1083 ; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1084 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1085 ; GFX6789-NEXT: ; return to shader part epilog
1087 ; GFX10PLUS-LABEL: sample_b_cl_1d:
1088 ; GFX10PLUS: ; %bb.0: ; %main_body
1089 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1090 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1091 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1092 ; GFX10PLUS-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1093 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1094 ; GFX10PLUS-NEXT: ; return to shader part epilog
1096 %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_b_cl_2d: calls llvm.amdgcn.image.sample.b.cl.2d with dmask 15 and the
; operand order %bias, %s, %t, %clamp. The autogenerated checks expect an exec
; save / s_wqm / s_and sequence followed by a single image_sample_b_cl reading
; v[0:3]; only the gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_2D.
1100 define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
1101 ; VERDE-LABEL: sample_b_cl_2d:
1102 ; VERDE: ; %bb.0: ; %main_body
1103 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1104 ; VERDE-NEXT: s_wqm_b64 exec, exec
1105 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1106 ; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1107 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1108 ; VERDE-NEXT: ; return to shader part epilog
1110 ; GFX6789-LABEL: sample_b_cl_2d:
1111 ; GFX6789: ; %bb.0: ; %main_body
1112 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1113 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1114 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1115 ; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1116 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1117 ; GFX6789-NEXT: ; return to shader part epilog
1119 ; GFX10PLUS-LABEL: sample_b_cl_2d:
1120 ; GFX10PLUS: ; %bb.0: ; %main_body
1121 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1122 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1123 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1124 ; GFX10PLUS-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1125 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1126 ; GFX10PLUS-NEXT: ; return to shader part epilog
1128 %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_b_cl_1d: calls llvm.amdgcn.image.sample.c.b.cl.1d with dmask 15 and
; the operand order %bias, %zcompare, %s, %clamp. The autogenerated checks
; expect an exec save / s_wqm / s_and sequence followed by a single
; image_sample_c_b_cl reading v[0:3]; only the gfx1010 and gfx1100 runs print
; dim:SQ_RSRC_IMG_1D.
1132 define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) {
1133 ; VERDE-LABEL: sample_c_b_cl_1d:
1134 ; VERDE: ; %bb.0: ; %main_body
1135 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1136 ; VERDE-NEXT: s_wqm_b64 exec, exec
1137 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1138 ; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1139 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1140 ; VERDE-NEXT: ; return to shader part epilog
1142 ; GFX6789-LABEL: sample_c_b_cl_1d:
1143 ; GFX6789: ; %bb.0: ; %main_body
1144 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1145 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1146 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1147 ; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1148 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1149 ; GFX6789-NEXT: ; return to shader part epilog
1151 ; GFX10PLUS-LABEL: sample_c_b_cl_1d:
1152 ; GFX10PLUS: ; %bb.0: ; %main_body
1153 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1154 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1155 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1156 ; GFX10PLUS-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1157 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1158 ; GFX10PLUS-NEXT: ; return to shader part epilog
1160 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_b_cl_2d: calls llvm.amdgcn.image.sample.c.b.cl.2d with dmask 15 and
; the operand order %bias, %zcompare, %s, %t, %clamp. The autogenerated checks
; expect an exec save / s_wqm / s_and sequence followed by a single
; image_sample_c_b_cl reading v[0:4]; only the gfx1010 and gfx1100 runs print
; dim:SQ_RSRC_IMG_2D.
1164 define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
1165 ; VERDE-LABEL: sample_c_b_cl_2d:
1166 ; VERDE: ; %bb.0: ; %main_body
1167 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1168 ; VERDE-NEXT: s_wqm_b64 exec, exec
1169 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1170 ; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
1171 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1172 ; VERDE-NEXT: ; return to shader part epilog
1174 ; GFX6789-LABEL: sample_c_b_cl_2d:
1175 ; GFX6789: ; %bb.0: ; %main_body
1176 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1177 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1178 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1179 ; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
1180 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1181 ; GFX6789-NEXT: ; return to shader part epilog
1183 ; GFX10PLUS-LABEL: sample_c_b_cl_2d:
1184 ; GFX10PLUS: ; %bb.0: ; %main_body
1185 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1186 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1187 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1188 ; GFX10PLUS-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1189 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1190 ; GFX10PLUS-NEXT: ; return to shader part epilog
1192 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_d_1d: calls llvm.amdgcn.image.sample.d.1d with dmask 15 and the
; operand order %dsdh, %dsdv, %s. The autogenerated checks expect only a
; single image_sample_d reading v[0:2] plus the s_waitcnt; no s_wqm / exec
; sequence is expected for this variant. Only the gfx1010 and gfx1100 runs
; print dim:SQ_RSRC_IMG_1D.
1196 define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
1197 ; VERDE-LABEL: sample_d_1d:
1198 ; VERDE: ; %bb.0: ; %main_body
1199 ; VERDE-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1200 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1201 ; VERDE-NEXT: ; return to shader part epilog
1203 ; GFX6789-LABEL: sample_d_1d:
1204 ; GFX6789: ; %bb.0: ; %main_body
1205 ; GFX6789-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1206 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1207 ; GFX6789-NEXT: ; return to shader part epilog
1209 ; GFX10PLUS-LABEL: sample_d_1d:
1210 ; GFX10PLUS: ; %bb.0: ; %main_body
1211 ; GFX10PLUS-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1212 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1213 ; GFX10PLUS-NEXT: ; return to shader part epilog
1215 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_d_2d: calls llvm.amdgcn.image.sample.d.2d with dmask 15 and the
; operand order %dsdh, %dtdh, %dsdv, %dtdv, %s, %t. The autogenerated checks
; expect only a single image_sample_d reading v[0:5] plus the s_waitcnt; no
; s_wqm / exec sequence is expected for this variant. Only the gfx1010 and
; gfx1100 runs print dim:SQ_RSRC_IMG_2D.
1219 define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
1220 ; VERDE-LABEL: sample_d_2d:
1221 ; VERDE: ; %bb.0: ; %main_body
1222 ; VERDE-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf
1223 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1224 ; VERDE-NEXT: ; return to shader part epilog
1226 ; GFX6789-LABEL: sample_d_2d:
1227 ; GFX6789: ; %bb.0: ; %main_body
1228 ; GFX6789-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf
1229 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1230 ; GFX6789-NEXT: ; return to shader part epilog
1232 ; GFX10PLUS-LABEL: sample_d_2d:
1233 ; GFX10PLUS: ; %bb.0: ; %main_body
1234 ; GFX10PLUS-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1235 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1236 ; GFX10PLUS-NEXT: ; return to shader part epilog
1238 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_d_1d: calls llvm.amdgcn.image.sample.c.d.1d with dmask 15 and the
; operand order %zcompare, %dsdh, %dsdv, %s. The autogenerated checks expect
; only a single image_sample_c_d reading v[0:3] plus the s_waitcnt; no s_wqm /
; exec sequence is expected for this variant. Only the gfx1010 and gfx1100
; runs print dim:SQ_RSRC_IMG_1D.
1242 define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
1243 ; VERDE-LABEL: sample_c_d_1d:
1244 ; VERDE: ; %bb.0: ; %main_body
1245 ; VERDE-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1246 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1247 ; VERDE-NEXT: ; return to shader part epilog
1249 ; GFX6789-LABEL: sample_c_d_1d:
1250 ; GFX6789: ; %bb.0: ; %main_body
1251 ; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1252 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1253 ; GFX6789-NEXT: ; return to shader part epilog
1255 ; GFX10PLUS-LABEL: sample_c_d_1d:
1256 ; GFX10PLUS: ; %bb.0: ; %main_body
1257 ; GFX10PLUS-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1258 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1259 ; GFX10PLUS-NEXT: ; return to shader part epilog
1261 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_d_2d: calls llvm.amdgcn.image.sample.c.d.2d with dmask 15 and the
; operand order %zcompare, %dsdh, %dtdh, %dsdv, %dtdv, %s, %t. The
; autogenerated checks expect only a single image_sample_c_d reading v[0:6]
; plus the s_waitcnt; no s_wqm / exec sequence is expected for this variant.
; Only the gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_2D.
1265 define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
1266 ; VERDE-LABEL: sample_c_d_2d:
1267 ; VERDE: ; %bb.0: ; %main_body
1268 ; VERDE-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
1269 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1270 ; VERDE-NEXT: ; return to shader part epilog
1272 ; GFX6789-LABEL: sample_c_d_2d:
1273 ; GFX6789: ; %bb.0: ; %main_body
1274 ; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
1275 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1276 ; GFX6789-NEXT: ; return to shader part epilog
1278 ; GFX10PLUS-LABEL: sample_c_d_2d:
1279 ; GFX10PLUS: ; %bb.0: ; %main_body
1280 ; GFX10PLUS-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1281 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1282 ; GFX10PLUS-NEXT: ; return to shader part epilog
1284 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_d_cl_1d: calls llvm.amdgcn.image.sample.d.cl.1d with dmask 15 and the
; operand order %dsdh, %dsdv, %s, %clamp. The autogenerated checks expect only
; a single image_sample_d_cl reading v[0:3] plus the s_waitcnt; no s_wqm /
; exec sequence is expected for this variant. Only the gfx1010 and gfx1100
; runs print dim:SQ_RSRC_IMG_1D.
1288 define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
1289 ; VERDE-LABEL: sample_d_cl_1d:
1290 ; VERDE: ; %bb.0: ; %main_body
1291 ; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1292 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1293 ; VERDE-NEXT: ; return to shader part epilog
1295 ; GFX6789-LABEL: sample_d_cl_1d:
1296 ; GFX6789: ; %bb.0: ; %main_body
1297 ; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1298 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1299 ; GFX6789-NEXT: ; return to shader part epilog
1301 ; GFX10PLUS-LABEL: sample_d_cl_1d:
1302 ; GFX10PLUS: ; %bb.0: ; %main_body
1303 ; GFX10PLUS-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1304 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1305 ; GFX10PLUS-NEXT: ; return to shader part epilog
1307 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_d_cl_2d: calls llvm.amdgcn.image.sample.d.cl.2d with dmask 15 and the
; operand order %dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp. The autogenerated
; checks expect only a single image_sample_d_cl reading v[0:6] plus the
; s_waitcnt; no s_wqm / exec sequence is expected for this variant. Only the
; gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_2D.
1311 define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
1312 ; VERDE-LABEL: sample_d_cl_2d:
1313 ; VERDE: ; %bb.0: ; %main_body
1314 ; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
1315 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1316 ; VERDE-NEXT: ; return to shader part epilog
1318 ; GFX6789-LABEL: sample_d_cl_2d:
1319 ; GFX6789: ; %bb.0: ; %main_body
1320 ; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
1321 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1322 ; GFX6789-NEXT: ; return to shader part epilog
1324 ; GFX10PLUS-LABEL: sample_d_cl_2d:
1325 ; GFX10PLUS: ; %bb.0: ; %main_body
1326 ; GFX10PLUS-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1327 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1328 ; GFX10PLUS-NEXT: ; return to shader part epilog
1330 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_d_cl_1d: calls llvm.amdgcn.image.sample.c.d.cl.1d with dmask 15 and
; the operand order %zcompare, %dsdh, %dsdv, %s, %clamp. The autogenerated
; checks expect only a single image_sample_c_d_cl reading v[0:4] plus the
; s_waitcnt; no s_wqm / exec sequence is expected for this variant. Only the
; gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_1D.
1334 define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
1335 ; VERDE-LABEL: sample_c_d_cl_1d:
1336 ; VERDE: ; %bb.0: ; %main_body
1337 ; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
1338 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1339 ; VERDE-NEXT: ; return to shader part epilog
1341 ; GFX6789-LABEL: sample_c_d_cl_1d:
1342 ; GFX6789: ; %bb.0: ; %main_body
1343 ; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
1344 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1345 ; GFX6789-NEXT: ; return to shader part epilog
1347 ; GFX10PLUS-LABEL: sample_c_d_cl_1d:
1348 ; GFX10PLUS: ; %bb.0: ; %main_body
1349 ; GFX10PLUS-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1350 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1351 ; GFX10PLUS-NEXT: ; return to shader part epilog
1353 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_d_cl_2d: calls llvm.amdgcn.image.sample.c.d.cl.2d with dmask 15 and
; the operand order %zcompare, %dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp. The
; autogenerated checks expect only a single image_sample_c_d_cl reading v[0:7]
; plus the s_waitcnt; no s_wqm / exec sequence is expected for this variant.
; Only the gfx1010 and gfx1100 runs print dim:SQ_RSRC_IMG_2D.
1357 define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
1358 ; VERDE-LABEL: sample_c_d_cl_2d:
1359 ; VERDE: ; %bb.0: ; %main_body
1360 ; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
1361 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1362 ; VERDE-NEXT: ; return to shader part epilog
1364 ; GFX6789-LABEL: sample_c_d_cl_2d:
1365 ; GFX6789: ; %bb.0: ; %main_body
1366 ; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
1367 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1368 ; GFX6789-NEXT: ; return to shader part epilog
1370 ; GFX10PLUS-LABEL: sample_c_d_cl_2d:
1371 ; GFX10PLUS: ; %bb.0: ; %main_body
1372 ; GFX10PLUS-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1373 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1374 ; GFX10PLUS-NEXT: ; return to shader part epilog
1376 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_l_1d: calls llvm.amdgcn.image.sample.l.1d with dmask 15; %lod is
; passed after the %s coordinate. The autogenerated checks expect only a
; single image_sample_l reading v[0:1] plus the s_waitcnt; no s_wqm / exec
; sequence is expected for this variant. Only the gfx1010 and gfx1100 runs
; print dim:SQ_RSRC_IMG_1D.
1380 define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
1381 ; VERDE-LABEL: sample_l_1d:
1382 ; VERDE: ; %bb.0: ; %main_body
1383 ; VERDE-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1384 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1385 ; VERDE-NEXT: ; return to shader part epilog
1387 ; GFX6789-LABEL: sample_l_1d:
1388 ; GFX6789: ; %bb.0: ; %main_body
1389 ; GFX6789-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1390 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1391 ; GFX6789-NEXT: ; return to shader part epilog
1393 ; GFX10PLUS-LABEL: sample_l_1d:
1394 ; GFX10PLUS: ; %bb.0: ; %main_body
1395 ; GFX10PLUS-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1396 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1397 ; GFX10PLUS-NEXT: ; return to shader part epilog
1399 %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample.l.2d with dmask 15: expects one image_sample_l taking (s, t, lod)
; from v[0:2]; the gfx10+ checks add dim:SQ_RSRC_IMG_2D.
1403 define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
1404 ; VERDE-LABEL: sample_l_2d:
1405 ; VERDE: ; %bb.0: ; %main_body
1406 ; VERDE-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1407 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1408 ; VERDE-NEXT: ; return to shader part epilog
1410 ; GFX6789-LABEL: sample_l_2d:
1411 ; GFX6789: ; %bb.0: ; %main_body
1412 ; GFX6789-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1413 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1414 ; GFX6789-NEXT: ; return to shader part epilog
1416 ; GFX10PLUS-LABEL: sample_l_2d:
1417 ; GFX10PLUS: ; %bb.0: ; %main_body
1418 ; GFX10PLUS-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1419 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1420 ; GFX10PLUS-NEXT: ; return to shader part epilog
1422 %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample.c.l.1d with dmask 15: expects one image_sample_c_l taking
; (zcompare, s, lod) from v[0:2]; the gfx10+ checks add dim:SQ_RSRC_IMG_1D.
1426 define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
1427 ; VERDE-LABEL: sample_c_l_1d:
1428 ; VERDE: ; %bb.0: ; %main_body
1429 ; VERDE-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1430 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1431 ; VERDE-NEXT: ; return to shader part epilog
1433 ; GFX6789-LABEL: sample_c_l_1d:
1434 ; GFX6789: ; %bb.0: ; %main_body
1435 ; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1436 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1437 ; GFX6789-NEXT: ; return to shader part epilog
1439 ; GFX10PLUS-LABEL: sample_c_l_1d:
1440 ; GFX10PLUS: ; %bb.0: ; %main_body
1441 ; GFX10PLUS-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1442 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1443 ; GFX10PLUS-NEXT: ; return to shader part epilog
1445 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample.c.l.2d with dmask 15: expects one image_sample_c_l taking
; (zcompare, s, t, lod) from v[0:3]; the gfx10+ checks add dim:SQ_RSRC_IMG_2D.
1449 define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
1450 ; VERDE-LABEL: sample_c_l_2d:
1451 ; VERDE: ; %bb.0: ; %main_body
1452 ; VERDE-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1453 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1454 ; VERDE-NEXT: ; return to shader part epilog
1456 ; GFX6789-LABEL: sample_c_l_2d:
1457 ; GFX6789: ; %bb.0: ; %main_body
1458 ; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1459 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1460 ; GFX6789-NEXT: ; return to shader part epilog
1462 ; GFX10PLUS-LABEL: sample_c_l_2d:
1463 ; GFX10PLUS: ; %bb.0: ; %main_body
1464 ; GFX10PLUS-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1465 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1466 ; GFX10PLUS-NEXT: ; return to shader part epilog
1468 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample.lz.1d with dmask 15: expects one image_sample_lz whose only address
; operand (s) is v0; the gfx10+ checks add dim:SQ_RSRC_IMG_1D.
1472 define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1473 ; VERDE-LABEL: sample_lz_1d:
1474 ; VERDE: ; %bb.0: ; %main_body
1475 ; VERDE-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
1476 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1477 ; VERDE-NEXT: ; return to shader part epilog
1479 ; GFX6789-LABEL: sample_lz_1d:
1480 ; GFX6789: ; %bb.0: ; %main_body
1481 ; GFX6789-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
1482 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1483 ; GFX6789-NEXT: ; return to shader part epilog
1485 ; GFX10PLUS-LABEL: sample_lz_1d:
1486 ; GFX10PLUS: ; %bb.0: ; %main_body
1487 ; GFX10PLUS-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1488 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1489 ; GFX10PLUS-NEXT: ; return to shader part epilog
1491 %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample.lz.2d with dmask 15: expects one image_sample_lz taking (s, t) from
; v[0:1]; the gfx10+ checks add dim:SQ_RSRC_IMG_2D.
1495 define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
1496 ; VERDE-LABEL: sample_lz_2d:
1497 ; VERDE: ; %bb.0: ; %main_body
1498 ; VERDE-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1499 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1500 ; VERDE-NEXT: ; return to shader part epilog
1502 ; GFX6789-LABEL: sample_lz_2d:
1503 ; GFX6789: ; %bb.0: ; %main_body
1504 ; GFX6789-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1505 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1506 ; GFX6789-NEXT: ; return to shader part epilog
1508 ; GFX10PLUS-LABEL: sample_lz_2d:
1509 ; GFX10PLUS: ; %bb.0: ; %main_body
1510 ; GFX10PLUS-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1511 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1512 ; GFX10PLUS-NEXT: ; return to shader part epilog
1514 %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample.c.lz.1d with dmask 15: expects one image_sample_c_lz taking
; (zcompare, s) from v[0:1]; the gfx10+ checks add dim:SQ_RSRC_IMG_1D.
1518 define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
1519 ; VERDE-LABEL: sample_c_lz_1d:
1520 ; VERDE: ; %bb.0: ; %main_body
1521 ; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1522 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1523 ; VERDE-NEXT: ; return to shader part epilog
1525 ; GFX6789-LABEL: sample_c_lz_1d:
1526 ; GFX6789: ; %bb.0: ; %main_body
1527 ; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1528 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1529 ; GFX6789-NEXT: ; return to shader part epilog
1531 ; GFX10PLUS-LABEL: sample_c_lz_1d:
1532 ; GFX10PLUS: ; %bb.0: ; %main_body
1533 ; GFX10PLUS-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1534 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1535 ; GFX10PLUS-NEXT: ; return to shader part epilog
1537 %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample.c.lz.2d with dmask 15: expects one image_sample_c_lz taking
; (zcompare, s, t) from v[0:2]; the gfx10+ checks add dim:SQ_RSRC_IMG_2D.
1541 define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
1542 ; VERDE-LABEL: sample_c_lz_2d:
1543 ; VERDE: ; %bb.0: ; %main_body
1544 ; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1545 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1546 ; VERDE-NEXT: ; return to shader part epilog
1548 ; GFX6789-LABEL: sample_c_lz_2d:
1549 ; GFX6789: ; %bb.0: ; %main_body
1550 ; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1551 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1552 ; GFX6789-NEXT: ; return to shader part epilog
1554 ; GFX10PLUS-LABEL: sample_c_lz_2d:
1555 ; GFX10PLUS: ; %bb.0: ; %main_body
1556 ; GFX10PLUS-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1557 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1558 ; GFX10PLUS-NEXT: ; return to shader part epilog
1560 %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample.c.d.o.2darray returning a single float with dmask 4: expects one
; image_sample_c_d_o writing v0 from nine address operands in v[0:8]. The
; verde and gfx900 checks expect the "da" modifier; the gfx10+ checks expect
; dim:SQ_RSRC_IMG_2D_ARRAY instead.
1564 define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
1565 ; VERDE-LABEL: sample_c_d_o_2darray_V1:
1566 ; VERDE: ; %bb.0: ; %main_body
1567 ; VERDE-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 da
1568 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1569 ; VERDE-NEXT: ; return to shader part epilog
1571 ; GFX6789-LABEL: sample_c_d_o_2darray_V1:
1572 ; GFX6789: ; %bb.0: ; %main_body
1573 ; GFX6789-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 da
1574 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1575 ; GFX6789-NEXT: ; return to shader part epilog
1577 ; GFX10PLUS-LABEL: sample_c_d_o_2darray_V1:
1578 ; GFX10PLUS: ; %bb.0: ; %main_body
1579 ; GFX10PLUS-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
1580 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1581 ; GFX10PLUS-NEXT: ; return to shader part epilog
1583 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Variant of the single-float sample.c.d.o.2darray test whose call passes
; i32 1 as the second-to-last operand and returns {float, i32}; the expected
; instruction carries the "tfe" modifier and a two-register destination.
; The checks expect the destination registers to be written with 0 before the
; sample, and the second result register (the i32 member) to be stored to
; %out. gfx1010 and gfx1100 use separate check prefixes here because their
; expected sequences differ: the gfx1100 output uses v_dual_mov_b32 and a
; bracketed per-register address list.
1587 define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, ptr addrspace(1) inreg %out) {
1588 ; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe:
1589 ; VERDE: ; %bb.0: ; %main_body
1590 ; VERDE-NEXT: v_mov_b32_e32 v9, 0
1591 ; VERDE-NEXT: v_mov_b32_e32 v10, v9
1592 ; VERDE-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 tfe da
1593 ; VERDE-NEXT: s_mov_b32 s15, 0xf000
1594 ; VERDE-NEXT: s_mov_b32 s14, -1
1595 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1596 ; VERDE-NEXT: v_mov_b32_e32 v0, v9
1597 ; VERDE-NEXT: buffer_store_dword v10, off, s[12:15], 0
1598 ; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1599 ; VERDE-NEXT: ; return to shader part epilog
1601 ; GFX6789-LABEL: sample_c_d_o_2darray_V1_tfe:
1602 ; GFX6789: ; %bb.0: ; %main_body
1603 ; GFX6789-NEXT: v_mov_b32_e32 v11, 0
1604 ; GFX6789-NEXT: v_mov_b32_e32 v12, v11
1605 ; GFX6789-NEXT: v_mov_b32_e32 v9, v11
1606 ; GFX6789-NEXT: v_mov_b32_e32 v10, v12
1607 ; GFX6789-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 tfe da
1608 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1609 ; GFX6789-NEXT: v_mov_b32_e32 v0, v9
1610 ; GFX6789-NEXT: global_store_dword v11, v10, s[12:13]
1611 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1612 ; GFX6789-NEXT: ; return to shader part epilog
1614 ; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe:
1615 ; GFX10: ; %bb.0: ; %main_body
1616 ; GFX10-NEXT: v_mov_b32_e32 v11, 0
1617 ; GFX10-NEXT: v_mov_b32_e32 v12, v11
1618 ; GFX10-NEXT: v_mov_b32_e32 v9, v11
1619 ; GFX10-NEXT: v_mov_b32_e32 v10, v12
1620 ; GFX10-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
1621 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1622 ; GFX10-NEXT: v_mov_b32_e32 v0, v9
1623 ; GFX10-NEXT: global_store_dword v11, v10, s[12:13]
1624 ; GFX10-NEXT: ; return to shader part epilog
1626 ; GFX11-LABEL: sample_c_d_o_2darray_V1_tfe:
1627 ; GFX11: ; %bb.0: ; %main_body
1628 ; GFX11-NEXT: v_mov_b32_e32 v11, 0
1629 ; GFX11-NEXT: v_dual_mov_b32 v9, v1 :: v_dual_mov_b32 v10, v0
1630 ; GFX11-NEXT: v_mov_b32_e32 v12, v11
1631 ; GFX11-NEXT: v_dual_mov_b32 v0, v11 :: v_dual_mov_b32 v1, v12
1632 ; GFX11-NEXT: image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v[4:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
1633 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1634 ; GFX11-NEXT: global_store_b32 v11, v1, s[12:13]
1635 ; GFX11-NEXT: ; return to shader part epilog
1637 %v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
1638 %v.vec = extractvalue {float, i32} %v, 0
1639 %v.err = extractvalue {float, i32} %v, 1
1640 store i32 %v.err, ptr addrspace(1) %out, align 4
; sample.c.d.o.2darray returning <2 x float> with dmask 6: expects one
; image_sample_c_d_o writing v[0:1] from address operands in v[0:8], with
; "da" on verde/gfx900 and dim:SQ_RSRC_IMG_2D_ARRAY on gfx10+.
1644 define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
1645 ; VERDE-LABEL: sample_c_d_o_2darray_V2:
1646 ; VERDE: ; %bb.0: ; %main_body
1647 ; VERDE-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 da
1648 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1649 ; VERDE-NEXT: ; return to shader part epilog
1651 ; GFX6789-LABEL: sample_c_d_o_2darray_V2:
1652 ; GFX6789: ; %bb.0: ; %main_body
1653 ; GFX6789-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 da
1654 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1655 ; GFX6789-NEXT: ; return to shader part epilog
1657 ; GFX10PLUS-LABEL: sample_c_d_o_2darray_V2:
1658 ; GFX10PLUS: ; %bb.0: ; %main_body
1659 ; GFX10PLUS-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
1660 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1661 ; GFX10PLUS-NEXT: ; return to shader part epilog
1663 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Variant of the <2 x float> sample.c.d.o.2darray test whose call passes
; i32 1 as the second-to-last operand and returns {<2 x float>, i32}. The two
; floats and the bitcast i32 are packed into lanes 0..2 of the returned
; <4 x float>; lane 3 is never written. The checks expect a three-register
; destination, written with 0 before the sample, and the "tfe" modifier.
; gfx1010 and gfx1100 use separate check prefixes because their expected
; register assignment differs.
1667 define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
1668 ; VERDE-LABEL: sample_c_d_o_2darray_V2_tfe:
1669 ; VERDE: ; %bb.0: ; %main_body
1670 ; VERDE-NEXT: v_mov_b32_e32 v9, 0
1671 ; VERDE-NEXT: v_mov_b32_e32 v10, v9
1672 ; VERDE-NEXT: v_mov_b32_e32 v11, v9
1673 ; VERDE-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 tfe da
1674 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1675 ; VERDE-NEXT: v_mov_b32_e32 v0, v9
1676 ; VERDE-NEXT: v_mov_b32_e32 v1, v10
1677 ; VERDE-NEXT: v_mov_b32_e32 v2, v11
1678 ; VERDE-NEXT: ; return to shader part epilog
1680 ; GFX6789-LABEL: sample_c_d_o_2darray_V2_tfe:
1681 ; GFX6789: ; %bb.0: ; %main_body
1682 ; GFX6789-NEXT: v_mov_b32_e32 v9, 0
1683 ; GFX6789-NEXT: v_mov_b32_e32 v10, v9
1684 ; GFX6789-NEXT: v_mov_b32_e32 v11, v9
1685 ; GFX6789-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 tfe da
1686 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1687 ; GFX6789-NEXT: v_mov_b32_e32 v0, v9
1688 ; GFX6789-NEXT: v_mov_b32_e32 v1, v10
1689 ; GFX6789-NEXT: v_mov_b32_e32 v2, v11
1690 ; GFX6789-NEXT: ; return to shader part epilog
1692 ; GFX10-LABEL: sample_c_d_o_2darray_V2_tfe:
1693 ; GFX10: ; %bb.0: ; %main_body
1694 ; GFX10-NEXT: v_mov_b32_e32 v9, 0
1695 ; GFX10-NEXT: v_mov_b32_e32 v10, v9
1696 ; GFX10-NEXT: v_mov_b32_e32 v11, v9
1697 ; GFX10-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
1698 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1699 ; GFX10-NEXT: v_mov_b32_e32 v0, v9
1700 ; GFX10-NEXT: v_mov_b32_e32 v1, v10
1701 ; GFX10-NEXT: v_mov_b32_e32 v2, v11
1702 ; GFX10-NEXT: ; return to shader part epilog
1704 ; GFX11-LABEL: sample_c_d_o_2darray_V2_tfe:
1705 ; GFX11: ; %bb.0: ; %main_body
1706 ; GFX11-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_mov_b32 v0, 0
1707 ; GFX11-NEXT: v_dual_mov_b32 v9, v2 :: v_dual_mov_b32 v10, v1
1708 ; GFX11-NEXT: v_mov_b32_e32 v1, v0
1709 ; GFX11-NEXT: v_mov_b32_e32 v2, v0
1710 ; GFX11-NEXT: image_sample_c_d_o v[0:2], [v11, v10, v9, v3, v[4:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
1711 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1712 ; GFX11-NEXT: ; return to shader part epilog
1714 %v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
1715 %v.vec = extractvalue {<2 x float>, i32} %v, 0
1716 %v.f1 = extractelement <2 x float> %v.vec, i32 0
1717 %v.f2 = extractelement <2 x float> %v.vec, i32 1
1718 %v.err = extractvalue {<2 x float>, i32} %v, 1
1719 %v.errf = bitcast i32 %v.err to float
1720 %res.0 = insertelement <4 x float> undef, float %v.f1, i32 0
1721 %res.1 = insertelement <4 x float> %res.0, float %v.f2, i32 1
1722 %res.2 = insertelement <4 x float> %res.1, float %v.errf, i32 2
1723 ret <4 x float> %res.2
; sample.1d with the i1 operand after %samp set to 1: the expected
; image_sample carries the "unorm" modifier on every target. The expected
; output also saves exec, applies s_wqm to it, and ANDs the saved mask back
; in before the sample.
1726 define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1727 ; VERDE-LABEL: sample_1d_unorm:
1728 ; VERDE: ; %bb.0: ; %main_body
1729 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1730 ; VERDE-NEXT: s_wqm_b64 exec, exec
1731 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1732 ; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
1733 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1734 ; VERDE-NEXT: ; return to shader part epilog
1736 ; GFX6789-LABEL: sample_1d_unorm:
1737 ; GFX6789: ; %bb.0: ; %main_body
1738 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1739 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1740 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1741 ; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
1742 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1743 ; GFX6789-NEXT: ; return to shader part epilog
1745 ; GFX10PLUS-LABEL: sample_1d_unorm:
1746 ; GFX10PLUS: ; %bb.0: ; %main_body
1747 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1748 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1749 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1750 ; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
1751 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1752 ; GFX10PLUS-NEXT: ; return to shader part epilog
1754 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0)
; sample.1d with the final i32 operand set to 1: the expected image_sample
; carries the "glc" modifier on every target.
1758 define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1759 ; VERDE-LABEL: sample_1d_glc:
1760 ; VERDE: ; %bb.0: ; %main_body
1761 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1762 ; VERDE-NEXT: s_wqm_b64 exec, exec
1763 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1764 ; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
1765 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1766 ; VERDE-NEXT: ; return to shader part epilog
1768 ; GFX6789-LABEL: sample_1d_glc:
1769 ; GFX6789: ; %bb.0: ; %main_body
1770 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1771 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1772 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1773 ; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
1774 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1775 ; GFX6789-NEXT: ; return to shader part epilog
1777 ; GFX10PLUS-LABEL: sample_1d_glc:
1778 ; GFX10PLUS: ; %bb.0: ; %main_body
1779 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1780 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1781 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1782 ; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc
1783 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1784 ; GFX10PLUS-NEXT: ; return to shader part epilog
1786 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1)
; sample.1d with the final i32 operand set to 2: the expected image_sample
; carries the "slc" modifier on every target.
1790 define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1791 ; VERDE-LABEL: sample_1d_slc:
1792 ; VERDE: ; %bb.0: ; %main_body
1793 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1794 ; VERDE-NEXT: s_wqm_b64 exec, exec
1795 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1796 ; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
1797 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1798 ; VERDE-NEXT: ; return to shader part epilog
1800 ; GFX6789-LABEL: sample_1d_slc:
1801 ; GFX6789: ; %bb.0: ; %main_body
1802 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1803 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1804 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1805 ; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
1806 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1807 ; GFX6789-NEXT: ; return to shader part epilog
1809 ; GFX10PLUS-LABEL: sample_1d_slc:
1810 ; GFX10PLUS: ; %bb.0: ; %main_body
1811 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1812 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1813 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1814 ; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc
1815 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1816 ; GFX10PLUS-NEXT: ; return to shader part epilog
1818 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 2)
; sample.1d with the final i32 operand set to 3: the expected image_sample
; carries both the "glc" and "slc" modifiers on every target.
1822 define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1823 ; VERDE-LABEL: sample_1d_glc_slc:
1824 ; VERDE: ; %bb.0: ; %main_body
1825 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1826 ; VERDE-NEXT: s_wqm_b64 exec, exec
1827 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1828 ; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
1829 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1830 ; VERDE-NEXT: ; return to shader part epilog
1832 ; GFX6789-LABEL: sample_1d_glc_slc:
1833 ; GFX6789: ; %bb.0: ; %main_body
1834 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1835 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1836 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1837 ; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
1838 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1839 ; GFX6789-NEXT: ; return to shader part epilog
1841 ; GFX10PLUS-LABEL: sample_1d_glc_slc:
1842 ; GFX10PLUS: ; %bb.0: ; %main_body
1843 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1844 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1845 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1846 ; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc
1847 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1848 ; GFX10PLUS-NEXT: ; return to shader part epilog
1850 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 3)
; The call requests all four channels (dmask 15) but only element 0 of the
; result is extracted; the checks expect the emitted image_sample to be
; narrowed to dmask:0x1 with a single-register destination (v0).
1854 define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1855 ; VERDE-LABEL: adjust_writemask_sample_0:
1856 ; VERDE: ; %bb.0: ; %main_body
1857 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1858 ; VERDE-NEXT: s_wqm_b64 exec, exec
1859 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1860 ; VERDE-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
1861 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1862 ; VERDE-NEXT: ; return to shader part epilog
1864 ; GFX6789-LABEL: adjust_writemask_sample_0:
1865 ; GFX6789: ; %bb.0: ; %main_body
1866 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1867 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1868 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1869 ; GFX6789-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
1870 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1871 ; GFX6789-NEXT: ; return to shader part epilog
1873 ; GFX10PLUS-LABEL: adjust_writemask_sample_0:
1874 ; GFX10PLUS: ; %bb.0: ; %main_body
1875 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1876 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1877 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1878 ; GFX10PLUS-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
1879 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1880 ; GFX10PLUS-NEXT: ; return to shader part epilog
1882 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1883 %elt0 = extractelement <4 x float> %r, i32 0
; The call requests dmask 15 but the shufflevector keeps only elements 0 and
; 1; the checks expect dmask:0x3 with a two-register destination v[0:1].
1887 define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1888 ; VERDE-LABEL: adjust_writemask_sample_01:
1889 ; VERDE: ; %bb.0: ; %main_body
1890 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1891 ; VERDE-NEXT: s_wqm_b64 exec, exec
1892 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1893 ; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
1894 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1895 ; VERDE-NEXT: ; return to shader part epilog
1897 ; GFX6789-LABEL: adjust_writemask_sample_01:
1898 ; GFX6789: ; %bb.0: ; %main_body
1899 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1900 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1901 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1902 ; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
1903 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1904 ; GFX6789-NEXT: ; return to shader part epilog
1906 ; GFX10PLUS-LABEL: adjust_writemask_sample_01:
1907 ; GFX10PLUS: ; %bb.0: ; %main_body
1908 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1909 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1910 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1911 ; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D
1912 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1913 ; GFX10PLUS-NEXT: ; return to shader part epilog
1915 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1916 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1917 ret <2 x float> %out
; The call requests dmask 15 but the shufflevector keeps only elements 0, 1
; and 2; the checks expect dmask:0x7 with a three-register destination
; v[0:2].
1920 define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1921 ; VERDE-LABEL: adjust_writemask_sample_012:
1922 ; VERDE: ; %bb.0: ; %main_body
1923 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1924 ; VERDE-NEXT: s_wqm_b64 exec, exec
1925 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1926 ; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
1927 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1928 ; VERDE-NEXT: ; return to shader part epilog
1930 ; GFX6789-LABEL: adjust_writemask_sample_012:
1931 ; GFX6789: ; %bb.0: ; %main_body
1932 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1933 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1934 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1935 ; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
1936 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1937 ; GFX6789-NEXT: ; return to shader part epilog
1939 ; GFX10PLUS-LABEL: adjust_writemask_sample_012:
1940 ; GFX10PLUS: ; %bb.0: ; %main_body
1941 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1942 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1943 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1944 ; GFX10PLUS-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_1D
1945 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1946 ; GFX10PLUS-NEXT: ; return to shader part epilog
1948 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1949 %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
1950 ret <3 x float> %out
; The call requests dmask 15 but the shufflevector keeps only elements 1 and
; 2; the checks expect dmask:0x6 with the two results in v[0:1].
1953 define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1954 ; VERDE-LABEL: adjust_writemask_sample_12:
1955 ; VERDE: ; %bb.0: ; %main_body
1956 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1957 ; VERDE-NEXT: s_wqm_b64 exec, exec
1958 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1959 ; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
1960 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1961 ; VERDE-NEXT: ; return to shader part epilog
1963 ; GFX6789-LABEL: adjust_writemask_sample_12:
1964 ; GFX6789: ; %bb.0: ; %main_body
1965 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1966 ; GFX6789-NEXT: s_wqm_b64 exec, exec
1967 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
1968 ; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
1969 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
1970 ; GFX6789-NEXT: ; return to shader part epilog
1972 ; GFX10PLUS-LABEL: adjust_writemask_sample_12:
1973 ; GFX10PLUS: ; %bb.0: ; %main_body
1974 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
1975 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
1976 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
1977 ; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
1978 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
1979 ; GFX10PLUS-NEXT: ; return to shader part epilog
1981 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1982 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
1983 ret <2 x float> %out
; The call requests dmask 15 but the shufflevector keeps only elements 0 and
; 3; the checks expect dmask:0x9 with the two results in v[0:1].
1986 define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1987 ; VERDE-LABEL: adjust_writemask_sample_03:
1988 ; VERDE: ; %bb.0: ; %main_body
1989 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
1990 ; VERDE-NEXT: s_wqm_b64 exec, exec
1991 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
1992 ; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
1993 ; VERDE-NEXT: s_waitcnt vmcnt(0)
1994 ; VERDE-NEXT: ; return to shader part epilog
1996 ; GFX6789-LABEL: adjust_writemask_sample_03:
1997 ; GFX6789: ; %bb.0: ; %main_body
1998 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
1999 ; GFX6789-NEXT: s_wqm_b64 exec, exec
2000 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
2001 ; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
2002 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
2003 ; GFX6789-NEXT: ; return to shader part epilog
2005 ; GFX10PLUS-LABEL: adjust_writemask_sample_03:
2006 ; GFX10PLUS: ; %bb.0: ; %main_body
2007 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
2008 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
2009 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
2010 ; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 dim:SQ_RSRC_IMG_1D
2011 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2012 ; GFX10PLUS-NEXT: ; return to shader part epilog
2014 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2015 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3>
2016 ret <2 x float> %out
; The call requests dmask 15 but the shufflevector keeps only elements 1 and
; 3; the checks expect dmask:0xa with the two results in v[0:1].
2019 define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2020 ; VERDE-LABEL: adjust_writemask_sample_13:
2021 ; VERDE: ; %bb.0: ; %main_body
2022 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
2023 ; VERDE-NEXT: s_wqm_b64 exec, exec
2024 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
2025 ; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
2026 ; VERDE-NEXT: s_waitcnt vmcnt(0)
2027 ; VERDE-NEXT: ; return to shader part epilog
2029 ; GFX6789-LABEL: adjust_writemask_sample_13:
2030 ; GFX6789: ; %bb.0: ; %main_body
2031 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
2032 ; GFX6789-NEXT: s_wqm_b64 exec, exec
2033 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
2034 ; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
2035 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
2036 ; GFX6789-NEXT: ; return to shader part epilog
2038 ; GFX10PLUS-LABEL: adjust_writemask_sample_13:
2039 ; GFX10PLUS: ; %bb.0: ; %main_body
2040 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
2041 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
2042 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
2043 ; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
2044 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2045 ; GFX10PLUS-NEXT: ; return to shader part epilog
2047 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2048 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3>
2049 ret <2 x float> %out
; The call requests dmask 15 but the shufflevector keeps only elements 1, 2
; and 3; the checks expect dmask:0xe with the three results in v[0:2].
2052 define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2053 ; VERDE-LABEL: adjust_writemask_sample_123:
2054 ; VERDE: ; %bb.0: ; %main_body
2055 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
2056 ; VERDE-NEXT: s_wqm_b64 exec, exec
2057 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
2058 ; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
2059 ; VERDE-NEXT: s_waitcnt vmcnt(0)
2060 ; VERDE-NEXT: ; return to shader part epilog
2062 ; GFX6789-LABEL: adjust_writemask_sample_123:
2063 ; GFX6789: ; %bb.0: ; %main_body
2064 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
2065 ; GFX6789-NEXT: s_wqm_b64 exec, exec
2066 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
2067 ; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
2068 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
2069 ; GFX6789-NEXT: ; return to shader part epilog
2071 ; GFX10PLUS-LABEL: adjust_writemask_sample_123:
2072 ; GFX10PLUS: ; %bb.0: ; %main_body
2073 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
2074 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
2075 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
2076 ; GFX10PLUS-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe dim:SQ_RSRC_IMG_1D
2077 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2078 ; GFX10PLUS-NEXT: ; return to shader part epilog
2080 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2081 %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
2082 ret <3 x float> %out
; Degenerate writemask test: the intrinsic is called with a dmask of 0.
; The expected output for every target contains no image_sample instruction
; at all, only the basic-block header and the shader epilog marker.
2085 define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2086 ; VERDE-LABEL: adjust_writemask_sample_none_enabled:
2087 ; VERDE: ; %bb.0: ; %main_body
2088 ; VERDE-NEXT: ; return to shader part epilog
2090 ; GFX6789-LABEL: adjust_writemask_sample_none_enabled:
2091 ; GFX6789: ; %bb.0: ; %main_body
2092 ; GFX6789-NEXT: ; return to shader part epilog
2094 ; GFX10PLUS-LABEL: adjust_writemask_sample_none_enabled:
2095 ; GFX10PLUS: ; %bb.0: ; %main_body
2096 ; GFX10PLUS-NEXT: ; return to shader part epilog
2098 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Writemask adjustment starting from a partial mask: the intrinsic is called
; with dmask 14 (0xe), and only elements 0 and 1 of the returned vector are
; extracted. The expected instruction on every target is narrowed to
; dmask:0x6 (the two lowest bits that were set in 0xe) with a two-register
; destination (v[0:1]).
; The second shufflevector operand is never selected by the mask, so it is
; spelled poison instead of the deprecated undef; this does not affect the
; expected output below.
2102 define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2103 ; VERDE-LABEL: adjust_writemask_sample_123_to_12:
2104 ; VERDE: ; %bb.0: ; %main_body
2105 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
2106 ; VERDE-NEXT: s_wqm_b64 exec, exec
2107 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
2108 ; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
2109 ; VERDE-NEXT: s_waitcnt vmcnt(0)
2110 ; VERDE-NEXT: ; return to shader part epilog
2112 ; GFX6789-LABEL: adjust_writemask_sample_123_to_12:
2113 ; GFX6789: ; %bb.0: ; %main_body
2114 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
2115 ; GFX6789-NEXT: s_wqm_b64 exec, exec
2116 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
2117 ; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
2118 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
2119 ; GFX6789-NEXT: ; return to shader part epilog
2121 ; GFX10PLUS-LABEL: adjust_writemask_sample_123_to_12:
2122 ; GFX10PLUS: ; %bb.0: ; %main_body
2123 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
2124 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
2125 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
2126 ; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
2127 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2128 ; GFX10PLUS-NEXT: ; return to shader part epilog
2130 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2131 %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 0, i32 1>
2132 ret <2 x float> %out
; Writemask adjustment starting from a partial mask: the intrinsic is called
; with dmask 11 (0xb), and only elements 1 and 2 of the returned vector are
; extracted. The expected instruction on every target is narrowed to
; dmask:0xa (the second and third bits that were set in 0xb) with a
; two-register destination (v[0:1]).
; The second shufflevector operand is never selected by the mask, so it is
; spelled poison instead of the deprecated undef; this does not affect the
; expected output below.
2135 define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2136 ; VERDE-LABEL: adjust_writemask_sample_013_to_13:
2137 ; VERDE: ; %bb.0: ; %main_body
2138 ; VERDE-NEXT: s_mov_b64 s[12:13], exec
2139 ; VERDE-NEXT: s_wqm_b64 exec, exec
2140 ; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
2141 ; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
2142 ; VERDE-NEXT: s_waitcnt vmcnt(0)
2143 ; VERDE-NEXT: ; return to shader part epilog
2145 ; GFX6789-LABEL: adjust_writemask_sample_013_to_13:
2146 ; GFX6789: ; %bb.0: ; %main_body
2147 ; GFX6789-NEXT: s_mov_b64 s[12:13], exec
2148 ; GFX6789-NEXT: s_wqm_b64 exec, exec
2149 ; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
2150 ; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
2151 ; GFX6789-NEXT: s_waitcnt vmcnt(0)
2152 ; GFX6789-NEXT: ; return to shader part epilog
2154 ; GFX10PLUS-LABEL: adjust_writemask_sample_013_to_13:
2155 ; GFX10PLUS: ; %bb.0: ; %main_body
2156 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
2157 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
2158 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
2159 ; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
2160 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
2161 ; GFX10PLUS-NEXT: ; return to shader part epilog
2163 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2164 %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 1, i32 2>
2165 ret <2 x float> %out
; Declarations of the llvm.amdgcn.image.sample.* intrinsics.
; Every declaration takes a leading i32, then the float address operands,
; then the <8 x i32> and <4 x i32> descriptors, and ends with i1, i32, i32.
; All of them carry attribute group #1.

; Plain sample, one declaration per image dimension. The second 1d
; declaration returns an extra i32 alongside the <4 x float> result.
2168 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2169 declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2170 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2171 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2172 declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2173 declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2174 declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; sample.c, sample.cl and sample.c.cl variants (1d and 2d).
2176 declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2177 declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2178 declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2179 declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2180 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2181 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; sample.b family (b, c.b, b.cl, c.b.cl; 1d and 2d).
2183 declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2184 declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2185 declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2186 declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2187 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2188 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2189 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2190 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; sample.d family (d, c.d, d.cl, c.d.cl; 1d and 2d).
2192 declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2193 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2194 declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2195 declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2196 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2197 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2198 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2199 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; sample.l and sample.c.l variants (1d and 2d).
2201 declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2202 declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2203 declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2204 declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; sample.lz and sample.c.lz variants (1d and 2d).
2206 declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2207 declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2208 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2209 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; sample.c.d.o on 2darray, declared for scalar float and <2 x float> results,
; each with and without an extra i32 in the return value. These take two
; leading i32 operands instead of one.
2211 declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2212 declare {float, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2213 declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2214 declare {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; Attribute groups referenced as #N elsewhere in this file. Group #1 is the
; one attached to the llvm.amdgcn.image.sample.* intrinsic declarations.
2216 attributes #0 = { nounwind }
2217 attributes #1 = { nounwind readonly }
2218 attributes #2 = { nounwind readnone }