1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX10,GFX10-SDAG %s
3 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX10,GFX10-GISEL %s
4 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX11,GFX11-SDAG %s
5 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX11,GFX11-GISEL %s
6 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
7 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
; 1D sample issued through the no-return (.nortn) intrinsic with dmask 15.
; The checks expect s_wqm_b32 followed by one `image_sample off, v0, ...`,
; i.e. an image_sample with `off` in place of a destination register.
9 define amdgpu_ps void @sample_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
10 ; GFX10PLUS-LABEL: sample_1d_nortn:
11 ; GFX10PLUS: ; %bb.0: ; %main_body
12 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
13 ; GFX10PLUS-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
14 ; GFX10PLUS-NEXT: s_endpgm
16 ; GFX12-LABEL: sample_1d_nortn:
17 ; GFX12: ; %bb.0: ; %main_body
18 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
19 ; GFX12-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
20 ; GFX12-NEXT: s_endpgm
22 call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; 2D no-return sample with coordinates (%s, %t).
; The gfx1030/gfx1100 checks take the address as the register range v[0:1];
; the gfx1200 checks take it as the register list [v0, v1].
26 define amdgpu_ps void @sample_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
27 ; GFX10PLUS-LABEL: sample_2d_nortn:
28 ; GFX10PLUS: ; %bb.0: ; %main_body
29 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
30 ; GFX10PLUS-NEXT: image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
31 ; GFX10PLUS-NEXT: s_endpgm
33 ; GFX12-LABEL: sample_2d_nortn:
34 ; GFX12: ; %bb.0: ; %main_body
35 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
36 ; GFX12-NEXT: image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
37 ; GFX12-NEXT: s_endpgm
39 call void @llvm.amdgcn.image.sample.2d.nortn.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; 3D no-return sample with coordinates (%s, %t, %r).
; Expected address operand: v[0:2] on gfx1030/gfx1100, [v0, v1, v2] on gfx1200.
43 define amdgpu_ps void @sample_3d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
44 ; GFX10PLUS-LABEL: sample_3d_nortn:
45 ; GFX10PLUS: ; %bb.0: ; %main_body
46 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
47 ; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
48 ; GFX10PLUS-NEXT: s_endpgm
50 ; GFX12-LABEL: sample_3d_nortn:
51 ; GFX12: ; %bb.0: ; %main_body
52 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
53 ; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
54 ; GFX12-NEXT: s_endpgm
56 call void @llvm.amdgcn.image.sample.3d.nortn.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Cube no-return sample with operands (%s, %t, %face); the checks expect
; dim:SQ_RSRC_IMG_CUBE and a three-register address operand.
60 define amdgpu_ps void @sample_cube_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
61 ; GFX10PLUS-LABEL: sample_cube_nortn:
62 ; GFX10PLUS: ; %bb.0: ; %main_body
63 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
64 ; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
65 ; GFX10PLUS-NEXT: s_endpgm
67 ; GFX12-LABEL: sample_cube_nortn:
68 ; GFX12: ; %bb.0: ; %main_body
69 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
70 ; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
71 ; GFX12-NEXT: s_endpgm
73 call void @llvm.amdgcn.image.sample.cube.nortn.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; 1D-array no-return sample with operands (%s, %slice); the checks expect
; dim:SQ_RSRC_IMG_1D_ARRAY and a two-register address operand.
77 define amdgpu_ps void @sample_1darray_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
78 ; GFX10PLUS-LABEL: sample_1darray_nortn:
79 ; GFX10PLUS: ; %bb.0: ; %main_body
80 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
81 ; GFX10PLUS-NEXT: image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
82 ; GFX10PLUS-NEXT: s_endpgm
84 ; GFX12-LABEL: sample_1darray_nortn:
85 ; GFX12: ; %bb.0: ; %main_body
86 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
87 ; GFX12-NEXT: image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
88 ; GFX12-NEXT: s_endpgm
90 call void @llvm.amdgcn.image.sample.1darray.nortn.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; 2D-array no-return sample with operands (%s, %t, %slice); the checks expect
; dim:SQ_RSRC_IMG_2D_ARRAY and a three-register address operand.
94 define amdgpu_ps void @sample_2darray_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
95 ; GFX10PLUS-LABEL: sample_2darray_nortn:
96 ; GFX10PLUS: ; %bb.0: ; %main_body
97 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
98 ; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
99 ; GFX10PLUS-NEXT: s_endpgm
101 ; GFX12-LABEL: sample_2darray_nortn:
102 ; GFX12: ; %bb.0: ; %main_body
103 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
104 ; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
105 ; GFX12-NEXT: s_endpgm
107 call void @llvm.amdgcn.image.sample.2darray.nortn.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; 1D no-return sample through the sample.b intrinsic; the checks expect
; image_sample_b with a two-register address operand.
; NOTE(review): the first float operand is named %zcompare, the same name the
; sample.c tests use; for the .b variant it is presumably the bias value.
; Confirm against the intrinsic definition before renaming.
111 define amdgpu_ps void @sample_b_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
112 ; GFX10PLUS-LABEL: sample_b_1d_nortn:
113 ; GFX10PLUS: ; %bb.0: ; %main_body
114 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
115 ; GFX10PLUS-NEXT: image_sample_b off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
116 ; GFX10PLUS-NEXT: s_endpgm
118 ; GFX12-LABEL: sample_b_1d_nortn:
119 ; GFX12: ; %bb.0: ; %main_body
120 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
121 ; GFX12-NEXT: image_sample_b off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
122 ; GFX12-NEXT: s_endpgm
124 call void @llvm.amdgcn.image.sample.b.1d.nortn.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; 2D no-return sample through the sample.b intrinsic; the checks expect
; image_sample_b with a three-register address operand.
; NOTE(review): the first float operand is named %zcompare, the same name the
; sample.c tests use; for the .b variant it is presumably the bias value.
; Confirm against the intrinsic definition before renaming.
128 define amdgpu_ps void @sample_b_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
129 ; GFX10PLUS-LABEL: sample_b_2d_nortn:
130 ; GFX10PLUS: ; %bb.0: ; %main_body
131 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
132 ; GFX10PLUS-NEXT: image_sample_b off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
133 ; GFX10PLUS-NEXT: s_endpgm
135 ; GFX12-LABEL: sample_b_2d_nortn:
136 ; GFX12: ; %bb.0: ; %main_body
137 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
138 ; GFX12-NEXT: image_sample_b off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
139 ; GFX12-NEXT: s_endpgm
141 call void @llvm.amdgcn.image.sample.b.2d.nortn.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; 1D no-return sample through the sample.c intrinsic with operands
; (%zcompare, %s); the checks expect image_sample_c with a two-register
; address operand.
145 define amdgpu_ps void @sample_c_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
146 ; GFX10PLUS-LABEL: sample_c_1d_nortn:
147 ; GFX10PLUS: ; %bb.0: ; %main_body
148 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
149 ; GFX10PLUS-NEXT: image_sample_c off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
150 ; GFX10PLUS-NEXT: s_endpgm
152 ; GFX12-LABEL: sample_c_1d_nortn:
153 ; GFX12: ; %bb.0: ; %main_body
154 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
155 ; GFX12-NEXT: image_sample_c off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
156 ; GFX12-NEXT: s_endpgm
158 call void @llvm.amdgcn.image.sample.c.1d.nortn.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; 2D no-return sample through the sample.c intrinsic with operands
; (%zcompare, %s, %t); the checks expect image_sample_c with a three-register
; address operand.
162 define amdgpu_ps void @sample_c_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
163 ; GFX10PLUS-LABEL: sample_c_2d_nortn:
164 ; GFX10PLUS: ; %bb.0: ; %main_body
165 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
166 ; GFX10PLUS-NEXT: image_sample_c off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
167 ; GFX10PLUS-NEXT: s_endpgm
169 ; GFX12-LABEL: sample_c_2d_nortn:
170 ; GFX12: ; %bb.0: ; %main_body
171 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
172 ; GFX12-NEXT: image_sample_c off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
173 ; GFX12-NEXT: s_endpgm
175 call void @llvm.amdgcn.image.sample.c.2d.nortn.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; 1D no-return sample through the sample.d intrinsic with explicit derivatives
; (%dsdh, %dsdv) ahead of the coordinate %s. Unlike the plain sample tests in
; this file, the checks expect no s_wqm_b32 before image_sample_d.
179 define amdgpu_ps void @sample_d_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
180 ; GFX10PLUS-LABEL: sample_d_1d_nortn:
181 ; GFX10PLUS: ; %bb.0: ; %main_body
182 ; GFX10PLUS-NEXT: image_sample_d off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
183 ; GFX10PLUS-NEXT: s_endpgm
185 ; GFX12-LABEL: sample_d_1d_nortn:
186 ; GFX12: ; %bb.0: ; %main_body
187 ; GFX12-NEXT: image_sample_d off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
188 ; GFX12-NEXT: s_endpgm
190 call void @llvm.amdgcn.image.sample.d.1d.nortn.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; 2D no-return sample through the sample.d intrinsic: four derivative operands
; followed by (%s, %t), six address dwords in total. The gfx1030/gfx1100 checks
; expect the range v[0:5]; the gfx1200 checks expect the list
; [v0, v1, v2, v[3:5]], whose last element is itself a register range.
; No s_wqm_b32 is expected.
194 define amdgpu_ps void @sample_d_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
195 ; GFX10PLUS-LABEL: sample_d_2d_nortn:
196 ; GFX10PLUS: ; %bb.0: ; %main_body
197 ; GFX10PLUS-NEXT: image_sample_d off, v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
198 ; GFX10PLUS-NEXT: s_endpgm
200 ; GFX12-LABEL: sample_d_2d_nortn:
201 ; GFX12: ; %bb.0: ; %main_body
202 ; GFX12-NEXT: image_sample_d off, [v0, v1, v2, v[3:5]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
203 ; GFX12-NEXT: s_endpgm
205 call void @llvm.amdgcn.image.sample.d.2d.nortn.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; 1D no-return sample through the sample.l intrinsic with operands (%s, %lod).
; The checks expect image_sample_l with no preceding s_wqm_b32.
209 define amdgpu_ps void @sample_l_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
210 ; GFX10PLUS-LABEL: sample_l_1d_nortn:
211 ; GFX10PLUS: ; %bb.0: ; %main_body
212 ; GFX10PLUS-NEXT: image_sample_l off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
213 ; GFX10PLUS-NEXT: s_endpgm
215 ; GFX12-LABEL: sample_l_1d_nortn:
216 ; GFX12: ; %bb.0: ; %main_body
217 ; GFX12-NEXT: image_sample_l off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
218 ; GFX12-NEXT: s_endpgm
220 call void @llvm.amdgcn.image.sample.l.1d.nortn.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; 2D no-return sample through the sample.l intrinsic with operands
; (%s, %t, %lod). The checks expect image_sample_l with no preceding s_wqm_b32.
224 define amdgpu_ps void @sample_l_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
225 ; GFX10PLUS-LABEL: sample_l_2d_nortn:
226 ; GFX10PLUS: ; %bb.0: ; %main_body
227 ; GFX10PLUS-NEXT: image_sample_l off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
228 ; GFX10PLUS-NEXT: s_endpgm
230 ; GFX12-LABEL: sample_l_2d_nortn:
231 ; GFX12: ; %bb.0: ; %main_body
232 ; GFX12-NEXT: image_sample_l off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
233 ; GFX12-NEXT: s_endpgm
235 call void @llvm.amdgcn.image.sample.l.2d.nortn.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Mix test 1: a no-return sample followed by a returning sample of the same
; coordinate %s. The checks expect exec to be saved in s12, s_wqm_b32 applied,
; and exec restored with s_and_b32 before either image_sample is issued, then a
; single wait (vmcnt(0) / s_wait_samplecnt 0x0) before the epilog.
239 define amdgpu_ps <4 x float> @sample_nortn_mix_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
240 ; GFX10PLUS-LABEL: sample_nortn_mix_1:
241 ; GFX10PLUS: ; %bb.0: ; %main_body
242 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
243 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
244 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
245 ; GFX10PLUS-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
246 ; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
247 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
248 ; GFX10PLUS-NEXT: ; return to shader part epilog
250 ; GFX12-LABEL: sample_nortn_mix_1:
251 ; GFX12: ; %bb.0: ; %main_body
252 ; GFX12-NEXT: s_mov_b32 s12, exec_lo
253 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
254 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
255 ; GFX12-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
256 ; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
257 ; GFX12-NEXT: s_wait_samplecnt 0x0
258 ; GFX12-NEXT: ; return to shader part epilog
260 call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
261 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Mix test 2: the reverse order of mix test 1 (returning sample first, then the
; no-return sample of the same %s). The returning sample writes v[0:3], which
; overlaps the coordinate in v0, so the checks expect v0 to be copied to v4
; beforehand and the no-return sample to read v4.
265 define amdgpu_ps <4 x float> @sample_nortn_mix_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
266 ; GFX10PLUS-LABEL: sample_nortn_mix_2:
267 ; GFX10PLUS: ; %bb.0: ; %main_body
268 ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
269 ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
270 ; GFX10PLUS-NEXT: v_mov_b32_e32 v4, v0
271 ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
272 ; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
273 ; GFX10PLUS-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
274 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
275 ; GFX10PLUS-NEXT: ; return to shader part epilog
277 ; GFX12-LABEL: sample_nortn_mix_2:
278 ; GFX12: ; %bb.0: ; %main_body
279 ; GFX12-NEXT: s_mov_b32 s12, exec_lo
280 ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
281 ; GFX12-NEXT: v_mov_b32_e32 v4, v0
282 ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
283 ; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
284 ; GFX12-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
285 ; GFX12-NEXT: s_wait_samplecnt 0x0
286 ; GFX12-NEXT: ; return to shader part epilog
288 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
289 call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Mix test 3: element 0 of a returning sample feeds a second returning sample,
; with a no-return sample of %s issued in between. SelectionDAG and GlobalISel
; differ only in the first sample: the SDAG checks expect it narrowed to
; dmask:0x1 writing v1, the GISel checks expect the full dmask:0xf into v[1:4].
; In both, the wait before the dependent sample is vmcnt(1) /
; s_wait_samplecnt 0x1 rather than 0.
293 define amdgpu_ps <4 x float> @sample_nortn_mix_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
294 ; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_3:
295 ; GFX10PLUS-SDAG: ; %bb.0: ; %main_body
296 ; GFX10PLUS-SDAG-NEXT: s_mov_b32 s12, exec_lo
297 ; GFX10PLUS-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
298 ; GFX10PLUS-SDAG-NEXT: image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
299 ; GFX10PLUS-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
300 ; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
301 ; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(1)
302 ; GFX10PLUS-SDAG-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
303 ; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0)
304 ; GFX10PLUS-SDAG-NEXT: ; return to shader part epilog
306 ; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_3:
307 ; GFX10PLUS-GISEL: ; %bb.0: ; %main_body
308 ; GFX10PLUS-GISEL-NEXT: s_mov_b32 s12, exec_lo
309 ; GFX10PLUS-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
310 ; GFX10PLUS-GISEL-NEXT: image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
311 ; GFX10PLUS-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
312 ; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
313 ; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(1)
314 ; GFX10PLUS-GISEL-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
315 ; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0)
316 ; GFX10PLUS-GISEL-NEXT: ; return to shader part epilog
318 ; GFX12-SDAG-LABEL: sample_nortn_mix_3:
319 ; GFX12-SDAG: ; %bb.0: ; %main_body
320 ; GFX12-SDAG-NEXT: s_mov_b32 s12, exec_lo
321 ; GFX12-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
322 ; GFX12-SDAG-NEXT: image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
323 ; GFX12-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
324 ; GFX12-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
325 ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x1
326 ; GFX12-SDAG-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
327 ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
328 ; GFX12-SDAG-NEXT: ; return to shader part epilog
330 ; GFX12-GISEL-LABEL: sample_nortn_mix_3:
331 ; GFX12-GISEL: ; %bb.0: ; %main_body
332 ; GFX12-GISEL-NEXT: s_mov_b32 s12, exec_lo
333 ; GFX12-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
334 ; GFX12-GISEL-NEXT: image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
335 ; GFX12-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
336 ; GFX12-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
337 ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x1
338 ; GFX12-GISEL-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
339 ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
340 ; GFX12-GISEL-NEXT: ; return to shader part epilog
342 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
343 %v.0 = extractelement <4 x float> %v, i32 0
344 call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
345 %u = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %v.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Mix test 4: a longer chain of two returning samples and five no-return
; samples. The first returning sample feeds three no-return samples and the
; second returning sample, whose element 0 feeds the last no-return sample.
; The checks expect two partial waits (vmcnt(1) then vmcnt(2), or the
; s_wait_samplecnt equivalents) before the dependent samples, and exec
; restored with s_and_b32 only after the second returning sample.
; NOTE(review): %v.0 through %v.3 all extract element 0 of %v; if elements
; 0..3 were intended, the autogenerated checks (e.g. the SDAG dmask:0x1) would
; have to be regenerated. Confirm the intent before changing it.
349 define amdgpu_ps <4 x float> @sample_nortn_mix_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
350 ; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_4:
351 ; GFX10PLUS-SDAG: ; %bb.0: ; %main_body
352 ; GFX10PLUS-SDAG-NEXT: s_mov_b32 s12, exec_lo
353 ; GFX10PLUS-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
354 ; GFX10PLUS-SDAG-NEXT: image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
355 ; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
356 ; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(1)
357 ; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
358 ; GFX10PLUS-SDAG-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
359 ; GFX10PLUS-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
360 ; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
361 ; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
362 ; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(2)
363 ; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
364 ; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0)
365 ; GFX10PLUS-SDAG-NEXT: ; return to shader part epilog
367 ; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_4:
368 ; GFX10PLUS-GISEL: ; %bb.0: ; %main_body
369 ; GFX10PLUS-GISEL-NEXT: s_mov_b32 s12, exec_lo
370 ; GFX10PLUS-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
371 ; GFX10PLUS-GISEL-NEXT: image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
372 ; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
373 ; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(1)
374 ; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
375 ; GFX10PLUS-GISEL-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
376 ; GFX10PLUS-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
377 ; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
378 ; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
379 ; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(2)
380 ; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
381 ; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0)
382 ; GFX10PLUS-GISEL-NEXT: ; return to shader part epilog
384 ; GFX12-SDAG-LABEL: sample_nortn_mix_4:
385 ; GFX12-SDAG: ; %bb.0: ; %main_body
386 ; GFX12-SDAG-NEXT: s_mov_b32 s12, exec_lo
387 ; GFX12-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
388 ; GFX12-SDAG-NEXT: image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
389 ; GFX12-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
390 ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x1
391 ; GFX12-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
392 ; GFX12-SDAG-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
393 ; GFX12-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
394 ; GFX12-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
395 ; GFX12-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
396 ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x2
397 ; GFX12-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
398 ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
399 ; GFX12-SDAG-NEXT: ; return to shader part epilog
401 ; GFX12-GISEL-LABEL: sample_nortn_mix_4:
402 ; GFX12-GISEL: ; %bb.0: ; %main_body
403 ; GFX12-GISEL-NEXT: s_mov_b32 s12, exec_lo
404 ; GFX12-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
405 ; GFX12-GISEL-NEXT: image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
406 ; GFX12-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
407 ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x1
408 ; GFX12-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
409 ; GFX12-GISEL-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
410 ; GFX12-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
411 ; GFX12-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
412 ; GFX12-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
413 ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x2
414 ; GFX12-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
415 ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
416 ; GFX12-GISEL-NEXT: ; return to shader part epilog
418 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
419 %v.0 = extractelement <4 x float> %v, i32 0
420 %v.1 = extractelement <4 x float> %v, i32 0
421 %v.2 = extractelement <4 x float> %v, i32 0
422 %v.3 = extractelement <4 x float> %v, i32 0
423 call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
424 call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
425 %u = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %v.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
426 %u.0 = extractelement <4 x float> %u, i32 0
427 call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
428 call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.3, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
429 call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %u.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; 1D no-return sample.d with half-precision derivatives (%dsdh, %dsdv) and a
; float coordinate %s. The checks expect the image_sample_d_g16 opcode with a
; three-register address operand and no s_wqm_b32.
433 define amdgpu_ps void @sample_d_1d_g16_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
434 ; GFX10PLUS-LABEL: sample_d_1d_g16_nortn:
435 ; GFX10PLUS: ; %bb.0: ; %main_body
436 ; GFX10PLUS-NEXT: image_sample_d_g16 off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
437 ; GFX10PLUS-NEXT: s_endpgm
439 ; GFX12-LABEL: sample_d_1d_g16_nortn:
440 ; GFX12: ; %bb.0: ; %main_body
441 ; GFX12-NEXT: image_sample_d_g16 off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
442 ; GFX12-NEXT: s_endpgm
444 call void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Declarations of the image-sample intrinsics called by the tests in this file.
; Each name must match its call site exactly. The two sample.d declarations
; used to be spelled `...d.1d.f32.nortn.f32` / `...d.2d.f32.nortn.f32`, which
; matched no call in this file; they now use the `.nortn.<grad>.<coord>` suffix
; order of the calls and of the g16 declaration.
448 declare void @llvm.amdgcn.image.sample.1d.nortn.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
449 declare void @llvm.amdgcn.image.sample.2d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
450 declare void @llvm.amdgcn.image.sample.3d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
451 declare void @llvm.amdgcn.image.sample.cube.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
452 declare void @llvm.amdgcn.image.sample.1darray.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
453 declare void @llvm.amdgcn.image.sample.2darray.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
455 declare void @llvm.amdgcn.image.sample.b.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
456 declare void @llvm.amdgcn.image.sample.b.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
458 declare void @llvm.amdgcn.image.sample.c.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
459 declare void @llvm.amdgcn.image.sample.c.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
461 declare void @llvm.amdgcn.image.sample.d.1d.nortn.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
462 declare void @llvm.amdgcn.image.sample.d.2d.nortn.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
464 declare void @llvm.amdgcn.image.sample.l.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
465 declare void @llvm.amdgcn.image.sample.l.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
467 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
469 declare void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
; #0 is attached to every no-return (.nortn) declaration above; #1, which adds
; readonly, is attached only to the value-returning sample.1d declaration.
471 attributes #0 = { nounwind }
472 attributes #1 = { nounwind readonly }
473 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
475 ; GFX10-GISEL: {{.*}}
478 ; GFX11-GISEL: {{.*}}