1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10GISEL %s
4 ; TODO: global-isel produces more code than SelectionDAG for some of these tests - more combines will be needed in the postregbankselectcombine phase.
5 ; This test depends on other changes, which are in review separately, in order to pass.
; sample_d_1d: calls llvm.amdgcn.image.sample.d.1d with float derivatives
; (%dsdh, %dsdv), a half coordinate (%s) and dmask 15.
; Both llc runs are expected to emit a single image_sample_d carrying the a16
; modifier with address operand v[0:2] and no preparatory instructions.
7 define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s) {
8 ; GFX10-LABEL: sample_d_1d:
9 ; GFX10: ; %bb.0: ; %main_body
10 ; GFX10-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
11 ; GFX10-NEXT: s_waitcnt vmcnt(0)
12 ; GFX10-NEXT: ; return to shader part epilog
14 ; GFX10GISEL-LABEL: sample_d_1d:
15 ; GFX10GISEL: ; %bb.0: ; %main_body
16 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
17 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
18 ; GFX10GISEL-NEXT: ; return to shader part epilog
20 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_d_2d: calls llvm.amdgcn.image.sample.d.2d with four float derivatives
; and two half coordinates (%s, %t), dmask 15.
; Expected output first merges v4 and v5 into v4 (presumably packing the two
; half coordinates into one register - confirm): the SelectionDAG run uses one
; v_perm_b32, the GlobalISel run uses v_and_b32 followed by v_lshl_or_b32.
; Both then issue image_sample_d with a16 and address operand v[0:4].
24 define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
25 ; GFX10-LABEL: sample_d_2d:
26 ; GFX10: ; %bb.0: ; %main_body
27 ; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
28 ; GFX10-NEXT: image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
29 ; GFX10-NEXT: s_waitcnt vmcnt(0)
30 ; GFX10-NEXT: ; return to shader part epilog
32 ; GFX10GISEL-LABEL: sample_d_2d:
33 ; GFX10GISEL: ; %bb.0: ; %main_body
34 ; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
35 ; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v5, 16, v4
36 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
37 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
38 ; GFX10GISEL-NEXT: ; return to shader part epilog
40 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_d_3d: calls llvm.amdgcn.image.sample.d.3d with six float derivatives
; and three half coordinates (%s, %t, %r), dmask 15.
; SelectionDAG run: seven v_mov_b32 copies plus one v_perm_b32 build the
; address in v[8:15] before image_sample_d a16.
; GlobalISel run: two v_mov_b32, one v_and_b32 and one v_lshl_or_b32 build the
; address in place, and image_sample_d a16 reads v[0:7].
44 define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r) {
45 ; GFX10-LABEL: sample_d_3d:
46 ; GFX10: ; %bb.0: ; %main_body
47 ; GFX10-NEXT: v_mov_b32_e32 v15, v8
48 ; GFX10-NEXT: v_mov_b32_e32 v13, v5
49 ; GFX10-NEXT: v_mov_b32_e32 v12, v4
50 ; GFX10-NEXT: v_mov_b32_e32 v11, v3
51 ; GFX10-NEXT: v_mov_b32_e32 v10, v2
52 ; GFX10-NEXT: v_mov_b32_e32 v9, v1
53 ; GFX10-NEXT: v_mov_b32_e32 v8, v0
54 ; GFX10-NEXT: v_perm_b32 v14, v7, v6, 0x5040100
55 ; GFX10-NEXT: image_sample_d v[0:3], v[8:15], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
56 ; GFX10-NEXT: s_waitcnt vmcnt(0)
57 ; GFX10-NEXT: ; return to shader part epilog
59 ; GFX10GISEL-LABEL: sample_d_3d:
60 ; GFX10GISEL: ; %bb.0: ; %main_body
61 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v7
62 ; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
63 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8
64 ; GFX10GISEL-NEXT: v_lshl_or_b32 v6, v9, 16, v6
65 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
66 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
67 ; GFX10GISEL-NEXT: ; return to shader part epilog
69 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_d_1d: calls llvm.amdgcn.image.sample.c.d.1d with a float %zcompare,
; float derivatives (%dsdh, %dsdv) and a half coordinate (%s), dmask 15.
; Both llc runs are expected to emit a single image_sample_c_d with a16 and
; address operand v[0:3], with no preparatory instructions.
73 define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s) {
74 ; GFX10-LABEL: sample_c_d_1d:
75 ; GFX10: ; %bb.0: ; %main_body
76 ; GFX10-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
77 ; GFX10-NEXT: s_waitcnt vmcnt(0)
78 ; GFX10-NEXT: ; return to shader part epilog
80 ; GFX10GISEL-LABEL: sample_c_d_1d:
81 ; GFX10GISEL: ; %bb.0: ; %main_body
82 ; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
83 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
84 ; GFX10GISEL-NEXT: ; return to shader part epilog
86 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_d_2d: calls llvm.amdgcn.image.sample.c.d.2d with a float %zcompare,
; four float derivatives and two half coordinates (%s, %t), dmask 15.
; Expected output merges v5 and v6 into v5 (v_perm_b32 in the SelectionDAG run;
; v_and_b32 + v_lshl_or_b32 in the GlobalISel run), then issues
; image_sample_c_d with a16 and address operand v[0:5].
90 define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
91 ; GFX10-LABEL: sample_c_d_2d:
92 ; GFX10: ; %bb.0: ; %main_body
93 ; GFX10-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
94 ; GFX10-NEXT: image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
95 ; GFX10-NEXT: s_waitcnt vmcnt(0)
96 ; GFX10-NEXT: ; return to shader part epilog
98 ; GFX10GISEL-LABEL: sample_c_d_2d:
99 ; GFX10GISEL: ; %bb.0: ; %main_body
100 ; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
101 ; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v6, 16, v5
102 ; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
103 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
104 ; GFX10GISEL-NEXT: ; return to shader part epilog
106 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_d_cl_1d: calls llvm.amdgcn.image.sample.d.cl.1d with float derivatives
; (%dsdh, %dsdv) and two half operands (%s, %clamp), dmask 15.
; Expected output merges v2 and v3 into v2 (v_perm_b32 in the SelectionDAG run;
; v_and_b32 + v_lshl_or_b32 in the GlobalISel run), then issues
; image_sample_d_cl with a16 and address operand v[0:2].
110 define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) {
111 ; GFX10-LABEL: sample_d_cl_1d:
112 ; GFX10: ; %bb.0: ; %main_body
113 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
114 ; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
115 ; GFX10-NEXT: s_waitcnt vmcnt(0)
116 ; GFX10-NEXT: ; return to shader part epilog
118 ; GFX10GISEL-LABEL: sample_d_cl_1d:
119 ; GFX10GISEL: ; %bb.0: ; %main_body
120 ; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
121 ; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v3, 16, v2
122 ; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
123 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
124 ; GFX10GISEL-NEXT: ; return to shader part epilog
126 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_d_cl_2d: calls llvm.amdgcn.image.sample.d.cl.2d with four float
; derivatives and three half operands (%s, %t, %clamp), dmask 15.
; SelectionDAG run: five v_mov_b32 copies plus one v_perm_b32 build the address
; in v[6:11] before image_sample_d_cl a16.
; GlobalISel run: two v_mov_b32, one v_and_b32 and one v_lshl_or_b32, then
; image_sample_d_cl a16 reading v[0:5].
130 define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
131 ; GFX10-LABEL: sample_d_cl_2d:
132 ; GFX10: ; %bb.0: ; %main_body
133 ; GFX10-NEXT: v_mov_b32_e32 v11, v6
134 ; GFX10-NEXT: v_mov_b32_e32 v9, v3
135 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
136 ; GFX10-NEXT: v_mov_b32_e32 v7, v1
137 ; GFX10-NEXT: v_mov_b32_e32 v6, v0
138 ; GFX10-NEXT: v_perm_b32 v10, v5, v4, 0x5040100
139 ; GFX10-NEXT: image_sample_d_cl v[0:3], v[6:11], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
140 ; GFX10-NEXT: s_waitcnt vmcnt(0)
141 ; GFX10-NEXT: ; return to shader part epilog
143 ; GFX10GISEL-LABEL: sample_d_cl_2d:
144 ; GFX10GISEL: ; %bb.0: ; %main_body
145 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v5
146 ; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
147 ; GFX10GISEL-NEXT: v_mov_b32_e32 v5, v6
148 ; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v7, 16, v4
149 ; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
150 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
151 ; GFX10GISEL-NEXT: ; return to shader part epilog
153 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_d_cl_1d: calls llvm.amdgcn.image.sample.c.d.cl.1d with a float
; %zcompare, float derivatives and two half operands (%s, %clamp), dmask 15.
; Expected output merges v3 and v4 into v3 (v_perm_b32 in the SelectionDAG run;
; v_and_b32 + v_lshl_or_b32 in the GlobalISel run), then issues
; image_sample_c_d_cl with a16 and address operand v[0:3].
157 define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) {
158 ; GFX10-LABEL: sample_c_d_cl_1d:
159 ; GFX10: ; %bb.0: ; %main_body
160 ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
161 ; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
162 ; GFX10-NEXT: s_waitcnt vmcnt(0)
163 ; GFX10-NEXT: ; return to shader part epilog
165 ; GFX10GISEL-LABEL: sample_c_d_cl_1d:
166 ; GFX10GISEL: ; %bb.0: ; %main_body
167 ; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
168 ; GFX10GISEL-NEXT: v_lshl_or_b32 v3, v4, 16, v3
169 ; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
170 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
171 ; GFX10GISEL-NEXT: ; return to shader part epilog
173 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_d_cl_2d: calls llvm.amdgcn.image.sample.c.d.cl.2d with a float
; %zcompare, four float derivatives and three half operands (%s, %t, %clamp),
; dmask 15.
; SelectionDAG run: six v_mov_b32 copies plus one v_perm_b32 build the address
; in v[7:13] before image_sample_c_d_cl a16.
; GlobalISel run: two v_mov_b32, one v_and_b32 and one v_lshl_or_b32, then
; image_sample_c_d_cl a16 reading v[0:6].
177 define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
178 ; GFX10-LABEL: sample_c_d_cl_2d:
179 ; GFX10: ; %bb.0: ; %main_body
180 ; GFX10-NEXT: v_mov_b32_e32 v13, v7
181 ; GFX10-NEXT: v_mov_b32_e32 v11, v4
182 ; GFX10-NEXT: v_mov_b32_e32 v10, v3
183 ; GFX10-NEXT: v_mov_b32_e32 v9, v2
184 ; GFX10-NEXT: v_mov_b32_e32 v8, v1
185 ; GFX10-NEXT: v_mov_b32_e32 v7, v0
186 ; GFX10-NEXT: v_perm_b32 v12, v6, v5, 0x5040100
187 ; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[7:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
188 ; GFX10-NEXT: s_waitcnt vmcnt(0)
189 ; GFX10-NEXT: ; return to shader part epilog
191 ; GFX10GISEL-LABEL: sample_c_d_cl_2d:
192 ; GFX10GISEL: ; %bb.0: ; %main_body
193 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v6
194 ; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
195 ; GFX10GISEL-NEXT: v_mov_b32_e32 v6, v7
196 ; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v8, 16, v5
197 ; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
198 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
199 ; GFX10GISEL-NEXT: ; return to shader part epilog
201 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_cd_1d: calls llvm.amdgcn.image.sample.cd.1d with float derivatives
; (%dsdh, %dsdv), a half coordinate (%s) and dmask 15.
; Both llc runs are expected to emit a single image_sample_cd with a16 and
; address operand v[0:2], with no preparatory instructions.
205 define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s) {
206 ; GFX10-LABEL: sample_cd_1d:
207 ; GFX10: ; %bb.0: ; %main_body
208 ; GFX10-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
209 ; GFX10-NEXT: s_waitcnt vmcnt(0)
210 ; GFX10-NEXT: ; return to shader part epilog
212 ; GFX10GISEL-LABEL: sample_cd_1d:
213 ; GFX10GISEL: ; %bb.0: ; %main_body
214 ; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
215 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
216 ; GFX10GISEL-NEXT: ; return to shader part epilog
218 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_cd_2d: calls llvm.amdgcn.image.sample.cd.2d with four float
; derivatives and two half coordinates (%s, %t), dmask 15.
; Expected output merges v4 and v5 into v4 (v_perm_b32 in the SelectionDAG run;
; v_and_b32 + v_lshl_or_b32 in the GlobalISel run), then issues
; image_sample_cd with a16 and address operand v[0:4].
222 define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
223 ; GFX10-LABEL: sample_cd_2d:
224 ; GFX10: ; %bb.0: ; %main_body
225 ; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
226 ; GFX10-NEXT: image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
227 ; GFX10-NEXT: s_waitcnt vmcnt(0)
228 ; GFX10-NEXT: ; return to shader part epilog
230 ; GFX10GISEL-LABEL: sample_cd_2d:
231 ; GFX10GISEL: ; %bb.0: ; %main_body
232 ; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
233 ; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v5, 16, v4
234 ; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
235 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
236 ; GFX10GISEL-NEXT: ; return to shader part epilog
238 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_cd_1d: calls llvm.amdgcn.image.sample.c.cd.1d with a float
; %zcompare, float derivatives (%dsdh, %dsdv) and a half coordinate (%s),
; dmask 15.
; Both llc runs are expected to emit a single image_sample_c_cd with a16 and
; address operand v[0:3], with no preparatory instructions.
242 define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s) {
243 ; GFX10-LABEL: sample_c_cd_1d:
244 ; GFX10: ; %bb.0: ; %main_body
245 ; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
246 ; GFX10-NEXT: s_waitcnt vmcnt(0)
247 ; GFX10-NEXT: ; return to shader part epilog
249 ; GFX10GISEL-LABEL: sample_c_cd_1d:
250 ; GFX10GISEL: ; %bb.0: ; %main_body
251 ; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
252 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
253 ; GFX10GISEL-NEXT: ; return to shader part epilog
255 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_cd_2d: calls llvm.amdgcn.image.sample.c.cd.2d with a float
; %zcompare, four float derivatives and two half coordinates (%s, %t),
; dmask 15.
; Expected output merges v5 and v6 into v5 (v_perm_b32 in the SelectionDAG run;
; v_and_b32 + v_lshl_or_b32 in the GlobalISel run), then issues
; image_sample_c_cd with a16 and address operand v[0:5].
259 define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
260 ; GFX10-LABEL: sample_c_cd_2d:
261 ; GFX10: ; %bb.0: ; %main_body
262 ; GFX10-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
263 ; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
264 ; GFX10-NEXT: s_waitcnt vmcnt(0)
265 ; GFX10-NEXT: ; return to shader part epilog
267 ; GFX10GISEL-LABEL: sample_c_cd_2d:
268 ; GFX10GISEL: ; %bb.0: ; %main_body
269 ; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
270 ; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v6, 16, v5
271 ; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
272 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
273 ; GFX10GISEL-NEXT: ; return to shader part epilog
275 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_cd_cl_1d: calls llvm.amdgcn.image.sample.cd.cl.1d with float
; derivatives (%dsdh, %dsdv) and two half operands (%s, %clamp), dmask 15.
; Expected output merges v2 and v3 into v2 (v_perm_b32 in the SelectionDAG run;
; v_and_b32 + v_lshl_or_b32 in the GlobalISel run), then issues
; image_sample_cd_cl with a16 and address operand v[0:2].
279 define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) {
280 ; GFX10-LABEL: sample_cd_cl_1d:
281 ; GFX10: ; %bb.0: ; %main_body
282 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
283 ; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
284 ; GFX10-NEXT: s_waitcnt vmcnt(0)
285 ; GFX10-NEXT: ; return to shader part epilog
287 ; GFX10GISEL-LABEL: sample_cd_cl_1d:
288 ; GFX10GISEL: ; %bb.0: ; %main_body
289 ; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
290 ; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v3, 16, v2
291 ; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
292 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
293 ; GFX10GISEL-NEXT: ; return to shader part epilog
295 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_cd_cl_2d: calls llvm.amdgcn.image.sample.cd.cl.2d with four float
; derivatives and three half operands (%s, %t, %clamp), dmask 15.
; SelectionDAG run: five v_mov_b32 copies plus one v_perm_b32 build the address
; in v[6:11] before image_sample_cd_cl a16.
; GlobalISel run: two v_mov_b32, one v_and_b32 and one v_lshl_or_b32, then
; image_sample_cd_cl a16 reading v[0:5].
299 define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
300 ; GFX10-LABEL: sample_cd_cl_2d:
301 ; GFX10: ; %bb.0: ; %main_body
302 ; GFX10-NEXT: v_mov_b32_e32 v11, v6
303 ; GFX10-NEXT: v_mov_b32_e32 v9, v3
304 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
305 ; GFX10-NEXT: v_mov_b32_e32 v7, v1
306 ; GFX10-NEXT: v_mov_b32_e32 v6, v0
307 ; GFX10-NEXT: v_perm_b32 v10, v5, v4, 0x5040100
308 ; GFX10-NEXT: image_sample_cd_cl v[0:3], v[6:11], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
309 ; GFX10-NEXT: s_waitcnt vmcnt(0)
310 ; GFX10-NEXT: ; return to shader part epilog
312 ; GFX10GISEL-LABEL: sample_cd_cl_2d:
313 ; GFX10GISEL: ; %bb.0: ; %main_body
314 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v5
315 ; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
316 ; GFX10GISEL-NEXT: v_mov_b32_e32 v5, v6
317 ; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v7, 16, v4
318 ; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
319 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
320 ; GFX10GISEL-NEXT: ; return to shader part epilog
322 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_cd_cl_1d: calls llvm.amdgcn.image.sample.c.cd.cl.1d with a float
; %zcompare, float derivatives and two half operands (%s, %clamp), dmask 15.
; Expected output merges v3 and v4 into v3 (v_perm_b32 in the SelectionDAG run;
; v_and_b32 + v_lshl_or_b32 in the GlobalISel run), then issues
; image_sample_c_cd_cl with a16 and address operand v[0:3].
326 define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) {
327 ; GFX10-LABEL: sample_c_cd_cl_1d:
328 ; GFX10: ; %bb.0: ; %main_body
329 ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
330 ; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
331 ; GFX10-NEXT: s_waitcnt vmcnt(0)
332 ; GFX10-NEXT: ; return to shader part epilog
334 ; GFX10GISEL-LABEL: sample_c_cd_cl_1d:
335 ; GFX10GISEL: ; %bb.0: ; %main_body
336 ; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
337 ; GFX10GISEL-NEXT: v_lshl_or_b32 v3, v4, 16, v3
338 ; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
339 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
340 ; GFX10GISEL-NEXT: ; return to shader part epilog
342 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_cd_cl_2d: calls llvm.amdgcn.image.sample.c.cd.cl.2d with a float
; %zcompare, four float derivatives and three half operands (%s, %t, %clamp),
; dmask 15.
; SelectionDAG run: six v_mov_b32 copies plus one v_perm_b32 build the address
; in v[7:13] before image_sample_c_cd_cl a16.
; GlobalISel run: two v_mov_b32, one v_and_b32 and one v_lshl_or_b32, then
; image_sample_c_cd_cl a16 reading v[0:6].
346 define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
347 ; GFX10-LABEL: sample_c_cd_cl_2d:
348 ; GFX10: ; %bb.0: ; %main_body
349 ; GFX10-NEXT: v_mov_b32_e32 v13, v7
350 ; GFX10-NEXT: v_mov_b32_e32 v11, v4
351 ; GFX10-NEXT: v_mov_b32_e32 v10, v3
352 ; GFX10-NEXT: v_mov_b32_e32 v9, v2
353 ; GFX10-NEXT: v_mov_b32_e32 v8, v1
354 ; GFX10-NEXT: v_mov_b32_e32 v7, v0
355 ; GFX10-NEXT: v_perm_b32 v12, v6, v5, 0x5040100
356 ; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[7:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
357 ; GFX10-NEXT: s_waitcnt vmcnt(0)
358 ; GFX10-NEXT: ; return to shader part epilog
360 ; GFX10GISEL-LABEL: sample_c_cd_cl_2d:
361 ; GFX10GISEL: ; %bb.0: ; %main_body
362 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v6
363 ; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
364 ; GFX10GISEL-NEXT: v_mov_b32_e32 v6, v7
365 ; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v8, 16, v5
366 ; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
367 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
368 ; GFX10GISEL-NEXT: ; return to shader part epilog
370 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_d_o_2darray_V1: calls llvm.amdgcn.image.sample.c.d.o.2darray with an
; i32 %offset, float %zcompare, four float derivatives and three half operands
; (%s, %t, %slice). The result is a single float and the dmask argument is 4.
; SelectionDAG run: seven v_mov_b32 copies plus one v_perm_b32 build the
; address in v[8:15]; GlobalISel run: two v_mov_b32, one v_and_b32 and one
; v_lshl_or_b32 with the address in v[0:7].
; Both expect image_sample_c_d_o with a16, dmask:0x4 and destination v0.
374 define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
375 ; GFX10-LABEL: sample_c_d_o_2darray_V1:
376 ; GFX10: ; %bb.0: ; %main_body
377 ; GFX10-NEXT: v_mov_b32_e32 v15, v8
378 ; GFX10-NEXT: v_mov_b32_e32 v13, v5
379 ; GFX10-NEXT: v_mov_b32_e32 v12, v4
380 ; GFX10-NEXT: v_mov_b32_e32 v11, v3
381 ; GFX10-NEXT: v_mov_b32_e32 v10, v2
382 ; GFX10-NEXT: v_mov_b32_e32 v9, v1
383 ; GFX10-NEXT: v_mov_b32_e32 v8, v0
384 ; GFX10-NEXT: v_perm_b32 v14, v7, v6, 0x5040100
385 ; GFX10-NEXT: image_sample_c_d_o v0, v[8:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
386 ; GFX10-NEXT: s_waitcnt vmcnt(0)
387 ; GFX10-NEXT: ; return to shader part epilog
389 ; GFX10GISEL-LABEL: sample_c_d_o_2darray_V1:
390 ; GFX10GISEL: ; %bb.0: ; %main_body
391 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v7
392 ; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
393 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8
394 ; GFX10GISEL-NEXT: v_lshl_or_b32 v6, v9, 16, v6
395 ; GFX10GISEL-NEXT: image_sample_c_d_o v0, v[0:7], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
396 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
397 ; GFX10GISEL-NEXT: ; return to shader part epilog
399 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_c_d_o_2darray_V2: same operands as sample_c_d_o_2darray_V1, but the
; intrinsic returns <2 x float> and the dmask argument is 6.
; SelectionDAG run: seven v_mov_b32 copies plus one v_perm_b32 build the
; address in v[8:15]; GlobalISel run: two v_mov_b32, one v_and_b32 and one
; v_lshl_or_b32 with the address in v[0:7].
; Both expect image_sample_c_d_o with a16, dmask:0x6 and destination v[0:1].
403 define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
404 ; GFX10-LABEL: sample_c_d_o_2darray_V2:
405 ; GFX10: ; %bb.0: ; %main_body
406 ; GFX10-NEXT: v_mov_b32_e32 v15, v8
407 ; GFX10-NEXT: v_mov_b32_e32 v13, v5
408 ; GFX10-NEXT: v_mov_b32_e32 v12, v4
409 ; GFX10-NEXT: v_mov_b32_e32 v11, v3
410 ; GFX10-NEXT: v_mov_b32_e32 v10, v2
411 ; GFX10-NEXT: v_mov_b32_e32 v9, v1
412 ; GFX10-NEXT: v_mov_b32_e32 v8, v0
413 ; GFX10-NEXT: v_perm_b32 v14, v7, v6, 0x5040100
414 ; GFX10-NEXT: image_sample_c_d_o v[0:1], v[8:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
415 ; GFX10-NEXT: s_waitcnt vmcnt(0)
416 ; GFX10-NEXT: ; return to shader part epilog
418 ; GFX10GISEL-LABEL: sample_c_d_o_2darray_V2:
419 ; GFX10GISEL: ; %bb.0: ; %main_body
420 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v7
421 ; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
422 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8
423 ; GFX10GISEL-NEXT: v_lshl_or_b32 v6, v9, 16, v6
424 ; GFX10GISEL-NEXT: image_sample_c_d_o v[0:1], v[0:7], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
425 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
426 ; GFX10GISEL-NEXT: ; return to shader part epilog
428 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Declarations of the image-sample intrinsics whose mangled names end in
; .f32.f16 (float derivative operands, half coordinate operands), as called by
; the test functions in this file. The trailing operands of every declaration
; are <8 x i32>, <4 x i32>, i1, i32, i32.
; Attribute group #1 is referenced here but is not defined in this part of the
; file.
432 declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
433 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
434 declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32, float, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
435 declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
436 declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
437 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
438 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
439 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
440 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
442 declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
443 declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
444 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
445 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
446 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
447 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
448 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
449 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
451 declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
452 declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; sample_g16_noa16_d_1d: calls llvm.amdgcn.image.sample.d.1d with half
; derivatives (%dsdh, %dsdv) and a float coordinate (%s), dmask 15.
; Both llc runs are expected to emit a single image_sample_d_g16 with address
; operand v[0:2]; the expected instruction carries no a16 modifier and no
; preparatory instructions are expected.
454 define amdgpu_ps <4 x float> @sample_g16_noa16_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
455 ; GFX10-LABEL: sample_g16_noa16_d_1d:
456 ; GFX10: ; %bb.0: ; %main_body
457 ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
458 ; GFX10-NEXT: s_waitcnt vmcnt(0)
459 ; GFX10-NEXT: ; return to shader part epilog
461 ; GFX10GISEL-LABEL: sample_g16_noa16_d_1d:
462 ; GFX10GISEL: ; %bb.0: ; %main_body
463 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
464 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
465 ; GFX10GISEL-NEXT: ; return to shader part epilog
467 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_g16_noa16_d_2d: calls llvm.amdgcn.image.sample.d.2d with four half
; derivatives and two float coordinates (%s, %t), dmask 15.
; SelectionDAG run: two v_perm_b32 merge v0/v1 into v0 and v2/v3 into v2, and
; image_sample_d_g16 takes the bracketed register list [v0, v2, v4, v5].
; GlobalISel run: two v_and_b32 and two v_lshl_or_b32 produce v0 and v1, and
; image_sample_d_g16 takes [v0, v1, v4, v5].
; Neither expected instruction carries the a16 modifier.
471 define amdgpu_ps <4 x float> @sample_g16_noa16_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
472 ; GFX10-LABEL: sample_g16_noa16_d_2d:
473 ; GFX10: ; %bb.0: ; %main_body
474 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
475 ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
476 ; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
477 ; GFX10-NEXT: s_waitcnt vmcnt(0)
478 ; GFX10-NEXT: ; return to shader part epilog
480 ; GFX10GISEL-LABEL: sample_g16_noa16_d_2d:
481 ; GFX10GISEL: ; %bb.0: ; %main_body
482 ; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
483 ; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
484 ; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
485 ; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2
486 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
487 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
488 ; GFX10GISEL-NEXT: ; return to shader part epilog
490 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_g16_noa16_d_3d: calls llvm.amdgcn.image.sample.d.3d with six half
; derivatives and three float coordinates (%s, %t, %r), dmask 15.
; SelectionDAG run: two v_mov_b32 and two v_perm_b32 build the address in
; v[2:8] before image_sample_d_g16.
; GlobalISel run: two v_mov_b32, two v_and_b32 and two v_lshl_or_b32 build the
; same v[2:8] address range.
; Neither expected instruction carries the a16 modifier.
494 define amdgpu_ps <4 x float> @sample_g16_noa16_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
495 ; GFX10-LABEL: sample_g16_noa16_d_3d:
496 ; GFX10: ; %bb.0: ; %main_body
497 ; GFX10-NEXT: v_mov_b32_e32 v9, v3
498 ; GFX10-NEXT: v_mov_b32_e32 v3, v2
499 ; GFX10-NEXT: v_perm_b32 v2, v1, v0, 0x5040100
500 ; GFX10-NEXT: v_perm_b32 v4, v4, v9, 0x5040100
501 ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
502 ; GFX10-NEXT: s_waitcnt vmcnt(0)
503 ; GFX10-NEXT: ; return to shader part epilog
505 ; GFX10GISEL-LABEL: sample_g16_noa16_d_3d:
506 ; GFX10GISEL: ; %bb.0: ; %main_body
507 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
508 ; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
509 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v2
510 ; GFX10GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9
511 ; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v1, 16, v0
512 ; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v9
513 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
514 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
515 ; GFX10GISEL-NEXT: ; return to shader part epilog
517 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_g16_noa16_c_d_1d: calls llvm.amdgcn.image.sample.c.d.1d with a float
; %zcompare, half derivatives (%dsdh, %dsdv) and a float coordinate (%s),
; dmask 15.
; Both llc runs are expected to emit a single image_sample_c_d_g16 with
; address operand v[0:3], no a16 modifier and no preparatory instructions.
521 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
522 ; GFX10-LABEL: sample_g16_noa16_c_d_1d:
523 ; GFX10: ; %bb.0: ; %main_body
524 ; GFX10-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
525 ; GFX10-NEXT: s_waitcnt vmcnt(0)
526 ; GFX10-NEXT: ; return to shader part epilog
528 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_1d:
529 ; GFX10GISEL: ; %bb.0: ; %main_body
530 ; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
531 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
532 ; GFX10GISEL-NEXT: ; return to shader part epilog
534 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_g16_noa16_c_d_2d: calls llvm.amdgcn.image.sample.c.d.2d with a float
; %zcompare, four half derivatives and two float coordinates (%s, %t),
; dmask 15.
; SelectionDAG run: two v_perm_b32 merge v1/v2 into v1 and v3/v4 into v3, and
; image_sample_c_d_g16 takes the register list [v0, v1, v3, v5, v6].
; GlobalISel run: two v_and_b32 and two v_lshl_or_b32 produce v1 and v2, and
; image_sample_c_d_g16 takes [v0, v1, v2, v5, v6].
; Neither expected instruction carries the a16 modifier.
538 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
539 ; GFX10-LABEL: sample_g16_noa16_c_d_2d:
540 ; GFX10: ; %bb.0: ; %main_body
541 ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
542 ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
543 ; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
544 ; GFX10-NEXT: s_waitcnt vmcnt(0)
545 ; GFX10-NEXT: ; return to shader part epilog
547 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_2d:
548 ; GFX10GISEL: ; %bb.0: ; %main_body
549 ; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
550 ; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
551 ; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v2, 16, v1
552 ; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v4, 16, v3
553 ; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
554 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
555 ; GFX10GISEL-NEXT: ; return to shader part epilog
557 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; sample_g16_noa16_d_cl_1d: calls llvm.amdgcn.image.sample.d.cl.1d with half
; derivatives (%dsdh, %dsdv) and float %s and %clamp operands, dmask 15.
; Both llc runs are expected to emit a single image_sample_d_cl_g16 with
; address operand v[0:3], no a16 modifier and no preparatory instructions.
561 define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
562 ; GFX10-LABEL: sample_g16_noa16_d_cl_1d:
563 ; GFX10: ; %bb.0: ; %main_body
564 ; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
565 ; GFX10-NEXT: s_waitcnt vmcnt(0)
566 ; GFX10-NEXT: ; return to shader part epilog
568 ; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_1d:
569 ; GFX10GISEL: ; %bb.0: ; %main_body
570 ; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
571 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
572 ; GFX10GISEL-NEXT: ; return to shader part epilog
574 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.d.cl.2d, f16.f32 overload (half derivatives,
; float s/t/clamp), dmask 15.
; Both check prefixes expect one image_sample_d_cl_g16 with a five-register
; address list and no a16 modifier. The two derivative registers are built
; first: v_perm_b32 in the GFX10 checks, v_and_b32 + v_lshl_or_b32 in the
; GFX10GISEL checks.
578 define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
579 ; GFX10-LABEL: sample_g16_noa16_d_cl_2d:
580 ; GFX10: ; %bb.0: ; %main_body
581 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
582 ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
583 ; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
584 ; GFX10-NEXT: s_waitcnt vmcnt(0)
585 ; GFX10-NEXT: ; return to shader part epilog
587 ; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_2d:
588 ; GFX10GISEL: ; %bb.0: ; %main_body
589 ; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
590 ; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
591 ; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
592 ; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2
593 ; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
594 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
595 ; GFX10GISEL-NEXT: ; return to shader part epilog
597 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.c.d.cl.1d, f16.f32 overload (half
; derivatives, float zcompare/s/clamp), dmask 15.
; Both check prefixes expect only image_sample_c_d_cl_g16 on address v[0:4],
; with no register setup instructions before it and no a16 modifier.
601 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
602 ; GFX10-LABEL: sample_g16_noa16_c_d_cl_1d:
603 ; GFX10: ; %bb.0: ; %main_body
604 ; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
605 ; GFX10-NEXT: s_waitcnt vmcnt(0)
606 ; GFX10-NEXT: ; return to shader part epilog
608 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_1d:
609 ; GFX10GISEL: ; %bb.0: ; %main_body
610 ; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
611 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
612 ; GFX10GISEL-NEXT: ; return to shader part epilog
614 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.c.d.cl.2d, f16.f32 overload (half
; derivatives, float zcompare/s/t/clamp), dmask 15.
; Both check prefixes expect image_sample_c_d_cl_g16 with the address given as
; the contiguous range v[2:7] and no a16 modifier. Before it, the checks expect
; two v_mov_b32 copies followed by the instructions that build v3 and v4
; (v_perm_b32 for GFX10, v_and_b32 + v_lshl_or_b32 for GFX10GISEL).
618 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
619 ; GFX10-LABEL: sample_g16_noa16_c_d_cl_2d:
620 ; GFX10: ; %bb.0: ; %main_body
621 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
622 ; GFX10-NEXT: v_mov_b32_e32 v2, v0
623 ; GFX10-NEXT: v_perm_b32 v4, v4, v3, 0x5040100
624 ; GFX10-NEXT: v_perm_b32 v3, v8, v1, 0x5040100
625 ; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
626 ; GFX10-NEXT: s_waitcnt vmcnt(0)
627 ; GFX10-NEXT: ; return to shader part epilog
629 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_2d:
630 ; GFX10GISEL: ; %bb.0: ; %main_body
631 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v2
632 ; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
633 ; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1
634 ; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3
635 ; GFX10GISEL-NEXT: v_lshl_or_b32 v3, v8, 16, v0
636 ; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v1
637 ; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
638 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
639 ; GFX10GISEL-NEXT: ; return to shader part epilog
641 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.cd.1d, f16.f32 overload (half derivatives,
; float s), dmask 15.
; Both check prefixes expect only image_sample_cd_g16 on address v[0:2],
; with no register setup instructions before it and no a16 modifier.
645 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
646 ; GFX10-LABEL: sample_g16_noa16_cd_1d:
647 ; GFX10: ; %bb.0: ; %main_body
648 ; GFX10-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
649 ; GFX10-NEXT: s_waitcnt vmcnt(0)
650 ; GFX10-NEXT: ; return to shader part epilog
652 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_1d:
653 ; GFX10GISEL: ; %bb.0: ; %main_body
654 ; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
655 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
656 ; GFX10GISEL-NEXT: ; return to shader part epilog
658 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.cd.2d, f16.f32 overload (half derivatives,
; float s/t), dmask 15.
; Both check prefixes expect one image_sample_cd_g16 with a four-register
; address list and no a16 modifier. The two derivative registers are built
; first: v_perm_b32 in the GFX10 checks, v_and_b32 + v_lshl_or_b32 in the
; GFX10GISEL checks.
662 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
663 ; GFX10-LABEL: sample_g16_noa16_cd_2d:
664 ; GFX10: ; %bb.0: ; %main_body
665 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
666 ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
667 ; GFX10-NEXT: image_sample_cd_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
668 ; GFX10-NEXT: s_waitcnt vmcnt(0)
669 ; GFX10-NEXT: ; return to shader part epilog
671 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_2d:
672 ; GFX10GISEL: ; %bb.0: ; %main_body
673 ; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
674 ; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
675 ; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
676 ; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2
677 ; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
678 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
679 ; GFX10GISEL-NEXT: ; return to shader part epilog
681 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.c.cd.1d, f16.f32 overload (half derivatives,
; float zcompare and s), dmask 15.
; Both check prefixes expect only image_sample_c_cd_g16 on address v[0:3],
; with no register setup instructions before it and no a16 modifier.
685 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
686 ; GFX10-LABEL: sample_g16_noa16_c_cd_1d:
687 ; GFX10: ; %bb.0: ; %main_body
688 ; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
689 ; GFX10-NEXT: s_waitcnt vmcnt(0)
690 ; GFX10-NEXT: ; return to shader part epilog
692 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_1d:
693 ; GFX10GISEL: ; %bb.0: ; %main_body
694 ; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
695 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
696 ; GFX10GISEL-NEXT: ; return to shader part epilog
698 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.c.cd.2d, f16.f32 overload (half derivatives,
; float zcompare/s/t), dmask 15.
; Both check prefixes expect one image_sample_c_cd_g16 with a five-register
; address list and no a16 modifier. The two derivative registers are built
; first: v_perm_b32 in the GFX10 checks, v_and_b32 + v_lshl_or_b32 in the
; GFX10GISEL checks.
702 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
703 ; GFX10-LABEL: sample_g16_noa16_c_cd_2d:
704 ; GFX10: ; %bb.0: ; %main_body
705 ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
706 ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
707 ; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
708 ; GFX10-NEXT: s_waitcnt vmcnt(0)
709 ; GFX10-NEXT: ; return to shader part epilog
711 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_2d:
712 ; GFX10GISEL: ; %bb.0: ; %main_body
713 ; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
714 ; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
715 ; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v2, 16, v1
716 ; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v4, 16, v3
717 ; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
718 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
719 ; GFX10GISEL-NEXT: ; return to shader part epilog
721 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.cd.cl.1d, f16.f32 overload (half derivatives,
; float s and clamp), dmask 15.
; Both check prefixes expect only image_sample_cd_cl_g16 on address v[0:3],
; with no register setup instructions before it and no a16 modifier.
725 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
726 ; GFX10-LABEL: sample_g16_noa16_cd_cl_1d:
727 ; GFX10: ; %bb.0: ; %main_body
728 ; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
729 ; GFX10-NEXT: s_waitcnt vmcnt(0)
730 ; GFX10-NEXT: ; return to shader part epilog
732 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_1d:
733 ; GFX10GISEL: ; %bb.0: ; %main_body
734 ; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
735 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
736 ; GFX10GISEL-NEXT: ; return to shader part epilog
738 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.cd.cl.2d, f16.f32 overload (half derivatives,
; float s/t/clamp), dmask 15.
; Both check prefixes expect one image_sample_cd_cl_g16 with a five-register
; address list and no a16 modifier. The two derivative registers are built
; first: v_perm_b32 in the GFX10 checks, v_and_b32 + v_lshl_or_b32 in the
; GFX10GISEL checks.
742 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
743 ; GFX10-LABEL: sample_g16_noa16_cd_cl_2d:
744 ; GFX10: ; %bb.0: ; %main_body
745 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
746 ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
747 ; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
748 ; GFX10-NEXT: s_waitcnt vmcnt(0)
749 ; GFX10-NEXT: ; return to shader part epilog
751 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_2d:
752 ; GFX10GISEL: ; %bb.0: ; %main_body
753 ; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
754 ; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
755 ; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
756 ; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2
757 ; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
758 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
759 ; GFX10GISEL-NEXT: ; return to shader part epilog
761 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.c.cd.cl.1d, f16.f32 overload (half
; derivatives, float zcompare/s/clamp), dmask 15.
; Both check prefixes expect only image_sample_c_cd_cl_g16 on address v[0:4],
; with no register setup instructions before it and no a16 modifier.
765 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
766 ; GFX10-LABEL: sample_g16_noa16_c_cd_cl_1d:
767 ; GFX10: ; %bb.0: ; %main_body
768 ; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
769 ; GFX10-NEXT: s_waitcnt vmcnt(0)
770 ; GFX10-NEXT: ; return to shader part epilog
772 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_1d:
773 ; GFX10GISEL: ; %bb.0: ; %main_body
774 ; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
775 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
776 ; GFX10GISEL-NEXT: ; return to shader part epilog
778 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.c.cd.cl.2d, f16.f32 overload (half
; derivatives, float zcompare/s/t/clamp), dmask 15.
; Both check prefixes expect image_sample_c_cd_cl_g16 with the address given
; as the contiguous range v[2:7] and no a16 modifier. Before it, the checks
; expect two v_mov_b32 copies followed by the instructions that build v3 and
; v4 (v_perm_b32 for GFX10, v_and_b32 + v_lshl_or_b32 for GFX10GISEL).
782 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
783 ; GFX10-LABEL: sample_g16_noa16_c_cd_cl_2d:
784 ; GFX10: ; %bb.0: ; %main_body
785 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
786 ; GFX10-NEXT: v_mov_b32_e32 v2, v0
787 ; GFX10-NEXT: v_perm_b32 v4, v4, v3, 0x5040100
788 ; GFX10-NEXT: v_perm_b32 v3, v8, v1, 0x5040100
789 ; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
790 ; GFX10-NEXT: s_waitcnt vmcnt(0)
791 ; GFX10-NEXT: ; return to shader part epilog
793 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_2d:
794 ; GFX10GISEL: ; %bb.0: ; %main_body
795 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v2
796 ; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
797 ; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1
798 ; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3
799 ; GFX10GISEL-NEXT: v_lshl_or_b32 v3, v8, 16, v0
800 ; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v1
801 ; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
802 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
803 ; GFX10GISEL-NEXT: ; return to shader part epilog
805 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.c.d.o.2darray returning a single float
; (f32.f16.f32 overload) with dmask 4; half derivatives, i32 offset and float
; zcompare/s/t/slice.
; Both check prefixes expect image_sample_c_d_o_g16 writing only v0, with
; dmask:0x4, address range v[2:8] and no a16 modifier, preceded by four
; v_mov_b32 copies and the instructions that build v4 and v5.
809 define amdgpu_ps float @sample_g16_noa16_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
810 ; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V1:
811 ; GFX10: ; %bb.0: ; %main_body
812 ; GFX10-NEXT: v_mov_b32_e32 v9, v3
813 ; GFX10-NEXT: v_mov_b32_e32 v10, v2
814 ; GFX10-NEXT: v_mov_b32_e32 v3, v1
815 ; GFX10-NEXT: v_mov_b32_e32 v2, v0
816 ; GFX10-NEXT: v_perm_b32 v5, v5, v4, 0x5040100
817 ; GFX10-NEXT: v_perm_b32 v4, v9, v10, 0x5040100
818 ; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
819 ; GFX10-NEXT: s_waitcnt vmcnt(0)
820 ; GFX10-NEXT: ; return to shader part epilog
822 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V1:
823 ; GFX10GISEL: ; %bb.0: ; %main_body
824 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v2
825 ; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v3
826 ; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
827 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v1
828 ; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v4
829 ; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v9
830 ; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v5, 16, v1
831 ; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v10, 16, v0
832 ; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
833 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
834 ; GFX10GISEL-NEXT: ; return to shader part epilog
836 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.c.d.o.2darray returning <2 x float>
; (v2f32.f16.f32 overload) with dmask 6; same arguments as the _V1 variant.
; Both check prefixes expect image_sample_c_d_o_g16 writing v[0:1], with
; dmask:0x6, address range v[2:8] and no a16 modifier, preceded by four
; v_mov_b32 copies and the instructions that build v4 and v5.
840 define amdgpu_ps <2 x float> @sample_g16_noa16_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
841 ; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V2:
842 ; GFX10: ; %bb.0: ; %main_body
843 ; GFX10-NEXT: v_mov_b32_e32 v9, v3
844 ; GFX10-NEXT: v_mov_b32_e32 v10, v2
845 ; GFX10-NEXT: v_mov_b32_e32 v3, v1
846 ; GFX10-NEXT: v_mov_b32_e32 v2, v0
847 ; GFX10-NEXT: v_perm_b32 v5, v5, v4, 0x5040100
848 ; GFX10-NEXT: v_perm_b32 v4, v9, v10, 0x5040100
849 ; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
850 ; GFX10-NEXT: s_waitcnt vmcnt(0)
851 ; GFX10-NEXT: ; return to shader part epilog
853 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V2:
854 ; GFX10GISEL: ; %bb.0: ; %main_body
855 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v2
856 ; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v3
857 ; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
858 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v1
859 ; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v4
860 ; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v9
861 ; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v5, 16, v1
862 ; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v10, 16, v0
863 ; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
864 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
865 ; GFX10GISEL-NEXT: ; return to shader part epilog
867 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Declarations of the image.sample intrinsic overloads whose derivative
; operands are half and whose remaining coordinate operands are float (the
; ".f16.f32" name suffix). Every declaration references attribute group #1.
; Grouped as: d / c.d (with and without cl), cd / c.cd (with and without cl),
; then the two c.d.o.2darray return-type variants (float and <2 x float>).
871 declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
872 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
873 declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
874 declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
875 declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
876 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
877 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
878 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
879 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
881 declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
882 declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
883 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
884 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
885 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
886 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
887 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
888 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
890 declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
891 declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; Test: llvm.amdgcn.image.sample.d.1d, f16.f16 overload (half derivatives and
; half coordinate s), dmask 15.
; Both check prefixes expect only image_sample_d_g16 on address v[0:2] with
; the a16 modifier, and no register setup instructions before it.
893 define amdgpu_ps <4 x float> @sample_d_1d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
894 ; GFX10-LABEL: sample_d_1d_g16_a16:
895 ; GFX10: ; %bb.0: ; %main_body
896 ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
897 ; GFX10-NEXT: s_waitcnt vmcnt(0)
898 ; GFX10-NEXT: ; return to shader part epilog
900 ; GFX10GISEL-LABEL: sample_d_1d_g16_a16:
901 ; GFX10GISEL: ; %bb.0: ; %main_body
902 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
903 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
904 ; GFX10GISEL-NEXT: ; return to shader part epilog
906 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.d.2d, f16.f16 overload (half derivatives and
; half coordinates s/t), dmask 15.
; Both check prefixes expect image_sample_d_g16 with the a16 modifier and a
; three-register address (v[2:4] for GFX10, v[0:2] for GFX10GISEL). Three
; registers are built first: three v_perm_b32 in the GFX10 checks, three
; v_and_b32 + v_lshl_or_b32 pairs in the GFX10GISEL checks.
910 define amdgpu_ps <4 x float> @sample_d_2d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
911 ; GFX10-LABEL: sample_d_2d_g16_a16:
912 ; GFX10: ; %bb.0: ; %main_body
913 ; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
914 ; GFX10-NEXT: v_perm_b32 v3, v3, v2, 0x5040100
915 ; GFX10-NEXT: v_perm_b32 v2, v1, v0, 0x5040100
916 ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
917 ; GFX10-NEXT: s_waitcnt vmcnt(0)
918 ; GFX10-NEXT: ; return to shader part epilog
920 ; GFX10GISEL-LABEL: sample_d_2d_g16_a16:
921 ; GFX10GISEL: ; %bb.0: ; %main_body
922 ; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
923 ; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
924 ; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
925 ; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
926 ; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2
927 ; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v5, 16, v4
928 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
929 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
930 ; GFX10GISEL-NEXT: ; return to shader part epilog
932 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test: llvm.amdgcn.image.sample.d.3d, f16.f16 overload (six half derivatives
; and half coordinates s/t/r), dmask 15.
; Both check prefixes expect image_sample_d_g16 with the a16 modifier and a
; six-register contiguous address (v[7:12] for GFX10, v[2:7] for GFX10GISEL),
; preceded by v_mov_b32 copies and the instructions that build three of the
; address registers (v_perm_b32 for GFX10, v_and_b32 + v_lshl_or_b32 for
; GFX10GISEL).
936 define amdgpu_ps <4 x float> @sample_d_3d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
937 ; GFX10-LABEL: sample_d_3d_g16_a16:
938 ; GFX10: ; %bb.0: ; %main_body
939 ; GFX10-NEXT: v_mov_b32_e32 v12, v8
940 ; GFX10-NEXT: v_mov_b32_e32 v10, v5
941 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
942 ; GFX10-NEXT: v_perm_b32 v11, v7, v6, 0x5040100
943 ; GFX10-NEXT: v_perm_b32 v9, v4, v3, 0x5040100
944 ; GFX10-NEXT: v_perm_b32 v7, v1, v0, 0x5040100
945 ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
946 ; GFX10-NEXT: s_waitcnt vmcnt(0)
947 ; GFX10-NEXT: ; return to shader part epilog
949 ; GFX10GISEL-LABEL: sample_d_3d_g16_a16:
950 ; GFX10GISEL: ; %bb.0: ; %main_body
951 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
952 ; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v7
953 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8
954 ; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
955 ; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
956 ; GFX10GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v9
957 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v2
958 ; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v1, 16, v0
959 ; GFX10GISEL-NEXT: v_lshl_or_b32 v6, v10, 16, v6
960 ; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v8
961 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
962 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
963 ; GFX10GISEL-NEXT: ; return to shader part epilog
965 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Declarations of the all-half (".f16.f16") image.sample.d overloads for the
; 1d, 2d and 3d dimensions, followed by the attribute group definitions.
; NOTE(review): these three declarations carry no attribute group, whereas the
; ".f16.f32" declarations in this file use #1 - confirm whether that is
; intentional. Groups #0 and #2 are not referenced in this part of the file.
969 declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
970 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
971 declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
973 attributes #0 = { nounwind }
974 attributes #1 = { nounwind readonly }
975 attributes #2 = { nounwind readnone }