; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
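
; Loads of i8 and i8 vectors from the constant address space (addrspace(4)),
; and their zero- and sign-extension to i16, i32, and i64, checked on GCN
; (HSA and non-HSA) and R600/Evergreen targets.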
; FUNC-LABEL: {{^}}constant_load_i8:
; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}
; GCN-HSA: flat_load_ubyte

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %ld = load i8, i8 addrspace(4)* %in
  store i8 %ld, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v2i8:
; GCN-NOHSA: buffer_load_ushort v
; GCN-HSA: flat_load_ushort v

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %ld = load <2 x i8>, <2 x i8> addrspace(4)* %in
  store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v3i8:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
  %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
  store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v4i8:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %ld = load <4 x i8>, <4 x i8> addrspace(4)* %in
  store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v8i8:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %ld = load <8 x i8>, <8 x i8> addrspace(4)* %in
  store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v16i8:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %ld = load <16 x i8>, <16 x i8> addrspace(4)* %in
  store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}constant_zextload_i8_to_i32:
; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}},
; GCN-HSA: flat_load_ubyte

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = zext i8 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_i8_to_i32:
; GCN-NOHSA: buffer_load_sbyte
; GCN-HSA: flat_load_sbyte

; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
define amdgpu_kernel void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %ld = load i8, i8 addrspace(4)* %in
  %ext = sext i8 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i32:
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = zext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i32:
; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = sext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: This should use DST, but for some reason there are redundant MOVs
; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = zext <2 x i8> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = sext <2 x i8> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v3i8_to_v3i32:
; GCN: s_load_dword s

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
define amdgpu_kernel void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
  %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
  %ext = zext <3 x i8> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v3i8_to_v3i32:
; GCN: s_load_dword s

; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
  %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
  %ext = sext <3 x i8> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i32:
; GCN: s_load_dword s
; GCN-DAG: s_lshr_b32

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = zext <4 x i8> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i32:
; GCN: s_load_dword s
; GCN-DAG: s_sext_i32_i8
; GCN-DAG: s_ashr_i32

; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = sext <4 x i8> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i32:
; GCN: s_load_dwordx2
; GCN-DAG: s_lshr_b32

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = zext <8 x i8> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i32:
; GCN: s_load_dwordx2
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i8

; EG: VTX_READ_64 [[DST:T[0-9]+\.XY]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = sext <8 x i8> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i32:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = zext <16 x i8> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i32:
; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = sext <16 x i8> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i32:
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
  %ext = zext <32 x i8> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i32:
; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 16, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
  %ext = sext <32 x i8> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i32:
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
  %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
  %ext = zext <64 x i8> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i32:
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
  %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
  %ext = sext <64 x i8> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}constant_zextload_i8_to_i64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]

; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]],
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = zext i8 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_i8_to_i64:
; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]],
; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
define amdgpu_kernel void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = sext i8 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i64:
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = zext <1 x i8> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i64:
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = sext <1 x i8> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i64:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = zext <2 x i8> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i64:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = sext <2 x i8> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i64:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = zext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i64:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = sext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i64:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = zext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i64:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = sext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i64:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = zext <16 x i8> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i64:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = sext <16 x i8> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i64:
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
  %ext = zext <32 x i8> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i64:
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
  %ext = sext <32 x i8> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}
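
; The v64i8 -> v64i64 cases below are commented out (XFUNC-LABEL placeholders)
; and are not currently checked.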
; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i64:
; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
;   %ext = zext <64 x i8> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i64:
; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
;   %ext = sext <64 x i8> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }
; FUNC-LABEL: {{^}}constant_zextload_i8_to_i16:
; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]],
; GCN-NOHSA: buffer_store_short v[[VAL]]

; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
define amdgpu_kernel void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = zext i8 %a to i16
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_i8_to_i16:
; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]],
; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]],

; GCN-NOHSA: buffer_store_short v[[VAL]]
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = sext i8 %a to i16
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i16:
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = zext <1 x i8> %load to <1 x i16>
  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i16:
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = sext <1 x i8> %load to <1 x i16>
  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i16:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = zext <2 x i8> %load to <2 x i16>
  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i16:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = sext <2 x i8> %load to <2 x i16>
  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i16:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = zext <4 x i8> %load to <4 x i16>
  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i16:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = sext <4 x i8> %load to <4 x i16>
  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i16:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = zext <8 x i8> %load to <8 x i16>
  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i16:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = sext <8 x i8> %load to <8 x i16>
  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i16:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = zext <16 x i8> %load to <16 x i16>
  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i16:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = sext <16 x i8> %load to <16 x i16>
  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i16:
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
  %ext = zext <32 x i8> %load to <32 x i16>
  store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i16:
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
  %ext = sext <32 x i8> %load to <32 x i16>
  store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
  ret void
}
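
; The v64i8 -> v64i16 cases below are likewise commented out (XFUNC-LABEL
; placeholders) and are not currently checked.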
; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i16:
; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
;   %ext = zext <64 x i8> %load to <64 x i16>
;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
;   ret void
; }

; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i16:
; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
;   %ext = sext <64 x i8> %load to <64 x i16>
;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
;   ret void
; }
attributes #0 = { nounwind }