1 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-NOHSA,SI,FUNC %s
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-HSA,SI,FUNC %s
3 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-NOHSA,VI,FUNC %s
4 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
5 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
8 ; FUNC-LABEL: {{^}}global_load_i8:
9 ; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}
10 ; GCN-HSA: flat_load_ubyte
12 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test - reads a single i8 through the addrspace(1) pointer %in
; and writes it back unchanged through the addrspace(1) pointer %out.
14 define amdgpu_kernel void @global_load_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
16 %ld = load i8, ptr addrspace(1) %in
17 store i8 %ld, ptr addrspace(1) %out
21 ; FUNC-LABEL: {{^}}global_load_v2i8:
22 ; GCN-NOHSA: buffer_load_ushort v
23 ; GCN-HSA: flat_load_ushort v
25 ; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test - copies a <2 x i8> vector from the addrspace(1) pointer
; %in to the addrspace(1) pointer %out with no conversion in between.
26 define amdgpu_kernel void @global_load_v2i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
28 %ld = load <2 x i8>, ptr addrspace(1) %in
29 store <2 x i8> %ld, ptr addrspace(1) %out
33 ; FUNC-LABEL: {{^}}global_load_v3i8:
34 ; GCN-NOHSA: buffer_load_dword v
35 ; GCN-HSA: flat_load_dword v
37 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test - copies a <3 x i8> vector (a non-power-of-two element
; count) from the addrspace(1) pointer %in to the addrspace(1) pointer %out.
38 define amdgpu_kernel void @global_load_v3i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
40 %ld = load <3 x i8>, ptr addrspace(1) %in
41 store <3 x i8> %ld, ptr addrspace(1) %out
45 ; FUNC-LABEL: {{^}}global_load_v4i8:
46 ; GCN-NOHSA: buffer_load_dword v
47 ; GCN-HSA: flat_load_dword v
49 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test - copies a <4 x i8> vector from the addrspace(1) pointer
; %in to the addrspace(1) pointer %out with no conversion in between.
50 define amdgpu_kernel void @global_load_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
52 %ld = load <4 x i8>, ptr addrspace(1) %in
53 store <4 x i8> %ld, ptr addrspace(1) %out
57 ; FUNC-LABEL: {{^}}global_load_v8i8:
58 ; GCN-NOHSA: buffer_load_dwordx2
59 ; GCN-HSA: flat_load_dwordx2
61 ; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; Kernel under test - copies an <8 x i8> vector from the addrspace(1) pointer
; %in to the addrspace(1) pointer %out with no conversion in between.
62 define amdgpu_kernel void @global_load_v8i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
64 %ld = load <8 x i8>, ptr addrspace(1) %in
65 store <8 x i8> %ld, ptr addrspace(1) %out
69 ; FUNC-LABEL: {{^}}global_load_v16i8:
70 ; GCN-NOHSA: buffer_load_dwordx4
72 ; GCN-HSA: flat_load_dwordx4
74 ; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; Kernel under test - copies a <16 x i8> vector from the addrspace(1) pointer
; %in to the addrspace(1) pointer %out with no conversion in between.
75 define amdgpu_kernel void @global_load_v16i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
77 %ld = load <16 x i8>, ptr addrspace(1) %in
78 store <16 x i8> %ld, ptr addrspace(1) %out
82 ; FUNC-LABEL: {{^}}global_zextload_i8_to_i32:
83 ; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}},
84 ; GCN-HSA: flat_load_ubyte
86 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test - loads one i8 through %in (addrspace(1)), zero-extends
; it to i32, and stores the i32 result through %out.
87 define amdgpu_kernel void @global_zextload_i8_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
88 %a = load i8, ptr addrspace(1) %in
89 %ext = zext i8 %a to i32
90 store i32 %ext, ptr addrspace(1) %out
94 ; FUNC-LABEL: {{^}}global_sextload_i8_to_i32:
95 ; GCN-NOHSA: buffer_load_sbyte
96 ; GCN-HSA: flat_load_sbyte
98 ; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
99 ; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; Kernel under test - loads one i8 through %in (addrspace(1)), sign-extends
; it to i32, and stores the i32 result through %out.
101 define amdgpu_kernel void @global_sextload_i8_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
102 %ld = load i8, ptr addrspace(1) %in
103 %ext = sext i8 %ld to i32
104 store i32 %ext, ptr addrspace(1) %out
108 ; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i32:
110 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test - single-element vector form of the i8 to i32 zero
; extension. Loads <1 x i8> through %in, widens with zext, stores <1 x i32>
; through %out.
111 define amdgpu_kernel void @global_zextload_v1i8_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
112 %load = load <1 x i8>, ptr addrspace(1) %in
113 %ext = zext <1 x i8> %load to <1 x i32>
114 store <1 x i32> %ext, ptr addrspace(1) %out
118 ; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i32:
120 ; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
121 ; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; Kernel under test - single-element vector form of the i8 to i32 sign
; extension. Loads <1 x i8> through %in, widens with sext, stores <1 x i32>
; through %out.
123 define amdgpu_kernel void @global_sextload_v1i8_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
124 %load = load <1 x i8>, ptr addrspace(1) %in
125 %ext = sext <1 x i8> %load to <1 x i32>
126 store <1 x i32> %ext, ptr addrspace(1) %out
130 ; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i32:
131 ; GCN-NOHSA: buffer_load_ushort
132 ; GCN-HSA: flat_load_ushort
134 ; EG: VTX_READ_16 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
135 ; TODO: These should use DST, but for some of them there are redundant MOVs.
136 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; Kernel under test - loads <2 x i8> through %in (addrspace(1)),
; zero-extends every element to i32, and stores the <2 x i32> through %out.
138 define amdgpu_kernel void @global_zextload_v2i8_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
139 %load = load <2 x i8>, ptr addrspace(1) %in
140 %ext = zext <2 x i8> %load to <2 x i32>
141 store <2 x i32> %ext, ptr addrspace(1) %out
145 ; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i32:
146 ; GCN-NOHSA: buffer_load_ushort
147 ; GCN-HSA: flat_load_ushort
149 ; EG: VTX_READ_16 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
150 ; TODO: These should use DST, but for some of them there are redundant MOVs.
151 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
152 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; Kernel under test - loads <2 x i8> through %in (addrspace(1)),
; sign-extends every element to i32, and stores the <2 x i32> through %out.
155 define amdgpu_kernel void @global_sextload_v2i8_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
156 %load = load <2 x i8>, ptr addrspace(1) %in
157 %ext = sext <2 x i8> %load to <2 x i32>
158 store <2 x i32> %ext, ptr addrspace(1) %out
162 ; FUNC-LABEL: {{^}}global_zextload_v3i8_to_v3i32:
163 ; GCN-NOHSA: buffer_load_dword v
164 ; GCN-HSA: flat_load_dword v
166 ; SI-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
167 ; VI-DAG: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
168 ; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
169 ; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,
171 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
172 ; TODO: These should use DST, but for some of them there are redundant MOVs.
173 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
174 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; Kernel under test - loads <3 x i8> (non-power-of-two element count)
; through %in, zero-extends every element to i32, and stores the <3 x i32>
; through %out.
177 define amdgpu_kernel void @global_zextload_v3i8_to_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
179 %ld = load <3 x i8>, ptr addrspace(1) %in
180 %ext = zext <3 x i8> %ld to <3 x i32>
181 store <3 x i32> %ext, ptr addrspace(1) %out
185 ; FUNC-LABEL: {{^}}global_sextload_v3i8_to_v3i32:
186 ; GCN-NOHSA: buffer_load_dword v
187 ; GCN-HSA: flat_load_dword v
189 ; FIXME: Need to optimize this sequence to avoid the extra shift on VI.
191 ; t23: i16 = truncate t18
192 ; t49: i16 = srl t23, Constant:i32<8>
193 ; t57: i32 = any_extend t49
194 ; t58: i32 = sign_extend_inreg t57, ValueType:ch:i8
196 ; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
197 ; VI-DAG: v_lshrrev_b16_e32 [[SHIFT:v[0-9]+]], 8, v{{[0-9]+}}
198 ; VI-DAG: v_bfe_i32 v{{[0-9]+}}, [[SHIFT]], 0, 8
199 ; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
200 ; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
202 ; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
203 ; TODO: These should use DST, but for some of them there are redundant MOVs.
204 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
205 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
206 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; Kernel under test - loads <3 x i8> (non-power-of-two element count)
; through %in, sign-extends every element to i32, and stores the <3 x i32>
; through %out.
210 define amdgpu_kernel void @global_sextload_v3i8_to_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
212 %ld = load <3 x i8>, ptr addrspace(1) %in
213 %ext = sext <3 x i8> %ld to <3 x i32>
214 store <3 x i32> %ext, ptr addrspace(1) %out
218 ; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i32:
219 ; GCN-NOHSA: buffer_load_dword
220 ; GCN-HSA: flat_load_dword
222 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
223 ; TODO: These should use DST, but for some of them there are redundant MOVs.
224 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
225 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
226 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; Kernel under test - loads <4 x i8> through %in (addrspace(1)),
; zero-extends every element to i32, and stores the <4 x i32> through %out.
230 define amdgpu_kernel void @global_zextload_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
231 %load = load <4 x i8>, ptr addrspace(1) %in
232 %ext = zext <4 x i8> %load to <4 x i32>
233 store <4 x i32> %ext, ptr addrspace(1) %out
237 ; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i32:
238 ; GCN-NOHSA: buffer_load_dword
239 ; GCN-HSA: flat_load_dword
241 ; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
242 ; TODO: These should use DST, but for some of them there are redundant MOVs.
243 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
244 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
245 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
246 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; Kernel under test - loads <4 x i8> through %in (addrspace(1)),
; sign-extends every element to i32, and stores the <4 x i32> through %out.
251 define amdgpu_kernel void @global_sextload_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
252 %load = load <4 x i8>, ptr addrspace(1) %in
253 %ext = sext <4 x i8> %load to <4 x i32>
254 store <4 x i32> %ext, ptr addrspace(1) %out
258 ; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i32:
260 ; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
261 ; TODO: These should use DST, but for some of them there are redundant MOVs.
262 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
263 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
264 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
265 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
266 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
267 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
268 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; Kernel under test - loads <8 x i8> through %in (addrspace(1)),
; zero-extends every element to i32, and stores the <8 x i32> through %out.
276 define amdgpu_kernel void @global_zextload_v8i8_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
277 %load = load <8 x i8>, ptr addrspace(1) %in
278 %ext = zext <8 x i8> %load to <8 x i32>
279 store <8 x i32> %ext, ptr addrspace(1) %out
283 ; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i32:
285 ; EG: VTX_READ_64 [[DST:T[0-9]+\.XY]], T{{[0-9]+}}.X, 0, #1
286 ; TODO: These should use DST, but for some of them there are redundant MOVs.
287 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
288 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
289 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
290 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
291 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
292 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
293 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
294 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; Kernel under test - loads <8 x i8> through %in (addrspace(1)),
; sign-extends every element to i32, and stores the <8 x i32> through %out.
303 define amdgpu_kernel void @global_sextload_v8i8_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
304 %load = load <8 x i8>, ptr addrspace(1) %in
305 %ext = sext <8 x i8> %load to <8 x i32>
306 store <8 x i32> %ext, ptr addrspace(1) %out
310 ; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i32:
312 ; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
313 ; TODO: These should use DST, but for some of them there are redundant MOVs.
314 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
315 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
316 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
317 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
318 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
319 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
320 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
321 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
322 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
323 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
324 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
325 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
326 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
327 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
328 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; Kernel under test - loads <16 x i8> through %in (addrspace(1)),
; zero-extends every element to i32, and stores the <16 x i32> through %out.
344 define amdgpu_kernel void @global_zextload_v16i8_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
345 %load = load <16 x i8>, ptr addrspace(1) %in
346 %ext = zext <16 x i8> %load to <16 x i32>
347 store <16 x i32> %ext, ptr addrspace(1) %out
351 ; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i32:
353 ; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
354 ; TODO: These should use DST, but for some of them there are redundant MOVs.
355 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
356 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
357 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
358 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
359 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
360 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
361 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
362 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
363 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
364 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
365 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
366 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
367 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
368 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
369 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
370 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; Kernel under test - loads <16 x i8> through %in (addrspace(1)),
; sign-extends every element to i32, and stores the <16 x i32> through %out.
387 define amdgpu_kernel void @global_sextload_v16i8_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
388 %load = load <16 x i8>, ptr addrspace(1) %in
389 %ext = sext <16 x i8> %load to <16 x i32>
390 store <16 x i32> %ext, ptr addrspace(1) %out
394 ; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i32:
396 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
397 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
398 ; TODO: These should use DST, but for some of them there are redundant MOVs.
399 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
400 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
401 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
402 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
403 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
404 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
405 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
406 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
407 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
408 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
409 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
410 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
411 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
412 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
413 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
414 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
415 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
416 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
417 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
418 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
419 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
420 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
421 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
422 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
423 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
424 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
425 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
426 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
427 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
428 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; Kernel under test - loads <32 x i8> through %in (addrspace(1)),
; zero-extends every element to i32, and stores the <32 x i32> through %out.
459 define amdgpu_kernel void @global_zextload_v32i8_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
460 %load = load <32 x i8>, ptr addrspace(1) %in
461 %ext = zext <32 x i8> %load to <32 x i32>
462 store <32 x i32> %ext, ptr addrspace(1) %out
466 ; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i32:
468 ; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
469 ; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 16, #1
470 ; TODO: These should use DST, but for some of them there are redundant MOVs.
471 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
472 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
473 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
474 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
475 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
476 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
477 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
478 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
479 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
480 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
481 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
482 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
483 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
484 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
485 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
486 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
487 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
488 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
489 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
490 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
491 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
492 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
493 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
494 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
495 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
496 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
497 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
498 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
499 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
500 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
501 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
502 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; Kernel under test - loads <32 x i8> through %in (addrspace(1)),
; sign-extends every element to i32, and stores the <32 x i32> through %out.
535 define amdgpu_kernel void @global_sextload_v32i8_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
536 %load = load <32 x i8>, ptr addrspace(1) %in
537 %ext = sext <32 x i8> %load to <32 x i32>
538 store <32 x i32> %ext, ptr addrspace(1) %out
542 ; FUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i32:
544 ; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1
545 ; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
546 ; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
547 ; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
; Kernel under test - loads <64 x i8> through %in (addrspace(1)),
; zero-extends every element to i32, and stores the <64 x i32> through %out.
548 define amdgpu_kernel void @global_zextload_v64i8_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
549 %load = load <64 x i8>, ptr addrspace(1) %in
550 %ext = zext <64 x i8> %load to <64 x i32>
551 store <64 x i32> %ext, ptr addrspace(1) %out
555 ; FUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i32:
557 ; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1
558 ; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
559 ; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
560 ; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
; Kernel under test - loads <64 x i8> through %in (addrspace(1)),
; sign-extends every element to i32, and stores the <64 x i32> through %out.
561 define amdgpu_kernel void @global_sextload_v64i8_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
562 %load = load <64 x i8>, ptr addrspace(1) %in
563 %ext = sext <64 x i8> %load to <64 x i32>
564 store <64 x i32> %ext, ptr addrspace(1) %out
568 ; FUNC-LABEL: {{^}}global_zextload_i8_to_i64:
569 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
571 ; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
572 ; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
574 ; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]],
575 ; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
577 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
578 ; EG: MOV {{.*}}, 0.0
; Kernel under test - loads one i8 through %in (addrspace(1)), zero-extends
; it to i64, and stores the i64 result through %out.
579 define amdgpu_kernel void @global_zextload_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
580 %a = load i8, ptr addrspace(1) %in
581 %ext = zext i8 %a to i64
582 store i64 %ext, ptr addrspace(1) %out
586 ; FUNC-LABEL: {{^}}global_sextload_i8_to_i64:
587 ; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]],
588 ; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
589 ; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
591 ; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
592 ; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
594 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
595 ; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; Kernel under test - loads one i8 through %in (addrspace(1)), sign-extends
; it to i64, and stores the i64 result through %out.
598 define amdgpu_kernel void @global_sextload_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
599 %a = load i8, ptr addrspace(1) %in
600 %ext = sext i8 %a to i64
601 store i64 %ext, ptr addrspace(1) %out
605 ; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i64:
607 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
608 ; EG: MOV {{.*}}, 0.0
; Kernel under test - single-element vector form of the i8 to i64 zero
; extension. Loads <1 x i8> through %in, widens with zext, stores <1 x i64>
; through %out.
609 define amdgpu_kernel void @global_zextload_v1i8_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
610 %load = load <1 x i8>, ptr addrspace(1) %in
611 %ext = zext <1 x i8> %load to <1 x i64>
612 store <1 x i64> %ext, ptr addrspace(1) %out
616 ; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i64:
618 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
619 ; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; Kernel under test - single-element vector form of the i8 to i64 sign
; extension. Loads <1 x i8> through %in, widens with sext, stores <1 x i64>
; through %out.
622 define amdgpu_kernel void @global_sextload_v1i8_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
623 %load = load <1 x i8>, ptr addrspace(1) %in
624 %ext = sext <1 x i8> %load to <1 x i64>
625 store <1 x i64> %ext, ptr addrspace(1) %out
629 ; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i64:
631 ; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test - loads <2 x i8> through %in (addrspace(1)),
; zero-extends every element to i64, and stores the <2 x i64> through %out.
632 define amdgpu_kernel void @global_zextload_v2i8_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
633 %load = load <2 x i8>, ptr addrspace(1) %in
634 %ext = zext <2 x i8> %load to <2 x i64>
635 store <2 x i64> %ext, ptr addrspace(1) %out
639 ; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i64:
641 ; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test - loads <2 x i8> through %in (addrspace(1)),
; sign-extends every element to i64, and stores the <2 x i64> through %out.
642 define amdgpu_kernel void @global_sextload_v2i8_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
643 %load = load <2 x i8>, ptr addrspace(1) %in
644 %ext = sext <2 x i8> %load to <2 x i64>
645 store <2 x i64> %ext, ptr addrspace(1) %out
649 ; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i64:
651 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test - loads <4 x i8> through %in (addrspace(1)),
; zero-extends every element to i64, and stores the <4 x i64> through %out.
652 define amdgpu_kernel void @global_zextload_v4i8_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
653 %load = load <4 x i8>, ptr addrspace(1) %in
654 %ext = zext <4 x i8> %load to <4 x i64>
655 store <4 x i64> %ext, ptr addrspace(1) %out
659 ; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i64:
661 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test - loads <4 x i8> through %in (addrspace(1)),
; sign-extends every element to i64, and stores the <4 x i64> through %out.
662 define amdgpu_kernel void @global_sextload_v4i8_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
663 %load = load <4 x i8>, ptr addrspace(1) %in
664 %ext = sext <4 x i8> %load to <4 x i64>
665 store <4 x i64> %ext, ptr addrspace(1) %out
669 ; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i64:
671 ; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; Kernel under test - loads <8 x i8> through %in (addrspace(1)),
; zero-extends every element to i64, and stores the <8 x i64> through %out.
672 define amdgpu_kernel void @global_zextload_v8i8_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
673 %load = load <8 x i8>, ptr addrspace(1) %in
674 %ext = zext <8 x i8> %load to <8 x i64>
675 store <8 x i64> %ext, ptr addrspace(1) %out
679 ; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i64:
681 ; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; Kernel under test - loads <8 x i8> through %in (addrspace(1)),
; sign-extends every element to i64, and stores the <8 x i64> through %out.
682 define amdgpu_kernel void @global_sextload_v8i8_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
683 %load = load <8 x i8>, ptr addrspace(1) %in
684 %ext = sext <8 x i8> %load to <8 x i64>
685 store <8 x i64> %ext, ptr addrspace(1) %out
689 ; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i64:
691 ; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; Kernel under test - loads <16 x i8> through %in (addrspace(1)),
; zero-extends every element to i64, and stores the <16 x i64> through %out.
692 define amdgpu_kernel void @global_zextload_v16i8_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
693 %load = load <16 x i8>, ptr addrspace(1) %in
694 %ext = zext <16 x i8> %load to <16 x i64>
695 store <16 x i64> %ext, ptr addrspace(1) %out
699 ; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i64:
701 ; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; Kernel under test - loads <16 x i8> through %in (addrspace(1)),
; sign-extends every element to i64, and stores the <16 x i64> through %out.
702 define amdgpu_kernel void @global_sextload_v16i8_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
703 %load = load <16 x i8>, ptr addrspace(1) %in
704 %ext = sext <16 x i8> %load to <16 x i64>
705 store <16 x i64> %ext, ptr addrspace(1) %out
709 ; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i64:
711 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
712 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; Kernel under test - loads <32 x i8> through %in (addrspace(1)),
; zero-extends every element to i64, and stores the <32 x i64> through %out.
713 define amdgpu_kernel void @global_zextload_v32i8_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
714 %load = load <32 x i8>, ptr addrspace(1) %in
715 %ext = zext <32 x i8> %load to <32 x i64>
716 store <32 x i64> %ext, ptr addrspace(1) %out
720 ; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i64:
722 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
723 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; Kernel under test - loads <32 x i8> through %in (addrspace(1)),
; sign-extends every element to i64, and stores the <32 x i64> through %out.
724 define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
725 %load = load <32 x i8>, ptr addrspace(1) %in
726 %ext = sext <32 x i8> %load to <32 x i64>
727 store <32 x i64> %ext, ptr addrspace(1) %out
731 ; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i64:
732 ; define amdgpu_kernel void @global_zextload_v64i8_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
733 ; %load = load <64 x i8>, ptr addrspace(1) %in
734 ; %ext = zext <64 x i8> %load to <64 x i64>
735 ; store <64 x i64> %ext, ptr addrspace(1) %out
739 ; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i64:
740 ; define amdgpu_kernel void @global_sextload_v64i8_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
741 ; %load = load <64 x i8>, ptr addrspace(1) %in
742 ; %ext = sext <64 x i8> %load to <64 x i64>
743 ; store <64 x i64> %ext, ptr addrspace(1) %out
747 ; FUNC-LABEL: {{^}}global_zextload_i8_to_i16:
748 ; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]],
749 ; GCN-NOHSA: buffer_store_short v[[VAL]]
751 ; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
752 ; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
754 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test - loads one i8 through %in (addrspace(1)), zero-extends
; it to i16, and stores the i16 result through %out.
755 define amdgpu_kernel void @global_zextload_i8_to_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
756 %a = load i8, ptr addrspace(1) %in
757 %ext = zext i8 %a to i16
758 store i16 %ext, ptr addrspace(1) %out
762 ; FUNC-LABEL: {{^}}global_sextload_i8_to_i16:
763 ; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]],
764 ; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]],
766 ; GCN-NOHSA: buffer_store_short v[[VAL]]
767 ; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
769 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
770 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; Kernel under test - loads one i8 through %in (addrspace(1)), sign-extends
; it to i16, and stores the i16 result through %out.
771 define amdgpu_kernel void @global_sextload_i8_to_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
772 %a = load i8, ptr addrspace(1) %in
773 %ext = sext i8 %a to i16
774 store i16 %ext, ptr addrspace(1) %out
778 ; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i16:
780 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test - single-element vector form of the i8 to i16 zero
; extension. Loads <1 x i8> through %in, widens with zext, stores <1 x i16>
; through %out.
781 define amdgpu_kernel void @global_zextload_v1i8_to_v1i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
782 %load = load <1 x i8>, ptr addrspace(1) %in
783 %ext = zext <1 x i8> %load to <1 x i16>
784 store <1 x i16> %ext, ptr addrspace(1) %out
788 ; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i16:
790 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
791 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; Kernel under test - single-element vector form of the i8 to i16 sign
; extension. Loads <1 x i8> through %in, widens with sext, stores <1 x i16>
; through %out.
792 define amdgpu_kernel void @global_sextload_v1i8_to_v1i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
793 %load = load <1 x i8>, ptr addrspace(1) %in
794 %ext = sext <1 x i8> %load to <1 x i16>
795 store <1 x i16> %ext, ptr addrspace(1) %out
799 ; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i16:
801 ; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test - loads <2 x i8> through %in (addrspace(1)),
; zero-extends every element to i16, and stores the <2 x i16> through %out.
802 define amdgpu_kernel void @global_zextload_v2i8_to_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
803 %load = load <2 x i8>, ptr addrspace(1) %in
804 %ext = zext <2 x i8> %load to <2 x i16>
805 store <2 x i16> %ext, ptr addrspace(1) %out
809 ; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i16:
811 ; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
812 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
813 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; Kernel under test - loads <2 x i8> through %in (addrspace(1)),
; sign-extends every element to i16, and stores the <2 x i16> through %out.
814 define amdgpu_kernel void @global_sextload_v2i8_to_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
815 %load = load <2 x i8>, ptr addrspace(1) %in
816 %ext = sext <2 x i8> %load to <2 x i16>
817 store <2 x i16> %ext, ptr addrspace(1) %out
821 ; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i16:
823 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Kernel under test - loads <4 x i8> through %in (addrspace(1)),
; zero-extends every element to i16, and stores the <4 x i16> through %out.
824 define amdgpu_kernel void @global_zextload_v4i8_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
825 %load = load <4 x i8>, ptr addrspace(1) %in
826 %ext = zext <4 x i8> %load to <4 x i16>
827 store <4 x i16> %ext, ptr addrspace(1) %out
831 ; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i16:
833 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
834 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
835 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
836 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
837 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; Kernel under test - loads <4 x i8> through %in (addrspace(1)),
; sign-extends every element to i16, and stores the <4 x i16> through %out.
838 define amdgpu_kernel void @global_sextload_v4i8_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
839 %load = load <4 x i8>, ptr addrspace(1) %in
840 %ext = sext <4 x i8> %load to <4 x i16>
841 store <4 x i16> %ext, ptr addrspace(1) %out
845 ; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i16:
847 ; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; Kernel under test - loads <8 x i8> through %in (addrspace(1)),
; zero-extends every element to i16, and stores the <8 x i16> through %out.
848 define amdgpu_kernel void @global_zextload_v8i8_to_v8i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
849 %load = load <8 x i8>, ptr addrspace(1) %in
850 %ext = zext <8 x i8> %load to <8 x i16>
851 store <8 x i16> %ext, ptr addrspace(1) %out
; Sign-extending variant for <8 x i8> -> <8 x i16>.
; The r600 checks below expect one VTX_READ_64 and then eight BFE_INT
; instructions (one per vector element), matched as an unordered DAG group.
; NOTE(review): the destination pattern `[0-9].[XYZW]` uses an unescaped '.'
; and a single-digit class; `[0-9]+\.[XYZW]` may have been intended - confirm
; before changing.
855 ; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i16:
857 ; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
858 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
859 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
860 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
861 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
862 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
863 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
864 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
865 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; Kernel under test: load <8 x i8> from %in, sext to <8 x i16>, store to %out.
866 define amdgpu_kernel void @global_sextload_v8i8_to_v8i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
867 %load = load <8 x i8>, ptr addrspace(1) %in
868 %ext = sext <8 x i8> %load to <8 x i16>
869 store <8 x i16> %ext, ptr addrspace(1) %out
; Zero-extending variant for <16 x i8> -> <16 x i16>.
; The only r600 expectation visible here is a single VTX_READ_128 writing all
; four channels (.XYZW) of the destination register.
873 ; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i16:
875 ; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; Kernel under test: load <16 x i8> from %in, zext to <16 x i16>, store to %out.
876 define amdgpu_kernel void @global_zextload_v16i8_to_v16i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
877 %load = load <16 x i8>, ptr addrspace(1) %in
878 %ext = zext <16 x i8> %load to <16 x i16>
879 store <16 x i16> %ext, ptr addrspace(1) %out
; Sign-extending variant for <16 x i8> -> <16 x i16>.
; The r600 checks below expect one VTX_READ_128 and then sixteen BFE_INT
; instructions (one per vector element), matched as an unordered DAG group.
; NOTE(review): the destination pattern `[0-9].[XYZW]` uses an unescaped '.'
; and a single-digit class; `[0-9]+\.[XYZW]` may have been intended - confirm
; before changing.
883 ; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i16:
885 ; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
886 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
887 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
888 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
889 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
890 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
891 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
892 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
893 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
894 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
895 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
896 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
897 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
898 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
899 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
900 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
901 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; Kernel under test: load <16 x i8> from %in, sext to <16 x i16>, store to %out.
902 define amdgpu_kernel void @global_sextload_v16i8_to_v16i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
903 %load = load <16 x i8>, ptr addrspace(1) %in
904 %ext = sext <16 x i8> %load to <16 x i16>
905 store <16 x i16> %ext, ptr addrspace(1) %out
; Zero-extending variant for <32 x i8> -> <32 x i16>.
; The r600 checks below expect two VTX_READ_128 instructions whose third
; operand is 0 and 16 respectively (presumably the byte offsets of the two
; 16-byte halves of the 32-byte vector - confirm). Both are DAG checks, so
; either read may appear first.
909 ; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i16:
911 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
912 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; Kernel under test: load <32 x i8> from %in, zext to <32 x i16>, store to %out.
913 define amdgpu_kernel void @global_zextload_v32i8_to_v32i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
914 %load = load <32 x i8>, ptr addrspace(1) %in
915 %ext = zext <32 x i8> %load to <32 x i16>
916 store <32 x i16> %ext, ptr addrspace(1) %out
; Sign-extending variant for <32 x i8> -> <32 x i16>.
; The r600 checks below expect two VTX_READ_128 instructions (third operand 0
; and 16; presumably byte offsets of the two 16-byte halves - confirm) plus
; thirty-two BFE_INT instructions (one per vector element). Every line is a
; DAG check, so reads and extracts may be interleaved in any order.
; NOTE(review): the destination pattern `[0-9].[XYZW]` uses an unescaped '.'
; and a single-digit class; `[0-9]+\.[XYZW]` may have been intended - confirm
; before changing.
920 ; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i16:
922 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
923 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
924 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
925 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
926 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
927 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
928 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
929 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
930 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
931 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
932 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
933 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
934 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
935 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
936 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
937 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
938 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
939 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
940 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
941 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
942 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
943 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
944 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
945 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
946 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
947 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
948 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
949 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
950 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
951 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
952 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
953 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
954 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
955 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; Kernel under test: load <32 x i8> from %in, sext to <32 x i16>, store to %out.
956 define amdgpu_kernel void @global_sextload_v32i8_to_v32i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
957 %load = load <32 x i8>, ptr addrspace(1) %in
958 %ext = sext <32 x i8> %load to <32 x i16>
959 store <32 x i16> %ext, ptr addrspace(1) %out
; Disabled test case: <64 x i8> -> <64 x i16> zero-extending load.
; The whole definition is commented out and its label directive is spelled
; with a leading X, so it matches none of the check prefixes used by the RUN
; lines; nothing below is compiled or checked.
; NOTE(review): the reason for disabling is not recorded here - confirm
; whether this vector width is expected to work before re-enabling.
963 ; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i16:
964 ; define amdgpu_kernel void @global_zextload_v64i8_to_v64i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
965 ; %load = load <64 x i8>, ptr addrspace(1) %in
966 ; %ext = zext <64 x i8> %load to <64 x i16>
967 ; store <64 x i16> %ext, ptr addrspace(1) %out
; Disabled test case: <64 x i8> -> <64 x i16> sign-extending load.
; The whole definition is commented out and its label directive is spelled
; with a leading X, so it matches none of the check prefixes used by the RUN
; lines; nothing below is compiled or checked.
; NOTE(review): the reason for disabling is not recorded here - confirm
; whether this vector width is expected to work before re-enabling.
971 ; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i16:
972 ; define amdgpu_kernel void @global_sextload_v64i8_to_v64i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
973 ; %load = load <64 x i8>, ptr addrspace(1) %in
974 ; %ext = sext <64 x i8> %load to <64 x i16>
975 ; store <64 x i16> %ext, ptr addrspace(1) %out
; Attribute group #0, referenced by the kernel definitions above: marks each
; kernel as nounwind.
979 attributes #0 = { nounwind }