1 ; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s
2 ; RUN: llc -mtriple=amdgcn-- -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SICIVI,FUNC %s
3 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,FUNC %s
4 ; RUN: llc -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
; Prefix note: the SI prefix is enabled only on the first run (the one without
; -mcpu), so the SI-prefixed directives further down are actually exercised.
; NOTE(review): assumes the default CPU for amdgcn-- is an SI part - confirm.
6 ; Testing for ds_read/write_b128 (the two runs below turn on +enable-ds128 and use the CIVI prefix)
7 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CIVI,FUNC %s
8 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CIVI,FUNC %s
10 ; FUNC-LABEL: {{^}}local_load_i8:
12 ; SICIVI: s_mov_b32 m0
16 ; EG: LDS_UBYTE_READ_RET
17 define amdgpu_kernel void @local_load_i8(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
19 %ld = load i8, ptr addrspace(3) %in
20 store i8 %ld, ptr addrspace(3) %out
24 ; FUNC-LABEL: {{^}}local_load_v2i8:
26 ; SICIVI: s_mov_b32 m0
30 ; EG: LDS_USHORT_READ_RET
31 define amdgpu_kernel void @local_load_v2i8(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
33 %ld = load <2 x i8>, ptr addrspace(3) %in
34 store <2 x i8> %ld, ptr addrspace(3) %out
38 ; FUNC-LABEL: {{^}}local_load_v3i8:
43 define amdgpu_kernel void @local_load_v3i8(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
45 %ld = load <3 x i8>, ptr addrspace(3) %in
46 store <3 x i8> %ld, ptr addrspace(3) %out
50 ; FUNC-LABEL: {{^}}local_load_v4i8:
55 define amdgpu_kernel void @local_load_v4i8(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
57 %ld = load <4 x i8>, ptr addrspace(3) %in
58 store <4 x i8> %ld, ptr addrspace(3) %out
62 ; FUNC-LABEL: {{^}}local_load_v8i8:
68 define amdgpu_kernel void @local_load_v8i8(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
70 %ld = load <8 x i8>, ptr addrspace(3) %in
71 store <8 x i8> %ld, ptr addrspace(3) %out
75 ; FUNC-LABEL: {{^}}local_load_v16i8:
77 ; GCN: ds_read2_b64 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]], v{{[0-9]+}} offset1:1{{$}}
78 ; GCN: ds_write2_b64 v{{[0-9]+}}, v[[[LO]]:{{[0-9]+}}], v[{{[0-9]+}}:[[HI]]] offset1:1{{$}}
84 define amdgpu_kernel void @local_load_v16i8(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
86 %ld = load <16 x i8>, ptr addrspace(3) %in
87 store <16 x i8> %ld, ptr addrspace(3) %out
91 ; FUNC-LABEL: {{^}}local_zextload_i8_to_i32:
94 ; SICIVI: s_mov_b32 m0
97 ; EG: LDS_UBYTE_READ_RET
98 define amdgpu_kernel void @local_zextload_i8_to_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
99 %a = load i8, ptr addrspace(3) %in
100 %ext = zext i8 %a to i32
101 store i32 %ext, ptr addrspace(3) %out
105 ; FUNC-LABEL: {{^}}local_sextload_i8_to_i32:
108 ; SICIVI: s_mov_b32 m0
111 ; EG: LDS_UBYTE_READ_RET
113 define amdgpu_kernel void @local_sextload_i8_to_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
114 %ld = load i8, ptr addrspace(3) %in
115 %ext = sext i8 %ld to i32
116 store i32 %ext, ptr addrspace(3) %out
120 ; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i32:
122 ; EG: LDS_UBYTE_READ_RET
123 define amdgpu_kernel void @local_zextload_v1i8_to_v1i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
124 %load = load <1 x i8>, ptr addrspace(3) %in
125 %ext = zext <1 x i8> %load to <1 x i32>
126 store <1 x i32> %ext, ptr addrspace(3) %out
130 ; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i32:
133 ; EG: LDS_UBYTE_READ_RET
135 define amdgpu_kernel void @local_sextload_v1i8_to_v1i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
136 %load = load <1 x i8>, ptr addrspace(3) %in
137 %ext = sext <1 x i8> %load to <1 x i32>
138 store <1 x i32> %ext, ptr addrspace(3) %out
142 ; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i32:
146 ; EG: LDS_USHORT_READ_RET
147 define amdgpu_kernel void @local_zextload_v2i8_to_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
148 %load = load <2 x i8>, ptr addrspace(3) %in
149 %ext = zext <2 x i8> %load to <2 x i32>
150 store <2 x i32> %ext, ptr addrspace(3) %out
154 ; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i32:
157 ; SICIVI: s_mov_b32 m0
159 ; FIXME: Need to optimize this sequence to avoid extra shift on VI.
160 ; t23: i16 = srl t39, Constant:i32<8>
161 ; t31: i32 = any_extend t23
162 ; t33: i32 = sign_extend_inreg t31, ValueType:ch:i8
164 ; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
165 ; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
167 ; EG: LDS_USHORT_READ_RET
170 define amdgpu_kernel void @local_sextload_v2i8_to_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
171 %load = load <2 x i8>, ptr addrspace(3) %in
172 %ext = sext <2 x i8> %load to <2 x i32>
173 store <2 x i32> %ext, ptr addrspace(3) %out
177 ; FUNC-LABEL: {{^}}local_zextload_v3i8_to_v3i32:
181 ; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
182 ; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
183 ; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,
186 define amdgpu_kernel void @local_zextload_v3i8_to_v3i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
188 %ld = load <3 x i8>, ptr addrspace(3) %in
189 %ext = zext <3 x i8> %ld to <3 x i32>
190 store <3 x i32> %ext, ptr addrspace(3) %out
194 ; FUNC-LABEL: {{^}}local_sextload_v3i8_to_v3i32:
197 ; SICIVI: s_mov_b32 m0
205 ; SI-DAG: ds_write_b64
206 ; SI-DAG: ds_write_b32
207 ; CIVI-DAG: ds_write_b96
208 ; GFX9-DAG: ds_write_b96
214 define amdgpu_kernel void @local_sextload_v3i8_to_v3i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
216 %ld = load <3 x i8>, ptr addrspace(3) %in
217 %ext = sext <3 x i8> %ld to <3 x i32>
218 store <3 x i32> %ext, ptr addrspace(3) %out
222 ; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i32:
225 ; SICIVI: s_mov_b32 m0
232 define amdgpu_kernel void @local_zextload_v4i8_to_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
233 %load = load <4 x i8>, ptr addrspace(3) %in
234 %ext = zext <4 x i8> %load to <4 x i32>
235 store <4 x i32> %ext, ptr addrspace(3) %out
239 ; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i32:
242 ; SICIVI: s_mov_b32 m0
245 ; EG-DAG: LDS_READ_RET
250 define amdgpu_kernel void @local_sextload_v4i8_to_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
251 %load = load <4 x i8>, ptr addrspace(3) %in
252 %ext = sext <4 x i8> %load to <4 x i32>
253 store <4 x i32> %ext, ptr addrspace(3) %out
257 ; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i32:
258 ; SICIVI: s_mov_b32 m0
261 ; EG-DAG: LDS_READ_RET
262 ; EG-DAG: LDS_READ_RET
269 define amdgpu_kernel void @local_zextload_v8i8_to_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
270 %load = load <8 x i8>, ptr addrspace(3) %in
271 %ext = zext <8 x i8> %load to <8 x i32>
272 store <8 x i32> %ext, ptr addrspace(3) %out
276 ; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i32:
277 ; SICIVI: s_mov_b32 m0
280 ; EG-DAG: LDS_READ_RET
281 ; EG-DAG: LDS_READ_RET
290 define amdgpu_kernel void @local_sextload_v8i8_to_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
291 %load = load <8 x i8>, ptr addrspace(3) %in
292 %ext = sext <8 x i8> %load to <8 x i32>
293 store <8 x i32> %ext, ptr addrspace(3) %out
297 ; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i32:
298 ; SICIVI: s_mov_b32 m0
301 ; EG-DAG: LDS_READ_RET
302 ; EG-DAG: LDS_READ_RET
303 ; EG-DAG: LDS_READ_RET
304 ; EG-DAG: LDS_READ_RET
317 define amdgpu_kernel void @local_zextload_v16i8_to_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
318 %load = load <16 x i8>, ptr addrspace(3) %in
319 %ext = zext <16 x i8> %load to <16 x i32>
320 store <16 x i32> %ext, ptr addrspace(3) %out
324 ; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i32:
325 ; SICIVI: s_mov_b32 m0
328 ; EG-DAG: LDS_READ_RET
329 ; EG-DAG: LDS_READ_RET
330 ; EG-DAG: LDS_READ_RET
331 ; EG-DAG: LDS_READ_RET
348 define amdgpu_kernel void @local_sextload_v16i8_to_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
349 %load = load <16 x i8>, ptr addrspace(3) %in
350 %ext = sext <16 x i8> %load to <16 x i32>
351 store <16 x i32> %ext, ptr addrspace(3) %out
355 ; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i32:
356 ; SICIVI: s_mov_b32 m0
359 ; EG-DAG: LDS_READ_RET
360 ; EG-DAG: LDS_READ_RET
361 ; EG-DAG: LDS_READ_RET
362 ; EG-DAG: LDS_READ_RET
363 ; EG-DAG: LDS_READ_RET
364 ; EG-DAG: LDS_READ_RET
365 ; EG-DAG: LDS_READ_RET
366 ; EG-DAG: LDS_READ_RET
367 define amdgpu_kernel void @local_zextload_v32i8_to_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
368 %load = load <32 x i8>, ptr addrspace(3) %in
369 %ext = zext <32 x i8> %load to <32 x i32>
370 store <32 x i32> %ext, ptr addrspace(3) %out
374 ; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i32:
375 ; SICIVI: s_mov_b32 m0
378 ; EG-DAG: LDS_READ_RET
379 ; EG-DAG: LDS_READ_RET
380 ; EG-DAG: LDS_READ_RET
381 ; EG-DAG: LDS_READ_RET
382 ; EG-DAG: LDS_READ_RET
383 ; EG-DAG: LDS_READ_RET
384 ; EG-DAG: LDS_READ_RET
385 ; EG-DAG: LDS_READ_RET
386 define amdgpu_kernel void @local_sextload_v32i8_to_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
387 %load = load <32 x i8>, ptr addrspace(3) %in
388 %ext = sext <32 x i8> %load to <32 x i32>
389 store <32 x i32> %ext, ptr addrspace(3) %out
393 ; FUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i32:
394 ; SICIVI: s_mov_b32 m0
397 ; EG-DAG: LDS_READ_RET
398 ; EG-DAG: LDS_READ_RET
399 ; EG-DAG: LDS_READ_RET
400 ; EG-DAG: LDS_READ_RET
401 ; EG-DAG: LDS_READ_RET
402 ; EG-DAG: LDS_READ_RET
403 ; EG-DAG: LDS_READ_RET
404 ; EG-DAG: LDS_READ_RET
405 ; EG-DAG: LDS_READ_RET
406 ; EG-DAG: LDS_READ_RET
407 ; EG-DAG: LDS_READ_RET
408 ; EG-DAG: LDS_READ_RET
409 ; EG-DAG: LDS_READ_RET
410 ; EG-DAG: LDS_READ_RET
411 ; EG-DAG: LDS_READ_RET
412 ; EG-DAG: LDS_READ_RET
413 define amdgpu_kernel void @local_zextload_v64i8_to_v64i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
414 %load = load <64 x i8>, ptr addrspace(3) %in
415 %ext = zext <64 x i8> %load to <64 x i32>
416 store <64 x i32> %ext, ptr addrspace(3) %out
420 ; FUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i32:
421 ; SICIVI: s_mov_b32 m0
424 ; EG-DAG: LDS_READ_RET
425 ; EG-DAG: LDS_READ_RET
426 ; EG-DAG: LDS_READ_RET
427 ; EG-DAG: LDS_READ_RET
428 ; EG-DAG: LDS_READ_RET
429 ; EG-DAG: LDS_READ_RET
430 ; EG-DAG: LDS_READ_RET
431 ; EG-DAG: LDS_READ_RET
432 ; EG-DAG: LDS_READ_RET
433 ; EG-DAG: LDS_READ_RET
434 ; EG-DAG: LDS_READ_RET
435 ; EG-DAG: LDS_READ_RET
436 ; EG-DAG: LDS_READ_RET
437 ; EG-DAG: LDS_READ_RET
438 ; EG-DAG: LDS_READ_RET
439 ; EG-DAG: LDS_READ_RET
440 define amdgpu_kernel void @local_sextload_v64i8_to_v64i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
441 %load = load <64 x i8>, ptr addrspace(3) %in
442 %ext = sext <64 x i8> %load to <64 x i32>
443 store <64 x i32> %ext, ptr addrspace(3) %out
447 ; FUNC-LABEL: {{^}}local_zextload_i8_to_i64:
448 ; SICIVI: s_mov_b32 m0
451 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
452 ; GCN-DAG: ds_read_u8 v[[LO:[0-9]+]],
453 ; GCN: ds_write_b64 v{{[0-9]+}}, v[[[LO]]:[[HI]]]
455 ; EG: LDS_UBYTE_READ_RET
456 ; EG: MOV {{.*}}, literal
458 define amdgpu_kernel void @local_zextload_i8_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
459 %a = load i8, ptr addrspace(3) %in
460 %ext = zext i8 %a to i64
461 store i64 %ext, ptr addrspace(3) %out
465 ; FUNC-LABEL: {{^}}local_sextload_i8_to_i64:
466 ; SICIVI: s_mov_b32 m0
469 ; GCN: ds_read_i8 v[[LO:[0-9]+]],
470 ; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
472 ; GCN: ds_write_b64 v{{[0-9]+}}, v[[[LO]]:[[HI]]]
474 ; EG: LDS_UBYTE_READ_RET
478 define amdgpu_kernel void @local_sextload_i8_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
479 %a = load i8, ptr addrspace(3) %in
480 %ext = sext i8 %a to i64
481 store i64 %ext, ptr addrspace(3) %out
485 ; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i64:
486 ; SICIVI: s_mov_b32 m0
489 ; EG: LDS_UBYTE_READ_RET
490 ; EG: MOV {{.*}}, literal
493 define amdgpu_kernel void @local_zextload_v1i8_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
494 %load = load <1 x i8>, ptr addrspace(3) %in
495 %ext = zext <1 x i8> %load to <1 x i64>
496 store <1 x i64> %ext, ptr addrspace(3) %out
500 ; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i64:
501 ; SICIVI: s_mov_b32 m0
504 ; EG: LDS_UBYTE_READ_RET
508 define amdgpu_kernel void @local_sextload_v1i8_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
509 %load = load <1 x i8>, ptr addrspace(3) %in
510 %ext = sext <1 x i8> %load to <1 x i64>
511 store <1 x i64> %ext, ptr addrspace(3) %out
515 ; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i64:
516 ; SICIVI: s_mov_b32 m0
519 ; EG: LDS_USHORT_READ_RET
520 define amdgpu_kernel void @local_zextload_v2i8_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
521 %load = load <2 x i8>, ptr addrspace(3) %in
522 %ext = zext <2 x i8> %load to <2 x i64>
523 store <2 x i64> %ext, ptr addrspace(3) %out
527 ; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i64:
528 ; SICIVI: s_mov_b32 m0
531 ; EG: LDS_USHORT_READ_RET
534 define amdgpu_kernel void @local_sextload_v2i8_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
535 %load = load <2 x i8>, ptr addrspace(3) %in
536 %ext = sext <2 x i8> %load to <2 x i64>
537 store <2 x i64> %ext, ptr addrspace(3) %out
541 ; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i64:
542 ; SICIVI: s_mov_b32 m0
546 define amdgpu_kernel void @local_zextload_v4i8_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
547 %load = load <4 x i8>, ptr addrspace(3) %in
548 %ext = zext <4 x i8> %load to <4 x i64>
549 store <4 x i64> %ext, ptr addrspace(3) %out
553 ; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i64:
554 ; SICIVI: s_mov_b32 m0
558 define amdgpu_kernel void @local_sextload_v4i8_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
559 %load = load <4 x i8>, ptr addrspace(3) %in
560 %ext = sext <4 x i8> %load to <4 x i64>
561 store <4 x i64> %ext, ptr addrspace(3) %out
565 ; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i64:
566 ; SICIVI: s_mov_b32 m0
571 define amdgpu_kernel void @local_zextload_v8i8_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
572 %load = load <8 x i8>, ptr addrspace(3) %in
573 %ext = zext <8 x i8> %load to <8 x i64>
574 store <8 x i64> %ext, ptr addrspace(3) %out
578 ; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i64:
579 ; SICIVI: s_mov_b32 m0
593 define amdgpu_kernel void @local_sextload_v8i8_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
594 %load = load <8 x i8>, ptr addrspace(3) %in
595 %ext = sext <8 x i8> %load to <8 x i64>
596 store <8 x i64> %ext, ptr addrspace(3) %out
600 ; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i64:
601 ; SICIVI: s_mov_b32 m0
608 define amdgpu_kernel void @local_zextload_v16i8_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
609 %load = load <16 x i8>, ptr addrspace(3) %in
610 %ext = zext <16 x i8> %load to <16 x i64>
611 store <16 x i64> %ext, ptr addrspace(3) %out
615 ; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i64:
616 ; SICIVI: s_mov_b32 m0
623 define amdgpu_kernel void @local_sextload_v16i8_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
624 %load = load <16 x i8>, ptr addrspace(3) %in
625 %ext = sext <16 x i8> %load to <16 x i64>
626 store <16 x i64> %ext, ptr addrspace(3) %out
630 ; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i64:
631 ; SICIVI: s_mov_b32 m0
642 define amdgpu_kernel void @local_zextload_v32i8_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
643 %load = load <32 x i8>, ptr addrspace(3) %in
644 %ext = zext <32 x i8> %load to <32 x i64>
645 store <32 x i64> %ext, ptr addrspace(3) %out
649 ; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i64:
650 ; SICIVI: s_mov_b32 m0
661 define amdgpu_kernel void @local_sextload_v32i8_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
662 %load = load <32 x i8>, ptr addrspace(3) %in
663 %ext = sext <32 x i8> %load to <32 x i64>
664 store <32 x i64> %ext, ptr addrspace(3) %out
668 ; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i64:
669 ; define amdgpu_kernel void @local_zextload_v64i8_to_v64i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
670 ; %load = load <64 x i8>, ptr addrspace(3) %in
671 ; %ext = zext <64 x i8> %load to <64 x i64>
672 ; store <64 x i64> %ext, ptr addrspace(3) %out
676 ; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i64:
677 ; define amdgpu_kernel void @local_sextload_v64i8_to_v64i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
678 ; %load = load <64 x i8>, ptr addrspace(3) %in
679 ; %ext = sext <64 x i8> %load to <64 x i64>
680 ; store <64 x i64> %ext, ptr addrspace(3) %out
684 ; FUNC-LABEL: {{^}}local_zextload_i8_to_i16:
685 ; SICIVI: s_mov_b32 m0
; The data operand of the 16-bit store must be the very register that
; ds_read_u8 defined; VAL is captured on the read and reused on the write.
687 ; GCN: ds_read_u8 v[[VAL:[0-9]+]],
688 ; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]]
690 ; EG: LDS_UBYTE_READ_RET
691 ; EG: LDS_SHORT_WRITE
692 define amdgpu_kernel void @local_zextload_i8_to_i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
693 %a = load i8, ptr addrspace(3) %in
694 %ext = zext i8 %a to i16
695 store i16 %ext, ptr addrspace(3) %out
699 ; FUNC-LABEL: {{^}}local_sextload_i8_to_i16:
700 ; SICIVI: s_mov_b32 m0
702 ; GCN: ds_read_i8 v[[VAL:[0-9]+]],
703 ; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]]
705 ; EG: LDS_UBYTE_READ_RET
707 ; EG: LDS_SHORT_WRITE
708 define amdgpu_kernel void @local_sextload_i8_to_i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
709 %a = load i8, ptr addrspace(3) %in
710 %ext = sext i8 %a to i16
711 store i16 %ext, ptr addrspace(3) %out
715 ; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i16:
716 ; SICIVI: s_mov_b32 m0
719 ; EG: LDS_UBYTE_READ_RET
720 ; EG: LDS_SHORT_WRITE
721 define amdgpu_kernel void @local_zextload_v1i8_to_v1i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
722 %load = load <1 x i8>, ptr addrspace(3) %in
723 %ext = zext <1 x i8> %load to <1 x i16>
724 store <1 x i16> %ext, ptr addrspace(3) %out
728 ; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i16:
729 ; SICIVI: s_mov_b32 m0
732 ; EG: LDS_UBYTE_READ_RET
734 ; EG: LDS_SHORT_WRITE
735 define amdgpu_kernel void @local_sextload_v1i8_to_v1i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
736 %load = load <1 x i8>, ptr addrspace(3) %in
737 %ext = sext <1 x i8> %load to <1 x i16>
738 store <1 x i16> %ext, ptr addrspace(3) %out
742 ; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i16:
743 ; SICIVI: s_mov_b32 m0
746 ; EG: LDS_USHORT_READ_RET
748 define amdgpu_kernel void @local_zextload_v2i8_to_v2i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
749 %load = load <2 x i8>, ptr addrspace(3) %in
750 %ext = zext <2 x i8> %load to <2 x i16>
751 store <2 x i16> %ext, ptr addrspace(3) %out
755 ; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i16:
756 ; SICIVI: s_mov_b32 m0
759 ; EG: LDS_USHORT_READ_RET
763 define amdgpu_kernel void @local_sextload_v2i8_to_v2i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
764 %load = load <2 x i8>, ptr addrspace(3) %in
765 %ext = sext <2 x i8> %load to <2 x i16>
766 store <2 x i16> %ext, ptr addrspace(3) %out
770 ; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i16:
771 ; SICIVI: s_mov_b32 m0
777 define amdgpu_kernel void @local_zextload_v4i8_to_v4i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
778 %load = load <4 x i8>, ptr addrspace(3) %in
779 %ext = zext <4 x i8> %load to <4 x i16>
780 store <4 x i16> %ext, ptr addrspace(3) %out
784 ; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i16:
785 ; SICIVI: s_mov_b32 m0
789 ; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
796 define amdgpu_kernel void @local_sextload_v4i8_to_v4i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
797 %load = load <4 x i8>, ptr addrspace(3) %in
798 %ext = sext <4 x i8> %load to <4 x i16>
799 store <4 x i16> %ext, ptr addrspace(3) %out
803 ; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i16:
804 ; SICIVI: s_mov_b32 m0
813 define amdgpu_kernel void @local_zextload_v8i8_to_v8i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
814 %load = load <8 x i8>, ptr addrspace(3) %in
815 %ext = zext <8 x i8> %load to <8 x i16>
816 store <8 x i16> %ext, ptr addrspace(3) %out
820 ; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i16:
821 ; SICIVI: s_mov_b32 m0
826 ; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
839 define amdgpu_kernel void @local_sextload_v8i8_to_v8i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
840 %load = load <8 x i8>, ptr addrspace(3) %in
841 %ext = sext <8 x i8> %load to <8 x i16>
842 store <8 x i16> %ext, ptr addrspace(3) %out
846 ; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i16:
847 ; SICIVI: s_mov_b32 m0
862 define amdgpu_kernel void @local_zextload_v16i8_to_v16i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
863 %load = load <16 x i8>, ptr addrspace(3) %in
864 %ext = zext <16 x i8> %load to <16 x i16>
865 store <16 x i16> %ext, ptr addrspace(3) %out
869 ; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i16:
870 ; SICIVI: s_mov_b32 m0
877 ; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
902 define amdgpu_kernel void @local_sextload_v16i8_to_v16i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
903 %load = load <16 x i8>, ptr addrspace(3) %in
904 %ext = sext <16 x i8> %load to <16 x i16>
905 store <16 x i16> %ext, ptr addrspace(3) %out
909 ; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i16:
910 ; SICIVI: s_mov_b32 m0
937 define amdgpu_kernel void @local_zextload_v32i8_to_v32i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
938 %load = load <32 x i8>, ptr addrspace(3) %in
939 %ext = zext <32 x i8> %load to <32 x i16>
940 store <32 x i16> %ext, ptr addrspace(3) %out
944 ; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i16:
945 ; SICIVI: s_mov_b32 m0
956 ; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
1001 define amdgpu_kernel void @local_sextload_v32i8_to_v32i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
1002 %load = load <32 x i8>, ptr addrspace(3) %in
1003 %ext = sext <32 x i8> %load to <32 x i16>
1004 store <32 x i16> %ext, ptr addrspace(3) %out
1008 ; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i16:
1009 ; define amdgpu_kernel void @local_zextload_v64i8_to_v64i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
1010 ; %load = load <64 x i8>, ptr addrspace(3) %in
1011 ; %ext = zext <64 x i8> %load to <64 x i16>
1012 ; store <64 x i16> %ext, ptr addrspace(3) %out
1016 ; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i16:
1017 ; define amdgpu_kernel void @local_sextload_v64i8_to_v64i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
1018 ; %load = load <64 x i8>, ptr addrspace(3) %in
1019 ; %ext = sext <64 x i8> %load to <64 x i16>
1020 ; store <64 x i16> %ext, ptr addrspace(3) %out
1024 ; Tests that ds_read_b128/ds_write_b128 are generated for a 16-byte-aligned load and store.
1025 ; FUNC-LABEL: {{^}}local_v16i8_to_128:
1027 ; SI-NOT: ds_read_b128
1028 ; SI-NOT: ds_write_b128
1030 ; CIVI: ds_read_b128
1031 ; CIVI: ds_write_b128
1037 define amdgpu_kernel void @local_v16i8_to_128(ptr addrspace(3) %out, ptr addrspace(3) %in) {
1038 %ld = load <16 x i8>, ptr addrspace(3) %in, align 16
1039 store <16 x i8> %ld, ptr addrspace(3) %out, align 16
1043 attributes #0 = { nounwind }