; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
; RUN:   | FileCheck %s -check-prefix=RV64IV
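
; This exercises the RVV calling convention for <vscale x 64 x i8> (LMUL=8)
; values: as the checks below show, the first two vector arguments are passed
; in v8 and v16, while the third does not fit in registers and is passed
; indirectly through a pointer in a0.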
declare <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.nxv64i8(
  <vscale x 64 x i8>,
  <vscale x 64 x i8>,
  <vscale x 64 x i8>,
  i64,
  i64)
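
; @callee folds all three vector arguments into a single vmacc. The intrinsic
; call passes vl = 1024 and policy = 0, which shows up below as a vsetvli with
; tail-undisturbed (tu) semantics.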
define <vscale x 64 x i8> @callee(<vscale x 64 x i8> %arg0, <vscale x 64 x i8> %arg1, <vscale x 64 x i8> %arg2) {
; RV64IV-LABEL: callee:
; RV64IV:       # %bb.0:
; RV64IV-NEXT:    vl8r.v v24, (a0)
; RV64IV-NEXT:    li a0, 1024
; RV64IV-NEXT:    vsetvli zero, a0, e8, m8, tu, ma
; RV64IV-NEXT:    vmacc.vv v8, v16, v24
; RV64IV-NEXT:    ret
  %ret = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.nxv64i8(
    <vscale x 64 x i8> %arg0,
    <vscale x 64 x i8> %arg1,
    <vscale x 64 x i8> %arg2, i64 1024, i64 0)
  ret <vscale x 64 x i8> %ret
}
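
; @caller keeps its <vscale x 64 x i8> locals on the stack: it realigns sp to
; 64 bytes and reserves 32 * vlenb bytes (the slli by 5), reloads each local
; with vl8r.v, and stores the third argument to sp + 64 so its address can be
; passed to @callee in a0.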
define <vscale x 64 x i8> @caller() {
; RV64IV-LABEL: caller:
; RV64IV:       # %bb.0:
; RV64IV-NEXT:    addi sp, sp, -80
; RV64IV-NEXT:    .cfi_def_cfa_offset 80
; RV64IV-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; RV64IV-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; RV64IV-NEXT:    .cfi_offset ra, -8
; RV64IV-NEXT:    .cfi_offset s0, -16
; RV64IV-NEXT:    addi s0, sp, 80
; RV64IV-NEXT:    .cfi_def_cfa s0, 0
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 5
; RV64IV-NEXT:    sub sp, sp, a0
; RV64IV-NEXT:    andi sp, sp, -64
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    li a1, 24
; RV64IV-NEXT:    mul a0, a0, a1
; RV64IV-NEXT:    add a0, sp, a0
; RV64IV-NEXT:    addi a0, a0, 64
; RV64IV-NEXT:    vl8r.v v8, (a0)
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 4
; RV64IV-NEXT:    add a0, sp, a0
; RV64IV-NEXT:    addi a0, a0, 64
; RV64IV-NEXT:    vl8r.v v16, (a0)
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 3
; RV64IV-NEXT:    add a0, sp, a0
; RV64IV-NEXT:    addi a0, a0, 64
; RV64IV-NEXT:    vl8r.v v24, (a0)
; RV64IV-NEXT:    addi a1, sp, 64
; RV64IV-NEXT:    addi a0, sp, 64
; RV64IV-NEXT:    vs8r.v v24, (a1)
; RV64IV-NEXT:    call callee
; RV64IV-NEXT:    addi sp, s0, -80
; RV64IV-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; RV64IV-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; RV64IV-NEXT:    addi sp, sp, 80
; RV64IV-NEXT:    ret
  %local0 = alloca <vscale x 64 x i8>
  %local1 = alloca <vscale x 64 x i8>
  %local2 = alloca <vscale x 64 x i8>
  %arg0 = load volatile <vscale x 64 x i8>, ptr %local0
  %arg1 = load volatile <vscale x 64 x i8>, ptr %local1
  %arg2 = load volatile <vscale x 64 x i8>, ptr %local2
  %ret = call <vscale x 64 x i8> @callee(<vscale x 64 x i8> %arg0,
                                         <vscale x 64 x i8> %arg1,
                                         <vscale x 64 x i8> %arg2)
  ret <vscale x 64 x i8> %ret
}