; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s --check-prefixes=CHECK,RV64

; Check that we correctly scale the split part indirect offsets by VSCALE.
define <vscale x 32 x i32> @callee_scalable_vector_split_indirect(<vscale x 32 x i32> %x, <vscale x 32 x i32> %y) {
; CHECK-LABEL: callee_scalable_vector_split_indirect:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, a0, a1
; CHECK-NEXT:    vl8re32.v v24, (a0)
; CHECK-NEXT:    vl8re32.v v0, (a1)
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v24
; CHECK-NEXT:    vadd.vv v16, v16, v0
; CHECK-NEXT:    ret
  %a = add <vscale x 32 x i32> %x, %y
  ret <vscale x 32 x i32> %a
}

; Call the function above. Check that we set the arguments correctly.
define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x i32> %x) {
; RV32-LABEL: caller_scalable_vector_split_indirect:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -144
; RV32-NEXT:    .cfi_def_cfa_offset 144
; RV32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 144
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    addi a0, sp, 128
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    vs8r.v v8, (a0)
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    addi a0, sp, 128
; RV32-NEXT:    vs8r.v v16, (a1)
; RV32-NEXT:    vmv.v.i v16, 0
; RV32-NEXT:    call callee_scalable_vector_split_indirect
; RV32-NEXT:    addi sp, s0, -144
; RV32-NEXT:    .cfi_def_cfa sp, 144
; RV32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    .cfi_restore s0
; RV32-NEXT:    addi sp, sp, 144
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: caller_scalable_vector_split_indirect:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -144
; RV64-NEXT:    .cfi_def_cfa_offset 144
; RV64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 144
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    sub sp, sp, a0
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    addi a0, sp, 128
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    vs8r.v v8, (a0)
; RV64-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV64-NEXT:    vmv.v.i v8, 0
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, a0, a1
; RV64-NEXT:    addi a0, sp, 128
; RV64-NEXT:    vs8r.v v16, (a1)
; RV64-NEXT:    vmv.v.i v16, 0
; RV64-NEXT:    call callee_scalable_vector_split_indirect
; RV64-NEXT:    addi sp, s0, -144
; RV64-NEXT:    .cfi_def_cfa sp, 144
; RV64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    .cfi_restore s0
; RV64-NEXT:    addi sp, sp, 144
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %a = call <vscale x 32 x i32> @callee_scalable_vector_split_indirect(<vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> %x)
  ret <vscale x 32 x i32> %a
}

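; Check that a tuple return value comes back in two LMUL=2 register groups
; (v8-v9 and v10-v11) and that swapping the two fields in the caller only
; needs whole-register moves (vmv2r.v).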
define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @caller_tuple_return() {
; RV32-LABEL: caller_tuple_return:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    call callee_tuple_return
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv2r.v v6, v8
; RV32-NEXT:    vmv2r.v v8, v10
; RV32-NEXT:    vmv2r.v v10, v6
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: caller_tuple_return:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    call callee_tuple_return
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv2r.v v6, v8
; RV64-NEXT:    vmv2r.v v8, v10
; RV64-NEXT:    vmv2r.v v10, v6
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %a = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @callee_tuple_return()
  %b = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %a, i32 0)
  %c = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %a, i32 1)
  %d = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) poison, <vscale x 4 x i32> %c, i32 0)
  %e = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %d, <vscale x 4 x i32> %b, i32 1)
  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %e
}

declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @callee_tuple_return()

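; Check that a tuple argument whose fields were swapped is passed to the
; callee in v8-v9 and v10-v11 using only whole-register moves (vmv2r.v).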
define void @caller_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %x) {
; RV32-LABEL: caller_tuple_argument:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv2r.v v6, v8
; RV32-NEXT:    vmv2r.v v8, v10
; RV32-NEXT:    vmv2r.v v10, v6
; RV32-NEXT:    call callee_tuple_argument
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: caller_tuple_argument:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv2r.v v6, v8
; RV64-NEXT:    vmv2r.v v8, v10
; RV64-NEXT:    vmv2r.v v10, v6
; RV64-NEXT:    call callee_tuple_argument
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %x, i32 0)
  %b = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %x, i32 1)
  %c = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) poison, <vscale x 4 x i32> %b, i32 0)
  %d = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %c, <vscale x 4 x i32> %a, i32 1)
  call void @callee_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %d)
  ret void
}

declare void @callee_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i8>, 2))