1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
3 ; RUN: < %s | FileCheck %s
; @lots_args takes ten i32 scalars (%x0..%x9) and one scalable vector
; (<vscale x 16 x i32> %v0, positioned between %x1 and %x2) and does nothing
; but spill every incoming argument to its own stack slot.
; In the expected assembly below, the first eight scalars arrive in a0-a7,
; the vector arrives in v8, and the last two scalars are read back from
; 0(s0) and 8(s0), i.e. from the stack at and above the incoming sp.
5 define dso_local void @lots_args(i32 signext %x0, i32 signext %x1, <vscale x 16 x i32> %v0, i32 signext %x2, i32 signext %x3, i32 signext %x4, i32 signext %x5, i32 signext %x6, i32 %x7, i32 %x8, i32 %x9) #0 {
6 ; CHECK-LABEL: lots_args:
7 ; CHECK: # %bb.0: # %entry
8 ; CHECK-NEXT: addi sp, sp, -64
9 ; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
10 ; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
11 ; CHECK-NEXT: addi s0, sp, 64
12 ; CHECK-NEXT: csrr t0, vlenb
13 ; CHECK-NEXT: slli t0, t0, 3
14 ; CHECK-NEXT: sub sp, sp, t0
15 ; CHECK-NEXT: ld t0, 8(s0)
16 ; CHECK-NEXT: ld t1, 0(s0)
17 ; CHECK-NEXT: sw a0, -28(s0)
18 ; CHECK-NEXT: sw a1, -32(s0)
19 ; CHECK-NEXT: csrr a0, vlenb
20 ; CHECK-NEXT: slli a0, a0, 3
21 ; CHECK-NEXT: sub a0, s0, a0
22 ; CHECK-NEXT: addi a0, a0, -64
23 ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
24 ; CHECK-NEXT: vse32.v v8, (a0)
25 ; CHECK-NEXT: sw a2, -36(s0)
26 ; CHECK-NEXT: sw a3, -40(s0)
27 ; CHECK-NEXT: sw a4, -44(s0)
28 ; CHECK-NEXT: sw a5, -48(s0)
29 ; CHECK-NEXT: sw a6, -52(s0)
30 ; CHECK-NEXT: sw a7, -56(s0)
31 ; CHECK-NEXT: sw t1, -60(s0)
32 ; CHECK-NEXT: sw t0, -64(s0)
33 ; CHECK-NEXT: addi sp, s0, -64
34 ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
35 ; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
36 ; CHECK-NEXT: addi sp, sp, 64
; One stack slot per incoming argument; the vector slot is a scalable alloca.
39 %x0.addr = alloca i32, align 4
40 %x1.addr = alloca i32, align 4
41 %v0.addr = alloca <vscale x 16 x i32>, align 4
42 %x2.addr = alloca i32, align 4
43 %x3.addr = alloca i32, align 4
44 %x4.addr = alloca i32, align 4
45 %x5.addr = alloca i32, align 4
46 %x6.addr = alloca i32, align 4
47 %x7.addr = alloca i32, align 4
48 %x8.addr = alloca i32, align 4
49 %x9.addr = alloca i32, align 4
; Spill each argument to its slot, in parameter order.
50 store i32 %x0, ptr %x0.addr, align 4
51 store i32 %x1, ptr %x1.addr, align 4
52 store <vscale x 16 x i32> %v0, ptr %v0.addr, align 4
53 store i32 %x2, ptr %x2.addr, align 4
54 store i32 %x3, ptr %x3.addr, align 4
55 store i32 %x4, ptr %x4.addr, align 4
56 store i32 %x5, ptr %x5.addr, align 4
57 store i32 %x6, ptr %x6.addr, align 4
58 store i32 %x7, ptr %x7.addr, align 4
59 store i32 %x8, ptr %x8.addr, align 4
60 store i32 %x9, ptr %x9.addr, align 4
; @main zero-fills a 4 x i32 buffer, loads it into a <vscale x 16 x i32>
; value via the RVV vsetvli/vle intrinsics, sets ten i32 locals to 1, and then
; calls @lots_args twice with identical arguments.
; In the expected assembly below, each call is bracketed by
; `addi sp, sp, -16` / `addi sp, sp, 16`, with t0 and t1 (the last two scalar
; arguments) stored to 0(sp) and 8(sp) before the call. s1 holds the address
; of the vector local and is used again for the reload before the second call.
64 define dso_local signext i32 @main() #0 {
66 ; CHECK: # %bb.0: # %entry
67 ; CHECK-NEXT: addi sp, sp, -112
68 ; CHECK-NEXT: sd ra, 104(sp) # 8-byte Folded Spill
69 ; CHECK-NEXT: sd s0, 96(sp) # 8-byte Folded Spill
70 ; CHECK-NEXT: sd s1, 88(sp) # 8-byte Folded Spill
71 ; CHECK-NEXT: addi s0, sp, 112
72 ; CHECK-NEXT: csrr a0, vlenb
73 ; CHECK-NEXT: slli a0, a0, 3
74 ; CHECK-NEXT: sub sp, sp, a0
75 ; CHECK-NEXT: sw zero, -36(s0)
76 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
77 ; CHECK-NEXT: vmv.v.i v8, 0
78 ; CHECK-NEXT: addi a0, s0, -64
79 ; CHECK-NEXT: vse64.v v8, (a0)
80 ; CHECK-NEXT: vsetivli a1, 4, e32, m8, ta, ma
81 ; CHECK-NEXT: sd a1, -72(s0)
82 ; CHECK-NEXT: ld a1, -72(s0)
83 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
84 ; CHECK-NEXT: vle32.v v8, (a0)
85 ; CHECK-NEXT: csrr s1, vlenb
86 ; CHECK-NEXT: slli s1, s1, 3
87 ; CHECK-NEXT: sub s1, s0, s1
88 ; CHECK-NEXT: addi s1, s1, -112
89 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
90 ; CHECK-NEXT: vse32.v v8, (s1)
91 ; CHECK-NEXT: li a0, 1
92 ; CHECK-NEXT: sw a0, -76(s0)
93 ; CHECK-NEXT: sw a0, -80(s0)
94 ; CHECK-NEXT: sw a0, -84(s0)
95 ; CHECK-NEXT: sw a0, -88(s0)
96 ; CHECK-NEXT: sw a0, -92(s0)
97 ; CHECK-NEXT: sw a0, -96(s0)
98 ; CHECK-NEXT: sw a0, -100(s0)
99 ; CHECK-NEXT: sw a0, -104(s0)
100 ; CHECK-NEXT: sw a0, -108(s0)
101 ; CHECK-NEXT: sw a0, -112(s0)
102 ; CHECK-NEXT: lw a0, -76(s0)
103 ; CHECK-NEXT: lw a1, -80(s0)
104 ; CHECK-NEXT: vle32.v v8, (s1)
105 ; CHECK-NEXT: lw a2, -84(s0)
106 ; CHECK-NEXT: lw a3, -88(s0)
107 ; CHECK-NEXT: lw a4, -92(s0)
108 ; CHECK-NEXT: lw a5, -96(s0)
109 ; CHECK-NEXT: lw a6, -100(s0)
110 ; CHECK-NEXT: lw a7, -104(s0)
111 ; CHECK-NEXT: lw t0, -108(s0)
112 ; CHECK-NEXT: lw t1, -112(s0)
113 ; CHECK-NEXT: addi sp, sp, -16
114 ; CHECK-NEXT: sd t1, 8(sp)
115 ; CHECK-NEXT: sd t0, 0(sp)
116 ; CHECK-NEXT: call lots_args
117 ; CHECK-NEXT: addi sp, sp, 16
118 ; CHECK-NEXT: lw a0, -76(s0)
119 ; CHECK-NEXT: lw a1, -80(s0)
120 ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
121 ; CHECK-NEXT: vle32.v v8, (s1)
122 ; CHECK-NEXT: lw a2, -84(s0)
123 ; CHECK-NEXT: lw a3, -88(s0)
124 ; CHECK-NEXT: lw a4, -92(s0)
125 ; CHECK-NEXT: lw a5, -96(s0)
126 ; CHECK-NEXT: lw a6, -100(s0)
127 ; CHECK-NEXT: lw a7, -104(s0)
128 ; CHECK-NEXT: lw t0, -108(s0)
129 ; CHECK-NEXT: lw t1, -112(s0)
130 ; CHECK-NEXT: addi sp, sp, -16
131 ; CHECK-NEXT: sd t1, 8(sp)
132 ; CHECK-NEXT: sd t0, 0(sp)
133 ; CHECK-NEXT: call lots_args
134 ; CHECK-NEXT: addi sp, sp, 16
135 ; CHECK-NEXT: li a0, 0
136 ; CHECK-NEXT: addi sp, s0, -112
137 ; CHECK-NEXT: ld ra, 104(sp) # 8-byte Folded Reload
138 ; CHECK-NEXT: ld s0, 96(sp) # 8-byte Folded Reload
139 ; CHECK-NEXT: ld s1, 88(sp) # 8-byte Folded Reload
140 ; CHECK-NEXT: addi sp, sp, 112
; Locals: return slot, 16-byte input buffer, vector length, the scalable
; vector value, and ten i32 scalars.
143 %retval = alloca i32, align 4
144 %input = alloca [4 x i32], align 4
145 %vl = alloca i64, align 8
146 %v0 = alloca <vscale x 16 x i32>, align 4
147 %x0 = alloca i32, align 4
148 %x1 = alloca i32, align 4
149 %x2 = alloca i32, align 4
150 %x3 = alloca i32, align 4
151 %x4 = alloca i32, align 4
152 %x5 = alloca i32, align 4
153 %x6 = alloca i32, align 4
154 %x7 = alloca i32, align 4
155 %x8 = alloca i32, align 4
156 %x9 = alloca i32, align 4
157 store i32 0, ptr %retval, align 4
; Zero all 16 bytes of %input.
158 call void @llvm.memset.p0.i64(ptr align 4 %input, i8 0, i64 16, i1 false)
; Request a vector length for 4 elements; the expected assembly shows this
; lowering to `vsetivli a1, 4, e32, m8`. The result round-trips through %vl.
159 %0 = call i64 @llvm.riscv.vsetvli.i64(i64 4, i64 2, i64 3)
160 store i64 %0, ptr %vl, align 8
161 %1 = load i64, ptr %vl, align 8
; Load from %input under that vector length and keep the result in %v0.
162 %2 = call <vscale x 16 x i32> @llvm.riscv.vle.nxv16i32.i64(<vscale x 16 x i32> undef, ptr %input, i64 %1)
163 store <vscale x 16 x i32> %2, ptr %v0, align 4
; Every scalar argument is the constant 1.
164 store i32 1, ptr %x0, align 4
165 store i32 1, ptr %x1, align 4
166 store i32 1, ptr %x2, align 4
167 store i32 1, ptr %x3, align 4
168 store i32 1, ptr %x4, align 4
169 store i32 1, ptr %x5, align 4
170 store i32 1, ptr %x6, align 4
171 store i32 1, ptr %x7, align 4
172 store i32 1, ptr %x8, align 4
173 store i32 1, ptr %x9, align 4
; First call: reload every argument from its slot, then call @lots_args.
174 %3 = load i32, ptr %x0, align 4
175 %4 = load i32, ptr %x1, align 4
176 %5 = load <vscale x 16 x i32>, ptr %v0, align 4
177 %6 = load i32, ptr %x2, align 4
178 %7 = load i32, ptr %x3, align 4
179 %8 = load i32, ptr %x4, align 4
180 %9 = load i32, ptr %x5, align 4
181 %10 = load i32, ptr %x6, align 4
182 %11 = load i32, ptr %x7, align 4
183 %12 = load i32, ptr %x8, align 4
184 %13 = load i32, ptr %x9, align 4
185 call void @lots_args(i32 signext %3, i32 signext %4, <vscale x 16 x i32> %5, i32 signext %6, i32 signext %7, i32 signext %8, i32 signext %9, i32 signext %10, i32 %11, i32 %12, i32 %13)
; Second call: same sequence again, reloading everything after the first call.
186 %14 = load i32, ptr %x0, align 4
187 %15 = load i32, ptr %x1, align 4
188 %16 = load <vscale x 16 x i32>, ptr %v0, align 4
189 %17 = load i32, ptr %x2, align 4
190 %18 = load i32, ptr %x3, align 4
191 %19 = load i32, ptr %x4, align 4
192 %20 = load i32, ptr %x5, align 4
193 %21 = load i32, ptr %x6, align 4
194 %22 = load i32, ptr %x7, align 4
195 %23 = load i32, ptr %x8, align 4
196 %24 = load i32, ptr %x9, align 4
197 call void @lots_args(i32 signext %14, i32 signext %15, <vscale x 16 x i32> %16, i32 signext %17, i32 signext %18, i32 signext %19, i32 signext %20, i32 signext %21, i32 %22, i32 %23, i32 %24)
; Intrinsics called from @main: memset zero-fills %input, vsetvli produces the
; vector length, and vle performs the vector load from %input.
201 declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
203 declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
205 declare <vscale x 16 x i32> @llvm.riscv.vle.nxv16i32.i64(<vscale x 16 x i32>, ptr nocapture, i64)
; Attribute group #0 is attached to both @lots_args and @main: no inlining,
; no unwinding, no optimization, and a frame pointer in every function (the
; expected assembly addresses locals relative to s0).
207 attributes #0 = { noinline nounwind optnone "frame-pointer"="all" }