1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s \
3 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
4 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s \
5 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
; Loads a <vscale x 4 x i8> through %p and returns it.
; The expected output is a single vle8.v into v8 with e8/mf2 (a fractional
; register group), using a0 as the address.
7 define fastcc <vscale x 4 x i8> @ret_nxv4i8(<vscale x 4 x i8>* %p) {
8 ; CHECK-LABEL: ret_nxv4i8:
10 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
11 ; CHECK-NEXT: vle8.v v8, (a0)
13 %v = load <vscale x 4 x i8>, <vscale x 4 x i8>* %p
14 ret <vscale x 4 x i8> %v
; Loads a <vscale x 4 x i32> through %p and returns it.
; The expected output is one whole-register load (vl2re32.v) into v8, with no
; vsetvli needed.
17 define fastcc <vscale x 4 x i32> @ret_nxv4i32(<vscale x 4 x i32>* %p) {
18 ; CHECK-LABEL: ret_nxv4i32:
20 ; CHECK-NEXT: vl2re32.v v8, (a0)
22 %v = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
23 ret <vscale x 4 x i32> %v
; Loads a <vscale x 8 x i32> through %p and returns it.
; The expected output is one whole-register load (vl4re32.v) into v8.
26 define fastcc <vscale x 8 x i32> @ret_nxv8i32(<vscale x 8 x i32>* %p) {
27 ; CHECK-LABEL: ret_nxv8i32:
29 ; CHECK-NEXT: vl4re32.v v8, (a0)
31 %v = load <vscale x 8 x i32>, <vscale x 8 x i32>* %p
32 ret <vscale x 8 x i32> %v
; Loads a <vscale x 16 x i64> through %p and returns it.
; The expected output splits the value into two 8-register groups: v8 is
; loaded from a0 and v16 from a0 + 8 * vlenb (vlenb shifted left by 3).
35 define fastcc <vscale x 16 x i64> @ret_nxv16i64(<vscale x 16 x i64>* %p) {
36 ; CHECK-LABEL: ret_nxv16i64:
38 ; CHECK-NEXT: csrr a1, vlenb
39 ; CHECK-NEXT: slli a1, a1, 3
40 ; CHECK-NEXT: add a1, a0, a1
41 ; CHECK-NEXT: vl8re64.v v16, (a1)
42 ; CHECK-NEXT: vl8re64.v v8, (a0)
44 %v = load <vscale x 16 x i64>, <vscale x 16 x i64>* %p
45 ret <vscale x 16 x i64> %v
; Loads a <vscale x 8 x i1> mask through %p and returns it.
; The expected output is a mask load (vlm.v) into v0 under e8/m1.
48 define fastcc <vscale x 8 x i1> @ret_mask_nxv8i1(<vscale x 8 x i1>* %p) {
49 ; CHECK-LABEL: ret_mask_nxv8i1:
51 ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
52 ; CHECK-NEXT: vlm.v v0, (a0)
54 %v = load <vscale x 8 x i1>, <vscale x 8 x i1>* %p
55 ret <vscale x 8 x i1> %v
; Loads a <vscale x 32 x i1> mask through %p and returns it.
; The expected output is a mask load (vlm.v) into v0 under e8/m4.
58 define fastcc <vscale x 32 x i1> @ret_mask_nxv32i1(<vscale x 32 x i1>* %p) {
59 ; CHECK-LABEL: ret_mask_nxv32i1:
61 ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
62 ; CHECK-NEXT: vlm.v v0, (a0)
64 %v = load <vscale x 32 x i1>, <vscale x 32 x i1>* %p
65 ret <vscale x 32 x i1> %v
68 ; Return the vector via registers v8-v23
; NOTE(review): the expected output below does not leave the result in
; v8-v23. It loads four 8-register groups from a1 (offsets 0, 8, 16 and
; 24 * vlenb) and stores all four through a0 at the same offsets, so the value
; appears to be returned indirectly through a pointer in a0 while %x arrives
; in a1. The comment above looks stale -- confirm against the calling
; convention implementation.
69 define fastcc <vscale x 64 x i32> @ret_split_nxv64i32(<vscale x 64 x i32>* %x) {
70 ; CHECK-LABEL: ret_split_nxv64i32:
72 ; CHECK-NEXT: csrr a2, vlenb
73 ; CHECK-NEXT: slli a3, a2, 3
74 ; CHECK-NEXT: add a4, a1, a3
75 ; CHECK-NEXT: vl8re32.v v8, (a4)
76 ; CHECK-NEXT: slli a4, a2, 4
77 ; CHECK-NEXT: li a5, 24
78 ; CHECK-NEXT: mul a2, a2, a5
79 ; CHECK-NEXT: add a5, a1, a4
80 ; CHECK-NEXT: vl8re32.v v16, (a1)
81 ; CHECK-NEXT: add a1, a1, a2
82 ; CHECK-NEXT: vl8re32.v v24, (a1)
83 ; CHECK-NEXT: vl8re32.v v0, (a5)
84 ; CHECK-NEXT: vs8r.v v16, (a0)
85 ; CHECK-NEXT: add a2, a0, a2
86 ; CHECK-NEXT: vs8r.v v24, (a2)
87 ; CHECK-NEXT: add a4, a0, a4
88 ; CHECK-NEXT: vs8r.v v0, (a4)
89 ; CHECK-NEXT: add a0, a0, a3
90 ; CHECK-NEXT: vs8r.v v8, (a0)
92 %v = load <vscale x 64 x i32>, <vscale x 64 x i32>* %x
93 ret <vscale x 64 x i32> %v
96 ; Return the vector fully via the stack
; Loads a <vscale x 128 x i32> through %x and returns it.
; The expected output reserves 16 + 32 * vlenb bytes of stack, performs eight
; vl8re32.v loads from a1, spills four of those groups to the stack and
; reloads them, and writes all eight groups with vs8r.v at offsets from a0.
; NOTE(review): the stores go through a0, with the stack only used for the
; spill slots; presumably a0 points at return storage provided by the caller
; -- confirm.
97 define fastcc <vscale x 128 x i32> @ret_split_nxv128i32(<vscale x 128 x i32>* %x) {
98 ; CHECK-LABEL: ret_split_nxv128i32:
100 ; CHECK-NEXT: addi sp, sp, -16
101 ; CHECK-NEXT: .cfi_def_cfa_offset 16
102 ; CHECK-NEXT: csrr a2, vlenb
103 ; CHECK-NEXT: slli a2, a2, 5
104 ; CHECK-NEXT: sub sp, sp, a2
105 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
106 ; CHECK-NEXT: csrr a2, vlenb
107 ; CHECK-NEXT: slli a3, a2, 3
108 ; CHECK-NEXT: add a4, a1, a3
109 ; CHECK-NEXT: vl8re32.v v8, (a4)
110 ; CHECK-NEXT: csrr a4, vlenb
111 ; CHECK-NEXT: li a5, 24
112 ; CHECK-NEXT: mul a4, a4, a5
113 ; CHECK-NEXT: add a4, sp, a4
114 ; CHECK-NEXT: addi a4, a4, 16
115 ; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
116 ; CHECK-NEXT: slli a4, a2, 4
117 ; CHECK-NEXT: add a5, a1, a4
118 ; CHECK-NEXT: vl8re32.v v8, (a5)
119 ; CHECK-NEXT: csrr a5, vlenb
120 ; CHECK-NEXT: slli a5, a5, 4
121 ; CHECK-NEXT: add a5, sp, a5
122 ; CHECK-NEXT: addi a5, a5, 16
123 ; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
124 ; CHECK-NEXT: li a5, 24
125 ; CHECK-NEXT: mul a5, a2, a5
126 ; CHECK-NEXT: add a6, a1, a5
127 ; CHECK-NEXT: vl8re32.v v8, (a6)
128 ; CHECK-NEXT: csrr a6, vlenb
129 ; CHECK-NEXT: slli a6, a6, 3
130 ; CHECK-NEXT: add a6, sp, a6
131 ; CHECK-NEXT: addi a6, a6, 16
132 ; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill
133 ; CHECK-NEXT: slli a6, a2, 5
134 ; CHECK-NEXT: add a7, a1, a6
135 ; CHECK-NEXT: vl8re32.v v8, (a7)
136 ; CHECK-NEXT: addi a7, sp, 16
137 ; CHECK-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill
138 ; CHECK-NEXT: li a7, 40
139 ; CHECK-NEXT: mul a7, a2, a7
140 ; CHECK-NEXT: add t0, a1, a7
141 ; CHECK-NEXT: li t1, 48
142 ; CHECK-NEXT: mul t1, a2, t1
143 ; CHECK-NEXT: add t2, a1, t1
144 ; CHECK-NEXT: li t3, 56
145 ; CHECK-NEXT: mul a2, a2, t3
146 ; CHECK-NEXT: add t3, a1, a2
147 ; CHECK-NEXT: vl8re32.v v8, (a1)
148 ; CHECK-NEXT: vl8re32.v v0, (t0)
149 ; CHECK-NEXT: vl8re32.v v16, (t3)
150 ; CHECK-NEXT: vl8re32.v v24, (t2)
151 ; CHECK-NEXT: vs8r.v v8, (a0)
152 ; CHECK-NEXT: add a2, a0, a2
153 ; CHECK-NEXT: vs8r.v v16, (a2)
154 ; CHECK-NEXT: add t1, a0, t1
155 ; CHECK-NEXT: vs8r.v v24, (t1)
156 ; CHECK-NEXT: add a7, a0, a7
157 ; CHECK-NEXT: vs8r.v v0, (a7)
158 ; CHECK-NEXT: add a6, a0, a6
159 ; CHECK-NEXT: addi a1, sp, 16
160 ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
161 ; CHECK-NEXT: vs8r.v v8, (a6)
162 ; CHECK-NEXT: add a5, a0, a5
163 ; CHECK-NEXT: csrr a1, vlenb
164 ; CHECK-NEXT: slli a1, a1, 3
165 ; CHECK-NEXT: add a1, sp, a1
166 ; CHECK-NEXT: addi a1, a1, 16
167 ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
168 ; CHECK-NEXT: vs8r.v v8, (a5)
169 ; CHECK-NEXT: add a4, a0, a4
170 ; CHECK-NEXT: csrr a1, vlenb
171 ; CHECK-NEXT: slli a1, a1, 4
172 ; CHECK-NEXT: add a1, sp, a1
173 ; CHECK-NEXT: addi a1, a1, 16
174 ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
175 ; CHECK-NEXT: vs8r.v v8, (a4)
176 ; CHECK-NEXT: add a0, a0, a3
177 ; CHECK-NEXT: csrr a1, vlenb
178 ; CHECK-NEXT: li a2, 24
179 ; CHECK-NEXT: mul a1, a1, a2
180 ; CHECK-NEXT: add a1, sp, a1
181 ; CHECK-NEXT: addi a1, a1, 16
182 ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
183 ; CHECK-NEXT: vs8r.v v8, (a0)
184 ; CHECK-NEXT: csrr a0, vlenb
185 ; CHECK-NEXT: slli a0, a0, 5
186 ; CHECK-NEXT: add sp, sp, a0
187 ; CHECK-NEXT: addi sp, sp, 16
189 %v = load <vscale x 128 x i32>, <vscale x 128 x i32>* %x
190 ret <vscale x 128 x i32> %v
; Returns %v + %w for two <vscale x 4 x i8> arguments.
; The expected output adds v8 and v9 into v8 under e8/mf2, i.e. each
; fractional-LMUL argument occupies a single vector register.
193 define fastcc <vscale x 4 x i8> @ret_nxv4i8_param_nxv4i8_nxv4i8(<vscale x 4 x i8> %v, <vscale x 4 x i8> %w) {
194 ; CHECK-LABEL: ret_nxv4i8_param_nxv4i8_nxv4i8:
196 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
197 ; CHECK-NEXT: vadd.vv v8, v8, v9
199 %r = add <vscale x 4 x i8> %v, %w
200 ret <vscale x 4 x i8> %r
; Returns %v + %w for two <vscale x 4 x i64> arguments.
; The expected output adds v8 and v12 into v8 under e64/m4, i.e. the two
; arguments occupy consecutive 4-register groups.
203 define fastcc <vscale x 4 x i64> @ret_nxv4i64_param_nxv4i64_nxv4i64(<vscale x 4 x i64> %v, <vscale x 4 x i64> %w) {
204 ; CHECK-LABEL: ret_nxv4i64_param_nxv4i64_nxv4i64:
206 ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
207 ; CHECK-NEXT: vadd.vv v8, v8, v12
209 %r = add <vscale x 4 x i64> %v, %w
210 ret <vscale x 4 x i64> %r
; Returns %v xor %w for two <vscale x 8 x i1> mask arguments.
; The expected output is vmxor.mm on v0 and v8 with the result in v0.
213 define fastcc <vscale x 8 x i1> @ret_nxv8i1_param_nxv8i1_nxv8i1(<vscale x 8 x i1> %v, <vscale x 8 x i1> %w) {
214 ; CHECK-LABEL: ret_nxv8i1_param_nxv8i1_nxv8i1:
216 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
217 ; CHECK-NEXT: vmxor.mm v0, v0, v8
219 %r = xor <vscale x 8 x i1> %v, %w
220 ret <vscale x 8 x i1> %r
; Returns %v and %w for two <vscale x 32 x i1> mask arguments.
; The expected output is vmand.mm on v0 and v8 with the result in v0, under
; e8/m4.
223 define fastcc <vscale x 32 x i1> @ret_nxv32i1_param_nxv32i1_nxv32i1(<vscale x 32 x i1> %v, <vscale x 32 x i1> %w) {
224 ; CHECK-LABEL: ret_nxv32i1_param_nxv32i1_nxv32i1:
226 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
227 ; CHECK-NEXT: vmand.mm v0, v0, v8
229 %r = and <vscale x 32 x i1> %v, %w
230 ret <vscale x 32 x i1> %r
; Returns (%x + %y + %z) + splat(%w) for three <vscale x 32 x i32> arguments
; and one i32.
; In the expected output one vector is already live in v8/v16 on entry, while
; the halves of the other two are loaded from memory through a0 and a2 (each
; second half at + 8 * vlenb). The scalar is consumed from a4 by vadd.vx, and
; the result is left in v8/v16. The frame reserves 16 + 16 * vlenb bytes for
; two 8-register spill slots.
233 define fastcc <vscale x 32 x i32> @ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32_i32(<vscale x 32 x i32> %x, <vscale x 32 x i32> %y, <vscale x 32 x i32> %z, i32 %w) {
234 ; CHECK-LABEL: ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32_i32:
236 ; CHECK-NEXT: addi sp, sp, -16
237 ; CHECK-NEXT: .cfi_def_cfa_offset 16
238 ; CHECK-NEXT: csrr a1, vlenb
239 ; CHECK-NEXT: slli a1, a1, 4
240 ; CHECK-NEXT: sub sp, sp, a1
241 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
242 ; CHECK-NEXT: csrr a1, vlenb
243 ; CHECK-NEXT: slli a1, a1, 3
244 ; CHECK-NEXT: add a1, sp, a1
245 ; CHECK-NEXT: addi a1, a1, 16
246 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
247 ; CHECK-NEXT: vmv8r.v v24, v8
248 ; CHECK-NEXT: csrr a1, vlenb
249 ; CHECK-NEXT: slli a1, a1, 3
250 ; CHECK-NEXT: add a3, a2, a1
251 ; CHECK-NEXT: vl8re32.v v8, (a3)
252 ; CHECK-NEXT: addi a3, sp, 16
253 ; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
254 ; CHECK-NEXT: add a1, a0, a1
255 ; CHECK-NEXT: vl8re32.v v0, (a0)
256 ; CHECK-NEXT: vl8re32.v v8, (a1)
257 ; CHECK-NEXT: vl8re32.v v16, (a2)
258 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
259 ; CHECK-NEXT: vadd.vv v0, v24, v0
260 ; CHECK-NEXT: csrr a0, vlenb
261 ; CHECK-NEXT: slli a0, a0, 3
262 ; CHECK-NEXT: add a0, sp, a0
263 ; CHECK-NEXT: addi a0, a0, 16
264 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
265 ; CHECK-NEXT: vadd.vv v8, v24, v8
266 ; CHECK-NEXT: addi a0, sp, 16
267 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
268 ; CHECK-NEXT: vadd.vv v8, v8, v24
269 ; CHECK-NEXT: vadd.vv v24, v0, v16
270 ; CHECK-NEXT: vadd.vx v16, v8, a4
271 ; CHECK-NEXT: vadd.vx v8, v24, a4
272 ; CHECK-NEXT: csrr a0, vlenb
273 ; CHECK-NEXT: slli a0, a0, 4
274 ; CHECK-NEXT: add sp, sp, a0
275 ; CHECK-NEXT: addi sp, sp, 16
277 %r = add <vscale x 32 x i32> %x, %y
278 %s = add <vscale x 32 x i32> %r, %z
279 %head = insertelement <vscale x 32 x i32> poison, i32 %w, i32 0
280 %splat = shufflevector <vscale x 32 x i32> %head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
281 %t = add <vscale x 32 x i32> %s, %splat
282 ret <vscale x 32 x i32> %t
; External callees used by the call tests in this file. They are declared
; fastcc so that the declarations agree with the `call fastcc` sites: LangRef
; makes a calling-convention mismatch between a call and its callee undefined
; behaviour. Call lowering uses the convention written on the call
; instruction, so the expected output of the callers should be unaffected.
285 declare fastcc <vscale x 32 x i32> @ext2(<vscale x 32 x i32>, <vscale x 32 x i32>, i32, i32)
286 declare fastcc <vscale x 32 x i32> @ext3(<vscale x 32 x i32>, <vscale x 32 x i32>, <vscale x 32 x i32>, i32, i32)
; Calls @ext2 with the two vector arguments swapped (%y, %x), followed by %w
; and the constant 2, and returns the call's result.
; In the expected output the function sets up a 144-byte fixed frame, saves ra
; and s0, reserves 16 * vlenb bytes and realigns sp to 128 bytes. The vector
; that arrived in v8/v16 is stored to the stack at sp + 128 and its address is
; passed in a0. The vector loaded through the incoming a0 is moved into
; v8/v16, and the constant 2 is placed in a3.
; riscv32 and riscv64 use separate check prefixes here; the visible
; differences are the sw/lw versus sd/ld spill instructions and their offsets.
288 define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vscale x 32 x i32> %x, <vscale x 32 x i32> %y, i32 %w) {
289 ; RV32-LABEL: ret_nxv32i32_call_nxv32i32_nxv32i32_i32:
291 ; RV32-NEXT: addi sp, sp, -144
292 ; RV32-NEXT: .cfi_def_cfa_offset 144
293 ; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
294 ; RV32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
295 ; RV32-NEXT: .cfi_offset ra, -4
296 ; RV32-NEXT: .cfi_offset s0, -8
297 ; RV32-NEXT: addi s0, sp, 144
298 ; RV32-NEXT: .cfi_def_cfa s0, 0
299 ; RV32-NEXT: csrr a1, vlenb
300 ; RV32-NEXT: slli a1, a1, 4
301 ; RV32-NEXT: sub sp, sp, a1
302 ; RV32-NEXT: andi sp, sp, -128
303 ; RV32-NEXT: csrr a1, vlenb
304 ; RV32-NEXT: slli a1, a1, 3
305 ; RV32-NEXT: add a3, a0, a1
306 ; RV32-NEXT: vl8re32.v v24, (a3)
307 ; RV32-NEXT: vl8re32.v v0, (a0)
308 ; RV32-NEXT: addi a0, sp, 128
309 ; RV32-NEXT: vs8r.v v8, (a0)
310 ; RV32-NEXT: add a1, a0, a1
311 ; RV32-NEXT: addi a0, sp, 128
312 ; RV32-NEXT: li a3, 2
313 ; RV32-NEXT: vs8r.v v16, (a1)
314 ; RV32-NEXT: vmv8r.v v8, v0
315 ; RV32-NEXT: vmv8r.v v16, v24
316 ; RV32-NEXT: call ext2@plt
317 ; RV32-NEXT: addi sp, s0, -144
318 ; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
319 ; RV32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
320 ; RV32-NEXT: addi sp, sp, 144
323 ; RV64-LABEL: ret_nxv32i32_call_nxv32i32_nxv32i32_i32:
325 ; RV64-NEXT: addi sp, sp, -144
326 ; RV64-NEXT: .cfi_def_cfa_offset 144
327 ; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
328 ; RV64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
329 ; RV64-NEXT: .cfi_offset ra, -8
330 ; RV64-NEXT: .cfi_offset s0, -16
331 ; RV64-NEXT: addi s0, sp, 144
332 ; RV64-NEXT: .cfi_def_cfa s0, 0
333 ; RV64-NEXT: csrr a1, vlenb
334 ; RV64-NEXT: slli a1, a1, 4
335 ; RV64-NEXT: sub sp, sp, a1
336 ; RV64-NEXT: andi sp, sp, -128
337 ; RV64-NEXT: csrr a1, vlenb
338 ; RV64-NEXT: slli a1, a1, 3
339 ; RV64-NEXT: add a3, a0, a1
340 ; RV64-NEXT: vl8re32.v v24, (a3)
341 ; RV64-NEXT: vl8re32.v v0, (a0)
342 ; RV64-NEXT: addi a0, sp, 128
343 ; RV64-NEXT: vs8r.v v8, (a0)
344 ; RV64-NEXT: add a1, a0, a1
345 ; RV64-NEXT: addi a0, sp, 128
346 ; RV64-NEXT: li a3, 2
347 ; RV64-NEXT: vs8r.v v16, (a1)
348 ; RV64-NEXT: vmv8r.v v8, v0
349 ; RV64-NEXT: vmv8r.v v16, v24
350 ; RV64-NEXT: call ext2@plt
351 ; RV64-NEXT: addi sp, s0, -144
352 ; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
353 ; RV64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
354 ; RV64-NEXT: addi sp, sp, 144
356 %t = call fastcc <vscale x 32 x i32> @ext2(<vscale x 32 x i32> %y, <vscale x 32 x i32> %x, i32 %w, i32 2)
357 ret <vscale x 32 x i32> %t
; Calls @ext3 with the three vector arguments in reverse order (%z, %y, %x),
; followed by %w and the constant 42, and returns the call's result.
; In the expected output the function sets up a 144-byte fixed frame, saves ra
; and s0, reserves 48 * vlenb bytes and realigns sp to 128 bytes. Two vectors
; are written to stack slots at sp + 128 + 16 * vlenb and
; sp + 128 + 32 * vlenb, whose addresses are passed in a2 and a0. The third
; vector is passed in v8/v16, and the constant 42 is placed in a5.
; riscv32 and riscv64 use separate check prefixes here; the visible
; differences are the sw/lw versus sd/ld spill instructions and their offsets.
360 define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_i32(<vscale x 32 x i32> %x, <vscale x 32 x i32> %y, <vscale x 32 x i32> %z, i32 %w) {
361 ; RV32-LABEL: ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_i32:
363 ; RV32-NEXT: addi sp, sp, -144
364 ; RV32-NEXT: .cfi_def_cfa_offset 144
365 ; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
366 ; RV32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
367 ; RV32-NEXT: .cfi_offset ra, -4
368 ; RV32-NEXT: .cfi_offset s0, -8
369 ; RV32-NEXT: addi s0, sp, 144
370 ; RV32-NEXT: .cfi_def_cfa s0, 0
371 ; RV32-NEXT: csrr a1, vlenb
372 ; RV32-NEXT: li a3, 48
373 ; RV32-NEXT: mul a1, a1, a3
374 ; RV32-NEXT: sub sp, sp, a1
375 ; RV32-NEXT: andi sp, sp, -128
376 ; RV32-NEXT: csrr a1, vlenb
377 ; RV32-NEXT: slli a1, a1, 3
378 ; RV32-NEXT: add a3, a2, a1
379 ; RV32-NEXT: vl8re32.v v24, (a3)
380 ; RV32-NEXT: csrr a3, vlenb
381 ; RV32-NEXT: slli a3, a3, 3
382 ; RV32-NEXT: add a3, sp, a3
383 ; RV32-NEXT: addi a3, a3, 128
384 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
385 ; RV32-NEXT: add a3, a0, a1
386 ; RV32-NEXT: vl8re32.v v24, (a3)
387 ; RV32-NEXT: addi a3, sp, 128
388 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
389 ; RV32-NEXT: vl8re32.v v0, (a2)
390 ; RV32-NEXT: vl8re32.v v24, (a0)
391 ; RV32-NEXT: csrr a0, vlenb
392 ; RV32-NEXT: slli a0, a0, 4
393 ; RV32-NEXT: add a0, sp, a0
394 ; RV32-NEXT: addi a0, a0, 128
395 ; RV32-NEXT: vs8r.v v8, (a0)
396 ; RV32-NEXT: csrr a2, vlenb
397 ; RV32-NEXT: slli a2, a2, 5
398 ; RV32-NEXT: add a2, sp, a2
399 ; RV32-NEXT: addi a2, a2, 128
400 ; RV32-NEXT: vs8r.v v24, (a2)
401 ; RV32-NEXT: add a0, a0, a1
402 ; RV32-NEXT: vs8r.v v16, (a0)
403 ; RV32-NEXT: add a1, a2, a1
404 ; RV32-NEXT: csrr a0, vlenb
405 ; RV32-NEXT: slli a0, a0, 5
406 ; RV32-NEXT: add a0, sp, a0
407 ; RV32-NEXT: addi a0, a0, 128
408 ; RV32-NEXT: csrr a2, vlenb
409 ; RV32-NEXT: slli a2, a2, 4
410 ; RV32-NEXT: add a2, sp, a2
411 ; RV32-NEXT: addi a2, a2, 128
412 ; RV32-NEXT: li a5, 42
413 ; RV32-NEXT: addi a3, sp, 128
414 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
415 ; RV32-NEXT: vs8r.v v8, (a1)
416 ; RV32-NEXT: vmv8r.v v8, v0
417 ; RV32-NEXT: csrr a1, vlenb
418 ; RV32-NEXT: slli a1, a1, 3
419 ; RV32-NEXT: add a1, sp, a1
420 ; RV32-NEXT: addi a1, a1, 128
421 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
422 ; RV32-NEXT: call ext3@plt
423 ; RV32-NEXT: addi sp, s0, -144
424 ; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
425 ; RV32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
426 ; RV32-NEXT: addi sp, sp, 144
429 ; RV64-LABEL: ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_i32:
431 ; RV64-NEXT: addi sp, sp, -144
432 ; RV64-NEXT: .cfi_def_cfa_offset 144
433 ; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
434 ; RV64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
435 ; RV64-NEXT: .cfi_offset ra, -8
436 ; RV64-NEXT: .cfi_offset s0, -16
437 ; RV64-NEXT: addi s0, sp, 144
438 ; RV64-NEXT: .cfi_def_cfa s0, 0
439 ; RV64-NEXT: csrr a1, vlenb
440 ; RV64-NEXT: li a3, 48
441 ; RV64-NEXT: mul a1, a1, a3
442 ; RV64-NEXT: sub sp, sp, a1
443 ; RV64-NEXT: andi sp, sp, -128
444 ; RV64-NEXT: csrr a1, vlenb
445 ; RV64-NEXT: slli a1, a1, 3
446 ; RV64-NEXT: add a3, a2, a1
447 ; RV64-NEXT: vl8re32.v v24, (a3)
448 ; RV64-NEXT: csrr a3, vlenb
449 ; RV64-NEXT: slli a3, a3, 3
450 ; RV64-NEXT: add a3, sp, a3
451 ; RV64-NEXT: addi a3, a3, 128
452 ; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
453 ; RV64-NEXT: add a3, a0, a1
454 ; RV64-NEXT: vl8re32.v v24, (a3)
455 ; RV64-NEXT: addi a3, sp, 128
456 ; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
457 ; RV64-NEXT: vl8re32.v v0, (a2)
458 ; RV64-NEXT: vl8re32.v v24, (a0)
459 ; RV64-NEXT: csrr a0, vlenb
460 ; RV64-NEXT: slli a0, a0, 4
461 ; RV64-NEXT: add a0, sp, a0
462 ; RV64-NEXT: addi a0, a0, 128
463 ; RV64-NEXT: vs8r.v v8, (a0)
464 ; RV64-NEXT: csrr a2, vlenb
465 ; RV64-NEXT: slli a2, a2, 5
466 ; RV64-NEXT: add a2, sp, a2
467 ; RV64-NEXT: addi a2, a2, 128
468 ; RV64-NEXT: vs8r.v v24, (a2)
469 ; RV64-NEXT: add a0, a0, a1
470 ; RV64-NEXT: vs8r.v v16, (a0)
471 ; RV64-NEXT: add a1, a2, a1
472 ; RV64-NEXT: csrr a0, vlenb
473 ; RV64-NEXT: slli a0, a0, 5
474 ; RV64-NEXT: add a0, sp, a0
475 ; RV64-NEXT: addi a0, a0, 128
476 ; RV64-NEXT: csrr a2, vlenb
477 ; RV64-NEXT: slli a2, a2, 4
478 ; RV64-NEXT: add a2, sp, a2
479 ; RV64-NEXT: addi a2, a2, 128
480 ; RV64-NEXT: li a5, 42
481 ; RV64-NEXT: addi a3, sp, 128
482 ; RV64-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
483 ; RV64-NEXT: vs8r.v v8, (a1)
484 ; RV64-NEXT: vmv8r.v v8, v0
485 ; RV64-NEXT: csrr a1, vlenb
486 ; RV64-NEXT: slli a1, a1, 3
487 ; RV64-NEXT: add a1, sp, a1
488 ; RV64-NEXT: addi a1, a1, 128
489 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
490 ; RV64-NEXT: call ext3@plt
491 ; RV64-NEXT: addi sp, s0, -144
492 ; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
493 ; RV64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
494 ; RV64-NEXT: addi sp, sp, 144
496 %t = call fastcc <vscale x 32 x i32> @ext3(<vscale x 32 x i32> %z, <vscale x 32 x i32> %y, <vscale x 32 x i32> %x, i32 %w, i32 42)
497 ret <vscale x 32 x i32> %t
500 ; A test case where the normal calling convention would pass directly via the
501 ; stack, but with fastcc can pass indirectly with the extra GPR registers
; The function takes eight i32 arguments, three <vscale x 32 x i32> vectors
; and a trailing i32, and returns %x + %z; %y and the scalar arguments are
; not used by the body.
; In the expected output %x is already in v8/v16, and the two halves of %z
; are loaded through t4 (at t4 and t4 + 8 * vlenb) before the two vadd.vv
; instructions.
503 define fastcc <vscale x 32 x i32> @vector_arg_indirect_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, <vscale x 32 x i32> %x, <vscale x 32 x i32> %y, <vscale x 32 x i32> %z, i32 %8) {
504 ; CHECK-LABEL: vector_arg_indirect_stack:
506 ; CHECK-NEXT: csrr a0, vlenb
507 ; CHECK-NEXT: slli a0, a0, 3
508 ; CHECK-NEXT: add a0, t4, a0
509 ; CHECK-NEXT: vl8re32.v v24, (t4)
510 ; CHECK-NEXT: vl8re32.v v0, (a0)
511 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
512 ; CHECK-NEXT: vadd.vv v8, v8, v24
513 ; CHECK-NEXT: vadd.vv v16, v16, v0
515 %s = add <vscale x 32 x i32> %x, %z
516 ret <vscale x 32 x i32> %s
519 ; Calling the function above. Ensure we pass the arguments correctly.
; The call passes the integer constants 0 through 7, three zeroinitializer
; vectors and the constant 8; this function's own %x, %y and %z parameters
; are not used.
; In the expected output a0-a7 receive 0-7 and t6 receives 8. The first zero
; vector is materialised directly in v8/v16. Zero vectors are also stored to
; two stack areas, at sp + 128 and sp + 128 + 16 * vlenb, whose addresses are
; passed in t4 and t2.
; The frame is 144 bytes plus 32 * vlenb, with sp realigned to 128 bytes.
; riscv32 and riscv64 use separate check prefixes here; the visible
; differences are the sw/lw versus sd/ld spill instructions and their offsets.
520 define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x i32> %x, <vscale x 32 x i32> %y, <vscale x 32 x i32> %z) {
521 ; RV32-LABEL: pass_vector_arg_indirect_stack:
523 ; RV32-NEXT: addi sp, sp, -144
524 ; RV32-NEXT: .cfi_def_cfa_offset 144
525 ; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
526 ; RV32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
527 ; RV32-NEXT: .cfi_offset ra, -4
528 ; RV32-NEXT: .cfi_offset s0, -8
529 ; RV32-NEXT: addi s0, sp, 144
530 ; RV32-NEXT: .cfi_def_cfa s0, 0
531 ; RV32-NEXT: csrr a0, vlenb
532 ; RV32-NEXT: slli a0, a0, 5
533 ; RV32-NEXT: sub sp, sp, a0
534 ; RV32-NEXT: andi sp, sp, -128
535 ; RV32-NEXT: csrr a0, vlenb
536 ; RV32-NEXT: slli a0, a0, 3
537 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
538 ; RV32-NEXT: vmv.v.i v8, 0
539 ; RV32-NEXT: addi a1, sp, 128
540 ; RV32-NEXT: vs8r.v v8, (a1)
541 ; RV32-NEXT: csrr a2, vlenb
542 ; RV32-NEXT: slli a2, a2, 4
543 ; RV32-NEXT: add a2, sp, a2
544 ; RV32-NEXT: addi a2, a2, 128
545 ; RV32-NEXT: vs8r.v v8, (a2)
546 ; RV32-NEXT: add a1, a1, a0
547 ; RV32-NEXT: vs8r.v v8, (a1)
548 ; RV32-NEXT: add a0, a2, a0
549 ; RV32-NEXT: li a1, 1
550 ; RV32-NEXT: li a2, 2
551 ; RV32-NEXT: li a3, 3
552 ; RV32-NEXT: li a4, 4
553 ; RV32-NEXT: li a5, 5
554 ; RV32-NEXT: li a6, 6
555 ; RV32-NEXT: li a7, 7
556 ; RV32-NEXT: csrr t2, vlenb
557 ; RV32-NEXT: slli t2, t2, 4
558 ; RV32-NEXT: add t2, sp, t2
559 ; RV32-NEXT: addi t2, t2, 128
560 ; RV32-NEXT: addi t4, sp, 128
561 ; RV32-NEXT: li t6, 8
562 ; RV32-NEXT: vs8r.v v8, (a0)
563 ; RV32-NEXT: li a0, 0
564 ; RV32-NEXT: vmv.v.i v16, 0
565 ; RV32-NEXT: call vector_arg_indirect_stack@plt
566 ; RV32-NEXT: addi sp, s0, -144
567 ; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
568 ; RV32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
569 ; RV32-NEXT: addi sp, sp, 144
572 ; RV64-LABEL: pass_vector_arg_indirect_stack:
574 ; RV64-NEXT: addi sp, sp, -144
575 ; RV64-NEXT: .cfi_def_cfa_offset 144
576 ; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
577 ; RV64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
578 ; RV64-NEXT: .cfi_offset ra, -8
579 ; RV64-NEXT: .cfi_offset s0, -16
580 ; RV64-NEXT: addi s0, sp, 144
581 ; RV64-NEXT: .cfi_def_cfa s0, 0
582 ; RV64-NEXT: csrr a0, vlenb
583 ; RV64-NEXT: slli a0, a0, 5
584 ; RV64-NEXT: sub sp, sp, a0
585 ; RV64-NEXT: andi sp, sp, -128
586 ; RV64-NEXT: csrr a0, vlenb
587 ; RV64-NEXT: slli a0, a0, 3
588 ; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
589 ; RV64-NEXT: vmv.v.i v8, 0
590 ; RV64-NEXT: addi a1, sp, 128
591 ; RV64-NEXT: vs8r.v v8, (a1)
592 ; RV64-NEXT: csrr a2, vlenb
593 ; RV64-NEXT: slli a2, a2, 4
594 ; RV64-NEXT: add a2, sp, a2
595 ; RV64-NEXT: addi a2, a2, 128
596 ; RV64-NEXT: vs8r.v v8, (a2)
597 ; RV64-NEXT: add a1, a1, a0
598 ; RV64-NEXT: vs8r.v v8, (a1)
599 ; RV64-NEXT: add a0, a2, a0
600 ; RV64-NEXT: li a1, 1
601 ; RV64-NEXT: li a2, 2
602 ; RV64-NEXT: li a3, 3
603 ; RV64-NEXT: li a4, 4
604 ; RV64-NEXT: li a5, 5
605 ; RV64-NEXT: li a6, 6
606 ; RV64-NEXT: li a7, 7
607 ; RV64-NEXT: csrr t2, vlenb
608 ; RV64-NEXT: slli t2, t2, 4
609 ; RV64-NEXT: add t2, sp, t2
610 ; RV64-NEXT: addi t2, t2, 128
611 ; RV64-NEXT: addi t4, sp, 128
612 ; RV64-NEXT: li t6, 8
613 ; RV64-NEXT: vs8r.v v8, (a0)
614 ; RV64-NEXT: li a0, 0
615 ; RV64-NEXT: vmv.v.i v16, 0
616 ; RV64-NEXT: call vector_arg_indirect_stack@plt
617 ; RV64-NEXT: addi sp, s0, -144
618 ; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
619 ; RV64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
620 ; RV64-NEXT: addi sp, sp, 144
622 %s = call fastcc <vscale x 32 x i32> @vector_arg_indirect_stack(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, <vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, i32 8)
623 ret <vscale x 32 x i32> %s