; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s
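
; A VLA together with RVV (scalable vector) locals: the scalable objects are
; addressed at vlenb-scaled offsets below the frame pointer s0, while the VLA
; is carved out of sp at runtime.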
define void @rvv_vla(i64 %n, i64 %i) nounwind {
; CHECK-LABEL: rvv_vla:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -32
; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 32
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 2
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    addi a0, a0, 15
; CHECK-NEXT:    andi a0, a0, -16
; CHECK-NEXT:    sub a0, sp, a0
; CHECK-NEXT:    mv sp, a0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    sub a2, s0, a2
; CHECK-NEXT:    addi a2, a2, -32
; CHECK-NEXT:    vl1re64.v v8, (a2)
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 2
; CHECK-NEXT:    sub a2, s0, a2
; CHECK-NEXT:    addi a2, a2, -32
; CHECK-NEXT:    vl2re64.v v8, (a2)
; CHECK-NEXT:    slli a1, a1, 2
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    lw zero, 0(a0)
; CHECK-NEXT:    addi sp, s0, -32
; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 32
; CHECK-NEXT:    ret
  %vla.addr = alloca i32, i64 %n

  %v1.addr = alloca <vscale x 1 x i64>
  %v1 = load volatile <vscale x 1 x i64>, <vscale x 1 x i64>* %v1.addr

  %v2.addr = alloca <vscale x 2 x i64>
  %v2 = load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %v2.addr

  %p = getelementptr i32, i32* %vla.addr, i64 %i
  %s = load volatile i32, i32* %p

  ret void
}
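
; An overaligned local but no VLA: sp itself is realigned (andi sp, sp, -64),
; and both the RVV area and the overaligned slot are addressed relative to the
; realigned sp.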
define void @rvv_overaligned() nounwind {
; CHECK-LABEL: rvv_overaligned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -128
; CHECK-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 128
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    andi sp, sp, -64
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a1, a0, 1
; CHECK-NEXT:    add a0, a1, a0
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 112
; CHECK-NEXT:    vl1re64.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 112
; CHECK-NEXT:    vl2re64.v v8, (a0)
; CHECK-NEXT:    lw zero, 64(sp)
; CHECK-NEXT:    addi sp, s0, -128
; CHECK-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 128
; CHECK-NEXT:    ret
  %overaligned = alloca i32, align 64

  %v1.addr = alloca <vscale x 1 x i64>
  %v1 = load volatile <vscale x 1 x i64>, <vscale x 1 x i64>* %v1.addr

  %v2.addr = alloca <vscale x 2 x i64>
  %v2 = load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %v2.addr

  %s = load volatile i32, i32* %overaligned, align 64

  ret void
}
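
; Both a VLA and an overaligned local: a base pointer (s1) is set up after the
; realignment, fixed-size and RVV objects are addressed from s1, the VLA from
; sp, and the epilogue restores sp from the frame pointer s0.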
define void @rvv_vla_and_overaligned(i64 %n, i64 %i) nounwind {
; CHECK-LABEL: rvv_vla_and_overaligned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -144
; CHECK-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s1, 120(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 144
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 2
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    andi sp, sp, -64
; CHECK-NEXT:    mv s1, sp
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    addi a0, a0, 15
; CHECK-NEXT:    andi a0, a0, -16
; CHECK-NEXT:    sub a0, sp, a0
; CHECK-NEXT:    mv sp, a0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a3, a2, 1
; CHECK-NEXT:    add a2, a3, a2
; CHECK-NEXT:    add a2, s1, a2
; CHECK-NEXT:    addi a2, a2, 112
; CHECK-NEXT:    vl1re64.v v8, (a2)
; CHECK-NEXT:    addi a2, s1, 112
; CHECK-NEXT:    vl2re64.v v8, (a2)
; CHECK-NEXT:    lw zero, 64(s1)
; CHECK-NEXT:    slli a1, a1, 2
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    lw zero, 0(a0)
; CHECK-NEXT:    addi sp, s0, -144
; CHECK-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s1, 120(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 144
; CHECK-NEXT:    ret
  %overaligned = alloca i32, align 64
  %vla.addr = alloca i32, i64 %n

  %v1.addr = alloca <vscale x 1 x i64>
  %v1 = load volatile <vscale x 1 x i64>, <vscale x 1 x i64>* %v1.addr

  %v2.addr = alloca <vscale x 2 x i64>
  %v2 = load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %v2.addr

  %s1 = load volatile i32, i32* %overaligned, align 64
  %p = getelementptr i32, i32* %vla.addr, i64 %i
  %s2 = load volatile i32, i32* %p

  ret void
}