1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+v \
3 ; RUN: -target-abi=lp64d -verify-machineinstrs -O2 < %s | FileCheck %s
5 ; The following tests check that VSETVLI insertion avoids emitting
6 ; unneeded vsetvlis across basic blocks.
8 declare i64 @llvm.riscv.vsetvli(i64, i64, i64)
10 declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)
11 declare <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, i64, i64)
13 declare <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)
15 declare <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)
17 declare <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double>, double, i64)
18 declare <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32( <vscale x 2 x float>, float, i64)
20 declare void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double>, ptr nocapture, i64)
21 declare void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float>, ptr nocapture, i64)
; test1: the entry block calls the vsetvli intrinsic once and both branch
; blocks (vfadd in if.then, vfsub in if.else) consume its result %0 as their
; VL operand. The expected assembly shown here has a single vsetvli, placed in
; the entry block ahead of the branch; the vfadd.vv / vfsub.vv in the two
; successors follow their block labels directly.
23 define <vscale x 1 x double> @test1(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
25 ; CHECK: # %bb.0: # %entry
26 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
27 ; CHECK-NEXT: beqz a1, .LBB0_2
28 ; CHECK-NEXT: # %bb.1: # %if.then
29 ; CHECK-NEXT: vfadd.vv v8, v8, v9
31 ; CHECK-NEXT: .LBB0_2: # %if.else
32 ; CHECK-NEXT: vfsub.vv v8, v8, v9
35 %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
36 %tobool = icmp eq i8 %cond, 0
37 br i1 %tobool, label %if.else, label %if.then
39 if.then: ; preds = %entry
40 %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
43 if.else: ; preds = %entry
44 %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
47 if.end: ; preds = %if.else, %if.then
48 %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
49 ret <vscale x 1 x double> %c.0
; One-byte global whose address is used as the destination of the vse stores
; in test4 and test6.
52 @scratch = global i8 0, align 16
; test2: like test1, but the join block (if.end) performs an additional vfmul
; that uses the same VL value %0 produced by the vsetvli intrinsic in entry.
; In the expected assembly the vfmul.vv appears after both the vfadd.vv and
; the vfsub.vv, and the only vsetvli shown is the one in the entry block.
54 define <vscale x 1 x double> @test2(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
56 ; CHECK: # %bb.0: # %entry
57 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
58 ; CHECK-NEXT: beqz a1, .LBB1_2
59 ; CHECK-NEXT: # %bb.1: # %if.then
60 ; CHECK-NEXT: vfadd.vv v9, v8, v9
61 ; CHECK-NEXT: vfmul.vv v8, v9, v8
63 ; CHECK-NEXT: .LBB1_2: # %if.else
64 ; CHECK-NEXT: vfsub.vv v9, v8, v9
65 ; CHECK-NEXT: vfmul.vv v8, v9, v8
68 %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
69 %tobool = icmp eq i8 %cond, 0
70 br i1 %tobool, label %if.else, label %if.then
72 if.then: ; preds = %entry
73 %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
76 if.else: ; preds = %entry
77 %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
80 if.end: ; preds = %if.else, %if.then
81 %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
82 %3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %0)
83 ret <vscale x 1 x double> %3
; test3: the vsetvli intrinsic is not called in entry; instead each branch
; calls it with identical operands (%avl, 3, 0) and the two results are merged
; through the phi %vl.0, which feeds the vfmul in if.end.
; The expected assembly has one vsetvli at the top of each branch block and
; no further vsetvli ahead of the vfmul.vv that follows in each block.
86 define <vscale x 1 x double> @test3(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
88 ; CHECK: # %bb.0: # %entry
89 ; CHECK-NEXT: beqz a1, .LBB2_2
90 ; CHECK-NEXT: # %bb.1: # %if.then
91 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
92 ; CHECK-NEXT: vfadd.vv v9, v8, v9
93 ; CHECK-NEXT: vfmul.vv v8, v9, v8
95 ; CHECK-NEXT: .LBB2_2: # %if.else
96 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
97 ; CHECK-NEXT: vfsub.vv v9, v8, v9
98 ; CHECK-NEXT: vfmul.vv v8, v9, v8
101 %tobool = icmp eq i8 %cond, 0
102 br i1 %tobool, label %if.else, label %if.then
104 if.then: ; preds = %entry
105 %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
106 %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
109 if.else: ; preds = %entry
110 %2 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
111 %3 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %2)
114 if.end: ; preds = %if.else, %if.then
; %vl.0 merges the VL values produced by the two per-branch vsetvli calls.
115 %vl.0 = phi i64 [ %0, %if.then], [ %2, %if.else ]
116 %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %3, %if.else ]
117 %4 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %vl.0)
118 ret <vscale x 1 x double> %4
; test4: no vsetvli intrinsic is used; every vector intrinsic takes %avl
; directly as its VL operand. if.then works on <vscale x 1 x double> values,
; if.else on <vscale x 2 x float> values, and if.end performs a
; <vscale x 1 x double> vfmul. Both branches store their result to @scratch.
; The expected assembly has a vsetvli with e64 in %if.then, one with e32 in
; %if.else, and another with e64 at the start of %if.end.
121 define <vscale x 1 x double> @test4(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %l, <vscale x 1 x double> %r) nounwind {
122 ; CHECK-LABEL: test4:
123 ; CHECK: # %bb.0: # %entry
124 ; CHECK-NEXT: beqz a1, .LBB3_2
125 ; CHECK-NEXT: # %bb.1: # %if.then
126 ; CHECK-NEXT: lui a1, %hi(.LCPI3_0)
127 ; CHECK-NEXT: fld fa5, %lo(.LCPI3_0)(a1)
128 ; CHECK-NEXT: lui a1, %hi(.LCPI3_1)
129 ; CHECK-NEXT: fld fa4, %lo(.LCPI3_1)(a1)
130 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
131 ; CHECK-NEXT: vfmv.v.f v10, fa5
132 ; CHECK-NEXT: vfmv.v.f v11, fa4
133 ; CHECK-NEXT: vfadd.vv v10, v10, v11
134 ; CHECK-NEXT: lui a1, %hi(scratch)
135 ; CHECK-NEXT: addi a1, a1, %lo(scratch)
136 ; CHECK-NEXT: vse64.v v10, (a1)
137 ; CHECK-NEXT: j .LBB3_3
138 ; CHECK-NEXT: .LBB3_2: # %if.else
139 ; CHECK-NEXT: lui a1, 260096
140 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
141 ; CHECK-NEXT: vmv.v.x v10, a1
142 ; CHECK-NEXT: lui a1, 262144
143 ; CHECK-NEXT: vmv.v.x v11, a1
144 ; CHECK-NEXT: vfadd.vv v10, v10, v11
145 ; CHECK-NEXT: lui a1, %hi(scratch)
146 ; CHECK-NEXT: addi a1, a1, %lo(scratch)
147 ; CHECK-NEXT: vse32.v v10, (a1)
148 ; CHECK-NEXT: .LBB3_3: # %if.end
149 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
150 ; CHECK-NEXT: vfmul.vv v8, v8, v9
153 %tobool = icmp eq i8 %cond, 0
154 br i1 %tobool, label %if.else, label %if.then
156 if.then: ; preds = %entry
157 %0 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 1.000000e+00, i64 %avl)
158 %1 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 2.000000e+00, i64 %avl)
159 %2 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 7, i64 %avl)
; ptr-to-ptr bitcast: source and destination types are the same.
160 %3 = bitcast ptr @scratch to ptr
161 tail call void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double> %2, ptr %3, i64 %avl)
164 if.else: ; preds = %entry
165 %4 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 1.000000e+00, i64 %avl)
166 %5 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 2.000000e+00, i64 %avl)
167 %6 = tail call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %4, <vscale x 2 x float> %5, i64 7, i64 %avl)
168 %7 = bitcast ptr @scratch to ptr
169 tail call void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float> %6, ptr %7, i64 %avl)
172 if.end: ; preds = %if.else, %if.then
173 %8 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %l, <vscale x 1 x double> %r, i64 7, i64 %avl)
174 ret <vscale x 1 x double> %8
; test5: two consecutive diamonds. The first branches on bit 0 of %cond
; (vfadd vs. vfsub), the second on bit 1 (two vfmul variants with swapped
; operands). All four vector operations use %0, the result of the single
; vsetvli intrinsic call in entry, as VL.
; The expected assembly has one vsetvli, in the entry block, and none in any
; of the four successor blocks.
177 define <vscale x 1 x double> @test5(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
178 ; CHECK-LABEL: test5:
179 ; CHECK: # %bb.0: # %entry
180 ; CHECK-NEXT: andi a2, a1, 1
181 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
182 ; CHECK-NEXT: bnez a2, .LBB4_3
183 ; CHECK-NEXT: # %bb.1: # %if.else
184 ; CHECK-NEXT: vfsub.vv v9, v8, v9
185 ; CHECK-NEXT: andi a1, a1, 2
186 ; CHECK-NEXT: beqz a1, .LBB4_4
187 ; CHECK-NEXT: .LBB4_2: # %if.then4
188 ; CHECK-NEXT: vfmul.vv v8, v9, v8
190 ; CHECK-NEXT: .LBB4_3: # %if.then
191 ; CHECK-NEXT: vfadd.vv v9, v8, v9
192 ; CHECK-NEXT: andi a1, a1, 2
193 ; CHECK-NEXT: bnez a1, .LBB4_2
194 ; CHECK-NEXT: .LBB4_4: # %if.else5
195 ; CHECK-NEXT: vfmul.vv v8, v8, v9
198 %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
199 %conv = zext i8 %cond to i32
200 %and = and i32 %conv, 1
201 %tobool = icmp eq i32 %and, 0
202 br i1 %tobool, label %if.else, label %if.then
204 if.then: ; preds = %entry
205 %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
208 if.else: ; preds = %entry
209 %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
212 if.end: ; preds = %if.else, %if.then
213 %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
214 %and2 = and i32 %conv, 2
215 %tobool3 = icmp eq i32 %and2, 0
216 br i1 %tobool3, label %if.else5, label %if.then4
218 if.then4: ; preds = %if.end
219 %3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %0)
222 if.else5: ; preds = %if.end
223 %4 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %c.0, i64 7, i64 %0)
226 if.end6: ; preds = %if.else5, %if.then4
227 %c.1 = phi <vscale x 1 x double> [ %3, %if.then4 ], [ %4, %if.else5 ]
228 ret <vscale x 1 x double> %c.1
231 ; The explicit vsetvli in if.then4 is redundant with the one in the entry
232 ; block. The expected assembly below contains no vsetvli in %if.then4, so the
233 ; redundant instruction is already eliminated in the generated code.
; test6: the first diamond is the same as in test5 (vfadd / vfsub using %0
; from the vsetvli call in entry). In the second diamond each side calls the
; vsetvli intrinsic again: if.then4 with operands (%avl, 3, 0), identical to
; the call in entry, and if.else5 with (%avl, 2, 0). Both sides store to
; @scratch. if.end10 then performs a vfmul that uses %0 from entry as VL.
; Expected assembly: vsetvli e64 in entry, no vsetvli in %if.then4, vsetvli
; e32 in %if.else5, and vsetvli e64 at the start of %if.end10.
234 define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
235 ; CHECK-LABEL: test6:
236 ; CHECK: # %bb.0: # %entry
237 ; CHECK-NEXT: andi a2, a1, 1
238 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
239 ; CHECK-NEXT: bnez a2, .LBB5_3
240 ; CHECK-NEXT: # %bb.1: # %if.else
241 ; CHECK-NEXT: vfsub.vv v8, v8, v9
242 ; CHECK-NEXT: andi a1, a1, 2
243 ; CHECK-NEXT: beqz a1, .LBB5_4
244 ; CHECK-NEXT: .LBB5_2: # %if.then4
245 ; CHECK-NEXT: lui a1, %hi(.LCPI5_0)
246 ; CHECK-NEXT: fld fa5, %lo(.LCPI5_0)(a1)
247 ; CHECK-NEXT: lui a1, %hi(.LCPI5_1)
248 ; CHECK-NEXT: fld fa4, %lo(.LCPI5_1)(a1)
249 ; CHECK-NEXT: vfmv.v.f v9, fa5
250 ; CHECK-NEXT: vfmv.v.f v10, fa4
251 ; CHECK-NEXT: vfadd.vv v9, v9, v10
252 ; CHECK-NEXT: lui a1, %hi(scratch)
253 ; CHECK-NEXT: addi a1, a1, %lo(scratch)
254 ; CHECK-NEXT: vse64.v v9, (a1)
255 ; CHECK-NEXT: j .LBB5_5
256 ; CHECK-NEXT: .LBB5_3: # %if.then
257 ; CHECK-NEXT: vfadd.vv v8, v8, v9
258 ; CHECK-NEXT: andi a1, a1, 2
259 ; CHECK-NEXT: bnez a1, .LBB5_2
260 ; CHECK-NEXT: .LBB5_4: # %if.else5
261 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
262 ; CHECK-NEXT: lui a1, 260096
263 ; CHECK-NEXT: vmv.v.x v9, a1
264 ; CHECK-NEXT: lui a1, 262144
265 ; CHECK-NEXT: vmv.v.x v10, a1
266 ; CHECK-NEXT: vfadd.vv v9, v9, v10
267 ; CHECK-NEXT: lui a1, %hi(scratch)
268 ; CHECK-NEXT: addi a1, a1, %lo(scratch)
269 ; CHECK-NEXT: vse32.v v9, (a1)
270 ; CHECK-NEXT: .LBB5_5: # %if.end10
271 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
272 ; CHECK-NEXT: vfmul.vv v8, v8, v8
275 %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
276 %conv = zext i8 %cond to i32
277 %and = and i32 %conv, 1
278 %tobool = icmp eq i32 %and, 0
279 br i1 %tobool, label %if.else, label %if.then
281 if.then: ; preds = %entry
282 %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
285 if.else: ; preds = %entry
286 %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
289 if.end: ; preds = %if.else, %if.then
290 %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
291 %and2 = and i32 %conv, 2
292 %tobool3 = icmp eq i32 %and2, 0
293 br i1 %tobool3, label %if.else5, label %if.then4
295 if.then4: ; preds = %if.end
; Same operands as the vsetvli call in entry.
296 %3 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
297 %4 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 1.000000e+00, i64 %3)
298 %5 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 2.000000e+00, i64 %3)
299 %6 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %4, <vscale x 1 x double> %5, i64 7, i64 %3)
300 %7 = bitcast ptr @scratch to ptr
301 tail call void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double> %6, ptr %7, i64 %3)
304 if.else5: ; preds = %if.end
; Second operand differs from entry (2 instead of 3); expected asm uses e32.
305 %8 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 0)
306 %9 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 1.000000e+00, i64 %8)
307 %10 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32( <vscale x 2 x float> undef, float 2.000000e+00, i64 %8)
308 %11 = tail call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %9, <vscale x 2 x float> %10, i64 7, i64 %8)
309 %12 = bitcast ptr @scratch to ptr
310 tail call void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float> %11, ptr %12, i64 %8)
313 if.end10: ; preds = %if.else5, %if.then4
314 %13 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %c.0, i64 7, i64 %0)
315 ret <vscale x 1 x double> %13
320 ; Similar to test1, but contains a call to @foo to act as a barrier to analyzing
; VL/VTYPE.
; test8: same diamond as test1 (vsetvli intrinsic in entry, vfadd in if.then,
; vfsub in if.else, all using %0 as VL). In the expected assembly the
; %if.else block saves the AVL in s0, spills v8/v9, executes `call foo`,
; reloads the vectors, and then issues a second vsetvli (with s0 as AVL)
; before the vfsub.vv. The %if.then path needs no second vsetvli.
322 define <vscale x 1 x double> @test8(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
323 ; CHECK-LABEL: test8:
324 ; CHECK: # %bb.0: # %entry
325 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
326 ; CHECK-NEXT: beqz a1, .LBB6_2
327 ; CHECK-NEXT: # %bb.1: # %if.then
328 ; CHECK-NEXT: vfadd.vv v8, v8, v9
330 ; CHECK-NEXT: .LBB6_2: # %if.else
331 ; CHECK-NEXT: addi sp, sp, -32
332 ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
333 ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
334 ; CHECK-NEXT: csrr a1, vlenb
335 ; CHECK-NEXT: slli a1, a1, 1
336 ; CHECK-NEXT: sub sp, sp, a1
337 ; CHECK-NEXT: mv s0, a0
338 ; CHECK-NEXT: csrr a0, vlenb
339 ; CHECK-NEXT: add a0, a0, sp
340 ; CHECK-NEXT: addi a0, a0, 16
341 ; CHECK-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
342 ; CHECK-NEXT: addi a0, sp, 16
343 ; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
344 ; CHECK-NEXT: call foo
345 ; CHECK-NEXT: csrr a0, vlenb
346 ; CHECK-NEXT: add a0, a0, sp
347 ; CHECK-NEXT: addi a0, a0, 16
348 ; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
349 ; CHECK-NEXT: addi a0, sp, 16
350 ; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
351 ; CHECK-NEXT: vsetvli zero, s0, e64, m1, ta, ma
352 ; CHECK-NEXT: vfsub.vv v8, v9, v8
353 ; CHECK-NEXT: csrr a0, vlenb
354 ; CHECK-NEXT: slli a0, a0, 1
355 ; CHECK-NEXT: add sp, sp, a0
356 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
357 ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
358 ; CHECK-NEXT: addi sp, sp, 32
361 %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
362 %tobool = icmp eq i8 %cond, 0
363 br i1 %tobool, label %if.else, label %if.then
365 if.then: ; preds = %entry
366 %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
369 if.else: ; preds = %entry
371 %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
374 if.end: ; preds = %if.else, %if.then
375 %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
376 ret <vscale x 1 x double> %c.0
379 ; Similar to test2, but contains a call to @foo to act as a barrier to analyzing
; VL/VTYPE.
; test9: same shape as test2 (vsetvli intrinsic in entry, vfadd / vfsub in the
; branches, vfmul in if.end, all using %0 as VL). In the expected assembly
; the %if.then block executes `call foo` after its vfadd.vv, with v8/v9
; spilled and reloaded around the call. The entry block has a vsetvli and
; saves the AVL in s0; a second vsetvli using s0 appears at the start of
; %if.end before the vfmul.vv.
381 define <vscale x 1 x double> @test9(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
382 ; CHECK-LABEL: test9:
383 ; CHECK: # %bb.0: # %entry
384 ; CHECK-NEXT: addi sp, sp, -32
385 ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
386 ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
387 ; CHECK-NEXT: csrr a2, vlenb
388 ; CHECK-NEXT: slli a2, a2, 1
389 ; CHECK-NEXT: sub sp, sp, a2
390 ; CHECK-NEXT: mv s0, a0
391 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
392 ; CHECK-NEXT: beqz a1, .LBB7_2
393 ; CHECK-NEXT: # %bb.1: # %if.then
394 ; CHECK-NEXT: vfadd.vv v9, v8, v9
395 ; CHECK-NEXT: addi a0, sp, 16
396 ; CHECK-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
397 ; CHECK-NEXT: csrr a0, vlenb
398 ; CHECK-NEXT: add a0, a0, sp
399 ; CHECK-NEXT: addi a0, a0, 16
400 ; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
401 ; CHECK-NEXT: call foo
402 ; CHECK-NEXT: addi a0, sp, 16
403 ; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
404 ; CHECK-NEXT: csrr a0, vlenb
405 ; CHECK-NEXT: add a0, a0, sp
406 ; CHECK-NEXT: addi a0, a0, 16
407 ; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
408 ; CHECK-NEXT: j .LBB7_3
409 ; CHECK-NEXT: .LBB7_2: # %if.else
410 ; CHECK-NEXT: vfsub.vv v9, v8, v9
411 ; CHECK-NEXT: .LBB7_3: # %if.end
412 ; CHECK-NEXT: vsetvli zero, s0, e64, m1, ta, ma
413 ; CHECK-NEXT: vfmul.vv v8, v9, v8
414 ; CHECK-NEXT: csrr a0, vlenb
415 ; CHECK-NEXT: slli a0, a0, 1
416 ; CHECK-NEXT: add sp, sp, a0
417 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
418 ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
419 ; CHECK-NEXT: addi sp, sp, 32
422 %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
423 %tobool = icmp eq i8 %cond, 0
424 br i1 %tobool, label %if.else, label %if.then
426 if.then: ; preds = %entry
427 %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
431 if.else: ; preds = %entry
432 %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
435 if.end: ; preds = %if.else, %if.then
436 %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
437 %3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %0)
438 ret <vscale x 1 x double> %3
; saxpy_vec: strip-mined loop computing y = a * x + y (vfmacc) on
; <vscale x 16 x float> vectors. The vsetvli intrinsic is called with
; operands (2, 3) once in entry and once at the bottom of each iteration;
; the resulting VL is carried into the next iteration through the phi %1 and
; the loop exits when the returned VL is zero.
; Expected assembly: a vsetvli that reads the AVL in entry and again at the
; loop bottom (both e32, m8, ta), no vsetvli before the two vle32.v loads at
; the loop top, and a `vsetvli zero, zero` that switches only the tail policy
; to tu ahead of the vfmacc.vf.
441 define void @saxpy_vec(i64 %n, float %a, ptr nocapture readonly %x, ptr nocapture %y) {
442 ; CHECK-LABEL: saxpy_vec:
443 ; CHECK: # %bb.0: # %entry
444 ; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, ma
445 ; CHECK-NEXT: beqz a3, .LBB8_2
446 ; CHECK-NEXT: .LBB8_1: # %for.body
447 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
448 ; CHECK-NEXT: vle32.v v8, (a1)
449 ; CHECK-NEXT: vle32.v v16, (a2)
450 ; CHECK-NEXT: slli a4, a3, 2
451 ; CHECK-NEXT: sub a0, a0, a3
452 ; CHECK-NEXT: add a1, a1, a4
453 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma
454 ; CHECK-NEXT: vfmacc.vf v16, fa0, v8
455 ; CHECK-NEXT: vse32.v v16, (a2)
456 ; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, ma
457 ; CHECK-NEXT: add a2, a2, a4
458 ; CHECK-NEXT: bnez a3, .LBB8_1
459 ; CHECK-NEXT: .LBB8_2: # %for.end
462 %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 3)
463 %cmp.not13 = icmp eq i64 %0, 0
464 br i1 %cmp.not13, label %for.end, label %for.body
466 for.body: ; preds = %for.body, %entry
; %1 is the VL for this iteration: %0 on entry, %7 from the previous iteration.
467 %1 = phi i64 [ %7, %for.body ], [ %0, %entry ]
468 %n.addr.016 = phi i64 [ %sub, %for.body ], [ %n, %entry ]
469 %x.addr.015 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
470 %y.addr.014 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
471 %2 = bitcast ptr %x.addr.015 to ptr
472 %3 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, ptr %2, i64 %1)
473 %add.ptr = getelementptr inbounds float, ptr %x.addr.015, i64 %1
474 %4 = bitcast ptr %y.addr.014 to ptr
475 %5 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, ptr %4, i64 %1)
476 %6 = tail call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float> %5, float %a, <vscale x 16 x float> %3, i64 7, i64 %1, i64 0)
477 tail call void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float> %6, ptr %4, i64 %1)
478 %add.ptr1 = getelementptr inbounds float, ptr %y.addr.014, i64 %1
479 %sub = sub i64 %n.addr.016, %1
480 %7 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub, i64 2, i64 3)
481 %cmp.not = icmp eq i64 %7, 0
482 br i1 %cmp.not, label %for.end, label %for.body
484 for.end: ; preds = %for.body, %entry
; saxpy_vec_demanded_fields: identical to saxpy_vec except that the vsetvli
; intrinsic at the bottom of the loop is called with operands (1, 2) instead
; of (2, 3); the expected assembly shows it as e16, m4 rather than e32, m8.
; Because the loop-bottom vsetvli no longer matches what the loads need, the
; expected assembly has an additional `vsetvli zero, a3, e32, m8, ta, ma` at
; the top of the loop body before the first vle32.v.
488 define void @saxpy_vec_demanded_fields(i64 %n, float %a, ptr nocapture readonly %x, ptr nocapture %y) {
489 ; CHECK-LABEL: saxpy_vec_demanded_fields:
490 ; CHECK: # %bb.0: # %entry
491 ; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, ma
492 ; CHECK-NEXT: beqz a3, .LBB9_2
493 ; CHECK-NEXT: .LBB9_1: # %for.body
494 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
495 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
496 ; CHECK-NEXT: vle32.v v8, (a1)
497 ; CHECK-NEXT: vle32.v v16, (a2)
498 ; CHECK-NEXT: slli a4, a3, 2
499 ; CHECK-NEXT: sub a0, a0, a3
500 ; CHECK-NEXT: add a1, a1, a4
501 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma
502 ; CHECK-NEXT: vfmacc.vf v16, fa0, v8
503 ; CHECK-NEXT: vse32.v v16, (a2)
504 ; CHECK-NEXT: vsetvli a3, a0, e16, m4, ta, ma
505 ; CHECK-NEXT: add a2, a2, a4
506 ; CHECK-NEXT: bnez a3, .LBB9_1
507 ; CHECK-NEXT: .LBB9_2: # %for.end
510 %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 3)
511 %cmp.not13 = icmp eq i64 %0, 0
512 br i1 %cmp.not13, label %for.end, label %for.body
514 for.body: ; preds = %for.body, %entry
515 %1 = phi i64 [ %7, %for.body ], [ %0, %entry ]
516 %n.addr.016 = phi i64 [ %sub, %for.body ], [ %n, %entry ]
517 %x.addr.015 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
518 %y.addr.014 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
519 %2 = bitcast ptr %x.addr.015 to ptr
520 %3 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, ptr %2, i64 %1)
521 %add.ptr = getelementptr inbounds float, ptr %x.addr.015, i64 %1
522 %4 = bitcast ptr %y.addr.014 to ptr
523 %5 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, ptr %4, i64 %1)
524 %6 = tail call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float> %5, float %a, <vscale x 16 x float> %3, i64 7, i64 %1, i64 0)
525 tail call void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float> %6, ptr %4, i64 %1)
526 %add.ptr1 = getelementptr inbounds float, ptr %y.addr.014, i64 %1
527 %sub = sub i64 %n.addr.016, %1
; Differs from saxpy_vec: operands (1, 2) here versus (2, 3) in entry.
528 %7 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub, i64 1, i64 2)
529 %cmp.not = icmp eq i64 %7, 0
530 br i1 %cmp.not, label %for.end, label %for.body
532 for.end: ; preds = %for.body, %entry
536 declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
537 declare <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float>, ptr nocapture, i64)
538 declare <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float>, float, <vscale x 16 x float>, i64, i64, i64)
539 declare void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float>, ptr nocapture, i64)
541 ; We need a vsetvli in the last block because the predecessors have different
542 ; VTYPEs. The AVL is the same and the SEW/LMUL ratio implies the same VLMAX so
543 ; we don't need to read AVL and can keep VL unchanged.
; test_vsetvli_x0_x0: entry loads a <vscale x 2 x i32> vector with VL %vl; the
; conditional %if block loads a <vscale x 2 x i16> vector and widens it with
; vwadd (scalar operand 0); %if.end adds the entry load to the phi of %z and
; the widened value. No vsetvli intrinsic is used.
; Expected assembly: only the vsetvli in entry reads the AVL register (a2).
; The later ones, in %if (e16, mf2) and %if.end (e32, m1), use the
; `vsetvli zero, zero` form.
544 define <vscale x 2 x i32> @test_vsetvli_x0_x0(ptr %x, ptr %y, <vscale x 2 x i32> %z, i64 %vl, i1 %cond) nounwind {
545 ; CHECK-LABEL: test_vsetvli_x0_x0:
546 ; CHECK: # %bb.0: # %entry
547 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
548 ; CHECK-NEXT: vle32.v v9, (a0)
549 ; CHECK-NEXT: andi a3, a3, 1
550 ; CHECK-NEXT: beqz a3, .LBB10_2
551 ; CHECK-NEXT: # %bb.1: # %if
552 ; CHECK-NEXT: vle16.v v10, (a1)
553 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
554 ; CHECK-NEXT: vwcvt.x.x.v v8, v10
555 ; CHECK-NEXT: .LBB10_2: # %if.end
556 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
557 ; CHECK-NEXT: vadd.vv v8, v9, v8
560 %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, ptr %x, i64 %vl)
561 br i1 %cond, label %if, label %if.end
564 %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %y, i64 %vl)
565 %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i16> %b, i16 0, i64 %vl)
569 %d = phi <vscale x 2 x i32> [ %z, %entry ], [ %c, %if ]
570 %e = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %d, i64 %vl)
571 ret <vscale x 2 x i32> %e
573 declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>, ptr, i64)
574 declare <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>, ptr, i64)
575 declare <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i16>, i16, i64)
576 declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, i64)
578 ; We can use X0, X0 vsetvli in if2 and if2.end. The merge point at if.end will
579 ; see two different vtypes with the same SEW/LMUL ratio. At if2.end we will only
580 ; know the SEW/LMUL ratio for the if.end predecessor and the full vtype for
581 ; the if2 predecessor. This makes sure we can merge a SEW/LMUL predecessor with
582 ; a predecessor we know the vtype for.
; test_vsetvli_x0_x0_2: two consecutive conditional blocks (%if and %if2),
; each loading a <vscale x 2 x i16> vector and accumulating it into the
; running <vscale x 2 x i32> value with vwadd.w; %if2.end finishes with a
; <vscale x 2 x i32> vadd against %w. Every intrinsic takes %vl directly.
; Expected assembly: the entry vsetvli reads the AVL register (a3); %if.end
; itself has no vsetvli; %if, %if2 (both e16, mf2) and %if2.end (e32, m1)
; use the `vsetvli zero, zero` form.
583 define <vscale x 2 x i32> @test_vsetvli_x0_x0_2(ptr %x, ptr %y, ptr %z, i64 %vl, i1 %cond, i1 %cond2, <vscale x 2 x i32> %w) nounwind {
584 ; CHECK-LABEL: test_vsetvli_x0_x0_2:
585 ; CHECK: # %bb.0: # %entry
586 ; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma
587 ; CHECK-NEXT: vle32.v v9, (a0)
588 ; CHECK-NEXT: andi a4, a4, 1
589 ; CHECK-NEXT: beqz a4, .LBB11_2
590 ; CHECK-NEXT: # %bb.1: # %if
591 ; CHECK-NEXT: vle16.v v10, (a1)
592 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
593 ; CHECK-NEXT: vwadd.wv v9, v9, v10
594 ; CHECK-NEXT: .LBB11_2: # %if.end
595 ; CHECK-NEXT: andi a5, a5, 1
596 ; CHECK-NEXT: beqz a5, .LBB11_4
597 ; CHECK-NEXT: # %bb.3: # %if2
598 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
599 ; CHECK-NEXT: vle16.v v10, (a2)
600 ; CHECK-NEXT: vwadd.wv v9, v9, v10
601 ; CHECK-NEXT: .LBB11_4: # %if2.end
602 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
603 ; CHECK-NEXT: vadd.vv v8, v9, v8
606 %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, ptr %x, i64 %vl)
607 br i1 %cond, label %if, label %if.end
610 %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %y, i64 %vl)
611 %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i16> %b, i64 %vl)
615 %d = phi <vscale x 2 x i32> [ %a, %entry ], [ %c, %if ]
616 br i1 %cond2, label %if2, label %if2.end
619 %e = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %z, i64 %vl)
620 %f = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> undef, <vscale x 2 x i32> %d, <vscale x 2 x i16> %e, i64 %vl)
624 %g = phi <vscale x 2 x i32> [ %d, %if.end ], [ %f, %if2 ]
625 %h = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %g, <vscale x 2 x i32> %w, i64 %vl)
626 ret <vscale x 2 x i32> %h
628 declare <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i16>, i64)
630 ; We should only need 1 vsetvli for this code.
; vlmax: loop computing c[i] = a[i] + b[i] on <vscale x 1 x double> vectors.
; The VL %0 comes from a single vsetvlimax intrinsic call (operands 3, 0) in
; entry; it is used as the VL of both loads, the vfadd and the store, and as
; the step of the induction variable %i.014.
; Expected assembly: one `vsetvli a6, zero, e64, m1` in the preheader and no
; vsetvli inside the loop body.
631 define void @vlmax(i64 %N, ptr %c, ptr %a, ptr %b) {
632 ; CHECK-LABEL: vlmax:
633 ; CHECK: # %bb.0: # %entry
634 ; CHECK-NEXT: blez a0, .LBB12_3
635 ; CHECK-NEXT: # %bb.1: # %for.body.preheader
636 ; CHECK-NEXT: li a4, 0
637 ; CHECK-NEXT: vsetvli a6, zero, e64, m1, ta, ma
638 ; CHECK-NEXT: slli a5, a6, 3
639 ; CHECK-NEXT: .LBB12_2: # %for.body
640 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
641 ; CHECK-NEXT: vle64.v v8, (a2)
642 ; CHECK-NEXT: vle64.v v9, (a3)
643 ; CHECK-NEXT: add a4, a4, a6
644 ; CHECK-NEXT: add a3, a3, a5
645 ; CHECK-NEXT: vfadd.vv v8, v8, v9
646 ; CHECK-NEXT: vse64.v v8, (a1)
647 ; CHECK-NEXT: add a1, a1, a5
648 ; CHECK-NEXT: add a2, a2, a5
649 ; CHECK-NEXT: blt a4, a0, .LBB12_2
650 ; CHECK-NEXT: .LBB12_3: # %for.end
653 %0 = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
654 %cmp13 = icmp sgt i64 %N, 0
655 br i1 %cmp13, label %for.body, label %for.end
657 for.body: ; preds = %entry, %for.body
658 %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
659 %arrayidx = getelementptr inbounds double, ptr %a, i64 %i.014
660 %1 = bitcast ptr %arrayidx to ptr
661 %2 = tail call <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double> undef, ptr %1, i64 %0)
662 %arrayidx1 = getelementptr inbounds double, ptr %b, i64 %i.014
663 %3 = bitcast ptr %arrayidx1 to ptr
664 %4 = tail call <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double> undef, ptr %3, i64 %0)
665 %5 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> undef, <vscale x 1 x double> %2, <vscale x 1 x double> %4, i64 7, i64 %0)
666 %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
667 %6 = bitcast ptr %arrayidx2 to ptr
668 tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> %5, ptr %6, i64 %0)
; The induction variable advances by the VL returned from vsetvlimax.
669 %add = add nuw nsw i64 %i.014, %0
670 %cmp = icmp slt i64 %add, %N
671 br i1 %cmp, label %for.body, label %for.end
673 for.end: ; preds = %for.body, %entry
677 ; A single vector store in the loop with VL controlled by VLMAX
; vector_init_vlmax: loop that stores a zeroinitializer
; <vscale x 1 x double> vector to %c, advancing the index by %0 each
; iteration, where %0 is the result of the vsetvlimax intrinsic (3, 0) called
; in entry.
; Expected assembly: the vsetvli and the vmv.v.i that materializes the zero
; vector both sit in the preheader; the loop body contains only the vse64.v
; store, two adds and the back-edge branch.
678 define void @vector_init_vlmax(i64 %N, ptr %c) {
679 ; CHECK-LABEL: vector_init_vlmax:
680 ; CHECK: # %bb.0: # %entry
681 ; CHECK-NEXT: blez a0, .LBB13_3
682 ; CHECK-NEXT: # %bb.1: # %for.body.preheader
683 ; CHECK-NEXT: li a2, 0
684 ; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma
685 ; CHECK-NEXT: slli a4, a3, 3
686 ; CHECK-NEXT: vmv.v.i v8, 0
687 ; CHECK-NEXT: .LBB13_2: # %for.body
688 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
689 ; CHECK-NEXT: vse64.v v8, (a1)
690 ; CHECK-NEXT: add a2, a2, a3
691 ; CHECK-NEXT: add a1, a1, a4
692 ; CHECK-NEXT: blt a2, a0, .LBB13_2
693 ; CHECK-NEXT: .LBB13_3: # %for.end
696 %0 = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
697 %cmp13 = icmp sgt i64 %N, 0
698 br i1 %cmp13, label %for.body, label %for.end
700 for.body: ; preds = %entry, %for.body
701 %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
702 %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
703 %addr = bitcast ptr %arrayidx2 to ptr
704 tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 %0)
705 %add = add nuw nsw i64 %i.014, %0
706 %cmp = icmp slt i64 %add, %N
707 br i1 %cmp, label %for.body, label %for.end
709 for.end: ; preds = %for.body, %entry
713 ; Same as above, but VL comes from user provided AVL value
; vector_init_vsetvli_N: same zero-store loop as vector_init_vlmax, but %0 is
; produced by the vsetvli intrinsic with %N as the AVL operand (and 3, 0)
; instead of vsetvlimax.
; Expected assembly: a single `vsetvli a3, a0, e64, m1` in the preheader,
; followed by the vmv.v.i; no vsetvli inside the loop body.
714 define void @vector_init_vsetvli_N(i64 %N, ptr %c) {
715 ; CHECK-LABEL: vector_init_vsetvli_N:
716 ; CHECK: # %bb.0: # %entry
717 ; CHECK-NEXT: blez a0, .LBB14_3
718 ; CHECK-NEXT: # %bb.1: # %for.body.preheader
719 ; CHECK-NEXT: li a2, 0
720 ; CHECK-NEXT: vsetvli a3, a0, e64, m1, ta, ma
721 ; CHECK-NEXT: slli a4, a3, 3
722 ; CHECK-NEXT: vmv.v.i v8, 0
723 ; CHECK-NEXT: .LBB14_2: # %for.body
724 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
725 ; CHECK-NEXT: vse64.v v8, (a1)
726 ; CHECK-NEXT: add a2, a2, a3
727 ; CHECK-NEXT: add a1, a1, a4
728 ; CHECK-NEXT: blt a2, a0, .LBB14_2
729 ; CHECK-NEXT: .LBB14_3: # %for.end
732 %0 = tail call i64 @llvm.riscv.vsetvli(i64 %N, i64 3, i64 0)
733 %cmp13 = icmp sgt i64 %N, 0
734 br i1 %cmp13, label %for.body, label %for.end
736 for.body: ; preds = %entry, %for.body
737 %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
738 %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
739 %addr = bitcast ptr %arrayidx2 to ptr
740 tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 %0)
741 %add = add nuw nsw i64 %i.014, %0
742 %cmp = icmp slt i64 %add, %N
743 br i1 %cmp, label %for.body, label %for.end
745 for.end: ; preds = %for.body, %entry
749 ; Same as above, but VL is a hard coded constant (in the preheader)
; The AVL operand is the constant 4. The expected assembly uses the immediate
; form once ahead of the loop ('vsetivli a3, 4, ...') and keeps the returned VL
; in a3 for the index and address updates.
750 define void @vector_init_vsetvli_fv(i64 %N, ptr %c) {
751 ; CHECK-LABEL: vector_init_vsetvli_fv:
752 ; CHECK: # %bb.0: # %entry
753 ; CHECK-NEXT: li a2, 0
754 ; CHECK-NEXT: vsetivli a3, 4, e64, m1, ta, ma
755 ; CHECK-NEXT: slli a4, a3, 3
756 ; CHECK-NEXT: vmv.v.i v8, 0
757 ; CHECK-NEXT: .LBB15_1: # %for.body
758 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
759 ; CHECK-NEXT: vse64.v v8, (a1)
760 ; CHECK-NEXT: add a2, a2, a3
761 ; CHECK-NEXT: add a1, a1, a4
762 ; CHECK-NEXT: blt a2, a0, .LBB15_1
763 ; CHECK-NEXT: # %bb.2: # %for.end
; Constant AVL of 4; the result %0 is still used by the loop below.
766 %0 = tail call i64 @llvm.riscv.vsetvli(i64 4, i64 3, i64 0)
769 for.body: ; preds = %entry, %for.body
770 %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
771 %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
772 %addr = bitcast ptr %arrayidx2 to ptr
; Store an all-zero vector at &c[i]; the VL operand is the vsetvli result.
773 tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 %0)
; The index advances by the vsetvli result rather than by the literal 4.
774 %add = add nuw nsw i64 %i.014, %0
775 %cmp = icmp slt i64 %add, %N
776 br i1 %cmp, label %for.body, label %for.end
778 for.end: ; preds = %for.body
782 ; Same as above, but result of vsetvli in preheader isn't used, and
783 ; constant is repeated in loop
; The store's VL operand and the index increment are both the literal 4, so the
; expected assembly discards the VL result ('vsetivli zero, 4, ...') and steps
; with immediates ('addi a2, a2, 4' and 'addi a1, a1, 32').
784 define void @vector_init_vsetvli_fv2(i64 %N, ptr %c) {
785 ; CHECK-LABEL: vector_init_vsetvli_fv2:
786 ; CHECK: # %bb.0: # %entry
787 ; CHECK-NEXT: li a2, 0
788 ; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
789 ; CHECK-NEXT: vmv.v.i v8, 0
790 ; CHECK-NEXT: .LBB16_1: # %for.body
791 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
792 ; CHECK-NEXT: vse64.v v8, (a1)
793 ; CHECK-NEXT: addi a2, a2, 4
794 ; CHECK-NEXT: addi a1, a1, 32
795 ; CHECK-NEXT: blt a2, a0, .LBB16_1
796 ; CHECK-NEXT: # %bb.2: # %for.end
; The returned VL is not bound to a name, so nothing below can use it.
799 tail call i64 @llvm.riscv.vsetvli(i64 4, i64 3, i64 0)
802 for.body: ; preds = %entry, %for.body
803 %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
804 %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
805 %addr = bitcast ptr %arrayidx2 to ptr
; Store an all-zero vector at &c[i] with the constant VL operand 4.
806 tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 4)
; The index advances by the same constant.
807 %add = add nuw nsw i64 %i.014, 4
808 %cmp = icmp slt i64 %add, %N
809 br i1 %cmp, label %for.body, label %for.end
811 for.end: ; preds = %for.body
815 ; Same as above, but AVL is only specified on the store intrinsic
816 ; This case will require some form of hoisting or PRE
; The expected assembly still has the 'vsetivli zero, 4, ...' ahead of the loop
; label, with no vsetvli inside the loop body.
817 define void @vector_init_vsetvli_fv3(i64 %N, ptr %c) {
818 ; CHECK-LABEL: vector_init_vsetvli_fv3:
819 ; CHECK: # %bb.0: # %entry
820 ; CHECK-NEXT: li a2, 0
821 ; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
822 ; CHECK-NEXT: vmv.v.i v8, 0
823 ; CHECK-NEXT: .LBB17_1: # %for.body
824 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
825 ; CHECK-NEXT: vse64.v v8, (a1)
826 ; CHECK-NEXT: addi a2, a2, 4
827 ; CHECK-NEXT: addi a1, a1, 32
828 ; CHECK-NEXT: blt a2, a0, .LBB17_1
829 ; CHECK-NEXT: # %bb.2: # %for.end
834 for.body: ; preds = %entry, %for.body
835 %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
836 %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
837 %addr = bitcast ptr %arrayidx2 to ptr
; The store's constant VL operand (4) is the AVL the header comment refers to.
838 tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 4)
; The index advances by the same constant.
839 %add = add nuw nsw i64 %i.014, 4
840 %cmp = icmp slt i64 %add, %N
841 br i1 %cmp, label %for.body, label %for.end
843 for.end: ; preds = %for.body
847 ; Demonstrates a case where mutation in phase3 is problematic. We mutate the
848 ; vsetvli without considering that it changes the compatibility result of the
849 ; vadd in the second block.
; The expected assembly has 'vsetivli a0, 6, e32, m2, tu, ma' ahead of the
; vmv.s.x and a separate 'vsetvli zero, zero, e32, m2, ta, ma' ahead of the
; masked vadd.vv.
850 define <vscale x 4 x i32> @cross_block_mutate(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b,
851 ; CHECK-LABEL: cross_block_mutate:
852 ; CHECK: # %bb.0: # %entry
853 ; CHECK-NEXT: vsetivli a0, 6, e32, m2, tu, ma
854 ; CHECK-NEXT: vmv.s.x v8, a0
855 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
856 ; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
858 <vscale x 4 x i1> %mask) {
; %vl is used twice: as a scalar (truncated and inserted into element 0 of %a)
; and as the VL operand of the masked vadd after the branch to %fallthrough.
860 %vl = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 1, i64 0)
861 %vl.trunc = trunc i64 %vl to i32
862 %a.mod = insertelement <vscale x 4 x i32> %a, i32 %vl.trunc, i32 0
863 br label %fallthrough
866 %res = call <vscale x 4 x i32> @llvm.riscv.vadd.mask.nxv4i32.nxv4i32(
867 <vscale x 4 x i32> undef, <vscale x 4 x i32> %a.mod,
868 <vscale x 4 x i32> %b, <vscale x 4 x i1> %mask, i64 %vl, i64 0)
869 ret <vscale x 4 x i32> %res
; Two vadd calls share the VL returned by a vsetvlimax. On the path taken when
; %cond is true, a second vsetvlimax with different operands is called in
; between. The expected assembly lists the two vadd.vv back to back, preceded
; by 'vsetvli a1, zero, e64, m1, ...' and 'vsetvli zero, a1, e32, m1, ...'.
872 define <vscale x 2 x i32> @pre_lmul(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i1 %cond) nounwind {
873 ; CHECK-LABEL: pre_lmul:
874 ; CHECK: # %bb.0: # %entry
875 ; CHECK-NEXT: andi a0, a0, 1
876 ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
877 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
878 ; CHECK-NEXT: vadd.vv v8, v8, v9
879 ; CHECK-NEXT: vadd.vv v8, v8, v9
; %vl is the VL operand of both vadd calls below.
882 %vl = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
883 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %vl)
884 br i1 %cond, label %if, label %if.end
887 ; Deliberately change vtype - this could be an unknown call, but the broader
888 ; code quality is distractingly bad
; The result of this call is not bound to a name.
889 tail call i64 @llvm.riscv.vsetvlimax.i64(i64 2, i64 1)
; Second vadd: consumes %a and reuses %vl from the first vsetvlimax.
893 %b = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %y, i64 %vl)
894 ret <vscale x 2 x i32> %b
; A <vscale x 1 x double> fadd result is stored to %p1 unconditionally, then a
; <vscale x 1 x float> value is stored to %p2 only when %cond is true. The
; expected assembly has one vsetvli (e64, m1) ahead of the vfadd.vv and no
; further vsetvli ahead of the conditional vse32.v.
897 define <vscale x 1 x double> @compat_store_consistency(i1 %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b, ptr %p1, <vscale x 1 x float> %c, ptr %p2) {
898 ; CHECK-LABEL: compat_store_consistency:
899 ; CHECK: # %bb.0: # %entry
900 ; CHECK-NEXT: andi a0, a0, 1
901 ; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma
902 ; CHECK-NEXT: vfadd.vv v8, v8, v9
903 ; CHECK-NEXT: vs1r.v v8, (a1)
904 ; CHECK-NEXT: beqz a0, .LBB20_2
905 ; CHECK-NEXT: # %bb.1: # %if.then
906 ; CHECK-NEXT: vse32.v v10, (a2)
907 ; CHECK-NEXT: .LBB20_2: # %if.end
; Plain IR fadd and store on scalable vectors; no RISC-V intrinsics are used.
910 %res = fadd <vscale x 1 x double> %a, %b
911 store <vscale x 1 x double> %res, ptr %p1
912 br i1 %cond, label %if.then, label %if.end
914 if.then: ; preds = %entry
; Conditional store of the float vector, a different element type from %res.
915 store <vscale x 1 x float> %c, ptr %p2
918 if.end: ; preds = %if.else, %if.then
919 ret <vscale x 1 x double> %res
922 ; Next two tests (which are the same except for swapped block order), make sure that the
923 ; demanded reasoning around vmv.s.x correctly handles a forward state with only a valid
924 ; SEWLMULRatio. We previously had a crash bug in this case.
; In the expected assembly the two paths set different vtypes (e16, mf2 and
; e32, m1), and the merge block has 'vsetvli zero, zero, e32, m1, tu, ma' ahead
; of the vmv.s.x.
925 define <vscale x 2 x i32> @test_ratio_only_vmv_s_x(ptr %x, ptr %y, i1 %cond) nounwind {
926 ; CHECK-LABEL: test_ratio_only_vmv_s_x:
927 ; CHECK: # %bb.0: # %entry
928 ; CHECK-NEXT: andi a2, a2, 1
929 ; CHECK-NEXT: beqz a2, .LBB21_2
930 ; CHECK-NEXT: # %bb.1: # %if
931 ; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
932 ; CHECK-NEXT: vle16.v v9, (a1)
933 ; CHECK-NEXT: vwcvt.x.x.v v8, v9
934 ; CHECK-NEXT: j .LBB21_3
935 ; CHECK-NEXT: .LBB21_2:
936 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
937 ; CHECK-NEXT: vle32.v v8, (a0)
938 ; CHECK-NEXT: .LBB21_3: # %if.end
939 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
940 ; CHECK-NEXT: vmv.s.x v8, zero
; Entry: load two i32 elements from %x.
943 %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, ptr %x, i64 2)
944 br i1 %cond, label %if, label %if.end
; %if: load two i16 elements from %y and widen them to i32 by adding 0.
947 %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %y, i64 2)
948 %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i16> %b, i16 0, i64 2)
; Merge the two i32 vectors, then overwrite element 0 with 0.
952 %d = phi <vscale x 2 x i32> [ %a, %entry ], [ %c, %if ]
953 %e = insertelement <vscale x 2 x i32> %d, i32 0, i32 0
954 ret <vscale x 2 x i32> %e
; Block-order-swapped variant: the i16 load and widening happen before the
; branch, and the i32 load happens in %if. The expected assembly again has
; 'vsetvli zero, zero, e32, m1, tu, ma' ahead of the vmv.s.x in the merge block.
957 define <vscale x 2 x i32> @test_ratio_only_vmv_s_x2(ptr %x, ptr %y, i1 %cond) nounwind {
958 ; CHECK-LABEL: test_ratio_only_vmv_s_x2:
959 ; CHECK: # %bb.0: # %entry
960 ; CHECK-NEXT: andi a2, a2, 1
961 ; CHECK-NEXT: beqz a2, .LBB22_2
962 ; CHECK-NEXT: # %bb.1: # %if
963 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
964 ; CHECK-NEXT: vle32.v v8, (a0)
965 ; CHECK-NEXT: j .LBB22_3
966 ; CHECK-NEXT: .LBB22_2:
967 ; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
968 ; CHECK-NEXT: vle16.v v9, (a1)
969 ; CHECK-NEXT: vwcvt.x.x.v v8, v9
970 ; CHECK-NEXT: .LBB22_3: # %if.end
971 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
972 ; CHECK-NEXT: vmv.s.x v8, zero
; Entry: load two i16 elements from %y and widen them to i32 by adding 0.
975 %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %y, i64 2)
976 %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i16> %b, i16 0, i64 2)
977 br i1 %cond, label %if, label %if.end
; %if: load two i32 elements from %x.
980 %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, ptr %x, i64 2)
; Merge the two i32 vectors, then overwrite element 0 with 0.
984 %d = phi <vscale x 2 x i32> [ %a, %if ], [ %c, %entry ]
985 %e = insertelement <vscale x 2 x i32> %d, i32 0, i32 0
986 ret <vscale x 2 x i32> %e
989 ; This case demonstrates a PRE case where the first instruction in the block
990 ; doesn't require a state transition.
; The expected assembly has a single 'vsetivli zero, 2, e32, mf2, ta, ma' ahead
; of the loop label; the vle8.v, vsext.vf4 and vse32.v in the loop body have no
; vsetvli between them.
991 define void @pre_over_vle(ptr %A) {
992 ; CHECK-LABEL: pre_over_vle:
993 ; CHECK: # %bb.0: # %entry
994 ; CHECK-NEXT: addi a1, a0, 800
995 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
996 ; CHECK-NEXT: .LBB23_1: # %vector.body
997 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
998 ; CHECK-NEXT: vle8.v v8, (a0)
999 ; CHECK-NEXT: vsext.vf4 v9, v8
1000 ; CHECK-NEXT: vse32.v v9, (a0)
1001 ; CHECK-NEXT: addi a0, a0, 8
1002 ; CHECK-NEXT: bne a0, a1, .LBB23_1
1003 ; CHECK-NEXT: # %bb.2: # %exit
1006 br label %vector.body
; %iv starts at 0 and the loop exits once %iv.next reaches 100.
1009 %iv = phi i64 [ 0, %entry], [%iv.next, %vector.body]
; The address is computed in units of <2 x i32>.
1010 %addr = getelementptr inbounds <2 x i32>, ptr %A, i64 %iv
; Load <2 x i8>, sign-extend to <2 x i32>, and store back to the same address.
1011 %v = load <2 x i8>, ptr %addr
1012 %v2 = sext <2 x i8> %v to <2 x i32>
1013 store <2 x i32> %v2, ptr %addr
1014 %iv.next = add i64 %iv, 1
1015 %cmp = icmp ne i64 %iv.next, 100
1016 br i1 %cmp, label %vector.body, label %exit
1021 declare i64 @llvm.riscv.vsetvlimax.i64(i64, i64)
1022 declare <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double>, ptr nocapture, i64)
1023 declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)
1024 declare void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double>, ptr nocapture, i64)
1025 declare <vscale x 4 x i32> @llvm.riscv.vadd.mask.nxv4i32.nxv4i32(
1033 ; Normally a pseudo's AVL is already live in its block, so it will already be
1034 ; live where we're inserting the vsetvli, before the pseudo. In some cases the
1035 ; AVL can be from a predecessor block, so make sure we extend its live range
1037 define <vscale x 2 x i32> @cross_block_avl_extend(i64 %avl, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
1038 ; CHECK-LABEL: cross_block_avl_extend:
1039 ; CHECK: # %bb.0: # %entry
1040 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1041 ; CHECK-NEXT: vadd.vv v9, v8, v9
1042 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1043 ; CHECK-NEXT: vadd.vv v8, v8, v9
1046 ; Get the output vl from a vsetvli
1047 %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 0)
1048 ; Force a vsetvli toggle so we need to insert a new vsetvli in exit
; This vadd uses a constant VL operand of 1 instead of %vl.
1049 %d = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b, i64 1)
1052 ; The use of the vl from the vsetvli will be replaced with its %avl because
1053 ; VLMAX is the same. So %avl, which was previously only live in %entry, will
1054 ; need to be extended down to %exit.
; The expected assembly's second vsetvli takes a0 as its AVL register.
1055 %c = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %d, i64 %vl)
1056 ret <vscale x 2 x i32> %c
; Two vse stores of %v to %p: the first with VL 1, the second with VL %add.
; %add (%avl + 1) is defined textually after its use. In the expected assembly
; %foo is a self-loop (it ends with 'j .LBB25_3'), each store has its own
; vsetvli inside that loop, and the 'addi a2, a2, 1' sits in %bar ahead of it.
1059 define void @cross_block_avl_extend_backwards(i1 %cond, <vscale x 8 x i8> %v, ptr %p, i64 %avl) {
1060 ; CHECK-LABEL: cross_block_avl_extend_backwards:
1061 ; CHECK: # %bb.0: # %entry
1062 ; CHECK-NEXT: andi a0, a0, 1
1063 ; CHECK-NEXT: beqz a0, .LBB25_2
1064 ; CHECK-NEXT: # %bb.1: # %exit
1066 ; CHECK-NEXT: .LBB25_2: # %bar
1067 ; CHECK-NEXT: addi a2, a2, 1
1068 ; CHECK-NEXT: .LBB25_3: # %foo
1069 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1070 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1071 ; CHECK-NEXT: vse8.v v8, (a1)
1072 ; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
1073 ; CHECK-NEXT: vse8.v v8, (a1)
1074 ; CHECK-NEXT: j .LBB25_3
1076 br i1 %cond, label %exit, label %bar
; First store: constant VL operand of 1.
1079 call void @llvm.riscv.vse.nxv8i8.i64(<vscale x 8 x i8> %v, ptr %p, i64 1)
1080 ; %add's LiveRange needs to be extended backwards to here.
1081 call void @llvm.riscv.vse.nxv8i8.i64(<vscale x 8 x i8> %v, ptr %p, i64 %add)
; Definition of the AVL used by the second store above.
1086 %add = add i64 %avl, 1
; The VL operand of the vmv.v.x is a phi of two vsetvli results whose AVLs
; differ (%a in %foo, %b in %bar). In the expected assembly each predecessor
; has its own vsetvli (AVL in a2 / a3), the vmv.v.i follows the join with no
; further vsetvli, and a 'vsetivli zero, 1, ...' precedes the store.
1090 define void @vlmax_avl_phi(i1 %cmp, ptr %p, i64 %a, i64 %b) {
1091 ; CHECK-LABEL: vlmax_avl_phi:
1092 ; CHECK: # %bb.0: # %entry
1093 ; CHECK-NEXT: andi a0, a0, 1
1094 ; CHECK-NEXT: beqz a0, .LBB26_2
1095 ; CHECK-NEXT: # %bb.1: # %foo
1096 ; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
1097 ; CHECK-NEXT: j .LBB26_3
1098 ; CHECK-NEXT: .LBB26_2: # %bar
1099 ; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
1100 ; CHECK-NEXT: .LBB26_3: # %exit
1101 ; CHECK-NEXT: vmv.v.i v8, 0
1102 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1103 ; CHECK-NEXT: vse8.v v8, (a1)
1106 br i1 %cmp, label %foo, label %bar
; Same trailing operands (0, 0) on both paths; only the AVL differs.
1109 %vl.foo = tail call i64 @llvm.riscv.vsetvli.i64(i64 %a, i64 0, i64 0)
1113 %vl.bar = tail call i64 @llvm.riscv.vsetvli.i64(i64 %b, i64 0, i64 0)
; Join: select whichever VL was produced on the incoming path.
1117 %phivl = phi i64 [ %vl.foo, %foo ], [ %vl.bar, %bar ]
1118 %1 = tail call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 0, i64 %phivl)
; The store uses a constant VL operand of 1 rather than %phivl.
1119 call void @llvm.riscv.vse.nxv8i8(<vscale x 8 x i8> %1, ptr %p, i64 1)
1123 ; Check that if we forward an AVL whose value is clobbered in its LiveInterval
1124 ; we emit a copy instead.
1125 define <vscale x 4 x i32> @clobbered_forwarded_avl(i64 %n, <vscale x 4 x i32> %v, i1 %cmp) {
1126 ; CHECK-LABEL: clobbered_forwarded_avl:
1127 ; CHECK: # %bb.0: # %entry
1128 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1129 ; CHECK-NEXT: andi a1, a1, 1
1130 ; CHECK-NEXT: .LBB27_1: # %for.body
1131 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1132 ; CHECK-NEXT: addi a0, a0, 1
1133 ; CHECK-NEXT: bnez a1, .LBB27_1
1134 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
1135 ; CHECK-NEXT: vadd.vv v10, v8, v8
1136 ; CHECK-NEXT: vadd.vv v8, v10, v8
; %0 is the VL used by the final vadd, after the loop.
1139 %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 1)
1143 ; Use %n in a PHI here so its virtual register is assigned to a second time here.
1144 %1 = phi i64 [ %3, %for.body ], [ %n, %entry ]
1145 %2 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %1, i64 0, i64 0)
1147 br i1 %cmp, label %for.body, label %for.cond.cleanup
; NOTE(review): the intrinsic name below carries an nxv2f32 suffix while its
; operands are <vscale x 4 x i32> - confirm whether the name should be updated.
; The first vadd passes -1 as its VL operand.
1150 %4 = tail call <vscale x 4 x i32> @llvm.riscv.vadd.nxv2f32.nxv2f32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32> %v, <vscale x 4 x i32> %v, i64 -1)
1151 ; VL toggle needed here: If the %n AVL was forwarded here we wouldn't be able
1152 ; to extend its LiveInterval because it would clobber the assignment at %1.
; NOTE(review): the expected assembly above lists the two vadd.vv with no
; vsetvli between them - confirm the comment above still matches the output.
1153 %5 = tail call <vscale x 4 x i32> @llvm.riscv.vadd.nxv2f32.nxv2f32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32> %4, <vscale x 4 x i32> %v, i64 %0)
1154 ret <vscale x 4 x i32> %5