; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64

declare <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  iXLen, iXLen)
declare <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  iXLen, iXLen)
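
; In the intrinsic calls below, the rounding mode is the iXLen operand just
; before the VL operand, and it is expected to show up in the checked output
; as the immediate of a "csrwi vxrm, <rm>" write.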

; Test same rounding mode in one block.
define <vscale x 1 x i8> @test1(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  ret <vscale x 1 x i8> %b
}

; Test different rounding mode.
define <vscale x 1 x i8> @test2(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 2, iXLen %3)
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  ret <vscale x 1 x i8> %b
}

declare <vscale x 1 x i8> @foo(<vscale x 1 x i8>)

; Test same vxrm with call in between which may invalidate vxrm.
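; Because the callee may change vxrm, the same rounding mode has to be written
; again after the call returns; the checks below expect a second
; "csrwi vxrm, 0" after "call foo".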
define <vscale x 1 x i8> @test3(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; RV32-LABEL: test3:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: mv s0, a0
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vs1r.v v10, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT: csrwi vxrm, 0
; RV32-NEXT: vaadd.vv v8, v8, v9
; RV32-NEXT: call foo
; RV32-NEXT: vsetvli zero, s0, e8, mf8, ta, ma
; RV32-NEXT: csrwi vxrm, 0
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vaadd.vv v8, v8, v9
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: test3:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 1
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: mv s0, a0
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs1r.v v10, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT: csrwi vxrm, 0
; RV64-NEXT: vaadd.vv v8, v8, v9
; RV64-NEXT: call foo
; RV64-NEXT: vsetvli zero, s0, e8, mf8, ta, ma
; RV64-NEXT: csrwi vxrm, 0
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vaadd.vv v8, v8, v9
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> @foo(<vscale x 1 x i8> %a)
  %c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %b,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  ret <vscale x 1 x i8> %c
}

; Test same vxrm with asm in between which may invalidate vxrm.
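; As in test3, even the empty inline asm block (#APP/#NO_APP in the output) is
; treated as possibly changing vxrm, so the rounding mode is written again
; after it.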
define <vscale x 1 x i8> @test4(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> asm "", "=^vr,0"(<vscale x 1 x i8> %a)
  %c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %b,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in triangle.
define <vscale x 1 x i8> @test5(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: beqz a1, .LBB4_2
; CHECK-NEXT: # %bb.1: # %condblock
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: .LBB4_2: # %mergeblock
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br i1 %cond, label %condblock, label %mergeblock

condblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %entry], [%b, %condblock]
  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in diamond with no dominating vxrm.
define <vscale x 1 x i8> @test6(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: beqz a1, .LBB5_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB5_2: # %falseblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in diamond with same dominating vxrm.
define <vscale x 1 x i8> @test7(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test7:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: beqz a1, .LBB6_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB6_2: # %falseblock
; CHECK-NEXT: vasub.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %c = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %d = phi <vscale x 1 x i8> [%b, %trueblock], [%c, %falseblock]
  ret <vscale x 1 x i8> %d
}

; Test same rounding mode in diamond with same vxrm at merge.
define <vscale x 1 x i8> @test8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: beqz a1, .LBB7_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB7_2: # %falseblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vasub.vv v8, v8, v9
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %c,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  ret <vscale x 1 x i8> %d
}

; Test same rounding mode in diamond with different vxrm at merge.
define <vscale x 1 x i8> @test9(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test9:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: beqz a1, .LBB8_2
; CHECK-NEXT: # %bb.1: # %trueblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: j .LBB8_3
; CHECK-NEXT: .LBB8_2: # %falseblock
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vasub.vv v8, v8, v9
; CHECK-NEXT: .LBB8_3: # %mergeblock
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %c,
    <vscale x 1 x i8> %2,
    iXLen 2, iXLen %3)
  ret <vscale x 1 x i8> %d
}

; Test loop with no dominating vxrm write.
define void @test10(ptr nocapture %ptr_dest, ptr nocapture readonly %ptr_op1, ptr nocapture readonly %ptr_op2, iXLen %n) {
; CHECK-LABEL: test10:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: beqz a3, .LBB9_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: .LBB9_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a2)
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: sub a3, a3, a4
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: bnez a3, .LBB9_2
; CHECK-NEXT: .LBB9_3: # %for.end
; CHECK-NEXT: ret
entry:
  %tobool.not9 = icmp eq iXLen %n, 0
  br i1 %tobool.not9, label %for.end, label %for.body

for.body:
  %n.addr.011 = phi iXLen [ %n, %entry ], [ %sub, %for.body ]
  %vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5)
  %load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl)
  %load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl)
  %vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl)
  %sub = sub iXLen %n.addr.011, %vl
  %tobool.not = icmp eq iXLen %sub, 0
  br i1 %tobool.not, label %for.end, label %for.body

for.end:
  ret void
}

declare iXLen @llvm.riscv.vsetvli.iXLen(iXLen, iXLen immarg, iXLen immarg)
declare <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8>, ptr nocapture, iXLen)
declare void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8>, ptr nocapture, iXLen)

; Test loop with dominating vxrm write. Make sure there is no write in the loop.
define void @test11(ptr nocapture %ptr_dest, ptr nocapture readonly %ptr_op1, ptr nocapture readonly %ptr_op2, iXLen %n) {
; CHECK-LABEL: test11:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a2)
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: .LBB10_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: sub a3, a3, a4
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: beqz a3, .LBB10_3
; CHECK-NEXT: # %bb.2: # %for.body
; CHECK-NEXT: # in Loop: Header=BB10_1 Depth=1
; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a2)
; CHECK-NEXT: j .LBB10_1
; CHECK-NEXT: .LBB10_3: # %for.end
; CHECK-NEXT: ret
entry:
  %vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n, iXLen 0, iXLen 5)
  %load1a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl)
  %load2a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl)
  %vadda = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1a, <vscale x 1 x i8> %load2a, iXLen 2, iXLen %vl)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadda, ptr %ptr_dest, iXLen %vl)
  %suba = sub iXLen %n, %vl
  %tobool.not9 = icmp eq iXLen %suba, 0
  br i1 %tobool.not9, label %for.end, label %for.body

for.body:
  %n.addr.011 = phi iXLen [ %suba, %entry ], [ %sub, %for.body ]
  %vl2 = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5)
  %load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl2)
  %load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl2)
  %vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl2)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl2)
  %sub = sub iXLen %n.addr.011, %vl2
  %tobool.not = icmp eq iXLen %sub, 0
  br i1 %tobool.not, label %for.end, label %for.body

for.end:
  ret void
}

; The edge from entry to block2 is a critical edge. The vxrm write in block2
; is redundant when coming from block1, but is needed when coming from entry.
; FIXME: We could remove the write from the end of block1 without splitting the
; critical edge.
define <vscale x 1 x i8> @test12(i1 %c1, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) {
; CHECK-LABEL: test12:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v9, v8, v9
; CHECK-NEXT: beqz a0, .LBB11_2
; CHECK-NEXT: # %bb.1: # %block1
; CHECK-NEXT: csrwi vxrm, 1
; CHECK-NEXT: vaadd.vv v9, v8, v9
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: .LBB11_2: # %block2
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl)
  br i1 %c1, label %block1, label %block2

block1:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl)
  br label %block2

block2:
  %c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %d
}

; Similar to test12, but introduces a second critical edge from block1 to
; block3. Now the write to vxrm at the end of block1 can't be removed because
; it is needed by block3.
define <vscale x 1 x i8> @test13(i1 %c1, i1 %c2, i1 %c3, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) {
; CHECK-LABEL: test13:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaadd.vv v10, v8, v9
; CHECK-NEXT: beqz a0, .LBB12_2
; CHECK-NEXT: # %bb.1: # %block1
; CHECK-NEXT: csrwi vxrm, 1
; CHECK-NEXT: vaadd.vv v10, v8, v10
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: beqz a1, .LBB12_3
; CHECK-NEXT: .LBB12_2: # %block2
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaadd.vv v8, v8, v10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB12_3: # %block3
; CHECK-NEXT: vaadd.vv v8, v9, v10
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl)
  br i1 %c1, label %block1, label %block2

block1:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl)
  br i1 %c2, label %block2, label %block3

block2:
  %c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %d

block3:
  %e = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %1, <vscale x 1 x i8> %b, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %e
}