; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
declare <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  iXLen, iXLen)

declare <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  iXLen, iXLen)
; Test same rounding mode in one block.
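; Both vaadds below request rounding mode 0 (rnu), so the checks expect a
; single vxrm write that covers the whole block.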
define <vscale x 1 x i8> @test1(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  ret <vscale x 1 x i8> %b
}

; Test different rounding modes in one block.
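; The first vaadd requests mode 2 (rdn) and the second mode 0 (rnu), so the
; checks expect a separate csrwi before each instruction.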
define <vscale x 1 x i8> @test2(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 2, iXLen %3)
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  ret <vscale x 1 x i8> %b
}

declare <vscale x 1 x i8> @foo(<vscale x 1 x i8>)
; Test same vxrm with a call in between, which may invalidate vxrm.
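; The call to @foo is treated as potentially clobbering vxrm, so the checks
; expect the rounding mode to be rewritten after the call returns.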
define <vscale x 1 x i8> @test3(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; RV32-LABEL: test3:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    mv s0, a0
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vs1r.v v10, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    csrwi vxrm, 0
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vaadd.vv v8, v8, v9
; RV32-NEXT:    call foo
; RV32-NEXT:    csrwi vxrm, 0
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, s0, e8, mf8, ta, ma
; RV32-NEXT:    vaadd.vv v8, v8, v9
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    ret
;
; RV64-LABEL: test3:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    addi sp, sp, -32
; RV64-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    mv s0, a0
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vs1r.v v10, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    csrwi vxrm, 0
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vaadd.vv v8, v8, v9
; RV64-NEXT:    call foo
; RV64-NEXT:    csrwi vxrm, 0
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, s0, e8, mf8, ta, ma
; RV64-NEXT:    vaadd.vv v8, v8, v9
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 32
; RV64-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> @foo(<vscale x 1 x i8> %a)
  %c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %b,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  ret <vscale x 1 x i8> %c
}

; Test same vxrm with inline asm in between, which may invalidate vxrm.
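; Inline asm is conservatively treated as clobbering vxrm, so the checks
; expect csrwi to be reissued after the #NO_APP marker.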
define <vscale x 1 x i8> @test4(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    #APP
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  %b = call <vscale x 1 x i8> asm "", "=^vr,0"(<vscale x 1 x i8> %a)
  %c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %b,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in triangle.
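; The write in %entry dominates %condblock, so the checks expect no extra
; csrwi on the conditional path.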
define <vscale x 1 x i8> @test5(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    beqz a1, .LBB4_2
; CHECK-NEXT:  # %bb.1: # %condblock
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:  .LBB4_2: # %mergeblock
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br i1 %cond, label %condblock, label %mergeblock

condblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %entry], [%b, %condblock]
  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in diamond with no dominating vxrm.
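; Neither arm dominates the other, but both use mode 0, so the checks expect
; one csrwi placed in %entry rather than one per arm.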
define <vscale x 1 x i8> @test6(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    beqz a1, .LBB5_2
; CHECK-NEXT:  # %bb.1: # %trueblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB5_2: # %falseblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
  ret <vscale x 1 x i8> %c
}

; Test same rounding mode in diamond with same dominating vxrm.
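; The dominating write in %entry already sets mode 0, so the checks expect no
; csrwi in either arm of the diamond.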
define <vscale x 1 x i8> @test7(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test7:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    beqz a1, .LBB6_2
; CHECK-NEXT:  # %bb.1: # %trueblock
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB6_2: # %falseblock
; CHECK-NEXT:    vasub.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %c = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %d = phi <vscale x 1 x i8> [%b, %trueblock], [%c, %falseblock]
  ret <vscale x 1 x i8> %d
}

; Test same rounding mode in diamond with same vxrm at merge.
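; All three operations use mode 0, so the checks expect only the csrwi in
; %entry even though the merge-point vaadd ends up duplicated into both arms.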
define <vscale x 1 x i8> @test8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    beqz a1, .LBB7_2
; CHECK-NEXT:  # %bb.1: # %trueblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB7_2: # %falseblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vasub.vv v8, v8, v9
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %c,
    <vscale x 1 x i8> %2,
    iXLen 0, iXLen %3)
  ret <vscale x 1 x i8> %d
}

; Test same rounding mode in diamond with different vxrm at merge.
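; The arms use mode 0 but the merge uses mode 2, so the checks expect a second
; csrwi at the start of %mergeblock.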
define <vscale x 1 x i8> @test9(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind {
; CHECK-LABEL: test9:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    beqz a1, .LBB8_2
; CHECK-NEXT:  # %bb.1: # %trueblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    j .LBB8_3
; CHECK-NEXT:  .LBB8_2: # %falseblock
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vasub.vv v8, v8, v9
; CHECK-NEXT:  .LBB8_3: # %mergeblock
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
entry:
  br i1 %cond, label %trueblock, label %falseblock

trueblock:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

falseblock:
  %b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %0,
    <vscale x 1 x i8> %1,
    iXLen 0, iXLen %3)
  br label %mergeblock

mergeblock:
  %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %c,
    <vscale x 1 x i8> %2,
    iXLen 2, iXLen %3)
  ret <vscale x 1 x i8> %d
}

; Test loop with no dominating vxrm write.
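; The checks expect the csrwi to be placed in the loop preheader so it is not
; re-executed on every iteration.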
define void @test10(ptr nocapture %ptr_dest, ptr nocapture readonly %ptr_op1, ptr nocapture readonly %ptr_op2, iXLen %n) {
; CHECK-LABEL: test10:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    beqz a3, .LBB9_3
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:  .LBB9_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a2)
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    sub a3, a3, a4
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    bnez a3, .LBB9_2
; CHECK-NEXT:  .LBB9_3: # %for.end
; CHECK-NEXT:    ret
entry:
  %tobool.not9 = icmp eq iXLen %n, 0
  br i1 %tobool.not9, label %for.end, label %for.body

for.body:
  %n.addr.011 = phi iXLen [ %n, %entry ], [ %sub, %for.body ]
  %vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5)
  %load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl)
  %load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl)
  %vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl)
  %sub = sub iXLen %n.addr.011, %vl
  %tobool.not = icmp eq iXLen %sub, 0
  br i1 %tobool.not, label %for.end, label %for.body

for.end:
  ret void
}

declare iXLen @llvm.riscv.vsetvli.iXLen(iXLen, iXLen immarg, iXLen immarg)
declare <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8>, ptr nocapture, iXLen)
declare void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8>, ptr nocapture, iXLen)

; Test loop with dominating vxrm write. Make sure there is no write in the loop.
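; The first iteration in %entry already executes a vaadd with mode 2, so the
; checks expect the single csrwi ahead of .LBB10_1 and none inside the loop.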
define void @test11(ptr nocapture %ptr_dest, ptr nocapture readonly %ptr_op1, ptr nocapture readonly %ptr_op2, iXLen %n) {
; CHECK-LABEL: test11:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a2)
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:  .LBB10_1: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    sub a3, a3, a4
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    beqz a3, .LBB10_3
; CHECK-NEXT:  # %bb.2: # %for.body
; CHECK-NEXT:    # in Loop: Header=BB10_1 Depth=1
; CHECK-NEXT:    vsetvli a4, a3, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a2)
; CHECK-NEXT:    j .LBB10_1
; CHECK-NEXT:  .LBB10_3: # %for.end
; CHECK-NEXT:    ret
entry:
  %vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n, iXLen 0, iXLen 5)
  %load1a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl)
  %load2a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl)
  %vadda = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1a, <vscale x 1 x i8> %load2a, iXLen 2, iXLen %vl)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadda, ptr %ptr_dest, iXLen %vl)
  %suba = sub iXLen %n, %vl
  %tobool.not9 = icmp eq iXLen %suba, 0
  br i1 %tobool.not9, label %for.end, label %for.body

for.body:
  %n.addr.011 = phi iXLen [ %suba, %entry ], [ %sub, %for.body ]
  %vl2 = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5)
  %load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl2)
  %load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl2)
  %vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl2)
  tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl2)
  %sub = sub iXLen %n.addr.011, %vl2
  %tobool.not = icmp eq iXLen %sub, 0
  br i1 %tobool.not, label %for.end, label %for.body

for.end:
  ret void
}

; The edge from entry to block2 is a critical edge. The vxrm write in block2
; is redundant when coming from block1, but is needed when coming from entry.
; FIXME: We could remove the write from the end of block1 without splitting the
; critical edge.
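; As the checks stand, block1 ends with csrwi vxrm, 2 and block2 immediately
; writes vxrm again; removing the write in block1 is the improvement the FIXME
; above describes.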
define <vscale x 1 x i8> @test12(i1 %c1, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) {
; CHECK-LABEL: test12:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a0, a0, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v9, v8, v9
; CHECK-NEXT:    beqz a0, .LBB11_2
; CHECK-NEXT:  # %bb.1: # %block1
; CHECK-NEXT:    csrwi vxrm, 1
; CHECK-NEXT:    vaadd.vv v9, v8, v9
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:  .LBB11_2: # %block2
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl)
  br i1 %c1, label %block1, label %block2

block1:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl)
  br label %block2

block2:
  %c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %d
}

; Similar to test12, but introduces a second critical edge from block1 to
; block3. Now the vxrm write at the end of block1 can't be removed because
; it is needed by block3.
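; The trailing csrwi vxrm, 2 in block1 is what block3 relies on, which is why
; block3 needs no write of its own in the checks below, while block2 still
; rewrites mode 2 because it can also be reached directly from entry.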
define <vscale x 1 x i8> @test13(i1 %c1, i1 %c2, i1 %c3, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) {
; CHECK-LABEL: test13:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a0, a0, 1
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetvli zero, a3, e8, mf8, ta, ma
; CHECK-NEXT:    vaadd.vv v10, v8, v9
; CHECK-NEXT:    beqz a0, .LBB12_2
; CHECK-NEXT:  # %bb.1: # %block1
; CHECK-NEXT:    csrwi vxrm, 1
; CHECK-NEXT:    vaadd.vv v10, v8, v10
; CHECK-NEXT:    andi a1, a1, 1
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    beqz a1, .LBB12_3
; CHECK-NEXT:  .LBB12_2: # %block2
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaadd.vv v8, v8, v10
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB12_3: # %block3
; CHECK-NEXT:    vaadd.vv v8, v9, v10
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl)
  br i1 %c1, label %block1, label %block2

block1:
  %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl)
  br i1 %c2, label %block2, label %block3

block2:
  %c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1]
  %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %d

block3:
  %e = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %1, <vscale x 1 x i8> %b, iXLen 2, iXLen %vl)
  ret <vscale x 1 x i8> %e
}