//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//
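//
// Illustrative sketch (simplified; not taken from any specific test): for a
// vector pseudo operating on 32-bit elements, phase 3 materializes the
// configuration the instruction demands, e.g.
//
//   vsetvli zero, a0, e32, m1, ta, ma
//   vadd.vv v8, v9, v10
//
// and omits the vsetvli entirely when the incoming VL/VTYPE state already
// satisfies the instruction's demanded fields.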
28 #include "RISCVSubtarget.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/CodeGen/LiveIntervals.h"
31 #include "llvm/CodeGen/MachineFunctionPass.h"
35 #define DEBUG_TYPE "riscv-insert-vsetvli"
36 #define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumRemovedVSETVL, "Number of VSETVL inst removed");
static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));
static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}
static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}
/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}
static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}
static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}
/// Get the EEW for a load or store instruction.  Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}
static bool isNonZeroLoadImmediate(MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDI &&
         MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
         MI.getOperand(1).getReg() == RISCV::X0 &&
         MI.getOperand(2).getImm() != 0;
}
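// For example, this matches `addi rd, x0, 5` (the usual expansion of
// `li rd, 5`), but not `addi rd, x0, 0` or an ADDI whose base isn't x0.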
/// Return true if this is an operation on mask registers.  Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}
/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification.  Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedMergeOp(const MachineInstr &MI,
                                const MachineRegisterInfo &MRI) {

  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the pass through
    // lanes are undefined.
    return true;

  // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQUENCE whose
  // operands are solely IMPLICIT_DEFs, then the pass through lanes are
  // undefined.
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  if (UseMO.getReg() == RISCV::NoRegister)
    return true;

  if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
    if (UseMI->isImplicitDef())
      return true;

    if (UseMI->isRegSequence()) {
      for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
        MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg());
        if (!SourceMI || !SourceMI->isImplicitDef())
          return false;
      }
      return true;
    }
  }
  return false;
}
namespace {

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used.  If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3,              // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWGreaterThanOrEqualAndLessThan64 =
        1,      // SEW can be changed as long as it's greater
                // than or equal to the original value, but must be less
                // than 64.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used
  bool usedVL() const {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }
  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    };
    OS << ", ";
    OS << "LMUL=" << LMUL << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif
/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  if (Used.LMUL &&
      RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}
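// Example: if only SEWLMULRatio is demanded, e32/m1 and e64/m2 compare as
// compatible, since both encode a SEW/LMUL ratio of 32.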
/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI,
                           const MachineRegisterInfo *MRI,
                           const RISCVSubtarget *ST) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms.  The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode.  This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored..
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype, and can disregard policy bits.  Warning: It's tempting to try
    // doing this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1.  We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedMergeOp(MI, *MRI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  return Res;
}
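// Example of the above (illustrative): for a unit-strided load whose EEW is
// implicit in the opcode, getDemanded() keeps SEWLMULRatio demanded but drops
// the exact SEW and LMUL, so a later vsetvli may change both as long as the
// ratio (and therefore VLMAX) is preserved.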
/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}
  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  void setAVL(VSETVLIInfo Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLReg(Info.getAVLReg());
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }
  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }
  bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (getAVLReg() == RISCV::X0)
        return true;
      if (MachineInstr *MI = MRI.getVRegDef(getAVLReg());
          MI && isNonZeroLoadImmediate(*MI))
        return true;
      return false;
    }
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const MachineRegisterInfo &MRI) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(MRI) && Other.hasNonZeroAVL(MRI));
  }
  bool hasSameAVL(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }
  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }
  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }
  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPEs for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }
  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }
  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this.  MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const MachineRegisterInfo &MRI) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !hasSameAVL(Require))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, MRI))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }
  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return false;

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return false;

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }
  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same.  If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }
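  // Example: intersecting e32/m1 with e64/m2 under the same AVL keeps the
  // common SEW/LMUL ratio of 32, yielding a SEWLMULRatioOnly value rather
  // than collapsing straight to Unknown.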
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif
struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};
class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;
INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)
// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}
static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVII::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}
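// For example, VLEN=128, SEW=32, LMUL=2 gives VLMAX = (128 * 2) / 32 = 8.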
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const RISCVSubtarget &ST,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedMergeOp(MI, *MRI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead.  This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST.getRealMaxVLen(), SEW, VLMul);
        if (ST.getRealMinVLen() == ST.getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLReg(RISCV::X0);
      } else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    assert(isScalarExtractInstr(MI));
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }

  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }

  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
  // AVL operand with the AVL of the defining vsetvli.  We avoid general
  // register AVLs to avoid extending live ranges without being sure we can
  // kill the original source reg entirely.
  if (InstrInfo.hasAVLReg() && InstrInfo.getAVLReg().isVirtual()) {
    MachineInstr *DefMI = MRI->getVRegDef(InstrInfo.getAVLReg());
    if (DefMI && isVectorConfigInstr(*DefMI)) {
      VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
      if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
          (DefInstrInfo.hasAVLImm() || DefInstrInfo.getAVLReg() == RISCV::X0)) {
        InstrInfo.setAVL(DefInstrInfo);
      }
    }
  }

  return InstrInfo;
}
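// Illustration of the sentinel handling above: on a subtarget whose VLEN is
// known exactly to be 128, a pseudo with SEW=32 and LMUL=m1 using the VLMax
// sentinel gets AVLImm=4 (the computed VLMAX) rather than the X0 register
// form.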
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {

  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg() &&
        Info.getAVLReg().isVirtual()) {
      if (MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg())) {
        if (isVectorConfigInstr(*DefMI)) {
          VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
          if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
            BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(RISCV::X0, RegState::Kill)
                .addImm(Info.encodeVTYPE())
                .addReg(RISCV::VL, RegState::Implicit);
            return;
          }
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 1 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(1)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}
static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}
/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI));

  if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
    return true;

  DemandedFields Used = getDemanded(MI, MRI, ST);

  // A slidedown/slideup with an *undefined* merge op can freely clobber
  // elements not copied from the source vector (e.g. masked off, tail, or
  // slideup's prefix). Notes:
  // * We can't modify SEW here since the slide amount is in units of SEW.
  // * VL=1 is special only because we have existing support for zero vs
  //   non-zero VL.  We could generalize this if we had a VL > C predicate.
  // * The LMUL1 restriction is for machines whose latency may depend on VL.
  // * As above, this is only legal for tail "undefined" not "agnostic".
  if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
    Used.VLAny = false;
    Used.VLZeroness = true;
    Used.LMUL = false;
    Used.TailPolicy = false;
  }

  // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated
  // semantically the same as vmv.s.x.  This is particularly useful since we
  // don't have an immediate form of vmv.s.x, and thus frequently use vmv.v.i
  // in its place. Since a splat is non-constant time in LMUL, we do need to be
  // careful to not increase the number of active vector registers (unlike for
  // vmv.s.x.)
  if (isScalarSplatInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
    Used.LMUL = false;
    Used.SEWLMULRatio = false;
    Used.VLAny = false;
    if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
      Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
    else
      Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
    Used.TailPolicy = false;
  }

  if (CurInfo.isCompatible(Used, Require, *MRI))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(Used, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}
// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = true;
  }

  return Info;
}
// Given an incoming state reaching MI, minimally modifies that state so that it
// is compatible with MI. The resulting state is guaranteed to be semantically
// legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  DemandedFields Demanded = getDemanded(MI, MRI, ST);
  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs.  This removes a vsetvli entirely if the types
  // match or allows use of cheaper avl preserving variant if VLMAX doesn't
  // change.  If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
  // variant, so we avoid the transform to prevent extending live range of an
  // avl register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, *MRI) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}
// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}
bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}
void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed a block's output state can change based on
  // the input state.  To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}
// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}
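// A sketch of the CFG shape this catches (simplified, hypothetical MIR):
//
//   bb.preheader:
//     %avl0 = PseudoVSETVLI %n, <vtype>
//   bb.loop:
//     %avl = PHI [ %avl0, %bb.preheader ], [ %avl1, %bb.loop ]
//     ... vector ops configured by %avl ...
//     %avl1 = PseudoVSETVLI %rem, <vtype>
//
// where every PHI input is the GPR result of a vsetvli whose AVL/VTYPE match
// the corresponding predecessor's exit state.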
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct.  Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();
          MachineInstr *VLOpDef = MRI->getVRegDef(Reg);

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);

          // If the AVL was an immediate > 31, then it would have been emitted
          // as an ADDI. However, the ADDI might not have been used in the
          // vsetvli, or a vsetvli might not have been emitted, so it may be
          // dead now.
          if (VLOpDef && TII->isAddImmediate(*VLOpDef, Reg) &&
              MRI->use_nodbg_empty(Reg))
            VLOpDef->eraseFromParent();
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE.  Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}
/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors.  Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
  // the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
  if (AvailableInfo.hasAVLReg() && RISCV::X0 != AvailableInfo.getAVLReg()) {
    MachineInstr *AVLDefMI = MRI->getVRegDef(AvailableInfo.getAVLReg());
    if (!AVLDefMI)
      return;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (AVLDefMI->getParent() != UnavailablePred)
      return;
    for (auto &TermMI : UnavailablePred->terminators())
      if (&TermMI == AVLDefMI)
        return;
  }

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo.  We're looking for two issues here; one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state.  We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence.  All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE.  Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}
static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VLAny |= B.VLAny;
  A.VLZeroness |= B.VLZeroness;
  A.SEW = std::max(A.SEW, B.SEW);
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}
// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used,
                                 const MachineRegisterInfo &MRI) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
                                                       MRI))
        return false;
    }

    auto &AVL = MI.getOperand(1);
    auto &PrevAVL = PrevMI.getOperand(1);
    assert(MRI.isSSA());

    // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
    // For now just check that PrevMI uses the same virtual register.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
      if (AVL.getReg().isPhysical())
        return false;
      if (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg())
        return false;
    }
  }

  if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
    return false;

  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}
void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr *> ToDelete;
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {

    if (!isVectorConfigInstr(MI)) {
      doUnion(Used, getDemanded(MI, MRI, ST));
      continue;
    }

    Register VRegDef = MI.getOperand(0).getReg();
    if (VRegDef != RISCV::X0 &&
        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      } else if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
        if (!isVLPreservingConfig(*NextMI)) {
          MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
          MI.getOperand(0).setIsDead(false);
          Register OldVLReg;
          if (MI.getOperand(1).isReg())
            OldVLReg = MI.getOperand(1).getReg();
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
                                              false);
          if (OldVLReg) {
            MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
            if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
                MRI->use_nodbg_empty(OldVLReg))
              VLOpDef->eraseFromParent();
          }
          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI, MRI, ST);
  }

  NumRemovedVSETVL += ToDelete.size();
  for (auto *MI : ToDelete)
    MI->eraseFromParent();
}
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    VSETVLIInfo TmpStatus;
    HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = TmpStatus;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole based demanded fields based transforms. Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow. We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    doLocalPostpass(MBB);

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
  // of VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}