1 //===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a function pass that inserts VSETVLI instructions where
10 // needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
13 // This pass consists of 3 phases:
15 // Phase 1 collects how each basic block affects VL/VTYPE.
17 // Phase 2 uses the information from phase 1 to do a data flow analysis to
18 // propagate the VL/VTYPE changes through the function. This gives us the
19 // VL/VTYPE at the start of each basic block.
21 // Phase 3 inserts VSETVLI instructions in each basic block. Information from
22 // phase 2 is used to prevent inserting a VSETVLI before the first vector
23 // instruction in the block if possible.
25 //===----------------------------------------------------------------------===//
28 #include "RISCVSubtarget.h"
29 #include "llvm/CodeGen/LiveIntervals.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
34 #define DEBUG_TYPE "riscv-insert-vsetvli"
35 #define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
37 static cl::opt
<bool> DisableInsertVSETVLPHIOpt(
38 "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden
,
39 cl::desc("Disable looking through phis when inserting vsetvlis."));
41 static cl::opt
<bool> UseStrictAsserts(
42 "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden
,
43 cl::desc("Enable strict assertion checking for the dataflow algorithm"));
47 static unsigned getVLOpNum(const MachineInstr
&MI
) {
48 return RISCVII::getVLOpNum(MI
.getDesc());
51 static unsigned getSEWOpNum(const MachineInstr
&MI
) {
52 return RISCVII::getSEWOpNum(MI
.getDesc());
55 static bool isVectorConfigInstr(const MachineInstr
&MI
) {
56 return MI
.getOpcode() == RISCV::PseudoVSETVLI
||
57 MI
.getOpcode() == RISCV::PseudoVSETVLIX0
||
58 MI
.getOpcode() == RISCV::PseudoVSETIVLI
;
61 /// Return true if this is 'vsetvli x0, x0, vtype' which preserves
62 /// VL and only sets VTYPE.
63 static bool isVLPreservingConfig(const MachineInstr
&MI
) {
64 if (MI
.getOpcode() != RISCV::PseudoVSETVLIX0
)
66 assert(RISCV::X0
== MI
.getOperand(1).getReg());
67 return RISCV::X0
== MI
.getOperand(0).getReg();
70 static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr
&MI
) {
71 switch (RISCV::getRVVMCOpcode(MI
.getOpcode())) {
80 static bool isScalarExtractInstr(const MachineInstr
&MI
) {
81 switch (RISCV::getRVVMCOpcode(MI
.getOpcode())) {
90 static bool isScalarInsertInstr(const MachineInstr
&MI
) {
91 switch (RISCV::getRVVMCOpcode(MI
.getOpcode())) {
100 static bool isScalarSplatInstr(const MachineInstr
&MI
) {
101 switch (RISCV::getRVVMCOpcode(MI
.getOpcode())) {
106 case RISCV::VFMV_V_F
:
111 static bool isVSlideInstr(const MachineInstr
&MI
) {
112 switch (RISCV::getRVVMCOpcode(MI
.getOpcode())) {
115 case RISCV::VSLIDEDOWN_VX
:
116 case RISCV::VSLIDEDOWN_VI
:
117 case RISCV::VSLIDEUP_VX
:
118 case RISCV::VSLIDEUP_VI
:
123 /// Get the EEW for a load or store instruction. Return std::nullopt if MI is
124 /// not a load or store which ignores SEW.
125 static std::optional
<unsigned> getEEWForLoadStore(const MachineInstr
&MI
) {
126 switch (RISCV::getRVVMCOpcode(MI
.getOpcode())) {
135 case RISCV::VLSE16_V
:
137 case RISCV::VSSE16_V
:
140 case RISCV::VLSE32_V
:
142 case RISCV::VSSE32_V
:
145 case RISCV::VLSE64_V
:
147 case RISCV::VSSE64_V
:
152 /// Return true if this is an operation on mask registers. Note that
153 /// this includes both arithmetic/logical ops and load/store (vlm/vsm).
154 static bool isMaskRegOp(const MachineInstr
&MI
) {
155 if (!RISCVII::hasSEWOp(MI
.getDesc().TSFlags
))
157 const unsigned Log2SEW
= MI
.getOperand(getSEWOpNum(MI
)).getImm();
158 // A Log2SEW of 0 is an operation on mask registers only.
162 /// Return true if the inactive elements in the result are entirely undefined.
163 /// Note that this is different from "agnostic" as defined by the vector
164 /// specification. Agnostic requires each lane to either be undisturbed, or
165 /// take the value -1; no other value is allowed.
166 static bool hasUndefinedMergeOp(const MachineInstr
&MI
,
167 const MachineRegisterInfo
&MRI
) {
170 if (!MI
.isRegTiedToUseOperand(0, &UseOpIdx
))
171 // If there is no passthrough operand, then the pass through
172 // lanes are undefined.
175 // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQEUENCE whose
176 // operands are solely IMPLICIT_DEFS, then the pass through lanes are
178 const MachineOperand
&UseMO
= MI
.getOperand(UseOpIdx
);
179 if (UseMO
.getReg() == RISCV::NoRegister
)
182 if (MachineInstr
*UseMI
= MRI
.getVRegDef(UseMO
.getReg())) {
183 if (UseMI
->isImplicitDef())
186 if (UseMI
->isRegSequence()) {
187 for (unsigned i
= 1, e
= UseMI
->getNumOperands(); i
< e
; i
+= 2) {
188 MachineInstr
*SourceMI
= MRI
.getVRegDef(UseMI
->getOperand(i
).getReg());
189 if (!SourceMI
|| !SourceMI
->isImplicitDef())
198 /// Which subfields of VL or VTYPE have values we need to preserve?
199 struct DemandedFields
{
200 // Some unknown property of VL is used. If demanded, must preserve entire
203 // Only zero vs non-zero is used. If demanded, can change non-zero values.
204 bool VLZeroness
= false;
205 // What properties of SEW we need to preserve.
207 SEWEqual
= 3, // The exact value of SEW needs to be preserved.
208 SEWGreaterThanOrEqual
= 2, // SEW can be changed as long as it's greater
209 // than or equal to the original value.
210 SEWGreaterThanOrEqualAndLessThan64
=
211 1, // SEW can be changed as long as it's greater
212 // than or equal to the original value, but must be less
214 SEWNone
= 0 // We don't need to preserve SEW at all.
217 bool SEWLMULRatio
= false;
218 bool TailPolicy
= false;
219 bool MaskPolicy
= false;
221 // Return true if any part of VTYPE was used
222 bool usedVTYPE() const {
223 return SEW
|| LMUL
|| SEWLMULRatio
|| TailPolicy
|| MaskPolicy
;
226 // Return true if any property of VL was used
228 return VLAny
|| VLZeroness
;
231 // Mark all VTYPE subfields and properties as demanded
240 // Mark all VL properties as demanded
246 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
247 /// Support for debugging, callable in GDB: V->dump()
248 LLVM_DUMP_METHOD
void dump() const {
253 /// Implement operator<<.
254 void print(raw_ostream
&OS
) const {
256 OS
<< "VLAny=" << VLAny
<< ", ";
257 OS
<< "VLZeroness=" << VLZeroness
<< ", ";
263 case SEWGreaterThanOrEqual
:
264 OS
<< "SEWGreaterThanOrEqual";
266 case SEWGreaterThanOrEqualAndLessThan64
:
267 OS
<< "SEWGreaterThanOrEqualAndLessThan64";
274 OS
<< "LMUL=" << LMUL
<< ", ";
275 OS
<< "SEWLMULRatio=" << SEWLMULRatio
<< ", ";
276 OS
<< "TailPolicy=" << TailPolicy
<< ", ";
277 OS
<< "MaskPolicy=" << MaskPolicy
;
283 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
285 inline raw_ostream
&operator<<(raw_ostream
&OS
, const DemandedFields
&DF
) {
291 /// Return true if moving from CurVType to NewVType is
292 /// indistinguishable from the perspective of an instruction (or set
293 /// of instructions) which use only the Used subfields and properties.
294 static bool areCompatibleVTYPEs(uint64_t CurVType
, uint64_t NewVType
,
295 const DemandedFields
&Used
) {
297 case DemandedFields::SEWNone
:
299 case DemandedFields::SEWEqual
:
300 if (RISCVVType::getSEW(CurVType
) != RISCVVType::getSEW(NewVType
))
303 case DemandedFields::SEWGreaterThanOrEqual
:
304 if (RISCVVType::getSEW(NewVType
) < RISCVVType::getSEW(CurVType
))
307 case DemandedFields::SEWGreaterThanOrEqualAndLessThan64
:
308 if (RISCVVType::getSEW(NewVType
) < RISCVVType::getSEW(CurVType
) ||
309 RISCVVType::getSEW(NewVType
) >= 64)
315 RISCVVType::getVLMUL(CurVType
) != RISCVVType::getVLMUL(NewVType
))
318 if (Used
.SEWLMULRatio
) {
319 auto Ratio1
= RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType
),
320 RISCVVType::getVLMUL(CurVType
));
321 auto Ratio2
= RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType
),
322 RISCVVType::getVLMUL(NewVType
));
323 if (Ratio1
!= Ratio2
)
327 if (Used
.TailPolicy
&& RISCVVType::isTailAgnostic(CurVType
) !=
328 RISCVVType::isTailAgnostic(NewVType
))
330 if (Used
.MaskPolicy
&& RISCVVType::isMaskAgnostic(CurVType
) !=
331 RISCVVType::isMaskAgnostic(NewVType
))
336 /// Return the fields and properties demanded by the provided instruction.
337 DemandedFields
getDemanded(const MachineInstr
&MI
,
338 const MachineRegisterInfo
*MRI
,
339 const RISCVSubtarget
*ST
) {
340 // Warning: This function has to work on both the lowered (i.e. post
341 // emitVSETVLIs) and pre-lowering forms. The main implication of this is
342 // that it can't use the value of a SEW, VL, or Policy operand as they might
343 // be stale after lowering.
345 // Most instructions don't use any of these subfields.
347 // Start conservative if registers are used
348 if (MI
.isCall() || MI
.isInlineAsm() || MI
.readsRegister(RISCV::VL
))
350 if (MI
.isCall() || MI
.isInlineAsm() || MI
.readsRegister(RISCV::VTYPE
))
352 // Start conservative on the unlowered form too
353 uint64_t TSFlags
= MI
.getDesc().TSFlags
;
354 if (RISCVII::hasSEWOp(TSFlags
)) {
356 if (RISCVII::hasVLOp(TSFlags
))
359 // Behavior is independent of mask policy.
360 if (!RISCVII::usesMaskPolicy(TSFlags
))
361 Res
.MaskPolicy
= false;
364 // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
365 // They instead demand the ratio of the two which is used in computing
366 // EMUL, but which allows us the flexibility to change SEW and LMUL
367 // provided we don't change the ratio.
368 // Note: We assume that the instructions initial SEW is the EEW encoded
369 // in the opcode. This is asserted when constructing the VSETVLIInfo.
370 if (getEEWForLoadStore(MI
)) {
371 Res
.SEW
= DemandedFields::SEWNone
;
375 // Store instructions don't use the policy fields.
376 if (RISCVII::hasSEWOp(TSFlags
) && MI
.getNumExplicitDefs() == 0) {
377 Res
.TailPolicy
= false;
378 Res
.MaskPolicy
= false;
381 // If this is a mask reg operation, it only cares about VLMAX.
382 // TODO: Possible extensions to this logic
383 // * Probably ok if available VLMax is larger than demanded
384 // * The policy bits can probably be ignored..
385 if (isMaskRegOp(MI
)) {
386 Res
.SEW
= DemandedFields::SEWNone
;
390 // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
391 if (isScalarInsertInstr(MI
)) {
393 Res
.SEWLMULRatio
= false;
395 // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
396 // need to preserve any other bits and are thus compatible with any larger,
397 // etype and can disregard policy bits. Warning: It's tempting to try doing
398 // this for any tail agnostic operation, but we can't as TA requires
399 // tail lanes to either be the original value or -1. We are writing
400 // unknown bits to the lanes here.
401 if (hasUndefinedMergeOp(MI
, *MRI
)) {
402 if (isFloatScalarMoveOrScalarSplatInstr(MI
) && !ST
->hasVInstructionsF64())
403 Res
.SEW
= DemandedFields::SEWGreaterThanOrEqualAndLessThan64
;
405 Res
.SEW
= DemandedFields::SEWGreaterThanOrEqual
;
406 Res
.TailPolicy
= false;
410 // vmv.x.s, and vmv.f.s are unconditional and ignore everything except SEW.
411 if (isScalarExtractInstr(MI
)) {
412 assert(!RISCVII::hasVLOp(TSFlags
));
414 Res
.SEWLMULRatio
= false;
415 Res
.TailPolicy
= false;
416 Res
.MaskPolicy
= false;
422 /// Defines the abstract state with which the forward dataflow models the
423 /// values of the VL and VTYPE registers after insertion.
435 } State
= Uninitialized
;
437 // Fields from VTYPE.
438 RISCVII::VLMUL VLMul
= RISCVII::LMUL_1
;
440 uint8_t TailAgnostic
: 1;
441 uint8_t MaskAgnostic
: 1;
442 uint8_t SEWLMULRatioOnly
: 1;
446 : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
447 SEWLMULRatioOnly(false) {}
449 static VSETVLIInfo
getUnknown() {
455 bool isValid() const { return State
!= Uninitialized
; }
456 void setUnknown() { State
= Unknown
; }
457 bool isUnknown() const { return State
== Unknown
; }
459 void setAVLReg(Register Reg
) {
464 void setAVLImm(unsigned Imm
) {
469 bool hasAVLImm() const { return State
== AVLIsImm
; }
470 bool hasAVLReg() const { return State
== AVLIsReg
; }
471 Register
getAVLReg() const {
475 unsigned getAVLImm() const {
480 unsigned getSEW() const { return SEW
; }
481 RISCVII::VLMUL
getVLMUL() const { return VLMul
; }
483 bool hasNonZeroAVL(const MachineRegisterInfo
&MRI
) const {
485 return getAVLImm() > 0;
487 if (getAVLReg() == RISCV::X0
)
489 if (MachineInstr
*MI
= MRI
.getVRegDef(getAVLReg());
490 MI
&& MI
->getOpcode() == RISCV::ADDI
&&
491 MI
->getOperand(1).isReg() && MI
->getOperand(2).isImm() &&
492 MI
->getOperand(1).getReg() == RISCV::X0
&&
493 MI
->getOperand(2).getImm() != 0)
500 bool hasEquallyZeroAVL(const VSETVLIInfo
&Other
,
501 const MachineRegisterInfo
&MRI
) const {
502 if (hasSameAVL(Other
))
504 return (hasNonZeroAVL(MRI
) && Other
.hasNonZeroAVL(MRI
));
507 bool hasSameAVL(const VSETVLIInfo
&Other
) const {
508 if (hasAVLReg() && Other
.hasAVLReg())
509 return getAVLReg() == Other
.getAVLReg();
511 if (hasAVLImm() && Other
.hasAVLImm())
512 return getAVLImm() == Other
.getAVLImm();
517 void setVTYPE(unsigned VType
) {
518 assert(isValid() && !isUnknown() &&
519 "Can't set VTYPE for uninitialized or unknown");
520 VLMul
= RISCVVType::getVLMUL(VType
);
521 SEW
= RISCVVType::getSEW(VType
);
522 TailAgnostic
= RISCVVType::isTailAgnostic(VType
);
523 MaskAgnostic
= RISCVVType::isMaskAgnostic(VType
);
525 void setVTYPE(RISCVII::VLMUL L
, unsigned S
, bool TA
, bool MA
) {
526 assert(isValid() && !isUnknown() &&
527 "Can't set VTYPE for uninitialized or unknown");
534 void setVLMul(RISCVII::VLMUL VLMul
) { this->VLMul
= VLMul
; }
536 unsigned encodeVTYPE() const {
537 assert(isValid() && !isUnknown() && !SEWLMULRatioOnly
&&
538 "Can't encode VTYPE for uninitialized or unknown");
539 return RISCVVType::encodeVTYPE(VLMul
, SEW
, TailAgnostic
, MaskAgnostic
);
542 bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly
; }
544 bool hasSameVTYPE(const VSETVLIInfo
&Other
) const {
545 assert(isValid() && Other
.isValid() &&
546 "Can't compare invalid VSETVLIInfos");
547 assert(!isUnknown() && !Other
.isUnknown() &&
548 "Can't compare VTYPE in unknown state");
549 assert(!SEWLMULRatioOnly
&& !Other
.SEWLMULRatioOnly
&&
550 "Can't compare when only LMUL/SEW ratio is valid.");
551 return std::tie(VLMul
, SEW
, TailAgnostic
, MaskAgnostic
) ==
552 std::tie(Other
.VLMul
, Other
.SEW
, Other
.TailAgnostic
,
556 unsigned getSEWLMULRatio() const {
557 assert(isValid() && !isUnknown() &&
558 "Can't use VTYPE for uninitialized or unknown");
559 return RISCVVType::getSEWLMULRatio(SEW
, VLMul
);
562 // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
563 // Note that having the same VLMAX ensures that both share the same
564 // function from AVL to VL; that is, they must produce the same VL value
565 // for any given AVL value.
566 bool hasSameVLMAX(const VSETVLIInfo
&Other
) const {
567 assert(isValid() && Other
.isValid() &&
568 "Can't compare invalid VSETVLIInfos");
569 assert(!isUnknown() && !Other
.isUnknown() &&
570 "Can't compare VTYPE in unknown state");
571 return getSEWLMULRatio() == Other
.getSEWLMULRatio();
574 bool hasCompatibleVTYPE(const DemandedFields
&Used
,
575 const VSETVLIInfo
&Require
) const {
576 return areCompatibleVTYPEs(Require
.encodeVTYPE(), encodeVTYPE(), Used
);
579 // Determine whether the vector instructions requirements represented by
580 // Require are compatible with the previous vsetvli instruction represented
581 // by this. MI is the instruction whose requirements we're considering.
582 bool isCompatible(const DemandedFields
&Used
, const VSETVLIInfo
&Require
,
583 const MachineRegisterInfo
&MRI
) const {
584 assert(isValid() && Require
.isValid() &&
585 "Can't compare invalid VSETVLIInfos");
586 assert(!Require
.SEWLMULRatioOnly
&&
587 "Expected a valid VTYPE for instruction!");
588 // Nothing is compatible with Unknown.
589 if (isUnknown() || Require
.isUnknown())
592 // If only our VLMAX ratio is valid, then this isn't compatible.
593 if (SEWLMULRatioOnly
)
596 if (Used
.VLAny
&& !hasSameAVL(Require
))
599 if (Used
.VLZeroness
&& !hasEquallyZeroAVL(Require
, MRI
))
602 return hasCompatibleVTYPE(Used
, Require
);
605 bool operator==(const VSETVLIInfo
&Other
) const {
606 // Uninitialized is only equal to another Uninitialized.
608 return !Other
.isValid();
609 if (!Other
.isValid())
612 // Unknown is only equal to another Unknown.
614 return Other
.isUnknown();
615 if (Other
.isUnknown())
618 if (!hasSameAVL(Other
))
621 // If the SEWLMULRatioOnly bits are different, then they aren't equal.
622 if (SEWLMULRatioOnly
!= Other
.SEWLMULRatioOnly
)
625 // If only the VLMAX is valid, check that it is the same.
626 if (SEWLMULRatioOnly
)
627 return hasSameVLMAX(Other
);
629 // If the full VTYPE is valid, check that it is the same.
630 return hasSameVTYPE(Other
);
633 bool operator!=(const VSETVLIInfo
&Other
) const {
634 return !(*this == Other
);
637 // Calculate the VSETVLIInfo visible to a block assuming this and Other are
638 // both predecessors.
639 VSETVLIInfo
intersect(const VSETVLIInfo
&Other
) const {
640 // If the new value isn't valid, ignore it.
641 if (!Other
.isValid())
644 // If this value isn't valid, this must be the first predecessor, use it.
648 // If either is unknown, the result is unknown.
649 if (isUnknown() || Other
.isUnknown())
650 return VSETVLIInfo::getUnknown();
652 // If we have an exact, match return this.
656 // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
657 // return an SEW/LMUL ratio only value.
658 if (hasSameAVL(Other
) && hasSameVLMAX(Other
)) {
659 VSETVLIInfo MergeInfo
= *this;
660 MergeInfo
.SEWLMULRatioOnly
= true;
664 // Otherwise the result is unknown.
665 return VSETVLIInfo::getUnknown();
668 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
669 /// Support for debugging, callable in GDB: V->dump()
670 LLVM_DUMP_METHOD
void dump() const {
675 /// Implement operator<<.
677 void print(raw_ostream
&OS
) const {
680 OS
<< "Uninitialized";
684 OS
<< "AVLReg=" << (unsigned)AVLReg
;
686 OS
<< "AVLImm=" << (unsigned)AVLImm
;
688 << "VLMul=" << (unsigned)VLMul
<< ", "
689 << "SEW=" << (unsigned)SEW
<< ", "
690 << "TailAgnostic=" << (bool)TailAgnostic
<< ", "
691 << "MaskAgnostic=" << (bool)MaskAgnostic
<< ", "
692 << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly
<< "}";
697 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
699 inline raw_ostream
&operator<<(raw_ostream
&OS
, const VSETVLIInfo
&V
) {
706 // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
707 // block. Calculated in Phase 2.
710 // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
711 // blocks. Calculated in Phase 2, and used by Phase 3.
714 // Keeps track of whether the block is already in the queue.
715 bool InQueue
= false;
717 BlockData() = default;
720 class RISCVInsertVSETVLI
: public MachineFunctionPass
{
721 const RISCVSubtarget
*ST
;
722 const TargetInstrInfo
*TII
;
723 MachineRegisterInfo
*MRI
;
725 std::vector
<BlockData
> BlockInfo
;
726 std::queue
<const MachineBasicBlock
*> WorkList
;
731 RISCVInsertVSETVLI() : MachineFunctionPass(ID
) {
732 initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
734 bool runOnMachineFunction(MachineFunction
&MF
) override
;
736 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
737 AU
.setPreservesCFG();
738 MachineFunctionPass::getAnalysisUsage(AU
);
741 StringRef
getPassName() const override
{ return RISCV_INSERT_VSETVLI_NAME
; }
744 bool needVSETVLI(const MachineInstr
&MI
, const VSETVLIInfo
&Require
,
745 const VSETVLIInfo
&CurInfo
) const;
746 bool needVSETVLIPHI(const VSETVLIInfo
&Require
,
747 const MachineBasicBlock
&MBB
) const;
748 void insertVSETVLI(MachineBasicBlock
&MBB
, MachineInstr
&MI
,
749 const VSETVLIInfo
&Info
, const VSETVLIInfo
&PrevInfo
);
750 void insertVSETVLI(MachineBasicBlock
&MBB
,
751 MachineBasicBlock::iterator InsertPt
, DebugLoc DL
,
752 const VSETVLIInfo
&Info
, const VSETVLIInfo
&PrevInfo
);
754 void transferBefore(VSETVLIInfo
&Info
, const MachineInstr
&MI
) const;
755 void transferAfter(VSETVLIInfo
&Info
, const MachineInstr
&MI
) const;
756 bool computeVLVTYPEChanges(const MachineBasicBlock
&MBB
,
757 VSETVLIInfo
&Info
) const;
758 void computeIncomingVLVTYPE(const MachineBasicBlock
&MBB
);
759 void emitVSETVLIs(MachineBasicBlock
&MBB
);
760 void doLocalPostpass(MachineBasicBlock
&MBB
);
761 void doPRE(MachineBasicBlock
&MBB
);
762 void insertReadVL(MachineBasicBlock
&MBB
);
765 } // end anonymous namespace
767 char RISCVInsertVSETVLI::ID
= 0;
769 INITIALIZE_PASS(RISCVInsertVSETVLI
, DEBUG_TYPE
, RISCV_INSERT_VSETVLI_NAME
,
772 static VSETVLIInfo
computeInfoForInstr(const MachineInstr
&MI
, uint64_t TSFlags
,
773 const MachineRegisterInfo
*MRI
) {
774 VSETVLIInfo InstrInfo
;
776 bool TailAgnostic
= true;
777 bool MaskAgnostic
= true;
778 if (!hasUndefinedMergeOp(MI
, *MRI
)) {
779 // Start with undisturbed.
780 TailAgnostic
= false;
781 MaskAgnostic
= false;
783 // If there is a policy operand, use it.
784 if (RISCVII::hasVecPolicyOp(TSFlags
)) {
785 const MachineOperand
&Op
= MI
.getOperand(MI
.getNumExplicitOperands() - 1);
786 uint64_t Policy
= Op
.getImm();
787 assert(Policy
<= (RISCVII::TAIL_AGNOSTIC
| RISCVII::MASK_AGNOSTIC
) &&
788 "Invalid Policy Value");
789 TailAgnostic
= Policy
& RISCVII::TAIL_AGNOSTIC
;
790 MaskAgnostic
= Policy
& RISCVII::MASK_AGNOSTIC
;
793 // Some pseudo instructions force a tail agnostic policy despite having a
795 if (RISCVII::doesForceTailAgnostic(TSFlags
))
798 if (!RISCVII::usesMaskPolicy(TSFlags
))
802 RISCVII::VLMUL VLMul
= RISCVII::getLMul(TSFlags
);
804 unsigned Log2SEW
= MI
.getOperand(getSEWOpNum(MI
)).getImm();
805 // A Log2SEW of 0 is an operation on mask registers only.
806 unsigned SEW
= Log2SEW
? 1 << Log2SEW
: 8;
807 assert(RISCVVType::isValidSEW(SEW
) && "Unexpected SEW");
809 if (RISCVII::hasVLOp(TSFlags
)) {
810 const MachineOperand
&VLOp
= MI
.getOperand(getVLOpNum(MI
));
812 int64_t Imm
= VLOp
.getImm();
813 // Convert the VLMax sentinel to X0 register.
814 if (Imm
== RISCV::VLMaxSentinel
)
815 InstrInfo
.setAVLReg(RISCV::X0
);
817 InstrInfo
.setAVLImm(Imm
);
819 InstrInfo
.setAVLReg(VLOp
.getReg());
822 assert(isScalarExtractInstr(MI
));
823 InstrInfo
.setAVLReg(RISCV::NoRegister
);
826 if (std::optional
<unsigned> EEW
= getEEWForLoadStore(MI
)) {
827 assert(SEW
== EEW
&& "Initial SEW doesn't match expected EEW");
830 InstrInfo
.setVTYPE(VLMul
, SEW
, TailAgnostic
, MaskAgnostic
);
835 void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock
&MBB
, MachineInstr
&MI
,
836 const VSETVLIInfo
&Info
,
837 const VSETVLIInfo
&PrevInfo
) {
838 DebugLoc DL
= MI
.getDebugLoc();
839 insertVSETVLI(MBB
, MachineBasicBlock::iterator(&MI
), DL
, Info
, PrevInfo
);
842 // Return a VSETVLIInfo representing the changes made by this VSETVLI or
843 // VSETIVLI instruction.
844 static VSETVLIInfo
getInfoForVSETVLI(const MachineInstr
&MI
) {
846 if (MI
.getOpcode() == RISCV::PseudoVSETIVLI
) {
847 NewInfo
.setAVLImm(MI
.getOperand(1).getImm());
849 assert(MI
.getOpcode() == RISCV::PseudoVSETVLI
||
850 MI
.getOpcode() == RISCV::PseudoVSETVLIX0
);
851 Register AVLReg
= MI
.getOperand(1).getReg();
852 assert((AVLReg
!= RISCV::X0
|| MI
.getOperand(0).getReg() != RISCV::X0
) &&
853 "Can't handle X0, X0 vsetvli yet");
854 NewInfo
.setAVLReg(AVLReg
);
856 NewInfo
.setVTYPE(MI
.getOperand(2).getImm());
861 void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock
&MBB
,
862 MachineBasicBlock::iterator InsertPt
, DebugLoc DL
,
863 const VSETVLIInfo
&Info
, const VSETVLIInfo
&PrevInfo
) {
865 if (PrevInfo
.isValid() && !PrevInfo
.isUnknown()) {
866 // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
868 if (Info
.hasSameAVL(PrevInfo
) && Info
.hasSameVLMAX(PrevInfo
)) {
869 BuildMI(MBB
, InsertPt
, DL
, TII
->get(RISCV::PseudoVSETVLIX0
))
870 .addReg(RISCV::X0
, RegState::Define
| RegState::Dead
)
871 .addReg(RISCV::X0
, RegState::Kill
)
872 .addImm(Info
.encodeVTYPE())
873 .addReg(RISCV::VL
, RegState::Implicit
);
877 // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
878 // it has the same VLMAX we want and the last VL/VTYPE we observed is the
879 // same, we can use the X0, X0 form.
880 if (Info
.hasSameVLMAX(PrevInfo
) && Info
.hasAVLReg() &&
881 Info
.getAVLReg().isVirtual()) {
882 if (MachineInstr
*DefMI
= MRI
->getVRegDef(Info
.getAVLReg())) {
883 if (isVectorConfigInstr(*DefMI
)) {
884 VSETVLIInfo DefInfo
= getInfoForVSETVLI(*DefMI
);
885 if (DefInfo
.hasSameAVL(PrevInfo
) && DefInfo
.hasSameVLMAX(PrevInfo
)) {
886 BuildMI(MBB
, InsertPt
, DL
, TII
->get(RISCV::PseudoVSETVLIX0
))
887 .addReg(RISCV::X0
, RegState::Define
| RegState::Dead
)
888 .addReg(RISCV::X0
, RegState::Kill
)
889 .addImm(Info
.encodeVTYPE())
890 .addReg(RISCV::VL
, RegState::Implicit
);
898 if (Info
.hasAVLImm()) {
899 BuildMI(MBB
, InsertPt
, DL
, TII
->get(RISCV::PseudoVSETIVLI
))
900 .addReg(RISCV::X0
, RegState::Define
| RegState::Dead
)
901 .addImm(Info
.getAVLImm())
902 .addImm(Info
.encodeVTYPE());
906 Register AVLReg
= Info
.getAVLReg();
907 if (AVLReg
== RISCV::NoRegister
) {
908 // We can only use x0, x0 if there's no chance of the vtype change causing
909 // the previous vl to become invalid.
910 if (PrevInfo
.isValid() && !PrevInfo
.isUnknown() &&
911 Info
.hasSameVLMAX(PrevInfo
)) {
912 BuildMI(MBB
, InsertPt
, DL
, TII
->get(RISCV::PseudoVSETVLIX0
))
913 .addReg(RISCV::X0
, RegState::Define
| RegState::Dead
)
914 .addReg(RISCV::X0
, RegState::Kill
)
915 .addImm(Info
.encodeVTYPE())
916 .addReg(RISCV::VL
, RegState::Implicit
);
919 // Otherwise use an AVL of 1 to avoid depending on previous vl.
920 BuildMI(MBB
, InsertPt
, DL
, TII
->get(RISCV::PseudoVSETIVLI
))
921 .addReg(RISCV::X0
, RegState::Define
| RegState::Dead
)
923 .addImm(Info
.encodeVTYPE());
927 if (AVLReg
.isVirtual())
928 MRI
->constrainRegClass(AVLReg
, &RISCV::GPRNoX0RegClass
);
930 // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
931 // opcode if the AVLReg is X0 as they have different register classes for
933 Register DestReg
= RISCV::X0
;
934 unsigned Opcode
= RISCV::PseudoVSETVLI
;
935 if (AVLReg
== RISCV::X0
) {
936 DestReg
= MRI
->createVirtualRegister(&RISCV::GPRRegClass
);
937 Opcode
= RISCV::PseudoVSETVLIX0
;
939 BuildMI(MBB
, InsertPt
, DL
, TII
->get(Opcode
))
940 .addReg(DestReg
, RegState::Define
| RegState::Dead
)
942 .addImm(Info
.encodeVTYPE());
945 static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL
) {
946 auto [LMul
, Fractional
] = RISCVVType::decodeVLMUL(LMUL
);
947 return Fractional
|| LMul
== 1;
950 /// Return true if a VSETVLI is required to transition from CurInfo to Require
952 bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr
&MI
,
953 const VSETVLIInfo
&Require
,
954 const VSETVLIInfo
&CurInfo
) const {
955 assert(Require
== computeInfoForInstr(MI
, MI
.getDesc().TSFlags
, MRI
));
957 if (!CurInfo
.isValid() || CurInfo
.isUnknown() || CurInfo
.hasSEWLMULRatioOnly())
960 DemandedFields Used
= getDemanded(MI
, MRI
, ST
);
962 // A slidedown/slideup with an *undefined* merge op can freely clobber
963 // elements not copied from the source vector (e.g. masked off, tail, or
964 // slideup's prefix). Notes:
965 // * We can't modify SEW here since the slide amount is in units of SEW.
966 // * VL=1 is special only because we have existing support for zero vs
967 // non-zero VL. We could generalize this if we had a VL > C predicate.
968 // * The LMUL1 restriction is for machines whose latency may depend on VL.
969 // * As above, this is only legal for tail "undefined" not "agnostic".
970 if (isVSlideInstr(MI
) && Require
.hasAVLImm() && Require
.getAVLImm() == 1 &&
971 isLMUL1OrSmaller(CurInfo
.getVLMUL()) && hasUndefinedMergeOp(MI
, *MRI
)) {
973 Used
.VLZeroness
= true;
975 Used
.TailPolicy
= false;
978 // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated in the same
979 // semantically as vmv.s.x. This is particularly useful since we don't have an
980 // immediate form of vmv.s.x, and thus frequently use vmv.v.i in it's place.
981 // Since a splat is non-constant time in LMUL, we do need to be careful to not
982 // increase the number of active vector registers (unlike for vmv.s.x.)
983 if (isScalarSplatInstr(MI
) && Require
.hasAVLImm() && Require
.getAVLImm() == 1 &&
984 isLMUL1OrSmaller(CurInfo
.getVLMUL()) && hasUndefinedMergeOp(MI
, *MRI
)) {
986 Used
.SEWLMULRatio
= false;
988 if (isFloatScalarMoveOrScalarSplatInstr(MI
) && !ST
->hasVInstructionsF64())
989 Used
.SEW
= DemandedFields::SEWGreaterThanOrEqualAndLessThan64
;
991 Used
.SEW
= DemandedFields::SEWGreaterThanOrEqual
;
992 Used
.TailPolicy
= false;
995 if (CurInfo
.isCompatible(Used
, Require
, *MRI
))
998 // We didn't find a compatible value. If our AVL is a virtual register,
999 // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
1000 // and the last VL/VTYPE we observed is the same, we don't need a
1002 if (Require
.hasAVLReg() && Require
.getAVLReg().isVirtual() &&
1003 CurInfo
.hasCompatibleVTYPE(Used
, Require
)) {
1004 if (MachineInstr
*DefMI
= MRI
->getVRegDef(Require
.getAVLReg())) {
1005 if (isVectorConfigInstr(*DefMI
)) {
1006 VSETVLIInfo DefInfo
= getInfoForVSETVLI(*DefMI
);
1007 if (DefInfo
.hasSameAVL(CurInfo
) && DefInfo
.hasSameVLMAX(CurInfo
))
1016 // Given an incoming state reaching MI, modifies that state so that it is minimally
1017 // compatible with MI. The resulting state is guaranteed to be semantically legal
1018 // for MI, but may not be the state requested by MI.
1019 void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo
&Info
,
1020 const MachineInstr
&MI
) const {
1021 uint64_t TSFlags
= MI
.getDesc().TSFlags
;
1022 if (!RISCVII::hasSEWOp(TSFlags
))
1025 const VSETVLIInfo NewInfo
= computeInfoForInstr(MI
, TSFlags
, MRI
);
1026 if (Info
.isValid() && !needVSETVLI(MI
, NewInfo
, Info
))
1029 const VSETVLIInfo PrevInfo
= Info
;
1032 if (!RISCVII::hasVLOp(TSFlags
))
1035 // If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
1036 // maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
1038 DemandedFields Demanded
= getDemanded(MI
, MRI
, ST
);
1039 if (!Demanded
.LMUL
&& !Demanded
.SEWLMULRatio
&& Info
.isValid() &&
1040 PrevInfo
.isValid() && !Info
.isUnknown() && !PrevInfo
.isUnknown()) {
1041 if (auto NewVLMul
= RISCVVType::getSameRatioLMUL(
1042 PrevInfo
.getSEW(), PrevInfo
.getVLMUL(), Info
.getSEW()))
1043 Info
.setVLMul(*NewVLMul
);
1046 // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
1047 // VL > 0. We can discard the user requested AVL and just use the last
1048 // one if we can prove it equally zero. This removes a vsetvli entirely
1049 // if the types match or allows use of cheaper avl preserving variant
1050 // if VLMAX doesn't change. If VLMAX might change, we couldn't use
1051 // the 'vsetvli x0, x0, vtype" variant, so we avoid the transform to
1052 // prevent extending live range of an avl register operand.
1053 // TODO: We can probably relax this for immediates.
1054 if (isScalarInsertInstr(MI
) && PrevInfo
.isValid() &&
1055 PrevInfo
.hasEquallyZeroAVL(Info
, *MRI
) &&
1056 Info
.hasSameVLMAX(PrevInfo
)) {
1057 if (PrevInfo
.hasAVLImm())
1058 Info
.setAVLImm(PrevInfo
.getAVLImm());
1060 Info
.setAVLReg(PrevInfo
.getAVLReg());
1064 // If AVL is defined by a vsetvli with the same VLMAX, we can
1065 // replace the AVL operand with the AVL of the defining vsetvli.
1066 // We avoid general register AVLs to avoid extending live ranges
1067 // without being sure we can kill the original source reg entirely.
1068 if (!Info
.hasAVLReg() || !Info
.getAVLReg().isVirtual())
1070 MachineInstr
*DefMI
= MRI
->getVRegDef(Info
.getAVLReg());
1071 if (!DefMI
|| !isVectorConfigInstr(*DefMI
))
1074 VSETVLIInfo DefInfo
= getInfoForVSETVLI(*DefMI
);
1075 if (DefInfo
.hasSameVLMAX(Info
) &&
1076 (DefInfo
.hasAVLImm() || DefInfo
.getAVLReg() == RISCV::X0
)) {
1077 if (DefInfo
.hasAVLImm())
1078 Info
.setAVLImm(DefInfo
.getAVLImm());
1080 Info
.setAVLReg(DefInfo
.getAVLReg());
1085 // Given a state with which we evaluated MI (see transferBefore above for why
1086 // this might be different that the state MI requested), modify the state to
1087 // reflect the changes MI might make.
1088 void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo
&Info
,
1089 const MachineInstr
&MI
) const {
1090 if (isVectorConfigInstr(MI
)) {
1091 Info
= getInfoForVSETVLI(MI
);
1095 if (RISCV::isFaultFirstLoad(MI
)) {
1096 // Update AVL to vl-output of the fault first load.
1097 Info
.setAVLReg(MI
.getOperand(1).getReg());
1101 // If this is something that updates VL/VTYPE that we don't know about, set
1102 // the state to unknown.
1103 if (MI
.isCall() || MI
.isInlineAsm() || MI
.modifiesRegister(RISCV::VL
) ||
1104 MI
.modifiesRegister(RISCV::VTYPE
))
1105 Info
= VSETVLIInfo::getUnknown();
1108 bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock
&MBB
,
1109 VSETVLIInfo
&Info
) const {
1110 bool HadVectorOp
= false;
1112 Info
= BlockInfo
[MBB
.getNumber()].Pred
;
1113 for (const MachineInstr
&MI
: MBB
) {
1114 transferBefore(Info
, MI
);
1116 if (isVectorConfigInstr(MI
) || RISCVII::hasSEWOp(MI
.getDesc().TSFlags
))
1119 transferAfter(Info
, MI
);
1125 void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock
&MBB
) {
1127 BlockData
&BBInfo
= BlockInfo
[MBB
.getNumber()];
1129 BBInfo
.InQueue
= false;
1131 // Start with the previous entry so that we keep the most conservative state
1132 // we have ever found.
1133 VSETVLIInfo InInfo
= BBInfo
.Pred
;
1134 if (MBB
.pred_empty()) {
1135 // There are no predecessors, so use the default starting status.
1136 InInfo
.setUnknown();
1138 for (MachineBasicBlock
*P
: MBB
.predecessors())
1139 InInfo
= InInfo
.intersect(BlockInfo
[P
->getNumber()].Exit
);
1142 // If we don't have any valid predecessor value, wait until we do.
1143 if (!InInfo
.isValid())
1146 // If no change, no need to rerun block
1147 if (InInfo
== BBInfo
.Pred
)
1150 BBInfo
.Pred
= InInfo
;
1151 LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB
)
1152 << " changed to " << BBInfo
.Pred
<< "\n");
1154 // Note: It's tempting to cache the state changes here, but due to the
1155 // compatibility checks performed a blocks output state can change based on
1156 // the input state. To cache, we'd have to add logic for finding
1157 // never-compatible state changes.
1158 VSETVLIInfo TmpStatus
;
1159 computeVLVTYPEChanges(MBB
, TmpStatus
);
1161 // If the new exit value matches the old exit value, we don't need to revisit
1163 if (BBInfo
.Exit
== TmpStatus
)
1166 BBInfo
.Exit
= TmpStatus
;
1167 LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB
)
1168 << " changed to " << BBInfo
.Exit
<< "\n");
1170 // Add the successors to the work list so we can propagate the changed exit
1172 for (MachineBasicBlock
*S
: MBB
.successors())
1173 if (!BlockInfo
[S
->getNumber()].InQueue
) {
1174 BlockInfo
[S
->getNumber()].InQueue
= true;
1179 // If we weren't able to prove a vsetvli was directly unneeded, it might still
1180 // be unneeded if the AVL is a phi node where all incoming values are VL
1181 // outputs from the last VSETVLI in their respective basic blocks.
1182 bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo
&Require
,
1183 const MachineBasicBlock
&MBB
) const {
1184 if (DisableInsertVSETVLPHIOpt
)
1187 if (!Require
.hasAVLReg())
1190 Register AVLReg
= Require
.getAVLReg();
1191 if (!AVLReg
.isVirtual())
1194 // We need the AVL to be produce by a PHI node in this basic block.
1195 MachineInstr
*PHI
= MRI
->getVRegDef(AVLReg
);
1196 if (!PHI
|| PHI
->getOpcode() != RISCV::PHI
|| PHI
->getParent() != &MBB
)
1199 for (unsigned PHIOp
= 1, NumOps
= PHI
->getNumOperands(); PHIOp
!= NumOps
;
1201 Register InReg
= PHI
->getOperand(PHIOp
).getReg();
1202 MachineBasicBlock
*PBB
= PHI
->getOperand(PHIOp
+ 1).getMBB();
1203 const BlockData
&PBBInfo
= BlockInfo
[PBB
->getNumber()];
1204 // If the exit from the predecessor has the VTYPE we are looking for
1205 // we might be able to avoid a VSETVLI.
1206 if (PBBInfo
.Exit
.isUnknown() || !PBBInfo
.Exit
.hasSameVTYPE(Require
))
1209 // We need the PHI input to the be the output of a VSET(I)VLI.
1210 MachineInstr
*DefMI
= MRI
->getVRegDef(InReg
);
1211 if (!DefMI
|| !isVectorConfigInstr(*DefMI
))
1214 // We found a VSET(I)VLI make sure it matches the output of the
1215 // predecessor block.
1216 VSETVLIInfo DefInfo
= getInfoForVSETVLI(*DefMI
);
1217 if (!DefInfo
.hasSameAVL(PBBInfo
.Exit
) ||
1218 !DefInfo
.hasSameVTYPE(PBBInfo
.Exit
))
1222 // If all the incoming values to the PHI checked out, we don't need
1223 // to insert a VSETVLI.
1227 void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock
&MBB
) {
1228 VSETVLIInfo CurInfo
= BlockInfo
[MBB
.getNumber()].Pred
;
1229 // Track whether the prefix of the block we've scanned is transparent
1230 // (meaning has not yet changed the abstract state).
1231 bool PrefixTransparent
= true;
1232 for (MachineInstr
&MI
: MBB
) {
1233 const VSETVLIInfo PrevInfo
= CurInfo
;
1234 transferBefore(CurInfo
, MI
);
1236 // If this is an explicit VSETVLI or VSETIVLI, update our state.
1237 if (isVectorConfigInstr(MI
)) {
1238 // Conservatively, mark the VL and VTYPE as live.
1239 assert(MI
.getOperand(3).getReg() == RISCV::VL
&&
1240 MI
.getOperand(4).getReg() == RISCV::VTYPE
&&
1241 "Unexpected operands where VL and VTYPE should be");
1242 MI
.getOperand(3).setIsDead(false);
1243 MI
.getOperand(4).setIsDead(false);
1244 PrefixTransparent
= false;
1247 uint64_t TSFlags
= MI
.getDesc().TSFlags
;
1248 if (RISCVII::hasSEWOp(TSFlags
)) {
1249 if (PrevInfo
!= CurInfo
) {
1250 // If this is the first implicit state change, and the state change
1251 // requested can be proven to produce the same register contents, we
1252 // can skip emitting the actual state change and continue as if we
1253 // had since we know the GPR result of the implicit state change
1254 // wouldn't be used and VL/VTYPE registers are correct. Note that
1255 // we *do* need to model the state as if it changed as while the
1256 // register contents are unchanged, the abstract model can change.
1257 if (!PrefixTransparent
|| needVSETVLIPHI(CurInfo
, MBB
))
1258 insertVSETVLI(MBB
, MI
, CurInfo
, PrevInfo
);
1259 PrefixTransparent
= false;
1262 if (RISCVII::hasVLOp(TSFlags
)) {
1263 MachineOperand
&VLOp
= MI
.getOperand(getVLOpNum(MI
));
1265 // Erase the AVL operand from the instruction.
1266 VLOp
.setReg(RISCV::NoRegister
);
1267 VLOp
.setIsKill(false);
1269 MI
.addOperand(MachineOperand::CreateReg(RISCV::VL
, /*isDef*/ false,
1272 MI
.addOperand(MachineOperand::CreateReg(RISCV::VTYPE
, /*isDef*/ false,
1276 if (MI
.isCall() || MI
.isInlineAsm() || MI
.modifiesRegister(RISCV::VL
) ||
1277 MI
.modifiesRegister(RISCV::VTYPE
))
1278 PrefixTransparent
= false;
1280 transferAfter(CurInfo
, MI
);
1283 // If we reach the end of the block and our current info doesn't match the
1284 // expected info, insert a vsetvli to correct.
1285 if (!UseStrictAsserts
) {
1286 const VSETVLIInfo
&ExitInfo
= BlockInfo
[MBB
.getNumber()].Exit
;
1287 if (CurInfo
.isValid() && ExitInfo
.isValid() && !ExitInfo
.isUnknown() &&
1288 CurInfo
!= ExitInfo
) {
1289 // Note there's an implicit assumption here that terminators never use
1290 // or modify VL or VTYPE. Also, fallthrough will return end().
1291 auto InsertPt
= MBB
.getFirstInstrTerminator();
1292 insertVSETVLI(MBB
, InsertPt
, MBB
.findDebugLoc(InsertPt
), ExitInfo
,
1298 if (UseStrictAsserts
&& CurInfo
.isValid()) {
1299 const auto &Info
= BlockInfo
[MBB
.getNumber()];
1300 if (CurInfo
!= Info
.Exit
) {
1301 LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB
) << "\n");
1302 LLVM_DEBUG(dbgs() << " begin state: " << Info
.Pred
<< "\n");
1303 LLVM_DEBUG(dbgs() << " expected end state: " << Info
.Exit
<< "\n");
1304 LLVM_DEBUG(dbgs() << " actual end state: " << CurInfo
<< "\n");
1306 assert(CurInfo
== Info
.Exit
&&
1307 "InsertVSETVLI dataflow invariant violated");
1311 /// Return true if the VL value configured by a vset(i)vli with the
1312 /// provided Info must be equal to the requested AVL. That is, that
1314 static bool willVLBeAVL(const VSETVLIInfo
&Info
, const RISCVSubtarget
&ST
) {
1315 if (!Info
.hasAVLImm())
1316 // VLMAX is always the same value.
1317 // TODO: Could extend to other registers by looking at the associated vreg
1319 return RISCV::X0
== Info
.getAVLReg();
1321 unsigned AVL
= Info
.getAVLImm();
1322 unsigned SEW
= Info
.getSEW();
1323 unsigned AVLInBits
= AVL
* SEW
;
1327 std::tie(LMul
, Fractional
) = RISCVVType::decodeVLMUL(Info
.getVLMUL());
1330 return ST
.getRealMinVLen() / LMul
>= AVLInBits
;
1331 return ST
.getRealMinVLen() * LMul
>= AVLInBits
;
1334 /// Perform simple partial redundancy elimination of the VSETVLI instructions
1335 /// we're about to insert by looking for cases where we can PRE from the
1336 /// beginning of one block to the end of one of its predecessors. Specifically,
1337 /// this is geared to catch the common case of a fixed length vsetvl in a single
1338 /// block loop when it could execute once in the preheader instead.
1339 void RISCVInsertVSETVLI::doPRE(MachineBasicBlock
&MBB
) {
1340 if (!BlockInfo
[MBB
.getNumber()].Pred
.isUnknown())
1343 MachineBasicBlock
*UnavailablePred
= nullptr;
1344 VSETVLIInfo AvailableInfo
;
1345 for (MachineBasicBlock
*P
: MBB
.predecessors()) {
1346 const VSETVLIInfo
&PredInfo
= BlockInfo
[P
->getNumber()].Exit
;
1347 if (PredInfo
.isUnknown()) {
1348 if (UnavailablePred
)
1350 UnavailablePred
= P
;
1351 } else if (!AvailableInfo
.isValid()) {
1352 AvailableInfo
= PredInfo
;
1353 } else if (AvailableInfo
!= PredInfo
) {
1358 // Unreachable, single pred, or full redundancy. Note that FRE is handled by
1360 if (!UnavailablePred
|| !AvailableInfo
.isValid())
1363 // Critical edge - TODO: consider splitting?
1364 if (UnavailablePred
->succ_size() != 1)
1367 // If VL can be less than AVL, then we can't reduce the frequency of exec.
1368 if (!willVLBeAVL(AvailableInfo
, *ST
))
1371 // Model the effect of changing the input state of the block MBB to
1372 // AvailableInfo. We're looking for two issues here; one legality,
1373 // one profitability.
1374 // 1) If the block doesn't use some of the fields from VL or VTYPE, we
1375 // may hit the end of the block with a different end state. We can
1376 // not make this change without reflowing later blocks as well.
1377 // 2) If we don't actually remove a transition, inserting a vsetvli
1378 // into the predecessor block would be correct, but unprofitable.
1379 VSETVLIInfo OldInfo
= BlockInfo
[MBB
.getNumber()].Pred
;
1380 VSETVLIInfo CurInfo
= AvailableInfo
;
1381 int TransitionsRemoved
= 0;
1382 for (const MachineInstr
&MI
: MBB
) {
1383 const VSETVLIInfo LastInfo
= CurInfo
;
1384 const VSETVLIInfo LastOldInfo
= OldInfo
;
1385 transferBefore(CurInfo
, MI
);
1386 transferBefore(OldInfo
, MI
);
1387 if (CurInfo
== LastInfo
)
1388 TransitionsRemoved
++;
1389 if (LastOldInfo
== OldInfo
)
1390 TransitionsRemoved
--;
1391 transferAfter(CurInfo
, MI
);
1392 transferAfter(OldInfo
, MI
);
1393 if (CurInfo
== OldInfo
)
1394 // Convergence. All transitions after this must match by construction.
1397 if (CurInfo
!= OldInfo
|| TransitionsRemoved
<= 0)
1398 // Issues 1 and 2 above
1401 // Finally, update both data flow state and insert the actual vsetvli.
1402 // Doing both keeps the code in sync with the dataflow results, which
1403 // is critical for correctness of phase 3.
1404 auto OldExit
= BlockInfo
[UnavailablePred
->getNumber()].Exit
;
1405 LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB
.getName() << " to "
1406 << UnavailablePred
->getName() << " with state "
1407 << AvailableInfo
<< "\n");
1408 BlockInfo
[UnavailablePred
->getNumber()].Exit
= AvailableInfo
;
1409 BlockInfo
[MBB
.getNumber()].Pred
= AvailableInfo
;
1411 // Note there's an implicit assumption here that terminators never use
1412 // or modify VL or VTYPE. Also, fallthrough will return end().
1413 auto InsertPt
= UnavailablePred
->getFirstInstrTerminator();
1414 insertVSETVLI(*UnavailablePred
, InsertPt
,
1415 UnavailablePred
->findDebugLoc(InsertPt
),
1416 AvailableInfo
, OldExit
);
1419 static void doUnion(DemandedFields
&A
, DemandedFields B
) {
1421 A
.VLZeroness
|= B
.VLZeroness
;
1422 A
.SEW
= std::max(A
.SEW
, B
.SEW
);
1424 A
.SEWLMULRatio
|= B
.SEWLMULRatio
;
1425 A
.TailPolicy
|= B
.TailPolicy
;
1426 A
.MaskPolicy
|= B
.MaskPolicy
;
1429 static bool isNonZeroAVL(const MachineOperand
&MO
) {
1431 return RISCV::X0
== MO
.getReg();
1433 return 0 != MO
.getImm();
1436 // Return true if we can mutate PrevMI to match MI without changing any the
1437 // fields which would be observed.
1438 static bool canMutatePriorConfig(const MachineInstr
&PrevMI
,
1439 const MachineInstr
&MI
,
1440 const DemandedFields
&Used
) {
1441 // If the VL values aren't equal, return false if either a) the former is
1442 // demanded, or b) we can't rewrite the former to be the later for
1443 // implementation reasons.
1444 if (!isVLPreservingConfig(MI
)) {
1448 // We don't bother to handle the equally zero case here as it's largely
1450 if (Used
.VLZeroness
) {
1451 if (isVLPreservingConfig(PrevMI
))
1453 if (!isNonZeroAVL(MI
.getOperand(1)) ||
1454 !isNonZeroAVL(PrevMI
.getOperand(1)))
1458 // TODO: Track whether the register is defined between
1460 if (MI
.getOperand(1).isReg() &&
1461 RISCV::X0
!= MI
.getOperand(1).getReg())
1465 if (!PrevMI
.getOperand(2).isImm() || !MI
.getOperand(2).isImm())
1468 auto PriorVType
= PrevMI
.getOperand(2).getImm();
1469 auto VType
= MI
.getOperand(2).getImm();
1470 return areCompatibleVTYPEs(PriorVType
, VType
, Used
);
1473 void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock
&MBB
) {
1474 MachineInstr
*NextMI
= nullptr;
1475 // We can have arbitrary code in successors, so VL and VTYPE
1476 // must be considered demanded.
1477 DemandedFields Used
;
1480 SmallVector
<MachineInstr
*> ToDelete
;
1481 for (MachineInstr
&MI
: make_range(MBB
.rbegin(), MBB
.rend())) {
1483 if (!isVectorConfigInstr(MI
)) {
1484 doUnion(Used
, getDemanded(MI
, MRI
, ST
));
1488 Register VRegDef
= MI
.getOperand(0).getReg();
1489 if (VRegDef
!= RISCV::X0
&&
1490 !(VRegDef
.isVirtual() && MRI
->use_nodbg_empty(VRegDef
)))
1494 if (!Used
.usedVL() && !Used
.usedVTYPE()) {
1495 ToDelete
.push_back(&MI
);
1496 // Leave NextMI unchanged
1498 } else if (canMutatePriorConfig(MI
, *NextMI
, Used
)) {
1499 if (!isVLPreservingConfig(*NextMI
)) {
1500 MI
.getOperand(0).setReg(NextMI
->getOperand(0).getReg());
1501 MI
.getOperand(0).setIsDead(false);
1502 if (NextMI
->getOperand(1).isImm())
1503 MI
.getOperand(1).ChangeToImmediate(NextMI
->getOperand(1).getImm());
1505 MI
.getOperand(1).ChangeToRegister(NextMI
->getOperand(1).getReg(), false);
1506 MI
.setDesc(NextMI
->getDesc());
1508 MI
.getOperand(2).setImm(NextMI
->getOperand(2).getImm());
1509 ToDelete
.push_back(NextMI
);
1514 Used
= getDemanded(MI
, MRI
, ST
);
1517 for (auto *MI
: ToDelete
)
1518 MI
->eraseFromParent();
1521 void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock
&MBB
) {
1522 for (auto I
= MBB
.begin(), E
= MBB
.end(); I
!= E
;) {
1523 MachineInstr
&MI
= *I
++;
1524 if (RISCV::isFaultFirstLoad(MI
)) {
1525 Register VLOutput
= MI
.getOperand(1).getReg();
1526 if (!MRI
->use_nodbg_empty(VLOutput
))
1527 BuildMI(MBB
, I
, MI
.getDebugLoc(), TII
->get(RISCV::PseudoReadVL
),
1529 // We don't use the vl output of the VLEFF/VLSEGFF anymore.
1530 MI
.getOperand(1).setReg(RISCV::X0
);
1535 bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction
&MF
) {
1536 // Skip if the vector extension is not enabled.
1537 ST
= &MF
.getSubtarget
<RISCVSubtarget
>();
1538 if (!ST
->hasVInstructions())
1541 LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF
.getName() << "\n");
1543 TII
= ST
->getInstrInfo();
1544 MRI
= &MF
.getRegInfo();
1546 assert(BlockInfo
.empty() && "Expect empty block infos");
1547 BlockInfo
.resize(MF
.getNumBlockIDs());
1549 bool HaveVectorOp
= false;
1551 // Phase 1 - determine how VL/VTYPE are affected by the each block.
1552 for (const MachineBasicBlock
&MBB
: MF
) {
1553 VSETVLIInfo TmpStatus
;
1554 HaveVectorOp
|= computeVLVTYPEChanges(MBB
, TmpStatus
);
1555 // Initial exit state is whatever change we found in the block.
1556 BlockData
&BBInfo
= BlockInfo
[MBB
.getNumber()];
1557 BBInfo
.Exit
= TmpStatus
;
1558 LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB
)
1559 << " is " << BBInfo
.Exit
<< "\n");
1563 // If we didn't find any instructions that need VSETVLI, we're done.
1564 if (!HaveVectorOp
) {
1569 // Phase 2 - determine the exit VL/VTYPE from each block. We add all
1570 // blocks to the list here, but will also add any that need to be revisited
1571 // during Phase 2 processing.
1572 for (const MachineBasicBlock
&MBB
: MF
) {
1573 WorkList
.push(&MBB
);
1574 BlockInfo
[MBB
.getNumber()].InQueue
= true;
1576 while (!WorkList
.empty()) {
1577 const MachineBasicBlock
&MBB
= *WorkList
.front();
1579 computeIncomingVLVTYPE(MBB
);
1582 // Perform partial redundancy elimination of vsetvli transitions.
1583 for (MachineBasicBlock
&MBB
: MF
)
1586 // Phase 3 - add any vsetvli instructions needed in the block. Use the
1587 // Phase 2 information to avoid adding vsetvlis before the first vector
1588 // instruction in the block if the VL/VTYPE is satisfied by its
1590 for (MachineBasicBlock
&MBB
: MF
)
1593 // Now that all vsetvlis are explicit, go through and do block local
1594 // DSE and peephole based demanded fields based transforms. Note that
1595 // this *must* be done outside the main dataflow so long as we allow
1596 // any cross block analysis within the dataflow. We can't have both
1597 // demanded fields based mutation and non-local analysis in the
1598 // dataflow at the same time without introducing inconsistencies.
1599 for (MachineBasicBlock
&MBB
: MF
)
1600 doLocalPostpass(MBB
);
1602 // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
1603 // of VLEFF/VLSEGFF.
1604 for (MachineBasicBlock
&MBB
: MF
)
1608 return HaveVectorOp
;
1611 /// Returns an instance of the Insert VSETVLI pass.
1612 FunctionPass
*llvm::createRISCVInsertVSETVLIPass() {
1613 return new RISCVInsertVSETVLI();