//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//
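//
// Illustrative sketch (not from the original file): for a straight-line
// e32/m1 loop body, phase 3 materializes the configuration once,
//
//   vsetvli zero, a0, e32, m1, ta, ma
//   vle32.v v8, (a1)
//   vadd.vv v8, v8, v9
//
// and the dataflow from phase 2 lets later blocks skip the vsetvli when the
// incoming VL/VTYPE already satisfies their first vector instruction.
//
//===----------------------------------------------------------------------===//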
28 #include "RISCVSubtarget.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/CodeGen/LiveDebugVariables.h"
31 #include "llvm/CodeGen/LiveIntervals.h"
32 #include "llvm/CodeGen/LiveStacks.h"
33 #include "llvm/CodeGen/MachineFunctionPass.h"
37 #define DEBUG_TYPE "riscv-insert-vsetvli"
38 #define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
39 #define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"
STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

namespace {
static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}
/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}
static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}
/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}
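// Illustrative EEW/EMUL arithmetic (not from the original source): vle32.v
// encodes EEW=32, and its effective EMUL is (EEW/SEW)*LMUL. Under either
// {e8, mf4} or {e32, m1} (both SEW/LMUL ratio 32) the load runs with
// EMUL = m1, which is why such instructions only demand the SEW/LMUL ratio
// (see getDemanded below).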
static bool isNonZeroLoadImmediate(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDI &&
         MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
         MI.getOperand(1).getReg() == RISCV::X0 &&
         MI.getOperand(2).getImm() != 0;
}
/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}
/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedMergeOp(const MachineInstr &MI) {
  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the pass through
    // lanes are undefined.
    return true;

  // All undefined passthrus should be $noreg: see
  // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
}
/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3,              // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWGreaterThanOrEqualAndLessThan64 =
        1,      // SEW can be changed as long as it's greater
                // than or equal to the original value, but must be less
                // than 64.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used
  bool usedVL() const {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

  // Make this the result of demanding both the fields in this and B.
  void doUnion(const DemandedFields &B) {
    VLAny |= B.VLAny;
    VLZeroness |= B.VLZeroness;
    SEW = std::max(SEW, B.SEW);
    LMUL |= B.LMUL;
    SEWLMULRatio |= B.SEWLMULRatio;
    TailPolicy |= B.TailPolicy;
    MaskPolicy |= B.MaskPolicy;
  }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    };
    OS << ", ";
    OS << "LMUL=" << LMUL << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif
/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  if (Used.LMUL &&
      RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}
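// Worked example (illustrative): if only Used.SEW == SEWGreaterThanOrEqual is
// demanded, moving from {e16, m1} to {e32, m1} is compatible (SEW grew) while
// moving to {e8, m1} is not. If instead only Used.SEWLMULRatio is demanded,
// then {e16, mf2} and {e32, m1} are compatible since both have SEW/LMUL
// ratio 32.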
/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype and can disregard policy bits. Warning: It's tempting to try doing
    // this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedMergeOp(MI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s, and vmv.f.s are unconditional and ignore everything except SEW.
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  return Res;
}
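// Illustrative consequence of the relaxations above: a vmv.s.x selected at
// e32 with an undefined passthru can run under an existing e64 configuration;
// the bits it writes beyond the requested element width land in lanes whose
// contents are undefined anyway, and only VL = 0 vs VL > 0 changes its
// behavior.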
/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  struct AVLDef {
    const MachineInstr *DefMI;
    Register DefReg;
  };
  union {
    AVLDef AVLRegDef;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    AVLIsVLMAX,
    AVLIsIgnored,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLRegDef(const MachineInstr *DefMI, Register AVLReg) {
    assert(DefMI && AVLReg.isVirtual());
    AVLRegDef.DefMI = DefMI;
    AVLRegDef.DefReg = AVLReg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  void setAVLVLMAX() { State = AVLIsVLMAX; }

  void setAVLIgnored() { State = AVLIsIgnored; }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
  bool hasAVLIgnored() const { return State == AVLIsIgnored; }
  Register getAVLReg() const {
    assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
    return AVLRegDef.DefReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  const MachineInstr &getAVLDefMI() const {
    assert(hasAVLReg() && AVLRegDef.DefMI);
    return *AVLRegDef.DefMI;
  }

  void setAVL(VSETVLIInfo Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLRegDef(&Info.getAVLDefMI(), Info.getAVLReg());
    else if (Info.hasAVLVLMAX())
      setAVLVLMAX();
    else if (Info.hasAVLIgnored())
      setAVLIgnored();
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }
  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg())
      return isNonZeroLoadImmediate(getAVLDefMI());
    if (hasAVLVLMAX())
      return true;
    if (hasAVLIgnored())
      return true;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL() && Other.hasNonZeroAVL());
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg())
      return AVLRegDef.DefMI == Other.AVLRegDef.DefMI &&
             AVLRegDef.DefReg == Other.AVLRegDef.DefReg;

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    if (hasAVLVLMAX())
      return Other.hasAVLVLMAX() && hasSameVLMAX(Other);

    if (hasAVLIgnored())
      return Other.hasAVLIgnored();

    return false;
  }
  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }
  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const MachineRegisterInfo &MRI) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return false;

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return false;

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }
  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)getAVLReg();
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    if (hasAVLVLMAX())
      OS << "AVLVLMAX";
    if (hasAVLIgnored())
      OS << "AVLIgnored";
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif
struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};
class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};
class RISCVCoalesceVSETVLI : public MachineFunctionPass {
public:
  static char ID;
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;

  RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addRequired<LiveIntervals>();
    AU.addPreserved<LiveIntervals>();
    AU.addRequired<SlotIndexes>();
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveDebugVariables>();
    AU.addPreserved<LiveStacks>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }

private:
  bool coalesceVSETVLIs(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

char RISCVCoalesceVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
                RISCV_COALESCE_VSETVLI_NAME, false, false)
// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    if (AVLReg == RISCV::X0)
      NewInfo.setAVLVLMAX();
    else
      NewInfo.setAVLRegDef(MRI.getUniqueVRegDef(AVLReg), AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}
static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVII::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}
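// Illustrative arithmetic: with VLEN=128, SEW=32, LMUL=2 this returns
// (128 * 2) / 32 = 8; with fractional LMUL=1/2 it returns (128 / 2) / 32 = 2.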
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const RISCVSubtarget &ST,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedMergeOp(MI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead. This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST.getRealMaxVLen(), SEW, VLMul);
        if (ST.getRealMinVLen() == ST.getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLVLMAX();
      } else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLRegDef(MRI->getUniqueVRegDef(VLOp.getReg()),
                             VLOp.getReg());
    }
  } else {
    assert(isScalarExtractInstr(MI));
    // TODO: If we are more clever about x0,x0 insertion then we should be able
    // to deduce that the VL is ignored based off of DemandedFields, and remove
    // the AVLIsIgnored state. Then we can just use an arbitrary immediate AVL.
    InstrInfo.setAVLIgnored();
  }

  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }

  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
  // AVL operand with the AVL of the defining vsetvli. We avoid general
  // register AVLs to avoid extending live ranges without being sure we can
  // kill the original source reg entirely.
  if (InstrInfo.hasAVLReg()) {
    const MachineInstr &DefMI = InstrInfo.getAVLDefMI();
    if (isVectorConfigInstr(DefMI)) {
      VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(DefMI, *MRI);
      if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
          (DefInstrInfo.hasAVLImm() || DefInstrInfo.hasAVLVLMAX()))
        InstrInfo.setAVL(DefInstrInfo);
    }
  }

  return InstrInfo;
}
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI.
    // If it has the same VLMAX we want and the last VL/VTYPE we observed is
    // the same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
      const MachineInstr &DefMI = Info.getAVLDefMI();
      if (isVectorConfigInstr(DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI);
        if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
          BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
              .addReg(RISCV::X0, RegState::Define | RegState::Dead)
              .addReg(RISCV::X0, RegState::Kill)
              .addImm(Info.encodeVTYPE())
              .addReg(RISCV::VL, RegState::Implicit);
          return;
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (Info.hasAVLIgnored()) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 1 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(1)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (Info.hasAVLVLMAX()) {
    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(DestReg, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
  BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
      .addReg(RISCV::X0, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}
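// The branches above emit one of these forms (illustrative assembly sketch):
//   vsetivli zero, 4, e32, m1, ta, ma    ; immediate AVL (fits in 5 bits)
//   vsetvli  zero, zero, e32, m1, ta, ma ; keep current VL, change VTYPE only
//   vsetvli  rd, zero, e32, m1, ta, ma   ; AVL = VLMAX (rd def is dead)
//   vsetvli  zero, rs1, e32, m1, ta, ma  ; AVL taken from register rs1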
static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}
/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI));

  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  DemandedFields Used = getDemanded(MI, ST);

  // A slidedown/slideup with an *undefined* merge op can freely clobber
  // elements not copied from the source vector (e.g. masked off, tail, or
  // slideup's prefix). Notes:
  // * We can't modify SEW here since the slide amount is in units of SEW.
  // * VL=1 is special only because we have existing support for zero vs
  //   non-zero VL. We could generalize this if we had a VL > C predicate.
  // * The LMUL1 restriction is for machines whose latency may depend on VL.
  // * As above, this is only legal for tail "undefined" not "agnostic".
  if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI)) {
    Used.VLAny = false;
    Used.VLZeroness = true;
    Used.LMUL = false;
    Used.TailPolicy = false;
  }

  // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated the same
  // semantically as vmv.s.x. This is particularly useful since we don't have
  // an immediate form of vmv.s.x, and thus frequently use vmv.v.i in its
  // place. Since a splat is non-constant time in LMUL, we do need to be
  // careful to not increase the number of active vector registers (unlike for
  // vmv.s.x.)
  if (isScalarSplatInstr(MI) && Require.hasAVLImm() &&
      Require.getAVLImm() == 1 && isLMUL1OrSmaller(CurInfo.getVLMUL()) &&
      hasUndefinedMergeOp(MI)) {
    Used.LMUL = false;
    Used.SEWLMULRatio = false;
    Used.VLAny = false;
    if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
      Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
    else
      Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
    Used.TailPolicy = false;
  }

  if (CurInfo.isCompatible(Used, Require, *MRI))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && CurInfo.hasCompatibleVTYPE(Used, Require)) {
    const MachineInstr &DefMI = Require.getAVLDefMI();
    if (isVectorConfigInstr(DefMI)) {
      VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI);
      if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
        return false;
    }
  }

  return true;
}
// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = true;
  }

  return Info;
}
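// Illustrative example: if the incoming state is {e32, m1} (ratio 32) and the
// instruction requests {e16, m1} but demands neither LMUL nor the SEW/LMUL
// ratio, getSameRatioLMUL yields mf2, so the adjusted state {e16, mf2} keeps
// ratio 32 and the old VL remains correct.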
// Given an incoming state reaching MI, minimally modifies that state so that
// it is compatible with MI. The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  DemandedFields Demanded = getDemanded(MI, ST);
  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs. This removes a vsetvli entirely if the types
  // match or allows use of cheaper avl preserving variant if VLMAX doesn't
  // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
  // variant, so we avoid the transform to prevent extending live range of an
  // avl register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}
// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI, *MRI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLRegDef(MRI->getUniqueVRegDef(MI.getOperand(1).getReg()),
                      MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
      MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Info = VSETVLIInfo::getUnknown();
}
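// Note on the fault-first case (informal): a fault-only-first load such as
// vle32ff.v may truncate vl to the number of elements successfully accessed,
// so the abstract AVL after it must be the instruction's vl output rather
// than whatever the preceding vsetvli established.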
bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}
void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based
  // on the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to
  // revisit any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}
// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  const MachineInstr *PHI = &Require.getAVLDefMI();
  if (PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getUniqueVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, *MRI);
    if (DefInfo != PBBExit)
      return true;

    // Require has the same VL as PBBExit, so if the exit from the
    // predecessor has the VTYPE we are looking for we might be able
    // to avoid a VSETVLI.
    if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();
          MachineInstr *VLOpDef = MRI->getUniqueVRegDef(Reg);

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);

          // If the AVL was an immediate > 31, then it would have been emitted
          // as an ADDI. However, the ADDI might not have been used in the
          // vsetvli, or a vsetvli might not have been emitted, so it may be
          // dead now.
          if (TII->isAddImmediate(*VLOpDef, Reg) && MRI->use_nodbg_empty(Reg))
            VLOpDef->eraseFromParent();
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  const auto &Info = BlockInfo[MBB.getNumber()];
  if (CurInfo != Info.Exit) {
    LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
    LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
    LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
    LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
  }
  assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
}
/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
  // the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
  if (AvailableInfo.hasAVLReg()) {
    const MachineInstr *AVLDefMI = &AvailableInfo.getAVLDefMI();
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (AVLDefMI->getParent() != UnavailablePred)
      return;
    for (auto &TermMI : UnavailablePred->terminators())
      if (&TermMI == AVLDefMI)
        return;
  }

  // If the AVL isn't used in its predecessors then bail, since we have no AVL
  // to insert a vsetvli with.
  if (AvailableInfo.hasAVLIgnored())
    return;

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here; one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}
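// Illustrative shape of the transform (assembly sketch, not from the original
// source): hoisting the entry vsetvli of a single-block loop into its
// preheader so it executes once,
//
//   preheader:                        preheader:
//     ...                               vsetivli zero, 4, e32, m1, ta, ma
//   loop:                      ==>    loop:
//     vsetivli zero, 4, e32, ...        vadd.vv v8, v8, v9
//     vadd.vv v8, v8, v9                bnez a0, loop
//     bnez a0, loop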
// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used,
                                 const MachineRegisterInfo &MRI) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI, MRI)
               .hasEquallyZeroAVL(getInfoForVSETVLI(MI, MRI)))
        return false;
    }

    auto &AVL = MI.getOperand(1);
    auto &PrevAVL = PrevMI.getOperand(1);

    // If the AVL is a register, we need to make sure MI's AVL dominates
    // PrevMI. For now just check that PrevMI uses the same virtual register.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
        (!MRI.hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
         PrevAVL.getReg() != AVL.getReg()))
      return false;
  }

  assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}
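// Illustrative example for the reverse walk below: given
//   vsetivli zero, 4, e32, m1, ta, ma   ; earlier config
//   vsetivli zero, 4, e32, m2, ta, ma   ; later config
// with nothing in between demanding the earlier configuration's fields, the
// earlier vsetivli can be rewritten to the later one's VTYPE and the later
// one deleted.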
bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr *> ToDelete;
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {

    if (!isVectorConfigInstr(MI)) {
      Used.doUnion(getDemanded(MI, ST));
      if (MI.isCall() || MI.isInlineAsm() ||
          MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
          MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
        NextMI = nullptr;
      continue;
    }

    if (!MI.getOperand(0).isDead())
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      }

      if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
        if (!isVLPreservingConfig(*NextMI)) {
          Register DefReg = NextMI->getOperand(0).getReg();

          MI.getOperand(0).setReg(DefReg);
          MI.getOperand(0).setIsDead(false);

          // The def of DefReg moved to MI, so extend the LiveInterval up to
          // MI.
          if (DefReg.isVirtual()) {
            LiveInterval &DefLI = LIS->getInterval(DefReg);
            SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
            VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
            LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
            DefLI.addSegment(S);
            DefVNI->def = MISlot;
            // Mark DefLI as spillable if it was previously unspillable
            DefLI.setWeight(0);

            // DefReg may have had no uses, in which case we need to shrink
            // the LiveInterval up to MI.
            LIS->shrinkToUses(&DefLI);
          }

          Register OldVLReg;
          if (MI.getOperand(1).isReg())
            OldVLReg = MI.getOperand(1).getReg();
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
                                              false);

          // Clear NextMI's AVL early so we're not counting it as a use.
          if (NextMI->getOperand(1).isReg())
            NextMI->getOperand(1).setReg(RISCV::NoRegister);

          if (OldVLReg && OldVLReg.isVirtual()) {
            // NextMI no longer uses OldVLReg so shrink its LiveInterval.
            LIS->shrinkToUses(&LIS->getInterval(OldVLReg));

            MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
            if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
                MRI->use_nodbg_empty(OldVLReg)) {
              VLOpDef->eraseFromParent();
              LIS->removeInterval(OldVLReg);
            }
          }
          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        ToDelete.push_back(NextMI);
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI, ST);
  }

  NumCoalescedVSETVL += ToDelete.size();
  for (auto *MI : ToDelete) {
    LIS->RemoveMachineInstrFromMaps(*MI);
    MI->eraseFromParent();
  }

  return !ToDelete.empty();
}
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      assert(VLOutput.isVirtual());
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    VSETVLIInfo TmpStatus;
    HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = TmpStatus;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
  // of VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}
// Now that all vsetvlis are explicit, go through and do block local
// DSE and peephole based demanded fields based transforms. Note that
// this *must* be done outside the main dataflow so long as we allow
// any cross block analysis within the dataflow. We can't have both
// demanded fields based mutation and non-local analysis in the
// dataflow at the same time without introducing inconsistencies.
bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;
  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();
  LIS = &getAnalysis<LiveIntervals>();

  bool Changed = false;
  for (MachineBasicBlock &MBB : MF)
    Changed |= coalesceVSETVLIs(MBB);

  return Changed;
}

FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
  return new RISCVCoalesceVSETVLI();
}