[NFC][RISCV] Keep AVLReg define instr inside VSETVLInfo (#89180)
//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
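//
// For illustration (a sketch, not drawn from a particular test): given a
// vector add pseudo that requires AVL a0 with SEW=32 and LMUL=1, phase 3
// materializes the configuration ahead of it:
//
//   vsetvli zero, a0, e32, m1, ta, ma
//   vadd.vv v8, v9, v10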
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));
namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}
static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}
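
// For illustration (assembly sketch): the VL-preserving form rewrites only
// VTYPE and leaves the current VL in place, e.g.
//   vsetvli x0, x0, e32, m1, ta, ma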
static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}
/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}
static bool isNonZeroLoadImmediate(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDI &&
         MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
         MI.getOperand(1).getReg() == RISCV::X0 &&
         MI.getOperand(2).getImm() != 0;
}
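
// For example, "addi a0, x0, 32" (the canonical "li a0, 32") is a non-zero
// load immediate, while "addi a0, a1, 32" (non-X0 base) and "addi a0, x0, 0"
// are not.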
/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedMergeOp(const MachineInstr &MI,
                                const MachineRegisterInfo &MRI) {
  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the pass through
    // lanes are undefined.
    return true;

  // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQUENCE whose
  // operands are solely IMPLICIT_DEFs, then the pass through lanes are
  // undefined.
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  if (UseMO.getReg() == RISCV::NoRegister)
    return true;

  if (UseMO.isUndef())
    return true;
  if (UseMO.getReg().isPhysical())
    return false;

  if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
    if (UseMI->isImplicitDef())
      return true;

    if (UseMI->isRegSequence()) {
      for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
        MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg());
        if (!SourceMI || !SourceMI->isImplicitDef())
          return false;
      }
      return true;
    }
  }
  return false;
}
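
// Illustrative MIR sketch (register names and operand layout hypothetical):
// a tied passthru that is an IMPLICIT_DEF leaves the inactive lanes
// undefined, so the query above returns true for something shaped like:
//   %pt:vr = IMPLICIT_DEF
//   %v:vr = PseudoVADD_VV_M1 %pt(tied-def 0), %a, %b, %avl, 5 /*e32*/, 0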
/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3,              // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWGreaterThanOrEqualAndLessThan64 =
        1,      // SEW can be changed as long as it's greater
                // than or equal to the original value, but must be less
                // than 64.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

  // Make this the result of demanding both the fields in this and B.
  void doUnion(const DemandedFields &B) {
    VLAny |= B.VLAny;
    VLZeroness |= B.VLZeroness;
    SEW = std::max(SEW, B.SEW);
    LMUL |= B.LMUL;
    SEWLMULRatio |= B.SEWLMULRatio;
    TailPolicy |= B.TailPolicy;
    MaskPolicy |= B.MaskPolicy;
  }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    }
    OS << ", ";
    OS << "LMUL=" << LMUL << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif
/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  if (Used.LMUL &&
      RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}
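
// Worked example (illustrative): with only Used.SEWLMULRatio demanded,
// e32/m1 and e64/m2 are compatible (both have SEW/LMUL ratio 32), while
// e32/m1 and e32/m2 (ratios 32 vs 16) are not.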
/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI,
                           const MachineRegisterInfo *MRI,
                           const RISCVSubtarget *ST) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }
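
  // For example (illustrative): vle32.v executed under e32/m1 has SEW/LMUL
  // ratio 32; a state of e16/mf2 has the same ratio, so the load's EMUL
  // (EEW/SEW * LMUL) is unchanged even though SEW and LMUL both differ.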
  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype, and can disregard policy bits. Warning: It's tempting to try doing
    // this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedMergeOp(MI, *MRI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  return Res;
}
/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  struct AVLDef {
    const MachineInstr *DefMI;
    Register DefReg;
  };
  union {
    AVLDef AVLRegDef;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    AVLIsVLMAX,
    AVLIsIgnored,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLRegDef(const MachineInstr *DefMI, Register AVLReg) {
    assert(DefMI && AVLReg.isVirtual());
    AVLRegDef.DefMI = DefMI;
    AVLRegDef.DefReg = AVLReg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  void setAVLVLMAX() { State = AVLIsVLMAX; }

  void setAVLIgnored() { State = AVLIsIgnored; }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
  bool hasAVLIgnored() const { return State == AVLIsIgnored; }
  Register getAVLReg() const {
    assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
    return AVLRegDef.DefReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  const MachineInstr &getAVLDefMI() const {
    assert(hasAVLReg() && AVLRegDef.DefMI);
    return *AVLRegDef.DefMI;
  }

  void setAVL(VSETVLIInfo Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLRegDef(&Info.getAVLDefMI(), Info.getAVLReg());
    else if (Info.hasAVLVLMAX())
      setAVLVLMAX();
    else if (Info.hasAVLIgnored())
      setAVLIgnored();
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg())
      return isNonZeroLoadImmediate(getAVLDefMI());
    if (hasAVLVLMAX())
      return true;
    if (hasAVLIgnored())
      return false;
    return false;
  }
  bool hasEquallyZeroAVL(const VSETVLIInfo &Other) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL() && Other.hasNonZeroAVL());
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLDefMI().isIdenticalTo(Other.getAVLDefMI()) &&
             getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    if (hasAVLVLMAX())
      return Other.hasAVLVLMAX() && hasSameVLMAX(Other);

    if (hasAVLIgnored())
      return Other.hasAVLIgnored();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }
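
  // Worked example (illustrative): at VLEN=128, e32/m1 gives VLMAX=4 and so
  // does e64/m2; both encode SEW/LMUL ratio 32, so any AVL maps to the same
  // VL under either VTYPE.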
  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const MachineRegisterInfo &MRI) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }
  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)getAVLReg();
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    if (hasAVLVLMAX())
      OS << "AVLVLMAX";
    if (hasAVLIgnored())
      OS << "AVLIgnored";
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif
struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

class RISCVCoalesceVSETVLI : public MachineFunctionPass {
public:
  static char ID;
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;

  RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addRequired<LiveIntervals>();
    AU.addPreserved<LiveIntervals>();
    AU.addRequired<SlotIndexes>();
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveDebugVariables>();
    AU.addPreserved<LiveStacks>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }

private:
  bool coalesceVSETVLIs(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

char RISCVCoalesceVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
                RISCV_COALESCE_VSETVLI_NAME, false, false)
// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    if (AVLReg == RISCV::X0)
      NewInfo.setAVLVLMAX();
    else
      NewInfo.setAVLRegDef(MRI.getVRegDef(AVLReg), AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}
static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVII::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}
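
// Worked example (illustrative): computeVLMAX(128, 32, LMUL_F2) scales
// VLEN=128 down by the fractional LMUL to 64 and divides by SEW=32, giving
// VLMAX=2; with LMUL_2 instead, the result would be (128*2)/32 = 8.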
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const RISCVSubtarget &ST,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedMergeOp(MI, *MRI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead. This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST.getRealMaxVLen(), SEW, VLMul);
        if (ST.getRealMinVLen() == ST.getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLVLMAX();
      } else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLRegDef(MRI->getVRegDef(VLOp.getReg()), VLOp.getReg());
    }
  } else {
    assert(isScalarExtractInstr(MI));
    // TODO: If we are more clever about x0,x0 insertion then we should be able
    // to deduce that the VL is ignored based off of DemandedFields, and remove
    // the AVLIsIgnored state. Then we can just use an arbitrary immediate AVL.
    InstrInfo.setAVLIgnored();
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
  // AVL operand with the AVL of the defining vsetvli. We avoid general
  // register AVLs to avoid extending live ranges without being sure we can
  // kill the original source reg entirely.
  if (InstrInfo.hasAVLReg()) {
    const MachineInstr &DefMI = InstrInfo.getAVLDefMI();
    if (isVectorConfigInstr(DefMI)) {
      VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(DefMI, *MRI);
      if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
          (DefInstrInfo.hasAVLImm() || DefInstrInfo.hasAVLVLMAX()))
        InstrInfo.setAVL(DefInstrInfo);
    }
  }

  return InstrInfo;
}
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {

  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
      const MachineInstr &DefMI = Info.getAVLDefMI();
      if (isVectorConfigInstr(DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI);
        if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
          BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
              .addReg(RISCV::X0, RegState::Define | RegState::Dead)
              .addReg(RISCV::X0, RegState::Kill)
              .addImm(Info.encodeVTYPE())
              .addReg(RISCV::VL, RegState::Implicit);
          return;
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (Info.hasAVLIgnored()) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 1 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(1)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (Info.hasAVLVLMAX()) {
    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(DestReg, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
  BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
      .addReg(RISCV::X0, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}
static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}
/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI));

  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  DemandedFields Used = getDemanded(MI, MRI, ST);

  // A slidedown/slideup with an *undefined* merge op can freely clobber
  // elements not copied from the source vector (e.g. masked off, tail, or
  // slideup's prefix). Notes:
  // * We can't modify SEW here since the slide amount is in units of SEW.
  // * VL=1 is special only because we have existing support for zero vs
  //   non-zero VL. We could generalize this if we had a VL > C predicate.
  // * The LMUL1 restriction is for machines whose latency may depend on VL.
  // * As above, this is only legal for tail "undefined" not "agnostic".
  if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
    Used.VLAny = false;
    Used.VLZeroness = true;
    Used.LMUL = false;
    Used.TailPolicy = false;
  }

  // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated
  // semantically the same as vmv.s.x. This is particularly useful since we
  // don't have an immediate form of vmv.s.x, and thus frequently use vmv.v.i
  // in its place. Since a splat is non-constant time in LMUL, we do need to be
  // careful to not increase the number of active vector registers (unlike for
  // vmv.s.x.)
  if (isScalarSplatInstr(MI) && Require.hasAVLImm() &&
      Require.getAVLImm() == 1 && isLMUL1OrSmaller(CurInfo.getVLMUL()) &&
      hasUndefinedMergeOp(MI, *MRI)) {
    Used.LMUL = false;
    Used.SEWLMULRatio = false;
    Used.VLAny = false;
    if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
      Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
    else
      Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
    Used.TailPolicy = false;
  }

  if (CurInfo.isCompatible(Used, Require, *MRI))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && CurInfo.hasCompatibleVTYPE(Used, Require)) {
    const MachineInstr &DefMI = Require.getAVLDefMI();
    if (isVectorConfigInstr(DefMI)) {
      VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI);
      if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
        return false;
    }
  }

  return true;
}
// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = true;
  }

  return Info;
}
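
// Worked example (illustrative): if PrevInfo is e32/m2 (ratio 16) and the
// incoming instruction wants e16 without demanding LMUL or the ratio,
// getSameRatioLMUL yields m1 (16/1 = 16), so the ratio is preserved and a
// VL toggle may become unnecessary.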
// Given an incoming state reaching MI, minimally modifies that state so that it
// is compatible with MI. The resulting state is guaranteed to be semantically
// legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  DemandedFields Demanded = getDemanded(MI, MRI, ST);
  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs. This removes a vsetvli entirely if the types
  // match or allows use of cheaper avl preserving variant if VLMAX doesn't
  // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
  // variant, so we avoid the transform to prevent extending live range of an
  // avl register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}
// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI, *MRI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLRegDef(MRI->getVRegDef(MI.getOperand(1).getReg()),
                      MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
      MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Info = VSETVLIInfo::getUnknown();
}
bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}
void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based
  // on the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}
// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  const MachineInstr *PHI = &Require.getAVLDefMI();
  if (PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, *MRI);
    if (DefInfo != PBBExit)
      return true;

    // Require has the same VL as PBBExit, so if the exit from the
    // predecessor has the VTYPE we are looking for we might be able
    // to avoid a VSETVLI.
    if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();
          MachineInstr *VLOpDef = MRI->getVRegDef(Reg);

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);

          // If the AVL was an immediate > 31, then it would have been emitted
          // as an ADDI. However, the ADDI might not have been used in the
          // vsetvli, or a vsetvli might not have been emitted, so it may be
          // dead now.
          if (VLOpDef && TII->isAddImmediate(*VLOpDef, Reg) &&
              MRI->use_nodbg_empty(Reg))
            VLOpDef->eraseFromParent();
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE. Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}
/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
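///
/// Illustrative shape of the transform (a sketch, not from a real test):
///
///   preheader:                     preheader:
///     ...                            vsetvli zero, a0, e32, m1, ta, ma
///   loop:                 ==>      loop:
///     vsetvli zero, a0, ...          vle32.v v8, (a1)
///     vle32.v v8, (a1)               ...
///     ...                            bnez a2, loop
///     bnez a2, loop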
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
  // the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
  if (AvailableInfo.hasAVLReg()) {
    const MachineInstr *AVLDefMI = &AvailableInfo.getAVLDefMI();
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (AVLDefMI->getParent() != UnavailablePred)
      return;
    for (auto &TermMI : UnavailablePred->terminators())
      if (&TermMI == AVLDefMI)
        return;
  }

  // If the AVL isn't used in its predecessors then bail, since we have no AVL
  // to insert a vsetvli with.
  if (AvailableInfo.hasAVLIgnored())
    return;

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here; one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}
// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used,
                                 const MachineRegisterInfo &MRI) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI, MRI)
               .hasEquallyZeroAVL(getInfoForVSETVLI(MI, MRI)))
        return false;
    }

    auto &AVL = MI.getOperand(1);
    auto &PrevAVL = PrevMI.getOperand(1);

    // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
    // For now just check that PrevMI uses the same virtual register.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
        (!MRI.hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
         PrevAVL.getReg() != AVL.getReg()))
      return false;
  }

  assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}
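
// Illustrative pair (a sketch): if the code between the two configs demands
// only the SEW/LMUL ratio, then with a shared AVL register
//   PrevMI: vsetvli x1, a0, e32, m1, ta, ma
//   MI:     vsetvli x0, a0, e16, mf2, ta, ma
// PrevMI can be rewritten to MI's VTYPE (both ratios are 32) and MI deleted.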
bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr *> ToDelete;
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {

    if (!isVectorConfigInstr(MI)) {
      Used.doUnion(getDemanded(MI, MRI, ST));
      if (MI.isCall() || MI.isInlineAsm() ||
          MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
          MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
        NextMI = nullptr;
      continue;
    }

    Register RegDef = MI.getOperand(0).getReg();
    assert(RegDef == RISCV::X0 || RegDef.isVirtual());
    if (RegDef != RISCV::X0 && !MRI->use_nodbg_empty(RegDef))
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      }

      if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
        if (!isVLPreservingConfig(*NextMI)) {
          Register DefReg = NextMI->getOperand(0).getReg();

          MI.getOperand(0).setReg(DefReg);
          MI.getOperand(0).setIsDead(false);

          // The def of DefReg moved to MI, so extend the LiveInterval up to
          // it.
          if (DefReg.isVirtual()) {
            LiveInterval &DefLI = LIS->getInterval(DefReg);
            SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
            VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
            LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
            DefLI.addSegment(S);
            DefVNI->def = MISlot;
            // Mark DefLI as spillable if it was previously unspillable
            DefLI.setWeight(0);

            // DefReg may have had no uses, in which case we need to shrink
            // the LiveInterval up to MI.
            LIS->shrinkToUses(&DefLI);
          }

          Register OldVLReg;
          if (MI.getOperand(1).isReg())
            OldVLReg = MI.getOperand(1).getReg();
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
                                              false);

          // Clear NextMI's AVL early so we're not counting it as a use.
          if (NextMI->getOperand(1).isReg())
            NextMI->getOperand(1).setReg(RISCV::NoRegister);

          if (OldVLReg && OldVLReg.isVirtual()) {
            // NextMI no longer uses OldVLReg so shrink its LiveInterval.
            LIS->shrinkToUses(&LIS->getInterval(OldVLReg));

            MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
            if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
                MRI->use_nodbg_empty(OldVLReg)) {
              VLOpDef->eraseFromParent();
              LIS->removeInterval(OldVLReg);
            }
          }
          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI, MRI, ST);
  }

  NumCoalescedVSETVL += ToDelete.size();
  for (auto *MI : ToDelete) {
    LIS->RemoveMachineInstrFromMaps(*MI);
    MI->eraseFromParent();
  }

  return !ToDelete.empty();
}
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    VSETVLIInfo TmpStatus;
    HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = TmpStatus;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
  // of VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}
// Now that all vsetvlis are explicit, go through and do block local
// DSE and peephole based demanded fields based transforms. Note that
// this *must* be done outside the main dataflow so long as we allow
// any cross block analysis within the dataflow. We can't have both
// demanded fields based mutation and non-local analysis in the
// dataflow at the same time without introducing inconsistencies.
bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;
  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();
  LIS = &getAnalysis<LiveIntervals>();

  bool Changed = false;
  for (MachineBasicBlock &MBB : MF)
    Changed |= coalesceVSETVLIs(MBB);

  return Changed;
}

FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
  return new RISCVCoalesceVSETVLI();
}