1 //===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a function pass that inserts VSETVLI instructions where
10 // needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
11 // instructions.
13 // This pass consists of 3 phases:
15 // Phase 1 collects how each basic block affects VL/VTYPE.
17 // Phase 2 uses the information from phase 1 to do a data flow analysis to
18 // propagate the VL/VTYPE changes through the function. This gives us the
19 // VL/VTYPE at the start of each basic block.
21 // Phase 3 inserts VSETVLI instructions in each basic block. Information from
22 // phase 2 is used to prevent inserting a VSETVLI before the first vector
23 // instruction in the block if possible.
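// As a rough illustration (simplified; register names and operands are
// placeholders, not actual pass output), two vector operations that agree on
// SEW/LMUL ideally end up sharing a single configuration after this pass:
//
//   vsetvli t0, a0, e32, m1, ta, ma   ; inserted once before the first op
//   vadd.vv v8, v9, v10
//   vsub.vv v11, v8, v9               ; no second vsetvli needed
//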
25 //===----------------------------------------------------------------------===//
27 #include "RISCV.h"
28 #include "RISCVSubtarget.h"
29 #include "llvm/CodeGen/LiveIntervals.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include <queue>
32 using namespace llvm;
34 #define DEBUG_TYPE "riscv-insert-vsetvli"
35 #define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
37 static cl::opt<bool> DisableInsertVSETVLPHIOpt(
38 "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
39 cl::desc("Disable looking through phis when inserting vsetvlis."));
41 static cl::opt<bool> UseStrictAsserts(
42 "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
43 cl::desc("Enable strict assertion checking for the dataflow algorithm"));
45 namespace {
47 static unsigned getVLOpNum(const MachineInstr &MI) {
48 return RISCVII::getVLOpNum(MI.getDesc());
51 static unsigned getSEWOpNum(const MachineInstr &MI) {
52 return RISCVII::getSEWOpNum(MI.getDesc());
55 static bool isVectorConfigInstr(const MachineInstr &MI) {
56 return MI.getOpcode() == RISCV::PseudoVSETVLI ||
57 MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
58 MI.getOpcode() == RISCV::PseudoVSETIVLI;
61 /// Return true if this is 'vsetvli x0, x0, vtype' which preserves
62 /// VL and only sets VTYPE.
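/// For example, the assembly form "vsetvli x0, x0, e32, m1, ta, ma" updates
/// VTYPE while keeping the current VL (the encoding is reserved if the new
/// VTYPE would change VLMAX).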
63 static bool isVLPreservingConfig(const MachineInstr &MI) {
64 if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
65 return false;
66 assert(RISCV::X0 == MI.getOperand(1).getReg());
67 return RISCV::X0 == MI.getOperand(0).getReg();
70 static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
71 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
72 default:
73 return false;
74 case RISCV::VFMV_S_F:
75 case RISCV::VFMV_V_F:
76 return true;
80 static bool isScalarExtractInstr(const MachineInstr &MI) {
81 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
82 default:
83 return false;
84 case RISCV::VMV_X_S:
85 case RISCV::VFMV_F_S:
86 return true;
90 static bool isScalarInsertInstr(const MachineInstr &MI) {
91 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
92 default:
93 return false;
94 case RISCV::VMV_S_X:
95 case RISCV::VFMV_S_F:
96 return true;
100 static bool isScalarSplatInstr(const MachineInstr &MI) {
101 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
102 default:
103 return false;
104 case RISCV::VMV_V_I:
105 case RISCV::VMV_V_X:
106 case RISCV::VFMV_V_F:
107 return true;
111 static bool isVSlideInstr(const MachineInstr &MI) {
112 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
113 default:
114 return false;
115 case RISCV::VSLIDEDOWN_VX:
116 case RISCV::VSLIDEDOWN_VI:
117 case RISCV::VSLIDEUP_VX:
118 case RISCV::VSLIDEUP_VI:
119 return true;
123 /// Get the EEW for a load or store instruction. Return std::nullopt if MI is
124 /// not a load or store which ignores SEW.
125 static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
126 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
127 default:
128 return std::nullopt;
129 case RISCV::VLE8_V:
130 case RISCV::VLSE8_V:
131 case RISCV::VSE8_V:
132 case RISCV::VSSE8_V:
133 return 8;
134 case RISCV::VLE16_V:
135 case RISCV::VLSE16_V:
136 case RISCV::VSE16_V:
137 case RISCV::VSSE16_V:
138 return 16;
139 case RISCV::VLE32_V:
140 case RISCV::VLSE32_V:
141 case RISCV::VSE32_V:
142 case RISCV::VSSE32_V:
143 return 32;
144 case RISCV::VLE64_V:
145 case RISCV::VLSE64_V:
146 case RISCV::VSE64_V:
147 case RISCV::VSSE64_V:
148 return 64;
152 /// Return true if this is an operation on mask registers. Note that
153 /// this includes both arithmetic/logical ops and load/store (vlm/vsm).
154 static bool isMaskRegOp(const MachineInstr &MI) {
155 if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
156 return false;
157 const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
158 // A Log2SEW of 0 is an operation on mask registers only.
159 return Log2SEW == 0;
162 /// Return true if the inactive elements in the result are entirely undefined.
163 /// Note that this is different from "agnostic" as defined by the vector
164 /// specification. Agnostic requires each lane to either be undisturbed, or
165 /// take the value -1; no other value is allowed.
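/// For example, a tail-agnostic operation may leave each tail lane either
/// undisturbed or overwritten with all ones, whereas an operation whose merge
/// operand is IMPLICIT_DEF places no constraint at all on those lanes.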
166 static bool hasUndefinedMergeOp(const MachineInstr &MI,
167 const MachineRegisterInfo &MRI) {
169 unsigned UseOpIdx;
170 if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
171 // If there is no passthrough operand, then the pass through
172 // lanes are undefined.
173 return true;
175 // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQUENCE whose
176 // operands are solely IMPLICIT_DEFS, then the pass through lanes are
177 // undefined.
178 const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
179 if (UseMO.getReg() == RISCV::NoRegister)
180 return true;
182 if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
183 if (UseMI->isImplicitDef())
184 return true;
186 if (UseMI->isRegSequence()) {
187 for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
188 MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg());
189 if (!SourceMI || !SourceMI->isImplicitDef())
190 return false;
192 return true;
195 return false;
198 /// Which subfields of VL or VTYPE have values we need to preserve?
199 struct DemandedFields {
200 // Some unknown property of VL is used. If demanded, must preserve entire
201 // value.
202 bool VLAny = false;
203 // Only zero vs non-zero is used. If demanded, can change non-zero values.
204 bool VLZeroness = false;
205 // What properties of SEW we need to preserve.
206 enum : uint8_t {
207 SEWEqual = 3, // The exact value of SEW needs to be preserved.
208 SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
209 // than or equal to the original value.
210 SEWGreaterThanOrEqualAndLessThan64 =
211 1, // SEW can be changed as long as it's greater
212 // than or equal to the original value, but must be less
213 // than 64.
214 SEWNone = 0 // We don't need to preserve SEW at all.
215 } SEW = SEWNone;
216 bool LMUL = false;
217 bool SEWLMULRatio = false;
218 bool TailPolicy = false;
219 bool MaskPolicy = false;
221 // Return true if any part of VTYPE was used
222 bool usedVTYPE() const {
223 return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
226 // Return true if any property of VL was used
227 bool usedVL() {
228 return VLAny || VLZeroness;
231 // Mark all VTYPE subfields and properties as demanded
232 void demandVTYPE() {
233 SEW = SEWEqual;
234 LMUL = true;
235 SEWLMULRatio = true;
236 TailPolicy = true;
237 MaskPolicy = true;
240 // Mark all VL properties as demanded
241 void demandVL() {
242 VLAny = true;
243 VLZeroness = true;
246 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
247 /// Support for debugging, callable in GDB: V->dump()
248 LLVM_DUMP_METHOD void dump() const {
249 print(dbgs());
250 dbgs() << "\n";
253 /// Implement operator<<.
254 void print(raw_ostream &OS) const {
255 OS << "{";
256 OS << "VLAny=" << VLAny << ", ";
257 OS << "VLZeroness=" << VLZeroness << ", ";
258 OS << "SEW=";
259 switch (SEW) {
260 case SEWEqual:
261 OS << "SEWEqual";
262 break;
263 case SEWGreaterThanOrEqual:
264 OS << "SEWGreaterThanOrEqual";
265 break;
266 case SEWGreaterThanOrEqualAndLessThan64:
267 OS << "SEWGreaterThanOrEqualAndLessThan64";
268 break;
269 case SEWNone:
270 OS << "SEWNone";
271 break;
273 OS << ", ";
274 OS << "LMUL=" << LMUL << ", ";
275 OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
276 OS << "TailPolicy=" << TailPolicy << ", ";
277 OS << "MaskPolicy=" << MaskPolicy;
278 OS << "}";
280 #endif
283 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
284 LLVM_ATTRIBUTE_USED
285 inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
286 DF.print(OS);
287 return OS;
289 #endif
291 /// Return true if moving from CurVType to NewVType is
292 /// indistinguishable from the perspective of an instruction (or set
293 /// of instructions) which use only the Used subfields and properties.
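/// For example, if only the SEW/LMUL ratio is demanded, e32/m1 and e64/m2 are
/// interchangeable since both encode a ratio of 32.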
294 static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
295 const DemandedFields &Used) {
296 switch (Used.SEW) {
297 case DemandedFields::SEWNone:
298 break;
299 case DemandedFields::SEWEqual:
300 if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
301 return false;
302 break;
303 case DemandedFields::SEWGreaterThanOrEqual:
304 if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
305 return false;
306 break;
307 case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
308 if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
309 RISCVVType::getSEW(NewVType) >= 64)
310 return false;
311 break;
314 if (Used.LMUL &&
315 RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
316 return false;
318 if (Used.SEWLMULRatio) {
319 auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
320 RISCVVType::getVLMUL(CurVType));
321 auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
322 RISCVVType::getVLMUL(NewVType));
323 if (Ratio1 != Ratio2)
324 return false;
327 if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
328 RISCVVType::isTailAgnostic(NewVType))
329 return false;
330 if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
331 RISCVVType::isMaskAgnostic(NewVType))
332 return false;
333 return true;
336 /// Return the fields and properties demanded by the provided instruction.
337 DemandedFields getDemanded(const MachineInstr &MI,
338 const MachineRegisterInfo *MRI,
339 const RISCVSubtarget *ST) {
340 // Warning: This function has to work on both the lowered (i.e. post
341 // emitVSETVLIs) and pre-lowering forms. The main implication of this is
342 // that it can't use the value of a SEW, VL, or Policy operand as they might
343 // be stale after lowering.
345 // Most instructions don't use any of these subfields.
346 DemandedFields Res;
347 // Start conservative if registers are used
348 if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
349 Res.demandVL();
350 if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
351 Res.demandVTYPE();
352 // Start conservative on the unlowered form too
353 uint64_t TSFlags = MI.getDesc().TSFlags;
354 if (RISCVII::hasSEWOp(TSFlags)) {
355 Res.demandVTYPE();
356 if (RISCVII::hasVLOp(TSFlags))
357 Res.demandVL();
359 // Behavior is independent of mask policy.
360 if (!RISCVII::usesMaskPolicy(TSFlags))
361 Res.MaskPolicy = false;
364 // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
365 // They instead demand the ratio of the two which is used in computing
366 // EMUL, but which allows us the flexibility to change SEW and LMUL
367 // provided we don't change the ratio.
368 // Note: We assume that the instruction's initial SEW is the EEW encoded
369 // in the opcode. This is asserted when constructing the VSETVLIInfo.
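// For example (illustrative), vle32.v issued under e8/mf4 has EEW=32 and
// EMUL = (EEW/SEW) * LMUL = m1; any SEW/LMUL pair with the same ratio of 32
// (e.g. e32/m1) yields that same EMUL.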
370 if (getEEWForLoadStore(MI)) {
371 Res.SEW = DemandedFields::SEWNone;
372 Res.LMUL = false;
375 // Store instructions don't use the policy fields.
376 if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
377 Res.TailPolicy = false;
378 Res.MaskPolicy = false;
381 // If this is a mask reg operation, it only cares about VLMAX.
382 // TODO: Possible extensions to this logic
383 // * Probably ok if available VLMax is larger than demanded
384 // * The policy bits can probably be ignored.
385 if (isMaskRegOp(MI)) {
386 Res.SEW = DemandedFields::SEWNone;
387 Res.LMUL = false;
390 // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
391 if (isScalarInsertInstr(MI)) {
392 Res.LMUL = false;
393 Res.SEWLMULRatio = false;
394 Res.VLAny = false;
395 // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
396 // need to preserve any other bits and are thus compatible with any larger
397 // etype and can disregard policy bits. Warning: It's tempting to try doing
398 // this for any tail agnostic operation, but we can't as TA requires
399 // tail lanes to either be the original value or -1. We are writing
400 // unknown bits to the lanes here.
401 if (hasUndefinedMergeOp(MI, *MRI)) {
402 if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
403 Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
404 else
405 Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
406 Res.TailPolicy = false;
410 // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
411 if (isScalarExtractInstr(MI)) {
412 assert(!RISCVII::hasVLOp(TSFlags));
413 Res.LMUL = false;
414 Res.SEWLMULRatio = false;
415 Res.TailPolicy = false;
416 Res.MaskPolicy = false;
419 return Res;
422 /// Defines the abstract state with which the forward dataflow models the
423 /// values of the VL and VTYPE registers after insertion.
424 class VSETVLIInfo {
425 union {
426 Register AVLReg;
427 unsigned AVLImm;
430 enum : uint8_t {
431 Uninitialized,
432 AVLIsReg,
433 AVLIsImm,
434 Unknown,
435 } State = Uninitialized;
437 // Fields from VTYPE.
438 RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
439 uint8_t SEW = 0;
440 uint8_t TailAgnostic : 1;
441 uint8_t MaskAgnostic : 1;
442 uint8_t SEWLMULRatioOnly : 1;
444 public:
445 VSETVLIInfo()
446 : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
447 SEWLMULRatioOnly(false) {}
449 static VSETVLIInfo getUnknown() {
450 VSETVLIInfo Info;
451 Info.setUnknown();
452 return Info;
455 bool isValid() const { return State != Uninitialized; }
456 void setUnknown() { State = Unknown; }
457 bool isUnknown() const { return State == Unknown; }
459 void setAVLReg(Register Reg) {
460 AVLReg = Reg;
461 State = AVLIsReg;
464 void setAVLImm(unsigned Imm) {
465 AVLImm = Imm;
466 State = AVLIsImm;
469 bool hasAVLImm() const { return State == AVLIsImm; }
470 bool hasAVLReg() const { return State == AVLIsReg; }
471 Register getAVLReg() const {
472 assert(hasAVLReg());
473 return AVLReg;
475 unsigned getAVLImm() const {
476 assert(hasAVLImm());
477 return AVLImm;
480 unsigned getSEW() const { return SEW; }
481 RISCVII::VLMUL getVLMUL() const { return VLMul; }
483 bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
484 if (hasAVLImm())
485 return getAVLImm() > 0;
486 if (hasAVLReg()) {
487 if (getAVLReg() == RISCV::X0)
488 return true;
489 if (MachineInstr *MI = MRI.getVRegDef(getAVLReg());
490 MI && MI->getOpcode() == RISCV::ADDI &&
491 MI->getOperand(1).isReg() && MI->getOperand(2).isImm() &&
492 MI->getOperand(1).getReg() == RISCV::X0 &&
493 MI->getOperand(2).getImm() != 0)
494 return true;
495 return false;
497 return false;
500 bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
501 const MachineRegisterInfo &MRI) const {
502 if (hasSameAVL(Other))
503 return true;
504 return (hasNonZeroAVL(MRI) && Other.hasNonZeroAVL(MRI));
507 bool hasSameAVL(const VSETVLIInfo &Other) const {
508 if (hasAVLReg() && Other.hasAVLReg())
509 return getAVLReg() == Other.getAVLReg();
511 if (hasAVLImm() && Other.hasAVLImm())
512 return getAVLImm() == Other.getAVLImm();
514 return false;
517 void setVTYPE(unsigned VType) {
518 assert(isValid() && !isUnknown() &&
519 "Can't set VTYPE for uninitialized or unknown");
520 VLMul = RISCVVType::getVLMUL(VType);
521 SEW = RISCVVType::getSEW(VType);
522 TailAgnostic = RISCVVType::isTailAgnostic(VType);
523 MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
525 void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
526 assert(isValid() && !isUnknown() &&
527 "Can't set VTYPE for uninitialized or unknown");
528 VLMul = L;
529 SEW = S;
530 TailAgnostic = TA;
531 MaskAgnostic = MA;
534 void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }
536 unsigned encodeVTYPE() const {
537 assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
538 "Can't encode VTYPE for uninitialized or unknown");
539 return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
542 bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
544 bool hasSameVTYPE(const VSETVLIInfo &Other) const {
545 assert(isValid() && Other.isValid() &&
546 "Can't compare invalid VSETVLIInfos");
547 assert(!isUnknown() && !Other.isUnknown() &&
548 "Can't compare VTYPE in unknown state");
549 assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
550 "Can't compare when only LMUL/SEW ratio is valid.");
551 return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
552 std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
553 Other.MaskAgnostic);
556 unsigned getSEWLMULRatio() const {
557 assert(isValid() && !isUnknown() &&
558 "Can't use VTYPE for uninitialized or unknown");
559 return RISCVVType::getSEWLMULRatio(SEW, VLMul);
562 // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
563 // Note that having the same VLMAX ensures that both share the same
564 // function from AVL to VL; that is, they must produce the same VL value
565 // for any given AVL value.
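// Since VLMAX = (VLEN / SEW) * LMUL, two settings with the same SEW/LMUL
// ratio necessarily have the same VLMAX for any fixed VLEN.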
566 bool hasSameVLMAX(const VSETVLIInfo &Other) const {
567 assert(isValid() && Other.isValid() &&
568 "Can't compare invalid VSETVLIInfos");
569 assert(!isUnknown() && !Other.isUnknown() &&
570 "Can't compare VTYPE in unknown state");
571 return getSEWLMULRatio() == Other.getSEWLMULRatio();
574 bool hasCompatibleVTYPE(const DemandedFields &Used,
575 const VSETVLIInfo &Require) const {
576 return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
579 // Determine whether the vector instruction's requirements represented by
580 // Require are compatible with the previous vsetvli instruction represented
581 // by this. MI is the instruction whose requirements we're considering.
582 bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
583 const MachineRegisterInfo &MRI) const {
584 assert(isValid() && Require.isValid() &&
585 "Can't compare invalid VSETVLIInfos");
586 assert(!Require.SEWLMULRatioOnly &&
587 "Expected a valid VTYPE for instruction!");
588 // Nothing is compatible with Unknown.
589 if (isUnknown() || Require.isUnknown())
590 return false;
592 // If only our VLMAX ratio is valid, then this isn't compatible.
593 if (SEWLMULRatioOnly)
594 return false;
596 if (Used.VLAny && !hasSameAVL(Require))
597 return false;
599 if (Used.VLZeroness && !hasEquallyZeroAVL(Require, MRI))
600 return false;
602 return hasCompatibleVTYPE(Used, Require);
605 bool operator==(const VSETVLIInfo &Other) const {
606 // Uninitialized is only equal to another Uninitialized.
607 if (!isValid())
608 return !Other.isValid();
609 if (!Other.isValid())
610 return !isValid();
612 // Unknown is only equal to another Unknown.
613 if (isUnknown())
614 return Other.isUnknown();
615 if (Other.isUnknown())
616 return isUnknown();
618 if (!hasSameAVL(Other))
619 return false;
621 // If the SEWLMULRatioOnly bits are different, then they aren't equal.
622 if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
623 return false;
625 // If only the VLMAX is valid, check that it is the same.
626 if (SEWLMULRatioOnly)
627 return hasSameVLMAX(Other);
629 // If the full VTYPE is valid, check that it is the same.
630 return hasSameVTYPE(Other);
633 bool operator!=(const VSETVLIInfo &Other) const {
634 return !(*this == Other);
637 // Calculate the VSETVLIInfo visible to a block assuming this and Other are
638 // both predecessors.
639 VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
640 // If the new value isn't valid, ignore it.
641 if (!Other.isValid())
642 return *this;
644 // If this value isn't valid, this must be the first predecessor, use it.
645 if (!isValid())
646 return Other;
648 // If either is unknown, the result is unknown.
649 if (isUnknown() || Other.isUnknown())
650 return VSETVLIInfo::getUnknown();
652 // If we have an exact match, return this.
653 if (*this == Other)
654 return *this;
656 // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
657 // return an SEW/LMUL ratio only value.
658 if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
659 VSETVLIInfo MergeInfo = *this;
660 MergeInfo.SEWLMULRatioOnly = true;
661 return MergeInfo;
664 // Otherwise the result is unknown.
665 return VSETVLIInfo::getUnknown();
668 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
669 /// Support for debugging, callable in GDB: V->dump()
670 LLVM_DUMP_METHOD void dump() const {
671 print(dbgs());
672 dbgs() << "\n";
675 /// Implement operator<<.
676 /// @{
677 void print(raw_ostream &OS) const {
678 OS << "{";
679 if (!isValid())
680 OS << "Uninitialized";
681 if (isUnknown())
682 OS << "unknown";
683 if (hasAVLReg())
684 OS << "AVLReg=" << (unsigned)AVLReg;
685 if (hasAVLImm())
686 OS << "AVLImm=" << (unsigned)AVLImm;
687 OS << ", "
688 << "VLMul=" << (unsigned)VLMul << ", "
689 << "SEW=" << (unsigned)SEW << ", "
690 << "TailAgnostic=" << (bool)TailAgnostic << ", "
691 << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
692 << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
694 #endif
697 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
698 LLVM_ATTRIBUTE_USED
699 inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
700 V.print(OS);
701 return OS;
703 #endif
705 struct BlockData {
706 // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
707 // block. Calculated in Phase 2.
708 VSETVLIInfo Exit;
710 // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
711 // blocks. Calculated in Phase 2, and used by Phase 3.
712 VSETVLIInfo Pred;
714 // Keeps track of whether the block is already in the queue.
715 bool InQueue = false;
717 BlockData() = default;
720 class RISCVInsertVSETVLI : public MachineFunctionPass {
721 const RISCVSubtarget *ST;
722 const TargetInstrInfo *TII;
723 MachineRegisterInfo *MRI;
725 std::vector<BlockData> BlockInfo;
726 std::queue<const MachineBasicBlock *> WorkList;
728 public:
729 static char ID;
731 RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
732 initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
734 bool runOnMachineFunction(MachineFunction &MF) override;
736 void getAnalysisUsage(AnalysisUsage &AU) const override {
737 AU.setPreservesCFG();
738 MachineFunctionPass::getAnalysisUsage(AU);
741 StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
743 private:
744 bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
745 const VSETVLIInfo &CurInfo) const;
746 bool needVSETVLIPHI(const VSETVLIInfo &Require,
747 const MachineBasicBlock &MBB) const;
748 void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
749 const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
750 void insertVSETVLI(MachineBasicBlock &MBB,
751 MachineBasicBlock::iterator InsertPt, DebugLoc DL,
752 const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
754 void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
755 void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
756 bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
757 VSETVLIInfo &Info) const;
758 void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
759 void emitVSETVLIs(MachineBasicBlock &MBB);
760 void doLocalPostpass(MachineBasicBlock &MBB);
761 void doPRE(MachineBasicBlock &MBB);
762 void insertReadVL(MachineBasicBlock &MBB);
765 } // end anonymous namespace
767 char RISCVInsertVSETVLI::ID = 0;
769 INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
770 false, false)
772 static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
773 const MachineRegisterInfo *MRI) {
774 VSETVLIInfo InstrInfo;
776 bool TailAgnostic = true;
777 bool MaskAgnostic = true;
778 if (!hasUndefinedMergeOp(MI, *MRI)) {
779 // Start with undisturbed.
780 TailAgnostic = false;
781 MaskAgnostic = false;
783 // If there is a policy operand, use it.
784 if (RISCVII::hasVecPolicyOp(TSFlags)) {
785 const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
786 uint64_t Policy = Op.getImm();
787 assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
788 "Invalid Policy Value");
789 TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
790 MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
793 // Some pseudo instructions force a tail agnostic policy despite having a
794 // tied def.
795 if (RISCVII::doesForceTailAgnostic(TSFlags))
796 TailAgnostic = true;
798 if (!RISCVII::usesMaskPolicy(TSFlags))
799 MaskAgnostic = true;
802 RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
804 unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
805 // A Log2SEW of 0 is an operation on mask registers only.
806 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
807 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
809 if (RISCVII::hasVLOp(TSFlags)) {
810 const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
811 if (VLOp.isImm()) {
812 int64_t Imm = VLOp.getImm();
813 // Convert the VLMax sentinel to the X0 register.
814 if (Imm == RISCV::VLMaxSentinel)
815 InstrInfo.setAVLReg(RISCV::X0);
816 else
817 InstrInfo.setAVLImm(Imm);
818 } else {
819 InstrInfo.setAVLReg(VLOp.getReg());
821 } else {
822 assert(isScalarExtractInstr(MI));
823 InstrInfo.setAVLReg(RISCV::NoRegister);
825 #ifndef NDEBUG
826 if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
827 assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
829 #endif
830 InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
832 return InstrInfo;
835 void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
836 const VSETVLIInfo &Info,
837 const VSETVLIInfo &PrevInfo) {
838 DebugLoc DL = MI.getDebugLoc();
839 insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
842 // Return a VSETVLIInfo representing the changes made by this VSETVLI or
843 // VSETIVLI instruction.
844 static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
845 VSETVLIInfo NewInfo;
846 if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
847 NewInfo.setAVLImm(MI.getOperand(1).getImm());
848 } else {
849 assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
850 MI.getOpcode() == RISCV::PseudoVSETVLIX0);
851 Register AVLReg = MI.getOperand(1).getReg();
852 assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
853 "Can't handle X0, X0 vsetvli yet");
854 NewInfo.setAVLReg(AVLReg);
856 NewInfo.setVTYPE(MI.getOperand(2).getImm());
858 return NewInfo;
861 void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
862 MachineBasicBlock::iterator InsertPt, DebugLoc DL,
863 const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
865 if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
866 // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
867 // VLMAX.
868 if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
869 BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
870 .addReg(RISCV::X0, RegState::Define | RegState::Dead)
871 .addReg(RISCV::X0, RegState::Kill)
872 .addImm(Info.encodeVTYPE())
873 .addReg(RISCV::VL, RegState::Implicit);
874 return;
877 // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
878 // it has the same VLMAX we want and the last VL/VTYPE we observed is the
879 // same, we can use the X0, X0 form.
880 if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg() &&
881 Info.getAVLReg().isVirtual()) {
882 if (MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg())) {
883 if (isVectorConfigInstr(*DefMI)) {
884 VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
885 if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
886 BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
887 .addReg(RISCV::X0, RegState::Define | RegState::Dead)
888 .addReg(RISCV::X0, RegState::Kill)
889 .addImm(Info.encodeVTYPE())
890 .addReg(RISCV::VL, RegState::Implicit);
891 return;
898 if (Info.hasAVLImm()) {
899 BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
900 .addReg(RISCV::X0, RegState::Define | RegState::Dead)
901 .addImm(Info.getAVLImm())
902 .addImm(Info.encodeVTYPE());
903 return;
906 Register AVLReg = Info.getAVLReg();
907 if (AVLReg == RISCV::NoRegister) {
908 // We can only use x0, x0 if there's no chance of the vtype change causing
909 // the previous vl to become invalid.
910 if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
911 Info.hasSameVLMAX(PrevInfo)) {
912 BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
913 .addReg(RISCV::X0, RegState::Define | RegState::Dead)
914 .addReg(RISCV::X0, RegState::Kill)
915 .addImm(Info.encodeVTYPE())
916 .addReg(RISCV::VL, RegState::Implicit);
917 return;
919 // Otherwise use an AVL of 1 to avoid depending on previous vl.
920 BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
921 .addReg(RISCV::X0, RegState::Define | RegState::Dead)
922 .addImm(1)
923 .addImm(Info.encodeVTYPE());
924 return;
927 if (AVLReg.isVirtual())
928 MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
930 // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
931 // opcode if the AVLReg is X0 as they have different register classes for
932 // the AVL operand.
933 Register DestReg = RISCV::X0;
934 unsigned Opcode = RISCV::PseudoVSETVLI;
935 if (AVLReg == RISCV::X0) {
936 DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
937 Opcode = RISCV::PseudoVSETVLIX0;
939 BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
940 .addReg(DestReg, RegState::Define | RegState::Dead)
941 .addReg(AVLReg)
942 .addImm(Info.encodeVTYPE());
945 static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
946 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
947 return Fractional || LMul == 1;
950 /// Return true if a VSETVLI is required to transition from CurInfo to Require
951 /// before MI.
952 bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
953 const VSETVLIInfo &Require,
954 const VSETVLIInfo &CurInfo) const {
955 assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));
957 if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
958 return true;
960 DemandedFields Used = getDemanded(MI, MRI, ST);
962 // A slidedown/slideup with an *undefined* merge op can freely clobber
963 // elements not copied from the source vector (e.g. masked off, tail, or
964 // slideup's prefix). Notes:
965 // * We can't modify SEW here since the slide amount is in units of SEW.
966 // * VL=1 is special only because we have existing support for zero vs
967 // non-zero VL. We could generalize this if we had a VL > C predicate.
968 // * The LMUL1 restriction is for machines whose latency may depend on VL.
969 // * As above, this is only legal for tail "undefined" not "agnostic".
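// For example, with VL=1 a vslidedown.vi writes only element 0 of the result;
// any non-zero prior VL produces the same value for that element, and the
// remaining lanes are irrelevant because the merge operand is undefined.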
970 if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
971 isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
972 Used.VLAny = false;
973 Used.VLZeroness = true;
974 Used.LMUL = false;
975 Used.TailPolicy = false;
978 // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated semantically
979 // the same as vmv.s.x. This is particularly useful since we don't have an
980 // immediate form of vmv.s.x, and thus frequently use vmv.v.i in its place.
981 // Since a splat is non-constant time in LMUL, we do need to be careful not to
982 // increase the number of active vector registers (unlike for vmv.s.x).
983 if (isScalarSplatInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
984 isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
985 Used.LMUL = false;
986 Used.SEWLMULRatio = false;
987 Used.VLAny = false;
988 if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
989 Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
990 else
991 Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
992 Used.TailPolicy = false;
995 if (CurInfo.isCompatible(Used, Require, *MRI))
996 return false;
998 // We didn't find a compatible value. If our AVL is a virtual register,
999 // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
1000 // and the last VL/VTYPE we observed is the same, we don't need a
1001 // VSETVLI here.
1002 if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
1003 CurInfo.hasCompatibleVTYPE(Used, Require)) {
1004 if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
1005 if (isVectorConfigInstr(*DefMI)) {
1006 VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1007 if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
1008 return false;
1013 return true;
1016 // Given an incoming state reaching MI, modifies that state so that it is minimally
1017 // compatible with MI. The resulting state is guaranteed to be semantically legal
1018 // for MI, but may not be the state requested by MI.
1019 void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
1020 const MachineInstr &MI) const {
1021 uint64_t TSFlags = MI.getDesc().TSFlags;
1022 if (!RISCVII::hasSEWOp(TSFlags))
1023 return;
1025 const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
1026 if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
1027 return;
1029 const VSETVLIInfo PrevInfo = Info;
1030 Info = NewInfo;
1032 if (!RISCVII::hasVLOp(TSFlags))
1033 return;
1035 // If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
1036 // maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
1037 // places.
1038 DemandedFields Demanded = getDemanded(MI, MRI, ST);
1039 if (!Demanded.LMUL && !Demanded.SEWLMULRatio && Info.isValid() &&
1040 PrevInfo.isValid() && !Info.isUnknown() && !PrevInfo.isUnknown()) {
1041 if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
1042 PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
1043 Info.setVLMul(*NewVLMul);
1046 // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
1047 // VL > 0. We can discard the user requested AVL and just use the last
1048 // one if we can prove it equally zero. This removes a vsetvli entirely
1049 // if the types match or allows use of a cheaper AVL-preserving variant
1050 // if VLMAX doesn't change. If VLMAX might change, we couldn't use
1051 // the "vsetvli x0, x0, vtype" variant, so we avoid the transform to
1052 // prevent extending live range of an avl register operand.
1053 // TODO: We can probably relax this for immediates.
1054 if (isScalarInsertInstr(MI) && PrevInfo.isValid() &&
1055 PrevInfo.hasEquallyZeroAVL(Info, *MRI) &&
1056 Info.hasSameVLMAX(PrevInfo)) {
1057 if (PrevInfo.hasAVLImm())
1058 Info.setAVLImm(PrevInfo.getAVLImm());
1059 else
1060 Info.setAVLReg(PrevInfo.getAVLReg());
1061 return;
1064 // If AVL is defined by a vsetvli with the same VLMAX, we can
1065 // replace the AVL operand with the AVL of the defining vsetvli.
1066 // We avoid general register AVLs to avoid extending live ranges
1067 // without being sure we can kill the original source reg entirely.
1068 if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
1069 return;
1070 MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
1071 if (!DefMI || !isVectorConfigInstr(*DefMI))
1072 return;
1074 VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1075 if (DefInfo.hasSameVLMAX(Info) &&
1076 (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
1077 if (DefInfo.hasAVLImm())
1078 Info.setAVLImm(DefInfo.getAVLImm());
1079 else
1080 Info.setAVLReg(DefInfo.getAVLReg());
1081 return;
1085 // Given a state with which we evaluated MI (see transferBefore above for why
1086 // this might be different than the state MI requested), modify the state to
1087 // reflect the changes MI might make.
1088 void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
1089 const MachineInstr &MI) const {
1090 if (isVectorConfigInstr(MI)) {
1091 Info = getInfoForVSETVLI(MI);
1092 return;
1095 if (RISCV::isFaultFirstLoad(MI)) {
1096 // Update AVL to vl-output of the fault first load.
1097 Info.setAVLReg(MI.getOperand(1).getReg());
1098 return;
1101 // If this is something that updates VL/VTYPE that we don't know about, set
1102 // the state to unknown.
1103 if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
1104 MI.modifiesRegister(RISCV::VTYPE))
1105 Info = VSETVLIInfo::getUnknown();
1108 bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
1109 VSETVLIInfo &Info) const {
1110 bool HadVectorOp = false;
1112 Info = BlockInfo[MBB.getNumber()].Pred;
1113 for (const MachineInstr &MI : MBB) {
1114 transferBefore(Info, MI);
1116 if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
1117 HadVectorOp = true;
1119 transferAfter(Info, MI);
1122 return HadVectorOp;
1125 void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
1127 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1129 BBInfo.InQueue = false;
1131 // Start with the previous entry so that we keep the most conservative state
1132 // we have ever found.
1133 VSETVLIInfo InInfo = BBInfo.Pred;
1134 if (MBB.pred_empty()) {
1135 // There are no predecessors, so use the default starting status.
1136 InInfo.setUnknown();
1137 } else {
1138 for (MachineBasicBlock *P : MBB.predecessors())
1139 InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
1142 // If we don't have any valid predecessor value, wait until we do.
1143 if (!InInfo.isValid())
1144 return;
1146 // If no change, no need to rerun block
1147 if (InInfo == BBInfo.Pred)
1148 return;
1150 BBInfo.Pred = InInfo;
1151 LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
1152 << " changed to " << BBInfo.Pred << "\n");
1154 // Note: It's tempting to cache the state changes here, but due to the
1155 // compatibility checks performed, a block's output state can change based on
1156 // the input state. To cache, we'd have to add logic for finding
1157 // never-compatible state changes.
1158 VSETVLIInfo TmpStatus;
1159 computeVLVTYPEChanges(MBB, TmpStatus);
1161 // If the new exit value matches the old exit value, we don't need to revisit
1162 // any blocks.
1163 if (BBInfo.Exit == TmpStatus)
1164 return;
1166 BBInfo.Exit = TmpStatus;
1167 LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
1168 << " changed to " << BBInfo.Exit << "\n");
1170 // Add the successors to the work list so we can propagate the changed exit
1171 // status.
1172 for (MachineBasicBlock *S : MBB.successors())
1173 if (!BlockInfo[S->getNumber()].InQueue) {
1174 BlockInfo[S->getNumber()].InQueue = true;
1175 WorkList.push(S);
1179 // If we weren't able to prove a vsetvli was directly unneeded, it might still
1180 // be unneeded if the AVL is a phi node where all incoming values are VL
1181 // outputs from the last VSETVLI in their respective basic blocks.
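// For example (illustrative), if the AVL is "%avl = PHI [%vl1, %bb1],
// [%vl2, %bb2]" and each %vlN is produced by the final vsetvli of its
// predecessor with a matching VTYPE, then VL/VTYPE are already correct on
// every incoming path and no vsetvli is needed here.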
1182 bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
1183 const MachineBasicBlock &MBB) const {
1184 if (DisableInsertVSETVLPHIOpt)
1185 return true;
1187 if (!Require.hasAVLReg())
1188 return true;
1190 Register AVLReg = Require.getAVLReg();
1191 if (!AVLReg.isVirtual())
1192 return true;
1194 // We need the AVL to be produced by a PHI node in this basic block.
1195 MachineInstr *PHI = MRI->getVRegDef(AVLReg);
1196 if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
1197 return true;
1199 for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
1200 PHIOp += 2) {
1201 Register InReg = PHI->getOperand(PHIOp).getReg();
1202 MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
1203 const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
1204 // If the exit from the predecessor has the VTYPE we are looking for,
1205 // we might be able to avoid a VSETVLI.
1206 if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
1207 return true;
1209 // We need the PHI input to be the output of a VSET(I)VLI.
1210 MachineInstr *DefMI = MRI->getVRegDef(InReg);
1211 if (!DefMI || !isVectorConfigInstr(*DefMI))
1212 return true;
1214 // We found a VSET(I)VLI; make sure it matches the output of the
1215 // predecessor block.
1216 VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1217 if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
1218 !DefInfo.hasSameVTYPE(PBBInfo.Exit))
1219 return true;
1222 // If all the incoming values to the PHI checked out, we don't need
1223 // to insert a VSETVLI.
1224 return false;
1227 void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
1228 VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
1229 // Track whether the prefix of the block we've scanned is transparent
1230 // (meaning has not yet changed the abstract state).
1231 bool PrefixTransparent = true;
1232 for (MachineInstr &MI : MBB) {
1233 const VSETVLIInfo PrevInfo = CurInfo;
1234 transferBefore(CurInfo, MI);
1236 // If this is an explicit VSETVLI or VSETIVLI, update our state.
1237 if (isVectorConfigInstr(MI)) {
1238 // Conservatively, mark the VL and VTYPE as live.
1239 assert(MI.getOperand(3).getReg() == RISCV::VL &&
1240 MI.getOperand(4).getReg() == RISCV::VTYPE &&
1241 "Unexpected operands where VL and VTYPE should be");
1242 MI.getOperand(3).setIsDead(false);
1243 MI.getOperand(4).setIsDead(false);
1244 PrefixTransparent = false;
1247 uint64_t TSFlags = MI.getDesc().TSFlags;
1248 if (RISCVII::hasSEWOp(TSFlags)) {
1249 if (PrevInfo != CurInfo) {
1250 // If this is the first implicit state change, and the state change
1251 // requested can be proven to produce the same register contents, we
1252 // can skip emitting the actual state change and continue as if we
1253 // had since we know the GPR result of the implicit state change
1254 // wouldn't be used and VL/VTYPE registers are correct. Note that
1255 // we *do* need to model the state as if it changed because, while the
1256 // register contents are unchanged, the abstract model can change.
1257 if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
1258 insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
1259 PrefixTransparent = false;
1262 if (RISCVII::hasVLOp(TSFlags)) {
1263 MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
1264 if (VLOp.isReg()) {
1265 // Erase the AVL operand from the instruction.
1266 VLOp.setReg(RISCV::NoRegister);
1267 VLOp.setIsKill(false);
1269 MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
1270 /*isImp*/ true));
1272 MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
1273 /*isImp*/ true));
1276 if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
1277 MI.modifiesRegister(RISCV::VTYPE))
1278 PrefixTransparent = false;
1280 transferAfter(CurInfo, MI);
1283 // If we reach the end of the block and our current info doesn't match the
1284 // expected info, insert a vsetvli to correct.
1285 if (!UseStrictAsserts) {
1286 const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
1287 if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
1288 CurInfo != ExitInfo) {
1289 // Note there's an implicit assumption here that terminators never use
1290 // or modify VL or VTYPE. Also, fallthrough will return end().
1291 auto InsertPt = MBB.getFirstInstrTerminator();
1292 insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
1293 CurInfo);
1294 CurInfo = ExitInfo;
1298 if (UseStrictAsserts && CurInfo.isValid()) {
1299 const auto &Info = BlockInfo[MBB.getNumber()];
1300 if (CurInfo != Info.Exit) {
1301 LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
1302 LLVM_DEBUG(dbgs() << " begin state: " << Info.Pred << "\n");
1303 LLVM_DEBUG(dbgs() << " expected end state: " << Info.Exit << "\n");
1304 LLVM_DEBUG(dbgs() << " actual end state: " << CurInfo << "\n");
1306 assert(CurInfo == Info.Exit &&
1307 "InsertVSETVLI dataflow invariant violated");
1311 /// Return true if the VL value configured by a vset(i)vli with the
1312 /// provided Info must be equal to the requested AVL. That is, that
1313 /// AVL <= VLMAX.
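/// For example, with a minimum VLEN of 128, an AVL of 4 at e32/m1 satisfies
/// 4 * 32 = 128 <= 128 * 1 bits, so VL is guaranteed to equal the AVL.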
1314 static bool willVLBeAVL(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
1315 if (!Info.hasAVLImm())
1316 // VLMAX is always the same value.
1317 // TODO: Could extend to other registers by looking at the associated vreg
1318 // def placement.
1319 return RISCV::X0 == Info.getAVLReg();
1321 unsigned AVL = Info.getAVLImm();
1322 unsigned SEW = Info.getSEW();
1323 unsigned AVLInBits = AVL * SEW;
1325 unsigned LMul;
1326 bool Fractional;
1327 std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());
1329 if (Fractional)
1330 return ST.getRealMinVLen() / LMul >= AVLInBits;
1331 return ST.getRealMinVLen() * LMul >= AVLInBits;
1334 /// Perform simple partial redundancy elimination of the VSETVLI instructions
1335 /// we're about to insert by looking for cases where we can PRE from the
1336 /// beginning of one block to the end of one of its predecessors. Specifically,
1337 /// this is geared to catch the common case of a fixed length vsetvl in a single
1338 /// block loop when it could execute once in the preheader instead.
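/// As a simplified sketch (placeholder operands, not actual pass output), a
/// loop whose only configuration is "vsetivli zero, 4, e32, m1, ta, ma" has
/// that vsetivli placed so it executes once in the preheader; phase 3 then
/// sees a compatible incoming state and omits it inside the loop body.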
1339 void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
1340 if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
1341 return;
1343 MachineBasicBlock *UnavailablePred = nullptr;
1344 VSETVLIInfo AvailableInfo;
1345 for (MachineBasicBlock *P : MBB.predecessors()) {
1346 const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
1347 if (PredInfo.isUnknown()) {
1348 if (UnavailablePred)
1349 return;
1350 UnavailablePred = P;
1351 } else if (!AvailableInfo.isValid()) {
1352 AvailableInfo = PredInfo;
1353 } else if (AvailableInfo != PredInfo) {
1354 return;
1358 // Unreachable, single pred, or full redundancy. Note that FRE is handled by
1359 // phase 3.
1360 if (!UnavailablePred || !AvailableInfo.isValid())
1361 return;
1363 // Critical edge - TODO: consider splitting?
1364 if (UnavailablePred->succ_size() != 1)
1365 return;
1367 // If VL can be less than AVL, then we can't reduce the frequency of execution.
1368 if (!willVLBeAVL(AvailableInfo, *ST))
1369 return;
1371 // Model the effect of changing the input state of the block MBB to
1372 // AvailableInfo. We're looking for two issues here; one legality,
1373 // one profitability.
1374 // 1) If the block doesn't use some of the fields from VL or VTYPE, we
1375 // may hit the end of the block with a different end state. We can
1376 // not make this change without reflowing later blocks as well.
1377 // 2) If we don't actually remove a transition, inserting a vsetvli
1378 // into the predecessor block would be correct, but unprofitable.
1379 VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
1380 VSETVLIInfo CurInfo = AvailableInfo;
1381 int TransitionsRemoved = 0;
1382 for (const MachineInstr &MI : MBB) {
1383 const VSETVLIInfo LastInfo = CurInfo;
1384 const VSETVLIInfo LastOldInfo = OldInfo;
1385 transferBefore(CurInfo, MI);
1386 transferBefore(OldInfo, MI);
1387 if (CurInfo == LastInfo)
1388 TransitionsRemoved++;
1389 if (LastOldInfo == OldInfo)
1390 TransitionsRemoved--;
1391 transferAfter(CurInfo, MI);
1392 transferAfter(OldInfo, MI);
1393 if (CurInfo == OldInfo)
1394 // Convergence. All transitions after this must match by construction.
1395 break;
1397 if (CurInfo != OldInfo || TransitionsRemoved <= 0)
1398 // Issues 1 and 2 above
1399 return;
1401 // Finally, update both data flow state and insert the actual vsetvli.
1402 // Doing both keeps the code in sync with the dataflow results, which
1403 // is critical for correctness of phase 3.
1404 auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
1405 LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
1406 << UnavailablePred->getName() << " with state "
1407 << AvailableInfo << "\n");
1408 BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
1409 BlockInfo[MBB.getNumber()].Pred = AvailableInfo;
1411 // Note there's an implicit assumption here that terminators never use
1412 // or modify VL or VTYPE. Also, fallthrough will return end().
1413 auto InsertPt = UnavailablePred->getFirstInstrTerminator();
1414 insertVSETVLI(*UnavailablePred, InsertPt,
1415 UnavailablePred->findDebugLoc(InsertPt),
1416 AvailableInfo, OldExit);
1419 static void doUnion(DemandedFields &A, DemandedFields B) {
1420 A.VLAny |= B.VLAny;
1421 A.VLZeroness |= B.VLZeroness;
1422 A.SEW = std::max(A.SEW, B.SEW);
1423 A.LMUL |= B.LMUL;
1424 A.SEWLMULRatio |= B.SEWLMULRatio;
1425 A.TailPolicy |= B.TailPolicy;
1426 A.MaskPolicy |= B.MaskPolicy;
1429 static bool isNonZeroAVL(const MachineOperand &MO) {
1430 if (MO.isReg())
1431 return RISCV::X0 == MO.getReg();
1432 assert(MO.isImm());
1433 return 0 != MO.getImm();
1436 // Return true if we can mutate PrevMI to match MI without changing any of the
1437 // fields which would be observed.
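// For example (illustrative), if nothing between the two configs demands VL
// and only the tail policy is demanded from VTYPE, a prior
// "vsetvli zero, a0, e32, m1, ta, ma" can take on the AVL and VTYPE of a
// later "vsetivli zero, 4, e64, m2, ta, ma", allowing the later one to be
// removed by doLocalPostpass.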
1438 static bool canMutatePriorConfig(const MachineInstr &PrevMI,
1439 const MachineInstr &MI,
1440 const DemandedFields &Used) {
1441 // If the VL values aren't equal, return false if either a) the former is
1442 // demanded, or b) we can't rewrite the former to be the latter for
1443 // implementation reasons.
1444 if (!isVLPreservingConfig(MI)) {
1445 if (Used.VLAny)
1446 return false;
1448 // We don't bother to handle the equally zero case here as it's largely
1449 // uninteresting.
1450 if (Used.VLZeroness) {
1451 if (isVLPreservingConfig(PrevMI))
1452 return false;
1453 if (!isNonZeroAVL(MI.getOperand(1)) ||
1454 !isNonZeroAVL(PrevMI.getOperand(1)))
1455 return false;
1458 // TODO: Track whether the register is defined between
1459 // PrevMI and MI.
1460 if (MI.getOperand(1).isReg() &&
1461 RISCV::X0 != MI.getOperand(1).getReg())
1462 return false;
1465 if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
1466 return false;
1468 auto PriorVType = PrevMI.getOperand(2).getImm();
1469 auto VType = MI.getOperand(2).getImm();
1470 return areCompatibleVTYPEs(PriorVType, VType, Used);
1473 void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
1474 MachineInstr *NextMI = nullptr;
1475 // We can have arbitrary code in successors, so VL and VTYPE
1476 // must be considered demanded.
1477 DemandedFields Used;
1478 Used.demandVL();
1479 Used.demandVTYPE();
1480 SmallVector<MachineInstr*> ToDelete;
1481 for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
1483 if (!isVectorConfigInstr(MI)) {
1484 doUnion(Used, getDemanded(MI, MRI, ST));
1485 continue;
1488 Register VRegDef = MI.getOperand(0).getReg();
1489 if (VRegDef != RISCV::X0 &&
1490 !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
1491 Used.demandVL();
1493 if (NextMI) {
1494 if (!Used.usedVL() && !Used.usedVTYPE()) {
1495 ToDelete.push_back(&MI);
1496 // Leave NextMI unchanged
1497 continue;
1498 } else if (canMutatePriorConfig(MI, *NextMI, Used)) {
1499 if (!isVLPreservingConfig(*NextMI)) {
1500 MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
1501 MI.getOperand(0).setIsDead(false);
1502 if (NextMI->getOperand(1).isImm())
1503 MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
1504 else
1505 MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
1506 MI.setDesc(NextMI->getDesc());
1508 MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
1509 ToDelete.push_back(NextMI);
1510 // fallthrough
1513 NextMI = &MI;
1514 Used = getDemanded(MI, MRI, ST);
1517 for (auto *MI : ToDelete)
1518 MI->eraseFromParent();
1521 void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
1522 for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
1523 MachineInstr &MI = *I++;
1524 if (RISCV::isFaultFirstLoad(MI)) {
1525 Register VLOutput = MI.getOperand(1).getReg();
1526 if (!MRI->use_nodbg_empty(VLOutput))
1527 BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
1528 VLOutput);
1529 // We don't use the vl output of the VLEFF/VLSEGFF anymore.
1530 MI.getOperand(1).setReg(RISCV::X0);
1535 bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1536 // Skip if the vector extension is not enabled.
1537 ST = &MF.getSubtarget<RISCVSubtarget>();
1538 if (!ST->hasVInstructions())
1539 return false;
1541 LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
1543 TII = ST->getInstrInfo();
1544 MRI = &MF.getRegInfo();
1546 assert(BlockInfo.empty() && "Expect empty block infos");
1547 BlockInfo.resize(MF.getNumBlockIDs());
1549 bool HaveVectorOp = false;
1551 // Phase 1 - determine how VL/VTYPE are affected by each block.
1552 for (const MachineBasicBlock &MBB : MF) {
1553 VSETVLIInfo TmpStatus;
1554 HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
1555 // Initial exit state is whatever change we found in the block.
1556 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1557 BBInfo.Exit = TmpStatus;
1558 LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
1559 << " is " << BBInfo.Exit << "\n");
1563 // If we didn't find any instructions that need VSETVLI, we're done.
1564 if (!HaveVectorOp) {
1565 BlockInfo.clear();
1566 return false;
1569 // Phase 2 - determine the exit VL/VTYPE from each block. We add all
1570 // blocks to the list here, but will also add any that need to be revisited
1571 // during Phase 2 processing.
1572 for (const MachineBasicBlock &MBB : MF) {
1573 WorkList.push(&MBB);
1574 BlockInfo[MBB.getNumber()].InQueue = true;
1576 while (!WorkList.empty()) {
1577 const MachineBasicBlock &MBB = *WorkList.front();
1578 WorkList.pop();
1579 computeIncomingVLVTYPE(MBB);
1582 // Perform partial redundancy elimination of vsetvli transitions.
1583 for (MachineBasicBlock &MBB : MF)
1584 doPRE(MBB);
1586 // Phase 3 - add any vsetvli instructions needed in the block. Use the
1587 // Phase 2 information to avoid adding vsetvlis before the first vector
1588 // instruction in the block if the VL/VTYPE is satisfied by its
1589 // predecessors.
1590 for (MachineBasicBlock &MBB : MF)
1591 emitVSETVLIs(MBB);
1593 // Now that all vsetvlis are explicit, go through and do block local
1594 // DSE and peephole based demanded fields based transforms. Note that
1595 // this *must* be done outside the main dataflow so long as we allow
1596 // any cross block analysis within the dataflow. We can't have both
1597 // demanded fields based mutation and non-local analysis in the
1598 // dataflow at the same time without introducing inconsistencies.
1599 for (MachineBasicBlock &MBB : MF)
1600 doLocalPostpass(MBB);
1602 // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
1603 // of VLEFF/VLSEGFF.
1604 for (MachineBasicBlock &MBB : MF)
1605 insertReadVL(MBB);
1607 BlockInfo.clear();
1608 return HaveVectorOp;
1611 /// Returns an instance of the Insert VSETVLI pass.
1612 FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
1613 return new RISCVInsertVSETVLI();