//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
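//
// As an illustrative sketch (simplified, not taken from a real test): a
// vector add that requires SEW=32 and LMUL=1 with its AVL in a0 ends up
// preceded by a configuration instruction:
//
//   vsetvli zero, a0, e32, m1, ta, ma
//   vadd.vv v8, v9, v10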
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

static bool isNonZeroLoadImmediate(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDI &&
         MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
         MI.getOperand(1).getReg() == RISCV::X0 &&
         MI.getOperand(2).getImm() != 0;
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
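///
/// For example, under a tail agnostic policy a tail lane may either keep its
/// old value or be overwritten with all ones, whereas a pseudo whose passthru
/// is $noreg places no constraint at all on what the inactive lanes contain.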
static bool hasUndefinedMergeOp(const MachineInstr &MI) {
  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the pass through
    // lanes are undefined.
    return true;

  // All undefined passthrus should be $noreg: see
  // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3,              // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWGreaterThanOrEqualAndLessThan64 =
        1,      // SEW can be changed as long as it's greater
                // than or equal to the original value, but must be less
                // than 64.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

  // Make this the result of demanding both the fields in this and B.
  void doUnion(const DemandedFields &B) {
    VLAny |= B.VLAny;
    VLZeroness |= B.VLZeroness;
    SEW = std::max(SEW, B.SEW);
    LMUL |= B.LMUL;
    SEWLMULRatio |= B.SEWLMULRatio;
    TailPolicy |= B.TailPolicy;
    MaskPolicy |= B.MaskPolicy;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    }
    OS << ", ";
    OS << "LMUL=" << LMUL << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
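///
/// For example (an illustrative case, not exhaustive): if Used demands only
/// SEWLMULRatio, then {SEW=32, LMUL=1} and {SEW=16, LMUL=1/2} are compatible,
/// since both have SEW/LMUL = 32 and therefore the same VLMAX.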
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  if (Used.LMUL &&
      RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
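  // For example (illustrative): vle32.v encodes EEW=32 in the opcode, so
  // executing it under {SEW=32, LMUL=1} or under {SEW=16, LMUL=1/2} accesses
  // the same elements, since EMUL = (EEW/SEW) * LMUL = 1 in both cases.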
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype, and can disregard policy bits. Warning: It's tempting to try
    // doing this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedMergeOp(MI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  struct AVLDef {
    const MachineInstr *DefMI;
    Register DefReg;
  };
  union {
    AVLDef AVLRegDef;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    AVLIsVLMAX,
    AVLIsIgnored,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLRegDef(const MachineInstr *DefMI, Register AVLReg) {
    assert(DefMI && AVLReg.isVirtual());
    AVLRegDef.DefMI = DefMI;
    AVLRegDef.DefReg = AVLReg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  void setAVLVLMAX() { State = AVLIsVLMAX; }

  void setAVLIgnored() { State = AVLIsIgnored; }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
  bool hasAVLIgnored() const { return State == AVLIsIgnored; }
  Register getAVLReg() const {
    assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
    return AVLRegDef.DefReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  const MachineInstr &getAVLDefMI() const {
    assert(hasAVLReg() && AVLRegDef.DefMI);
    return *AVLRegDef.DefMI;
  }

  void setAVL(VSETVLIInfo Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLRegDef(&Info.getAVLDefMI(), Info.getAVLReg());
    else if (Info.hasAVLVLMAX())
      setAVLVLMAX();
    else if (Info.hasAVLIgnored())
      setAVLIgnored();
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg())
      return isNonZeroLoadImmediate(getAVLDefMI());
    if (hasAVLVLMAX())
      return true;
    if (hasAVLIgnored())
      return false;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL() && Other.hasNonZeroAVL());
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg())
      return AVLRegDef.DefMI == Other.AVLRegDef.DefMI &&
             AVLRegDef.DefReg == Other.AVLRegDef.DefReg;

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    if (hasAVLVLMAX())
      return Other.hasAVLVLMAX() && hasSameVLMAX(Other);

    if (hasAVLIgnored())
      return Other.hasAVLIgnored();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const MachineRegisterInfo &MRI) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)getAVLReg();
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    if (hasAVLVLMAX())
      OS << "AVLVLMAX";
    if (hasAVLIgnored())
      OS << "AVLIgnored";
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

class RISCVCoalesceVSETVLI : public MachineFunctionPass {
public:
  static char ID;
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;

  RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addRequired<LiveIntervals>();
    AU.addPreserved<LiveIntervals>();
    AU.addRequired<SlotIndexes>();
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveDebugVariables>();
    AU.addPreserved<LiveStacks>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }

private:
  bool coalesceVSETVLIs(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

char RISCVCoalesceVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
                RISCV_COALESCE_VSETVLI_NAME, false, false)

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    if (AVLReg == RISCV::X0)
      NewInfo.setAVLVLMAX();
    else
      NewInfo.setAVLRegDef(MRI.getUniqueVRegDef(AVLReg), AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVII::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}
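
// A worked example for computeVLMAX (values chosen purely for illustration):
// VLEN=128, SEW=32, LMUL=2 gives VLMAX = (128 * 2) / 32 = 8, while
// VLEN=128, SEW=32, LMUL=1/2 gives VLMAX = (128 / 2) / 32 = 2.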

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const RISCVSubtarget &ST,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedMergeOp(MI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead. This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST.getRealMaxVLen(), SEW, VLMul);
        if (ST.getRealMinVLen() == ST.getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLVLMAX();
      } else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLRegDef(MRI->getUniqueVRegDef(VLOp.getReg()),
                             VLOp.getReg());
    }
  } else {
    assert(isScalarExtractInstr(MI));
    // TODO: If we are more clever about x0,x0 insertion then we should be able
    // to deduce that the VL is ignored based off of DemandedFields, and remove
    // the AVLIsIgnored state. Then we can just use an arbitrary immediate AVL.
    InstrInfo.setAVLIgnored();
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
  // AVL operand with the AVL of the defining vsetvli. We avoid general
  // register AVLs to avoid extending live ranges without being sure we can
  // kill the original source reg entirely.
  if (InstrInfo.hasAVLReg()) {
    const MachineInstr &DefMI = InstrInfo.getAVLDefMI();
    if (isVectorConfigInstr(DefMI)) {
      VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(DefMI, *MRI);
      if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
          (DefInstrInfo.hasAVLImm() || DefInstrInfo.hasAVLVLMAX()))
        InstrInfo.setAVL(DefInstrInfo);
    }
  }

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {

  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
      const MachineInstr &DefMI = Info.getAVLDefMI();
      if (isVectorConfigInstr(DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI);
        if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
          BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
              .addReg(RISCV::X0, RegState::Define | RegState::Dead)
              .addReg(RISCV::X0, RegState::Kill)
              .addImm(Info.encodeVTYPE())
              .addReg(RISCV::VL, RegState::Implicit);
          return;
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (Info.hasAVLIgnored()) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 1 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(1)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (Info.hasAVLVLMAX()) {
    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(DestReg, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
  BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
      .addReg(RISCV::X0, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI));

  if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
    return true;

  DemandedFields Used = getDemanded(MI, ST);

  // A slidedown/slideup with an *undefined* merge op can freely clobber
  // elements not copied from the source vector (e.g. masked off, tail, or
  // slideup's prefix). Notes:
  // * We can't modify SEW here since the slide amount is in units of SEW.
  // * VL=1 is special only because we have existing support for zero vs
  //   non-zero VL. We could generalize this if we had a VL > C predicate.
  // * The LMUL1 restriction is for machines whose latency may depend on VL.
  // * As above, this is only legal for tail "undefined" not "agnostic".
  if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI)) {
    Used.VLAny = false;
    Used.VLZeroness = true;
    Used.LMUL = false;
    Used.TailPolicy = false;
  }

  // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated the same
  // semantically as vmv.s.x. This is particularly useful since we don't have
  // an immediate form of vmv.s.x, and thus frequently use vmv.v.i in its
  // place. Since a splat is non-constant time in LMUL, we do need to be
  // careful to not increase the number of active vector registers (unlike for
  // vmv.s.x).
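  // For example (illustrative): with VL=1 and a tail undefined result,
  // "vmv.v.i v8, 5" writes 5 to element 0 only, which matches what a
  // vmv.s.x of the same value would produce.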
  if (isScalarSplatInstr(MI) && Require.hasAVLImm() &&
      Require.getAVLImm() == 1 && isLMUL1OrSmaller(CurInfo.getVLMUL()) &&
      hasUndefinedMergeOp(MI)) {
    Used.LMUL = false;
    Used.SEWLMULRatio = false;
    Used.VLAny = false;
    if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
      Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
    else
      Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
    Used.TailPolicy = false;
  }

  if (CurInfo.isCompatible(Used, Require, *MRI))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && CurInfo.hasCompatibleVTYPE(Used, Require)) {
    const MachineInstr &DefMI = Require.getAVLDefMI();
    if (isVectorConfigInstr(DefMI)) {
      VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI);
      if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
        return false;
    }
  }

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = true;
  }

  return Info;
}
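
// For example (illustrative): if the previous state is {SEW=32, LMUL=1}
// (ratio 32) and the incoming instruction wants SEW=64 without demanding
// LMUL or the ratio, we pick LMUL=2 so the ratio, and hence VLMAX, stays
// unchanged.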

// Given an incoming state reaching MI, minimally modifies that state so that it
// is compatible with MI. The resulting state is guaranteed to be semantically
// legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  DemandedFields Demanded = getDemanded(MI, ST);
  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs. This removes a vsetvli entirely if the types
  // match, or allows use of the cheaper AVL-preserving variant if VLMAX
  // doesn't change. If VLMAX might change, we couldn't use the
  // 'vsetvli x0, x0, vtype' variant, so we avoid the transform to prevent
  // extending the live range of an AVL register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI, *MRI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLRegDef(MRI->getUniqueVRegDef(MI.getOperand(1).getReg()),
                      MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
      MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed a block's output state can change based on
  // the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  const MachineInstr *PHI = &Require.getAVLDefMI();
  if (PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getUniqueVRegDef(InReg);
    assert(DefMI);
    if (!isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, *MRI);
    if (DefInfo != PBBExit)
      return true;

    // Require has the same VL as PBBExit, so if the exit from the
    // predecessor has the VTYPE we are looking for we might be able
    // to avoid a VSETVLI.
    if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had, since we know the GPR result of the implicit state change
        // wouldn't be used and the VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed, as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();
          MachineInstr *VLOpDef = MRI->getUniqueVRegDef(Reg);
          assert(VLOpDef);

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);

          // If the AVL was an immediate > 31, then it would have been emitted
          // as an ADDI. However, the ADDI might not have been used in the
          // vsetvli, or a vsetvli might not have been emitted, so it may be
          // dead now.
          if (TII->isAddImmediate(*VLOpDef, Reg) && MRI->use_nodbg_empty(Reg))
            VLOpDef->eraseFromParent();
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  const auto &Info = BlockInfo[MBB.getNumber()];
  if (CurInfo != Info.Exit) {
    LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
    LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
    LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
    LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
  }
  assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
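///
/// A sketch of the targeted shape (illustrative): a single-block loop whose
/// body starts by requiring, say, {AVL=a0, e32, m1} while the preheader's
/// exit state is unknown. Copying that vsetvli to the end of the preheader
/// makes the state available on the back edge as well, so the transition
/// inside the loop becomes redundant.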
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
  // the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
  if (AvailableInfo.hasAVLReg()) {
    const MachineInstr *AVLDefMI = &AvailableInfo.getAVLDefMI();
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (AVLDefMI->getParent() != UnavailablePred)
      return;
    for (auto &TermMI : UnavailablePred->terminators())
      if (&TermMI == AVLDefMI)
        return;
  }

  // If the AVL isn't used in its predecessors then bail, since we have no AVL
  // to insert a vsetvli with.
  if (AvailableInfo.hasAVLIgnored())
    return;

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here; one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used,
                                 const MachineRegisterInfo &MRI) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI, MRI)
               .hasEquallyZeroAVL(getInfoForVSETVLI(MI, MRI)))
        return false;
    }

    auto &AVL = MI.getOperand(1);
    auto &PrevAVL = PrevMI.getOperand(1);

    // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
    // For now just check that PrevMI uses the same virtual register.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
        (!MRI.hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
         PrevAVL.getReg() != AVL.getReg()))
      return false;
  }

  assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr*> ToDelete;
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {

    if (!isVectorConfigInstr(MI)) {
      Used.doUnion(getDemanded(MI, ST));
      if (MI.isCall() || MI.isInlineAsm() ||
          MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
          MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
        NextMI = nullptr;
      continue;
    }

    if (!MI.getOperand(0).isDead())
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      }

      if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
        if (!isVLPreservingConfig(*NextMI)) {
          Register DefReg = NextMI->getOperand(0).getReg();

          MI.getOperand(0).setReg(DefReg);
          MI.getOperand(0).setIsDead(false);

          // The def of DefReg moved to MI, so extend the LiveInterval up to
          // it.
          if (DefReg.isVirtual()) {
            LiveInterval &DefLI = LIS->getInterval(DefReg);
            SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
            VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
            LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
            DefLI.addSegment(S);
            DefVNI->def = MISlot;
            // Mark DefLI as spillable if it was previously unspillable
            DefLI.setWeight(0);

            // DefReg may have had no uses, in which case we need to shrink
            // the LiveInterval up to MI.
            LIS->shrinkToUses(&DefLI);
          }
        }

        Register OldVLReg;
        if (MI.getOperand(1).isReg())
          OldVLReg = MI.getOperand(1).getReg();
        if (NextMI->getOperand(1).isImm())
          MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
        else
          MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);

        // Clear NextMI's AVL early so we're not counting it as a use.
        if (NextMI->getOperand(1).isReg())
          NextMI->getOperand(1).setReg(RISCV::NoRegister);

        if (OldVLReg && OldVLReg.isVirtual()) {
          // NextMI no longer uses OldVLReg so shrink its LiveInterval.
          LIS->shrinkToUses(&LIS->getInterval(OldVLReg));

          MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
          if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
              MRI->use_nodbg_empty(OldVLReg)) {
            VLOpDef->eraseFromParent();
            LIS->removeInterval(OldVLReg);
          }
        }
        MI.setDesc(NextMI->getDesc());

        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI, ST);
  }

  NumCoalescedVSETVL += ToDelete.size();
  for (auto *MI : ToDelete) {
    LIS->RemoveMachineInstrFromMaps(*MI);
    MI->eraseFromParent();
  }

  return !ToDelete.empty();
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      assert(VLOutput.isVirtual());
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    VSETVLIInfo TmpStatus;
    HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = TmpStatus;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
  // of VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}

// Now that all vsetvlis are explicit, go through and do block local
// DSE and peephole based demanded fields based transforms. Note that
// this *must* be done outside the main dataflow so long as we allow
// any cross block analysis within the dataflow. We can't have both
// demanded fields based mutation and non-local analysis in the
// dataflow at the same time without introducing inconsistencies.
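//
// For example (illustrative): two back-to-back configurations such as
//
//   vsetvli zero, a0, e32, m1, ta, ma   ; VL result dead, no reader between
//   vsetvli zero, a0, e32, m2, ta, ma
//
// collapse to the second one alone, since nothing demands the first's
// VL/VTYPE state.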
bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;
  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();
  LIS = &getAnalysis<LiveIntervals>();

  bool Changed = false;
  for (MachineBasicBlock &MBB : MF)
    Changed |= coalesceVSETVLIs(MBB);

  return Changed;
}

FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
  return new RISCVCoalesceVSETVLI();
}