//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of the Vectorization Plan base classes:
/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
///    VPBlockBase, together implementing a Hierarchical CFG;
/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
///    within VPBasicBlocks;
/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
///    also inherit from VPValue;
/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
///    instruction;
/// 5. The VPlan class holding a candidate for vectorization;
/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
/// These are documented in docs/VectorizationPlan.rst.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H

#include "VPlanAnalysis.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/InstructionCost.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <string>

namespace llvm {

class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class IRBuilderBase;
class LoopInfo;
class raw_ostream;
class RecurrenceDescriptor;
class SCEV;
class Type;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
class VPReplicateRecipe;
class VPlanSlp;
class Value;
class LoopVectorizationCostModel;
class LoopVersioning;

struct VPCostContext;

namespace Intrinsic {
typedef unsigned ID;
} // namespace Intrinsic

/// Returns a calculation for the total number of elements for a given \p VF.
/// For fixed width vectors this value is a constant, whereas for scalable
/// vectors it is an expression determined at runtime.
Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);

/// Return a value for Step multiplied by VF.
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
                       int64_t Step);
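
// For illustration (not part of the original header): for a scalable VF of
// <vscale x 4 x ...>, getRuntimeVF materializes the IR expression
// `vscale * 4`, while for a fixed VF of 4 it folds to the constant 4.
// A minimal sketch of how a caller might use these helpers:
//
//   // Assuming an IRBuilder `B` positioned in the vector preheader:
//   Value *NumElts = getRuntimeVF(B, B.getInt64Ty(), VF); // vscale * 4
//   Value *Stride  = createStepForVF(B, B.getInt64Ty(), VF, /*Step=*/2);
//   // Stride == VF * 2, usable e.g. as an induction increment.
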
/// A helper function that returns the reciprocal of the block probability of
/// predicated blocks. If we return X, we are assuming the predicated block
/// will execute once for every X iterations of the loop header.
///
/// TODO: We should use actual block probability here, if available. Currently,
/// we always assume predicated blocks have a 50% chance of executing.
inline unsigned getReciprocalPredBlockProb() { return 2; }
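
// Illustrative use (an assumption about typical call sites, not original
// text): a cost model can divide a predicated block's cost by this factor,
// e.g. `BlockCost /= getReciprocalPredBlockProb();`, to account for the block
// executing on only ~1/2 of the header's iterations.
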
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 16) = {1, 2, 4, 8}
struct VFRange {
  // A power of 2.
  const ElementCount Start;

  // A power of 2. If End <= Start, the range is empty.
  ElementCount End;

  bool isEmpty() const {
    return End.getKnownMinValue() <= Start.getKnownMinValue();
  }

  VFRange(const ElementCount &Start, const ElementCount &End)
      : Start(Start), End(End) {
    assert(Start.isScalable() == End.isScalable() &&
           "Both Start and End should have the same scalable flag");
    assert(isPowerOf2_32(Start.getKnownMinValue()) &&
           "Expected Start to be a power of 2");
    assert(isPowerOf2_32(End.getKnownMinValue()) &&
           "Expected End to be a power of 2");
  }

  /// Iterator to iterate over vectorization factors in a VFRange.
  class iterator
      : public iterator_facade_base<iterator, std::forward_iterator_tag,
                                    ElementCount> {
    ElementCount VF;

  public:
    iterator(ElementCount VF) : VF(VF) {}

    bool operator==(const iterator &Other) const { return VF == Other.VF; }

    ElementCount operator*() const { return VF; }

    iterator &operator++() {
      VF *= 2;
      return *this;
    }
  };

  iterator begin() { return iterator(Start); }
  iterator end() {
    assert(isPowerOf2_32(End.getKnownMinValue()));
    return iterator(End);
  }
};
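
// A minimal usage sketch (assumed, not from the original header): iterating
// all candidate VFs in [1, 16) for fixed-width vectors.
//
//   VFRange Range(ElementCount::getFixed(1), ElementCount::getFixed(16));
//   for (ElementCount VF : Range)
//     ...; // visits 1, 2, 4, 8 as the iterator doubles VF each step.
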
using VPlanPtr = std::unique_ptr<VPlan>;

/// In what follows, the term "input IR" refers to code that is fed into the
/// vectorizer whereas the term "output IR" refers to code that is generated by
/// the vectorizer.

/// VPLane provides a way to access lanes in both fixed width and scalable
/// vectors, where for the latter the lane index sometimes needs calculating
/// as a runtime expression.
class VPLane {
public:
  /// Kind describes how to interpret Lane.
  enum class Kind : uint8_t {
    /// For First, Lane is the index into the first N elements of a
    /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
    First,
    /// For ScalableLast, Lane is the offset from the start of the last
    /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
    /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
    /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
    ScalableLast
  };

private:
  /// in [0..VF)
  unsigned Lane;

  /// Indicates how the Lane should be interpreted, as described above.
  Kind LaneKind;

public:
  VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {}
  VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}

  static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }

  static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
    assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
           "trying to extract with invalid offset");
    unsigned LaneOffset = VF.getKnownMinValue() - Offset;
    Kind LaneKind;
    if (VF.isScalable())
      // In this case 'LaneOffset' refers to the offset from the start of the
      // last subvector with VF.getKnownMinValue() elements.
      LaneKind = VPLane::Kind::ScalableLast;
    else
      LaneKind = VPLane::Kind::First;
    return VPLane(LaneOffset, LaneKind);
  }

  static VPLane getLastLaneForVF(const ElementCount &VF) {
    return getLaneFromEnd(VF, 1);
  }

  /// Returns a compile-time known value for the lane index and asserts if the
  /// lane can only be calculated at runtime.
  unsigned getKnownLane() const {
    assert(LaneKind == Kind::First);
    return Lane;
  }

  /// Returns an expression describing the lane index that can be used at
  /// runtime.
  Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;

  /// Returns the Kind of lane offset.
  Kind getKind() const { return LaneKind; }

  /// Returns true if this is the first lane of the whole vector.
  bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }

  /// Maps the lane to a cache index based on \p VF.
  unsigned mapToCacheIndex(const ElementCount &VF) const {
    switch (LaneKind) {
    case VPLane::Kind::ScalableLast:
      assert(VF.isScalable() && Lane < VF.getKnownMinValue());
      return VF.getKnownMinValue() + Lane;
    default:
      assert(Lane < VF.getKnownMinValue());
      return Lane;
    }
  }

  /// Returns the maximum number of lanes that we are able to consider
  /// caching for \p VF.
  static unsigned getNumCachedLanes(const ElementCount &VF) {
    return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
  }
};
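
// Worked example (assumed for illustration): with VF = <vscale x 4>,
// getNumCachedLanes(VF) == 8. A Kind::First lane 2 maps to cache index 2,
// while a Kind::ScalableLast lane 2 (i.e. lane (vscale - 1) * 4 + 2 at
// runtime) maps to cache index 4 + 2 == 6, so both kinds can share one
// per-VPValue scalar cache without colliding.
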
/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
  VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, unsigned UF,
                   LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder,
                   InnerLoopVectorizer *ILV, VPlan *Plan, Type *CanonicalIVTy);
  /// Target Transform Info.
  const TargetTransformInfo *TTI;

  /// The chosen Vectorization Factor of the loop being vectorized.
  ElementCount VF;

  /// Hold the index to generate specific scalar instructions. Null indicates
  /// that all instances are to be generated, using either scalar or vector
  /// instructions.
  std::optional<VPLane> Lane;

  struct DataState {
    // Each value from the original loop, when vectorized, is represented by a
    // vector value in the map.
    DenseMap<VPValue *, Value *> VPV2Vector;

    DenseMap<VPValue *, SmallVector<Value *, 4>> VPV2Scalars;
  } Data;

  /// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
  /// is false, otherwise return the generated scalar. \see set.
  Value *get(VPValue *Def, bool IsScalar = false);

  /// Get the generated Value for a given VPValue and a given \p Lane.
  Value *get(VPValue *Def, const VPLane &Lane);

  bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }

  bool hasScalarValue(VPValue *Def, VPLane Lane) {
    auto I = Data.VPV2Scalars.find(Def);
    if (I == Data.VPV2Scalars.end())
      return false;
    unsigned CacheIdx = Lane.mapToCacheIndex(VF);
    return CacheIdx < I->second.size() && I->second[CacheIdx];
  }

  /// Set the generated vector Value for a given VPValue, if \p
  /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
  void set(VPValue *Def, Value *V, bool IsScalar = false) {
    if (IsScalar) {
      set(Def, V, VPLane(0));
      return;
    }
    assert((VF.isScalar() || V->getType()->isVectorTy()) &&
           "scalar values must be stored as (0, 0)");
    Data.VPV2Vector[Def] = V;
  }

  /// Reset an existing vector value for \p Def.
  void reset(VPValue *Def, Value *V) {
    assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
    Data.VPV2Vector[Def] = V;
  }

  /// Set the generated scalar \p V for \p Def and the given \p Lane.
  void set(VPValue *Def, Value *V, const VPLane &Lane) {
    auto &Scalars = Data.VPV2Scalars[Def];
    unsigned CacheIdx = Lane.mapToCacheIndex(VF);
    if (Scalars.size() <= CacheIdx)
      Scalars.resize(CacheIdx + 1);
    assert(!Scalars[CacheIdx] && "should overwrite existing value");
    Scalars[CacheIdx] = V;
  }

  /// Reset an existing scalar value for \p Def and a given \p Lane.
  void reset(VPValue *Def, Value *V, const VPLane &Lane) {
    auto Iter = Data.VPV2Scalars.find(Def);
    assert(Iter != Data.VPV2Scalars.end() &&
           "need to overwrite existing value");
    unsigned CacheIdx = Lane.mapToCacheIndex(VF);
    assert(CacheIdx < Iter->second.size() &&
           "need to overwrite existing value");
    Iter->second[CacheIdx] = V;
  }
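
  // Sketch of the intended set/get contract (illustrative, not original
  // text): a recipe's execute() typically stores what it generated, and
  // users of the VPValue fetch it back later in the same VPTransformState:
  //
  //   State.set(Def, WideVal);              // cache the per-VF vector value
  //   Value *V = State.get(Def);            // same vector value
  //   State.set(Def, ScalarVal, VPLane(0)); // cache lane 0's scalar
  //   Value *S = State.get(Def, VPLane(0)); // scalar for lane 0
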
  /// Add additional metadata to \p To that was not present on \p Orig.
  ///
  /// Currently this is used to add the noalias annotations based on the
  /// inserted memchecks. Use this for instructions that are *cloned* into the
  /// vector loop.
  void addNewMetadata(Instruction *To, const Instruction *Orig);

  /// Add metadata from one instruction to another.
  ///
  /// This includes both the original MDs from \p From and additional ones (\see
  /// addNewMetadata). Use this for *newly created* instructions in the vector
  /// loop.
  void addMetadata(Value *To, Instruction *From);

  /// Set the debug location in the builder using the debug location \p DL.
  void setDebugLocFrom(DebugLoc DL);

  /// Construct the vector value of a scalarized value \p V one lane at a time.
  void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane);

  /// Hold state information used when constructing the CFG of the output IR,
  /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
  struct CFGState {
    /// The previous VPBasicBlock visited. Initially set to null.
    VPBasicBlock *PrevVPBB = nullptr;

    /// The previous IR BasicBlock created or used. Initially set to the new
    /// header BasicBlock.
    BasicBlock *PrevBB = nullptr;

    /// The last IR BasicBlock in the output IR. Set to the exit block of the
    /// vector loop.
    BasicBlock *ExitBB = nullptr;

    /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
    /// of replication, maps the BasicBlock of the last replica created.
    SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;

    /// Updater for the DominatorTree.
    DomTreeUpdater DTU;

    CFGState(DominatorTree *DT)
        : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}

    /// Returns the BasicBlock* mapped to the pre-header of the loop region
    /// containing \p R.
    BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
  } CFG;

  /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
  LoopInfo *LI;

  /// Hold a reference to the IRBuilder used to generate output IR code.
  IRBuilderBase &Builder;

  /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
  InnerLoopVectorizer *ILV;

  /// Pointer to the VPlan for which code is generated.
  VPlan *Plan;

  /// The loop object for the current parent region, or nullptr.
  Loop *CurrentVectorLoop = nullptr;

  /// LoopVersioning. It's only set up (non-null) if memchecks were
  /// used.
  ///
  /// This is currently only used to add no-alias metadata based on the
  /// memchecks. The actual versioning is performed manually.
  LoopVersioning *LVer = nullptr;

  /// Map SCEVs to their expanded values. Populated when executing
  /// VPExpandSCEVRecipes.
  DenseMap<const SCEV *, Value *> ExpandedSCEVs;

  /// VPlan-based type analysis.
  VPTypeAnalysis TypeAnalysis;
};

/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
class VPBlockBase {
  friend class VPBlockUtils;

  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).

  /// An optional name for the block.
  std::string Name;

  /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
  /// it is a topmost VPBlockBase.
  VPRegionBlock *Parent = nullptr;

  /// List of predecessor blocks.
  SmallVector<VPBlockBase *, 1> Predecessors;

  /// List of successor blocks.
  SmallVector<VPBlockBase *, 1> Successors;

  /// VPlan containing the block. Can only be set on the entry block of the
  /// plan.
  VPlan *Plan = nullptr;

  /// Add \p Successor as the last successor to this block.
  void appendSuccessor(VPBlockBase *Successor) {
    assert(Successor && "Cannot add nullptr successor!");
    Successors.push_back(Successor);
  }

  /// Add \p Predecessor as the last predecessor to this block.
  void appendPredecessor(VPBlockBase *Predecessor) {
    assert(Predecessor && "Cannot add nullptr predecessor!");
    Predecessors.push_back(Predecessor);
  }

  /// Remove \p Predecessor from the predecessors of this block.
  void removePredecessor(VPBlockBase *Predecessor) {
    auto Pos = find(Predecessors, Predecessor);
    assert(Pos && "Predecessor does not exist");
    Predecessors.erase(Pos);
  }

  /// Remove \p Successor from the successors of this block.
  void removeSuccessor(VPBlockBase *Successor) {
    auto Pos = find(Successors, Successor);
    assert(Pos && "Successor does not exist");
    Successors.erase(Pos);
  }

  /// This function replaces one predecessor with another, useful when
  /// trying to replace an old block in the CFG with a new one.
  void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
    auto I = find(Predecessors, Old);
    assert(I != Predecessors.end());
    assert(Old->getParent() == New->getParent() &&
           "replaced predecessor must have the same parent");
    *I = New;
  }

  /// This function replaces one successor with another, useful when
  /// trying to replace an old block in the CFG with a new one.
  void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
    auto I = find(Successors, Old);
    assert(I != Successors.end());
    assert(Old->getParent() == New->getParent() &&
           "replaced successor must have the same parent");
    *I = New;
  }

protected:
  VPBlockBase(const unsigned char SC, const std::string &N)
      : SubclassID(SC), Name(N) {}

public:
  /// An enumeration for keeping track of the concrete subclass of VPBlockBase
  /// that are actually instantiated. Values of this enumeration are kept in the
  /// SubclassID field of the VPBlockBase objects. They are used for concrete
  /// type identification.
  using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };

  using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;

  virtual ~VPBlockBase() = default;

  const std::string &getName() const { return Name; }

  void setName(const Twine &newName) { Name = newName.str(); }

  /// \return an ID for the concrete type of this object.
  /// This is used to implement the classof checks. This should not be used
  /// for any other purpose, as the values may change as LLVM evolves.
  unsigned getVPBlockID() const { return SubclassID; }

  VPRegionBlock *getParent() { return Parent; }
  const VPRegionBlock *getParent() const { return Parent; }

  /// \return A pointer to the plan containing the current block.
  VPlan *getPlan();
  const VPlan *getPlan() const;

  /// Sets the pointer of the plan containing the block. The block must be the
  /// entry block into the VPlan.
  void setPlan(VPlan *ParentPlan);

  void setParent(VPRegionBlock *P) { Parent = P; }

  /// \return the VPBasicBlock that is the entry of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getEntryBasicBlock() const;
  VPBasicBlock *getEntryBasicBlock();

  /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getExitingBasicBlock() const;
  VPBasicBlock *getExitingBasicBlock();

  const VPBlocksTy &getSuccessors() const { return Successors; }
  VPBlocksTy &getSuccessors() { return Successors; }

  iterator_range<VPBlockBase **> successors() { return Successors; }
  iterator_range<VPBlockBase **> predecessors() { return Predecessors; }

  const VPBlocksTy &getPredecessors() const { return Predecessors; }
  VPBlocksTy &getPredecessors() { return Predecessors; }

  /// \return the successor of this VPBlockBase if it has a single successor.
  /// Otherwise return a null pointer.
  VPBlockBase *getSingleSuccessor() const {
    return (Successors.size() == 1 ? *Successors.begin() : nullptr);
  }

  /// \return the predecessor of this VPBlockBase if it has a single
  /// predecessor. Otherwise return a null pointer.
  VPBlockBase *getSinglePredecessor() const {
    return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
  }

  size_t getNumSuccessors() const { return Successors.size(); }
  size_t getNumPredecessors() const { return Predecessors.size(); }

  /// An Enclosing Block of a block B is any block containing B, including B
  /// itself. \return the closest enclosing block starting from "this", which
  /// has successors. \return the root enclosing block if all enclosing blocks
  /// have no successors.
  VPBlockBase *getEnclosingBlockWithSuccessors();

  /// \return the closest enclosing block starting from "this", which has
  /// predecessors. \return the root enclosing block if all enclosing blocks
  /// have no predecessors.
  VPBlockBase *getEnclosingBlockWithPredecessors();

  /// \return the successors either attached directly to this VPBlockBase or, if
  /// this VPBlockBase is the exit block of a VPRegionBlock and has no
  /// successors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has successors and return them. If no such
  /// VPRegionBlock exists, return the (empty) successors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalSuccessors() {
    return getEnclosingBlockWithSuccessors()->getSuccessors();
  }

  /// \return the hierarchical successor of this VPBlockBase if it has a single
  /// hierarchical successor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalSuccessor() {
    return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
  }

  /// \return the predecessors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
  /// predecessors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has predecessors and return them. If no such
  /// VPRegionBlock exists, return the (empty) predecessors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalPredecessors() {
    return getEnclosingBlockWithPredecessors()->getPredecessors();
  }

  /// \return the hierarchical predecessor of this VPBlockBase if it has a
  /// single hierarchical predecessor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalPredecessor() {
    return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
  }

  /// Set a given VPBlockBase \p Successor as the single successor of this
  /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
  /// This VPBlockBase must have no successors.
  void setOneSuccessor(VPBlockBase *Successor) {
    assert(Successors.empty() && "Setting one successor when others exist.");
    assert(Successor->getParent() == getParent() &&
           "connected blocks must have the same parent");
    appendSuccessor(Successor);
  }

  /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
  /// successors of this VPBlockBase. This VPBlockBase is not added as
  /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
  /// successors.
  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
    assert(Successors.empty() && "Setting two successors when others exist.");
    appendSuccessor(IfTrue);
    appendSuccessor(IfFalse);
  }

  /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
  /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
  /// as successor of any VPBasicBlock in \p NewPreds.
  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
    assert(Predecessors.empty() && "Block predecessors already set.");
    for (auto *Pred : NewPreds)
      appendPredecessor(Pred);
  }

  /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
  /// This VPBlockBase must have no successors. This VPBlockBase is not added
  /// as predecessor of any VPBasicBlock in \p NewSuccs.
  void setSuccessors(ArrayRef<VPBlockBase *> NewSuccs) {
    assert(Successors.empty() && "Block successors already set.");
    for (auto *Succ : NewSuccs)
      appendSuccessor(Succ);
  }
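
  // Illustrative wiring sketch (assumed; real construction typically goes
  // through VPBlockUtils, which also maintains the reverse edges):
  //
  //   Cond->setTwoSuccessors(Then, Else); // Cond -> {Then, Else}
  //   Then->setPredecessors({Cond});      // reverse edges set separately
  //   Else->setPredecessors({Cond});
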
  /// Remove all the predecessors of this block.
  void clearPredecessors() { Predecessors.clear(); }

  /// Remove all the successors of this block.
  void clearSuccessors() { Successors.clear(); }

  /// Swap successors of the block. The block must have exactly 2 successors.
  // TODO: This should be part of introducing conditional branch recipes rather
  // than being independent.
  void swapSuccessors() {
    assert(Successors.size() == 2 && "must have 2 successors to swap");
    std::swap(Successors[0], Successors[1]);
  }

  /// The method which generates the output IR that corresponds to this
  /// VPBlockBase, thereby "executing" the VPlan.
  virtual void execute(VPTransformState *State) = 0;

  /// Return the cost of the block.
  virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0;

  /// Delete all blocks reachable from a given VPBlockBase, inclusive.
  static void deleteCFG(VPBlockBase *Entry);

  /// Return true if it is legal to hoist instructions into this block.
  bool isLegalToHoistInto() {
    // There are currently no constraints that prevent an instruction from
    // being hoisted into a VPBlockBase.
    return true;
  }

  /// Replace all operands of VPUsers in the block with \p NewValue and also
  /// replaces all uses of VPValues defined in the block with NewValue.
  virtual void dropAllReferences(VPValue *NewValue) = 0;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
    OS << getName();
  }

  /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
  /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  virtual void print(raw_ostream &O, const Twine &Indent,
                     VPSlotTracker &SlotTracker) const = 0;

  /// Print plain-text dump of this VPlan to \p O.
  void print(raw_ostream &O) const {
    VPSlotTracker SlotTracker(getPlan());
    print(O, "", SlotTracker);
  }

  /// Print the successors of this block to \p O, prefixing all lines with \p
  /// Indent.
  void printSuccessors(raw_ostream &O, const Twine &Indent) const;

  /// Dump this VPBlockBase to dbgs().
  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
#endif

  /// Clone the current block and its recipes without updating the operands of
  /// the cloned recipes, including all blocks in the single-entry single-exit
  /// region for VPRegionBlocks.
  virtual VPBlockBase *clone() = 0;
};

/// Struct to hold various analyses needed for cost computations.
struct VPCostContext {
  const TargetTransformInfo &TTI;
  const TargetLibraryInfo &TLI;
  VPTypeAnalysis Types;
  LLVMContext &LLVMCtx;
  LoopVectorizationCostModel &CM;
  SmallPtrSet<Instruction *, 8> SkipCostComputation;

  VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
                Type *CanIVTy, LoopVectorizationCostModel &CM)
      : TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
        CM(CM) {}

  /// Return the cost for \p UI with \p VF using the legacy cost model as
  /// fallback until computing the cost of all recipes migrates to VPlan.
  InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const;

  /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
  /// has already been pre-computed.
  bool skipCostComputation(Instruction *UI, bool IsVector) const;

  /// Returns the OperandInfo for \p V, if it is a live-in.
  TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const;
};

/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// recipes must inherit from VPSingleDef instead of inheriting from both
/// VPRecipeBase and VPValue separately.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
                     public VPDef,
                     public VPUser {
  friend VPBasicBlock;
  friend class VPBlockUtils;

  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

  /// The debug location for the recipe.
  DebugLoc DL;

public:
  VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
               DebugLoc DL = {})
      : VPDef(SC), VPUser(Operands), DL(DL) {}

  template <typename IterT>
  VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands,
               DebugLoc DL = {})
      : VPDef(SC), VPUser(Operands), DL(DL) {}
  virtual ~VPRecipeBase() = default;

  /// Clone the current recipe.
  virtual VPRecipeBase *clone() = 0;

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(VPTransformState &State) = 0;

  /// Return the cost of this recipe, taking into account if the cost
  /// computation should be skipped and the ForceTargetInstructionCost flag.
  /// Also takes care of printing the cost for debugging.
  InstructionCost cost(ElementCount VF, VPCostContext &Ctx);

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);
  /// Insert an unlinked recipe into \p BB immediately before the insertion
  /// point \p IP.
  void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// Unlink this recipe and insert into BB before I.
  ///
  /// \pre I is a valid iterator into BB.
  void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    // All VPDefs are also VPRecipeBases.
    return true;
  }

  static inline bool classof(const VPUser *U) { return true; }

  /// Returns true if the recipe may have side-effects.
  bool mayHaveSideEffects() const;

  /// Returns true for PHI-like recipes.
  bool isPhi() const {
    return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
  }

  /// Returns true if the recipe may read from memory.
  bool mayReadFromMemory() const;

  /// Returns true if the recipe may write to memory.
  bool mayWriteToMemory() const;

  /// Returns true if the recipe may read from or write to memory.
  bool mayReadOrWriteMemory() const {
    return mayReadFromMemory() || mayWriteToMemory();
  }

  /// Returns the debug location of the recipe.
  DebugLoc getDebugLoc() const { return DL; }

protected:
  /// Compute the cost of this recipe either using a recipe's specialized
  /// implementation or using the legacy cost model and the underlying
  /// instructions.
  virtual InstructionCost computeCost(ElementCount VF,
                                      VPCostContext &Ctx) const;
};

// Helper macro to define common classof implementations for recipes.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *D) {                                 \
    return D->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    auto *R = V->getDefiningRecipe();                                          \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    auto *R = dyn_cast<VPRecipeBase>(U);                                       \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPDefID() == VPDefID;                                         \
  }
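
// Usage sketch (illustrative): a recipe places the macro in its public
// section, which is what lets generic code dispatch with the usual LLVM RTTI
// helpers, e.g.
//
//   if (auto *Cast = dyn_cast<VPWidenCastRecipe>(&R))
//     ...; // works because VPWidenCastRecipe expands
//          // VP_CLASSOF_IMPL(VPDef::VPWidenCastSC).
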
/// VPSingleDef is a base class for recipes for modeling a sequence of one or
/// more output IR that define a single result VPValue.
/// Note that VPRecipeBase must be inherited from before VPValue.
class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
public:
  template <typename IterT>
  VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this) {}

  VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
                    DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this) {}

  template <typename IterT>
  VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
                    DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}

  static inline bool classof(const VPRecipeBase *R) {
    switch (R->getVPDefID()) {
    case VPRecipeBase::VPDerivedIVSC:
    case VPRecipeBase::VPEVLBasedIVPHISC:
    case VPRecipeBase::VPExpandSCEVSC:
    case VPRecipeBase::VPInstructionSC:
    case VPRecipeBase::VPReductionEVLSC:
    case VPRecipeBase::VPReductionSC:
    case VPRecipeBase::VPReplicateSC:
    case VPRecipeBase::VPScalarIVStepsSC:
    case VPRecipeBase::VPVectorPointerSC:
    case VPRecipeBase::VPReverseVectorPointerSC:
    case VPRecipeBase::VPWidenCallSC:
    case VPRecipeBase::VPWidenCanonicalIVSC:
    case VPRecipeBase::VPWidenCastSC:
    case VPRecipeBase::VPWidenGEPSC:
    case VPRecipeBase::VPWidenIntrinsicSC:
    case VPRecipeBase::VPWidenSC:
    case VPRecipeBase::VPWidenEVLSC:
    case VPRecipeBase::VPWidenSelectSC:
    case VPRecipeBase::VPBlendSC:
    case VPRecipeBase::VPPredInstPHISC:
    case VPRecipeBase::VPCanonicalIVPHISC:
    case VPRecipeBase::VPActiveLaneMaskPHISC:
    case VPRecipeBase::VPFirstOrderRecurrencePHISC:
    case VPRecipeBase::VPWidenPHISC:
    case VPRecipeBase::VPWidenIntOrFpInductionSC:
    case VPRecipeBase::VPWidenPointerInductionSC:
    case VPRecipeBase::VPReductionPHISC:
    case VPRecipeBase::VPScalarCastSC:
      return true;
    case VPRecipeBase::VPBranchOnMaskSC:
    case VPRecipeBase::VPInterleaveSC:
    case VPRecipeBase::VPIRInstructionSC:
    case VPRecipeBase::VPWidenLoadEVLSC:
    case VPRecipeBase::VPWidenLoadSC:
    case VPRecipeBase::VPWidenStoreEVLSC:
    case VPRecipeBase::VPWidenStoreSC:
    case VPRecipeBase::VPHistogramSC:
      // TODO: Widened stores don't define a value, but widened loads do. Split
      // the recipes to be able to make widened loads VPSingleDefRecipes.
      return false;
    }
    llvm_unreachable("Unhandled VPDefID");
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  virtual VPSingleDefRecipe *clone() override = 0;

  /// Returns the underlying instruction.
  Instruction *getUnderlyingInstr() {
    return cast<Instruction>(getUnderlyingValue());
  }
  const Instruction *getUnderlyingInstr() const {
    return cast<Instruction>(getUnderlyingValue());
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPSingleDefRecipe to dbgs() (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif
};

/// Class to record LLVM IR flags for a recipe along with it.
class VPRecipeWithIRFlags : public VPSingleDefRecipe {
  enum class OperationType : unsigned char {
    Cmp,
    OverflowingBinOp,
    DisjointOp,
    PossiblyExactOp,
    GEPOp,
    FPMathOp,
    NonNegOp,
    Other
  };

public:
  struct WrapFlagsTy {
    char HasNUW : 1;
    char HasNSW : 1;

    WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
  };

  struct DisjointFlagsTy {
    char IsDisjoint : 1;
    DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
  };

  struct GEPFlagsTy {
    char IsInBounds : 1;
    GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}
  };

private:
  struct ExactFlagsTy {
    char IsExact : 1;
  };
  struct NonNegFlagsTy {
    char NonNeg : 1;
  };
  struct FastMathFlagsTy {
    char AllowReassoc : 1;
    char NoNaNs : 1;
    char NoInfs : 1;
    char NoSignedZeros : 1;
    char AllowReciprocal : 1;
    char AllowContract : 1;
    char ApproxFunc : 1;

    FastMathFlagsTy(const FastMathFlags &FMF);
  };

  OperationType OpType;

  union {
    CmpInst::Predicate CmpPredicate;
    WrapFlagsTy WrapFlags;
    DisjointFlagsTy DisjointFlags;
    ExactFlagsTy ExactFlags;
    GEPFlagsTy GEPFlags;
    NonNegFlagsTy NonNegFlags;
    FastMathFlagsTy FMFs;
    unsigned AllFlags;
  };

protected:
  void transferFlags(VPRecipeWithIRFlags &Other) {
    OpType = Other.OpType;
    AllFlags = Other.AllFlags;
  }

public:
  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL) {
    OpType = OperationType::Other;
    AllFlags = 0;
  }

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
      : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()) {
    if (auto *Op = dyn_cast<CmpInst>(&I)) {
      OpType = OperationType::Cmp;
      CmpPredicate = Op->getPredicate();
    } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
      OpType = OperationType::DisjointOp;
      DisjointFlags.IsDisjoint = Op->isDisjoint();
    } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
      OpType = OperationType::OverflowingBinOp;
      WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
    } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
      OpType = OperationType::PossiblyExactOp;
      ExactFlags.IsExact = Op->isExact();
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
      OpType = OperationType::GEPOp;
      GEPFlags.IsInBounds = GEP->isInBounds();
    } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
      OpType = OperationType::NonNegOp;
      NonNegFlags.NonNeg = PNNI->hasNonNeg();
    } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
      OpType = OperationType::FPMathOp;
      FMFs = Op->getFastMathFlags();
    } else {
      OpType = OperationType::Other;
      AllFlags = 0;
    }
  }

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      CmpInst::Predicate Pred, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
        CmpPredicate(Pred) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      WrapFlagsTy WrapFlags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL),
        OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      FastMathFlags FMFs, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
        FMFs(FMFs) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      DisjointFlagsTy DisjointFlags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
        DisjointFlags(DisjointFlags) {}

protected:
  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      GEPFlagsTy GEPFlags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
        GEPFlags(GEPFlags) {}

public:
  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
           R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
           R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
           R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  /// Drop all poison-generating flags.
  void dropPoisonGeneratingFlags() {
    // NOTE: This needs to be kept in-sync with
    // Instruction::dropPoisonGeneratingFlags.
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      WrapFlags.HasNUW = false;
      WrapFlags.HasNSW = false;
      break;
    case OperationType::DisjointOp:
      DisjointFlags.IsDisjoint = false;
      break;
    case OperationType::PossiblyExactOp:
      ExactFlags.IsExact = false;
      break;
    case OperationType::GEPOp:
      GEPFlags.IsInBounds = false;
      break;
    case OperationType::FPMathOp:
      FMFs.NoNaNs = false;
      FMFs.NoInfs = false;
      break;
    case OperationType::NonNegOp:
      NonNegFlags.NonNeg = false;
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }

  /// Set the IR flags for \p I.
  void setFlags(Instruction *I) const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
      I->setHasNoSignedWrap(WrapFlags.HasNSW);
      break;
    case OperationType::DisjointOp:
      cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
      break;
    case OperationType::PossiblyExactOp:
      I->setIsExact(ExactFlags.IsExact);
      break;
    case OperationType::GEPOp:
      // TODO(gep_nowrap): Track the full GEPNoWrapFlags in VPlan.
      cast<GetElementPtrInst>(I)->setNoWrapFlags(
          GEPFlags.IsInBounds ? GEPNoWrapFlags::inBounds()
                              : GEPNoWrapFlags::none());
      break;
    case OperationType::FPMathOp:
      I->setHasAllowReassoc(FMFs.AllowReassoc);
      I->setHasNoNaNs(FMFs.NoNaNs);
      I->setHasNoInfs(FMFs.NoInfs);
      I->setHasNoSignedZeros(FMFs.NoSignedZeros);
      I->setHasAllowReciprocal(FMFs.AllowReciprocal);
      I->setHasAllowContract(FMFs.AllowContract);
      I->setHasApproxFunc(FMFs.ApproxFunc);
      break;
    case OperationType::NonNegOp:
      I->setNonNeg(NonNegFlags.NonNeg);
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }

  CmpInst::Predicate getPredicate() const {
    assert(OpType == OperationType::Cmp &&
           "recipe doesn't have a compare predicate");
    return CmpPredicate;
  }

  bool isInBounds() const {
    assert(OpType == OperationType::GEPOp &&
           "recipe doesn't have inbounds flag");
    return GEPFlags.IsInBounds;
  }

  /// Returns true if the recipe has fast-math flags.
  bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }

  FastMathFlags getFastMathFlags() const;

  bool hasNoUnsignedWrap() const {
    assert(OpType == OperationType::OverflowingBinOp &&
           "recipe doesn't have a NUW flag");
    return WrapFlags.HasNUW;
  }

  bool hasNoSignedWrap() const {
    assert(OpType == OperationType::OverflowingBinOp &&
           "recipe doesn't have a NSW flag");
    return WrapFlags.HasNSW;
  }

  bool isDisjoint() const {
    assert(OpType == OperationType::DisjointOp &&
           "recipe cannot have a disjoint flag");
    return DisjointFlags.IsDisjoint;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printFlags(raw_ostream &O) const;
#endif
};
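
// Typical flow (an illustrative sketch): the flag-carrying constructor above
// snapshots e.g. nuw/nsw from the ingredient, a transform may call
// dropPoisonGeneratingFlags() if it widens the recipe speculatively, and
// execute() re-applies whatever survived onto the generated IR via
// setFlags(NewI).
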
/// Helper to access the operand that contains the unroll part for this recipe
/// after unrolling.
template <unsigned PartOpIdx> class VPUnrollPartAccessor {
protected:
  /// Return the VPValue operand containing the unroll part or null if there is
  /// no such operand.
  VPValue *getUnrollPartOperand(VPUser &U) const;

  /// Return the unroll part.
  unsigned getUnrollPart(VPUser &U) const;
};

/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While as any Recipe it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPRecipeWithIRFlags,
                      public VPUnrollPartAccessor<1> {
  friend class VPlanSlp;

public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
  enum {
    FirstOrderRecurrenceSplice =
        Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
                                      // values of a first-order recurrence.
    Not,
    SLPLoad,
    SLPStore,
    ActiveLaneMask,
    ExplicitVectorLength,
    /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
    /// The first operand is the incoming value from the predecessor in VPlan,
    /// the second operand is the incoming value for all other predecessors
    /// (which are currently not modeled in VPlan).
    ResumePhi,
    CalculateTripCountMinusVF,
    // Increment the canonical IV separately for each unrolled part.
    CanonicalIVIncrementForPart,
    BranchOnCount,
    BranchOnCond,
    ComputeReductionResult,
    // Takes the VPValue to extract from as first operand and the lane or part
    // to extract as second operand, counting from the end starting with 1 for
    // last. The second operand must be a positive constant and <= VF.
    ExtractFromEnd,
    LogicalAnd, // Non-poison propagating logical And.
    // Add an offset in bytes (second operand) to a base pointer (first
    // operand). Only generates scalar values (either for the first lane only
    // or for all lanes, depending on its uses).
    PtrAdd,
    // Returns a scalar boolean value, which is true if any lane of its single
    // operand is true.
    AnyOf,
  };

private:
  typedef unsigned char OpcodeTy;
  OpcodeTy Opcode;

  /// An optional name that can be used for the generated IR instruction.
  const std::string Name;

  /// Returns true if this VPInstruction generates scalar values for all lanes.
  /// Most VPInstructions generate a single value per part, either vector or
  /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
  /// values per all lanes, stemming from an original ingredient. This method
  /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
  /// underlying ingredient.
  bool doesGeneratePerAllLanes() const;

  /// Returns true if we can generate a scalar for the first lane only if
  /// needed.
  bool canGenerateScalarForFirstLane() const;

  /// Utility methods serving execute(): generates a single vector instance of
  /// the modeled instruction. \returns the generated value. In some cases an
  /// existing value is returned rather than a generated one.
  Value *generate(VPTransformState &State);

  /// Utility methods serving execute(): generates a single scalar instance of
  /// the modeled instruction for a given lane. \returns the scalar generated
  /// value for lane \p Lane.
  Value *generatePerLane(VPTransformState &State, const VPLane &Lane);

#if !defined(NDEBUG)
  /// Return true if the VPInstruction is a floating point math operation, i.e.
  /// has fast-math flags.
  bool isFPMathOp() const;
#endif

public:
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,
                const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
        Opcode(Opcode), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                DebugLoc DL = {}, const Twine &Name = "")
      : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {}

  VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
                VPValue *B, DebugLoc DL = {}, const Twine &Name = "");

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
        Opcode(Opcode), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
                const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
        Opcode(Opcode), Name(Name.str()) {
    assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
  }

  VPInstruction(VPValue *Ptr, VPValue *Offset, GEPFlagsTy Flags,
                DebugLoc DL = {}, const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC,
                            ArrayRef<VPValue *>({Ptr, Offset}), Flags, DL),
        Opcode(VPInstruction::PtrAdd), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");

  VP_CLASSOF_IMPL(VPDef::VPInstructionSC)

  VPInstruction *clone() override {
    SmallVector<VPValue *, 2> Operands(operands());
    auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
    New->transferFlags(*this);
    return New;
  }
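
  // Construction sketch (illustrative; VPlan transforms are the usual
  // clients):
  //
  //   // "not" of a mask, with a name for the generated IR instruction:
  //   auto *NotMask = new VPInstruction(VPInstruction::Not, {Mask}, DL,
  //                                     "not.mask");
  //   // Latch branch on the canonical IV reaching the trip count:
  //   auto *Br = new VPInstruction(VPInstruction::BranchOnCount,
  //                                {CanonicalIV, TripCount}, DL);
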
  unsigned getOpcode() const { return Opcode; }

  /// Generate the instruction.
  /// TODO: We currently execute only per-part unless a specific instance is
  /// provided.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPInstruction.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPInstruction to \p O.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Print the VPInstruction to dbgs() (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  /// Return true if this instruction may modify memory.
  bool mayWriteToMemory() const {
    // TODO: we can use attributes of the called function to rule out memory
    // modifications.
    return Opcode == Instruction::Store || Opcode == Instruction::Call ||
           Opcode == Instruction::Invoke || Opcode == SLPStore;
  }

  bool hasResult() const {
    // CallInst may or may not have a result, depending on the called function.
    // Conservatively assume calls have results for now.
    switch (getOpcode()) {
    case Instruction::Ret:
    case Instruction::Br:
    case Instruction::Store:
    case Instruction::Switch:
    case Instruction::IndirectBr:
    case Instruction::Resume:
    case Instruction::CatchRet:
    case Instruction::Unreachable:
    case Instruction::Fence:
    case Instruction::AtomicRMW:
    case VPInstruction::BranchOnCond:
    case VPInstruction::BranchOnCount:
      return false;
    default:
      return true;
    }
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override;

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool onlyFirstPartUsed(const VPValue *Op) const override;

  /// Returns true if this VPInstruction produces a scalar value from a vector,
  /// e.g. by performing a reduction or extracting a lane.
  bool isVectorToScalar() const;

  /// Returns true if this VPInstruction's operands are single scalars and the
  /// result is also a single scalar.
  bool isSingleScalar() const;

  /// Returns the symbolic name assigned to the VPInstruction.
  StringRef getName() const { return Name; }
};

/// A recipe to wrap an original IR instruction not to be modified during
/// execution, except for PHIs. For PHIs, a single VPValue operand is allowed,
/// and it is used to add a new incoming value for the single predecessor VPBB.
/// Except for PHIs, VPIRInstructions cannot have any operands.
class VPIRInstruction : public VPRecipeBase {
  Instruction &I;

public:
  VPIRInstruction(Instruction &I)
      : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}

  ~VPIRInstruction() override = default;

  VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)

  VPIRInstruction *clone() override {
    auto *R = new VPIRInstruction(I);
    for (auto *Op : operands())
      R->addOperand(Op);
    return R;
  }

  void execute(VPTransformState &State) override;

  /// Return the cost of this VPIRInstruction.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  Instruction &getInstruction() const { return I; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  bool onlyFirstPartUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// VPWidenRecipe is a recipe for producing a widened instruction using the
/// opcode and operands of the recipe. This recipe covers most of the
/// traditional vectorization cases where each recipe transforms into a
/// vectorized version of itself.
class VPWidenRecipe : public VPRecipeWithIRFlags {
  unsigned Opcode;

protected:
  template <typename IterT>
  VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
                iterator_range<IterT> Operands)
      : VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}

public:
  template <typename IterT>
  VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
      : VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}

  ~VPWidenRecipe() override = default;

  VPWidenRecipe *clone() override {
    auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
    R->transferFlags(*this);
    return R;
  }

  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  /// Produce a widened instruction using the opcode and operands of the
  /// recipe, processing State.VF elements.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  unsigned getOpcode() const { return Opcode; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for widening operations with vector-predication intrinsics with
/// explicit vector length (EVL).
class VPWidenEVLRecipe : public VPWidenRecipe {
  using VPRecipeWithIRFlags::transferFlags;

public:
  template <typename IterT>
  VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
      : VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
    addOperand(&EVL);
  }
  VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
      : VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
    transferFlags(W);
  }

  ~VPWidenEVLRecipe() override = default;

  VPWidenRecipe *clone() override final {
    llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
    return nullptr;
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);

  VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
  const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }

  /// Produce a vp-intrinsic using the opcode and operands of the recipe,
  /// processing EVL elements.
  void execute(VPTransformState &State) override final;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // EVL in that recipe is always the last operand, thus any use before means
    // the VPValue should be vectorized.
    return getEVL() == Op;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override final;
#endif
};

1548 /// VPWidenCastRecipe is a recipe to create vector cast instructions.
1549 class VPWidenCastRecipe : public VPRecipeWithIRFlags {
1550 /// Cast instruction opcode.
1551 Instruction::CastOps Opcode;
1553 /// Result type for the cast.
1554 Type *ResultTy;
1556 public:
1557 VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
1558 CastInst &UI)
1559 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
1560 ResultTy(ResultTy) {
1561 assert(UI.getOpcode() == Opcode &&
1562 "opcode of underlying cast doesn't match");
1565 VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
1566 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
1567 ResultTy(ResultTy) {}
1569 ~VPWidenCastRecipe() override = default;
1571 VPWidenCastRecipe *clone() override {
1572 if (auto *UV = getUnderlyingValue())
1573 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1574 *cast<CastInst>(UV));
1576 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
1579 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1581 /// Produce widened copies of the cast.
1582 void execute(VPTransformState &State) override;
1584 /// Return the cost of this VPWidenCastRecipe.
1585 InstructionCost computeCost(ElementCount VF,
1586 VPCostContext &Ctx) const override;
1588 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1589 /// Print the recipe.
1590 void print(raw_ostream &O, const Twine &Indent,
1591 VPSlotTracker &SlotTracker) const override;
1592 #endif
1594 Instruction::CastOps getOpcode() const { return Opcode; }
1596 /// Returns the result type of the cast.
1597 Type *getResultType() const { return ResultTy; }
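  // Construction sketch (illustrative, not from the source; VPV, Int64Ty and
  // UnderlyingZExt are hypothetical placeholders): a cast recipe may be built
  // with or without an underlying IR cast, and clone() above preserves
  // whichever form was used:
  //   auto *WithIR = new VPWidenCastRecipe(Instruction::ZExt, VPV, Int64Ty,
  //                                        UnderlyingZExt);
  //   auto *NoIR   = new VPWidenCastRecipe(Instruction::ZExt, VPV, Int64Ty);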
1600 /// VPScalarCastRecipe is a recipe to create scalar cast instructions.
1601 class VPScalarCastRecipe : public VPSingleDefRecipe {
1602 Instruction::CastOps Opcode;
1604 Type *ResultTy;
1606 Value *generate(VPTransformState &State);
1608 public:
1609 VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
1610 : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode),
1611 ResultTy(ResultTy) {}
1613 ~VPScalarCastRecipe() override = default;
1615 VPScalarCastRecipe *clone() override {
1616 return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy);
1619 VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
1621 void execute(VPTransformState &State) override;
1623 /// Return the cost of this VPScalarCastRecipe.
1624 InstructionCost computeCost(ElementCount VF,
1625 VPCostContext &Ctx) const override {
1626 // TODO: Compute accurate cost after retiring the legacy cost model.
1627 return 0;
1630 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1631 void print(raw_ostream &O, const Twine &Indent,
1632 VPSlotTracker &SlotTracker) const override;
1633 #endif
1635 /// Returns the result type of the cast.
1636 Type *getResultType() const { return ResultTy; }
1638 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1639 // At the moment, only uniform codegen is implemented.
1640 assert(is_contained(operands(), Op) &&
1641 "Op must be an operand of the recipe");
1642 return true;
1646 /// A recipe for widening vector intrinsics.
1647 class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
1648 /// ID of the vector intrinsic to widen.
1649 Intrinsic::ID VectorIntrinsicID;
1651 /// Scalar return type of the intrinsic.
1652 Type *ResultTy;
1654 /// True if the intrinsic may read from memory.
1655 bool MayReadFromMemory;
1657   /// True if the intrinsic may write to memory.
1658 bool MayWriteToMemory;
1660 /// True if the intrinsic may have side-effects.
1661 bool MayHaveSideEffects;
1663 public:
1664 VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
1665 ArrayRef<VPValue *> CallArguments, Type *Ty,
1666 DebugLoc DL = {})
1667 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
1668 VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1669 MayReadFromMemory(CI.mayReadFromMemory()),
1670 MayWriteToMemory(CI.mayWriteToMemory()),
1671 MayHaveSideEffects(CI.mayHaveSideEffects()) {}
1673 VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
1674 ArrayRef<VPValue *> CallArguments, Type *Ty,
1675 DebugLoc DL = {})
1676 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments),
1677 VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
1678 LLVMContext &Ctx = Ty->getContext();
1679 AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID);
1680 MemoryEffects ME = Attrs.getMemoryEffects();
1681     MayReadFromMemory = !ME.onlyWritesMemory();
1682     MayWriteToMemory = !ME.onlyReadsMemory();
1683 MayHaveSideEffects = MayWriteToMemory ||
1684 !Attrs.hasFnAttr(Attribute::NoUnwind) ||
1685 !Attrs.hasFnAttr(Attribute::WillReturn);
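    // Worked example (illustrative only): for a readnone intrinsic that is
    // also nounwind and willreturn, the attribute-based constructor above
    // computes:
    //   ME.onlyWritesMemory() == true  ->  MayReadFromMemory = false
    //   ME.onlyReadsMemory()  == true  ->  MayWriteToMemory  = false
    //   MayHaveSideEffects = MayWriteToMemory || !NoUnwind || !WillReturn
    //                      = false || false || false = false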
1688 VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
1689 std::initializer_list<VPValue *> CallArguments,
1690 Type *Ty, DebugLoc DL = {})
1691 : VPWidenIntrinsicRecipe(VectorIntrinsicID,
1692 ArrayRef<VPValue *>(CallArguments), Ty, DL) {}
1694 ~VPWidenIntrinsicRecipe() override = default;
1696 VPWidenIntrinsicRecipe *clone() override {
1697 return new VPWidenIntrinsicRecipe(*cast<CallInst>(getUnderlyingValue()),
1698 VectorIntrinsicID, {op_begin(), op_end()},
1699 ResultTy, getDebugLoc());
1702 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1704 /// Produce a widened version of the vector intrinsic.
1705 void execute(VPTransformState &State) override;
1707 /// Return the cost of this vector intrinsic.
1708 InstructionCost computeCost(ElementCount VF,
1709 VPCostContext &Ctx) const override;
1711 /// Return the scalar return type of the intrinsic.
1712 Type *getResultType() const { return ResultTy; }
1714   /// Return the name of the intrinsic as a string.
1715 StringRef getIntrinsicName() const;
1717 /// Returns true if the intrinsic may read from memory.
1718 bool mayReadFromMemory() const { return MayReadFromMemory; }
1720 /// Returns true if the intrinsic may write to memory.
1721 bool mayWriteToMemory() const { return MayWriteToMemory; }
1723 /// Returns true if the intrinsic may have side-effects.
1724 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1726 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1727 /// Print the recipe.
1728 void print(raw_ostream &O, const Twine &Indent,
1729 VPSlotTracker &SlotTracker) const override;
1730 #endif
1732 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1735 /// A recipe for widening Call instructions using library calls.
1736 class VPWidenCallRecipe : public VPRecipeWithIRFlags {
1737 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1738 /// between a given VF and the chosen vectorized variant, so there will be a
1739 /// different VPlan for each VF with a valid variant.
1740 Function *Variant;
1742 public:
1743 VPWidenCallRecipe(Value *UV, Function *Variant,
1744 ArrayRef<VPValue *> CallArguments, DebugLoc DL = {})
1745 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
1746 *cast<Instruction>(UV)),
1747 Variant(Variant) {
1748 assert(
1749 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1750 "last operand must be the called function");
1753 ~VPWidenCallRecipe() override = default;
1755 VPWidenCallRecipe *clone() override {
1756 return new VPWidenCallRecipe(getUnderlyingValue(), Variant,
1757 {op_begin(), op_end()}, getDebugLoc());
1760 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1762 /// Produce a widened version of the call instruction.
1763 void execute(VPTransformState &State) override;
1765 /// Return the cost of this VPWidenCallRecipe.
1766 InstructionCost computeCost(ElementCount VF,
1767 VPCostContext &Ctx) const override;
1769 Function *getCalledScalarFunction() const {
1770 return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
1773 operand_range arg_operands() {
1774 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1776 const_operand_range arg_operands() const {
1777 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
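  // Operand layout sketch (illustrative): the call arguments come first and
  // the scalar callee is always the trailing operand, e.g. {A0, A1, Callee}
  // for a two-argument call. arg_operands() therefore covers {A0, A1}, while
  // getCalledScalarFunction() reads the last operand.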
1780 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1781 /// Print the recipe.
1782 void print(raw_ostream &O, const Twine &Indent,
1783 VPSlotTracker &SlotTracker) const override;
1784 #endif
1787 /// A recipe representing a sequence of load -> update -> store as part of
1788 /// a histogram operation. This means there may be aliasing between vector
1789 /// lanes, which is handled by the llvm.experimental.vector.histogram family
1790 /// of intrinsics. The only update operations currently supported are
1791 /// 'add' and 'sub' where the other term is loop-invariant.
1792 class VPHistogramRecipe : public VPRecipeBase {
1793 /// Opcode of the update operation, currently either add or sub.
1794 unsigned Opcode;
1796 public:
1797 template <typename IterT>
1798 VPHistogramRecipe(unsigned Opcode, iterator_range<IterT> Operands,
1799 DebugLoc DL = {})
1800 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1802 ~VPHistogramRecipe() override = default;
1804 VPHistogramRecipe *clone() override {
1805 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1808 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1810 /// Produce a vectorized histogram operation.
1811 void execute(VPTransformState &State) override;
1813 /// Return the cost of this VPHistogramRecipe.
1814 InstructionCost computeCost(ElementCount VF,
1815 VPCostContext &Ctx) const override;
1817 unsigned getOpcode() const { return Opcode; }
1819 /// Return the mask operand if one was provided, or a null pointer if all
1820 /// lanes should be executed unconditionally.
1821 VPValue *getMask() const {
1822 return getNumOperands() == 3 ? getOperand(2) : nullptr;
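  // Operand layout sketch (illustrative; the operand names are hypothetical):
  // {Address, IncAmt[, Mask]}. With a mask the recipe has three operands and
  // getMask() returns getOperand(2); otherwise it returns nullptr and all
  // lanes execute unconditionally.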
1825 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1826 /// Print the recipe
1827 void print(raw_ostream &O, const Twine &Indent,
1828 VPSlotTracker &SlotTracker) const override;
1829 #endif
1832 /// A recipe for widening select instructions.
1833 struct VPWidenSelectRecipe : public VPSingleDefRecipe {
1834 template <typename IterT>
1835 VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
1836 : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
1837 I.getDebugLoc()) {}
1839 ~VPWidenSelectRecipe() override = default;
1841 VPWidenSelectRecipe *clone() override {
1842 return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
1843 operands());
1846 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1848 /// Produce a widened version of the select instruction.
1849 void execute(VPTransformState &State) override;
1851 /// Return the cost of this VPWidenSelectRecipe.
1852 InstructionCost computeCost(ElementCount VF,
1853 VPCostContext &Ctx) const override;
1855 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1856 /// Print the recipe.
1857 void print(raw_ostream &O, const Twine &Indent,
1858 VPSlotTracker &SlotTracker) const override;
1859 #endif
1861 VPValue *getCond() const {
1862 return getOperand(0);
1865 bool isInvariantCond() const {
1866 return getCond()->isDefinedOutsideLoopRegions();
1870 /// A recipe for handling GEP instructions.
1871 class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
1872 bool isPointerLoopInvariant() const {
1873 return getOperand(0)->isDefinedOutsideLoopRegions();
1876 bool isIndexLoopInvariant(unsigned I) const {
1877 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
1880 bool areAllOperandsInvariant() const {
1881 return all_of(operands(), [](VPValue *Op) {
1882 return Op->isDefinedOutsideLoopRegions();
1886 public:
1887 template <typename IterT>
1888 VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
1889 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}
1891 ~VPWidenGEPRecipe() override = default;
1893 VPWidenGEPRecipe *clone() override {
1894 return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
1895 operands());
1898 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1900 /// Generate the gep nodes.
1901 void execute(VPTransformState &State) override;
1903 /// Return the cost of this VPWidenGEPRecipe.
1904 InstructionCost computeCost(ElementCount VF,
1905 VPCostContext &Ctx) const override {
1906 // TODO: Compute accurate cost after retiring the legacy cost model.
1907 return 0;
1910 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1911 /// Print the recipe.
1912 void print(raw_ostream &O, const Twine &Indent,
1913 VPSlotTracker &SlotTracker) const override;
1914 #endif
1917 /// A recipe to compute the pointers for widened memory accesses of IndexTy
1918 /// in reverse order.
1919 class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
1920 public VPUnrollPartAccessor<2> {
1921 Type *IndexedTy;
1923 public:
1924 VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
1925 bool IsInBounds, DebugLoc DL)
1926 : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
1927 ArrayRef<VPValue *>({Ptr, VF}),
1928 GEPFlagsTy(IsInBounds), DL),
1929 IndexedTy(IndexedTy) {}
1931 VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
1933 VPValue *getVFValue() { return getOperand(1); }
1934 const VPValue *getVFValue() const { return getOperand(1); }
1936 void execute(VPTransformState &State) override;
1938 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1939 assert(is_contained(operands(), Op) &&
1940 "Op must be an operand of the recipe");
1941 return true;
1944   /// Return the cost of this VPReverseVectorPointerRecipe.
1945 InstructionCost computeCost(ElementCount VF,
1946 VPCostContext &Ctx) const override {
1947 // TODO: Compute accurate cost after retiring the legacy cost model.
1948 return 0;
1951 /// Returns true if the recipe only uses the first part of operand \p Op.
1952 bool onlyFirstPartUsed(const VPValue *Op) const override {
1953 assert(is_contained(operands(), Op) &&
1954 "Op must be an operand of the recipe");
1955 assert(getNumOperands() <= 2 && "must have at most two operands");
1956 return true;
1959 VPReverseVectorPointerRecipe *clone() override {
1960 return new VPReverseVectorPointerRecipe(
1961 getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
1964 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1965 /// Print the recipe.
1966 void print(raw_ostream &O, const Twine &Indent,
1967 VPSlotTracker &SlotTracker) const override;
1968 #endif
1971 /// A recipe to compute the pointers for widened memory accesses of IndexTy.
1972 class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
1973 public VPUnrollPartAccessor<1> {
1974 Type *IndexedTy;
1976 public:
1977 VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
1978 DebugLoc DL)
1979 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1980 GEPFlagsTy(IsInBounds), DL),
1981 IndexedTy(IndexedTy) {}
1983 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1985 void execute(VPTransformState &State) override;
1987 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1988 assert(is_contained(operands(), Op) &&
1989 "Op must be an operand of the recipe");
1990 return true;
1993 /// Returns true if the recipe only uses the first part of operand \p Op.
1994 bool onlyFirstPartUsed(const VPValue *Op) const override {
1995 assert(is_contained(operands(), Op) &&
1996 "Op must be an operand of the recipe");
1997 assert(getNumOperands() <= 2 && "must have at most two operands");
1998 return true;
2001 VPVectorPointerRecipe *clone() override {
2002 return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
2003 getDebugLoc());
2006   /// Return the cost of this VPVectorPointerRecipe.
2007 InstructionCost computeCost(ElementCount VF,
2008 VPCostContext &Ctx) const override {
2009 // TODO: Compute accurate cost after retiring the legacy cost model.
2010 return 0;
2013 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2014 /// Print the recipe.
2015 void print(raw_ostream &O, const Twine &Indent,
2016 VPSlotTracker &SlotTracker) const override;
2017 #endif
2020 /// A pure virtual base class for all recipes modeling header phis, including
2021 /// phis for first order recurrences, pointer inductions and reductions. The
2022 /// start value is the first operand of the recipe and the incoming value from
2023 /// the backedge is the second operand.
2025 /// Inductions are modeled using the following sub-classes:
2026 /// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2027 /// starting at a specified value (zero for the main vector loop, the resume
2028 /// value for the epilogue vector loop) and stepping by 1. The induction
2029 /// controls exiting of the vector loop by comparing against the vector trip
2030 /// count. Produces a single scalar PHI for the induction value per
2031 /// iteration.
2032 /// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2033 /// floating point inductions with arbitrary start and step values. Produces
2034 /// a vector PHI per-part.
2035 /// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2036 /// value of an IV with different start and step values. Produces a single
2037 ///  scalar value per iteration.
2038 /// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2039 /// canonical or derived induction.
2040 /// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2041 /// pointer induction. Produces either a vector PHI per-part or scalar values
2042 /// per-lane based on the canonical induction.
2043 class VPHeaderPHIRecipe : public VPSingleDefRecipe {
2044 protected:
2045 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2046 VPValue *Start = nullptr, DebugLoc DL = {})
2047 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
2048 if (Start)
2049 addOperand(Start);
2052 public:
2053 ~VPHeaderPHIRecipe() override = default;
2055 /// Method to support type inquiry through isa, cast, and dyn_cast.
2056 static inline bool classof(const VPRecipeBase *B) {
2057 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2058 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2060 static inline bool classof(const VPValue *V) {
2061 auto *B = V->getDefiningRecipe();
2062 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2063 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
2066 /// Generate the phi nodes.
2067 void execute(VPTransformState &State) override = 0;
2069 /// Return the cost of this header phi recipe.
2070 InstructionCost computeCost(ElementCount VF,
2071 VPCostContext &Ctx) const override;
2073 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2074 /// Print the recipe.
2075 void print(raw_ostream &O, const Twine &Indent,
2076 VPSlotTracker &SlotTracker) const override = 0;
2077 #endif
2079 /// Returns the start value of the phi, if one is set.
2080 VPValue *getStartValue() {
2081 return getNumOperands() == 0 ? nullptr : getOperand(0);
2083 VPValue *getStartValue() const {
2084 return getNumOperands() == 0 ? nullptr : getOperand(0);
2087 /// Update the start value of the recipe.
2088 void setStartValue(VPValue *V) { setOperand(0, V); }
2090 /// Returns the incoming value from the loop backedge.
2091 virtual VPValue *getBackedgeValue() {
2092 return getOperand(1);
2095 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2096 /// to be a recipe.
2097 virtual VPRecipeBase &getBackedgeRecipe() {
2098 return *getBackedgeValue()->getDefiningRecipe();
2102 /// A recipe for handling phi nodes of integer and floating-point inductions,
2103 /// producing their vector values.
2104 class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
2105 PHINode *IV;
2106 TruncInst *Trunc;
2107 const InductionDescriptor &IndDesc;
2109 public:
2110 VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
2111 VPValue *VF, const InductionDescriptor &IndDesc)
2112 : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
2113 Trunc(nullptr), IndDesc(IndDesc) {
2114 addOperand(Step);
2115 addOperand(VF);
2118 VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
2119 VPValue *VF, const InductionDescriptor &IndDesc,
2120 TruncInst *Trunc)
2121 : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
2122 IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
2123 addOperand(Step);
2124 addOperand(VF);
2127 ~VPWidenIntOrFpInductionRecipe() override = default;
2129 VPWidenIntOrFpInductionRecipe *clone() override {
2130 return new VPWidenIntOrFpInductionRecipe(
2131 IV, getStartValue(), getStepValue(), getVFValue(), IndDesc, Trunc);
2134 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2136 /// Generate the vectorized and scalarized versions of the phi node as
2137 /// needed by their users.
2138 void execute(VPTransformState &State) override;
2140 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2141 /// Print the recipe.
2142 void print(raw_ostream &O, const Twine &Indent,
2143 VPSlotTracker &SlotTracker) const override;
2144 #endif
2146 VPValue *getBackedgeValue() override {
2147 // TODO: All operands of base recipe must exist and be at same index in
2148 // derived recipe.
2149 llvm_unreachable(
2150 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2153 VPRecipeBase &getBackedgeRecipe() override {
2154 // TODO: All operands of base recipe must exist and be at same index in
2155 // derived recipe.
2156 llvm_unreachable(
2157 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2160 /// Returns the step value of the induction.
2161 VPValue *getStepValue() { return getOperand(1); }
2162 const VPValue *getStepValue() const { return getOperand(1); }
2164 VPValue *getVFValue() { return getOperand(2); }
2165 const VPValue *getVFValue() const { return getOperand(2); }
2167 VPValue *getSplatVFValue() {
2168     // If the recipe has been unrolled (5 operands), return the VPValue for the
2169 // induction increment.
2170 return getNumOperands() == 5 ? getOperand(3) : nullptr;
2173 /// Returns the first defined value as TruncInst, if it is one or nullptr
2174 /// otherwise.
2175 TruncInst *getTruncInst() { return Trunc; }
2176 const TruncInst *getTruncInst() const { return Trunc; }
2178 PHINode *getPHINode() { return IV; }
2180 /// Returns the induction descriptor for the recipe.
2181 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2183 /// Returns true if the induction is canonical, i.e. starting at 0 and
2184 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2185 /// same type as the canonical induction.
2186 bool isCanonical() const;
2188 /// Returns the scalar type of the induction.
2189 Type *getScalarType() const {
2190 return Trunc ? Trunc->getType() : IV->getType();
2193 /// Returns the VPValue representing the value of this induction at
2194 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2195 /// take place.
2196 VPValue *getLastUnrolledPartOperand() {
2197 return getNumOperands() == 5 ? getOperand(4) : this;
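  // Unrolling sketch (illustrative): before unrolling the operands are
  // {Start, Step, VF}; after unrolling they become
  // {Start, Step, VF, SplatVF, LastUnrolledPart}, so getSplatVFValue() maps
  // to getOperand(3) and getLastUnrolledPartOperand() to getOperand(4).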
2201 class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
2202 public VPUnrollPartAccessor<3> {
2203 const InductionDescriptor &IndDesc;
2205 bool IsScalarAfterVectorization;
2207 public:
2208 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2209 /// Start.
2210 VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
2211 const InductionDescriptor &IndDesc,
2212 bool IsScalarAfterVectorization)
2213 : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
2214 IndDesc(IndDesc),
2215 IsScalarAfterVectorization(IsScalarAfterVectorization) {
2216 addOperand(Start);
2217 addOperand(Step);
2220 ~VPWidenPointerInductionRecipe() override = default;
2222 VPWidenPointerInductionRecipe *clone() override {
2223 return new VPWidenPointerInductionRecipe(
2224 cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
2225 IndDesc, IsScalarAfterVectorization);
2228 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2230 /// Generate vector values for the pointer induction.
2231 void execute(VPTransformState &State) override;
2233 /// Returns true if only scalar values will be generated.
2234 bool onlyScalarsGenerated(bool IsScalable);
2236 /// Returns the induction descriptor for the recipe.
2237 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2239 /// Returns the VPValue representing the value of this induction at
2240 /// the first unrolled part, if it exists. Returns itself if unrolling did not
2241 /// take place.
2242 VPValue *getFirstUnrolledPartOperand() {
2243 return getUnrollPart(*this) == 0 ? this : getOperand(2);
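  // Sketch (illustrative): the operands are {Start, Step}; unrolling appends
  // the part-0 value as getOperand(2), so part 0 returns itself while later
  // parts refer back to the first unrolled part.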
2246 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2247 /// Print the recipe.
2248 void print(raw_ostream &O, const Twine &Indent,
2249 VPSlotTracker &SlotTracker) const override;
2250 #endif
2253 /// Recipe to generate a scalar PHI. Used to generate code for recipes that
2254 /// produce scalar header phis, including VPCanonicalIVPHIRecipe and
2255 /// VPEVLBasedIVPHIRecipe.
2256 class VPScalarPHIRecipe : public VPHeaderPHIRecipe {
2257 std::string Name;
2259 public:
2260 VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL,
2261 StringRef Name)
2262 : VPHeaderPHIRecipe(VPDef::VPScalarPHISC, nullptr, Start, DL),
2263 Name(Name.str()) {
2264 addOperand(BackedgeValue);
2267 ~VPScalarPHIRecipe() override = default;
2269 VPScalarPHIRecipe *clone() override {
2270 llvm_unreachable("cloning not implemented yet");
2273 VP_CLASSOF_IMPL(VPDef::VPScalarPHISC)
2275 /// Generate the phi/select nodes.
2276 void execute(VPTransformState &State) override;
2278 /// Returns true if the recipe only uses the first lane of operand \p Op.
2279 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2280 assert(is_contained(operands(), Op) &&
2281 "Op must be an operand of the recipe");
2282 return true;
2285 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2286 /// Print the recipe.
2287 void print(raw_ostream &O, const Twine &Indent,
2288 VPSlotTracker &SlotTracker) const override;
2289 #endif
2292 /// A recipe for handling phis that are widened in the vector loop.
2293 /// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
2294 /// managed in the recipe directly.
2295 class VPWidenPHIRecipe : public VPSingleDefRecipe {
2296 /// List of incoming blocks. Only used in the VPlan native path.
2297 SmallVector<VPBasicBlock *, 2> IncomingBlocks;
2299 public:
2300 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
2301 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
2302 : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
2303 if (Start)
2304 addOperand(Start);
2307 VPWidenPHIRecipe *clone() override {
2308 llvm_unreachable("cloning not implemented yet");
2311 ~VPWidenPHIRecipe() override = default;
2313 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2315 /// Generate the phi/select nodes.
2316 void execute(VPTransformState &State) override;
2318 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2319 /// Print the recipe.
2320 void print(raw_ostream &O, const Twine &Indent,
2321 VPSlotTracker &SlotTracker) const override;
2322 #endif
2324 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
2325 void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
2326 addOperand(IncomingV);
2327 IncomingBlocks.push_back(IncomingBlock);
2330 /// Returns the \p I th incoming VPBasicBlock.
2331 VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
2333 /// Returns the \p I th incoming VPValue.
2334 VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
2337 /// A recipe for handling first-order recurrence phis. The start value is the
2338 /// first operand of the recipe and the incoming value from the backedge is the
2339 /// second operand.
2340 struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
2341 VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
2342 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
2344 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2346 static inline bool classof(const VPHeaderPHIRecipe *R) {
2347 return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
2350 VPFirstOrderRecurrencePHIRecipe *clone() override {
2351 return new VPFirstOrderRecurrencePHIRecipe(
2352 cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
2355 void execute(VPTransformState &State) override;
2357 /// Return the cost of this first-order recurrence phi recipe.
2358 InstructionCost computeCost(ElementCount VF,
2359 VPCostContext &Ctx) const override;
2361 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2362 /// Print the recipe.
2363 void print(raw_ostream &O, const Twine &Indent,
2364 VPSlotTracker &SlotTracker) const override;
2365 #endif
2368 /// A recipe for handling reduction phis. The start value is the first operand
2369 /// of the recipe and the incoming value from the backedge is the second
2370 /// operand.
2371 class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
2372 public VPUnrollPartAccessor<2> {
2373 /// Descriptor for the reduction.
2374 const RecurrenceDescriptor &RdxDesc;
2376 /// The phi is part of an in-loop reduction.
2377 bool IsInLoop;
2379 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
2380 bool IsOrdered;
2382 public:
2383 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
2384 /// RdxDesc.
2385 VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
2386 VPValue &Start, bool IsInLoop = false,
2387 bool IsOrdered = false)
2388 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
2389 RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
2390 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
2393 ~VPReductionPHIRecipe() override = default;
2395 VPReductionPHIRecipe *clone() override {
2396 auto *R =
2397 new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
2398 *getOperand(0), IsInLoop, IsOrdered);
2399 R->addOperand(getBackedgeValue());
2400 return R;
2403 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2405 static inline bool classof(const VPHeaderPHIRecipe *R) {
2406 return R->getVPDefID() == VPDef::VPReductionPHISC;
2409 /// Generate the phi/select nodes.
2410 void execute(VPTransformState &State) override;
2412 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2413 /// Print the recipe.
2414 void print(raw_ostream &O, const Twine &Indent,
2415 VPSlotTracker &SlotTracker) const override;
2416 #endif
2418 const RecurrenceDescriptor &getRecurrenceDescriptor() const {
2419 return RdxDesc;
2422 /// Returns true, if the phi is part of an ordered reduction.
2423 bool isOrdered() const { return IsOrdered; }
2425 /// Returns true, if the phi is part of an in-loop reduction.
2426 bool isInLoop() const { return IsInLoop; }
2429 /// A recipe for vectorizing a phi-node as a sequence of mask-based select
2430 /// instructions.
2431 class VPBlendRecipe : public VPSingleDefRecipe {
2432 public:
2433 /// The blend operation is a User of the incoming values and of their
2434 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2435   /// be omitted (implied by passing an odd number of operands), in which case
2436 /// all other incoming values are merged into it.
2437 VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
2438 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
2439 assert(Operands.size() > 0 && "Expected at least one operand!");
2442 VPBlendRecipe *clone() override {
2443 SmallVector<VPValue *> Ops(operands());
2444 return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
2447 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2449 /// A normalized blend is one that has an odd number of operands, whereby the
2450 /// first operand does not have an associated mask.
2451 bool isNormalized() const { return getNumOperands() % 2; }
2453   /// Return the number of incoming values, taking into account that, when
2454   /// normalized, the first incoming value has no mask.
2455 unsigned getNumIncomingValues() const {
2456 return (getNumOperands() + isNormalized()) / 2;
2459 /// Return incoming value number \p Idx.
2460 VPValue *getIncomingValue(unsigned Idx) const {
2461 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2464 /// Return mask number \p Idx.
2465 VPValue *getMask(unsigned Idx) const {
2466 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2467 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
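  // Worked example (illustrative): a normalized blend of three incoming
  // values has operands [I0, I1, M1, I2, M2], so:
  //   getNumIncomingValues() == (5 + 1) / 2 == 3
  //   getIncomingValue(1)    == getOperand(1)  // I1
  //   getMask(1)             == getOperand(2)  // M1
  // In the non-normalized form [I0, M0, I1, M1], getMask(0) == getOperand(1).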
2470 /// Generate the phi/select nodes.
2471 void execute(VPTransformState &State) override;
2473   /// Return the cost of this VPBlendRecipe.
2474 InstructionCost computeCost(ElementCount VF,
2475 VPCostContext &Ctx) const override;
2477 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2478 /// Print the recipe.
2479 void print(raw_ostream &O, const Twine &Indent,
2480 VPSlotTracker &SlotTracker) const override;
2481 #endif
2483 /// Returns true if the recipe only uses the first lane of operand \p Op.
2484 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2485 assert(is_contained(operands(), Op) &&
2486 "Op must be an operand of the recipe");
2487     // Recursing through Blend recipes only; the recursion must terminate at
2488     // header phis at the latest.
2489 return all_of(users(),
2490 [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
2494 /// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2495 /// or stores into one wide load/store and shuffles. The first operand of a
2496 /// VPInterleave recipe is the address, followed by the stored values, followed
2497 /// by an optional mask.
2498 class VPInterleaveRecipe : public VPRecipeBase {
2499 const InterleaveGroup<Instruction> *IG;
2501 /// Indicates if the interleave group is in a conditional block and requires a
2502 /// mask.
2503 bool HasMask = false;
2505 /// Indicates if gaps between members of the group need to be masked out or if
2506   /// unused gaps can be loaded speculatively.
2507 bool NeedsMaskForGaps = false;
2509 public:
2510 VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
2511 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2512 bool NeedsMaskForGaps)
2513 : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
2514 NeedsMaskForGaps(NeedsMaskForGaps) {
2515 for (unsigned i = 0; i < IG->getFactor(); ++i)
2516 if (Instruction *I = IG->getMember(i)) {
2517 if (I->getType()->isVoidTy())
2518 continue;
2519 new VPValue(I, this);
2522 for (auto *SV : StoredValues)
2523 addOperand(SV);
2524 if (Mask) {
2525 HasMask = true;
2526 addOperand(Mask);
2529 ~VPInterleaveRecipe() override = default;
2531 VPInterleaveRecipe *clone() override {
2532 return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2533 NeedsMaskForGaps);
2536 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2538 /// Return the address accessed by this recipe.
2539 VPValue *getAddr() const {
2540 return getOperand(0); // Address is the 1st, mandatory operand.
2543 /// Return the mask used by this recipe. Note that a full mask is represented
2544 /// by a nullptr.
2545 VPValue *getMask() const {
2546 // Mask is optional and therefore the last, currently 2nd operand.
2547 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2550 /// Return the VPValues stored by this interleave group. If it is a load
2551 /// interleave group, return an empty ArrayRef.
2552 ArrayRef<VPValue *> getStoredValues() const {
2553 // The first operand is the address, followed by the stored values, followed
2554 // by an optional mask.
2555 return ArrayRef<VPValue *>(op_begin(), getNumOperands())
2556 .slice(1, getNumStoreOperands());
2559 /// Generate the wide load or store, and shuffles.
2560 void execute(VPTransformState &State) override;
2562 /// Return the cost of this VPInterleaveRecipe.
2563 InstructionCost computeCost(ElementCount VF,
2564 VPCostContext &Ctx) const override;
2566 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2567 /// Print the recipe.
2568 void print(raw_ostream &O, const Twine &Indent,
2569 VPSlotTracker &SlotTracker) const override;
2570 #endif
2572 const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
2574 /// Returns the number of stored operands of this interleave group. Returns 0
2575 /// for load interleave groups.
2576 unsigned getNumStoreOperands() const {
2577 return getNumOperands() - (HasMask ? 2 : 1);
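  // Sketch (illustrative): a masked store group with two members has the
  // operands [Addr, V0, V1, Mask], so getNumStoreOperands() == 4 - 2 == 2
  // and getStoredValues() yields {V0, V1}; for a load group the slice is
  // empty.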
2580 /// The recipe only uses the first lane of the address.
2581 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2582 assert(is_contained(operands(), Op) &&
2583 "Op must be an operand of the recipe");
2584 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2587 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2590 /// A recipe to represent inloop reduction operations, performing a reduction on
2591 /// a vector operand into a scalar value, and adding the result to a chain.
2592 /// The Operands are {ChainOp, VecOp, [Condition]}.
2593 class VPReductionRecipe : public VPSingleDefRecipe {
2594   /// The recurrence descriptor for the reduction in question.
2595 const RecurrenceDescriptor &RdxDesc;
2596 bool IsOrdered;
2597 /// Whether the reduction is conditional.
2598 bool IsConditional = false;
2600 protected:
2601 VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
2602 Instruction *I, ArrayRef<VPValue *> Operands,
2603 VPValue *CondOp, bool IsOrdered)
2604 : VPSingleDefRecipe(SC, Operands, I), RdxDesc(R), IsOrdered(IsOrdered) {
2605 if (CondOp) {
2606 IsConditional = true;
2607 addOperand(CondOp);
2611 public:
2612 VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
2613 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2614 bool IsOrdered)
2615 : VPReductionRecipe(VPDef::VPReductionSC, R, I,
2616 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2617 IsOrdered) {}
2619 ~VPReductionRecipe() override = default;
2621 VPReductionRecipe *clone() override {
2622 return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
2623 getVecOp(), getCondOp(), IsOrdered);
2626 static inline bool classof(const VPRecipeBase *R) {
2627 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2628 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2631 static inline bool classof(const VPUser *U) {
2632 auto *R = dyn_cast<VPRecipeBase>(U);
2633 return R && classof(R);
2636 /// Generate the reduction in the loop
2637 void execute(VPTransformState &State) override;
2639 /// Return the cost of VPReductionRecipe.
2640 InstructionCost computeCost(ElementCount VF,
2641 VPCostContext &Ctx) const override;
2643 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2644 /// Print the recipe.
2645 void print(raw_ostream &O, const Twine &Indent,
2646 VPSlotTracker &SlotTracker) const override;
2647 #endif
2649   /// Return the recurrence descriptor for the in-loop reduction.
2650 const RecurrenceDescriptor &getRecurrenceDescriptor() const {
2651 return RdxDesc;
2653 /// Return true if the in-loop reduction is ordered.
2654   bool isOrdered() const { return IsOrdered; }
2655   /// Return true if the in-loop reduction is conditional.
2656   bool isConditional() const { return IsConditional; }
2657 /// The VPValue of the scalar Chain being accumulated.
2658 VPValue *getChainOp() const { return getOperand(0); }
2659 /// The VPValue of the vector value to be reduced.
2660 VPValue *getVecOp() const { return getOperand(1); }
2661 /// The VPValue of the condition for the block.
2662 VPValue *getCondOp() const {
2663 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
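  // Sketch (illustrative): an unconditional reduction has the operands
  // {ChainOp, VecOp}; a conditional one carries the block mask last, as in
  // {ChainOp, VecOp, CondOp}, which is what getCondOp() returns.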
2667 /// A recipe to represent inloop reduction operations with vector-predication
2668 /// intrinsics, performing a reduction on a vector operand with the explicit
2669 /// vector length (EVL) into a scalar value, and adding the result to a chain.
2670 /// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2671 class VPReductionEVLRecipe : public VPReductionRecipe {
2672 public:
2673 VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp)
2674 : VPReductionRecipe(
2675 VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
2676 cast_or_null<Instruction>(R.getUnderlyingValue()),
2677 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2678 R.isOrdered()) {}
2680 ~VPReductionEVLRecipe() override = default;
2682 VPReductionEVLRecipe *clone() override {
2683 llvm_unreachable("cloning not implemented yet");
2686 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2688 /// Generate the reduction in the loop
2689 void execute(VPTransformState &State) override;
2691 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2692 /// Print the recipe.
2693 void print(raw_ostream &O, const Twine &Indent,
2694 VPSlotTracker &SlotTracker) const override;
2695 #endif
2697 /// The VPValue of the explicit vector length.
2698 VPValue *getEVL() const { return getOperand(2); }
2700 /// Returns true if the recipe only uses the first lane of operand \p Op.
2701 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2702 assert(is_contained(operands(), Op) &&
2703 "Op must be an operand of the recipe");
2704 return Op == getEVL();
2708 /// VPReplicateRecipe replicates a given instruction producing multiple scalar
2709 /// copies of the original scalar type, one per lane, instead of producing a
2710 /// single copy of widened type for all lanes. If the instruction is known to
2711 /// be uniform, only one copy, for lane zero, is generated.
2712 class VPReplicateRecipe : public VPRecipeWithIRFlags {
2713 /// Indicator if only a single replica per lane is needed.
2714 bool IsUniform;
2716 /// Indicator if the replicas are also predicated.
2717 bool IsPredicated;
2719 public:
2720 template <typename IterT>
2721 VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
2722 bool IsUniform, VPValue *Mask = nullptr)
2723 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2724 IsUniform(IsUniform), IsPredicated(Mask) {
2725 if (Mask)
2726 addOperand(Mask);
2729 ~VPReplicateRecipe() override = default;
2731 VPReplicateRecipe *clone() override {
2732 auto *Copy =
2733 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2734 isPredicated() ? getMask() : nullptr);
2735 Copy->transferFlags(*this);
2736 return Copy;
2739 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2741 /// Generate replicas of the desired Ingredient. Replicas will be generated
2742 /// for all parts and lanes unless a specific part and lane are specified in
2743 /// the \p State.
2744 void execute(VPTransformState &State) override;
2746 /// Return the cost of this VPReplicateRecipe.
2747 InstructionCost computeCost(ElementCount VF,
2748 VPCostContext &Ctx) const override;
2750 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2751 /// Print the recipe.
2752 void print(raw_ostream &O, const Twine &Indent,
2753 VPSlotTracker &SlotTracker) const override;
2754 #endif
2756 bool isUniform() const { return IsUniform; }
2758 bool isPredicated() const { return IsPredicated; }
2760 /// Returns true if the recipe only uses the first lane of operand \p Op.
2761 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2762 assert(is_contained(operands(), Op) &&
2763 "Op must be an operand of the recipe");
2764 return isUniform();
2767 /// Returns true if the recipe uses scalars of operand \p Op.
2768 bool usesScalars(const VPValue *Op) const override {
2769 assert(is_contained(operands(), Op) &&
2770 "Op must be an operand of the recipe");
2771 return true;
2774 /// Returns true if the recipe is used by a widened recipe via an intervening
2775 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2776 /// in a vector.
2777 bool shouldPack() const;
2779 /// Return the mask of a predicated VPReplicateRecipe.
2780 VPValue *getMask() {
2781 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2782 return getOperand(getNumOperands() - 1);
2785 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2788 /// A recipe for generating conditional branches on the bits of a mask.
2789 class VPBranchOnMaskRecipe : public VPRecipeBase {
2790 public:
2791 VPBranchOnMaskRecipe(VPValue *BlockInMask)
2792 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
2793 if (BlockInMask) // nullptr means all-one mask.
2794 addOperand(BlockInMask);
2797 VPBranchOnMaskRecipe *clone() override {
2798 return new VPBranchOnMaskRecipe(getOperand(0));
2801 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2803 /// Generate the extraction of the appropriate bit from the block mask and the
2804 /// conditional branch.
2805 void execute(VPTransformState &State) override;
2807 /// Return the cost of this VPBranchOnMaskRecipe.
2808 InstructionCost computeCost(ElementCount VF,
2809 VPCostContext &Ctx) const override;
2811 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2812 /// Print the recipe.
2813 void print(raw_ostream &O, const Twine &Indent,
2814 VPSlotTracker &SlotTracker) const override {
2815 O << Indent << "BRANCH-ON-MASK ";
2816 if (VPValue *Mask = getMask())
2817 Mask->printAsOperand(O, SlotTracker);
2818 else
2819 O << " All-One";
2821 #endif
2823 /// Return the mask used by this recipe. Note that a full mask is represented
2824 /// by a nullptr.
2825 VPValue *getMask() const {
2826 assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
2827 // Mask is optional.
2828 return getNumOperands() == 1 ? getOperand(0) : nullptr;
2831 /// Returns true if the recipe uses scalars of operand \p Op.
2832 bool usesScalars(const VPValue *Op) const override {
2833 assert(is_contained(operands(), Op) &&
2834 "Op must be an operand of the recipe");
2835 return true;
2839 /// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
2840 /// control converges back from a Branch-on-Mask. The phi nodes are needed in
2841 /// order to merge values that are set under such a branch and feed their uses.
2842 /// The phi nodes can be scalar or vector depending on the users of the value.
2843 /// This recipe works in concert with VPBranchOnMaskRecipe.
2844 class VPPredInstPHIRecipe : public VPSingleDefRecipe {
2845 public:
2846   /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs phi
2847   /// nodes after merging back from a Branch-on-Mask.
2848 VPPredInstPHIRecipe(VPValue *PredV)
2849 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {}
2850 ~VPPredInstPHIRecipe() override = default;
2852 VPPredInstPHIRecipe *clone() override {
2853 return new VPPredInstPHIRecipe(getOperand(0));
2856 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
2858 /// Generates phi nodes for live-outs (from a replicate region) as needed to
2859 /// retain SSA form.
2860 void execute(VPTransformState &State) override;
2862 /// Return the cost of this VPPredInstPHIRecipe.
2863 InstructionCost computeCost(ElementCount VF,
2864 VPCostContext &Ctx) const override {
2865 // TODO: Compute accurate cost after retiring the legacy cost model.
2866 return 0;
2869 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2870 /// Print the recipe.
2871 void print(raw_ostream &O, const Twine &Indent,
2872 VPSlotTracker &SlotTracker) const override;
2873 #endif
2875 /// Returns true if the recipe uses scalars of operand \p Op.
2876 bool usesScalars(const VPValue *Op) const override {
2877 assert(is_contained(operands(), Op) &&
2878 "Op must be an operand of the recipe");
2879 return true;
2883 /// A common base class for widening memory operations. An optional mask can be
2884 /// provided as the last operand.
2885 class VPWidenMemoryRecipe : public VPRecipeBase {
2886 protected:
2887 Instruction &Ingredient;
2889 /// Whether the accessed addresses are consecutive.
2890 bool Consecutive;
2892 /// Whether the consecutive accessed addresses are in reverse order.
2893 bool Reverse;
2895 /// Whether the memory access is masked.
2896 bool IsMasked = false;
2898 void setMask(VPValue *Mask) {
2899 assert(!IsMasked && "cannot re-set mask");
2900 if (!Mask)
2901 return;
2902 addOperand(Mask);
2903 IsMasked = true;
2906 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
2907 std::initializer_list<VPValue *> Operands,
2908 bool Consecutive, bool Reverse, DebugLoc DL)
2909 : VPRecipeBase(SC, Operands, DL), Ingredient(I), Consecutive(Consecutive),
2910 Reverse(Reverse) {
2911 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
2914 public:
2915 VPWidenMemoryRecipe *clone() override {
2916 llvm_unreachable("cloning not supported");
2919 static inline bool classof(const VPRecipeBase *R) {
2920 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
2921 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
2922 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
2923 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
2926 static inline bool classof(const VPUser *U) {
2927 auto *R = dyn_cast<VPRecipeBase>(U);
2928 return R && classof(R);
2931 /// Return whether the loaded-from / stored-to addresses are consecutive.
2932 bool isConsecutive() const { return Consecutive; }
2934 /// Return whether the consecutive loaded/stored addresses are in reverse
2935 /// order.
2936 bool isReverse() const { return Reverse; }
2938 /// Return the address accessed by this recipe.
2939 VPValue *getAddr() const { return getOperand(0); }
2941 /// Returns true if the recipe is masked.
2942 bool isMasked() const { return IsMasked; }
2944 /// Return the mask used by this recipe. Note that a full mask is represented
2945 /// by a nullptr.
2946 VPValue *getMask() const {
2947 // Mask is optional and therefore the last operand.
2948 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
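  // Operand layout sketch across the subclasses below (illustrative):
  //   VPWidenLoadRecipe:     {Addr[, Mask]}
  //   VPWidenStoreRecipe:    {Addr, StoredVal[, Mask]}
  //   VPWidenLoadEVLRecipe:  {Addr, EVL[, Mask]}
  //   VPWidenStoreEVLRecipe: {Addr, StoredVal, EVL[, Mask]}
  // The mask, when present, is always the trailing operand, which is why
  // getMask() reads getOperand(getNumOperands() - 1).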
2951 /// Generate the wide load/store.
2952 void execute(VPTransformState &State) override {
2953 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
2956 /// Return the cost of this VPWidenMemoryRecipe.
2957 InstructionCost computeCost(ElementCount VF,
2958 VPCostContext &Ctx) const override;
2960 Instruction &getIngredient() const { return Ingredient; }
2963 /// A recipe for widening load operations, using the address to load from and an
2964 /// optional mask.
2965 struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
2966 VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
2967 bool Consecutive, bool Reverse, DebugLoc DL)
2968 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
2969 Reverse, DL),
2970 VPValue(this, &Load) {
2971 setMask(Mask);
2974 VPWidenLoadRecipe *clone() override {
2975 return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
2976 getMask(), Consecutive, Reverse,
2977 getDebugLoc());
2980 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
2982 /// Generate a wide load or gather.
2983 void execute(VPTransformState &State) override;
2985 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2986 /// Print the recipe.
2987 void print(raw_ostream &O, const Twine &Indent,
2988 VPSlotTracker &SlotTracker) const override;
2989 #endif
2991 /// Returns true if the recipe only uses the first lane of operand \p Op.
2992 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2993 assert(is_contained(operands(), Op) &&
2994 "Op must be an operand of the recipe");
2995     // Widened, consecutive load operations only demand the first lane of
2996     // their address.
2997 return Op == getAddr() && isConsecutive();
3001 /// A recipe for widening load operations with vector-predication intrinsics,
3002 /// using the address to load from, the explicit vector length and an optional
3003 /// mask.
3004 struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3005 VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
3006 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3007 {L.getAddr(), &EVL}, L.isConsecutive(),
3008 L.isReverse(), L.getDebugLoc()),
3009 VPValue(this, &getIngredient()) {
3010 setMask(Mask);
3013 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3015 /// Return the EVL operand.
3016 VPValue *getEVL() const { return getOperand(1); }
3018 /// Generate the wide load or gather.
3019 void execute(VPTransformState &State) override;
3021 /// Return the cost of this VPWidenLoadEVLRecipe.
3022 InstructionCost computeCost(ElementCount VF,
3023 VPCostContext &Ctx) const override;
3025 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3026 /// Print the recipe.
3027 void print(raw_ostream &O, const Twine &Indent,
3028 VPSlotTracker &SlotTracker) const override;
3029 #endif
3031 /// Returns true if the recipe only uses the first lane of operand \p Op.
3032 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3033 assert(is_contained(operands(), Op) &&
3034 "Op must be an operand of the recipe");
3035 // Widened loads only demand the first lane of EVL and consecutive loads
3036 // only demand the first lane of their address.
3037 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3041 /// A recipe for widening store operations, using the stored value, the address
3042 /// to store to and an optional mask.
3043 struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
3044 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3045 VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
3046 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3047 Consecutive, Reverse, DL) {
3048 setMask(Mask);
3051 VPWidenStoreRecipe *clone() override {
3052 return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
3053 getStoredValue(), getMask(), Consecutive,
3054 Reverse, getDebugLoc());
3057 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3059 /// Return the value stored by this recipe.
3060 VPValue *getStoredValue() const { return getOperand(1); }
3062 /// Generate a wide store or scatter.
3063 void execute(VPTransformState &State) override;
3065 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3066 /// Print the recipe.
3067 void print(raw_ostream &O, const Twine &Indent,
3068 VPSlotTracker &SlotTracker) const override;
3069 #endif
3071 /// Returns true if the recipe only uses the first lane of operand \p Op.
3072 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3073 assert(is_contained(operands(), Op) &&
3074 "Op must be an operand of the recipe");
3075 // Widened, consecutive stores only demand the first lane of their address,
3076 // unless the same operand is also stored.
3077 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3081 /// A recipe for widening store operations with vector-predication intrinsics,
3082 /// using the value to store, the address to store to, the explicit vector
3083 /// length and an optional mask.
3084 struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
3085 VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
3086 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3087 {S.getAddr(), S.getStoredValue(), &EVL},
3088 S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
3089 setMask(Mask);
3092 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3094   /// Return the value stored by this recipe.
3095 VPValue *getStoredValue() const { return getOperand(1); }
3097 /// Return the EVL operand.
3098 VPValue *getEVL() const { return getOperand(2); }
3100 /// Generate the wide store or scatter.
3101 void execute(VPTransformState &State) override;
3103 /// Return the cost of this VPWidenStoreEVLRecipe.
3104 InstructionCost computeCost(ElementCount VF,
3105 VPCostContext &Ctx) const override;
3107 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3108 /// Print the recipe.
3109 void print(raw_ostream &O, const Twine &Indent,
3110 VPSlotTracker &SlotTracker) const override;
3111 #endif
3113 /// Returns true if the recipe only uses the first lane of operand \p Op.
3114 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3115 assert(is_contained(operands(), Op) &&
3116 "Op must be an operand of the recipe");
3117 if (Op == getEVL()) {
3118 assert(getStoredValue() != Op && "unexpected store of EVL");
3119 return true;
3121 // Widened, consecutive memory operations only demand the first lane of
3122     // their address, unless the same operand is also stored. The latter can
3123 // happen with opaque pointers.
3124 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3128 /// Recipe to expand a SCEV expression.
3129 class VPExpandSCEVRecipe : public VPSingleDefRecipe {
3130 const SCEV *Expr;
3131 ScalarEvolution &SE;
3133 public:
3134 VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
3135 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}
3137 ~VPExpandSCEVRecipe() override = default;
3139 VPExpandSCEVRecipe *clone() override {
3140 return new VPExpandSCEVRecipe(Expr, SE);
3143 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3145   /// Expand the wrapped SCEV expression into a runtime value.
3146 void execute(VPTransformState &State) override;
3148 /// Return the cost of this VPExpandSCEVRecipe.
3149 InstructionCost computeCost(ElementCount VF,
3150 VPCostContext &Ctx) const override {
3151 // TODO: Compute accurate cost after retiring the legacy cost model.
3152 return 0;
3155 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3156 /// Print the recipe.
3157 void print(raw_ostream &O, const Twine &Indent,
3158 VPSlotTracker &SlotTracker) const override;
3159 #endif
3161 const SCEV *getSCEV() const { return Expr; }
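// For illustration, a minimal sketch creating an expansion recipe and placing
// it in the plan's entry block, where such recipes are expected to live
// (hypothetical Expr, SE and Plan):
//
//   auto *Exp = new VPExpandSCEVRecipe(Expr, SE);
//   Plan.getEntry()->appendRecipe(Exp);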
3164 /// Canonical scalar induction phi of the vector loop, starting at the specified
3165 /// start value (either 0 or the resume value when vectorizing the epilogue
3166 /// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3167 /// canonical induction variable.
3168 class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
3169 public:
3170 VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
3171 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3173 ~VPCanonicalIVPHIRecipe() override = default;
3175 VPCanonicalIVPHIRecipe *clone() override {
3176 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3177 R->addOperand(getBackedgeValue());
3178 return R;
3181 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3183 static inline bool classof(const VPHeaderPHIRecipe *D) {
3184 return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
3187 void execute(VPTransformState &State) override {
3188 llvm_unreachable(
3189 "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
3192 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3193 /// Print the recipe.
3194 void print(raw_ostream &O, const Twine &Indent,
3195 VPSlotTracker &SlotTracker) const override;
3196 #endif
3198 /// Returns the scalar type of the induction.
3199 Type *getScalarType() const {
3200 return getStartValue()->getLiveInIRValue()->getType();
3203 /// Returns true if the recipe only uses the first lane of operand \p Op.
3204 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3205 assert(is_contained(operands(), Op) &&
3206 "Op must be an operand of the recipe");
3207 return true;
3210 /// Returns true if the recipe only uses the first part of operand \p Op.
3211 bool onlyFirstPartUsed(const VPValue *Op) const override {
3212 assert(is_contained(operands(), Op) &&
3213 "Op must be an operand of the recipe");
3214 return true;
3217   /// Check if the induction described by \p Kind, \p Start and \p Step is
3218 /// canonical, i.e. has the same start and step (of 1) as the canonical IV.
3219 bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start,
3220 VPValue *Step) const;
3222 /// Return the cost of this VPCanonicalIVPHIRecipe.
3223 InstructionCost computeCost(ElementCount VF,
3224 VPCostContext &Ctx) const override {
3225 // For now, match the behavior of the legacy cost model.
3226 return 0;
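// A short usage sketch, assuming a VPlan Plan whose vector loop region starts
// with the canonical IV phi:
//
//   VPCanonicalIVPHIRecipe *CanIV = Plan.getCanonicalIV();
//   Type *IVTy = CanIV->getScalarType(); // type of the canonical induction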
3230 /// A recipe for generating the active lane mask for the vector loop that is
3231 /// used to predicate the vector operations.
3232 /// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3233 /// remove VPActiveLaneMaskPHIRecipe.
3234 class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
3235 public:
3236 VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
3237 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3238 DL) {}
3240 ~VPActiveLaneMaskPHIRecipe() override = default;
3242 VPActiveLaneMaskPHIRecipe *clone() override {
3243 auto *R = new VPActiveLaneMaskPHIRecipe(getOperand(0), getDebugLoc());
3244 if (getNumOperands() == 2)
3245 R->addOperand(getOperand(1));
3246 return R;
3249 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3251 static inline bool classof(const VPHeaderPHIRecipe *D) {
3252 return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
3255 /// Generate the active lane mask phi of the vector loop.
3256 void execute(VPTransformState &State) override;
3258 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3259 /// Print the recipe.
3260 void print(raw_ostream &O, const Twine &Indent,
3261 VPSlotTracker &SlotTracker) const override;
3262 #endif
3265 /// A recipe for generating the phi node for the current index of elements,
3266 /// adjusted in accordance with the EVL value. It starts at the start value of the
3267 /// canonical induction and gets incremented by EVL in each iteration of the
3268 /// vector loop.
3269 class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
3270 public:
3271 VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
3272 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3274 ~VPEVLBasedIVPHIRecipe() override = default;
3276 VPEVLBasedIVPHIRecipe *clone() override {
3277 llvm_unreachable("cloning not implemented yet");
3280 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3282 static inline bool classof(const VPHeaderPHIRecipe *D) {
3283 return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
3286 void execute(VPTransformState &State) override {
3287 llvm_unreachable(
3288 "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
3291 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3292 InstructionCost computeCost(ElementCount VF,
3293 VPCostContext &Ctx) const override {
3294 // For now, match the behavior of the legacy cost model.
3295 return 0;
3298 /// Returns true if the recipe only uses the first lane of operand \p Op.
3299 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3300 assert(is_contained(operands(), Op) &&
3301 "Op must be an operand of the recipe");
3302 return true;
3305 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3306 /// Print the recipe.
3307 void print(raw_ostream &O, const Twine &Indent,
3308 VPSlotTracker &SlotTracker) const override;
3309 #endif
3312 /// A recipe for widening the canonical induction variable of the vector loop.
3313 class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe,
3314 public VPUnrollPartAccessor<1> {
3315 public:
3316 VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
3317 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3319 ~VPWidenCanonicalIVRecipe() override = default;
3321 VPWidenCanonicalIVRecipe *clone() override {
3322 return new VPWidenCanonicalIVRecipe(
3323 cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
3326 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3328 /// Generate a canonical vector induction variable of the vector loop, with
3329 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3330 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3331 void execute(VPTransformState &State) override;
3333   /// Return the cost of this VPWidenCanonicalIVRecipe.
3334 InstructionCost computeCost(ElementCount VF,
3335 VPCostContext &Ctx) const override {
3336 // TODO: Compute accurate cost after retiring the legacy cost model.
3337 return 0;
3340 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3341 /// Print the recipe.
3342 void print(raw_ostream &O, const Twine &Indent,
3343 VPSlotTracker &SlotTracker) const override;
3344 #endif
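// For example, with VF = 4 and UF = 2 the widened IV starts at <0,1,2,3> for
// part 0 and <4,5,6,7> for part 1, and both parts are advanced by the splat
// step <8,8,8,8> (i.e. VF * UF) in each vector iteration.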
3347 /// A recipe for converting the input value \p IV to the corresponding
3348 /// value of an IV with different start and step values, using Start + IV *
3349 /// Step.
3350 class VPDerivedIVRecipe : public VPSingleDefRecipe {
3351 /// Kind of the induction.
3352 const InductionDescriptor::InductionKind Kind;
3353 /// If not nullptr, the floating point induction binary operator. Must be set
3354 /// for floating point inductions.
3355 const FPMathOperator *FPBinOp;
3357 /// Name to use for the generated IR instruction for the derived IV.
3358 std::string Name;
3360 public:
3361 VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
3362 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3363 const Twine &Name = "")
3364 : VPDerivedIVRecipe(
3365 IndDesc.getKind(),
3366 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3367 Start, CanonicalIV, Step, Name) {}
3369 VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
3370 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3371 VPValue *Step, const Twine &Name = "")
3372 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3373 FPBinOp(FPBinOp), Name(Name.str()) {}
3375 ~VPDerivedIVRecipe() override = default;
3377 VPDerivedIVRecipe *clone() override {
3378 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3379 getStepValue());
3382 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3384 /// Generate the transformed value of the induction at offset StartValue (1.
3385   /// operand) + IV (2. operand) * StepValue (3. operand).
3386 void execute(VPTransformState &State) override;
3388 /// Return the cost of this VPDerivedIVRecipe.
3389 InstructionCost computeCost(ElementCount VF,
3390 VPCostContext &Ctx) const override {
3391 // TODO: Compute accurate cost after retiring the legacy cost model.
3392 return 0;
3395 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3396 /// Print the recipe.
3397 void print(raw_ostream &O, const Twine &Indent,
3398 VPSlotTracker &SlotTracker) const override;
3399 #endif
3401 Type *getScalarType() const {
3402 return getStartValue()->getLiveInIRValue()->getType();
3405 VPValue *getStartValue() const { return getOperand(0); }
3406 VPValue *getStepValue() const { return getOperand(2); }
3408 /// Returns true if the recipe only uses the first lane of operand \p Op.
3409 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3410 assert(is_contained(operands(), Op) &&
3411 "Op must be an operand of the recipe");
3412 return true;
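// For illustration, a hypothetical construction of an IV with start 10 and
// step 3 derived from the canonical IV, modeling 10 + CanIV * 3 (Ten and
// Three are assumed live-in VPValues, CanIV the canonical IV recipe):
//
//   auto *D = new VPDerivedIVRecipe(InductionDescriptor::IK_IntInduction,
//                                   /*FPBinOp=*/nullptr, Ten, CanIV, Three);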
3416 /// A recipe for handling phi nodes of integer and floating-point inductions,
3417 /// producing their scalar values.
3418 class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
3419 public VPUnrollPartAccessor<2> {
3420 Instruction::BinaryOps InductionOpcode;
3422 public:
3423 VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step,
3424 Instruction::BinaryOps Opcode, FastMathFlags FMFs)
3425 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3426 ArrayRef<VPValue *>({IV, Step}), FMFs),
3427 InductionOpcode(Opcode) {}
3429 VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
3430 VPValue *Step)
3431 : VPScalarIVStepsRecipe(
3432 IV, Step, IndDesc.getInductionOpcode(),
3433 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3434 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3435 : FastMathFlags()) {}
3437 ~VPScalarIVStepsRecipe() override = default;
3439 VPScalarIVStepsRecipe *clone() override {
3440 return new VPScalarIVStepsRecipe(
3441 getOperand(0), getOperand(1), InductionOpcode,
3442 hasFastMathFlags() ? getFastMathFlags() : FastMathFlags());
3445 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3447   /// Generate the scalarized versions of the phi node as needed by its users.
3448 void execute(VPTransformState &State) override;
3450 /// Return the cost of this VPScalarIVStepsRecipe.
3451 InstructionCost computeCost(ElementCount VF,
3452 VPCostContext &Ctx) const override {
3453 // TODO: Compute accurate cost after retiring the legacy cost model.
3454 return 0;
3457 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3458 /// Print the recipe.
3459 void print(raw_ostream &O, const Twine &Indent,
3460 VPSlotTracker &SlotTracker) const override;
3461 #endif
3463 VPValue *getStepValue() const { return getOperand(1); }
3465 /// Returns true if the recipe only uses the first lane of operand \p Op.
3466 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3467 assert(is_contained(operands(), Op) &&
3468 "Op must be an operand of the recipe");
3469 return true;
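// Continuing the hypothetical derived-IV example above, per-lane scalar steps
// can be modeled as:
//
//   auto *Steps = new VPScalarIVStepsRecipe(D, Three, Instruction::Add,
//                                           FastMathFlags());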
3473 /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3474 /// holds a sequence of zero or more VPRecipes, each representing a sequence of
3475 /// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3476 class VPBasicBlock : public VPBlockBase {
3477 public:
3478 using RecipeListTy = iplist<VPRecipeBase>;
3480 protected:
3481 /// The VPRecipes held in the order of output instructions to generate.
3482 RecipeListTy Recipes;
3484 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3485 : VPBlockBase(BlockSC, Name.str()) {}
3487 public:
3488 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3489 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3490 if (Recipe)
3491 appendRecipe(Recipe);
3494 ~VPBasicBlock() override {
3495 while (!Recipes.empty())
3496 Recipes.pop_back();
3499 /// Instruction iterators...
3500 using iterator = RecipeListTy::iterator;
3501 using const_iterator = RecipeListTy::const_iterator;
3502 using reverse_iterator = RecipeListTy::reverse_iterator;
3503 using const_reverse_iterator = RecipeListTy::const_reverse_iterator;
3505 //===--------------------------------------------------------------------===//
3506 /// Recipe iterator methods
3508 inline iterator begin() { return Recipes.begin(); }
3509 inline const_iterator begin() const { return Recipes.begin(); }
3510 inline iterator end() { return Recipes.end(); }
3511 inline const_iterator end() const { return Recipes.end(); }
3513 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3514 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3515 inline reverse_iterator rend() { return Recipes.rend(); }
3516 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3518 inline size_t size() const { return Recipes.size(); }
3519 inline bool empty() const { return Recipes.empty(); }
3520 inline const VPRecipeBase &front() const { return Recipes.front(); }
3521 inline VPRecipeBase &front() { return Recipes.front(); }
3522 inline const VPRecipeBase &back() const { return Recipes.back(); }
3523 inline VPRecipeBase &back() { return Recipes.back(); }
3525 /// Returns a reference to the list of recipes.
3526 RecipeListTy &getRecipeList() { return Recipes; }
3528 /// Returns a pointer to a member of the recipe list.
3529 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
3530 return &VPBasicBlock::Recipes;
3533 /// Method to support type inquiry through isa, cast, and dyn_cast.
3534 static inline bool classof(const VPBlockBase *V) {
3535 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
3536 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3539 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
3540 assert(Recipe && "No recipe to append.");
3541 assert(!Recipe->Parent && "Recipe already in VPlan");
3542 Recipe->Parent = this;
3543 Recipes.insert(InsertPt, Recipe);
3546 /// Augment the existing recipes of a VPBasicBlock with an additional
3547 /// \p Recipe as the last recipe.
3548 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
3550 /// The method which generates the output IR instructions that correspond to
3551 /// this VPBasicBlock, thereby "executing" the VPlan.
3552 void execute(VPTransformState *State) override;
3554 /// Return the cost of this VPBasicBlock.
3555 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
3557 /// Return the position of the first non-phi node recipe in the block.
3558 iterator getFirstNonPhi();
3560 /// Returns an iterator range over the PHI-like recipes in the block.
3561 iterator_range<iterator> phis() {
3562 return make_range(begin(), getFirstNonPhi());
3565 void dropAllReferences(VPValue *NewValue) override;
3567 /// Split current block at \p SplitAt by inserting a new block between the
3568 /// current block and its successors and moving all recipes starting at
3569 /// SplitAt to the new block. Returns the new block.
3570 VPBasicBlock *splitAt(iterator SplitAt);
3572 VPRegionBlock *getEnclosingLoopRegion();
3573 const VPRegionBlock *getEnclosingLoopRegion() const;
3575 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3576   /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
3577   /// SlotTracker is used to print unnamed VPValues using consecutive numbers.
3579 /// Note that the numbering is applied to the whole VPlan, so printing
3580 /// individual blocks is consistent with the whole VPlan printing.
3581 void print(raw_ostream &O, const Twine &Indent,
3582 VPSlotTracker &SlotTracker) const override;
3583 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3584 #endif
3586 /// If the block has multiple successors, return the branch recipe terminating
3587   /// the block. If there is no successor or only a single one, return nullptr.
3588 VPRecipeBase *getTerminator();
3589 const VPRecipeBase *getTerminator() const;
3591   /// Returns true if the block is exiting its parent region.
3592 bool isExiting() const;
3594   /// Clone the current block and its recipes, without updating the operands of
3595 /// the cloned recipes.
3596 VPBasicBlock *clone() override {
3597 auto *NewBlock = new VPBasicBlock(getName());
3598 for (VPRecipeBase &R : *this)
3599 NewBlock->appendRecipe(R.clone());
3600 return NewBlock;
3603 protected:
3604 /// Execute the recipes in the IR basic block \p BB.
3605 void executeRecipes(VPTransformState *State, BasicBlock *BB);
3607   /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
3608 /// generated for this VPBB.
3609 void connectToPredecessors(VPTransformState::CFGState &CFG);
3611 private:
3612 /// Create an IR BasicBlock to hold the output instructions generated by this
3613 /// VPBasicBlock, and return it. Update the CFGState accordingly.
3614 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
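// For illustration, a minimal sketch of building a block and visiting only
// its PHI-like recipes (R is a hypothetical, already created recipe):
//
//   VPBasicBlock *VPBB = new VPBasicBlock("loop.body");
//   VPBB->appendRecipe(R);
//   for (VPRecipeBase &Phi : VPBB->phis())
//     (void)Phi; // recipes before getFirstNonPhi()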
3617 /// A special type of VPBasicBlock that wraps an existing IR basic block.
3618 /// Recipes of the block get added before the first non-phi instruction in the
3619 /// wrapped block.
3620 /// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
3621 /// preheader block.
3622 class VPIRBasicBlock : public VPBasicBlock {
3623 BasicBlock *IRBB;
3625 public:
3626 VPIRBasicBlock(BasicBlock *IRBB)
3627 : VPBasicBlock(VPIRBasicBlockSC,
3628 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
3629 IRBB(IRBB) {}
3631 ~VPIRBasicBlock() override {}
3633 static inline bool classof(const VPBlockBase *V) {
3634 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3637 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
3638 /// instructions in \p IRBB, except its terminator which is managed in VPlan.
3639 static VPIRBasicBlock *fromBasicBlock(BasicBlock *IRBB);
3641 /// The method which generates the output IR instructions that correspond to
3642 /// this VPBasicBlock, thereby "executing" the VPlan.
3643 void execute(VPTransformState *State) override;
3645 VPIRBasicBlock *clone() override {
3646 auto *NewBlock = new VPIRBasicBlock(IRBB);
3647 for (VPRecipeBase &R : Recipes)
3648 NewBlock->appendRecipe(R.clone());
3649 return NewBlock;
3652 BasicBlock *getIRBasicBlock() const { return IRBB; }
3655 /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
3656 /// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
3657 /// A VPRegionBlock may indicate that its contents are to be replicated several
3658 /// times. This is designed to support predicated scalarization, in which a
3659 /// scalar if-then code structure needs to be generated VF * UF times. Having
3660 /// this replication indicator helps to keep a single model for multiple
3661 /// candidate VF's. The actual replication takes place only once the desired VF
3662 /// and UF have been determined.
3663 class VPRegionBlock : public VPBlockBase {
3664 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
3665 VPBlockBase *Entry;
3667 /// Hold the Single Exiting block of the SESE region modelled by the
3668 /// VPRegionBlock.
3669 VPBlockBase *Exiting;
3671 /// An indicator whether this region is to generate multiple replicated
3672 /// instances of output IR corresponding to its VPBlockBases.
3673 bool IsReplicator;
3675 public:
3676 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
3677 const std::string &Name = "", bool IsReplicator = false)
3678 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
3679 IsReplicator(IsReplicator) {
3680 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
3681 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
3682 Entry->setParent(this);
3683 Exiting->setParent(this);
3685 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
3686 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
3687 IsReplicator(IsReplicator) {}
3689 ~VPRegionBlock() override {
3690 if (Entry) {
3691 VPValue DummyValue;
3692 Entry->dropAllReferences(&DummyValue);
3693 deleteCFG(Entry);
3697 /// Method to support type inquiry through isa, cast, and dyn_cast.
3698 static inline bool classof(const VPBlockBase *V) {
3699 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
3702 const VPBlockBase *getEntry() const { return Entry; }
3703 VPBlockBase *getEntry() { return Entry; }
3705 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
3706 /// EntryBlock must have no predecessors.
3707 void setEntry(VPBlockBase *EntryBlock) {
3708 assert(EntryBlock->getPredecessors().empty() &&
3709 "Entry block cannot have predecessors.");
3710 Entry = EntryBlock;
3711 EntryBlock->setParent(this);
3714 const VPBlockBase *getExiting() const { return Exiting; }
3715 VPBlockBase *getExiting() { return Exiting; }
3717 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
3718 /// ExitingBlock must have no successors.
3719 void setExiting(VPBlockBase *ExitingBlock) {
3720 assert(ExitingBlock->getSuccessors().empty() &&
3721 "Exit block cannot have successors.");
3722 Exiting = ExitingBlock;
3723 ExitingBlock->setParent(this);
3726 /// Returns the pre-header VPBasicBlock of the loop region.
3727 VPBasicBlock *getPreheaderVPBB() {
3728 assert(!isReplicator() && "should only get pre-header of loop regions");
3729 return getSinglePredecessor()->getExitingBasicBlock();
3732 /// An indicator whether this region is to generate multiple replicated
3733 /// instances of output IR corresponding to its VPBlockBases.
3734 bool isReplicator() const { return IsReplicator; }
3736 /// The method which generates the output IR instructions that correspond to
3737 /// this VPRegionBlock, thereby "executing" the VPlan.
3738 void execute(VPTransformState *State) override;
3740 // Return the cost of this region.
3741 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
3743 void dropAllReferences(VPValue *NewValue) override;
3745 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3746 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
3747   /// \p Indent. \p SlotTracker is used to print unnamed VPValues using
3748   /// consecutive numbers.
3750 /// Note that the numbering is applied to the whole VPlan, so printing
3751 /// individual regions is consistent with the whole VPlan printing.
3752 void print(raw_ostream &O, const Twine &Indent,
3753 VPSlotTracker &SlotTracker) const override;
3754 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3755 #endif
3757 /// Clone all blocks in the single-entry single-exit region of the block and
3758 /// their recipes without updating the operands of the cloned recipes.
3759 VPRegionBlock *clone() override;
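// For illustration, wrapping hypothetical blocks Entry and Exiting (with no
// external predecessors resp. successors) into a replicating region:
//
//   auto *Region = new VPRegionBlock(Entry, Exiting, "pred.store",
//                                    /*IsReplicator=*/true);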
3762 /// VPlan models a candidate for vectorization, encoding various decisions taken
3763 /// to produce efficient output IR, including which branches, basic-blocks and
3764 /// output IR instructions to generate, and their cost. VPlan holds a
3765 /// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
3766 /// VPBasicBlock.
3767 class VPlan {
3768 friend class VPlanPrinter;
3769 friend class VPSlotTracker;
3771 /// VPBasicBlock corresponding to the original preheader. Used to place
3772 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
3773 /// rest of VPlan execution.
3774 /// When this VPlan is used for the epilogue vector loop, the entry will be
3775 /// replaced by a new entry block created during skeleton creation.
3776 VPBasicBlock *Entry;
3778 /// VPIRBasicBlock wrapping the header of the original scalar loop.
3779 VPIRBasicBlock *ScalarHeader;
3781 /// Holds the VFs applicable to this VPlan.
3782 SmallSetVector<ElementCount, 2> VFs;
3784 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
3785 /// any UF.
3786 SmallSetVector<unsigned, 2> UFs;
3788 /// Holds the name of the VPlan, for printing.
3789 std::string Name;
3791 /// Represents the trip count of the original loop, for folding
3792 /// the tail.
3793 VPValue *TripCount = nullptr;
3795 /// Represents the backedge taken count of the original loop, for folding
3796 /// the tail. It equals TripCount - 1.
3797 VPValue *BackedgeTakenCount = nullptr;
3799 /// Represents the vector trip count.
3800 VPValue VectorTripCount;
3802 /// Represents the vectorization factor of the loop.
3803 VPValue VF;
3805 /// Represents the loop-invariant VF * UF of the vector loop region.
3806 VPValue VFxUF;
3808 /// Holds a mapping between Values and their corresponding VPValue inside
3809 /// VPlan.
3810 Value2VPValueTy Value2VPValue;
3812 /// Contains all the external definitions created for this VPlan. External
3813 /// definitions are VPValues that hold a pointer to their underlying IR.
3814 SmallVector<VPValue *, 16> VPLiveInsToFree;
3816 /// Mapping from SCEVs to the VPValues representing their expansions.
3817 /// NOTE: This mapping is temporary and will be removed once all users have
3818 /// been modeled in VPlan directly.
3819 DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
3821 public:
3822 /// Construct a VPlan with \p Entry entering the plan, trip count \p TC and
3823 /// with \p ScalarHeader wrapping the original header of the scalar loop.
3824 VPlan(VPBasicBlock *Entry, VPValue *TC, VPIRBasicBlock *ScalarHeader)
3825 : VPlan(Entry, ScalarHeader) {
3826 TripCount = TC;
3829 /// Constructor variants that take disconnected preheader and entry blocks,
3830 /// connecting them as part of construction.
3831 /// FIXME: Only used to reduce the need of code changes during transition.
3832 VPlan(VPBasicBlock *OriginalPreheader, VPValue *TC,
3833 VPBasicBlock *EntryVectorPreHeader, VPIRBasicBlock *ScalarHeader);
3834 VPlan(VPBasicBlock *OriginalPreheader, VPBasicBlock *EntryVectorPreHeader,
3835 VPIRBasicBlock *ScalarHeader);
3837 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
3838 /// wrapping the original header of the scalar loop.
3839 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
3840 : Entry(Entry), ScalarHeader(ScalarHeader) {
3841 Entry->setPlan(this);
3842 assert(ScalarHeader->getNumSuccessors() == 0 &&
3843 "scalar header must be a leaf node");
3846 ~VPlan();
3848 void setEntry(VPBasicBlock *VPBB) {
3849 Entry = VPBB;
3850 VPBB->setPlan(this);
3853 /// Create initial VPlan, having an "entry" VPBasicBlock (wrapping
3854 /// original scalar pre-header) which contains SCEV expansions that need
3855 /// to happen before the CFG is modified (when executing a VPlan for the
3856 /// epilogue vector loop, the original entry needs to be replaced by a new
3857 /// one); a VPBasicBlock for the vector pre-header, followed by a region for
3858 /// the vector loop, followed by the middle VPBasicBlock. If a check is needed
3859 /// to guard executing the scalar epilogue loop, it will be added to the
3860 /// middle block, together with VPBasicBlocks for the scalar preheader and
3861 /// exit blocks. \p InductionTy is the type of the canonical induction and
3862 /// used for related values, like the trip count expression.
3863 static VPlanPtr createInitialVPlan(Type *InductionTy,
3864 PredicatedScalarEvolution &PSE,
3865 bool RequiresScalarEpilogueCheck,
3866 bool TailFolded, Loop *TheLoop);
3868 /// Prepare the plan for execution, setting up the required live-in values.
3869 void prepareToExecute(Value *TripCount, Value *VectorTripCount,
3870 VPTransformState &State);
3872 /// Generate the IR code for this VPlan.
3873 void execute(VPTransformState *State);
3875 /// Return the cost of this plan.
3876 InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
3878 VPBasicBlock *getEntry() { return Entry; }
3879 const VPBasicBlock *getEntry() const { return Entry; }
3881 /// Returns the preheader of the vector loop region.
3882 VPBasicBlock *getVectorPreheader() {
3883 return cast<VPBasicBlock>(getVectorLoopRegion()->getSinglePredecessor());
3886 /// Returns the VPRegionBlock of the vector loop.
3887 VPRegionBlock *getVectorLoopRegion();
3888 const VPRegionBlock *getVectorLoopRegion() const;
3890 /// Returns the 'middle' block of the plan, that is the block that selects
3891 /// whether to execute the scalar tail loop or the exit block from the loop
3892 /// latch.
3893 const VPBasicBlock *getMiddleBlock() const {
3894 return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
3896 VPBasicBlock *getMiddleBlock() {
3897 return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
3900 /// Return the VPBasicBlock for the preheader of the scalar loop.
3901 VPBasicBlock *getScalarPreheader() const {
3902 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
3905 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
3906 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
3908   /// Return an iterator range over the VPIRBasicBlocks wrapping the exit blocks
3909   /// of the VPlan, that is, leaf nodes except the scalar header. Defined in
3910 /// VPlanHCFG, as the definition of the type needs access to the definitions
3911 /// of VPBlockShallowTraversalWrapper.
3912 auto getExitBlocks();
3914 /// The trip count of the original loop.
3915 VPValue *getTripCount() const {
3916 assert(TripCount && "trip count needs to be set before accessing it");
3917 return TripCount;
3920 /// Resets the trip count for the VPlan. The caller must make sure all uses of
3921 /// the original trip count have been replaced.
3922 void resetTripCount(VPValue *NewTripCount) {
3923 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
3924 "TripCount always must be set");
3925 TripCount = NewTripCount;
3928 /// The backedge taken count of the original loop.
3929 VPValue *getOrCreateBackedgeTakenCount() {
3930 if (!BackedgeTakenCount)
3931 BackedgeTakenCount = new VPValue();
3932 return BackedgeTakenCount;
3935 /// The vector trip count.
3936 VPValue &getVectorTripCount() { return VectorTripCount; }
3938 /// Returns the VF of the vector loop region.
3939 VPValue &getVF() { return VF; };
3941 /// Returns VF * UF of the vector loop region.
3942 VPValue &getVFxUF() { return VFxUF; }
3944 void addVF(ElementCount VF) { VFs.insert(VF); }
3946 void setVF(ElementCount VF) {
3947 assert(hasVF(VF) && "Cannot set VF not already in plan");
3948 VFs.clear();
3949 VFs.insert(VF);
3952 bool hasVF(ElementCount VF) { return VFs.count(VF); }
3953 bool hasScalableVF() {
3954 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
3957 /// Returns an iterator range over all VFs of the plan.
3958 iterator_range<SmallSetVector<ElementCount, 2>::iterator>
3959 vectorFactors() const {
3960 return {VFs.begin(), VFs.end()};
3963 bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
3965 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
3967 unsigned getUF() const {
3968 assert(UFs.size() == 1 && "Expected a single UF");
3969 return UFs[0];
3972 void setUF(unsigned UF) {
3973 assert(hasUF(UF) && "Cannot set the UF not already in plan");
3974 UFs.clear();
3975 UFs.insert(UF);
3978 /// Return a string with the name of the plan and the applicable VFs and UFs.
3979 std::string getName() const;
3981 void setName(const Twine &newName) { Name = newName.str(); }
3983 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
3984 /// yet) for \p V.
3985 VPValue *getOrAddLiveIn(Value *V) {
3986 assert(V && "Trying to get or add the VPValue of a null Value");
3987 if (!Value2VPValue.count(V)) {
3988 VPValue *VPV = new VPValue(V);
3989 VPLiveInsToFree.push_back(VPV);
3990 assert(VPV->isLiveIn() && "VPV must be a live-in.");
3991 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
3992 Value2VPValue[V] = VPV;
3995 assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
3996 assert(Value2VPValue[V]->isLiveIn() &&
3997 "Only live-ins should be in mapping");
3998 return Value2VPValue[V];
4001   /// Return the live-in VPValue for \p V, if there is one, or nullptr otherwise.
4002 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
4004 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4005 /// Print the live-ins of this VPlan to \p O.
4006 void printLiveIns(raw_ostream &O) const;
4008 /// Print this VPlan to \p O.
4009 void print(raw_ostream &O) const;
4011 /// Print this VPlan in DOT format to \p O.
4012 void printDOT(raw_ostream &O) const;
4014 /// Dump the plan to stderr (for debugging).
4015 LLVM_DUMP_METHOD void dump() const;
4016 #endif
4018 /// Returns the canonical induction recipe of the vector loop.
4019 VPCanonicalIVPHIRecipe *getCanonicalIV() {
4020 VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
4021 if (EntryVPBB->empty()) {
4022 // VPlan native path.
4023 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4025 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4028 VPValue *getSCEVExpansion(const SCEV *S) const {
4029 return SCEVToExpansion.lookup(S);
4032 void addSCEVExpansion(const SCEV *S, VPValue *V) {
4033 assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
4034 SCEVToExpansion[S] = V;
4037 /// \return The block corresponding to the original preheader.
4038 /// FIXME: There's no separate preheader any longer and Entry now serves the
4039 /// same purpose as the original preheader. Remove after transition.
4040 VPBasicBlock *getPreheader() { return Entry; }
4041 const VPBasicBlock *getPreheader() const { return Entry; }
4043 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4044 /// recipes to refer to the clones, and return it.
4045 VPlan *duplicate();
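// A short usage sketch, assuming a constructed VPlan Plan and an IR value V:
//
//   Plan.addVF(ElementCount::getFixed(4));
//   if (Plan.hasVF(ElementCount::getFixed(4)))
//     Plan.setVF(ElementCount::getFixed(4));
//   VPValue *LiveIn = Plan.getOrAddLiveIn(V); // reused on repeated calls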
4048 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4049 /// VPlanPrinter prints a given VPlan to a given output stream. The printing is
4050 /// indented and follows the dot format.
4051 class VPlanPrinter {
4052 raw_ostream &OS;
4053 const VPlan &Plan;
4054 unsigned Depth = 0;
4055 unsigned TabWidth = 2;
4056 std::string Indent;
4057 unsigned BID = 0;
4058 SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
4060 VPSlotTracker SlotTracker;
4062 /// Handle indentation.
4063 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
4065 /// Print a given \p Block of the Plan.
4066 void dumpBlock(const VPBlockBase *Block);
4068 /// Print the information related to the CFG edges going out of a given
4069 /// \p Block, followed by printing the successor blocks themselves.
4070 void dumpEdges(const VPBlockBase *Block);
4072 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
4073 /// its successor blocks.
4074 void dumpBasicBlock(const VPBasicBlock *BasicBlock);
4076 /// Print a given \p Region of the Plan.
4077 void dumpRegion(const VPRegionBlock *Region);
4079 unsigned getOrCreateBID(const VPBlockBase *Block) {
4080 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
4083 Twine getOrCreateName(const VPBlockBase *Block);
4085 Twine getUID(const VPBlockBase *Block);
4087 /// Print the information related to a CFG edge between two VPBlockBases.
4088 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
4089 const Twine &Label);
4091 public:
4092 VPlanPrinter(raw_ostream &O, const VPlan &P)
4093 : OS(O), Plan(P), SlotTracker(&P) {}
4095 LLVM_DUMP_METHOD void dump();
4098 struct VPlanIngredient {
4099 const Value *V;
4101 VPlanIngredient(const Value *V) : V(V) {}
4103 void print(raw_ostream &O) const;
4106 inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
4107 I.print(OS);
4108 return OS;
4111 inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4112 Plan.print(OS);
4113 return OS;
4115 #endif
4117 //===----------------------------------------------------------------------===//
4118 // VPlan Utilities
4119 //===----------------------------------------------------------------------===//
4121 /// Class that provides utilities for VPBlockBases in VPlan.
4122 class VPBlockUtils {
4123 public:
4124 VPBlockUtils() = delete;
4126 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
4127 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
4128 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
4129 /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
4130 /// have neither successors nor predecessors.
4131 static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
4132 assert(NewBlock->getSuccessors().empty() &&
4133 NewBlock->getPredecessors().empty() &&
4134 "Can't insert new block with predecessors or successors.");
4135 NewBlock->setParent(BlockPtr->getParent());
4136 SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
4137 for (VPBlockBase *Succ : Succs) {
4138 disconnectBlocks(BlockPtr, Succ);
4139 connectBlocks(NewBlock, Succ);
4141 connectBlocks(BlockPtr, NewBlock);
4144   /// Insert disconnected block \p NewBlock before \p BlockPtr. First
4145 /// disconnects all predecessors of \p BlockPtr and connects them to \p
4146 /// NewBlock. Add \p NewBlock as predecessor of \p BlockPtr and \p BlockPtr as
4147 /// successor of \p NewBlock.
4148 static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
4149 assert(NewBlock->getSuccessors().empty() &&
4150 NewBlock->getPredecessors().empty() &&
4151 "Can't insert new block with predecessors or successors.");
4152 NewBlock->setParent(BlockPtr->getParent());
4153 for (VPBlockBase *Pred : to_vector(BlockPtr->predecessors())) {
4154 disconnectBlocks(Pred, BlockPtr);
4155 connectBlocks(Pred, NewBlock);
4157 connectBlocks(NewBlock, BlockPtr);
4160 /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
4161   /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p
4162 /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
4163 /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
4164 /// and \p IfTrue and \p IfFalse must have neither successors nor
4165 /// predecessors.
4166 static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
4167 VPBlockBase *BlockPtr) {
4168 assert(IfTrue->getSuccessors().empty() &&
4169 "Can't insert IfTrue with successors.");
4170 assert(IfFalse->getSuccessors().empty() &&
4171 "Can't insert IfFalse with successors.");
4172 BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
4173 IfTrue->setPredecessors({BlockPtr});
4174 IfFalse->setPredecessors({BlockPtr});
4175 IfTrue->setParent(BlockPtr->getParent());
4176 IfFalse->setParent(BlockPtr->getParent());
4179 /// Connect VPBlockBases \p From and \p To bi-directionally. If \p PredIdx is
4180 /// -1, append \p From to the predecessors of \p To, otherwise set \p To's
4181 /// predecessor at \p PredIdx to \p From. If \p SuccIdx is -1, append \p To to
4182 /// the successors of \p From, otherwise set \p From's successor at \p SuccIdx
4183 /// to \p To. Both VPBlockBases must have the same parent, which can be null.
4184 /// Both VPBlockBases can be already connected to other VPBlockBases.
4185 static void connectBlocks(VPBlockBase *From, VPBlockBase *To,
4186 unsigned PredIdx = -1u, unsigned SuccIdx = -1u) {
4187 assert((From->getParent() == To->getParent()) &&
4188 "Can't connect two block with different parents");
4189 assert((SuccIdx != -1u || From->getNumSuccessors() < 2) &&
4190 "Blocks can't have more than two successors.");
4191 if (SuccIdx == -1u)
4192 From->appendSuccessor(To);
4193 else
4194 From->getSuccessors()[SuccIdx] = To;
4196 if (PredIdx == -1u)
4197 To->appendPredecessor(From);
4198 else
4199 To->getPredecessors()[PredIdx] = From;
4202 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
4203 /// from the successors of \p From and \p From from the predecessors of \p To.
4204 static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
4205 assert(To && "Successor to disconnect is null.");
4206 From->removeSuccessor(To);
4207 To->removePredecessor(From);
4210 /// Reassociate all the blocks connected to \p Old so that they now point to
4211 /// \p New.
4212 static void reassociateBlocks(VPBlockBase *Old, VPBlockBase *New) {
4213 for (auto *Pred : to_vector(Old->getPredecessors()))
4214 Pred->replaceSuccessor(Old, New);
4215 for (auto *Succ : to_vector(Old->getSuccessors()))
4216 Succ->replacePredecessor(Old, New);
4217 New->setPredecessors(Old->getPredecessors());
4218 New->setSuccessors(Old->getSuccessors());
4219 Old->clearPredecessors();
4220 Old->clearSuccessors();
4223 /// Return an iterator range over \p Range which only includes \p BlockTy
4224   /// blocks. The accesses are cast to \p BlockTy.
4225 template <typename BlockTy, typename T>
4226 static auto blocksOnly(const T &Range) {
4227 // Create BaseTy with correct const-ness based on BlockTy.
4228 using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
4229 const VPBlockBase, VPBlockBase>;
4231     // We need to first create an iterator range over (const) BlockTy & instead
4232 // of (const) BlockTy * for filter_range to work properly.
4233 auto Mapped =
4234 map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
4235 auto Filter = make_filter_range(
4236 Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
4237 return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
4238 return cast<BlockTy>(&Block);
4242 /// Inserts \p BlockPtr on the edge between \p From and \p To. That is, update
4243 /// \p From's successor to \p To to point to \p BlockPtr and \p To's
4244 /// predecessor from \p From to \p BlockPtr. \p From and \p To are added to \p
4245 /// BlockPtr's predecessors and successors respectively. There must be a
4246 /// single edge between \p From and \p To.
4247 static void insertOnEdge(VPBlockBase *From, VPBlockBase *To,
4248 VPBlockBase *BlockPtr) {
4249 auto &Successors = From->getSuccessors();
4250 auto &Predecessors = To->getPredecessors();
4251 assert(count(Successors, To) == 1 && count(Predecessors, From) == 1 &&
4252 "must have single between From and To");
4253 unsigned SuccIdx = std::distance(Successors.begin(), find(Successors, To));
4254 unsigned PredIx =
4255 std::distance(Predecessors.begin(), find(Predecessors, From));
4256 VPBlockUtils::connectBlocks(From, BlockPtr, -1, SuccIdx);
4257 VPBlockUtils::connectBlocks(BlockPtr, To, PredIx, -1);
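// For illustration, inserting a fresh block on the single edge between two
// connected blocks Pred and Succ (hypothetical):
//
//   auto *NewVPBB = new VPBasicBlock("new.block");
//   VPBlockUtils::insertOnEdge(Pred, Succ, NewVPBB);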
4261 class VPInterleavedAccessInfo {
4262 DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
4263 InterleaveGroupMap;
4265 /// Type for mapping of instruction based interleave groups to VPInstruction
4266 /// interleave groups
4267 using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
4268 InterleaveGroup<VPInstruction> *>;
4270   /// Recursively traverse \p Region and populate VPlan based interleave groups based on
4271 /// \p IAI.
4272 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
4273 InterleavedAccessInfo &IAI);
4274 /// Recursively traverse \p Block and populate VPlan based interleave groups
4275 /// based on \p IAI.
4276 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
4277 InterleavedAccessInfo &IAI);
4279 public:
4280 VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);
4282 ~VPInterleavedAccessInfo() {
4283 SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
4284 // Avoid releasing a pointer twice.
4285 for (auto &I : InterleaveGroupMap)
4286 DelSet.insert(I.second);
4287 for (auto *Ptr : DelSet)
4288 delete Ptr;
4291 /// Get the interleave group that \p Instr belongs to.
4293   /// \returns nullptr if \p Instr does not belong to any such group.
4294 InterleaveGroup<VPInstruction> *
4295 getInterleaveGroup(VPInstruction *Instr) const {
4296 return InterleaveGroupMap.lookup(Instr);
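// A short usage sketch (hypothetical Plan, IAI and VPInstruction *VPI;
// getFactor is InterleaveGroup's accessor for the interleave factor):
//
//   VPInterleavedAccessInfo VPIAI(Plan, IAI);
//   if (auto *IG = VPIAI.getInterleaveGroup(VPI)) {
//     unsigned Factor = IG->getFactor();
//     (void)Factor;
//   }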
4300 /// Class that maps (parts of) an existing VPlan to trees of combined
4301 /// VPInstructions.
4302 class VPlanSlp {
4303 enum class OpMode { Failed, Load, Opcode };
4305 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
4306 /// DenseMap keys.
4307 struct BundleDenseMapInfo {
4308 static SmallVector<VPValue *, 4> getEmptyKey() {
4309 return {reinterpret_cast<VPValue *>(-1)};
4312 static SmallVector<VPValue *, 4> getTombstoneKey() {
4313 return {reinterpret_cast<VPValue *>(-2)};
4316 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
4317 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
4320 static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
4321 const SmallVector<VPValue *, 4> &RHS) {
4322 return LHS == RHS;
4326 /// Mapping of values in the original VPlan to a combined VPInstruction.
4327 DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
4328 BundleToCombined;
4330 VPInterleavedAccessInfo &IAI;
4332 /// Basic block to operate on. For now, only instructions in a single BB are
4333 /// considered.
4334 const VPBasicBlock &BB;
4336 /// Indicates whether we managed to combine all visited instructions or not.
4337 bool CompletelySLP = true;
4339 /// Width of the widest combined bundle in bits.
4340 unsigned WidestBundleBits = 0;
4342 using MultiNodeOpTy =
4343 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
4345 // Input operand bundles for the current multi node. Each multi node operand
4346 // bundle contains values not matching the multi node's opcode. They will
4347   // be reordered in reorderMultiNodeOps, once we have completed building a
4348 // multi node.
4349 SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
4351 /// Indicates whether we are building a multi node currently.
4352 bool MultiNodeActive = false;
4354 /// Check if we can vectorize Operands together.
4355 bool areVectorizable(ArrayRef<VPValue *> Operands) const;
4357 /// Add combined instruction \p New for the bundle \p Operands.
4358 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
4360 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
4361 VPInstruction *markFailed();
4363 /// Reorder operands in the multi node to maximize sequential memory access
4364 /// and commutative operations.
4365 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
4367 /// Choose the best candidate to use for the lane after \p Last. The set of
4368 /// candidates to choose from are values with an opcode matching \p Last's
4369 /// or loads consecutive to \p Last.
4370 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
4371 SmallPtrSetImpl<VPValue *> &Candidates,
4372 VPInterleavedAccessInfo &IAI);
4374 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4375 /// Print bundle \p Values to dbgs().
4376 void dumpBundle(ArrayRef<VPValue *> Values);
4377 #endif
4379 public:
4380 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
4382 ~VPlanSlp() = default;
4384 /// Tries to build an SLP tree rooted at \p Operands and returns a
4385 /// VPInstruction combining \p Operands, if they can be combined.
4386 VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
4388 /// Return the width of the widest combined bundle in bits.
4389 unsigned getWidestBundleBits() const { return WidestBundleBits; }
4391   /// Return true if all visited instructions can be combined.
4392 bool isCompletelySLP() const { return CompletelySLP; }
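// A short usage sketch, assuming VPIAI and a block VPBB as above, plus a
// hypothetical bundle of store VPValues to use as roots:
//
//   VPlanSlp Slp(VPIAI, *VPBB);
//   VPInstruction *Root = Slp.buildGraph(StoreBundle);
//   if (Root && Slp.isCompletelySLP()) {
//     unsigned Bits = Slp.getWidestBundleBits();
//     (void)Bits;
//   }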
4394 } // end namespace llvm
4396 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H