//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of the Vectorization Plan base classes:
/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
///    VPBlockBase, together implementing a Hierarchical CFG;
/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
///    within VPBasicBlocks;
/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
///    also inherit from VPValue;
/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
///    instruction;
/// 5. The VPlan class holding a candidate for vectorization;
/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H

#include "VPlanAnalysis.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/InstructionCost.h"
namespace llvm {

class InnerLoopVectorizer;
class RecurrenceDescriptor;
class VPReplicateRecipe;
class LoopVectorizationCostModel;
/// Returns a calculation for the total number of elements for a given \p VF.
/// For fixed width vectors this value is a constant, whereas for scalable
/// vectors it is an expression determined at runtime.
Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
/// Return a value for Step multiplied by VF.
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
                       int64_t Step);
/// A helper function that returns the reciprocal of the block probability of
/// predicated blocks. If we return X, we are assuming the predicated block
/// will execute once for every X iterations of the loop header.
///
/// TODO: We should use actual block probability here, if available. Currently,
/// we always assume predicated blocks have a 50% chance of executing.
inline unsigned getReciprocalPredBlockProb() { return 2; }
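// Usage sketch (illustrative, not part of the original header): callers are
// expected to scale a predicated block's cost by the reciprocal probability,
// e.g.:
//
//   InstructionCost BlockCost = ...; // cost as if the block always executes
//   BlockCost /= getReciprocalPredBlockProb(); // expected cost at 50% prob.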
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 16) = {1, 2, 4, 8}
struct VFRange {
  // A power of 2.
  const ElementCount Start;

  // A power of 2. If End <= Start range is empty.
  ElementCount End;

  bool isEmpty() const {
    return End.getKnownMinValue() <= Start.getKnownMinValue();
  }

  VFRange(const ElementCount &Start, const ElementCount &End)
      : Start(Start), End(End) {
    assert(Start.isScalable() == End.isScalable() &&
           "Both Start and End should have the same scalable flag");
    assert(isPowerOf2_32(Start.getKnownMinValue()) &&
           "Expected Start to be a power of 2");
    assert(isPowerOf2_32(End.getKnownMinValue()) &&
           "Expected End to be a power of 2");
  }

  /// Iterator to iterate over vectorization factors in a VFRange.
  class iterator
      : public iterator_facade_base<iterator, std::forward_iterator_tag,
                                    ElementCount> {
    ElementCount VF;

  public:
    iterator(ElementCount VF) : VF(VF) {}

    bool operator==(const iterator &Other) const { return VF == Other.VF; }

    ElementCount operator*() const { return VF; }

    iterator &operator++() {
      VF *= 2;
      return *this;
    }
  };

  iterator begin() { return iterator(Start); }
  iterator end() {
    assert(isPowerOf2_32(End.getKnownMinValue()));
    return iterator(End);
  }
};
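// Usage sketch (illustrative, not part of the original header): iterating a
// VFRange visits the powers of 2 in [Start, End), so this loop sees the VFs
// 1, 2, 4 and 8:
//
//   VFRange Range(ElementCount::getFixed(1), ElementCount::getFixed(16));
//   for (ElementCount VF : Range)
//     dbgs() << VF << "\n";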
using VPlanPtr = std::unique_ptr<VPlan>;
/// In what follows, the term "input IR" refers to code that is fed into the
/// vectorizer whereas the term "output IR" refers to code that is generated by
/// the vectorizer.

/// VPLane provides a way to access lanes in both fixed width and scalable
/// vectors, where for the latter the lane index sometimes needs calculating
/// as a runtime expression.
class VPLane {
public:
  /// Kind describes how to interpret Lane.
  enum class Kind : uint8_t {
    /// For First, Lane is the index into the first N elements of a
    /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
    First,
    /// For ScalableLast, Lane is the offset from the start of the last
    /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
    /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
    /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
    ScalableLast
  };

private:
  /// Lane offset, in [0..VF).
  unsigned Lane;

  /// Indicates how the Lane should be interpreted, as described above.
  Kind LaneKind;

public:
  VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {}
  VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}

  static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }

  static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
    assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
           "trying to extract with invalid offset");
    unsigned LaneOffset = VF.getKnownMinValue() - Offset;
    Kind LaneKind;
    if (VF.isScalable())
      // In this case 'LaneOffset' refers to the offset from the start of the
      // last subvector with VF.getKnownMinValue() elements.
      LaneKind = VPLane::Kind::ScalableLast;
    else
      LaneKind = VPLane::Kind::First;
    return VPLane(LaneOffset, LaneKind);
  }

  static VPLane getLastLaneForVF(const ElementCount &VF) {
    return getLaneFromEnd(VF, 1);
  }

  /// Returns a compile-time known value for the lane index and asserts if the
  /// lane can only be calculated at runtime.
  unsigned getKnownLane() const {
    assert(LaneKind == Kind::First);
    return Lane;
  }

  /// Returns an expression describing the lane index that can be used at
  /// runtime.
  Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;

  /// Returns the Kind of lane offset.
  Kind getKind() const { return LaneKind; }

  /// Returns true if this is the first lane of the whole vector.
  bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }

  /// Maps the lane to a cache index based on \p VF.
  unsigned mapToCacheIndex(const ElementCount &VF) const {
    switch (LaneKind) {
    case VPLane::Kind::ScalableLast:
      assert(VF.isScalable() && Lane < VF.getKnownMinValue());
      return VF.getKnownMinValue() + Lane;
    default:
      assert(Lane < VF.getKnownMinValue());
      return Lane;
    }
  }

  /// Returns the maximum number of lanes that we are able to consider
  /// caching for \p VF.
  static unsigned getNumCachedLanes(const ElementCount &VF) {
    return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
  }
};
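// Worked example (illustrative, not part of the original header): for a
// scalable VF of <vscale x 4 x ...>, lanes of Kind::First map to cache
// indices 0..3 and lanes of Kind::ScalableLast map to 4..7, matching
// getNumCachedLanes(VF) == 8:
//
//   ElementCount VF = ElementCount::getScalable(4);
//   VPLane L = VPLane::getLaneFromEnd(VF, 1); // Kind::ScalableLast, Lane 3
//   unsigned Idx = L.mapToCacheIndex(VF);     // 4 + 3 == 7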
/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
  VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, unsigned UF,
                   LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder,
                   InnerLoopVectorizer *ILV, VPlan *Plan, Type *CanonicalIVTy);

  /// Target Transform Info.
  const TargetTransformInfo *TTI;

  /// The chosen Vectorization Factor of the loop being vectorized.
  ElementCount VF;

  /// Hold the index to generate specific scalar instructions. Null indicates
  /// that all instances are to be generated, using either scalar or vector
  /// instructions.
  std::optional<VPLane> Lane;

  struct DataState {
    // Each value from the original loop, when vectorized, is represented by a
    // vector value in the map.
    DenseMap<VPValue *, Value *> VPV2Vector;

    DenseMap<VPValue *, SmallVector<Value *, 4>> VPV2Scalars;
  } Data;

  /// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
  /// is false, otherwise return the generated scalar. \See set.
  Value *get(VPValue *Def, bool IsScalar = false);

  /// Get the generated Value for a given VPValue and given Part and Lane.
  Value *get(VPValue *Def, const VPLane &Lane);

  bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }

  bool hasScalarValue(VPValue *Def, VPLane Lane) {
    auto I = Data.VPV2Scalars.find(Def);
    if (I == Data.VPV2Scalars.end())
      return false;
    unsigned CacheIdx = Lane.mapToCacheIndex(VF);
    return CacheIdx < I->second.size() && I->second[CacheIdx];
  }
  /// Set the generated vector Value for a given VPValue, if \p
  /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
  void set(VPValue *Def, Value *V, bool IsScalar = false) {
    if (IsScalar) {
      set(Def, V, VPLane(0));
      return;
    }
    assert((VF.isScalar() || V->getType()->isVectorTy()) &&
           "scalar values must be stored as (0, 0)");
    Data.VPV2Vector[Def] = V;
  }

  /// Reset an existing vector value for \p Def and a given \p Part.
  void reset(VPValue *Def, Value *V) {
    assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
    Data.VPV2Vector[Def] = V;
  }

  /// Set the generated scalar \p V for \p Def and the given \p Lane.
  void set(VPValue *Def, Value *V, const VPLane &Lane) {
    auto &Scalars = Data.VPV2Scalars[Def];
    unsigned CacheIdx = Lane.mapToCacheIndex(VF);
    if (Scalars.size() <= CacheIdx)
      Scalars.resize(CacheIdx + 1);
    assert(!Scalars[CacheIdx] && "should overwrite existing value");
    Scalars[CacheIdx] = V;
  }

  /// Reset an existing scalar value for \p Def and a given \p Lane.
  void reset(VPValue *Def, Value *V, const VPLane &Lane) {
    auto Iter = Data.VPV2Scalars.find(Def);
    assert(Iter != Data.VPV2Scalars.end() &&
           "need to overwrite existing value");
    unsigned CacheIdx = Lane.mapToCacheIndex(VF);
    assert(CacheIdx < Iter->second.size() &&
           "need to overwrite existing value");
    Iter->second[CacheIdx] = V;
  }
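  // Usage sketch (illustrative, not part of the original header): inside a
  // VPSingleDefRecipe's execute(), operands are read and the result is
  // recorded through this state, roughly:
  //
  //   Value *A = State.get(getOperand(0)); // widened value of operand 0
  //   Value *Sum = State.Builder.CreateAdd(A, A);
  //   State.set(this, Sum);                // cache the widened result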
  /// Add additional metadata to \p To that was not present on \p Orig.
  ///
  /// Currently this is used to add the noalias annotations based on the
  /// inserted memchecks. Use this for instructions that are *cloned* into the
  /// vector loop.
  void addNewMetadata(Instruction *To, const Instruction *Orig);

  /// Add metadata from one instruction to another.
  ///
  /// This includes both the original MDs from \p From and additional ones (\see
  /// addNewMetadata). Use this for *newly created* instructions in the vector
  /// loop.
  void addMetadata(Value *To, Instruction *From);

  /// Set the debug location in the builder using the debug location \p DL.
  void setDebugLocFrom(DebugLoc DL);

  /// Construct the vector value of a scalarized value \p V one lane at a time.
  void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane);
  /// Hold state information used when constructing the CFG of the output IR,
  /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
  struct CFGState {
    /// The previous VPBasicBlock visited. Initially set to null.
    VPBasicBlock *PrevVPBB = nullptr;

    /// The previous IR BasicBlock created or used. Initially set to the new
    /// header BasicBlock.
    BasicBlock *PrevBB = nullptr;

    /// The last IR BasicBlock in the output IR. Set to the exit block of the
    /// vector loop.
    BasicBlock *ExitBB = nullptr;

    /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
    /// of replication, maps the BasicBlock of the last replica created.
    SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;

    /// Updater for the DominatorTree.
    DomTreeUpdater DTU;

    CFGState(DominatorTree *DT)
        : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}

    /// Returns the BasicBlock* mapped to the pre-header of the loop region
    /// containing \p R.
    BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
  } CFG;
  /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
  LoopInfo *LI;

  /// Hold a reference to the IRBuilder used to generate output IR code.
  IRBuilderBase &Builder;

  /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
  InnerLoopVectorizer *ILV;

  /// Pointer to the VPlan code is generated for.
  VPlan *Plan;

  /// The loop object for the current parent region, or nullptr.
  Loop *CurrentVectorLoop = nullptr;

  /// LoopVersioning. It's only set up (non-null) if memchecks were
  /// used.
  ///
  /// This is currently only used to add no-alias metadata based on the
  /// memchecks. The actual versioning is performed manually.
  LoopVersioning *LVer = nullptr;

  /// Map SCEVs to their expanded values. Populated when executing
  /// VPExpandSCEVRecipes.
  DenseMap<const SCEV *, Value *> ExpandedSCEVs;

  /// VPlan-based type analysis.
  VPTypeAnalysis TypeAnalysis;
};
/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
class VPBlockBase {
  friend class VPBlockUtils;

  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).

  /// An optional name for the block.
  std::string Name;

  /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
  /// it is a topmost VPBlockBase.
  VPRegionBlock *Parent = nullptr;

  /// List of predecessor blocks.
  SmallVector<VPBlockBase *, 1> Predecessors;

  /// List of successor blocks.
  SmallVector<VPBlockBase *, 1> Successors;

  /// VPlan containing the block. Can only be set on the entry block of the
  /// VPlan.
  VPlan *Plan = nullptr;

  /// Add \p Successor as the last successor to this block.
  void appendSuccessor(VPBlockBase *Successor) {
    assert(Successor && "Cannot add nullptr successor!");
    Successors.push_back(Successor);
  }

  /// Add \p Predecessor as the last predecessor to this block.
  void appendPredecessor(VPBlockBase *Predecessor) {
    assert(Predecessor && "Cannot add nullptr predecessor!");
    Predecessors.push_back(Predecessor);
  }

  /// Remove \p Predecessor from the predecessors of this block.
  void removePredecessor(VPBlockBase *Predecessor) {
    auto Pos = find(Predecessors, Predecessor);
    assert(Pos && "Predecessor does not exist");
    Predecessors.erase(Pos);
  }

  /// Remove \p Successor from the successors of this block.
  void removeSuccessor(VPBlockBase *Successor) {
    auto Pos = find(Successors, Successor);
    assert(Pos && "Successor does not exist");
    Successors.erase(Pos);
  }
  /// This function replaces one predecessor with another, useful when
  /// trying to replace an old block in the CFG with a new one.
  void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
    auto I = find(Predecessors, Old);
    assert(I != Predecessors.end());
    assert(Old->getParent() == New->getParent() &&
           "replaced predecessor must have the same parent");
    *I = New;
  }

  /// This function replaces one successor with another, useful when
  /// trying to replace an old block in the CFG with a new one.
  void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
    auto I = find(Successors, Old);
    assert(I != Successors.end());
    assert(Old->getParent() == New->getParent() &&
           "replaced successor must have the same parent");
    *I = New;
  }
protected:
  VPBlockBase(const unsigned char SC, const std::string &N)
      : SubclassID(SC), Name(N) {}

public:
  /// An enumeration for keeping track of the concrete subclass of VPBlockBase
  /// that are actually instantiated. Values of this enumeration are kept in the
  /// SubclassID field of the VPBlockBase objects. They are used for concrete
  /// type identification.
  using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };

  using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;

  virtual ~VPBlockBase() = default;

  const std::string &getName() const { return Name; }

  void setName(const Twine &newName) { Name = newName.str(); }

  /// \return an ID for the concrete type of this object.
  /// This is used to implement the classof checks. This should not be used
  /// for any other purpose, as the values may change as LLVM evolves.
  unsigned getVPBlockID() const { return SubclassID; }

  VPRegionBlock *getParent() { return Parent; }
  const VPRegionBlock *getParent() const { return Parent; }
  /// \return A pointer to the plan containing the current block.
  VPlan *getPlan();
  const VPlan *getPlan() const;

  /// Sets the pointer of the plan containing the block. The block must be the
  /// entry block into the VPlan.
  void setPlan(VPlan *ParentPlan);

  void setParent(VPRegionBlock *P) { Parent = P; }

  /// \return the VPBasicBlock that is the entry of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getEntryBasicBlock() const;
  VPBasicBlock *getEntryBasicBlock();

  /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getExitingBasicBlock() const;
  VPBasicBlock *getExitingBasicBlock();

  const VPBlocksTy &getSuccessors() const { return Successors; }
  VPBlocksTy &getSuccessors() { return Successors; }

  iterator_range<VPBlockBase **> successors() { return Successors; }
  iterator_range<VPBlockBase **> predecessors() { return Predecessors; }

  const VPBlocksTy &getPredecessors() const { return Predecessors; }
  VPBlocksTy &getPredecessors() { return Predecessors; }

  /// \return the successor of this VPBlockBase if it has a single successor.
  /// Otherwise return a null pointer.
  VPBlockBase *getSingleSuccessor() const {
    return (Successors.size() == 1 ? *Successors.begin() : nullptr);
  }

  /// \return the predecessor of this VPBlockBase if it has a single
  /// predecessor. Otherwise return a null pointer.
  VPBlockBase *getSinglePredecessor() const {
    return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
  }

  size_t getNumSuccessors() const { return Successors.size(); }
  size_t getNumPredecessors() const { return Predecessors.size(); }
  /// An Enclosing Block of a block B is any block containing B, including B
  /// itself. \return the closest enclosing block starting from "this", which
  /// has successors. \return the root enclosing block if all enclosing blocks
  /// have no successors.
  VPBlockBase *getEnclosingBlockWithSuccessors();

  /// \return the closest enclosing block starting from "this", which has
  /// predecessors. \return the root enclosing block if all enclosing blocks
  /// have no predecessors.
  VPBlockBase *getEnclosingBlockWithPredecessors();

  /// \return the successors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the exit block of a VPRegionBlock and has no
  /// successors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has successors and return them. If no such
  /// VPRegionBlock exists, return the (empty) successors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalSuccessors() {
    return getEnclosingBlockWithSuccessors()->getSuccessors();
  }

  /// \return the hierarchical successor of this VPBlockBase if it has a single
  /// hierarchical successor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalSuccessor() {
    return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
  }

  /// \return the predecessors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
  /// predecessors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has predecessors and return them. If no such
  /// VPRegionBlock exists, return the (empty) predecessors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalPredecessors() {
    return getEnclosingBlockWithPredecessors()->getPredecessors();
  }

  /// \return the hierarchical predecessor of this VPBlockBase if it has a
  /// single hierarchical predecessor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalPredecessor() {
    return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
  }
  /// Set a given VPBlockBase \p Successor as the single successor of this
  /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
  /// This VPBlockBase must have no successors.
  void setOneSuccessor(VPBlockBase *Successor) {
    assert(Successors.empty() && "Setting one successor when others exist.");
    assert(Successor->getParent() == getParent() &&
           "connected blocks must have the same parent");
    appendSuccessor(Successor);
  }

  /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
  /// successors of this VPBlockBase. This VPBlockBase is not added as
  /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
  /// successors.
  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
    assert(Successors.empty() && "Setting two successors when others exist.");
    appendSuccessor(IfTrue);
    appendSuccessor(IfFalse);
  }

  /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
  /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
  /// as successor of any VPBasicBlock in \p NewPreds.
  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
    assert(Predecessors.empty() && "Block predecessors already set.");
    for (auto *Pred : NewPreds)
      appendPredecessor(Pred);
  }

  /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
  /// This VPBlockBase must have no successors. This VPBlockBase is not added
  /// as predecessor of any VPBasicBlock in \p NewSuccs.
  void setSuccessors(ArrayRef<VPBlockBase *> NewSuccs) {
    assert(Successors.empty() && "Block successors already set.");
    for (auto *Succ : NewSuccs)
      appendSuccessor(Succ);
  }

  /// Remove all the predecessors of this block.
  void clearPredecessors() { Predecessors.clear(); }

  /// Remove all the successors of this block.
  void clearSuccessors() { Successors.clear(); }

  /// Swap successors of the block. The block must have exactly 2 successors.
  // TODO: This should be part of introducing conditional branch recipes rather
  // than being independent.
  void swapSuccessors() {
    assert(Successors.size() == 2 && "must have 2 successors to swap");
    std::swap(Successors[0], Successors[1]);
  }
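  // Usage sketch (illustrative, not part of the original header): these
  // helpers wire only one direction of an edge, so both sides must be set
  // explicitly (utilities such as VPBlockUtils typically wrap this pattern):
  //
  //   Then->setOneSuccessor(Merge);   // edge Then -> Merge
  //   Merge->setPredecessors({Then}); // reverse edge kept in sync manually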
  /// The method which generates the output IR that corresponds to this
  /// VPBlockBase, thereby "executing" the VPlan.
  virtual void execute(VPTransformState *State) = 0;

  /// Return the cost of the block.
  virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0;

  /// Delete all blocks reachable from a given VPBlockBase, inclusive.
  static void deleteCFG(VPBlockBase *Entry);

  /// Return true if it is legal to hoist instructions into this block.
  bool isLegalToHoistInto() {
    // There are currently no constraints that prevent an instruction from
    // being hoisted into a VPBlockBase.
    return true;
  }

  /// Replace all operands of VPUsers in the block with \p NewValue and also
  /// replaces all uses of VPValues defined in the block with NewValue.
  virtual void dropAllReferences(VPValue *NewValue) = 0;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
    OS << getName();
  }

  /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
  /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  virtual void print(raw_ostream &O, const Twine &Indent,
                     VPSlotTracker &SlotTracker) const = 0;

  /// Print plain-text dump of this VPlan to \p O.
  void print(raw_ostream &O) const {
    VPSlotTracker SlotTracker(getPlan());
    print(O, "", SlotTracker);
  }

  /// Print the successors of this block to \p O, prefixing all lines with \p
  /// Indent.
  void printSuccessors(raw_ostream &O, const Twine &Indent) const;

  /// Dump this VPBlockBase to dbgs().
  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
#endif

  /// Clone the current block and its recipes without updating the operands of
  /// the cloned recipes, including all blocks in the single-entry single-exit
  /// region for VPRegionBlocks.
  virtual VPBlockBase *clone() = 0;
};
/// Struct to hold various analyses needed for cost computations.
struct VPCostContext {
  const TargetTransformInfo &TTI;
  const TargetLibraryInfo &TLI;
  VPTypeAnalysis Types;
  LLVMContext &LLVMCtx;
  LoopVectorizationCostModel &CM;
  SmallPtrSet<Instruction *, 8> SkipCostComputation;

  VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
                Type *CanIVTy, LoopVectorizationCostModel &CM)
      : TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
        CM(CM) {}

  /// Return the cost for \p UI with \p VF using the legacy cost model as
  /// fallback until computing the cost of all recipes migrates to VPlan.
  InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const;

  /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
  /// has already been pre-computed.
  bool skipCostComputation(Instruction *UI, bool IsVector) const;

  /// Returns the OperandInfo for \p V, if it is a live-in.
  TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const;
};
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// recipes must inherit from VPSingleDef instead of inheriting from both
/// VPRecipeBase and VPValue separately.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
                     public VPDef,
                     public VPUser {
  friend VPBasicBlock;
  friend class VPBlockUtils;

  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

  /// The debug location for the recipe.
  DebugLoc DL;

public:
  VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
               DebugLoc DL = {})
      : VPDef(SC), VPUser(Operands), DL(DL) {}

  template <typename IterT>
  VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands,
               DebugLoc DL = {})
      : VPDef(SC), VPUser(Operands), DL(DL) {}
  virtual ~VPRecipeBase() = default;

  /// Clone the current recipe.
  virtual VPRecipeBase *clone() = 0;

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(VPTransformState &State) = 0;

  /// Return the cost of this recipe, taking into account if the cost
  /// computation should be skipped and the ForceTargetInstructionCost flag.
  /// Also takes care of printing the cost for debugging.
  InstructionCost cost(ElementCount VF, VPCostContext &Ctx);

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);
  /// Insert an unlinked recipe into \p BB immediately before the insertion
  /// point \p IP.
  void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// Unlink this recipe and insert into BB before I.
  ///
  /// \pre I is a valid iterator into BB.
  void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    // All VPDefs are also VPRecipeBases.
    return true;
  }

  static inline bool classof(const VPUser *U) { return true; }

  /// Returns true if the recipe may have side-effects.
  bool mayHaveSideEffects() const;

  /// Returns true for PHI-like recipes.
  bool isPhi() const {
    return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
  }

  /// Returns true if the recipe may read from memory.
  bool mayReadFromMemory() const;

  /// Returns true if the recipe may write to memory.
  bool mayWriteToMemory() const;

  /// Returns true if the recipe may read from or write to memory.
  bool mayReadOrWriteMemory() const {
    return mayReadFromMemory() || mayWriteToMemory();
  }

  /// Returns the debug location of the recipe.
  DebugLoc getDebugLoc() const { return DL; }

protected:
  /// Compute the cost of this recipe either using a recipe's specialized
  /// implementation or using the legacy cost model and the underlying
  /// instructions.
  virtual InstructionCost computeCost(ElementCount VF,
                                      VPCostContext &Ctx) const;
};
// Helper macro to define common classof implementations for recipes.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *D) {                                 \
    return D->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    auto *R = V->getDefiningRecipe();                                          \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    auto *R = dyn_cast<VPRecipeBase>(U);                                       \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPDefID() == VPDefID;                                         \
  }
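// Example (illustrative, not part of the original header): a recipe class
// instantiates the macro with its own ID, e.g.
// VP_CLASSOF_IMPL(VPDef::VPInstructionSC) inside VPInstruction, which is what
// lets the standard casting machinery work ('handle' is a placeholder):
//
//   if (auto *VPI = dyn_cast<VPInstruction>(&R)) // uses classof(VPRecipeBase*)
//     handle(VPI);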
/// VPSingleDef is a base class for recipes modeling a sequence of one or
/// more output IR instructions that define a single result VPValue.
/// Note that VPRecipeBase must be inherited from before VPValue.
class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
public:
  template <typename IterT>
  VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this) {}

  VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
                    DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this) {}

  template <typename IterT>
  VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
                    DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}

  static inline bool classof(const VPRecipeBase *R) {
    switch (R->getVPDefID()) {
    case VPRecipeBase::VPDerivedIVSC:
    case VPRecipeBase::VPEVLBasedIVPHISC:
    case VPRecipeBase::VPExpandSCEVSC:
    case VPRecipeBase::VPInstructionSC:
    case VPRecipeBase::VPReductionEVLSC:
    case VPRecipeBase::VPReductionSC:
    case VPRecipeBase::VPReplicateSC:
    case VPRecipeBase::VPScalarIVStepsSC:
    case VPRecipeBase::VPVectorPointerSC:
    case VPRecipeBase::VPReverseVectorPointerSC:
    case VPRecipeBase::VPWidenCallSC:
    case VPRecipeBase::VPWidenCanonicalIVSC:
    case VPRecipeBase::VPWidenCastSC:
    case VPRecipeBase::VPWidenGEPSC:
    case VPRecipeBase::VPWidenIntrinsicSC:
    case VPRecipeBase::VPWidenSC:
    case VPRecipeBase::VPWidenEVLSC:
    case VPRecipeBase::VPWidenSelectSC:
    case VPRecipeBase::VPBlendSC:
    case VPRecipeBase::VPPredInstPHISC:
    case VPRecipeBase::VPCanonicalIVPHISC:
    case VPRecipeBase::VPActiveLaneMaskPHISC:
    case VPRecipeBase::VPFirstOrderRecurrencePHISC:
    case VPRecipeBase::VPWidenPHISC:
    case VPRecipeBase::VPWidenIntOrFpInductionSC:
    case VPRecipeBase::VPWidenPointerInductionSC:
    case VPRecipeBase::VPReductionPHISC:
    case VPRecipeBase::VPScalarCastSC:
      return true;
    case VPRecipeBase::VPBranchOnMaskSC:
    case VPRecipeBase::VPInterleaveSC:
    case VPRecipeBase::VPIRInstructionSC:
    case VPRecipeBase::VPWidenLoadEVLSC:
    case VPRecipeBase::VPWidenLoadSC:
    case VPRecipeBase::VPWidenStoreEVLSC:
    case VPRecipeBase::VPWidenStoreSC:
    case VPRecipeBase::VPHistogramSC:
      // TODO: Widened stores don't define a value, but widened loads do. Split
      // the recipes to be able to make widened loads VPSingleDefRecipes.
      return false;
    }
    llvm_unreachable("Unhandled VPDefID");
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  virtual VPSingleDefRecipe *clone() override = 0;

  /// Returns the underlying instruction.
  Instruction *getUnderlyingInstr() {
    return cast<Instruction>(getUnderlyingValue());
  }
  const Instruction *getUnderlyingInstr() const {
    return cast<Instruction>(getUnderlyingValue());
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPSingleDefRecipe to dbgs() (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif
};
/// Class to record LLVM IR flag for a recipe along with it.
class VPRecipeWithIRFlags : public VPSingleDefRecipe {
  enum class OperationType : unsigned char {
    Cmp,
    OverflowingBinOp,
    DisjointOp,
    PossiblyExactOp,
    GEPOp,
    FPMathOp,
    NonNegOp,
    Other
  };

public:
  struct WrapFlagsTy {
    char HasNUW : 1;
    char HasNSW : 1;

    WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
  };

  struct DisjointFlagsTy {
    char IsDisjoint : 1;
    DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
  };

  struct GEPFlagsTy {
    char IsInBounds : 1;
    GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}
  };

private:
  struct ExactFlagsTy {
    char IsExact : 1;
  };
  struct NonNegFlagsTy {
    char NonNeg : 1;
  };
  struct FastMathFlagsTy {
    char AllowReassoc : 1;
    char NoNaNs : 1;
    char NoInfs : 1;
    char NoSignedZeros : 1;
    char AllowReciprocal : 1;
    char AllowContract : 1;
    char ApproxFunc : 1;

    FastMathFlagsTy(const FastMathFlags &FMF);
  };

  OperationType OpType;

  union {
    CmpInst::Predicate CmpPredicate;
    WrapFlagsTy WrapFlags;
    DisjointFlagsTy DisjointFlags;
    ExactFlagsTy ExactFlags;
    GEPFlagsTy GEPFlags;
    NonNegFlagsTy NonNegFlags;
    FastMathFlagsTy FMFs;
    unsigned AllFlags;
  };

protected:
  void transferFlags(VPRecipeWithIRFlags &Other) {
    OpType = Other.OpType;
    AllFlags = Other.AllFlags;
  }

public:
  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL) {
    OpType = OperationType::Other;
    AllFlags = 0;
  }

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
      : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()) {
    if (auto *Op = dyn_cast<CmpInst>(&I)) {
      OpType = OperationType::Cmp;
      CmpPredicate = Op->getPredicate();
    } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
      OpType = OperationType::DisjointOp;
      DisjointFlags.IsDisjoint = Op->isDisjoint();
    } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
      OpType = OperationType::OverflowingBinOp;
      WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
    } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
      OpType = OperationType::PossiblyExactOp;
      ExactFlags.IsExact = Op->isExact();
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
      OpType = OperationType::GEPOp;
      GEPFlags.IsInBounds = GEP->isInBounds();
    } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
      OpType = OperationType::NonNegOp;
      NonNegFlags.NonNeg = PNNI->hasNonNeg();
    } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
      OpType = OperationType::FPMathOp;
      FMFs = Op->getFastMathFlags();
    } else {
      OpType = OperationType::Other;
      AllFlags = 0;
    }
  }

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      CmpInst::Predicate Pred, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
        CmpPredicate(Pred) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      WrapFlagsTy WrapFlags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL),
        OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      FastMathFlags FMFs, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
        FMFs(FMFs) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      DisjointFlagsTy DisjointFlags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
        DisjointFlags(DisjointFlags) {}

protected:
  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      GEPFlagsTy GEPFlags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
        GEPFlags(GEPFlags) {}

public:
  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
           R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
           R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
           R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }
1085 void dropPoisonGeneratingFlags() {
1086 // NOTE: This needs to be kept in-sync with
1087 // Instruction::dropPoisonGeneratingFlags.
1089 case OperationType::OverflowingBinOp
:
1090 WrapFlags
.HasNUW
= false;
1091 WrapFlags
.HasNSW
= false;
1093 case OperationType::DisjointOp
:
1094 DisjointFlags
.IsDisjoint
= false;
1096 case OperationType::PossiblyExactOp
:
1097 ExactFlags
.IsExact
= false;
1099 case OperationType::GEPOp
:
1100 GEPFlags
.IsInBounds
= false;
1102 case OperationType::FPMathOp
:
1103 FMFs
.NoNaNs
= false;
1104 FMFs
.NoInfs
= false;
1106 case OperationType::NonNegOp
:
1107 NonNegFlags
.NonNeg
= false;
1109 case OperationType::Cmp
:
1110 case OperationType::Other
:
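  // Worked example (illustrative, not part of the original header): for a
  // recipe modeling 'or disjoint %a, %b', this clears only IsDisjoint, so a
  // later setFlags() emits a plain 'or' that cannot introduce poison:
  //
  //   R.dropPoisonGeneratingFlags(); // OpType stays DisjointOp,
  //                                  // DisjointFlags.IsDisjoint is now false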
  /// Set the IR flags for \p I.
  void setFlags(Instruction *I) const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
      I->setHasNoSignedWrap(WrapFlags.HasNSW);
      break;
    case OperationType::DisjointOp:
      cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
      break;
    case OperationType::PossiblyExactOp:
      I->setIsExact(ExactFlags.IsExact);
      break;
    case OperationType::GEPOp:
      // TODO(gep_nowrap): Track the full GEPNoWrapFlags in VPlan.
      cast<GetElementPtrInst>(I)->setNoWrapFlags(
          GEPFlags.IsInBounds ? GEPNoWrapFlags::inBounds()
                              : GEPNoWrapFlags::none());
      break;
    case OperationType::FPMathOp:
      I->setHasAllowReassoc(FMFs.AllowReassoc);
      I->setHasNoNaNs(FMFs.NoNaNs);
      I->setHasNoInfs(FMFs.NoInfs);
      I->setHasNoSignedZeros(FMFs.NoSignedZeros);
      I->setHasAllowReciprocal(FMFs.AllowReciprocal);
      I->setHasAllowContract(FMFs.AllowContract);
      I->setHasApproxFunc(FMFs.ApproxFunc);
      break;
    case OperationType::NonNegOp:
      I->setNonNeg(NonNegFlags.NonNeg);
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }
  CmpInst::Predicate getPredicate() const {
    assert(OpType == OperationType::Cmp &&
           "recipe doesn't have a compare predicate");
    return CmpPredicate;
  }

  bool isInBounds() const {
    assert(OpType == OperationType::GEPOp &&
           "recipe doesn't have inbounds flag");
    return GEPFlags.IsInBounds;
  }

  /// Returns true if the recipe has fast-math flags.
  bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }

  FastMathFlags getFastMathFlags() const;

  bool hasNoUnsignedWrap() const {
    assert(OpType == OperationType::OverflowingBinOp &&
           "recipe doesn't have a NUW flag");
    return WrapFlags.HasNUW;
  }

  bool hasNoSignedWrap() const {
    assert(OpType == OperationType::OverflowingBinOp &&
           "recipe doesn't have a NSW flag");
    return WrapFlags.HasNSW;
  }

  bool isDisjoint() const {
    assert(OpType == OperationType::DisjointOp &&
           "recipe cannot have a disjoint flag");
    return DisjointFlags.IsDisjoint;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printFlags(raw_ostream &O) const;
#endif
};
/// Helper to access the operand that contains the unroll part for this recipe
/// after unrolling.
template <unsigned PartOpIdx> class VPUnrollPartAccessor {
protected:
  /// Return the VPValue operand containing the unroll part or null if there is
  /// no such operand.
  VPValue *getUnrollPartOperand(VPUser &U) const;

  /// Return the unroll part.
  unsigned getUnrollPart(VPUser &U) const;
};
/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While as any Recipe it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPRecipeWithIRFlags,
                      public VPUnrollPartAccessor<1> {
  friend class VPlanSlp;

public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
  enum {
    FirstOrderRecurrenceSplice =
        Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
                                      // values of a first-order recurrence.
    Not,
    SLPLoad,
    SLPStore,
    ActiveLaneMask,
    ExplicitVectorLength,
    /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
    /// The first operand is the incoming value from the predecessor in VPlan,
    /// the second operand is the incoming value for all other predecessors
    /// (which are currently not modeled in VPlan).
    ResumePhi,
    CalculateTripCountMinusVF,
    // Increment the canonical IV separately for each unrolled part.
    CanonicalIVIncrementForPart,
    BranchOnCount,
    BranchOnCond,
    ComputeReductionResult,
    // Takes the VPValue to extract from as first operand and the lane or part
    // to extract as second operand, counting from the end starting with 1 for
    // last. The second operand must be a positive constant and <= VF.
    ExtractFromEnd,
    LogicalAnd, // Non-poison propagating logical And.
    // Add an offset in bytes (second operand) to a base pointer (first
    // operand). Only generates scalar values (either for the first lane only
    // or for all lanes, depending on its uses).
    PtrAdd,
    // Returns a scalar boolean value, which is true if any lane of its single
    // operand is true.
    AnyOf,
  };

  typedef unsigned char OpcodeTy;

private:
  OpcodeTy Opcode;
  /// An optional name that can be used for the generated IR instruction.
  const std::string Name;

  /// Returns true if this VPInstruction generates scalar values for all lanes.
  /// Most VPInstructions generate a single value per part, either vector or
  /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
  /// values per all lanes, stemming from an original ingredient. This method
  /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
  /// underlying ingredient.
  bool doesGeneratePerAllLanes() const;

  /// Returns true if we can generate a scalar for the first lane only if
  /// needed.
  bool canGenerateScalarForFirstLane() const;

  /// Utility methods serving execute(): generates a single vector instance of
  /// the modeled instruction. \returns the generated value. In some cases an
  /// existing value is returned rather than a generated one.
  Value *generate(VPTransformState &State);

  /// Utility methods serving execute(): generates a scalar single instance of
  /// the modeled instruction for a given lane. \returns the scalar generated
  /// value for lane \p Lane.
  Value *generatePerLane(VPTransformState &State, const VPLane &Lane);

#if !defined(NDEBUG)
  /// Return true if the VPInstruction is a floating point math operation, i.e.
  /// has fast-math flags.
  bool isFPMathOp() const;
#endif

public:
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,
                const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
        Opcode(Opcode), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                DebugLoc DL = {}, const Twine &Name = "")
      : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {}

  VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
                VPValue *B, DebugLoc DL = {}, const Twine &Name = "");

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
        Opcode(Opcode), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
                const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
        Opcode(Opcode), Name(Name.str()) {
    assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
  }

  VPInstruction(VPValue *Ptr, VPValue *Offset, GEPFlagsTy Flags,
                DebugLoc DL = {}, const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC,
                            ArrayRef<VPValue *>({Ptr, Offset}), Flags, DL),
        Opcode(VPInstruction::PtrAdd), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");

  VP_CLASSOF_IMPL(VPDef::VPInstructionSC)

  VPInstruction *clone() override {
    SmallVector<VPValue *, 2> Operands(operands());
    auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
    New->transferFlags(*this);
    return New;
  }
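  // Usage sketch (illustrative, not part of the original header): creating
  // and cloning a VPInstruction; X and Y are assumed VPValue operands, and
  // the clone copies operands, flags, debug location and name, but stays
  // unlinked until inserted next to a recipe already in a VPBasicBlock:
  //
  //   auto *Add = new VPInstruction(Instruction::Add, {X, Y}, DL, "sum");
  //   VPInstruction *Copy = Add->clone();
  //   Copy->insertAfter(Add);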
  unsigned getOpcode() const { return Opcode; }

  /// Generate the instruction.
  /// TODO: We currently execute only per-part unless a specific instance is
  /// provided.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPInstruction.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPInstruction to \p O.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Print the VPInstruction to dbgs() (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  /// Return true if this instruction may modify memory.
  bool mayWriteToMemory() const {
    // TODO: we can use attributes of the called function to rule out memory
    // modifications.
    return Opcode == Instruction::Store || Opcode == Instruction::Call ||
           Opcode == Instruction::Invoke || Opcode == SLPStore;
  }

  bool hasResult() const {
    // CallInst may or may not have a result, depending on the called function.
    // Conservatively assume calls have results for now.
    switch (getOpcode()) {
    case Instruction::Ret:
    case Instruction::Br:
    case Instruction::Store:
    case Instruction::Switch:
    case Instruction::IndirectBr:
    case Instruction::Resume:
    case Instruction::CatchRet:
    case Instruction::Unreachable:
    case Instruction::Fence:
    case Instruction::AtomicRMW:
    case VPInstruction::BranchOnCond:
    case VPInstruction::BranchOnCount:
      return false;
    default:
      return true;
    }
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override;

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool onlyFirstPartUsed(const VPValue *Op) const override;

  /// Returns true if this VPInstruction produces a scalar value from a vector,
  /// e.g. by performing a reduction or extracting a lane.
  bool isVectorToScalar() const;

  /// Returns true if this VPInstruction's operands are single scalars and the
  /// result is also a single scalar.
  bool isSingleScalar() const;

  /// Returns the symbolic name assigned to the VPInstruction.
  StringRef getName() const { return Name; }
};
/// A recipe to wrap an original IR instruction that is not to be modified
/// during execution, except for PHIs. For PHIs, a single VPValue operand is
/// allowed, and it is used to add a new incoming value for the single
/// predecessor VPBB. Except for PHIs, VPIRInstructions cannot have any
/// operands.
class VPIRInstruction : public VPRecipeBase {
  Instruction &I;

public:
  VPIRInstruction(Instruction &I)
      : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}

  ~VPIRInstruction() override = default;

  VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)

  VPIRInstruction *clone() override {
    auto *R = new VPIRInstruction(I);
    for (auto *Op : operands())
      R->addOperand(Op);
    return R;
  }

  void execute(VPTransformState &State) override;

  /// Return the cost of this VPIRInstruction.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  Instruction &getInstruction() const { return I; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  bool onlyFirstPartUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};
/// VPWidenRecipe is a recipe for producing a widened instruction using the
/// opcode and operands of the recipe. This recipe covers most of the
/// traditional vectorization cases where each recipe transforms into a
/// vectorized version of itself.
class VPWidenRecipe : public VPRecipeWithIRFlags {
  unsigned Opcode;

protected:
  template <typename IterT>
  VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
                iterator_range<IterT> Operands)
      : VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}

public:
  template <typename IterT>
  VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
      : VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}

  ~VPWidenRecipe() override = default;

  VPWidenRecipe *clone() override {
    auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
    R->transferFlags(*this);
    return R;
  }

  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  /// Produce a widened instruction using the opcode and operands of the
  /// recipe, processing State.VF elements.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  unsigned getOpcode() const { return Opcode; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
/// A recipe for widening operations with vector-predication intrinsics with
/// explicit vector length (EVL).
class VPWidenEVLRecipe : public VPWidenRecipe {
  using VPRecipeWithIRFlags::transferFlags;

public:
  template <typename IterT>
  VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
      : VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
    addOperand(&EVL);
  }
  VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
      : VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
    transferFlags(W);
  }

  ~VPWidenEVLRecipe() override = default;

  VPWidenRecipe *clone() override final {
    llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
    return nullptr;
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);

  VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
  const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }

  /// Produce a vp-intrinsic using the opcode and operands of the recipe,
  /// processing EVL elements.
  void execute(VPTransformState &State) override final;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // EVL in that recipe is always the last operand, thus any use before means
    // the VPValue should be vectorized.
    return getEVL() == Op;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override final;
#endif
};
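// Design note with sketch (illustrative, not part of the original header):
// because the EVL is appended as the last operand, a widened add over
// operands (A, B) carries (A, B, EVL), and execute() conceptually emits a
// vector-predicated intrinsic with an all-true mask, along the lines of:
//
//   %r = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(
//            <vscale x 4 x i32> %a, <vscale x 4 x i32> %b,
//            <vscale x 4 x i1> splat (i1 true), i32 %evl)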
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
  /// Cast instruction opcode.
  Instruction::CastOps Opcode;

  /// Result type for the cast.
  Type *ResultTy;

public:
  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
                    CastInst &UI)
      : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
        ResultTy(ResultTy) {
    assert(UI.getOpcode() == Opcode &&
           "opcode of underlying cast doesn't match");
  }

  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
      : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
        ResultTy(ResultTy) {}

  ~VPWidenCastRecipe() override = default;

  VPWidenCastRecipe *clone() override {
    if (auto *UV = getUnderlyingValue())
      return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
                                   *cast<CastInst>(UV));

    return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)

  /// Produce widened copies of the cast.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenCastRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  Instruction::CastOps getOpcode() const { return Opcode; }

  /// Returns the result type of the cast.
  Type *getResultType() const { return ResultTy; }
};
/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
class VPScalarCastRecipe : public VPSingleDefRecipe {
  Instruction::CastOps Opcode;

  Type *ResultTy;

  Value *generate(VPTransformState &State);

public:
  VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
      : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode),
        ResultTy(ResultTy) {}

  ~VPScalarCastRecipe() override = default;

  VPScalarCastRecipe *clone() override {
    return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy);
  }

  VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)

  void execute(VPTransformState &State) override;

  /// Return the cost of this VPScalarCastRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the result type of the cast.
  Type *getResultType() const { return ResultTy; }

  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    // At the moment, only uniform codegen is implemented.
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};
/// A recipe for widening vector intrinsics.
class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
  /// ID of the vector intrinsic to widen.
  Intrinsic::ID VectorIntrinsicID;

  /// Scalar return type of the intrinsic.
  Type *ResultTy;

  /// True if the intrinsic may read from memory.
  bool MayReadFromMemory;

  /// True if the intrinsic may write to memory.
  bool MayWriteToMemory;

  /// True if the intrinsic may have side-effects.
  bool MayHaveSideEffects;

public:
  VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
                         ArrayRef<VPValue *> CallArguments, Type *Ty,
                         DebugLoc DL = {})
      : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
        VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
        MayReadFromMemory(CI.mayReadFromMemory()),
        MayWriteToMemory(CI.mayWriteToMemory()),
        MayHaveSideEffects(CI.mayHaveSideEffects()) {}

  VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
                         ArrayRef<VPValue *> CallArguments, Type *Ty,
                         DebugLoc DL = {})
      : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments),
        VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
    LLVMContext &Ctx = Ty->getContext();
    AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID);
    MemoryEffects ME = Attrs.getMemoryEffects();
    MayReadFromMemory = !ME.onlyWritesMemory();
    MayWriteToMemory = !ME.onlyReadsMemory();
    MayHaveSideEffects = MayWriteToMemory ||
                         !Attrs.hasFnAttr(Attribute::NoUnwind) ||
                         !Attrs.hasFnAttr(Attribute::WillReturn);
  }
1688 VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID
,
1689 std::initializer_list
<VPValue
*> CallArguments
,
1690 Type
*Ty
, DebugLoc DL
= {})
1691 : VPWidenIntrinsicRecipe(VectorIntrinsicID
,
1692 ArrayRef
<VPValue
*>(CallArguments
), Ty
, DL
) {}
1694 ~VPWidenIntrinsicRecipe() override
= default;
1696 VPWidenIntrinsicRecipe
*clone() override
{
1697 return new VPWidenIntrinsicRecipe(*cast
<CallInst
>(getUnderlyingValue()),
1698 VectorIntrinsicID
, {op_begin(), op_end()},
1699 ResultTy
, getDebugLoc());
1702 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC
)
1704 /// Produce a widened version of the vector intrinsic.
1705 void execute(VPTransformState
&State
) override
;
1707 /// Return the cost of this vector intrinsic.
1708 InstructionCost
computeCost(ElementCount VF
,
1709 VPCostContext
&Ctx
) const override
;
1711 /// Return the scalar return type of the intrinsic.
1712 Type
*getResultType() const { return ResultTy
; }
1714 /// Return to name of the intrinsic as string.
1715 StringRef
getIntrinsicName() const;
1717 /// Returns true if the intrinsic may read from memory.
1718 bool mayReadFromMemory() const { return MayReadFromMemory
; }
1720 /// Returns true if the intrinsic may write to memory.
1721 bool mayWriteToMemory() const { return MayWriteToMemory
; }
1723 /// Returns true if the intrinsic may have side-effects.
1724 bool mayHaveSideEffects() const { return MayHaveSideEffects
; }
1726 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1727 /// Print the recipe.
1728 void print(raw_ostream
&O
, const Twine
&Indent
,
1729 VPSlotTracker
&SlotTracker
) const override
;
1732 bool onlyFirstLaneUsed(const VPValue
*Op
) const override
;
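// Example (illustrative; `X`, `Y` and `I32Ty` are assumed): modeling a widened
// llvm.smax call when no underlying CallInst exists. The intrinsic-ID
// constructor derives the memory-effect flags from the intrinsic's attributes:
//
//   auto *Smax = new VPWidenIntrinsicRecipe(Intrinsic::smax, {X, Y}, I32Ty);
//   assert(!Smax->mayWriteToMemory() && "smax does not write memory");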
/// A recipe for widening Call instructions using library calls.
class VPWidenCallRecipe : public VPRecipeWithIRFlags {
  /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
  /// between a given VF and the chosen vectorized variant, so there will be a
  /// different VPlan for each VF with a valid variant.
  Function *Variant;

public:
  VPWidenCallRecipe(Value *UV, Function *Variant,
                    ArrayRef<VPValue *> CallArguments, DebugLoc DL = {})
      : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
                            *cast<Instruction>(UV)),
        Variant(Variant) {
    assert(
        isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
        "last operand must be the called function");
  }

  ~VPWidenCallRecipe() override = default;

  VPWidenCallRecipe *clone() override {
    return new VPWidenCallRecipe(getUnderlyingValue(), Variant,
                                 {op_begin(), op_end()}, getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)

  /// Produce a widened version of the call instruction.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenCallRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  Function *getCalledScalarFunction() const {
    return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
  }

  operand_range arg_operands() {
    return make_range(op_begin(), op_begin() + getNumOperands() - 1);
  }
  const_operand_range arg_operands() const {
    return make_range(op_begin(), op_begin() + getNumOperands() - 1);
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
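// Operand layout sketch (illustrative; `WidenCall` is an assumed
// VPWidenCallRecipe *): for a scalar call `foo(a, b)` with chosen variant
// `foo_v4`, the operands are [a, b, foo], i.e. the scalar callee is always
// the last operand and is excluded from arg_operands():
//
//   for (VPValue *Arg : WidenCall->arg_operands())
//     ; // visits a and b only
//   Function *Callee = WidenCall->getCalledScalarFunction(); // foo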
/// A recipe representing a sequence of load -> update -> store as part of
/// a histogram operation. This means there may be aliasing between vector
/// lanes, which is handled by the llvm.experimental.vector.histogram family
/// of intrinsics. The only update operations currently supported are
/// 'add' and 'sub' where the other term is loop-invariant.
class VPHistogramRecipe : public VPRecipeBase {
  /// Opcode of the update operation, currently either add or sub.
  unsigned Opcode;

public:
  template <typename IterT>
  VPHistogramRecipe(unsigned Opcode, iterator_range<IterT> Operands,
                    DebugLoc DL = {})
      : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}

  ~VPHistogramRecipe() override = default;

  VPHistogramRecipe *clone() override {
    return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPDef::VPHistogramSC);

  /// Produce a vectorized histogram operation.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPHistogramRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  unsigned getOpcode() const { return Opcode; }

  /// Return the mask operand if one was provided, or a null pointer if all
  /// lanes should be executed unconditionally.
  VPValue *getMask() const {
    return getNumOperands() == 3 ? getOperand(2) : nullptr;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
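// The kind of scalar loop this models (illustrative):
//
//   for (unsigned I = 0; I < N; ++I)
//     Buckets[Indices[I]] += Inc; // load -> add -> store; vector lanes may
//                                 // alias whenever Indices repeats a value.
//
// The recipe has two mandatory operands plus an optional mask, which is why
// getMask() checks for exactly three operands.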
/// A recipe for widening select instructions.
struct VPWidenSelectRecipe : public VPSingleDefRecipe {
  template <typename IterT>
  VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
      : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
                          I.getDebugLoc()) {}

  ~VPWidenSelectRecipe() override = default;

  VPWidenSelectRecipe *clone() override {
    return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
                                   operands());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)

  /// Produce a widened version of the select instruction.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenSelectRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getCond() const {
    return getOperand(0);
  }

  bool isInvariantCond() const {
    return getCond()->isDefinedOutsideLoopRegions();
  }
};
/// A recipe for handling GEP instructions.
class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
  bool isPointerLoopInvariant() const {
    return getOperand(0)->isDefinedOutsideLoopRegions();
  }

  bool isIndexLoopInvariant(unsigned I) const {
    return getOperand(I + 1)->isDefinedOutsideLoopRegions();
  }

  bool areAllOperandsInvariant() const {
    return all_of(operands(), [](VPValue *Op) {
      return Op->isDefinedOutsideLoopRegions();
    });
  }

public:
  template <typename IterT>
  VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
      : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}

  ~VPWidenGEPRecipe() override = default;

  VPWidenGEPRecipe *clone() override {
    return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
                                operands());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)

  /// Generate the gep nodes.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenGEPRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
/// A recipe to compute the pointers for widened memory accesses of IndexTy
/// in reverse order.
class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
                                     public VPUnrollPartAccessor<2> {
  Type *IndexedTy;

public:
  VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
                               bool IsInBounds, DebugLoc DL)
      : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
                            ArrayRef<VPValue *>({Ptr, VF}),
                            GEPFlagsTy(IsInBounds), DL),
        IndexedTy(IndexedTy) {}

  VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)

  VPValue *getVFValue() { return getOperand(1); }
  const VPValue *getVFValue() const { return getOperand(1); }

  void execute(VPTransformState &State) override;

  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Return the cost of this VPReverseVectorPointerRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool onlyFirstPartUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    assert(getNumOperands() <= 2 && "must have at most two operands");
    return true;
  }

  VPReverseVectorPointerRecipe *clone() override {
    return new VPReverseVectorPointerRecipe(
        getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
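// Illustrative sketch: for a reverse (descending) consecutive access, the
// recipe emits an address offset backwards from Ptr, on the order of
// Ptr + (1 - VF) elements of IndexedTy, so that a single wide load/store
// covers the VF accessed elements; the access is then paired with a vector
// reverse. The exact GEP sequence is produced in execute().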
/// A recipe to compute the pointers for widened memory accesses of IndexTy.
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
                              public VPUnrollPartAccessor<1> {
  Type *IndexedTy;

public:
  VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
                        DebugLoc DL)
      : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
                            GEPFlagsTy(IsInBounds), DL),
        IndexedTy(IndexedTy) {}

  VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)

  void execute(VPTransformState &State) override;

  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool onlyFirstPartUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    assert(getNumOperands() <= 2 && "must have at most two operands");
    return true;
  }

  VPVectorPointerRecipe *clone() override {
    return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
                                     getDebugLoc());
  }

  /// Return the cost of this VPVectorPointerRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
/// A pure virtual base class for all recipes modeling header phis, including
/// phis for first order recurrences, pointer inductions and reductions. The
/// start value is the first operand of the recipe and the incoming value from
/// the backedge is the second operand.
///
/// Inductions are modeled using the following sub-classes:
///  * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
///    starting at a specified value (zero for the main vector loop, the resume
///    value for the epilogue vector loop) and stepping by 1. The induction
///    controls exiting of the vector loop by comparing against the vector trip
///    count. Produces a single scalar PHI for the induction value per
///    iteration.
///  * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
///    floating point inductions with arbitrary start and step values. Produces
///    a vector PHI per-part.
///  * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
///    value of an IV with different start and step values. Produces a single
///    scalar value per iteration.
///  * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
///    canonical or derived induction.
///  * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
///    pointer induction. Produces either a vector PHI per-part or scalar values
///    per-lane based on the canonical induction.
class VPHeaderPHIRecipe : public VPSingleDefRecipe {
protected:
  VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
                    VPValue *Start = nullptr, DebugLoc DL = {})
      : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
    if (Start)
      addOperand(Start);
  }

public:
  ~VPHeaderPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *B) {
    return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
           B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
  }
  static inline bool classof(const VPValue *V) {
    auto *B = V->getDefiningRecipe();
    return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
           B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
  }

  /// Generate the phi nodes.
  void execute(VPTransformState &State) override = 0;

  /// Return the cost of this header phi recipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override = 0;
#endif

  /// Returns the start value of the phi, if one is set.
  VPValue *getStartValue() {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }
  VPValue *getStartValue() const {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }

  /// Update the start value of the recipe.
  void setStartValue(VPValue *V) { setOperand(0, V); }

  /// Returns the incoming value from the loop backedge.
  virtual VPValue *getBackedgeValue() {
    return getOperand(1);
  }

  /// Returns the backedge value as a recipe. The backedge value is guaranteed
  /// to be a recipe.
  virtual VPRecipeBase &getBackedgeRecipe() {
    return *getBackedgeValue()->getDefiningRecipe();
  }
};
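// Operand convention (illustrative; `R` is an assumed VPHeaderPHIRecipe &):
//
//   VPValue *Start = R.getStartValue();    // getOperand(0), may be nullptr
//   VPValue *Back  = R.getBackedgeValue(); // getOperand(1)
//
// Transforms rely on this fixed ordering when rewriting loop-carried values.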
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector values.
class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
  PHINode *IV;
  TruncInst *Trunc;
  const InductionDescriptor &IndDesc;

public:
  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
                                VPValue *VF, const InductionDescriptor &IndDesc)
      : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
        Trunc(nullptr), IndDesc(IndDesc) {
    addOperand(Step);
    addOperand(VF);
  }

  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
                                VPValue *VF, const InductionDescriptor &IndDesc,
                                TruncInst *Trunc)
      : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
        IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
    addOperand(Step);
    addOperand(VF);
  }

  ~VPWidenIntOrFpInductionRecipe() override = default;

  VPWidenIntOrFpInductionRecipe *clone() override {
    return new VPWidenIntOrFpInductionRecipe(
        IV, getStartValue(), getStepValue(), getVFValue(), IndDesc, Trunc);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)

  /// Generate the vectorized and scalarized versions of the phi node as
  /// needed by their users.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getBackedgeValue() override {
    // TODO: All operands of base recipe must exist and be at same index in
    // derived recipe.
    llvm_unreachable(
        "VPWidenIntOrFpInductionRecipe generates its own backedge value");
  }

  VPRecipeBase &getBackedgeRecipe() override {
    // TODO: All operands of base recipe must exist and be at same index in
    // derived recipe.
    llvm_unreachable(
        "VPWidenIntOrFpInductionRecipe generates its own backedge value");
  }

  /// Returns the step value of the induction.
  VPValue *getStepValue() { return getOperand(1); }
  const VPValue *getStepValue() const { return getOperand(1); }

  VPValue *getVFValue() { return getOperand(2); }
  const VPValue *getVFValue() const { return getOperand(2); }

  VPValue *getSplatVFValue() {
    // If the recipe has been unrolled (5 operands), return the VPValue for the
    // induction increment.
    return getNumOperands() == 5 ? getOperand(3) : nullptr;
  }

  /// Returns the first defined value as TruncInst, if it is one or nullptr
  /// otherwise.
  TruncInst *getTruncInst() { return Trunc; }
  const TruncInst *getTruncInst() const { return Trunc; }

  PHINode *getPHINode() { return IV; }

  /// Returns the induction descriptor for the recipe.
  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

  /// Returns true if the induction is canonical, i.e. starting at 0 and
  /// incremented by UF * VF (= the original IV is incremented by 1) and has the
  /// same type as the canonical induction.
  bool isCanonical() const;

  /// Returns the scalar type of the induction.
  Type *getScalarType() const {
    return Trunc ? Trunc->getType() : IV->getType();
  }

  /// Returns the VPValue representing the value of this induction at
  /// the last unrolled part, if it exists. Returns itself if unrolling did not
  /// take place.
  VPValue *getLastUnrolledPartOperand() {
    return getNumOperands() == 5 ? getOperand(4) : this;
  }
};
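// Operand layout (illustrative): before unrolling the recipe has operands
// {Start, Step, VF}; after unrolling by UF > 1 it has
// {Start, Step, VF, SplatVF, LastUnrolledPart}, which is why
// getSplatVFValue() and getLastUnrolledPartOperand() test for exactly five
// operands.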
class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
                                      public VPUnrollPartAccessor<3> {
  const InductionDescriptor &IndDesc;

  bool IsScalarAfterVectorization;

public:
  /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
  /// Start.
  VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc,
                                bool IsScalarAfterVectorization)
      : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
        IndDesc(IndDesc),
        IsScalarAfterVectorization(IsScalarAfterVectorization) {
    addOperand(Start);
    addOperand(Step);
  }

  ~VPWidenPointerInductionRecipe() override = default;

  VPWidenPointerInductionRecipe *clone() override {
    return new VPWidenPointerInductionRecipe(
        cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
        IndDesc, IsScalarAfterVectorization);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)

  /// Generate vector values for the pointer induction.
  void execute(VPTransformState &State) override;

  /// Returns true if only scalar values will be generated.
  bool onlyScalarsGenerated(bool IsScalable);

  /// Returns the induction descriptor for the recipe.
  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

  /// Returns the VPValue representing the value of this induction at
  /// the first unrolled part, if it exists. Returns itself if unrolling did not
  /// take place.
  VPValue *getFirstUnrolledPartOperand() {
    return getUnrollPart(*this) == 0 ? this : getOperand(2);
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
/// Recipe to generate a scalar PHI. Used to generate code for recipes that
/// produce scalar header phis, including VPCanonicalIVPHIRecipe and
/// VPEVLBasedIVPHIRecipe.
class VPScalarPHIRecipe : public VPHeaderPHIRecipe {
  std::string Name;

public:
  VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL,
                    StringRef Name)
      : VPHeaderPHIRecipe(VPDef::VPScalarPHISC, nullptr, Start, DL),
        Name(Name.str()) {
    addOperand(BackedgeValue);
  }

  ~VPScalarPHIRecipe() override = default;

  VPScalarPHIRecipe *clone() override {
    llvm_unreachable("cloning not implemented yet");
  }

  VP_CLASSOF_IMPL(VPDef::VPScalarPHISC)

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
/// A recipe for handling phis that are widened in the vector loop.
/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
/// managed in the recipe directly.
class VPWidenPHIRecipe : public VPSingleDefRecipe {
  /// List of incoming blocks. Only used in the VPlan native path.
  SmallVector<VPBasicBlock *, 2> IncomingBlocks;

public:
  /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
  VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
      : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
    if (Start)
      addOperand(Start);
  }

  VPWidenPHIRecipe *clone() override {
    llvm_unreachable("cloning not implemented yet");
  }

  ~VPWidenPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
  void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
    addOperand(IncomingV);
    IncomingBlocks.push_back(IncomingBlock);
  }

  /// Returns the \p I th incoming VPBasicBlock.
  VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }

  /// Returns the \p I th incoming VPValue.
  VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
};
/// A recipe for handling first-order recurrence phis. The start value is the
/// first operand of the recipe and the incoming value from the backedge is the
/// second operand.
struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
  VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
      : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}

  VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)

  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
  }

  VPFirstOrderRecurrencePHIRecipe *clone() override {
    return new VPFirstOrderRecurrencePHIRecipe(
        cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
  }

  void execute(VPTransformState &State) override;

  /// Return the cost of this first-order recurrence phi recipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
/// A recipe for handling reduction phis. The start value is the first operand
/// of the recipe and the incoming value from the backedge is the second
/// operand.
class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
                             public VPUnrollPartAccessor<2> {
  /// Descriptor for the reduction.
  const RecurrenceDescriptor &RdxDesc;

  /// The phi is part of an in-loop reduction.
  bool IsInLoop;

  /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
  bool IsOrdered;

public:
  /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
  /// RdxDesc.
  VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
                       VPValue &Start, bool IsInLoop = false,
                       bool IsOrdered = false)
      : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
        RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
    assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
  }

  ~VPReductionPHIRecipe() override = default;

  VPReductionPHIRecipe *clone() override {
    auto *R =
        new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
                                 *getOperand(0), IsInLoop, IsOrdered);
    R->addOperand(getBackedgeValue());
    return R;
  }

  VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPDef::VPReductionPHISC;
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const RecurrenceDescriptor &getRecurrenceDescriptor() const {
    return RdxDesc;
  }

  /// Returns true, if the phi is part of an ordered reduction.
  bool isOrdered() const { return IsOrdered; }

  /// Returns true, if the phi is part of an in-loop reduction.
  bool isInLoop() const { return IsInLoop; }
};
/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class VPBlendRecipe : public VPSingleDefRecipe {
public:
  /// The blend operation is a User of the incoming values and of their
  /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
  /// be omitted (implied by passing an odd number of operands) in which case
  /// all other incoming values are merged into it.
  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
      : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
    assert(Operands.size() > 0 && "Expected at least one operand!");
  }

  VPBlendRecipe *clone() override {
    SmallVector<VPValue *> Ops(operands());
    return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
  }

  VP_CLASSOF_IMPL(VPDef::VPBlendSC)

  /// A normalized blend is one that has an odd number of operands, whereby the
  /// first operand does not have an associated mask.
  bool isNormalized() const { return getNumOperands() % 2; }

  /// Return the number of incoming values, taking into account when normalized
  /// the first incoming value will have no mask.
  unsigned getNumIncomingValues() const {
    return (getNumOperands() + isNormalized()) / 2;
  }

  /// Return incoming value number \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const {
    return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
  }

  /// Return mask number \p Idx.
  VPValue *getMask(unsigned Idx) const {
    assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
    return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPBlendRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Recursing through Blend recipes only, must terminate at header phi's the
    // latest.
    return all_of(users(),
                  [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
  }
};
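// Worked example (illustrative): a normalized blend of three incoming values
// has operands [I0, I1, M1, I2, M2] (five operands, so isNormalized() is
// true). Then:
//
//   getNumIncomingValues() == (5 + 1) / 2 == 3
//   getIncomingValue(1)    == getOperand(1) // I1
//   getMask(1)             == getOperand(2) // M1
//   getMask(0)                              // asserts: I0 carries no mask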
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
/// or stores into one wide load/store and shuffles. The first operand of a
/// VPInterleave recipe is the address, followed by the stored values, followed
/// by an optional mask.
class VPInterleaveRecipe : public VPRecipeBase {
  const InterleaveGroup<Instruction> *IG;

  /// Indicates if the interleave group is in a conditional block and requires a
  /// mask.
  bool HasMask = false;

  /// Indicates if gaps between members of the group need to be masked out or if
  /// unused gaps can be loaded speculatively.
  bool NeedsMaskForGaps = false;

public:
  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
                     ArrayRef<VPValue *> StoredValues, VPValue *Mask,
                     bool NeedsMaskForGaps)
      : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
        NeedsMaskForGaps(NeedsMaskForGaps) {
    for (unsigned i = 0; i < IG->getFactor(); ++i)
      if (Instruction *I = IG->getMember(i)) {
        if (I->getType()->isVoidTy())
          continue;
        new VPValue(I, this);
      }

    for (auto *SV : StoredValues)
      addOperand(SV);
    if (Mask) {
      HasMask = true;
      addOperand(Mask);
    }
  }

  ~VPInterleaveRecipe() override = default;

  VPInterleaveRecipe *clone() override {
    return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
                                  NeedsMaskForGaps);
  }

  VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last, currently 2nd operand.
    return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Return the VPValues stored by this interleave group. If it is a load
  /// interleave group, return an empty ArrayRef.
  ArrayRef<VPValue *> getStoredValues() const {
    // The first operand is the address, followed by the stored values, followed
    // by an optional mask.
    return ArrayRef<VPValue *>(op_begin(), getNumOperands())
        .slice(1, getNumStoreOperands());
  }

  /// Generate the wide load or store, and shuffles.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPInterleaveRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }

  /// Returns the number of stored operands of this interleave group. Returns 0
  /// for load interleave groups.
  unsigned getNumStoreOperands() const {
    return getNumOperands() - (HasMask ? 2 : 1);
  }

  /// The recipe only uses the first lane of the address.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
  }

  Instruction *getInsertPos() const { return IG->getInsertPos(); }
};
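// Illustrative example: a stride-2 load group {A[2*i], A[2*i+1]} becomes one
// wide load covering both members followed by shuffles that extract the even
// and odd lanes; the recipe defines one VPValue per non-void member. For
// store groups, the stored values follow the address operand and an optional
// mask comes last.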
/// A recipe to represent inloop reduction operations, performing a reduction on
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPSingleDefRecipe {
  /// The recurrence descriptor for the reduction in question.
  const RecurrenceDescriptor &RdxDesc;
  bool IsOrdered;
  /// Whether the reduction is conditional.
  bool IsConditional = false;

protected:
  VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
                    Instruction *I, ArrayRef<VPValue *> Operands,
                    VPValue *CondOp, bool IsOrdered)
      : VPSingleDefRecipe(SC, Operands, I), RdxDesc(R), IsOrdered(IsOrdered) {
    if (CondOp) {
      IsConditional = true;
      addOperand(CondOp);
    }
  }

public:
  VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
                    VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
                    bool IsOrdered)
      : VPReductionRecipe(VPDef::VPReductionSC, R, I,
                          ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
                          IsOrdered) {}

  ~VPReductionRecipe() override = default;

  VPReductionRecipe *clone() override {
    return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
                                 getVecOp(), getCondOp(), IsOrdered);
  }

  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
           R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  /// Generate the reduction in the loop.
  void execute(VPTransformState &State) override;

  /// Return the cost of VPReductionRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Return the recurrence descriptor for the in-loop reduction.
  const RecurrenceDescriptor &getRecurrenceDescriptor() const {
    return RdxDesc;
  }

  /// Return true if the in-loop reduction is ordered.
  bool isOrdered() const { return IsOrdered; };
  /// Return true if the in-loop reduction is conditional.
  bool isConditional() const { return IsConditional; };
  /// The VPValue of the scalar Chain being accumulated.
  VPValue *getChainOp() const { return getOperand(0); }
  /// The VPValue of the vector value to be reduced.
  VPValue *getVecOp() const { return getOperand(1); }
  /// The VPValue of the condition for the block.
  VPValue *getCondOp() const {
    return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
  }
};
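// Illustrative example: an in-loop integer add reduction `Sum += A[i]` has
// operands {ChainOp = the scalar accumulator chain, VecOp = widened A[i]}.
// If the update is predicated, the block mask is appended as a trailing
// condition operand and isConditional() returns true.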
/// A recipe to represent inloop reduction operations with vector-predication
/// intrinsics, performing a reduction on a vector operand with the explicit
/// vector length (EVL) into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
class VPReductionEVLRecipe : public VPReductionRecipe {
public:
  VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp)
      : VPReductionRecipe(
            VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
            cast_or_null<Instruction>(R.getUnderlyingValue()),
            ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
            R.isOrdered()) {}

  ~VPReductionEVLRecipe() override = default;

  VPReductionEVLRecipe *clone() override {
    llvm_unreachable("cloning not implemented yet");
  }

  VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)

  /// Generate the reduction in the loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// The VPValue of the explicit vector length.
  VPValue *getEVL() const { return getOperand(2); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return Op == getEVL();
  }
};
/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to be
/// uniform only one copy, per lane zero, will be generated.
class VPReplicateRecipe : public VPRecipeWithIRFlags {
  /// Indicator if only a single replica per lane is needed.
  bool IsUniform;

  /// Indicator if the replicas are also predicated.
  bool IsPredicated;

public:
  template <typename IterT>
  VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
                    bool IsUniform, VPValue *Mask = nullptr)
      : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
        IsUniform(IsUniform), IsPredicated(Mask) {
    if (Mask)
      addOperand(Mask);
  }

  ~VPReplicateRecipe() override = default;

  VPReplicateRecipe *clone() override {
    auto *Copy =
        new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
                              isPredicated() ? getMask() : nullptr);
    Copy->transferFlags(*this);
    return Copy;
  }

  VP_CLASSOF_IMPL(VPDef::VPReplicateSC)

  /// Generate replicas of the desired Ingredient. Replicas will be generated
  /// for all parts and lanes unless a specific part and lane are specified in
  /// the \p State.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPReplicateRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  bool isUniform() const { return IsUniform; }

  bool isPredicated() const { return IsPredicated; }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return isUniform();
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe is used by a widened recipe via an intervening
  /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
  /// into a vector.
  bool shouldPack() const;

  /// Return the mask of a predicated VPReplicateRecipe.
  VPValue *getMask() {
    assert(isPredicated() && "Trying to get the mask of an unpredicated recipe");
    return getOperand(getNumOperands() - 1);
  }

  unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
};
/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase {
public:
  VPBranchOnMaskRecipe(VPValue *BlockInMask)
      : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
    if (BlockInMask) // nullptr means all-one mask.
      addOperand(BlockInMask);
  }

  VPBranchOnMaskRecipe *clone() override {
    return new VPBranchOnMaskRecipe(getOperand(0));
  }

  VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)

  /// Generate the extraction of the appropriate bit from the block mask and the
  /// conditional branch.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPBranchOnMaskRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override {
    O << Indent << "BRANCH-ON-MASK ";
    if (VPValue *Mask = getMask())
      Mask->printAsOperand(O, SlotTracker);
    else
      O << " All-One";
    O << "\n";
  }
#endif

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
    // Mask is optional.
    return getNumOperands() == 1 ? getOperand(0) : nullptr;
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};
/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
class VPPredInstPHIRecipe : public VPSingleDefRecipe {
public:
  /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs phi
  /// nodes after merging back from a Branch-on-Mask.
  VPPredInstPHIRecipe(VPValue *PredV)
      : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {}
  ~VPPredInstPHIRecipe() override = default;

  VPPredInstPHIRecipe *clone() override {
    return new VPPredInstPHIRecipe(getOperand(0));
  }

  VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)

  /// Generates phi nodes for live-outs (from a replicate region) as needed to
  /// retain SSA form.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPPredInstPHIRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};
/// A common base class for widening memory operations. An optional mask can be
/// provided as the last operand.
class VPWidenMemoryRecipe : public VPRecipeBase {
protected:
  Instruction &Ingredient;

  /// Whether the accessed addresses are consecutive.
  bool Consecutive;

  /// Whether the consecutive accessed addresses are in reverse order.
  bool Reverse;

  /// Whether the memory access is masked.
  bool IsMasked = false;

  void setMask(VPValue *Mask) {
    assert(!IsMasked && "cannot re-set mask");
    if (!Mask)
      return;
    addOperand(Mask);
    IsMasked = true;
  }

  VPWidenMemoryRecipe(const unsigned char SC, Instruction &I,
                      std::initializer_list<VPValue *> Operands,
                      bool Consecutive, bool Reverse, DebugLoc DL)
      : VPRecipeBase(SC, Operands, DL), Ingredient(I), Consecutive(Consecutive),
        Reverse(Reverse) {
    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
  }

public:
  VPWidenMemoryRecipe *clone() override {
    llvm_unreachable("cloning not supported");
  }

  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  /// Return whether the loaded-from / stored-to addresses are consecutive.
  bool isConsecutive() const { return Consecutive; }

  /// Return whether the consecutive loaded/stored addresses are in reverse
  /// order.
  bool isReverse() const { return Reverse; }

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const { return getOperand(0); }

  /// Returns true if the recipe is masked.
  bool isMasked() const { return IsMasked; }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last operand.
    return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Generate the wide load/store.
  void execute(VPTransformState &State) override {
    llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
  }

  /// Return the cost of this VPWidenMemoryRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  Instruction &getIngredient() const { return Ingredient; }
};
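// Mask convention (illustrative; `MemR` is an assumed VPWidenMemoryRecipe *):
// subclasses append the mask, if any, via setMask(), so it is always last:
//
//   if (VPValue *Mask = MemR->getMask())
//     ; // masked access; Mask == MemR->getOperand(MemR->getNumOperands() - 1)
//   else
//     ; // unmasked access, i.e. an implicit all-true mask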
/// A recipe for widening load operations, using the address to load from and an
/// optional mask.
struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
  VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
                    bool Consecutive, bool Reverse, DebugLoc DL)
      : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
                            Reverse, DL),
        VPValue(this, &Load) {
    setMask(Mask);
  }

  VPWidenLoadRecipe *clone() override {
    return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
                                 getMask(), Consecutive, Reverse,
                                 getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);

  /// Generate a wide load or gather.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Widened, consecutive load operations only demand the first lane of
    // their address.
    return Op == getAddr() && isConsecutive();
  }
};
/// A recipe for widening load operations with vector-predication intrinsics,
/// using the address to load from, the explicit vector length and an optional
/// mask.
struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
  VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
      : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
                            {L.getAddr(), &EVL}, L.isConsecutive(),
                            L.isReverse(), L.getDebugLoc()),
        VPValue(this, &getIngredient()) {
    setMask(Mask);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)

  /// Return the EVL operand.
  VPValue *getEVL() const { return getOperand(1); }

  /// Generate the wide load or gather.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenLoadEVLRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Widened loads only demand the first lane of EVL and consecutive loads
    // only demand the first lane of their address.
    return Op == getEVL() || (Op == getAddr() && isConsecutive());
  }
};
/// A recipe for widening store operations, using the stored value, the address
/// to store to and an optional mask.
struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
  VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
                     VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
      : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
                            Consecutive, Reverse, DL) {
    setMask(Mask);
  }

  VPWidenStoreRecipe *clone() override {
    return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
                                  getStoredValue(), getMask(), Consecutive,
                                  Reverse, getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const { return getOperand(1); }

  /// Generate a wide store or scatter.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Widened, consecutive stores only demand the first lane of their address,
    // unless the same operand is also stored.
    return Op == getAddr() && isConsecutive() && Op != getStoredValue();
  }
};
/// A recipe for widening store operations with vector-predication intrinsics,
/// using the value to store, the address to store to, the explicit vector
/// length and an optional mask.
struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
  VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
      : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
                            {S.getAddr(), S.getStoredValue(), &EVL},
                            S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
    setMask(Mask);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const { return getOperand(1); }

  /// Return the EVL operand.
  VPValue *getEVL() const { return getOperand(2); }

  /// Generate the wide store or scatter.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenStoreEVLRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    if (Op == getEVL()) {
      assert(getStoredValue() != Op && "unexpected store of EVL");
      return true;
    }
    // Widened, consecutive memory operations only demand the first lane of
    // their address, unless the same operand is also stored. The latter can
    // happen with opaque pointers.
    return Op == getAddr() && isConsecutive() && Op != getStoredValue();
  }
};
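// Illustrative lowering sketch: with operands {Addr, StoredVal, EVL[, Mask]},
// a consecutive EVL store is emitted roughly as
//
//   call void @llvm.vp.store(<VF x T> StoredVal, ptr Addr,
//                            <VF x i1> Mask, i32 EVL)
//
// where a missing mask becomes an all-true mask. The exact intrinsic
// mangling and operand handling are chosen in execute().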
/// Recipe to expand a SCEV expression.
class VPExpandSCEVRecipe : public VPSingleDefRecipe {
  const SCEV *Expr;
  ScalarEvolution &SE;

public:
  VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
      : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}

  ~VPExpandSCEVRecipe() override = default;

  VPExpandSCEVRecipe *clone() override {
    return new VPExpandSCEVRecipe(Expr, SE);
  }

  VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)

  /// Generate the expansion of the SCEV expression.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPExpandSCEVRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const SCEV *getSCEV() const { return Expr; }
};
/// Canonical scalar induction phi of the vector loop. Starting at the specified
/// start value (either 0 or the resume value when vectorizing the epilogue
/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
/// canonical induction variable.
class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}

  ~VPCanonicalIVPHIRecipe() override = default;

  VPCanonicalIVPHIRecipe *clone() override {
    auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
    R->addOperand(getBackedgeValue());
    return R;
  }

  VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
  }

  void execute(VPTransformState &State) override {
    llvm_unreachable(
        "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the scalar type of the induction.
  Type *getScalarType() const {
    return getStartValue()->getLiveInIRValue()->getType();
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool onlyFirstPartUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Check if the induction described by \p Kind, \p Start and \p Step is
  /// canonical, i.e. has the same start and step (of 1) as the canonical IV.
  bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start,
                   VPValue *Step) const;

  /// Return the cost of this VPCanonicalIVPHIRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // For now, match the behavior of the legacy cost model.
    return 0;
  }
};
/// A recipe for generating the active lane mask for the vector loop that is
/// used to predicate the vector operations.
/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
/// remove VPActiveLaneMaskPHIRecipe.
class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
      : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
                          DL) {}

  ~VPActiveLaneMaskPHIRecipe() override = default;

  VPActiveLaneMaskPHIRecipe *clone() override {
    auto *R = new VPActiveLaneMaskPHIRecipe(getOperand(0), getDebugLoc());
    if (getNumOperands() == 2)
      R->addOperand(getOperand(1));
    return R;
  }

  VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
  }

  /// Generate the active lane mask phi of the vector loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for generating the phi node for the current index of elements,
/// adjusted in accordance with EVL value. It starts at the start value of the
/// canonical induction and gets incremented by EVL in each iteration of the
/// vector loop.
class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}

  ~VPEVLBasedIVPHIRecipe() override = default;

  VPEVLBasedIVPHIRecipe *clone() override {
    llvm_unreachable("cloning not implemented yet");
  }

  VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
  }

  void execute(VPTransformState &State) override {
    llvm_unreachable(
        "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
  }

  /// Return the cost of this VPEVLBasedIVPHIRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // For now, match the behavior of the legacy cost model.
    return 0;
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
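
// Worked example of the EVL-based IV (illustrative): for a trip count of 10
// and VF = 4, the explicit vector length may be chosen as 4, 4 and 2 across
// three vector iterations, so this phi takes the values 0, 4 and 8, i.e. the
// index of the first element processed in each iteration.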

/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe,
                                 public VPUnrollPartAccessor<1> {
public:
  VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
      : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}

  ~VPWidenCanonicalIVRecipe() override = default;

  VPWidenCanonicalIVRecipe *clone() override {
    return new VPWidenCanonicalIVRecipe(
        cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)

  /// Generate a canonical vector induction variable of the vector loop, with
  /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
  /// step = <VF*UF, VF*UF, ..., VF*UF>.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenCanonicalIVRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
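
// Worked example of the formula above (illustrative): with VF = 4 and UF = 2,
// part 0 starts at <0,1,2,3> and part 1 at <4,5,6,7>; both parts are stepped
// by <8,8,8,8>, i.e. VF * UF, on every vector iteration.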

/// A recipe for converting the input value \p IV to the corresponding value
/// of an IV with different start and step values, using Start + IV * Step.
class VPDerivedIVRecipe : public VPSingleDefRecipe {
  /// Kind of the induction.
  const InductionDescriptor::InductionKind Kind;
  /// If not nullptr, the floating point induction binary operator. Must be set
  /// for floating point inductions.
  const FPMathOperator *FPBinOp;

  /// Name to use for the generated IR instruction for the derived IV.
  std::string Name;

public:
  VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
                    VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
                    const Twine &Name = "")
      : VPDerivedIVRecipe(
            IndDesc.getKind(),
            dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
            Start, CanonicalIV, Step, Name) {}

  VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
                    const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
                    VPValue *Step, const Twine &Name = "")
      : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
        FPBinOp(FPBinOp), Name(Name.str()) {}

  ~VPDerivedIVRecipe() override = default;

  VPDerivedIVRecipe *clone() override {
    return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
                                 getStepValue());
  }

  VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)

  /// Generate the transformed value of the induction at offset StartValue (1.
  /// operand) + IV (2. operand) * StepValue (3. operand).
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPDerivedIVRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  Type *getScalarType() const {
    return getStartValue()->getLiveInIRValue()->getType();
  }

  VPValue *getStartValue() const { return getOperand(0); }
  VPValue *getStepValue() const { return getOperand(2); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values.
class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
                              public VPUnrollPartAccessor<2> {
  Instruction::BinaryOps InductionOpcode;

public:
  VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step,
                        Instruction::BinaryOps Opcode, FastMathFlags FMFs)
      : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
                            ArrayRef<VPValue *>({IV, Step}), FMFs),
        InductionOpcode(Opcode) {}

  VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
                        VPValue *Step)
      : VPScalarIVStepsRecipe(
            IV, Step, IndDesc.getInductionOpcode(),
            dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
                ? IndDesc.getInductionBinOp()->getFastMathFlags()
                : FastMathFlags()) {}

  ~VPScalarIVStepsRecipe() override = default;

  VPScalarIVStepsRecipe *clone() override {
    return new VPScalarIVStepsRecipe(
        getOperand(0), getOperand(1), InductionOpcode,
        hasFastMathFlags() ? getFastMathFlags() : FastMathFlags());
  }

  VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)

  /// Generate the scalarized versions of the phi node as needed by their
  /// users.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPScalarIVStepsRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getStepValue() const { return getOperand(1); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's, each representing a sequence
/// of output IR instructions. All PHI-like recipes must come before any
/// non-PHI recipes.
class VPBasicBlock : public VPBlockBase {
public:
  using RecipeListTy = iplist<VPRecipeBase>;

protected:
  /// The VPRecipes held in the order of output instructions to generate.
  RecipeListTy Recipes;

  VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
      : VPBlockBase(BlockSC, Name.str()) {}

public:
  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
      : VPBlockBase(VPBasicBlockSC, Name.str()) {
    if (Recipe)
      appendRecipe(Recipe);
  }

  ~VPBasicBlock() override {
    while (!Recipes.empty())
      Recipes.pop_back();
  }

  /// Instruction iterators...
  using iterator = RecipeListTy::iterator;
  using const_iterator = RecipeListTy::const_iterator;
  using reverse_iterator = RecipeListTy::reverse_iterator;
  using const_reverse_iterator = RecipeListTy::const_reverse_iterator;

  //===--------------------------------------------------------------------===//
  /// Recipe iterator methods
  ///
  inline iterator begin() { return Recipes.begin(); }
  inline const_iterator begin() const { return Recipes.begin(); }
  inline iterator end() { return Recipes.end(); }
  inline const_iterator end() const { return Recipes.end(); }

  inline reverse_iterator rbegin() { return Recipes.rbegin(); }
  inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
  inline reverse_iterator rend() { return Recipes.rend(); }
  inline const_reverse_iterator rend() const { return Recipes.rend(); }

  inline size_t size() const { return Recipes.size(); }
  inline bool empty() const { return Recipes.empty(); }
  inline const VPRecipeBase &front() const { return Recipes.front(); }
  inline VPRecipeBase &front() { return Recipes.front(); }
  inline const VPRecipeBase &back() const { return Recipes.back(); }
  inline VPRecipeBase &back() { return Recipes.back(); }

  /// Returns a reference to the list of recipes.
  RecipeListTy &getRecipeList() { return Recipes; }

  /// Returns a pointer to a member of the recipe list.
  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
    return &VPBasicBlock::Recipes;
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
           V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
  }

  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
    assert(Recipe && "No recipe to append.");
    assert(!Recipe->Parent && "Recipe already in VPlan");
    Recipe->Parent = this;
    Recipes.insert(InsertPt, Recipe);
  }

  /// Augment the existing recipes of a VPBasicBlock with an additional
  /// \p Recipe as the last recipe.
  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  /// Return the cost of this VPBasicBlock.
  InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;

  /// Return the position of the first non-phi node recipe in the block.
  iterator getFirstNonPhi();

  /// Returns an iterator range over the PHI-like recipes in the block.
  iterator_range<iterator> phis() {
    return make_range(begin(), getFirstNonPhi());
  }

  void dropAllReferences(VPValue *NewValue) override;

  /// Split current block at \p SplitAt by inserting a new block between the
  /// current block and its successors and moving all recipes starting at
  /// SplitAt to the new block. Returns the new block.
  VPBasicBlock *splitAt(iterator SplitAt);

  VPRegionBlock *getEnclosingLoopRegion();
  const VPRegionBlock *getEnclosingLoopRegion() const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
  /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_stream &O) version.
#endif

  /// If the block has multiple successors, return the branch recipe
  /// terminating the block. If there are no or only a single successor, return
  /// nullptr.
  VPRecipeBase *getTerminator();
  const VPRecipeBase *getTerminator() const;

  /// Returns true if the block is exiting its parent region.
  bool isExiting() const;

  /// Clone the current block and its recipes, without updating the operands of
  /// the cloned recipes.
  VPBasicBlock *clone() override {
    auto *NewBlock = new VPBasicBlock(getName());
    for (VPRecipeBase &R : *this)
      NewBlock->appendRecipe(R.clone());
    return NewBlock;
  }

protected:
  /// Execute the recipes in the IR basic block \p BB.
  void executeRecipes(VPTransformState *State, BasicBlock *BB);

  /// Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block
  /// generated for this VPBB.
  void connectToPredecessors(VPTransformState::CFGState &CFG);

private:
  /// Create an IR BasicBlock to hold the output instructions generated by this
  /// VPBasicBlock, and return it. Update the CFGState accordingly.
  BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
};

/// A special type of VPBasicBlock that wraps an existing IR basic block.
/// Recipes of the block get added before the first non-phi instruction in the
/// wrapped block.
/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
/// preheader block.
class VPIRBasicBlock : public VPBasicBlock {
  BasicBlock *IRBB;

public:
  VPIRBasicBlock(BasicBlock *IRBB)
      : VPBasicBlock(VPIRBasicBlockSC,
                     (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
        IRBB(IRBB) {}

  ~VPIRBasicBlock() override {}

  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
  }

  /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
  /// instructions in \p IRBB, except its terminator which is managed in VPlan.
  static VPIRBasicBlock *fromBasicBlock(BasicBlock *IRBB);

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  VPIRBasicBlock *clone() override {
    auto *NewBlock = new VPIRBasicBlock(IRBB);
    for (VPRecipeBase &R : Recipes)
      NewBlock->appendRecipe(R.clone());
    return NewBlock;
  }

  BasicBlock *getIRBasicBlock() const { return IRBB; }
};

/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
/// A VPRegionBlock may indicate that its contents are to be replicated several
/// times. This is designed to support predicated scalarization, in which a
/// scalar if-then code structure needs to be generated VF * UF times. Having
/// this replication indicator helps to keep a single model for multiple
/// candidate VF's. The actual replication takes place only once the desired VF
/// and UF have been determined.
class VPRegionBlock : public VPBlockBase {
  /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
  VPBlockBase *Entry;

  /// Hold the Single Exiting block of the SESE region modelled by the
  /// VPRegionBlock.
  VPBlockBase *Exiting;

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool IsReplicator;

public:
  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
                const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
        IsReplicator(IsReplicator) {
    assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
    assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
    Entry->setParent(this);
    Exiting->setParent(this);
  }
  VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
        IsReplicator(IsReplicator) {}

  ~VPRegionBlock() override {
    if (Entry) {
      VPValue DummyValue;
      Entry->dropAllReferences(&DummyValue);
      deleteCFG(Entry);
    }
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
  }

  const VPBlockBase *getEntry() const { return Entry; }
  VPBlockBase *getEntry() { return Entry; }

  /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
  /// EntryBlock must have no predecessors.
  void setEntry(VPBlockBase *EntryBlock) {
    assert(EntryBlock->getPredecessors().empty() &&
           "Entry block cannot have predecessors.");
    Entry = EntryBlock;
    EntryBlock->setParent(this);
  }

  const VPBlockBase *getExiting() const { return Exiting; }
  VPBlockBase *getExiting() { return Exiting; }

  /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
  /// ExitingBlock must have no successors.
  void setExiting(VPBlockBase *ExitingBlock) {
    assert(ExitingBlock->getSuccessors().empty() &&
           "Exit block cannot have successors.");
    Exiting = ExitingBlock;
    ExitingBlock->setParent(this);
  }

  /// Returns the pre-header VPBasicBlock of the loop region.
  VPBasicBlock *getPreheaderVPBB() {
    assert(!isReplicator() && "should only get pre-header of loop regions");
    return getSinglePredecessor()->getExitingBasicBlock();
  }

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool isReplicator() const { return IsReplicator; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRegionBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  // Return the cost of this region.
  InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;

  void dropAllReferences(VPValue *NewValue) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
  /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual regions is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_stream &O) version.
#endif

  /// Clone all blocks in the single-entry single-exit region of the block and
  /// their recipes without updating the operands of the cloned recipes.
  VPRegionBlock *clone() override;
};
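
// For illustration only: a replicate region wraps a predicated scalar if-then
// structure. A minimal sketch, assuming disconnected blocks *Entry and
// *Exiting that already form a SESE subgraph:
//
//   auto *PredRegion =
//       new VPRegionBlock(Entry, Exiting, "pred.region", /*IsReplicator=*/true);
//   // When the plan is executed, the region's contents are emitted once per
//   // lane and part, i.e. VF * UF times.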

/// VPlan models a candidate for vectorization, encoding various decisions
/// taken to produce efficient output IR, including which branches,
/// basic-blocks and output IR instructions to generate, and their cost. VPlan
/// holds a Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an
/// Entry VPBasicBlock.
class VPlan {
  friend class VPlanPrinter;
  friend class VPSlotTracker;

  /// VPBasicBlock corresponding to the original preheader. Used to place
  /// VPExpandSCEV recipes for expressions used during skeleton creation and
  /// the rest of VPlan execution.
  /// When this VPlan is used for the epilogue vector loop, the entry will be
  /// replaced by a new entry block created during skeleton creation.
  VPBasicBlock *Entry;

  /// VPIRBasicBlock wrapping the header of the original scalar loop.
  VPIRBasicBlock *ScalarHeader;

  /// Holds the VFs applicable to this VPlan.
  SmallSetVector<ElementCount, 2> VFs;

  /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
  /// any UF.
  SmallSetVector<unsigned, 2> UFs;

  /// Holds the name of the VPlan, for printing.
  std::string Name;

  /// Represents the trip count of the original loop, for folding
  /// the tail.
  VPValue *TripCount = nullptr;

  /// Represents the backedge taken count of the original loop, for folding
  /// the tail. It equals TripCount - 1.
  VPValue *BackedgeTakenCount = nullptr;

  /// Represents the vector trip count.
  VPValue VectorTripCount;

  /// Represents the vectorization factor of the loop.
  VPValue VF;

  /// Represents the loop-invariant VF * UF of the vector loop region.
  VPValue VFxUF;

  /// Holds a mapping between Values and their corresponding VPValue inside
  /// VPlan.
  Value2VPValueTy Value2VPValue;

  /// Contains all the external definitions created for this VPlan. External
  /// definitions are VPValues that hold a pointer to their underlying IR.
  SmallVector<VPValue *, 16> VPLiveInsToFree;

  /// Mapping from SCEVs to the VPValues representing their expansions.
  /// NOTE: This mapping is temporary and will be removed once all users have
  /// been modeled in VPlan directly.
  DenseMap<const SCEV *, VPValue *> SCEVToExpansion;

public:
  /// Construct a VPlan with \p Entry entering the plan, trip count \p TC and
  /// with \p ScalarHeader wrapping the original header of the scalar loop.
  VPlan(VPBasicBlock *Entry, VPValue *TC, VPIRBasicBlock *ScalarHeader)
      : VPlan(Entry, ScalarHeader) {
    TripCount = TC;
  }

  /// Constructor variants that take disconnected preheader and entry blocks,
  /// connecting them as part of construction.
  /// FIXME: Only used to reduce the need of code changes during transition.
  VPlan(VPBasicBlock *OriginalPreheader, VPValue *TC,
        VPBasicBlock *EntryVectorPreHeader, VPIRBasicBlock *ScalarHeader);
  VPlan(VPBasicBlock *OriginalPreheader, VPBasicBlock *EntryVectorPreHeader,
        VPIRBasicBlock *ScalarHeader);

  /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
  /// wrapping the original header of the scalar loop.
  VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
      : Entry(Entry), ScalarHeader(ScalarHeader) {
    Entry->setPlan(this);
    assert(ScalarHeader->getNumSuccessors() == 0 &&
           "scalar header must be a leaf node");
  }

  ~VPlan();

  void setEntry(VPBasicBlock *VPBB) {
    Entry = VPBB;
    VPBB->setPlan(this);
  }

  /// Create initial VPlan, having an "entry" VPBasicBlock (wrapping
  /// original scalar pre-header) which contains SCEV expansions that need
  /// to happen before the CFG is modified (when executing a VPlan for the
  /// epilogue vector loop, the original entry needs to be replaced by a new
  /// one); a VPBasicBlock for the vector pre-header, followed by a region for
  /// the vector loop, followed by the middle VPBasicBlock. If a check is
  /// needed to guard executing the scalar epilogue loop, it will be added to
  /// the middle block, together with VPBasicBlocks for the scalar preheader
  /// and exit blocks. \p InductionTy is the type of the canonical induction
  /// and used for related values, like the trip count expression.
  static VPlanPtr createInitialVPlan(Type *InductionTy,
                                     PredicatedScalarEvolution &PSE,
                                     bool RequiresScalarEpilogueCheck,
                                     bool TailFolded, Loop *TheLoop);

  /// Prepare the plan for execution, setting up the required live-in values.
  void prepareToExecute(Value *TripCount, Value *VectorTripCount,
                        VPTransformState &State);

  /// Generate the IR code for this VPlan.
  void execute(VPTransformState *State);

  /// Return the cost of this plan.
  InstructionCost cost(ElementCount VF, VPCostContext &Ctx);

  VPBasicBlock *getEntry() { return Entry; }
  const VPBasicBlock *getEntry() const { return Entry; }

  /// Returns the preheader of the vector loop region.
  VPBasicBlock *getVectorPreheader() {
    return cast<VPBasicBlock>(getVectorLoopRegion()->getSinglePredecessor());
  }

  /// Returns the VPRegionBlock of the vector loop.
  VPRegionBlock *getVectorLoopRegion();
  const VPRegionBlock *getVectorLoopRegion() const;

  /// Returns the 'middle' block of the plan, that is the block that selects
  /// whether to execute the scalar tail loop or the exit block from the loop
  /// latch.
  const VPBasicBlock *getMiddleBlock() const {
    return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
  }
  VPBasicBlock *getMiddleBlock() {
    return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
  }

  /// Return the VPBasicBlock for the preheader of the scalar loop.
  VPBasicBlock *getScalarPreheader() const {
    return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
  }

  /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
  VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }

  /// Return an iterator range over the VPIRBasicBlock wrapping the exit blocks
  /// of the VPlan, that is leaf nodes except the scalar header. Defined in
  /// VPlanHCFG, as the definition of the type needs access to the definitions
  /// of VPBlockShallowTraversalWrapper.
  auto getExitBlocks();

  /// The trip count of the original loop.
  VPValue *getTripCount() const {
    assert(TripCount && "trip count needs to be set before accessing it");
    return TripCount;
  }

  /// Resets the trip count for the VPlan. The caller must make sure all uses
  /// of the original trip count have been replaced.
  void resetTripCount(VPValue *NewTripCount) {
    assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
           "TripCount always must be set");
    TripCount = NewTripCount;
  }

  /// The backedge taken count of the original loop.
  VPValue *getOrCreateBackedgeTakenCount() {
    if (!BackedgeTakenCount)
      BackedgeTakenCount = new VPValue();
    return BackedgeTakenCount;
  }

  /// The vector trip count.
  VPValue &getVectorTripCount() { return VectorTripCount; }

  /// Returns the VF of the vector loop region.
  VPValue &getVF() { return VF; }

  /// Returns VF * UF of the vector loop region.
  VPValue &getVFxUF() { return VFxUF; }

  void addVF(ElementCount VF) { VFs.insert(VF); }

  void setVF(ElementCount VF) {
    assert(hasVF(VF) && "Cannot set VF not already in plan");
    VFs.clear();
    VFs.insert(VF);
  }

  bool hasVF(ElementCount VF) { return VFs.count(VF); }
  bool hasScalableVF() {
    return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
  }

  /// Returns an iterator range over all VFs of the plan.
  iterator_range<SmallSetVector<ElementCount, 2>::iterator>
  vectorFactors() const {
    return {VFs.begin(), VFs.end()};
  }

  bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }

  bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }

  unsigned getUF() const {
    assert(UFs.size() == 1 && "Expected a single UF");
    return UFs[0];
  }

  void setUF(unsigned UF) {
    assert(hasUF(UF) && "Cannot set the UF not already in plan");
    UFs.clear();
    UFs.insert(UF);
  }

  /// Return a string with the name of the plan and the applicable VFs and UFs.
  std::string getName() const;

  void setName(const Twine &newName) { Name = newName.str(); }

  /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
  /// yet) for \p V.
  VPValue *getOrAddLiveIn(Value *V) {
    assert(V && "Trying to get or add the VPValue of a null Value");
    if (!Value2VPValue.count(V)) {
      VPValue *VPV = new VPValue(V);
      VPLiveInsToFree.push_back(VPV);
      assert(VPV->isLiveIn() && "VPV must be a live-in.");
      assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
      Value2VPValue[V] = VPV;
    }

    assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
    assert(Value2VPValue[V]->isLiveIn() &&
           "Only live-ins should be in mapping");
    return Value2VPValue[V];
  }

  /// Return the live-in VPValue for \p V, if there is one or nullptr
  /// otherwise.
  VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the live-ins of this VPlan to \p O.
  void printLiveIns(raw_ostream &O) const;

  /// Print this VPlan to \p O.
  void print(raw_ostream &O) const;

  /// Print this VPlan in DOT format to \p O.
  void printDOT(raw_ostream &O) const;

  /// Dump the plan to stderr (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  /// Returns the canonical induction recipe of the vector loop.
  VPCanonicalIVPHIRecipe *getCanonicalIV() {
    VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
    if (EntryVPBB->empty()) {
      // VPlan native path.
      EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
    }
    return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
  }

  VPValue *getSCEVExpansion(const SCEV *S) const {
    return SCEVToExpansion.lookup(S);
  }

  void addSCEVExpansion(const SCEV *S, VPValue *V) {
    assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
    SCEVToExpansion[S] = V;
  }

  /// \return The block corresponding to the original preheader.
  /// FIXME: There's no separate preheader any longer and Entry now serves the
  /// same purpose as the original preheader. Remove after transition.
  VPBasicBlock *getPreheader() { return Entry; }
  const VPBasicBlock *getPreheader() const { return Entry; }

  /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
  /// recipes to refer to the clones, and return it.
  VPlan *duplicate();
};
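
// For illustration only: live-in IR values are made available to recipes via
// getOrAddLiveIn(), and candidate VFs are registered before one is chosen. A
// minimal sketch, assuming an existing VPlan &Plan and an IR Value *IncVal:
//
//   VPValue *LiveIn = Plan.getOrAddLiveIn(IncVal);
//   Plan.addVF(ElementCount::getFixed(4));
//   if (Plan.hasVF(ElementCount::getFixed(4)))
//     Plan.setVF(ElementCount::getFixed(4));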

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
/// indented and follows the dot format.
class VPlanPrinter {
  raw_ostream &OS;
  const VPlan &Plan;
  unsigned Depth = 0;
  unsigned TabWidth = 2;
  std::string Indent;
  unsigned BID = 0;
  SmallDenseMap<const VPBlockBase *, unsigned> BlockID;

  VPSlotTracker SlotTracker;

  /// Handle indentation.
  void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }

  /// Print a given \p Block of the Plan.
  void dumpBlock(const VPBlockBase *Block);

  /// Print the information related to the CFG edges going out of a given
  /// \p Block, followed by printing the successor blocks themselves.
  void dumpEdges(const VPBlockBase *Block);

  /// Print a given \p BasicBlock, including its VPRecipes, followed by
  /// printing its successor blocks.
  void dumpBasicBlock(const VPBasicBlock *BasicBlock);

  /// Print a given \p Region of the Plan.
  void dumpRegion(const VPRegionBlock *Region);

  unsigned getOrCreateBID(const VPBlockBase *Block) {
    return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
  }

  Twine getOrCreateName(const VPBlockBase *Block);

  Twine getUID(const VPBlockBase *Block);

  /// Print the information related to a CFG edge between two VPBlockBases.
  void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
                const Twine &Label);

public:
  VPlanPrinter(raw_ostream &O, const VPlan &P)
      : OS(O), Plan(P), SlotTracker(&P) {}

  LLVM_DUMP_METHOD void dump();
};

struct VPlanIngredient {
  const Value *V;

  VPlanIngredient(const Value *V) : V(V) {}

  void print(raw_ostream &O) const;
};

inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
  I.print(OS);
  return OS;
}

inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
  Plan.print(OS);
  return OS;
}
#endif

//===----------------------------------------------------------------------===//
// VPlan Utilities
//===----------------------------------------------------------------------===//

/// Class that provides utilities for VPBlockBases in VPlan.
class VPBlockUtils {
public:
  VPBlockUtils() = delete;

  /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
  /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
  /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
  /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
  /// have neither successors nor predecessors.
  static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
    assert(NewBlock->getSuccessors().empty() &&
           NewBlock->getPredecessors().empty() &&
           "Can't insert new block with predecessors or successors.");
    NewBlock->setParent(BlockPtr->getParent());
    SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
    for (VPBlockBase *Succ : Succs) {
      disconnectBlocks(BlockPtr, Succ);
      connectBlocks(NewBlock, Succ);
    }
    connectBlocks(BlockPtr, NewBlock);
  }

  /// Insert disconnected block \p NewBlock before \p Blockptr. First
  /// disconnects all predecessors of \p BlockPtr and connects them to \p
  /// NewBlock. Add \p NewBlock as predecessor of \p BlockPtr and \p BlockPtr
  /// as successor of \p NewBlock.
  static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
    assert(NewBlock->getSuccessors().empty() &&
           NewBlock->getPredecessors().empty() &&
           "Can't insert new block with predecessors or successors.");
    NewBlock->setParent(BlockPtr->getParent());
    for (VPBlockBase *Pred : to_vector(BlockPtr->predecessors())) {
      disconnectBlocks(Pred, BlockPtr);
      connectBlocks(Pred, NewBlock);
    }
    connectBlocks(NewBlock, BlockPtr);
  }

  /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
  /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and
  /// \p BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p
  /// BlockPtr parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no
  /// successors and \p IfTrue and \p IfFalse must have neither successors nor
  /// predecessors.
  static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
                                   VPBlockBase *BlockPtr) {
    assert(IfTrue->getSuccessors().empty() &&
           "Can't insert IfTrue with successors.");
    assert(IfFalse->getSuccessors().empty() &&
           "Can't insert IfFalse with successors.");
    BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
    IfTrue->setPredecessors({BlockPtr});
    IfFalse->setPredecessors({BlockPtr});
    IfTrue->setParent(BlockPtr->getParent());
    IfFalse->setParent(BlockPtr->getParent());
  }

  /// Connect VPBlockBases \p From and \p To bi-directionally. If \p PredIdx is
  /// -1, append \p From to the predecessors of \p To, otherwise set \p To's
  /// predecessor at \p PredIdx to \p From. If \p SuccIdx is -1, append \p To
  /// to the successors of \p From, otherwise set \p From's successor at \p
  /// SuccIdx to \p To. Both VPBlockBases must have the same parent, which can
  /// be null. Both VPBlockBases can be already connected to other
  /// VPBlockBases.
  static void connectBlocks(VPBlockBase *From, VPBlockBase *To,
                            unsigned PredIdx = -1u, unsigned SuccIdx = -1u) {
    assert((From->getParent() == To->getParent()) &&
           "Can't connect two block with different parents");
    assert((SuccIdx != -1u || From->getNumSuccessors() < 2) &&
           "Blocks can't have more than two successors.");
    if (SuccIdx == -1u)
      From->appendSuccessor(To);
    else
      From->getSuccessors()[SuccIdx] = To;

    if (PredIdx == -1u)
      To->appendPredecessor(From);
    else
      To->getPredecessors()[PredIdx] = From;
  }

  /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
  /// from the successors of \p From and \p From from the predecessors of \p
  /// To.
  static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
    assert(To && "Successor to disconnect is null.");
    From->removeSuccessor(To);
    To->removePredecessor(From);
  }

  /// Reassociate all the blocks connected to \p Old so that they now point to
  /// \p New.
  static void reassociateBlocks(VPBlockBase *Old, VPBlockBase *New) {
    for (auto *Pred : to_vector(Old->getPredecessors()))
      Pred->replaceSuccessor(Old, New);
    for (auto *Succ : to_vector(Old->getSuccessors()))
      Succ->replacePredecessor(Old, New);
    New->setPredecessors(Old->getPredecessors());
    New->setSuccessors(Old->getSuccessors());
    Old->clearPredecessors();
    Old->clearSuccessors();
  }

  /// Return an iterator range over \p Range which only includes \p BlockTy
  /// blocks. The accesses are casted to \p BlockTy.
  template <typename BlockTy, typename T>
  static auto blocksOnly(const T &Range) {
    // Create BaseTy with correct const-ness based on BlockTy.
    using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
                                      const VPBlockBase, VPBlockBase>;

    // We need to first create an iterator range over (const) BlockTy &
    // instead of (const) BlockTy * for filter_range to work properly.
    auto Mapped =
        map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
    auto Filter = make_filter_range(
        Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
    return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
      return cast<BlockTy>(&Block);
    });
  }

  /// Inserts \p BlockPtr on the edge between \p From and \p To. That is,
  /// update \p From's successor to \p To to point to \p BlockPtr and \p To's
  /// predecessor from \p From to \p BlockPtr. \p From and \p To are added to
  /// \p BlockPtr's predecessors and successors respectively. There must be a
  /// single edge between \p From and \p To.
  static void insertOnEdge(VPBlockBase *From, VPBlockBase *To,
                           VPBlockBase *BlockPtr) {
    auto &Successors = From->getSuccessors();
    auto &Predecessors = To->getPredecessors();
    assert(count(Successors, To) == 1 && count(Predecessors, From) == 1 &&
           "must have a single edge between From and To");
    unsigned SuccIdx = std::distance(Successors.begin(), find(Successors, To));
    unsigned PredIx =
        std::distance(Predecessors.begin(), find(Predecessors, From));
    VPBlockUtils::connectBlocks(From, BlockPtr, -1, SuccIdx);
    VPBlockUtils::connectBlocks(BlockPtr, To, PredIx, -1);
    disconnectBlocks(From, To);
  }
};
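
// For illustration only: VPBlockUtils can be used to build a diamond below a
// block and later iterate only the VPBasicBlocks of a range. A minimal
// sketch, assuming disconnected blocks *IfTrue, *IfFalse, *Merge and a block
// *Pred with no successors:
//
//   VPBlockUtils::insertTwoBlocksAfter(IfTrue, IfFalse, Pred);
//   VPBlockUtils::connectBlocks(IfTrue, Merge);
//   VPBlockUtils::connectBlocks(IfFalse, Merge);
//   for (VPBasicBlock *VPBB :
//        VPBlockUtils::blocksOnly<VPBasicBlock>(Pred->successors())) {
//     // Only VPBasicBlock successors are visited here.
//   }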

class VPInterleavedAccessInfo {
  DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
      InterleaveGroupMap;

  /// Type for mapping of instruction based interleave groups to VPInstruction
  /// interleave groups
  using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
                             InterleaveGroup<VPInstruction> *>;

  /// Recursively traverse \p Region and populate VPlan based interleave groups
  /// based on \p IAI.
  void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
                   InterleavedAccessInfo &IAI);
  /// Recursively traverse \p Block and populate VPlan based interleave groups
  /// based on \p IAI.
  void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
                  InterleavedAccessInfo &IAI);

public:
  VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);

  ~VPInterleavedAccessInfo() {
    SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
    // Avoid releasing a pointer twice.
    for (auto &I : InterleaveGroupMap)
      DelSet.insert(I.second);
    for (auto *Ptr : DelSet)
      delete Ptr;
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if doesn't have such group.
  InterleaveGroup<VPInstruction> *
  getInterleaveGroup(VPInstruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }
};

/// Class that maps (parts of) an existing VPlan to trees of combined
/// VPInstructions.
class VPlanSlp {
  enum class OpMode { Failed, Load, Opcode };

  /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
  /// DenseMap keys.
  struct BundleDenseMapInfo {
    static SmallVector<VPValue *, 4> getEmptyKey() {
      return {reinterpret_cast<VPValue *>(-1)};
    }

    static SmallVector<VPValue *, 4> getTombstoneKey() {
      return {reinterpret_cast<VPValue *>(-2)};
    }

    static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
      return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
    }

    static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
                        const SmallVector<VPValue *, 4> &RHS) {
      return LHS == RHS;
    }
  };

  /// Mapping of values in the original VPlan to a combined VPInstruction.
  DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
      BundleToCombined;

  VPInterleavedAccessInfo &IAI;

  /// Basic block to operate on. For now, only instructions in a single BB are
  /// considered.
  const VPBasicBlock &BB;

  /// Indicates whether we managed to combine all visited instructions or not.
  bool CompletelySLP = true;

  /// Width of the widest combined bundle in bits.
  unsigned WidestBundleBits = 0;

  using MultiNodeOpTy =
      typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;

  // Input operand bundles for the current multi node. Each multi node operand
  // bundle contains values not matching the multi node's opcode. They will
  // be reordered in reorderMultiNodeOps, once we completed building a
  // multi node.
  SmallVector<MultiNodeOpTy, 4> MultiNodeOps;

  /// Indicates whether we are building a multi node currently.
  bool MultiNodeActive = false;

  /// Check if we can vectorize Operands together.
  bool areVectorizable(ArrayRef<VPValue *> Operands) const;

  /// Add combined instruction \p New for the bundle \p Operands.
  void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);

  /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
  VPInstruction *markFailed();

  /// Reorder operands in the multi node to maximize sequential memory access
  /// and commutative operations.
  SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();

  /// Choose the best candidate to use for the lane after \p Last. The set of
  /// candidates to choose from are values with an opcode matching \p Last's
  /// or loads consecutive to \p Last.
  std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
                                       SmallPtrSetImpl<VPValue *> &Candidates,
                                       VPInterleavedAccessInfo &IAI);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print bundle \p Values to dbgs().
  void dumpBundle(ArrayRef<VPValue *> Values);
#endif

public:
  VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}

  ~VPlanSlp() = default;

  /// Tries to build an SLP tree rooted at \p Operands and returns a
  /// VPInstruction combining \p Operands, if they can be combined.
  VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);

  /// Return the width of the widest combined bundle in bits.
  unsigned getWidestBundleBits() const { return WidestBundleBits; }

  /// Return true if all visited instructions can be combined.
  bool isCompletelySLP() const { return CompletelySLP; }
};
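
// For illustration only: a minimal driving sketch for the SLP combiner,
// assuming an existing VPInterleavedAccessInfo &VPIAI, a VPBasicBlock &Body,
// and a bundle of store roots Stores:
//
//   VPlanSlp Slp(VPIAI, Body);
//   VPInstruction *Combined = Slp.buildGraph(Stores);
//   if (Combined && Slp.isCompletelySLP()) {
//     // All visited instructions were combined; the widest bundle is
//     // Slp.getWidestBundleBits() bits wide.
//   }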

} // end namespace llvm

#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H