//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of the Vectorization Plan base classes:
/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
///    VPBlockBase, together implementing a Hierarchical CFG;
/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
///    within VPBasicBlocks;
/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
///    also inherit from VPValue;
/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
///    instruction;
/// 5. The VPlan class holding a candidate for vectorization;
/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
28 #include "VPlanAnalysis.h"
29 #include "VPlanValue.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/MapVector.h"
32 #include "llvm/ADT/SmallBitVector.h"
33 #include "llvm/ADT/SmallPtrSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/Twine.h"
36 #include "llvm/ADT/ilist.h"
37 #include "llvm/ADT/ilist_node.h"
38 #include "llvm/Analysis/IVDescriptors.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/VectorUtils.h"
41 #include "llvm/IR/DebugLoc.h"
42 #include "llvm/IR/FMF.h"
43 #include "llvm/IR/Operator.h"
namespace llvm {

class InnerLoopVectorizer;
class RecurrenceDescriptor;
class VPReplicateRecipe;
/// Returns a calculation for the total number of elements for a given \p VF.
/// For fixed width vectors this value is a constant, whereas for scalable
/// vectors it is an expression determined at runtime.
Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);

/// Return a value for Step multiplied by VF.
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
                       int64_t Step);
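// For illustration: with a fixed VF of 4 and Step 2 this folds to the
// constant 8, while for a scalable VF <vscale x 4> it yields a runtime
// expression equivalent to 8 * vscale. A hypothetical call site (Builder is
// assumed, not part of this header):
//
//   Value *Step = createStepForVF(Builder, Builder.getInt64Ty(),
//                                 ElementCount::getScalable(4), 2);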
const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
                                Loop *CurLoop = nullptr);
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 16) = {1, 2, 4, 8}
struct VFRange {
  // A power of 2.
  const ElementCount Start;

  // A power of 2. If End <= Start range is empty.
  ElementCount End;

  bool isEmpty() const {
    return End.getKnownMinValue() <= Start.getKnownMinValue();
  }

  VFRange(const ElementCount &Start, const ElementCount &End)
      : Start(Start), End(End) {
    assert(Start.isScalable() == End.isScalable() &&
           "Both Start and End should have the same scalable flag");
    assert(isPowerOf2_32(Start.getKnownMinValue()) &&
           "Expected Start to be a power of 2");
    assert(isPowerOf2_32(End.getKnownMinValue()) &&
           "Expected End to be a power of 2");
  }
  /// Iterator to iterate over vectorization factors in a VFRange.
  class iterator
      : public iterator_facade_base<iterator, std::forward_iterator_tag,
                                    ElementCount> {
    ElementCount VF;

  public:
    iterator(ElementCount VF) : VF(VF) {}

    bool operator==(const iterator &Other) const { return VF == Other.VF; }

    ElementCount operator*() const { return VF; }

    iterator &operator++() {
      VF *= 2;
      return *this;
    }
  };

  iterator begin() { return iterator(Start); }
  iterator end() {
    assert(isPowerOf2_32(End.getKnownMinValue()));
    return iterator(End);
  }
};
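// Illustrative sketch of iterating a VFRange (ElementCount helpers come from
// llvm/Support/TypeSize.h; this snippet is not part of the header):
//
//   VFRange R(ElementCount::getFixed(1), ElementCount::getFixed(16));
//   for (ElementCount VF : R)
//     ; // visits the fixed VFs 1, 2, 4, 8; End (16) is excluded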
using VPlanPtr = std::unique_ptr<VPlan>;

/// In what follows, the term "input IR" refers to code that is fed into the
/// vectorizer whereas the term "output IR" refers to code that is generated by
/// the vectorizer.
/// VPLane provides a way to access lanes in both fixed width and scalable
/// vectors, where for the latter the lane index sometimes needs calculating
/// as a runtime expression.
class VPLane {
public:
  /// Kind describes how to interpret Lane.
  enum class Kind : uint8_t {
    /// For First, Lane is the index into the first N elements of a
    /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
    First,
    /// For ScalableLast, Lane is the offset from the start of the last
    /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
    /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
    /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
    ScalableLast
  };

private:
  /// in [0..VF)
  unsigned Lane;

  /// Indicates how the Lane should be interpreted, as described above.
  Kind LaneKind;

public:
  VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}

  static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }

  static VPLane getLastLaneForVF(const ElementCount &VF) {
    unsigned LaneOffset = VF.getKnownMinValue() - 1;
    Kind LaneKind;
    if (VF.isScalable())
      // In this case 'LaneOffset' refers to the offset from the start of the
      // last subvector with VF.getKnownMinValue() elements.
      LaneKind = VPLane::Kind::ScalableLast;
    else
      LaneKind = VPLane::Kind::First;
    return VPLane(LaneOffset, LaneKind);
  }

  /// Returns a compile-time known value for the lane index and asserts if the
  /// lane can only be calculated at runtime.
  unsigned getKnownLane() const {
    assert(LaneKind == Kind::First);
    return Lane;
  }

  /// Returns an expression describing the lane index that can be used at
  /// runtime.
  Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;

  /// Returns the Kind of lane offset.
  Kind getKind() const { return LaneKind; }

  /// Returns true if this is the first lane of the whole vector.
  bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }

  /// Maps the lane to a cache index based on \p VF.
  unsigned mapToCacheIndex(const ElementCount &VF) const {
    switch (LaneKind) {
    case VPLane::Kind::ScalableLast:
      assert(VF.isScalable() && Lane < VF.getKnownMinValue());
      return VF.getKnownMinValue() + Lane;
    default:
      assert(Lane < VF.getKnownMinValue());
      return Lane;
    }
  }

  /// Returns the maximum number of lanes that we are able to consider
  /// caching for \p VF.
  static unsigned getNumCachedLanes(const ElementCount &VF) {
    return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
  }
};
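// Illustrative sketch: for a scalable VF <vscale x 4>, the last lane is
// modeled as ScalableLast with offset 3 and maps to cache index 4 + 3 = 7;
// fixed lanes occupy cache indices [0, 4) and ScalableLast lanes [4, 8).
//
//   VPLane Last = VPLane::getLastLaneForVF(ElementCount::getScalable(4));
//   unsigned Idx = Last.mapToCacheIndex(ElementCount::getScalable(4)); // 7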
/// VPIteration represents a single point in the iteration space of the output
/// (vectorized and/or unrolled) IR loop.
struct VPIteration {
  /// in [0..UF)
  unsigned Part;

  VPLane Lane;

  VPIteration(unsigned Part, unsigned Lane,
              VPLane::Kind Kind = VPLane::Kind::First)
      : Part(Part), Lane(Lane, Kind) {}

  VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}

  bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
};
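// Illustrative sketch (values assumed): part 1, lane 2 of the unrolled,
// vectorized iteration space; only part 0 combined with the first lane
// counts as the first iteration.
//
//   VPIteration It(1, 2);
//   bool IsFirst = It.isFirstIteration(); // false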
/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
  VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
                   DominatorTree *DT, IRBuilderBase &Builder,
                   InnerLoopVectorizer *ILV, VPlan *Plan, LLVMContext &Ctx)
      : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan),
        LVer(nullptr), TypeAnalysis(Ctx) {}
  /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
  ElementCount VF;
  unsigned UF;

  /// Hold the indices to generate specific scalar instructions. Null indicates
  /// that all instances are to be generated, using either scalar or vector
  /// instructions.
  std::optional<VPIteration> Instance;

  struct DataState {
    /// A type for vectorized values in the new loop. Each value from the
    /// original loop, when vectorized, is represented by UF vector values in
    /// the new unrolled loop, where UF is the unroll factor.
    typedef SmallVector<Value *, 2> PerPartValuesTy;

    DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;

    using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>;
    DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;
  } Data;

  /// Get the generated Value for a given VPValue and a given Part. Note that
  /// as some Defs are still created by ILV and managed in its ValueMap, this
  /// method will delegate the call to ILV in such cases in order to provide
  /// callers a consistent API.
  /// \see set.
  Value *get(VPValue *Def, unsigned Part);
  /// Get the generated Value for a given VPValue and given Part and Lane.
  Value *get(VPValue *Def, const VPIteration &Instance);

  bool hasVectorValue(VPValue *Def, unsigned Part) {
    auto I = Data.PerPartOutput.find(Def);
    return I != Data.PerPartOutput.end() && Part < I->second.size() &&
           I->second[Part];
  }

  bool hasScalarValue(VPValue *Def, VPIteration Instance) {
    auto I = Data.PerPartScalars.find(Def);
    if (I == Data.PerPartScalars.end())
      return false;
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    return Instance.Part < I->second.size() &&
           CacheIdx < I->second[Instance.Part].size() &&
           I->second[Instance.Part][CacheIdx];
  }
  /// Set the generated Value for a given VPValue and a given Part.
  void set(VPValue *Def, Value *V, unsigned Part) {
    if (!Data.PerPartOutput.count(Def)) {
      DataState::PerPartValuesTy Entry(UF);
      Data.PerPartOutput[Def] = Entry;
    }
    Data.PerPartOutput[Def][Part] = V;
  }

  /// Reset an existing vector value for \p Def and a given \p Part.
  void reset(VPValue *Def, Value *V, unsigned Part) {
    auto Iter = Data.PerPartOutput.find(Def);
    assert(Iter != Data.PerPartOutput.end() &&
           "need to overwrite existing value");
    Iter->second[Part] = V;
  }

  /// Set the generated scalar \p V for \p Def and the given \p Instance.
  void set(VPValue *Def, Value *V, const VPIteration &Instance) {
    auto Iter = Data.PerPartScalars.insert({Def, {}});
    auto &PerPartVec = Iter.first->second;
    while (PerPartVec.size() <= Instance.Part)
      PerPartVec.emplace_back();
    auto &Scalars = PerPartVec[Instance.Part];
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    while (Scalars.size() <= CacheIdx)
      Scalars.push_back(nullptr);
    assert(!Scalars[CacheIdx] && "should not overwrite existing value");
    Scalars[CacheIdx] = V;
  }
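  // Illustrative sketch of the caching protocol (Def and V are assumed; State
  // is a VPTransformState): values are keyed by Def, part and cached lane.
  //
  //   State.set(Def, V, 0);                 // vector value for part 0
  //   State.set(Def, V, VPIteration(0, 0)); // scalar value for part 0, lane 0
  //   if (State.hasScalarValue(Def, {0, 0}))
  //     Value *S = State.get(Def, VPIteration(0, 0));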
  /// Reset an existing scalar value for \p Def and a given \p Instance.
  void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
    auto Iter = Data.PerPartScalars.find(Def);
    assert(Iter != Data.PerPartScalars.end() &&
           "need to overwrite existing value");
    assert(Instance.Part < Iter->second.size() &&
           "need to overwrite existing value");
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    assert(CacheIdx < Iter->second[Instance.Part].size() &&
           "need to overwrite existing value");
    Iter->second[Instance.Part][CacheIdx] = V;
  }
  /// Add additional metadata to \p To that was not present on \p Orig.
  ///
  /// Currently this is used to add the noalias annotations based on the
  /// inserted memchecks. Use this for instructions that are *cloned* into the
  /// vector loop.
  void addNewMetadata(Instruction *To, const Instruction *Orig);

  /// Add metadata from one instruction to another.
  ///
  /// This includes both the original MDs from \p From and additional ones (\see
  /// addNewMetadata). Use this for *newly created* instructions in the vector
  /// loop.
  void addMetadata(Instruction *To, Instruction *From);

  /// Similar to the previous function but it adds the metadata to a
  /// vector of instructions.
  void addMetadata(ArrayRef<Value *> To, Instruction *From);

  /// Set the debug location in the builder using the debug location \p DL.
  void setDebugLocFrom(DebugLoc DL);

  /// Construct the vector value of a scalarized value \p V one lane at a time.
  void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance);
  /// Hold state information used when constructing the CFG of the output IR,
  /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
  struct CFGState {
    /// The previous VPBasicBlock visited. Initially set to null.
    VPBasicBlock *PrevVPBB = nullptr;

    /// The previous IR BasicBlock created or used. Initially set to the new
    /// header BasicBlock.
    BasicBlock *PrevBB = nullptr;

    /// The last IR BasicBlock in the output IR. Set to the exit block of the
    /// vector loop.
    BasicBlock *ExitBB = nullptr;

    /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
    /// of replication, maps the BasicBlock of the last replica created.
    SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;

    CFGState() = default;

    /// Returns the BasicBlock* mapped to the pre-header of the loop region
    /// containing \p R.
    BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
  } CFG;
  /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
  LoopInfo *LI;

  /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
  DominatorTree *DT;

  /// Hold a reference to the IRBuilder used to generate output IR code.
  IRBuilderBase &Builder;

  VPValue2ValueTy VPValue2Value;

  /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
  Value *CanonicalIV = nullptr;

  /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
  InnerLoopVectorizer *ILV;

  /// Pointer to the VPlan for which code is generated.
  VPlan *Plan;

  /// The loop object for the current parent region, or nullptr.
  Loop *CurrentVectorLoop = nullptr;

  /// LoopVersioning. It's only set up (non-null) if memchecks were
  /// used.
  ///
  /// This is currently only used to add no-alias metadata based on the
  /// memchecks. The actual versioning is performed manually.
  LoopVersioning *LVer = nullptr;

  /// Map SCEVs to their expanded values. Populated when executing
  /// VPExpandSCEVRecipes.
  DenseMap<const SCEV *, Value *> ExpandedSCEVs;

  /// VPlan-based type analysis.
  VPTypeAnalysis TypeAnalysis;
};
/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
class VPBlockBase {
  friend class VPBlockUtils;

  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).

  /// An optional name for the block.
  std::string Name;

  /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
  /// it is a topmost VPBlockBase.
  VPRegionBlock *Parent = nullptr;

  /// List of predecessor blocks.
  SmallVector<VPBlockBase *, 1> Predecessors;

  /// List of successor blocks.
  SmallVector<VPBlockBase *, 1> Successors;

  /// VPlan containing the block. Can only be set on the entry block of the
  /// plan.
  VPlan *Plan = nullptr;

  /// Add \p Successor as the last successor to this block.
  void appendSuccessor(VPBlockBase *Successor) {
    assert(Successor && "Cannot add nullptr successor!");
    Successors.push_back(Successor);
  }

  /// Add \p Predecessor as the last predecessor to this block.
  void appendPredecessor(VPBlockBase *Predecessor) {
    assert(Predecessor && "Cannot add nullptr predecessor!");
    Predecessors.push_back(Predecessor);
  }

  /// Remove \p Predecessor from the predecessors of this block.
  void removePredecessor(VPBlockBase *Predecessor) {
    auto Pos = find(Predecessors, Predecessor);
    assert(Pos && "Predecessor does not exist");
    Predecessors.erase(Pos);
  }

  /// Remove \p Successor from the successors of this block.
  void removeSuccessor(VPBlockBase *Successor) {
    auto Pos = find(Successors, Successor);
    assert(Pos && "Successor does not exist");
    Successors.erase(Pos);
  }

protected:
  VPBlockBase(const unsigned char SC, const std::string &N)
      : SubclassID(SC), Name(N) {}
public:
  /// An enumeration for keeping track of the concrete subclasses of
  /// VPBlockBase that are actually instantiated. Values of this enumeration
  /// are kept in the SubclassID field of the VPBlockBase objects. They are
  /// used for concrete type identification.
  using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };

  using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;

  virtual ~VPBlockBase() = default;

  const std::string &getName() const { return Name; }

  void setName(const Twine &newName) { Name = newName.str(); }

  /// \return an ID for the concrete type of this object.
  /// This is used to implement the classof checks. This should not be used
  /// for any other purpose, as the values may change as LLVM evolves.
  unsigned getVPBlockID() const { return SubclassID; }

  VPRegionBlock *getParent() { return Parent; }
  const VPRegionBlock *getParent() const { return Parent; }
  /// \return A pointer to the plan containing the current block.
  VPlan *getPlan();
  const VPlan *getPlan() const;

  /// Sets the pointer of the plan containing the block. The block must be the
  /// entry block into the VPlan.
  void setPlan(VPlan *ParentPlan);

  void setParent(VPRegionBlock *P) { Parent = P; }

  /// \return the VPBasicBlock that is the entry of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getEntryBasicBlock() const;
  VPBasicBlock *getEntryBasicBlock();

  /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getExitingBasicBlock() const;
  VPBasicBlock *getExitingBasicBlock();
  const VPBlocksTy &getSuccessors() const { return Successors; }
  VPBlocksTy &getSuccessors() { return Successors; }

  iterator_range<VPBlockBase **> successors() { return Successors; }

  const VPBlocksTy &getPredecessors() const { return Predecessors; }
  VPBlocksTy &getPredecessors() { return Predecessors; }

  /// \return the successor of this VPBlockBase if it has a single successor.
  /// Otherwise return a null pointer.
  VPBlockBase *getSingleSuccessor() const {
    return (Successors.size() == 1 ? *Successors.begin() : nullptr);
  }

  /// \return the predecessor of this VPBlockBase if it has a single
  /// predecessor. Otherwise return a null pointer.
  VPBlockBase *getSinglePredecessor() const {
    return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
  }

  size_t getNumSuccessors() const { return Successors.size(); }
  size_t getNumPredecessors() const { return Predecessors.size(); }
  /// An Enclosing Block of a block B is any block containing B, including B
  /// itself. \return the closest enclosing block starting from "this", which
  /// has successors. \return the root enclosing block if all enclosing blocks
  /// have no successors.
  VPBlockBase *getEnclosingBlockWithSuccessors();

  /// \return the closest enclosing block starting from "this", which has
  /// predecessors. \return the root enclosing block if all enclosing blocks
  /// have no predecessors.
  VPBlockBase *getEnclosingBlockWithPredecessors();

  /// \return the successors either attached directly to this VPBlockBase or, if
  /// this VPBlockBase is the exit block of a VPRegionBlock and has no
  /// successors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has successors and return them. If no such
  /// VPRegionBlock exists, return the (empty) successors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalSuccessors() {
    return getEnclosingBlockWithSuccessors()->getSuccessors();
  }

  /// \return the hierarchical successor of this VPBlockBase if it has a single
  /// hierarchical successor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalSuccessor() {
    return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
  }

  /// \return the predecessors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
  /// predecessors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has predecessors and return them. If no such
  /// VPRegionBlock exists, return the (empty) predecessors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalPredecessors() {
    return getEnclosingBlockWithPredecessors()->getPredecessors();
  }

  /// \return the hierarchical predecessor of this VPBlockBase if it has a
  /// single hierarchical predecessor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalPredecessor() {
    return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
  }
  /// Set a given VPBlockBase \p Successor as the single successor of this
  /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
  /// This VPBlockBase must have no successors.
  void setOneSuccessor(VPBlockBase *Successor) {
    assert(Successors.empty() && "Setting one successor when others exist.");
    assert(Successor->getParent() == getParent() &&
           "connected blocks must have the same parent");
    appendSuccessor(Successor);
  }

  /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
  /// successors of this VPBlockBase. This VPBlockBase is not added as
  /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
  /// successors.
  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
    assert(Successors.empty() && "Setting two successors when others exist.");
    appendSuccessor(IfTrue);
    appendSuccessor(IfFalse);
  }

  /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
  /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
  /// as successor of any VPBasicBlock in \p NewPreds.
  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
    assert(Predecessors.empty() && "Block predecessors already set.");
    for (auto *Pred : NewPreds)
      appendPredecessor(Pred);
  }

  /// Remove all the predecessors of this block.
  void clearPredecessors() { Predecessors.clear(); }

  /// Remove all the successors of this block.
  void clearSuccessors() { Successors.clear(); }

  /// The method which generates the output IR that corresponds to this
  /// VPBlockBase, thereby "executing" the VPlan.
  virtual void execute(VPTransformState *State) = 0;

  /// Delete all blocks reachable from a given VPBlockBase, inclusive.
  static void deleteCFG(VPBlockBase *Entry);

  /// Return true if it is legal to hoist instructions into this block.
  bool isLegalToHoistInto() {
    // There are currently no constraints that prevent an instruction from
    // being hoisted into a VPBlockBase.
    return true;
  }

  /// Replace all operands of VPUsers in the block with \p NewValue and also
  /// replaces all uses of VPValues defined in the block with NewValue.
  virtual void dropAllReferences(VPValue *NewValue) = 0;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printAsOperand(raw_ostream &OS, bool PrintType) const {
    OS << getName();
  }

  /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
  /// with \p Indent. \p SlotTracker is used to print unnamed VPValues using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  virtual void print(raw_ostream &O, const Twine &Indent,
                     VPSlotTracker &SlotTracker) const = 0;

  /// Print plain-text dump of this VPlan to \p O.
  void print(raw_ostream &O) const {
    VPSlotTracker SlotTracker(getPlan());
    print(O, "", SlotTracker);
  }

  /// Print the successors of this block to \p O, prefixing all lines with \p
  /// Indent.
  void printSuccessors(raw_ostream &O, const Twine &Indent) const;

  /// Dump this VPBlockBase to dbgs().
  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
#endif
};
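// Illustrative sketch of CFG traversal (Block is an assumed VPBlockBase *):
//
//   for (VPBlockBase *Succ : Block->getSuccessors())
//     ; // direct successors only
//   if (VPBlockBase *Succ = Block->getSingleHierarchicalSuccessor())
//     ; // also crosses region boundaries when Block is an exiting block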
/// A value that is used outside the VPlan. The operand of the user needs to be
/// added to the associated LCSSA phi node.
class VPLiveOut : public VPUser {
  PHINode *Phi;

public:
  VPLiveOut(PHINode *Phi, VPValue *Op)
      : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}

  static inline bool classof(const VPUser *U) {
    return U->getVPUserID() == VPUser::VPUserID::LiveOut;
  }

  /// Fixup the wrapped LCSSA phi node in the unique exit block. This simply
  /// means we need to add the appropriate incoming value from the middle
  /// block as exiting edges from the scalar epilogue loop (if present) are
  /// already in place, and we exit the vector loop exclusively to the middle
  /// block.
  void fixPhi(VPlan &Plan, VPTransformState &State);

  /// Returns true if the VPLiveOut uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  PHINode *getPhi() const { return Phi; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPLiveOut to \p O.
  void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
#endif
};
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
/// VPRecipeBase and VPValue separately.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
                     public VPDef,
                     public VPUser {
  friend VPBasicBlock;
  friend class VPBlockUtils;

  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

  /// The debug location for the recipe.
  DebugLoc DL;

public:
  VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
               DebugLoc DL = {})
      : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}

  template <typename IterT>
  VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands,
               DebugLoc DL = {})
      : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}
  virtual ~VPRecipeBase() = default;

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(VPTransformState &State) = 0;

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);
  /// Insert an unlinked recipe into \p BB immediately before the insertion
  /// point \p IP.
  void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// Unlink this recipe and insert into BB before I.
  ///
  /// \pre I is a valid iterator into BB.
  void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    // All VPDefs are also VPRecipeBases.
    return true;
  }

  static inline bool classof(const VPUser *U) {
    return U->getVPUserID() == VPUser::VPUserID::Recipe;
  }

  /// Returns true if the recipe may have side-effects.
  bool mayHaveSideEffects() const;

  /// Returns true for PHI-like recipes.
  bool isPhi() const {
    return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
  }

  /// Returns true if the recipe may read from memory.
  bool mayReadFromMemory() const;

  /// Returns true if the recipe may write to memory.
  bool mayWriteToMemory() const;

  /// Returns true if the recipe may read from or write to memory.
  bool mayReadOrWriteMemory() const {
    return mayReadFromMemory() || mayWriteToMemory();
  }

  /// Returns the debug location of the recipe.
  DebugLoc getDebugLoc() const { return DL; }
};
// Helper macro to define common classof implementations for recipes.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *D) {                                 \
    return D->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    auto *R = V->getDefiningRecipe();                                          \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    auto *R = dyn_cast<VPRecipeBase>(U);                                       \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPDefID() == VPDefID;                                         \
  }
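// For illustration: each concrete recipe instantiates this macro with its own
// VPDefID (e.g. `VP_CLASSOF_IMPL(VPDef::VPWidenSC)` in VPWidenRecipe below),
// which is what lets the usual LLVM casting utilities work uniformly:
//
//   if (auto *Widen = dyn_cast<VPWidenRecipe>(R)) // R is a VPRecipeBase *
//     (void)Widen->getOpcode();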
/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
/// more output IR instructions that define a single result VPValue.
/// Note that VPRecipeBase must be inherited from before VPValue.
class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
public:
  template <typename IterT>
  VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this) {}

  VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
                    DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this) {}

  template <typename IterT>
  VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
                    DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}

  static inline bool classof(const VPRecipeBase *R) {
    switch (R->getVPDefID()) {
    case VPRecipeBase::VPDerivedIVSC:
    case VPRecipeBase::VPExpandSCEVSC:
    case VPRecipeBase::VPInstructionSC:
    case VPRecipeBase::VPReductionSC:
    case VPRecipeBase::VPReplicateSC:
    case VPRecipeBase::VPScalarIVStepsSC:
    case VPRecipeBase::VPVectorPointerSC:
    case VPRecipeBase::VPWidenCallSC:
    case VPRecipeBase::VPWidenCanonicalIVSC:
    case VPRecipeBase::VPWidenCastSC:
    case VPRecipeBase::VPWidenGEPSC:
    case VPRecipeBase::VPWidenSC:
    case VPRecipeBase::VPWidenSelectSC:
    case VPRecipeBase::VPBlendSC:
    case VPRecipeBase::VPPredInstPHISC:
    case VPRecipeBase::VPCanonicalIVPHISC:
    case VPRecipeBase::VPActiveLaneMaskPHISC:
    case VPRecipeBase::VPFirstOrderRecurrencePHISC:
    case VPRecipeBase::VPWidenPHISC:
    case VPRecipeBase::VPWidenIntOrFpInductionSC:
    case VPRecipeBase::VPWidenPointerInductionSC:
    case VPRecipeBase::VPReductionPHISC:
      return true;
    case VPRecipeBase::VPInterleaveSC:
    case VPRecipeBase::VPBranchOnMaskSC:
    case VPRecipeBase::VPWidenMemoryInstructionSC:
      // TODO: Widened stores don't define a value, but widened loads do. Split
      // the recipes to be able to make widened loads VPSingleDefRecipes.
      return false;
    }
    llvm_unreachable("Unhandled VPDefID");
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  /// Returns the underlying instruction.
  Instruction *getUnderlyingInstr() {
    return cast<Instruction>(getUnderlyingValue());
  }
  const Instruction *getUnderlyingInstr() const {
    return cast<Instruction>(getUnderlyingValue());
  }
};
/// Class to record LLVM IR flags for a recipe along with it.
class VPRecipeWithIRFlags : public VPSingleDefRecipe {
  enum class OperationType : unsigned char {
    Cmp,
    OverflowingBinOp,
    DisjointOp,
    PossiblyExactOp,
    GEPOp,
    FPMathOp,
    NonNegOp,
    Other
  };

public:
  struct WrapFlagsTy {
    char HasNUW : 1;
    char HasNSW : 1;

    WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
  };

protected:
  struct GEPFlagsTy {
    char IsInBounds : 1;
    GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}
  };

private:
  struct DisjointFlagsTy {
    char IsDisjoint : 1;
  };
  struct ExactFlagsTy {
    char IsExact : 1;
  };
  struct NonNegFlagsTy {
    char NonNeg : 1;
  };
  struct FastMathFlagsTy {
    char AllowReassoc : 1;
    char NoNaNs : 1;
    char NoInfs : 1;
    char NoSignedZeros : 1;
    char AllowReciprocal : 1;
    char AllowContract : 1;
    char ApproxFunc : 1;

    FastMathFlagsTy(const FastMathFlags &FMF);
  };

  OperationType OpType;

  union {
    CmpInst::Predicate CmpPredicate;
    WrapFlagsTy WrapFlags;
    DisjointFlagsTy DisjointFlags;
    ExactFlagsTy ExactFlags;
    GEPFlagsTy GEPFlags;
    NonNegFlagsTy NonNegFlags;
    FastMathFlagsTy FMFs;
    unsigned AllFlags;
  };

public:
  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL) {
    OpType = OperationType::Other;
    AllFlags = 0;
  }
  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
      : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()) {
    if (auto *Op = dyn_cast<CmpInst>(&I)) {
      OpType = OperationType::Cmp;
      CmpPredicate = Op->getPredicate();
    } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
      OpType = OperationType::DisjointOp;
      DisjointFlags.IsDisjoint = Op->isDisjoint();
    } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
      OpType = OperationType::OverflowingBinOp;
      WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
    } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
      OpType = OperationType::PossiblyExactOp;
      ExactFlags.IsExact = Op->isExact();
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
      OpType = OperationType::GEPOp;
      GEPFlags.IsInBounds = GEP->isInBounds();
    } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
      OpType = OperationType::NonNegOp;
      NonNegFlags.NonNeg = PNNI->hasNonNeg();
    } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
      OpType = OperationType::FPMathOp;
      FMFs = Op->getFastMathFlags();
    } else {
      OpType = OperationType::Other;
      AllFlags = 0;
    }
  }
  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      CmpInst::Predicate Pred, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
        CmpPredicate(Pred) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      WrapFlagsTy WrapFlags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL),
        OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      FastMathFlags FMFs, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
        FMFs(FMFs) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      GEPFlagsTy GEPFlags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
        GEPFlags(GEPFlags) {}
  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
           R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
           R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
  }
  /// Drop all poison-generating flags.
  void dropPoisonGeneratingFlags() {
    // NOTE: This needs to be kept in-sync with
    // Instruction::dropPoisonGeneratingFlags.
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      WrapFlags.HasNUW = false;
      WrapFlags.HasNSW = false;
      break;
    case OperationType::DisjointOp:
      DisjointFlags.IsDisjoint = false;
      break;
    case OperationType::PossiblyExactOp:
      ExactFlags.IsExact = false;
      break;
    case OperationType::GEPOp:
      GEPFlags.IsInBounds = false;
      break;
    case OperationType::FPMathOp:
      FMFs.NoNaNs = false;
      FMFs.NoInfs = false;
      break;
    case OperationType::NonNegOp:
      NonNegFlags.NonNeg = false;
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }
  /// Set the IR flags for \p I.
  void setFlags(Instruction *I) const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
      I->setHasNoSignedWrap(WrapFlags.HasNSW);
      break;
    case OperationType::DisjointOp:
      cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
      break;
    case OperationType::PossiblyExactOp:
      I->setIsExact(ExactFlags.IsExact);
      break;
    case OperationType::GEPOp:
      cast<GetElementPtrInst>(I)->setIsInBounds(GEPFlags.IsInBounds);
      break;
    case OperationType::FPMathOp:
      I->setHasAllowReassoc(FMFs.AllowReassoc);
      I->setHasNoNaNs(FMFs.NoNaNs);
      I->setHasNoInfs(FMFs.NoInfs);
      I->setHasNoSignedZeros(FMFs.NoSignedZeros);
      I->setHasAllowReciprocal(FMFs.AllowReciprocal);
      I->setHasAllowContract(FMFs.AllowContract);
      I->setHasApproxFunc(FMFs.ApproxFunc);
      break;
    case OperationType::NonNegOp:
      I->setNonNeg(NonNegFlags.NonNeg);
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }
  CmpInst::Predicate getPredicate() const {
    assert(OpType == OperationType::Cmp &&
           "recipe doesn't have a compare predicate");
    return CmpPredicate;
  }

  bool isInBounds() const {
    assert(OpType == OperationType::GEPOp &&
           "recipe doesn't have inbounds flag");
    return GEPFlags.IsInBounds;
  }

  /// Returns true if the recipe has fast-math flags.
  bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }

  FastMathFlags getFastMathFlags() const;

  bool hasNoUnsignedWrap() const {
    assert(OpType == OperationType::OverflowingBinOp &&
           "recipe doesn't have a NUW flag");
    return WrapFlags.HasNUW;
  }

  bool hasNoSignedWrap() const {
    assert(OpType == OperationType::OverflowingBinOp &&
           "recipe doesn't have a NSW flag");
    return WrapFlags.HasNSW;
  }
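  // Illustrative sketch of how the recorded flags are used: a recipe keeps
  // its ingredient's flags and re-applies them to the IR it generates during
  // execute(), typically via setFlags (NewI and the condition are assumed):
  //
  //   if (MayGeneratePoison)
  //     Recipe->dropPoisonGeneratingFlags(); // e.g. clears nuw/nsw/inbounds
  //   Recipe->setFlags(NewI);                // transfer the surviving flags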
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printFlags(raw_ostream &O) const;
#endif
};
/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While, as any Recipe, it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPRecipeWithIRFlags {
  friend class VPlanSlp;

public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
  enum {
    FirstOrderRecurrenceSplice =
        Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
                                      // values of a first-order recurrence.
    Not,
    SLPLoad,
    SLPStore,
    ActiveLaneMask,
    CalculateTripCountMinusVF,
    // Increment the canonical IV separately for each unrolled part.
    CanonicalIVIncrementForPart,
    BranchOnCount,
    BranchOnCond,
    ComputeReductionResult,
  };

private:
  typedef unsigned char OpcodeTy;
  OpcodeTy Opcode;

  /// An optional name that can be used for the generated IR instruction.
  const std::string Name;

  /// Utility method serving execute(): generates a single instance of the
  /// modeled instruction. \returns the generated value for \p Part.
  /// In some cases an existing value is returned rather than a generated
  /// one.
  Value *generateInstruction(VPTransformState &State, unsigned Part);

#if !defined(NDEBUG)
  /// Return true if the VPInstruction is a floating point math operation, i.e.
  /// has fast-math flags.
  bool isFPMathOp() const;
#endif

protected:
  void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
public:
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,
                const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
        Opcode(Opcode), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                DebugLoc DL = {}, const Twine &Name = "")
      : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {}

  VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
                VPValue *B, DebugLoc DL = {}, const Twine &Name = "");

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
        Opcode(Opcode), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");

  VP_CLASSOF_IMPL(VPDef::VPInstructionSC)

  unsigned getOpcode() const { return Opcode; }
  /// Generate the instruction.
  /// TODO: We currently execute only per-part unless a specific instance is
  /// provided.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPInstruction to \p O.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Print the VPInstruction to dbgs() (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif
  /// Return true if this instruction may modify memory.
  bool mayWriteToMemory() const {
    // TODO: we can use attributes of the called function to rule out memory
    //       modifications.
    return Opcode == Instruction::Store || Opcode == Instruction::Call ||
           Opcode == Instruction::Invoke || Opcode == SLPStore;
  }

  bool hasResult() const {
    // CallInst may or may not have a result, depending on the called function.
    // Conservatively assume calls have results for now.
    switch (getOpcode()) {
    case Instruction::Ret:
    case Instruction::Br:
    case Instruction::Store:
    case Instruction::Switch:
    case Instruction::IndirectBr:
    case Instruction::Resume:
    case Instruction::CatchRet:
    case Instruction::Unreachable:
    case Instruction::Fence:
    case Instruction::AtomicRMW:
    case VPInstruction::BranchOnCond:
    case VPInstruction::BranchOnCount:
      return false;
    default:
      return true;
    }
  }
  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    if (getOperand(0) != Op)
      return false;
    switch (getOpcode()) {
    default:
      return false;
    case VPInstruction::ActiveLaneMask:
    case VPInstruction::CalculateTripCountMinusVF:
    case VPInstruction::CanonicalIVIncrementForPart:
    case VPInstruction::BranchOnCount:
      return true;
    }
    llvm_unreachable("switch should return");
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool onlyFirstPartUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    if (getOperand(0) != Op)
      return false;
    switch (getOpcode()) {
    default:
      return false;
    case VPInstruction::BranchOnCount:
      return true;
    }
    llvm_unreachable("switch should return");
  }
};
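// Illustrative sketch of building VPInstructions (A and B are assumed
// VPValue operands; not part of this header):
//
//   auto *Cmp = new VPInstruction(Instruction::ICmp, CmpInst::ICMP_ULT, A, B,
//                                 DebugLoc(), "cmp");
//   auto *Neg = new VPInstruction(VPInstruction::Not, {Cmp});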
/// VPWidenRecipe is a recipe for producing a vector-type copy of its
/// ingredient. This recipe covers most of the traditional vectorization cases
/// where each ingredient transforms into a vectorized version of itself.
class VPWidenRecipe : public VPRecipeWithIRFlags {
  unsigned Opcode;

public:
  template <typename IterT>
  VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
      : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
        Opcode(I.getOpcode()) {}

  ~VPWidenRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenSC)

  /// Produce widened copies of all Ingredients.
  void execute(VPTransformState &State) override;

  unsigned getOpcode() const { return Opcode; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
  /// Cast instruction opcode.
  Instruction::CastOps Opcode;

  /// Result type for the cast.
  Type *ResultTy;

public:
  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
                    CastInst &UI)
      : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
        ResultTy(ResultTy) {
    assert(UI.getOpcode() == Opcode &&
           "opcode of underlying cast doesn't match");
    assert(UI.getType() == ResultTy &&
           "result type of underlying cast doesn't match");
  }

  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
      : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
        ResultTy(ResultTy) {}

  ~VPWidenCastRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)

  /// Produce widened copies of the cast.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  Instruction::CastOps getOpcode() const { return Opcode; }

  /// Returns the result type of the cast.
  Type *getResultType() const { return ResultTy; }
};
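// Illustrative sketch (Op is an assumed VPValue, Ctx an assumed LLVMContext):
// widen a zero-extend to i32 with no underlying IR cast instruction:
//
//   auto *Cast = new VPWidenCastRecipe(Instruction::ZExt, Op,
//                                      Type::getInt32Ty(Ctx));
//   Type *ResTy = Cast->getResultType(); // i32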
/// A recipe for widening Call instructions.
class VPWidenCallRecipe : public VPSingleDefRecipe {
  /// ID of the vector intrinsic to call when widening the call. If set to
  /// Intrinsic::not_intrinsic, a library call will be used instead.
  Intrinsic::ID VectorIntrinsicID;
  /// If this recipe represents a library call, Variant stores a pointer to
  /// the chosen function. There is a 1:1 mapping between a given VF and the
  /// chosen vectorized variant, so there will be a different vplan for each
  /// VF with a valid variant.
  Function *Variant;

public:
  template <typename IterT>
  VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments,
                    Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
                    Function *Variant = nullptr)
      : VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, &I, DL),
        VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {}

  ~VPWidenCallRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)

  /// Produce a widened version of the call instruction.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
/// A recipe for widening select instructions.
struct VPWidenSelectRecipe : public VPSingleDefRecipe {
  template <typename IterT>
  VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
      : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
                          I.getDebugLoc()) {}

  ~VPWidenSelectRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)

  /// Produce a widened version of the select instruction.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getCond() const {
    return getOperand(0);
  }

  bool isInvariantCond() const {
    return getCond()->isDefinedOutsideVectorRegions();
  }
};
/// A recipe for handling GEP instructions.
class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
  bool isPointerLoopInvariant() const {
    return getOperand(0)->isDefinedOutsideVectorRegions();
  }

  bool isIndexLoopInvariant(unsigned I) const {
    return getOperand(I + 1)->isDefinedOutsideVectorRegions();
  }

  bool areAllOperandsInvariant() const {
    return all_of(operands(), [](VPValue *Op) {
      return Op->isDefinedOutsideVectorRegions();
    });
  }

public:
  template <typename IterT>
  VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
      : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}

  ~VPWidenGEPRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)

  /// Generate the gep nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
/// A recipe to compute the pointers for widened memory accesses of IndexedTy
/// for all parts. If IsReverse is true, compute pointers for accessing the
/// input in reverse order per part.
class VPVectorPointerRecipe : public VPRecipeWithIRFlags {
  Type *IndexedTy;
  bool IsReverse;

public:
  VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
                        bool IsInBounds, DebugLoc DL)
      : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
                            GEPFlagsTy(IsInBounds), DL),
        IndexedTy(IndexedTy), IsReverse(IsReverse) {}

  VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)

  void execute(VPTransformState &State) override;

  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
/// A pure virtual base class for all recipes modeling header phis, including
/// phis for first order recurrences, pointer inductions and reductions. The
/// start value is the first operand of the recipe and the incoming value from
/// the backedge is the second operand.
///
/// Inductions are modeled using the following sub-classes:
///  * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
///    starting at a specified value (zero for the main vector loop, the resume
///    value for the epilogue vector loop) and stepping by 1. The induction
///    controls exiting of the vector loop by comparing against the vector trip
///    count. Produces a single scalar PHI for the induction value per
///    iteration.
///  * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
///    floating point inductions with arbitrary start and step values. Produces
///    a vector PHI per-part.
///  * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
///    value of an IV with different start and step values. Produces a single
///    scalar value per iteration.
///  * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
///    canonical or derived induction.
///  * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
///    pointer induction. Produces either a vector PHI per-part or scalar values
///    per-lane based on the canonical induction.
class VPHeaderPHIRecipe : public VPSingleDefRecipe {
protected:
  VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
                    VPValue *Start = nullptr, DebugLoc DL = {})
      : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
    if (Start)
      addOperand(Start);
  }

public:
  ~VPHeaderPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *B) {
    return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
           B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
  }
  static inline bool classof(const VPValue *V) {
    auto *B = V->getDefiningRecipe();
    return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
           B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
  }

  /// Generate the phi nodes.
  void execute(VPTransformState &State) override = 0;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override = 0;
#endif

  /// Returns the start value of the phi, if one is set.
  VPValue *getStartValue() {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }
  VPValue *getStartValue() const {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }

  /// Update the start value of the recipe.
  void setStartValue(VPValue *V) { setOperand(0, V); }

  /// Returns the incoming value from the loop backedge.
  virtual VPValue *getBackedgeValue() {
    return getOperand(1);
  }

  /// Returns the backedge value as a recipe. The backedge value is guaranteed
  /// to be a recipe.
  virtual VPRecipeBase &getBackedgeRecipe() {
    return *getBackedgeValue()->getDefiningRecipe();
  }
};
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector values.
class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
  PHINode *IV;
  TruncInst *Trunc;
  const InductionDescriptor &IndDesc;

public:
  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc)
      : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
        Trunc(nullptr), IndDesc(IndDesc) {
    addOperand(Step);
  }

  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc,
                                TruncInst *Trunc)
      : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
        IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
    addOperand(Step);
  }

  ~VPWidenIntOrFpInductionRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)

  /// Generate the vectorized and scalarized versions of the phi node as
  /// needed by their users.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getBackedgeValue() override {
    // TODO: All operands of base recipe must exist and be at same index in
    // derived recipe.
    llvm_unreachable(
        "VPWidenIntOrFpInductionRecipe generates its own backedge value");
  }

  VPRecipeBase &getBackedgeRecipe() override {
    // TODO: All operands of base recipe must exist and be at same index in
    // derived recipe.
    llvm_unreachable(
        "VPWidenIntOrFpInductionRecipe generates its own backedge value");
  }

  /// Returns the step value of the induction.
  VPValue *getStepValue() { return getOperand(1); }
  const VPValue *getStepValue() const { return getOperand(1); }

  /// Returns the first defined value as TruncInst, if it is one, or nullptr
  /// otherwise.
  TruncInst *getTruncInst() { return Trunc; }
  const TruncInst *getTruncInst() const { return Trunc; }

  PHINode *getPHINode() { return IV; }

  /// Returns the induction descriptor for the recipe.
  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

  /// Returns true if the induction is canonical, i.e. starting at 0 and
  /// incremented by UF * VF (= the original IV is incremented by 1).
  bool isCanonical() const;

  /// Returns the scalar type of the induction.
  Type *getScalarType() const {
    return Trunc ? Trunc->getType() : IV->getType();
  }
};
class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
  const InductionDescriptor &IndDesc;

  bool IsScalarAfterVectorization;

public:
  /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
  /// Start.
  VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc,
                                bool IsScalarAfterVectorization)
      : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
        IndDesc(IndDesc),
        IsScalarAfterVectorization(IsScalarAfterVectorization) {
    addOperand(Start);
    addOperand(Step);
  }

  ~VPWidenPointerInductionRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)

  /// Generate vector values for the pointer induction.
  void execute(VPTransformState &State) override;

  /// Returns true if only scalar values will be generated.
  bool onlyScalarsGenerated(ElementCount VF);

  /// Returns the induction descriptor for the recipe.
  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
/// A recipe for handling header phis that are widened in the vector loop.
/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
/// managed in the recipe directly.
class VPWidenPHIRecipe : public VPHeaderPHIRecipe {
  /// List of incoming blocks. Only used in the VPlan native path.
  SmallVector<VPBasicBlock *, 2> IncomingBlocks;

public:
  /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
  VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
      : VPHeaderPHIRecipe(VPDef::VPWidenPHISC, Phi) {
    if (Start)
      addOperand(Start);
  }

  ~VPWidenPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
  void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
    addOperand(IncomingV);
    IncomingBlocks.push_back(IncomingBlock);
  }

  /// Returns the \p I th incoming VPBasicBlock.
  VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }

  /// Returns the \p I th incoming VPValue.
  VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
};
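
// For illustration, a minimal sketch of how incoming values and blocks stay
// paired by index (hypothetical values; assumes the recipe was created without
// a start value, as in the VPlan native path):
//
//   WidenPhi->addIncoming(V0, VPBB0);
//   WidenPhi->addIncoming(V1, VPBB1);
//   VPValue *V = WidenPhi->getIncomingValue(1);      // V1, i.e. operand 1
//   VPBasicBlock *B = WidenPhi->getIncomingBlock(1); // VPBB1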
/// A recipe for handling first-order recurrence phis. The start value is the
/// first operand of the recipe and the incoming value from the backedge is the
/// second operand.
struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
  VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
      : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}

  VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)

  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
  }

  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
/// A recipe for handling reduction phis. The start value is the first operand
/// of the recipe and the incoming value from the backedge is the second
/// operand.
class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
  /// Descriptor for the reduction.
  const RecurrenceDescriptor &RdxDesc;

  /// The phi is part of an in-loop reduction.
  bool IsInLoop;

  /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
  bool IsOrdered;

public:
  /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by
  /// \p RdxDesc.
  VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
                       VPValue &Start, bool IsInLoop = false,
                       bool IsOrdered = false)
      : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
        RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
    assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
  }

  ~VPReductionPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPDef::VPReductionPHISC;
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const RecurrenceDescriptor &getRecurrenceDescriptor() const {
    return RdxDesc;
  }

  /// Returns true, if the phi is part of an ordered reduction.
  bool isOrdered() const { return IsOrdered; }

  /// Returns true, if the phi is part of an in-loop reduction.
  bool isInLoop() const { return IsInLoop; }
};
/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class VPBlendRecipe : public VPSingleDefRecipe {
public:
  /// The blend operation is a User of the incoming values and of their
  /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
  /// might be incoming with a full mask for which there is no VPValue.
  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
      : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
    assert(Operands.size() > 0 &&
           ((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
           "Expected either a single incoming value or a positive even number "
           "of operands");
  }

  VP_CLASSOF_IMPL(VPDef::VPBlendSC)

  /// Return the number of incoming values, taking into account that a single
  /// incoming value has no mask.
  unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }

  /// Return incoming value number \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); }

  /// Return mask number \p Idx.
  VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Recursing through Blend recipes only, must terminate at header phi's the
    // latest.
    return all_of(users(),
                  [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
  }
};
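
// For illustration, a minimal sketch of how the [I0, M0, I1, M1, ...] operand
// layout maps onto the accessors above (Blend is a hypothetical recipe with
// operands {I0, M0, I1, M1}):
//
//   unsigned N = Blend->getNumIncomingValues(); // (4 + 1) / 2 == 2
//   VPValue *I1 = Blend->getIncomingValue(1);   // operand 1 * 2     == 2
//   VPValue *M1 = Blend->getMask(1);            // operand 1 * 2 + 1 == 3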
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
/// or stores into one wide load/store and shuffles. The first operand of a
/// VPInterleave recipe is the address, followed by the stored values, followed
/// by an optional mask.
class VPInterleaveRecipe : public VPRecipeBase {
  const InterleaveGroup<Instruction> *IG;

  /// Indicates if the interleave group is in a conditional block and requires a
  /// mask.
  bool HasMask = false;

  /// Indicates if gaps between members of the group need to be masked out or if
  /// unused gaps can be loaded speculatively.
  bool NeedsMaskForGaps = false;

public:
  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
                     ArrayRef<VPValue *> StoredValues, VPValue *Mask,
                     bool NeedsMaskForGaps)
      : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
        NeedsMaskForGaps(NeedsMaskForGaps) {
    for (unsigned i = 0; i < IG->getFactor(); ++i)
      if (Instruction *I = IG->getMember(i)) {
        if (I->getType()->isVoidTy())
          continue;
        new VPValue(I, this);
      }

    for (auto *SV : StoredValues)
      addOperand(SV);
    if (Mask) {
      HasMask = true;
      addOperand(Mask);
    }
  }

  ~VPInterleaveRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last, currently 2nd operand.
    return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Return the VPValues stored by this interleave group. If it is a load
  /// interleave group, return an empty ArrayRef.
  ArrayRef<VPValue *> getStoredValues() const {
    // The first operand is the address, followed by the stored values, followed
    // by an optional mask.
    return ArrayRef<VPValue *>(op_begin(), getNumOperands())
        .slice(1, getNumStoreOperands());
  }

  /// Generate the wide load or store, and shuffles.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }

  /// Returns the number of stored operands of this interleave group. Returns 0
  /// for load interleave groups.
  unsigned getNumStoreOperands() const {
    return getNumOperands() - (HasMask ? 2 : 1);
  }

  /// The recipe only uses the first lane of the address.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
  }
};
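
// For illustration, the operand layout for a masked store interleave group
// with two stored values (a sketch; R is a hypothetical recipe):
//
//   // Operands: {Addr, SV0, SV1, Mask}, so getNumOperands() == 4.
//   VPValue *Addr = R->getAddr();                   // operand 0
//   unsigned NumStored = R->getNumStoreOperands();  // 4 - 2 == 2 (masked)
//   ArrayRef<VPValue *> SVs = R->getStoredValues(); // operands 1 and 2
//   VPValue *Mask = R->getMask();                   // last operand, 3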
/// A recipe to represent inloop reduction operations, performing a reduction on
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPSingleDefRecipe {
  /// The recurrence descriptor for the reduction in question.
  const RecurrenceDescriptor &RdxDesc;

public:
  VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
                    VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp)
      : VPSingleDefRecipe(VPDef::VPReductionSC,
                          ArrayRef<VPValue *>({ChainOp, VecOp}), I),
        RdxDesc(R) {
    if (CondOp)
      addOperand(CondOp);
  }

  ~VPReductionRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPReductionSC)

  /// Generate the reduction in the loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// The VPValue of the scalar Chain being accumulated.
  VPValue *getChainOp() const { return getOperand(0); }
  /// The VPValue of the vector value to be reduced.
  VPValue *getVecOp() const { return getOperand(1); }
  /// The VPValue of the condition for the block.
  VPValue *getCondOp() const {
    return getNumOperands() > 2 ? getOperand(2) : nullptr;
  }
};
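
// For illustration, how the {ChainOp, VecOp, [Condition]} layout maps onto the
// accessors above (a sketch; Red is a hypothetical recipe):
//
//   VPValue *Chain = Red->getChainOp(); // operand 0
//   VPValue *Vec = Red->getVecOp();     // operand 1
//   VPValue *Cond = Red->getCondOp();   // operand 2 if present, else nullptr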
/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to be
/// uniform only one copy, per lane zero, will be generated.
class VPReplicateRecipe : public VPRecipeWithIRFlags {
  /// Indicator if only a single replica per lane is needed.
  bool IsUniform;

  /// Indicator if the replicas are also predicated.
  bool IsPredicated;

public:
  template <typename IterT>
  VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
                    bool IsUniform, VPValue *Mask = nullptr)
      : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
        IsUniform(IsUniform), IsPredicated(Mask) {
    if (Mask)
      addOperand(Mask);
  }

  ~VPReplicateRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPReplicateSC)

  /// Generate replicas of the desired Ingredient. Replicas will be generated
  /// for all parts and lanes unless a specific part and lane are specified in
  /// the \p State.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  bool isUniform() const { return IsUniform; }

  bool isPredicated() const { return IsPredicated; }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return isUniform();
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe is used by a widened recipe via an intervening
  /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
  /// into a vector.
  bool shouldPack() const;

  /// Return the mask of a predicated VPReplicateRecipe.
  VPValue *getMask() {
    assert(isPredicated() && "Trying to get the mask of an unpredicated recipe");
    return getOperand(getNumOperands() - 1);
  }
};
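
// For illustration, a predicated replicate recipe simply appends its mask as
// the last operand (a sketch; Rep is a hypothetical recipe):
//
//   assert(Rep->isPredicated() && "mask only exists on predicated recipes");
//   VPValue *Mask = Rep->getMask(); // last operand: getNumOperands() - 1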
/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase {
public:
  VPBranchOnMaskRecipe(VPValue *BlockInMask)
      : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
    if (BlockInMask) // nullptr means all-one mask.
      addOperand(BlockInMask);
  }

  VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)

  /// Generate the extraction of the appropriate bit from the block mask and the
  /// conditional branch.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override {
    O << Indent << "BRANCH-ON-MASK ";
    if (VPValue *Mask = getMask())
      Mask->printAsOperand(O, SlotTracker);
    else
      O << " All-One";
  }
#endif

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
    // Mask is optional.
    return getNumOperands() == 1 ? getOperand(0) : nullptr;
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};
/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
class VPPredInstPHIRecipe : public VPSingleDefRecipe {
public:
  /// Construct a VPPredInstPHIRecipe given \p PredV, whose value needs phi
  /// nodes after merging back from a Branch-on-Mask.
  VPPredInstPHIRecipe(VPValue *PredV)
      : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {}
  ~VPPredInstPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)

  /// Generates phi nodes for live-outs as needed to retain SSA form.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};
/// A Recipe for widening load/store operations.
/// The recipe uses the following VPValues:
/// - For load: Address, optional mask
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
  Instruction &Ingredient;

  // Whether the loaded-from / stored-to addresses are consecutive.
  bool Consecutive;

  // Whether the consecutive loaded/stored addresses are in reverse order.
  bool Reverse;

  void setMask(VPValue *Mask) {
    if (!Mask)
      return;
    addOperand(Mask);
  }

  bool isMasked() const {
    return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
  }

public:
  VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
                                 bool Consecutive, bool Reverse)
      : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}),
        Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) {
    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
    new VPValue(this, &Load);
    setMask(Mask);
  }

  VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
                                 VPValue *StoredValue, VPValue *Mask,
                                 bool Consecutive, bool Reverse)
      : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue}),
        Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
    setMask(Mask);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last operand.
    return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Returns true if this recipe is a store.
  bool isStore() const { return isa<StoreInst>(Ingredient); }

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const {
    assert(isStore() && "Stored value only available for store instructions");
    return getOperand(1); // Stored value is the 2nd, mandatory operand.
  }

  // Return whether the loaded-from / stored-to addresses are consecutive.
  bool isConsecutive() const { return Consecutive; }

  // Return whether the consecutive loaded/stored addresses are in reverse
  // order.
  bool isReverse() const { return Reverse; }

  /// Generate the wide load/store.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");

    // Widened, consecutive memory operations only demand the first lane of
    // their address, unless the same operand is also stored. That latter can
    // happen with opaque pointers.
    return Op == getAddr() && isConsecutive() &&
           (!isStore() || Op != getStoredValue());
  }

  Instruction &getIngredient() const { return Ingredient; }
};
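
// For illustration, the operand counts behind isMasked() above (a sketch;
// WideLoad and WideStore are hypothetical recipes):
//
//   // Load:  {Addr}              -> masked iff getNumOperands() == 2.
//   // Store: {Addr, StoredValue} -> masked iff getNumOperands() == 3.
//   VPValue *Addr = WideLoad->getAddr();       // operand 0 in both cases
//   VPValue *SV = WideStore->getStoredValue(); // operand 1, stores only
//   VPValue *Mask = WideStore->getMask();      // last operand if masked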
/// Recipe to expand a SCEV expression.
class VPExpandSCEVRecipe : public VPSingleDefRecipe {
  const SCEV *Expr;
  ScalarEvolution &SE;

public:
  VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
      : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}

  ~VPExpandSCEVRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)

  /// Generate a value for the SCEV expression, by expanding it.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const SCEV *getSCEV() const { return Expr; }
};
/// Canonical scalar induction phi of the vector loop. Starting at the specified
/// start value (either 0 or the resume value when vectorizing the epilogue
/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
/// canonical induction variable.
class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}

  ~VPCanonicalIVPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
  }

  /// Generate the canonical scalar induction phi of the vector loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the scalar type of the induction.
  Type *getScalarType() const {
    return getStartValue()->getLiveInIRValue()->getType();
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool onlyFirstPartUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Check if the induction described by \p Kind, \p Start and \p Step is
  /// canonical, i.e. has the same start, step (of 1), and type as the
  /// canonical IV.
  bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start,
                   VPValue *Step, Type *Ty) const;
};
/// A recipe for generating the active lane mask for the vector loop that is
/// used to predicate the vector operations.
/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
/// remove VPActiveLaneMaskPHIRecipe.
class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
      : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
                          DL) {}

  ~VPActiveLaneMaskPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
  }

  /// Generate the active lane mask phi of the vector loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {
public:
  VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
      : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}

  ~VPWidenCanonicalIVRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)

  /// Generate a canonical vector induction variable of the vector loop, with
  /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
  /// step = <VF*UF, VF*UF, ..., VF*UF>.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the scalar type of the induction.
  const Type *getScalarType() const {
    return cast<VPCanonicalIVPHIRecipe>(getOperand(0)->getDefiningRecipe())
        ->getScalarType();
  }
};
/// A recipe for converting the canonical IV value to the corresponding value of
/// an IV with different start and step values, using Start + CanonicalIV *
/// Step.
class VPDerivedIVRecipe : public VPSingleDefRecipe {
  /// If not nullptr, the result of the induction will get truncated to
  /// TruncResultTy.
  Type *TruncResultTy;

  /// Kind of the induction.
  const InductionDescriptor::InductionKind Kind;
  /// If not nullptr, the floating point induction binary operator. Must be set
  /// for floating point inductions.
  const FPMathOperator *FPBinOp;

public:
  VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
                    VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
                    Type *TruncResultTy)
      : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
        TruncResultTy(TruncResultTy), Kind(IndDesc.getKind()),
        FPBinOp(dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())) {
  }

  ~VPDerivedIVRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)

  /// Generate the transformed value of the induction at offset StartValue (1st
  /// operand) + IV (2nd operand) * StepValue (3rd operand).
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  Type *getScalarType() const {
    return TruncResultTy ? TruncResultTy
                         : getStartValue()->getLiveInIRValue()->getType();
  }

  VPValue *getStartValue() const { return getOperand(0); }
  VPValue *getCanonicalIV() const { return getOperand(1); }
  VPValue *getStepValue() const { return getOperand(2); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values.
class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
  Instruction::BinaryOps InductionOpcode;

public:
  VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step,
                        Instruction::BinaryOps Opcode, FastMathFlags FMFs)
      : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
                            ArrayRef<VPValue *>({IV, Step}), FMFs),
        InductionOpcode(Opcode) {}

  VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
                        VPValue *Step)
      : VPScalarIVStepsRecipe(
            IV, Step, IndDesc.getInductionOpcode(),
            dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
                ? IndDesc.getInductionBinOp()->getFastMathFlags()
                : FastMathFlags()) {}

  ~VPScalarIVStepsRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)

  /// Generate the scalarized versions of the phi node as needed by their users.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getStepValue() const { return getOperand(1); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
/// output IR instructions. All PHI-like recipes must come before any non-PHI
/// recipes.
class VPBasicBlock : public VPBlockBase {
public:
  using RecipeListTy = iplist<VPRecipeBase>;

private:
  /// The VPRecipes held in the order of output instructions to generate.
  RecipeListTy Recipes;

public:
  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
      : VPBlockBase(VPBasicBlockSC, Name.str()) {
    if (Recipe)
      appendRecipe(Recipe);
  }

  ~VPBasicBlock() override {
    while (!Recipes.empty())
      Recipes.pop_back();
  }

  /// Instruction iterators...
  using iterator = RecipeListTy::iterator;
  using const_iterator = RecipeListTy::const_iterator;
  using reverse_iterator = RecipeListTy::reverse_iterator;
  using const_reverse_iterator = RecipeListTy::const_reverse_iterator;

  //===--------------------------------------------------------------------===//
  /// Recipe iterator methods
  ///
  inline iterator begin() { return Recipes.begin(); }
  inline const_iterator begin() const { return Recipes.begin(); }
  inline iterator end() { return Recipes.end(); }
  inline const_iterator end() const { return Recipes.end(); }

  inline reverse_iterator rbegin() { return Recipes.rbegin(); }
  inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
  inline reverse_iterator rend() { return Recipes.rend(); }
  inline const_reverse_iterator rend() const { return Recipes.rend(); }

  inline size_t size() const { return Recipes.size(); }
  inline bool empty() const { return Recipes.empty(); }
  inline const VPRecipeBase &front() const { return Recipes.front(); }
  inline VPRecipeBase &front() { return Recipes.front(); }
  inline const VPRecipeBase &back() const { return Recipes.back(); }
  inline VPRecipeBase &back() { return Recipes.back(); }

  /// Returns a reference to the list of recipes.
  RecipeListTy &getRecipeList() { return Recipes; }

  /// Returns a pointer to a member of the recipe list.
  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
    return &VPBasicBlock::Recipes;
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
  }

  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
    assert(Recipe && "No recipe to append.");
    assert(!Recipe->Parent && "Recipe already in VPlan");
    Recipe->Parent = this;
    Recipes.insert(InsertPt, Recipe);
  }

  /// Augment the existing recipes of a VPBasicBlock with an additional
  /// \p Recipe as the last recipe.
  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  /// Return the position of the first non-phi node recipe in the block.
  iterator getFirstNonPhi();

  /// Returns an iterator range over the PHI-like recipes in the block.
  iterator_range<iterator> phis() {
    return make_range(begin(), getFirstNonPhi());
  }

  void dropAllReferences(VPValue *NewValue) override;

  /// Split current block at \p SplitAt by inserting a new block between the
  /// current block and its successors and moving all recipes starting at
  /// SplitAt to the new block. Returns the new block.
  VPBasicBlock *splitAt(iterator SplitAt);

  VPRegionBlock *getEnclosingLoopRegion();

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
  /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_stream &O) version.
#endif

  /// If the block has multiple successors, return the branch recipe terminating
  /// the block. If there are no or only a single successor, return nullptr.
  VPRecipeBase *getTerminator();
  const VPRecipeBase *getTerminator() const;

  /// Returns true if the block is exiting its parent region.
  bool isExiting() const;

private:
  /// Create an IR BasicBlock to hold the output instructions generated by this
  /// VPBasicBlock, and return it. Update the CFGState accordingly.
  BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
};
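
// For illustration, typical traversal patterns over a block's recipes (a
// sketch; VPBB is a hypothetical block):
//
//   for (VPRecipeBase &PhiR : VPBB->phis())
//     ...; // PHI-like recipes only, i.e. [begin(), getFirstNonPhi())
//   for (VPRecipeBase &R : *VPBB)
//     ...; // all recipes, PHI-like ones first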
/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
/// A VPRegionBlock may indicate that its contents are to be replicated several
/// times. This is designed to support predicated scalarization, in which a
/// scalar if-then code structure needs to be generated VF * UF times. Having
/// this replication indicator helps to keep a single model for multiple
/// candidate VF's. The actual replication takes place only once the desired VF
/// and UF have been determined.
class VPRegionBlock : public VPBlockBase {
  /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
  VPBlockBase *Entry;

  /// Hold the Single Exiting block of the SESE region modelled by the
  /// VPRegionBlock.
  VPBlockBase *Exiting;

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool IsReplicator;

public:
  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
                const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
        IsReplicator(IsReplicator) {
    assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
    assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
    Entry->setParent(this);
    Exiting->setParent(this);
  }
  VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
        IsReplicator(IsReplicator) {}

  ~VPRegionBlock() override {
    if (Entry) {
      VPValue DummyValue;
      Entry->dropAllReferences(&DummyValue);
      deleteCFG(Entry);
    }
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
  }

  const VPBlockBase *getEntry() const { return Entry; }
  VPBlockBase *getEntry() { return Entry; }

  /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
  /// EntryBlock must have no predecessors.
  void setEntry(VPBlockBase *EntryBlock) {
    assert(EntryBlock->getPredecessors().empty() &&
           "Entry block cannot have predecessors.");
    Entry = EntryBlock;
    EntryBlock->setParent(this);
  }

  const VPBlockBase *getExiting() const { return Exiting; }
  VPBlockBase *getExiting() { return Exiting; }

  /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
  /// ExitingBlock must have no successors.
  void setExiting(VPBlockBase *ExitingBlock) {
    assert(ExitingBlock->getSuccessors().empty() &&
           "Exit block cannot have successors.");
    Exiting = ExitingBlock;
    ExitingBlock->setParent(this);
  }

  /// Returns the pre-header VPBasicBlock of the loop region.
  VPBasicBlock *getPreheaderVPBB() {
    assert(!isReplicator() && "should only get pre-header of loop regions");
    return getSinglePredecessor()->getExitingBasicBlock();
  }

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool isReplicator() const { return IsReplicator; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRegionBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  void dropAllReferences(VPValue *NewValue) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
  /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual regions is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_stream &O) version.
#endif
};
/// VPlan models a candidate for vectorization, encoding various decisions taken
/// to produce efficient output IR, including which branches, basic-blocks and
/// output IR instructions to generate, and their cost. VPlan holds a
/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
/// VPBasicBlock.
class VPlan {
  friend class VPlanPrinter;
  friend class VPSlotTracker;

  /// Hold the single entry to the Hierarchical CFG of the VPlan, i.e. the
  /// preheader of the vector loop.
  VPBasicBlock *Entry;

  /// VPBasicBlock corresponding to the original preheader. Used to place
  /// VPExpandSCEV recipes for expressions used during skeleton creation and the
  /// rest of VPlan execution.
  VPBasicBlock *Preheader;

  /// Holds the VFs applicable to this VPlan.
  SmallSetVector<ElementCount, 2> VFs;

  /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
  /// any UF.
  SmallSetVector<unsigned, 2> UFs;

  /// Holds the name of the VPlan, for printing.
  std::string Name;

  /// Represents the trip count of the original loop, for folding
  /// the tail.
  VPValue *TripCount = nullptr;

  /// Represents the backedge taken count of the original loop, for folding
  /// the tail. It equals TripCount - 1.
  VPValue *BackedgeTakenCount = nullptr;

  /// Represents the vector trip count.
  VPValue VectorTripCount;

  /// Represents the loop-invariant VF * UF of the vector loop region.
  VPValue VFxUF;

  /// Holds a mapping between Values and their corresponding VPValue inside
  /// VPlan.
  Value2VPValueTy Value2VPValue;

  /// Contains all the external definitions created for this VPlan. External
  /// definitions are VPValues that hold a pointer to their underlying IR.
  SmallVector<VPValue *, 16> VPLiveInsToFree;

  /// Indicates whether it is safe to use the Value2VPValue mapping or if the
  /// mapping cannot be used any longer, because it is stale.
  bool Value2VPValueEnabled = true;

  /// Values used outside the plan.
  MapVector<PHINode *, VPLiveOut *> LiveOuts;

  /// Mapping from SCEVs to the VPValues representing their expansions.
  /// NOTE: This mapping is temporary and will be removed once all users have
  /// been modeled in VPlan directly.
  DenseMap<const SCEV *, VPValue *> SCEVToExpansion;

public:
  /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
  /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
  /// be disconnected, as the bypass blocks between them are not yet modeled in
  /// VPlan.
  VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
      : VPlan(Preheader, Entry) {
    TripCount = TC;
  }

  /// Construct a VPlan with original preheader \p Preheader and \p Entry to
  /// the plan. At the moment, \p Preheader and \p Entry need to be
  /// disconnected, as the bypass blocks between them are not yet modeled in
  /// VPlan.
  VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
      : Entry(Entry), Preheader(Preheader) {
    Entry->setPlan(this);
    Preheader->setPlan(this);
    assert(Preheader->getNumSuccessors() == 0 &&
           Preheader->getNumPredecessors() == 0 &&
           "preheader must be disconnected");
  }

  ~VPlan();
  /// Create initial VPlan skeleton, having an "entry" VPBasicBlock (wrapping
  /// original scalar pre-header) which contains SCEV expansions that need to
  /// happen before the CFG is modified; a VPBasicBlock for the vector
  /// pre-header, followed by a region for the vector loop, followed by the
  /// middle VPBasicBlock.
  static VPlanPtr createInitialVPlan(const SCEV *TripCount,
                                     ScalarEvolution &PSE);

  /// Prepare the plan for execution, setting up the required live-in values.
  void prepareToExecute(Value *TripCount, Value *VectorTripCount,
                        Value *CanonicalIVStartValue, VPTransformState &State);

  /// Generate the IR code for this VPlan.
  void execute(VPTransformState *State);

  VPBasicBlock *getEntry() { return Entry; }
  const VPBasicBlock *getEntry() const { return Entry; }

  /// The trip count of the original loop.
  VPValue *getTripCount() const {
    assert(TripCount && "trip count needs to be set before accessing it");
    return TripCount;
  }

  /// The backedge taken count of the original loop.
  VPValue *getOrCreateBackedgeTakenCount() {
    if (!BackedgeTakenCount)
      BackedgeTakenCount = new VPValue();
    return BackedgeTakenCount;
  }

  /// The vector trip count.
  VPValue &getVectorTripCount() { return VectorTripCount; }

  /// Returns VF * UF of the vector loop region.
  VPValue &getVFxUF() { return VFxUF; }

  /// Mark the plan to indicate that using Value2VPValue is not safe any
  /// longer, because it may be stale.
  void disableValue2VPValue() { Value2VPValueEnabled = false; }

  void addVF(ElementCount VF) { VFs.insert(VF); }

  void setVF(ElementCount VF) {
    assert(hasVF(VF) && "Cannot set VF not already in plan");
    VFs.clear();
    VFs.insert(VF);
  }

  bool hasVF(ElementCount VF) { return VFs.count(VF); }

  bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }

  bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }

  void setUF(unsigned UF) {
    assert(hasUF(UF) && "Cannot set the UF not already in plan");
    UFs.clear();
    UFs.insert(UF);
  }

  /// Return a string with the name of the plan and the applicable VFs and UFs.
  std::string getName() const;

  void setName(const Twine &newName) { Name = newName.str(); }

  void addVPValue(Value *V, VPValue *VPV) {
    assert((Value2VPValueEnabled || VPV->isLiveIn()) &&
           "Value2VPValue mapping may be out of date!");
    assert(V && "Trying to add a null Value to VPlan");
    assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
    Value2VPValue[V] = VPV;
  }

  /// Returns the VPValue for \p V. \p OverrideAllowed can be used to disable
  /// checking whether it is safe to query VPValues using IR Values.
  VPValue *getVPValue(Value *V, bool OverrideAllowed = false) {
    assert(V && "Trying to get the VPValue of a null Value");
    assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
    assert((Value2VPValueEnabled || OverrideAllowed ||
            Value2VPValue[V]->isLiveIn()) &&
           "Value2VPValue mapping may be out of date!");
    return Value2VPValue[V];
  }

  /// Gets the VPValue for \p V or adds a new live-in (if none exists yet) for
  /// \p V.
  VPValue *getVPValueOrAddLiveIn(Value *V) {
    assert(V && "Trying to get or add the VPValue of a null Value");
    if (!Value2VPValue.count(V)) {
      VPValue *VPV = new VPValue(V);
      VPLiveInsToFree.push_back(VPV);
      addVPValue(V, VPV);
    }

    return getVPValue(V);
  }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the live-ins of this VPlan to \p O.
  void printLiveIns(raw_ostream &O) const;

  /// Print this VPlan to \p O.
  void print(raw_ostream &O) const;

  /// Print this VPlan in DOT format to \p O.
  void printDOT(raw_ostream &O) const;

  /// Dump the plan to stderr (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  /// Returns a range mapping the values in the range \p Operands to their
  /// corresponding VPValues.
  iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>>
  mapToVPValues(User::op_range Operands) {
    std::function<VPValue *(Value *)> Fn = [this](Value *Op) {
      return getVPValueOrAddLiveIn(Op);
    };
    return map_range(Operands, Fn);
  }

  /// Returns the VPRegionBlock of the vector loop.
  VPRegionBlock *getVectorLoopRegion() {
    return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
  }
  const VPRegionBlock *getVectorLoopRegion() const {
    return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
  }

  /// Returns the canonical induction recipe of the vector loop.
  VPCanonicalIVPHIRecipe *getCanonicalIV() {
    VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
    if (EntryVPBB->empty()) {
      // VPlan native path.
      EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
    }
    return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
  }

  void addLiveOut(PHINode *PN, VPValue *V);

  void removeLiveOut(PHINode *PN) {
    delete LiveOuts[PN];
    LiveOuts.erase(PN);
  }

  const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
    return LiveOuts;
  }

  VPValue *getSCEVExpansion(const SCEV *S) const {
    return SCEVToExpansion.lookup(S);
  }

  void addSCEVExpansion(const SCEV *S, VPValue *V) {
    assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
    SCEVToExpansion[S] = V;
  }

  /// \return The block corresponding to the original preheader.
  VPBasicBlock *getPreheader() { return Preheader; }
  const VPBasicBlock *getPreheader() const { return Preheader; }

private:
  /// Add to the given dominator tree the header block and every new basic block
  /// that was created between it and the latch block, inclusive.
  static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
                                  BasicBlock *LoopPreHeaderBB,
                                  BasicBlock *LoopExitBB);
};
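
// For illustration, common navigation from a plan to its canonical IV (a
// sketch; Plan is a hypothetical VPlan):
//
//   VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
//   VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
//   VPCanonicalIVPHIRecipe *CanIV = Plan->getCanonicalIV();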
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
/// indented and follows the dot format.
class VPlanPrinter {
  raw_ostream &OS;
  const VPlan &Plan;
  unsigned Depth = 0;
  unsigned TabWidth = 2;
  std::string Indent;
  unsigned BID = 0;
  SmallDenseMap<const VPBlockBase *, unsigned> BlockID;

  VPSlotTracker SlotTracker;

  /// Handle indentation.
  void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }

  /// Print a given \p Block of the Plan.
  void dumpBlock(const VPBlockBase *Block);

  /// Print the information related to the CFG edges going out of a given
  /// \p Block, followed by printing the successor blocks themselves.
  void dumpEdges(const VPBlockBase *Block);

  /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
  /// its successor blocks.
  void dumpBasicBlock(const VPBasicBlock *BasicBlock);

  /// Print a given \p Region of the Plan.
  void dumpRegion(const VPRegionBlock *Region);

  unsigned getOrCreateBID(const VPBlockBase *Block) {
    return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
  }

  Twine getOrCreateName(const VPBlockBase *Block);

  Twine getUID(const VPBlockBase *Block);

  /// Print the information related to a CFG edge between two VPBlockBases.
  void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
                const Twine &Label);

public:
  VPlanPrinter(raw_ostream &O, const VPlan &P)
      : OS(O), Plan(P), SlotTracker(&P) {}

  LLVM_DUMP_METHOD void dump();
};
struct VPlanIngredient {
  const Value *V;

  VPlanIngredient(const Value *V) : V(V) {}

  void print(raw_ostream &O) const;
};

inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
  I.print(OS);
  return OS;
}

inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
  Plan.print(OS);
  return OS;
}
#endif

//===----------------------------------------------------------------------===//
// VPlan Utilities
//===----------------------------------------------------------------------===//
/// Class that provides utilities for VPBlockBases in VPlan.
class VPBlockUtils {
public:
  VPBlockUtils() = delete;

  /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
  /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
  /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
  /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
  /// have neither successors nor predecessors.
  static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
    assert(NewBlock->getSuccessors().empty() &&
           NewBlock->getPredecessors().empty() &&
           "Can't insert new block with predecessors or successors.");
    NewBlock->setParent(BlockPtr->getParent());
    SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
    for (VPBlockBase *Succ : Succs) {
      disconnectBlocks(BlockPtr, Succ);
      connectBlocks(NewBlock, Succ);
    }
    connectBlocks(BlockPtr, NewBlock);
  }

  /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
  /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p
  /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
  /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
  /// and \p IfTrue and \p IfFalse must have neither successors nor
  /// predecessors.
  static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
                                   VPBlockBase *BlockPtr) {
    assert(IfTrue->getSuccessors().empty() &&
           "Can't insert IfTrue with successors.");
    assert(IfFalse->getSuccessors().empty() &&
           "Can't insert IfFalse with successors.");
    BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
    IfTrue->setPredecessors({BlockPtr});
    IfFalse->setPredecessors({BlockPtr});
    IfTrue->setParent(BlockPtr->getParent());
    IfFalse->setParent(BlockPtr->getParent());
  }

  /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
  /// the successors of \p From and \p From to the predecessors of \p To. Both
  /// VPBlockBases must have the same parent, which can be null. Both
  /// VPBlockBases can be already connected to other VPBlockBases.
  static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
    assert((From->getParent() == To->getParent()) &&
           "Can't connect two blocks with different parents");
    assert(From->getNumSuccessors() < 2 &&
           "Blocks can't have more than two successors.");
    From->appendSuccessor(To);
    To->appendPredecessor(From);
  }

  /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
  /// from the successors of \p From and \p From from the predecessors of \p To.
  static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
    assert(To && "Successor to disconnect is null.");
    From->removeSuccessor(To);
    To->removePredecessor(From);
  }

  /// Return an iterator range over \p Range which only includes \p BlockTy
  /// blocks. The accesses are casted to \p BlockTy.
  template <typename BlockTy, typename T>
  static auto blocksOnly(const T &Range) {
    // Create BaseTy with correct const-ness based on BlockTy.
    using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
                                      const VPBlockBase, VPBlockBase>;

    // We need to first create an iterator range over (const) BlockTy & instead
    // of (const) BlockTy * for filter_range to work properly.
    auto Mapped =
        map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
    auto Filter = make_filter_range(
        Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
    return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
      return cast<BlockTy>(&Block);
    });
  }
};
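
// For illustration, building a simple if-then-else diamond with the utilities
// above (a sketch with hypothetical blocks):
//
//   VPBasicBlock *Cond = new VPBasicBlock("cond");
//   VPBasicBlock *Then = new VPBasicBlock("then");
//   VPBasicBlock *Else = new VPBasicBlock("else");
//   VPBasicBlock *Merge = new VPBasicBlock("merge");
//   VPBlockUtils::insertTwoBlocksAfter(Then, Else, Cond);
//   VPBlockUtils::connectBlocks(Then, Merge);
//   VPBlockUtils::connectBlocks(Else, Merge);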
class VPInterleavedAccessInfo {
  DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
      InterleaveGroupMap;

  /// Type for mapping of instruction based interleave groups to VPInstruction
  /// interleave groups.
  using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
                             InterleaveGroup<VPInstruction> *>;

  /// Recursively traverse \p Region and populate VPlan based interleave groups
  /// based on \p IAI.
  void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
                   InterleavedAccessInfo &IAI);
  /// Recursively traverse \p Block and populate VPlan based interleave groups
  /// based on \p IAI.
  void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
                  InterleavedAccessInfo &IAI);

public:
  VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);

  ~VPInterleavedAccessInfo() {
    SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
    // Avoid releasing a pointer twice.
    for (auto &I : InterleaveGroupMap)
      DelSet.insert(I.second);
    for (auto *Ptr : DelSet)
      delete Ptr;
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if \p Instr does not have such a group.
  InterleaveGroup<VPInstruction> *
  getInterleaveGroup(VPInstruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }
};
/// Class that maps (parts of) an existing VPlan to trees of combined
/// VPInstructions.
class VPlanSlp {
  enum class OpMode { Failed, Load, Opcode };

  /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
  /// DenseMap keys.
  struct BundleDenseMapInfo {
    static SmallVector<VPValue *, 4> getEmptyKey() {
      return {reinterpret_cast<VPValue *>(-1)};
    }

    static SmallVector<VPValue *, 4> getTombstoneKey() {
      return {reinterpret_cast<VPValue *>(-2)};
    }

    static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
      return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
    }

    static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
                        const SmallVector<VPValue *, 4> &RHS) {
      return LHS == RHS;
    }
  };

  /// Mapping of values in the original VPlan to a combined VPInstruction.
  DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
      BundleToCombined;

  VPInterleavedAccessInfo &IAI;

  /// Basic block to operate on. For now, only instructions in a single BB are
  /// considered.
  const VPBasicBlock &BB;

  /// Indicates whether we managed to combine all visited instructions or not.
  bool CompletelySLP = true;

  /// Width of the widest combined bundle in bits.
  unsigned WidestBundleBits = 0;

  using MultiNodeOpTy =
      typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;

  // Input operand bundles for the current multi node. Each multi node operand
  // bundle contains values not matching the multi node's opcode. They will
  // be reordered in reorderMultiNodeOps, once we completed building a
  // multi node.
  SmallVector<MultiNodeOpTy, 4> MultiNodeOps;

  /// Indicates whether we are building a multi node currently.
  bool MultiNodeActive = false;

  /// Check if we can vectorize Operands together.
  bool areVectorizable(ArrayRef<VPValue *> Operands) const;

  /// Add combined instruction \p New for the bundle \p Operands.
  void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);

  /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
  VPInstruction *markFailed();

  /// Reorder operands in the multi node to maximize sequential memory access
  /// and commutative operations.
  SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();

  /// Choose the best candidate to use for the lane after \p Last. The set of
  /// candidates to choose from are values with an opcode matching \p Last's
  /// or loads consecutive to \p Last.
  std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
                                       SmallPtrSetImpl<VPValue *> &Candidates,
                                       VPInterleavedAccessInfo &IAI);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print bundle \p Values to dbgs().
  void dumpBundle(ArrayRef<VPValue *> Values);
#endif

public:
  VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}

  ~VPlanSlp() = default;

  /// Tries to build an SLP tree rooted at \p Operands and returns a
  /// VPInstruction combining \p Operands, if they can be combined.
  VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);

  /// Return the width of the widest combined bundle in bits.
  unsigned getWidestBundleBits() const { return WidestBundleBits; }

  /// Return true if all visited instructions can be combined.
  bool isCompletelySLP() const { return CompletelySLP; }
};
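
// For illustration, a usage sketch (hypothetical values; StoreA and StoreB are
// VPValues for two adjacent stores in BB):
//
//   VPlanSlp Slp(IAI, BB);
//   VPInstruction *Root = Slp.buildGraph({StoreA, StoreB});
//   if (Root && Slp.isCompletelySLP())
//     ...; // combined; Slp.getWidestBundleBits() gives the bundle width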
namespace vputils {

/// Returns true if only the first lane of \p Def is used.
bool onlyFirstLaneUsed(VPValue *Def);

/// Returns true if only the first part of \p Def is used.
bool onlyFirstPartUsed(VPValue *Def);

/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
/// pre-header already contains a recipe expanding \p Expr, return it. If not,
/// create a new one.
VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
                                       ScalarEvolution &SE);

/// Returns true if \p VPV is uniform after vectorization.
inline bool isUniformAfterVectorization(VPValue *VPV) {
  // A value defined outside the vector region must be uniform after
  // vectorization inside a vector region.
  if (VPV->isDefinedOutsideVectorRegions())
    return true;
  VPRecipeBase *Def = VPV->getDefiningRecipe();
  assert(Def && "Must have definition for value defined inside vector region");
  if (auto Rep = dyn_cast<VPReplicateRecipe>(Def))
    return Rep->isUniform();
  if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))
    return all_of(GEP->operands(), isUniformAfterVectorization);
  if (auto *VPI = dyn_cast<VPInstruction>(Def))
    return VPI->getOpcode() == VPInstruction::ComputeReductionResult;
  return false;
}
} // end namespace vputils

} // end namespace llvm

#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H