1 //===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file contains the declarations of the Vectorization Plan base classes:
11 /// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12 /// VPBlockBase, together implementing a Hierarchical CFG;
13 /// 2. Specializations of GraphTraits that allow VPBlockBase graphs to be
14 /// treated as proper graphs for generic algorithms;
15 /// 3. Pure virtual VPRecipeBase serving as the base class for recipes contained
16 /// within VPBasicBlocks;
17 /// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
19 /// 5. The VPlan class holding a candidate for vectorization;
20 /// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21 /// These are documented in docs/VectorizationPlan.rst.
23 //===----------------------------------------------------------------------===//
25 #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26 #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
28 #include "VPlanLoopInfo.h"
29 #include "VPlanValue.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/DepthFirstIterator.h"
32 #include "llvm/ADT/GraphTraits.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/SmallBitVector.h"
35 #include "llvm/ADT/SmallPtrSet.h"
36 #include "llvm/ADT/SmallSet.h"
37 #include "llvm/ADT/SmallVector.h"
38 #include "llvm/ADT/Twine.h"
39 #include "llvm/ADT/ilist.h"
40 #include "llvm/ADT/ilist_node.h"
41 #include "llvm/Analysis/VectorUtils.h"
42 #include "llvm/IR/IRBuilder.h"
43 #include "llvm/Support/InstructionCost.h"
54 class InnerLoopVectorizer
;
57 class RecurrenceDescriptor
;
64 /// Returns a calculation for the total number of elements for a given \p VF.
65 /// For fixed width vectors this value is a constant, whereas for scalable
66 /// vectors it is an expression determined at runtime.
67 Value
*getRuntimeVF(IRBuilder
<> &B
, Type
*Ty
, ElementCount VF
);
69 /// A range of powers-of-2 vectorization factors with fixed start and
70 /// adjustable end. The range includes start and excludes end, e.g.,:
71 /// [1, 9) = {1, 2, 4, 8}
74 const ElementCount Start
;
76 // Need not be a power of 2. If End <= Start range is empty.
79 bool isEmpty() const {
80 return End
.getKnownMinValue() <= Start
.getKnownMinValue();
83 VFRange(const ElementCount
&Start
, const ElementCount
&End
)
84 : Start(Start
), End(End
) {
85 assert(Start
.isScalable() == End
.isScalable() &&
86 "Both Start and End should have the same scalable flag");
87 assert(isPowerOf2_32(Start
.getKnownMinValue()) &&
88 "Expected Start to be a power of 2");
92 using VPlanPtr
= std::unique_ptr
<VPlan
>;
94 /// In what follows, the term "input IR" refers to code that is fed into the
95 /// vectorizer whereas the term "output IR" refers to code that is generated by
98 /// VPLane provides a way to access lanes in both fixed width and scalable
99 /// vectors, where for the latter the lane index sometimes needs calculating
100 /// as a runtime expression.
103 /// Kind describes how to interpret Lane.
104 enum class Kind
: uint8_t {
105 /// For First, Lane is the index into the first N elements of a
106 /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
108 /// For ScalableLast, Lane is the offset from the start of the last
109 /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
110 /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
111 /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
119 /// Indicates how the Lane should be interpreted, as described above.
123 VPLane(unsigned Lane
, Kind LaneKind
) : Lane(Lane
), LaneKind(LaneKind
) {}
125 static VPLane
getFirstLane() { return VPLane(0, VPLane::Kind::First
); }
127 static VPLane
getLastLaneForVF(const ElementCount
&VF
) {
128 unsigned LaneOffset
= VF
.getKnownMinValue() - 1;
131 // In this case 'LaneOffset' refers to the offset from the start of the
132 // last subvector with VF.getKnownMinValue() elements.
133 LaneKind
= VPLane::Kind::ScalableLast
;
135 LaneKind
= VPLane::Kind::First
;
136 return VPLane(LaneOffset
, LaneKind
);
139 /// Returns a compile-time known value for the lane index and asserts if the
140 /// lane can only be calculated at runtime.
141 unsigned getKnownLane() const {
142 assert(LaneKind
== Kind::First
);
146 /// Returns an expression describing the lane index that can be used at
148 Value
*getAsRuntimeExpr(IRBuilder
<> &Builder
, const ElementCount
&VF
) const;
150 /// Returns the Kind of lane offset.
151 Kind
getKind() const { return LaneKind
; }
153 /// Returns true if this is the first lane of the whole vector.
154 bool isFirstLane() const { return Lane
== 0 && LaneKind
== Kind::First
; }
156 /// Maps the lane to a cache index based on \p VF.
157 unsigned mapToCacheIndex(const ElementCount
&VF
) const {
159 case VPLane::Kind::ScalableLast
:
160 assert(VF
.isScalable() && Lane
< VF
.getKnownMinValue());
161 return VF
.getKnownMinValue() + Lane
;
163 assert(Lane
< VF
.getKnownMinValue());
  /// Returns the maximum number of lanes that we are able to consider
  /// caching for \p VF.
170 static unsigned getNumCachedLanes(const ElementCount
&VF
) {
171 return VF
.getKnownMinValue() * (VF
.isScalable() ? 2 : 1);
175 /// VPIteration represents a single point in the iteration space of the output
176 /// (vectorized and/or unrolled) IR loop.
183 VPIteration(unsigned Part
, unsigned Lane
,
184 VPLane::Kind Kind
= VPLane::Kind::First
)
185 : Part(Part
), Lane(Lane
, Kind
) {}
187 VPIteration(unsigned Part
, const VPLane
&Lane
) : Part(Part
), Lane(Lane
) {}
189 bool isFirstIteration() const { return Part
== 0 && Lane
.isFirstLane(); }
192 /// VPTransformState holds information passed down when "executing" a VPlan,
193 /// needed for generating the output IR.
194 struct VPTransformState
{
195 VPTransformState(ElementCount VF
, unsigned UF
, LoopInfo
*LI
,
196 DominatorTree
*DT
, IRBuilder
<> &Builder
,
197 InnerLoopVectorizer
*ILV
, VPlan
*Plan
)
198 : VF(VF
), UF(UF
), Instance(), LI(LI
), DT(DT
), Builder(Builder
), ILV(ILV
),
201 /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
205 /// Hold the indices to generate specific scalar instructions. Null indicates
206 /// that all instances are to be generated, using either scalar or vector
208 Optional
<VPIteration
> Instance
;
211 /// A type for vectorized values in the new loop. Each value from the
212 /// original loop, when vectorized, is represented by UF vector values in
213 /// the new unrolled loop, where UF is the unroll factor.
214 typedef SmallVector
<Value
*, 2> PerPartValuesTy
;
216 DenseMap
<VPValue
*, PerPartValuesTy
> PerPartOutput
;
218 using ScalarsPerPartValuesTy
= SmallVector
<SmallVector
<Value
*, 4>, 2>;
219 DenseMap
<VPValue
*, ScalarsPerPartValuesTy
> PerPartScalars
;
222 /// Get the generated Value for a given VPValue and a given Part. Note that
223 /// as some Defs are still created by ILV and managed in its ValueMap, this
224 /// method will delegate the call to ILV in such cases in order to provide
225 /// callers a consistent API.
227 Value
*get(VPValue
*Def
, unsigned Part
);
229 /// Get the generated Value for a given VPValue and given Part and Lane.
230 Value
*get(VPValue
*Def
, const VPIteration
&Instance
);
232 bool hasVectorValue(VPValue
*Def
, unsigned Part
) {
233 auto I
= Data
.PerPartOutput
.find(Def
);
234 return I
!= Data
.PerPartOutput
.end() && Part
< I
->second
.size() &&
238 bool hasAnyVectorValue(VPValue
*Def
) const {
239 return Data
.PerPartOutput
.find(Def
) != Data
.PerPartOutput
.end();
242 bool hasScalarValue(VPValue
*Def
, VPIteration Instance
) {
243 auto I
= Data
.PerPartScalars
.find(Def
);
244 if (I
== Data
.PerPartScalars
.end())
246 unsigned CacheIdx
= Instance
.Lane
.mapToCacheIndex(VF
);
247 return Instance
.Part
< I
->second
.size() &&
248 CacheIdx
< I
->second
[Instance
.Part
].size() &&
249 I
->second
[Instance
.Part
][CacheIdx
];
252 /// Set the generated Value for a given VPValue and a given Part.
253 void set(VPValue
*Def
, Value
*V
, unsigned Part
) {
254 if (!Data
.PerPartOutput
.count(Def
)) {
255 DataState::PerPartValuesTy
Entry(UF
);
256 Data
.PerPartOutput
[Def
] = Entry
;
258 Data
.PerPartOutput
[Def
][Part
] = V
;
260 /// Reset an existing vector value for \p Def and a given \p Part.
261 void reset(VPValue
*Def
, Value
*V
, unsigned Part
) {
262 auto Iter
= Data
.PerPartOutput
.find(Def
);
263 assert(Iter
!= Data
.PerPartOutput
.end() &&
264 "need to overwrite existing value");
265 Iter
->second
[Part
] = V
;
268 /// Set the generated scalar \p V for \p Def and the given \p Instance.
269 void set(VPValue
*Def
, Value
*V
, const VPIteration
&Instance
) {
270 auto Iter
= Data
.PerPartScalars
.insert({Def
, {}});
271 auto &PerPartVec
= Iter
.first
->second
;
272 while (PerPartVec
.size() <= Instance
.Part
)
273 PerPartVec
.emplace_back();
274 auto &Scalars
= PerPartVec
[Instance
.Part
];
275 unsigned CacheIdx
= Instance
.Lane
.mapToCacheIndex(VF
);
276 while (Scalars
.size() <= CacheIdx
)
277 Scalars
.push_back(nullptr);
278 assert(!Scalars
[CacheIdx
] && "should overwrite existing value");
279 Scalars
[CacheIdx
] = V
;
282 /// Reset an existing scalar value for \p Def and a given \p Instance.
283 void reset(VPValue
*Def
, Value
*V
, const VPIteration
&Instance
) {
284 auto Iter
= Data
.PerPartScalars
.find(Def
);
285 assert(Iter
!= Data
.PerPartScalars
.end() &&
286 "need to overwrite existing value");
287 assert(Instance
.Part
< Iter
->second
.size() &&
288 "need to overwrite existing value");
289 unsigned CacheIdx
= Instance
.Lane
.mapToCacheIndex(VF
);
290 assert(CacheIdx
< Iter
->second
[Instance
.Part
].size() &&
291 "need to overwrite existing value");
292 Iter
->second
[Instance
.Part
][CacheIdx
] = V
;
295 /// Hold state information used when constructing the CFG of the output IR,
296 /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
298 /// The previous VPBasicBlock visited. Initially set to null.
299 VPBasicBlock
*PrevVPBB
= nullptr;
301 /// The previous IR BasicBlock created or used. Initially set to the new
302 /// header BasicBlock.
303 BasicBlock
*PrevBB
= nullptr;
305 /// The last IR BasicBlock in the output IR. Set to the new latch
306 /// BasicBlock, used for placing the newly created BasicBlocks.
307 BasicBlock
*LastBB
= nullptr;
309 /// The IR BasicBlock that is the preheader of the vector loop in the output
311 /// FIXME: The vector preheader should also be modeled in VPlan, so any code
312 /// that needs to be added to the preheader gets directly generated by
313 /// VPlan. There should be no need to manage a pointer to the IR BasicBlock.
314 BasicBlock
*VectorPreHeader
= nullptr;
316 /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
317 /// of replication, maps the BasicBlock of the last replica created.
318 SmallDenseMap
<VPBasicBlock
*, BasicBlock
*> VPBB2IRBB
;
320 /// Vector of VPBasicBlocks whose terminator instruction needs to be fixed
321 /// up at the end of vector code generation.
322 SmallVector
<VPBasicBlock
*, 8> VPBBsToFix
;
324 CFGState() = default;
327 /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
330 /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
333 /// Hold a reference to the IRBuilder used to generate output IR code.
334 IRBuilder
<> &Builder
;
336 VPValue2ValueTy VPValue2Value
;
338 /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
339 Value
*CanonicalIV
= nullptr;
341 /// Hold the trip count of the scalar loop.
342 Value
*TripCount
= nullptr;
344 /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
345 InnerLoopVectorizer
*ILV
;
347 /// Pointer to the VPlan code is generated for.
351 /// VPUsers instance used by VPBlockBase to manage CondBit and the block
352 /// predicate. Currently VPBlockUsers are used in VPBlockBase for historical
353 /// reasons, but in the future the only VPUsers should either be recipes or
354 /// live-outs.VPBlockBase uses.
355 struct VPBlockUser
: public VPUser
{
356 VPBlockUser() : VPUser({}, VPUserID::Block
) {}
358 VPValue
*getSingleOperandOrNull() {
359 if (getNumOperands() == 1)
360 return getOperand(0);
364 const VPValue
*getSingleOperandOrNull() const {
365 if (getNumOperands() == 1)
366 return getOperand(0);
371 void resetSingleOpUser(VPValue
*NewVal
) {
372 assert(getNumOperands() <= 1 && "Didn't expect more than one operand!");
374 if (getNumOperands() == 1)
379 if (getNumOperands() == 1)
380 setOperand(0, NewVal
);
386 /// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
387 /// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
389 friend class VPBlockUtils
;
391 const unsigned char SubclassID
; ///< Subclass identifier (for isa/dyn_cast).
393 /// An optional name for the block.
396 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
397 /// it is a topmost VPBlockBase.
398 VPRegionBlock
*Parent
= nullptr;
400 /// List of predecessor blocks.
401 SmallVector
<VPBlockBase
*, 1> Predecessors
;
403 /// List of successor blocks.
404 SmallVector
<VPBlockBase
*, 1> Successors
;
406 /// Successor selector managed by a VPUser. For blocks with zero or one
407 /// successors, there is no operand. Otherwise there is exactly one operand
408 /// which is the branch condition.
409 VPBlockUser CondBitUser
;
411 /// If the block is predicated, its predicate is stored as an operand of this
412 /// VPUser to maintain the def-use relations. Otherwise there is no operand
414 VPBlockUser PredicateUser
;
416 /// VPlan containing the block. Can only be set on the entry block of the
418 VPlan
*Plan
= nullptr;
420 /// Add \p Successor as the last successor to this block.
421 void appendSuccessor(VPBlockBase
*Successor
) {
422 assert(Successor
&& "Cannot add nullptr successor!");
423 Successors
.push_back(Successor
);
426 /// Add \p Predecessor as the last predecessor to this block.
427 void appendPredecessor(VPBlockBase
*Predecessor
) {
428 assert(Predecessor
&& "Cannot add nullptr predecessor!");
429 Predecessors
.push_back(Predecessor
);
432 /// Remove \p Predecessor from the predecessors of this block.
433 void removePredecessor(VPBlockBase
*Predecessor
) {
434 auto Pos
= find(Predecessors
, Predecessor
);
435 assert(Pos
&& "Predecessor does not exist");
436 Predecessors
.erase(Pos
);
439 /// Remove \p Successor from the successors of this block.
440 void removeSuccessor(VPBlockBase
*Successor
) {
441 auto Pos
= find(Successors
, Successor
);
442 assert(Pos
&& "Successor does not exist");
443 Successors
.erase(Pos
);
  /// Construct a VPBlockBase with subclass identifier \p SC and name \p N.
  /// Presumably only called by the concrete subclasses (VPBasicBlock,
  /// VPRegionBlock) — TODO confirm the surrounding access specifier.
  VPBlockBase(const unsigned char SC, const std::string &N)
      : SubclassID(SC), Name(N) {}
451 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
452 /// that are actually instantiated. Values of this enumeration are kept in the
453 /// SubclassID field of the VPBlockBase objects. They are used for concrete
454 /// type identification.
455 using VPBlockTy
= enum { VPBasicBlockSC
, VPRegionBlockSC
};
457 using VPBlocksTy
= SmallVectorImpl
<VPBlockBase
*>;
459 virtual ~VPBlockBase() = default;
461 const std::string
&getName() const { return Name
; }
463 void setName(const Twine
&newName
) { Name
= newName
.str(); }
465 /// \return an ID for the concrete type of this object.
466 /// This is used to implement the classof checks. This should not be used
467 /// for any other purpose, as the values may change as LLVM evolves.
468 unsigned getVPBlockID() const { return SubclassID
; }
470 VPRegionBlock
*getParent() { return Parent
; }
471 const VPRegionBlock
*getParent() const { return Parent
; }
473 /// \return A pointer to the plan containing the current block.
475 const VPlan
*getPlan() const;
477 /// Sets the pointer of the plan containing the block. The block must be the
478 /// entry block into the VPlan.
479 void setPlan(VPlan
*ParentPlan
);
481 void setParent(VPRegionBlock
*P
) { Parent
= P
; }
483 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
484 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
485 /// VPBlockBase is a VPBasicBlock, it is returned.
486 const VPBasicBlock
*getEntryBasicBlock() const;
487 VPBasicBlock
*getEntryBasicBlock();
489 /// \return the VPBasicBlock that is the exit of this VPBlockBase,
490 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
491 /// VPBlockBase is a VPBasicBlock, it is returned.
492 const VPBasicBlock
*getExitBasicBlock() const;
493 VPBasicBlock
*getExitBasicBlock();
495 const VPBlocksTy
&getSuccessors() const { return Successors
; }
496 VPBlocksTy
&getSuccessors() { return Successors
; }
498 const VPBlocksTy
&getPredecessors() const { return Predecessors
; }
499 VPBlocksTy
&getPredecessors() { return Predecessors
; }
501 /// \return the successor of this VPBlockBase if it has a single successor.
502 /// Otherwise return a null pointer.
503 VPBlockBase
*getSingleSuccessor() const {
504 return (Successors
.size() == 1 ? *Successors
.begin() : nullptr);
507 /// \return the predecessor of this VPBlockBase if it has a single
508 /// predecessor. Otherwise return a null pointer.
509 VPBlockBase
*getSinglePredecessor() const {
510 return (Predecessors
.size() == 1 ? *Predecessors
.begin() : nullptr);
513 size_t getNumSuccessors() const { return Successors
.size(); }
514 size_t getNumPredecessors() const { return Predecessors
.size(); }
516 /// An Enclosing Block of a block B is any block containing B, including B
517 /// itself. \return the closest enclosing block starting from "this", which
518 /// has successors. \return the root enclosing block if all enclosing blocks
519 /// have no successors.
520 VPBlockBase
*getEnclosingBlockWithSuccessors();
522 /// \return the closest enclosing block starting from "this", which has
523 /// predecessors. \return the root enclosing block if all enclosing blocks
524 /// have no predecessors.
525 VPBlockBase
*getEnclosingBlockWithPredecessors();
527 /// \return the successors either attached directly to this VPBlockBase or, if
528 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
529 /// successors of its own, search recursively for the first enclosing
530 /// VPRegionBlock that has successors and return them. If no such
531 /// VPRegionBlock exists, return the (empty) successors of the topmost
532 /// VPBlockBase reached.
533 const VPBlocksTy
&getHierarchicalSuccessors() {
534 return getEnclosingBlockWithSuccessors()->getSuccessors();
537 /// \return the hierarchical successor of this VPBlockBase if it has a single
538 /// hierarchical successor. Otherwise return a null pointer.
539 VPBlockBase
*getSingleHierarchicalSuccessor() {
540 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
543 /// \return the predecessors either attached directly to this VPBlockBase or,
544 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
545 /// predecessors of its own, search recursively for the first enclosing
546 /// VPRegionBlock that has predecessors and return them. If no such
547 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
548 /// VPBlockBase reached.
549 const VPBlocksTy
&getHierarchicalPredecessors() {
550 return getEnclosingBlockWithPredecessors()->getPredecessors();
553 /// \return the hierarchical predecessor of this VPBlockBase if it has a
554 /// single hierarchical predecessor. Otherwise return a null pointer.
555 VPBlockBase
*getSingleHierarchicalPredecessor() {
556 return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
559 /// \return the condition bit selecting the successor.
560 VPValue
*getCondBit();
561 /// \return the condition bit selecting the successor.
562 const VPValue
*getCondBit() const;
563 /// Set the condition bit selecting the successor.
564 void setCondBit(VPValue
*CV
);
566 /// \return the block's predicate.
567 VPValue
*getPredicate();
568 /// \return the block's predicate.
569 const VPValue
*getPredicate() const;
570 /// Set the block's predicate.
571 void setPredicate(VPValue
*Pred
);
573 /// Set a given VPBlockBase \p Successor as the single successor of this
574 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
575 /// This VPBlockBase must have no successors.
576 void setOneSuccessor(VPBlockBase
*Successor
) {
577 assert(Successors
.empty() && "Setting one successor when others exist.");
578 appendSuccessor(Successor
);
581 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
582 /// successors of this VPBlockBase. \p Condition is set as the successor
583 /// selector. This VPBlockBase is not added as predecessor of \p IfTrue or \p
584 /// IfFalse. This VPBlockBase must have no successors.
585 void setTwoSuccessors(VPBlockBase
*IfTrue
, VPBlockBase
*IfFalse
,
586 VPValue
*Condition
) {
587 assert(Successors
.empty() && "Setting two successors when others exist.");
588 assert(Condition
&& "Setting two successors without condition!");
589 setCondBit(Condition
);
590 appendSuccessor(IfTrue
);
591 appendSuccessor(IfFalse
);
594 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
595 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
596 /// as successor of any VPBasicBlock in \p NewPreds.
597 void setPredecessors(ArrayRef
<VPBlockBase
*> NewPreds
) {
598 assert(Predecessors
.empty() && "Block predecessors already set.");
599 for (auto *Pred
: NewPreds
)
600 appendPredecessor(Pred
);
603 /// Remove all the predecessor of this block.
604 void clearPredecessors() { Predecessors
.clear(); }
606 /// Remove all the successors of this block and set to null its condition bit
607 void clearSuccessors() {
612 /// The method which generates the output IR that correspond to this
613 /// VPBlockBase, thereby "executing" the VPlan.
614 virtual void execute(struct VPTransformState
*State
) = 0;
616 /// Delete all blocks reachable from a given VPBlockBase, inclusive.
617 static void deleteCFG(VPBlockBase
*Entry
);
619 /// Return true if it is legal to hoist instructions into this block.
620 bool isLegalToHoistInto() {
621 // There are currently no constraints that prevent an instruction to be
622 // hoisted into a VPBlockBase.
626 /// Replace all operands of VPUsers in the block with \p NewValue and also
627 /// replaces all uses of VPValues defined in the block with NewValue.
628 virtual void dropAllReferences(VPValue
*NewValue
) = 0;
630 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
631 void printAsOperand(raw_ostream
&OS
, bool PrintType
) const {
  /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
  /// with \p Indent. \p SlotTracker is used to print unnamed VPValues using
  /// consecutive numbers.
639 /// Note that the numbering is applied to the whole VPlan, so printing
640 /// individual blocks is consistent with the whole VPlan printing.
641 virtual void print(raw_ostream
&O
, const Twine
&Indent
,
642 VPSlotTracker
&SlotTracker
) const = 0;
644 /// Print plain-text dump of this VPlan to \p O.
645 void print(raw_ostream
&O
) const {
646 VPSlotTracker
SlotTracker(getPlan());
647 print(O
, "", SlotTracker
);
650 /// Print the successors of this block to \p O, prefixing all lines with \p
652 void printSuccessors(raw_ostream
&O
, const Twine
&Indent
) const;
654 /// Dump this VPBlockBase to dbgs().
655 LLVM_DUMP_METHOD
void dump() const { print(dbgs()); }
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// VPRecipeBases that also inherit from VPValue must make sure to inherit from
/// VPRecipeBase before VPValue.
664 class VPRecipeBase
: public ilist_node_with_parent
<VPRecipeBase
, VPBasicBlock
>,
668 friend class VPBlockUtils
;
670 /// Each VPRecipe belongs to a single VPBasicBlock.
671 VPBasicBlock
*Parent
= nullptr;
674 VPRecipeBase(const unsigned char SC
, ArrayRef
<VPValue
*> Operands
)
675 : VPDef(SC
), VPUser(Operands
, VPUser::VPUserID::Recipe
) {}
677 template <typename IterT
>
678 VPRecipeBase(const unsigned char SC
, iterator_range
<IterT
> Operands
)
679 : VPDef(SC
), VPUser(Operands
, VPUser::VPUserID::Recipe
) {}
680 virtual ~VPRecipeBase() = default;
682 /// \return the VPBasicBlock which this VPRecipe belongs to.
683 VPBasicBlock
*getParent() { return Parent
; }
684 const VPBasicBlock
*getParent() const { return Parent
; }
686 /// The method which generates the output IR instructions that correspond to
687 /// this VPRecipe, thereby "executing" the VPlan.
688 virtual void execute(struct VPTransformState
&State
) = 0;
690 /// Insert an unlinked recipe into a basic block immediately before
691 /// the specified recipe.
692 void insertBefore(VPRecipeBase
*InsertPos
);
694 /// Insert an unlinked Recipe into a basic block immediately after
695 /// the specified Recipe.
696 void insertAfter(VPRecipeBase
*InsertPos
);
698 /// Unlink this recipe from its current VPBasicBlock and insert it into
699 /// the VPBasicBlock that MovePos lives in, right after MovePos.
700 void moveAfter(VPRecipeBase
*MovePos
);
702 /// Unlink this recipe and insert into BB before I.
704 /// \pre I is a valid iterator into BB.
705 void moveBefore(VPBasicBlock
&BB
, iplist
<VPRecipeBase
>::iterator I
);
707 /// This method unlinks 'this' from the containing basic block, but does not
709 void removeFromParent();
711 /// This method unlinks 'this' from the containing basic block and deletes it.
713 /// \returns an iterator pointing to the element after the erased one
714 iplist
<VPRecipeBase
>::iterator
eraseFromParent();
716 /// Returns the underlying instruction, if the recipe is a VPValue or nullptr
718 Instruction
*getUnderlyingInstr() {
719 return cast
<Instruction
>(getVPSingleValue()->getUnderlyingValue());
721 const Instruction
*getUnderlyingInstr() const {
722 return cast
<Instruction
>(getVPSingleValue()->getUnderlyingValue());
725 /// Method to support type inquiry through isa, cast, and dyn_cast.
726 static inline bool classof(const VPDef
*D
) {
727 // All VPDefs are also VPRecipeBases.
731 static inline bool classof(const VPUser
*U
) {
732 return U
->getVPUserID() == VPUser::VPUserID::Recipe
;
735 /// Returns true if the recipe may have side-effects.
736 bool mayHaveSideEffects() const;
738 /// Returns true for PHI-like recipes.
740 return getVPDefID() >= VPFirstPHISC
&& getVPDefID() <= VPLastPHISC
;
743 /// Returns true if the recipe may read from memory.
744 bool mayReadFromMemory() const;
746 /// Returns true if the recipe may write to memory.
747 bool mayWriteToMemory() const;
749 /// Returns true if the recipe may read from or write to memory.
750 bool mayReadOrWriteMemory() const {
751 return mayReadFromMemory() || mayWriteToMemory();
755 inline bool VPUser::classof(const VPDef
*Def
) {
756 return Def
->getVPDefID() == VPRecipeBase::VPInstructionSC
||
757 Def
->getVPDefID() == VPRecipeBase::VPWidenSC
||
758 Def
->getVPDefID() == VPRecipeBase::VPWidenCallSC
||
759 Def
->getVPDefID() == VPRecipeBase::VPWidenSelectSC
||
760 Def
->getVPDefID() == VPRecipeBase::VPWidenGEPSC
||
761 Def
->getVPDefID() == VPRecipeBase::VPBlendSC
||
762 Def
->getVPDefID() == VPRecipeBase::VPInterleaveSC
||
763 Def
->getVPDefID() == VPRecipeBase::VPReplicateSC
||
764 Def
->getVPDefID() == VPRecipeBase::VPReductionSC
||
765 Def
->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC
||
766 Def
->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC
;
769 /// This is a concrete Recipe that models a single VPlan-level instruction.
770 /// While as any Recipe it may generate a sequence of IR instructions when
771 /// executed, these instructions would always form a single-def expression as
772 /// the VPInstruction is also a single def-use vertex.
773 class VPInstruction
: public VPRecipeBase
, public VPValue
{
774 friend class VPlanSlp
;
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
779 FirstOrderRecurrenceSplice
=
780 Instruction::OtherOpsEnd
+ 1, // Combines the incoming and previous
781 // values of a first-order recurrence.
790 typedef unsigned char OpcodeTy
;
793 /// Utility method serving execute(): generates a single instance of the
794 /// modeled instruction.
795 void generateInstruction(VPTransformState
&State
, unsigned Part
);
798 void setUnderlyingInstr(Instruction
*I
) { setUnderlyingValue(I
); }
801 VPInstruction(unsigned Opcode
, ArrayRef
<VPValue
*> Operands
)
802 : VPRecipeBase(VPRecipeBase::VPInstructionSC
, Operands
),
803 VPValue(VPValue::VPVInstructionSC
, nullptr, this), Opcode(Opcode
) {}
805 VPInstruction(unsigned Opcode
, ArrayRef
<VPInstruction
*> Operands
)
806 : VPRecipeBase(VPRecipeBase::VPInstructionSC
, {}),
807 VPValue(VPValue::VPVInstructionSC
, nullptr, this), Opcode(Opcode
) {
808 for (auto *I
: Operands
)
809 addOperand(I
->getVPSingleValue());
812 VPInstruction(unsigned Opcode
, std::initializer_list
<VPValue
*> Operands
)
813 : VPInstruction(Opcode
, ArrayRef
<VPValue
*>(Operands
)) {}
815 /// Method to support type inquiry through isa, cast, and dyn_cast.
816 static inline bool classof(const VPValue
*V
) {
817 return V
->getVPValueID() == VPValue::VPVInstructionSC
;
820 VPInstruction
*clone() const {
821 SmallVector
<VPValue
*, 2> Operands(operands());
822 return new VPInstruction(Opcode
, Operands
);
825 /// Method to support type inquiry through isa, cast, and dyn_cast.
826 static inline bool classof(const VPDef
*R
) {
827 return R
->getVPDefID() == VPRecipeBase::VPInstructionSC
;
830 unsigned getOpcode() const { return Opcode
; }
832 /// Generate the instruction.
833 /// TODO: We currently execute only per-part unless a specific instance is
835 void execute(VPTransformState
&State
) override
;
837 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
838 /// Print the VPInstruction to \p O.
839 void print(raw_ostream
&O
, const Twine
&Indent
,
840 VPSlotTracker
&SlotTracker
) const override
;
842 /// Print the VPInstruction to dbgs() (for debugging).
843 LLVM_DUMP_METHOD
void dump() const;
846 /// Return true if this instruction may modify memory.
847 bool mayWriteToMemory() const {
848 // TODO: we can use attributes of the called function to rule out memory
850 return Opcode
== Instruction::Store
|| Opcode
== Instruction::Call
||
851 Opcode
== Instruction::Invoke
|| Opcode
== SLPStore
;
854 bool hasResult() const {
855 // CallInst may or may not have a result, depending on the called function.
856 // Conservatively return calls have results for now.
857 switch (getOpcode()) {
858 case Instruction::Ret
:
859 case Instruction::Br
:
860 case Instruction::Store
:
861 case Instruction::Switch
:
862 case Instruction::IndirectBr
:
863 case Instruction::Resume
:
864 case Instruction::CatchRet
:
865 case Instruction::Unreachable
:
866 case Instruction::Fence
:
867 case Instruction::AtomicRMW
:
875 /// VPWidenRecipe is a recipe for producing a copy of vector type its
876 /// ingredient. This recipe covers most of the traditional vectorization cases
877 /// where each ingredient transforms into a vectorized version of itself.
878 class VPWidenRecipe
: public VPRecipeBase
, public VPValue
{
880 template <typename IterT
>
881 VPWidenRecipe(Instruction
&I
, iterator_range
<IterT
> Operands
)
882 : VPRecipeBase(VPRecipeBase::VPWidenSC
, Operands
),
883 VPValue(VPValue::VPVWidenSC
, &I
, this) {}
885 ~VPWidenRecipe() override
= default;
887 /// Method to support type inquiry through isa, cast, and dyn_cast.
888 static inline bool classof(const VPDef
*D
) {
889 return D
->getVPDefID() == VPRecipeBase::VPWidenSC
;
891 static inline bool classof(const VPValue
*V
) {
892 return V
->getVPValueID() == VPValue::VPVWidenSC
;
895 /// Produce widened copies of all Ingredients.
896 void execute(VPTransformState
&State
) override
;
898 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
899 /// Print the recipe.
900 void print(raw_ostream
&O
, const Twine
&Indent
,
901 VPSlotTracker
&SlotTracker
) const override
;
905 /// A recipe for widening Call instructions.
906 class VPWidenCallRecipe
: public VPRecipeBase
, public VPValue
{
909 template <typename IterT
>
910 VPWidenCallRecipe(CallInst
&I
, iterator_range
<IterT
> CallArguments
)
911 : VPRecipeBase(VPRecipeBase::VPWidenCallSC
, CallArguments
),
912 VPValue(VPValue::VPVWidenCallSC
, &I
, this) {}
914 ~VPWidenCallRecipe() override
= default;
916 /// Method to support type inquiry through isa, cast, and dyn_cast.
917 static inline bool classof(const VPDef
*D
) {
918 return D
->getVPDefID() == VPRecipeBase::VPWidenCallSC
;
921 /// Produce a widened version of the call instruction.
922 void execute(VPTransformState
&State
) override
;
924 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
925 /// Print the recipe.
926 void print(raw_ostream
&O
, const Twine
&Indent
,
927 VPSlotTracker
&SlotTracker
) const override
;
931 /// A recipe for widening select instructions.
932 class VPWidenSelectRecipe
: public VPRecipeBase
, public VPValue
{
934 /// Is the condition of the select loop invariant?
938 template <typename IterT
>
939 VPWidenSelectRecipe(SelectInst
&I
, iterator_range
<IterT
> Operands
,
941 : VPRecipeBase(VPRecipeBase::VPWidenSelectSC
, Operands
),
942 VPValue(VPValue::VPVWidenSelectSC
, &I
, this),
943 InvariantCond(InvariantCond
) {}
945 ~VPWidenSelectRecipe() override
= default;
947 /// Method to support type inquiry through isa, cast, and dyn_cast.
948 static inline bool classof(const VPDef
*D
) {
949 return D
->getVPDefID() == VPRecipeBase::VPWidenSelectSC
;
952 /// Produce a widened version of the select instruction.
953 void execute(VPTransformState
&State
) override
;
955 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
956 /// Print the recipe.
957 void print(raw_ostream
&O
, const Twine
&Indent
,
958 VPSlotTracker
&SlotTracker
) const override
;
962 /// A recipe for handling GEP instructions.
963 class VPWidenGEPRecipe
: public VPRecipeBase
, public VPValue
{
964 bool IsPtrLoopInvariant
;
965 SmallBitVector IsIndexLoopInvariant
;
968 template <typename IterT
>
969 VPWidenGEPRecipe(GetElementPtrInst
*GEP
, iterator_range
<IterT
> Operands
)
970 : VPRecipeBase(VPRecipeBase::VPWidenGEPSC
, Operands
),
971 VPValue(VPWidenGEPSC
, GEP
, this),
972 IsIndexLoopInvariant(GEP
->getNumIndices(), false) {}
974 template <typename IterT
>
975 VPWidenGEPRecipe(GetElementPtrInst
*GEP
, iterator_range
<IterT
> Operands
,
977 : VPRecipeBase(VPRecipeBase::VPWidenGEPSC
, Operands
),
978 VPValue(VPValue::VPVWidenGEPSC
, GEP
, this),
979 IsIndexLoopInvariant(GEP
->getNumIndices(), false) {
980 IsPtrLoopInvariant
= OrigLoop
->isLoopInvariant(GEP
->getPointerOperand());
981 for (auto Index
: enumerate(GEP
->indices()))
982 IsIndexLoopInvariant
[Index
.index()] =
983 OrigLoop
->isLoopInvariant(Index
.value().get());
985 ~VPWidenGEPRecipe() override
= default;
987 /// Method to support type inquiry through isa, cast, and dyn_cast.
988 static inline bool classof(const VPDef
*D
) {
989 return D
->getVPDefID() == VPRecipeBase::VPWidenGEPSC
;
992 /// Generate the gep nodes.
993 void execute(VPTransformState
&State
) override
;
995 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
996 /// Print the recipe.
997 void print(raw_ostream
&O
, const Twine
&Indent
,
998 VPSlotTracker
&SlotTracker
) const override
;
1002 /// A recipe for handling phi nodes of integer and floating-point inductions,
1003 /// producing their vector and scalar values.
1004 class VPWidenIntOrFpInductionRecipe
: public VPRecipeBase
{
1008 VPWidenIntOrFpInductionRecipe(PHINode
*IV
, VPValue
*Start
, Instruction
*Cast
,
1009 TruncInst
*Trunc
= nullptr)
1010 : VPRecipeBase(VPWidenIntOrFpInductionSC
, {Start
}), IV(IV
) {
1012 new VPValue(Trunc
, this);
1014 new VPValue(IV
, this);
1017 new VPValue(Cast
, this);
1019 ~VPWidenIntOrFpInductionRecipe() override
= default;
1021 /// Method to support type inquiry through isa, cast, and dyn_cast.
1022 static inline bool classof(const VPDef
*D
) {
1023 return D
->getVPDefID() == VPRecipeBase::VPWidenIntOrFpInductionSC
;
1026 /// Generate the vectorized and scalarized versions of the phi node as
1027 /// needed by their users.
1028 void execute(VPTransformState
&State
) override
;
1030 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1031 /// Print the recipe.
1032 void print(raw_ostream
&O
, const Twine
&Indent
,
1033 VPSlotTracker
&SlotTracker
) const override
;
1036 /// Returns the start value of the induction.
1037 VPValue
*getStartValue() { return getOperand(0); }
1039 /// Returns the cast VPValue, if one is attached, or nullptr otherwise.
1040 VPValue
*getCastValue() {
1041 if (getNumDefinedValues() != 2)
1043 return getVPValue(1);
1046 /// Returns the first defined value as TruncInst, if it is one or nullptr
1048 TruncInst
*getTruncInst() {
1049 return dyn_cast_or_null
<TruncInst
>(getVPValue(0)->getUnderlyingValue());
1051 const TruncInst
*getTruncInst() const {
1052 return dyn_cast_or_null
<TruncInst
>(getVPValue(0)->getUnderlyingValue());
1056 /// A recipe for handling first order recurrences and pointer inductions. For
1057 /// first-order recurrences, the start value is the first operand of the recipe
1058 /// and the incoming value from the backedge is the second operand. It also
1059 /// serves as base class for VPReductionPHIRecipe. In the VPlan native path, all
1060 /// incoming VPValues & VPBasicBlock pairs are managed in the recipe directly.
1061 class VPWidenPHIRecipe
: public VPRecipeBase
, public VPValue
{
1062 /// List of incoming blocks. Only used in the VPlan native path.
1063 SmallVector
<VPBasicBlock
*, 2> IncomingBlocks
;
1066 VPWidenPHIRecipe(unsigned char VPVID
, unsigned char VPDefID
, PHINode
*Phi
,
1067 VPValue
*Start
= nullptr)
1068 : VPRecipeBase(VPDefID
, {}), VPValue(VPVID
, Phi
, this) {
1074 /// Create a VPWidenPHIRecipe for \p Phi
1075 VPWidenPHIRecipe(PHINode
*Phi
)
1076 : VPWidenPHIRecipe(VPVWidenPHISC
, VPWidenPHISC
, Phi
) {}
1078 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
1079 VPWidenPHIRecipe(PHINode
*Phi
, VPValue
&Start
) : VPWidenPHIRecipe(Phi
) {
1083 ~VPWidenPHIRecipe() override
= default;
1085 /// Method to support type inquiry through isa, cast, and dyn_cast.
1086 static inline bool classof(const VPRecipeBase
*B
) {
1087 return B
->getVPDefID() == VPRecipeBase::VPWidenPHISC
||
1088 B
->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC
||
1089 B
->getVPDefID() == VPRecipeBase::VPReductionPHISC
;
1091 static inline bool classof(const VPValue
*V
) {
1092 return V
->getVPValueID() == VPValue::VPVWidenPHISC
||
1093 V
->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC
||
1094 V
->getVPValueID() == VPValue::VPVReductionPHISC
;
1097 /// Generate the phi/select nodes.
1098 void execute(VPTransformState
&State
) override
;
1100 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1101 /// Print the recipe.
1102 void print(raw_ostream
&O
, const Twine
&Indent
,
1103 VPSlotTracker
&SlotTracker
) const override
;
1106 /// Returns the start value of the phi, if it is a reduction or first-order
1108 VPValue
*getStartValue() {
1109 return getNumOperands() == 0 ? nullptr : getOperand(0);
1112 /// Returns the incoming value from the loop backedge, if it is a reduction or
1113 /// first-order recurrence.
1114 VPValue
*getBackedgeValue() {
1115 return getOperand(1);
1118 /// Returns the backedge value as a recipe. The backedge value is guaranteed
1120 VPRecipeBase
*getBackedgeRecipe() {
1121 return cast
<VPRecipeBase
>(getBackedgeValue()->getDef());
1124 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
1125 void addIncoming(VPValue
*IncomingV
, VPBasicBlock
*IncomingBlock
) {
1126 addOperand(IncomingV
);
1127 IncomingBlocks
.push_back(IncomingBlock
);
1130 /// Returns the \p I th incoming VPValue.
1131 VPValue
*getIncomingValue(unsigned I
) { return getOperand(I
); }
1133 /// Returns the \p I th incoming VPBasicBlock.
1134 VPBasicBlock
*getIncomingBlock(unsigned I
) { return IncomingBlocks
[I
]; }
1137 /// A recipe for handling first-order recurrence phis. The start value is the
1138 /// first operand of the recipe and the incoming value from the backedge is the
1140 struct VPFirstOrderRecurrencePHIRecipe
: public VPWidenPHIRecipe
{
1141 VPFirstOrderRecurrencePHIRecipe(PHINode
*Phi
, VPValue
&Start
)
1142 : VPWidenPHIRecipe(VPVFirstOrderRecurrencePHISC
,
1143 VPFirstOrderRecurrencePHISC
, Phi
, &Start
) {}
1145 /// Method to support type inquiry through isa, cast, and dyn_cast.
1146 static inline bool classof(const VPRecipeBase
*R
) {
1147 return R
->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC
;
1149 static inline bool classof(const VPWidenPHIRecipe
*D
) {
1150 return D
->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC
;
1152 static inline bool classof(const VPValue
*V
) {
1153 return V
->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC
;
1156 void execute(VPTransformState
&State
) override
;
1158 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1159 /// Print the recipe.
1160 void print(raw_ostream
&O
, const Twine
&Indent
,
1161 VPSlotTracker
&SlotTracker
) const override
;
1165 /// A recipe for handling reduction phis. The start value is the first operand
1166 /// of the recipe and the incoming value from the backedge is the second
1168 class VPReductionPHIRecipe
: public VPWidenPHIRecipe
{
1169 /// Descriptor for the reduction.
1170 RecurrenceDescriptor
&RdxDesc
;
1172 /// The phi is part of an in-loop reduction.
1175 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
1179 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
1181 VPReductionPHIRecipe(PHINode
*Phi
, RecurrenceDescriptor
&RdxDesc
,
1182 VPValue
&Start
, bool IsInLoop
= false,
1183 bool IsOrdered
= false)
1184 : VPWidenPHIRecipe(VPVReductionPHISC
, VPReductionPHISC
, Phi
, &Start
),
1185 RdxDesc(RdxDesc
), IsInLoop(IsInLoop
), IsOrdered(IsOrdered
) {
1186 assert((!IsOrdered
|| IsInLoop
) && "IsOrdered requires IsInLoop");
1189 ~VPReductionPHIRecipe() override
= default;
1191 /// Method to support type inquiry through isa, cast, and dyn_cast.
1192 static inline bool classof(const VPRecipeBase
*R
) {
1193 return R
->getVPDefID() == VPRecipeBase::VPReductionPHISC
;
1195 static inline bool classof(const VPValue
*V
) {
1196 return V
->getVPValueID() == VPValue::VPVReductionPHISC
;
1198 static inline bool classof(const VPWidenPHIRecipe
*R
) {
1199 return R
->getVPDefID() == VPRecipeBase::VPReductionPHISC
;
1202 /// Generate the phi/select nodes.
1203 void execute(VPTransformState
&State
) override
;
1205 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1206 /// Print the recipe.
1207 void print(raw_ostream
&O
, const Twine
&Indent
,
1208 VPSlotTracker
&SlotTracker
) const override
;
1211 RecurrenceDescriptor
&getRecurrenceDescriptor() { return RdxDesc
; }
1213 /// Returns true, if the phi is part of an ordered reduction.
1214 bool isOrdered() const { return IsOrdered
; }
1216 /// Returns true, if the phi is part of an in-loop reduction.
1217 bool isInLoop() const { return IsInLoop
; }
1220 /// A recipe for vectorizing a phi-node as a sequence of mask-based select
1222 class VPBlendRecipe
: public VPRecipeBase
, public VPValue
{
1226 /// The blend operation is a User of the incoming values and of their
1227 /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
1228 /// might be incoming with a full mask for which there is no VPValue.
1229 VPBlendRecipe(PHINode
*Phi
, ArrayRef
<VPValue
*> Operands
)
1230 : VPRecipeBase(VPBlendSC
, Operands
),
1231 VPValue(VPValue::VPVBlendSC
, Phi
, this), Phi(Phi
) {
1232 assert(Operands
.size() > 0 &&
1233 ((Operands
.size() == 1) || (Operands
.size() % 2 == 0)) &&
1234 "Expected either a single incoming value or a positive even number "
1238 /// Method to support type inquiry through isa, cast, and dyn_cast.
1239 static inline bool classof(const VPDef
*D
) {
1240 return D
->getVPDefID() == VPRecipeBase::VPBlendSC
;
1243 /// Return the number of incoming values, taking into account that a single
1244 /// incoming value has no mask.
1245 unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }
1247 /// Return incoming value number \p Idx.
1248 VPValue
*getIncomingValue(unsigned Idx
) const { return getOperand(Idx
* 2); }
1250 /// Return mask number \p Idx.
1251 VPValue
*getMask(unsigned Idx
) const { return getOperand(Idx
* 2 + 1); }
1253 /// Generate the phi/select nodes.
1254 void execute(VPTransformState
&State
) override
;
1256 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1257 /// Print the recipe.
1258 void print(raw_ostream
&O
, const Twine
&Indent
,
1259 VPSlotTracker
&SlotTracker
) const override
;
1263 /// VPInterleaveRecipe is a recipe for transforming an interleave group of load
1264 /// or stores into one wide load/store and shuffles. The first operand of a
1265 /// VPInterleave recipe is the address, followed by the stored values, followed
1266 /// by an optional mask.
1267 class VPInterleaveRecipe
: public VPRecipeBase
{
1268 const InterleaveGroup
<Instruction
> *IG
;
1270 bool HasMask
= false;
1273 VPInterleaveRecipe(const InterleaveGroup
<Instruction
> *IG
, VPValue
*Addr
,
1274 ArrayRef
<VPValue
*> StoredValues
, VPValue
*Mask
)
1275 : VPRecipeBase(VPInterleaveSC
, {Addr
}), IG(IG
) {
1276 for (unsigned i
= 0; i
< IG
->getFactor(); ++i
)
1277 if (Instruction
*I
= IG
->getMember(i
)) {
1278 if (I
->getType()->isVoidTy())
1280 new VPValue(I
, this);
1283 for (auto *SV
: StoredValues
)
1290 ~VPInterleaveRecipe() override
= default;
1292 /// Method to support type inquiry through isa, cast, and dyn_cast.
1293 static inline bool classof(const VPDef
*D
) {
1294 return D
->getVPDefID() == VPRecipeBase::VPInterleaveSC
;
1297 /// Return the address accessed by this recipe.
1298 VPValue
*getAddr() const {
1299 return getOperand(0); // Address is the 1st, mandatory operand.
1302 /// Return the mask used by this recipe. Note that a full mask is represented
1304 VPValue
*getMask() const {
1305 // Mask is optional and therefore the last, currently 2nd operand.
1306 return HasMask
? getOperand(getNumOperands() - 1) : nullptr;
1309 /// Return the VPValues stored by this interleave group. If it is a load
1310 /// interleave group, return an empty ArrayRef.
1311 ArrayRef
<VPValue
*> getStoredValues() const {
1312 // The first operand is the address, followed by the stored values, followed
1313 // by an optional mask.
1314 return ArrayRef
<VPValue
*>(op_begin(), getNumOperands())
1315 .slice(1, getNumStoreOperands());
1318 /// Generate the wide load or store, and shuffles.
1319 void execute(VPTransformState
&State
) override
;
1321 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1322 /// Print the recipe.
1323 void print(raw_ostream
&O
, const Twine
&Indent
,
1324 VPSlotTracker
&SlotTracker
) const override
;
1327 const InterleaveGroup
<Instruction
> *getInterleaveGroup() { return IG
; }
1329 /// Returns the number of stored operands of this interleave group. Returns 0
1330 /// for load interleave groups.
1331 unsigned getNumStoreOperands() const {
1332 return getNumOperands() - (HasMask
? 2 : 1);
1336 /// A recipe to represent inloop reduction operations, performing a reduction on
1337 /// a vector operand into a scalar value, and adding the result to a chain.
1338 /// The Operands are {ChainOp, VecOp, [Condition]}.
1339 class VPReductionRecipe
: public VPRecipeBase
, public VPValue
{
1340 /// The recurrence decriptor for the reduction in question.
1341 RecurrenceDescriptor
*RdxDesc
;
1342 /// Pointer to the TTI, needed to create the target reduction
1343 const TargetTransformInfo
*TTI
;
1346 VPReductionRecipe(RecurrenceDescriptor
*R
, Instruction
*I
, VPValue
*ChainOp
,
1347 VPValue
*VecOp
, VPValue
*CondOp
,
1348 const TargetTransformInfo
*TTI
)
1349 : VPRecipeBase(VPRecipeBase::VPReductionSC
, {ChainOp
, VecOp
}),
1350 VPValue(VPValue::VPVReductionSC
, I
, this), RdxDesc(R
), TTI(TTI
) {
1355 ~VPReductionRecipe() override
= default;
1357 /// Method to support type inquiry through isa, cast, and dyn_cast.
1358 static inline bool classof(const VPValue
*V
) {
1359 return V
->getVPValueID() == VPValue::VPVReductionSC
;
1362 /// Generate the reduction in the loop
1363 void execute(VPTransformState
&State
) override
;
1365 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1366 /// Print the recipe.
1367 void print(raw_ostream
&O
, const Twine
&Indent
,
1368 VPSlotTracker
&SlotTracker
) const override
;
1371 /// The VPValue of the scalar Chain being accumulated.
1372 VPValue
*getChainOp() const { return getOperand(0); }
1373 /// The VPValue of the vector value to be reduced.
1374 VPValue
*getVecOp() const { return getOperand(1); }
1375 /// The VPValue of the condition for the block.
1376 VPValue
*getCondOp() const {
1377 return getNumOperands() > 2 ? getOperand(2) : nullptr;
1381 /// VPReplicateRecipe replicates a given instruction producing multiple scalar
1382 /// copies of the original scalar type, one per lane, instead of producing a
1383 /// single copy of widened type for all lanes. If the instruction is known to be
1384 /// uniform only one copy, per lane zero, will be generated.
1385 class VPReplicateRecipe
: public VPRecipeBase
, public VPValue
{
1386 /// Indicator if only a single replica per lane is needed.
1389 /// Indicator if the replicas are also predicated.
1392 /// Indicator if the scalar values should also be packed into a vector.
1396 template <typename IterT
>
1397 VPReplicateRecipe(Instruction
*I
, iterator_range
<IterT
> Operands
,
1398 bool IsUniform
, bool IsPredicated
= false)
1399 : VPRecipeBase(VPReplicateSC
, Operands
), VPValue(VPVReplicateSC
, I
, this),
1400 IsUniform(IsUniform
), IsPredicated(IsPredicated
) {
1401 // Retain the previous behavior of predicateInstructions(), where an
1402 // insert-element of a predicated instruction got hoisted into the
1403 // predicated basic block iff it was its only user. This is achieved by
1404 // having predicated instructions also pack their values into a vector by
1405 // default unless they have a replicated user which uses their scalar value.
1406 AlsoPack
= IsPredicated
&& !I
->use_empty();
1409 ~VPReplicateRecipe() override
= default;
1411 /// Method to support type inquiry through isa, cast, and dyn_cast.
1412 static inline bool classof(const VPDef
*D
) {
1413 return D
->getVPDefID() == VPRecipeBase::VPReplicateSC
;
1416 static inline bool classof(const VPValue
*V
) {
1417 return V
->getVPValueID() == VPValue::VPVReplicateSC
;
1420 /// Generate replicas of the desired Ingredient. Replicas will be generated
1421 /// for all parts and lanes unless a specific part and lane are specified in
1423 void execute(VPTransformState
&State
) override
;
1425 void setAlsoPack(bool Pack
) { AlsoPack
= Pack
; }
1427 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1428 /// Print the recipe.
1429 void print(raw_ostream
&O
, const Twine
&Indent
,
1430 VPSlotTracker
&SlotTracker
) const override
;
1433 bool isUniform() const { return IsUniform
; }
1435 bool isPacked() const { return AlsoPack
; }
1437 bool isPredicated() const { return IsPredicated
; }
1440 /// A recipe for generating conditional branches on the bits of a mask.
1441 class VPBranchOnMaskRecipe
: public VPRecipeBase
{
1443 VPBranchOnMaskRecipe(VPValue
*BlockInMask
)
1444 : VPRecipeBase(VPBranchOnMaskSC
, {}) {
1445 if (BlockInMask
) // nullptr means all-one mask.
1446 addOperand(BlockInMask
);
1449 /// Method to support type inquiry through isa, cast, and dyn_cast.
1450 static inline bool classof(const VPDef
*D
) {
1451 return D
->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC
;
1454 /// Generate the extraction of the appropriate bit from the block mask and the
1455 /// conditional branch.
1456 void execute(VPTransformState
&State
) override
;
1458 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1459 /// Print the recipe.
1460 void print(raw_ostream
&O
, const Twine
&Indent
,
1461 VPSlotTracker
&SlotTracker
) const override
{
1462 O
<< Indent
<< "BRANCH-ON-MASK ";
1463 if (VPValue
*Mask
= getMask())
1464 Mask
->printAsOperand(O
, SlotTracker
);
1470 /// Return the mask used by this recipe. Note that a full mask is represented
1472 VPValue
*getMask() const {
1473 assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
1474 // Mask is optional.
1475 return getNumOperands() == 1 ? getOperand(0) : nullptr;
1479 /// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
1480 /// control converges back from a Branch-on-Mask. The phi nodes are needed in
1481 /// order to merge values that are set under such a branch and feed their uses.
1482 /// The phi nodes can be scalar or vector depending on the users of the value.
1483 /// This recipe works in concert with VPBranchOnMaskRecipe.
1484 class VPPredInstPHIRecipe
: public VPRecipeBase
, public VPValue
{
1486 /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
1487 /// nodes after merging back from a Branch-on-Mask.
1488 VPPredInstPHIRecipe(VPValue
*PredV
)
1489 : VPRecipeBase(VPPredInstPHISC
, PredV
),
1490 VPValue(VPValue::VPVPredInstPHI
, nullptr, this) {}
1491 ~VPPredInstPHIRecipe() override
= default;
1493 /// Method to support type inquiry through isa, cast, and dyn_cast.
1494 static inline bool classof(const VPDef
*D
) {
1495 return D
->getVPDefID() == VPRecipeBase::VPPredInstPHISC
;
1498 /// Generates phi nodes for live-outs as needed to retain SSA form.
1499 void execute(VPTransformState
&State
) override
;
1501 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1502 /// Print the recipe.
1503 void print(raw_ostream
&O
, const Twine
&Indent
,
1504 VPSlotTracker
&SlotTracker
) const override
;
1508 /// A Recipe for widening load/store operations.
1509 /// The recipe uses the following VPValues:
1510 /// - For load: Address, optional mask
1511 /// - For store: Address, stored value, optional mask
1512 /// TODO: We currently execute only per-part unless a specific instance is
1514 class VPWidenMemoryInstructionRecipe
: public VPRecipeBase
{
1515 Instruction
&Ingredient
;
1517 void setMask(VPValue
*Mask
) {
1523 bool isMasked() const {
1524 return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
1528 VPWidenMemoryInstructionRecipe(LoadInst
&Load
, VPValue
*Addr
, VPValue
*Mask
)
1529 : VPRecipeBase(VPWidenMemoryInstructionSC
, {Addr
}), Ingredient(Load
) {
1530 new VPValue(VPValue::VPVMemoryInstructionSC
, &Load
, this);
1534 VPWidenMemoryInstructionRecipe(StoreInst
&Store
, VPValue
*Addr
,
1535 VPValue
*StoredValue
, VPValue
*Mask
)
1536 : VPRecipeBase(VPWidenMemoryInstructionSC
, {Addr
, StoredValue
}),
1541 /// Method to support type inquiry through isa, cast, and dyn_cast.
1542 static inline bool classof(const VPDef
*D
) {
1543 return D
->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC
;
1546 /// Return the address accessed by this recipe.
1547 VPValue
*getAddr() const {
1548 return getOperand(0); // Address is the 1st, mandatory operand.
1551 /// Return the mask used by this recipe. Note that a full mask is represented
1553 VPValue
*getMask() const {
1554 // Mask is optional and therefore the last operand.
1555 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1558 /// Returns true if this recipe is a store.
1559 bool isStore() const { return isa
<StoreInst
>(Ingredient
); }
1561 /// Return the address accessed by this recipe.
1562 VPValue
*getStoredValue() const {
1563 assert(isStore() && "Stored value only available for store instructions");
1564 return getOperand(1); // Stored value is the 2nd, mandatory operand.
1567 /// Generate the wide load/store.
1568 void execute(VPTransformState
&State
) override
;
1570 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1571 /// Print the recipe.
1572 void print(raw_ostream
&O
, const Twine
&Indent
,
1573 VPSlotTracker
&SlotTracker
) const override
;
1577 /// A Recipe for widening the canonical induction variable of the vector loop.
1578 class VPWidenCanonicalIVRecipe
: public VPRecipeBase
{
1580 VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC
, {}) {
1581 new VPValue(nullptr, this);
1584 ~VPWidenCanonicalIVRecipe() override
= default;
1586 /// Method to support type inquiry through isa, cast, and dyn_cast.
1587 static inline bool classof(const VPDef
*D
) {
1588 return D
->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC
;
1591 /// Generate a canonical vector induction variable of the vector loop, with
1592 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
1593 /// step = <VF*UF, VF*UF, ..., VF*UF>.
1594 void execute(VPTransformState
&State
) override
;
1596 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1597 /// Print the recipe.
1598 void print(raw_ostream
&O
, const Twine
&Indent
,
1599 VPSlotTracker
&SlotTracker
) const override
;
1603 /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
1604 /// holds a sequence of zero or more VPRecipe's each representing a sequence of
1605 /// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
1606 class VPBasicBlock
: public VPBlockBase
{
1608 using RecipeListTy
= iplist
<VPRecipeBase
>;
1611 /// The VPRecipes held in the order of output instructions to generate.
1612 RecipeListTy Recipes
;
1615 VPBasicBlock(const Twine
&Name
= "", VPRecipeBase
*Recipe
= nullptr)
1616 : VPBlockBase(VPBasicBlockSC
, Name
.str()) {
1618 appendRecipe(Recipe
);
1621 ~VPBasicBlock() override
{
1622 while (!Recipes
.empty())
1626 /// Instruction iterators...
1627 using iterator
= RecipeListTy::iterator
;
1628 using const_iterator
= RecipeListTy::const_iterator
;
1629 using reverse_iterator
= RecipeListTy::reverse_iterator
;
1630 using const_reverse_iterator
= RecipeListTy::const_reverse_iterator
;
1632 //===--------------------------------------------------------------------===//
1633 /// Recipe iterator methods
1635 inline iterator
begin() { return Recipes
.begin(); }
1636 inline const_iterator
begin() const { return Recipes
.begin(); }
1637 inline iterator
end() { return Recipes
.end(); }
1638 inline const_iterator
end() const { return Recipes
.end(); }
1640 inline reverse_iterator
rbegin() { return Recipes
.rbegin(); }
1641 inline const_reverse_iterator
rbegin() const { return Recipes
.rbegin(); }
1642 inline reverse_iterator
rend() { return Recipes
.rend(); }
1643 inline const_reverse_iterator
rend() const { return Recipes
.rend(); }
1645 inline size_t size() const { return Recipes
.size(); }
1646 inline bool empty() const { return Recipes
.empty(); }
1647 inline const VPRecipeBase
&front() const { return Recipes
.front(); }
1648 inline VPRecipeBase
&front() { return Recipes
.front(); }
1649 inline const VPRecipeBase
&back() const { return Recipes
.back(); }
1650 inline VPRecipeBase
&back() { return Recipes
.back(); }
1652 /// Returns a reference to the list of recipes.
1653 RecipeListTy
&getRecipeList() { return Recipes
; }
1655 /// Returns a pointer to a member of the recipe list.
1656 static RecipeListTy
VPBasicBlock::*getSublistAccess(VPRecipeBase
*) {
1657 return &VPBasicBlock::Recipes
;
1660 /// Method to support type inquiry through isa, cast, and dyn_cast.
1661 static inline bool classof(const VPBlockBase
*V
) {
1662 return V
->getVPBlockID() == VPBlockBase::VPBasicBlockSC
;
1665 void insert(VPRecipeBase
*Recipe
, iterator InsertPt
) {
1666 assert(Recipe
&& "No recipe to append.");
1667 assert(!Recipe
->Parent
&& "Recipe already in VPlan");
1668 Recipe
->Parent
= this;
1669 Recipes
.insert(InsertPt
, Recipe
);
1672 /// Augment the existing recipes of a VPBasicBlock with an additional
1673 /// \p Recipe as the last recipe.
1674 void appendRecipe(VPRecipeBase
*Recipe
) { insert(Recipe
, end()); }
1676 /// The method which generates the output IR instructions that correspond to
1677 /// this VPBasicBlock, thereby "executing" the VPlan.
1678 void execute(struct VPTransformState
*State
) override
;
1680 /// Return the position of the first non-phi node recipe in the block.
1681 iterator
getFirstNonPhi();
1683 /// Returns an iterator range over the PHI-like recipes in the block.
1684 iterator_range
<iterator
> phis() {
1685 return make_range(begin(), getFirstNonPhi());
1688 void dropAllReferences(VPValue
*NewValue
) override
;
1690 /// Split current block at \p SplitAt by inserting a new block between the
1691 /// current block and its successors and moving all recipes starting at
1692 /// SplitAt to the new block. Returns the new block.
1693 VPBasicBlock
*splitAt(iterator SplitAt
);
1695 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1696 /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p
1697 /// SlotTracker is used to print unnamed VPValue's using consequtive numbers.
1699 /// Note that the numbering is applied to the whole VPlan, so printing
1700 /// individual blocks is consistent with the whole VPlan printing.
1701 void print(raw_ostream
&O
, const Twine
&Indent
,
1702 VPSlotTracker
&SlotTracker
) const override
;
1703 using VPBlockBase::print
; // Get the print(raw_stream &O) version.
1707 /// Create an IR BasicBlock to hold the output instructions generated by this
1708 /// VPBasicBlock, and return it. Update the CFGState accordingly.
1709 BasicBlock
*createEmptyBasicBlock(VPTransformState::CFGState
&CFG
);
1712 /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
1713 /// which form a Single-Entry-Single-Exit subgraph of the output IR CFG.
1714 /// A VPRegionBlock may indicate that its contents are to be replicated several
1715 /// times. This is designed to support predicated scalarization, in which a
1716 /// scalar if-then code structure needs to be generated VF * UF times. Having
1717 /// this replication indicator helps to keep a single model for multiple
1718 /// candidate VF's. The actual replication takes place only once the desired VF
1719 /// and UF have been determined.
1720 class VPRegionBlock
: public VPBlockBase
{
1721 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
1724 /// Hold the Single Exit of the SESE region modelled by the VPRegionBlock.
1727 /// An indicator whether this region is to generate multiple replicated
1728 /// instances of output IR corresponding to its VPBlockBases.
1732 VPRegionBlock(VPBlockBase
*Entry
, VPBlockBase
*Exit
,
1733 const std::string
&Name
= "", bool IsReplicator
= false)
1734 : VPBlockBase(VPRegionBlockSC
, Name
), Entry(Entry
), Exit(Exit
),
1735 IsReplicator(IsReplicator
) {
1736 assert(Entry
->getPredecessors().empty() && "Entry block has predecessors.");
1737 assert(Exit
->getSuccessors().empty() && "Exit block has successors.");
1738 Entry
->setParent(this);
1739 Exit
->setParent(this);
1741 VPRegionBlock(const std::string
&Name
= "", bool IsReplicator
= false)
1742 : VPBlockBase(VPRegionBlockSC
, Name
), Entry(nullptr), Exit(nullptr),
1743 IsReplicator(IsReplicator
) {}
1745 ~VPRegionBlock() override
{
1748 Entry
->dropAllReferences(&DummyValue
);
1753 /// Method to support type inquiry through isa, cast, and dyn_cast.
1754 static inline bool classof(const VPBlockBase
*V
) {
1755 return V
->getVPBlockID() == VPBlockBase::VPRegionBlockSC
;
1758 const VPBlockBase
*getEntry() const { return Entry
; }
1759 VPBlockBase
*getEntry() { return Entry
; }
1761 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
1762 /// EntryBlock must have no predecessors.
1763 void setEntry(VPBlockBase
*EntryBlock
) {
1764 assert(EntryBlock
->getPredecessors().empty() &&
1765 "Entry block cannot have predecessors.");
1767 EntryBlock
->setParent(this);
1770 // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a
1771 // specific interface of llvm::Function, instead of using
1772 // GraphTraints::getEntryNode. We should add a new template parameter to
1773 // DominatorTreeBase representing the Graph type.
1774 VPBlockBase
&front() const { return *Entry
; }
1776 const VPBlockBase
*getExit() const { return Exit
; }
1777 VPBlockBase
*getExit() { return Exit
; }
1779 /// Set \p ExitBlock as the exit VPBlockBase of this VPRegionBlock. \p
1780 /// ExitBlock must have no successors.
1781 void setExit(VPBlockBase
*ExitBlock
) {
1782 assert(ExitBlock
->getSuccessors().empty() &&
1783 "Exit block cannot have successors.");
1785 ExitBlock
->setParent(this);
1788 /// An indicator whether this region is to generate multiple replicated
1789 /// instances of output IR corresponding to its VPBlockBases.
1790 bool isReplicator() const { return IsReplicator
; }
1792 /// The method which generates the output IR instructions that correspond to
1793 /// this VPRegionBlock, thereby "executing" the VPlan.
1794 void execute(struct VPTransformState
*State
) override
;
1796 void dropAllReferences(VPValue
*NewValue
) override
;
1798 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1799 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
1800 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
1801 /// consequtive numbers.
1803 /// Note that the numbering is applied to the whole VPlan, so printing
1804 /// individual regions is consistent with the whole VPlan printing.
1805 void print(raw_ostream
&O
, const Twine
&Indent
,
1806 VPSlotTracker
&SlotTracker
) const override
;
1807 using VPBlockBase::print
; // Get the print(raw_stream &O) version.
1811 //===----------------------------------------------------------------------===//
1812 // GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs //
1813 //===----------------------------------------------------------------------===//
1815 // The following set of template specializations implement GraphTraits to treat
1816 // any VPBlockBase as a node in a graph of VPBlockBases. It's important to note
1817 // that VPBlockBase traits don't recurse into VPRegioBlocks, i.e., if the
1818 // VPBlockBase is a VPRegionBlock, this specialization provides access to its
1819 // successors/predecessors but not to the blocks inside the region.
1821 template <> struct GraphTraits
<VPBlockBase
*> {
1822 using NodeRef
= VPBlockBase
*;
1823 using ChildIteratorType
= SmallVectorImpl
<VPBlockBase
*>::iterator
;
1825 static NodeRef
getEntryNode(NodeRef N
) { return N
; }
1827 static inline ChildIteratorType
child_begin(NodeRef N
) {
1828 return N
->getSuccessors().begin();
1831 static inline ChildIteratorType
child_end(NodeRef N
) {
1832 return N
->getSuccessors().end();
1836 template <> struct GraphTraits
<const VPBlockBase
*> {
1837 using NodeRef
= const VPBlockBase
*;
1838 using ChildIteratorType
= SmallVectorImpl
<VPBlockBase
*>::const_iterator
;
1840 static NodeRef
getEntryNode(NodeRef N
) { return N
; }
1842 static inline ChildIteratorType
child_begin(NodeRef N
) {
1843 return N
->getSuccessors().begin();
1846 static inline ChildIteratorType
child_end(NodeRef N
) {
1847 return N
->getSuccessors().end();
1851 // Inverse order specialization for VPBasicBlocks. Predecessors are used instead
1852 // of successors for the inverse traversal.
1853 template <> struct GraphTraits
<Inverse
<VPBlockBase
*>> {
1854 using NodeRef
= VPBlockBase
*;
1855 using ChildIteratorType
= SmallVectorImpl
<VPBlockBase
*>::iterator
;
1857 static NodeRef
getEntryNode(Inverse
<NodeRef
> B
) { return B
.Graph
; }
1859 static inline ChildIteratorType
child_begin(NodeRef N
) {
1860 return N
->getPredecessors().begin();
1863 static inline ChildIteratorType
child_end(NodeRef N
) {
1864 return N
->getPredecessors().end();
1868 // The following set of template specializations implement GraphTraits to
1869 // treat VPRegionBlock as a graph and recurse inside its nodes. It's important
1870 // to note that the blocks inside the VPRegionBlock are treated as VPBlockBases
1871 // (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so
1872 // there won't be automatic recursion into other VPBlockBases that turn to be
1876 struct GraphTraits
<VPRegionBlock
*> : public GraphTraits
<VPBlockBase
*> {
1877 using GraphRef
= VPRegionBlock
*;
1878 using nodes_iterator
= df_iterator
<NodeRef
>;
1880 static NodeRef
getEntryNode(GraphRef N
) { return N
->getEntry(); }
1882 static nodes_iterator
nodes_begin(GraphRef N
) {
1883 return nodes_iterator::begin(N
->getEntry());
1886 static nodes_iterator
nodes_end(GraphRef N
) {
1887 // df_iterator::end() returns an empty iterator so the node used doesn't
1889 return nodes_iterator::end(N
);
1894 struct GraphTraits
<const VPRegionBlock
*>
1895 : public GraphTraits
<const VPBlockBase
*> {
1896 using GraphRef
= const VPRegionBlock
*;
1897 using nodes_iterator
= df_iterator
<NodeRef
>;
1899 static NodeRef
getEntryNode(GraphRef N
) { return N
->getEntry(); }
1901 static nodes_iterator
nodes_begin(GraphRef N
) {
1902 return nodes_iterator::begin(N
->getEntry());
1905 static nodes_iterator
nodes_end(GraphRef N
) {
1906 // df_iterator::end() returns an empty iterator so the node used doesn't
1908 return nodes_iterator::end(N
);
1913 struct GraphTraits
<Inverse
<VPRegionBlock
*>>
1914 : public GraphTraits
<Inverse
<VPBlockBase
*>> {
1915 using GraphRef
= VPRegionBlock
*;
1916 using nodes_iterator
= df_iterator
<NodeRef
>;
1918 static NodeRef
getEntryNode(Inverse
<GraphRef
> N
) {
1919 return N
.Graph
->getExit();
1922 static nodes_iterator
nodes_begin(GraphRef N
) {
1923 return nodes_iterator::begin(N
->getExit());
1926 static nodes_iterator
nodes_end(GraphRef N
) {
1927 // df_iterator::end() returns an empty iterator so the node used doesn't
1929 return nodes_iterator::end(N
);
1933 /// Iterator to traverse all successors of a VPBlockBase node. This includes the
1934 /// entry node of VPRegionBlocks. Exit blocks of a region implicitly have their
1935 /// parent region's successors. This ensures all blocks in a region are visited
1936 /// before any blocks in a successor region when doing a reverse post-order
1937 // traversal of the graph.
1938 template <typename BlockPtrTy
>
1939 class VPAllSuccessorsIterator
1940 : public iterator_facade_base
<VPAllSuccessorsIterator
<BlockPtrTy
>,
1941 std::forward_iterator_tag
, VPBlockBase
> {
1943 /// Index of the current successor. For VPBasicBlock nodes, this simply is the
1944 /// index for the successor array. For VPRegionBlock, SuccessorIdx == 0 is
1945 /// used for the region's entry block, and SuccessorIdx - 1 are the indices
1946 /// for the successor array.
1947 size_t SuccessorIdx
;
1949 static BlockPtrTy
getBlockWithSuccs(BlockPtrTy Current
) {
1950 while (Current
&& Current
->getNumSuccessors() == 0)
1951 Current
= Current
->getParent();
1955 /// Templated helper to dereference successor \p SuccIdx of \p Block. Used by
1956 /// both the const and non-const operator* implementations.
1957 template <typename T1
> static T1
deref(T1 Block
, unsigned SuccIdx
) {
1958 if (auto *R
= dyn_cast
<VPRegionBlock
>(Block
)) {
1960 return R
->getEntry();
1964 // For exit blocks, use the next parent region with successors.
1965 return getBlockWithSuccs(Block
)->getSuccessors()[SuccIdx
];
1969 VPAllSuccessorsIterator(BlockPtrTy Block
, size_t Idx
= 0)
1970 : Block(Block
), SuccessorIdx(Idx
) {}
1971 VPAllSuccessorsIterator(const VPAllSuccessorsIterator
&Other
)
1972 : Block(Other
.Block
), SuccessorIdx(Other
.SuccessorIdx
) {}
1974 VPAllSuccessorsIterator
&operator=(const VPAllSuccessorsIterator
&R
) {
1976 SuccessorIdx
= R
.SuccessorIdx
;
1980 static VPAllSuccessorsIterator
end(BlockPtrTy Block
) {
1981 BlockPtrTy ParentWithSuccs
= getBlockWithSuccs(Block
);
1982 unsigned NumSuccessors
= ParentWithSuccs
1983 ? ParentWithSuccs
->getNumSuccessors()
1984 : Block
->getNumSuccessors();
1986 if (auto *R
= dyn_cast
<VPRegionBlock
>(Block
))
1987 return {R
, NumSuccessors
+ 1};
1988 return {Block
, NumSuccessors
};
1991 bool operator==(const VPAllSuccessorsIterator
&R
) const {
1992 return Block
== R
.Block
&& SuccessorIdx
== R
.SuccessorIdx
;
1995 const VPBlockBase
*operator*() const { return deref(Block
, SuccessorIdx
); }
1997 BlockPtrTy
operator*() { return deref(Block
, SuccessorIdx
); }
1999 VPAllSuccessorsIterator
&operator++() {
2004 VPAllSuccessorsIterator
operator++(int X
) {
2005 VPAllSuccessorsIterator Orig
= *this;
/// Helper for GraphTraits specialization that traverses through VPRegionBlocks.
template <typename BlockTy> class VPBlockRecursiveTraversalWrapper {
  /// Entry block the recursive traversal starts from.
  BlockTy Entry;

public:
  VPBlockRecursiveTraversalWrapper(BlockTy Entry) : Entry(Entry) {}
  BlockTy getEntry() { return Entry; }
};
2020 /// GraphTraits specialization to recursively traverse VPBlockBase nodes,
2021 /// including traversing through VPRegionBlocks. Exit blocks of a region
2022 /// implicitly have their parent region's successors. This ensures all blocks in
2023 /// a region are visited before any blocks in a successor region when doing a
2024 /// reverse post-order traversal of the graph.
2026 struct GraphTraits
<VPBlockRecursiveTraversalWrapper
<VPBlockBase
*>> {
2027 using NodeRef
= VPBlockBase
*;
2028 using ChildIteratorType
= VPAllSuccessorsIterator
<VPBlockBase
*>;
2031 getEntryNode(VPBlockRecursiveTraversalWrapper
<VPBlockBase
*> N
) {
2032 return N
.getEntry();
2035 static inline ChildIteratorType
child_begin(NodeRef N
) {
2036 return ChildIteratorType(N
);
2039 static inline ChildIteratorType
child_end(NodeRef N
) {
2040 return ChildIteratorType::end(N
);
2045 struct GraphTraits
<VPBlockRecursiveTraversalWrapper
<const VPBlockBase
*>> {
2046 using NodeRef
= const VPBlockBase
*;
2047 using ChildIteratorType
= VPAllSuccessorsIterator
<const VPBlockBase
*>;
2050 getEntryNode(VPBlockRecursiveTraversalWrapper
<const VPBlockBase
*> N
) {
2051 return N
.getEntry();
2054 static inline ChildIteratorType
child_begin(NodeRef N
) {
2055 return ChildIteratorType(N
);
2058 static inline ChildIteratorType
child_end(NodeRef N
) {
2059 return ChildIteratorType::end(N
);
2063 /// VPlan models a candidate for vectorization, encoding various decisions take
2064 /// to produce efficient output IR, including which branches, basic-blocks and
2065 /// output IR instructions to generate, and their cost. VPlan holds a
2066 /// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
2069 friend class VPlanPrinter
;
2070 friend class VPSlotTracker
;
2072 /// Hold the single entry to the Hierarchical CFG of the VPlan.
2075 /// Holds the VFs applicable to this VPlan.
2076 SmallSetVector
<ElementCount
, 2> VFs
;
2078 /// Holds the name of the VPlan, for printing.
2081 /// Holds all the external definitions created for this VPlan.
2082 // TODO: Introduce a specific representation for external definitions in
2083 // VPlan. External definitions must be immutable and hold a pointer to its
2084 // underlying IR that will be used to implement its structural comparison
2085 // (operators '==' and '<').
2086 SetVector
<VPValue
*> VPExternalDefs
;
2088 /// Represents the backedge taken count of the original loop, for folding
2090 VPValue
*BackedgeTakenCount
= nullptr;
2092 /// Holds a mapping between Values and their corresponding VPValue inside
2094 Value2VPValueTy Value2VPValue
;
2096 /// Contains all VPValues that been allocated by addVPValue directly and need
2097 /// to be free when the plan's destructor is called.
2098 SmallVector
<VPValue
*, 16> VPValuesToFree
;
2100 /// Holds the VPLoopInfo analysis for this VPlan.
2104 VPlan(VPBlockBase
*Entry
= nullptr) : Entry(Entry
) {
2106 Entry
->setPlan(this);
2112 for (VPBlockBase
*Block
: depth_first(Entry
))
2113 Block
->dropAllReferences(&DummyValue
);
2115 VPBlockBase::deleteCFG(Entry
);
2117 for (VPValue
*VPV
: VPValuesToFree
)
2119 if (BackedgeTakenCount
)
2120 delete BackedgeTakenCount
;
2121 for (VPValue
*Def
: VPExternalDefs
)
2125 /// Generate the IR code for this VPlan.
2126 void execute(struct VPTransformState
*State
);
2128 VPBlockBase
*getEntry() { return Entry
; }
2129 const VPBlockBase
*getEntry() const { return Entry
; }
2131 VPBlockBase
*setEntry(VPBlockBase
*Block
) {
2133 Block
->setPlan(this);
2137 /// The backedge taken count of the original loop.
2138 VPValue
*getOrCreateBackedgeTakenCount() {
2139 if (!BackedgeTakenCount
)
2140 BackedgeTakenCount
= new VPValue();
2141 return BackedgeTakenCount
;
2144 void addVF(ElementCount VF
) { VFs
.insert(VF
); }
2146 bool hasVF(ElementCount VF
) { return VFs
.count(VF
); }
2148 const std::string
&getName() const { return Name
; }
2150 void setName(const Twine
&newName
) { Name
= newName
.str(); }
2152 /// Add \p VPVal to the pool of external definitions if it's not already
2154 void addExternalDef(VPValue
*VPVal
) { VPExternalDefs
.insert(VPVal
); }
2156 void addVPValue(Value
*V
) {
2157 assert(V
&& "Trying to add a null Value to VPlan");
2158 assert(!Value2VPValue
.count(V
) && "Value already exists in VPlan");
2159 VPValue
*VPV
= new VPValue(V
);
2160 Value2VPValue
[V
] = VPV
;
2161 VPValuesToFree
.push_back(VPV
);
2164 void addVPValue(Value
*V
, VPValue
*VPV
) {
2165 assert(V
&& "Trying to add a null Value to VPlan");
2166 assert(!Value2VPValue
.count(V
) && "Value already exists in VPlan");
2167 Value2VPValue
[V
] = VPV
;
2170 VPValue
*getVPValue(Value
*V
) {
2171 assert(V
&& "Trying to get the VPValue of a null Value");
2172 assert(Value2VPValue
.count(V
) && "Value does not exist in VPlan");
2173 return Value2VPValue
[V
];
2176 VPValue
*getOrAddVPValue(Value
*V
) {
2177 assert(V
&& "Trying to get or add the VPValue of a null Value");
2178 if (!Value2VPValue
.count(V
))
2180 return getVPValue(V
);
2183 void removeVPValueFor(Value
*V
) { Value2VPValue
.erase(V
); }
2185 /// Return the VPLoopInfo analysis for this VPlan.
2186 VPLoopInfo
&getVPLoopInfo() { return VPLInfo
; }
2187 const VPLoopInfo
&getVPLoopInfo() const { return VPLInfo
; }
2189 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2190 /// Print this VPlan to \p O.
2191 void print(raw_ostream
&O
) const;
2193 /// Print this VPlan in DOT format to \p O.
2194 void printDOT(raw_ostream
&O
) const;
2196 /// Dump the plan to stderr (for debugging).
2197 LLVM_DUMP_METHOD
void dump() const;
2200 /// Returns a range mapping the values the range \p Operands to their
2201 /// corresponding VPValues.
2202 iterator_range
<mapped_iterator
<Use
*, std::function
<VPValue
*(Value
*)>>>
2203 mapToVPValues(User::op_range Operands
) {
2204 std::function
<VPValue
*(Value
*)> Fn
= [this](Value
*Op
) {
2205 return getOrAddVPValue(Op
);
2207 return map_range(Operands
, Fn
);
2211 /// Add to the given dominator tree the header block and every new basic block
2212 /// that was created between it and the latch block, inclusive.
2213 static void updateDominatorTree(DominatorTree
*DT
, BasicBlock
*LoopLatchBB
,
2214 BasicBlock
*LoopPreHeaderBB
,
2215 BasicBlock
*LoopExitBB
);
2218 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2219 /// VPlanPrinter prints a given VPlan to a given output stream. The printing is
2220 /// indented and follows the dot format.
2221 class VPlanPrinter
{
2225 unsigned TabWidth
= 2;
2228 SmallDenseMap
<const VPBlockBase
*, unsigned> BlockID
;
2230 VPSlotTracker SlotTracker
;
2232 /// Handle indentation.
2233 void bumpIndent(int b
) { Indent
= std::string((Depth
+= b
) * TabWidth
, ' '); }
2235 /// Print a given \p Block of the Plan.
2236 void dumpBlock(const VPBlockBase
*Block
);
2238 /// Print the information related to the CFG edges going out of a given
2239 /// \p Block, followed by printing the successor blocks themselves.
2240 void dumpEdges(const VPBlockBase
*Block
);
2242 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
2243 /// its successor blocks.
2244 void dumpBasicBlock(const VPBasicBlock
*BasicBlock
);
2246 /// Print a given \p Region of the Plan.
2247 void dumpRegion(const VPRegionBlock
*Region
);
2249 unsigned getOrCreateBID(const VPBlockBase
*Block
) {
2250 return BlockID
.count(Block
) ? BlockID
[Block
] : BlockID
[Block
] = BID
++;
2253 Twine
getOrCreateName(const VPBlockBase
*Block
);
2255 Twine
getUID(const VPBlockBase
*Block
);
2257 /// Print the information related to a CFG edge between two VPBlockBases.
2258 void drawEdge(const VPBlockBase
*From
, const VPBlockBase
*To
, bool Hidden
,
2259 const Twine
&Label
);
2262 VPlanPrinter(raw_ostream
&O
, const VPlan
&P
)
2263 : OS(O
), Plan(P
), SlotTracker(&P
) {}
2265 LLVM_DUMP_METHOD
void dump();
2268 struct VPlanIngredient
{
2271 VPlanIngredient(const Value
*V
) : V(V
) {}
2273 void print(raw_ostream
&O
) const;
2276 inline raw_ostream
&operator<<(raw_ostream
&OS
, const VPlanIngredient
&I
) {
2281 inline raw_ostream
&operator<<(raw_ostream
&OS
, const VPlan
&Plan
) {
2287 //===----------------------------------------------------------------------===//
2289 //===----------------------------------------------------------------------===//
2291 /// Class that provides utilities for VPBlockBases in VPlan.
2292 class VPBlockUtils
{
2294 VPBlockUtils() = delete;
2296 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
2297 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
2298 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. If \p BlockPtr
2299 /// has more than one successor, its conditional bit is propagated to \p
2300 /// NewBlock. \p NewBlock must have neither successors nor predecessors.
2301 static void insertBlockAfter(VPBlockBase
*NewBlock
, VPBlockBase
*BlockPtr
) {
2302 assert(NewBlock
->getSuccessors().empty() &&
2303 "Can't insert new block with successors.");
2304 // TODO: move successors from BlockPtr to NewBlock when this functionality
2305 // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr
2306 // already has successors.
2307 BlockPtr
->setOneSuccessor(NewBlock
);
2308 NewBlock
->setPredecessors({BlockPtr
});
2309 NewBlock
->setParent(BlockPtr
->getParent());
2312 /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
2313 /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p
2314 /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
2315 /// parent to \p IfTrue and \p IfFalse. \p Condition is set as the successor
2316 /// selector. \p BlockPtr must have no successors and \p IfTrue and \p IfFalse
2317 /// must have neither successors nor predecessors.
2318 static void insertTwoBlocksAfter(VPBlockBase
*IfTrue
, VPBlockBase
*IfFalse
,
2319 VPValue
*Condition
, VPBlockBase
*BlockPtr
) {
2320 assert(IfTrue
->getSuccessors().empty() &&
2321 "Can't insert IfTrue with successors.");
2322 assert(IfFalse
->getSuccessors().empty() &&
2323 "Can't insert IfFalse with successors.");
2324 BlockPtr
->setTwoSuccessors(IfTrue
, IfFalse
, Condition
);
2325 IfTrue
->setPredecessors({BlockPtr
});
2326 IfFalse
->setPredecessors({BlockPtr
});
2327 IfTrue
->setParent(BlockPtr
->getParent());
2328 IfFalse
->setParent(BlockPtr
->getParent());
2331 /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
2332 /// the successors of \p From and \p From to the predecessors of \p To. Both
2333 /// VPBlockBases must have the same parent, which can be null. Both
2334 /// VPBlockBases can be already connected to other VPBlockBases.
2335 static void connectBlocks(VPBlockBase
*From
, VPBlockBase
*To
) {
2336 assert((From
->getParent() == To
->getParent()) &&
2337 "Can't connect two block with different parents");
2338 assert(From
->getNumSuccessors() < 2 &&
2339 "Blocks can't have more than two successors.");
2340 From
->appendSuccessor(To
);
2341 To
->appendPredecessor(From
);
2344 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
2345 /// from the successors of \p From and \p From from the predecessors of \p To.
2346 static void disconnectBlocks(VPBlockBase
*From
, VPBlockBase
*To
) {
2347 assert(To
&& "Successor to disconnect is null.");
2348 From
->removeSuccessor(To
);
2349 To
->removePredecessor(From
);
2352 /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge.
2353 static bool isBackEdge(const VPBlockBase
*FromBlock
,
2354 const VPBlockBase
*ToBlock
, const VPLoopInfo
*VPLI
) {
2355 assert(FromBlock
->getParent() == ToBlock
->getParent() &&
2356 FromBlock
->getParent() && "Must be in same region");
2357 const VPLoop
*FromLoop
= VPLI
->getLoopFor(FromBlock
);
2358 const VPLoop
*ToLoop
= VPLI
->getLoopFor(ToBlock
);
2359 if (!FromLoop
|| !ToLoop
|| FromLoop
!= ToLoop
)
2362 // A back-edge is a branch from the loop latch to its header.
2363 return ToLoop
->isLoopLatch(FromBlock
) && ToBlock
== ToLoop
->getHeader();
2366 /// Returns true if \p Block is a loop latch
2367 static bool blockIsLoopLatch(const VPBlockBase
*Block
,
2368 const VPLoopInfo
*VPLInfo
) {
2369 if (const VPLoop
*ParentVPL
= VPLInfo
->getLoopFor(Block
))
2370 return ParentVPL
->isLoopLatch(Block
);
2375 /// Count and return the number of succesors of \p PredBlock excluding any
2377 static unsigned countSuccessorsNoBE(VPBlockBase
*PredBlock
,
2380 for (VPBlockBase
*SuccBlock
: PredBlock
->getSuccessors()) {
2381 if (!VPBlockUtils::isBackEdge(PredBlock
, SuccBlock
, VPLI
))
2387 /// Return an iterator range over \p Range which only includes \p BlockTy
2388 /// blocks. The accesses are casted to \p BlockTy.
2389 template <typename BlockTy
, typename T
>
2390 static auto blocksOnly(const T
&Range
) {
2391 // Create BaseTy with correct const-ness based on BlockTy.
2393 typename
std::conditional
<std::is_const
<BlockTy
>::value
,
2394 const VPBlockBase
, VPBlockBase
>::type
;
2396 // We need to first create an iterator range over (const) BlocktTy & instead
2397 // of (const) BlockTy * for filter_range to work properly.
2399 map_range(Range
, [](BaseTy
*Block
) -> BaseTy
& { return *Block
; });
2400 auto Filter
= make_filter_range(
2401 Mapped
, [](BaseTy
&Block
) { return isa
<BlockTy
>(&Block
); });
2402 return map_range(Filter
, [](BaseTy
&Block
) -> BlockTy
* {
2403 return cast
<BlockTy
>(&Block
);
2408 class VPInterleavedAccessInfo
{
2409 DenseMap
<VPInstruction
*, InterleaveGroup
<VPInstruction
> *>
2412 /// Type for mapping of instruction based interleave groups to VPInstruction
2413 /// interleave groups
2414 using Old2NewTy
= DenseMap
<InterleaveGroup
<Instruction
> *,
2415 InterleaveGroup
<VPInstruction
> *>;
2417 /// Recursively \p Region and populate VPlan based interleave groups based on
2419 void visitRegion(VPRegionBlock
*Region
, Old2NewTy
&Old2New
,
2420 InterleavedAccessInfo
&IAI
);
2421 /// Recursively traverse \p Block and populate VPlan based interleave groups
2422 /// based on \p IAI.
2423 void visitBlock(VPBlockBase
*Block
, Old2NewTy
&Old2New
,
2424 InterleavedAccessInfo
&IAI
);
2427 VPInterleavedAccessInfo(VPlan
&Plan
, InterleavedAccessInfo
&IAI
);
2429 ~VPInterleavedAccessInfo() {
2430 SmallPtrSet
<InterleaveGroup
<VPInstruction
> *, 4> DelSet
;
2431 // Avoid releasing a pointer twice.
2432 for (auto &I
: InterleaveGroupMap
)
2433 DelSet
.insert(I
.second
);
2434 for (auto *Ptr
: DelSet
)
2438 /// Get the interleave group that \p Instr belongs to.
2440 /// \returns nullptr if doesn't have such group.
2441 InterleaveGroup
<VPInstruction
> *
2442 getInterleaveGroup(VPInstruction
*Instr
) const {
2443 return InterleaveGroupMap
.lookup(Instr
);
2447 /// Class that maps (parts of) an existing VPlan to trees of combined
2450 enum class OpMode
{ Failed
, Load
, Opcode
};
2452 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
2454 struct BundleDenseMapInfo
{
2455 static SmallVector
<VPValue
*, 4> getEmptyKey() {
2456 return {reinterpret_cast<VPValue
*>(-1)};
2459 static SmallVector
<VPValue
*, 4> getTombstoneKey() {
2460 return {reinterpret_cast<VPValue
*>(-2)};
2463 static unsigned getHashValue(const SmallVector
<VPValue
*, 4> &V
) {
2464 return static_cast<unsigned>(hash_combine_range(V
.begin(), V
.end()));
2467 static bool isEqual(const SmallVector
<VPValue
*, 4> &LHS
,
2468 const SmallVector
<VPValue
*, 4> &RHS
) {
2473 /// Mapping of values in the original VPlan to a combined VPInstruction.
2474 DenseMap
<SmallVector
<VPValue
*, 4>, VPInstruction
*, BundleDenseMapInfo
>
2477 VPInterleavedAccessInfo
&IAI
;
2479 /// Basic block to operate on. For now, only instructions in a single BB are
2481 const VPBasicBlock
&BB
;
2483 /// Indicates whether we managed to combine all visited instructions or not.
2484 bool CompletelySLP
= true;
2486 /// Width of the widest combined bundle in bits.
2487 unsigned WidestBundleBits
= 0;
2489 using MultiNodeOpTy
=
2490 typename
std::pair
<VPInstruction
*, SmallVector
<VPValue
*, 4>>;
2492 // Input operand bundles for the current multi node. Each multi node operand
2493 // bundle contains values not matching the multi node's opcode. They will
2494 // be reordered in reorderMultiNodeOps, once we completed building a
2496 SmallVector
<MultiNodeOpTy
, 4> MultiNodeOps
;
2498 /// Indicates whether we are building a multi node currently.
2499 bool MultiNodeActive
= false;
2501 /// Check if we can vectorize Operands together.
2502 bool areVectorizable(ArrayRef
<VPValue
*> Operands
) const;
2504 /// Add combined instruction \p New for the bundle \p Operands.
2505 void addCombined(ArrayRef
<VPValue
*> Operands
, VPInstruction
*New
);
2507 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
2508 VPInstruction
*markFailed();
2510 /// Reorder operands in the multi node to maximize sequential memory access
2511 /// and commutative operations.
2512 SmallVector
<MultiNodeOpTy
, 4> reorderMultiNodeOps();
2514 /// Choose the best candidate to use for the lane after \p Last. The set of
2515 /// candidates to choose from are values with an opcode matching \p Last's
2516 /// or loads consecutive to \p Last.
2517 std::pair
<OpMode
, VPValue
*> getBest(OpMode Mode
, VPValue
*Last
,
2518 SmallPtrSetImpl
<VPValue
*> &Candidates
,
2519 VPInterleavedAccessInfo
&IAI
);
2521 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2522 /// Print bundle \p Values to dbgs().
2523 void dumpBundle(ArrayRef
<VPValue
*> Values
);
2527 VPlanSlp(VPInterleavedAccessInfo
&IAI
, VPBasicBlock
&BB
) : IAI(IAI
), BB(BB
) {}
2529 ~VPlanSlp() = default;
2531 /// Tries to build an SLP tree rooted at \p Operands and returns a
2532 /// VPInstruction combining \p Operands, if they can be combined.
2533 VPInstruction
*buildGraph(ArrayRef
<VPValue
*> Operands
);
2535 /// Return the width of the widest combined bundle in bits.
2536 unsigned getWidestBundleBits() const { return WidestBundleBits
; }
2538 /// Return true if all visited instruction can be combined.
2539 bool isCompletelySLP() const { return CompletelySLP
; }
2541 } // end namespace llvm
2543 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H