1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This is the LLVM vectorization plan. It represents a candidate for
11 /// vectorization, allowing to plan and optimize how to vectorize a given loop
12 /// before generating LLVM-IR.
13 /// The vectorizer uses vectorization plans to estimate the costs of potential
14 /// candidates and if profitable to execute the desired plan, generating vector
17 //===----------------------------------------------------------------------===//
21 #include "VPlanDominatorTree.h"
22 #include "llvm/ADT/PostOrderIterator.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/CFG.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/GenericDomTreeConstruction.h"
39 #include "llvm/Support/GraphWriter.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
42 #include "llvm/Transforms/Utils/LoopVersioning.h"
43 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
51 extern cl::opt
<bool> EnableVPlanNativePath
;
54 #define DEBUG_TYPE "vplan"
56 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
57 raw_ostream
&llvm::operator<<(raw_ostream
&OS
, const VPValue
&V
) {
58 const VPInstruction
*Instr
= dyn_cast
<VPInstruction
>(&V
);
59 VPSlotTracker
SlotTracker(
60 (Instr
&& Instr
->getParent()) ? Instr
->getParent()->getPlan() : nullptr);
61 V
.print(OS
, SlotTracker
);
66 Value
*VPLane::getAsRuntimeExpr(IRBuilderBase
&Builder
,
67 const ElementCount
&VF
) const {
69 case VPLane::Kind::ScalableLast
:
70 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
71 return Builder
.CreateSub(getRuntimeVF(Builder
, Builder
.getInt32Ty(), VF
),
72 Builder
.getInt32(VF
.getKnownMinValue() - Lane
));
73 case VPLane::Kind::First
:
74 return Builder
.getInt32(Lane
);
76 llvm_unreachable("Unknown lane kind");
79 VPValue::VPValue(const unsigned char SC
, Value
*UV
, VPDef
*Def
)
80 : SubclassID(SC
), UnderlyingVal(UV
), Def(Def
) {
82 Def
->addDefinedValue(this);
86 assert(Users
.empty() && "trying to delete a VPValue with remaining users");
88 Def
->removeDefinedValue(this);
91 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
92 void VPValue::print(raw_ostream
&OS
, VPSlotTracker
&SlotTracker
) const {
93 if (const VPRecipeBase
*R
= dyn_cast_or_null
<VPRecipeBase
>(Def
))
94 R
->print(OS
, "", SlotTracker
);
96 printAsOperand(OS
, SlotTracker
);
99 void VPValue::dump() const {
100 const VPRecipeBase
*Instr
= dyn_cast_or_null
<VPRecipeBase
>(this->Def
);
101 VPSlotTracker
SlotTracker(
102 (Instr
&& Instr
->getParent()) ? Instr
->getParent()->getPlan() : nullptr);
103 print(dbgs(), SlotTracker
);
107 void VPDef::dump() const {
108 const VPRecipeBase
*Instr
= dyn_cast_or_null
<VPRecipeBase
>(this);
109 VPSlotTracker
SlotTracker(
110 (Instr
&& Instr
->getParent()) ? Instr
->getParent()->getPlan() : nullptr);
111 print(dbgs(), "", SlotTracker
);
116 VPRecipeBase
*VPValue::getDefiningRecipe() {
117 return cast_or_null
<VPRecipeBase
>(Def
);
120 const VPRecipeBase
*VPValue::getDefiningRecipe() const {
121 return cast_or_null
<VPRecipeBase
>(Def
);
124 // Get the top-most entry block of \p Start. This is the entry block of the
125 // containing VPlan. This function is templated to support both const and non-const blocks
126 template <typename T
> static T
*getPlanEntry(T
*Start
) {
129 while ((Next
= Next
->getParent()))
132 SmallSetVector
<T
*, 8> WorkList
;
133 WorkList
.insert(Current
);
135 for (unsigned i
= 0; i
< WorkList
.size(); i
++) {
136 T
*Current
= WorkList
[i
];
137 if (Current
->getNumPredecessors() == 0)
139 auto &Predecessors
= Current
->getPredecessors();
140 WorkList
.insert(Predecessors
.begin(), Predecessors
.end());
143 llvm_unreachable("VPlan without any entry node without predecessors");
146 VPlan
*VPBlockBase::getPlan() { return getPlanEntry(this)->Plan
; }
148 const VPlan
*VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan
; }
150 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
151 const VPBasicBlock
*VPBlockBase::getEntryBasicBlock() const {
152 const VPBlockBase
*Block
= this;
153 while (const VPRegionBlock
*Region
= dyn_cast
<VPRegionBlock
>(Block
))
154 Block
= Region
->getEntry();
155 return cast
<VPBasicBlock
>(Block
);
158 VPBasicBlock
*VPBlockBase::getEntryBasicBlock() {
159 VPBlockBase
*Block
= this;
160 while (VPRegionBlock
*Region
= dyn_cast
<VPRegionBlock
>(Block
))
161 Block
= Region
->getEntry();
162 return cast
<VPBasicBlock
>(Block
);
165 void VPBlockBase::setPlan(VPlan
*ParentPlan
) {
167 (ParentPlan
->getEntry() == this || ParentPlan
->getPreheader() == this) &&
168 "Can only set plan on its entry or preheader block.");
172 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
173 const VPBasicBlock
*VPBlockBase::getExitingBasicBlock() const {
174 const VPBlockBase
*Block
= this;
175 while (const VPRegionBlock
*Region
= dyn_cast
<VPRegionBlock
>(Block
))
176 Block
= Region
->getExiting();
177 return cast
<VPBasicBlock
>(Block
);
180 VPBasicBlock
*VPBlockBase::getExitingBasicBlock() {
181 VPBlockBase
*Block
= this;
182 while (VPRegionBlock
*Region
= dyn_cast
<VPRegionBlock
>(Block
))
183 Block
= Region
->getExiting();
184 return cast
<VPBasicBlock
>(Block
);
187 VPBlockBase
*VPBlockBase::getEnclosingBlockWithSuccessors() {
188 if (!Successors
.empty() || !Parent
)
190 assert(Parent
->getExiting() == this &&
191 "Block w/o successors not the exiting block of its parent.");
192 return Parent
->getEnclosingBlockWithSuccessors();
195 VPBlockBase
*VPBlockBase::getEnclosingBlockWithPredecessors() {
196 if (!Predecessors
.empty() || !Parent
)
198 assert(Parent
->getEntry() == this &&
199 "Block w/o predecessors not the entry of its parent.");
200 return Parent
->getEnclosingBlockWithPredecessors();
203 void VPBlockBase::deleteCFG(VPBlockBase
*Entry
) {
204 for (VPBlockBase
*Block
: to_vector(vp_depth_first_shallow(Entry
)))
208 VPBasicBlock::iterator
VPBasicBlock::getFirstNonPhi() {
209 iterator It
= begin();
210 while (It
!= end() && It
->isPhi())
215 Value
*VPTransformState::get(VPValue
*Def
, const VPIteration
&Instance
) {
217 return Def
->getLiveInIRValue();
219 if (hasScalarValue(Def
, Instance
)) {
221 .PerPartScalars
[Def
][Instance
.Part
][Instance
.Lane
.mapToCacheIndex(VF
)];
224 assert(hasVectorValue(Def
, Instance
.Part
));
225 auto *VecPart
= Data
.PerPartOutput
[Def
][Instance
.Part
];
226 if (!VecPart
->getType()->isVectorTy()) {
227 assert(Instance
.Lane
.isFirstLane() && "cannot get lane > 0 for scalar");
230 // TODO: Cache created scalar values.
231 Value
*Lane
= Instance
.Lane
.getAsRuntimeExpr(Builder
, VF
);
232 auto *Extract
= Builder
.CreateExtractElement(VecPart
, Lane
);
233 // set(Def, Extract, Instance);
237 Value
*VPTransformState::get(VPValue
*Def
, unsigned Part
) {
238 // If Values have been set for this Def return the one relevant for \p Part.
239 if (hasVectorValue(Def
, Part
))
240 return Data
.PerPartOutput
[Def
][Part
];
242 auto GetBroadcastInstrs
= [this, Def
](Value
*V
) {
243 bool SafeToHoist
= Def
->isDefinedOutsideVectorRegions();
246 // Place the code for broadcasting invariant variables in the new preheader.
247 IRBuilder
<>::InsertPointGuard
Guard(Builder
);
249 BasicBlock
*LoopVectorPreHeader
= CFG
.VPBB2IRBB
[cast
<VPBasicBlock
>(
250 Plan
->getVectorLoopRegion()->getSinglePredecessor())];
251 if (LoopVectorPreHeader
)
252 Builder
.SetInsertPoint(LoopVectorPreHeader
->getTerminator());
255 // Place the code for broadcasting invariant variables in the new preheader.
256 // Broadcast the scalar into all locations in the vector.
257 Value
*Shuf
= Builder
.CreateVectorSplat(VF
, V
, "broadcast");
262 if (!hasScalarValue(Def
, {Part
, 0})) {
263 assert(Def
->isLiveIn() && "expected a live-in");
266 Value
*IRV
= Def
->getLiveInIRValue();
267 Value
*B
= GetBroadcastInstrs(IRV
);
272 Value
*ScalarValue
= get(Def
, {Part
, 0});
273 // If we aren't vectorizing, we can just copy the scalar map values over
274 // to the vector map.
276 set(Def
, ScalarValue
, Part
);
280 bool IsUniform
= vputils::isUniformAfterVectorization(Def
);
282 unsigned LastLane
= IsUniform
? 0 : VF
.getKnownMinValue() - 1;
283 // Check if there is a scalar value for the selected lane.
284 if (!hasScalarValue(Def
, {Part
, LastLane
})) {
285 // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
286 // VPExpandSCEVRecipes can also be uniform.
287 assert((isa
<VPWidenIntOrFpInductionRecipe
>(Def
->getDefiningRecipe()) ||
288 isa
<VPScalarIVStepsRecipe
>(Def
->getDefiningRecipe()) ||
289 isa
<VPExpandSCEVRecipe
>(Def
->getDefiningRecipe())) &&
290 "unexpected recipe found to be invariant");
295 auto *LastInst
= cast
<Instruction
>(get(Def
, {Part
, LastLane
}));
296 // Set the insert point after the last scalarized instruction or after the
297 // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
298 // will directly follow the scalar definitions.
299 auto OldIP
= Builder
.saveIP();
301 isa
<PHINode
>(LastInst
)
302 ? BasicBlock::iterator(LastInst
->getParent()->getFirstNonPHI())
303 : std::next(BasicBlock::iterator(LastInst
));
304 Builder
.SetInsertPoint(&*NewIP
);
306 // However, if we are vectorizing, we need to construct the vector values.
307 // If the value is known to be uniform after vectorization, we can just
308 // broadcast the scalar value corresponding to lane zero for each unroll
309 // iteration. Otherwise, we construct the vector values using
310 // insertelement instructions. Since the resulting vectors are stored in
311 // State, we will only generate the insertelements once.
312 Value
*VectorValue
= nullptr;
314 VectorValue
= GetBroadcastInstrs(ScalarValue
);
315 set(Def
, VectorValue
, Part
);
317 // Initialize packing with insertelements to start from undef.
318 assert(!VF
.isScalable() && "VF is assumed to be non scalable.");
319 Value
*Undef
= PoisonValue::get(VectorType::get(LastInst
->getType(), VF
));
320 set(Def
, Undef
, Part
);
321 for (unsigned Lane
= 0; Lane
< VF
.getKnownMinValue(); ++Lane
)
322 packScalarIntoVectorValue(Def
, {Part
, Lane
});
323 VectorValue
= get(Def
, Part
);
325 Builder
.restoreIP(OldIP
);
329 BasicBlock
*VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase
*R
) {
330 VPRegionBlock
*LoopRegion
= R
->getParent()->getEnclosingLoopRegion();
331 return VPBB2IRBB
[LoopRegion
->getPreheaderVPBB()];
334 void VPTransformState::addNewMetadata(Instruction
*To
,
335 const Instruction
*Orig
) {
336 // If the loop was versioned with memchecks, add the corresponding no-alias
338 if (LVer
&& (isa
<LoadInst
>(Orig
) || isa
<StoreInst
>(Orig
)))
339 LVer
->annotateInstWithNoAlias(To
, Orig
);
342 void VPTransformState::addMetadata(Instruction
*To
, Instruction
*From
) {
343 // No source instruction to transfer metadata from?
347 propagateMetadata(To
, From
);
348 addNewMetadata(To
, From
);
351 void VPTransformState::addMetadata(ArrayRef
<Value
*> To
, Instruction
*From
) {
352 // No source instruction to transfer metadata from?
356 for (Value
*V
: To
) {
357 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
358 addMetadata(I
, From
);
362 void VPTransformState::setDebugLocFrom(DebugLoc DL
) {
363 const DILocation
*DIL
= DL
;
364 // When a FSDiscriminator is enabled, we don't need to add the multiply
365 // factors to the discriminators.
367 Builder
.GetInsertBlock()
369 ->shouldEmitDebugInfoForProfiling() &&
370 !EnableFSDiscriminator
) {
371 // FIXME: For scalable vectors, assume vscale=1.
373 DIL
->cloneByMultiplyingDuplicationFactor(UF
* VF
.getKnownMinValue());
375 Builder
.SetCurrentDebugLocation(*NewDIL
);
377 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
378 << DIL
->getFilename() << " Line: " << DIL
->getLine());
380 Builder
.SetCurrentDebugLocation(DIL
);
383 void VPTransformState::packScalarIntoVectorValue(VPValue
*Def
,
384 const VPIteration
&Instance
) {
385 Value
*ScalarInst
= get(Def
, Instance
);
386 Value
*VectorValue
= get(Def
, Instance
.Part
);
387 VectorValue
= Builder
.CreateInsertElement(
388 VectorValue
, ScalarInst
, Instance
.Lane
.getAsRuntimeExpr(Builder
, VF
));
389 set(Def
, VectorValue
, Instance
.Part
);
393 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState
&CFG
) {
394 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
395 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
396 BasicBlock
*PrevBB
= CFG
.PrevBB
;
397 BasicBlock
*NewBB
= BasicBlock::Create(PrevBB
->getContext(), getName(),
398 PrevBB
->getParent(), CFG
.ExitBB
);
399 LLVM_DEBUG(dbgs() << "LV: created " << NewBB
->getName() << '\n');
401 // Hook up the new basic block to its predecessors.
402 for (VPBlockBase
*PredVPBlock
: getHierarchicalPredecessors()) {
403 VPBasicBlock
*PredVPBB
= PredVPBlock
->getExitingBasicBlock();
404 auto &PredVPSuccessors
= PredVPBB
->getHierarchicalSuccessors();
405 BasicBlock
*PredBB
= CFG
.VPBB2IRBB
[PredVPBB
];
407 assert(PredBB
&& "Predecessor basic-block not found building successor.");
408 auto *PredBBTerminator
= PredBB
->getTerminator();
409 LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB
->getName() << '\n');
411 auto *TermBr
= dyn_cast
<BranchInst
>(PredBBTerminator
);
412 if (isa
<UnreachableInst
>(PredBBTerminator
)) {
413 assert(PredVPSuccessors
.size() == 1 &&
414 "Predecessor ending w/o branch must have single successor.");
415 DebugLoc DL
= PredBBTerminator
->getDebugLoc();
416 PredBBTerminator
->eraseFromParent();
417 auto *Br
= BranchInst::Create(NewBB
, PredBB
);
419 } else if (TermBr
&& !TermBr
->isConditional()) {
420 TermBr
->setSuccessor(0, NewBB
);
422 // Set each forward successor here when it is created, excluding
423 // backedges. A backward successor is set when the branch is created.
424 unsigned idx
= PredVPSuccessors
.front() == this ? 0 : 1;
425 assert(!TermBr
->getSuccessor(idx
) &&
426 "Trying to reset an existing successor block.");
427 TermBr
->setSuccessor(idx
, NewBB
);
433 void VPBasicBlock::execute(VPTransformState
*State
) {
434 bool Replica
= State
->Instance
&& !State
->Instance
->isFirstIteration();
435 VPBasicBlock
*PrevVPBB
= State
->CFG
.PrevVPBB
;
436 VPBlockBase
*SingleHPred
= nullptr;
437 BasicBlock
*NewBB
= State
->CFG
.PrevBB
; // Reuse it if possible.
439 auto IsLoopRegion
= [](VPBlockBase
*BB
) {
440 auto *R
= dyn_cast
<VPRegionBlock
>(BB
);
441 return R
&& !R
->isReplicator();
444 // 1. Create an IR basic block, or reuse the last one or ExitBB if possible.
445 if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) {
446 // ExitBB can be re-used for the exit block of the Plan.
447 NewBB
= State
->CFG
.ExitBB
;
448 State
->CFG
.PrevBB
= NewBB
;
449 State
->Builder
.SetInsertPoint(NewBB
->getFirstNonPHI());
451 // Update the branch instruction in the predecessor to branch to ExitBB.
452 VPBlockBase
*PredVPB
= getSingleHierarchicalPredecessor();
453 VPBasicBlock
*ExitingVPBB
= PredVPB
->getExitingBasicBlock();
454 assert(PredVPB
->getSingleSuccessor() == this &&
455 "predecessor must have the current block as only successor");
456 BasicBlock
*ExitingBB
= State
->CFG
.VPBB2IRBB
[ExitingVPBB
];
457 // The Exit block of a loop is always set to be successor 0 of the Exiting
459 cast
<BranchInst
>(ExitingBB
->getTerminator())->setSuccessor(0, NewBB
);
460 } else if (PrevVPBB
&& /* A */
461 !((SingleHPred
= getSingleHierarchicalPredecessor()) &&
462 SingleHPred
->getExitingBasicBlock() == PrevVPBB
&&
463 PrevVPBB
->getSingleHierarchicalSuccessor() &&
464 (SingleHPred
->getParent() == getEnclosingLoopRegion() &&
465 !IsLoopRegion(SingleHPred
))) && /* B */
466 !(Replica
&& getPredecessors().empty())) { /* C */
467 // The last IR basic block is reused, as an optimization, in three cases:
468 // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
469 // B. when the current VPBB has a single (hierarchical) predecessor which
470 // is PrevVPBB and the latter has a single (hierarchical) successor which
471 // both are in the same non-replicator region; and
472 // C. when the current VPBB is an entry of a region replica - where PrevVPBB
473 // is the exiting VPBB of this region from a previous instance, or the
474 // predecessor of this region.
476 NewBB
= createEmptyBasicBlock(State
->CFG
);
477 State
->Builder
.SetInsertPoint(NewBB
);
478 // Temporarily terminate with unreachable until CFG is rewired.
479 UnreachableInst
*Terminator
= State
->Builder
.CreateUnreachable();
480 // Register NewBB in its loop. In innermost loops its the same for all
482 if (State
->CurrentVectorLoop
)
483 State
->CurrentVectorLoop
->addBasicBlockToLoop(NewBB
, *State
->LI
);
484 State
->Builder
.SetInsertPoint(Terminator
);
485 State
->CFG
.PrevBB
= NewBB
;
488 // 2. Fill the IR basic block with IR instructions.
489 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
490 << " in BB:" << NewBB
->getName() << '\n');
492 State
->CFG
.VPBB2IRBB
[this] = NewBB
;
493 State
->CFG
.PrevVPBB
= this;
495 for (VPRecipeBase
&Recipe
: Recipes
)
496 Recipe
.execute(*State
);
498 LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB
);
501 void VPBasicBlock::dropAllReferences(VPValue
*NewValue
) {
502 for (VPRecipeBase
&R
: Recipes
) {
503 for (auto *Def
: R
.definedValues())
504 Def
->replaceAllUsesWith(NewValue
);
506 for (unsigned I
= 0, E
= R
.getNumOperands(); I
!= E
; I
++)
507 R
.setOperand(I
, NewValue
);
511 VPBasicBlock
*VPBasicBlock::splitAt(iterator SplitAt
) {
512 assert((SplitAt
== end() || SplitAt
->getParent() == this) &&
513 "can only split at a position in the same block");
515 SmallVector
<VPBlockBase
*, 2> Succs(successors());
516 // First, disconnect the current block from its successors.
517 for (VPBlockBase
*Succ
: Succs
)
518 VPBlockUtils::disconnectBlocks(this, Succ
);
520 // Create new empty block after the block to split.
521 auto *SplitBlock
= new VPBasicBlock(getName() + ".split");
522 VPBlockUtils::insertBlockAfter(SplitBlock
, this);
524 // Add successors for block to split to new block.
525 for (VPBlockBase
*Succ
: Succs
)
526 VPBlockUtils::connectBlocks(SplitBlock
, Succ
);
528 // Finally, move the recipes starting at SplitAt to new block.
529 for (VPRecipeBase
&ToMove
:
530 make_early_inc_range(make_range(SplitAt
, this->end())))
531 ToMove
.moveBefore(*SplitBlock
, SplitBlock
->end());
536 VPRegionBlock
*VPBasicBlock::getEnclosingLoopRegion() {
537 VPRegionBlock
*P
= getParent();
538 if (P
&& P
->isReplicator()) {
540 assert(!cast
<VPRegionBlock
>(P
)->isReplicator() &&
541 "unexpected nested replicate regions");
546 static bool hasConditionalTerminator(const VPBasicBlock
*VPBB
) {
549 VPBB
->getNumSuccessors() < 2 &&
550 "block with multiple successors doesn't have a recipe as terminator");
554 const VPRecipeBase
*R
= &VPBB
->back();
555 auto *VPI
= dyn_cast
<VPInstruction
>(R
);
557 isa
<VPBranchOnMaskRecipe
>(R
) ||
558 (VPI
&& (VPI
->getOpcode() == VPInstruction::BranchOnCond
||
559 VPI
->getOpcode() == VPInstruction::BranchOnCount
));
562 if (VPBB
->getNumSuccessors() >= 2 || VPBB
->isExiting()) {
563 assert(IsCondBranch
&& "block with multiple successors not terminated by "
564 "conditional branch recipe");
571 "block with 0 or 1 successors terminated by conditional branch recipe");
575 VPRecipeBase
*VPBasicBlock::getTerminator() {
576 if (hasConditionalTerminator(this))
581 const VPRecipeBase
*VPBasicBlock::getTerminator() const {
582 if (hasConditionalTerminator(this))
587 bool VPBasicBlock::isExiting() const {
588 return getParent()->getExitingBasicBlock() == this;
591 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
592 void VPBlockBase::printSuccessors(raw_ostream
&O
, const Twine
&Indent
) const {
593 if (getSuccessors().empty()) {
594 O
<< Indent
<< "No successors\n";
596 O
<< Indent
<< "Successor(s): ";
598 for (auto *Succ
: getSuccessors())
599 O
<< LS
<< Succ
->getName();
604 void VPBasicBlock::print(raw_ostream
&O
, const Twine
&Indent
,
605 VPSlotTracker
&SlotTracker
) const {
606 O
<< Indent
<< getName() << ":\n";
608 auto RecipeIndent
= Indent
+ " ";
609 for (const VPRecipeBase
&Recipe
: *this) {
610 Recipe
.print(O
, RecipeIndent
, SlotTracker
);
614 printSuccessors(O
, Indent
);
618 void VPRegionBlock::dropAllReferences(VPValue
*NewValue
) {
619 for (VPBlockBase
*Block
: vp_depth_first_shallow(Entry
))
620 // Drop all references in VPBasicBlocks and replace all uses with
622 Block
->dropAllReferences(NewValue
);
625 void VPRegionBlock::execute(VPTransformState
*State
) {
626 ReversePostOrderTraversal
<VPBlockShallowTraversalWrapper
<VPBlockBase
*>>
629 if (!isReplicator()) {
630 // Create and register the new vector loop.
631 Loop
*PrevLoop
= State
->CurrentVectorLoop
;
632 State
->CurrentVectorLoop
= State
->LI
->AllocateLoop();
633 BasicBlock
*VectorPH
= State
->CFG
.VPBB2IRBB
[getPreheaderVPBB()];
634 Loop
*ParentLoop
= State
->LI
->getLoopFor(VectorPH
);
636 // Insert the new loop into the loop nest and register the new basic blocks
637 // before calling any utilities such as SCEV that require valid LoopInfo.
639 ParentLoop
->addChildLoop(State
->CurrentVectorLoop
);
641 State
->LI
->addTopLevelLoop(State
->CurrentVectorLoop
);
643 // Visit the VPBlocks connected to "this", starting from it.
644 for (VPBlockBase
*Block
: RPOT
) {
645 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block
->getName() << '\n');
646 Block
->execute(State
);
649 State
->CurrentVectorLoop
= PrevLoop
;
653 assert(!State
->Instance
&& "Replicating a Region with non-null instance.");
655 // Enter replicating mode.
656 State
->Instance
= VPIteration(0, 0);
658 for (unsigned Part
= 0, UF
= State
->UF
; Part
< UF
; ++Part
) {
659 State
->Instance
->Part
= Part
;
660 assert(!State
->VF
.isScalable() && "VF is assumed to be non scalable.");
661 for (unsigned Lane
= 0, VF
= State
->VF
.getKnownMinValue(); Lane
< VF
;
663 State
->Instance
->Lane
= VPLane(Lane
, VPLane::Kind::First
);
664 // Visit the VPBlocks connected to \p this, starting from it.
665 for (VPBlockBase
*Block
: RPOT
) {
666 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block
->getName() << '\n');
667 Block
->execute(State
);
672 // Exit replicating mode.
673 State
->Instance
.reset();
676 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
677 void VPRegionBlock::print(raw_ostream
&O
, const Twine
&Indent
,
678 VPSlotTracker
&SlotTracker
) const {
679 O
<< Indent
<< (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
680 auto NewIndent
= Indent
+ " ";
681 for (auto *BlockBase
: vp_depth_first_shallow(Entry
)) {
683 BlockBase
->print(O
, NewIndent
, SlotTracker
);
685 O
<< Indent
<< "}\n";
687 printSuccessors(O
, Indent
);
692 for (auto &KV
: LiveOuts
)
698 for (VPBlockBase
*Block
: vp_depth_first_shallow(Entry
))
699 Block
->dropAllReferences(&DummyValue
);
701 VPBlockBase::deleteCFG(Entry
);
703 Preheader
->dropAllReferences(&DummyValue
);
706 for (VPValue
*VPV
: VPLiveInsToFree
)
708 if (BackedgeTakenCount
)
709 delete BackedgeTakenCount
;
712 VPlanPtr
VPlan::createInitialVPlan(const SCEV
*TripCount
, ScalarEvolution
&SE
) {
713 VPBasicBlock
*Preheader
= new VPBasicBlock("ph");
714 VPBasicBlock
*VecPreheader
= new VPBasicBlock("vector.ph");
715 auto Plan
= std::make_unique
<VPlan
>(Preheader
, VecPreheader
);
717 vputils::getOrCreateVPValueForSCEVExpr(*Plan
, TripCount
, SE
);
718 // Create empty VPRegionBlock, to be filled during processing later.
719 auto *TopRegion
= new VPRegionBlock("vector loop", false /*isReplicator*/);
720 VPBlockUtils::insertBlockAfter(TopRegion
, VecPreheader
);
721 VPBasicBlock
*MiddleVPBB
= new VPBasicBlock("middle.block");
722 VPBlockUtils::insertBlockAfter(MiddleVPBB
, TopRegion
);
726 void VPlan::prepareToExecute(Value
*TripCountV
, Value
*VectorTripCountV
,
727 Value
*CanonicalIVStartValue
,
728 VPTransformState
&State
) {
729 // Check if the backedge taken count is needed, and if so build it.
730 if (BackedgeTakenCount
&& BackedgeTakenCount
->getNumUsers()) {
731 IRBuilder
<> Builder(State
.CFG
.PrevBB
->getTerminator());
732 auto *TCMO
= Builder
.CreateSub(TripCountV
,
733 ConstantInt::get(TripCountV
->getType(), 1),
734 "trip.count.minus.1");
737 VF
.isScalar() ? TCMO
: Builder
.CreateVectorSplat(VF
, TCMO
, "broadcast");
738 for (unsigned Part
= 0, UF
= State
.UF
; Part
< UF
; ++Part
)
739 State
.set(BackedgeTakenCount
, VTCMO
, Part
);
742 for (unsigned Part
= 0, UF
= State
.UF
; Part
< UF
; ++Part
)
743 State
.set(&VectorTripCount
, VectorTripCountV
, Part
);
745 IRBuilder
<> Builder(State
.CFG
.PrevBB
->getTerminator());
746 // FIXME: Model VF * UF computation completely in VPlan.
748 createStepForVF(Builder
, TripCountV
->getType(), State
.VF
, State
.UF
),
751 // When vectorizing the epilogue loop, the canonical induction start value
752 // needs to be changed from zero to the value after the main vector loop.
753 // FIXME: Improve modeling for canonical IV start values in the epilogue loop.
754 if (CanonicalIVStartValue
) {
755 VPValue
*VPV
= getVPValueOrAddLiveIn(CanonicalIVStartValue
);
756 auto *IV
= getCanonicalIV();
757 assert(all_of(IV
->users(),
758 [](const VPUser
*U
) {
759 return isa
<VPScalarIVStepsRecipe
>(U
) ||
760 isa
<VPDerivedIVRecipe
>(U
) ||
761 cast
<VPInstruction
>(U
)->getOpcode() ==
764 "the canonical IV should only be used by its increment or "
765 "ScalarIVSteps when resetting the start value");
766 IV
->setOperand(0, VPV
);
770 /// Generate the code inside the preheader and body of the vectorized loop.
771 /// Assumes a single pre-header basic-block was created for this. Introduce
772 /// additional basic-blocks as needed, and fill them all.
773 void VPlan::execute(VPTransformState
*State
) {
774 // Set the reverse mapping from VPValues to Values for code generation.
775 for (auto &Entry
: Value2VPValue
)
776 State
->VPValue2Value
[Entry
.second
] = Entry
.first
;
778 // Initialize CFG state.
779 State
->CFG
.PrevVPBB
= nullptr;
780 State
->CFG
.ExitBB
= State
->CFG
.PrevBB
->getSingleSuccessor();
781 BasicBlock
*VectorPreHeader
= State
->CFG
.PrevBB
;
782 State
->Builder
.SetInsertPoint(VectorPreHeader
->getTerminator());
784 // Generate code in the loop pre-header and body.
785 for (VPBlockBase
*Block
: vp_depth_first_shallow(Entry
))
786 Block
->execute(State
);
788 VPBasicBlock
*LatchVPBB
= getVectorLoopRegion()->getExitingBasicBlock();
789 BasicBlock
*VectorLatchBB
= State
->CFG
.VPBB2IRBB
[LatchVPBB
];
791 // Fix the latch value of canonical, reduction and first-order recurrences
792 // phis in the vector loop.
793 VPBasicBlock
*Header
= getVectorLoopRegion()->getEntryBasicBlock();
794 for (VPRecipeBase
&R
: Header
->phis()) {
795 // Skip phi-like recipes that generate their backedege values themselves.
796 if (isa
<VPWidenPHIRecipe
>(&R
))
799 if (isa
<VPWidenPointerInductionRecipe
>(&R
) ||
800 isa
<VPWidenIntOrFpInductionRecipe
>(&R
)) {
801 PHINode
*Phi
= nullptr;
802 if (isa
<VPWidenIntOrFpInductionRecipe
>(&R
)) {
803 Phi
= cast
<PHINode
>(State
->get(R
.getVPSingleValue(), 0));
805 auto *WidenPhi
= cast
<VPWidenPointerInductionRecipe
>(&R
);
806 // TODO: Split off the case that all users of a pointer phi are scalar
807 // from the VPWidenPointerInductionRecipe.
808 if (WidenPhi
->onlyScalarsGenerated(State
->VF
))
811 auto *GEP
= cast
<GetElementPtrInst
>(State
->get(WidenPhi
, 0));
812 Phi
= cast
<PHINode
>(GEP
->getPointerOperand());
815 Phi
->setIncomingBlock(1, VectorLatchBB
);
817 // Move the last step to the end of the latch block. This ensures
818 // consistent placement of all induction updates.
819 Instruction
*Inc
= cast
<Instruction
>(Phi
->getIncomingValue(1));
820 Inc
->moveBefore(VectorLatchBB
->getTerminator()->getPrevNode());
824 auto *PhiR
= cast
<VPHeaderPHIRecipe
>(&R
);
825 // For canonical IV, first-order recurrences and in-order reduction phis,
826 // only a single part is generated, which provides the last part from the
827 // previous iteration. For non-ordered reductions all UF parts are
829 bool SinglePartNeeded
= isa
<VPCanonicalIVPHIRecipe
>(PhiR
) ||
830 isa
<VPFirstOrderRecurrencePHIRecipe
>(PhiR
) ||
831 (isa
<VPReductionPHIRecipe
>(PhiR
) &&
832 cast
<VPReductionPHIRecipe
>(PhiR
)->isOrdered());
833 unsigned LastPartForNewPhi
= SinglePartNeeded
? 1 : State
->UF
;
835 for (unsigned Part
= 0; Part
< LastPartForNewPhi
; ++Part
) {
836 Value
*Phi
= State
->get(PhiR
, Part
);
837 Value
*Val
= State
->get(PhiR
->getBackedgeValue(),
838 SinglePartNeeded
? State
->UF
- 1 : Part
);
839 cast
<PHINode
>(Phi
)->addIncoming(Val
, VectorLatchBB
);
843 // We do not attempt to preserve DT for outer loop vectorization currently.
844 if (!EnableVPlanNativePath
) {
845 BasicBlock
*VectorHeaderBB
= State
->CFG
.VPBB2IRBB
[Header
];
846 State
->DT
->addNewBlock(VectorHeaderBB
, VectorPreHeader
);
847 updateDominatorTree(State
->DT
, VectorHeaderBB
, VectorLatchBB
,
852 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
853 void VPlan::printLiveIns(raw_ostream
&O
) const {
854 VPSlotTracker
SlotTracker(this);
856 if (VFxUF
.getNumUsers() > 0) {
858 VFxUF
.printAsOperand(O
, SlotTracker
);
862 if (VectorTripCount
.getNumUsers() > 0) {
864 VectorTripCount
.printAsOperand(O
, SlotTracker
);
865 O
<< " = vector-trip-count";
868 if (BackedgeTakenCount
&& BackedgeTakenCount
->getNumUsers()) {
870 BackedgeTakenCount
->printAsOperand(O
, SlotTracker
);
871 O
<< " = backedge-taken count";
875 if (TripCount
->isLiveIn())
877 TripCount
->printAsOperand(O
, SlotTracker
);
878 O
<< " = original trip-count";
883 void VPlan::print(raw_ostream
&O
) const {
884 VPSlotTracker
SlotTracker(this);
886 O
<< "VPlan '" << getName() << "' {";
890 if (!getPreheader()->empty()) {
892 getPreheader()->print(O
, "", SlotTracker
);
895 for (const VPBlockBase
*Block
: vp_depth_first_shallow(getEntry())) {
897 Block
->print(O
, "", SlotTracker
);
900 if (!LiveOuts
.empty())
902 for (const auto &KV
: LiveOuts
) {
903 KV
.second
->print(O
, SlotTracker
);
909 std::string
VPlan::getName() const {
911 raw_string_ostream
RSO(Out
);
912 RSO
<< Name
<< " for ";
914 RSO
<< "VF={" << VFs
[0];
915 for (ElementCount VF
: drop_begin(VFs
))
923 RSO
<< "UF={" << UFs
[0];
924 for (unsigned UF
: drop_begin(UFs
))
933 void VPlan::printDOT(raw_ostream
&O
) const {
934 VPlanPrinter
Printer(O
, *this);
939 void VPlan::dump() const { print(dbgs()); }
942 void VPlan::addLiveOut(PHINode
*PN
, VPValue
*V
) {
943 assert(LiveOuts
.count(PN
) == 0 && "an exit value for PN already exists");
944 LiveOuts
.insert({PN
, new VPLiveOut(PN
, V
)});
947 void VPlan::updateDominatorTree(DominatorTree
*DT
, BasicBlock
*LoopHeaderBB
,
948 BasicBlock
*LoopLatchBB
,
949 BasicBlock
*LoopExitBB
) {
950 // The vector body may be more than a single basic-block by this point.
951 // Update the dominator tree information inside the vector body by propagating
952 // it from header to latch, expecting only triangular control-flow, if any.
953 BasicBlock
*PostDomSucc
= nullptr;
954 for (auto *BB
= LoopHeaderBB
; BB
!= LoopLatchBB
; BB
= PostDomSucc
) {
955 // Get the list of successors of this block.
956 std::vector
<BasicBlock
*> Succs(succ_begin(BB
), succ_end(BB
));
957 assert(Succs
.size() <= 2 &&
958 "Basic block in vector loop has more than 2 successors.");
959 PostDomSucc
= Succs
[0];
960 if (Succs
.size() == 1) {
961 assert(PostDomSucc
->getSinglePredecessor() &&
962 "PostDom successor has more than one predecessor.");
963 DT
->addNewBlock(PostDomSucc
, BB
);
966 BasicBlock
*InterimSucc
= Succs
[1];
967 if (PostDomSucc
->getSingleSuccessor() == InterimSucc
) {
968 PostDomSucc
= Succs
[1];
969 InterimSucc
= Succs
[0];
971 assert(InterimSucc
->getSingleSuccessor() == PostDomSucc
&&
972 "One successor of a basic block does not lead to the other.");
973 assert(InterimSucc
->getSinglePredecessor() &&
974 "Interim successor has more than one predecessor.");
975 assert(PostDomSucc
->hasNPredecessors(2) &&
976 "PostDom successor has more than two predecessors.");
977 DT
->addNewBlock(InterimSucc
, BB
);
978 DT
->addNewBlock(PostDomSucc
, BB
);
980 // Latch block is a new dominator for the loop exit.
981 DT
->changeImmediateDominator(LoopExitBB
, LoopLatchBB
);
982 assert(DT
->verify(DominatorTree::VerificationLevel::Fast
));
985 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
987 Twine
VPlanPrinter::getUID(const VPBlockBase
*Block
) {
988 return (isa
<VPRegionBlock
>(Block
) ? "cluster_N" : "N") +
989 Twine(getOrCreateBID(Block
));
992 Twine
VPlanPrinter::getOrCreateName(const VPBlockBase
*Block
) {
993 const std::string
&Name
= Block
->getName();
996 return "VPB" + Twine(getOrCreateBID(Block
));
999 void VPlanPrinter::dump() {
1002 OS
<< "digraph VPlan {\n";
1003 OS
<< "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1004 if (!Plan
.getName().empty())
1005 OS
<< "\\n" << DOT::EscapeString(Plan
.getName());
1010 raw_string_ostream
SS(Str
);
1011 Plan
.printLiveIns(SS
);
1012 SmallVector
<StringRef
, 0> Lines
;
1013 StringRef(Str
).rtrim('\n').split(Lines
, "\n");
1014 for (auto Line
: Lines
)
1015 OS
<< DOT::EscapeString(Line
.str()) << "\\n";
1019 OS
<< "node [shape=rect, fontname=Courier, fontsize=30]\n";
1020 OS
<< "edge [fontname=Courier, fontsize=30]\n";
1021 OS
<< "compound=true\n";
1023 dumpBlock(Plan
.getPreheader());
1025 for (const VPBlockBase
*Block
: vp_depth_first_shallow(Plan
.getEntry()))
1031 void VPlanPrinter::dumpBlock(const VPBlockBase
*Block
) {
1032 if (const VPBasicBlock
*BasicBlock
= dyn_cast
<VPBasicBlock
>(Block
))
1033 dumpBasicBlock(BasicBlock
);
1034 else if (const VPRegionBlock
*Region
= dyn_cast
<VPRegionBlock
>(Block
))
1037 llvm_unreachable("Unsupported kind of VPBlock.");
1040 void VPlanPrinter::drawEdge(const VPBlockBase
*From
, const VPBlockBase
*To
,
1041 bool Hidden
, const Twine
&Label
) {
1042 // Due to "dot" we print an edge between two regions as an edge between the
1043 // exiting basic block and the entry basic of the respective regions.
1044 const VPBlockBase
*Tail
= From
->getExitingBasicBlock();
1045 const VPBlockBase
*Head
= To
->getEntryBasicBlock();
1046 OS
<< Indent
<< getUID(Tail
) << " -> " << getUID(Head
);
1047 OS
<< " [ label=\"" << Label
<< '\"';
1049 OS
<< " ltail=" << getUID(From
);
1051 OS
<< " lhead=" << getUID(To
);
1053 OS
<< "; splines=none";
1057 void VPlanPrinter::dumpEdges(const VPBlockBase
*Block
) {
1058 auto &Successors
= Block
->getSuccessors();
1059 if (Successors
.size() == 1)
1060 drawEdge(Block
, Successors
.front(), false, "");
1061 else if (Successors
.size() == 2) {
1062 drawEdge(Block
, Successors
.front(), false, "T");
1063 drawEdge(Block
, Successors
.back(), false, "F");
1065 unsigned SuccessorNumber
= 0;
1066 for (auto *Successor
: Successors
)
1067 drawEdge(Block
, Successor
, false, Twine(SuccessorNumber
++));
1071 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock
*BasicBlock
) {
1072 // Implement dot-formatted dump by performing plain-text dump into the
1073 // temporary storage followed by some post-processing.
1074 OS
<< Indent
<< getUID(BasicBlock
) << " [label =\n";
1077 raw_string_ostream
SS(Str
);
1078 // Use no indentation as we need to wrap the lines into quotes ourselves.
1079 BasicBlock
->print(SS
, "", SlotTracker
);
1081 // We need to process each line of the output separately, so split
1082 // single-string plain-text dump.
1083 SmallVector
<StringRef
, 0> Lines
;
1084 StringRef(Str
).rtrim('\n').split(Lines
, "\n");
1086 auto EmitLine
= [&](StringRef Line
, StringRef Suffix
) {
1087 OS
<< Indent
<< '"' << DOT::EscapeString(Line
.str()) << "\\l\"" << Suffix
;
1090 // Don't need the "+" after the last line.
1091 for (auto Line
: make_range(Lines
.begin(), Lines
.end() - 1))
1092 EmitLine(Line
, " +\n");
1093 EmitLine(Lines
.back(), "\n");
1096 OS
<< Indent
<< "]\n";
1098 dumpEdges(BasicBlock
);
1101 void VPlanPrinter::dumpRegion(const VPRegionBlock
*Region
) {
1102 OS
<< Indent
<< "subgraph " << getUID(Region
) << " {\n";
1104 OS
<< Indent
<< "fontname=Courier\n"
1105 << Indent
<< "label=\""
1106 << DOT::EscapeString(Region
->isReplicator() ? "<xVFxUF> " : "<x1> ")
1107 << DOT::EscapeString(Region
->getName()) << "\"\n";
1108 // Dump the blocks of the region.
1109 assert(Region
->getEntry() && "Region contains no inner blocks.");
1110 for (const VPBlockBase
*Block
: vp_depth_first_shallow(Region
->getEntry()))
1113 OS
<< Indent
<< "}\n";
1117 void VPlanIngredient::print(raw_ostream
&O
) const {
1118 if (auto *Inst
= dyn_cast
<Instruction
>(V
)) {
1119 if (!Inst
->getType()->isVoidTy()) {
1120 Inst
->printAsOperand(O
, false);
1123 O
<< Inst
->getOpcodeName() << " ";
1124 unsigned E
= Inst
->getNumOperands();
1126 Inst
->getOperand(0)->printAsOperand(O
, false);
1127 for (unsigned I
= 1; I
< E
; ++I
)
1128 Inst
->getOperand(I
)->printAsOperand(O
<< ", ", false);
1131 V
->printAsOperand(O
, false);
1136 template void DomTreeBuilder::Calculate
<VPDominatorTree
>(VPDominatorTree
&DT
);
1138 void VPValue::replaceAllUsesWith(VPValue
*New
) {
1139 replaceUsesWithIf(New
, [](VPUser
&, unsigned) { return true; });
1142 void VPValue::replaceUsesWithIf(
1144 llvm::function_ref
<bool(VPUser
&U
, unsigned Idx
)> ShouldReplace
) {
1145 // Note that this early exit is required for correctness; the implementation
1146 // below relies on the number of users for this VPValue to decrease, which
1147 // isn't the case if this == New.
1151 for (unsigned J
= 0; J
< getNumUsers();) {
1152 VPUser
*User
= Users
[J
];
1153 bool RemovedUser
= false;
1154 for (unsigned I
= 0, E
= User
->getNumOperands(); I
< E
; ++I
) {
1155 if (User
->getOperand(I
) != this || !ShouldReplace(*User
, I
))
1159 User
->setOperand(I
, New
);
1161 // If a user got removed after updating the current user, the next user to
1162 // update will be moved to the current position, so we only need to
1163 // increment the index if the number of users did not change.
1169 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1170 void VPValue::printAsOperand(raw_ostream
&OS
, VPSlotTracker
&Tracker
) const {
1171 if (const Value
*UV
= getUnderlyingValue()) {
1173 UV
->printAsOperand(OS
, false);
1178 unsigned Slot
= Tracker
.getSlot(this);
1179 if (Slot
== unsigned(-1))
1182 OS
<< "vp<%" << Tracker
.getSlot(this) << ">";
1185 void VPUser::printOperands(raw_ostream
&O
, VPSlotTracker
&SlotTracker
) const {
1186 interleaveComma(operands(), O
, [&O
, &SlotTracker
](VPValue
*Op
) {
1187 Op
->printAsOperand(O
, SlotTracker
);
1192 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock
*Region
,
1194 InterleavedAccessInfo
&IAI
) {
1195 ReversePostOrderTraversal
<VPBlockShallowTraversalWrapper
<VPBlockBase
*>>
1196 RPOT(Region
->getEntry());
1197 for (VPBlockBase
*Base
: RPOT
) {
1198 visitBlock(Base
, Old2New
, IAI
);
1202 void VPInterleavedAccessInfo::visitBlock(VPBlockBase
*Block
, Old2NewTy
&Old2New
,
1203 InterleavedAccessInfo
&IAI
) {
1204 if (VPBasicBlock
*VPBB
= dyn_cast
<VPBasicBlock
>(Block
)) {
1205 for (VPRecipeBase
&VPI
: *VPBB
) {
1206 if (isa
<VPHeaderPHIRecipe
>(&VPI
))
1208 assert(isa
<VPInstruction
>(&VPI
) && "Can only handle VPInstructions");
1209 auto *VPInst
= cast
<VPInstruction
>(&VPI
);
1211 auto *Inst
= dyn_cast_or_null
<Instruction
>(VPInst
->getUnderlyingValue());
1214 auto *IG
= IAI
.getInterleaveGroup(Inst
);
1218 auto NewIGIter
= Old2New
.find(IG
);
1219 if (NewIGIter
== Old2New
.end())
1220 Old2New
[IG
] = new InterleaveGroup
<VPInstruction
>(
1221 IG
->getFactor(), IG
->isReverse(), IG
->getAlign());
1223 if (Inst
== IG
->getInsertPos())
1224 Old2New
[IG
]->setInsertPos(VPInst
);
1226 InterleaveGroupMap
[VPInst
] = Old2New
[IG
];
1227 InterleaveGroupMap
[VPInst
]->insertMember(
1228 VPInst
, IG
->getIndex(Inst
),
1229 Align(IG
->isReverse() ? (-1) * int(IG
->getFactor())
1230 : IG
->getFactor()));
1232 } else if (VPRegionBlock
*Region
= dyn_cast
<VPRegionBlock
>(Block
))
1233 visitRegion(Region
, Old2New
, IAI
);
1235 llvm_unreachable("Unsupported kind of VPBlock.");
1238 VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan
&Plan
,
1239 InterleavedAccessInfo
&IAI
) {
1241 visitRegion(Plan
.getVectorLoopRegion(), Old2New
, IAI
);
1244 void VPSlotTracker::assignSlot(const VPValue
*V
) {
1245 assert(!Slots
.contains(V
) && "VPValue already has a slot!");
1246 Slots
[V
] = NextSlot
++;
1249 void VPSlotTracker::assignSlots(const VPlan
&Plan
) {
1250 if (Plan
.VFxUF
.getNumUsers() > 0)
1251 assignSlot(&Plan
.VFxUF
);
1252 assignSlot(&Plan
.VectorTripCount
);
1253 if (Plan
.BackedgeTakenCount
)
1254 assignSlot(Plan
.BackedgeTakenCount
);
1255 assignSlots(Plan
.getPreheader());
1257 ReversePostOrderTraversal
<VPBlockDeepTraversalWrapper
<const VPBlockBase
*>>
1258 RPOT(VPBlockDeepTraversalWrapper
<const VPBlockBase
*>(Plan
.getEntry()));
1259 for (const VPBasicBlock
*VPBB
:
1260 VPBlockUtils::blocksOnly
<const VPBasicBlock
>(RPOT
))
1264 void VPSlotTracker::assignSlots(const VPBasicBlock
*VPBB
) {
1265 for (const VPRecipeBase
&Recipe
: *VPBB
)
1266 for (VPValue
*Def
: Recipe
.definedValues())
1270 bool vputils::onlyFirstLaneUsed(VPValue
*Def
) {
1271 return all_of(Def
->users(),
1272 [Def
](VPUser
*U
) { return U
->onlyFirstLaneUsed(Def
); });
1275 bool vputils::onlyFirstPartUsed(VPValue
*Def
) {
1276 return all_of(Def
->users(),
1277 [Def
](VPUser
*U
) { return U
->onlyFirstPartUsed(Def
); });
1280 VPValue
*vputils::getOrCreateVPValueForSCEVExpr(VPlan
&Plan
, const SCEV
*Expr
,
1281 ScalarEvolution
&SE
) {
1282 if (auto *Expanded
= Plan
.getSCEVExpansion(Expr
))
1284 VPValue
*Expanded
= nullptr;
1285 if (auto *E
= dyn_cast
<SCEVConstant
>(Expr
))
1286 Expanded
= Plan
.getVPValueOrAddLiveIn(E
->getValue());
1287 else if (auto *E
= dyn_cast
<SCEVUnknown
>(Expr
))
1288 Expanded
= Plan
.getVPValueOrAddLiveIn(E
->getValue());
1290 Expanded
= new VPExpandSCEVRecipe(Expr
, SE
);
1291 Plan
.getPreheader()->appendRecipe(Expanded
->getDefiningRecipe());
1293 Plan
.addSCEVExpansion(Expr
, Expanded
);