1 //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file contains implementations for different VPlan recipes.
12 //===----------------------------------------------------------------------===//
15 #include "VPlanAnalysis.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/Analysis/IVDescriptors.h"
20 #include "llvm/IR/BasicBlock.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/Instruction.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/Type.h"
25 #include "llvm/IR/Value.h"
26 #include "llvm/Support/Casting.h"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
31 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
36 using VectorParts
= SmallVector
<Value
*, 2>;
39 extern cl::opt
<bool> EnableVPlanNativePath
;
42 #define LV_NAME "loop-vectorize"
43 #define DEBUG_TYPE LV_NAME
// Returns true if this recipe may write to memory. A widened memory
// instruction writes iff it is a store; other recipes either derive the
// answer from their underlying IR instruction or are known not to write and
// assert that the underlying instruction (if any) agrees.
// NOTE(review): this chunk is a truncated extraction — several case labels,
// return statements and closing braces are missing (visible as gaps in the
// embedded original line numbers); the visible cases are NOT exhaustive.
45 bool VPRecipeBase::mayWriteToMemory() const {
46 switch (getVPDefID()) {
47 case VPWidenMemoryInstructionSC
: {
48 return cast
<VPWidenMemoryInstructionRecipe
>(this)->isStore();
52 return cast
<Instruction
>(getVPSingleValue()->getUnderlyingValue())
54 case VPBranchOnMaskSC
:
55 case VPScalarIVStepsSC
:
60 case VPWidenCanonicalIVSC
:
63 case VPWidenIntOrFpInductionSC
:
66 case VPWidenSelectSC
: {
67 const Instruction
*I
=
68 dyn_cast_or_null
<Instruction
>(getVPSingleValue()->getUnderlyingValue());
70 assert((!I
|| !I
->mayWriteToMemory()) &&
71 "underlying instruction may write to memory");
// Returns true if this recipe may read from memory. Mirrors
// mayWriteToMemory(): a widened memory instruction reads iff it is NOT a
// store; otherwise the answer comes from the underlying IR instruction, and
// known non-reading recipes assert that property.
// NOTE(review): truncated extraction — case labels, returns and closing
// braces are missing here (gaps in embedded original line numbers).
79 bool VPRecipeBase::mayReadFromMemory() const {
80 switch (getVPDefID()) {
81 case VPWidenMemoryInstructionSC
: {
82 return !cast
<VPWidenMemoryInstructionRecipe
>(this)->isStore();
86 return cast
<Instruction
>(getVPSingleValue()->getUnderlyingValue())
87 ->mayReadFromMemory();
88 case VPBranchOnMaskSC
:
89 case VPScalarIVStepsSC
:
94 case VPWidenCanonicalIVSC
:
97 case VPWidenIntOrFpInductionSC
:
100 case VPWidenSelectSC
: {
101 const Instruction
*I
=
102 dyn_cast_or_null
<Instruction
>(getVPSingleValue()->getUnderlyingValue());
104 assert((!I
|| !I
->mayReadFromMemory()) &&
105 "underlying instruction may read from memory");
// Returns true if this recipe may have side effects. Pure VPInstruction
// opcodes (ICmp, Not, the canonical-IV arithmetic ops, ...) have none; most
// other recipes consult their underlying IR instruction. A widened memory
// instruction's side effects are exactly its writes, and a replicate recipe
// defers entirely to its underlying scalar instruction.
// NOTE(review): truncated extraction — several case labels, returns, `break`s
// and braces are missing (gaps in embedded original line numbers); do not
// treat the visible cases as exhaustive.
113 bool VPRecipeBase::mayHaveSideEffects() const {
114 switch (getVPDefID()) {
116 case VPPredInstPHISC
:
118 case VPInstructionSC
:
119 switch (cast
<VPInstruction
>(this)->getOpcode()) {
120 case Instruction::ICmp
:
121 case VPInstruction::Not
:
122 case VPInstruction::CalculateTripCountMinusVF
:
123 case VPInstruction::CanonicalIVIncrement
:
124 case VPInstruction::CanonicalIVIncrementForPart
:
130 return cast
<Instruction
>(getVPSingleValue()->getUnderlyingValue())
131 ->mayHaveSideEffects();
134 case VPScalarIVStepsSC
:
135 case VPWidenCanonicalIVSC
:
138 case VPWidenIntOrFpInductionSC
:
140 case VPWidenPointerInductionSC
:
142 case VPWidenSelectSC
: {
143 const Instruction
*I
=
144 dyn_cast_or_null
<Instruction
>(getVPSingleValue()->getUnderlyingValue());
146 assert((!I
|| !I
->mayHaveSideEffects()) &&
147 "underlying instruction has side-effects");
150 case VPWidenMemoryInstructionSC
:
151 assert(cast
<VPWidenMemoryInstructionRecipe
>(this)
153 .mayHaveSideEffects() == mayWriteToMemory() &&
154 "mayHaveSideffects result for ingredient differs from this "
156 return mayWriteToMemory();
157 case VPReplicateSC
: {
158 auto *R
= cast
<VPReplicateRecipe
>(this);
159 return R
->getUnderlyingInstr()->mayHaveSideEffects();
166 void VPLiveOut::fixPhi(VPlan
&Plan
, VPTransformState
&State
) {
167 auto Lane
= VPLane::getLastLaneForVF(State
.VF
);
168 VPValue
*ExitValue
= getOperand(0);
169 if (vputils::isUniformAfterVectorization(ExitValue
))
170 Lane
= VPLane::getFirstLane();
171 VPBasicBlock
*MiddleVPBB
=
172 cast
<VPBasicBlock
>(Plan
.getVectorLoopRegion()->getSingleSuccessor());
173 assert(MiddleVPBB
->getNumSuccessors() == 0 &&
174 "the middle block must not have any successors");
175 BasicBlock
*MiddleBB
= State
.CFG
.VPBB2IRBB
[MiddleVPBB
];
176 Phi
->addIncoming(State
.get(ExitValue
, VPIteration(State
.UF
- 1, Lane
)),
180 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer for a live-out: prints the phi and its single operand using
// the plan's slot numbering.
// NOTE(review): truncated extraction — the `O << ...` string-output lines
// between the printAsOperand calls are missing from this chunk.
181 void VPLiveOut::print(raw_ostream
&O
, VPSlotTracker
&SlotTracker
) const {
183 getPhi()->printAsOperand(O
);
185 getOperand(0)->printAsOperand(O
, SlotTracker
);
190 void VPRecipeBase::insertBefore(VPRecipeBase
*InsertPos
) {
191 assert(!Parent
&& "Recipe already in some VPBasicBlock");
192 assert(InsertPos
->getParent() &&
193 "Insertion position not in any VPBasicBlock");
194 Parent
= InsertPos
->getParent();
195 Parent
->getRecipeList().insert(InsertPos
->getIterator(), this);
198 void VPRecipeBase::insertBefore(VPBasicBlock
&BB
,
199 iplist
<VPRecipeBase
>::iterator I
) {
200 assert(!Parent
&& "Recipe already in some VPBasicBlock");
201 assert(I
== BB
.end() || I
->getParent() == &BB
);
203 BB
.getRecipeList().insert(I
, this);
206 void VPRecipeBase::insertAfter(VPRecipeBase
*InsertPos
) {
207 assert(!Parent
&& "Recipe already in some VPBasicBlock");
208 assert(InsertPos
->getParent() &&
209 "Insertion position not in any VPBasicBlock");
210 Parent
= InsertPos
->getParent();
211 Parent
->getRecipeList().insertAfter(InsertPos
->getIterator(), this);
214 void VPRecipeBase::removeFromParent() {
215 assert(getParent() && "Recipe not in any VPBasicBlock");
216 getParent()->getRecipeList().remove(getIterator());
220 iplist
<VPRecipeBase
>::iterator
VPRecipeBase::eraseFromParent() {
221 assert(getParent() && "Recipe not in any VPBasicBlock");
222 return getParent()->getRecipeList().erase(getIterator());
225 void VPRecipeBase::moveAfter(VPRecipeBase
*InsertPos
) {
227 insertAfter(InsertPos
);
230 void VPRecipeBase::moveBefore(VPBasicBlock
&BB
,
231 iplist
<VPRecipeBase
>::iterator I
) {
236 FastMathFlags
VPRecipeWithIRFlags::getFastMathFlags() const {
237 assert(OpType
== OperationType::FPMathOp
&&
238 "recipe doesn't have fast math flags");
240 Res
.setAllowReassoc(FMFs
.AllowReassoc
);
241 Res
.setNoNaNs(FMFs
.NoNaNs
);
242 Res
.setNoInfs(FMFs
.NoInfs
);
243 Res
.setNoSignedZeros(FMFs
.NoSignedZeros
);
244 Res
.setAllowReciprocal(FMFs
.AllowReciprocal
);
245 Res
.setAllowContract(FMFs
.AllowContract
);
246 Res
.setApproxFunc(FMFs
.ApproxFunc
);
250 VPInstruction::VPInstruction(unsigned Opcode
, CmpInst::Predicate Pred
,
251 VPValue
*A
, VPValue
*B
, DebugLoc DL
,
253 : VPRecipeWithIRFlags(VPDef::VPInstructionSC
, ArrayRef
<VPValue
*>({A
, B
}),
255 VPValue(this), Opcode(Opcode
), Name(Name
.str()) {
256 assert(Opcode
== Instruction::ICmp
&&
257 "only ICmp predicates supported at the moment");
260 VPInstruction::VPInstruction(unsigned Opcode
,
261 std::initializer_list
<VPValue
*> Operands
,
262 FastMathFlags FMFs
, DebugLoc DL
, const Twine
&Name
)
263 : VPRecipeWithIRFlags(VPDef::VPInstructionSC
, Operands
, FMFs
, DL
),
264 VPValue(this), Opcode(Opcode
), Name(Name
.str()) {
265 // Make sure the VPInstruction is a floating-point operation.
266 assert(isFPMathOp() && "this op can't take fast-math flags");
269 Value
*VPInstruction::generateInstruction(VPTransformState
&State
,
271 IRBuilderBase
&Builder
= State
.Builder
;
272 Builder
.SetCurrentDebugLocation(getDebugLoc());
274 if (Instruction::isBinaryOp(getOpcode())) {
275 Value
*A
= State
.get(getOperand(0), Part
);
276 Value
*B
= State
.get(getOperand(1), Part
);
277 return Builder
.CreateBinOp((Instruction::BinaryOps
)getOpcode(), A
, B
, Name
);
280 switch (getOpcode()) {
281 case VPInstruction::Not
: {
282 Value
*A
= State
.get(getOperand(0), Part
);
283 return Builder
.CreateNot(A
, Name
);
285 case Instruction::ICmp
: {
286 Value
*A
= State
.get(getOperand(0), Part
);
287 Value
*B
= State
.get(getOperand(1), Part
);
288 return Builder
.CreateCmp(getPredicate(), A
, B
, Name
);
290 case Instruction::Select
: {
291 Value
*Cond
= State
.get(getOperand(0), Part
);
292 Value
*Op1
= State
.get(getOperand(1), Part
);
293 Value
*Op2
= State
.get(getOperand(2), Part
);
294 return Builder
.CreateSelect(Cond
, Op1
, Op2
, Name
);
296 case VPInstruction::ActiveLaneMask
: {
297 // Get first lane of vector induction variable.
298 Value
*VIVElem0
= State
.get(getOperand(0), VPIteration(Part
, 0));
299 // Get the original loop tripcount.
300 Value
*ScalarTC
= State
.get(getOperand(1), VPIteration(Part
, 0));
302 auto *Int1Ty
= Type::getInt1Ty(Builder
.getContext());
303 auto *PredTy
= VectorType::get(Int1Ty
, State
.VF
);
304 return Builder
.CreateIntrinsic(Intrinsic::get_active_lane_mask
,
305 {PredTy
, ScalarTC
->getType()},
306 {VIVElem0
, ScalarTC
}, nullptr, Name
);
308 case VPInstruction::FirstOrderRecurrenceSplice
: {
309 // Generate code to combine the previous and current values in vector v3.
312 // v_init = vector(..., ..., ..., a[-1])
316 // i = phi [0, vector.ph], [i+4, vector.body]
317 // v1 = phi [v_init, vector.ph], [v2, vector.body]
318 // v2 = a[i, i+1, i+2, i+3];
319 // v3 = vector(v1(3), v2(0, 1, 2))
321 // For the first part, use the recurrence phi (v1), otherwise v2.
322 auto *V1
= State
.get(getOperand(0), 0);
323 Value
*PartMinus1
= Part
== 0 ? V1
: State
.get(getOperand(1), Part
- 1);
324 if (!PartMinus1
->getType()->isVectorTy())
326 Value
*V2
= State
.get(getOperand(1), Part
);
327 return Builder
.CreateVectorSplice(PartMinus1
, V2
, -1, Name
);
329 case VPInstruction::CalculateTripCountMinusVF
: {
330 Value
*ScalarTC
= State
.get(getOperand(0), {0, 0});
332 createStepForVF(Builder
, ScalarTC
->getType(), State
.VF
, State
.UF
);
333 Value
*Sub
= Builder
.CreateSub(ScalarTC
, Step
);
334 Value
*Cmp
= Builder
.CreateICmp(CmpInst::Predicate::ICMP_UGT
, ScalarTC
, Step
);
335 Value
*Zero
= ConstantInt::get(ScalarTC
->getType(), 0);
336 return Builder
.CreateSelect(Cmp
, Sub
, Zero
);
338 case VPInstruction::CanonicalIVIncrement
: {
340 auto *Phi
= State
.get(getOperand(0), 0);
341 // The loop step is equal to the vectorization factor (num of SIMD
342 // elements) times the unroll factor (num of SIMD instructions).
344 createStepForVF(Builder
, Phi
->getType(), State
.VF
, State
.UF
);
345 return Builder
.CreateAdd(Phi
, Step
, Name
, hasNoUnsignedWrap(),
348 return State
.get(this, 0);
351 case VPInstruction::CanonicalIVIncrementForPart
: {
352 auto *IV
= State
.get(getOperand(0), VPIteration(0, 0));
356 // The canonical IV is incremented by the vectorization factor (num of SIMD
357 // elements) times the unroll part.
358 Value
*Step
= createStepForVF(Builder
, IV
->getType(), State
.VF
, Part
);
359 return Builder
.CreateAdd(IV
, Step
, Name
, hasNoUnsignedWrap(),
362 case VPInstruction::BranchOnCond
: {
366 Value
*Cond
= State
.get(getOperand(0), VPIteration(Part
, 0));
367 VPRegionBlock
*ParentRegion
= getParent()->getParent();
368 VPBasicBlock
*Header
= ParentRegion
->getEntryBasicBlock();
370 // Replace the temporary unreachable terminator with a new conditional
371 // branch, hooking it up to backward destination for exiting blocks now and
372 // to forward destination(s) later when they are created.
374 Builder
.CreateCondBr(Cond
, Builder
.GetInsertBlock(), nullptr);
376 if (getParent()->isExiting())
377 CondBr
->setSuccessor(1, State
.CFG
.VPBB2IRBB
[Header
]);
379 CondBr
->setSuccessor(0, nullptr);
380 Builder
.GetInsertBlock()->getTerminator()->eraseFromParent();
383 case VPInstruction::BranchOnCount
: {
386 // First create the compare.
387 Value
*IV
= State
.get(getOperand(0), Part
);
388 Value
*TC
= State
.get(getOperand(1), Part
);
389 Value
*Cond
= Builder
.CreateICmpEQ(IV
, TC
);
391 // Now create the branch.
392 auto *Plan
= getParent()->getPlan();
393 VPRegionBlock
*TopRegion
= Plan
->getVectorLoopRegion();
394 VPBasicBlock
*Header
= TopRegion
->getEntry()->getEntryBasicBlock();
396 // Replace the temporary unreachable terminator with a new conditional
397 // branch, hooking it up to backward destination (the header) now and to the
398 // forward destination (the exit/middle block) later when it is created.
399 // Note that CreateCondBr expects a valid BB as first argument, so we need
400 // to set it to nullptr later.
401 BranchInst
*CondBr
= Builder
.CreateCondBr(Cond
, Builder
.GetInsertBlock(),
402 State
.CFG
.VPBB2IRBB
[Header
]);
403 CondBr
->setSuccessor(0, nullptr);
404 Builder
.GetInsertBlock()->getTerminator()->eraseFromParent();
408 llvm_unreachable("Unsupported opcode for instruction");
413 bool VPInstruction::isFPMathOp() const {
414 // Inspired by FPMathOperator::classof. Notable differences are that we don't
415 // support Call, PHI and Select opcodes here yet.
416 return Opcode
== Instruction::FAdd
|| Opcode
== Instruction::FMul
||
417 Opcode
== Instruction::FNeg
|| Opcode
== Instruction::FSub
||
418 Opcode
== Instruction::FDiv
|| Opcode
== Instruction::FRem
||
419 Opcode
== Instruction::FCmp
|| Opcode
== Instruction::Select
;
423 void VPInstruction::execute(VPTransformState
&State
) {
424 assert(!State
.Instance
&& "VPInstruction executing an Instance");
425 IRBuilderBase::FastMathFlagGuard
FMFGuard(State
.Builder
);
426 assert((hasFastMathFlags() == isFPMathOp() ||
427 getOpcode() == Instruction::Select
) &&
428 "Recipe not a FPMathOp but has fast-math flags?");
429 if (hasFastMathFlags())
430 State
.Builder
.setFastMathFlags(getFastMathFlags());
431 for (unsigned Part
= 0; Part
< State
.UF
; ++Part
) {
432 Value
*GeneratedValue
= generateInstruction(State
, Part
);
435 assert(GeneratedValue
&& "generateInstruction must produce a value");
436 State
.set(this, GeneratedValue
, Part
);
440 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
441 void VPInstruction::dump() const {
442 VPSlotTracker
SlotTracker(getParent()->getPlan());
443 print(dbgs(), "", SlotTracker
);
// Debug printer: "EMIT <result> = <opcode name>", using a symbolic name for
// VPlan-specific opcodes and Instruction::getOpcodeName for plain IR ones,
// followed by the operands and (when present) the debug location.
// NOTE(review): truncated extraction — `break` statements, some `O << ...`
// lines and closing braces are missing (gaps in embedded original numbers).
446 void VPInstruction::print(raw_ostream
&O
, const Twine
&Indent
,
447 VPSlotTracker
&SlotTracker
) const {
448 O
<< Indent
<< "EMIT ";
451 printAsOperand(O
, SlotTracker
);
455 switch (getOpcode()) {
456 case VPInstruction::Not
:
459 case VPInstruction::SLPLoad
:
460 O
<< "combined load";
462 case VPInstruction::SLPStore
:
463 O
<< "combined store";
465 case VPInstruction::ActiveLaneMask
:
466 O
<< "active lane mask";
468 case VPInstruction::FirstOrderRecurrenceSplice
:
469 O
<< "first-order splice";
471 case VPInstruction::CanonicalIVIncrement
:
474 case VPInstruction::BranchOnCond
:
475 O
<< "branch-on-cond";
477 case VPInstruction::CalculateTripCountMinusVF
:
478 O
<< "TC > VF ? TC - VF : 0";
480 case VPInstruction::CanonicalIVIncrementForPart
:
483 case VPInstruction::BranchOnCount
:
484 O
<< "branch-on-count";
487 O
<< Instruction::getOpcodeName(getOpcode());
491 printOperands(O
, SlotTracker
);
493 if (auto DL
= getDebugLoc()) {
500 void VPWidenCallRecipe::execute(VPTransformState
&State
) {
501 assert(State
.VF
.isVector() && "not widening");
502 auto &CI
= *cast
<CallInst
>(getUnderlyingInstr());
503 assert(!isa
<DbgInfoIntrinsic
>(CI
) &&
504 "DbgInfoIntrinsic should have been dropped during VPlan construction");
505 State
.setDebugLocFrom(CI
.getDebugLoc());
507 for (unsigned Part
= 0; Part
< State
.UF
; ++Part
) {
508 SmallVector
<Type
*, 2> TysForDecl
;
509 // Add return type if intrinsic is overloaded on it.
510 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID
, -1)) {
511 TysForDecl
.push_back(
512 VectorType::get(CI
.getType()->getScalarType(), State
.VF
));
514 SmallVector
<Value
*, 4> Args
;
515 for (const auto &I
: enumerate(operands())) {
516 // Some intrinsics have a scalar argument - don't replace it with a
519 if (VectorIntrinsicID
== Intrinsic::not_intrinsic
||
520 !isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID
, I
.index()))
521 Arg
= State
.get(I
.value(), Part
);
523 Arg
= State
.get(I
.value(), VPIteration(0, 0));
524 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID
, I
.index()))
525 TysForDecl
.push_back(Arg
->getType());
530 if (VectorIntrinsicID
!= Intrinsic::not_intrinsic
) {
531 // Use vector version of the intrinsic.
532 Module
*M
= State
.Builder
.GetInsertBlock()->getModule();
533 VectorF
= Intrinsic::getDeclaration(M
, VectorIntrinsicID
, TysForDecl
);
534 assert(VectorF
&& "Can't retrieve vector intrinsic.");
537 assert(Variant
!= nullptr && "Can't create vector function.");
542 SmallVector
<OperandBundleDef
, 1> OpBundles
;
543 CI
.getOperandBundlesAsDefs(OpBundles
);
544 CallInst
*V
= State
.Builder
.CreateCall(VectorF
, Args
, OpBundles
);
546 if (isa
<FPMathOperator
>(V
))
547 V
->copyFastMathFlags(&CI
);
549 State
.set(this, V
, Part
);
550 State
.addMetadata(V
, &CI
);
554 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer for a widened call: "WIDEN-CALL [<result> = ]call @<fn>(...)"
// annotated with whether a vector intrinsic or a library-function variant is
// used for vectorization.
// NOTE(review): truncated extraction — the else/closing lines between the
// intrinsic and library-function branches are missing.
555 void VPWidenCallRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
556 VPSlotTracker
&SlotTracker
) const {
557 O
<< Indent
<< "WIDEN-CALL ";
559 auto *CI
= cast
<CallInst
>(getUnderlyingInstr());
560 if (CI
->getType()->isVoidTy())
563 printAsOperand(O
, SlotTracker
);
567 O
<< "call @" << CI
->getCalledFunction()->getName() << "(";
568 printOperands(O
, SlotTracker
);
571 if (VectorIntrinsicID
)
572 O
<< " (using vector intrinsic)";
574 O
<< " (using library function";
575 if (Variant
->hasName())
576 O
<< ": " << Variant
->getName();
// Debug printer for a widened select: result, the three select operands, and
// a note when the condition is loop invariant.
// NOTE(review): truncated extraction — the separator `O << ...` lines between
// the operand printouts are missing from this chunk.
581 void VPWidenSelectRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
582 VPSlotTracker
&SlotTracker
) const {
583 O
<< Indent
<< "WIDEN-SELECT ";
584 printAsOperand(O
, SlotTracker
);
586 getOperand(0)->printAsOperand(O
, SlotTracker
);
588 getOperand(1)->printAsOperand(O
, SlotTracker
);
590 getOperand(2)->printAsOperand(O
, SlotTracker
);
591 O
<< (isInvariantCond() ? " (condition is loop invariant)" : "");
595 void VPWidenSelectRecipe::execute(VPTransformState
&State
) {
596 State
.setDebugLocFrom(getDebugLoc());
598 // The condition can be loop invariant but still defined inside the
599 // loop. This means that we can't just use the original 'cond' value.
600 // We have to take the 'vectorized' value and pick the first lane.
601 // Instcombine will make this a no-op.
603 isInvariantCond() ? State
.get(getCond(), VPIteration(0, 0)) : nullptr;
605 for (unsigned Part
= 0; Part
< State
.UF
; ++Part
) {
606 Value
*Cond
= InvarCond
? InvarCond
: State
.get(getCond(), Part
);
607 Value
*Op0
= State
.get(getOperand(1), Part
);
608 Value
*Op1
= State
.get(getOperand(2), Part
);
609 Value
*Sel
= State
.Builder
.CreateSelect(Cond
, Op0
, Op1
);
610 State
.set(this, Sel
, Part
);
611 State
.addMetadata(Sel
, dyn_cast_or_null
<Instruction
>(getUnderlyingValue()));
615 VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
616 const FastMathFlags
&FMF
) {
617 AllowReassoc
= FMF
.allowReassoc();
618 NoNaNs
= FMF
.noNaNs();
619 NoInfs
= FMF
.noInfs();
620 NoSignedZeros
= FMF
.noSignedZeros();
621 AllowReciprocal
= FMF
.allowReciprocal();
622 AllowContract
= FMF
.allowContract();
623 ApproxFunc
= FMF
.approxFunc();
626 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer for the IR flags attached to a recipe: compare predicate,
// exact, nuw/nsw, fast-math flags, or inbounds, depending on the operation
// type stored in OpType.
// NOTE(review): truncated extraction — the `switch` head, the flag-string
// `O << ...` lines, `break`s and closing braces are missing from this chunk.
627 void VPRecipeWithIRFlags::printFlags(raw_ostream
&O
) const {
629 case OperationType::Cmp
:
630 O
<< " " << CmpInst::getPredicateName(getPredicate());
632 case OperationType::PossiblyExactOp
:
633 if (ExactFlags
.IsExact
)
636 case OperationType::OverflowingBinOp
:
637 if (WrapFlags
.HasNUW
)
639 if (WrapFlags
.HasNSW
)
642 case OperationType::FPMathOp
:
643 getFastMathFlags().print(O
);
645 case OperationType::GEPOp
:
646 if (GEPFlags
.IsInBounds
)
649 case OperationType::Other
:
652 if (getNumOperands() > 0)
657 void VPWidenRecipe::execute(VPTransformState
&State
) {
658 State
.setDebugLocFrom(getDebugLoc());
659 auto &Builder
= State
.Builder
;
661 case Instruction::Call
:
662 case Instruction::Br
:
663 case Instruction::PHI
:
664 case Instruction::GetElementPtr
:
665 case Instruction::Select
:
666 llvm_unreachable("This instruction is handled by a different recipe.");
667 case Instruction::UDiv
:
668 case Instruction::SDiv
:
669 case Instruction::SRem
:
670 case Instruction::URem
:
671 case Instruction::Add
:
672 case Instruction::FAdd
:
673 case Instruction::Sub
:
674 case Instruction::FSub
:
675 case Instruction::FNeg
:
676 case Instruction::Mul
:
677 case Instruction::FMul
:
678 case Instruction::FDiv
:
679 case Instruction::FRem
:
680 case Instruction::Shl
:
681 case Instruction::LShr
:
682 case Instruction::AShr
:
683 case Instruction::And
:
684 case Instruction::Or
:
685 case Instruction::Xor
: {
686 // Just widen unops and binops.
687 for (unsigned Part
= 0; Part
< State
.UF
; ++Part
) {
688 SmallVector
<Value
*, 2> Ops
;
689 for (VPValue
*VPOp
: operands())
690 Ops
.push_back(State
.get(VPOp
, Part
));
692 Value
*V
= Builder
.CreateNAryOp(Opcode
, Ops
);
694 if (auto *VecOp
= dyn_cast
<Instruction
>(V
))
697 // Use this vector value for all users of the original instruction.
698 State
.set(this, V
, Part
);
699 State
.addMetadata(V
, dyn_cast_or_null
<Instruction
>(getUnderlyingValue()));
704 case Instruction::Freeze
: {
705 for (unsigned Part
= 0; Part
< State
.UF
; ++Part
) {
706 Value
*Op
= State
.get(getOperand(0), Part
);
708 Value
*Freeze
= Builder
.CreateFreeze(Op
);
709 State
.set(this, Freeze
, Part
);
713 case Instruction::ICmp
:
714 case Instruction::FCmp
: {
715 // Widen compares. Generate vector compares.
716 bool FCmp
= Opcode
== Instruction::FCmp
;
717 for (unsigned Part
= 0; Part
< State
.UF
; ++Part
) {
718 Value
*A
= State
.get(getOperand(0), Part
);
719 Value
*B
= State
.get(getOperand(1), Part
);
722 // Propagate fast math flags.
723 IRBuilder
<>::FastMathFlagGuard
FMFG(Builder
);
724 if (auto *I
= dyn_cast_or_null
<Instruction
>(getUnderlyingValue()))
725 Builder
.setFastMathFlags(I
->getFastMathFlags());
726 C
= Builder
.CreateFCmp(getPredicate(), A
, B
);
728 C
= Builder
.CreateICmp(getPredicate(), A
, B
);
730 State
.set(this, C
, Part
);
731 State
.addMetadata(C
, dyn_cast_or_null
<Instruction
>(getUnderlyingValue()));
737 // This instruction is not vectorized by simple widening.
738 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
739 << Instruction::getOpcodeName(Opcode
));
740 llvm_unreachable("Unhandled instruction!");
744 // Verify that VPlan type inference results agree with the type of the
746 for (unsigned Part
= 0; Part
< State
.UF
; ++Part
) {
747 assert(VectorType::get(State
.TypeAnalysis
.inferScalarType(this),
748 State
.VF
) == State
.get(this, Part
)->getType() &&
749 "inferred type and type from generated instructions do not match");
754 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
755 void VPWidenRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
756 VPSlotTracker
&SlotTracker
) const {
757 O
<< Indent
<< "WIDEN ";
758 printAsOperand(O
, SlotTracker
);
759 O
<< " = " << Instruction::getOpcodeName(Opcode
);
761 printOperands(O
, SlotTracker
);
765 void VPWidenCastRecipe::execute(VPTransformState
&State
) {
766 State
.setDebugLocFrom(getDebugLoc());
767 auto &Builder
= State
.Builder
;
769 assert(State
.VF
.isVector() && "Not vectorizing?");
770 Type
*DestTy
= VectorType::get(getResultType(), State
.VF
);
772 for (unsigned Part
= 0; Part
< State
.UF
; ++Part
) {
773 Value
*A
= State
.get(getOperand(0), Part
);
774 Value
*Cast
= Builder
.CreateCast(Instruction::CastOps(Opcode
), A
, DestTy
);
775 State
.set(this, Cast
, Part
);
776 State
.addMetadata(Cast
, cast_or_null
<Instruction
>(getUnderlyingValue()));
780 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
781 void VPWidenCastRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
782 VPSlotTracker
&SlotTracker
) const {
783 O
<< Indent
<< "WIDEN-CAST ";
784 printAsOperand(O
, SlotTracker
);
785 O
<< " = " << Instruction::getOpcodeName(Opcode
) << " ";
786 printOperands(O
, SlotTracker
);
787 O
<< " to " << *getResultType();
791 /// This function adds
792 /// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
793 /// to each vector element of Val. The sequence starts at StartIndex.
794 /// \p Opcode is relevant for FP induction variable.
795 static Value
*getStepVector(Value
*Val
, Value
*StartIdx
, Value
*Step
,
796 Instruction::BinaryOps BinOp
, ElementCount VF
,
797 IRBuilderBase
&Builder
) {
798 assert(VF
.isVector() && "only vector VFs are supported");
800 // Create and check the types.
801 auto *ValVTy
= cast
<VectorType
>(Val
->getType());
802 ElementCount VLen
= ValVTy
->getElementCount();
804 Type
*STy
= Val
->getType()->getScalarType();
805 assert((STy
->isIntegerTy() || STy
->isFloatingPointTy()) &&
806 "Induction Step must be an integer or FP");
807 assert(Step
->getType() == STy
&& "Step has wrong type");
809 SmallVector
<Constant
*, 8> Indices
;
811 // Create a vector of consecutive numbers from zero to VF.
812 VectorType
*InitVecValVTy
= ValVTy
;
813 if (STy
->isFloatingPointTy()) {
814 Type
*InitVecValSTy
=
815 IntegerType::get(STy
->getContext(), STy
->getScalarSizeInBits());
816 InitVecValVTy
= VectorType::get(InitVecValSTy
, VLen
);
818 Value
*InitVec
= Builder
.CreateStepVector(InitVecValVTy
);
820 // Splat the StartIdx
821 Value
*StartIdxSplat
= Builder
.CreateVectorSplat(VLen
, StartIdx
);
823 if (STy
->isIntegerTy()) {
824 InitVec
= Builder
.CreateAdd(InitVec
, StartIdxSplat
);
825 Step
= Builder
.CreateVectorSplat(VLen
, Step
);
826 assert(Step
->getType() == Val
->getType() && "Invalid step vec");
827 // FIXME: The newly created binary instructions should contain nsw/nuw
828 // flags, which can be found from the original scalar operations.
829 Step
= Builder
.CreateMul(InitVec
, Step
);
830 return Builder
.CreateAdd(Val
, Step
, "induction");
833 // Floating point induction.
834 assert((BinOp
== Instruction::FAdd
|| BinOp
== Instruction::FSub
) &&
835 "Binary Opcode should be specified for FP induction");
836 InitVec
= Builder
.CreateUIToFP(InitVec
, ValVTy
);
837 InitVec
= Builder
.CreateFAdd(InitVec
, StartIdxSplat
);
839 Step
= Builder
.CreateVectorSplat(VLen
, Step
);
840 Value
*MulOp
= Builder
.CreateFMul(InitVec
, Step
);
841 return Builder
.CreateBinOp(BinOp
, Val
, MulOp
, "induction");
844 /// A helper function that returns an integer or floating-point constant with
846 static Constant
*getSignedIntOrFpConstant(Type
*Ty
, int64_t C
) {
847 return Ty
->isIntegerTy() ? ConstantInt::getSigned(Ty
, C
)
848 : ConstantFP::get(Ty
, C
);
851 static Value
*getRuntimeVFAsFloat(IRBuilderBase
&B
, Type
*FTy
,
853 assert(FTy
->isFloatingPointTy() && "Expected floating point type!");
854 Type
*IntTy
= IntegerType::get(FTy
->getContext(), FTy
->getScalarSizeInBits());
855 Value
*RuntimeVF
= getRuntimeVF(B
, IntTy
, VF
);
856 return B
.CreateUIToFP(RuntimeVF
, FTy
);
859 void VPWidenIntOrFpInductionRecipe::execute(VPTransformState
&State
) {
860 assert(!State
.Instance
&& "Int or FP induction being replicated.");
862 Value
*Start
= getStartValue()->getLiveInIRValue();
863 const InductionDescriptor
&ID
= getInductionDescriptor();
864 TruncInst
*Trunc
= getTruncInst();
865 IRBuilderBase
&Builder
= State
.Builder
;
866 assert(IV
->getType() == ID
.getStartValue()->getType() && "Types must match");
867 assert(State
.VF
.isVector() && "must have vector VF");
869 // The value from the original loop to which we are mapping the new induction
871 Instruction
*EntryVal
= Trunc
? cast
<Instruction
>(Trunc
) : IV
;
873 // Fast-math-flags propagate from the original induction instruction.
874 IRBuilder
<>::FastMathFlagGuard
FMFG(Builder
);
875 if (ID
.getInductionBinOp() && isa
<FPMathOperator
>(ID
.getInductionBinOp()))
876 Builder
.setFastMathFlags(ID
.getInductionBinOp()->getFastMathFlags());
878 // Now do the actual transformations, and start with fetching the step value.
879 Value
*Step
= State
.get(getStepValue(), VPIteration(0, 0));
881 assert((isa
<PHINode
>(EntryVal
) || isa
<TruncInst
>(EntryVal
)) &&
882 "Expected either an induction phi-node or a truncate of it!");
884 // Construct the initial value of the vector IV in the vector loop preheader
885 auto CurrIP
= Builder
.saveIP();
886 BasicBlock
*VectorPH
= State
.CFG
.getPreheaderBBFor(this);
887 Builder
.SetInsertPoint(VectorPH
->getTerminator());
888 if (isa
<TruncInst
>(EntryVal
)) {
889 assert(Start
->getType()->isIntegerTy() &&
890 "Truncation requires an integer type");
891 auto *TruncType
= cast
<IntegerType
>(EntryVal
->getType());
892 Step
= Builder
.CreateTrunc(Step
, TruncType
);
893 Start
= Builder
.CreateCast(Instruction::Trunc
, Start
, TruncType
);
896 Value
*Zero
= getSignedIntOrFpConstant(Start
->getType(), 0);
897 Value
*SplatStart
= Builder
.CreateVectorSplat(State
.VF
, Start
);
898 Value
*SteppedStart
= getStepVector(
899 SplatStart
, Zero
, Step
, ID
.getInductionOpcode(), State
.VF
, State
.Builder
);
901 // We create vector phi nodes for both integer and floating-point induction
902 // variables. Here, we determine the kind of arithmetic we will perform.
903 Instruction::BinaryOps AddOp
;
904 Instruction::BinaryOps MulOp
;
905 if (Step
->getType()->isIntegerTy()) {
906 AddOp
= Instruction::Add
;
907 MulOp
= Instruction::Mul
;
909 AddOp
= ID
.getInductionOpcode();
910 MulOp
= Instruction::FMul
;
913 // Multiply the vectorization factor by the step using integer or
914 // floating-point arithmetic as appropriate.
915 Type
*StepType
= Step
->getType();
917 if (Step
->getType()->isFloatingPointTy())
918 RuntimeVF
= getRuntimeVFAsFloat(Builder
, StepType
, State
.VF
);
920 RuntimeVF
= getRuntimeVF(Builder
, StepType
, State
.VF
);
921 Value
*Mul
= Builder
.CreateBinOp(MulOp
, Step
, RuntimeVF
);
923 // Create a vector splat to use in the induction update.
925 // FIXME: If the step is non-constant, we create the vector splat with
926 // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
927 // handle a constant vector splat.
928 Value
*SplatVF
= isa
<Constant
>(Mul
)
929 ? ConstantVector::getSplat(State
.VF
, cast
<Constant
>(Mul
))
930 : Builder
.CreateVectorSplat(State
.VF
, Mul
);
931 Builder
.restoreIP(CurrIP
);
933 // We may need to add the step a number of times, depending on the unroll
934 // factor. The last of those goes into the PHI.
935 PHINode
*VecInd
= PHINode::Create(SteppedStart
->getType(), 2, "vec.ind");
936 VecInd
->insertBefore(State
.CFG
.PrevBB
->getFirstInsertionPt());
937 VecInd
->setDebugLoc(EntryVal
->getDebugLoc());
938 Instruction
*LastInduction
= VecInd
;
939 for (unsigned Part
= 0; Part
< State
.UF
; ++Part
) {
940 State
.set(this, LastInduction
, Part
);
942 if (isa
<TruncInst
>(EntryVal
))
943 State
.addMetadata(LastInduction
, EntryVal
);
945 LastInduction
= cast
<Instruction
>(
946 Builder
.CreateBinOp(AddOp
, LastInduction
, SplatVF
, "step.add"));
947 LastInduction
->setDebugLoc(EntryVal
->getDebugLoc());
950 LastInduction
->setName("vec.ind.next");
951 VecInd
->addIncoming(SteppedStart
, VectorPH
);
952 // Add induction update using an incorrect block temporarily. The phi node
953 // will be fixed after VPlan execution. Note that at this point the latch
954 // block cannot be used, as it does not exist yet.
955 // TODO: Model increment value in VPlan, by turning the recipe into a
956 // multi-def and a subclass of VPHeaderPHIRecipe.
957 VecInd
->addIncoming(LastInduction
, VectorPH
);
960 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer for a widened int/FP induction: prints the underlying IR
// phi (and truncate, when present) plus the start and step operands.
// NOTE(review): truncated extraction — some separator `O << ...` lines, the
// else branch structure and closing braces are missing from this chunk.
961 void VPWidenIntOrFpInductionRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
962 VPSlotTracker
&SlotTracker
) const {
963 O
<< Indent
<< "WIDEN-INDUCTION";
964 if (getTruncInst()) {
966 O
<< " +\n" << Indent
<< "\" " << VPlanIngredient(IV
) << "\\l\"";
967 O
<< " +\n" << Indent
<< "\" ";
968 getVPValue(0)->printAsOperand(O
, SlotTracker
);
970 O
<< " " << VPlanIngredient(IV
);
973 getStepValue()->printAsOperand(O
, SlotTracker
);
977 bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
978 // The step may be defined by a recipe in the preheader (e.g. if it requires
979 // SCEV expansion), but for the canonical induction the step is required to be
980 // 1, which is represented as live-in.
981 if (getStepValue()->getDefiningRecipe())
983 auto *StepC
= dyn_cast
<ConstantInt
>(getStepValue()->getLiveInIRValue());
984 auto *StartC
= dyn_cast
<ConstantInt
>(getStartValue()->getLiveInIRValue());
985 return StartC
&& StartC
->isZero() && StepC
&& StepC
->isOne();
988 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
989 void VPDerivedIVRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
990 VPSlotTracker
&SlotTracker
) const {
992 printAsOperand(O
, SlotTracker
);
993 O
<< Indent
<< "= DERIVED-IV ";
994 getStartValue()->printAsOperand(O
, SlotTracker
);
996 getCanonicalIV()->printAsOperand(O
, SlotTracker
);
998 getStepValue()->printAsOperand(O
, SlotTracker
);
1001 O
<< " (truncated to " << *TruncResultTy
<< ")";
1005 void VPScalarIVStepsRecipe::execute(VPTransformState
&State
) {
1006 // Fast-math-flags propagate from the original induction instruction.
1007 IRBuilder
<>::FastMathFlagGuard
FMFG(State
.Builder
);
1008 if (hasFastMathFlags())
1009 State
.Builder
.setFastMathFlags(getFastMathFlags());
1011 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
1012 /// variable on which to base the steps, \p Step is the size of the step.
1014 Value
*BaseIV
= State
.get(getOperand(0), VPIteration(0, 0));
1015 Value
*Step
= State
.get(getStepValue(), VPIteration(0, 0));
1016 IRBuilderBase
&Builder
= State
.Builder
;
1018 // Ensure step has the same type as that of scalar IV.
1019 Type
*BaseIVTy
= BaseIV
->getType()->getScalarType();
1020 if (BaseIVTy
!= Step
->getType()) {
1021 // TODO: Also use VPDerivedIVRecipe when only the step needs truncating, to
1022 // avoid separate truncate here.
1023 assert(Step
->getType()->isIntegerTy() &&
1024 "Truncation requires an integer step");
1025 Step
= State
.Builder
.CreateTrunc(Step
, BaseIVTy
);
1028 // We build scalar steps for both integer and floating-point induction
1029 // variables. Here, we determine the kind of arithmetic we will perform.
1030 Instruction::BinaryOps AddOp
;
1031 Instruction::BinaryOps MulOp
;
1032 if (BaseIVTy
->isIntegerTy()) {
1033 AddOp
= Instruction::Add
;
1034 MulOp
= Instruction::Mul
;
1036 AddOp
= InductionOpcode
;
1037 MulOp
= Instruction::FMul
;
1040 // Determine the number of scalars we need to generate for each unroll
1042 bool FirstLaneOnly
= vputils::onlyFirstLaneUsed(this);
1043 // Compute the scalar steps and save the results in State.
1045 IntegerType::get(BaseIVTy
->getContext(), BaseIVTy
->getScalarSizeInBits());
1046 Type
*VecIVTy
= nullptr;
1047 Value
*UnitStepVec
= nullptr, *SplatStep
= nullptr, *SplatIV
= nullptr;
1048 if (!FirstLaneOnly
&& State
.VF
.isScalable()) {
1049 VecIVTy
= VectorType::get(BaseIVTy
, State
.VF
);
1051 Builder
.CreateStepVector(VectorType::get(IntStepTy
, State
.VF
));
1052 SplatStep
= Builder
.CreateVectorSplat(State
.VF
, Step
);
1053 SplatIV
= Builder
.CreateVectorSplat(State
.VF
, BaseIV
);
1056 unsigned StartPart
= 0;
1057 unsigned EndPart
= State
.UF
;
1058 unsigned StartLane
= 0;
1059 unsigned EndLane
= FirstLaneOnly
? 1 : State
.VF
.getKnownMinValue();
1060 if (State
.Instance
) {
1061 StartPart
= State
.Instance
->Part
;
1062 EndPart
= StartPart
+ 1;
1063 StartLane
= State
.Instance
->Lane
.getKnownLane();
1064 EndLane
= StartLane
+ 1;
1066 for (unsigned Part
= StartPart
; Part
< EndPart
; ++Part
) {
1067 Value
*StartIdx0
= createStepForVF(Builder
, IntStepTy
, State
.VF
, Part
);
1069 if (!FirstLaneOnly
&& State
.VF
.isScalable()) {
1070 auto *SplatStartIdx
= Builder
.CreateVectorSplat(State
.VF
, StartIdx0
);
1071 auto *InitVec
= Builder
.CreateAdd(SplatStartIdx
, UnitStepVec
);
1072 if (BaseIVTy
->isFloatingPointTy())
1073 InitVec
= Builder
.CreateSIToFP(InitVec
, VecIVTy
);
1074 auto *Mul
= Builder
.CreateBinOp(MulOp
, InitVec
, SplatStep
);
1075 auto *Add
= Builder
.CreateBinOp(AddOp
, SplatIV
, Mul
);
1076 State
.set(this, Add
, Part
);
1077 // It's useful to record the lane values too for the known minimum number
1078 // of elements so we do those below. This improves the code quality when
1079 // trying to extract the first element, for example.
1082 if (BaseIVTy
->isFloatingPointTy())
1083 StartIdx0
= Builder
.CreateSIToFP(StartIdx0
, BaseIVTy
);
1085 for (unsigned Lane
= StartLane
; Lane
< EndLane
; ++Lane
) {
1086 Value
*StartIdx
= Builder
.CreateBinOp(
1087 AddOp
, StartIdx0
, getSignedIntOrFpConstant(BaseIVTy
, Lane
));
1088 // The step returned by `createStepForVF` is a runtime-evaluated value
1089 // when VF is scalable. Otherwise, it should be folded into a Constant.
1090 assert((State
.VF
.isScalable() || isa
<Constant
>(StartIdx
)) &&
1091 "Expected StartIdx to be folded to a constant when VF is not "
1093 auto *Mul
= Builder
.CreateBinOp(MulOp
, StartIdx
, Step
);
1094 auto *Add
= Builder
.CreateBinOp(AddOp
, BaseIV
, Mul
);
1095 State
.set(this, Add
, VPIteration(Part
, Lane
));
1100 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1101 void VPScalarIVStepsRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
1102 VPSlotTracker
&SlotTracker
) const {
1104 printAsOperand(O
, SlotTracker
);
1105 O
<< " = SCALAR-STEPS ";
1106 printOperands(O
, SlotTracker
);
1110 void VPWidenGEPRecipe::execute(VPTransformState
&State
) {
1111 assert(State
.VF
.isVector() && "not widening");
1112 auto *GEP
= cast
<GetElementPtrInst
>(getUnderlyingInstr());
1113 // Construct a vector GEP by widening the operands of the scalar GEP as
1114 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1115 // results in a vector of pointers when at least one operand of the GEP
1116 // is vector-typed. Thus, to keep the representation compact, we only use
1117 // vector-typed operands for loop-varying values.
1119 if (areAllOperandsInvariant()) {
1120 // If we are vectorizing, but the GEP has only loop-invariant operands,
1121 // the GEP we build (by only using vector-typed operands for
1122 // loop-varying values) would be a scalar pointer. Thus, to ensure we
1123 // produce a vector of pointers, we need to either arbitrarily pick an
1124 // operand to broadcast, or broadcast a clone of the original GEP.
1125 // Here, we broadcast a clone of the original.
1127 // TODO: If at some point we decide to scalarize instructions having
1128 // loop-invariant operands, this special case will no longer be
1129 // required. We would add the scalarization decision to
1130 // collectLoopScalars() and teach getVectorValue() to broadcast
1131 // the lane-zero scalar value.
1132 SmallVector
<Value
*> Ops
;
1133 for (unsigned I
= 0, E
= getNumOperands(); I
!= E
; I
++)
1134 Ops
.push_back(State
.get(getOperand(I
), VPIteration(0, 0)));
1137 State
.Builder
.CreateGEP(GEP
->getSourceElementType(), Ops
[0],
1138 ArrayRef(Ops
).drop_front(), "", isInBounds());
1139 for (unsigned Part
= 0; Part
< State
.UF
; ++Part
) {
1140 Value
*EntryPart
= State
.Builder
.CreateVectorSplat(State
.VF
, NewGEP
);
1141 State
.set(this, EntryPart
, Part
);
1142 State
.addMetadata(EntryPart
, GEP
);
1145 // If the GEP has at least one loop-varying operand, we are sure to
1146 // produce a vector of pointers. But if we are only unrolling, we want
1147 // to produce a scalar GEP for each unroll part. Thus, the GEP we
1148 // produce with the code below will be scalar (if VF == 1) or vector
1149 // (otherwise). Note that for the unroll-only case, we still maintain
1150 // values in the vector mapping with initVector, as we do for other
1152 for (unsigned Part
= 0; Part
< State
.UF
; ++Part
) {
1153 // The pointer operand of the new GEP. If it's loop-invariant, we
1154 // won't broadcast it.
1155 auto *Ptr
= isPointerLoopInvariant()
1156 ? State
.get(getOperand(0), VPIteration(0, 0))
1157 : State
.get(getOperand(0), Part
);
1159 // Collect all the indices for the new GEP. If any index is
1160 // loop-invariant, we won't broadcast it.
1161 SmallVector
<Value
*, 4> Indices
;
1162 for (unsigned I
= 1, E
= getNumOperands(); I
< E
; I
++) {
1163 VPValue
*Operand
= getOperand(I
);
1164 if (isIndexLoopInvariant(I
- 1))
1165 Indices
.push_back(State
.get(Operand
, VPIteration(0, 0)));
1167 Indices
.push_back(State
.get(Operand
, Part
));
1170 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1171 // but it should be a vector, otherwise.
1172 auto *NewGEP
= State
.Builder
.CreateGEP(GEP
->getSourceElementType(), Ptr
,
1173 Indices
, "", isInBounds());
1174 assert((State
.VF
.isScalar() || NewGEP
->getType()->isVectorTy()) &&
1175 "NewGEP is not a pointer vector");
1176 State
.set(this, NewGEP
, Part
);
1177 State
.addMetadata(NewGEP
, GEP
);
1182 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1183 void VPWidenGEPRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
1184 VPSlotTracker
&SlotTracker
) const {
1185 O
<< Indent
<< "WIDEN-GEP ";
1186 O
<< (isPointerLoopInvariant() ? "Inv" : "Var");
1187 for (size_t I
= 0; I
< getNumOperands() - 1; ++I
)
1188 O
<< "[" << (isIndexLoopInvariant(I
) ? "Inv" : "Var") << "]";
1191 printAsOperand(O
, SlotTracker
);
1192 O
<< " = getelementptr";
1194 printOperands(O
, SlotTracker
);
1198 void VPBlendRecipe::execute(VPTransformState
&State
) {
1199 State
.setDebugLocFrom(getDebugLoc());
1200 // We know that all PHIs in non-header blocks are converted into
1201 // selects, so we don't have to worry about the insertion order and we
1202 // can just use the builder.
1203 // At this point we generate the predication tree. There may be
1204 // duplications since this is a simple recursive scan, but future
1205 // optimizations will clean it up.
1207 unsigned NumIncoming
= getNumIncomingValues();
1209 // Generate a sequence of selects of the form:
1210 // SELECT(Mask3, In3,
1211 // SELECT(Mask2, In2,
1212 // SELECT(Mask1, In1,
1214 // Note that Mask0 is never used: lanes for which no path reaches this phi and
1215 // are essentially undef are taken from In0.
1216 VectorParts
Entry(State
.UF
);
1217 for (unsigned In
= 0; In
< NumIncoming
; ++In
) {
1218 for (unsigned Part
= 0; Part
< State
.UF
; ++Part
) {
1219 // We might have single edge PHIs (blocks) - use an identity
1220 // 'select' for the first PHI operand.
1221 Value
*In0
= State
.get(getIncomingValue(In
), Part
);
1223 Entry
[Part
] = In0
; // Initialize with the first incoming value.
1225 // Select between the current value and the previous incoming edge
1226 // based on the incoming mask.
1227 Value
*Cond
= State
.get(getMask(In
), Part
);
1229 State
.Builder
.CreateSelect(Cond
, In0
, Entry
[Part
], "predphi");
1233 for (unsigned Part
= 0; Part
< State
.UF
; ++Part
)
1234 State
.set(this, Entry
[Part
], Part
);
1237 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1238 void VPBlendRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
1239 VPSlotTracker
&SlotTracker
) const {
1240 O
<< Indent
<< "BLEND ";
1241 printAsOperand(O
, SlotTracker
);
1243 if (getNumIncomingValues() == 1) {
1244 // Not a User of any mask: not really blending, this is a
1245 // single-predecessor phi.
1247 getIncomingValue(0)->printAsOperand(O
, SlotTracker
);
1249 for (unsigned I
= 0, E
= getNumIncomingValues(); I
< E
; ++I
) {
1251 getIncomingValue(I
)->printAsOperand(O
, SlotTracker
);
1253 getMask(I
)->printAsOperand(O
, SlotTracker
);
1258 void VPReductionRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
1259 VPSlotTracker
&SlotTracker
) const {
1260 O
<< Indent
<< "REDUCE ";
1261 printAsOperand(O
, SlotTracker
);
1263 getChainOp()->printAsOperand(O
, SlotTracker
);
1265 if (isa
<FPMathOperator
>(getUnderlyingInstr()))
1266 O
<< getUnderlyingInstr()->getFastMathFlags();
1267 O
<< " reduce." << Instruction::getOpcodeName(RdxDesc
.getOpcode()) << " (";
1268 getVecOp()->printAsOperand(O
, SlotTracker
);
1271 getCondOp()->printAsOperand(O
, SlotTracker
);
1274 if (RdxDesc
.IntermediateStore
)
1275 O
<< " (with final reduction value stored in invariant address sank "
1280 bool VPReplicateRecipe::shouldPack() const {
1281 // Find if the recipe is used by a widened recipe via an intervening
1282 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
1283 return any_of(users(), [](const VPUser
*U
) {
1284 if (auto *PredR
= dyn_cast
<VPPredInstPHIRecipe
>(U
))
1285 return any_of(PredR
->users(), [PredR
](const VPUser
*U
) {
1286 return !U
->usesScalars(PredR
);
1292 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1293 void VPReplicateRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
1294 VPSlotTracker
&SlotTracker
) const {
1295 O
<< Indent
<< (IsUniform
? "CLONE " : "REPLICATE ");
1297 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
1298 printAsOperand(O
, SlotTracker
);
1301 if (auto *CB
= dyn_cast
<CallBase
>(getUnderlyingInstr())) {
1304 O
<< "@" << CB
->getCalledFunction()->getName() << "(";
1305 interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
1306 O
, [&O
, &SlotTracker
](VPValue
*Op
) {
1307 Op
->printAsOperand(O
, SlotTracker
);
1311 O
<< Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode());
1313 printOperands(O
, SlotTracker
);
1321 void VPBranchOnMaskRecipe::execute(VPTransformState
&State
) {
1322 assert(State
.Instance
&& "Branch on Mask works only on single instance.");
1324 unsigned Part
= State
.Instance
->Part
;
1325 unsigned Lane
= State
.Instance
->Lane
.getKnownLane();
1327 Value
*ConditionBit
= nullptr;
1328 VPValue
*BlockInMask
= getMask();
1330 ConditionBit
= State
.get(BlockInMask
, Part
);
1331 if (ConditionBit
->getType()->isVectorTy())
1332 ConditionBit
= State
.Builder
.CreateExtractElement(
1333 ConditionBit
, State
.Builder
.getInt32(Lane
));
1334 } else // Block in mask is all-one.
1335 ConditionBit
= State
.Builder
.getTrue();
1337 // Replace the temporary unreachable terminator with a new conditional branch,
1338 // whose two destinations will be set later when they are created.
1339 auto *CurrentTerminator
= State
.CFG
.PrevBB
->getTerminator();
1340 assert(isa
<UnreachableInst
>(CurrentTerminator
) &&
1341 "Expected to replace unreachable terminator with conditional branch.");
1342 auto *CondBr
= BranchInst::Create(State
.CFG
.PrevBB
, nullptr, ConditionBit
);
1343 CondBr
->setSuccessor(0, nullptr);
1344 ReplaceInstWithInst(CurrentTerminator
, CondBr
);
1347 void VPPredInstPHIRecipe::execute(VPTransformState
&State
) {
1348 assert(State
.Instance
&& "Predicated instruction PHI works per instance.");
1349 Instruction
*ScalarPredInst
=
1350 cast
<Instruction
>(State
.get(getOperand(0), *State
.Instance
));
1351 BasicBlock
*PredicatedBB
= ScalarPredInst
->getParent();
1352 BasicBlock
*PredicatingBB
= PredicatedBB
->getSinglePredecessor();
1353 assert(PredicatingBB
&& "Predicated block has no single predecessor.");
1354 assert(isa
<VPReplicateRecipe
>(getOperand(0)) &&
1355 "operand must be VPReplicateRecipe");
1357 // By current pack/unpack logic we need to generate only a single phi node: if
1358 // a vector value for the predicated instruction exists at this point it means
1359 // the instruction has vector users only, and a phi for the vector value is
1360 // needed. In this case the recipe of the predicated instruction is marked to
1361 // also do that packing, thereby "hoisting" the insert-element sequence.
1362 // Otherwise, a phi node for the scalar value is needed.
1363 unsigned Part
= State
.Instance
->Part
;
1364 if (State
.hasVectorValue(getOperand(0), Part
)) {
1365 Value
*VectorValue
= State
.get(getOperand(0), Part
);
1366 InsertElementInst
*IEI
= cast
<InsertElementInst
>(VectorValue
);
1367 PHINode
*VPhi
= State
.Builder
.CreatePHI(IEI
->getType(), 2);
1368 VPhi
->addIncoming(IEI
->getOperand(0), PredicatingBB
); // Unmodified vector.
1369 VPhi
->addIncoming(IEI
, PredicatedBB
); // New vector with inserted element.
1370 if (State
.hasVectorValue(this, Part
))
1371 State
.reset(this, VPhi
, Part
);
1373 State
.set(this, VPhi
, Part
);
1374 // NOTE: Currently we need to update the value of the operand, so the next
1375 // predicated iteration inserts its generated value in the correct vector.
1376 State
.reset(getOperand(0), VPhi
, Part
);
1378 Type
*PredInstType
= getOperand(0)->getUnderlyingValue()->getType();
1379 PHINode
*Phi
= State
.Builder
.CreatePHI(PredInstType
, 2);
1380 Phi
->addIncoming(PoisonValue::get(ScalarPredInst
->getType()),
1382 Phi
->addIncoming(ScalarPredInst
, PredicatedBB
);
1383 if (State
.hasScalarValue(this, *State
.Instance
))
1384 State
.reset(this, Phi
, *State
.Instance
);
1386 State
.set(this, Phi
, *State
.Instance
);
1387 // NOTE: Currently we need to update the value of the operand, so the next
1388 // predicated iteration inserts its generated value in the correct vector.
1389 State
.reset(getOperand(0), Phi
, *State
.Instance
);
1393 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1394 void VPPredInstPHIRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
1395 VPSlotTracker
&SlotTracker
) const {
1396 O
<< Indent
<< "PHI-PREDICATED-INSTRUCTION ";
1397 printAsOperand(O
, SlotTracker
);
1399 printOperands(O
, SlotTracker
);
1402 void VPWidenMemoryInstructionRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
1403 VPSlotTracker
&SlotTracker
) const {
1404 O
<< Indent
<< "WIDEN ";
1407 getVPSingleValue()->printAsOperand(O
, SlotTracker
);
1410 O
<< Instruction::getOpcodeName(Ingredient
.getOpcode()) << " ";
1412 printOperands(O
, SlotTracker
);
1416 void VPCanonicalIVPHIRecipe::execute(VPTransformState
&State
) {
1417 Value
*Start
= getStartValue()->getLiveInIRValue();
1418 PHINode
*EntryPart
= PHINode::Create(Start
->getType(), 2, "index");
1419 EntryPart
->insertBefore(State
.CFG
.PrevBB
->getFirstInsertionPt());
1421 BasicBlock
*VectorPH
= State
.CFG
.getPreheaderBBFor(this);
1422 EntryPart
->addIncoming(Start
, VectorPH
);
1423 EntryPart
->setDebugLoc(getDebugLoc());
1424 for (unsigned Part
= 0, UF
= State
.UF
; Part
< UF
; ++Part
)
1425 State
.set(this, EntryPart
, Part
);
1428 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1429 void VPCanonicalIVPHIRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
1430 VPSlotTracker
&SlotTracker
) const {
1431 O
<< Indent
<< "EMIT ";
1432 printAsOperand(O
, SlotTracker
);
1433 O
<< " = CANONICAL-INDUCTION ";
1434 printOperands(O
, SlotTracker
);
1438 bool VPCanonicalIVPHIRecipe::isCanonical(
1439 InductionDescriptor::InductionKind Kind
, VPValue
*Start
, VPValue
*Step
,
1441 // The types must match and it must be an integer induction.
1442 if (Ty
!= getScalarType() || Kind
!= InductionDescriptor::IK_IntInduction
)
1444 // Start must match the start value of this canonical induction.
1445 if (Start
!= getStartValue())
1448 // If the step is defined by a recipe, it is not a ConstantInt.
1449 if (Step
->getDefiningRecipe())
1452 ConstantInt
*StepC
= dyn_cast
<ConstantInt
>(Step
->getLiveInIRValue());
1453 return StepC
&& StepC
->isOne();
1456 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF
) {
1457 return IsScalarAfterVectorization
&&
1458 (!VF
.isScalable() || vputils::onlyFirstLaneUsed(this));
1461 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1462 void VPWidenPointerInductionRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
1463 VPSlotTracker
&SlotTracker
) const {
1464 O
<< Indent
<< "EMIT ";
1465 printAsOperand(O
, SlotTracker
);
1466 O
<< " = WIDEN-POINTER-INDUCTION ";
1467 getStartValue()->printAsOperand(O
, SlotTracker
);
1468 O
<< ", " << *IndDesc
.getStep();
1472 void VPExpandSCEVRecipe::execute(VPTransformState
&State
) {
1473 assert(!State
.Instance
&& "cannot be used in per-lane");
1474 const DataLayout
&DL
= State
.CFG
.PrevBB
->getModule()->getDataLayout();
1475 SCEVExpander
Exp(SE
, DL
, "induction");
1477 Value
*Res
= Exp
.expandCodeFor(Expr
, Expr
->getType(),
1478 &*State
.Builder
.GetInsertPoint());
1479 assert(!State
.ExpandedSCEVs
.contains(Expr
) &&
1480 "Same SCEV expanded multiple times");
1481 State
.ExpandedSCEVs
[Expr
] = Res
;
1482 for (unsigned Part
= 0, UF
= State
.UF
; Part
< UF
; ++Part
)
1483 State
.set(this, Res
, {Part
, 0});
1486 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1487 void VPExpandSCEVRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
1488 VPSlotTracker
&SlotTracker
) const {
1489 O
<< Indent
<< "EMIT ";
1490 getVPSingleValue()->printAsOperand(O
, SlotTracker
);
1491 O
<< " = EXPAND SCEV " << *Expr
;
1495 void VPWidenCanonicalIVRecipe::execute(VPTransformState
&State
) {
1496 Value
*CanonicalIV
= State
.get(getOperand(0), 0);
1497 Type
*STy
= CanonicalIV
->getType();
1498 IRBuilder
<> Builder(State
.CFG
.PrevBB
->getTerminator());
1499 ElementCount VF
= State
.VF
;
1500 Value
*VStart
= VF
.isScalar()
1502 : Builder
.CreateVectorSplat(VF
, CanonicalIV
, "broadcast");
1503 for (unsigned Part
= 0, UF
= State
.UF
; Part
< UF
; ++Part
) {
1504 Value
*VStep
= createStepForVF(Builder
, STy
, VF
, Part
);
1505 if (VF
.isVector()) {
1506 VStep
= Builder
.CreateVectorSplat(VF
, VStep
);
1508 Builder
.CreateAdd(VStep
, Builder
.CreateStepVector(VStep
->getType()));
1510 Value
*CanonicalVectorIV
= Builder
.CreateAdd(VStart
, VStep
, "vec.iv");
1511 State
.set(this, CanonicalVectorIV
, Part
);
1515 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1516 void VPWidenCanonicalIVRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
1517 VPSlotTracker
&SlotTracker
) const {
1518 O
<< Indent
<< "EMIT ";
1519 printAsOperand(O
, SlotTracker
);
1520 O
<< " = WIDEN-CANONICAL-INDUCTION ";
1521 printOperands(O
, SlotTracker
);
1525 void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState
&State
) {
1526 auto &Builder
= State
.Builder
;
1527 // Create a vector from the initial value.
1528 auto *VectorInit
= getStartValue()->getLiveInIRValue();
1530 Type
*VecTy
= State
.VF
.isScalar()
1531 ? VectorInit
->getType()
1532 : VectorType::get(VectorInit
->getType(), State
.VF
);
1534 BasicBlock
*VectorPH
= State
.CFG
.getPreheaderBBFor(this);
1535 if (State
.VF
.isVector()) {
1536 auto *IdxTy
= Builder
.getInt32Ty();
1537 auto *One
= ConstantInt::get(IdxTy
, 1);
1538 IRBuilder
<>::InsertPointGuard
Guard(Builder
);
1539 Builder
.SetInsertPoint(VectorPH
->getTerminator());
1540 auto *RuntimeVF
= getRuntimeVF(Builder
, IdxTy
, State
.VF
);
1541 auto *LastIdx
= Builder
.CreateSub(RuntimeVF
, One
);
1542 VectorInit
= Builder
.CreateInsertElement(
1543 PoisonValue::get(VecTy
), VectorInit
, LastIdx
, "vector.recur.init");
1546 // Create a phi node for the new recurrence.
1547 PHINode
*EntryPart
= PHINode::Create(VecTy
, 2, "vector.recur");
1548 EntryPart
->insertBefore(State
.CFG
.PrevBB
->getFirstInsertionPt());
1549 EntryPart
->addIncoming(VectorInit
, VectorPH
);
1550 State
.set(this, EntryPart
, 0);
1553 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1554 void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
1555 VPSlotTracker
&SlotTracker
) const {
1556 O
<< Indent
<< "FIRST-ORDER-RECURRENCE-PHI ";
1557 printAsOperand(O
, SlotTracker
);
1559 printOperands(O
, SlotTracker
);
1563 void VPReductionPHIRecipe::execute(VPTransformState
&State
) {
1564 PHINode
*PN
= cast
<PHINode
>(getUnderlyingValue());
1565 auto &Builder
= State
.Builder
;
1567 // In order to support recurrences we need to be able to vectorize Phi nodes.
1568 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
1569 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
1570 // this value when we vectorize all of the instructions that use the PHI.
1571 bool ScalarPHI
= State
.VF
.isScalar() || IsInLoop
;
1573 ScalarPHI
? PN
->getType() : VectorType::get(PN
->getType(), State
.VF
);
1575 BasicBlock
*HeaderBB
= State
.CFG
.PrevBB
;
1576 assert(State
.CurrentVectorLoop
->getHeader() == HeaderBB
&&
1577 "recipe must be in the vector loop header");
1578 unsigned LastPartForNewPhi
= isOrdered() ? 1 : State
.UF
;
1579 for (unsigned Part
= 0; Part
< LastPartForNewPhi
; ++Part
) {
1580 Instruction
*EntryPart
= PHINode::Create(VecTy
, 2, "vec.phi");
1581 EntryPart
->insertBefore(HeaderBB
->getFirstInsertionPt());
1582 State
.set(this, EntryPart
, Part
);
1585 BasicBlock
*VectorPH
= State
.CFG
.getPreheaderBBFor(this);
1587 // Reductions do not have to start at zero. They can start with
1588 // any loop invariant values.
1589 VPValue
*StartVPV
= getStartValue();
1590 Value
*StartV
= StartVPV
->getLiveInIRValue();
1592 Value
*Iden
= nullptr;
1593 RecurKind RK
= RdxDesc
.getRecurrenceKind();
1594 if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK
) ||
1595 RecurrenceDescriptor::isAnyOfRecurrenceKind(RK
)) {
1596 // MinMax and AnyOf reductions have the start value as their identity.
1600 IRBuilderBase::InsertPointGuard
IPBuilder(Builder
);
1601 Builder
.SetInsertPoint(VectorPH
->getTerminator());
1603 Builder
.CreateVectorSplat(State
.VF
, StartV
, "minmax.ident");
1606 Iden
= RdxDesc
.getRecurrenceIdentity(RK
, VecTy
->getScalarType(),
1607 RdxDesc
.getFastMathFlags());
1610 Iden
= Builder
.CreateVectorSplat(State
.VF
, Iden
);
1611 IRBuilderBase::InsertPointGuard
IPBuilder(Builder
);
1612 Builder
.SetInsertPoint(VectorPH
->getTerminator());
1613 Constant
*Zero
= Builder
.getInt32(0);
1614 StartV
= Builder
.CreateInsertElement(Iden
, StartV
, Zero
);
1618 for (unsigned Part
= 0; Part
< LastPartForNewPhi
; ++Part
) {
1619 Value
*EntryPart
= State
.get(this, Part
);
1620 // Make sure to add the reduction start value only to the
1621 // first unroll part.
1622 Value
*StartVal
= (Part
== 0) ? StartV
: Iden
;
1623 cast
<PHINode
>(EntryPart
)->addIncoming(StartVal
, VectorPH
);
1627 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1628 void VPReductionPHIRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
1629 VPSlotTracker
&SlotTracker
) const {
1630 O
<< Indent
<< "WIDEN-REDUCTION-PHI ";
1632 printAsOperand(O
, SlotTracker
);
1634 printOperands(O
, SlotTracker
);
1638 void VPWidenPHIRecipe::execute(VPTransformState
&State
) {
1639 assert(EnableVPlanNativePath
&&
1640 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
1642 Value
*Op0
= State
.get(getOperand(0), 0);
1643 Type
*VecTy
= Op0
->getType();
1644 Value
*VecPhi
= State
.Builder
.CreatePHI(VecTy
, 2, "vec.phi");
1645 State
.set(this, VecPhi
, 0);
1648 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1649 void VPWidenPHIRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
1650 VPSlotTracker
&SlotTracker
) const {
1651 O
<< Indent
<< "WIDEN-PHI ";
1653 auto *OriginalPhi
= cast
<PHINode
>(getUnderlyingValue());
1654 // Unless all incoming values are modeled in VPlan print the original PHI
1656 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
1657 // values as VPValues.
1658 if (getNumOperands() != OriginalPhi
->getNumOperands()) {
1659 O
<< VPlanIngredient(OriginalPhi
);
1663 printAsOperand(O
, SlotTracker
);
1665 printOperands(O
, SlotTracker
);
1669 // TODO: It would be good to use the existing VPWidenPHIRecipe instead and
1670 // remove VPActiveLaneMaskPHIRecipe.
1671 void VPActiveLaneMaskPHIRecipe::execute(VPTransformState
&State
) {
1672 BasicBlock
*VectorPH
= State
.CFG
.getPreheaderBBFor(this);
1673 for (unsigned Part
= 0, UF
= State
.UF
; Part
< UF
; ++Part
) {
1674 Value
*StartMask
= State
.get(getOperand(0), Part
);
1675 PHINode
*EntryPart
=
1676 State
.Builder
.CreatePHI(StartMask
->getType(), 2, "active.lane.mask");
1677 EntryPart
->addIncoming(StartMask
, VectorPH
);
1678 EntryPart
->setDebugLoc(getDebugLoc());
1679 State
.set(this, EntryPart
, Part
);
1683 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1684 void VPActiveLaneMaskPHIRecipe::print(raw_ostream
&O
, const Twine
&Indent
,
1685 VPSlotTracker
&SlotTracker
) const {
1686 O
<< Indent
<< "ACTIVE-LANE-MASK-PHI ";
1688 printAsOperand(O
, SlotTracker
);
1690 printOperands(O
, SlotTracker
);