Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / lib / Transforms / Vectorize / VPlanRecipes.cpp
blob6b3218dca1b18b050e9e93188b3fc50dd5c8a5c4
1 //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains implementations for different VPlan recipes.
11 ///
12 //===----------------------------------------------------------------------===//
14 #include "VPlan.h"
15 #include "VPlanAnalysis.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/Analysis/IVDescriptors.h"
20 #include "llvm/IR/BasicBlock.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/Instruction.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/Type.h"
25 #include "llvm/IR/Value.h"
26 #include "llvm/Support/Casting.h"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
31 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
32 #include <cassert>
34 using namespace llvm;
36 using VectorParts = SmallVector<Value *, 2>;
38 namespace llvm {
39 extern cl::opt<bool> EnableVPlanNativePath;
42 #define LV_NAME "loop-vectorize"
43 #define DEBUG_TYPE LV_NAME
// Returns true if this recipe may write to memory. Used by VPlan
// transformations to decide whether recipes can be reordered or sunk.
bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    // Only widened stores write to memory; widened loads do not.
    return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    // Defer to the underlying scalar IR instruction.
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPBranchOnMaskSC:
  case VPScalarIVStepsSC:
  case VPPredInstPHISC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    // These recipes never write to memory. In asserts builds, cross-check
    // this claim against the underlying IR instruction, if there is one.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    // Conservatively assume any other recipe may write to memory.
    return true;
  }
}
// Returns true if this recipe may read from memory. Mirrors
// mayWriteToMemory() above; the two switches must be kept in sync.
bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    // Widened loads read from memory; widened stores do not.
    return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    // Defer to the underlying scalar IR instruction.
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPBranchOnMaskSC:
  case VPScalarIVStepsSC:
  case VPPredInstPHISC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    // These recipes never read from memory. In asserts builds, cross-check
    // this claim against the underlying IR instruction, if there is one.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    // Conservatively assume any other recipe may read from memory.
    return true;
  }
}
// Returns true if this recipe may have side effects (e.g. trapping division,
// memory writes). Recipes without side effects may be dropped if dead.
bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPDerivedIVSC:
  case VPPredInstPHISC:
    return false;
  case VPInstructionSC:
    // Abstract VPInstructions: only the explicitly-listed opcodes are known
    // to be side-effect free.
    switch (cast<VPInstruction>(this)->getOpcode()) {
    case Instruction::ICmp:
    case VPInstruction::Not:
    case VPInstruction::CalculateTripCountMinusVF:
    case VPInstruction::CanonicalIVIncrement:
    case VPInstruction::CanonicalIVIncrementForPart:
      return false;
    default:
      return true;
    }
  case VPWidenCallSC:
    // Defer to the underlying call instruction.
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayHaveSideEffects();
  case VPBlendSC:
  case VPReductionSC:
  case VPScalarIVStepsSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenPointerInductionSC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    // These recipes are side-effect free. In asserts builds, cross-check
    // against the underlying IR instruction, if there is one.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPWidenMemoryInstructionSC:
    // For memory recipes, side effects are exactly "writes to memory";
    // verify that matches the ingredient's own answer.
    assert(cast<VPWidenMemoryInstructionRecipe>(this)
               ->getIngredient()
               .mayHaveSideEffects() == mayWriteToMemory() &&
           "mayHaveSideffects result for ingredient differs from this "
           "implementation");
    return mayWriteToMemory();
  case VPReplicateSC: {
    // Replicated recipes keep the original scalar instruction around.
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    // Conservatively assume any other recipe has side effects.
    return true;
  }
}
// Hook up the live-out value to the scalar phi in the exit block: add an
// incoming value for the middle block, taken from the last unroll part.
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
  // By default the exit value lives in the last lane of the last part; for
  // values that are uniform after vectorization any lane works, so use the
  // first one.
  auto Lane = VPLane::getLastLaneForVF(State.VF);
  VPValue *ExitValue = getOperand(0);
  if (vputils::isUniformAfterVectorization(ExitValue))
    Lane = VPLane::getFirstLane();
  VPBasicBlock *MiddleVPBB =
      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
  assert(MiddleVPBB->getNumSuccessors() == 0 &&
         "the middle block must not have any successors");
  BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
  Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
                   MiddleBB);
}
180 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
181 void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
182 O << "Live-out ";
183 getPhi()->printAsOperand(O);
184 O << " = ";
185 getOperand(0)->printAsOperand(O, SlotTracker);
186 O << "\n";
188 #endif
190 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
191 assert(!Parent && "Recipe already in some VPBasicBlock");
192 assert(InsertPos->getParent() &&
193 "Insertion position not in any VPBasicBlock");
194 Parent = InsertPos->getParent();
195 Parent->getRecipeList().insert(InsertPos->getIterator(), this);
198 void VPRecipeBase::insertBefore(VPBasicBlock &BB,
199 iplist<VPRecipeBase>::iterator I) {
200 assert(!Parent && "Recipe already in some VPBasicBlock");
201 assert(I == BB.end() || I->getParent() == &BB);
202 Parent = &BB;
203 BB.getRecipeList().insert(I, this);
206 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
207 assert(!Parent && "Recipe already in some VPBasicBlock");
208 assert(InsertPos->getParent() &&
209 "Insertion position not in any VPBasicBlock");
210 Parent = InsertPos->getParent();
211 Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
214 void VPRecipeBase::removeFromParent() {
215 assert(getParent() && "Recipe not in any VPBasicBlock");
216 getParent()->getRecipeList().remove(getIterator());
217 Parent = nullptr;
220 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
221 assert(getParent() && "Recipe not in any VPBasicBlock");
222 return getParent()->getRecipeList().erase(getIterator());
225 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
226 removeFromParent();
227 insertAfter(InsertPos);
230 void VPRecipeBase::moveBefore(VPBasicBlock &BB,
231 iplist<VPRecipeBase>::iterator I) {
232 removeFromParent();
233 insertBefore(BB, I);
236 FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
237 assert(OpType == OperationType::FPMathOp &&
238 "recipe doesn't have fast math flags");
239 FastMathFlags Res;
240 Res.setAllowReassoc(FMFs.AllowReassoc);
241 Res.setNoNaNs(FMFs.NoNaNs);
242 Res.setNoInfs(FMFs.NoInfs);
243 Res.setNoSignedZeros(FMFs.NoSignedZeros);
244 Res.setAllowReciprocal(FMFs.AllowReciprocal);
245 Res.setAllowContract(FMFs.AllowContract);
246 Res.setApproxFunc(FMFs.ApproxFunc);
247 return Res;
// Construct a two-operand VPInstruction that carries a compare predicate.
// Currently only ICmp opcodes are supported with a predicate.
VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
                             VPValue *A, VPValue *B, DebugLoc DL,
                             const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
                          Pred, DL),
      VPValue(this), Opcode(Opcode), Name(Name.str()) {
  assert(Opcode == Instruction::ICmp &&
         "only ICmp predicates supported at the moment");
}
// Construct a VPInstruction carrying fast-math flags; the opcode must be a
// floating-point operation (checked via isFPMathOp in asserts builds).
VPInstruction::VPInstruction(unsigned Opcode,
                             std::initializer_list<VPValue *> Operands,
                             FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
      VPValue(this), Opcode(Opcode), Name(Name.str()) {
  // Make sure the VPInstruction is a floating-point operation.
  assert(isFPMathOp() && "this op can't take fast-math flags");
}
// Emit the IR for unroll part \p Part of this VPInstruction and return the
// generated value (or nullptr for opcodes without a result on later parts,
// e.g. branches, which are only materialized for Part 0).
Value *VPInstruction::generateInstruction(VPTransformState &State,
                                          unsigned Part) {
  IRBuilderBase &Builder = State.Builder;
  Builder.SetCurrentDebugLocation(getDebugLoc());

  // Ordinary LLVM binary ops are widened uniformly.
  if (Instruction::isBinaryOp(getOpcode())) {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    return Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    return Builder.CreateNot(A, Name);
  }
  case Instruction::ICmp: {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    return Builder.CreateCmp(getPredicate(), A, B, Name);
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    return Builder.CreateSelect(Cond, Op1, Op2, Name);
  }
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));

    // Emit an <VF x i1> llvm.get.active.lane.mask(VIVElem0, ScalarTC).
    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
                                   {PredTy, ScalarTC->getType()},
                                   {VIVElem0, ScalarTC}, nullptr, Name);
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy())
      return PartMinus1;
    Value *V2 = State.get(getOperand(1), Part);
    return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
  }
  case VPInstruction::CalculateTripCountMinusVF: {
    // Compute max(TC - VF * UF, 0), guarding against unsigned underflow.
    Value *ScalarTC = State.get(getOperand(0), {0, 0});
    Value *Step =
        createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
    Value *Sub = Builder.CreateSub(ScalarTC, Step);
    Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
    Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
    return Builder.CreateSelect(Cmp, Sub, Zero);
  }
  case VPInstruction::CanonicalIVIncrement: {
    if (Part == 0) {
      auto *Phi = State.get(getOperand(0), 0);
      // The loop step is equal to the vectorization factor (num of SIMD
      // elements) times the unroll factor (num of SIMD instructions).
      Value *Step =
          createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
      return Builder.CreateAdd(Phi, Step, Name, hasNoUnsignedWrap(),
                               hasNoSignedWrap());
    }
    // Later parts reuse the single increment emitted for part 0.
    return State.get(this, 0);
  }
  case VPInstruction::CanonicalIVIncrementForPart: {
    auto *IV = State.get(getOperand(0), VPIteration(0, 0));
    if (Part == 0)
      return IV;

    // The canonical IV is incremented by the vectorization factor (num of SIMD
    // elements) times the unroll part.
    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
                             hasNoSignedWrap());
  }
  case VPInstruction::BranchOnCond: {
    if (Part != 0)
      return nullptr;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination for exiting blocks now and
    // to forward destination(s) later when they are created.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  case VPInstruction::BranchOnCount: {
    if (Part != 0)
      return nullptr;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination (the header) now and to the
    // forward destination (the exit/middle block) later when it is created.
    // Note that CreateCondBr expects a valid BB as first argument, so we need
    // to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}
412 #if !defined(NDEBUG)
413 bool VPInstruction::isFPMathOp() const {
414 // Inspired by FPMathOperator::classof. Notable differences are that we don't
415 // support Call, PHI and Select opcodes here yet.
416 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
417 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
418 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
419 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
421 #endif
// Generate IR for this VPInstruction across all unroll parts, propagating
// fast-math flags to the builder for the duration of the emission.
void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  // Restore the builder's FMF state on scope exit.
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  assert((hasFastMathFlags() == isFPMathOp() ||
          getOpcode() == Instruction::Select) &&
         "Recipe not a FPMathOp but has fast-math flags?");
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *GeneratedValue = generateInstruction(State, Part);
    // Opcodes like branches produce no result for later parts.
    if (!hasResult())
      continue;
    assert(GeneratedValue && "generateInstruction must produce a value");
    State.set(this, GeneratedValue, Part);
  }
}
440 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
441 void VPInstruction::dump() const {
442 VPSlotTracker SlotTracker(getParent()->getPlan());
443 print(dbgs(), "", SlotTracker);
// Print this VPInstruction as "EMIT [<result> =] <opcode> <operands>
// [, !dbg <loc>]", mapping VPInstruction-specific opcodes to readable names.
void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::CanonicalIVIncrement:
    O << "VF * UF +";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::CalculateTripCountMinusVF:
    O << "TC > VF ? TC - VF : 0";
    break;
  case VPInstruction::CanonicalIVIncrementForPart:
    O << "VF * Part +";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count";
    break;
  default:
    // Fall back to the plain LLVM opcode name.
    O << Instruction::getOpcodeName(getOpcode());
  }

  printFlags(O);
  printOperands(O, SlotTracker);

  if (auto DL = getDebugLoc()) {
    O << ", !dbg ";
    DL.print(O);
  }
}
498 #endif
// Widen a call by emitting, per unroll part, either a call to a vector
// intrinsic (VectorIntrinsicID) or to a vector library variant (Variant),
// with scalar arguments preserved where the intrinsic requires them.
void VPWidenCallRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  auto &CI = *cast<CallInst>(getUnderlyingInstr());
  assert(!isa<DbgInfoIntrinsic>(CI) &&
         "DbgInfoIntrinsic should have been dropped during VPlan construction");
  State.setDebugLocFrom(CI.getDebugLoc());

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    SmallVector<Type *, 2> TysForDecl;
    // Add return type if intrinsic is overloaded on it.
    if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) {
      TysForDecl.push_back(
          VectorType::get(CI.getType()->getScalarType(), State.VF));
    }
    SmallVector<Value *, 4> Args;
    for (const auto &I : enumerate(operands())) {
      // Some intrinsics have a scalar argument - don't replace it with a
      // vector.
      Value *Arg;
      if (VectorIntrinsicID == Intrinsic::not_intrinsic ||
          !isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
        Arg = State.get(I.value(), Part);
      else
        // Scalar operand: take lane 0 of part 0.
        Arg = State.get(I.value(), VPIteration(0, 0));
      if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
        TysForDecl.push_back(Arg->getType());
      Args.push_back(Arg);
    }

    Function *VectorF;
    if (VectorIntrinsicID != Intrinsic::not_intrinsic) {
      // Use vector version of the intrinsic.
      Module *M = State.Builder.GetInsertBlock()->getModule();
      VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
      assert(VectorF && "Can't retrieve vector intrinsic.");
    } else {
#ifndef NDEBUG
      assert(Variant != nullptr && "Can't create vector function.");
#endif
      VectorF = Variant;
    }

    // Preserve operand bundles and fast-math flags from the scalar call.
    SmallVector<OperandBundleDef, 1> OpBundles;
    CI.getOperandBundlesAsDefs(OpBundles);
    CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);

    if (isa<FPMathOperator>(V))
      V->copyFastMathFlags(&CI);

    State.set(this, V, Part);
    State.addMetadata(V, &CI);
  }
}
554 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print this widened call, noting whether it lowers to a vector intrinsic or
// a vector library function variant.
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  auto *CI = cast<CallInst>(getUnderlyingInstr());
  if (CI->getType()->isVoidTy())
    O << "void ";
  else {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << "call @" << CI->getCalledFunction()->getName() << "(";
  printOperands(O, SlotTracker);
  O << ")";

  if (VectorIntrinsicID)
    O << " (using vector intrinsic)";
  else {
    O << " (using library function";
    if (Variant->hasName())
      O << ": " << Variant->getName();
    O << ")";
  }
}
581 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
582 VPSlotTracker &SlotTracker) const {
583 O << Indent << "WIDEN-SELECT ";
584 printAsOperand(O, SlotTracker);
585 O << " = select ";
586 getOperand(0)->printAsOperand(O, SlotTracker);
587 O << ", ";
588 getOperand(1)->printAsOperand(O, SlotTracker);
589 O << ", ";
590 getOperand(2)->printAsOperand(O, SlotTracker);
591 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
593 #endif
// Widen a select per unroll part. An invariant condition is materialized once
// from the first lane; otherwise the vectorized condition is used per part.
void VPWidenSelectRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());

  // The condition can be loop invariant but still defined inside the
  // loop. This means that we can't just use the original 'cond' value.
  // We have to take the 'vectorized' value and pick the first lane.
  // Instcombine will make this a no-op.
  auto *InvarCond =
      isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
    Value *Op0 = State.get(getOperand(1), Part);
    Value *Op1 = State.get(getOperand(2), Part);
    Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
    State.set(this, Sel, Part);
    State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
  }
}
615 VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
616 const FastMathFlags &FMF) {
617 AllowReassoc = FMF.allowReassoc();
618 NoNaNs = FMF.noNaNs();
619 NoInfs = FMF.noInfs();
620 NoSignedZeros = FMF.noSignedZeros();
621 AllowReciprocal = FMF.allowReciprocal();
622 AllowContract = FMF.allowContract();
623 ApproxFunc = FMF.approxFunc();
626 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print the IR flags carried by this recipe (predicate, exact, nuw/nsw, FMFs
// or inbounds), followed by a separating space when operands will follow.
void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
  switch (OpType) {
  case OperationType::Cmp:
    O << " " << CmpInst::getPredicateName(getPredicate());
    break;
  case OperationType::PossiblyExactOp:
    if (ExactFlags.IsExact)
      O << " exact";
    break;
  case OperationType::OverflowingBinOp:
    if (WrapFlags.HasNUW)
      O << " nuw";
    if (WrapFlags.HasNSW)
      O << " nsw";
    break;
  case OperationType::FPMathOp:
    getFastMathFlags().print(O);
    break;
  case OperationType::GEPOp:
    if (GEPFlags.IsInBounds)
      O << " inbounds";
    break;
  case OperationType::Other:
    break;
  }
  if (getNumOperands() > 0)
    O << " ";
}
655 #endif
// Widen a simple arithmetic/logical instruction, a freeze, or a compare by
// emitting one vector operation per unroll part. Opcodes handled by
// dedicated recipes (call, br, phi, GEP, select) must never reach here.
void VPWidenRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  switch (Opcode) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      SmallVector<Value *, 2> Ops;
      for (VPValue *VPOp : operands())
        Ops.push_back(State.get(VPOp, Part));

      Value *V = Builder.CreateNAryOp(Opcode, Ops);

      // Transfer nuw/nsw/exact/FMF flags onto the generated instruction.
      if (auto *VecOp = dyn_cast<Instruction>(V))
        setFlags(VecOp);

      // Use this vector value for all users of the original instruction.
      State.set(this, V, Part);
      State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  case Instruction::Freeze: {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Op = State.get(getOperand(0), Part);

      Value *Freeze = Builder.CreateFreeze(Op);
      State.set(this, Freeze, Part);
    }
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
    bool FCmp = Opcode == Instruction::FCmp;
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *B = State.get(getOperand(1), Part);
      Value *C = nullptr;
      if (FCmp) {
        // Propagate fast math flags.
        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
        if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
          Builder.setFastMathFlags(I->getFastMathFlags());
        C = Builder.CreateFCmp(getPredicate(), A, B);
      } else {
        C = Builder.CreateICmp(getPredicate(), A, B);
      }
      State.set(this, C, Part);
      State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
                      << Instruction::getOpcodeName(Opcode));
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.

#if !defined(NDEBUG)
  // Verify that VPlan type inference results agree with the type of the
  // generated values.
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    assert(VectorType::get(State.TypeAnalysis.inferScalarType(this),
                           State.VF) == State.get(this, Part)->getType() &&
           "inferred type and type from generated instructions do not match");
  }
#endif
}
754 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
755 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
756 VPSlotTracker &SlotTracker) const {
757 O << Indent << "WIDEN ";
758 printAsOperand(O, SlotTracker);
759 O << " = " << Instruction::getOpcodeName(Opcode);
760 printFlags(O);
761 printOperands(O, SlotTracker);
763 #endif
// Widen a cast: per unroll part, emit the same cast opcode producing a
// vector of the recipe's result type.
void VPWidenCastRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  // Vectorize casts.
  assert(State.VF.isVector() && "Not vectorizing?");
  Type *DestTy = VectorType::get(getResultType(), State.VF);

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *A = State.get(getOperand(0), Part);
    Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
    State.set(this, Cast, Part);
    State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
  }
}
780 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
781 void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
782 VPSlotTracker &SlotTracker) const {
783 O << Indent << "WIDEN-CAST ";
784 printAsOperand(O, SlotTracker);
785 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
786 printOperands(O, SlotTracker);
787 O << " to " << *getResultType();
789 #endif
/// This function adds
/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
/// to each vector element of Val. The sequence starts at StartIndex.
/// \p Opcode is relevant for FP induction variable.
static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
                            Instruction::BinaryOps BinOp, ElementCount VF,
                            IRBuilderBase &Builder) {
  assert(VF.isVector() && "only vector VFs are supported");

  // Create and check the types.
  auto *ValVTy = cast<VectorType>(Val->getType());
  ElementCount VLen = ValVTy->getElementCount();

  Type *STy = Val->getType()->getScalarType();
  assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
         "Induction Step must be an integer or FP");
  assert(Step->getType() == STy && "Step has wrong type");

  SmallVector<Constant *, 8> Indices;

  // Create a vector of consecutive numbers from zero to VF.
  // For FP inductions the step vector is first built as integers and then
  // converted, since CreateStepVector requires an integer element type.
  VectorType *InitVecValVTy = ValVTy;
  if (STy->isFloatingPointTy()) {
    Type *InitVecValSTy =
        IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
    InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
  }
  Value *InitVec = Builder.CreateStepVector(InitVecValVTy);

  // Splat the StartIdx
  Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);

  if (STy->isIntegerTy()) {
    InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
    Step = Builder.CreateVectorSplat(VLen, Step);
    assert(Step->getType() == Val->getType() && "Invalid step vec");
    // FIXME: The newly created binary instructions should contain nsw/nuw
    // flags, which can be found from the original scalar operations.
    Step = Builder.CreateMul(InitVec, Step);
    return Builder.CreateAdd(Val, Step, "induction");
  }

  // Floating point induction.
  assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
         "Binary Opcode should be specified for FP induction");
  InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
  InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);

  Step = Builder.CreateVectorSplat(VLen, Step);
  Value *MulOp = Builder.CreateFMul(InitVec, Step);
  return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
}
844 /// A helper function that returns an integer or floating-point constant with
845 /// value C.
846 static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
847 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
848 : ConstantFP::get(Ty, C);
851 static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
852 ElementCount VF) {
853 assert(FTy->isFloatingPointTy() && "Expected floating point type!");
854 Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
855 Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
856 return B.CreateUIToFP(RuntimeVF, FTy);
// Materialize a widened integer or FP induction: build the initial stepped
// vector in the preheader, create the vector phi, and chain one step-add per
// unroll part.
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Int or FP induction being replicated.");

  Value *Start = getStartValue()->getLiveInIRValue();
  const InductionDescriptor &ID = getInductionDescriptor();
  TruncInst *Trunc = getTruncInst();
  IRBuilderBase &Builder = State.Builder;
  assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
  assert(State.VF.isVector() && "must have vector VF");

  // The value from the original loop to which we are mapping the new induction
  // variable.
  Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;

  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
    Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());

  // Now do the actual transformations, and start with fetching the step value.
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));

  assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
         "Expected either an induction phi-node or a truncate of it!");

  // Construct the initial value of the vector IV in the vector loop preheader
  auto CurrIP = Builder.saveIP();
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  Builder.SetInsertPoint(VectorPH->getTerminator());
  if (isa<TruncInst>(EntryVal)) {
    // Narrow start and step to the truncated induction's type.
    assert(Start->getType()->isIntegerTy() &&
           "Truncation requires an integer type");
    auto *TruncType = cast<IntegerType>(EntryVal->getType());
    Step = Builder.CreateTrunc(Step, TruncType);
    Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
  }

  // SteppedStart = splat(Start) + <0, 1, ..., VF-1> * Step.
  Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
  Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
  Value *SteppedStart = getStepVector(
      SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);

  // We create vector phi nodes for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (Step->getType()->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = ID.getInductionOpcode();
    MulOp = Instruction::FMul;
  }

  // Multiply the vectorization factor by the step using integer or
  // floating-point arithmetic as appropriate.
  Type *StepType = Step->getType();
  Value *RuntimeVF;
  if (Step->getType()->isFloatingPointTy())
    RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
  else
    RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
  Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);

  // Create a vector splat to use in the induction update.
  //
  // FIXME: If the step is non-constant, we create the vector splat with
  //        IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
  //        handle a constant vector splat.
  Value *SplatVF = isa<Constant>(Mul)
                       ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
                       : Builder.CreateVectorSplat(State.VF, Mul);
  Builder.restoreIP(CurrIP);

  // We may need to add the step a number of times, depending on the unroll
  // factor. The last of those goes into the PHI.
  PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
  VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  VecInd->setDebugLoc(EntryVal->getDebugLoc());
  Instruction *LastInduction = VecInd;
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    State.set(this, LastInduction, Part);

    if (isa<TruncInst>(EntryVal))
      State.addMetadata(LastInduction, EntryVal);

    LastInduction = cast<Instruction>(
        Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
    LastInduction->setDebugLoc(EntryVal->getDebugLoc());
  }

  LastInduction->setName("vec.ind.next");
  VecInd->addIncoming(SteppedStart, VectorPH);
  // Add induction update using an incorrect block temporarily. The phi node
  // will be fixed after VPlan execution. Note that at this point the latch
  // block cannot be used, as it does not exist yet.
  // TODO: Model increment value in VPlan, by turning the recipe into a
  // multi-def and a subclass of VPHeaderPHIRecipe.
  VecInd->addIncoming(LastInduction, VectorPH);
}
960 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  // Print the widened induction; when a truncate of the IV is modeled, both
  // the original IV ingredient and the truncated result are shown.
  O << Indent << "WIDEN-INDUCTION";
  if (getTruncInst()) {
    // NOTE(review): the "\l" / escaped-quote sequences look like DOT label
    // escapes used when the plan is rendered as a graph — confirm against the
    // VPlan dot-printing code.
    O << "\\l\"";
    O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
    O << " +\n" << Indent << "\" ";
    getVPValue(0)->printAsOperand(O, SlotTracker);
  } else
    O << " " << VPlanIngredient(IV);

  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
975 #endif
977 bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
978 // The step may be defined by a recipe in the preheader (e.g. if it requires
979 // SCEV expansion), but for the canonical induction the step is required to be
980 // 1, which is represented as live-in.
981 if (getStepValue()->getDefiningRecipe())
982 return false;
983 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
984 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
985 return StartC && StartC->isZero() && StepC && StepC->isOne();
988 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
989 void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
990 VPSlotTracker &SlotTracker) const {
991 O << Indent;
992 printAsOperand(O, SlotTracker);
993 O << Indent << "= DERIVED-IV ";
994 getStartValue()->printAsOperand(O, SlotTracker);
995 O << " + ";
996 getCanonicalIV()->printAsOperand(O, SlotTracker);
997 O << " * ";
998 getStepValue()->printAsOperand(O, SlotTracker);
1000 if (TruncResultTy)
1001 O << " (truncated to " << *TruncResultTy << ")";
1003 #endif
void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());

  // Compute scalar induction steps: for each generated (part, lane) the value
  // is BaseIV + (part * VF + lane) * Step, where BaseIV is the scalar
  // induction variable on which to base the steps and Step is the step size.

  Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));
  IRBuilderBase &Builder = State.Builder;

  // Ensure step has the same type as that of scalar IV.
  Type *BaseIVTy = BaseIV->getType()->getScalarType();
  if (BaseIVTy != Step->getType()) {
    // TODO: Also use VPDerivedIVRecipe when only the step needs truncating, to
    // avoid separate truncate here.
    assert(Step->getType()->isIntegerTy() &&
           "Truncation requires an integer step");
    Step = State.Builder.CreateTrunc(Step, BaseIVTy);
  }

  // We build scalar steps for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (BaseIVTy->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = InductionOpcode;
    MulOp = Instruction::FMul;
  }

  // Determine the number of scalars we need to generate for each unroll
  // iteration.
  bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
  // Compute the scalar steps and save the results in State.
  Type *IntStepTy =
      IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
  Type *VecIVTy = nullptr;
  Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
  // For scalable VFs the per-lane values cannot be enumerated statically, so
  // additionally prepare splats/step-vectors to compute a whole vector of
  // steps at once.
  if (!FirstLaneOnly && State.VF.isScalable()) {
    VecIVTy = VectorType::get(BaseIVTy, State.VF);
    UnitStepVec =
        Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
    SplatStep = Builder.CreateVectorSplat(State.VF, Step);
    SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
  }

  // When executing a single replicate instance, restrict generation to the
  // requested part/lane only.
  unsigned StartPart = 0;
  unsigned EndPart = State.UF;
  unsigned StartLane = 0;
  unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
  if (State.Instance) {
    StartPart = State.Instance->Part;
    EndPart = StartPart + 1;
    StartLane = State.Instance->Lane.getKnownLane();
    EndLane = StartLane + 1;
  }
  for (unsigned Part = StartPart; Part < EndPart; ++Part) {
    // StartIdx0 = Part * VF (runtime value for scalable VF).
    Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);

    if (!FirstLaneOnly && State.VF.isScalable()) {
      auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
      auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
      if (BaseIVTy->isFloatingPointTy())
        InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
      auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
      auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
      State.set(this, Add, Part);
      // It's useful to record the lane values too for the known minimum number
      // of elements so we do those below. This improves the code quality when
      // trying to extract the first element, for example.
    }

    if (BaseIVTy->isFloatingPointTy())
      StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);

    for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
      Value *StartIdx = Builder.CreateBinOp(
          AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
      // The step returned by `createStepForVF` is a runtime-evaluated value
      // when VF is scalable. Otherwise, it should be folded into a Constant.
      assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
             "Expected StartIdx to be folded to a constant when VF is not "
             "scalable");
      auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
      auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
      State.set(this, Add, VPIteration(Part, Lane));
    }
  }
}
1100 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  // Print as: "<result> = SCALAR-STEPS <operands>".
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = SCALAR-STEPS ";
  printOperands(O, SlotTracker);
}
1108 #endif
void VPWidenGEPRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
  // Construct a vector GEP by widening the operands of the scalar GEP as
  // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
  // results in a vector of pointers when at least one operand of the GEP
  // is vector-typed. Thus, to keep the representation compact, we only use
  // vector-typed operands for loop-varying values.

  if (areAllOperandsInvariant()) {
    // If we are vectorizing, but the GEP has only loop-invariant operands,
    // the GEP we build (by only using vector-typed operands for
    // loop-varying values) would be a scalar pointer. Thus, to ensure we
    // produce a vector of pointers, we need to either arbitrarily pick an
    // operand to broadcast, or broadcast a clone of the original GEP.
    // Here, we broadcast a clone of the original.
    //
    // TODO: If at some point we decide to scalarize instructions having
    // loop-invariant operands, this special case will no longer be
    // required. We would add the scalarization decision to
    // collectLoopScalars() and teach getVectorValue() to broadcast
    // the lane-zero scalar value.
    SmallVector<Value *> Ops;
    for (unsigned I = 0, E = getNumOperands(); I != E; I++)
      Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));

    // Build one scalar GEP and splat it to a vector per unroll part.
    auto *NewGEP =
        State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
                                ArrayRef(Ops).drop_front(), "", isInBounds());
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
      State.set(this, EntryPart, Part);
      State.addMetadata(EntryPart, GEP);
    }
  } else {
    // If the GEP has at least one loop-varying operand, we are sure to
    // produce a vector of pointers. But if we are only unrolling, we want
    // to produce a scalar GEP for each unroll part. Thus, the GEP we
    // produce with the code below will be scalar (if VF == 1) or vector
    // (otherwise). Note that for the unroll-only case, we still maintain
    // values in the vector mapping with initVector, as we do for other
    // instructions.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // The pointer operand of the new GEP. If it's loop-invariant, we
      // won't broadcast it.
      auto *Ptr = isPointerLoopInvariant()
                      ? State.get(getOperand(0), VPIteration(0, 0))
                      : State.get(getOperand(0), Part);

      // Collect all the indices for the new GEP. If any index is
      // loop-invariant, we won't broadcast it.
      SmallVector<Value *, 4> Indices;
      for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
        VPValue *Operand = getOperand(I);
        if (isIndexLoopInvariant(I - 1))
          Indices.push_back(State.get(Operand, VPIteration(0, 0)));
        else
          Indices.push_back(State.get(Operand, Part));
      }

      // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
      // but it should be a vector, otherwise.
      auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
                                             Indices, "", isInBounds());
      assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
             "NewGEP is not a pointer vector");
      State.set(this, NewGEP, Part);
      State.addMetadata(NewGEP, GEP);
    }
  }
}
1182 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  // Print as: "WIDEN-GEP Inv|Var[Inv|Var]... <result> = getelementptr ...",
  // where the Inv/Var tags show loop-invariance of the pointer and each index.
  O << Indent << "WIDEN-GEP ";
  O << (isPointerLoopInvariant() ? "Inv" : "Var");
  for (size_t I = 0; I < getNumOperands() - 1; ++I)
    O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";

  O << " ";
  printAsOperand(O, SlotTracker);
  O << " = getelementptr";
  printFlags(O);
  printOperands(O, SlotTracker);
}
1196 #endif
void VPBlendRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  // We know that all PHIs in non-header blocks are converted into
  // selects, so we don't have to worry about the insertion order and we
  // can just use the builder.
  // At this point we generate the predication tree. There may be
  // duplications since this is a simple recursive scan, but future
  // optimizations will clean it up.

  unsigned NumIncoming = getNumIncomingValues();

  // Generate a sequence of selects of the form:
  // SELECT(Mask3, In3,
  //        SELECT(Mask2, In2,
  //               SELECT(Mask1, In1,
  //                      In0)))
  // Note that Mask0 is never used: lanes for which no path reaches this phi and
  // are essentially undef are taken from In0.
  VectorParts Entry(State.UF);
  for (unsigned In = 0; In < NumIncoming; ++In) {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // We might have single edge PHIs (blocks) - use an identity
      // 'select' for the first PHI operand.
      Value *In0 = State.get(getIncomingValue(In), Part);
      if (In == 0)
        Entry[Part] = In0; // Initialize with the first incoming value.
      else {
        // Select between the current value and the previous incoming edge
        // based on the incoming mask.
        Value *Cond = State.get(getMask(In), Part);
        Entry[Part] =
            State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
      }
    }
  }
  // Publish the fully-blended value for each unroll part.
  for (unsigned Part = 0; Part < State.UF; ++Part)
    State.set(this, Entry[Part], Part);
}
1237 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  // Print as "BLEND <result> = <value>/<mask> ..." pairs, or just the single
  // incoming value when no blending is needed.
  O << Indent << "BLEND ";
  printAsOperand(O, SlotTracker);
  O << " =";
  if (getNumIncomingValues() == 1) {
    // Not a User of any mask: not really blending, this is a
    // single-predecessor phi.
    O << " ";
    getIncomingValue(0)->printAsOperand(O, SlotTracker);
  } else {
    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
      O << " ";
      getIncomingValue(I)->printAsOperand(O, SlotTracker);
      O << "/";
      getMask(I)->printAsOperand(O, SlotTracker);
    }
  }
}
void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  // Print as: "REDUCE <result> = <chain> + reduce.<op> (<vec>[, <cond>])",
  // with fast-math flags after the '+' when the ingredient has them.
  O << Indent << "REDUCE ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  if (isa<FPMathOperator>(getUnderlyingInstr()))
    O << getUnderlyingInstr()->getFastMathFlags();
  O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
  getVecOp()->printAsOperand(O, SlotTracker);
  if (getCondOp()) {
    O << ", ";
    getCondOp()->printAsOperand(O, SlotTracker);
  }
  O << ")";
  if (RdxDesc.IntermediateStore)
    O << " (with final reduction value stored in invariant address sank "
         "outside of loop)";
}
1278 #endif
1280 bool VPReplicateRecipe::shouldPack() const {
1281 // Find if the recipe is used by a widened recipe via an intervening
1282 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
1283 return any_of(users(), [](const VPUser *U) {
1284 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
1285 return any_of(PredR->users(), [PredR](const VPUser *U) {
1286 return !U->usesScalars(PredR);
1288 return false;
1292 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  // Uniform replicates are printed as "CLONE", per-lane ones as "REPLICATE".
  O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");

  if (!getUnderlyingInstr()->getType()->isVoidTy()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }
  if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
    // Calls: print the callee name and all operands except the last one,
    // which is the called function itself.
    O << "call";
    printFlags(O);
    O << "@" << CB->getCalledFunction()->getName() << "(";
    interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
                    O, [&O, &SlotTracker](VPValue *Op) {
                      Op->printAsOperand(O, SlotTracker);
                    });
    O << ")";
  } else {
    O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode());
    printFlags(O);
    printOperands(O, SlotTracker);
  }

  // Mark recipes whose scalar results also get packed into a vector.
  if (shouldPack())
    O << " (S->V)";
}
1319 #endif
void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Branch on Mask works only on single instance.");

  unsigned Part = State.Instance->Part;
  unsigned Lane = State.Instance->Lane.getKnownLane();

  // Extract this instance's lane of the mask (or use 'true' when there is no
  // mask) as the branch condition.
  Value *ConditionBit = nullptr;
  VPValue *BlockInMask = getMask();
  if (BlockInMask) {
    ConditionBit = State.get(BlockInMask, Part);
    if (ConditionBit->getType()->isVectorTy())
      ConditionBit = State.Builder.CreateExtractElement(
          ConditionBit, State.Builder.getInt32(Lane));
  } else // Block in mask is all-one.
    ConditionBit = State.Builder.getTrue();

  // Replace the temporary unreachable terminator with a new conditional branch,
  // whose two destinations will be set later when they are created.
  auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
  assert(isa<UnreachableInst>(CurrentTerminator) &&
         "Expected to replace unreachable terminator with conditional branch.");
  auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
  CondBr->setSuccessor(0, nullptr);
  ReplaceInstWithInst(CurrentTerminator, CondBr);
}
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Predicated instruction PHI works per instance.");
  Instruction *ScalarPredInst =
      cast<Instruction>(State.get(getOperand(0), *State.Instance));
  BasicBlock *PredicatedBB = ScalarPredInst->getParent();
  BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
  assert(PredicatingBB && "Predicated block has no single predecessor.");
  assert(isa<VPReplicateRecipe>(getOperand(0)) &&
         "operand must be VPReplicateRecipe");

  // By current pack/unpack logic we need to generate only a single phi node: if
  // a vector value for the predicated instruction exists at this point it means
  // the instruction has vector users only, and a phi for the vector value is
  // needed. In this case the recipe of the predicated instruction is marked to
  // also do that packing, thereby "hoisting" the insert-element sequence.
  // Otherwise, a phi node for the scalar value is needed.
  unsigned Part = State.Instance->Part;
  if (State.hasVectorValue(getOperand(0), Part)) {
    // Vector case: phi between the unmodified vector (predicating edge) and
    // the vector with the freshly inserted element (predicated edge).
    Value *VectorValue = State.get(getOperand(0), Part);
    InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
    PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
    VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
    VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
    if (State.hasVectorValue(this, Part))
      State.reset(this, VPhi, Part);
    else
      State.set(this, VPhi, Part);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), VPhi, Part);
  } else {
    // Scalar case: phi between poison (predicating edge, lane inactive) and
    // the predicated instruction's scalar result (predicated edge).
    Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
    Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
                     PredicatingBB);
    Phi->addIncoming(ScalarPredInst, PredicatedBB);
    if (State.hasScalarValue(this, *State.Instance))
      State.reset(this, Phi, *State.Instance);
    else
      State.set(this, Phi, *State.Instance);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), Phi, *State.Instance);
  }
}
1393 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  // Print as: "PHI-PREDICATED-INSTRUCTION <result> = <operands>".
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}
void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
                                           VPSlotTracker &SlotTracker) const {
  // Print as "WIDEN [<result> = ]<opcode> <operands>"; stores define no value.
  O << Indent << "WIDEN ";

  if (!isStore()) {
    getVPSingleValue()->printAsOperand(O, SlotTracker);
    O << " = ";
  }
  O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";

  printOperands(O, SlotTracker);
}
1414 #endif
1416 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
1417 Value *Start = getStartValue()->getLiveInIRValue();
1418 PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
1419 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1421 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1422 EntryPart->addIncoming(Start, VectorPH);
1423 EntryPart->setDebugLoc(getDebugLoc());
1424 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1425 State.set(this, EntryPart, Part);
1428 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  // Print as: "EMIT <result> = CANONICAL-INDUCTION <operands>".
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
1436 #endif
1438 bool VPCanonicalIVPHIRecipe::isCanonical(
1439 InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step,
1440 Type *Ty) const {
1441 // The types must match and it must be an integer induction.
1442 if (Ty != getScalarType() || Kind != InductionDescriptor::IK_IntInduction)
1443 return false;
1444 // Start must match the start value of this canonical induction.
1445 if (Start != getStartValue())
1446 return false;
1448 // If the step is defined by a recipe, it is not a ConstantInt.
1449 if (Step->getDefiningRecipe())
1450 return false;
1452 ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
1453 return StepC && StepC->isOne();
1456 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
1457 return IsScalarAfterVectorization &&
1458 (!VF.isScalable() || vputils::onlyFirstLaneUsed(this));
1461 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  // Print as: "EMIT <result> = WIDEN-POINTER-INDUCTION <start>, <step SCEV>".
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << ", " << *IndDesc.getStep();
}
1470 #endif
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "cannot be used in per-lane");
  const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
  SCEVExpander Exp(SE, DL, "induction");

  // Expand the SCEV expression to IR at the current insert point and record
  // it, so later code can look up the expansion result per expression.
  Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
                                 &*State.Builder.GetInsertPoint());
  assert(!State.ExpandedSCEVs.contains(Expr) &&
         "Same SCEV expanded multiple times");
  State.ExpandedSCEVs[Expr] = Res;
  // The expansion is uniform; reuse the single result for all unroll parts.
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, Res, {Part, 0});
}
1486 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  // Print as: "EMIT <result> = EXPAND SCEV <expression>".
  O << Indent << "EMIT ";
  getVPSingleValue()->printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}
1493 #endif
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  // Widen the scalar canonical IV: for each part, produce
  // splat(canonical-iv) + (Part * VF + step-vector), i.e. the per-lane index
  // values of that unroll part.
  Value *CanonicalIV = State.get(getOperand(0), 0);
  Type *STy = CanonicalIV->getType();
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  ElementCount VF = State.VF;
  Value *VStart = VF.isScalar()
                      ? CanonicalIV
                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    // VStep = Part * VF, then add the 0..VF-1 step vector for vector VFs.
    Value *VStep = createStepForVF(Builder, STy, VF, Part);
    if (VF.isVector()) {
      VStep = Builder.CreateVectorSplat(VF, VStep);
      VStep =
          Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
    }
    Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
    State.set(this, CanonicalVectorIV, Part);
  }
}
1515 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  // Print as: "EMIT <result> = WIDEN-CANONICAL-INDUCTION <operands>".
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
1523 #endif
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  if (State.VF.isVector()) {
    // Place the scalar initial value into the last lane of a poison vector in
    // the preheader; the remaining lanes are filled by the recurrence.
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(VectorPH->getTerminator());
    // Last lane index is RuntimeVF - 1 (runtime value for scalable VFs).
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence.
  PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
  EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, VectorPH);
  State.set(this, EntryPart, 0);
}
1553 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  // Print as: "FIRST-ORDER-RECURRENCE-PHI <result> = phi <operands>".
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
1561 #endif
void VPReductionPHIRecipe::execute(VPTransformState &State) {
  PHINode *PN = cast<PHINode>(getUnderlyingValue());
  auto &Builder = State.Builder;

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy =
      ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  // Ordered reductions use a single phi across all unroll parts.
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
    EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part);
  }

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  // Reductions do not have to start at zero. They can start with
  // any loop invariant values.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  // Iden is the identity value fed into the non-first unroll parts; StartV
  // (possibly adjusted below) seeds the first part.
  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
    // MinMax and AnyOf reductions have the start value as their identity.
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      // Vector start value: identity splat with the scalar start value placed
      // in lane 0 (emitted in the preheader).
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part);
    // Make sure to add the reduction start value only to the
    // first unroll part.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
  }
}
1627 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  // Print as: "WIDEN-REDUCTION-PHI <result> = phi <operands>".
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
1636 #endif
1638 void VPWidenPHIRecipe::execute(VPTransformState &State) {
1639 assert(EnableVPlanNativePath &&
1640 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
1642 Value *Op0 = State.get(getOperand(0), 0);
1643 Type *VecTy = Op0->getType();
1644 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
1645 State.set(this, VecPhi, 0);
1648 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-PHI ";

  auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  // Unless all incoming values are modeled in VPlan print the original PHI
  // directly.
  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
  // values as VPValues.
  if (getNumOperands() != OriginalPhi->getNumOperands()) {
    O << VPlanIngredient(OriginalPhi);
    return;
  }

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
1667 #endif
1669 // TODO: It would be good to use the existing VPWidenPHIRecipe instead and
1670 // remove VPActiveLaneMaskPHIRecipe.
1671 void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
1672 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1673 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1674 Value *StartMask = State.get(getOperand(0), Part);
1675 PHINode *EntryPart =
1676 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
1677 EntryPart->addIncoming(StartMask, VectorPH);
1678 EntryPart->setDebugLoc(getDebugLoc());
1679 State.set(this, EntryPart, Part);
1683 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                      VPSlotTracker &SlotTracker) const {
  // Print as: "ACTIVE-LANE-MASK-PHI <result> = phi <operands>".
  O << Indent << "ACTIVE-LANE-MASK-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
1692 #endif