Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / lib / Transforms / Vectorize / VPlanRecipes.cpp
blob6b3218dca1b18b050e9e93188b3fc50dd5c8a5c4
1 //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains implementations for different VPlan recipes.
11 ///
12 //===----------------------------------------------------------------------===//
14 #include "VPlan.h"
15 #include "VPlanAnalysis.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/Analysis/IVDescriptors.h"
20 #include "llvm/IR/BasicBlock.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/Instruction.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/Type.h"
25 #include "llvm/IR/Value.h"
26 #include "llvm/Support/Casting.h"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
31 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
32 #include <cassert>
34 using namespace llvm;
36 using VectorParts = SmallVector<Value *, 2>;
38 namespace llvm {
39 extern cl::opt<bool> EnableVPlanNativePath;
42 #define LV_NAME "loop-vectorize"
43 #define DEBUG_TYPE LV_NAME
// Returns true if this recipe may write to memory. Used by VPlan
// transformations to decide whether recipes can be reordered or sunk.
bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    // Only widened stores write to memory; widened loads do not.
    return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    // Defer to the underlying scalar IR instruction.
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPBranchOnMaskSC:
  case VPScalarIVStepsSC:
  case VPPredInstPHISC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    // These recipes never write to memory. In asserts builds, cross-check
    // this claim against the underlying IR instruction, if there is one.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    // Conservatively assume any other recipe may write to memory.
    return true;
  }
}
// Returns true if this recipe may read from memory. Mirrors
// mayWriteToMemory() above; the two switches must be kept in sync.
bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    // Widened loads read from memory; widened stores do not.
    return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    // Defer to the underlying scalar IR instruction.
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPBranchOnMaskSC:
  case VPScalarIVStepsSC:
  case VPPredInstPHISC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    // These recipes never read from memory. In asserts builds, cross-check
    // this claim against the underlying IR instruction, if there is one.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    // Conservatively assume any other recipe may read from memory.
    return true;
  }
}
// Returns true if this recipe may have side effects (e.g. trapping division,
// memory writes). Recipes without side effects may be dropped if dead.
bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPDerivedIVSC:
  case VPPredInstPHISC:
    return false;
  case VPInstructionSC:
    // Abstract VPInstructions: only the explicitly-listed opcodes are known
    // to be side-effect free.
    switch (cast<VPInstruction>(this)->getOpcode()) {
    case Instruction::ICmp:
    case VPInstruction::Not:
    case VPInstruction::CalculateTripCountMinusVF:
    case VPInstruction::CanonicalIVIncrement:
    case VPInstruction::CanonicalIVIncrementForPart:
      return false;
    default:
      return true;
    }
  case VPWidenCallSC:
    // Defer to the underlying call instruction.
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayHaveSideEffects();
  case VPBlendSC:
  case VPReductionSC:
  case VPScalarIVStepsSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenPointerInductionSC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    // These recipes are side-effect free. In asserts builds, cross-check
    // against the underlying IR instruction, if there is one.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPWidenMemoryInstructionSC:
    // For memory recipes, side effects are exactly "writes to memory";
    // verify that matches the ingredient's own answer.
    assert(cast<VPWidenMemoryInstructionRecipe>(this)
               ->getIngredient()
               .mayHaveSideEffects() == mayWriteToMemory() &&
           "mayHaveSideffects result for ingredient differs from this "
           "implementation");
    return mayWriteToMemory();
  case VPReplicateSC: {
    // Replicated recipes keep the original scalar instruction around.
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    // Conservatively assume any other recipe has side effects.
    return true;
  }
}
// Hook up the live-out value to the scalar phi in the exit block: add an
// incoming value for the middle block, taken from the last unroll part.
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
  // By default the exit value lives in the last lane of the last part; for
  // values that are uniform after vectorization any lane works, so use the
  // first one.
  auto Lane = VPLane::getLastLaneForVF(State.VF);
  VPValue *ExitValue = getOperand(0);
  if (vputils::isUniformAfterVectorization(ExitValue))
    Lane = VPLane::getFirstLane();
  VPBasicBlock *MiddleVPBB =
      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
  assert(MiddleVPBB->getNumSuccessors() == 0 &&
         "the middle block must not have any successors");
  BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
  Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
                   MiddleBB);
}
180 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
181 void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
182 O << "Live-out ";
183 getPhi()->printAsOperand(O);
184 O << " = ";
185 getOperand(0)->printAsOperand(O, SlotTracker);
186 O << "\n";
188 #endif
190 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
191 assert(!Parent && "Recipe already in some VPBasicBlock");
192 assert(InsertPos->getParent() &&
193 "Insertion position not in any VPBasicBlock");
194 Parent = InsertPos->getParent();
195 Parent->getRecipeList().insert(InsertPos->getIterator(), this);
198 void VPRecipeBase::insertBefore(VPBasicBlock &BB,
199 iplist<VPRecipeBase>::iterator I) {
200 assert(!Parent && "Recipe already in some VPBasicBlock");
201 assert(I == BB.end() || I->getParent() == &BB);
202 Parent = &BB;
203 BB.getRecipeList().insert(I, this);
206 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
207 assert(!Parent && "Recipe already in some VPBasicBlock");
208 assert(InsertPos->getParent() &&
209 "Insertion position not in any VPBasicBlock");
210 Parent = InsertPos->getParent();
211 Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
214 void VPRecipeBase::removeFromParent() {
215 assert(getParent() && "Recipe not in any VPBasicBlock");
216 getParent()->getRecipeList().remove(getIterator());
217 Parent = nullptr;
220 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
221 assert(getParent() && "Recipe not in any VPBasicBlock");
222 return getParent()->getRecipeList().erase(getIterator());
225 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
226 removeFromParent();
227 insertAfter(InsertPos);
230 void VPRecipeBase::moveBefore(VPBasicBlock &BB,
231 iplist<VPRecipeBase>::iterator I) {
232 removeFromParent();
233 insertBefore(BB, I);
236 FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
237 assert(OpType == OperationType::FPMathOp &&
238 "recipe doesn't have fast math flags");
239 FastMathFlags Res;
240 Res.setAllowReassoc(FMFs.AllowReassoc);
241 Res.setNoNaNs(FMFs.NoNaNs);
242 Res.setNoInfs(FMFs.NoInfs);
243 Res.setNoSignedZeros(FMFs.NoSignedZeros);
244 Res.setAllowReciprocal(FMFs.AllowReciprocal);
245 Res.setAllowContract(FMFs.AllowContract);
246 Res.setApproxFunc(FMFs.ApproxFunc);
247 return Res;
// Construct a two-operand VPInstruction that carries a compare predicate.
// Currently only ICmp opcodes are supported with a predicate.
VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
                             VPValue *A, VPValue *B, DebugLoc DL,
                             const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
                          Pred, DL),
      VPValue(this), Opcode(Opcode), Name(Name.str()) {
  assert(Opcode == Instruction::ICmp &&
         "only ICmp predicates supported at the moment");
}
// Construct a VPInstruction carrying fast-math flags; the opcode must be a
// floating-point operation (checked via isFPMathOp in asserts builds).
VPInstruction::VPInstruction(unsigned Opcode,
                             std::initializer_list<VPValue *> Operands,
                             FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
      VPValue(this), Opcode(Opcode), Name(Name.str()) {
  // Make sure the VPInstruction is a floating-point operation.
  assert(isFPMathOp() && "this op can't take fast-math flags");
}
// Emit the IR for unroll part \p Part of this VPInstruction and return the
// generated value (or nullptr for opcodes without a result on later parts,
// e.g. branches, which are only materialized for Part 0).
Value *VPInstruction::generateInstruction(VPTransformState &State,
                                          unsigned Part) {
  IRBuilderBase &Builder = State.Builder;
  Builder.SetCurrentDebugLocation(getDebugLoc());

  // Ordinary LLVM binary ops are widened uniformly.
  if (Instruction::isBinaryOp(getOpcode())) {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    return Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    return Builder.CreateNot(A, Name);
  }
  case Instruction::ICmp: {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    return Builder.CreateCmp(getPredicate(), A, B, Name);
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    return Builder.CreateSelect(Cond, Op1, Op2, Name);
  }
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));

    // Emit an <VF x i1> llvm.get.active.lane.mask(VIVElem0, ScalarTC).
    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
                                   {PredTy, ScalarTC->getType()},
                                   {VIVElem0, ScalarTC}, nullptr, Name);
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy())
      return PartMinus1;
    Value *V2 = State.get(getOperand(1), Part);
    return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
  }
  case VPInstruction::CalculateTripCountMinusVF: {
    // Compute max(TC - VF * UF, 0), guarding against unsigned underflow.
    Value *ScalarTC = State.get(getOperand(0), {0, 0});
    Value *Step =
        createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
    Value *Sub = Builder.CreateSub(ScalarTC, Step);
    Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
    Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
    return Builder.CreateSelect(Cmp, Sub, Zero);
  }
  case VPInstruction::CanonicalIVIncrement: {
    if (Part == 0) {
      auto *Phi = State.get(getOperand(0), 0);
      // The loop step is equal to the vectorization factor (num of SIMD
      // elements) times the unroll factor (num of SIMD instructions).
      Value *Step =
          createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
      return Builder.CreateAdd(Phi, Step, Name, hasNoUnsignedWrap(),
                               hasNoSignedWrap());
    }
    // Later parts reuse the single increment emitted for part 0.
    return State.get(this, 0);
  }
  case VPInstruction::CanonicalIVIncrementForPart: {
    auto *IV = State.get(getOperand(0), VPIteration(0, 0));
    if (Part == 0)
      return IV;

    // The canonical IV is incremented by the vectorization factor (num of SIMD
    // elements) times the unroll part.
    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
                             hasNoSignedWrap());
  }
  case VPInstruction::BranchOnCond: {
    if (Part != 0)
      return nullptr;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination for exiting blocks now and
    // to forward destination(s) later when they are created.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  case VPInstruction::BranchOnCount: {
    if (Part != 0)
      return nullptr;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination (the header) now and to the
    // forward destination (the exit/middle block) later when it is created.
    // Note that CreateCondBr expects a valid BB as first argument, so we need
    // to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}
412 #if !defined(NDEBUG)
413 bool VPInstruction::isFPMathOp() const {
414 // Inspired by FPMathOperator::classof. Notable differences are that we don't
415 // support Call, PHI and Select opcodes here yet.
416 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
417 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
418 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
419 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
421 #endif
// Generate IR for this VPInstruction across all unroll parts, propagating
// fast-math flags to the builder for the duration of the emission.
void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  // Restore the builder's FMF state on scope exit.
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  assert((hasFastMathFlags() == isFPMathOp() ||
          getOpcode() == Instruction::Select) &&
         "Recipe not a FPMathOp but has fast-math flags?");
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *GeneratedValue = generateInstruction(State, Part);
    // Opcodes like branches produce no result for later parts.
    if (!hasResult())
      continue;
    assert(GeneratedValue && "generateInstruction must produce a value");
    State.set(this, GeneratedValue, Part);
  }
}
440 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
441 void VPInstruction::dump() const {
442 VPSlotTracker SlotTracker(getParent()->getPlan());
443 print(dbgs(), "", SlotTracker);
// Print this VPInstruction as "EMIT [<result> =] <opcode> <operands>
// [, !dbg <loc>]", mapping VPInstruction-specific opcodes to readable names.
void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::CanonicalIVIncrement:
    O << "VF * UF +";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::CalculateTripCountMinusVF:
    O << "TC > VF ? TC - VF : 0";
    break;
  case VPInstruction::CanonicalIVIncrementForPart:
    O << "VF * Part +";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count";
    break;
  default:
    // Fall back to the plain LLVM opcode name.
    O << Instruction::getOpcodeName(getOpcode());
  }

  printFlags(O);
  printOperands(O, SlotTracker);

  if (auto DL = getDebugLoc()) {
    O << ", !dbg ";
    DL.print(O);
  }
}
498 #endif
// Widen a call by emitting, per unroll part, either a call to a vector
// intrinsic (VectorIntrinsicID) or to a vector library variant (Variant),
// with scalar arguments preserved where the intrinsic requires them.
void VPWidenCallRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  auto &CI = *cast<CallInst>(getUnderlyingInstr());
  assert(!isa<DbgInfoIntrinsic>(CI) &&
         "DbgInfoIntrinsic should have been dropped during VPlan construction");
  State.setDebugLocFrom(CI.getDebugLoc());

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    SmallVector<Type *, 2> TysForDecl;
    // Add return type if intrinsic is overloaded on it.
    if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) {
      TysForDecl.push_back(
          VectorType::get(CI.getType()->getScalarType(), State.VF));
    }
    SmallVector<Value *, 4> Args;
    for (const auto &I : enumerate(operands())) {
      // Some intrinsics have a scalar argument - don't replace it with a
      // vector.
      Value *Arg;
      if (VectorIntrinsicID == Intrinsic::not_intrinsic ||
          !isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
        Arg = State.get(I.value(), Part);
      else
        // Scalar operand: take lane 0 of part 0.
        Arg = State.get(I.value(), VPIteration(0, 0));
      if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
        TysForDecl.push_back(Arg->getType());
      Args.push_back(Arg);
    }

    Function *VectorF;
    if (VectorIntrinsicID != Intrinsic::not_intrinsic) {
      // Use vector version of the intrinsic.
      Module *M = State.Builder.GetInsertBlock()->getModule();
      VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
      assert(VectorF && "Can't retrieve vector intrinsic.");
    } else {
#ifndef NDEBUG
      assert(Variant != nullptr && "Can't create vector function.");
#endif
      VectorF = Variant;
    }

    // Preserve operand bundles and fast-math flags from the scalar call.
    SmallVector<OperandBundleDef, 1> OpBundles;
    CI.getOperandBundlesAsDefs(OpBundles);
    CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);

    if (isa<FPMathOperator>(V))
      V->copyFastMathFlags(&CI);

    State.set(this, V, Part);
    State.addMetadata(V, &CI);
  }
}
554 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print this widened call, noting whether it lowers to a vector intrinsic or
// a vector library function variant.
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  auto *CI = cast<CallInst>(getUnderlyingInstr());
  if (CI->getType()->isVoidTy())
    O << "void ";
  else {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << "call @" << CI->getCalledFunction()->getName() << "(";
  printOperands(O, SlotTracker);
  O << ")";

  if (VectorIntrinsicID)
    O << " (using vector intrinsic)";
  else {
    O << " (using library function";
    if (Variant->hasName())
      O << ": " << Variant->getName();
    O << ")";
  }
}
581 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
582 VPSlotTracker &SlotTracker) const {
583 O << Indent << "WIDEN-SELECT ";
584 printAsOperand(O, SlotTracker);
585 O << " = select ";
586 getOperand(0)->printAsOperand(O, SlotTracker);
587 O << ", ";
588 getOperand(1)->printAsOperand(O, SlotTracker);
589 O << ", ";
590 getOperand(2)->printAsOperand(O, SlotTracker);
591 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
593 #endif
// Widen a select per unroll part. An invariant condition is materialized once
// from the first lane; otherwise the vectorized condition is used per part.
void VPWidenSelectRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());

  // The condition can be loop invariant but still defined inside the
  // loop. This means that we can't just use the original 'cond' value.
  // We have to take the 'vectorized' value and pick the first lane.
  // Instcombine will make this a no-op.
  auto *InvarCond =
      isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
    Value *Op0 = State.get(getOperand(1), Part);
    Value *Op1 = State.get(getOperand(2), Part);
    Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
    State.set(this, Sel, Part);
    State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
  }
}
615 VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
616 const FastMathFlags &FMF) {
617 AllowReassoc = FMF.allowReassoc();
618 NoNaNs = FMF.noNaNs();
619 NoInfs = FMF.noInfs();
620 NoSignedZeros = FMF.noSignedZeros();
621 AllowReciprocal = FMF.allowReciprocal();
622 AllowContract = FMF.allowContract();
623 ApproxFunc = FMF.approxFunc();
626 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print the IR flags carried by this recipe (predicate, exact, nuw/nsw, FMFs
// or inbounds), followed by a separating space when operands will follow.
void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
  switch (OpType) {
  case OperationType::Cmp:
    O << " " << CmpInst::getPredicateName(getPredicate());
    break;
  case OperationType::PossiblyExactOp:
    if (ExactFlags.IsExact)
      O << " exact";
    break;
  case OperationType::OverflowingBinOp:
    if (WrapFlags.HasNUW)
      O << " nuw";
    if (WrapFlags.HasNSW)
      O << " nsw";
    break;
  case OperationType::FPMathOp:
    getFastMathFlags().print(O);
    break;
  case OperationType::GEPOp:
    if (GEPFlags.IsInBounds)
      O << " inbounds";
    break;
  case OperationType::Other:
    break;
  }
  if (getNumOperands() > 0)
    O << " ";
}
655 #endif
// Widen a simple arithmetic/logical instruction, a freeze, or a compare by
// emitting one vector operation per unroll part. Opcodes handled by
// dedicated recipes (call, br, phi, GEP, select) must never reach here.
void VPWidenRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  switch (Opcode) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      SmallVector<Value *, 2> Ops;
      for (VPValue *VPOp : operands())
        Ops.push_back(State.get(VPOp, Part));

      Value *V = Builder.CreateNAryOp(Opcode, Ops);

      // Transfer nuw/nsw/exact/FMF flags onto the generated instruction.
      if (auto *VecOp = dyn_cast<Instruction>(V))
        setFlags(VecOp);

      // Use this vector value for all users of the original instruction.
      State.set(this, V, Part);
      State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  case Instruction::Freeze: {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Op = State.get(getOperand(0), Part);

      Value *Freeze = Builder.CreateFreeze(Op);
      State.set(this, Freeze, Part);
    }
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
    bool FCmp = Opcode == Instruction::FCmp;
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *B = State.get(getOperand(1), Part);
      Value *C = nullptr;
      if (FCmp) {
        // Propagate fast math flags.
        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
        if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
          Builder.setFastMathFlags(I->getFastMathFlags());
        C = Builder.CreateFCmp(getPredicate(), A, B);
      } else {
        C = Builder.CreateICmp(getPredicate(), A, B);
      }
      State.set(this, C, Part);
      State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
                      << Instruction::getOpcodeName(Opcode));
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.

#if !defined(NDEBUG)
  // Verify that VPlan type inference results agree with the type of the
  // generated values.
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    assert(VectorType::get(State.TypeAnalysis.inferScalarType(this),
                           State.VF) == State.get(this, Part)->getType() &&
           "inferred type and type from generated instructions do not match");
  }
#endif
}
754 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
755 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
756 VPSlotTracker &SlotTracker) const {
757 O << Indent << "WIDEN ";
758 printAsOperand(O, SlotTracker);
759 O << " = " << Instruction::getOpcodeName(Opcode);
760 printFlags(O);
761 printOperands(O, SlotTracker);
763 #endif
// Widen a cast: per unroll part, emit the same cast opcode producing a
// vector of the recipe's result type.
void VPWidenCastRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  // Vectorize casts.
  assert(State.VF.isVector() && "Not vectorizing?");
  Type *DestTy = VectorType::get(getResultType(), State.VF);

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *A = State.get(getOperand(0), Part);
    Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
    State.set(this, Cast, Part);
    State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
  }
}
780 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
781 void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
782 VPSlotTracker &SlotTracker) const {
783 O << Indent << "WIDEN-CAST ";
784 printAsOperand(O, SlotTracker);
785 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
786 printOperands(O, SlotTracker);
787 O << " to " << *getResultType();
789 #endif
/// This function adds
/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
/// to each vector element of Val. The sequence starts at StartIndex.
/// \p Opcode is relevant for FP induction variable.
static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
                            Instruction::BinaryOps BinOp, ElementCount VF,
                            IRBuilderBase &Builder) {
  assert(VF.isVector() && "only vector VFs are supported");

  // Create and check the types.
  auto *ValVTy = cast<VectorType>(Val->getType());
  ElementCount VLen = ValVTy->getElementCount();

  Type *STy = Val->getType()->getScalarType();
  assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
         "Induction Step must be an integer or FP");
  assert(Step->getType() == STy && "Step has wrong type");

  SmallVector<Constant *, 8> Indices;

  // Create a vector of consecutive numbers from zero to VF.
  // For FP inductions the step vector is first built as integers and then
  // converted, since CreateStepVector requires an integer element type.
  VectorType *InitVecValVTy = ValVTy;
  if (STy->isFloatingPointTy()) {
    Type *InitVecValSTy =
        IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
    InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
  }
  Value *InitVec = Builder.CreateStepVector(InitVecValVTy);

  // Splat the StartIdx
  Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);

  if (STy->isIntegerTy()) {
    InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
    Step = Builder.CreateVectorSplat(VLen, Step);
    assert(Step->getType() == Val->getType() && "Invalid step vec");
    // FIXME: The newly created binary instructions should contain nsw/nuw
    // flags, which can be found from the original scalar operations.
    Step = Builder.CreateMul(InitVec, Step);
    return Builder.CreateAdd(Val, Step, "induction");
  }

  // Floating point induction.
  assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
         "Binary Opcode should be specified for FP induction");
  InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
  InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);

  Step = Builder.CreateVectorSplat(VLen, Step);
  Value *MulOp = Builder.CreateFMul(InitVec, Step);
  return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
}
844 /// A helper function that returns an integer or floating-point constant with
845 /// value C.
846 static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
847 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
848 : ConstantFP::get(Ty, C);
851 static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
852 ElementCount VF) {
853 assert(FTy->isFloatingPointTy() && "Expected floating point type!");
854 Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
855 Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
856 return B.CreateUIToFP(RuntimeVF, FTy);
// Materialize a widened integer or FP induction: build the initial stepped
// vector in the preheader, create the vector phi, and chain one step-add per
// unroll part.
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Int or FP induction being replicated.");

  Value *Start = getStartValue()->getLiveInIRValue();
  const InductionDescriptor &ID = getInductionDescriptor();
  TruncInst *Trunc = getTruncInst();
  IRBuilderBase &Builder = State.Builder;
  assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
  assert(State.VF.isVector() && "must have vector VF");

  // The value from the original loop to which we are mapping the new induction
  // variable.
  Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;

  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
    Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());

  // Now do the actual transformations, and start with fetching the step value.
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));

  assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
         "Expected either an induction phi-node or a truncate of it!");

  // Construct the initial value of the vector IV in the vector loop preheader
  auto CurrIP = Builder.saveIP();
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  Builder.SetInsertPoint(VectorPH->getTerminator());
  if (isa<TruncInst>(EntryVal)) {
    // Narrow start and step to the truncated induction's type.
    assert(Start->getType()->isIntegerTy() &&
           "Truncation requires an integer type");
    auto *TruncType = cast<IntegerType>(EntryVal->getType());
    Step = Builder.CreateTrunc(Step, TruncType);
    Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
  }

  // SteppedStart = splat(Start) + <0, 1, ..., VF-1> * Step.
  Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
  Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
  Value *SteppedStart = getStepVector(
      SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);

  // We create vector phi nodes for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (Step->getType()->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = ID.getInductionOpcode();
    MulOp = Instruction::FMul;
  }

  // Multiply the vectorization factor by the step using integer or
  // floating-point arithmetic as appropriate.
  Type *StepType = Step->getType();
  Value *RuntimeVF;
  if (Step->getType()->isFloatingPointTy())
    RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
  else
    RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
  Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);

  // Create a vector splat to use in the induction update.
  //
  // FIXME: If the step is non-constant, we create the vector splat with
  //        IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
  //        handle a constant vector splat.
  Value *SplatVF = isa<Constant>(Mul)
                       ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
                       : Builder.CreateVectorSplat(State.VF, Mul);
  Builder.restoreIP(CurrIP);

  // We may need to add the step a number of times, depending on the unroll
  // factor. The last of those goes into the PHI.
  PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
  VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  VecInd->setDebugLoc(EntryVal->getDebugLoc());
  Instruction *LastInduction = VecInd;
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    State.set(this, LastInduction, Part);

    if (isa<TruncInst>(EntryVal))
      State.addMetadata(LastInduction, EntryVal);

    LastInduction = cast<Instruction>(
        Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
    LastInduction->setDebugLoc(EntryVal->getDebugLoc());
  }

  LastInduction->setName("vec.ind.next");
  VecInd->addIncoming(SteppedStart, VectorPH);
  // Add induction update using an incorrect block temporarily. The phi node
  // will be fixed after VPlan execution. Note that at this point the latch
  // block cannot be used, as it does not exist yet.
  // TODO: Model increment value in VPlan, by turning the recipe into a
  // multi-def and a subclass of VPHeaderPHIRecipe.
  VecInd->addIncoming(LastInduction, VectorPH);
}
960 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  // Print the widened induction; when a truncate of the IV is modeled, both
  // the original IV ingredient and the truncated result are shown.
  O << Indent << "WIDEN-INDUCTION";
  if (getTruncInst()) {
    // NOTE(review): the "\l" / escaped-quote sequences look like DOT label
    // escapes used when the plan is rendered as a graph — confirm against the
    // VPlan dot-printing code.
    O << "\\l\"";
    O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
    O << " +\n" << Indent << "\" ";
    getVPValue(0)->printAsOperand(O, SlotTracker);
  } else
    O << " " << VPlanIngredient(IV);

  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
975 #endif
977 bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
978 // The step may be defined by a recipe in the preheader (e.g. if it requires
979 // SCEV expansion), but for the canonical induction the step is required to be
980 // 1, which is represented as live-in.
981 if (getStepValue()->getDefiningRecipe())
982 return false;
983 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
984 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
985 return StartC && StartC->isZero() && StepC && StepC->isOne();
988 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
989 void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
990 VPSlotTracker &SlotTracker) const {
991 O << Indent;
992 printAsOperand(O, SlotTracker);
993 O << Indent << "= DERIVED-IV ";
994 getStartValue()->printAsOperand(O, SlotTracker);
995 O << " + ";
996 getCanonicalIV()->printAsOperand(O, SlotTracker);
997 O << " * ";
998 getStepValue()->printAsOperand(O, SlotTracker);
1000 if (TruncResultTy)
1001 O << " (truncated to " << *TruncResultTy << ")";
1003 #endif
void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());

  // Compute scalar induction steps: for each generated (part, lane) the value
  // is BaseIV + (part * VF + lane) * Step, where BaseIV is the scalar
  // induction variable on which to base the steps and Step is the step size.

  Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));
  IRBuilderBase &Builder = State.Builder;

  // Ensure step has the same type as that of scalar IV.
  Type *BaseIVTy = BaseIV->getType()->getScalarType();
  if (BaseIVTy != Step->getType()) {
    // TODO: Also use VPDerivedIVRecipe when only the step needs truncating, to
    // avoid separate truncate here.
    assert(Step->getType()->isIntegerTy() &&
           "Truncation requires an integer step");
    Step = State.Builder.CreateTrunc(Step, BaseIVTy);
  }

  // We build scalar steps for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (BaseIVTy->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = InductionOpcode;
    MulOp = Instruction::FMul;
  }

  // Determine the number of scalars we need to generate for each unroll
  // iteration.
  bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
  // Compute the scalar steps and save the results in State.
  Type *IntStepTy =
      IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
  Type *VecIVTy = nullptr;
  Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
  // For scalable VFs the per-lane values cannot be enumerated statically, so
  // additionally prepare splats/step-vectors to compute a whole vector of
  // steps at once.
  if (!FirstLaneOnly && State.VF.isScalable()) {
    VecIVTy = VectorType::get(BaseIVTy, State.VF);
    UnitStepVec =
        Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
    SplatStep = Builder.CreateVectorSplat(State.VF, Step);
    SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
  }

  // When executing a single replicate instance, restrict generation to the
  // requested part/lane only.
  unsigned StartPart = 0;
  unsigned EndPart = State.UF;
  unsigned StartLane = 0;
  unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
  if (State.Instance) {
    StartPart = State.Instance->Part;
    EndPart = StartPart + 1;
    StartLane = State.Instance->Lane.getKnownLane();
    EndLane = StartLane + 1;
  }
  for (unsigned Part = StartPart; Part < EndPart; ++Part) {
    // StartIdx0 = Part * VF (runtime value for scalable VF).
    Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);

    if (!FirstLaneOnly && State.VF.isScalable()) {
      auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
      auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
      if (BaseIVTy->isFloatingPointTy())
        InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
      auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
      auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
      State.set(this, Add, Part);
      // It's useful to record the lane values too for the known minimum number
      // of elements so we do those below. This improves the code quality when
      // trying to extract the first element, for example.
    }

    if (BaseIVTy->isFloatingPointTy())
      StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);

    for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
      Value *StartIdx = Builder.CreateBinOp(
          AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
      // The step returned by `createStepForVF` is a runtime-evaluated value
      // when VF is scalable. Otherwise, it should be folded into a Constant.
      assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
             "Expected StartIdx to be folded to a constant when VF is not "
             "scalable");
      auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
      auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
      State.set(this, Add, VPIteration(Part, Lane));
    }
  }
}
1100 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  // Print as: "<result> = SCALAR-STEPS <operands>".
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = SCALAR-STEPS ";
  printOperands(O, SlotTracker);
}
1108 #endif
void VPWidenGEPRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
  // Construct a vector GEP by widening the operands of the scalar GEP as
  // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
  // results in a vector of pointers when at least one operand of the GEP
  // is vector-typed. Thus, to keep the representation compact, we only use
  // vector-typed operands for loop-varying values.

  if (areAllOperandsInvariant()) {
    // If we are vectorizing, but the GEP has only loop-invariant operands,
    // the GEP we build (by only using vector-typed operands for
    // loop-varying values) would be a scalar pointer. Thus, to ensure we
    // produce a vector of pointers, we need to either arbitrarily pick an
    // operand to broadcast, or broadcast a clone of the original GEP.
    // Here, we broadcast a clone of the original.
    //
    // TODO: If at some point we decide to scalarize instructions having
    // loop-invariant operands, this special case will no longer be
    // required. We would add the scalarization decision to
    // collectLoopScalars() and teach getVectorValue() to broadcast
    // the lane-zero scalar value.
    SmallVector<Value *> Ops;
    for (unsigned I = 0, E = getNumOperands(); I != E; I++)
      Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));

    // Build one scalar GEP and splat it to a vector per unroll part.
    auto *NewGEP =
        State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
                                ArrayRef(Ops).drop_front(), "", isInBounds());
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
      State.set(this, EntryPart, Part);
      State.addMetadata(EntryPart, GEP);
    }
  } else {
    // If the GEP has at least one loop-varying operand, we are sure to
    // produce a vector of pointers. But if we are only unrolling, we want
    // to produce a scalar GEP for each unroll part. Thus, the GEP we
    // produce with the code below will be scalar (if VF == 1) or vector
    // (otherwise). Note that for the unroll-only case, we still maintain
    // values in the vector mapping with initVector, as we do for other
    // instructions.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // The pointer operand of the new GEP. If it's loop-invariant, we
      // won't broadcast it.
      auto *Ptr = isPointerLoopInvariant()
                      ? State.get(getOperand(0), VPIteration(0, 0))
                      : State.get(getOperand(0), Part);

      // Collect all the indices for the new GEP. If any index is
      // loop-invariant, we won't broadcast it.
      SmallVector<Value *, 4> Indices;
      for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
        VPValue *Operand = getOperand(I);
        if (isIndexLoopInvariant(I - 1))
          Indices.push_back(State.get(Operand, VPIteration(0, 0)));
        else
          Indices.push_back(State.get(Operand, Part));
      }

      // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
      // but it should be a vector, otherwise.
      auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
                                             Indices, "", isInBounds());
      assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
             "NewGEP is not a pointer vector");
      State.set(this, NewGEP, Part);
      State.addMetadata(NewGEP, GEP);
    }
  }
}
1182 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  // Print as: "WIDEN-GEP Inv|Var[Inv|Var]... <result> = getelementptr ...",
  // where the Inv/Var tags show loop-invariance of the pointer and each index.
  O << Indent << "WIDEN-GEP ";
  O << (isPointerLoopInvariant() ? "Inv" : "Var");
  for (size_t I = 0; I < getNumOperands() - 1; ++I)
    O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";

  O << " ";
  printAsOperand(O, SlotTracker);
  O << " = getelementptr";
  printFlags(O);
  printOperands(O, SlotTracker);
}
1196 #endif
void VPBlendRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  // We know that all PHIs in non-header blocks are converted into
  // selects, so we don't have to worry about the insertion order and we
  // can just use the builder.
  // At this point we generate the predication tree. There may be
  // duplications since this is a simple recursive scan, but future
  // optimizations will clean it up.

  unsigned NumIncoming = getNumIncomingValues();

  // Generate a sequence of selects of the form:
  // SELECT(Mask3, In3,
  //        SELECT(Mask2, In2,
  //               SELECT(Mask1, In1,
  //                      In0)))
  // Note that Mask0 is never used: lanes for which no path reaches this phi and
  // are essentially undef are taken from In0.
  VectorParts Entry(State.UF);
  for (unsigned In = 0; In < NumIncoming; ++In) {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // We might have single edge PHIs (blocks) - use an identity
      // 'select' for the first PHI operand.
      Value *In0 = State.get(getIncomingValue(In), Part);
      if (In == 0)
        Entry[Part] = In0; // Initialize with the first incoming value.
      else {
        // Select between the current value and the previous incoming edge
        // based on the incoming mask.
        Value *Cond = State.get(getMask(In), Part);
        Entry[Part] =
            State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
      }
    }
  }
  // Publish the fully-blended value for each unroll part.
  for (unsigned Part = 0; Part < State.UF; ++Part)
    State.set(this, Entry[Part], Part);
}
1237 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  // Print as "BLEND <result> = <value>/<mask> ..." pairs, or just the single
  // incoming value when no blending is needed.
  O << Indent << "BLEND ";
  printAsOperand(O, SlotTracker);
  O << " =";
  if (getNumIncomingValues() == 1) {
    // Not a User of any mask: not really blending, this is a
    // single-predecessor phi.
    O << " ";
    getIncomingValue(0)->printAsOperand(O, SlotTracker);
  } else {
    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
      O << " ";
      getIncomingValue(I)->printAsOperand(O, SlotTracker);
      O << "/";
      getMask(I)->printAsOperand(O, SlotTracker);
    }
  }
}
void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  // Print as: "REDUCE <result> = <chain> + reduce.<op> (<vec>[, <cond>])",
  // with fast-math flags after the '+' when the ingredient has them.
  O << Indent << "REDUCE ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  if (isa<FPMathOperator>(getUnderlyingInstr()))
    O << getUnderlyingInstr()->getFastMathFlags();
  O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
  getVecOp()->printAsOperand(O, SlotTracker);
  if (getCondOp()) {
    O << ", ";
    getCondOp()->printAsOperand(O, SlotTracker);
  }
  O << ")";
  if (RdxDesc.IntermediateStore)
    O << " (with final reduction value stored in invariant address sank "
         "outside of loop)";
}
1278 #endif
1280 bool VPReplicateRecipe::shouldPack() const {
1281 // Find if the recipe is used by a widened recipe via an intervening
1282 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
1283 return any_of(users(), [](const VPUser *U) {
1284 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
1285 return any_of(PredR->users(), [PredR](const VPUser *U) {
1286 return !U->usesScalars(PredR);
1288 return false;
1292 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  // Uniform replicates are printed as "CLONE", per-lane ones as "REPLICATE".
  O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");

  if (!getUnderlyingInstr()->getType()->isVoidTy()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }
  if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
    // Calls: print the callee name and all operands except the last one,
    // which is the called function itself.
    O << "call";
    printFlags(O);
    O << "@" << CB->getCalledFunction()->getName() << "(";
    interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
                    O, [&O, &SlotTracker](VPValue *Op) {
                      Op->printAsOperand(O, SlotTracker);
                    });
    O << ")";
  } else {
    O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode());
    printFlags(O);
    printOperands(O, SlotTracker);
  }

  // Mark recipes whose scalar results also get packed into a vector.
  if (shouldPack())
    O << " (S->V)";
}
1319 #endif
void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Branch on Mask works only on single instance.");

  unsigned Part = State.Instance->Part;
  unsigned Lane = State.Instance->Lane.getKnownLane();

  // Extract this instance's lane of the mask (or use 'true' when there is no
  // mask) as the branch condition.
  Value *ConditionBit = nullptr;
  VPValue *BlockInMask = getMask();
  if (BlockInMask) {
    ConditionBit = State.get(BlockInMask, Part);
    if (ConditionBit->getType()->isVectorTy())
      ConditionBit = State.Builder.CreateExtractElement(
          ConditionBit, State.Builder.getInt32(Lane));
  } else // Block in mask is all-one.
    ConditionBit = State.Builder.getTrue();

  // Replace the temporary unreachable terminator with a new conditional branch,
  // whose two destinations will be set later when they are created.
  auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
  assert(isa<UnreachableInst>(CurrentTerminator) &&
         "Expected to replace unreachable terminator with conditional branch.");
  auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
  CondBr->setSuccessor(0, nullptr);
  ReplaceInstWithInst(CurrentTerminator, CondBr);
}
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Predicated instruction PHI works per instance.");
  Instruction *ScalarPredInst =
      cast<Instruction>(State.get(getOperand(0), *State.Instance));
  BasicBlock *PredicatedBB = ScalarPredInst->getParent();
  BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
  assert(PredicatingBB && "Predicated block has no single predecessor.");
  assert(isa<VPReplicateRecipe>(getOperand(0)) &&
         "operand must be VPReplicateRecipe");

  // By current pack/unpack logic we need to generate only a single phi node: if
  // a vector value for the predicated instruction exists at this point it means
  // the instruction has vector users only, and a phi for the vector value is
  // needed. In this case the recipe of the predicated instruction is marked to
  // also do that packing, thereby "hoisting" the insert-element sequence.
  // Otherwise, a phi node for the scalar value is needed.
  unsigned Part = State.Instance->Part;
  if (State.hasVectorValue(getOperand(0), Part)) {
    // Vector case: phi between the unmodified vector (predicating edge) and
    // the vector with the freshly inserted element (predicated edge).
    Value *VectorValue = State.get(getOperand(0), Part);
    InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
    PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
    VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
    VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
    if (State.hasVectorValue(this, Part))
      State.reset(this, VPhi, Part);
    else
      State.set(this, VPhi, Part);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), VPhi, Part);
  } else {
    // Scalar case: phi between poison (predicating edge, lane inactive) and
    // the predicated instruction's scalar result (predicated edge).
    Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
    Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
                     PredicatingBB);
    Phi->addIncoming(ScalarPredInst, PredicatedBB);
    if (State.hasScalarValue(this, *State.Instance))
      State.reset(this, Phi, *State.Instance);
    else
      State.set(this, Phi, *State.Instance);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), Phi, *State.Instance);
  }
}
1393 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  // Print as: "PHI-PREDICATED-INSTRUCTION <result> = <operands>".
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}
void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
                                           VPSlotTracker &SlotTracker) const {
  // Print as "WIDEN [<result> = ]<opcode> <operands>"; stores define no value.
  O << Indent << "WIDEN ";

  if (!isStore()) {
    getVPSingleValue()->printAsOperand(O, SlotTracker);
    O << " = ";
  }
  O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";

  printOperands(O, SlotTracker);
}
1414 #endif
1416 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
1417 Value *Start = getStartValue()->getLiveInIRValue();
1418 PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
1419 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1421 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1422 EntryPart->addIncoming(Start, VectorPH);
1423 EntryPart->setDebugLoc(getDebugLoc());
1424 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1425 State.set(this, EntryPart, Part);
1428 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  // Print as: "EMIT <result> = CANONICAL-INDUCTION <operands>".
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
1436 #endif
1438 bool VPCanonicalIVPHIRecipe::isCanonical(
1439 InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step,
1440 Type *Ty) const {
1441 // The types must match and it must be an integer induction.
1442 if (Ty != getScalarType() || Kind != InductionDescriptor::IK_IntInduction)
1443 return false;
1444 // Start must match the start value of this canonical induction.
1445 if (Start != getStartValue())
1446 return false;
1448 // If the step is defined by a recipe, it is not a ConstantInt.
1449 if (Step->getDefiningRecipe())
1450 return false;
1452 ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
1453 return StepC && StepC->isOne();
1456 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
1457 return IsScalarAfterVectorization &&
1458 (!VF.isScalable() || vputils::onlyFirstLaneUsed(this));
1461 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  // Print as: "EMIT <result> = WIDEN-POINTER-INDUCTION <start>, <step SCEV>".
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << ", " << *IndDesc.getStep();
}
1470 #endif
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "cannot be used in per-lane");
  const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
  SCEVExpander Exp(SE, DL, "induction");

  // Expand the SCEV expression to IR at the current insert point and record
  // it, so later code can look up the expansion result per expression.
  Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
                                 &*State.Builder.GetInsertPoint());
  assert(!State.ExpandedSCEVs.contains(Expr) &&
         "Same SCEV expanded multiple times");
  State.ExpandedSCEVs[Expr] = Res;
  // The expansion is uniform; reuse the single result for all unroll parts.
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, Res, {Part, 0});
}
1486 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  // Print as: "EMIT <result> = EXPAND SCEV <expression>".
  O << Indent << "EMIT ";
  getVPSingleValue()->printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}
1493 #endif
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  // Widen the scalar canonical IV: for each part, produce
  // splat(canonical-iv) + (Part * VF + step-vector), i.e. the per-lane index
  // values of that unroll part.
  Value *CanonicalIV = State.get(getOperand(0), 0);
  Type *STy = CanonicalIV->getType();
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  ElementCount VF = State.VF;
  Value *VStart = VF.isScalar()
                      ? CanonicalIV
                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    // VStep = Part * VF, then add the 0..VF-1 step vector for vector VFs.
    Value *VStep = createStepForVF(Builder, STy, VF, Part);
    if (VF.isVector()) {
      VStep = Builder.CreateVectorSplat(VF, VStep);
      VStep =
          Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
    }
    Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
    State.set(this, CanonicalVectorIV, Part);
  }
}
1515 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  // Print as: "EMIT <result> = WIDEN-CANONICAL-INDUCTION <operands>".
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
1523 #endif
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  if (State.VF.isVector()) {
    // Place the scalar initial value into the last lane of a poison vector in
    // the preheader; the remaining lanes are filled by the recurrence.
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(VectorPH->getTerminator());
    // Last lane index is RuntimeVF - 1 (runtime value for scalable VFs).
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence.
  PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
  EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, VectorPH);
  State.set(this, EntryPart, 0);
}
1553 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  // Print as: "FIRST-ORDER-RECURRENCE-PHI <result> = phi <operands>".
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
1561 #endif
void VPReductionPHIRecipe::execute(VPTransformState &State) {
  PHINode *PN = cast<PHINode>(getUnderlyingValue());
  auto &Builder = State.Builder;

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy =
      ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  // Ordered reductions use a single phi across all unroll parts.
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
    EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part);
  }

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  // Reductions do not have to start at zero. They can start with
  // any loop invariant values.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  // Iden is the identity value fed into the non-first unroll parts; StartV
  // (possibly adjusted below) seeds the first part.
  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
    // MinMax and AnyOf reductions have the start value as their identity.
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      // Vector start value: identity splat with the scalar start value placed
      // in lane 0 (emitted in the preheader).
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part);
    // Make sure to add the reduction start value only to the
    // first unroll part.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
  }
}
1627 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  // Print as: "WIDEN-REDUCTION-PHI <result> = phi <operands>".
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
1636 #endif
1638 void VPWidenPHIRecipe::execute(VPTransformState &State) {
1639 assert(EnableVPlanNativePath &&
1640 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
1642 Value *Op0 = State.get(getOperand(0), 0);
1643 Type *VecTy = Op0->getType();
1644 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
1645 State.set(this, VecPhi, 0);
1648 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-PHI ";

  auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  // Unless all incoming values are modeled in VPlan print the original PHI
  // directly.
  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
  // values as VPValues.
  if (getNumOperands() != OriginalPhi->getNumOperands()) {
    O << VPlanIngredient(OriginalPhi);
    return;
  }

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
1667 #endif
1669 // TODO: It would be good to use the existing VPWidenPHIRecipe instead and
1670 // remove VPActiveLaneMaskPHIRecipe.
1671 void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
1672 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1673 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1674 Value *StartMask = State.get(getOperand(0), Part);
1675 PHINode *EntryPart =
1676 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
1677 EntryPart->addIncoming(StartMask, VectorPH);
1678 EntryPart->setDebugLoc(getDebugLoc());
1679 State.set(this, EntryPart, Part);
1683 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                      VPSlotTracker &SlotTracker) const {
  // Print as: "ACTIVE-LANE-MASK-PHI <result> = phi <operands>".
  O << Indent << "ACTIVE-LANE-MASK-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
1692 #endif