lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

   1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file provides loop vectorization legality analysis. Original code
  10 // resided in LoopVectorize.cpp for a long time.
  11 //
  12 // At this point, it is implemented as a utility class, not as an analysis
  13 // pass. It should be easy to create an analysis pass around it if there
  14 // is a need (but D45420 needs to happen first).
  15 //
  16 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
  17 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
  18 #include "llvm/Analysis/Loads.h"
  19 #include "llvm/Analysis/ValueTracking.h"
  20 #include "llvm/Analysis/VectorUtils.h"
  21 #include "llvm/IR/IntrinsicInst.h"
  22
  23 using namespace llvm;
  24
  25 #define LV_NAME "loop-vectorize"
  26 #define DEBUG_TYPE LV_NAME
  27
  28 extern cl::opt<bool> EnableVPlanPredication;
  29
  30 static cl::opt<bool>
  31     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
  32                        cl::desc("Enable if-conversion during vectorization."));
  33
  34 static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
  35     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
  36     cl::desc("The maximum allowed number of runtime memory checks with a "
  37              "vectorize(enable) pragma."));
  38
  39 static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
  40     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
  41     cl::desc("The maximum number of SCEV checks allowed."));
  42
  43 static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
  44     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
  45     cl::desc("The maximum number of SCEV checks allowed with a "
  46              "vectorize(enable) pragma"));
  47
  48 /// Maximum vectorization interleave count.
  49 static const unsigned MaxInterleaveFactor = 16;
  50
  51 namespace llvm {
  52
  53 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
  54   switch (Kind) {
  55   case HK_WIDTH:
  56     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
  57   case HK_UNROLL:
  58     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
  59   case HK_FORCE:
  60     return (Val <= 1);
  61   case HK_ISVECTORIZED:
  62   case HK_PREDICATE:
  63     return (Val == 0 || Val == 1);
  64   }
  65   return false;
  66 }
  67
  68 LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
  69                                        bool InterleaveOnlyWhenForced,
  70                                        OptimizationRemarkEmitter &ORE)
  71     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
  72       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
  73       Force("vectorize.enable", FK_Undefined, HK_FORCE),
  74       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
  75       Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),
  76       ORE(ORE) {
  77   // Populate values with existing loop metadata.
  78   getHintsFromMetadata();
  79
  80   // force-vector-interleave overrides DisableInterleaving.
  81   if (VectorizerParams::isInterleaveForced())
  82     Interleave.Value = VectorizerParams::VectorizationInterleave;
  83
  84   if (IsVectorized.Value != 1)
  85     // If the vectorization width and interleaving count are both 1 then
  86     // consider the loop to have been already vectorized because there's
  87     // nothing more that we can do.
  88     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
  89   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
  90              << "LV: Interleaving disabled by the pass manager\n");
  91 }
  92
  93 void LoopVectorizeHints::setAlreadyVectorized() {
  94   LLVMContext &Context = TheLoop->getHeader()->getContext();
  95
  96   MDNode *IsVectorizedMD = MDNode::get(
  97       Context,
  98       {MDString::get(Context, "llvm.loop.isvectorized"),
  99        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
 100   MDNode *LoopID = TheLoop->getLoopID();
 101   MDNode *NewLoopID =
 102       makePostTransformationMetadata(Context, LoopID,
 103                                      {Twine(Prefix(), "vectorize.").str(),
 104                                       Twine(Prefix(), "interleave.").str()},
 105                                      {IsVectorizedMD});
 106   TheLoop->setLoopID(NewLoopID);
 107
 108   // Update internal cache.
 109   IsVectorized.Value = 1;
 110 }
 111
 112 bool LoopVectorizeHints::allowVectorization(
 113     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
 114   if (getForce() == LoopVectorizeHints::FK_Disabled) {
 115     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
 116     emitRemarkWithHints();
 117     return false;
 118   }
 119
 120   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
 121     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
 122     emitRemarkWithHints();
 123     return false;
 124   }
 125
 126   if (getIsVectorized() == 1) {
 127     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
 128     // FIXME: Add interleave.disable metadata. This will allow
 129     // vectorize.disable to be used without disabling the pass and errors
 130     // to differentiate between disabled vectorization and a width of 1.
 131     ORE.emit([&]() {
 132       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
 133                                         "AllDisabled", L->getStartLoc(),
 134                                         L->getHeader())
 135              << "loop not vectorized: vectorization and interleaving are "
 136                 "explicitly disabled, or the loop has already been "
 137                 "vectorized";
 138     });
 139     return false;
 140   }
 141
 142   return true;
 143 }
 144
 145 void LoopVectorizeHints::emitRemarkWithHints() const {
 146   using namespace ore;
 147
 148   ORE.emit([&]() {
 149     if (Force.Value == LoopVectorizeHints::FK_Disabled)
 150       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
 151                                       TheLoop->getStartLoc(),
 152                                       TheLoop->getHeader())
 153              << "loop not vectorized: vectorization is explicitly disabled";
 154     else {
 155       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
 156                                  TheLoop->getStartLoc(), TheLoop->getHeader());
 157       R << "loop not vectorized";
 158       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
 159         R << " (Force=" << NV("Force", true);
 160         if (Width.Value != 0)
 161           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
 162         if (Interleave.Value != 0)
 163           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
 164         R << ")";
 165       }
 166       return R;
 167     }
 168   });
 169 }
 170
 171 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
 172   if (getWidth() == 1)
 173     return LV_NAME;
 174   if (getForce() == LoopVectorizeHints::FK_Disabled)
 175     return LV_NAME;
 176   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
 177     return LV_NAME;
 178   return OptimizationRemarkAnalysis::AlwaysPrint;
 179 }
 180
 181 void LoopVectorizeHints::getHintsFromMetadata() {
 182   MDNode *LoopID = TheLoop->getLoopID();
 183   if (!LoopID)
 184     return;
 185
 186   // First operand should refer to the loop id itself.
 187   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
 188   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
 189
 190   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
 191     const MDString *S = nullptr;
 192     SmallVector<Metadata *, 4> Args;
 193
 194     // The expected hint is either a MDString or a MDNode with the first
 195     // operand a MDString.
 196     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
 197       if (!MD || MD->getNumOperands() == 0)
 198         continue;
 199       S = dyn_cast<MDString>(MD->getOperand(0));
 200       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
 201         Args.push_back(MD->getOperand(i));
 202     } else {
 203       S = dyn_cast<MDString>(LoopID->getOperand(i));
 204       assert(Args.size() == 0 && "too many arguments for MDString");
 205     }
 206
 207     if (!S)
 208       continue;
 209
 210     // Check if the hint starts with the loop metadata prefix.
 211     StringRef Name = S->getString();
 212     if (Args.size() == 1)
 213       setHint(Name, Args[0]);
 214   }
 215 }
 216
 217 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
 218   if (!Name.startswith(Prefix()))
 219     return;
 220   Name = Name.substr(Prefix().size(), StringRef::npos);
 221
 222   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
 223   if (!C)
 224     return;
 225   unsigned Val = C->getZExtValue();
 226
 227   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
 228   for (auto H : Hints) {
 229     if (Name == H->Name) {
 230       if (H->validate(Val))
 231         H->Value = Val;
 232       else
 233         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
 234       break;
 235     }
 236   }
 237 }
 238
 239 bool LoopVectorizationRequirements::doesNotMeet(
 240     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
 241   const char *PassName = Hints.vectorizeAnalysisPassName();
 242   bool Failed = false;
 243   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
 244     ORE.emit([&]() {
 245       return OptimizationRemarkAnalysisFPCommute(
 246                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
 247                  UnsafeAlgebraInst->getParent())
 248              << "loop not vectorized: cannot prove it is safe to reorder "
 249                 "floating-point operations";
 250     });
 251     Failed = true;
 252   }
 253
 254   // Test if runtime memcheck thresholds are exceeded.
 255   bool PragmaThresholdReached =
 256       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
 257   bool ThresholdReached =
 258       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
 259   if ((ThresholdReached && !Hints.allowReordering()) ||
 260       PragmaThresholdReached) {
 261     ORE.emit([&]() {
 262       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
 263                                                 L->getStartLoc(),
 264                                                 L->getHeader())
 265              << "loop not vectorized: cannot prove it is safe to reorder "
 266                 "memory operations";
 267     });
 268     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
 269     Failed = true;
 270   }
 271
 272   return Failed;
 273 }
 274
 275 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
 276 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
 277 // executing the inner loop will execute the same iterations). This check is
 278 // very constrained for now but it will be relaxed in the future. \p Lp is
 279 // considered uniform if it meets all the following conditions:
 280 //   1) it has a canonical IV (starting from 0 and with stride 1),
 281 //   2) its latch terminator is a conditional branch and,
 282 //   3) its latch condition is a compare instruction whose operands are the
 283 //      canonical IV and an OuterLp invariant.
 284 // This check doesn't take into account the uniformity of other conditions not
 285 // related to the loop latch because they don't affect the loop uniformity.
 286 //
 287 // NOTE: We decided to keep all these checks and its associated documentation
 288 // together so that we can easily have a picture of the current supported loop
 289 // nests. However, some of the current checks don't depend on \p OuterLp and
 290 // would be redundantly executed for each \p Lp if we invoked this function for
 291 // different candidate outer loops. This is not the case for now because we
 292 // don't currently have the infrastructure to evaluate multiple candidate outer
 293 // loops and \p OuterLp will be a fixed parameter while we only support explicit
 294 // outer loop vectorization. It's also very likely that these checks go away
 295 // before introducing the aforementioned infrastructure. However, if this is not
 296 // the case, we should move the \p OuterLp independent checks to a separate
 297 // function that is only executed once for each \p Lp.
 298 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
 299   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
 300
 301   // If Lp is the outer loop, it's uniform by definition.
 302   if (Lp == OuterLp)
 303     return true;
 304   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
 305
 306   // 1.
 307   PHINode *IV = Lp->getCanonicalInductionVariable();
 308   if (!IV) {
 309     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
 310     return false;
 311   }
 312
 313   // 2.
 314   BasicBlock *Latch = Lp->getLoopLatch();
 315   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
 316   if (!LatchBr || LatchBr->isUnconditional()) {
 317     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
 318     return false;
 319   }
 320
 321   // 3.
 322   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
 323   if (!LatchCmp) {
 324     LLVM_DEBUG(
 325         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
 326     return false;
 327   }
 328
 329   Value *CondOp0 = LatchCmp->getOperand(0);
 330   Value *CondOp1 = LatchCmp->getOperand(1);
 331   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
 332   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
 333       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
 334     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
 335     return false;
 336   }
 337
 338   return true;
 339 }
 340
 341 // Return true if \p Lp and all its nested loops are uniform with regard to \p
 342 // OuterLp.
 343 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
 344   if (!isUniformLoop(Lp, OuterLp))
 345     return false;
 346
 347   // Check if nested loops are uniform.
 348   for (Loop *SubLp : *Lp)
 349     if (!isUniformLoopNest(SubLp, OuterLp))
 350       return false;
 351
 352   return true;
 353 }
 354
 355 /// Check whether it is safe to if-convert this phi node.
 356 ///
 357 /// Phi nodes with constant expressions that can trap are not safe to if
 358 /// convert.
 359 static bool canIfConvertPHINodes(BasicBlock *BB) {
 360   for (PHINode &Phi : BB->phis()) {
 361     for (Value *V : Phi.incoming_values())
 362       if (auto *C = dyn_cast<Constant>(V))
 363         if (C->canTrap())
 364           return false;
 365   }
 366   return true;
 367 }
 368
 369 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
 370   if (Ty->isPointerTy())
 371     return DL.getIntPtrType(Ty);
 372
 373   // It is possible that char's or short's overflow when we ask for the loop's
 374   // trip count, work around this by changing the type size.
 375   if (Ty->getScalarSizeInBits() < 32)
 376     return Type::getInt32Ty(Ty->getContext());
 377
 378   return Ty;
 379 }
 380
 381 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
 382   Ty0 = convertPointerToIntegerType(DL, Ty0);
 383   Ty1 = convertPointerToIntegerType(DL, Ty1);
 384   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
 385     return Ty0;
 386   return Ty1;
 387 }
 388
 389 /// Check that the instruction has outside loop users and is not an
 390 /// identified reduction variable.
 391 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
 392                                SmallPtrSetImpl<Value *> &AllowedExit) {
 393   // Reductions, Inductions and non-header phis are allowed to have exit users. All
 394   // other instructions must not have external users.
 395   if (!AllowedExit.count(Inst))
 396     // Check that all of the users of the loop are inside the BB.
 397     for (User *U : Inst->users()) {
 398       Instruction *UI = cast<Instruction>(U);
 399       // This user may be a reduction exit value.
 400       if (!TheLoop->contains(UI)) {
 401         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
 402         return true;
 403       }
 404     }
 405   return false;
 406 }
 407
 408 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
 409   const ValueToValueMap &Strides =
 410       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
 411
 412   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
 413   if (Stride == 1 || Stride == -1)
 414     return Stride;
 415   return 0;
 416 }
 417
 418 bool LoopVectorizationLegality::isUniform(Value *V) {
 419   return LAI->isUniform(V);
 420 }
 421
 422 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
 423   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
 424   // Store the result and return it at the end instead of exiting early, in case
 425   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
 426   bool Result = true;
 427   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
 428
 429   for (BasicBlock *BB : TheLoop->blocks()) {
 430     // Check whether the BB terminator is a BranchInst. Any other terminator is
 431     // not supported yet.
 432     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
 433     if (!Br) {
 434       reportVectorizationFailure("Unsupported basic block terminator",
 435           "loop control flow is not understood by vectorizer",
 436           "CFGNotUnderstood", ORE, TheLoop);
 437       if (DoExtraAnalysis)
 438         Result = false;
 439       else
 440         return false;
 441     }
 442
 443     // Check whether the BranchInst is a supported one. Only unconditional
 444     // branches, conditional branches with an outer loop invariant condition or
 445     // backedges are supported.
 446     // FIXME: We skip these checks when VPlan predication is enabled as we
 447     // want to allow divergent branches. This whole check will be removed
 448     // once VPlan predication is on by default.
 449     if (!EnableVPlanPredication && Br && Br->isConditional() &&
 450         !TheLoop->isLoopInvariant(Br->getCondition()) &&
 451         !LI->isLoopHeader(Br->getSuccessor(0)) &&
 452         !LI->isLoopHeader(Br->getSuccessor(1))) {
 453       reportVectorizationFailure("Unsupported conditional branch",
 454           "loop control flow is not understood by vectorizer",
 455           "CFGNotUnderstood", ORE, TheLoop);
 456       if (DoExtraAnalysis)
 457         Result = false;
 458       else
 459         return false;
 460     }
 461   }
 462
 463   // Check whether inner loops are uniform. At this point, we only support
 464   // simple outer loops scenarios with uniform nested loops.
 465   if (!isUniformLoopNest(TheLoop /*loop nest*/,
 466                          TheLoop /*context outer loop*/)) {
 467     reportVectorizationFailure("Outer loop contains divergent loops",
 468         "loop control flow is not understood by vectorizer",
 469         "CFGNotUnderstood", ORE, TheLoop);
 470     if (DoExtraAnalysis)
 471       Result = false;
 472     else
 473       return false;
 474   }
 475
 476   // Check whether we are able to set up outer loop induction.
 477   if (!setupOuterLoopInductions()) {
 478     reportVectorizationFailure("Unsupported outer loop Phi(s)",
 479                                "Unsupported outer loop Phi(s)",
 480                                "UnsupportedPhi", ORE, TheLoop);
 481     if (DoExtraAnalysis)
 482       Result = false;
 483     else
 484       return false;
 485   }
 486
 487   return Result;
 488 }
 489
 490 void LoopVectorizationLegality::addInductionPhi(
 491     PHINode *Phi, const InductionDescriptor &ID,
 492     SmallPtrSetImpl<Value *> &AllowedExit) {
 493   Inductions[Phi] = ID;
 494
 495   // In case this induction also comes with casts that we know we can ignore
 496   // in the vectorized loop body, record them here. All casts could be recorded
 497   // here for ignoring, but suffices to record only the first (as it is the
 498   // only one that may bw used outside the cast sequence).
 499   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
 500   if (!Casts.empty())
 501     InductionCastsToIgnore.insert(*Casts.begin());
 502
 503   Type *PhiTy = Phi->getType();
 504   const DataLayout &DL = Phi->getModule()->getDataLayout();
 505
 506   // Get the widest type.
 507   if (!PhiTy->isFloatingPointTy()) {
 508     if (!WidestIndTy)
 509       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
 510     else
 511       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
 512   }
 513
 514   // Int inductions are special because we only allow one IV.
 515   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
 516       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
 517       isa<Constant>(ID.getStartValue()) &&
 518       cast<Constant>(ID.getStartValue())->isNullValue()) {
 519
 520     // Use the phi node with the widest type as induction. Use the last
 521     // one if there are multiple (no good reason for doing this other
 522     // than it is expedient). We've checked that it begins at zero and
 523     // steps by one, so this is a canonical induction variable.
 524     if (!PrimaryInduction || PhiTy == WidestIndTy)
 525       PrimaryInduction = Phi;
 526   }
 527
 528   // Both the PHI node itself, and the "post-increment" value feeding
 529   // back into the PHI node may have external users.
 530   // We can allow those uses, except if the SCEVs we have for them rely
 531   // on predicates that only hold within the loop, since allowing the exit
 532   // currently means re-using this SCEV outside the loop (see PR33706 for more
 533   // details).
 534   if (PSE.getUnionPredicate().isAlwaysTrue()) {
 535     AllowedExit.insert(Phi);
 536     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
 537   }
 538
 539   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
 540 }
 541
 542 bool LoopVectorizationLegality::setupOuterLoopInductions() {
 543   BasicBlock *Header = TheLoop->getHeader();
 544
 545   // Returns true if a given Phi is a supported induction.
 546   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
 547     InductionDescriptor ID;
 548     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
 549         ID.getKind() == InductionDescriptor::IK_IntInduction) {
 550       addInductionPhi(&Phi, ID, AllowedExit);
 551       return true;
 552     } else {
 553       // Bail out for any Phi in the outer loop header that is not a supported
 554       // induction.
 555       LLVM_DEBUG(
 556           dbgs()
 557           << "LV: Found unsupported PHI for outer loop vectorization.\n");
 558       return false;
 559     }
 560   };
 561
 562   if (llvm::all_of(Header->phis(), isSupportedPhi))
 563     return true;
 564   else
 565     return false;
 566 }
 567
 568 bool LoopVectorizationLegality::canVectorizeInstrs() {
 569   BasicBlock *Header = TheLoop->getHeader();
 570
 571   // Look for the attribute signaling the absence of NaNs.
 572   Function &F = *Header->getParent();
 573   HasFunNoNaNAttr =
 574       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
 575
 576   // For each block in the loop.
 577   for (BasicBlock *BB : TheLoop->blocks()) {
 578     // Scan the instructions in the block and look for hazards.
 579     for (Instruction &I : *BB) {
 580       if (auto *Phi = dyn_cast<PHINode>(&I)) {
 581         Type *PhiTy = Phi->getType();
 582         // Check that this PHI type is allowed.
 583         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
 584             !PhiTy->isPointerTy()) {
 585           reportVectorizationFailure("Found a non-int non-pointer PHI",
 586                                      "loop control flow is not understood by vectorizer",
 587                                      "CFGNotUnderstood", ORE, TheLoop);
 588           return false;
 589         }
 590
 591         // If this PHINode is not in the header block, then we know that we
 592         // can convert it to select during if-conversion. No need to check if
 593         // the PHIs in this block are induction or reduction variables.
 594         if (BB != Header) {
 595           // Non-header phi nodes that have outside uses can be vectorized. Add
 596           // them to the list of allowed exits.
 597           // Unsafe cyclic dependencies with header phis are identified during
 598           // legalization for reduction, induction and first order
 599           // recurrences.
 600           AllowedExit.insert(&I);
 601           continue;
 602         }
 603
 604         // We only allow if-converted PHIs with exactly two incoming values.
 605         if (Phi->getNumIncomingValues() != 2) {
 606           reportVectorizationFailure("Found an invalid PHI",
 607               "loop control flow is not understood by vectorizer",
 608               "CFGNotUnderstood", ORE, TheLoop, Phi);
 609           return false;
 610         }
 611
 612         RecurrenceDescriptor RedDes;
 613         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
 614                                                  DT)) {
 615           if (RedDes.hasUnsafeAlgebra())
 616             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
 617           AllowedExit.insert(RedDes.getLoopExitInstr());
 618           Reductions[Phi] = RedDes;
 619           continue;
 620         }
 621
 622         // TODO: Instead of recording the AllowedExit, it would be good to record the
 623         // complementary set: NotAllowedExit. These include (but may not be
 624         // limited to):
 625         // 1. Reduction phis as they represent the one-before-last value, which
 626         // is not available when vectorized
 627         // 2. Induction phis and increment when SCEV predicates cannot be used
 628         // outside the loop - see addInductionPhi
 629         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
 630         // outside the loop - see call to hasOutsideLoopUser in the non-phi
 631         // handling below
 632         // 4. FirstOrderRecurrence phis that can possibly be handled by
 633         // extraction.
 634         // By recording these, we can then reason about ways to vectorize each
 635         // of these NotAllowedExit.
 636         InductionDescriptor ID;
 637         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
 638           addInductionPhi(Phi, ID, AllowedExit);
 639           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
 640             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
 641           continue;
 642         }
 643
 644         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
 645                                                          SinkAfter, DT)) {
 646           FirstOrderRecurrences.insert(Phi);
 647           continue;
 648         }
 649
 650         // As a last resort, coerce the PHI to a AddRec expression
 651         // and re-try classifying it a an induction PHI.
 652         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
 653           addInductionPhi(Phi, ID, AllowedExit);
 654           continue;
 655         }
 656
 657         reportVectorizationFailure("Found an unidentified PHI",
 658             "value that could not be identified as "
 659             "reduction is used outside the loop",
 660             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
 661         return false;
 662       } // end of PHI handling
 663
 664       // We handle calls that:
 665       //   * Are debug info intrinsics.
 666       //   * Have a mapping to an IR intrinsic.
 667       //   * Have a vector version available.
 668       auto *CI = dyn_cast<CallInst>(&I);
 669       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
 670           !isa<DbgInfoIntrinsic>(CI) &&
 671           !(CI->getCalledFunction() && TLI &&
 672             TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
 673         // If the call is a recognized math libary call, it is likely that
 674         // we can vectorize it given loosened floating-point constraints.
 675         LibFunc Func;
 676         bool IsMathLibCall =
 677             TLI && CI->getCalledFunction() &&
 678             CI->getType()->isFloatingPointTy() &&
 679             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
 680             TLI->hasOptimizedCodeGen(Func);
 681
 682         if (IsMathLibCall) {
 683           // TODO: Ideally, we should not use clang-specific language here,
 684           // but it's hard to provide meaningful yet generic advice.
 685           // Also, should this be guarded by allowExtraAnalysis() and/or be part
 686           // of the returned info from isFunctionVectorizable()?
 687           reportVectorizationFailure("Found a non-intrinsic callsite",
 688               "library call cannot be vectorized. "
 689               "Try compiling with -fno-math-errno, -ffast-math, "
 690               "or similar flags",
 691               "CantVectorizeLibcall", ORE, TheLoop, CI);
 692         } else {
 693           reportVectorizationFailure("Found a non-intrinsic callsite",
 694                                      "call instruction cannot be vectorized",
 695                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
 696         }
 697         return false;
 698       }
 699
 700       // Some intrinsics have scalar arguments and should be same in order for
 701       // them to be vectorized (i.e. loop invariant).
 702       if (CI) {
 703         auto *SE = PSE.getSE();
 704         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
 705         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
 706           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
 707             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
 708               reportVectorizationFailure("Found unvectorizable intrinsic",
 709                   "intrinsic instruction cannot be vectorized",
 710                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
 711               return false;
 712             }
 713           }
 714       }
 715
 716       // Check that the instruction return type is vectorizable.
 717       // Also, we can't vectorize extractelement instructions.
 718       if ((!VectorType::isValidElementType(I.getType()) &&
 719            !I.getType()->isVoidTy()) ||
 720           isa<ExtractElementInst>(I)) {
 721         reportVectorizationFailure("Found unvectorizable type",
 722             "instruction return type cannot be vectorized",
 723             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
 724         return false;
 725       }
 726
 727       // Check that the stored type is vectorizable.
 728       if (auto *ST = dyn_cast<StoreInst>(&I)) {
 729         Type *T = ST->getValueOperand()->getType();
 730         if (!VectorType::isValidElementType(T)) {
 731           reportVectorizationFailure("Store instruction cannot be vectorized",
 732                                      "store instruction cannot be vectorized",
 733                                      "CantVectorizeStore", ORE, TheLoop, ST);
 734           return false;
 735         }
 736
 737         // For nontemporal stores, check that a nontemporal vector version is
 738         // supported on the target.
 739         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
 740           // Arbitrarily try a vector of 2 elements.
 741           Type *VecTy = VectorType::get(T, /*NumElements=*/2);
 742           assert(VecTy && "did not find vectorized version of stored type");
 743           unsigned Alignment = getLoadStoreAlignment(ST);
 744           assert(Alignment && "Alignment should be set");
 745           if (!TTI->isLegalNTStore(VecTy, llvm::Align(Alignment))) {
 746             reportVectorizationFailure(
 747                 "nontemporal store instruction cannot be vectorized",
 748                 "nontemporal store instruction cannot be vectorized",
 749                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
 750             return false;
 751           }
 752         }
 753
 754       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
 755         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
 756           // For nontemporal loads, check that a nontemporal vector version is
 757           // supported on the target (arbitrarily try a vector of 2 elements).
 758           Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
 759           assert(VecTy && "did not find vectorized version of load type");
 760           unsigned Alignment = getLoadStoreAlignment(LD);
 761           assert(Alignment && "Alignment should be set");
 762           if (!TTI->isLegalNTLoad(VecTy, llvm::Align(Alignment))) {
 763             reportVectorizationFailure(
 764                 "nontemporal load instruction cannot be vectorized",
 765                 "nontemporal load instruction cannot be vectorized",
 766                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
 767             return false;
 768           }
 769         }
 770
 771         // FP instructions can allow unsafe algebra, thus vectorizable by
 772         // non-IEEE-754 compliant SIMD units.
 773         // This applies to floating-point math operations and calls, not memory
 774         // operations, shuffles, or casts, as they don't change precision or
 775         // semantics.
 776       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
 777                  !I.isFast()) {
 778         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
 779         Hints->setPotentiallyUnsafe();
 780       }
 781
 782       // Reduction instructions are allowed to have exit users.
 783       // All other instructions must not have external users.
 784       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
 785         // We can safely vectorize loops where instructions within the loop are
 786         // used outside the loop only if the SCEV predicates within the loop is
 787         // same as outside the loop. Allowing the exit means reusing the SCEV
 788         // outside the loop.
 789         if (PSE.getUnionPredicate().isAlwaysTrue()) {
 790           AllowedExit.insert(&I);
 791           continue;
 792         }
 793         reportVectorizationFailure("Value cannot be used outside the loop",
 794                                    "value cannot be used outside the loop",
 795                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
 796         return false;
 797       }
 798     } // next instr.
 799   }
 800
 801   if (!PrimaryInduction) {
 802     if (Inductions.empty()) {
 803       reportVectorizationFailure("Did not find one integer induction var",
 804           "loop induction variable could not be identified",
 805           "NoInductionVariable", ORE, TheLoop);
 806       return false;
 807     } else if (!WidestIndTy) {
 808       reportVectorizationFailure("Did not find one integer induction var",
 809           "integer loop induction variable could not be identified",
 810           "NoIntegerInductionVariable", ORE, TheLoop);
 811       return false;
 812     } else {
 813       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
 814     }
 815   }
 816
 817   // Now we know the widest induction type, check if our found induction
 818   // is the same size. If it's not, unset it here and InnerLoopVectorizer
 819   // will create another.
 820   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
 821     PrimaryInduction = nullptr;
 822
 823   return true;
 824 }
 825
 826 bool LoopVectorizationLegality::canVectorizeMemory() {
 827   LAI = &(*GetLAA)(*TheLoop);
 828   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
 829   if (LAR) {
 830     ORE->emit([&]() {
 831       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
 832                                         "loop not vectorized: ", *LAR);
 833     });
 834   }
 835   if (!LAI->canVectorizeMemory())
 836     return false;
 837
 838   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
 839     reportVectorizationFailure("Stores to a uniform address",
 840         "write to a loop invariant address could not be vectorized",
 841         "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
 842     return false;
 843   }
 844   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
 845   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
 846
 847   return true;
 848 }
 849
 850 bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
 851   Value *In0 = const_cast<Value *>(V);
 852   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
 853   if (!PN)
 854     return false;
 855
 856   return Inductions.count(PN);
 857 }
 858
 859 bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
 860   auto *Inst = dyn_cast<Instruction>(V);
 861   return (Inst && InductionCastsToIgnore.count(Inst));
 862 }
 863
 864 bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
 865   return isInductionPhi(V) || isCastedInductionVariable(V);
 866 }
 867
 868 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
 869   return FirstOrderRecurrences.count(Phi);
 870 }
 871
 872 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
 873   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
 874 }
 875
 876 bool LoopVectorizationLegality::blockCanBePredicated(
 877     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
 878   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
 879
 880   for (Instruction &I : *BB) {
 881     // Check that we don't have a constant expression that can trap as operand.
 882     for (Value *Operand : I.operands()) {
 883       if (auto *C = dyn_cast<Constant>(Operand))
 884         if (C->canTrap())
 885           return false;
 886     }
 887     // We might be able to hoist the load.
 888     if (I.mayReadFromMemory()) {
 889       auto *LI = dyn_cast<LoadInst>(&I);
 890       if (!LI)
 891         return false;
 892       if (!SafePtrs.count(LI->getPointerOperand())) {
 893         // !llvm.mem.parallel_loop_access implies if-conversion safety.
 894         // Otherwise, record that the load needs (real or emulated) masking
 895         // and let the cost model decide.
 896         if (!IsAnnotatedParallel || PreserveGuards)
 897           MaskedOp.insert(LI);
 898         continue;
 899       }
 900     }
 901
 902     if (I.mayWriteToMemory()) {
 903       auto *SI = dyn_cast<StoreInst>(&I);
 904       if (!SI)
 905         return false;
 906       // Predicated store requires some form of masking:
 907       // 1) masked store HW instruction,
 908       // 2) emulation via load-blend-store (only if safe and legal to do so,
 909       //    be aware on the race conditions), or
 910       // 3) element-by-element predicate check and scalar store.
 911       MaskedOp.insert(SI);
 912       continue;
 913     }
 914     if (I.mayThrow())
 915       return false;
 916   }
 917
 918   return true;
 919 }
 920
 921 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
 922   if (!EnableIfConversion) {
 923     reportVectorizationFailure("If-conversion is disabled",
 924                                "if-conversion is disabled",
 925                                "IfConversionDisabled",
 926                                ORE, TheLoop);
 927     return false;
 928   }
 929
 930   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
 931
 932   // A list of pointers which are known to be dereferenceable within scope of
 933   // the loop body for each iteration of the loop which executes.  That is,
 934   // the memory pointed to can be dereferenced (with the access size implied by
 935   // the value's type) unconditionally within the loop header without
 936   // introducing a new fault.
 937   SmallPtrSet<Value *, 8> SafePointes;
 938
 939   // Collect safe addresses.
 940   for (BasicBlock *BB : TheLoop->blocks()) {
 941     if (!blockNeedsPredication(BB)) {
 942       for (Instruction &I : *BB)
 943         if (auto *Ptr = getLoadStorePointerOperand(&I))
 944           SafePointes.insert(Ptr);
 945       continue;
 946     }
 947
 948     // For a block which requires predication, a address may be safe to access
 949     // in the loop w/o predication if we can prove dereferenceability facts
 950     // sufficient to ensure it'll never fault within the loop. For the moment,
 951     // we restrict this to loads; stores are more complicated due to
 952     // concurrency restrictions.
 953     ScalarEvolution &SE = *PSE.getSE();
 954     for (Instruction &I : *BB) {
 955       LoadInst *LI = dyn_cast<LoadInst>(&I);
 956       if (LI && !mustSuppressSpeculation(*LI) &&
 957           isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
 958         SafePointes.insert(LI->getPointerOperand());
 959     }
 960   }
 961
 962   // Collect the blocks that need predication.
 963   BasicBlock *Header = TheLoop->getHeader();
 964   for (BasicBlock *BB : TheLoop->blocks()) {
 965     // We don't support switch statements inside loops.
 966     if (!isa<BranchInst>(BB->getTerminator())) {
 967       reportVectorizationFailure("Loop contains a switch statement",
 968                                  "loop contains a switch statement",
 969                                  "LoopContainsSwitch", ORE, TheLoop,
 970                                  BB->getTerminator());
 971       return false;
 972     }
 973
 974     // We must be able to predicate all blocks that need to be predicated.
 975     if (blockNeedsPredication(BB)) {
 976       if (!blockCanBePredicated(BB, SafePointes)) {
 977         reportVectorizationFailure(
 978             "Control flow cannot be substituted for a select",
 979             "control flow cannot be substituted for a select",
 980             "NoCFGForSelect", ORE, TheLoop,
 981             BB->getTerminator());
 982         return false;
 983       }
 984     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
 985       reportVectorizationFailure(
 986           "Control flow cannot be substituted for a select",
 987           "control flow cannot be substituted for a select",
 988           "NoCFGForSelect", ORE, TheLoop,
 989           BB->getTerminator());
 990       return false;
 991     }
 992   }
 993
 994   // We can if-convert this loop.
 995   return true;
 996 }
 997
 998 // Helper function to canVectorizeLoopNestCFG.
 999 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1000                                                     bool UseVPlanNativePath) {
1001   assert((UseVPlanNativePath || Lp->empty()) &&
1002          "VPlan-native path is not enabled.");
1003
1004   // TODO: ORE should be improved to show more accurate information when an
1005   // outer loop can't be vectorized because a nested loop is not understood or
1006   // legal. Something like: "outer_loop_location: loop not vectorized:
1007   // (inner_loop_location) loop control flow is not understood by vectorizer".
1008
1009   // Store the result and return it at the end instead of exiting early, in case
1010   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1011   bool Result = true;
1012   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1013
1014   // We must have a loop in canonical form. Loops with indirectbr in them cannot
1015   // be canonicalized.
1016   if (!Lp->getLoopPreheader()) {
1017     reportVectorizationFailure("Loop doesn't have a legal pre-header",
1018         "loop control flow is not understood by vectorizer",
1019         "CFGNotUnderstood", ORE, TheLoop);
1020     if (DoExtraAnalysis)
1021       Result = false;
1022     else
1023       return false;
1024   }
1025
1026   // We must have a single backedge.
1027   if (Lp->getNumBackEdges() != 1) {
1028     reportVectorizationFailure("The loop must have a single backedge",
1029         "loop control flow is not understood by vectorizer",
1030         "CFGNotUnderstood", ORE, TheLoop);
1031     if (DoExtraAnalysis)
1032       Result = false;
1033     else
1034       return false;
1035   }
1036
1037   // We must have a single exiting block.
1038   if (!Lp->getExitingBlock()) {
1039     reportVectorizationFailure("The loop must have an exiting block",
1040         "loop control flow is not understood by vectorizer",
1041         "CFGNotUnderstood", ORE, TheLoop);
1042     if (DoExtraAnalysis)
1043       Result = false;
1044     else
1045       return false;
1046   }
1047
1048   // We only handle bottom-tested loops, i.e. loop in which the condition is
1049   // checked at the end of each iteration. With that we can assume that all
1050   // instructions in the loop are executed the same number of times.
1051   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1052     reportVectorizationFailure("The exiting block is not the loop latch",
1053         "loop control flow is not understood by vectorizer",
1054         "CFGNotUnderstood", ORE, TheLoop);
1055     if (DoExtraAnalysis)
1056       Result = false;
1057     else
1058       return false;
1059   }
1060
1061   return Result;
1062 }
1063
1064 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1065     Loop *Lp, bool UseVPlanNativePath) {
1066   // Store the result and return it at the end instead of exiting early, in case
1067   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1068   bool Result = true;
1069   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1070   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1071     if (DoExtraAnalysis)
1072       Result = false;
1073     else
1074       return false;
1075   }
1076
1077   // Recursively check whether the loop control flow of nested loops is
1078   // understood.
1079   for (Loop *SubLp : *Lp)
1080     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1081       if (DoExtraAnalysis)
1082         Result = false;
1083       else
1084         return false;
1085     }
1086
1087   return Result;
1088 }
1089
1090 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1091   // Store the result and return it at the end instead of exiting early, in case
1092   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1093   bool Result = true;
1094
1095   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1096   // Check whether the loop-related control flow in the loop nest is expected by
1097   // vectorizer.
1098   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1099     if (DoExtraAnalysis)
1100       Result = false;
1101     else
1102       return false;
1103   }
1104
1105   // We need to have a loop header.
1106   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1107                     << '\n');
1108
1109   // Specific checks for outer loops. We skip the remaining legal checks at this
1110   // point because they don't support outer loops.
1111   if (!TheLoop->empty()) {
1112     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1113
1114     if (!canVectorizeOuterLoop()) {
1115       reportVectorizationFailure("Unsupported outer loop",
1116                                  "unsupported outer loop",
1117                                  "UnsupportedOuterLoop",
1118                                  ORE, TheLoop);
1119       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1120       // outer loops.
1121       return false;
1122     }
1123
1124     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1125     return Result;
1126   }
1127
1128   assert(TheLoop->empty() && "Inner loop expected.");
1129   // Check if we can if-convert non-single-bb loops.
1130   unsigned NumBlocks = TheLoop->getNumBlocks();
1131   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1132     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1133     if (DoExtraAnalysis)
1134       Result = false;
1135     else
1136       return false;
1137   }
1138
1139   // Check if we can vectorize the instructions and CFG in this loop.
1140   if (!canVectorizeInstrs()) {
1141     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1142     if (DoExtraAnalysis)
1143       Result = false;
1144     else
1145       return false;
1146   }
1147
1148   // Go over each instruction and look at memory deps.
1149   if (!canVectorizeMemory()) {
1150     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1151     if (DoExtraAnalysis)
1152       Result = false;
1153     else
1154       return false;
1155   }
1156
1157   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1158                     << (LAI->getRuntimePointerChecking()->Need
1159                             ? " (with a runtime bound check)"
1160                             : "")
1161                     << "!\n");
1162
1163   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1164   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1165     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1166
1167   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1168     reportVectorizationFailure("Too many SCEV checks needed",
1169         "Too many SCEV assumptions need to be made and checked at runtime",
1170         "TooManySCEVRunTimeChecks", ORE, TheLoop);
1171     if (DoExtraAnalysis)
1172       Result = false;
1173     else
1174       return false;
1175   }
1176
1177   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1178   // we can vectorize, and at this point we don't have any other mem analysis
1179   // which may limit our maximum vectorization factor, so just return true with
1180   // no restrictions.
1181   return Result;
1182 }
1183
1184 bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1185
1186   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1187
1188   if (!PrimaryInduction) {
1189     reportVectorizationFailure(
1190         "No primary induction, cannot fold tail by masking",
1191         "Missing a primary induction variable in the loop, which is "
1192         "needed in order to fold tail by masking as required.",
1193         "NoPrimaryInduction", ORE, TheLoop);
1194     return false;
1195   }
1196
1197   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1198
1199   for (auto &Reduction : *getReductionVars())
1200     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1201
1202   // TODO: handle non-reduction outside users when tail is folded by masking.
1203   for (auto *AE : AllowedExit) {
1204     // Check that all users of allowed exit values are inside the loop or
1205     // are the live-out of a reduction.
1206     if (ReductionLiveOuts.count(AE))
1207       continue;
1208     for (User *U : AE->users()) {
1209       Instruction *UI = cast<Instruction>(U);
1210       if (TheLoop->contains(UI))
1211         continue;
1212       reportVectorizationFailure(
1213           "Cannot fold tail by masking, loop has an outside user for",
1214           "Cannot fold tail by masking in the presence of live outs.",
1215           "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1216       return false;
1217     }
1218   }
1219
1220   // The list of pointers that we can safely read and write to remains empty.
1221   SmallPtrSet<Value *, 8> SafePointers;
1222
1223   // Check and mark all blocks for predication, including those that ordinarily
1224   // do not need predication such as the header block.
1225   for (BasicBlock *BB : TheLoop->blocks()) {
1226     if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
1227       reportVectorizationFailure(
1228           "Cannot fold tail by masking as required",
1229           "control flow cannot be substituted for a select",
1230           "NoCFGForSelect", ORE, TheLoop,
1231           BB->getTerminator());
1232       return false;
1233     }
1234   }
1235
1236   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1237   return true;
1238 }
1239
1240 } // namespace llvm