lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

   1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file provides loop vectorization legality analysis. Original code
  10 // resided in LoopVectorize.cpp for a long time.
  11 //
  12 // At this point, it is implemented as a utility class, not as an analysis
  13 // pass. It should be easy to create an analysis pass around it if there
  14 // is a need (but D45420 needs to happen first).
  15 //
  16 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
  17 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
  18 #include "llvm/Analysis/Loads.h"
  19 #include "llvm/Analysis/ValueTracking.h"
  20 #include "llvm/Analysis/VectorUtils.h"
  21 #include "llvm/IR/IntrinsicInst.h"
  22
  23 using namespace llvm;
  24
  25 #define LV_NAME "loop-vectorize"
  26 #define DEBUG_TYPE LV_NAME
  27
  28 extern cl::opt<bool> EnableVPlanPredication;
  29
  30 static cl::opt<bool>
  31     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
  32                        cl::desc("Enable if-conversion during vectorization."));
  33
  34 static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
  35     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
  36     cl::desc("The maximum allowed number of runtime memory checks with a "
  37              "vectorize(enable) pragma."));
  38
  39 static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
  40     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
  41     cl::desc("The maximum number of SCEV checks allowed."));
  42
  43 static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
  44     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
  45     cl::desc("The maximum number of SCEV checks allowed with a "
  46              "vectorize(enable) pragma"));
  47
  48 /// Maximum vectorization interleave count.
  49 static const unsigned MaxInterleaveFactor = 16;
  50
  51 namespace llvm {
  52
  53 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
  54   switch (Kind) {
  55   case HK_WIDTH:
  56     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
  57   case HK_UNROLL:
  58     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
  59   case HK_FORCE:
  60     return (Val <= 1);
  61   case HK_ISVECTORIZED:
  62   case HK_PREDICATE:
  63     return (Val == 0 || Val == 1);
  64   }
  65   return false;
  66 }
  67
  68 LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
  69                                        bool InterleaveOnlyWhenForced,
  70                                        OptimizationRemarkEmitter &ORE)
  71     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
  72       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
  73       Force("vectorize.enable", FK_Undefined, HK_FORCE),
  74       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
  75       Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),
  76       ORE(ORE) {
  77   // Populate values with existing loop metadata.
  78   getHintsFromMetadata();
  79
  80   // force-vector-interleave overrides DisableInterleaving.
  81   if (VectorizerParams::isInterleaveForced())
  82     Interleave.Value = VectorizerParams::VectorizationInterleave;
  83
  84   if (IsVectorized.Value != 1)
  85     // If the vectorization width and interleaving count are both 1 then
  86     // consider the loop to have been already vectorized because there's
  87     // nothing more that we can do.
  88     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
  89   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
  90              << "LV: Interleaving disabled by the pass manager\n");
  91 }
  92
  93 void LoopVectorizeHints::setAlreadyVectorized() {
  94   LLVMContext &Context = TheLoop->getHeader()->getContext();
  95
  96   MDNode *IsVectorizedMD = MDNode::get(
  97       Context,
  98       {MDString::get(Context, "llvm.loop.isvectorized"),
  99        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
 100   MDNode *LoopID = TheLoop->getLoopID();
 101   MDNode *NewLoopID =
 102       makePostTransformationMetadata(Context, LoopID,
 103                                      {Twine(Prefix(), "vectorize.").str(),
 104                                       Twine(Prefix(), "interleave.").str()},
 105                                      {IsVectorizedMD});
 106   TheLoop->setLoopID(NewLoopID);
 107
 108   // Update internal cache.
 109   IsVectorized.Value = 1;
 110 }
 111
 112 bool LoopVectorizeHints::allowVectorization(
 113     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
 114   if (getForce() == LoopVectorizeHints::FK_Disabled) {
 115     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
 116     emitRemarkWithHints();
 117     return false;
 118   }
 119
 120   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
 121     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
 122     emitRemarkWithHints();
 123     return false;
 124   }
 125
 126   if (getIsVectorized() == 1) {
 127     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
 128     // FIXME: Add interleave.disable metadata. This will allow
 129     // vectorize.disable to be used without disabling the pass and errors
 130     // to differentiate between disabled vectorization and a width of 1.
 131     ORE.emit([&]() {
 132       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
 133                                         "AllDisabled", L->getStartLoc(),
 134                                         L->getHeader())
 135              << "loop not vectorized: vectorization and interleaving are "
 136                 "explicitly disabled, or the loop has already been "
 137                 "vectorized";
 138     });
 139     return false;
 140   }
 141
 142   return true;
 143 }
 144
 145 void LoopVectorizeHints::emitRemarkWithHints() const {
 146   using namespace ore;
 147
 148   ORE.emit([&]() {
 149     if (Force.Value == LoopVectorizeHints::FK_Disabled)
 150       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
 151                                       TheLoop->getStartLoc(),
 152                                       TheLoop->getHeader())
 153              << "loop not vectorized: vectorization is explicitly disabled";
 154     else {
 155       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
 156                                  TheLoop->getStartLoc(), TheLoop->getHeader());
 157       R << "loop not vectorized";
 158       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
 159         R << " (Force=" << NV("Force", true);
 160         if (Width.Value != 0)
 161           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
 162         if (Interleave.Value != 0)
 163           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
 164         R << ")";
 165       }
 166       return R;
 167     }
 168   });
 169 }
 170
 171 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
 172   if (getWidth() == 1)
 173     return LV_NAME;
 174   if (getForce() == LoopVectorizeHints::FK_Disabled)
 175     return LV_NAME;
 176   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
 177     return LV_NAME;
 178   return OptimizationRemarkAnalysis::AlwaysPrint;
 179 }
 180
 181 void LoopVectorizeHints::getHintsFromMetadata() {
 182   MDNode *LoopID = TheLoop->getLoopID();
 183   if (!LoopID)
 184     return;
 185
 186   // First operand should refer to the loop id itself.
 187   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
 188   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
 189
 190   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
 191     const MDString *S = nullptr;
 192     SmallVector<Metadata *, 4> Args;
 193
 194     // The expected hint is either a MDString or a MDNode with the first
 195     // operand a MDString.
 196     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
 197       if (!MD || MD->getNumOperands() == 0)
 198         continue;
 199       S = dyn_cast<MDString>(MD->getOperand(0));
 200       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
 201         Args.push_back(MD->getOperand(i));
 202     } else {
 203       S = dyn_cast<MDString>(LoopID->getOperand(i));
 204       assert(Args.size() == 0 && "too many arguments for MDString");
 205     }
 206
 207     if (!S)
 208       continue;
 209
 210     // Check if the hint starts with the loop metadata prefix.
 211     StringRef Name = S->getString();
 212     if (Args.size() == 1)
 213       setHint(Name, Args[0]);
 214   }
 215 }
 216
 217 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
 218   if (!Name.startswith(Prefix()))
 219     return;
 220   Name = Name.substr(Prefix().size(), StringRef::npos);
 221
 222   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
 223   if (!C)
 224     return;
 225   unsigned Val = C->getZExtValue();
 226
 227   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
 228   for (auto H : Hints) {
 229     if (Name == H->Name) {
 230       if (H->validate(Val))
 231         H->Value = Val;
 232       else
 233         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
 234       break;
 235     }
 236   }
 237 }
 238
 239 bool LoopVectorizationRequirements::doesNotMeet(
 240     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
 241   const char *PassName = Hints.vectorizeAnalysisPassName();
 242   bool Failed = false;
 243   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
 244     ORE.emit([&]() {
 245       return OptimizationRemarkAnalysisFPCommute(
 246                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
 247                  UnsafeAlgebraInst->getParent())
 248              << "loop not vectorized: cannot prove it is safe to reorder "
 249                 "floating-point operations";
 250     });
 251     Failed = true;
 252   }
 253
 254   // Test if runtime memcheck thresholds are exceeded.
 255   bool PragmaThresholdReached =
 256       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
 257   bool ThresholdReached =
 258       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
 259   if ((ThresholdReached && !Hints.allowReordering()) ||
 260       PragmaThresholdReached) {
 261     ORE.emit([&]() {
 262       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
 263                                                 L->getStartLoc(),
 264                                                 L->getHeader())
 265              << "loop not vectorized: cannot prove it is safe to reorder "
 266                 "memory operations";
 267     });
 268     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
 269     Failed = true;
 270   }
 271
 272   return Failed;
 273 }
 274
 275 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
 276 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
 277 // executing the inner loop will execute the same iterations). This check is
 278 // very constrained for now but it will be relaxed in the future. \p Lp is
 279 // considered uniform if it meets all the following conditions:
 280 //   1) it has a canonical IV (starting from 0 and with stride 1),
 281 //   2) its latch terminator is a conditional branch and,
 282 //   3) its latch condition is a compare instruction whose operands are the
 283 //      canonical IV and an OuterLp invariant.
 284 // This check doesn't take into account the uniformity of other conditions not
 285 // related to the loop latch because they don't affect the loop uniformity.
 286 //
 287 // NOTE: We decided to keep all these checks and its associated documentation
 288 // together so that we can easily have a picture of the current supported loop
 289 // nests. However, some of the current checks don't depend on \p OuterLp and
 290 // would be redundantly executed for each \p Lp if we invoked this function for
 291 // different candidate outer loops. This is not the case for now because we
 292 // don't currently have the infrastructure to evaluate multiple candidate outer
 293 // loops and \p OuterLp will be a fixed parameter while we only support explicit
 294 // outer loop vectorization. It's also very likely that these checks go away
 295 // before introducing the aforementioned infrastructure. However, if this is not
 296 // the case, we should move the \p OuterLp independent checks to a separate
 297 // function that is only executed once for each \p Lp.
 298 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
 299   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
 300
 301   // If Lp is the outer loop, it's uniform by definition.
 302   if (Lp == OuterLp)
 303     return true;
 304   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
 305
 306   // 1.
 307   PHINode *IV = Lp->getCanonicalInductionVariable();
 308   if (!IV) {
 309     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
 310     return false;
 311   }
 312
 313   // 2.
 314   BasicBlock *Latch = Lp->getLoopLatch();
 315   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
 316   if (!LatchBr || LatchBr->isUnconditional()) {
 317     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
 318     return false;
 319   }
 320
 321   // 3.
 322   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
 323   if (!LatchCmp) {
 324     LLVM_DEBUG(
 325         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
 326     return false;
 327   }
 328
 329   Value *CondOp0 = LatchCmp->getOperand(0);
 330   Value *CondOp1 = LatchCmp->getOperand(1);
 331   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
 332   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
 333       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
 334     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
 335     return false;
 336   }
 337
 338   return true;
 339 }
 340
 341 // Return true if \p Lp and all its nested loops are uniform with regard to \p
 342 // OuterLp.
 343 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
 344   if (!isUniformLoop(Lp, OuterLp))
 345     return false;
 346
 347   // Check if nested loops are uniform.
 348   for (Loop *SubLp : *Lp)
 349     if (!isUniformLoopNest(SubLp, OuterLp))
 350       return false;
 351
 352   return true;
 353 }
 354
 355 /// Check whether it is safe to if-convert this phi node.
 356 ///
 357 /// Phi nodes with constant expressions that can trap are not safe to if
 358 /// convert.
 359 static bool canIfConvertPHINodes(BasicBlock *BB) {
 360   for (PHINode &Phi : BB->phis()) {
 361     for (Value *V : Phi.incoming_values())
 362       if (auto *C = dyn_cast<Constant>(V))
 363         if (C->canTrap())
 364           return false;
 365   }
 366   return true;
 367 }
 368
 369 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
 370   if (Ty->isPointerTy())
 371     return DL.getIntPtrType(Ty);
 372
 373   // It is possible that char's or short's overflow when we ask for the loop's
 374   // trip count, work around this by changing the type size.
 375   if (Ty->getScalarSizeInBits() < 32)
 376     return Type::getInt32Ty(Ty->getContext());
 377
 378   return Ty;
 379 }
 380
 381 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
 382   Ty0 = convertPointerToIntegerType(DL, Ty0);
 383   Ty1 = convertPointerToIntegerType(DL, Ty1);
 384   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
 385     return Ty0;
 386   return Ty1;
 387 }
 388
 389 /// Check that the instruction has outside loop users and is not an
 390 /// identified reduction variable.
 391 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
 392                                SmallPtrSetImpl<Value *> &AllowedExit) {
 393   // Reductions, Inductions and non-header phis are allowed to have exit users. All
 394   // other instructions must not have external users.
 395   if (!AllowedExit.count(Inst))
 396     // Check that all of the users of the loop are inside the BB.
 397     for (User *U : Inst->users()) {
 398       Instruction *UI = cast<Instruction>(U);
 399       // This user may be a reduction exit value.
 400       if (!TheLoop->contains(UI)) {
 401         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
 402         return true;
 403       }
 404     }
 405   return false;
 406 }
 407
 408 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
 409   const ValueToValueMap &Strides =
 410       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
 411
 412   bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize();
 413   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
 414   if (Stride == 1 || Stride == -1)
 415     return Stride;
 416   return 0;
 417 }
 418
 419 bool LoopVectorizationLegality::isUniform(Value *V) {
 420   return LAI->isUniform(V);
 421 }
 422
 423 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
 424   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
 425   // Store the result and return it at the end instead of exiting early, in case
 426   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
 427   bool Result = true;
 428   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
 429
 430   for (BasicBlock *BB : TheLoop->blocks()) {
 431     // Check whether the BB terminator is a BranchInst. Any other terminator is
 432     // not supported yet.
 433     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
 434     if (!Br) {
 435       reportVectorizationFailure("Unsupported basic block terminator",
 436           "loop control flow is not understood by vectorizer",
 437           "CFGNotUnderstood", ORE, TheLoop);
 438       if (DoExtraAnalysis)
 439         Result = false;
 440       else
 441         return false;
 442     }
 443
 444     // Check whether the BranchInst is a supported one. Only unconditional
 445     // branches, conditional branches with an outer loop invariant condition or
 446     // backedges are supported.
 447     // FIXME: We skip these checks when VPlan predication is enabled as we
 448     // want to allow divergent branches. This whole check will be removed
 449     // once VPlan predication is on by default.
 450     if (!EnableVPlanPredication && Br && Br->isConditional() &&
 451         !TheLoop->isLoopInvariant(Br->getCondition()) &&
 452         !LI->isLoopHeader(Br->getSuccessor(0)) &&
 453         !LI->isLoopHeader(Br->getSuccessor(1))) {
 454       reportVectorizationFailure("Unsupported conditional branch",
 455           "loop control flow is not understood by vectorizer",
 456           "CFGNotUnderstood", ORE, TheLoop);
 457       if (DoExtraAnalysis)
 458         Result = false;
 459       else
 460         return false;
 461     }
 462   }
 463
 464   // Check whether inner loops are uniform. At this point, we only support
 465   // simple outer loops scenarios with uniform nested loops.
 466   if (!isUniformLoopNest(TheLoop /*loop nest*/,
 467                          TheLoop /*context outer loop*/)) {
 468     reportVectorizationFailure("Outer loop contains divergent loops",
 469         "loop control flow is not understood by vectorizer",
 470         "CFGNotUnderstood", ORE, TheLoop);
 471     if (DoExtraAnalysis)
 472       Result = false;
 473     else
 474       return false;
 475   }
 476
 477   // Check whether we are able to set up outer loop induction.
 478   if (!setupOuterLoopInductions()) {
 479     reportVectorizationFailure("Unsupported outer loop Phi(s)",
 480                                "Unsupported outer loop Phi(s)",
 481                                "UnsupportedPhi", ORE, TheLoop);
 482     if (DoExtraAnalysis)
 483       Result = false;
 484     else
 485       return false;
 486   }
 487
 488   return Result;
 489 }
 490
 491 void LoopVectorizationLegality::addInductionPhi(
 492     PHINode *Phi, const InductionDescriptor &ID,
 493     SmallPtrSetImpl<Value *> &AllowedExit) {
 494   Inductions[Phi] = ID;
 495
 496   // In case this induction also comes with casts that we know we can ignore
 497   // in the vectorized loop body, record them here. All casts could be recorded
 498   // here for ignoring, but suffices to record only the first (as it is the
 499   // only one that may bw used outside the cast sequence).
 500   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
 501   if (!Casts.empty())
 502     InductionCastsToIgnore.insert(*Casts.begin());
 503
 504   Type *PhiTy = Phi->getType();
 505   const DataLayout &DL = Phi->getModule()->getDataLayout();
 506
 507   // Get the widest type.
 508   if (!PhiTy->isFloatingPointTy()) {
 509     if (!WidestIndTy)
 510       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
 511     else
 512       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
 513   }
 514
 515   // Int inductions are special because we only allow one IV.
 516   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
 517       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
 518       isa<Constant>(ID.getStartValue()) &&
 519       cast<Constant>(ID.getStartValue())->isNullValue()) {
 520
 521     // Use the phi node with the widest type as induction. Use the last
 522     // one if there are multiple (no good reason for doing this other
 523     // than it is expedient). We've checked that it begins at zero and
 524     // steps by one, so this is a canonical induction variable.
 525     if (!PrimaryInduction || PhiTy == WidestIndTy)
 526       PrimaryInduction = Phi;
 527   }
 528
 529   // Both the PHI node itself, and the "post-increment" value feeding
 530   // back into the PHI node may have external users.
 531   // We can allow those uses, except if the SCEVs we have for them rely
 532   // on predicates that only hold within the loop, since allowing the exit
 533   // currently means re-using this SCEV outside the loop (see PR33706 for more
 534   // details).
 535   if (PSE.getUnionPredicate().isAlwaysTrue()) {
 536     AllowedExit.insert(Phi);
 537     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
 538   }
 539
 540   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
 541 }
 542
 543 bool LoopVectorizationLegality::setupOuterLoopInductions() {
 544   BasicBlock *Header = TheLoop->getHeader();
 545
 546   // Returns true if a given Phi is a supported induction.
 547   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
 548     InductionDescriptor ID;
 549     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
 550         ID.getKind() == InductionDescriptor::IK_IntInduction) {
 551       addInductionPhi(&Phi, ID, AllowedExit);
 552       return true;
 553     } else {
 554       // Bail out for any Phi in the outer loop header that is not a supported
 555       // induction.
 556       LLVM_DEBUG(
 557           dbgs()
 558           << "LV: Found unsupported PHI for outer loop vectorization.\n");
 559       return false;
 560     }
 561   };
 562
 563   if (llvm::all_of(Header->phis(), isSupportedPhi))
 564     return true;
 565   else
 566     return false;
 567 }
 568
 569 bool LoopVectorizationLegality::canVectorizeInstrs() {
 570   BasicBlock *Header = TheLoop->getHeader();
 571
 572   // Look for the attribute signaling the absence of NaNs.
 573   Function &F = *Header->getParent();
 574   HasFunNoNaNAttr =
 575       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
 576
 577   // For each block in the loop.
 578   for (BasicBlock *BB : TheLoop->blocks()) {
 579     // Scan the instructions in the block and look for hazards.
 580     for (Instruction &I : *BB) {
 581       if (auto *Phi = dyn_cast<PHINode>(&I)) {
 582         Type *PhiTy = Phi->getType();
 583         // Check that this PHI type is allowed.
 584         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
 585             !PhiTy->isPointerTy()) {
 586           reportVectorizationFailure("Found a non-int non-pointer PHI",
 587                                      "loop control flow is not understood by vectorizer",
 588                                      "CFGNotUnderstood", ORE, TheLoop);
 589           return false;
 590         }
 591
 592         // If this PHINode is not in the header block, then we know that we
 593         // can convert it to select during if-conversion. No need to check if
 594         // the PHIs in this block are induction or reduction variables.
 595         if (BB != Header) {
 596           // Non-header phi nodes that have outside uses can be vectorized. Add
 597           // them to the list of allowed exits.
 598           // Unsafe cyclic dependencies with header phis are identified during
 599           // legalization for reduction, induction and first order
 600           // recurrences.
 601           AllowedExit.insert(&I);
 602           continue;
 603         }
 604
 605         // We only allow if-converted PHIs with exactly two incoming values.
 606         if (Phi->getNumIncomingValues() != 2) {
 607           reportVectorizationFailure("Found an invalid PHI",
 608               "loop control flow is not understood by vectorizer",
 609               "CFGNotUnderstood", ORE, TheLoop, Phi);
 610           return false;
 611         }
 612
 613         RecurrenceDescriptor RedDes;
 614         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
 615                                                  DT)) {
 616           if (RedDes.hasUnsafeAlgebra())
 617             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
 618           AllowedExit.insert(RedDes.getLoopExitInstr());
 619           Reductions[Phi] = RedDes;
 620           continue;
 621         }
 622
 623         // TODO: Instead of recording the AllowedExit, it would be good to record the
 624         // complementary set: NotAllowedExit. These include (but may not be
 625         // limited to):
 626         // 1. Reduction phis as they represent the one-before-last value, which
 627         // is not available when vectorized
 628         // 2. Induction phis and increment when SCEV predicates cannot be used
 629         // outside the loop - see addInductionPhi
 630         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
 631         // outside the loop - see call to hasOutsideLoopUser in the non-phi
 632         // handling below
 633         // 4. FirstOrderRecurrence phis that can possibly be handled by
 634         // extraction.
 635         // By recording these, we can then reason about ways to vectorize each
 636         // of these NotAllowedExit.
 637         InductionDescriptor ID;
 638         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
 639           addInductionPhi(Phi, ID, AllowedExit);
 640           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
 641             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
 642           continue;
 643         }
 644
 645         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
 646                                                          SinkAfter, DT)) {
 647           FirstOrderRecurrences.insert(Phi);
 648           continue;
 649         }
 650
 651         // As a last resort, coerce the PHI to a AddRec expression
 652         // and re-try classifying it a an induction PHI.
 653         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
 654           addInductionPhi(Phi, ID, AllowedExit);
 655           continue;
 656         }
 657
 658         reportVectorizationFailure("Found an unidentified PHI",
 659             "value that could not be identified as "
 660             "reduction is used outside the loop",
 661             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
 662         return false;
 663       } // end of PHI handling
 664
 665       // We handle calls that:
 666       //   * Are debug info intrinsics.
 667       //   * Have a mapping to an IR intrinsic.
 668       //   * Have a vector version available.
 669       auto *CI = dyn_cast<CallInst>(&I);
 670       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
 671           !isa<DbgInfoIntrinsic>(CI) &&
 672           !(CI->getCalledFunction() && TLI &&
 673             TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
 674         // If the call is a recognized math libary call, it is likely that
 675         // we can vectorize it given loosened floating-point constraints.
 676         LibFunc Func;
 677         bool IsMathLibCall =
 678             TLI && CI->getCalledFunction() &&
 679             CI->getType()->isFloatingPointTy() &&
 680             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
 681             TLI->hasOptimizedCodeGen(Func);
 682
 683         if (IsMathLibCall) {
 684           // TODO: Ideally, we should not use clang-specific language here,
 685           // but it's hard to provide meaningful yet generic advice.
 686           // Also, should this be guarded by allowExtraAnalysis() and/or be part
 687           // of the returned info from isFunctionVectorizable()?
 688           reportVectorizationFailure("Found a non-intrinsic callsite",
 689               "library call cannot be vectorized. "
 690               "Try compiling with -fno-math-errno, -ffast-math, "
 691               "or similar flags",
 692               "CantVectorizeLibcall", ORE, TheLoop, CI);
 693         } else {
 694           reportVectorizationFailure("Found a non-intrinsic callsite",
 695                                      "call instruction cannot be vectorized",
 696                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
 697         }
 698         return false;
 699       }
 700
 701       // Some intrinsics have scalar arguments and should be same in order for
 702       // them to be vectorized (i.e. loop invariant).
 703       if (CI) {
 704         auto *SE = PSE.getSE();
 705         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
 706         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
 707           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
 708             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
 709               reportVectorizationFailure("Found unvectorizable intrinsic",
 710                   "intrinsic instruction cannot be vectorized",
 711                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
 712               return false;
 713             }
 714           }
 715       }
 716
 717       // Check that the instruction return type is vectorizable.
 718       // Also, we can't vectorize extractelement instructions.
 719       if ((!VectorType::isValidElementType(I.getType()) &&
 720            !I.getType()->isVoidTy()) ||
 721           isa<ExtractElementInst>(I)) {
 722         reportVectorizationFailure("Found unvectorizable type",
 723             "instruction return type cannot be vectorized",
 724             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
 725         return false;
 726       }
 727
 728       // Check that the stored type is vectorizable.
 729       if (auto *ST = dyn_cast<StoreInst>(&I)) {
 730         Type *T = ST->getValueOperand()->getType();
 731         if (!VectorType::isValidElementType(T)) {
 732           reportVectorizationFailure("Store instruction cannot be vectorized",
 733                                      "store instruction cannot be vectorized",
 734                                      "CantVectorizeStore", ORE, TheLoop, ST);
 735           return false;
 736         }
 737
 738         // For nontemporal stores, check that a nontemporal vector version is
 739         // supported on the target.
 740         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
 741           // Arbitrarily try a vector of 2 elements.
 742           Type *VecTy = VectorType::get(T, /*NumElements=*/2);
 743           assert(VecTy && "did not find vectorized version of stored type");
 744           const MaybeAlign Alignment = getLoadStoreAlignment(ST);
 745           assert(Alignment && "Alignment should be set");
 746           if (!TTI->isLegalNTStore(VecTy, *Alignment)) {
 747             reportVectorizationFailure(
 748                 "nontemporal store instruction cannot be vectorized",
 749                 "nontemporal store instruction cannot be vectorized",
 750                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
 751             return false;
 752           }
 753         }
 754
 755       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
 756         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
 757           // For nontemporal loads, check that a nontemporal vector version is
 758           // supported on the target (arbitrarily try a vector of 2 elements).
 759           Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
 760           assert(VecTy && "did not find vectorized version of load type");
 761           const MaybeAlign Alignment = getLoadStoreAlignment(LD);
 762           assert(Alignment && "Alignment should be set");
 763           if (!TTI->isLegalNTLoad(VecTy, *Alignment)) {
 764             reportVectorizationFailure(
 765                 "nontemporal load instruction cannot be vectorized",
 766                 "nontemporal load instruction cannot be vectorized",
 767                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
 768             return false;
 769           }
 770         }
 771
 772         // FP instructions can allow unsafe algebra, thus vectorizable by
 773         // non-IEEE-754 compliant SIMD units.
 774         // This applies to floating-point math operations and calls, not memory
 775         // operations, shuffles, or casts, as they don't change precision or
 776         // semantics.
 777       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
 778                  !I.isFast()) {
 779         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
 780         Hints->setPotentiallyUnsafe();
 781       }
 782
 783       // Reduction instructions are allowed to have exit users.
 784       // All other instructions must not have external users.
 785       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
 786         // We can safely vectorize loops where instructions within the loop are
 787         // used outside the loop only if the SCEV predicates within the loop is
 788         // same as outside the loop. Allowing the exit means reusing the SCEV
 789         // outside the loop.
 790         if (PSE.getUnionPredicate().isAlwaysTrue()) {
 791           AllowedExit.insert(&I);
 792           continue;
 793         }
 794         reportVectorizationFailure("Value cannot be used outside the loop",
 795                                    "value cannot be used outside the loop",
 796                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
 797         return false;
 798       }
 799     } // next instr.
 800   }
 801
 802   if (!PrimaryInduction) {
 803     if (Inductions.empty()) {
 804       reportVectorizationFailure("Did not find one integer induction var",
 805           "loop induction variable could not be identified",
 806           "NoInductionVariable", ORE, TheLoop);
 807       return false;
 808     } else if (!WidestIndTy) {
 809       reportVectorizationFailure("Did not find one integer induction var",
 810           "integer loop induction variable could not be identified",
 811           "NoIntegerInductionVariable", ORE, TheLoop);
 812       return false;
 813     } else {
 814       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
 815     }
 816   }
 817
 818   // Now we know the widest induction type, check if our found induction
 819   // is the same size. If it's not, unset it here and InnerLoopVectorizer
 820   // will create another.
 821   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
 822     PrimaryInduction = nullptr;
 823
 824   return true;
 825 }
 826
 827 bool LoopVectorizationLegality::canVectorizeMemory() {
 828   LAI = &(*GetLAA)(*TheLoop);
 829   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
 830   if (LAR) {
 831     ORE->emit([&]() {
 832       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
 833                                         "loop not vectorized: ", *LAR);
 834     });
 835   }
 836   if (!LAI->canVectorizeMemory())
 837     return false;
 838
 839   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
 840     reportVectorizationFailure("Stores to a uniform address",
 841         "write to a loop invariant address could not be vectorized",
 842         "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
 843     return false;
 844   }
 845   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
 846   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
 847
 848   return true;
 849 }
 850
 851 bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
 852   Value *In0 = const_cast<Value *>(V);
 853   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
 854   if (!PN)
 855     return false;
 856
 857   return Inductions.count(PN);
 858 }
 859
 860 bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
 861   auto *Inst = dyn_cast<Instruction>(V);
 862   return (Inst && InductionCastsToIgnore.count(Inst));
 863 }
 864
 865 bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
 866   return isInductionPhi(V) || isCastedInductionVariable(V);
 867 }
 868
 869 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
 870   return FirstOrderRecurrences.count(Phi);
 871 }
 872
 873 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
 874   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
 875 }
 876
 877 bool LoopVectorizationLegality::blockCanBePredicated(
 878     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
 879   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
 880
 881   for (Instruction &I : *BB) {
 882     // Check that we don't have a constant expression that can trap as operand.
 883     for (Value *Operand : I.operands()) {
 884       if (auto *C = dyn_cast<Constant>(Operand))
 885         if (C->canTrap())
 886           return false;
 887     }
 888     // We might be able to hoist the load.
 889     if (I.mayReadFromMemory()) {
 890       auto *LI = dyn_cast<LoadInst>(&I);
 891       if (!LI)
 892         return false;
 893       if (!SafePtrs.count(LI->getPointerOperand())) {
 894         // !llvm.mem.parallel_loop_access implies if-conversion safety.
 895         // Otherwise, record that the load needs (real or emulated) masking
 896         // and let the cost model decide.
 897         if (!IsAnnotatedParallel || PreserveGuards)
 898           MaskedOp.insert(LI);
 899         continue;
 900       }
 901     }
 902
 903     if (I.mayWriteToMemory()) {
 904       auto *SI = dyn_cast<StoreInst>(&I);
 905       if (!SI)
 906         return false;
 907       // Predicated store requires some form of masking:
 908       // 1) masked store HW instruction,
 909       // 2) emulation via load-blend-store (only if safe and legal to do so,
 910       //    be aware on the race conditions), or
 911       // 3) element-by-element predicate check and scalar store.
 912       MaskedOp.insert(SI);
 913       continue;
 914     }
 915     if (I.mayThrow())
 916       return false;
 917   }
 918
 919   return true;
 920 }
 921
 922 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
 923   if (!EnableIfConversion) {
 924     reportVectorizationFailure("If-conversion is disabled",
 925                                "if-conversion is disabled",
 926                                "IfConversionDisabled",
 927                                ORE, TheLoop);
 928     return false;
 929   }
 930
 931   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
 932
 933   // A list of pointers which are known to be dereferenceable within scope of
 934   // the loop body for each iteration of the loop which executes.  That is,
 935   // the memory pointed to can be dereferenced (with the access size implied by
 936   // the value's type) unconditionally within the loop header without
 937   // introducing a new fault.
 938   SmallPtrSet<Value *, 8> SafePointes;
 939
 940   // Collect safe addresses.
 941   for (BasicBlock *BB : TheLoop->blocks()) {
 942     if (!blockNeedsPredication(BB)) {
 943       for (Instruction &I : *BB)
 944         if (auto *Ptr = getLoadStorePointerOperand(&I))
 945           SafePointes.insert(Ptr);
 946       continue;
 947     }
 948
 949     // For a block which requires predication, a address may be safe to access
 950     // in the loop w/o predication if we can prove dereferenceability facts
 951     // sufficient to ensure it'll never fault within the loop. For the moment,
 952     // we restrict this to loads; stores are more complicated due to
 953     // concurrency restrictions.
 954     ScalarEvolution &SE = *PSE.getSE();
 955     for (Instruction &I : *BB) {
 956       LoadInst *LI = dyn_cast<LoadInst>(&I);
 957       if (LI && !mustSuppressSpeculation(*LI) &&
 958           isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
 959         SafePointes.insert(LI->getPointerOperand());
 960     }
 961   }
 962
 963   // Collect the blocks that need predication.
 964   BasicBlock *Header = TheLoop->getHeader();
 965   for (BasicBlock *BB : TheLoop->blocks()) {
 966     // We don't support switch statements inside loops.
 967     if (!isa<BranchInst>(BB->getTerminator())) {
 968       reportVectorizationFailure("Loop contains a switch statement",
 969                                  "loop contains a switch statement",
 970                                  "LoopContainsSwitch", ORE, TheLoop,
 971                                  BB->getTerminator());
 972       return false;
 973     }
 974
 975     // We must be able to predicate all blocks that need to be predicated.
 976     if (blockNeedsPredication(BB)) {
 977       if (!blockCanBePredicated(BB, SafePointes)) {
 978         reportVectorizationFailure(
 979             "Control flow cannot be substituted for a select",
 980             "control flow cannot be substituted for a select",
 981             "NoCFGForSelect", ORE, TheLoop,
 982             BB->getTerminator());
 983         return false;
 984       }
 985     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
 986       reportVectorizationFailure(
 987           "Control flow cannot be substituted for a select",
 988           "control flow cannot be substituted for a select",
 989           "NoCFGForSelect", ORE, TheLoop,
 990           BB->getTerminator());
 991       return false;
 992     }
 993   }
 994
 995   // We can if-convert this loop.
 996   return true;
 997 }
 998
 999 // Helper function to canVectorizeLoopNestCFG.
1000 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1001                                                     bool UseVPlanNativePath) {
1002   assert((UseVPlanNativePath || Lp->empty()) &&
1003          "VPlan-native path is not enabled.");
1004
1005   // TODO: ORE should be improved to show more accurate information when an
1006   // outer loop can't be vectorized because a nested loop is not understood or
1007   // legal. Something like: "outer_loop_location: loop not vectorized:
1008   // (inner_loop_location) loop control flow is not understood by vectorizer".
1009
1010   // Store the result and return it at the end instead of exiting early, in case
1011   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1012   bool Result = true;
1013   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1014
1015   // We must have a loop in canonical form. Loops with indirectbr in them cannot
1016   // be canonicalized.
1017   if (!Lp->getLoopPreheader()) {
1018     reportVectorizationFailure("Loop doesn't have a legal pre-header",
1019         "loop control flow is not understood by vectorizer",
1020         "CFGNotUnderstood", ORE, TheLoop);
1021     if (DoExtraAnalysis)
1022       Result = false;
1023     else
1024       return false;
1025   }
1026
1027   // We must have a single backedge.
1028   if (Lp->getNumBackEdges() != 1) {
1029     reportVectorizationFailure("The loop must have a single backedge",
1030         "loop control flow is not understood by vectorizer",
1031         "CFGNotUnderstood", ORE, TheLoop);
1032     if (DoExtraAnalysis)
1033       Result = false;
1034     else
1035       return false;
1036   }
1037
1038   // We must have a single exiting block.
1039   if (!Lp->getExitingBlock()) {
1040     reportVectorizationFailure("The loop must have an exiting block",
1041         "loop control flow is not understood by vectorizer",
1042         "CFGNotUnderstood", ORE, TheLoop);
1043     if (DoExtraAnalysis)
1044       Result = false;
1045     else
1046       return false;
1047   }
1048
1049   // We only handle bottom-tested loops, i.e. loop in which the condition is
1050   // checked at the end of each iteration. With that we can assume that all
1051   // instructions in the loop are executed the same number of times.
1052   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1053     reportVectorizationFailure("The exiting block is not the loop latch",
1054         "loop control flow is not understood by vectorizer",
1055         "CFGNotUnderstood", ORE, TheLoop);
1056     if (DoExtraAnalysis)
1057       Result = false;
1058     else
1059       return false;
1060   }
1061
1062   return Result;
1063 }
1064
1065 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1066     Loop *Lp, bool UseVPlanNativePath) {
1067   // Store the result and return it at the end instead of exiting early, in case
1068   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1069   bool Result = true;
1070   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1071   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1072     if (DoExtraAnalysis)
1073       Result = false;
1074     else
1075       return false;
1076   }
1077
1078   // Recursively check whether the loop control flow of nested loops is
1079   // understood.
1080   for (Loop *SubLp : *Lp)
1081     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1082       if (DoExtraAnalysis)
1083         Result = false;
1084       else
1085         return false;
1086     }
1087
1088   return Result;
1089 }
1090
1091 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1092   // Store the result and return it at the end instead of exiting early, in case
1093   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1094   bool Result = true;
1095
1096   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1097   // Check whether the loop-related control flow in the loop nest is expected by
1098   // vectorizer.
1099   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1100     if (DoExtraAnalysis)
1101       Result = false;
1102     else
1103       return false;
1104   }
1105
1106   // We need to have a loop header.
1107   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1108                     << '\n');
1109
1110   // Specific checks for outer loops. We skip the remaining legal checks at this
1111   // point because they don't support outer loops.
1112   if (!TheLoop->empty()) {
1113     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1114
1115     if (!canVectorizeOuterLoop()) {
1116       reportVectorizationFailure("Unsupported outer loop",
1117                                  "unsupported outer loop",
1118                                  "UnsupportedOuterLoop",
1119                                  ORE, TheLoop);
1120       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1121       // outer loops.
1122       return false;
1123     }
1124
1125     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1126     return Result;
1127   }
1128
1129   assert(TheLoop->empty() && "Inner loop expected.");
1130   // Check if we can if-convert non-single-bb loops.
1131   unsigned NumBlocks = TheLoop->getNumBlocks();
1132   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1133     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1134     if (DoExtraAnalysis)
1135       Result = false;
1136     else
1137       return false;
1138   }
1139
1140   // Check if we can vectorize the instructions and CFG in this loop.
1141   if (!canVectorizeInstrs()) {
1142     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1143     if (DoExtraAnalysis)
1144       Result = false;
1145     else
1146       return false;
1147   }
1148
1149   // Go over each instruction and look at memory deps.
1150   if (!canVectorizeMemory()) {
1151     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1152     if (DoExtraAnalysis)
1153       Result = false;
1154     else
1155       return false;
1156   }
1157
1158   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1159                     << (LAI->getRuntimePointerChecking()->Need
1160                             ? " (with a runtime bound check)"
1161                             : "")
1162                     << "!\n");
1163
1164   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1165   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1166     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1167
1168   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1169     reportVectorizationFailure("Too many SCEV checks needed",
1170         "Too many SCEV assumptions need to be made and checked at runtime",
1171         "TooManySCEVRunTimeChecks", ORE, TheLoop);
1172     if (DoExtraAnalysis)
1173       Result = false;
1174     else
1175       return false;
1176   }
1177
1178   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1179   // we can vectorize, and at this point we don't have any other mem analysis
1180   // which may limit our maximum vectorization factor, so just return true with
1181   // no restrictions.
1182   return Result;
1183 }
1184
1185 bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1186
1187   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1188
1189   if (!PrimaryInduction) {
1190     reportVectorizationFailure(
1191         "No primary induction, cannot fold tail by masking",
1192         "Missing a primary induction variable in the loop, which is "
1193         "needed in order to fold tail by masking as required.",
1194         "NoPrimaryInduction", ORE, TheLoop);
1195     return false;
1196   }
1197
1198   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1199
1200   for (auto &Reduction : *getReductionVars())
1201     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1202
1203   // TODO: handle non-reduction outside users when tail is folded by masking.
1204   for (auto *AE : AllowedExit) {
1205     // Check that all users of allowed exit values are inside the loop or
1206     // are the live-out of a reduction.
1207     if (ReductionLiveOuts.count(AE))
1208       continue;
1209     for (User *U : AE->users()) {
1210       Instruction *UI = cast<Instruction>(U);
1211       if (TheLoop->contains(UI))
1212         continue;
1213       reportVectorizationFailure(
1214           "Cannot fold tail by masking, loop has an outside user for",
1215           "Cannot fold tail by masking in the presence of live outs.",
1216           "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1217       return false;
1218     }
1219   }
1220
1221   // The list of pointers that we can safely read and write to remains empty.
1222   SmallPtrSet<Value *, 8> SafePointers;
1223
1224   // Check and mark all blocks for predication, including those that ordinarily
1225   // do not need predication such as the header block.
1226   for (BasicBlock *BB : TheLoop->blocks()) {
1227     if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
1228       reportVectorizationFailure(
1229           "Cannot fold tail by masking as required",
1230           "control flow cannot be substituted for a select",
1231           "NoCFGForSelect", ORE, TheLoop,
1232           BB->getTerminator());
1233       return false;
1234     }
1235   }
1236
1237   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1238   return true;
1239 }
1240
1241 } // namespace llvm