lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

   1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file provides loop vectorization legality analysis. Original code
  10 // resided in LoopVectorize.cpp for a long time.
  11 //
  12 // At this point, it is implemented as a utility class, not as an analysis
  13 // pass. It should be easy to create an analysis pass around it if there
  14 // is a need (but D45420 needs to happen first).
  15 //
  16 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
  17 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
  18 #include "llvm/Analysis/VectorUtils.h"
  19 #include "llvm/IR/IntrinsicInst.h"
  20
  21 using namespace llvm;
  22
  23 #define LV_NAME "loop-vectorize"
  24 #define DEBUG_TYPE LV_NAME
  25
  26 extern cl::opt<bool> EnableVPlanPredication;
  27
  28 static cl::opt<bool>
  29     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
  30                        cl::desc("Enable if-conversion during vectorization."));
  31
  32 static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
  33     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
  34     cl::desc("The maximum allowed number of runtime memory checks with a "
  35              "vectorize(enable) pragma."));
  36
  37 static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
  38     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
  39     cl::desc("The maximum number of SCEV checks allowed."));
  40
  41 static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
  42     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
  43     cl::desc("The maximum number of SCEV checks allowed with a "
  44              "vectorize(enable) pragma"));
  45
  46 /// Maximum vectorization interleave count.
  47 static const unsigned MaxInterleaveFactor = 16;
  48
  49 namespace llvm {
  50
  51 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
  52   switch (Kind) {
  53   case HK_WIDTH:
  54     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
  55   case HK_UNROLL:
  56     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
  57   case HK_FORCE:
  58     return (Val <= 1);
  59   case HK_ISVECTORIZED:
  60   case HK_PREDICATE:
  61     return (Val == 0 || Val == 1);
  62   }
  63   return false;
  64 }
  65
  66 LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
  67                                        bool InterleaveOnlyWhenForced,
  68                                        OptimizationRemarkEmitter &ORE)
  69     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
  70       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
  71       Force("vectorize.enable", FK_Undefined, HK_FORCE),
  72       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
  73       Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),
  74       ORE(ORE) {
  75   // Populate values with existing loop metadata.
  76   getHintsFromMetadata();
  77
  78   // force-vector-interleave overrides DisableInterleaving.
  79   if (VectorizerParams::isInterleaveForced())
  80     Interleave.Value = VectorizerParams::VectorizationInterleave;
  81
  82   if (IsVectorized.Value != 1)
  83     // If the vectorization width and interleaving count are both 1 then
  84     // consider the loop to have been already vectorized because there's
  85     // nothing more that we can do.
  86     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
  87   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
  88              << "LV: Interleaving disabled by the pass manager\n");
  89 }
  90
  91 void LoopVectorizeHints::setAlreadyVectorized() {
  92   LLVMContext &Context = TheLoop->getHeader()->getContext();
  93
  94   MDNode *IsVectorizedMD = MDNode::get(
  95       Context,
  96       {MDString::get(Context, "llvm.loop.isvectorized"),
  97        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
  98   MDNode *LoopID = TheLoop->getLoopID();
  99   MDNode *NewLoopID =
 100       makePostTransformationMetadata(Context, LoopID,
 101                                      {Twine(Prefix(), "vectorize.").str(),
 102                                       Twine(Prefix(), "interleave.").str()},
 103                                      {IsVectorizedMD});
 104   TheLoop->setLoopID(NewLoopID);
 105
 106   // Update internal cache.
 107   IsVectorized.Value = 1;
 108 }
 109
 110 bool LoopVectorizeHints::allowVectorization(
 111     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
 112   if (getForce() == LoopVectorizeHints::FK_Disabled) {
 113     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
 114     emitRemarkWithHints();
 115     return false;
 116   }
 117
 118   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
 119     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
 120     emitRemarkWithHints();
 121     return false;
 122   }
 123
 124   if (getIsVectorized() == 1) {
 125     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
 126     // FIXME: Add interleave.disable metadata. This will allow
 127     // vectorize.disable to be used without disabling the pass and errors
 128     // to differentiate between disabled vectorization and a width of 1.
 129     ORE.emit([&]() {
 130       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
 131                                         "AllDisabled", L->getStartLoc(),
 132                                         L->getHeader())
 133              << "loop not vectorized: vectorization and interleaving are "
 134                 "explicitly disabled, or the loop has already been "
 135                 "vectorized";
 136     });
 137     return false;
 138   }
 139
 140   return true;
 141 }
 142
 143 void LoopVectorizeHints::emitRemarkWithHints() const {
 144   using namespace ore;
 145
 146   ORE.emit([&]() {
 147     if (Force.Value == LoopVectorizeHints::FK_Disabled)
 148       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
 149                                       TheLoop->getStartLoc(),
 150                                       TheLoop->getHeader())
 151              << "loop not vectorized: vectorization is explicitly disabled";
 152     else {
 153       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
 154                                  TheLoop->getStartLoc(), TheLoop->getHeader());
 155       R << "loop not vectorized";
 156       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
 157         R << " (Force=" << NV("Force", true);
 158         if (Width.Value != 0)
 159           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
 160         if (Interleave.Value != 0)
 161           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
 162         R << ")";
 163       }
 164       return R;
 165     }
 166   });
 167 }
 168
 169 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
 170   if (getWidth() == 1)
 171     return LV_NAME;
 172   if (getForce() == LoopVectorizeHints::FK_Disabled)
 173     return LV_NAME;
 174   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
 175     return LV_NAME;
 176   return OptimizationRemarkAnalysis::AlwaysPrint;
 177 }
 178
 179 void LoopVectorizeHints::getHintsFromMetadata() {
 180   MDNode *LoopID = TheLoop->getLoopID();
 181   if (!LoopID)
 182     return;
 183
 184   // First operand should refer to the loop id itself.
 185   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
 186   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
 187
 188   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
 189     const MDString *S = nullptr;
 190     SmallVector<Metadata *, 4> Args;
 191
 192     // The expected hint is either a MDString or a MDNode with the first
 193     // operand a MDString.
 194     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
 195       if (!MD || MD->getNumOperands() == 0)
 196         continue;
 197       S = dyn_cast<MDString>(MD->getOperand(0));
 198       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
 199         Args.push_back(MD->getOperand(i));
 200     } else {
 201       S = dyn_cast<MDString>(LoopID->getOperand(i));
 202       assert(Args.size() == 0 && "too many arguments for MDString");
 203     }
 204
 205     if (!S)
 206       continue;
 207
 208     // Check if the hint starts with the loop metadata prefix.
 209     StringRef Name = S->getString();
 210     if (Args.size() == 1)
 211       setHint(Name, Args[0]);
 212   }
 213 }
 214
 215 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
 216   if (!Name.startswith(Prefix()))
 217     return;
 218   Name = Name.substr(Prefix().size(), StringRef::npos);
 219
 220   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
 221   if (!C)
 222     return;
 223   unsigned Val = C->getZExtValue();
 224
 225   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
 226   for (auto H : Hints) {
 227     if (Name == H->Name) {
 228       if (H->validate(Val))
 229         H->Value = Val;
 230       else
 231         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
 232       break;
 233     }
 234   }
 235 }
 236
 237 bool LoopVectorizationRequirements::doesNotMeet(
 238     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
 239   const char *PassName = Hints.vectorizeAnalysisPassName();
 240   bool Failed = false;
 241   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
 242     ORE.emit([&]() {
 243       return OptimizationRemarkAnalysisFPCommute(
 244                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
 245                  UnsafeAlgebraInst->getParent())
 246              << "loop not vectorized: cannot prove it is safe to reorder "
 247                 "floating-point operations";
 248     });
 249     Failed = true;
 250   }
 251
 252   // Test if runtime memcheck thresholds are exceeded.
 253   bool PragmaThresholdReached =
 254       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
 255   bool ThresholdReached =
 256       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
 257   if ((ThresholdReached && !Hints.allowReordering()) ||
 258       PragmaThresholdReached) {
 259     ORE.emit([&]() {
 260       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
 261                                                 L->getStartLoc(),
 262                                                 L->getHeader())
 263              << "loop not vectorized: cannot prove it is safe to reorder "
 264                 "memory operations";
 265     });
 266     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
 267     Failed = true;
 268   }
 269
 270   return Failed;
 271 }
 272
 273 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
 274 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
 275 // executing the inner loop will execute the same iterations). This check is
 276 // very constrained for now but it will be relaxed in the future. \p Lp is
 277 // considered uniform if it meets all the following conditions:
 278 //   1) it has a canonical IV (starting from 0 and with stride 1),
 279 //   2) its latch terminator is a conditional branch and,
 280 //   3) its latch condition is a compare instruction whose operands are the
 281 //      canonical IV and an OuterLp invariant.
 282 // This check doesn't take into account the uniformity of other conditions not
 283 // related to the loop latch because they don't affect the loop uniformity.
 284 //
 285 // NOTE: We decided to keep all these checks and its associated documentation
 286 // together so that we can easily have a picture of the current supported loop
 287 // nests. However, some of the current checks don't depend on \p OuterLp and
 288 // would be redundantly executed for each \p Lp if we invoked this function for
 289 // different candidate outer loops. This is not the case for now because we
 290 // don't currently have the infrastructure to evaluate multiple candidate outer
 291 // loops and \p OuterLp will be a fixed parameter while we only support explicit
 292 // outer loop vectorization. It's also very likely that these checks go away
 293 // before introducing the aforementioned infrastructure. However, if this is not
 294 // the case, we should move the \p OuterLp independent checks to a separate
 295 // function that is only executed once for each \p Lp.
 296 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
 297   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
 298
 299   // If Lp is the outer loop, it's uniform by definition.
 300   if (Lp == OuterLp)
 301     return true;
 302   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
 303
 304   // 1.
 305   PHINode *IV = Lp->getCanonicalInductionVariable();
 306   if (!IV) {
 307     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
 308     return false;
 309   }
 310
 311   // 2.
 312   BasicBlock *Latch = Lp->getLoopLatch();
 313   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
 314   if (!LatchBr || LatchBr->isUnconditional()) {
 315     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
 316     return false;
 317   }
 318
 319   // 3.
 320   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
 321   if (!LatchCmp) {
 322     LLVM_DEBUG(
 323         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
 324     return false;
 325   }
 326
 327   Value *CondOp0 = LatchCmp->getOperand(0);
 328   Value *CondOp1 = LatchCmp->getOperand(1);
 329   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
 330   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
 331       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
 332     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
 333     return false;
 334   }
 335
 336   return true;
 337 }
 338
 339 // Return true if \p Lp and all its nested loops are uniform with regard to \p
 340 // OuterLp.
 341 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
 342   if (!isUniformLoop(Lp, OuterLp))
 343     return false;
 344
 345   // Check if nested loops are uniform.
 346   for (Loop *SubLp : *Lp)
 347     if (!isUniformLoopNest(SubLp, OuterLp))
 348       return false;
 349
 350   return true;
 351 }
 352
 353 /// Check whether it is safe to if-convert this phi node.
 354 ///
 355 /// Phi nodes with constant expressions that can trap are not safe to if
 356 /// convert.
 357 static bool canIfConvertPHINodes(BasicBlock *BB) {
 358   for (PHINode &Phi : BB->phis()) {
 359     for (Value *V : Phi.incoming_values())
 360       if (auto *C = dyn_cast<Constant>(V))
 361         if (C->canTrap())
 362           return false;
 363   }
 364   return true;
 365 }
 366
 367 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
 368   if (Ty->isPointerTy())
 369     return DL.getIntPtrType(Ty);
 370
 371   // It is possible that char's or short's overflow when we ask for the loop's
 372   // trip count, work around this by changing the type size.
 373   if (Ty->getScalarSizeInBits() < 32)
 374     return Type::getInt32Ty(Ty->getContext());
 375
 376   return Ty;
 377 }
 378
 379 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
 380   Ty0 = convertPointerToIntegerType(DL, Ty0);
 381   Ty1 = convertPointerToIntegerType(DL, Ty1);
 382   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
 383     return Ty0;
 384   return Ty1;
 385 }
 386
 387 /// Check that the instruction has outside loop users and is not an
 388 /// identified reduction variable.
 389 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
 390                                SmallPtrSetImpl<Value *> &AllowedExit) {
 391   // Reductions, Inductions and non-header phis are allowed to have exit users. All
 392   // other instructions must not have external users.
 393   if (!AllowedExit.count(Inst))
 394     // Check that all of the users of the loop are inside the BB.
 395     for (User *U : Inst->users()) {
 396       Instruction *UI = cast<Instruction>(U);
 397       // This user may be a reduction exit value.
 398       if (!TheLoop->contains(UI)) {
 399         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
 400         return true;
 401       }
 402     }
 403   return false;
 404 }
 405
 406 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
 407   const ValueToValueMap &Strides =
 408       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
 409
 410   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
 411   if (Stride == 1 || Stride == -1)
 412     return Stride;
 413   return 0;
 414 }
 415
 416 bool LoopVectorizationLegality::isUniform(Value *V) {
 417   return LAI->isUniform(V);
 418 }
 419
 420 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
 421   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
 422   // Store the result and return it at the end instead of exiting early, in case
 423   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
 424   bool Result = true;
 425   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
 426
 427   for (BasicBlock *BB : TheLoop->blocks()) {
 428     // Check whether the BB terminator is a BranchInst. Any other terminator is
 429     // not supported yet.
 430     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
 431     if (!Br) {
 432       reportVectorizationFailure("Unsupported basic block terminator",
 433           "loop control flow is not understood by vectorizer",
 434           "CFGNotUnderstood", ORE, TheLoop);
 435       if (DoExtraAnalysis)
 436         Result = false;
 437       else
 438         return false;
 439     }
 440
 441     // Check whether the BranchInst is a supported one. Only unconditional
 442     // branches, conditional branches with an outer loop invariant condition or
 443     // backedges are supported.
 444     // FIXME: We skip these checks when VPlan predication is enabled as we
 445     // want to allow divergent branches. This whole check will be removed
 446     // once VPlan predication is on by default.
 447     if (!EnableVPlanPredication && Br && Br->isConditional() &&
 448         !TheLoop->isLoopInvariant(Br->getCondition()) &&
 449         !LI->isLoopHeader(Br->getSuccessor(0)) &&
 450         !LI->isLoopHeader(Br->getSuccessor(1))) {
 451       reportVectorizationFailure("Unsupported conditional branch",
 452           "loop control flow is not understood by vectorizer",
 453           "CFGNotUnderstood", ORE, TheLoop);
 454       if (DoExtraAnalysis)
 455         Result = false;
 456       else
 457         return false;
 458     }
 459   }
 460
 461   // Check whether inner loops are uniform. At this point, we only support
 462   // simple outer loops scenarios with uniform nested loops.
 463   if (!isUniformLoopNest(TheLoop /*loop nest*/,
 464                          TheLoop /*context outer loop*/)) {
 465     reportVectorizationFailure("Outer loop contains divergent loops",
 466         "loop control flow is not understood by vectorizer",
 467         "CFGNotUnderstood", ORE, TheLoop);
 468     if (DoExtraAnalysis)
 469       Result = false;
 470     else
 471       return false;
 472   }
 473
 474   // Check whether we are able to set up outer loop induction.
 475   if (!setupOuterLoopInductions()) {
 476     reportVectorizationFailure("Unsupported outer loop Phi(s)",
 477                                "Unsupported outer loop Phi(s)",
 478                                "UnsupportedPhi", ORE, TheLoop);
 479     if (DoExtraAnalysis)
 480       Result = false;
 481     else
 482       return false;
 483   }
 484
 485   return Result;
 486 }
 487
 488 void LoopVectorizationLegality::addInductionPhi(
 489     PHINode *Phi, const InductionDescriptor &ID,
 490     SmallPtrSetImpl<Value *> &AllowedExit) {
 491   Inductions[Phi] = ID;
 492
 493   // In case this induction also comes with casts that we know we can ignore
 494   // in the vectorized loop body, record them here. All casts could be recorded
 495   // here for ignoring, but suffices to record only the first (as it is the
 496   // only one that may bw used outside the cast sequence).
 497   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
 498   if (!Casts.empty())
 499     InductionCastsToIgnore.insert(*Casts.begin());
 500
 501   Type *PhiTy = Phi->getType();
 502   const DataLayout &DL = Phi->getModule()->getDataLayout();
 503
 504   // Get the widest type.
 505   if (!PhiTy->isFloatingPointTy()) {
 506     if (!WidestIndTy)
 507       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
 508     else
 509       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
 510   }
 511
 512   // Int inductions are special because we only allow one IV.
 513   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
 514       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
 515       isa<Constant>(ID.getStartValue()) &&
 516       cast<Constant>(ID.getStartValue())->isNullValue()) {
 517
 518     // Use the phi node with the widest type as induction. Use the last
 519     // one if there are multiple (no good reason for doing this other
 520     // than it is expedient). We've checked that it begins at zero and
 521     // steps by one, so this is a canonical induction variable.
 522     if (!PrimaryInduction || PhiTy == WidestIndTy)
 523       PrimaryInduction = Phi;
 524   }
 525
 526   // Both the PHI node itself, and the "post-increment" value feeding
 527   // back into the PHI node may have external users.
 528   // We can allow those uses, except if the SCEVs we have for them rely
 529   // on predicates that only hold within the loop, since allowing the exit
 530   // currently means re-using this SCEV outside the loop (see PR33706 for more
 531   // details).
 532   if (PSE.getUnionPredicate().isAlwaysTrue()) {
 533     AllowedExit.insert(Phi);
 534     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
 535   }
 536
 537   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
 538 }
 539
 540 bool LoopVectorizationLegality::setupOuterLoopInductions() {
 541   BasicBlock *Header = TheLoop->getHeader();
 542
 543   // Returns true if a given Phi is a supported induction.
 544   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
 545     InductionDescriptor ID;
 546     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
 547         ID.getKind() == InductionDescriptor::IK_IntInduction) {
 548       addInductionPhi(&Phi, ID, AllowedExit);
 549       return true;
 550     } else {
 551       // Bail out for any Phi in the outer loop header that is not a supported
 552       // induction.
 553       LLVM_DEBUG(
 554           dbgs()
 555           << "LV: Found unsupported PHI for outer loop vectorization.\n");
 556       return false;
 557     }
 558   };
 559
 560   if (llvm::all_of(Header->phis(), isSupportedPhi))
 561     return true;
 562   else
 563     return false;
 564 }
 565
 566 bool LoopVectorizationLegality::canVectorizeInstrs() {
 567   BasicBlock *Header = TheLoop->getHeader();
 568
 569   // Look for the attribute signaling the absence of NaNs.
 570   Function &F = *Header->getParent();
 571   HasFunNoNaNAttr =
 572       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
 573
 574   // For each block in the loop.
 575   for (BasicBlock *BB : TheLoop->blocks()) {
 576     // Scan the instructions in the block and look for hazards.
 577     for (Instruction &I : *BB) {
 578       if (auto *Phi = dyn_cast<PHINode>(&I)) {
 579         Type *PhiTy = Phi->getType();
 580         // Check that this PHI type is allowed.
 581         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
 582             !PhiTy->isPointerTy()) {
 583           reportVectorizationFailure("Found a non-int non-pointer PHI",
 584                                      "loop control flow is not understood by vectorizer",
 585                                      "CFGNotUnderstood", ORE, TheLoop);
 586           return false;
 587         }
 588
 589         // If this PHINode is not in the header block, then we know that we
 590         // can convert it to select during if-conversion. No need to check if
 591         // the PHIs in this block are induction or reduction variables.
 592         if (BB != Header) {
 593           // Non-header phi nodes that have outside uses can be vectorized. Add
 594           // them to the list of allowed exits.
 595           // Unsafe cyclic dependencies with header phis are identified during
 596           // legalization for reduction, induction and first order
 597           // recurrences.
 598           AllowedExit.insert(&I);
 599           continue;
 600         }
 601
 602         // We only allow if-converted PHIs with exactly two incoming values.
 603         if (Phi->getNumIncomingValues() != 2) {
 604           reportVectorizationFailure("Found an invalid PHI",
 605               "loop control flow is not understood by vectorizer",
 606               "CFGNotUnderstood", ORE, TheLoop, Phi);
 607           return false;
 608         }
 609
 610         RecurrenceDescriptor RedDes;
 611         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
 612                                                  DT)) {
 613           if (RedDes.hasUnsafeAlgebra())
 614             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
 615           AllowedExit.insert(RedDes.getLoopExitInstr());
 616           Reductions[Phi] = RedDes;
 617           continue;
 618         }
 619
 620         // TODO: Instead of recording the AllowedExit, it would be good to record the
 621         // complementary set: NotAllowedExit. These include (but may not be
 622         // limited to):
 623         // 1. Reduction phis as they represent the one-before-last value, which
 624         // is not available when vectorized
 625         // 2. Induction phis and increment when SCEV predicates cannot be used
 626         // outside the loop - see addInductionPhi
 627         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
 628         // outside the loop - see call to hasOutsideLoopUser in the non-phi
 629         // handling below
 630         // 4. FirstOrderRecurrence phis that can possibly be handled by
 631         // extraction.
 632         // By recording these, we can then reason about ways to vectorize each
 633         // of these NotAllowedExit.
 634         InductionDescriptor ID;
 635         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
 636           addInductionPhi(Phi, ID, AllowedExit);
 637           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
 638             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
 639           continue;
 640         }
 641
 642         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
 643                                                          SinkAfter, DT)) {
 644           FirstOrderRecurrences.insert(Phi);
 645           continue;
 646         }
 647
 648         // As a last resort, coerce the PHI to a AddRec expression
 649         // and re-try classifying it a an induction PHI.
 650         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
 651           addInductionPhi(Phi, ID, AllowedExit);
 652           continue;
 653         }
 654
 655         reportVectorizationFailure("Found an unidentified PHI",
 656             "value that could not be identified as "
 657             "reduction is used outside the loop",
 658             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
 659         return false;
 660       } // end of PHI handling
 661
 662       // We handle calls that:
 663       //   * Are debug info intrinsics.
 664       //   * Have a mapping to an IR intrinsic.
 665       //   * Have a vector version available.
 666       auto *CI = dyn_cast<CallInst>(&I);
 667       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
 668           !isa<DbgInfoIntrinsic>(CI) &&
 669           !(CI->getCalledFunction() && TLI &&
 670             TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
 671         // If the call is a recognized math libary call, it is likely that
 672         // we can vectorize it given loosened floating-point constraints.
 673         LibFunc Func;
 674         bool IsMathLibCall =
 675             TLI && CI->getCalledFunction() &&
 676             CI->getType()->isFloatingPointTy() &&
 677             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
 678             TLI->hasOptimizedCodeGen(Func);
 679
 680         if (IsMathLibCall) {
 681           // TODO: Ideally, we should not use clang-specific language here,
 682           // but it's hard to provide meaningful yet generic advice.
 683           // Also, should this be guarded by allowExtraAnalysis() and/or be part
 684           // of the returned info from isFunctionVectorizable()?
 685           reportVectorizationFailure("Found a non-intrinsic callsite",
 686               "library call cannot be vectorized. "
 687               "Try compiling with -fno-math-errno, -ffast-math, "
 688               "or similar flags",
 689               "CantVectorizeLibcall", ORE, TheLoop, CI);
 690         } else {
 691           reportVectorizationFailure("Found a non-intrinsic callsite",
 692                                      "call instruction cannot be vectorized",
 693                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
 694         }
 695         return false;
 696       }
 697
 698       // Some intrinsics have scalar arguments and should be same in order for
 699       // them to be vectorized (i.e. loop invariant).
 700       if (CI) {
 701         auto *SE = PSE.getSE();
 702         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
 703         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
 704           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
 705             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
 706               reportVectorizationFailure("Found unvectorizable intrinsic",
 707                   "intrinsic instruction cannot be vectorized",
 708                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
 709               return false;
 710             }
 711           }
 712       }
 713
 714       // Check that the instruction return type is vectorizable.
 715       // Also, we can't vectorize extractelement instructions.
 716       if ((!VectorType::isValidElementType(I.getType()) &&
 717            !I.getType()->isVoidTy()) ||
 718           isa<ExtractElementInst>(I)) {
 719         reportVectorizationFailure("Found unvectorizable type",
 720             "instruction return type cannot be vectorized",
 721             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
 722         return false;
 723       }
 724
 725       // Check that the stored type is vectorizable.
 726       if (auto *ST = dyn_cast<StoreInst>(&I)) {
 727         Type *T = ST->getValueOperand()->getType();
 728         if (!VectorType::isValidElementType(T)) {
 729           reportVectorizationFailure("Store instruction cannot be vectorized",
 730                                      "store instruction cannot be vectorized",
 731                                      "CantVectorizeStore", ORE, TheLoop, ST);
 732           return false;
 733         }
 734
 735         // For nontemporal stores, check that a nontemporal vector version is
 736         // supported on the target.
 737         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
 738           // Arbitrarily try a vector of 2 elements.
 739           Type *VecTy = VectorType::get(T, /*NumElements=*/2);
 740           assert(VecTy && "did not find vectorized version of stored type");
 741           unsigned Alignment = getLoadStoreAlignment(ST);
 742           assert(Alignment && "Alignment should be set");
 743           if (!TTI->isLegalNTStore(VecTy, llvm::Align(Alignment))) {
 744             reportVectorizationFailure(
 745                 "nontemporal store instruction cannot be vectorized",
 746                 "nontemporal store instruction cannot be vectorized",
 747                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
 748             return false;
 749           }
 750         }
 751
 752       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
 753         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
 754           // For nontemporal loads, check that a nontemporal vector version is
 755           // supported on the target (arbitrarily try a vector of 2 elements).
 756           Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
 757           assert(VecTy && "did not find vectorized version of load type");
 758           unsigned Alignment = getLoadStoreAlignment(LD);
 759           assert(Alignment && "Alignment should be set");
 760           if (!TTI->isLegalNTLoad(VecTy, llvm::Align(Alignment))) {
 761             reportVectorizationFailure(
 762                 "nontemporal load instruction cannot be vectorized",
 763                 "nontemporal load instruction cannot be vectorized",
 764                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
 765             return false;
 766           }
 767         }
 768
 769         // FP instructions can allow unsafe algebra, thus vectorizable by
 770         // non-IEEE-754 compliant SIMD units.
 771         // This applies to floating-point math operations and calls, not memory
 772         // operations, shuffles, or casts, as they don't change precision or
 773         // semantics.
 774       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
 775                  !I.isFast()) {
 776         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
 777         Hints->setPotentiallyUnsafe();
 778       }
 779
 780       // Reduction instructions are allowed to have exit users.
 781       // All other instructions must not have external users.
 782       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
 783         // We can safely vectorize loops where instructions within the loop are
 784         // used outside the loop only if the SCEV predicates within the loop is
 785         // same as outside the loop. Allowing the exit means reusing the SCEV
 786         // outside the loop.
 787         if (PSE.getUnionPredicate().isAlwaysTrue()) {
 788           AllowedExit.insert(&I);
 789           continue;
 790         }
 791         reportVectorizationFailure("Value cannot be used outside the loop",
 792                                    "value cannot be used outside the loop",
 793                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
 794         return false;
 795       }
 796     } // next instr.
 797   }
 798
 799   if (!PrimaryInduction) {
 800     if (Inductions.empty()) {
 801       reportVectorizationFailure("Did not find one integer induction var",
 802           "loop induction variable could not be identified",
 803           "NoInductionVariable", ORE, TheLoop);
 804       return false;
 805     } else if (!WidestIndTy) {
 806       reportVectorizationFailure("Did not find one integer induction var",
 807           "integer loop induction variable could not be identified",
 808           "NoIntegerInductionVariable", ORE, TheLoop);
 809       return false;
 810     } else {
 811       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
 812     }
 813   }
 814
 815   // Now we know the widest induction type, check if our found induction
 816   // is the same size. If it's not, unset it here and InnerLoopVectorizer
 817   // will create another.
 818   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
 819     PrimaryInduction = nullptr;
 820
 821   return true;
 822 }
 823
 824 bool LoopVectorizationLegality::canVectorizeMemory() {
 825   LAI = &(*GetLAA)(*TheLoop);
 826   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
 827   if (LAR) {
 828     ORE->emit([&]() {
 829       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
 830                                         "loop not vectorized: ", *LAR);
 831     });
 832   }
 833   if (!LAI->canVectorizeMemory())
 834     return false;
 835
 836   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
 837     reportVectorizationFailure("Stores to a uniform address",
 838         "write to a loop invariant address could not be vectorized",
 839         "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
 840     return false;
 841   }
 842   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
 843   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
 844
 845   return true;
 846 }
 847
 848 bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
 849   Value *In0 = const_cast<Value *>(V);
 850   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
 851   if (!PN)
 852     return false;
 853
 854   return Inductions.count(PN);
 855 }
 856
 857 bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
 858   auto *Inst = dyn_cast<Instruction>(V);
 859   return (Inst && InductionCastsToIgnore.count(Inst));
 860 }
 861
 862 bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
 863   return isInductionPhi(V) || isCastedInductionVariable(V);
 864 }
 865
 866 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
 867   return FirstOrderRecurrences.count(Phi);
 868 }
 869
 870 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
 871   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
 872 }
 873
 874 bool LoopVectorizationLegality::blockCanBePredicated(
 875     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
 876   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
 877
 878   for (Instruction &I : *BB) {
 879     // Check that we don't have a constant expression that can trap as operand.
 880     for (Value *Operand : I.operands()) {
 881       if (auto *C = dyn_cast<Constant>(Operand))
 882         if (C->canTrap())
 883           return false;
 884     }
 885     // We might be able to hoist the load.
 886     if (I.mayReadFromMemory()) {
 887       auto *LI = dyn_cast<LoadInst>(&I);
 888       if (!LI)
 889         return false;
 890       if (!SafePtrs.count(LI->getPointerOperand())) {
 891         // !llvm.mem.parallel_loop_access implies if-conversion safety.
 892         // Otherwise, record that the load needs (real or emulated) masking
 893         // and let the cost model decide.
 894         if (!IsAnnotatedParallel || PreserveGuards)
 895           MaskedOp.insert(LI);
 896         continue;
 897       }
 898     }
 899
 900     if (I.mayWriteToMemory()) {
 901       auto *SI = dyn_cast<StoreInst>(&I);
 902       if (!SI)
 903         return false;
 904       // Predicated store requires some form of masking:
 905       // 1) masked store HW instruction,
 906       // 2) emulation via load-blend-store (only if safe and legal to do so,
 907       //    be aware on the race conditions), or
 908       // 3) element-by-element predicate check and scalar store.
 909       MaskedOp.insert(SI);
 910       continue;
 911     }
 912     if (I.mayThrow())
 913       return false;
 914   }
 915
 916   return true;
 917 }
 918
 919 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
 920   if (!EnableIfConversion) {
 921     reportVectorizationFailure("If-conversion is disabled",
 922                                "if-conversion is disabled",
 923                                "IfConversionDisabled",
 924                                ORE, TheLoop);
 925     return false;
 926   }
 927
 928   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
 929
 930   // A list of pointers which are known to be dereferenceable within scope of
 931   // the loop body for each iteration of the loop which executes.  That is,
 932   // the memory pointed to can be dereferenced (with the access size implied by
 933   // the value's type) unconditionally within the loop header without
 934   // introducing a new fault.
 935   SmallPtrSet<Value *, 8> SafePointes;
 936
 937   // Collect safe addresses.
 938   for (BasicBlock *BB : TheLoop->blocks()) {
 939     if (blockNeedsPredication(BB))
 940       continue;
 941
 942     for (Instruction &I : *BB)
 943       if (auto *Ptr = getLoadStorePointerOperand(&I))
 944         SafePointes.insert(Ptr);
 945   }
 946
 947   // Collect the blocks that need predication.
 948   BasicBlock *Header = TheLoop->getHeader();
 949   for (BasicBlock *BB : TheLoop->blocks()) {
 950     // We don't support switch statements inside loops.
 951     if (!isa<BranchInst>(BB->getTerminator())) {
 952       reportVectorizationFailure("Loop contains a switch statement",
 953                                  "loop contains a switch statement",
 954                                  "LoopContainsSwitch", ORE, TheLoop,
 955                                  BB->getTerminator());
 956       return false;
 957     }
 958
 959     // We must be able to predicate all blocks that need to be predicated.
 960     if (blockNeedsPredication(BB)) {
 961       if (!blockCanBePredicated(BB, SafePointes)) {
 962         reportVectorizationFailure(
 963             "Control flow cannot be substituted for a select",
 964             "control flow cannot be substituted for a select",
 965             "NoCFGForSelect", ORE, TheLoop,
 966             BB->getTerminator());
 967         return false;
 968       }
 969     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
 970       reportVectorizationFailure(
 971           "Control flow cannot be substituted for a select",
 972           "control flow cannot be substituted for a select",
 973           "NoCFGForSelect", ORE, TheLoop,
 974           BB->getTerminator());
 975       return false;
 976     }
 977   }
 978
 979   // We can if-convert this loop.
 980   return true;
 981 }
 982
 983 // Helper function to canVectorizeLoopNestCFG.
 984 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
 985                                                     bool UseVPlanNativePath) {
 986   assert((UseVPlanNativePath || Lp->empty()) &&
 987          "VPlan-native path is not enabled.");
 988
 989   // TODO: ORE should be improved to show more accurate information when an
 990   // outer loop can't be vectorized because a nested loop is not understood or
 991   // legal. Something like: "outer_loop_location: loop not vectorized:
 992   // (inner_loop_location) loop control flow is not understood by vectorizer".
 993
 994   // Store the result and return it at the end instead of exiting early, in case
 995   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
 996   bool Result = true;
 997   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
 998
 999   // We must have a loop in canonical form. Loops with indirectbr in them cannot
1000   // be canonicalized.
1001   if (!Lp->getLoopPreheader()) {
1002     reportVectorizationFailure("Loop doesn't have a legal pre-header",
1003         "loop control flow is not understood by vectorizer",
1004         "CFGNotUnderstood", ORE, TheLoop);
1005     if (DoExtraAnalysis)
1006       Result = false;
1007     else
1008       return false;
1009   }
1010
1011   // We must have a single backedge.
1012   if (Lp->getNumBackEdges() != 1) {
1013     reportVectorizationFailure("The loop must have a single backedge",
1014         "loop control flow is not understood by vectorizer",
1015         "CFGNotUnderstood", ORE, TheLoop);
1016     if (DoExtraAnalysis)
1017       Result = false;
1018     else
1019       return false;
1020   }
1021
1022   // We must have a single exiting block.
1023   if (!Lp->getExitingBlock()) {
1024     reportVectorizationFailure("The loop must have an exiting block",
1025         "loop control flow is not understood by vectorizer",
1026         "CFGNotUnderstood", ORE, TheLoop);
1027     if (DoExtraAnalysis)
1028       Result = false;
1029     else
1030       return false;
1031   }
1032
1033   // We only handle bottom-tested loops, i.e. loop in which the condition is
1034   // checked at the end of each iteration. With that we can assume that all
1035   // instructions in the loop are executed the same number of times.
1036   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1037     reportVectorizationFailure("The exiting block is not the loop latch",
1038         "loop control flow is not understood by vectorizer",
1039         "CFGNotUnderstood", ORE, TheLoop);
1040     if (DoExtraAnalysis)
1041       Result = false;
1042     else
1043       return false;
1044   }
1045
1046   return Result;
1047 }
1048
1049 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1050     Loop *Lp, bool UseVPlanNativePath) {
1051   // Store the result and return it at the end instead of exiting early, in case
1052   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1053   bool Result = true;
1054   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1055   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1056     if (DoExtraAnalysis)
1057       Result = false;
1058     else
1059       return false;
1060   }
1061
1062   // Recursively check whether the loop control flow of nested loops is
1063   // understood.
1064   for (Loop *SubLp : *Lp)
1065     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1066       if (DoExtraAnalysis)
1067         Result = false;
1068       else
1069         return false;
1070     }
1071
1072   return Result;
1073 }
1074
1075 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1076   // Store the result and return it at the end instead of exiting early, in case
1077   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1078   bool Result = true;
1079
1080   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1081   // Check whether the loop-related control flow in the loop nest is expected by
1082   // vectorizer.
1083   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1084     if (DoExtraAnalysis)
1085       Result = false;
1086     else
1087       return false;
1088   }
1089
1090   // We need to have a loop header.
1091   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1092                     << '\n');
1093
1094   // Specific checks for outer loops. We skip the remaining legal checks at this
1095   // point because they don't support outer loops.
1096   if (!TheLoop->empty()) {
1097     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1098
1099     if (!canVectorizeOuterLoop()) {
1100       reportVectorizationFailure("Unsupported outer loop",
1101                                  "unsupported outer loop",
1102                                  "UnsupportedOuterLoop",
1103                                  ORE, TheLoop);
1104       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1105       // outer loops.
1106       return false;
1107     }
1108
1109     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1110     return Result;
1111   }
1112
1113   assert(TheLoop->empty() && "Inner loop expected.");
1114   // Check if we can if-convert non-single-bb loops.
1115   unsigned NumBlocks = TheLoop->getNumBlocks();
1116   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1117     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1118     if (DoExtraAnalysis)
1119       Result = false;
1120     else
1121       return false;
1122   }
1123
1124   // Check if we can vectorize the instructions and CFG in this loop.
1125   if (!canVectorizeInstrs()) {
1126     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1127     if (DoExtraAnalysis)
1128       Result = false;
1129     else
1130       return false;
1131   }
1132
1133   // Go over each instruction and look at memory deps.
1134   if (!canVectorizeMemory()) {
1135     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1136     if (DoExtraAnalysis)
1137       Result = false;
1138     else
1139       return false;
1140   }
1141
1142   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1143                     << (LAI->getRuntimePointerChecking()->Need
1144                             ? " (with a runtime bound check)"
1145                             : "")
1146                     << "!\n");
1147
1148   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1149   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1150     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1151
1152   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1153     reportVectorizationFailure("Too many SCEV checks needed",
1154         "Too many SCEV assumptions need to be made and checked at runtime",
1155         "TooManySCEVRunTimeChecks", ORE, TheLoop);
1156     if (DoExtraAnalysis)
1157       Result = false;
1158     else
1159       return false;
1160   }
1161
1162   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1163   // we can vectorize, and at this point we don't have any other mem analysis
1164   // which may limit our maximum vectorization factor, so just return true with
1165   // no restrictions.
1166   return Result;
1167 }
1168
1169 bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1170
1171   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1172
1173   if (!PrimaryInduction) {
1174     reportVectorizationFailure(
1175         "No primary induction, cannot fold tail by masking",
1176         "Missing a primary induction variable in the loop, which is "
1177         "needed in order to fold tail by masking as required.",
1178         "NoPrimaryInduction", ORE, TheLoop);
1179     return false;
1180   }
1181
1182   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1183
1184   for (auto &Reduction : *getReductionVars())
1185     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1186
1187   // TODO: handle non-reduction outside users when tail is folded by masking.
1188   for (auto *AE : AllowedExit) {
1189     // Check that all users of allowed exit values are inside the loop or
1190     // are the live-out of a reduction.
1191     if (ReductionLiveOuts.count(AE))
1192       continue;
1193     for (User *U : AE->users()) {
1194       Instruction *UI = cast<Instruction>(U);
1195       if (TheLoop->contains(UI))
1196         continue;
1197       reportVectorizationFailure(
1198           "Cannot fold tail by masking, loop has an outside user for",
1199           "Cannot fold tail by masking in the presence of live outs.",
1200           "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1201       return false;
1202     }
1203   }
1204
1205   // The list of pointers that we can safely read and write to remains empty.
1206   SmallPtrSet<Value *, 8> SafePointers;
1207
1208   // Check and mark all blocks for predication, including those that ordinarily
1209   // do not need predication such as the header block.
1210   for (BasicBlock *BB : TheLoop->blocks()) {
1211     if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
1212       reportVectorizationFailure(
1213           "Cannot fold tail by masking as required",
1214           "control flow cannot be substituted for a select",
1215           "NoCFGForSelect", ORE, TheLoop,
1216           BB->getTerminator());
1217       return false;
1218     }
1219   }
1220
1221   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1222   return true;
1223 }
1224
1225 } // namespace llvm