llvm/lib/Analysis/LoopAccessAnalysis.cpp

   1 //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // The implementation for the loop memory dependence that was originally
  10 // developed for the loop vectorizer.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/Analysis/LoopAccessAnalysis.h"
  15 #include "llvm/ADT/APInt.h"
  16 #include "llvm/ADT/DenseMap.h"
  17 #include "llvm/ADT/EquivalenceClasses.h"
  18 #include "llvm/ADT/PointerIntPair.h"
  19 #include "llvm/ADT/STLExtras.h"
  20 #include "llvm/ADT/SetVector.h"
  21 #include "llvm/ADT/SmallPtrSet.h"
  22 #include "llvm/ADT/SmallSet.h"
  23 #include "llvm/ADT/SmallVector.h"
  24 #include "llvm/Analysis/AliasAnalysis.h"
  25 #include "llvm/Analysis/AliasSetTracker.h"
  26 #include "llvm/Analysis/LoopAnalysisManager.h"
  27 #include "llvm/Analysis/LoopInfo.h"
  28 #include "llvm/Analysis/LoopIterator.h"
  29 #include "llvm/Analysis/MemoryLocation.h"
  30 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  31 #include "llvm/Analysis/ScalarEvolution.h"
  32 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  33 #include "llvm/Analysis/TargetLibraryInfo.h"
  34 #include "llvm/Analysis/TargetTransformInfo.h"
  35 #include "llvm/Analysis/ValueTracking.h"
  36 #include "llvm/Analysis/VectorUtils.h"
  37 #include "llvm/IR/BasicBlock.h"
  38 #include "llvm/IR/Constants.h"
  39 #include "llvm/IR/DataLayout.h"
  40 #include "llvm/IR/DebugLoc.h"
  41 #include "llvm/IR/DerivedTypes.h"
  42 #include "llvm/IR/DiagnosticInfo.h"
  43 #include "llvm/IR/Dominators.h"
  44 #include "llvm/IR/Function.h"
  45 #include "llvm/IR/GetElementPtrTypeIterator.h"
  46 #include "llvm/IR/InstrTypes.h"
  47 #include "llvm/IR/Instruction.h"
  48 #include "llvm/IR/Instructions.h"
  49 #include "llvm/IR/Operator.h"
  50 #include "llvm/IR/PassManager.h"
  51 #include "llvm/IR/PatternMatch.h"
  52 #include "llvm/IR/Type.h"
  53 #include "llvm/IR/Value.h"
  54 #include "llvm/IR/ValueHandle.h"
  55 #include "llvm/Support/Casting.h"
  56 #include "llvm/Support/CommandLine.h"
  57 #include "llvm/Support/Debug.h"
  58 #include "llvm/Support/ErrorHandling.h"
  59 #include "llvm/Support/raw_ostream.h"
  60 #include <algorithm>
  61 #include <cassert>
  62 #include <cstdint>
  63 #include <iterator>
  64 #include <utility>
  65 #include <variant>
  66 #include <vector>
  67
  68 using namespace llvm;
  69 using namespace llvm::PatternMatch;
  70
  71 #define DEBUG_TYPE "loop-accesses"
  72
  73 static cl::opt<unsigned, true>
  74 VectorizationFactor("force-vector-width", cl::Hidden,
  75                     cl::desc("Sets the SIMD width. Zero is autoselect."),
  76                     cl::location(VectorizerParams::VectorizationFactor));
  77 unsigned VectorizerParams::VectorizationFactor;
  78
  79 static cl::opt<unsigned, true>
  80 VectorizationInterleave("force-vector-interleave", cl::Hidden,
  81                         cl::desc("Sets the vectorization interleave count. "
  82                                  "Zero is autoselect."),
  83                         cl::location(
  84                             VectorizerParams::VectorizationInterleave));
  85 unsigned VectorizerParams::VectorizationInterleave;
  86
  87 static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
  88     "runtime-memory-check-threshold", cl::Hidden,
  89     cl::desc("When performing memory disambiguation checks at runtime do not "
  90              "generate more than this number of comparisons (default = 8)."),
  91     cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
  92 unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
  93
  94 /// The maximum iterations used to merge memory checks
  95 static cl::opt<unsigned> MemoryCheckMergeThreshold(
  96     "memory-check-merge-threshold", cl::Hidden,
  97     cl::desc("Maximum number of comparisons done when trying to merge "
  98              "runtime memory checks. (default = 100)"),
  99     cl::init(100));
 100
 101 /// Maximum SIMD width.
 102 const unsigned VectorizerParams::MaxVectorWidth = 64;
 103
 104 /// We collect dependences up to this threshold.
 105 static cl::opt<unsigned>
 106     MaxDependences("max-dependences", cl::Hidden,
 107                    cl::desc("Maximum number of dependences collected by "
 108                             "loop-access analysis (default = 100)"),
 109                    cl::init(100));
 110
 111 /// This enables versioning on the strides of symbolically striding memory
 112 /// accesses in code like the following.
 113 ///   for (i = 0; i < N; ++i)
 114 ///     A[i * Stride1] += B[i * Stride2] ...
 115 ///
 116 /// Will be roughly translated to
 117 ///    if (Stride1 == 1 && Stride2 == 1) {
 118 ///      for (i = 0; i < N; i+=4)
 119 ///       A[i:i+3] += ...
 120 ///    } else
 121 ///      ...
 122 static cl::opt<bool> EnableMemAccessVersioning(
 123     "enable-mem-access-versioning", cl::init(true), cl::Hidden,
 124     cl::desc("Enable symbolic stride memory access versioning"));
 125
 126 /// Enable store-to-load forwarding conflict detection. This option can
 127 /// be disabled for correctness testing.
 128 static cl::opt<bool> EnableForwardingConflictDetection(
 129     "store-to-load-forwarding-conflict-detection", cl::Hidden,
 130     cl::desc("Enable conflict detection in loop-access analysis"),
 131     cl::init(true));
 132
 133 static cl::opt<unsigned> MaxForkedSCEVDepth(
 134     "max-forked-scev-depth", cl::Hidden,
 135     cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
 136     cl::init(5));
 137
 138 static cl::opt<bool> SpeculateUnitStride(
 139     "laa-speculate-unit-stride", cl::Hidden,
 140     cl::desc("Speculate that non-constant strides are unit in LAA"),
 141     cl::init(true));
 142
 143 static cl::opt<bool, true> HoistRuntimeChecks(
 144     "hoist-runtime-checks", cl::Hidden,
 145     cl::desc(
 146         "Hoist inner loop runtime memory checks to outer loop if possible"),
 147     cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true));
 148 bool VectorizerParams::HoistRuntimeChecks;
 149
 150 bool VectorizerParams::isInterleaveForced() {
 151   return ::VectorizationInterleave.getNumOccurrences() > 0;
 152 }
 153
 154 const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
 155                                             const DenseMap<Value *, const SCEV *> &PtrToStride,
 156                                             Value *Ptr) {
 157   const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
 158
 159   // If there is an entry in the map return the SCEV of the pointer with the
 160   // symbolic stride replaced by one.
 161   DenseMap<Value *, const SCEV *>::const_iterator SI = PtrToStride.find(Ptr);
 162   if (SI == PtrToStride.end())
 163     // For a non-symbolic stride, just return the original expression.
 164     return OrigSCEV;
 165
 166   const SCEV *StrideSCEV = SI->second;
 167   // Note: This assert is both overly strong and overly weak.  The actual
 168   // invariant here is that StrideSCEV should be loop invariant.  The only
 169   // such invariant strides we happen to speculate right now are unknowns
 170   // and thus this is a reasonable proxy of the actual invariant.
 171   assert(isa<SCEVUnknown>(StrideSCEV) && "shouldn't be in map");
 172
 173   ScalarEvolution *SE = PSE.getSE();
 174   const SCEV *CT = SE->getOne(StrideSCEV->getType());
 175   PSE.addPredicate(*SE->getEqualPredicate(StrideSCEV, CT));
 176   const SCEV *Expr = PSE.getSCEV(Ptr);
 177
 178   LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
 179              << " by: " << *Expr << "\n");
 180   return Expr;
 181 }
 182
 183 RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
 184     unsigned Index, const RuntimePointerChecking &RtCheck)
 185     : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
 186       AddressSpace(RtCheck.Pointers[Index]
 187                        .PointerValue->getType()
 188                        ->getPointerAddressSpace()),
 189       NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) {
 190   Members.push_back(Index);
 191 }
 192
 193 std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
 194     const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
 195     ScalarEvolution *SE,
 196     DenseMap<std::pair<const SCEV *, Type *>,
 197              std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
 198   std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
 199   if (PointerBounds) {
 200     auto [Iter, Ins] = PointerBounds->insert(
 201         {{PtrExpr, AccessTy},
 202          {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
 203     if (!Ins)
 204       return Iter->second;
 205     PtrBoundsPair = &Iter->second;
 206   }
 207
 208   const SCEV *ScStart;
 209   const SCEV *ScEnd;
 210
 211   if (SE->isLoopInvariant(PtrExpr, Lp)) {
 212     ScStart = ScEnd = PtrExpr;
 213   } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
 214     ScStart = AR->getStart();
 215     ScEnd = AR->evaluateAtIteration(MaxBECount, *SE);
 216     const SCEV *Step = AR->getStepRecurrence(*SE);
 217
 218     // For expressions with negative step, the upper bound is ScStart and the
 219     // lower bound is ScEnd.
 220     if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
 221       if (CStep->getValue()->isNegative())
 222         std::swap(ScStart, ScEnd);
 223     } else {
 224       // Fallback case: the step is not constant, but we can still
 225       // get the upper and lower bounds of the interval by using min/max
 226       // expressions.
 227       ScStart = SE->getUMinExpr(ScStart, ScEnd);
 228       ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
 229     }
 230   } else
 231     return {SE->getCouldNotCompute(), SE->getCouldNotCompute()};
 232
 233   assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
 234   assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
 235
 236   // Add the size of the pointed element to ScEnd.
 237   auto &DL = Lp->getHeader()->getDataLayout();
 238   Type *IdxTy = DL.getIndexType(PtrExpr->getType());
 239   const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
 240   ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
 241
 242   std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
 243   if (PointerBounds)
 244     *PtrBoundsPair = Res;
 245   return Res;
 246 }
 247
 248 /// Calculate Start and End points of memory access using
 249 /// getStartAndEndForAccess.
 250 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
 251                                     Type *AccessTy, bool WritePtr,
 252                                     unsigned DepSetId, unsigned ASId,
 253                                     PredicatedScalarEvolution &PSE,
 254                                     bool NeedsFreeze) {
 255   const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
 256   const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
 257       Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE(), &DC.getPointerBounds());
 258   assert(!isa<SCEVCouldNotCompute>(ScStart) &&
 259          !isa<SCEVCouldNotCompute>(ScEnd) &&
 260          "must be able to compute both start and end expressions");
 261   Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
 262                         NeedsFreeze);
 263 }
 264
 265 bool RuntimePointerChecking::tryToCreateDiffCheck(
 266     const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {
 267   // If either group contains multiple different pointers, bail out.
 268   // TODO: Support multiple pointers by using the minimum or maximum pointer,
 269   // depending on src & sink.
 270   if (CGI.Members.size() != 1 || CGJ.Members.size() != 1)
 271     return false;
 272
 273   const PointerInfo *Src = &Pointers[CGI.Members[0]];
 274   const PointerInfo *Sink = &Pointers[CGJ.Members[0]];
 275
 276   // If either pointer is read and written, multiple checks may be needed. Bail
 277   // out.
 278   if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() ||
 279       !DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty())
 280     return false;
 281
 282   ArrayRef<unsigned> AccSrc =
 283       DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr);
 284   ArrayRef<unsigned> AccSink =
 285       DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr);
 286   // If either pointer is accessed multiple times, there may not be a clear
 287   // src/sink relation. Bail out for now.
 288   if (AccSrc.size() != 1 || AccSink.size() != 1)
 289     return false;
 290
 291   // If the sink is accessed before src, swap src/sink.
 292   if (AccSink[0] < AccSrc[0])
 293     std::swap(Src, Sink);
 294
 295   auto *SrcAR = dyn_cast<SCEVAddRecExpr>(Src->Expr);
 296   auto *SinkAR = dyn_cast<SCEVAddRecExpr>(Sink->Expr);
 297   if (!SrcAR || !SinkAR || SrcAR->getLoop() != DC.getInnermostLoop() ||
 298       SinkAR->getLoop() != DC.getInnermostLoop())
 299     return false;
 300
 301   SmallVector<Instruction *, 4> SrcInsts =
 302       DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr);
 303   SmallVector<Instruction *, 4> SinkInsts =
 304       DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr);
 305   Type *SrcTy = getLoadStoreType(SrcInsts[0]);
 306   Type *DstTy = getLoadStoreType(SinkInsts[0]);
 307   if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy))
 308     return false;
 309
 310   const DataLayout &DL =
 311       SinkAR->getLoop()->getHeader()->getDataLayout();
 312   unsigned AllocSize =
 313       std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));
 314
 315   // Only matching constant steps matching the AllocSize are supported at the
 316   // moment. This simplifies the difference computation. Can be extended in the
 317   // future.
 318   auto *Step = dyn_cast<SCEVConstant>(SinkAR->getStepRecurrence(*SE));
 319   if (!Step || Step != SrcAR->getStepRecurrence(*SE) ||
 320       Step->getAPInt().abs() != AllocSize)
 321     return false;
 322
 323   IntegerType *IntTy =
 324       IntegerType::get(Src->PointerValue->getContext(),
 325                        DL.getPointerSizeInBits(CGI.AddressSpace));
 326
 327   // When counting down, the dependence distance needs to be swapped.
 328   if (Step->getValue()->isNegative())
 329     std::swap(SinkAR, SrcAR);
 330
 331   const SCEV *SinkStartInt = SE->getPtrToIntExpr(SinkAR->getStart(), IntTy);
 332   const SCEV *SrcStartInt = SE->getPtrToIntExpr(SrcAR->getStart(), IntTy);
 333   if (isa<SCEVCouldNotCompute>(SinkStartInt) ||
 334       isa<SCEVCouldNotCompute>(SrcStartInt))
 335     return false;
 336
 337   const Loop *InnerLoop = SrcAR->getLoop();
 338   // If the start values for both Src and Sink also vary according to an outer
 339   // loop, then it's probably better to avoid creating diff checks because
 340   // they may not be hoisted. We should instead let llvm::addRuntimeChecks
 341   // do the expanded full range overlap checks, which can be hoisted.
 342   if (HoistRuntimeChecks && InnerLoop->getParentLoop() &&
 343       isa<SCEVAddRecExpr>(SinkStartInt) && isa<SCEVAddRecExpr>(SrcStartInt)) {
 344     auto *SrcStartAR = cast<SCEVAddRecExpr>(SrcStartInt);
 345     auto *SinkStartAR = cast<SCEVAddRecExpr>(SinkStartInt);
 346     const Loop *StartARLoop = SrcStartAR->getLoop();
 347     if (StartARLoop == SinkStartAR->getLoop() &&
 348         StartARLoop == InnerLoop->getParentLoop() &&
 349         // If the diff check would already be loop invariant (due to the
 350         // recurrences being the same), then we prefer to keep the diff checks
 351         // because they are cheaper.
 352         SrcStartAR->getStepRecurrence(*SE) !=
 353             SinkStartAR->getStepRecurrence(*SE)) {
 354       LLVM_DEBUG(dbgs() << "LAA: Not creating diff runtime check, since these "
 355                            "cannot be hoisted out of the outer loop\n");
 356       return false;
 357     }
 358   }
 359
 360   LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n"
 361                     << "SrcStart: " << *SrcStartInt << '\n'
 362                     << "SinkStartInt: " << *SinkStartInt << '\n');
 363   DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
 364                           Src->NeedsFreeze || Sink->NeedsFreeze);
 365   return true;
 366 }
 367
 368 SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {
 369   SmallVector<RuntimePointerCheck, 4> Checks;
 370
 371   for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
 372     for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
 373       const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
 374       const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];
 375
 376       if (needsChecking(CGI, CGJ)) {
 377         CanUseDiffCheck = CanUseDiffCheck && tryToCreateDiffCheck(CGI, CGJ);
 378         Checks.emplace_back(&CGI, &CGJ);
 379       }
 380     }
 381   }
 382   return Checks;
 383 }
 384
 385 void RuntimePointerChecking::generateChecks(
 386     MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
 387   assert(Checks.empty() && "Checks is not empty");
 388   groupChecks(DepCands, UseDependencies);
 389   Checks = generateChecks();
 390 }
 391
 392 bool RuntimePointerChecking::needsChecking(
 393     const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
 394   for (const auto &I : M.Members)
 395     for (const auto &J : N.Members)
 396       if (needsChecking(I, J))
 397         return true;
 398   return false;
 399 }
 400
 401 /// Compare \p I and \p J and return the minimum.
 402 /// Return nullptr in case we couldn't find an answer.
 403 static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
 404                                    ScalarEvolution *SE) {
 405   std::optional<APInt> Diff = SE->computeConstantDifference(J, I);
 406   if (!Diff)
 407     return nullptr;
 408   return Diff->isNegative() ? J : I;
 409 }
 410
 411 bool RuntimeCheckingPtrGroup::addPointer(
 412     unsigned Index, const RuntimePointerChecking &RtCheck) {
 413   return addPointer(
 414       Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
 415       RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
 416       RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);
 417 }
 418
 419 bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
 420                                          const SCEV *End, unsigned AS,
 421                                          bool NeedsFreeze,
 422                                          ScalarEvolution &SE) {
 423   assert(AddressSpace == AS &&
 424          "all pointers in a checking group must be in the same address space");
 425
 426   // Compare the starts and ends with the known minimum and maximum
 427   // of this set. We need to know how we compare against the min/max
 428   // of the set in order to be able to emit memchecks.
 429   const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);
 430   if (!Min0)
 431     return false;
 432
 433   const SCEV *Min1 = getMinFromExprs(End, High, &SE);
 434   if (!Min1)
 435     return false;
 436
 437   // Update the low bound  expression if we've found a new min value.
 438   if (Min0 == Start)
 439     Low = Start;
 440
 441   // Update the high bound expression if we've found a new max value.
 442   if (Min1 != End)
 443     High = End;
 444
 445   Members.push_back(Index);
 446   this->NeedsFreeze |= NeedsFreeze;
 447   return true;
 448 }
 449
 450 void RuntimePointerChecking::groupChecks(
 451     MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
 452   // We build the groups from dependency candidates equivalence classes
 453   // because:
 454   //    - We know that pointers in the same equivalence class share
 455   //      the same underlying object and therefore there is a chance
 456   //      that we can compare pointers
 457   //    - We wouldn't be able to merge two pointers for which we need
 458   //      to emit a memcheck. The classes in DepCands are already
 459   //      conveniently built such that no two pointers in the same
 460   //      class need checking against each other.
 461
 462   // We use the following (greedy) algorithm to construct the groups
 463   // For every pointer in the equivalence class:
 464   //   For each existing group:
 465   //   - if the difference between this pointer and the min/max bounds
 466   //     of the group is a constant, then make the pointer part of the
 467   //     group and update the min/max bounds of that group as required.
 468
 469   CheckingGroups.clear();
 470
 471   // If we need to check two pointers to the same underlying object
 472   // with a non-constant difference, we shouldn't perform any pointer
 473   // grouping with those pointers. This is because we can easily get
 474   // into cases where the resulting check would return false, even when
 475   // the accesses are safe.
 476   //
 477   // The following example shows this:
 478   // for (i = 0; i < 1000; ++i)
 479   //   a[5000 + i * m] = a[i] + a[i + 9000]
 480   //
 481   // Here grouping gives a check of (5000, 5000 + 1000 * m) against
 482   // (0, 10000) which is always false. However, if m is 1, there is no
 483   // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
 484   // us to perform an accurate check in this case.
 485   //
 486   // The above case requires that we have an UnknownDependence between
 487   // accesses to the same underlying object. This cannot happen unless
 488   // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
 489   // is also false. In this case we will use the fallback path and create
 490   // separate checking groups for all pointers.
 491
 492   // If we don't have the dependency partitions, construct a new
 493   // checking pointer group for each pointer. This is also required
 494   // for correctness, because in this case we can have checking between
 495   // pointers to the same underlying object.
 496   if (!UseDependencies) {
 497     for (unsigned I = 0; I < Pointers.size(); ++I)
 498       CheckingGroups.emplace_back(I, *this);
 499     return;
 500   }
 501
 502   unsigned TotalComparisons = 0;
 503
 504   DenseMap<Value *, SmallVector<unsigned>> PositionMap;
 505   for (unsigned Index = 0; Index < Pointers.size(); ++Index)
 506     PositionMap[Pointers[Index].PointerValue].push_back(Index);
 507
 508   // We need to keep track of what pointers we've already seen so we
 509   // don't process them twice.
 510   SmallSet<unsigned, 2> Seen;
 511
 512   // Go through all equivalence classes, get the "pointer check groups"
 513   // and add them to the overall solution. We use the order in which accesses
 514   // appear in 'Pointers' to enforce determinism.
 515   for (unsigned I = 0; I < Pointers.size(); ++I) {
 516     // We've seen this pointer before, and therefore already processed
 517     // its equivalence class.
 518     if (Seen.count(I))
 519       continue;
 520
 521     MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
 522                                            Pointers[I].IsWritePtr);
 523
 524     SmallVector<RuntimeCheckingPtrGroup, 2> Groups;
 525     auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
 526
 527     // Because DepCands is constructed by visiting accesses in the order in
 528     // which they appear in alias sets (which is deterministic) and the
 529     // iteration order within an equivalence class member is only dependent on
 530     // the order in which unions and insertions are performed on the
 531     // equivalence class, the iteration order is deterministic.
 532     for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
 533          MI != ME; ++MI) {
 534       auto PointerI = PositionMap.find(MI->getPointer());
 535       assert(PointerI != PositionMap.end() &&
 536              "pointer in equivalence class not found in PositionMap");
 537       for (unsigned Pointer : PointerI->second) {
 538         bool Merged = false;
 539         // Mark this pointer as seen.
 540         Seen.insert(Pointer);
 541
 542         // Go through all the existing sets and see if we can find one
 543         // which can include this pointer.
 544         for (RuntimeCheckingPtrGroup &Group : Groups) {
 545           // Don't perform more than a certain amount of comparisons.
 546           // This should limit the cost of grouping the pointers to something
 547           // reasonable.  If we do end up hitting this threshold, the algorithm
 548           // will create separate groups for all remaining pointers.
 549           if (TotalComparisons > MemoryCheckMergeThreshold)
 550             break;
 551
 552           TotalComparisons++;
 553
 554           if (Group.addPointer(Pointer, *this)) {
 555             Merged = true;
 556             break;
 557           }
 558         }
 559
 560         if (!Merged)
 561           // We couldn't add this pointer to any existing set or the threshold
 562           // for the number of comparisons has been reached. Create a new group
 563           // to hold the current pointer.
 564           Groups.emplace_back(Pointer, *this);
 565       }
 566     }
 567
 568     // We've computed the grouped checks for this partition.
 569     // Save the results and continue with the next one.
 570     llvm::copy(Groups, std::back_inserter(CheckingGroups));
 571   }
 572 }
 573
 574 bool RuntimePointerChecking::arePointersInSamePartition(
 575     const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
 576     unsigned PtrIdx2) {
 577   return (PtrToPartition[PtrIdx1] != -1 &&
 578           PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
 579 }
 580
 581 bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
 582   const PointerInfo &PointerI = Pointers[I];
 583   const PointerInfo &PointerJ = Pointers[J];
 584
 585   // No need to check if two readonly pointers intersect.
 586   if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
 587     return false;
 588
 589   // Only need to check pointers between two different dependency sets.
 590   if (PointerI.DependencySetId == PointerJ.DependencySetId)
 591     return false;
 592
 593   // Only need to check pointers in the same alias set.
 594   return PointerI.AliasSetId == PointerJ.AliasSetId;
 595 }
 596
 597 void RuntimePointerChecking::printChecks(
 598     raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks,
 599     unsigned Depth) const {
 600   unsigned N = 0;
 601   for (const auto &[Check1, Check2] : Checks) {
 602     const auto &First = Check1->Members, &Second = Check2->Members;
 603
 604     OS.indent(Depth) << "Check " << N++ << ":\n";
 605
 606     OS.indent(Depth + 2) << "Comparing group (" << Check1 << "):\n";
 607     for (unsigned K : First)
 608       OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n";
 609
 610     OS.indent(Depth + 2) << "Against group (" << Check2 << "):\n";
 611     for (unsigned K : Second)
 612       OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n";
 613   }
 614 }
 615
 616 void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
 617
 618   OS.indent(Depth) << "Run-time memory checks:\n";
 619   printChecks(OS, Checks, Depth);
 620
 621   OS.indent(Depth) << "Grouped accesses:\n";
 622   for (const auto &CG : CheckingGroups) {
 623     OS.indent(Depth + 2) << "Group " << &CG << ":\n";
 624     OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
 625                          << ")\n";
 626     for (unsigned Member : CG.Members) {
 627       OS.indent(Depth + 6) << "Member: " << *Pointers[Member].Expr << "\n";
 628     }
 629   }
 630 }
 631
 632 namespace {
 633
 634 /// Analyses memory accesses in a loop.
 635 ///
 636 /// Checks whether run time pointer checks are needed and builds sets for data
 637 /// dependence checking.
 638 class AccessAnalysis {
 639 public:
 640   /// Read or write access location.
 641   typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
 642   typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
 643
 644   AccessAnalysis(const Loop *TheLoop, AAResults *AA, const LoopInfo *LI,
 645                  MemoryDepChecker::DepCandidates &DA,
 646                  PredicatedScalarEvolution &PSE,
 647                  SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
 648       : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE),
 649         LoopAliasScopes(LoopAliasScopes) {
 650     // We're analyzing dependences across loop iterations.
 651     BAA.enableCrossIterationMode();
 652   }
 653
 654   /// Register a load  and whether it is only read from.
 655   void addLoad(const MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
 656     Value *Ptr = const_cast<Value *>(Loc.Ptr);
 657     AST.add(adjustLoc(Loc));
 658     Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
 659     if (IsReadOnly)
 660       ReadOnlyPtr.insert(Ptr);
 661   }
 662
 663   /// Register a store.
 664   void addStore(const MemoryLocation &Loc, Type *AccessTy) {
 665     Value *Ptr = const_cast<Value *>(Loc.Ptr);
 666     AST.add(adjustLoc(Loc));
 667     Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
 668   }
 669
 670   /// Check if we can emit a run-time no-alias check for \p Access.
 671   ///
 672   /// Returns true if we can emit a run-time no alias check for \p Access.
 673   /// If we can check this access, this also adds it to a dependence set and
 674   /// adds a run-time to check for it to \p RtCheck. If \p Assume is true,
 675   /// we will attempt to use additional run-time checks in order to get
 676   /// the bounds of the pointer.
 677   bool createCheckForAccess(RuntimePointerChecking &RtCheck,
 678                             MemAccessInfo Access, Type *AccessTy,
 679                             const DenseMap<Value *, const SCEV *> &Strides,
 680                             DenseMap<Value *, unsigned> &DepSetId,
 681                             Loop *TheLoop, unsigned &RunningDepId,
 682                             unsigned ASId, bool ShouldCheckStride, bool Assume);
 683
 684   /// Check whether we can check the pointers at runtime for
 685   /// non-intersection.
 686   ///
 687   /// Returns true if we need no check or if we do and we can generate them
 688   /// (i.e. the pointers have computable bounds).
 689   bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
 690                        Loop *TheLoop, const DenseMap<Value *, const SCEV *> &Strides,
 691                        Value *&UncomputablePtr, bool ShouldCheckWrap = false);
 692
 693   /// Goes over all memory accesses, checks whether a RT check is needed
 694   /// and builds sets of dependent accesses.
 695   void buildDependenceSets() {
 696     processMemAccesses();
 697   }
 698
 699   /// Initial processing of memory accesses determined that we need to
 700   /// perform dependency checking.
 701   ///
 702   /// Note that this can later be cleared if we retry memcheck analysis without
 703   /// dependency checking (i.e. FoundNonConstantDistanceDependence).
 704   bool isDependencyCheckNeeded() const { return !CheckDeps.empty(); }
 705
 706   /// We decided that no dependence analysis would be used.  Reset the state.
 707   void resetDepChecks(MemoryDepChecker &DepChecker) {
 708     CheckDeps.clear();
 709     DepChecker.clearDependences();
 710   }
 711
 712   const MemAccessInfoList &getDependenciesToCheck() const { return CheckDeps; }
 713
 714 private:
 715   typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
 716
 717   /// Adjust the MemoryLocation so that it represents accesses to this
 718   /// location across all iterations, rather than a single one.
 719   MemoryLocation adjustLoc(MemoryLocation Loc) const {
 720     // The accessed location varies within the loop, but remains within the
 721     // underlying object.
 722     Loc.Size = LocationSize::beforeOrAfterPointer();
 723     Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope);
 724     Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias);
 725     return Loc;
 726   }
 727
 728   /// Drop alias scopes that are only valid within a single loop iteration.
 729   MDNode *adjustAliasScopeList(MDNode *ScopeList) const {
 730     if (!ScopeList)
 731       return nullptr;
 732
 733     // For the sake of simplicity, drop the whole scope list if any scope is
 734     // iteration-local.
 735     if (any_of(ScopeList->operands(), [&](Metadata *Scope) {
 736           return LoopAliasScopes.contains(cast<MDNode>(Scope));
 737         }))
 738       return nullptr;
 739
 740     return ScopeList;
 741   }
 742
 743   /// Go over all memory access and check whether runtime pointer checks
 744   /// are needed and build sets of dependency check candidates.
 745   void processMemAccesses();
 746
 747   /// Map of all accesses. Values are the types used to access memory pointed to
 748   /// by the pointer.
 749   PtrAccessMap Accesses;
 750
 751   /// The loop being checked.
 752   const Loop *TheLoop;
 753
 754   /// List of accesses that need a further dependence check.
 755   MemAccessInfoList CheckDeps;
 756
 757   /// Set of pointers that are read only.
 758   SmallPtrSet<Value*, 16> ReadOnlyPtr;
 759
 760   /// Batched alias analysis results.
 761   BatchAAResults BAA;
 762
 763   /// An alias set tracker to partition the access set by underlying object and
 764   //intrinsic property (such as TBAA metadata).
 765   AliasSetTracker AST;
 766
 767   /// The LoopInfo of the loop being checked.
 768   const LoopInfo *LI;
 769
 770   /// Sets of potentially dependent accesses - members of one set share an
 771   /// underlying pointer. The set "CheckDeps" identfies which sets really need a
 772   /// dependence check.
 773   MemoryDepChecker::DepCandidates &DepCands;
 774
 775   /// Initial processing of memory accesses determined that we may need
 776   /// to add memchecks.  Perform the analysis to determine the necessary checks.
 777   ///
 778   /// Note that, this is different from isDependencyCheckNeeded.  When we retry
 779   /// memcheck analysis without dependency checking
 780   /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
 781   /// cleared while this remains set if we have potentially dependent accesses.
 782   bool IsRTCheckAnalysisNeeded = false;
 783
 784   /// The SCEV predicate containing all the SCEV-related assumptions.
 785   PredicatedScalarEvolution &PSE;
 786
 787   DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects;
 788
 789   /// Alias scopes that are declared inside the loop, and as such not valid
 790   /// across iterations.
 791   SmallPtrSetImpl<MDNode *> &LoopAliasScopes;
 792 };
 793
 794 } // end anonymous namespace
 795
 796 /// Check whether a pointer can participate in a runtime bounds check.
 797 /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
 798 /// by adding run-time checks (overflow checks) if necessary.
 799 static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
 800                                 const SCEV *PtrScev, Loop *L, bool Assume) {
 801   // The bounds for loop-invariant pointer is trivial.
 802   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
 803     return true;
 804
 805   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
 806
 807   if (!AR && Assume)
 808     AR = PSE.getAsAddRec(Ptr);
 809
 810   if (!AR)
 811     return false;
 812
 813   return AR->isAffine();
 814 }
 815
 816 /// Check whether a pointer address cannot wrap.
 817 static bool isNoWrap(PredicatedScalarEvolution &PSE,
 818                      const DenseMap<Value *, const SCEV *> &Strides, Value *Ptr,
 819                      Type *AccessTy, Loop *L, bool Assume) {
 820   const SCEV *PtrScev = PSE.getSCEV(Ptr);
 821   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
 822     return true;
 823
 824   return getPtrStride(PSE, AccessTy, Ptr, L, Strides, Assume).has_value() ||
 825          PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
 826 }
 827
 828 static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
 829                           function_ref<void(Value *)> AddPointer) {
 830   SmallPtrSet<Value *, 8> Visited;
 831   SmallVector<Value *> WorkList;
 832   WorkList.push_back(StartPtr);
 833
 834   while (!WorkList.empty()) {
 835     Value *Ptr = WorkList.pop_back_val();
 836     if (!Visited.insert(Ptr).second)
 837       continue;
 838     auto *PN = dyn_cast<PHINode>(Ptr);
 839     // SCEV does not look through non-header PHIs inside the loop. Such phis
 840     // can be analyzed by adding separate accesses for each incoming pointer
 841     // value.
 842     if (PN && InnermostLoop.contains(PN->getParent()) &&
 843         PN->getParent() != InnermostLoop.getHeader()) {
 844       for (const Use &Inc : PN->incoming_values())
 845         WorkList.push_back(Inc);
 846     } else
 847       AddPointer(Ptr);
 848   }
 849 }
 850
 851 // Walk back through the IR for a pointer, looking for a select like the
 852 // following:
 853 //
 854 //  %offset = select i1 %cmp, i64 %a, i64 %b
 855 //  %addr = getelementptr double, double* %base, i64 %offset
 856 //  %ld = load double, double* %addr, align 8
 857 //
 858 // We won't be able to form a single SCEVAddRecExpr from this since the
 859 // address for each loop iteration depends on %cmp. We could potentially
 860 // produce multiple valid SCEVAddRecExprs, though, and check all of them for
 861 // memory safety/aliasing if needed.
 862 //
 863 // If we encounter some IR we don't yet handle, or something obviously fine
 864 // like a constant, then we just add the SCEV for that term to the list passed
 865 // in by the caller. If we have a node that may potentially yield a valid
 866 // SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
 867 // ourselves before adding to the list.
 868 static void findForkedSCEVs(
 869     ScalarEvolution *SE, const Loop *L, Value *Ptr,
 870     SmallVectorImpl<PointerIntPair<const SCEV *, 1, bool>> &ScevList,
 871     unsigned Depth) {
 872   // If our Value is a SCEVAddRecExpr, loop invariant, not an instruction, or
 873   // we've exceeded our limit on recursion, just return whatever we have
 874   // regardless of whether it can be used for a forked pointer or not, along
 875   // with an indication of whether it might be a poison or undef value.
 876   const SCEV *Scev = SE->getSCEV(Ptr);
 877   if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) ||
 878       !isa<Instruction>(Ptr) || Depth == 0) {
 879     ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
 880     return;
 881   }
 882
 883   Depth--;
 884
 885   auto UndefPoisonCheck = [](PointerIntPair<const SCEV *, 1, bool> S) {
 886     return get<1>(S);
 887   };
 888
 889   auto GetBinOpExpr = [&SE](unsigned Opcode, const SCEV *L, const SCEV *R) {
 890     switch (Opcode) {
 891     case Instruction::Add:
 892       return SE->getAddExpr(L, R);
 893     case Instruction::Sub:
 894       return SE->getMinusSCEV(L, R);
 895     default:
 896       llvm_unreachable("Unexpected binary operator when walking ForkedPtrs");
 897     }
 898   };
 899
 900   Instruction *I = cast<Instruction>(Ptr);
 901   unsigned Opcode = I->getOpcode();
 902   switch (Opcode) {
 903   case Instruction::GetElementPtr: {
 904     auto *GEP = cast<GetElementPtrInst>(I);
 905     Type *SourceTy = GEP->getSourceElementType();
 906     // We only handle base + single offset GEPs here for now.
 907     // Not dealing with preexisting gathers yet, so no vectors.
 908     if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
 909       ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP));
 910       break;
 911     }
 912     SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> BaseScevs;
 913     SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> OffsetScevs;
 914     findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth);
 915     findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth);
 916
 917     // See if we need to freeze our fork...
 918     bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) ||
 919                        any_of(OffsetScevs, UndefPoisonCheck);
 920
 921     // Check that we only have a single fork, on either the base or the offset.
 922     // Copy the SCEV across for the one without a fork in order to generate
 923     // the full SCEV for both sides of the GEP.
 924     if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)
 925       BaseScevs.push_back(BaseScevs[0]);
 926     else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)
 927       OffsetScevs.push_back(OffsetScevs[0]);
 928     else {
 929       ScevList.emplace_back(Scev, NeedsFreeze);
 930       break;
 931     }
 932
 933     // Find the pointer type we need to extend to.
 934     Type *IntPtrTy = SE->getEffectiveSCEVType(
 935         SE->getSCEV(GEP->getPointerOperand())->getType());
 936
 937     // Find the size of the type being pointed to. We only have a single
 938     // index term (guarded above) so we don't need to index into arrays or
 939     // structures, just get the size of the scalar value.
 940     const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy);
 941
 942     // Scale up the offsets by the size of the type, then add to the bases.
 943     const SCEV *Scaled1 = SE->getMulExpr(
 944         Size, SE->getTruncateOrSignExtend(get<0>(OffsetScevs[0]), IntPtrTy));
 945     const SCEV *Scaled2 = SE->getMulExpr(
 946         Size, SE->getTruncateOrSignExtend(get<0>(OffsetScevs[1]), IntPtrTy));
 947     ScevList.emplace_back(SE->getAddExpr(get<0>(BaseScevs[0]), Scaled1),
 948                           NeedsFreeze);
 949     ScevList.emplace_back(SE->getAddExpr(get<0>(BaseScevs[1]), Scaled2),
 950                           NeedsFreeze);
 951     break;
 952   }
 953   case Instruction::Select: {
 954     SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> ChildScevs;
 955     // A select means we've found a forked pointer, but we currently only
 956     // support a single select per pointer so if there's another behind this
 957     // then we just bail out and return the generic SCEV.
 958     findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
 959     findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth);
 960     if (ChildScevs.size() == 2) {
 961       ScevList.push_back(ChildScevs[0]);
 962       ScevList.push_back(ChildScevs[1]);
 963     } else
 964       ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
 965     break;
 966   }
 967   case Instruction::PHI: {
 968     SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> ChildScevs;
 969     // A phi means we've found a forked pointer, but we currently only
 970     // support a single phi per pointer so if there's another behind this
 971     // then we just bail out and return the generic SCEV.
 972     if (I->getNumOperands() == 2) {
 973       findForkedSCEVs(SE, L, I->getOperand(0), ChildScevs, Depth);
 974       findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
 975     }
 976     if (ChildScevs.size() == 2) {
 977       ScevList.push_back(ChildScevs[0]);
 978       ScevList.push_back(ChildScevs[1]);
 979     } else
 980       ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
 981     break;
 982   }
 983   case Instruction::Add:
 984   case Instruction::Sub: {
 985     SmallVector<PointerIntPair<const SCEV *, 1, bool>> LScevs;
 986     SmallVector<PointerIntPair<const SCEV *, 1, bool>> RScevs;
 987     findForkedSCEVs(SE, L, I->getOperand(0), LScevs, Depth);
 988     findForkedSCEVs(SE, L, I->getOperand(1), RScevs, Depth);
 989
 990     // See if we need to freeze our fork...
 991     bool NeedsFreeze =
 992         any_of(LScevs, UndefPoisonCheck) || any_of(RScevs, UndefPoisonCheck);
 993
 994     // Check that we only have a single fork, on either the left or right side.
 995     // Copy the SCEV across for the one without a fork in order to generate
 996     // the full SCEV for both sides of the BinOp.
 997     if (LScevs.size() == 2 && RScevs.size() == 1)
 998       RScevs.push_back(RScevs[0]);
 999     else if (RScevs.size() == 2 && LScevs.size() == 1)
1000       LScevs.push_back(LScevs[0]);
1001     else {
1002       ScevList.emplace_back(Scev, NeedsFreeze);
1003       break;
1004     }
1005
1006     ScevList.emplace_back(
1007         GetBinOpExpr(Opcode, get<0>(LScevs[0]), get<0>(RScevs[0])),
1008         NeedsFreeze);
1009     ScevList.emplace_back(
1010         GetBinOpExpr(Opcode, get<0>(LScevs[1]), get<0>(RScevs[1])),
1011         NeedsFreeze);
1012     break;
1013   }
1014   default:
1015     // Just return the current SCEV if we haven't handled the instruction yet.
1016     LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
1017     ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1018     break;
1019   }
1020 }
1021
1022 static SmallVector<PointerIntPair<const SCEV *, 1, bool>>
1023 findForkedPointer(PredicatedScalarEvolution &PSE,
1024                   const DenseMap<Value *, const SCEV *> &StridesMap, Value *Ptr,
1025                   const Loop *L) {
1026   ScalarEvolution *SE = PSE.getSE();
1027   assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
1028   SmallVector<PointerIntPair<const SCEV *, 1, bool>> Scevs;
1029   findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth);
1030
1031   // For now, we will only accept a forked pointer with two possible SCEVs
1032   // that are either SCEVAddRecExprs or loop invariant.
1033   if (Scevs.size() == 2 &&
1034       (isa<SCEVAddRecExpr>(get<0>(Scevs[0])) ||
1035        SE->isLoopInvariant(get<0>(Scevs[0]), L)) &&
1036       (isa<SCEVAddRecExpr>(get<0>(Scevs[1])) ||
1037        SE->isLoopInvariant(get<0>(Scevs[1]), L))) {
1038     LLVM_DEBUG(dbgs() << "LAA: Found forked pointer: " << *Ptr << "\n");
1039     LLVM_DEBUG(dbgs() << "\t(1) " << *get<0>(Scevs[0]) << "\n");
1040     LLVM_DEBUG(dbgs() << "\t(2) " << *get<0>(Scevs[1]) << "\n");
1041     return Scevs;
1042   }
1043
1044   return {{replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false}};
1045 }
1046
1047 bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
1048                                           MemAccessInfo Access, Type *AccessTy,
1049                                           const DenseMap<Value *, const SCEV *> &StridesMap,
1050                                           DenseMap<Value *, unsigned> &DepSetId,
1051                                           Loop *TheLoop, unsigned &RunningDepId,
1052                                           unsigned ASId, bool ShouldCheckWrap,
1053                                           bool Assume) {
1054   Value *Ptr = Access.getPointer();
1055
1056   SmallVector<PointerIntPair<const SCEV *, 1, bool>> TranslatedPtrs =
1057       findForkedPointer(PSE, StridesMap, Ptr, TheLoop);
1058
1059   for (const auto &P : TranslatedPtrs) {
1060     const SCEV *PtrExpr = get<0>(P);
1061     if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume))
1062       return false;
1063
1064     // When we run after a failing dependency check we have to make sure
1065     // we don't have wrapping pointers.
1066     if (ShouldCheckWrap) {
1067       // Skip wrap checking when translating pointers.
1068       if (TranslatedPtrs.size() > 1)
1069         return false;
1070
1071       if (!isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop, Assume))
1072         return false;
1073     }
1074     // If there's only one option for Ptr, look it up after bounds and wrap
1075     // checking, because assumptions might have been added to PSE.
1076     if (TranslatedPtrs.size() == 1)
1077       TranslatedPtrs[0] = {replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr),
1078                            false};
1079   }
1080
1081   for (auto [PtrExpr, NeedsFreeze] : TranslatedPtrs) {
1082     // The id of the dependence set.
1083     unsigned DepId;
1084
1085     if (isDependencyCheckNeeded()) {
1086       Value *Leader = DepCands.getLeaderValue(Access).getPointer();
1087       unsigned &LeaderId = DepSetId[Leader];
1088       if (!LeaderId)
1089         LeaderId = RunningDepId++;
1090       DepId = LeaderId;
1091     } else
1092       // Each access has its own dependence set.
1093       DepId = RunningDepId++;
1094
1095     bool IsWrite = Access.getInt();
1096     RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
1097                    NeedsFreeze);
1098     LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
1099   }
1100
1101   return true;
1102 }
1103
1104 bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
1105                                      ScalarEvolution *SE, Loop *TheLoop,
1106                                      const DenseMap<Value *, const SCEV *> &StridesMap,
1107                                      Value *&UncomputablePtr, bool ShouldCheckWrap) {
1108   // Find pointers with computable bounds. We are going to use this information
1109   // to place a runtime bound check.
1110   bool CanDoRT = true;
1111
1112   bool MayNeedRTCheck = false;
1113   if (!IsRTCheckAnalysisNeeded) return true;
1114
1115   bool IsDepCheckNeeded = isDependencyCheckNeeded();
1116
1117   // We assign a consecutive id to access from different alias sets.
1118   // Accesses between different groups doesn't need to be checked.
1119   unsigned ASId = 0;
1120   for (const auto &AS : AST) {
1121     int NumReadPtrChecks = 0;
1122     int NumWritePtrChecks = 0;
1123     bool CanDoAliasSetRT = true;
1124     ++ASId;
1125     auto ASPointers = AS.getPointers();
1126
1127     // We assign consecutive id to access from different dependence sets.
1128     // Accesses within the same set don't need a runtime check.
1129     unsigned RunningDepId = 1;
1130     DenseMap<Value *, unsigned> DepSetId;
1131
1132     SmallVector<std::pair<MemAccessInfo, Type *>, 4> Retries;
1133
1134     // First, count how many write and read accesses are in the alias set. Also
1135     // collect MemAccessInfos for later.
1136     SmallVector<MemAccessInfo, 4> AccessInfos;
1137     for (const Value *ConstPtr : ASPointers) {
1138       Value *Ptr = const_cast<Value *>(ConstPtr);
1139       bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
1140       if (IsWrite)
1141         ++NumWritePtrChecks;
1142       else
1143         ++NumReadPtrChecks;
1144       AccessInfos.emplace_back(Ptr, IsWrite);
1145     }
1146
1147     // We do not need runtime checks for this alias set, if there are no writes
1148     // or a single write and no reads.
1149     if (NumWritePtrChecks == 0 ||
1150         (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
1151       assert((ASPointers.size() <= 1 ||
1152               all_of(ASPointers,
1153                      [this](const Value *Ptr) {
1154                        MemAccessInfo AccessWrite(const_cast<Value *>(Ptr),
1155                                                  true);
1156                        return DepCands.findValue(AccessWrite) == DepCands.end();
1157                      })) &&
1158              "Can only skip updating CanDoRT below, if all entries in AS "
1159              "are reads or there is at most 1 entry");
1160       continue;
1161     }
1162
1163     for (auto &Access : AccessInfos) {
1164       for (const auto &AccessTy : Accesses[Access]) {
1165         if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1166                                   DepSetId, TheLoop, RunningDepId, ASId,
1167                                   ShouldCheckWrap, false)) {
1168           LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
1169                             << *Access.getPointer() << '\n');
1170           Retries.emplace_back(Access, AccessTy);
1171           CanDoAliasSetRT = false;
1172         }
1173       }
1174     }
1175
1176     // Note that this function computes CanDoRT and MayNeedRTCheck
1177     // independently. For example CanDoRT=false, MayNeedRTCheck=false means that
1178     // we have a pointer for which we couldn't find the bounds but we don't
1179     // actually need to emit any checks so it does not matter.
1180     //
1181     // We need runtime checks for this alias set, if there are at least 2
1182     // dependence sets (in which case RunningDepId > 2) or if we need to re-try
1183     // any bound checks (because in that case the number of dependence sets is
1184     // incomplete).
1185     bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();
1186
1187     // We need to perform run-time alias checks, but some pointers had bounds
1188     // that couldn't be checked.
1189     if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
1190       // Reset the CanDoSetRt flag and retry all accesses that have failed.
1191       // We know that we need these checks, so we can now be more aggressive
1192       // and add further checks if required (overflow checks).
1193       CanDoAliasSetRT = true;
1194       for (const auto &[Access, AccessTy] : Retries) {
1195         if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1196                                   DepSetId, TheLoop, RunningDepId, ASId,
1197                                   ShouldCheckWrap, /*Assume=*/true)) {
1198           CanDoAliasSetRT = false;
1199           UncomputablePtr = Access.getPointer();
1200           break;
1201         }
1202       }
1203     }
1204
1205     CanDoRT &= CanDoAliasSetRT;
1206     MayNeedRTCheck |= NeedsAliasSetRTCheck;
1207     ++ASId;
1208   }
1209
1210   // If the pointers that we would use for the bounds comparison have different
1211   // address spaces, assume the values aren't directly comparable, so we can't
1212   // use them for the runtime check. We also have to assume they could
1213   // overlap. In the future there should be metadata for whether address spaces
1214   // are disjoint.
1215   unsigned NumPointers = RtCheck.Pointers.size();
1216   for (unsigned i = 0; i < NumPointers; ++i) {
1217     for (unsigned j = i + 1; j < NumPointers; ++j) {
1218       // Only need to check pointers between two different dependency sets.
1219       if (RtCheck.Pointers[i].DependencySetId ==
1220           RtCheck.Pointers[j].DependencySetId)
1221        continue;
1222       // Only need to check pointers in the same alias set.
1223       if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
1224         continue;
1225
1226       Value *PtrI = RtCheck.Pointers[i].PointerValue;
1227       Value *PtrJ = RtCheck.Pointers[j].PointerValue;
1228
1229       unsigned ASi = PtrI->getType()->getPointerAddressSpace();
1230       unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
1231       if (ASi != ASj) {
1232         LLVM_DEBUG(
1233             dbgs() << "LAA: Runtime check would require comparison between"
1234                       " different address spaces\n");
1235         return false;
1236       }
1237     }
1238   }
1239
1240   if (MayNeedRTCheck && CanDoRT)
1241     RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
1242
1243   LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
1244                     << " pointer comparisons.\n");
1245
1246   // If we can do run-time checks, but there are no checks, no runtime checks
1247   // are needed. This can happen when all pointers point to the same underlying
1248   // object for example.
1249   RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck;
1250
1251   bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
1252   if (!CanDoRTIfNeeded)
1253     RtCheck.reset();
1254   return CanDoRTIfNeeded;
1255 }
1256
1257 void AccessAnalysis::processMemAccesses() {
1258   // We process the set twice: first we process read-write pointers, last we
1259   // process read-only pointers. This allows us to skip dependence tests for
1260   // read-only pointers.
1261
1262   LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
1263   LLVM_DEBUG(dbgs() << "  AST: "; AST.dump());
1264   LLVM_DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
1265   LLVM_DEBUG({
1266     for (const auto &[A, _] : Accesses)
1267       dbgs() << "\t" << *A.getPointer() << " ("
1268              << (A.getInt() ? "write"
1269                             : (ReadOnlyPtr.count(A.getPointer()) ? "read-only"
1270                                                                  : "read"))
1271              << ")\n";
1272   });
1273
1274   // The AliasSetTracker has nicely partitioned our pointers by metadata
1275   // compatibility and potential for underlying-object overlap. As a result, we
1276   // only need to check for potential pointer dependencies within each alias
1277   // set.
1278   for (const auto &AS : AST) {
1279     // Note that both the alias-set tracker and the alias sets themselves used
1280     // ordered collections internally and so the iteration order here is
1281     // deterministic.
1282     auto ASPointers = AS.getPointers();
1283
1284     bool SetHasWrite = false;
1285
1286     // Map of pointers to last access encountered.
1287     typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
1288     UnderlyingObjToAccessMap ObjToLastAccess;
1289
1290     // Set of access to check after all writes have been processed.
1291     PtrAccessMap DeferredAccesses;
1292
1293     // Iterate over each alias set twice, once to process read/write pointers,
1294     // and then to process read-only pointers.
1295     for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
1296       bool UseDeferred = SetIteration > 0;
1297       PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
1298
1299       for (const Value *ConstPtr : ASPointers) {
1300         Value *Ptr = const_cast<Value *>(ConstPtr);
1301
1302         // For a single memory access in AliasSetTracker, Accesses may contain
1303         // both read and write, and they both need to be handled for CheckDeps.
1304         for (const auto &[AC, _] : S) {
1305           if (AC.getPointer() != Ptr)
1306             continue;
1307
1308           bool IsWrite = AC.getInt();
1309
1310           // If we're using the deferred access set, then it contains only
1311           // reads.
1312           bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
1313           if (UseDeferred && !IsReadOnlyPtr)
1314             continue;
1315           // Otherwise, the pointer must be in the PtrAccessSet, either as a
1316           // read or a write.
1317           assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
1318                   S.count(MemAccessInfo(Ptr, false))) &&
1319                  "Alias-set pointer not in the access set?");
1320
1321           MemAccessInfo Access(Ptr, IsWrite);
1322           DepCands.insert(Access);
1323
1324           // Memorize read-only pointers for later processing and skip them in
1325           // the first round (they need to be checked after we have seen all
1326           // write pointers). Note: we also mark pointer that are not
1327           // consecutive as "read-only" pointers (so that we check
1328           // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
1329           if (!UseDeferred && IsReadOnlyPtr) {
1330             // We only use the pointer keys, the types vector values don't
1331             // matter.
1332             DeferredAccesses.insert({Access, {}});
1333             continue;
1334           }
1335
1336           // If this is a write - check other reads and writes for conflicts. If
1337           // this is a read only check other writes for conflicts (but only if
1338           // there is no other write to the ptr - this is an optimization to
1339           // catch "a[i] = a[i] + " without having to do a dependence check).
1340           if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
1341             CheckDeps.push_back(Access);
1342             IsRTCheckAnalysisNeeded = true;
1343           }
1344
1345           if (IsWrite)
1346             SetHasWrite = true;
1347
1348           // Create sets of pointers connected by a shared alias set and
1349           // underlying object.
1350           typedef SmallVector<const Value *, 16> ValueVector;
1351           ValueVector TempObjects;
1352
1353           UnderlyingObjects[Ptr] = {};
1354           SmallVector<const Value *, 16> &UOs = UnderlyingObjects[Ptr];
1355           ::getUnderlyingObjects(Ptr, UOs, LI);
1356           LLVM_DEBUG(dbgs()
1357                      << "Underlying objects for pointer " << *Ptr << "\n");
1358           for (const Value *UnderlyingObj : UOs) {
1359             // nullptr never alias, don't join sets for pointer that have "null"
1360             // in their UnderlyingObjects list.
1361             if (isa<ConstantPointerNull>(UnderlyingObj) &&
1362                 !NullPointerIsDefined(
1363                     TheLoop->getHeader()->getParent(),
1364                     UnderlyingObj->getType()->getPointerAddressSpace()))
1365               continue;
1366
1367             UnderlyingObjToAccessMap::iterator Prev =
1368                 ObjToLastAccess.find(UnderlyingObj);
1369             if (Prev != ObjToLastAccess.end())
1370               DepCands.unionSets(Access, Prev->second);
1371
1372             ObjToLastAccess[UnderlyingObj] = Access;
1373             LLVM_DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
1374           }
1375         }
1376       }
1377     }
1378   }
1379 }
1380
1381 /// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
1382 /// i.e. monotonically increasing/decreasing.
1383 static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
1384                            PredicatedScalarEvolution &PSE, const Loop *L) {
1385
1386   // FIXME: This should probably only return true for NUW.
1387   if (AR->getNoWrapFlags(SCEV::NoWrapMask))
1388     return true;
1389
1390   if (PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
1391     return true;
1392
1393   // Scalar evolution does not propagate the non-wrapping flags to values that
1394   // are derived from a non-wrapping induction variable because non-wrapping
1395   // could be flow-sensitive.
1396   //
1397   // Look through the potentially overflowing instruction to try to prove
1398   // non-wrapping for the *specific* value of Ptr.
1399
1400   // The arithmetic implied by an nusw GEP can't overflow.
1401   const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
1402   if (!GEP || !GEP->hasNoUnsignedSignedWrap())
1403     return false;
1404
1405   // Make sure there is only one non-const index and analyze that.
1406   Value *NonConstIndex = nullptr;
1407   for (Value *Index : GEP->indices())
1408     if (!isa<ConstantInt>(Index)) {
1409       if (NonConstIndex)
1410         return false;
1411       NonConstIndex = Index;
1412     }
1413   if (!NonConstIndex)
1414     // The recurrence is on the pointer, ignore for now.
1415     return false;
1416
1417   // The index in GEP is signed.  It is non-wrapping if it's derived from a NSW
1418   // AddRec using a NSW operation.
1419   if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
1420     if (OBO->hasNoSignedWrap() &&
1421         // Assume constant for other the operand so that the AddRec can be
1422         // easily found.
1423         isa<ConstantInt>(OBO->getOperand(1))) {
1424       const SCEV *OpScev = PSE.getSCEV(OBO->getOperand(0));
1425
1426       if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
1427         return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
1428     }
1429
1430   return false;
1431 }
1432
1433 /// Check whether the access through \p Ptr has a constant stride.
1434 std::optional<int64_t>
1435 llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
1436                    const Loop *Lp,
1437                    const DenseMap<Value *, const SCEV *> &StridesMap,
1438                    bool Assume, bool ShouldCheckWrap) {
1439   const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
1440   if (PSE.getSE()->isLoopInvariant(PtrScev, Lp))
1441     return 0;
1442
1443   Type *Ty = Ptr->getType();
1444   assert(Ty->isPointerTy() && "Unexpected non-ptr");
1445   if (isa<ScalableVectorType>(AccessTy)) {
1446     LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
1447                       << "\n");
1448     return std::nullopt;
1449   }
1450
1451   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
1452   if (Assume && !AR)
1453     AR = PSE.getAsAddRec(Ptr);
1454
1455   if (!AR) {
1456     LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1457                       << " SCEV: " << *PtrScev << "\n");
1458     return std::nullopt;
1459   }
1460
1461   // The access function must stride over the innermost loop.
1462   if (Lp != AR->getLoop()) {
1463     LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
1464                       << *Ptr << " SCEV: " << *AR << "\n");
1465     return std::nullopt;
1466   }
1467
1468   // Check the step is constant.
1469   const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
1470
1471   // Calculate the pointer stride and check if it is constant.
1472   const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
1473   if (!C) {
1474     LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
1475                       << " SCEV: " << *AR << "\n");
1476     return std::nullopt;
1477   }
1478
1479   const auto &DL = Lp->getHeader()->getDataLayout();
1480   TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
1481   int64_t Size = AllocSize.getFixedValue();
1482   const APInt &APStepVal = C->getAPInt();
1483
1484   // Huge step value - give up.
1485   if (APStepVal.getBitWidth() > 64)
1486     return std::nullopt;
1487
1488   int64_t StepVal = APStepVal.getSExtValue();
1489
1490   // Strided access.
1491   int64_t Stride = StepVal / Size;
1492   int64_t Rem = StepVal % Size;
1493   if (Rem)
1494     return std::nullopt;
1495
1496   if (!ShouldCheckWrap)
1497     return Stride;
1498
1499   // The address calculation must not wrap. Otherwise, a dependence could be
1500   // inverted.
1501   if (isNoWrapAddRec(Ptr, AR, PSE, Lp))
1502     return Stride;
1503
1504   // An nusw getelementptr that is an AddRec cannot wrap. If it would wrap,
1505   // the distance between the previously accessed location and the wrapped
1506   // location will be larger than half the pointer index type space. In that
1507   // case, the GEP would be  poison and any memory access dependent on it would
1508   // be immediate UB when executed.
1509   if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
1510       GEP && GEP->hasNoUnsignedSignedWrap())
1511     return Stride;
1512
1513   // If the null pointer is undefined, then a access sequence which would
1514   // otherwise access it can be assumed not to unsigned wrap.  Note that this
1515   // assumes the object in memory is aligned to the natural alignment.
1516   unsigned AddrSpace = Ty->getPointerAddressSpace();
1517   if (!NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace) &&
1518       (Stride == 1 || Stride == -1))
1519     return Stride;
1520
1521   if (Assume) {
1522     PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1523     LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
1524                       << "LAA:   Pointer: " << *Ptr << "\n"
1525                       << "LAA:   SCEV: " << *AR << "\n"
1526                       << "LAA:   Added an overflow assumption\n");
1527     return Stride;
1528   }
1529   LLVM_DEBUG(
1530       dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
1531              << *Ptr << " SCEV: " << *AR << "\n");
1532   return std::nullopt;
1533 }
1534
1535 std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
1536                                          Type *ElemTyB, Value *PtrB,
1537                                          const DataLayout &DL,
1538                                          ScalarEvolution &SE, bool StrictCheck,
1539                                          bool CheckType) {
1540   assert(PtrA && PtrB && "Expected non-nullptr pointers.");
1541
1542   // Make sure that A and B are different pointers.
1543   if (PtrA == PtrB)
1544     return 0;
1545
1546   // Make sure that the element types are the same if required.
1547   if (CheckType && ElemTyA != ElemTyB)
1548     return std::nullopt;
1549
1550   unsigned ASA = PtrA->getType()->getPointerAddressSpace();
1551   unsigned ASB = PtrB->getType()->getPointerAddressSpace();
1552
1553   // Check that the address spaces match.
1554   if (ASA != ASB)
1555     return std::nullopt;
1556   unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
1557
1558   APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1559   const Value *PtrA1 = PtrA->stripAndAccumulateConstantOffsets(
1560       DL, OffsetA, /*AllowNonInbounds=*/true);
1561   const Value *PtrB1 = PtrB->stripAndAccumulateConstantOffsets(
1562       DL, OffsetB, /*AllowNonInbounds=*/true);
1563
1564   int Val;
1565   if (PtrA1 == PtrB1) {
1566     // Retrieve the address space again as pointer stripping now tracks through
1567     // `addrspacecast`.
1568     ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
1569     ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
1570     // Check that the address spaces match and that the pointers are valid.
1571     if (ASA != ASB)
1572       return std::nullopt;
1573
1574     IdxWidth = DL.getIndexSizeInBits(ASA);
1575     OffsetA = OffsetA.sextOrTrunc(IdxWidth);
1576     OffsetB = OffsetB.sextOrTrunc(IdxWidth);
1577
1578     OffsetB -= OffsetA;
1579     Val = OffsetB.getSExtValue();
1580   } else {
1581     // Otherwise compute the distance with SCEV between the base pointers.
1582     const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1583     const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1584     std::optional<APInt> Diff =
1585         SE.computeConstantDifference(PtrSCEVB, PtrSCEVA);
1586     if (!Diff)
1587       return std::nullopt;
1588     Val = Diff->getSExtValue();
1589   }
1590   int Size = DL.getTypeStoreSize(ElemTyA);
1591   int Dist = Val / Size;
1592
1593   // Ensure that the calculated distance matches the type-based one after all
1594   // the bitcasts removal in the provided pointers.
1595   if (!StrictCheck || Dist * Size == Val)
1596     return Dist;
1597   return std::nullopt;
1598 }
1599
1600 bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
1601                            const DataLayout &DL, ScalarEvolution &SE,
1602                            SmallVectorImpl<unsigned> &SortedIndices) {
1603   assert(llvm::all_of(
1604              VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
1605          "Expected list of pointer operands.");
1606   // Walk over the pointers, and map each of them to an offset relative to
1607   // first pointer in the array.
1608   Value *Ptr0 = VL[0];
1609
1610   using DistOrdPair = std::pair<int64_t, int>;
1611   auto Compare = llvm::less_first();
1612   std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
1613   Offsets.emplace(0, 0);
1614   bool IsConsecutive = true;
1615   for (auto [Idx, Ptr] : drop_begin(enumerate(VL))) {
1616     std::optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
1617                                               /*StrictCheck=*/true);
1618     if (!Diff)
1619       return false;
1620
1621     // Check if the pointer with the same offset is found.
1622     int64_t Offset = *Diff;
1623     auto [It, IsInserted] = Offsets.emplace(Offset, Idx);
1624     if (!IsInserted)
1625       return false;
1626     // Consecutive order if the inserted element is the last one.
1627     IsConsecutive &= std::next(It) == Offsets.end();
1628   }
1629   SortedIndices.clear();
1630   if (!IsConsecutive) {
1631     // Fill SortedIndices array only if it is non-consecutive.
1632     SortedIndices.resize(VL.size());
1633     for (auto [Idx, Off] : enumerate(Offsets))
1634       SortedIndices[Idx] = Off.second;
1635   }
1636   return true;
1637 }
1638
1639 /// Returns true if the memory operations \p A and \p B are consecutive.
1640 bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
1641                                ScalarEvolution &SE, bool CheckType) {
1642   Value *PtrA = getLoadStorePointerOperand(A);
1643   Value *PtrB = getLoadStorePointerOperand(B);
1644   if (!PtrA || !PtrB)
1645     return false;
1646   Type *ElemTyA = getLoadStoreType(A);
1647   Type *ElemTyB = getLoadStoreType(B);
1648   std::optional<int> Diff =
1649       getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
1650                       /*StrictCheck=*/true, CheckType);
1651   return Diff && *Diff == 1;
1652 }
1653
1654 void MemoryDepChecker::addAccess(StoreInst *SI) {
1655   visitPointers(SI->getPointerOperand(), *InnermostLoop,
1656                 [this, SI](Value *Ptr) {
1657                   Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
1658                   InstMap.push_back(SI);
1659                   ++AccessIdx;
1660                 });
1661 }
1662
1663 void MemoryDepChecker::addAccess(LoadInst *LI) {
1664   visitPointers(LI->getPointerOperand(), *InnermostLoop,
1665                 [this, LI](Value *Ptr) {
1666                   Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
1667                   InstMap.push_back(LI);
1668                   ++AccessIdx;
1669                 });
1670 }
1671
1672 MemoryDepChecker::VectorizationSafetyStatus
1673 MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
1674   switch (Type) {
1675   case NoDep:
1676   case Forward:
1677   case BackwardVectorizable:
1678     return VectorizationSafetyStatus::Safe;
1679
1680   case Unknown:
1681     return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
1682   case ForwardButPreventsForwarding:
1683   case Backward:
1684   case BackwardVectorizableButPreventsForwarding:
1685   case IndirectUnsafe:
1686     return VectorizationSafetyStatus::Unsafe;
1687   }
1688   llvm_unreachable("unexpected DepType!");
1689 }
1690
1691 bool MemoryDepChecker::Dependence::isBackward() const {
1692   switch (Type) {
1693   case NoDep:
1694   case Forward:
1695   case ForwardButPreventsForwarding:
1696   case Unknown:
1697   case IndirectUnsafe:
1698     return false;
1699
1700   case BackwardVectorizable:
1701   case Backward:
1702   case BackwardVectorizableButPreventsForwarding:
1703     return true;
1704   }
1705   llvm_unreachable("unexpected DepType!");
1706 }
1707
1708 bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
1709   return isBackward() || Type == Unknown || Type == IndirectUnsafe;
1710 }
1711
1712 bool MemoryDepChecker::Dependence::isForward() const {
1713   switch (Type) {
1714   case Forward:
1715   case ForwardButPreventsForwarding:
1716     return true;
1717
1718   case NoDep:
1719   case Unknown:
1720   case BackwardVectorizable:
1721   case Backward:
1722   case BackwardVectorizableButPreventsForwarding:
1723   case IndirectUnsafe:
1724     return false;
1725   }
1726   llvm_unreachable("unexpected DepType!");
1727 }
1728
1729 bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1730                                                     uint64_t TypeByteSize) {
1731   // If loads occur at a distance that is not a multiple of a feasible vector
1732   // factor store-load forwarding does not take place.
1733   // Positive dependences might cause troubles because vectorizing them might
1734   // prevent store-load forwarding making vectorized code run a lot slower.
1735   //   a[i] = a[i-3] ^ a[i-8];
1736   //   The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
1737   //   hence on your typical architecture store-load forwarding does not take
1738   //   place. Vectorizing in such cases does not make sense.
1739   // Store-load forwarding distance.
1740
1741   // After this many iterations store-to-load forwarding conflicts should not
1742   // cause any slowdowns.
1743   const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1744   // Maximum vector factor.
1745   uint64_t MaxVFWithoutSLForwardIssues = std::min(
1746       VectorizerParams::MaxVectorWidth * TypeByteSize, MinDepDistBytes);
1747
1748   // Compute the smallest VF at which the store and load would be misaligned.
1749   for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
1750        VF *= 2) {
1751     // If the number of vector iteration between the store and the load are
1752     // small we could incur conflicts.
1753     if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1754       MaxVFWithoutSLForwardIssues = (VF >> 1);
1755       break;
1756     }
1757   }
1758
1759   if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
1760     LLVM_DEBUG(
1761         dbgs() << "LAA: Distance " << Distance
1762                << " that could cause a store-load forwarding conflict\n");
1763     return true;
1764   }
1765
1766   if (MaxVFWithoutSLForwardIssues < MinDepDistBytes &&
1767       MaxVFWithoutSLForwardIssues !=
1768           VectorizerParams::MaxVectorWidth * TypeByteSize)
1769     MinDepDistBytes = MaxVFWithoutSLForwardIssues;
1770   return false;
1771 }
1772
1773 void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
1774   if (Status < S)
1775     Status = S;
1776 }
1777
1778 /// Given a dependence-distance \p Dist between two
1779 /// memory accesses, that have strides in the same direction whose absolute
1780 /// value of the maximum stride is given in \p MaxStride, and that have the same
1781 /// type size \p TypeByteSize, in a loop whose maximum backedge taken count is
1782 /// \p MaxBTC, check if it is possible to prove statically that the dependence
1783 /// distance is larger than the range that the accesses will travel through the
1784 /// execution of the loop. If so, return true; false otherwise. This is useful
1785 /// for example in loops such as the following (PR31098):
1786 ///     for (i = 0; i < D; ++i) {
1787 ///                = out[i];
1788 ///       out[i+D] =
1789 ///     }
1790 static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1791                                      const SCEV &MaxBTC, const SCEV &Dist,
1792                                      uint64_t MaxStride,
1793                                      uint64_t TypeByteSize) {
1794
1795   // If we can prove that
1796   //      (**) |Dist| > MaxBTC * Step
1797   // where Step is the absolute stride of the memory accesses in bytes,
1798   // then there is no dependence.
1799   //
1800   // Rationale:
1801   // We basically want to check if the absolute distance (|Dist/Step|)
1802   // is >= the loop iteration count (or > MaxBTC).
1803   // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1804   // Section 4.2.1); Note, that for vectorization it is sufficient to prove
1805   // that the dependence distance is >= VF; This is checked elsewhere.
1806   // But in some cases we can prune dependence distances early, and
1807   // even before selecting the VF, and without a runtime test, by comparing
1808   // the distance against the loop iteration count. Since the vectorized code
1809   // will be executed only if LoopCount >= VF, proving distance >= LoopCount
1810   // also guarantees that distance >= VF.
1811   //
1812   const uint64_t ByteStride = MaxStride * TypeByteSize;
1813   const SCEV *Step = SE.getConstant(MaxBTC.getType(), ByteStride);
1814   const SCEV *Product = SE.getMulExpr(&MaxBTC, Step);
1815
1816   const SCEV *CastedDist = &Dist;
1817   const SCEV *CastedProduct = Product;
1818   uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
1819   uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
1820
1821   // The dependence distance can be positive/negative, so we sign extend Dist;
1822   // The multiplication of the absolute stride in bytes and the
1823   // backedgeTakenCount is non-negative, so we zero extend Product.
1824   if (DistTypeSizeBits > ProductTypeSizeBits)
1825     CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
1826   else
1827     CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
1828
1829   // Is  Dist - (MaxBTC * Step) > 0 ?
1830   // (If so, then we have proven (**) because |Dist| >= Dist)
1831   const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
1832   if (SE.isKnownPositive(Minus))
1833     return true;
1834
1835   // Second try: Is  -Dist - (MaxBTC * Step) > 0 ?
1836   // (If so, then we have proven (**) because |Dist| >= -1*Dist)
1837   const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
1838   Minus = SE.getMinusSCEV(NegDist, CastedProduct);
1839   return SE.isKnownPositive(Minus);
1840 }
1841
1842 /// Check the dependence for two accesses with the same stride \p Stride.
1843 /// \p Distance is the positive distance and \p TypeByteSize is type size in
1844 /// bytes.
1845 ///
1846 /// \returns true if they are independent.
1847 static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
1848                                           uint64_t TypeByteSize) {
1849   assert(Stride > 1 && "The stride must be greater than 1");
1850   assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
1851   assert(Distance > 0 && "The distance must be non-zero");
1852
1853   // Skip if the distance is not multiple of type byte size.
1854   if (Distance % TypeByteSize)
1855     return false;
1856
1857   uint64_t ScaledDist = Distance / TypeByteSize;
1858
1859   // No dependence if the scaled distance is not multiple of the stride.
1860   // E.g.
1861   //      for (i = 0; i < 1024 ; i += 4)
1862   //        A[i+2] = A[i] + 1;
1863   //
1864   // Two accesses in memory (scaled distance is 2, stride is 4):
1865   //     | A[0] |      |      |      | A[4] |      |      |      |
1866   //     |      |      | A[2] |      |      |      | A[6] |      |
1867   //
1868   // E.g.
1869   //      for (i = 0; i < 1024 ; i += 3)
1870   //        A[i+4] = A[i] + 1;
1871   //
1872   // Two accesses in memory (scaled distance is 4, stride is 3):
1873   //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
1874   //     |      |      |      |      | A[4] |      |      | A[7] |      |
1875   return ScaledDist % Stride;
1876 }
1877
1878 std::variant<MemoryDepChecker::Dependence::DepType,
1879              MemoryDepChecker::DepDistanceStrideAndSizeInfo>
1880 MemoryDepChecker::getDependenceDistanceStrideAndSize(
1881     const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
1882     const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) {
1883   const auto &DL = InnermostLoop->getHeader()->getDataLayout();
1884   auto &SE = *PSE.getSE();
1885   const auto &[APtr, AIsWrite] = A;
1886   const auto &[BPtr, BIsWrite] = B;
1887
1888   // Two reads are independent.
1889   if (!AIsWrite && !BIsWrite)
1890     return MemoryDepChecker::Dependence::NoDep;
1891
1892   Type *ATy = getLoadStoreType(AInst);
1893   Type *BTy = getLoadStoreType(BInst);
1894
1895   // We cannot check pointers in different address spaces.
1896   if (APtr->getType()->getPointerAddressSpace() !=
1897       BPtr->getType()->getPointerAddressSpace())
1898     return MemoryDepChecker::Dependence::Unknown;
1899
1900   std::optional<int64_t> StrideAPtr =
1901       getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true);
1902   std::optional<int64_t> StrideBPtr =
1903       getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true);
1904
1905   const SCEV *Src = PSE.getSCEV(APtr);
1906   const SCEV *Sink = PSE.getSCEV(BPtr);
1907
1908   // If the induction step is negative we have to invert source and sink of the
1909   // dependence when measuring the distance between them. We should not swap
1910   // AIsWrite with BIsWrite, as their uses expect them in program order.
1911   if (StrideAPtr && *StrideAPtr < 0) {
1912     std::swap(Src, Sink);
1913     std::swap(AInst, BInst);
1914     std::swap(ATy, BTy);
1915     std::swap(StrideAPtr, StrideBPtr);
1916   }
1917
1918   const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
1919
1920   LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
1921                     << "\n");
1922   LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
1923                     << ": " << *Dist << "\n");
1924
1925   // Check if we can prove that Sink only accesses memory after Src's end or
1926   // vice versa. At the moment this is limited to cases where either source or
1927   // sink are loop invariant to avoid compile-time increases. This is not
1928   // required for correctness.
1929   if (SE.isLoopInvariant(Src, InnermostLoop) ||
1930       SE.isLoopInvariant(Sink, InnermostLoop)) {
1931     const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
1932     const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
1933         InnermostLoop, Src, ATy, MaxBECount, PSE.getSE(), &PointerBounds);
1934     const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
1935         InnermostLoop, Sink, BTy, MaxBECount, PSE.getSE(), &PointerBounds);
1936     if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
1937         !isa<SCEVCouldNotCompute>(SrcEnd_) &&
1938         !isa<SCEVCouldNotCompute>(SinkStart_) &&
1939         !isa<SCEVCouldNotCompute>(SinkEnd_)) {
1940       if (!LoopGuards)
1941         LoopGuards.emplace(
1942             ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));
1943       auto SrcEnd = SE.applyLoopGuards(SrcEnd_, *LoopGuards);
1944       auto SinkStart = SE.applyLoopGuards(SinkStart_, *LoopGuards);
1945       if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart))
1946         return MemoryDepChecker::Dependence::NoDep;
1947
1948       auto SinkEnd = SE.applyLoopGuards(SinkEnd_, *LoopGuards);
1949       auto SrcStart = SE.applyLoopGuards(SrcStart_, *LoopGuards);
1950       if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart))
1951         return MemoryDepChecker::Dependence::NoDep;
1952     }
1953   }
1954
1955   // Need accesses with constant strides and the same direction for further
1956   // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
1957   // similar code or pointer arithmetic that could wrap in the address space.
1958
1959   // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and
1960   // not loop-invariant (stride will be 0 in that case), we cannot analyze the
1961   // dependence further and also cannot generate runtime checks.
1962   if (!StrideAPtr || !StrideBPtr) {
1963     LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
1964     return MemoryDepChecker::Dependence::IndirectUnsafe;
1965   }
1966
1967   int64_t StrideAPtrInt = *StrideAPtr;
1968   int64_t StrideBPtrInt = *StrideBPtr;
1969   LLVM_DEBUG(dbgs() << "LAA:  Src induction step: " << StrideAPtrInt
1970                     << " Sink induction step: " << StrideBPtrInt << "\n");
1971   // At least Src or Sink are loop invariant and the other is strided or
1972   // invariant. We can generate a runtime check to disambiguate the accesses.
1973   if (!StrideAPtrInt || !StrideBPtrInt)
1974     return MemoryDepChecker::Dependence::Unknown;
1975
1976   // Both Src and Sink have a constant stride, check if they are in the same
1977   // direction.
1978   if ((StrideAPtrInt > 0) != (StrideBPtrInt > 0)) {
1979     LLVM_DEBUG(
1980         dbgs() << "Pointer access with strides in different directions\n");
1981     return MemoryDepChecker::Dependence::Unknown;
1982   }
1983
1984   uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
1985   bool HasSameSize =
1986       DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
1987   if (!HasSameSize)
1988     TypeByteSize = 0;
1989
1990   StrideAPtrInt = std::abs(StrideAPtrInt);
1991   StrideBPtrInt = std::abs(StrideBPtrInt);
1992
1993   uint64_t MaxStride = std::max(StrideAPtrInt, StrideBPtrInt);
1994
1995   std::optional<uint64_t> CommonStride;
1996   if (StrideAPtrInt == StrideBPtrInt)
1997     CommonStride = StrideAPtrInt;
1998
1999   // TODO: Historically, we don't retry with runtime checks unless the
2000   // (unscaled) strides are the same. Fix this once the condition for runtime
2001   // checks in isDependent is fixed.
2002   bool ShouldRetryWithRuntimeCheck = CommonStride.has_value();
2003
2004   return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,
2005                                       ShouldRetryWithRuntimeCheck, TypeByteSize,
2006                                       AIsWrite, BIsWrite);
2007 }
2008
2009 MemoryDepChecker::Dependence::DepType
2010 MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2011                               const MemAccessInfo &B, unsigned BIdx) {
2012   assert(AIdx < BIdx && "Must pass arguments in program order");
2013
2014   // Get the dependence distance, stride, type size and what access writes for
2015   // the dependence between A and B.
2016   auto Res =
2017       getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
2018   if (std::holds_alternative<Dependence::DepType>(Res))
2019     return std::get<Dependence::DepType>(Res);
2020
2021   auto &[Dist, MaxStride, CommonStride, ShouldRetryWithRuntimeCheck,
2022          TypeByteSize, AIsWrite, BIsWrite] =
2023       std::get<DepDistanceStrideAndSizeInfo>(Res);
2024   bool HasSameSize = TypeByteSize > 0;
2025
2026   if (isa<SCEVCouldNotCompute>(Dist)) {
2027     // TODO: Relax requirement that there is a common unscaled stride to retry
2028     // with non-constant distance dependencies.
2029     FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
2030     LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n");
2031     return Dependence::Unknown;
2032   }
2033
2034   ScalarEvolution &SE = *PSE.getSE();
2035   auto &DL = InnermostLoop->getHeader()->getDataLayout();
2036
2037   // If the distance between the acecsses is larger than their maximum absolute
2038   // stride multiplied by the symbolic maximum backedge taken count (which is an
2039   // upper bound of the number of iterations), the accesses are independet, i.e.
2040   // they are far enough appart that accesses won't access the same location
2041   // across all loop ierations.
2042   if (HasSameSize && isSafeDependenceDistance(
2043                          DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()),
2044                          *Dist, MaxStride, TypeByteSize))
2045     return Dependence::NoDep;
2046
2047   const SCEVConstant *ConstDist = dyn_cast<SCEVConstant>(Dist);
2048
2049   // Attempt to prove strided accesses independent.
2050   if (ConstDist) {
2051     uint64_t Distance = ConstDist->getAPInt().abs().getZExtValue();
2052
2053     // If the distance between accesses and their strides are known constants,
2054     // check whether the accesses interlace each other.
2055     if (Distance > 0 && CommonStride && CommonStride > 1 && HasSameSize &&
2056         areStridedAccessesIndependent(Distance, *CommonStride, TypeByteSize)) {
2057       LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
2058       return Dependence::NoDep;
2059     }
2060   } else {
2061     if (!LoopGuards)
2062       LoopGuards.emplace(
2063           ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));
2064     Dist = SE.applyLoopGuards(Dist, *LoopGuards);
2065   }
2066
2067   // Negative distances are not plausible dependencies.
2068   if (SE.isKnownNonPositive(Dist)) {
2069     if (SE.isKnownNonNegative(Dist)) {
2070       if (HasSameSize) {
2071         // Write to the same location with the same size.
2072         return Dependence::Forward;
2073       }
2074       LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
2075                            "different type sizes\n");
2076       return Dependence::Unknown;
2077     }
2078
2079     bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
2080     // Check if the first access writes to a location that is read in a later
2081     // iteration, where the distance between them is not a multiple of a vector
2082     // factor and relatively small.
2083     //
2084     // NOTE: There is no need to update MaxSafeVectorWidthInBits after call to
2085     // couldPreventStoreLoadForward, even if it changed MinDepDistBytes, since a
2086     // forward dependency will allow vectorization using any width.
2087
2088     if (IsTrueDataDependence && EnableForwardingConflictDetection) {
2089       if (!ConstDist) {
2090         // TODO: FoundNonConstantDistanceDependence is used as a necessary
2091         // condition to consider retrying with runtime checks. Historically, we
2092         // did not set it when strides were different but there is no inherent
2093         // reason to.
2094         FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
2095         return Dependence::Unknown;
2096       }
2097       if (!HasSameSize ||
2098           couldPreventStoreLoadForward(
2099               ConstDist->getAPInt().abs().getZExtValue(), TypeByteSize)) {
2100         LLVM_DEBUG(
2101             dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
2102         return Dependence::ForwardButPreventsForwarding;
2103       }
2104     }
2105
2106     LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
2107     return Dependence::Forward;
2108   }
2109
2110   int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue();
2111   // Below we only handle strictly positive distances.
2112   if (MinDistance <= 0) {
2113     FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
2114     return Dependence::Unknown;
2115   }
2116
2117   if (!ConstDist) {
2118     // Previously this case would be treated as Unknown, possibly setting
2119     // FoundNonConstantDistanceDependence to force re-trying with runtime
2120     // checks. Until the TODO below is addressed, set it here to preserve
2121     // original behavior w.r.t. re-trying with runtime checks.
2122     // TODO: FoundNonConstantDistanceDependence is used as a necessary
2123     // condition to consider retrying with runtime checks. Historically, we
2124     // did not set it when strides were different but there is no inherent
2125     // reason to.
2126     FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
2127   }
2128
2129   if (!HasSameSize) {
2130     LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
2131                          "different type sizes\n");
2132     return Dependence::Unknown;
2133   }
2134
2135   if (!CommonStride)
2136     return Dependence::Unknown;
2137
2138   // Bail out early if passed-in parameters make vectorization not feasible.
2139   unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
2140                            VectorizerParams::VectorizationFactor : 1);
2141   unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
2142                            VectorizerParams::VectorizationInterleave : 1);
2143   // The minimum number of iterations for a vectorized/unrolled version.
2144   unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
2145
2146   // It's not vectorizable if the distance is smaller than the minimum distance
2147   // needed for a vectroized/unrolled version. Vectorizing one iteration in
2148   // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
2149   // TypeByteSize (No need to plus the last gap distance).
2150   //
2151   // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
2152   //      foo(int *A) {
2153   //        int *B = (int *)((char *)A + 14);
2154   //        for (i = 0 ; i < 1024 ; i += 2)
2155   //          B[i] = A[i] + 1;
2156   //      }
2157   //
2158   // Two accesses in memory (stride is 2):
2159   //     | A[0] |      | A[2] |      | A[4] |      | A[6] |      |
2160   //                              | B[0] |      | B[2] |      | B[4] |
2161   //
2162   // MinDistance needs for vectorizing iterations except the last iteration:
2163   // 4 * 2 * (MinNumIter - 1). MinDistance needs for the last iteration: 4.
2164   // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
2165   //
2166   // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
2167   // 12, which is less than distance.
2168   //
2169   // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
2170   // the minimum distance needed is 28, which is greater than distance. It is
2171   // not safe to do vectorization.
2172
2173   // We know that Dist is positive, but it may not be constant. Use the signed
2174   // minimum for computations below, as this ensures we compute the closest
2175   // possible dependence distance.
2176   uint64_t MinDistanceNeeded =
2177       TypeByteSize * *CommonStride * (MinNumIter - 1) + TypeByteSize;
2178   if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) {
2179     if (!ConstDist) {
2180       // For non-constant distances, we checked the lower bound of the
2181       // dependence distance and the distance may be larger at runtime (and safe
2182       // for vectorization). Classify it as Unknown, so we re-try with runtime
2183       // checks.
2184       return Dependence::Unknown;
2185     }
2186     LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance "
2187                       << MinDistance << '\n');
2188     return Dependence::Backward;
2189   }
2190
2191   // Unsafe if the minimum distance needed is greater than smallest dependence
2192   // distance distance.
2193   if (MinDistanceNeeded > MinDepDistBytes) {
2194     LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
2195                       << MinDistanceNeeded << " size in bytes\n");
2196     return Dependence::Backward;
2197   }
2198
2199   // Positive distance bigger than max vectorization factor.
2200   // FIXME: Should use max factor instead of max distance in bytes, which could
2201   // not handle different types.
2202   // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
2203   //      void foo (int *A, char *B) {
2204   //        for (unsigned i = 0; i < 1024; i++) {
2205   //          A[i+2] = A[i] + 1;
2206   //          B[i+2] = B[i] + 1;
2207   //        }
2208   //      }
2209   //
2210   // This case is currently unsafe according to the max safe distance. If we
2211   // analyze the two accesses on array B, the max safe dependence distance
2212   // is 2. Then we analyze the accesses on array A, the minimum distance needed
2213   // is 8, which is less than 2 and forbidden vectorization, But actually
2214   // both A and B could be vectorized by 2 iterations.
2215   MinDepDistBytes =
2216       std::min(static_cast<uint64_t>(MinDistance), MinDepDistBytes);
2217
2218   bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
2219   uint64_t MinDepDistBytesOld = MinDepDistBytes;
2220   if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist &&
2221       couldPreventStoreLoadForward(MinDistance, TypeByteSize)) {
2222     // Sanity check that we didn't update MinDepDistBytes when calling
2223     // couldPreventStoreLoadForward
2224     assert(MinDepDistBytes == MinDepDistBytesOld &&
2225            "An update to MinDepDistBytes requires an update to "
2226            "MaxSafeVectorWidthInBits");
2227     (void)MinDepDistBytesOld;
2228     return Dependence::BackwardVectorizableButPreventsForwarding;
2229   }
2230
2231   // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
2232   // since there is a backwards dependency.
2233   uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * *CommonStride);
2234   LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
2235                     << " with max VF = " << MaxVF << '\n');
2236
2237   uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
2238   if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {
2239     // For non-constant distances, we checked the lower bound of the dependence
2240     // distance and the distance may be larger at runtime (and safe for
2241     // vectorization). Classify it as Unknown, so we re-try with runtime checks.
2242     return Dependence::Unknown;
2243   }
2244
2245   MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
2246   return Dependence::BackwardVectorizable;
2247 }
2248
2249 bool MemoryDepChecker::areDepsSafe(const DepCandidates &AccessSets,
2250                                    const MemAccessInfoList &CheckDeps) {
2251
2252   MinDepDistBytes = -1;
2253   SmallPtrSet<MemAccessInfo, 8> Visited;
2254   for (MemAccessInfo CurAccess : CheckDeps) {
2255     if (Visited.count(CurAccess))
2256       continue;
2257
2258     // Get the relevant memory access set.
2259     EquivalenceClasses<MemAccessInfo>::iterator I =
2260       AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
2261
2262     // Check accesses within this set.
2263     EquivalenceClasses<MemAccessInfo>::member_iterator AI =
2264         AccessSets.member_begin(I);
2265     EquivalenceClasses<MemAccessInfo>::member_iterator AE =
2266         AccessSets.member_end();
2267
2268     // Check every access pair.
2269     while (AI != AE) {
2270       Visited.insert(*AI);
2271       bool AIIsWrite = AI->getInt();
2272       // Check loads only against next equivalent class, but stores also against
2273       // other stores in the same equivalence class - to the same address.
2274       EquivalenceClasses<MemAccessInfo>::member_iterator OI =
2275           (AIIsWrite ? AI : std::next(AI));
2276       while (OI != AE) {
2277         // Check every accessing instruction pair in program order.
2278         for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
2279              I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
2280           // Scan all accesses of another equivalence class, but only the next
2281           // accesses of the same equivalent class.
2282           for (std::vector<unsigned>::iterator
2283                    I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
2284                    I2E = (OI == AI ? I1E : Accesses[*OI].end());
2285                I2 != I2E; ++I2) {
2286             auto A = std::make_pair(&*AI, *I1);
2287             auto B = std::make_pair(&*OI, *I2);
2288
2289             assert(*I1 != *I2);
2290             if (*I1 > *I2)
2291               std::swap(A, B);
2292
2293             Dependence::DepType Type =
2294                 isDependent(*A.first, A.second, *B.first, B.second);
2295             mergeInStatus(Dependence::isSafeForVectorization(Type));
2296
2297             // Gather dependences unless we accumulated MaxDependences
2298             // dependences.  In that case return as soon as we find the first
2299             // unsafe dependence.  This puts a limit on this quadratic
2300             // algorithm.
2301             if (RecordDependences) {
2302               if (Type != Dependence::NoDep)
2303                 Dependences.emplace_back(A.second, B.second, Type);
2304
2305               if (Dependences.size() >= MaxDependences) {
2306                 RecordDependences = false;
2307                 Dependences.clear();
2308                 LLVM_DEBUG(dbgs()
2309                            << "Too many dependences, stopped recording\n");
2310               }
2311             }
2312             if (!RecordDependences && !isSafeForVectorization())
2313               return false;
2314           }
2315         ++OI;
2316       }
2317       ++AI;
2318     }
2319   }
2320
2321   LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
2322   return isSafeForVectorization();
2323 }
2324
2325 SmallVector<Instruction *, 4>
2326 MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool IsWrite) const {
2327   MemAccessInfo Access(Ptr, IsWrite);
2328   auto &IndexVector = Accesses.find(Access)->second;
2329
2330   SmallVector<Instruction *, 4> Insts;
2331   transform(IndexVector,
2332                  std::back_inserter(Insts),
2333                  [&](unsigned Idx) { return this->InstMap[Idx]; });
2334   return Insts;
2335 }
2336
2337 const char *MemoryDepChecker::Dependence::DepName[] = {
2338     "NoDep",
2339     "Unknown",
2340     "IndirectUnsafe",
2341     "Forward",
2342     "ForwardButPreventsForwarding",
2343     "Backward",
2344     "BackwardVectorizable",
2345     "BackwardVectorizableButPreventsForwarding"};
2346
2347 void MemoryDepChecker::Dependence::print(
2348     raw_ostream &OS, unsigned Depth,
2349     const SmallVectorImpl<Instruction *> &Instrs) const {
2350   OS.indent(Depth) << DepName[Type] << ":\n";
2351   OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
2352   OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
2353 }
2354
2355 bool LoopAccessInfo::canAnalyzeLoop() {
2356   // We need to have a loop header.
2357   LLVM_DEBUG(dbgs() << "\nLAA: Checking a loop in '"
2358                     << TheLoop->getHeader()->getParent()->getName() << "' from "
2359                     << TheLoop->getLocStr() << "\n");
2360
2361   // We can only analyze innermost loops.
2362   if (!TheLoop->isInnermost()) {
2363     LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
2364     recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
2365     return false;
2366   }
2367
2368   // We must have a single backedge.
2369   if (TheLoop->getNumBackEdges() != 1) {
2370     LLVM_DEBUG(
2371         dbgs() << "LAA: loop control flow is not understood by analyzer\n");
2372     recordAnalysis("CFGNotUnderstood")
2373         << "loop control flow is not understood by analyzer";
2374     return false;
2375   }
2376
2377   // ScalarEvolution needs to be able to find the symbolic max backedge taken
2378   // count, which is an upper bound on the number of loop iterations. The loop
2379   // may execute fewer iterations, if it exits via an uncountable exit.
2380   const SCEV *ExitCount = PSE->getSymbolicMaxBackedgeTakenCount();
2381   if (isa<SCEVCouldNotCompute>(ExitCount)) {
2382     recordAnalysis("CantComputeNumberOfIterations")
2383         << "could not determine number of loop iterations";
2384     LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
2385     return false;
2386   }
2387
2388   LLVM_DEBUG(dbgs() << "LAA: Found an analyzable loop: "
2389                     << TheLoop->getHeader()->getName() << "\n");
2390   return true;
2391 }
2392
2393 bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
2394                                  const TargetLibraryInfo *TLI,
2395                                  DominatorTree *DT) {
2396   // Holds the Load and Store instructions.
2397   SmallVector<LoadInst *, 16> Loads;
2398   SmallVector<StoreInst *, 16> Stores;
2399   SmallPtrSet<MDNode *, 8> LoopAliasScopes;
2400
2401   // Holds all the different accesses in the loop.
2402   unsigned NumReads = 0;
2403   unsigned NumReadWrites = 0;
2404
2405   bool HasComplexMemInst = false;
2406
2407   // A runtime check is only legal to insert if there are no convergent calls.
2408   HasConvergentOp = false;
2409
2410   PtrRtChecking->Pointers.clear();
2411   PtrRtChecking->Need = false;
2412
2413   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
2414
2415   const bool EnableMemAccessVersioningOfLoop =
2416       EnableMemAccessVersioning &&
2417       !TheLoop->getHeader()->getParent()->hasOptSize();
2418
2419   // Traverse blocks in fixed RPOT order, regardless of their storage in the
2420   // loop info, as it may be arbitrary.
2421   LoopBlocksRPO RPOT(TheLoop);
2422   RPOT.perform(LI);
2423   for (BasicBlock *BB : RPOT) {
2424     // Scan the BB and collect legal loads and stores. Also detect any
2425     // convergent instructions.
2426     for (Instruction &I : *BB) {
2427       if (auto *Call = dyn_cast<CallBase>(&I)) {
2428         if (Call->isConvergent())
2429           HasConvergentOp = true;
2430       }
2431
2432       // With both a non-vectorizable memory instruction and a convergent
2433       // operation, found in this loop, no reason to continue the search.
2434       if (HasComplexMemInst && HasConvergentOp)
2435         return false;
2436
2437       // Avoid hitting recordAnalysis multiple times.
2438       if (HasComplexMemInst)
2439         continue;
2440
2441       // Record alias scopes defined inside the loop.
2442       if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
2443         for (Metadata *Op : Decl->getScopeList()->operands())
2444           LoopAliasScopes.insert(cast<MDNode>(Op));
2445
2446       // Many math library functions read the rounding mode. We will only
2447       // vectorize a loop if it contains known function calls that don't set
2448       // the flag. Therefore, it is safe to ignore this read from memory.
2449       auto *Call = dyn_cast<CallInst>(&I);
2450       if (Call && getVectorIntrinsicIDForCall(Call, TLI))
2451         continue;
2452
2453       // If this is a load, save it. If this instruction can read from memory
2454       // but is not a load, we only allow it if it's a call to a function with a
2455       // vector mapping and no pointer arguments.
2456       if (I.mayReadFromMemory()) {
2457         auto hasPointerArgs = [](CallBase *CB) {
2458           return any_of(CB->args(), [](Value const *Arg) {
2459             return Arg->getType()->isPointerTy();
2460           });
2461         };
2462
2463         // If the function has an explicit vectorized counterpart, and does not
2464         // take output/input pointers, we can safely assume that it can be
2465         // vectorized.
2466         if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
2467             !hasPointerArgs(Call) && !VFDatabase::getMappings(*Call).empty())
2468           continue;
2469
2470         auto *Ld = dyn_cast<LoadInst>(&I);
2471         if (!Ld) {
2472           recordAnalysis("CantVectorizeInstruction", Ld)
2473             << "instruction cannot be vectorized";
2474           HasComplexMemInst = true;
2475           continue;
2476         }
2477         if (!Ld->isSimple() && !IsAnnotatedParallel) {
2478           recordAnalysis("NonSimpleLoad", Ld)
2479               << "read with atomic ordering or volatile read";
2480           LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
2481           HasComplexMemInst = true;
2482           continue;
2483         }
2484         NumLoads++;
2485         Loads.push_back(Ld);
2486         DepChecker->addAccess(Ld);
2487         if (EnableMemAccessVersioningOfLoop)
2488           collectStridedAccess(Ld);
2489         continue;
2490       }
2491
2492       // Save 'store' instructions. Abort if other instructions write to memory.
2493       if (I.mayWriteToMemory()) {
2494         auto *St = dyn_cast<StoreInst>(&I);
2495         if (!St) {
2496           recordAnalysis("CantVectorizeInstruction", St)
2497               << "instruction cannot be vectorized";
2498           HasComplexMemInst = true;
2499           continue;
2500         }
2501         if (!St->isSimple() && !IsAnnotatedParallel) {
2502           recordAnalysis("NonSimpleStore", St)
2503               << "write with atomic ordering or volatile write";
2504           LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
2505           HasComplexMemInst = true;
2506           continue;
2507         }
2508         NumStores++;
2509         Stores.push_back(St);
2510         DepChecker->addAccess(St);
2511         if (EnableMemAccessVersioningOfLoop)
2512           collectStridedAccess(St);
2513       }
2514     } // Next instr.
2515   } // Next block.
2516
2517   if (HasComplexMemInst)
2518     return false;
2519
2520   // Now we have two lists that hold the loads and the stores.
2521   // Next, we find the pointers that they use.
2522
2523   // Check if we see any stores. If there are no stores, then we don't
2524   // care if the pointers are *restrict*.
2525   if (!Stores.size()) {
2526     LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
2527     return true;
2528   }
2529
2530   MemoryDepChecker::DepCandidates DependentAccesses;
2531   AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE,
2532                           LoopAliasScopes);
2533
2534   // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
2535   // multiple times on the same object. If the ptr is accessed twice, once
2536   // for read and once for write, it will only appear once (on the write
2537   // list). This is okay, since we are going to check for conflicts between
2538   // writes and between reads and writes, but not between reads and reads.
2539   SmallSet<std::pair<Value *, Type *>, 16> Seen;
2540
2541   // Record uniform store addresses to identify if we have multiple stores
2542   // to the same address.
2543   SmallPtrSet<Value *, 16> UniformStores;
2544
2545   for (StoreInst *ST : Stores) {
2546     Value *Ptr = ST->getPointerOperand();
2547
2548     if (isInvariant(Ptr)) {
2549       // Record store instructions to loop invariant addresses
2550       StoresToInvariantAddresses.push_back(ST);
2551       HasStoreStoreDependenceInvolvingLoopInvariantAddress |=
2552           !UniformStores.insert(Ptr).second;
2553     }
2554
2555     // If we did *not* see this pointer before, insert it to  the read-write
2556     // list. At this phase it is only a 'write' list.
2557     Type *AccessTy = getLoadStoreType(ST);
2558     if (Seen.insert({Ptr, AccessTy}).second) {
2559       ++NumReadWrites;
2560
2561       MemoryLocation Loc = MemoryLocation::get(ST);
2562       // The TBAA metadata could have a control dependency on the predication
2563       // condition, so we cannot rely on it when determining whether or not we
2564       // need runtime pointer checks.
2565       if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
2566         Loc.AATags.TBAA = nullptr;
2567
2568       visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2569                     [&Accesses, AccessTy, Loc](Value *Ptr) {
2570                       MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2571                       Accesses.addStore(NewLoc, AccessTy);
2572                     });
2573     }
2574   }
2575
2576   if (IsAnnotatedParallel) {
2577     LLVM_DEBUG(
2578         dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
2579                << "checks.\n");
2580     return true;
2581   }
2582
2583   for (LoadInst *LD : Loads) {
2584     Value *Ptr = LD->getPointerOperand();
2585     // If we did *not* see this pointer before, insert it to the
2586     // read list. If we *did* see it before, then it is already in
2587     // the read-write list. This allows us to vectorize expressions
2588     // such as A[i] += x;  Because the address of A[i] is a read-write
2589     // pointer. This only works if the index of A[i] is consecutive.
2590     // If the address of i is unknown (for example A[B[i]]) then we may
2591     // read a few words, modify, and write a few words, and some of the
2592     // words may be written to the same address.
2593     bool IsReadOnlyPtr = false;
2594     Type *AccessTy = getLoadStoreType(LD);
2595     if (Seen.insert({Ptr, AccessTy}).second ||
2596         !getPtrStride(*PSE, AccessTy, Ptr, TheLoop, SymbolicStrides)) {
2597       ++NumReads;
2598       IsReadOnlyPtr = true;
2599     }
2600
2601     // See if there is an unsafe dependency between a load to a uniform address and
2602     // store to the same uniform address.
2603     if (UniformStores.count(Ptr)) {
2604       LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
2605                            "load and uniform store to the same address!\n");
2606       HasLoadStoreDependenceInvolvingLoopInvariantAddress = true;
2607     }
2608
2609     MemoryLocation Loc = MemoryLocation::get(LD);
2610     // The TBAA metadata could have a control dependency on the predication
2611     // condition, so we cannot rely on it when determining whether or not we
2612     // need runtime pointer checks.
2613     if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
2614       Loc.AATags.TBAA = nullptr;
2615
2616     visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2617                   [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
2618                     MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2619                     Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
2620                   });
2621   }
2622
2623   // If we write (or read-write) to a single destination and there are no
2624   // other reads in this loop then is it safe to vectorize.
2625   if (NumReadWrites == 1 && NumReads == 0) {
2626     LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
2627     return true;
2628   }
2629
2630   // Build dependence sets and check whether we need a runtime pointer bounds
2631   // check.
2632   Accesses.buildDependenceSets();
2633
2634   // Find pointers with computable bounds. We are going to use this information
2635   // to place a runtime bound check.
2636   Value *UncomputablePtr = nullptr;
2637   bool CanDoRTIfNeeded =
2638       Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop,
2639                                SymbolicStrides, UncomputablePtr, false);
2640   if (!CanDoRTIfNeeded) {
2641     const auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2642     recordAnalysis("CantIdentifyArrayBounds", I)
2643         << "cannot identify array bounds";
2644     LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
2645                       << "the array bounds.\n");
2646     return false;
2647   }
2648
2649   LLVM_DEBUG(
2650     dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
2651
2652   bool DepsAreSafe = true;
2653   if (Accesses.isDependencyCheckNeeded()) {
2654     LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2655     DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses,
2656                                           Accesses.getDependenciesToCheck());
2657
2658     if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) {
2659       LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
2660
2661       // Clear the dependency checks. We assume they are not needed.
2662       Accesses.resetDepChecks(*DepChecker);
2663
2664       PtrRtChecking->reset();
2665       PtrRtChecking->Need = true;
2666
2667       auto *SE = PSE->getSE();
2668       UncomputablePtr = nullptr;
2669       CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
2670           *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true);
2671
2672       // Check that we found the bounds for the pointer.
2673       if (!CanDoRTIfNeeded) {
2674         auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2675         recordAnalysis("CantCheckMemDepsAtRunTime", I)
2676             << "cannot check memory dependencies at runtime";
2677         LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2678         return false;
2679       }
2680       DepsAreSafe = true;
2681     }
2682   }
2683
2684   if (HasConvergentOp) {
2685     recordAnalysis("CantInsertRuntimeCheckWithConvergent")
2686         << "cannot add control dependency to convergent operation";
2687     LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
2688                          "would be needed with a convergent operation\n");
2689     return false;
2690   }
2691
2692   if (DepsAreSafe) {
2693     LLVM_DEBUG(
2694         dbgs() << "LAA: No unsafe dependent memory operations in loop.  We"
2695                << (PtrRtChecking->Need ? "" : " don't")
2696                << " need runtime memory checks.\n");
2697     return true;
2698   }
2699
2700   emitUnsafeDependenceRemark();
2701   return false;
2702 }
2703
2704 void LoopAccessInfo::emitUnsafeDependenceRemark() {
2705   const auto *Deps = getDepChecker().getDependences();
2706   if (!Deps)
2707     return;
2708   const auto *Found =
2709       llvm::find_if(*Deps, [](const MemoryDepChecker::Dependence &D) {
2710         return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) !=
2711                MemoryDepChecker::VectorizationSafetyStatus::Safe;
2712       });
2713   if (Found == Deps->end())
2714     return;
2715   MemoryDepChecker::Dependence Dep = *Found;
2716
2717   LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2718
2719   // Emit remark for first unsafe dependence
2720   bool HasForcedDistribution = false;
2721   std::optional<const MDOperand *> Value =
2722       findStringMetadataForLoop(TheLoop, "llvm.loop.distribute.enable");
2723   if (Value) {
2724     const MDOperand *Op = *Value;
2725     assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
2726     HasForcedDistribution = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
2727   }
2728
2729   const std::string Info =
2730       HasForcedDistribution
2731           ? "unsafe dependent memory operations in loop."
2732           : "unsafe dependent memory operations in loop. Use "
2733             "#pragma clang loop distribute(enable) to allow loop distribution "
2734             "to attempt to isolate the offending operations into a separate "
2735             "loop";
2736   OptimizationRemarkAnalysis &R =
2737       recordAnalysis("UnsafeDep", Dep.getDestination(getDepChecker())) << Info;
2738
2739   switch (Dep.Type) {
2740   case MemoryDepChecker::Dependence::NoDep:
2741   case MemoryDepChecker::Dependence::Forward:
2742   case MemoryDepChecker::Dependence::BackwardVectorizable:
2743     llvm_unreachable("Unexpected dependence");
2744   case MemoryDepChecker::Dependence::Backward:
2745     R << "\nBackward loop carried data dependence.";
2746     break;
2747   case MemoryDepChecker::Dependence::ForwardButPreventsForwarding:
2748     R << "\nForward loop carried data dependence that prevents "
2749          "store-to-load forwarding.";
2750     break;
2751   case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding:
2752     R << "\nBackward loop carried data dependence that prevents "
2753          "store-to-load forwarding.";
2754     break;
2755   case MemoryDepChecker::Dependence::IndirectUnsafe:
2756     R << "\nUnsafe indirect dependence.";
2757     break;
2758   case MemoryDepChecker::Dependence::Unknown:
2759     R << "\nUnknown data dependence.";
2760     break;
2761   }
2762
2763   if (Instruction *I = Dep.getSource(getDepChecker())) {
2764     DebugLoc SourceLoc = I->getDebugLoc();
2765     if (auto *DD = dyn_cast_or_null<Instruction>(getPointerOperand(I)))
2766       SourceLoc = DD->getDebugLoc();
2767     if (SourceLoc)
2768       R << " Memory location is the same as accessed at "
2769         << ore::NV("Location", SourceLoc);
2770   }
2771 }
2772
2773 bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
2774                                            DominatorTree *DT)  {
2775   assert(TheLoop->contains(BB) && "Unknown block used");
2776
2777   // Blocks that do not dominate the latch need predication.
2778   const BasicBlock *Latch = TheLoop->getLoopLatch();
2779   return !DT->dominates(BB, Latch);
2780 }
2781
2782 OptimizationRemarkAnalysis &
2783 LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
2784   assert(!Report && "Multiple reports generated");
2785
2786   const Value *CodeRegion = TheLoop->getHeader();
2787   DebugLoc DL = TheLoop->getStartLoc();
2788
2789   if (I) {
2790     CodeRegion = I->getParent();
2791     // If there is no debug location attached to the instruction, revert back to
2792     // using the loop's.
2793     if (I->getDebugLoc())
2794       DL = I->getDebugLoc();
2795   }
2796
2797   Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2798                                                    CodeRegion);
2799   return *Report;
2800 }
2801
2802 bool LoopAccessInfo::isInvariant(Value *V) const {
2803   auto *SE = PSE->getSE();
2804   // TODO: Is this really what we want? Even without FP SCEV, we may want some
2805   // trivially loop-invariant FP values to be considered invariant.
2806   if (!SE->isSCEVable(V->getType()))
2807     return false;
2808   const SCEV *S = SE->getSCEV(V);
2809   return SE->isLoopInvariant(S, TheLoop);
2810 }
2811
2812 /// Find the operand of the GEP that should be checked for consecutive
2813 /// stores. This ignores trailing indices that have no effect on the final
2814 /// pointer.
2815 static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {
2816   const DataLayout &DL = Gep->getDataLayout();
2817   unsigned LastOperand = Gep->getNumOperands() - 1;
2818   TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
2819
2820   // Walk backwards and try to peel off zeros.
2821   while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
2822     // Find the type we're currently indexing into.
2823     gep_type_iterator GEPTI = gep_type_begin(Gep);
2824     std::advance(GEPTI, LastOperand - 2);
2825
2826     // If it's a type with the same allocation size as the result of the GEP we
2827     // can peel off the zero index.
2828     TypeSize ElemSize = GEPTI.isStruct()
2829                             ? DL.getTypeAllocSize(GEPTI.getIndexedType())
2830                             : GEPTI.getSequentialElementStride(DL);
2831     if (ElemSize != GEPAllocSize)
2832       break;
2833     --LastOperand;
2834   }
2835
2836   return LastOperand;
2837 }
2838
2839 /// If the argument is a GEP, then returns the operand identified by
2840 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
2841 /// operand, it returns that instead.
2842 static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
2843   auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
2844   if (!GEP)
2845     return Ptr;
2846
2847   unsigned InductionOperand = getGEPInductionOperand(GEP);
2848
2849   // Check that all of the gep indices are uniform except for our induction
2850   // operand.
2851   for (unsigned I = 0, E = GEP->getNumOperands(); I != E; ++I)
2852     if (I != InductionOperand &&
2853         !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(I)), Lp))
2854       return Ptr;
2855   return GEP->getOperand(InductionOperand);
2856 }
2857
2858 /// Get the stride of a pointer access in a loop. Looks for symbolic
2859 /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
2860 static const SCEV *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
2861   auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
2862   if (!PtrTy)
2863     return nullptr;
2864
2865   // Try to remove a gep instruction to make the pointer (actually index at this
2866   // point) easier analyzable. If OrigPtr is equal to Ptr we are analyzing the
2867   // pointer, otherwise, we are analyzing the index.
2868   Value *OrigPtr = Ptr;
2869
2870   Ptr = stripGetElementPtr(Ptr, SE, Lp);
2871   const SCEV *V = SE->getSCEV(Ptr);
2872
2873   if (Ptr != OrigPtr)
2874     // Strip off casts.
2875     while (auto *C = dyn_cast<SCEVIntegralCastExpr>(V))
2876       V = C->getOperand();
2877
2878   auto *S = dyn_cast<SCEVAddRecExpr>(V);
2879   if (!S)
2880     return nullptr;
2881
2882   // If the pointer is invariant then there is no stride and it makes no
2883   // sense to add it here.
2884   if (Lp != S->getLoop())
2885     return nullptr;
2886
2887   V = S->getStepRecurrence(*SE);
2888
2889   // Strip off the size of access multiplication if we are still analyzing the
2890   // pointer.
2891   if (OrigPtr == Ptr) {
2892     if (auto *M = dyn_cast<SCEVMulExpr>(V)) {
2893       auto *StepConst = dyn_cast<SCEVConstant>(M->getOperand(0));
2894       if (!StepConst)
2895         return nullptr;
2896
2897       auto StepVal = StepConst->getAPInt().trySExtValue();
2898       // Bail out on a non-unit pointer access size.
2899       if (!StepVal || StepVal != 1)
2900         return nullptr;
2901
2902       V = M->getOperand(1);
2903     }
2904   }
2905
2906   // Note that the restriction after this loop invariant check are only
2907   // profitability restrictions.
2908   if (!SE->isLoopInvariant(V, Lp))
2909     return nullptr;
2910
2911   // Look for the loop invariant symbolic value.
2912   if (isa<SCEVUnknown>(V))
2913     return V;
2914
2915   if (auto *C = dyn_cast<SCEVIntegralCastExpr>(V))
2916     if (isa<SCEVUnknown>(C->getOperand()))
2917       return V;
2918
2919   return nullptr;
2920 }
2921
2922 void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2923   Value *Ptr = getLoadStorePointerOperand(MemAccess);
2924   if (!Ptr)
2925     return;
2926
2927   // Note: getStrideFromPointer is a *profitability* heuristic.  We
2928   // could broaden the scope of values returned here - to anything
2929   // which happens to be loop invariant and contributes to the
2930   // computation of an interesting IV - but we chose not to as we
2931   // don't have a cost model here, and broadening the scope exposes
2932   // far too many unprofitable cases.
2933   const SCEV *StrideExpr = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2934   if (!StrideExpr)
2935     return;
2936
2937   LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2938                        "versioning:");
2939   LLVM_DEBUG(dbgs() << "  Ptr: " << *Ptr << " Stride: " << *StrideExpr << "\n");
2940
2941   if (!SpeculateUnitStride) {
2942     LLVM_DEBUG(dbgs() << "  Chose not to due to -laa-speculate-unit-stride\n");
2943     return;
2944   }
2945
2946   // Avoid adding the "Stride == 1" predicate when we know that
2947   // Stride >= Trip-Count. Such a predicate will effectively optimize a single
2948   // or zero iteration loop, as Trip-Count <= Stride == 1.
2949   //
2950   // TODO: We are currently not making a very informed decision on when it is
2951   // beneficial to apply stride versioning. It might make more sense that the
2952   // users of this analysis (such as the vectorizer) will trigger it, based on
2953   // their specific cost considerations; For example, in cases where stride
2954   // versioning does  not help resolving memory accesses/dependences, the
2955   // vectorizer should evaluate the cost of the runtime test, and the benefit
2956   // of various possible stride specializations, considering the alternatives
2957   // of using gather/scatters (if available).
2958
2959   const SCEV *MaxBTC = PSE->getSymbolicMaxBackedgeTakenCount();
2960
2961   // Match the types so we can compare the stride and the MaxBTC.
2962   // The Stride can be positive/negative, so we sign extend Stride;
2963   // The backedgeTakenCount is non-negative, so we zero extend MaxBTC.
2964   const DataLayout &DL = TheLoop->getHeader()->getDataLayout();
2965   uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
2966   uint64_t BETypeSizeBits = DL.getTypeSizeInBits(MaxBTC->getType());
2967   const SCEV *CastedStride = StrideExpr;
2968   const SCEV *CastedBECount = MaxBTC;
2969   ScalarEvolution *SE = PSE->getSE();
2970   if (BETypeSizeBits >= StrideTypeSizeBits)
2971     CastedStride = SE->getNoopOrSignExtend(StrideExpr, MaxBTC->getType());
2972   else
2973     CastedBECount = SE->getZeroExtendExpr(MaxBTC, StrideExpr->getType());
2974   const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2975   // Since TripCount == BackEdgeTakenCount + 1, checking:
2976   // "Stride >= TripCount" is equivalent to checking:
2977   // Stride - MaxBTC> 0
2978   if (SE->isKnownPositive(StrideMinusBETaken)) {
2979     LLVM_DEBUG(
2980         dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2981                   "Stride==1 predicate will imply that the loop executes "
2982                   "at most once.\n");
2983     return;
2984   }
2985   LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
2986
2987   // Strip back off the integer cast, and check that our result is a
2988   // SCEVUnknown as we expect.
2989   const SCEV *StrideBase = StrideExpr;
2990   if (const auto *C = dyn_cast<SCEVIntegralCastExpr>(StrideBase))
2991     StrideBase = C->getOperand();
2992   SymbolicStrides[Ptr] = cast<SCEVUnknown>(StrideBase);
2993 }
2994
2995 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2996                                const TargetTransformInfo *TTI,
2997                                const TargetLibraryInfo *TLI, AAResults *AA,
2998                                DominatorTree *DT, LoopInfo *LI)
2999     : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
3000       PtrRtChecking(nullptr), TheLoop(L) {
3001   unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
3002   if (TTI) {
3003     TypeSize FixedWidth =
3004         TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
3005     if (FixedWidth.isNonZero()) {
3006       // Scale the vector width by 2 as rough estimate to also consider
3007       // interleaving.
3008       MaxTargetVectorWidthInBits = FixedWidth.getFixedValue() * 2;
3009     }
3010
3011     TypeSize ScalableWidth =
3012         TTI->getRegisterBitWidth(TargetTransformInfo::RGK_ScalableVector);
3013     if (ScalableWidth.isNonZero())
3014       MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
3015   }
3016   DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides,
3017                                                   MaxTargetVectorWidthInBits);
3018   PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
3019   if (canAnalyzeLoop())
3020     CanVecMem = analyzeLoop(AA, LI, TLI, DT);
3021 }
3022
3023 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
3024   if (CanVecMem) {
3025     OS.indent(Depth) << "Memory dependences are safe";
3026     const MemoryDepChecker &DC = getDepChecker();
3027     if (!DC.isSafeForAnyVectorWidth())
3028       OS << " with a maximum safe vector width of "
3029          << DC.getMaxSafeVectorWidthInBits() << " bits";
3030     if (PtrRtChecking->Need)
3031       OS << " with run-time checks";
3032     OS << "\n";
3033   }
3034
3035   if (HasConvergentOp)
3036     OS.indent(Depth) << "Has convergent operation in loop\n";
3037
3038   if (Report)
3039     OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
3040
3041   if (auto *Dependences = DepChecker->getDependences()) {
3042     OS.indent(Depth) << "Dependences:\n";
3043     for (const auto &Dep : *Dependences) {
3044       Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
3045       OS << "\n";
3046     }
3047   } else
3048     OS.indent(Depth) << "Too many dependences, not recorded\n";
3049
3050   // List the pair of accesses need run-time checks to prove independence.
3051   PtrRtChecking->print(OS, Depth);
3052   OS << "\n";
3053
3054   OS.indent(Depth)
3055       << "Non vectorizable stores to invariant address were "
3056       << (HasStoreStoreDependenceInvolvingLoopInvariantAddress ||
3057                   HasLoadStoreDependenceInvolvingLoopInvariantAddress
3058               ? ""
3059               : "not ")
3060       << "found in loop.\n";
3061
3062   OS.indent(Depth) << "SCEV assumptions:\n";
3063   PSE->getPredicate().print(OS, Depth);
3064
3065   OS << "\n";
3066
3067   OS.indent(Depth) << "Expressions re-written:\n";
3068   PSE->print(OS, Depth);
3069 }
3070
3071 const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
3072   const auto &[It, Inserted] = LoopAccessInfoMap.insert({&L, nullptr});
3073
3074   if (Inserted)
3075     It->second =
3076         std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT, &LI);
3077
3078   return *It->second;
3079 }
3080 void LoopAccessInfoManager::clear() {
3081   SmallVector<Loop *> ToRemove;
3082   // Collect LoopAccessInfo entries that may keep references to IR outside the
3083   // analyzed loop or SCEVs that may have been modified or invalidated. At the
3084   // moment, that is loops requiring memory or SCEV runtime checks, as those cache
3085   // SCEVs, e.g. for pointer expressions.
3086   for (const auto &[L, LAI] : LoopAccessInfoMap) {
3087     if (LAI->getRuntimePointerChecking()->getChecks().empty() &&
3088         LAI->getPSE().getPredicate().isAlwaysTrue())
3089       continue;
3090     ToRemove.push_back(L);
3091   }
3092
3093   for (Loop *L : ToRemove)
3094     LoopAccessInfoMap.erase(L);
3095 }
3096
3097 bool LoopAccessInfoManager::invalidate(
3098     Function &F, const PreservedAnalyses &PA,
3099     FunctionAnalysisManager::Invalidator &Inv) {
3100   // Check whether our analysis is preserved.
3101   auto PAC = PA.getChecker<LoopAccessAnalysis>();
3102   if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
3103     // If not, give up now.
3104     return true;
3105
3106   // Check whether the analyses we depend on became invalid for any reason.
3107   // Skip checking TargetLibraryAnalysis as it is immutable and can't become
3108   // invalid.
3109   return Inv.invalidate<AAManager>(F, PA) ||
3110          Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
3111          Inv.invalidate<LoopAnalysis>(F, PA) ||
3112          Inv.invalidate<DominatorTreeAnalysis>(F, PA);
3113 }
3114
3115 LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
3116                                               FunctionAnalysisManager &FAM) {
3117   auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
3118   auto &AA = FAM.getResult<AAManager>(F);
3119   auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
3120   auto &LI = FAM.getResult<LoopAnalysis>(F);
3121   auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
3122   auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
3123   return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI);
3124 }
3125
3126 AnalysisKey LoopAccessAnalysis::Key;