//===- ScopBuilder.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the SCoP
// detection derived from their LLVM-IR code.
//
//===----------------------------------------------------------------------===//
16 #include "polly/ScopBuilder.h"
17 #include "polly/Options.h"
18 #include "polly/ScopDetection.h"
19 #include "polly/ScopInfo.h"
20 #include "polly/Support/GICHelper.h"
21 #include "polly/Support/ISLTools.h"
22 #include "polly/Support/SCEVValidator.h"
23 #include "polly/Support/ScopHelper.h"
24 #include "polly/Support/VirtualInstruction.h"
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/EquivalenceClasses.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/Sequence.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/Analysis/AliasAnalysis.h"
32 #include "llvm/Analysis/AssumptionCache.h"
33 #include "llvm/Analysis/Delinearization.h"
34 #include "llvm/Analysis/Loads.h"
35 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
37 #include "llvm/Analysis/RegionInfo.h"
38 #include "llvm/Analysis/RegionIterator.h"
39 #include "llvm/Analysis/ScalarEvolution.h"
40 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/DataLayout.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/IR/DerivedTypes.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/InstrTypes.h"
48 #include "llvm/IR/Instruction.h"
49 #include "llvm/IR/Instructions.h"
50 #include "llvm/IR/Type.h"
51 #include "llvm/IR/Use.h"
52 #include "llvm/IR/Value.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
56 #include "llvm/Support/ErrorHandling.h"
57 #include "llvm/Support/raw_ostream.h"
58 #include <cassert>
60 using namespace llvm;
61 using namespace polly;
63 #include "polly/Support/PollyDebug.h"
64 #define DEBUG_TYPE "polly-scops"
STATISTIC(ScopFound, "Number of valid Scops");
STATISTIC(RichScopFound, "Number of Scops containing a loop");
STATISTIC(InfeasibleScops,
          "Number of SCoPs with statically infeasible context.");

bool polly::ModelReadOnlyScalars;
// The maximal number of dimensions we allow during invariant load construction.
// More complex access ranges will result in very high compile time and are also
// unlikely to result in good code. This value is very high and should only
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
static unsigned const MaxDimensionsInAccessRange = 9;

static cl::opt<bool, true> XModelReadOnlyScalars(
    "polly-analyze-read-only-scalars",
    cl::desc("Model read-only scalar values in the scop description"),
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::init(true),
    cl::cat(PollyCategory));

static cl::opt<int>
    OptComputeOut("polly-analysis-computeout",
                  cl::desc("Bound the scop analysis by a maximal amount of "
                           "computational steps (0 means no bound)"),
                  cl::Hidden, cl::init(800000), cl::cat(PollyCategory));
static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
    "polly-allow-dereference-of-all-function-parameters",
    cl::desc(
        "Treat all parameters to functions that are pointers as dereferenceable."
        " This is useful for invariant load hoisting, since we can generate"
        " fewer runtime checks. This is only valid if all pointers to functions"
        " are always initialized, so that Polly can choose to hoist"
        " their loads."),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
static cl::opt<bool>
    PollyIgnoreInbounds("polly-ignore-inbounds",
                        cl::desc("Do not take inbounds assumptions at all"),
                        cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
    "polly-rtc-max-arrays-per-group",
    cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::init(20), cl::cat(PollyCategory));
static cl::opt<unsigned> RunTimeChecksMaxAccessDisjuncts(
    "polly-rtc-max-array-disjuncts",
    cl::desc("The maximal number of disjuncts allowed in memory accesses "
             "to build RTCs."),
    cl::Hidden, cl::init(8), cl::cat(PollyCategory));
static cl::opt<unsigned> RunTimeChecksMaxParameters(
    "polly-rtc-max-parameters",
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::init(8), cl::cat(PollyCategory));

static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<std::string> UserContextStr(
    "polly-context", cl::value_desc("isl parameter set"),
    cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));

static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::init(true),
                                      cl::cat(PollyCategory));
// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations
// are, in contrast, pretty stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden,
    cl::cat(PollyCategory));
enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };

static cl::opt<GranularityChoice> StmtGranularity(
    "polly-stmt-granularity",
    cl::desc(
        "Algorithm to use for splitting basic blocks into multiple statements"),
    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
                          "One statement per basic block"),
               clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
                          "Scalar independence heuristic"),
               clEnumValN(GranularityChoice::Stores, "store",
                          "Store-level granularity")),
    cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));

/// Helper to treat non-affine regions and basic blocks the same.
///
///{
/// Return the block that is the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
                           : RN->getNodeAs<BasicBlock>();
}
/// Return the @p idx'th block that is executed after @p RN.
static inline BasicBlock *
getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
  if (RN->isSubRegion()) {
    assert(idx == 0);
    return RN->getNodeAs<Region>()->getExit();
  }
  return TI->getSuccessor(idx);
}
static bool containsErrorBlock(RegionNode *RN, const Region &R,
                               ScopDetection *SD) {
  if (!RN->isSubRegion())
    return SD->isErrorBlock(*RN->getNodeAs<BasicBlock>(), R);
  for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
    if (SD->isErrorBlock(*BB, R))
      return true;
  return false;
}

///}
/// Create a map to map from a given iteration to a subsequent iteration.
///
/// This map maps from SetSpace -> SetSpace where the dimension @p Dim
/// is incremented by one and all other dimensions are equal, e.g.,
///             [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
///
/// if @p Dim is 2 and @p SetSpace has 4 dimensions.
static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
  isl::space MapSpace = SetSpace.map_from_set();
  isl::map NextIterationMap = isl::map::universe(MapSpace);
  for (unsigned u : rangeIslSize(0, NextIterationMap.domain_tuple_dim()))
    if (u != Dim)
      NextIterationMap =
          NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u);
  isl::constraint C =
      isl::constraint::alloc_equality(isl::local_space(MapSpace));
  C = C.set_constant_si(1);
  C = C.set_coefficient_si(isl::dim::in, Dim, 1);
  C = C.set_coefficient_si(isl::dim::out, Dim, -1);
  NextIterationMap = NextIterationMap.add_constraint(C);
  return NextIterationMap;
}
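// Illustrative note (added; not part of the original file): the equality
// constraint built above encodes 1 + in[Dim] - out[Dim] = 0, i.e.
// out[Dim] = in[Dim] + 1, so for Dim = 2 in a 4-dimensional space the
// resulting map printed by isl would look roughly like
//   { [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3] }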
/// Return the union of all bounded basic sets of @p S.
static isl::set collectBoundedParts(isl::set S) {
  isl::set BoundedParts = isl::set::empty(S.get_space());
  for (isl::basic_set BSet : S.get_basic_set_list())
    if (BSet.is_bounded())
      BoundedParts = BoundedParts.unite(isl::set(BSet));
  return BoundedParts;
}
/// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded then a bounded subset,
///          both with regards to the dimension @p Dim.
static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
                                                       unsigned Dim) {
  for (unsigned u : rangeIslSize(0, S.tuple_dim()))
    S = S.lower_bound_si(isl::dim::set, u, 0);

  unsigned NumDimsS = unsignedFromIslSize(S.tuple_dim());
  isl::set OnlyDimS = S;

  // Remove dimensions that are greater than Dim as they are not interesting.
  assert(NumDimsS >= Dim + 1);
  OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Create artificial parametric upper bounds for dimensions smaller than Dim
  // as we are not interested in them.
  OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim);

  for (unsigned u = 0; u < Dim; u++) {
    isl::constraint C = isl::constraint::alloc_inequality(
        isl::local_space(OnlyDimS.get_space()));
    C = C.set_coefficient_si(isl::dim::param, u, 1);
    C = C.set_coefficient_si(isl::dim::set, u, -1);
    OnlyDimS = OnlyDimS.add_constraint(C);
  }

  // Collect all bounded parts of OnlyDimS.
  isl::set BoundedParts = collectBoundedParts(OnlyDimS);

  // Create the dimensions greater than Dim again.
  BoundedParts =
      BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Remove the artificial upper bound parameters again.
  BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim);

  isl::set UnboundedParts = S.subtract(BoundedParts);
  return std::make_pair(UnboundedParts, BoundedParts);
}
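// Informal example (added for illustration; not in the original source): for
// S = { [i] : i >= 0 } and Dim = 0 no basic set of S is bounded from above,
// so the function returns ({ [i] : i >= 0 }, {}); for
// S = { [i] : 0 <= i < 100 } it returns ({}, S). The bounded component is
// what addLoopBoundsToHeaderDomain keeps as the loop-header domain, while
// the unbounded component feeds the INFINITELOOP assumption.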
/// Create the conditions under which @p L @p Pred @p R is true.
static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
                                  isl::pw_aff R) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    return L.eq_set(R);
  case ICmpInst::ICMP_NE:
    return L.ne_set(R);
  case ICmpInst::ICMP_SLT:
    return L.lt_set(R);
  case ICmpInst::ICMP_SLE:
    return L.le_set(R);
  case ICmpInst::ICMP_SGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_SGE:
    return L.ge_set(R);
  case ICmpInst::ICMP_ULT:
    return L.lt_set(R);
  case ICmpInst::ICMP_UGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_ULE:
    return L.le_set(R);
  case ICmpInst::ICMP_UGE:
    return L.ge_set(R);
  default:
    llvm_unreachable("Non integer predicate not supported");
  }
}
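// Note (added for clarity): the unsigned predicates intentionally reuse the
// signed set operations (e.g., ICMP_ULT -> lt_set). This is sound because the
// callers record non-negativity assumptions for the operands of unsigned
// comparisons (see the NonNeg handling in buildConditionSets and
// buildUnsignedConditionSets below). As an informal example,
//   buildConditionSet(ICmpInst::ICMP_SLT, { [i] -> [(i)] }, { [i] -> [(100)] })
// yields the set { [i] : i < 100 }.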
isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL,
                                             Loop *NewL) {
  // If the loops are the same there is nothing to do.
  if (NewL == OldL)
    return Dom;

  int OldDepth = scop->getRelativeLoopDepth(OldL);
  int NewDepth = scop->getRelativeLoopDepth(NewL);

  // If both loops are non-affine loops there is nothing to do.
  if (OldDepth == -1 && NewDepth == -1)
    return Dom;

  // Distinguish three cases:
  //   1) The depth is the same but the loops are not.
  //      => One loop was left, one was entered.
  //   2) The depth increased from OldL to NewL.
  //      => One loop was entered, none was left.
  //   3) The depth decreased from OldL to NewL.
  //      => Loops were left; the difference of the depths defines how many.
  if (OldDepth == NewDepth) {
    assert(OldL->getParentLoop() == NewL->getParentLoop());
    Dom = Dom.project_out(isl::dim::set, NewDepth, 1);
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else if (OldDepth < NewDepth) {
    assert(OldDepth + 1 == NewDepth);
    auto &R = scop->getRegion();
    (void)R;
    assert(NewL->getParentLoop() == OldL ||
           ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else {
    assert(OldDepth > NewDepth);
    unsigned Diff = OldDepth - NewDepth;
    unsigned NumDim = unsignedFromIslSize(Dom.tuple_dim());
    assert(NumDim >= Diff);
    Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff);
  }

  return Dom;
}
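// Informal example (added; not part of the original file): when control flows
// out of a loop nest at depth 2 into code at depth 1 (case 3 with Diff = 1),
// a domain { [i, j] : 0 <= i < N and 0 <= j < M } is projected down to
// { [i] : 0 <= i < N }; entering a loop (case 2) instead appends a fresh,
// unconstrained dimension.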
/// Compute the isl representation for the SCEV @p E in this BB.
///
/// @param BB               The BB for which isl representation is to be
///                         computed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate that @p E has to be non-negative.
///
/// Note that this function will also adjust the invalid context accordingly.
__isl_give isl_pw_aff *
ScopBuilder::getPwAff(BasicBlock *BB,
                      DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                      const SCEV *E, bool NonNegative) {
  PWACtx PWAC = scop->getPwAff(E, BB, NonNegative, &RecordedAssumptions);
  InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
  return PWAC.first.release();
}
/// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands because an operand
/// with the MSB (aka the sign bit) set is a large positive number in the
/// unsigned interpretation but would wrap around to a negative one in the
/// signed interpretation.
///
/// @param IsStrictUpperBound holds information on the predicate relation
///        between TestVal and UpperBound, i.e.,
///        TestVal < UpperBound  OR  TestVal <= UpperBound
__isl_give isl_set *ScopBuilder::buildUnsignedConditionSets(
    BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain,
    const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    bool IsStrictUpperBound) {
  // Do not take NonNeg assumption on TestVal
  // as it might have MSB (Sign bit) set.
  isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, SCEV_TestVal, false);
  // Take NonNeg assumption on UpperBound.
  isl_pw_aff *UpperBound =
      getPwAff(BB, InvalidDomainMap, SCEV_UpperBound, true);

  // 0 <= TestVal
  isl_set *First =
      isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
                            isl_pw_aff_get_domain_space(TestVal))),
                        isl_pw_aff_copy(TestVal));

  isl_set *Second;
  if (IsStrictUpperBound)
    // TestVal < UpperBound
    Second = isl_pw_aff_lt_set(TestVal, UpperBound);
  else
    // TestVal <= UpperBound
    Second = isl_pw_aff_le_set(TestVal, UpperBound);

  isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
  return ConsequenceCondSet;
}
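// Informal example (added for illustration): for a condition `x <u N` the
// caller passes SCEV_TestVal = x and SCEV_UpperBound = N with
// IsStrictUpperBound = true, and the returned set is { 0 <= x and x < N },
// with a non-negativity assumption recorded for N only, since x may
// legitimately have its sign bit set.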
bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  Value *Condition = getConditionFromTerminator(SI);
  assert(Condition && "No condition for switch");

  isl_pw_aff *LHS, *RHS;
  LHS = getPwAff(BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));

  unsigned NumSuccessors = SI->getNumSuccessors();
  ConditionSets.resize(NumSuccessors);
  for (auto &Case : SI->cases()) {
    unsigned Idx = Case.getSuccessorIndex();
    ConstantInt *CaseValue = Case.getCaseValue();

    RHS = getPwAff(BB, InvalidDomainMap, SE.getSCEV(CaseValue));
    isl_set *CaseConditionSet =
        buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS),
                          isl::manage(RHS))
            .release();
    ConditionSets[Idx] = isl_set_coalesce(
        isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
  }

  assert(ConditionSets[0] == nullptr && "Default condition set was set");
  isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
  for (unsigned u = 2; u < NumSuccessors; u++)
    ConditionSetUnion =
        isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
  ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion);

  isl_pw_aff_free(LHS);

  return true;
}
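// Informal example (added; not in the original source): for
//   switch (c) { case 1: goto BB1; case 3: goto BB3; default: goto BB0; }
// the loop above fills ConditionSets[1] = Domain /\ { c = 1 } and
// ConditionSets[2] = Domain /\ { c = 3 }, and the default successor
// (successor index 0) gets ConditionSets[0] = Domain \ ({c = 1} u {c = 3}).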
bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L,
    __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  isl_set *ConsequenceCondSet = nullptr;

  if (auto Load = dyn_cast<LoadInst>(Condition)) {
    const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
    const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
    bool NonNeg = false;
    isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, LHSSCEV, NonNeg);
    isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, RHSSCEV, NonNeg);
    ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS),
                                           isl::manage(RHS))
                             .release();
  } else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast<ConstantInt>(
        getUniqueNonErrorValue(PHI, &scop->getRegion(), &SD));
    assert(Unique &&
           "A PHINode condition should only be accepted by ScopDetection if "
           "getUniqueNonErrorValue returns non-NULL");

    if (Unique->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
    if (CCond->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    assert(Opcode == Instruction::And || Opcode == Instruction::Or);

    bool Valid = buildConditionSets(BB, BinOp->getOperand(0), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets) &&
                 buildConditionSets(BB, BinOp->getOperand(1), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets);
    if (!Valid) {
      while (!ConditionSets.empty())
        isl_set_free(ConditionSets.pop_back_val());
      return false;
    }

    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart1 = ConditionSets.pop_back_val();

    if (Opcode == Instruction::And)
      ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
    else
      ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
  } else {
    auto *ICond = dyn_cast<ICmpInst>(Condition);
    assert(ICond &&
           "Condition of exiting branch was neither constant nor ICmp!");

    Region &R = scop->getRegion();

    isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assumed the signed bit of neither operand
    // to be set. The comparison is equal to a signed comparison under this
    // assumption.
    bool NonNeg = ICond->isUnsigned();
    const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
               *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);

    LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, &SD);
    RightOperand = tryForwardThroughPHI(RightOperand, R, SE, &SD);

    switch (ICond->getPredicate()) {
    case ICmpInst::ICMP_ULT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_ULE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, false);
      break;
    case ICmpInst::ICMP_UGT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_UGE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, false);
      break;
    default:
      LHS = getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg);
      RHS = getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg);
      ConsequenceCondSet = buildConditionSet(ICond->getPredicate(),
                                             isl::manage(LHS), isl::manage(RHS))
                               .release();
      break;
    }
  }

  // If no terminator was given we are only looking for parameter constraints
  // under which @p Condition is true/false.
  if (!TI)
    ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
  assert(ConsequenceCondSet);
  ConsequenceCondSet = isl_set_coalesce(
      isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));

  isl_set *AlternativeCondSet = nullptr;
  bool TooComplex =
      isl_set_n_basic_set(ConsequenceCondSet) >= (int)MaxDisjunctsInDomain;

  if (!TooComplex) {
    AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
                                          isl_set_copy(ConsequenceCondSet));
    TooComplex =
        isl_set_n_basic_set(AlternativeCondSet) >= (int)MaxDisjunctsInDomain;
  }

  if (TooComplex) {
    scop->invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
                     TI ? TI->getParent() : nullptr /* BasicBlock */);
    isl_set_free(AlternativeCondSet);
    isl_set_free(ConsequenceCondSet);
    return false;
  }

  ConditionSets.push_back(ConsequenceCondSet);
  ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));

  return true;
}
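// Informal note (added for clarity): for a condition like `a && b`, the
// BinaryOperator branch above recursively builds the pairs (CondA, !CondA)
// and (CondB, !CondB), pops them off ConditionSets, and combines the
// consequence parts with isl_set_intersect; `a || b` uses isl_set_union
// instead. The alternative (else-successor) set is always recomputed
// afterwards as Domain \ ConsequenceCondSet.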
bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
    return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap,
                              ConditionSets);

  assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");

  if (TI->getNumSuccessors() == 1) {
    ConditionSets.push_back(isl_set_copy(Domain));
    return true;
  }

  Value *Condition = getConditionFromTerminator(TI);
  assert(Condition && "No condition for Terminator");

  return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap,
                            ConditionSets);
}
bool ScopBuilder::propagateDomainConstraints(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Iterate over the region R and propagate the domain constraints from the
  // predecessors to the current node. In contrast to the
  // buildDomainsWithBranchConstraints function, this one will pull the domain
  // information from the predecessors instead of pushing it to the successors.
  // Additionally, we assume the domains to be already present in the domain
  // map here. However, we iterate again in reverse post order so we know all
  // predecessors have been visited before a block or non-affine subregion is
  // visited.

  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        if (!propagateDomainConstraints(SubRegion, InvalidDomainMap))
          return false;
        continue;
      }
    }

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = scop->getOrInitEmptyDomain(BB);
    assert(!Domain.is_null());

    // Under the union of all predecessor conditions we can reach this block.
    isl::set PredDom = getPredecessorDomainConstraints(BB, Domain);
    Domain = Domain.intersect(PredDom).coalesce();
    Domain = Domain.align_params(scop->getParamSpace());

    Loop *BBLoop = getRegionNodeLoop(RN, LI);
    if (BBLoop && BBLoop->getHeader() == BB && scop->contains(BBLoop))
      if (!addLoopBoundsToHeaderDomain(BBLoop, InvalidDomainMap))
        return false;
  }

  return true;
}
void ScopBuilder::propagateDomainConstraintsToRegionExit(
    BasicBlock *BB, Loop *BBLoop,
    SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Check if the block @p BB is the entry of a region. If so we propagate its
  // domain to the exit block of the region. Otherwise we are done.
  auto *RI = scop->getRegion().getRegionInfo();
  auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
  auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
  if (!BBReg || BBReg->getEntry() != BB || !scop->contains(ExitBB))
    return;

  // Do not propagate the domain if there is a loop backedge inside the region
  // that would prevent the exit block from being executed.
  auto *L = BBLoop;
  while (L && scop->contains(L)) {
    SmallVector<BasicBlock *, 4> LatchBBs;
    BBLoop->getLoopLatches(LatchBBs);
    for (auto *LatchBB : LatchBBs)
      if (BB != LatchBB && BBReg->contains(LatchBB))
        return;
    L = L->getParentLoop();
  }

  isl::set Domain = scop->getOrInitEmptyDomain(BB);
  assert(!Domain.is_null() && "Cannot propagate a nullptr");

  Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, scop->getBoxedLoops());

  // Since the dimensions of @p BB and @p ExitBB might be different we have to
  // adjust the domain before we can propagate it.
  isl::set AdjustedDomain = adjustDomainDimensions(Domain, BBLoop, ExitBBLoop);
  isl::set &ExitDomain = scop->getOrInitEmptyDomain(ExitBB);

  // If the exit domain is not yet created we set it, otherwise we "add" the
  // current domain.
  ExitDomain =
      !ExitDomain.is_null() ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;

  // Initialize the invalid domain.
  InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());

  FinishedExitBlocks.insert(ExitBB);
}
isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
                                                      isl::set Domain) {
  // If @p BB is the ScopEntry we are done.
  if (scop->getRegion().getEntry() == BB)
    return isl::set::universe(Domain.get_space());

  // The region info of this function.
  auto &RI = *scop->getRegion().getRegionInfo();

  Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, scop->getBoxedLoops());

  // A domain to collect all predecessor domains, thus all conditions under
  // which the block is executed. To this end we start with the empty domain.
  isl::set PredDom = isl::set::empty(Domain.get_space());

  // Set of regions of which the entry block domain has been propagated to BB.
  // All predecessors inside any of the regions can be skipped.
  SmallSet<Region *, 8> PropagatedRegions;

  for (auto *PredBB : predecessors(BB)) {
    // Skip backedges.
    if (DT.dominates(BB, PredBB))
      continue;

    // If the predecessor is in a region we used for propagation we can skip
    // it.
    auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
    if (llvm::any_of(PropagatedRegions, PredBBInRegion)) {
      continue;
    }

    // Check if there is a valid region we can use for propagation, thus look
    // for a region that contains the predecessor and has @p BB as exit block.
    // FIXME: This was a side-effect-free (and possibly infinite) loop when
    // committed and seems not to be needed.
    auto *PredR = RI.getRegionFor(PredBB);
    while (PredR->getExit() != BB && !PredR->contains(BB))
      PredR = PredR->getParent();

    // If a valid region for propagation was found use the entry of that region
    // for propagation, otherwise the PredBB directly.
    if (PredR->getExit() == BB) {
      PredBB = PredR->getEntry();
      PropagatedRegions.insert(PredR);
    }

    isl::set PredBBDom = scop->getDomainConditions(PredBB);
    Loop *PredBBLoop =
        getFirstNonBoxedLoopFor(PredBB, LI, scop->getBoxedLoops());
    PredBBDom = adjustDomainDimensions(PredBBDom, PredBBLoop, BBLoop);
    PredDom = PredDom.unite(PredBBDom);
  }

  return PredDom;
}
bool ScopBuilder::addLoopBoundsToHeaderDomain(
    Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  int LoopDepth = scop->getRelativeLoopDepth(L);
  assert(LoopDepth >= 0 && "Loop in region should have at least depth one");

  BasicBlock *HeaderBB = L->getHeader();
  assert(scop->isDomainDefined(HeaderBB));
  isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(HeaderBB);

  isl::map NextIterationMap =
      createNextIterationMap(HeaderBBDom.get_space(), LoopDepth);

  isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());

  SmallVector<BasicBlock *, 4> LatchBlocks;
  L->getLoopLatches(LatchBlocks);

  for (BasicBlock *LatchBB : LatchBlocks) {
    // If the latch is only reachable via error statements we skip it.
    if (!scop->isDomainDefined(LatchBB))
      continue;

    isl::set LatchBBDom = scop->getDomainConditions(LatchBB);

    isl::set BackedgeCondition;

    Instruction *TI = LatchBB->getTerminator();
    BranchInst *BI = dyn_cast<BranchInst>(TI);
    assert(BI && "Only branch instructions allowed in loop latches");

    if (BI->isUnconditional())
      BackedgeCondition = LatchBBDom;
    else {
      SmallVector<isl_set *, 8> ConditionSets;
      int idx = BI->getSuccessor(0) != HeaderBB;
      if (!buildConditionSets(LatchBB, TI, L, LatchBBDom.get(),
                              InvalidDomainMap, ConditionSets))
        return false;

      // Free the non-backedge condition set as we do not need it.
      isl_set_free(ConditionSets[1 - idx]);

      BackedgeCondition = isl::manage(ConditionSets[idx]);
    }

    int LatchLoopDepth = scop->getRelativeLoopDepth(LI.getLoopFor(LatchBB));
    assert(LatchLoopDepth >= LoopDepth);
    BackedgeCondition = BackedgeCondition.project_out(
        isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
    UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
  }

  isl::map ForwardMap = isl::map::lex_le(HeaderBBDom.get_space());
  for (int i = 0; i < LoopDepth; i++)
    ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);

  isl::set UnionBackedgeConditionComplement =
      UnionBackedgeCondition.complement();
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
                                                      1);
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.apply(ForwardMap);
  HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
  HeaderBBDom = HeaderBBDom.apply(NextIterationMap);

  auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
  HeaderBBDom = Parts.second;

  // Check if there is a <nsw> tagged AddRec for this loop and if so do not
  // require a runtime check. The assumption is already implied by the <nsw>
  // tag.
  bool RequiresRTC = !scop->hasNSWAddRecForLoop(L);

  isl::set UnboundedCtx = Parts.first.params();
  recordAssumption(&RecordedAssumptions, INFINITELOOP, UnboundedCtx,
                   HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION,
                   nullptr, RequiresRTC);
  return true;
}
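// Rough sketch (added for orientation; simplified): for a canonical loop
// `for (i = 0; i < N; ++i)` the union of backedge conditions is
// { [i] : i < N }. Its complement, restricted to iterations >= 1 and mapped
// forward through ForwardMap, removes every header iteration that lies at or
// beyond a failed backedge, so the header domain ends up bounded in terms of
// N. Any remaining unbounded part is split off by partitionSetParts and
// recorded as an INFINITELOOP restriction instead of being modeled.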
void ScopBuilder::buildInvariantEquivalenceClasses() {
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;

  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : RIL) {
    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());

    Type *Ty = LInst->getType();
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
    if (ClassRep) {
      scop->addInvariantLoadMapping(LInst, ClassRep);
      continue;
    }

    ClassRep = LInst;
    scop->addInvariantEquivClass(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), {}, Ty});
  }
}
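// Informal example (added; not from the original file): two `load i32, ptr %p`
// instructions in different statements share the same (pointer SCEV, type)
// key, so the first becomes the class representative and the second is only
// recorded via addInvariantLoadMapping; a `load i64, ptr %p` would start a
// class of its own because the type differs.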
bool ScopBuilder::buildDomains(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R);
  auto *EntryBB = R->getEntry();
  auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
  int LD = scop->getRelativeLoopDepth(L);
  auto *S =
      isl_set_universe(isl_space_set_alloc(scop->getIslCtx().get(), 0, LD + 1));

  InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
  isl::set Domain = isl::manage(S);
  scop->setDomain(EntryBB, Domain);

  if (IsOnlyNonAffineRegion)
    return !containsErrorBlock(R->getNode(), *R, &SD);

  if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap))
    return false;

  if (!propagateDomainConstraints(R, InvalidDomainMap))
    return false;

  // Error blocks and blocks dominated by them have been assumed to never be
  // executed. Representing them in the Scop does not add any value. In fact,
  // it is likely to cause issues during construction of the ScopStmts. The
  // contents of error blocks have not been verified to be expressible and
  // will cause problems when building up a ScopStmt for them.
  // Furthermore, basic blocks dominated by error blocks may reference
  // instructions in the error block which, if the error block is not modeled,
  // can themselves not be constructed properly. To this end we will replace
  // the domains of error blocks and those only reachable via error blocks
  // with an empty set. Additionally, we will record for each block under which
  // parameter combination it would be reached via an error block in its
  // InvalidDomain. This information is needed during load hoisting.
  if (!propagateInvalidStmtDomains(R, InvalidDomainMap))
    return false;

  return true;
}
bool ScopBuilder::buildDomainsWithBranchConstraints(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // To create the domain for each block in R we iterate over all blocks and
  // subregions in R and propagate the conditions under which the current
  // region element is executed. To this end we iterate in reverse post order
  // over R as it ensures that we first visit all predecessors of a region node
  // (either a basic block or a subregion) before we visit the region node
  // itself. Initially, only the domain for the SCoP region entry block is set
  // and from there we propagate the current domain to all successors, however
  // we add the condition that the successor is actually executed next.
  // As we are only interested in non-loop carried constraints here we can
  // simply skip loop back edges.

  SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        if (!buildDomainsWithBranchConstraints(SubRegion, InvalidDomainMap))
          return false;
        continue;
      }
    }

    if (containsErrorBlock(RN, scop->getRegion(), &SD))
      scop->notifyErrorBlock();

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    Instruction *TI = BB->getTerminator();

    if (isa<UnreachableInst>(TI))
      continue;

    if (!scop->isDomainDefined(BB))
      continue;
    isl::set Domain = scop->getDomainConditions(BB);

    scop->updateMaxLoopDepth(unsignedFromIslSize(Domain.tuple_dim()));

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    // Propagate the domain from BB directly to blocks that have a superset
    // domain, at the moment only region exit nodes of regions that start in
    // BB.
    propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks,
                                           InvalidDomainMap);

    // If all successors of BB have been set a domain through the propagation
    // above we do not need to build condition sets but can just skip this
    // block. However, it is important to note that this is a local property
    // with regards to the region @p R. To this end FinishedExitBlocks is a
    // local variable.
    auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
      return FinishedExitBlocks.count(SuccBB);
    };
    if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
      continue;

    // Build the condition sets for the successor nodes of the current region
    // node. If it is a non-affine subregion we will always execute the single
    // exit node, hence the single entry node domain is the condition set. For
    // basic blocks we use the helper function buildConditionSets.
    SmallVector<isl_set *, 8> ConditionSets;
    if (RN->isSubRegion())
      ConditionSets.push_back(Domain.copy());
    else if (!buildConditionSets(BB, TI, BBLoop, Domain.get(), InvalidDomainMap,
                                 ConditionSets))
      return false;

    // Now iterate over the successors and set their initial domain based on
    // their condition set. We skip back edges here and have to be careful when
    // we leave a loop not to keep constraints over a dimension that doesn't
    // exist anymore.
    assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
    for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
      isl::set CondSet = isl::manage(ConditionSets[u]);
      BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip blocks outside the region.
      if (!scop->contains(SuccBB))
        continue;

      // If we propagate the domain of some block to "SuccBB" we do not have to
      // adjust the domain.
      if (FinishedExitBlocks.count(SuccBB))
        continue;

      // Skip back edges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop =
          getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());

      CondSet = adjustDomainDimensions(CondSet, BBLoop, SuccBBLoop);

      // Set the domain for the successor or merge it with an existing domain
      // in case there are multiple paths (without loop back edges) to the
      // successor block.
      isl::set &SuccDomain = scop->getOrInitEmptyDomain(SuccBB);

      if (!SuccDomain.is_null()) {
        SuccDomain = SuccDomain.unite(CondSet).coalesce();
      } else {
        // Initialize the invalid domain.
        InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
        SuccDomain = CondSet;
      }

      SuccDomain = SuccDomain.detect_equalities();

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will clean up and bail.
      if (unsignedFromIslSize(SuccDomain.n_basic_set()) < MaxDisjunctsInDomain)
        continue;

      scop->invalidate(COMPLEXITY, DebugLoc());
      while (++u < ConditionSets.size())
        isl_set_free(ConditionSets[u]);
      return false;
    }
  }

  return true;
}
bool ScopBuilder::propagateInvalidStmtDomains(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {

    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        propagateInvalidStmtDomains(SubRegion, InvalidDomainMap);
        continue;
      }
    }

    bool ContainsErrorBlock = containsErrorBlock(RN, scop->getRegion(), &SD);
    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = scop->getOrInitEmptyDomain(BB);
    assert(!Domain.is_null() && "Cannot propagate a nullptr");

    isl::set InvalidDomain = InvalidDomainMap[BB];

    bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);

    if (!IsInvalidBlock) {
      InvalidDomain = InvalidDomain.intersect(Domain);
    } else {
      InvalidDomain = Domain;
      isl::set DomPar = Domain.params();
      recordAssumption(&RecordedAssumptions, ERRORBLOCK, DomPar,
                       BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
      Domain = isl::set::empty(Domain.get_space());
    }

    if (InvalidDomain.is_empty()) {
      InvalidDomainMap[BB] = InvalidDomain;
      continue;
    }

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    auto *TI = BB->getTerminator();
    unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
    for (unsigned u = 0; u < NumSuccs; u++) {
      auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip successors outside the SCoP.
      if (!scop->contains(SuccBB))
        continue;

      // Skip backedges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop =
          getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());

      auto AdjustedInvalidDomain =
          adjustDomainDimensions(InvalidDomain, BBLoop, SuccBBLoop);

      isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
      SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain);
      SuccInvalidDomain = SuccInvalidDomain.coalesce();

      InvalidDomainMap[SuccBB] = SuccInvalidDomain;

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will bail.
      if (unsignedFromIslSize(SuccInvalidDomain.n_basic_set()) <
          MaxDisjunctsInDomain)
        continue;

      InvalidDomainMap.erase(BB);
      scop->invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
      return false;
    }

    InvalidDomainMap[BB] = InvalidDomain;
  }

  return true;
}
void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                   Region *NonAffineSubRegion,
                                   bool IsExitBlock) {
  // PHI nodes that are in the exit block of the region, hence if IsExitBlock
  // is true, are not modeled as ordinary PHI nodes as they are not part of the
  // region. However, we model the operands in the predecessor blocks that are
  // part of the region as regular scalar accesses.

  // If we can synthesize a PHI we can skip it, however only if it is in
  // the region. If it is not it can only be in the exit block of the region.
  // In this case we model the operands but not the PHI itself.
  auto *Scope = LI.getLoopFor(PHI->getParent());
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
    return;

  // PHI nodes are modeled as if they had been demoted prior to the SCoP
  // detection. Hence, the PHI is a load of a new memory location in which the
  // incoming value was written at the end of the incoming basic block.
  bool OnlyNonAffineSubRegionOperands = true;
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
    Value *Op = PHI->getIncomingValue(u);
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
    ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));

    // Do not build PHI dependences inside a non-affine subregion, but make
    // sure that the necessary scalar values are still made available.
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
      auto *OpInst = dyn_cast<Instruction>(Op);
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
        ensureValueRead(Op, OpStmt);
      continue;
    }

    OnlyNonAffineSubRegionOperands = false;
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
  }

  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
    addPHIReadAccess(PHIStmt, PHI);
  }
}
void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
                                         Instruction *Inst) {
  assert(!isa<PHINode>(Inst));

  // Pull-in required operands.
  for (Use &Op : Inst->operands())
    ensureValueRead(Op.get(), UserStmt);
}

// Create a sequence of two schedules. Either argument may be null and is
// interpreted as the empty schedule. Can also return null if both schedules
// are empty.
static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
  if (Prev.is_null())
    return Succ;
  if (Succ.is_null())
    return Prev;

  return Prev.sequence(Succ);
}
// Create an isl_multi_union_aff that defines an identity mapping from the
// elements of USet to their N-th dimension.
//
// # Example:
//
//            Domain: { A[i,j]; B[i,j,k] }
//                 N: 1
//
// Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
//
// @param USet   A union set describing the elements for which to generate a
//               mapping.
// @param N      The dimension to map to.
// @returns      A mapping from USet to its N-th dimension.
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N) {
  assert(!USet.is_null());
  assert(!USet.is_empty());

  auto Result = isl::union_pw_multi_aff::empty(USet.get_space());

  for (isl::set S : USet.get_set_list()) {
    unsigned Dim = unsignedFromIslSize(S.tuple_dim());
    assert(Dim >= N);
    auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
                                                  N, Dim - N);
    if (N > 1)
      PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);

    Result = Result.add_pw_multi_aff(PMA);
  }

  return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
}
void ScopBuilder::buildSchedule() {
  Loop *L = getLoopSurroundingScop(*scop, LI);
  LoopStackTy LoopStack({LoopStackElementTy(L, {}, 0)});
  buildSchedule(scop->getRegion().getNode(), LoopStack);
  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
  scop->setScheduleTree(LoopStack[0].Schedule);
}
/// To generate a schedule for the elements in a Region we traverse the Region
/// in reverse-post-order and add the contained RegionNodes in traversal order
/// to the schedule of the loop that is currently at the top of the LoopStack.
/// For loop-free codes, this results in a correct sequential ordering.
///
/// Example:
///           bb1(0)
///          /     \.
///      bb2(1)   bb3(2)
///         \    /  \.
///         bb4(3)  bb5(4)
///            \    /
///            bb6(5)
///
/// Including loops requires additional processing. Whenever a loop header is
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing any other nodes. To implement this
/// loop-nodes-first-processing, the reverse post-order traversal is
/// insufficient. Hence, we additionally check if the traversal yields
/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
/// These region-nodes are then queued and only traversed after all nodes
/// within the current loop have been processed.
void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
  Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);

  ReversePostOrderTraversal<Region *> RTraversal(R);
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  std::deque<RegionNode *> DelayList;
  bool LastRNWaiting = false;

  // Iterate over the region @p R in reverse post-order but queue
  // sub-regions/blocks iff they are not part of the last encountered but not
  // completely traversed loop. The variable LastRNWaiting is a flag to
  // indicate that we queued the last sub-region/block from the reverse
  // post-order iterator. If it is set we have to explore the next
  // sub-region/block from the iterator (if any) to guarantee progress. If it
  // is not set we first try the next queued sub-region/blocks.
  while (!WorkList.empty() || !DelayList.empty()) {
    RegionNode *RN;

    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
      RN = WorkList.front();
      WorkList.pop_front();
      LastRNWaiting = false;
    } else {
      RN = DelayList.front();
      DelayList.pop_front();
    }

    Loop *L = getRegionNodeLoop(RN, LI);
    if (!scop->contains(L))
      L = OuterScopLoop;

    Loop *LastLoop = LoopStack.back().L;
    if (LastLoop != L) {
      if (LastLoop && !LastLoop->contains(L)) {
        LastRNWaiting = true;
        DelayList.push_back(RN);
        continue;
      }
      LoopStack.push_back({L, {}, 0});
    }
    buildSchedule(RN, LoopStack);
  }
}
void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
  if (RN->isSubRegion()) {
    auto *LocalRegion = RN->getNodeAs<Region>();
    if (!scop->isNonAffineSubRegion(LocalRegion)) {
      buildSchedule(LocalRegion, LoopStack);
      return;
    }
  }

  assert(LoopStack.rbegin() != LoopStack.rend());
  auto LoopData = LoopStack.rbegin();
  LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);

  for (auto *Stmt : scop->getStmtListFor(RN)) {
    isl::union_set UDomain{Stmt->getDomain()};
    auto StmtSchedule = isl::schedule::from_domain(UDomain);
    LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
  }

  // Check if we just processed the last node in this loop. If we did, finalize
  // the loop by:
  //
  //   - adding new schedule dimensions
  //   - folding the resulting schedule into the parent loop schedule
  //   - dropping the loop schedule from the LoopStack.
  //
  // Then continue to check surrounding loops, which might also have been
  // completed by this node.
  size_t Dimension = LoopStack.size();
  while (LoopData->L &&
         LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
    isl::schedule Schedule = LoopData->Schedule;
    auto NumBlocksProcessed = LoopData->NumBlocksProcessed;

    assert(std::next(LoopData) != LoopStack.rend());
    Loop *L = LoopData->L;
    ++LoopData;
    --Dimension;

    if (!Schedule.is_null()) {
      isl::union_set Domain = Schedule.get_domain();
      isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
      Schedule = Schedule.insert_partial_schedule(MUPA);

      if (hasDisableAllTransformsHint(L)) {
        /// If any of the loops has a disable_nonforced heuristic, mark the
        /// entire SCoP as such. The ISL rescheduler can only reschedule the
        /// SCoP in its entirety.
        /// TODO: ScopDetection could avoid including such loops or wrap them
        /// as boxed loops. It still needs to pass through loops with
        /// user-defined metadata.
        scop->markDisableHeuristics();
      }

      // It is easier to insert the marks here than to do it retroactively.
      isl::id IslLoopId = createIslLoopAttr(scop->getIslCtx(), L);
      if (!IslLoopId.is_null())
        Schedule =
            Schedule.get_root().child(0).insert_mark(IslLoopId).get_schedule();

      LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
    }

    LoopData->NumBlocksProcessed += NumBlocksProcessed;
  }

  // Now pop all loops processed up there from the LoopStack.
  LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
}
void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
  // Check for uses of this instruction outside the scop. Because we do not
  // iterate over such instructions and therefore did not "ensure" the
  // existence of a write, we must determine such use here.
  if (scop->isEscaping(Inst))
    ensureValueWrite(Inst);
}
void ScopBuilder::addRecordedAssumptions() {
  for (auto &AS : llvm::reverse(RecordedAssumptions)) {

    if (!AS.BB) {
      scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
                          nullptr /* BasicBlock */, AS.RequiresRTC);
      continue;
    }

    // If the domain was deleted the assumptions are void.
    isl_set *Dom = scop->getDomainConditions(AS.BB).release();
    if (!Dom)
      continue;

    // If a basic block was given use its domain to simplify the assumption.
    // In case of restrictions we know they only have to hold on the domain,
    // thus we can intersect them with the domain of the block. However, for
    // assumptions the domain has to imply them, thus:
    //                     _              _____
    //   Dom => S   <==>   A v B   <==>   A - B
    //
    // To avoid the complement we will register A - B as a restriction not an
    // assumption.
    isl_set *S = AS.Set.copy();
    if (AS.Sign == AS_RESTRICTION)
      S = isl_set_params(isl_set_intersect(S, Dom));
    else /* (AS.Sign == AS_ASSUMPTION) */
      S = isl_set_params(isl_set_subtract(Dom, S));

    scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB,
                        AS.RequiresRTC);
  }
}
void ScopBuilder::addUserAssumptions(
    AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  for (auto &Assumption : AC.assumptions()) {
    auto *CI = dyn_cast_or_null<CallInst>(Assumption);
    if (!CI || CI->arg_size() != 1)
      continue;

    bool InScop = scop->contains(CI);
    if (!InScop && !scop->isDominatedBy(DT, CI->getParent()))
      continue;

    auto *L = LI.getLoopFor(CI->getParent());
    auto *Val = CI->getArgOperand(0);
    ParameterSetTy DetectedParams;
    auto &R = scop->getRegion();
    if (!isAffineConstraint(Val, &R, L, SE, DetectedParams)) {
      ORE.emit(
          OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
          << "Non-affine user assumption ignored.");
      continue;
    }

    // Collect all newly introduced parameters.
    ParameterSetTy NewParams;
    for (auto *Param : DetectedParams) {
      Param = extractConstantFactor(Param, SE).second;
      Param = scop->getRepresentingInvariantLoadSCEV(Param);
      if (scop->isParam(Param))
        continue;
      NewParams.insert(Param);
    }

    SmallVector<isl_set *, 2> ConditionSets;
    auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
    BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
    auto *Dom = InScop ? isl_set_copy(scop->getDomainConditions(BB).get())
                       : isl_set_copy(scop->getContext().get());
    assert(Dom && "Cannot propagate a nullptr.");
    bool Valid = buildConditionSets(BB, Val, TI, L, Dom, InvalidDomainMap,
                                    ConditionSets);
    isl_set_free(Dom);

    if (!Valid)
      continue;

    isl_set *AssumptionCtx = nullptr;
    if (InScop) {
      AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
      isl_set_free(ConditionSets[0]);
    } else {
      AssumptionCtx = isl_set_complement(ConditionSets[1]);
      AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
    }

    // Project out newly introduced parameters as they are not otherwise
    // useful.
    if (!NewParams.empty()) {
      for (isl_size u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
        auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
        auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
        isl_id_free(Id);

        if (!NewParams.count(Param))
          continue;

        AssumptionCtx =
            isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
      }
    }

    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
             << "Use user assumption: "
             << stringFromIslObj(AssumptionCtx, "null"));
    isl::set newContext =
        scop->getContext().intersect(isl::manage(AssumptionCtx));
    scop->setContext(newContext);
  }
}
1442 bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
1443 // Memory builtins are not considered in this function.
1444 if (!Inst.isLoad() && !Inst.isStore())
1445 return false;
1447 Value *Val = Inst.getValueOperand();
1448 Type *ElementType = Val->getType();
1449 Value *Address = Inst.getPointerOperand();
1450 const SCEV *AccessFunction =
1451 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1452 const SCEVUnknown *BasePointer =
1453 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1454 enum MemoryAccess::AccessType AccType =
1455 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1457 if (auto *BitCast = dyn_cast<BitCastInst>(Address))
1458 Address = BitCast->getOperand(0);
1460 auto *GEP = dyn_cast<GetElementPtrInst>(Address);
1461 if (!GEP || DL.getTypeAllocSize(GEP->getResultElementType()) !=
1462 DL.getTypeAllocSize(ElementType))
1463 return false;
1465 SmallVector<const SCEV *, 4> Subscripts;
1466 SmallVector<int, 4> Sizes;
1467 getIndexExpressionsFromGEP(SE, GEP, Subscripts, Sizes);
1468 auto *BasePtr = GEP->getOperand(0);
1470 if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
1471 BasePtr = BasePtrCast->getOperand(0);
1473 // Check for identical base pointers to ensure that we do not miss index
1474 // offsets that have been added before this GEP is applied.
1475 if (BasePtr != BasePointer->getValue())
1476 return false;
1478 std::vector<const SCEV *> SizesSCEV;
1480 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1482 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1483 for (auto *Subscript : Subscripts) {
1484 InvariantLoadsSetTy AccessILS;
1485 if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
1486 &AccessILS))
1487 return false;
1489 for (LoadInst *LInst : AccessILS)
1490 if (!ScopRIL.count(LInst))
1491 return false;
1494 if (Sizes.empty())
1495 return false;
1497 SizesSCEV.push_back(nullptr);
1499 for (auto V : Sizes)
1500 SizesSCEV.push_back(SE.getSCEV(
1501 ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
1503 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1504 true, Subscripts, SizesSCEV, Val);
1505 return true;
1506 }
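// Illustrative example (assumed C input) for buildAccessMultiDimFixed:
//
//   float A[512][512];
//   A[i][j] = 1.0f;
//
// The GEP carries the constant shape, so the access is modeled with
// Subscripts = {i, j} and Sizes = {512}; the leading nullptr pushed above
// marks the unbounded outermost dimension.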
1508 bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
1509 // Memory builtins are not considered by this function.
1510 if (!Inst.isLoad() && !Inst.isStore())
1511 return false;
1513 if (!PollyDelinearize)
1514 return false;
1516 Value *Address = Inst.getPointerOperand();
1517 Value *Val = Inst.getValueOperand();
1518 Type *ElementType = Val->getType();
1519 unsigned ElementSize = DL.getTypeAllocSize(ElementType);
1520 enum MemoryAccess::AccessType AccType =
1521 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1523 const SCEV *AccessFunction =
1524 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1525 const SCEVUnknown *BasePointer =
1526 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1528 assert(BasePointer && "Could not find base pointer");
1530 auto &InsnToMemAcc = scop->getInsnToMemAccMap();
1531 auto AccItr = InsnToMemAcc.find(Inst);
1532 if (AccItr == InsnToMemAcc.end())
1533 return false;
1535 std::vector<const SCEV *> Sizes = {nullptr};
1537 Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
1538 AccItr->second.Shape->DelinearizedSizes.end());
1540 // In case only the element size is contained in the 'Sizes' array, the
1541 // access does not access a real multi-dimensional array. Hence, we allow
1542 // the normal single-dimensional access construction to handle this.
1543 if (Sizes.size() == 1)
1544 return false;
1546 // Remove the element size. This information is already provided by the
1547 // ElementSize parameter. In case the element size of this access and the
1548 // element size used for delinearization differ, the delinearization is
1549 // incorrect. Hence, we invalidate the scop.
1550 //
1551 // TODO: Handle delinearization with differing element sizes.
1552 auto DelinearizedSize =
1553 cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
1554 Sizes.pop_back();
1555 if (ElementSize != DelinearizedSize)
1556 scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());
1558 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1559 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
1560 return true;
1561 }
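// Illustrative example (assumed C input) for buildAccessMultiDimParam:
// with -polly-delinearize, a parametrically sized access such as
//
//   void f(long n, long m, double A[n][m]) { A[i][j] = 0.0; }
//
// has the address SCEV A + (i * m + j) * 8 and is recovered as
// Subscripts = {i, j} with Sizes = {nullptr, m} from the delinearized
// shape looked up above.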
1563 bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
1564 auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
1566 if (MemIntr == nullptr)
1567 return false;
1569 auto *L = LI.getLoopFor(Inst->getParent());
1570 const SCEV *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
1571 assert(LengthVal);
1573 // Check if the length val is actually affine or if we overapproximate it
1574 InvariantLoadsSetTy AccessILS;
1575 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1577 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1578 bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
1579 LengthVal, SE, &AccessILS);
1580 for (LoadInst *LInst : AccessILS)
1581 if (!ScopRIL.count(LInst))
1582 LengthIsAffine = false;
1583 if (!LengthIsAffine)
1584 LengthVal = nullptr;
1586 auto *DestPtrVal = MemIntr->getDest();
1587 assert(DestPtrVal);
1589 const SCEV *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
1590 assert(DestAccFunc);
1591 // Ignore accesses to "NULL".
1592 // TODO: We could use this to optimize the region further, e.g., intersect
1593 // the context with
1594 // isl_set_complement(isl_set_params(getDomain()))
1595 // as we know it would be undefined to execute this instruction anyway.
1596 if (DestAccFunc->isZero())
1597 return true;
1599 if (auto *U = dyn_cast<SCEVUnknown>(DestAccFunc)) {
1600 if (isa<ConstantPointerNull>(U->getValue()))
1601 return true;
1602 }
1604 auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
1605 assert(DestPtrSCEV);
1606 DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
1607 addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
1608 IntegerType::getInt8Ty(DestPtrVal->getContext()),
1609 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
1610 Inst.getValueOperand());
1612 auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
1613 if (!MemTrans)
1614 return true;
1616 auto *SrcPtrVal = MemTrans->getSource();
1617 assert(SrcPtrVal);
1619 const SCEV *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
1620 assert(SrcAccFunc);
1621 // Ignore accesses to "NULL".
1622 // TODO: See above TODO
1623 if (SrcAccFunc->isZero())
1624 return true;
1626 auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
1627 assert(SrcPtrSCEV);
1628 SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
1629 addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
1630 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
1631 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
1632 Inst.getValueOperand());
1634 return true;
1635 }
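// Illustrative sketch for buildAccessMemIntrinsic: a call such as
//
//   memcpy(Dst, Src, n);
//
// is modeled as a MUST_WRITE of the byte range starting at Dst and a READ
// of the range starting at Src, both on i8 arrays with the (affine) length
// n; if n is not affine, only the start offset is kept and the extent is
// over-approximated.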
1637 bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
1638 auto *CI = dyn_cast_or_null<CallInst>(Inst);
1640 if (CI == nullptr)
1641 return false;
1643 if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
1644 return true;
1646 const SCEV *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
1647 auto *CalledFunction = CI->getCalledFunction();
1648 MemoryEffects ME = AA.getMemoryEffects(CalledFunction);
1649 if (ME.doesNotAccessMemory())
1650 return true;
1652 if (ME.onlyAccessesArgPointees()) {
1653 ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
1654 auto AccType =
1655 !isModSet(ArgMR) ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
1656 Loop *L = LI.getLoopFor(Inst->getParent());
1657 for (const auto &Arg : CI->args()) {
1658 if (!Arg->getType()->isPointerTy())
1659 continue;
1661 const SCEV *ArgSCEV = SE.getSCEVAtScope(Arg, L);
1662 if (ArgSCEV->isZero())
1663 continue;
1665 if (auto *U = dyn_cast<SCEVUnknown>(ArgSCEV)) {
1666 if (isa<ConstantPointerNull>(U->getValue()))
1667 return true;
1670 auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
1671 addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
1672 ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
1673 }
1674 return true;
1675 }
1677 if (ME.onlyReadsMemory()) {
1678 GlobalReads.emplace_back(Stmt, CI);
1679 return true;
1680 }
1681 return false;
1682 }
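// Illustrative sketch for buildAccessCallInst: for a call that only
// touches its pointer arguments, e.g.
//
//   g(A, B);   // callee known to only access argument memory
//
// each pointer argument's base array gets one non-affine access at offset
// 0 (READ, or MAY_WRITE if the callee may store), since the exact touched
// elements are unknown.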
1684 bool ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
1685 // Memory builtins are not considered by this function.
1686 if (!Inst.isLoad() && !Inst.isStore())
1687 return false;
1689 Value *Address = Inst.getPointerOperand();
1690 Value *Val = Inst.getValueOperand();
1691 Type *ElementType = Val->getType();
1692 enum MemoryAccess::AccessType AccType =
1693 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1695 const SCEV *AccessFunction =
1696 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1697 const SCEVUnknown *BasePointer =
1698 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1700 assert(BasePointer && "Could not find base pointer");
1701 AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);
1703 // Check if the access depends on a loop contained in a non-affine subregion.
1704 bool isVariantInNonAffineLoop = false;
1705 SetVector<const Loop *> Loops;
1706 findLoops(AccessFunction, Loops);
1707 for (const Loop *L : Loops)
1708 if (Stmt->contains(L)) {
1709 isVariantInNonAffineLoop = true;
1710 break;
1711 }
1713 InvariantLoadsSetTy AccessILS;
1715 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1716 bool IsAffine = !isVariantInNonAffineLoop &&
1717 isAffineExpr(&scop->getRegion(), SurroundingLoop,
1718 AccessFunction, SE, &AccessILS);
1720 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1721 for (LoadInst *LInst : AccessILS)
1722 if (!ScopRIL.count(LInst))
1723 IsAffine = false;
1725 if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
1726 AccType = MemoryAccess::MAY_WRITE;
1728 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1729 IsAffine, {AccessFunction}, {nullptr}, Val);
1730 return true;
1731 }
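// Illustrative example for buildAccessSingleDim: an affine access like
//
//   A[2 * i + 1] = ...;
//
// becomes a one-dimensional MUST_WRITE with access function 2*i + 1, while
// a non-affine subscript (e.g. A[i * i]) is kept but demoted to MAY_WRITE
// above so later analyses remain conservative.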
1733 void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
1734 if (buildAccessMemIntrinsic(Inst, Stmt))
1735 return;
1737 if (buildAccessCallInst(Inst, Stmt))
1738 return;
1740 if (buildAccessMultiDimFixed(Inst, Stmt))
1741 return;
1743 if (buildAccessMultiDimParam(Inst, Stmt))
1744 return;
1746 if (buildAccessSingleDim(Inst, Stmt))
1747 return;
1749 llvm_unreachable(
1750 "At least one of the buildAccess functions must have handled this access, "
1751 "or ScopDetection should have rejected this SCoP");
1752 }
1754 void ScopBuilder::buildAccessFunctions() {
1755 for (auto &Stmt : *scop) {
1756 if (Stmt.isBlockStmt()) {
1757 buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
1758 continue;
1759 }
1761 Region *R = Stmt.getRegion();
1762 for (BasicBlock *BB : R->blocks())
1763 buildAccessFunctions(&Stmt, *BB, R);
1764 }
1766 // Build write accesses for values that are used after the SCoP.
1767 // The instructions defining them might be synthesizable and therefore not
1768 // contained in any statement, hence we iterate over the original instructions
1769 // to identify all escaping values.
1770 for (BasicBlock *BB : scop->getRegion().blocks()) {
1771 for (Instruction &Inst : *BB)
1772 buildEscapingDependences(&Inst);
1773 }
1774 }
1776 bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
1777 return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
1778 !canSynthesize(Inst, *scop, &SE, L);
1779 }
1781 /// Generate a name for a statement.
1783 /// @param BB The basic block the statement will represent.
1784 /// @param BBIdx The index of the @p BB relative to other BBs/regions.
1785 /// @param Count The index of the created statement in @p BB.
1786 /// @param IsMain Whether this is the main statement of @p BB. If true,
1787 /// no suffix will be added.
1788 /// @param IsLast Uses a special indicator for the last statement of a BB.
1789 static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
1790 bool IsMain, bool IsLast = false) {
1791 std::string Suffix;
1792 if (!IsMain) {
1793 if (UseInstructionNames)
1794 Suffix = '_';
1795 if (IsLast)
1796 Suffix += "last";
1797 else if (Count < 26)
1798 Suffix += 'a' + Count;
1799 else
1800 Suffix += std::to_string(Count);
1801 }
1802 return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
1803 }
1805 /// Generate a name for a statement that represents a non-affine subregion.
1807 /// @param R The region the statement will represent.
1808 /// @param RIdx The index of the @p R relative to other BBs/regions.
1809 static std::string makeStmtName(Region *R, long RIdx) {
1810 return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
1811 UseInstructionNames);
1812 }
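// Naming sketch derived from the code above (names are illustrative): for
// a block %bb5 the main statement may be called "Stmt_bb5", additional
// statements get suffixes "_a", "_b", ... (or a number from 26 onwards),
// the epilogue gets "_last", and a non-affine subregion statement is named
// after its region instead.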
1814 void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
1815 Loop *SurroundingLoop = LI.getLoopFor(BB);
1817 int Count = 0;
1818 long BBIdx = scop->getNextStmtIdx();
1819 std::vector<Instruction *> Instructions;
1820 for (Instruction &Inst : *BB) {
1821 if (shouldModelInst(&Inst, SurroundingLoop))
1822 Instructions.push_back(&Inst);
1823 if (Inst.getMetadata("polly_split_after") ||
1824 (SplitOnStore && isa<StoreInst>(Inst))) {
1825 std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1826 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1827 Count++;
1828 Instructions.clear();
1829 }
1830 }
1832 std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1833 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1834 }
1836 /// Is @p Inst an ordered instruction?
1838 /// An unordered instruction is an instruction such that a sequence of
1839 /// unordered instructions can be permuted without changing semantics. Any
1840 /// instruction for which this is not always the case is ordered.
1841 static bool isOrderedInstruction(Instruction *Inst) {
1842 return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
1843 }
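// For example, loads, stores, and most calls are ordered because they may
// read or write memory, whereas pure arithmetic such as add or fmul is
// unordered and may be regrouped freely between statements.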
1845 /// Join instructions to the same statement if one uses the scalar result of the
1846 /// other.
1847 static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
1848 ArrayRef<Instruction *> ModeledInsts) {
1849 for (Instruction *Inst : ModeledInsts) {
1850 if (isa<PHINode>(Inst))
1851 continue;
1853 for (Use &Op : Inst->operands()) {
1854 Instruction *OpInst = dyn_cast<Instruction>(Op.get());
1855 if (!OpInst)
1856 continue;
1858 // Check if OpInst is in the BB and is a modeled instruction.
1859 auto OpVal = UnionFind.findValue(OpInst);
1860 if (OpVal == UnionFind.end())
1861 continue;
1863 UnionFind.unionSets(Inst, OpInst);
1864 }
1865 }
1866 }
1868 /// Ensure that the order of ordered instructions does not change.
1870 /// If we encounter an ordered instruction enclosed in instructions belonging to
1871 /// a different statement (which might as well contain ordered instructions, but
1872 /// this is not tested here), join them.
1873 static void
1874 joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
1875 ArrayRef<Instruction *> ModeledInsts) {
1876 SetVector<Instruction *> SeenLeaders;
1877 for (Instruction *Inst : ModeledInsts) {
1878 if (!isOrderedInstruction(Inst))
1879 continue;
1881 Instruction *Leader = UnionFind.getLeaderValue(Inst);
1882 // Since previous iterations might have merged sets, some items in
1883 // SeenLeaders are not leaders anymore. However, the new leader of
1884 // previously merged instructions must be one of the former leaders of
1885 // these merged instructions.
1886 bool Inserted = SeenLeaders.insert(Leader);
1887 if (Inserted)
1888 continue;
1890 // Merge statements to close holes. Say, we have already seen statements A
1891 // and B, in this order. Then we see an instruction of A again and we would
1892 // see the pattern "A B A". This function joins all statements until the
1893 // only seen occurrence of A.
1894 for (Instruction *Prev : reverse(SeenLeaders)) {
1895 // We are backtracking from the last element until we see Inst's leader
1896 // in SeenLeaders and merge all into one set. Although leaders of
1897 // instructions change during the execution of this loop, it's irrelevant
1898 // as we are just searching for the element that we already confirmed is
1899 // in the list.
1900 if (Prev == Leader)
1901 break;
1902 UnionFind.unionSets(Prev, Leader);
1903 }
1904 }
1905 }
1907 /// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
1908 /// the incoming values from this block are executed after the PHI READ.
1910 /// Otherwise it could overwrite the incoming value from before the BB with the
1911 /// value for the next execution. This can happen if the PHI WRITE is added to
1912 /// the statement with the instruction that defines the incoming value (instead
1913 /// of the last statement of the same BB). To ensure that the PHI READ and WRITE
1914 /// are in order, we put both into the statement. PHI WRITEs are always executed
1915 /// after PHI READs when they are in the same statement.
1917 /// TODO: This is an overpessimization. We only have to ensure that the PHI
1918 /// WRITE is not put into a statement containing the PHI itself. That could also
1919 /// be done by
1920 /// - having all (strongly connected) PHIs in a single statement,
1921 /// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
1922 /// has a chance of being lifted before a PHI by being in a statement with a
1923 /// PHI that comes before in the basic block), or
1924 /// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
1925 static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
1926 ArrayRef<Instruction *> ModeledInsts) {
1927 for (Instruction *Inst : ModeledInsts) {
1928 PHINode *PHI = dyn_cast<PHINode>(Inst);
1929 if (!PHI)
1930 continue;
1932 int Idx = PHI->getBasicBlockIndex(PHI->getParent());
1933 if (Idx < 0)
1934 continue;
1936 Instruction *IncomingVal =
1937 dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
1938 if (!IncomingVal)
1939 continue;
1941 UnionFind.unionSets(PHI, IncomingVal);
1942 }
1943 }
1945 void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
1946 Loop *L = LI.getLoopFor(BB);
1948 // Extracting out modeled instructions saves us from checking
1949 // shouldModelInst() repeatedly.
1950 SmallVector<Instruction *, 32> ModeledInsts;
1951 EquivalenceClasses<Instruction *> UnionFind;
1952 Instruction *MainInst = nullptr, *MainLeader = nullptr;
1953 for (Instruction &Inst : *BB) {
1954 if (!shouldModelInst(&Inst, L))
1955 continue;
1956 ModeledInsts.push_back(&Inst);
1957 UnionFind.insert(&Inst);
1959 // When a BB is split into multiple statements, the main statement is the
1960 // one containing the 'main' instruction. We select the first instruction
1961 // that is unlikely to be removed (because it has side-effects) as the main
1962 // one. It is used to ensure that at least one statement from the BB has the
1963 // same name as with -polly-stmt-granularity=bb.
1964 if (!MainInst && (isa<StoreInst>(Inst) ||
1965 (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
1966 MainInst = &Inst;
1967 }
1969 joinOperandTree(UnionFind, ModeledInsts);
1970 joinOrderedInstructions(UnionFind, ModeledInsts);
1971 joinOrderedPHIs(UnionFind, ModeledInsts);
1973 // The list of instructions per statement (each statement is represented by
1974 // its leader instruction).
1975 MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
1977 // The order of statements must be preserved w.r.t. their ordered
1978 // instructions. Without this explicit scan, we would also use non-ordered
1979 // instructions (whose order is arbitrary) to determine statement order.
1980 for (Instruction *Inst : ModeledInsts) {
1981 if (!isOrderedInstruction(Inst))
1982 continue;
1984 auto LeaderIt = UnionFind.findLeader(Inst);
1985 if (LeaderIt == UnionFind.member_end())
1986 continue;
1988 // Insert element for the leader instruction.
1989 (void)LeaderToInstList[*LeaderIt];
1990 }
1992 // Collect the instructions of all leaders. UnionFind's member iterator
1993 // unfortunately is not in any specific order.
1994 for (Instruction *Inst : ModeledInsts) {
1995 auto LeaderIt = UnionFind.findLeader(Inst);
1996 if (LeaderIt == UnionFind.member_end())
1997 continue;
1999 if (Inst == MainInst)
2000 MainLeader = *LeaderIt;
2001 std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
2002 InstList.push_back(Inst);
2003 }
2005 // Finally build the statements.
2006 int Count = 0;
2007 long BBIdx = scop->getNextStmtIdx();
2008 for (auto &Instructions : LeaderToInstList) {
2009 std::vector<Instruction *> &InstList = Instructions.second;
2011 // If there is no main instruction, make the first statement the main.
2012 bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);
2014 std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
2015 scop->addScopStmt(BB, Name, L, std::move(InstList));
2016 Count += 1;
2017 }
2019 // Unconditionally add an epilogue (last statement). It contains no
2020 // instructions, but holds the PHI write accesses for successor basic blocks,
2021 // if the incoming value is not defined in another statement of the same BB.
2022 // The epilogue becomes the main statement only if there is no other
2023 // statement that could become main.
2024 // The epilogue will be removed if no PHIWrite is added to it.
2025 std::string EpilogueName = makeStmtName(BB, BBIdx, Count, Count == 0, true);
2026 scop->addScopStmt(BB, EpilogueName, L, {});
2027 }
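// Illustrative split (assumed IR): in a block such as
//
//   %v = fadd double %x, %y
//   store double %v, ptr %A
//   %w = fmul double %x, %x
//   store double %w, ptr %B
//
// the two operand trees share no scalar results, so the routine above
// emits two statements (in store order) plus the "_last" epilogue that is
// kept only if a PHI write is added to it.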
2029 void ScopBuilder::buildStmts(Region &SR) {
2030 if (scop->isNonAffineSubRegion(&SR)) {
2031 std::vector<Instruction *> Instructions;
2032 Loop *SurroundingLoop =
2033 getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
2034 for (Instruction &Inst : *SR.getEntry())
2035 if (shouldModelInst(&Inst, SurroundingLoop))
2036 Instructions.push_back(&Inst);
2037 long RIdx = scop->getNextStmtIdx();
2038 std::string Name = makeStmtName(&SR, RIdx);
2039 scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
2040 return;
2041 }
2043 for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
2044 if (I->isSubRegion())
2045 buildStmts(*I->getNodeAs<Region>());
2046 else {
2047 BasicBlock *BB = I->getNodeAs<BasicBlock>();
2048 switch (StmtGranularity) {
2049 case GranularityChoice::BasicBlocks:
2050 buildSequentialBlockStmts(BB);
2051 break;
2052 case GranularityChoice::ScalarIndependence:
2053 buildEqivClassBlockStmts(BB);
2054 break;
2055 case GranularityChoice::Stores:
2056 buildSequentialBlockStmts(BB, true);
2057 break;
2058 }
2059 }
2060 }
2062 void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
2063 Region *NonAffineSubRegion) {
2064 assert(
2065 Stmt &&
2066 "The exit BB is the only one that cannot be represented by a statement");
2067 assert(Stmt->represents(&BB));
2069 // We do not build access functions for error blocks, as they may contain
2070 // instructions we can not model.
2071 if (SD.isErrorBlock(BB, scop->getRegion()))
2072 return;
2074 auto BuildAccessesForInst = [this, Stmt,
2075 NonAffineSubRegion](Instruction *Inst) {
2076 PHINode *PHI = dyn_cast<PHINode>(Inst);
2077 if (PHI)
2078 buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);
2080 if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
2081 assert(Stmt && "Cannot build access function in non-existing statement");
2082 buildMemoryAccess(MemInst, Stmt);
2083 }
2085 // PHI nodes have already been modeled above and terminators that are
2086 // not part of a non-affine subregion are fully modeled and regenerated
2087 // from the polyhedral domains. Hence, they do not need to be modeled as
2088 // explicit data dependences.
2089 if (!PHI)
2090 buildScalarDependences(Stmt, Inst);
2091 };
2093 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
2094 bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
2095 if (IsEntryBlock) {
2096 for (Instruction *Inst : Stmt->getInstructions())
2097 BuildAccessesForInst(Inst);
2098 if (Stmt->isRegionStmt())
2099 BuildAccessesForInst(BB.getTerminator());
2100 } else {
2101 for (Instruction &Inst : BB) {
2102 if (isIgnoredIntrinsic(&Inst))
2103 continue;
2105 // Invariant loads already have been processed.
2106 if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
2107 continue;
2109 BuildAccessesForInst(&Inst);
2110 }
2111 }
2112 }
2114 MemoryAccess *ScopBuilder::addMemoryAccess(
2115 ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
2116 Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
2117 ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
2118 MemoryKind Kind) {
2119 bool isKnownMustAccess = false;
2121 // Accesses in single-basic block statements are always executed.
2122 if (Stmt->isBlockStmt())
2123 isKnownMustAccess = true;
2125 if (Stmt->isRegionStmt()) {
2126 // Accesses that dominate the exit block of a non-affine region are always
2127 // executed. In non-affine regions there may exist MemoryKind::Values that
2128 // do not dominate the exit. MemoryKind::Values will always dominate the
2129 // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
2130 // non-affine region.
2131 if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
2132 isKnownMustAccess = true;
2133 }
2135 // Non-affine PHI writes do not "happen" at a particular instruction, but
2136 // after exiting the statement. Therefore they are guaranteed to execute and
2137 // overwrite the old value.
2138 if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
2139 isKnownMustAccess = true;
2141 if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
2142 AccType = MemoryAccess::MAY_WRITE;
2144 auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
2145 Affine, Subscripts, Sizes, AccessValue, Kind);
2147 scop->addAccessFunction(Access);
2148 Stmt->addAccess(Access);
2149 return Access;
2150 }
2152 void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
2153 MemoryAccess::AccessType AccType,
2154 Value *BaseAddress, Type *ElementType,
2155 bool IsAffine,
2156 ArrayRef<const SCEV *> Subscripts,
2157 ArrayRef<const SCEV *> Sizes,
2158 Value *AccessValue) {
2159 ArrayBasePointers.insert(BaseAddress);
2160 addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine,
2161 AccessValue, Subscripts, Sizes, MemoryKind::Array);
2162 }
2164 /// Check if @p Expr is divisible by @p Size.
2165 static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
2166 assert(Size != 0);
2167 if (Size == 1)
2168 return true;
2170 // Only one factor needs to be divisible.
2171 if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
2172 for (const SCEV *FactorExpr : MulExpr->operands())
2173 if (isDivisible(FactorExpr, Size, SE))
2174 return true;
2175 return false;
2176 }
2178 // For other n-ary expressions (Add, AddRec, Max,...) all operands need
2179 // to be divisible.
2180 if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
2181 for (const SCEV *OpExpr : NAryExpr->operands())
2182 if (!isDivisible(OpExpr, Size, SE))
2183 return false;
2184 return true;
2185 }
2187 const SCEV *SizeSCEV = SE.getConstant(Expr->getType(), Size);
2188 const SCEV *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
2189 const SCEV *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
2190 return MulSCEV == Expr;
2191 }
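// For example, the add-rec {0,+,4} (i.e. 4*i) is divisible by 4 because
// both operands 0 and 4 are; for a product like (n * 4), one divisible
// factor suffices, matching the SCEVMulExpr case above.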
2193 void ScopBuilder::foldSizeConstantsToRight() {
2194 isl::union_set Accessed = scop->getAccesses().range();
2196 for (auto Array : scop->arrays()) {
2197 if (Array->getNumberOfDimensions() <= 1)
2198 continue;
2200 isl::space Space = Array->getSpace();
2201 Space = Space.align_params(Accessed.get_space());
2203 if (!Accessed.contains(Space))
2204 continue;
2206 isl::set Elements = Accessed.extract_set(Space);
2207 isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());
2209 std::vector<int> Int;
2210 unsigned Dims = unsignedFromIslSize(Elements.tuple_dim());
2211 for (unsigned i = 0; i < Dims; i++) {
2212 isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
2213 DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
2214 DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);
2216 isl::basic_set DimHull = DimOnly.affine_hull();
2218 if (i == Dims - 1) {
2219 Int.push_back(1);
2220 Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
2221 continue;
2222 }
2224 if (unsignedFromIslSize(DimHull.dim(isl::dim::div)) == 1) {
2225 isl::aff Diff = DimHull.get_div(0);
2226 isl::val Val = Diff.get_denominator_val();
2228 int ValInt = 1;
2229 if (Val.is_int()) {
2230 auto ValAPInt = APIntFromVal(Val);
2231 if (ValAPInt.isSignedIntN(32))
2232 ValInt = ValAPInt.getSExtValue();
2233 }
2236 Int.push_back(ValInt);
2237 isl::constraint C = isl::constraint::alloc_equality(
2238 isl::local_space(Transform.get_space()));
2239 C = C.set_coefficient_si(isl::dim::out, i, ValInt);
2240 C = C.set_coefficient_si(isl::dim::in, i, -1);
2241 Transform = Transform.add_constraint(C);
2242 continue;
2243 }
2245 isl::basic_set ZeroSet = isl::basic_set(DimHull);
2246 ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);
2248 int ValInt = 1;
2249 if (ZeroSet.is_equal(DimHull)) {
2250 ValInt = 0;
2251 }
2253 Int.push_back(ValInt);
2254 Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
2255 }
2257 isl::set MappedElements = isl::map(Transform).domain();
2258 if (!Elements.is_subset(MappedElements))
2259 continue;
2261 bool CanFold = true;
2262 if (Int[0] <= 1)
2263 CanFold = false;
2265 unsigned NumDims = Array->getNumberOfDimensions();
2266 for (unsigned i = 1; i < NumDims - 1; i++)
2267 if (Int[0] != Int[i] && Int[i])
2268 CanFold = false;
2270 if (!CanFold)
2271 continue;
2273 for (auto &Access : scop->access_functions())
2274 if (Access->getScopArrayInfo() == Array)
2275 Access->setAccessRelation(
2276 Access->getAccessRelation().apply_range(Transform));
2278 std::vector<const SCEV *> Sizes;
2279 for (unsigned i = 0; i < NumDims; i++) {
2280 auto Size = Array->getDimensionSize(i);
2282 if (i == NumDims - 1)
2283 Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0]));
2284 Sizes.push_back(Size);
2285 }
2287 Array->updateSizes(Sizes, false /* CheckConsistency */);
2288 }
2289 }
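// Illustrative effect (assumed shapes): if a two-dimensional array
// A[s0][s1] is only accessed at even outer indices, i.e. as A[2*i][j],
// the transform built above rewrites the accesses to [i][j] and updates
// the sizes to [s0][2*s1]; the linearized offset 2*i*s1 + j is preserved.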
2291 void ScopBuilder::finalizeAccesses() {
2292 updateAccessDimensionality();
2293 foldSizeConstantsToRight();
2294 foldAccessRelations();
2295 assumeNoOutOfBounds();
2296 }
2298 void ScopBuilder::updateAccessDimensionality() {
2299 // Check all array accesses for each base pointer and find a (virtual) element
2300 // size for the base pointer that divides all access functions.
2301 for (ScopStmt &Stmt : *scop)
2302 for (MemoryAccess *Access : Stmt) {
2303 if (!Access->isArrayKind())
2304 continue;
2305 ScopArrayInfo *Array =
2306 const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
2308 if (Array->getNumberOfDimensions() != 1)
2309 continue;
2310 unsigned DivisibleSize = Array->getElemSizeInBytes();
2311 const SCEV *Subscript = Access->getSubscript(0);
2312 while (!isDivisible(Subscript, DivisibleSize, SE))
2313 DivisibleSize /= 2;
2314 auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8);
2315 Array->updateElementType(Ty);
2316 }
2318 for (auto &Stmt : *scop)
2319 for (auto &Access : Stmt)
2320 Access->updateDimensionality();
2321 }
2323 void ScopBuilder::foldAccessRelations() {
2324 for (auto &Stmt : *scop)
2325 for (auto &Access : Stmt)
2326 Access->foldAccessRelation();
2327 }
2329 void ScopBuilder::assumeNoOutOfBounds() {
2330 if (PollyIgnoreInbounds)
2331 return;
2332 for (auto &Stmt : *scop)
2333 for (auto &Access : Stmt) {
2334 isl::set Outside = Access->assumeNoOutOfBound();
2335 const auto &Loc = Access->getAccessInstruction()
2336 ? Access->getAccessInstruction()->getDebugLoc()
2337 : DebugLoc();
2338 recordAssumption(&RecordedAssumptions, INBOUNDS, Outside, Loc,
2339 AS_ASSUMPTION);
2340 }
2341 }
2343 void ScopBuilder::ensureValueWrite(Instruction *Inst) {
2344 // Find the statement that defines the value of Inst. That statement has to
2345 // write the value to make it available to those statements that read it.
2346 ScopStmt *Stmt = scop->getStmtFor(Inst);
2348 // It is possible that the value is synthesizable within a loop (such that it
2349 // is not part of any statement), but not after the loop (where you need the
2350 // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
2351 // avoid this. In case the IR has no such PHI, use the last statement (where
2352 // the value is synthesizable) to write the value.
2353 if (!Stmt)
2354 Stmt = scop->getLastStmtFor(Inst->getParent());
2356 // Inst not defined within this SCoP.
2357 if (!Stmt)
2358 return;
2360 // Do not process further if the instruction is already written.
2361 if (Stmt->lookupValueWriteOf(Inst))
2362 return;
2364 addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
2365 true, Inst, ArrayRef<const SCEV *>(),
2366 ArrayRef<const SCEV *>(), MemoryKind::Value);
2367 }
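// Illustrative scalar communication (assumed IR): if %v is defined in one
// statement and used in another,
//
//   Stmt_a:  %v = fadd double %x, %y   ; MUST_WRITE of %v added above
//   Stmt_b:  store double %v, ptr %A   ; READ of %v via ensureValueRead
//
// the MemoryKind::Value pair models the inter-statement def-use edge.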
2369 void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
2370 // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
2371 // to be able to replace this one. Currently, there is a split responsibility.
2372 // In a first step, the MemoryAccess is created, but without the
2373 // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
2374 // AccessRelation is created. At least for scalar accesses, there is no new
2375 // information available at ScopStmt::buildAccessRelations(), so we could
2376 // create the AccessRelation right away. This is what
2377 // ScopStmt::ensureValueRead(Value*) does.
2379 auto *Scope = UserStmt->getSurroundingLoop();
2380 auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
2381 switch (VUse.getKind()) {
2382 case VirtualUse::Constant:
2383 case VirtualUse::Block:
2384 case VirtualUse::Synthesizable:
2385 case VirtualUse::Hoisted:
2386 case VirtualUse::Intra:
2387 // Uses of these kinds do not need a MemoryAccess.
2388 break;
2390 case VirtualUse::ReadOnly:
2391 // Add MemoryAccess for invariant values only if requested.
2392 if (!ModelReadOnlyScalars)
2393 break;
2395 [[fallthrough]];
2396 case VirtualUse::Inter:
2398 // Do not create another MemoryAccess for reloading the value if one already
2399 // exists.
2400 if (UserStmt->lookupValueReadOf(V))
2401 break;
2403 addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
2404 true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2405 MemoryKind::Value);
2407 // Inter-statement uses need to write the value in their defining statement.
2408 if (VUse.isInter())
2409 ensureValueWrite(cast<Instruction>(V));
2410 break;
2411 }
2412 }
2414 void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
2415 BasicBlock *IncomingBlock,
2416 Value *IncomingValue, bool IsExitBlock) {
2417 // As the incoming block might turn out to be an error statement, ensure we
2418 // will create an exit PHI SAI object. It is needed during code generation
2419 // and would be created later anyway.
2420 if (IsExitBlock)
2421 scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
2422 MemoryKind::ExitPHI);
2424 // This is possible if PHI is in the SCoP's entry block. The incoming blocks
2425 // from outside the SCoP's region have no statement representation.
2426 if (!IncomingStmt)
2427 return;
2429 // Ensure the incoming value is available in the incoming block.
2430 // This must be done before the check for multiple PHI writes because multiple
2431 // exiting edges from a subregion can each be the effective written value of the
2432 // subregion. As such, all of them must be made available in the subregion
2433 // statement.
2434 ensureValueRead(IncomingValue, IncomingStmt);
2436 // Do not add more than one MemoryAccess per PHINode and ScopStmt.
2437 if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
2438 assert(Acc->getAccessInstruction() == PHI);
2439 Acc->addIncoming(IncomingBlock, IncomingValue);
2440 return;
2441 }
2443 MemoryAccess *Acc = addMemoryAccess(
2444 IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
2445 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2446 IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
2447 assert(Acc);
2448 Acc->addIncoming(IncomingBlock, IncomingValue);
2449 }
2451 void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
2452 addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
2453 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2454 MemoryKind::PHI);
2455 }
2457 void ScopBuilder::buildDomain(ScopStmt &Stmt) {
2458 isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);
2460 Stmt.Domain = scop->getDomainConditions(&Stmt);
2461 Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
2462 }
2464 void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
2465 isl::set Domain = Stmt.getDomain();
2466 BasicBlock *BB = Stmt.getEntryBlock();
2468 Loop *L = LI.getLoopFor(BB);
2470 while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
2471 L = L->getParentLoop();
2473 SmallVector<llvm::Loop *, 8> Loops;
2475 while (L && Stmt.getParent()->getRegion().contains(L)) {
2476 Loops.push_back(L);
2477 L = L->getParentLoop();
2478 }
2480 Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
2481 }
2483 /// Return the reduction type for a given binary operator.
2484 static MemoryAccess::ReductionType
2485 getReductionType(const BinaryOperator *BinOp) {
2486 if (!BinOp)
2487 return MemoryAccess::RT_NONE;
2488 switch (BinOp->getOpcode()) {
2489 case Instruction::FAdd:
2490 if (!BinOp->isFast())
2491 return MemoryAccess::RT_NONE;
2492 [[fallthrough]];
2493 case Instruction::Add:
2494 return MemoryAccess::RT_ADD;
2495 case Instruction::Or:
2496 return MemoryAccess::RT_BOR;
2497 case Instruction::Xor:
2498 return MemoryAccess::RT_BXOR;
2499 case Instruction::And:
2500 return MemoryAccess::RT_BAND;
2501 case Instruction::FMul:
2502 if (!BinOp->isFast())
2503 return MemoryAccess::RT_NONE;
2504 [[fallthrough]];
2505 case Instruction::Mul:
2506 if (DisableMultiplicativeReductions)
2507 return MemoryAccess::RT_NONE;
2508 return MemoryAccess::RT_MUL;
2509 default:
2510 return MemoryAccess::RT_NONE;
2511 }
2512 }
2514 /// Combine two reduction types.
2515 static MemoryAccess::ReductionType
2516 combineReductionType(MemoryAccess::ReductionType RT0,
2517 MemoryAccess::ReductionType RT1) {
2518 if (RT0 == MemoryAccess::RT_BOTTOM)
2519 return RT1;
2520 if (RT0 == RT1)
2521 return RT1;
2522 return MemoryAccess::RT_NONE;
2523 }
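// The combination behaves like a join on a small lattice, e.g.:
//   combineReductionType(RT_BOTTOM, RT_ADD) == RT_ADD
//   combineReductionType(RT_ADD, RT_ADD)    == RT_ADD
//   combineReductionType(RT_ADD, RT_MUL)    == RT_NONE  // mixed operators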
2525 /// True if @p AllAccs intersects with @p MemAccs except @p LoadMA and @p
2526 /// StoreMA.
2527 bool hasIntersectingAccesses(isl::set AllAccs, MemoryAccess *LoadMA,
2528 MemoryAccess *StoreMA, isl::set Domain,
2529 SmallVector<MemoryAccess *, 8> &MemAccs) {
2530 bool HasIntersectingAccs = false;
2531 auto AllAccsNoParams = AllAccs.project_out_all_params();
2533 for (MemoryAccess *MA : MemAccs) {
2534 if (MA == LoadMA || MA == StoreMA)
2535 continue;
2536 auto AccRel = MA->getAccessRelation().intersect_domain(Domain);
2537 auto Accs = AccRel.range();
2538 auto AccsNoParams = Accs.project_out_all_params();
2540 bool CompatibleSpace = AllAccsNoParams.has_equal_space(AccsNoParams);
2542 if (CompatibleSpace) {
2543 auto OverlapAccs = Accs.intersect(AllAccs);
2544 bool DoesIntersect = !OverlapAccs.is_empty();
2545 HasIntersectingAccs |= DoesIntersect;
2546 }
2547 }
2548 return HasIntersectingAccs;
2549 }
2551 /// Test if the accesses of @p LoadMA and @p StoreMA can form a reduction
2552 bool checkCandidatePairAccesses(MemoryAccess *LoadMA, MemoryAccess *StoreMA,
2553 isl::set Domain,
2554 SmallVector<MemoryAccess *, 8> &MemAccs) {
2555 // First check if the base value is the same.
2556 isl::map LoadAccs = LoadMA->getAccessRelation();
2557 isl::map StoreAccs = StoreMA->getAccessRelation();
2558 bool Valid = LoadAccs.has_equal_space(StoreAccs);
2559 POLLY_DEBUG(dbgs() << " == The accessed space below is "
2560 << (Valid ? "" : "not ") << "equal!\n");
2561 POLLY_DEBUG(LoadMA->dump(); StoreMA->dump());
2563 if (Valid) {
2564 // Then check if they actually access the same memory.
2565 isl::map R = isl::manage(LoadAccs.copy())
2566 .intersect_domain(isl::manage(Domain.copy()));
2567 isl::map W = isl::manage(StoreAccs.copy())
2568 .intersect_domain(isl::manage(Domain.copy()));
2569 isl::set RS = R.range();
2570 isl::set WS = W.range();
2572 isl::set InterAccs =
2573 isl::manage(RS.copy()).intersect(isl::manage(WS.copy()));
2574 Valid = !InterAccs.is_empty();
2575 POLLY_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "" : "not ")
2576 << "overlapping!\n");
2577 }
2579 if (Valid) {
2580 // Finally, check that there are no other instructions accessing this memory.
2581 isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
2582 AllAccsRel = AllAccsRel.intersect_domain(Domain);
2583 isl::set AllAccs = AllAccsRel.range();
2584 Valid = !hasIntersectingAccesses(AllAccs, LoadMA, StoreMA, Domain, MemAccs);
2585 POLLY_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "not " : "")
2586 << "accessed by other instructions!\n");
2587 }
2589 return Valid;
2590 }
2592 void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
2593 // Perform a data flow analysis on the current scop statement to propagate the
2594 // uses of loaded values. Then check and mark the memory accesses which are
2595 // part of reduction like chains.
2596 // During the data flow analysis we use the State variable to keep track of
2597 // the used "load-instructions" for each instruction in the scop statement.
2598 // This includes the LLVM-IR of the load and the "number of uses" (or the
2599 // number of paths in the operand tree which end in this load).
2600 using StatePairTy = std::pair<unsigned, MemoryAccess::ReductionType>;
2601 using FlowInSetTy = MapVector<const LoadInst *, StatePairTy>;
2602 using StateTy = MapVector<const Instruction *, FlowInSetTy>;
2603 StateTy State;
2605 // Invalid loads are loads which have uses we can't track properly in the
2606 // state map. This includes loads which:
2607 // o do not form a reduction when they flow into a memory location:
2608 // (e.g., A[i] = B[i] * 3 and A[i] = A[i] * A[i] + A[i])
2609 // o are used by a non binary operator or one which is not commutative
2610 // and associative (e.g., A[i] = A[i] % 3)
2611 // o might change the control flow (e.g., if (A[i]))
2612 // o are used in indirect memory accesses (e.g., A[B[i]])
2613 // o are used outside the current scop statement
2614 SmallPtrSet<const Instruction *, 8> InvalidLoads;
2615 SmallVector<BasicBlock *, 8> ScopBlocks;
2616 BasicBlock *BB = Stmt.getBasicBlock();
2617 if (BB)
2618 ScopBlocks.push_back(BB);
2619 else
2620 for (BasicBlock *Block : Stmt.getRegion()->blocks())
2621 ScopBlocks.push_back(Block);
2622 // Run the data flow analysis for all values in the scop statement
2623 for (BasicBlock *Block : ScopBlocks) {
2624 for (Instruction &Inst : *Block) {
2625 if ((Stmt.getParent())->getStmtFor(&Inst) != &Stmt)
2626 continue;
2627 bool UsedOutsideStmt = any_of(Inst.users(), [&Stmt](User *U) {
2628 return (Stmt.getParent())->getStmtFor(cast<Instruction>(U)) != &Stmt;
2629 });
2630 // Treat loads and stores specially.
2631 if (auto *Load = dyn_cast<LoadInst>(&Inst)) {
2632 // Invalidate all loads used which feed into the address of this load.
2633 if (auto *Ptr = dyn_cast<Instruction>(Load->getPointerOperand())) {
2634 const auto &It = State.find(Ptr);
2635 if (It != State.end())
2636 for (const auto &FlowInSetElem : It->second)
2637 InvalidLoads.insert(FlowInSetElem.first);
2638 }
2640 // If this load is used outside this stmt, invalidate it.
2641 if (UsedOutsideStmt)
2642 InvalidLoads.insert(Load);
2644 // And indicate that this load uses itself once but without specifying
2645 // any reduction operator.
2646 State[Load].insert(
2647 std::make_pair(Load, std::make_pair(1, MemoryAccess::RT_BOTTOM)));
2648 continue;
2649 }
2651 if (auto *Store = dyn_cast<StoreInst>(&Inst)) {
2652 // Invalidate all loads which feed into the address of this store.
2653 if (const Instruction *Ptr =
2654 dyn_cast<Instruction>(Store->getPointerOperand())) {
2655 const auto &It = State.find(Ptr);
2656 if (It != State.end())
2657 for (const auto &FlowInSetElem : It->second)
2658 InvalidLoads.insert(FlowInSetElem.first);
2659 }
2661 // Propagate the uses of the value operand to the store
2662 if (auto *ValueInst = dyn_cast<Instruction>(Store->getValueOperand()))
2663 State.insert(std::make_pair(Store, State[ValueInst]));
2664 continue;
2665 }
2667 // Instructions that are neither loads nor stores are either binary operators
2668 // or they will invalidate all used loads.
2669 auto *BinOp = dyn_cast<BinaryOperator>(&Inst);
2670 MemoryAccess::ReductionType CurRedType = getReductionType(BinOp);
2671 POLLY_DEBUG(dbgs() << "CurInst: " << Inst << " RT: " << CurRedType
2672 << "\n");
2674 // Iterate over all operands and propagate their input loads to
2675 // the current instruction.
2676 FlowInSetTy &InstInFlowSet = State[&Inst];
2677 for (Use &Op : Inst.operands()) {
2678 auto *OpInst = dyn_cast<Instruction>(Op);
2679 if (!OpInst)
2680 continue;
2682 POLLY_DEBUG(dbgs().indent(4) << "Op Inst: " << *OpInst << "\n");
2683 const StateTy::iterator &OpInFlowSetIt = State.find(OpInst);
2684 if (OpInFlowSetIt == State.end())
2685 continue;
2687 // Iterate over all the input loads of the operand and combine them
2688 // with the input loads of current instruction.
2689 FlowInSetTy &OpInFlowSet = OpInFlowSetIt->second;
2690 for (auto &OpInFlowPair : OpInFlowSet) {
2691 unsigned OpFlowIn = OpInFlowPair.second.first;
2692 unsigned InstFlowIn = InstInFlowSet[OpInFlowPair.first].first;
2694 MemoryAccess::ReductionType OpRedType = OpInFlowPair.second.second;
2695 MemoryAccess::ReductionType InstRedType =
2696 InstInFlowSet[OpInFlowPair.first].second;
2698 MemoryAccess::ReductionType NewRedType =
2699 combineReductionType(OpRedType, CurRedType);
2700 if (InstFlowIn)
2701 NewRedType = combineReductionType(NewRedType, InstRedType);
2703 POLLY_DEBUG(dbgs().indent(8) << "OpRedType: " << OpRedType << "\n");
2704 POLLY_DEBUG(dbgs().indent(8) << "NewRedType: " << NewRedType << "\n");
2705 InstInFlowSet[OpInFlowPair.first] =
2706 std::make_pair(OpFlowIn + InstFlowIn, NewRedType);
2707 }
2708 }
2710 // If this operation is used outside the stmt, invalidate all the loads
2711 // which feed into it.
2712 if (UsedOutsideStmt)
2713 for (const auto &FlowInSetElem : InstInFlowSet)
2714 InvalidLoads.insert(FlowInSetElem.first);
2715 }
2716 }
2718 // All used loads are propagated through the whole basic block; now try to
2719 // find valid reduction-like candidate pairs. These load-store pairs fulfill
2720 // all reduction like properties with regards to only this load-store chain.
2721 // We later have to check if the loaded value was invalidated by an
2722 // instruction not in that chain.
2723 using MemAccPair = std::pair<MemoryAccess *, MemoryAccess *>;
2724 DenseMap<MemAccPair, MemoryAccess::ReductionType> ValidCandidates;
2726 // Iterate over all write memory accesses and check the loads flowing into
2727 // it for reduction candidate pairs.
2728 for (MemoryAccess *WriteMA : Stmt.MemAccs) {
2729 if (WriteMA->isRead())
2730 continue;
2731 StoreInst *St = dyn_cast<StoreInst>(WriteMA->getAccessInstruction());
2732 if (!St)
2733 continue;
2734 assert(!St->isVolatile());
2736 FlowInSetTy &MaInFlowSet = State[WriteMA->getAccessInstruction()];
2737 for (auto &MaInFlowSetElem : MaInFlowSet) {
2738 MemoryAccess *ReadMA = &Stmt.getArrayAccessFor(MaInFlowSetElem.first);
2739 assert(ReadMA && "Couldn't find memory access for incoming load!");
2741 POLLY_DEBUG(dbgs() << "'" << *ReadMA->getAccessInstruction()
2742 << "'\n\tflows into\n'"
2743 << *WriteMA->getAccessInstruction() << "'\n\t #"
2744 << MaInFlowSetElem.second.first << " times & RT: "
2745 << MaInFlowSetElem.second.second << "\n");
2747 MemoryAccess::ReductionType RT = MaInFlowSetElem.second.second;
2748 unsigned NumAllowableInFlow = 1;
2750 // We allow the load to flow in exactly once for binary reductions
2751 bool Valid = (MaInFlowSetElem.second.first == NumAllowableInFlow);
2753 // Check if we saw a valid chain of binary operators.
2754 Valid = Valid && RT != MemoryAccess::RT_BOTTOM;
2755 Valid = Valid && RT != MemoryAccess::RT_NONE;
2757 // Then check if the memory accesses allow a reduction.
2758 Valid = Valid && checkCandidatePairAccesses(
2759 ReadMA, WriteMA, Stmt.getDomain(), Stmt.MemAccs);
2761 // Finally, mark the pair as a candidate or the load as an invalid one.
2762 if (Valid)
2763 ValidCandidates[std::make_pair(ReadMA, WriteMA)] = RT;
2764 else
2765 InvalidLoads.insert(ReadMA->getAccessInstruction());
2766 }
2767 }
2769 // In the last step mark the memory accesses of candidate pairs as reduction
2770 // like if the load wasn't marked invalid in the previous step.
2771 for (auto &CandidatePair : ValidCandidates) {
2772 MemoryAccess *LoadMA = CandidatePair.first.first;
2773 if (InvalidLoads.count(LoadMA->getAccessInstruction()))
2774 continue;
2775 POLLY_DEBUG(
2776 dbgs() << " Load :: "
2777 << *((CandidatePair.first.first)->getAccessInstruction())
2778 << "\n Store :: "
2779 << *((CandidatePair.first.second)->getAccessInstruction())
2780 << "\n are marked as reduction like\n");
2781 MemoryAccess::ReductionType RT = CandidatePair.second;
2782 CandidatePair.first.first->markAsReductionLike(RT);
2783 CandidatePair.first.second->markAsReductionLike(RT);
2784 }
2785 }
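// Illustrative outcome (assumed C input): in
//
//   for (int i = 0; i < n; i++)
//     sum = sum + A[i];
//
// the load of 'sum' flows exactly once into the store of 'sum' through an
// RT_ADD chain, so that load/store pair is marked reduction-like; the A[i]
// load also flows in but fails the pair checks above and is only recorded
// as invalid for pairing.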
2787 void ScopBuilder::verifyInvariantLoads() {
2788 auto &RIL = scop->getRequiredInvariantLoads();
2789 for (LoadInst *LI : RIL) {
2790 assert(LI && scop->contains(LI));
2791 // If there exists a statement in the scop which has a memory access for
2792 // @p LI, then mark this scop as infeasible for optimization.
2793 for (ScopStmt &Stmt : *scop)
2794 if (Stmt.getArrayAccessOrNULLFor(LI)) {
2795 scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
2796 return;
2797 }
2798 }
2799 }
2801 void ScopBuilder::hoistInvariantLoads() {
2802 if (!PollyInvariantLoadHoisting)
2803 return;
2805 isl::union_map Writes = scop->getWrites();
2806 for (ScopStmt &Stmt : *scop) {
2807 InvariantAccessesTy InvariantAccesses;
2809 for (MemoryAccess *Access : Stmt) {
2810 isl::set NHCtx = getNonHoistableCtx(Access, Writes);
2811 if (!NHCtx.is_null())
2812 InvariantAccesses.push_back({Access, NHCtx});
2813 }
2815 // Transfer the memory access from the statement to the SCoP.
2816 for (auto InvMA : InvariantAccesses)
2817 Stmt.removeMemoryAccess(InvMA.MA);
2818 addInvariantLoads(Stmt, InvariantAccesses);
2819 }
2820 }
2822 /// Check if an access range is too complex.
2824 /// An access range is too complex, if it contains either many disjuncts or
2825 /// very complex expressions. As a simple heuristic, we consider a set to be
2826 /// too complex if the sum of existentially quantified dimensions and
2827 /// set dimensions is larger than a threshold. This reliably detects both
2828 /// sets with many disjuncts as well as sets with many divisions as they
2829 /// arise in h264.
2831 /// @param AccessRange The range to check for complexity.
2833 /// @returns True if the access range is too complex.
2834 static bool isAccessRangeTooComplex(isl::set AccessRange) {
2835 unsigned NumTotalDims = 0;
2837 for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
2838 NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::div));
2839 NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::set));
2840 }
2842 if (NumTotalDims > MaxDimensionsInAccessRange)
2843 return true;
2845 return false;
2846 }
2848 bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
2849 isl::union_map Writes) {
2850 if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) {
2851 return getNonHoistableCtx(BasePtrMA, Writes).is_null();
2852 }
2854 Value *BaseAddr = MA->getOriginalBaseAddr();
2855 if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
2856 if (!isa<LoadInst>(BasePtrInst))
2857 return scop->contains(BasePtrInst);
2859 return false;
2860 }
2862 void ScopBuilder::addUserContext() {
2863 if (UserContextStr.empty())
2864 return;
2866 isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str());
2867 isl::space Space = scop->getParamSpace();
2868 isl::size SpaceParams = Space.dim(isl::dim::param);
2869 if (unsignedFromIslSize(SpaceParams) !=
2870 unsignedFromIslSize(UserContext.dim(isl::dim::param))) {
2871 std::string SpaceStr = stringFromIslObj(Space, "null");
2872 errs() << "Error: the context provided in -polly-context does not have the "
2873 << "same number of dimensions as the computed context. Due to this "
2874 << "mismatch, the -polly-context option is ignored. Please provide "
2875 << "the context in the parameter space: " << SpaceStr << ".\n";
2876 return;
2877 }
2879 for (auto i : rangeIslSize(0, SpaceParams)) {
2880 std::string NameContext =
2881 scop->getContext().get_dim_name(isl::dim::param, i);
2882 std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);
2884 if (NameContext != NameUserContext) {
2885 std::string SpaceStr = stringFromIslObj(Space, "null");
2886 errs() << "Error: the name of dimension " << i
2887 << " provided in -polly-context "
2888 << "is '" << NameUserContext << "', but the name in the computed "
2889 << "context is '" << NameContext
2890 << "'. Due to this name mismatch, "
2891 << "the -polly-context option is ignored. Please provide "
2892 << "the context in the parameter space: " << SpaceStr << ".\n";
2893 return;
2894 }
2896 UserContext = UserContext.set_dim_id(isl::dim::param, i,
2897 Space.get_dim_id(isl::dim::param, i));
2898 }
2899 isl::set newContext = scop->getContext().intersect(UserContext);
2900 scop->setContext(newContext);
2901 }
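// Illustrative use (option string assumed): a flag such as
//   -polly-context='[n] -> { : n >= 1024 }'
// further constrains the assumed context, provided the parameter count and
// names match the computed context as verified above.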
2903 isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,
2904 isl::union_map Writes) {
2905 // TODO: Loads that are not loop carried, hence are in a statement with
2906 // zero iterators, are by construction invariant, though we
2907 // currently "hoist" them anyway. This is necessary because we allow
2908 // them to be treated as parameters (e.g., in conditions) and our code
2909 // generation would otherwise use the old value.
2911 auto &Stmt = *Access->getStatement();
2912 BasicBlock *BB = Stmt.getEntryBlock();
2914 if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
2915 Access->isMemoryIntrinsic())
2916 return {};
2918 // Skip accesses that have an invariant base pointer which is defined but
2919 // not loaded inside the SCoP. This can happen, e.g., if a readnone call
2920 // returns a pointer that is used as a base address. However, as we want
2921 // to hoist indirect pointers, we allow the base pointer to be defined in
2922 // the region if it is also a memory access. Each ScopArrayInfo object
2923 // that has a base pointer origin has a base pointer that is loaded and
2924 // that is invariant, thus it will be hoisted too. However, if there is
2925 // no base pointer origin we check that the base pointer is defined
2926 // outside the region.
2927 auto *LI = cast<LoadInst>(Access->getAccessInstruction());
2928 if (hasNonHoistableBasePtrInScop(Access, Writes))
2929 return {};
2931 isl::map AccessRelation = Access->getAccessRelation();
2932 assert(!AccessRelation.is_empty());
2934 if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
2935 return {};
2937 AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
2938 isl::set SafeToLoad;
2940 auto &DL = scop->getFunction().getDataLayout();
2941 if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(),
2942 LI->getAlign(), DL, nullptr)) {
2943 SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
2944 } else if (BB != LI->getParent()) {
2945 // Skip accesses in non-affine subregions as they might not be executed
2946 // under the same condition as the entry of the non-affine subregion.
2947 return {};
2948 } else {
2949 SafeToLoad = AccessRelation.range();
2950 }
2952 if (isAccessRangeTooComplex(AccessRelation.range()))
2953 return {};
2955 isl::union_map Written = Writes.intersect_range(SafeToLoad);
2956 isl::set WrittenCtx = Written.params();
2957 bool IsWritten = !WrittenCtx.is_empty();
2959 if (!IsWritten)
2960 return WrittenCtx;
2962 WrittenCtx = WrittenCtx.remove_divs();
2963 bool TooComplex =
2964 unsignedFromIslSize(WrittenCtx.n_basic_set()) >= MaxDisjunctsInDomain;
2965 if (TooComplex || !isRequiredInvariantLoad(LI))
2966 return {};
2968 scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(),
2969 AS_RESTRICTION, LI->getParent());
2970 return WrittenCtx;
2971 }
2973 static bool isAParameter(llvm::Value *maybeParam, const Function &F) {
2974 for (const llvm::Argument &Arg : F.args())
2975 if (&Arg == maybeParam)
2976 return true;
2978 return false;
2979 }
2981 bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA,
2982 bool StmtInvalidCtxIsEmpty,
2983 bool MAInvalidCtxIsEmpty,
2984 bool NonHoistableCtxIsEmpty) {
2985 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
2986 const DataLayout &DL = LInst->getDataLayout();
2987 if (PollyAllowDereferenceOfAllFunctionParams &&
2988 isAParameter(LInst->getPointerOperand(), scop->getFunction()))
2989 return true;
2991 // TODO: We can provide more information for better but more expensive
2992 // results.
2993 if (!isDereferenceableAndAlignedPointer(
2994 LInst->getPointerOperand(), LInst->getType(), LInst->getAlign(), DL))
2995 return false;
2997 // If the location might be overwritten we do not hoist it unconditionally.
2999 // TODO: This is probably too conservative.
3000 if (!NonHoistableCtxIsEmpty)
3001 return false;
3003 // If a dereferenceable load is in a statement that is modeled precisely we
3004 // can hoist it.
3005 if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
3006 return true;
3008 // Even if the statement is not modeled precisely we can hoist the load if it
3009 // does not involve any parameters that might have been specialized by the
3010 // statement domain.
3011 for (const SCEV *Subscript : MA->subscripts())
3012 if (!isa<SCEVConstant>(Subscript))
3013 return false;
3014 return true;
3015 }
3017 void ScopBuilder::addInvariantLoads(ScopStmt &Stmt,
3018 InvariantAccessesTy &InvMAs) {
3019 if (InvMAs.empty())
3020 return;
3022 isl::set StmtInvalidCtx = Stmt.getInvalidContext();
3023 bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
3025 // Get the context under which the statement is executed but remove the error
3026 // context under which this statement is reached.
3027 isl::set DomainCtx = Stmt.getDomain().params();
3028 DomainCtx = DomainCtx.subtract(StmtInvalidCtx);
3030 if (unsignedFromIslSize(DomainCtx.n_basic_set()) >= MaxDisjunctsInDomain) {
3031 auto *AccInst = InvMAs.front().MA->getAccessInstruction();
3032 scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
3033 return;
3034 }
3036 // Project out all parameters that relate to loads in the statement. Otherwise
3037 // we could have cyclic dependences on the constraints under which the
3038 // hoisted loads are executed and we could not determine an order in which to
3039 // pre-load them. This happens because not only lower bounds are part of the
3040 // domain but also upper bounds.
3041 for (auto &InvMA : InvMAs) {
3042 auto *MA = InvMA.MA;
3043 Instruction *AccInst = MA->getAccessInstruction();
3044 if (SE.isSCEVable(AccInst->getType())) {
3045 SetVector<Value *> Values;
3046 for (const SCEV *Parameter : scop->parameters()) {
3047 Values.clear();
3048 findValues(Parameter, SE, Values);
3049 if (!Values.count(AccInst))
3050 continue;
3052 isl::id ParamId = scop->getIdForParam(Parameter);
3053 if (!ParamId.is_null()) {
3054 int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
3055 if (Dim >= 0)
3056 DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
3062 for (auto &InvMA : InvMAs) {
3063 auto *MA = InvMA.MA;
3064 isl::set NHCtx = InvMA.NonHoistableCtx;
3066 // Check for another invariant access that accesses the same location as
3067 // MA and, if found, consolidate them. Otherwise, create a new equivalence
3068 // class at the end of InvariantEquivClasses.
3069 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3070 Type *Ty = LInst->getType();
3071 const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
3073 isl::set MAInvalidCtx = MA->getInvalidContext();
3074 bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
3075 bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
3077 isl::set MACtx;
3078 // Check if we know that this pointer can be speculatively accessed.
3079 if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
3080 NonHoistableCtxIsEmpty)) {
3081 MACtx = isl::set::universe(DomainCtx.get_space());
3082 } else {
3083 MACtx = DomainCtx;
3084 MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
3085 MACtx = MACtx.gist_params(scop->getContext());
3088 bool Consolidated = false;
3089 for (auto &IAClass : scop->invariantEquivClasses()) {
3090 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3091 continue;
3093 // If the pointer and the type are equal, check whether the access function
3094 // w.r.t. the domain is equal too. It can happen that the domain fixes
3095 // parameter values and these can be different for distinct parts of the
3096 // SCoP. If this happens, we cannot consolidate the loads but need to
3097 // create a new invariant load equivalence class.
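// A hypothetical example of domains fixing parameters differently:
//
//   if (p == 0) { S1: x = A[p]; }   // access range {A[0]}
//   if (p == 1) { S2: y = A[p]; }   // access range {A[1]}
//
// Pointer SCEV and type match, but the ranges differ, so the two loads must
// remain in separate equivalence classes; the range comparison below
// detects this.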
3098 auto &MAs = IAClass.InvariantAccesses;
3099 if (!MAs.empty()) {
3100 auto *LastMA = MAs.front();
3102 isl::set AR = MA->getAccessRelation().range();
3103 isl::set LastAR = LastMA->getAccessRelation().range();
3104 bool SameAR = AR.is_equal(LastAR);
3106 if (!SameAR)
3107 continue;
3110 // Add MA to the list of accesses that are in this class.
3111 MAs.push_front(MA);
3113 Consolidated = true;
3115 // Unify the execution context of the class and this statement.
3116 isl::set IAClassDomainCtx = IAClass.ExecutionContext;
3117 if (!IAClassDomainCtx.is_null())
3118 IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
3119 else
3120 IAClassDomainCtx = MACtx;
3121 IAClass.ExecutionContext = IAClassDomainCtx;
3122 break;
3125 if (Consolidated)
3126 continue;
3128 MACtx = MACtx.coalesce();
3130 // If we did not consolidate MA, i.e., did not find an equivalence class
3131 // for it, we create a new one.
3132 scop->addInvariantEquivClass(
3133 InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
3137 /// Find the canonical ScopArrayInfo object for a set of hoisted invariant
3138 /// loads. The canonical array is the one that corresponds to the
3139 /// first load in the list of accesses that is used as the base pointer
3140 /// of a scop array.
3141 static const ScopArrayInfo *findCanonicalArray(Scop &S,
3142 MemoryAccessList &Accesses) {
3143 for (MemoryAccess *Access : Accesses) {
3144 const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
3145 Access->getAccessInstruction(), MemoryKind::Array);
3146 if (CanonicalArray)
3147 return CanonicalArray;
3149 return nullptr;
3152 /// Check if @p Array serves as base array in an invariant load.
3153 static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) {
3154 for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
3155 for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
3156 if (Access2->getScopArrayInfo() == Array)
3157 return true;
3158 return false;
3161 /// Replace the base pointer arrays in all memory accesses referencing @p Old
3162 /// with a reference to @p New.
3163 static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
3164 const ScopArrayInfo *New) {
3165 for (ScopStmt &Stmt : S)
3166 for (MemoryAccess *Access : Stmt) {
3167 if (Access->getLatestScopArrayInfo() != Old)
3168 continue;
3170 isl::id Id = New->getBasePtrId();
3171 isl::map Map = Access->getAccessRelation();
3172 Map = Map.set_tuple_id(isl::dim::out, Id);
3173 Access->setAccessRelation(Map);
3177 void ScopBuilder::canonicalizeDynamicBasePtrs() {
3178 for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
3179 MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
3181 const ScopArrayInfo *CanonicalBasePtrSAI =
3182 findCanonicalArray(*scop, BasePtrAccesses);
3184 if (!CanonicalBasePtrSAI)
3185 continue;
3187 for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
3188 const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
3189 BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
3190 if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
3191 !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
3192 continue;
3194 // We currently do not canonicalize arrays for which some accesses are
3195 // hoisted as invariant loads. If we did, we would also need to update the
3196 // access functions of the invariant loads. However, as this is not a
3197 // very common situation, we leave this for now to avoid further
3198 // complexity increases.
3199 if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI))
3200 continue;
3202 replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI);
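// Illustrative sketch (hypothetical input): two accesses whose base pointers
// are loaded from the same location, e.g.
//
//   float *P1 = *X; P1[i] = 1.0f;   // modeled as array "P1"
//   float *P2 = *X; g(P2[i]);       // modeled as array "P2"
//
// initially reference distinct ScopArrayInfo objects; after canonicalization
// both access relations refer to the canonical array of the first base
// pointer.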
3207 void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
3208 for (MemoryAccess *Access : Stmt.MemAccs) {
3209 Type *ElementType = Access->getElementType();
3211 MemoryKind Ty;
3212 if (Access->isPHIKind())
3213 Ty = MemoryKind::PHI;
3214 else if (Access->isExitPHIKind())
3215 Ty = MemoryKind::ExitPHI;
3216 else if (Access->isValueKind())
3217 Ty = MemoryKind::Value;
3218 else
3219 Ty = MemoryKind::Array;
3221 // Create isl::pw_aff for SCEVs which describe sizes. Collect all
3222 // assumptions which are taken. isl::pw_aff objects are cached internally
3223 // and used later by the scop.
3224 for (const SCEV *Size : Access->Sizes) {
3225 if (!Size)
3226 continue;
3227 scop->getPwAff(Size, nullptr, false, &RecordedAssumptions);
3229 auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
3230 ElementType, Access->Sizes, Ty);
3232 // Create isl::pw_aff for SCEVs which describe subscripts. Collect all
3233 // assumptions which are taken. isl::pw_aff objects are cached internally
3234 // and used later by the scop.
3235 for (const SCEV *Subscript : Access->subscripts()) {
3236 if (!Access->isAffine() || !Subscript)
3237 continue;
3238 scop->getPwAff(Subscript, Stmt.getEntryBlock(), false,
3239 &RecordedAssumptions);
3241 Access->buildAccessRelation(SAI);
3242 scop->addAccessData(Access);
3246 /// Add the minimal/maximal access in @p Set to @p MinMaxAccesses.
3248 /// @return True if more accesses should be added, false if we reached the
3249 /// maximal number of run-time checks to be generated.
3250 static bool buildMinMaxAccess(isl::set Set,
3251 Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
3252 isl::pw_multi_aff MinPMA, MaxPMA;
3253 isl::pw_aff LastDimAff;
3254 isl::aff OneAff;
3255 unsigned Pos;
3257 Set = Set.remove_divs();
3258 polly::simplify(Set);
3260 if (unsignedFromIslSize(Set.n_basic_set()) > RunTimeChecksMaxAccessDisjuncts)
3261 Set = Set.simple_hull();
3263 // Restrict the number of parameters involved in the access as the lexmin/
3264 // lexmax computation will take too long if this number is high.
3266 // Experiments with a simple test case using an i7 4800MQ:
3268 // #Parameters involved | Time (in sec)
3269 // 6 | 0.01
3270 // 7 | 0.04
3271 // 8 | 0.12
3272 // 9 | 0.40
3273 // 10 | 1.54
3274 // 11 | 6.78
3275 // 12 | 30.38
3277 if (isl_set_n_param(Set.get()) >
3278 static_cast<isl_size>(RunTimeChecksMaxParameters)) {
3279 unsigned InvolvedParams = 0;
3280 for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
3281 if (Set.involves_dims(isl::dim::param, u, 1))
3282 InvolvedParams++;
3284 if (InvolvedParams > RunTimeChecksMaxParameters)
3285 return false;
3288 MinPMA = Set.lexmin_pw_multi_aff();
3289 MaxPMA = Set.lexmax_pw_multi_aff();
3291 MinPMA = MinPMA.coalesce();
3292 MaxPMA = MaxPMA.coalesce();
3294 if (MaxPMA.is_null())
3295 return false;
3297 unsigned MaxOutputSize = unsignedFromIslSize(MaxPMA.dim(isl::dim::out));
3299 // Adjust the last dimension of the maximal access by one as we want to
3300 // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
3301 // we test during code generation might now point after the end of the
3302 // allocated array, but we will never dereference it anyway.
3303 assert(MaxOutputSize >= 1 && "Assumed at least one output dimension");
3305 Pos = MaxOutputSize - 1;
3306 LastDimAff = MaxPMA.at(Pos);
3307 OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
3308 OneAff = OneAff.add_constant_si(1);
3309 LastDimAff = LastDimAff.add(OneAff);
3310 MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);
3312 if (MinPMA.is_null() || MaxPMA.is_null())
3313 return false;
3315 MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));
3317 return true;
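// Illustrative sketch (hypothetical): if an alias group covers the elements
// A[0] ... A[N-1], the pair stored above is (A[0], A[N]): bumping the
// maximal access by one lets the generated run-time check compare half-open
// intervals, roughly
//
//   if (&A[N] <= &B[0] || &B[M] <= &A[0])
//     /* regions do not overlap; the optimized version is safe */;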
3320 /// Wrapper function to calculate minimal/maximal accesses to each array.
3321 bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup,
3322 Scop::MinMaxVectorTy &MinMaxAccesses) {
3323 MinMaxAccesses.reserve(AliasGroup.size());
3325 isl::union_set Domains = scop->getDomains();
3326 isl::union_map Accesses = isl::union_map::empty(scop->getIslCtx());
3328 for (MemoryAccess *MA : AliasGroup)
3329 Accesses = Accesses.unite(MA->getAccessRelation());
3331 Accesses = Accesses.intersect_domain(Domains);
3332 isl::union_set Locations = Accesses.range();
3334 bool LimitReached = false;
3335 for (isl::set Set : Locations.get_set_list()) {
3336 LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop);
3337 if (LimitReached)
3338 break;
3341 return !LimitReached;
3344 static isl::set getAccessDomain(MemoryAccess *MA) {
3345 isl::set Domain = MA->getStatement()->getDomain();
3346 Domain = Domain.project_out(isl::dim::set, 0,
3347 unsignedFromIslSize(Domain.tuple_dim()));
3348 return Domain.reset_tuple_id();
3351 bool ScopBuilder::buildAliasChecks() {
3352 if (!PollyUseRuntimeAliasChecks)
3353 return true;
3355 if (buildAliasGroups()) {
3356 // Aliasing assumptions do not go through addAssumption, but we still want
3357 // to collect statistics, so we do it here explicitly.
3358 if (scop->getAliasGroups().size())
3359 Scop::incrementNumberOfAliasingAssumptions(1);
3360 return true;
3363 // If a problem occurs while building the alias groups, we need to delete
3364 // this SCoP and pretend it wasn't valid in the first place. To this end
3365 // we make the assumed context infeasible.
3366 scop->invalidate(ALIASING, DebugLoc());
3368 POLLY_DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr()
3369 << " could not be created. This SCoP has been dismissed.");
3370 return false;
3373 std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
3374 ScopBuilder::buildAliasGroupsForAccesses() {
3375 BatchAAResults BAA(AA);
3376 AliasSetTracker AST(BAA);
3378 DenseMap<Value *, MemoryAccess *> PtrToAcc;
3379 DenseSet<const ScopArrayInfo *> HasWriteAccess;
3380 for (ScopStmt &Stmt : *scop) {
3382 isl::set StmtDomain = Stmt.getDomain();
3383 bool StmtDomainEmpty = StmtDomain.is_empty();
3385 // Statements with an empty domain will never be executed.
3386 if (StmtDomainEmpty)
3387 continue;
3389 for (MemoryAccess *MA : Stmt) {
3390 if (MA->isScalarKind())
3391 continue;
3392 if (!MA->isRead())
3393 HasWriteAccess.insert(MA->getScopArrayInfo());
3394 MemAccInst Acc(MA->getAccessInstruction());
3395 if (MA->isRead() && isa<MemTransferInst>(Acc))
3396 PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
3397 else
3398 PtrToAcc[Acc.getPointerOperand()] = MA;
3399 AST.add(Acc);
3403 AliasGroupVectorTy AliasGroups;
3404 for (AliasSet &AS : AST) {
3405 if (AS.isMustAlias() || AS.isForwardingAliasSet())
3406 continue;
3407 AliasGroupTy AG;
3408 for (const Value *Ptr : AS.getPointers())
3409 AG.push_back(PtrToAcc[const_cast<Value *>(Ptr)]);
3410 if (AG.size() < 2)
3411 continue;
3412 AliasGroups.push_back(std::move(AG));
3415 return std::make_tuple(AliasGroups, HasWriteAccess);
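// Illustrative sketch (hypothetical): if alias analysis cannot disambiguate
// A and B but proves C distinct, the tracker yields the may-alias set
// {A, B}; only sets with at least two pointers survive as alias groups:
//
//   void f(int *A, int *B, int *restrict C);   // candidate group: {A, B}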
3418 bool ScopBuilder::buildAliasGroups() {
3419 // To create sound alias checks we perform the following steps:
3420 // o) We partition each group into read-only and non-read-only accesses.
3421 // o) For each group with more than one base pointer we then compute minimal
3422 // and maximal accesses to each array of a group in the read-only and
3423 // non-read-only partitions separately, as sketched below.
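// Illustrative sketch (hypothetical input): for
//
//   for (int i = 0; i < N; i++)
//     A[i] = B[i] + C[i];   // A is written; B and C are only read
//
// the read-write partition is {A} and the read-only partition is {B, C};
// min/max run-time checks then compare A's accessed range against B's and
// C's, but no check between B and C is needed.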
3424 AliasGroupVectorTy AliasGroups;
3425 DenseSet<const ScopArrayInfo *> HasWriteAccess;
3427 std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses();
3429 splitAliasGroupsByDomain(AliasGroups);
3431 for (AliasGroupTy &AG : AliasGroups) {
3432 if (!scop->hasFeasibleRuntimeContext())
3433 return false;
3436 IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut);
3437 bool Valid = buildAliasGroup(AG, HasWriteAccess);
3438 if (!Valid)
3439 return false;
3441 if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) {
3442 scop->invalidate(COMPLEXITY, DebugLoc());
3443 return false;
3447 return true;
3450 bool ScopBuilder::buildAliasGroup(
3451 AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
3452 AliasGroupTy ReadOnlyAccesses;
3453 AliasGroupTy ReadWriteAccesses;
3454 SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
3455 SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
3457 if (AliasGroup.size() < 2)
3458 return true;
3460 for (MemoryAccess *Access : AliasGroup) {
3461 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
3462 Access->getAccessInstruction())
3463 << "Possibly aliasing pointer, use restrict keyword.");
3464 const ScopArrayInfo *Array = Access->getScopArrayInfo();
3465 if (HasWriteAccess.count(Array)) {
3466 ReadWriteArrays.insert(Array);
3467 ReadWriteAccesses.push_back(Access);
3468 } else {
3469 ReadOnlyArrays.insert(Array);
3470 ReadOnlyAccesses.push_back(Access);
3474 // If there are no read-only pointers and fewer than two read-write
3475 // pointers, no alias check is needed.
3476 if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
3477 return true;
3479 // If there is no read-write pointer, no alias check is needed.
3480 if (ReadWriteArrays.empty())
3481 return true;
3483 // For non-affine accesses, no alias check can be generated as we cannot
3484 // compute a sufficiently tight lower and upper bound: bail out.
3485 for (MemoryAccess *MA : AliasGroup) {
3486 if (!MA->isAffine()) {
3487 scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
3488 MA->getAccessInstruction()->getParent());
3489 return false;
3493 // Ensure that for all memory accesses for which we generate alias checks,
3494 // their base pointers are available.
3495 for (MemoryAccess *MA : AliasGroup) {
3496 if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
3497 scop->addRequiredInvariantLoad(
3498 cast<LoadInst>(BasePtrMA->getAccessInstruction()));
3503 Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
3504 Scop::MinMaxVectorTy MinMaxAccessesReadOnly;
3506 bool Valid;
3508 Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite);
3510 if (!Valid)
3511 return false;
3513 // Bail out if the number of values we need to compare is too large. This
3514 // is important as the number of comparisons grows quadratically with the
3515 // number of values involved.
3516 if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
3517 RunTimeChecksMaxArraysPerGroup)
3518 return false;
3520 Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);
3522 scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
3523 if (!Valid)
3524 return false;
3526 return true;
3529 void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
3530 for (unsigned u = 0; u < AliasGroups.size(); u++) {
3531 AliasGroupTy NewAG;
3532 AliasGroupTy &AG = AliasGroups[u];
3533 AliasGroupTy::iterator AGI = AG.begin();
3534 isl::set AGDomain = getAccessDomain(*AGI);
3535 while (AGI != AG.end()) {
3536 MemoryAccess *MA = *AGI;
3537 isl::set MADomain = getAccessDomain(MA);
3538 if (AGDomain.is_disjoint(MADomain)) {
3539 NewAG.push_back(MA);
3540 AGI = AG.erase(AGI);
3541 } else {
3542 AGDomain = AGDomain.unite(MADomain);
3543 AGI++;
3546 if (NewAG.size() > 1)
3547 AliasGroups.push_back(std::move(NewAG));
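// Illustrative sketch (hypothetical): accesses whose domains are disjoint
// can never both execute, e.g.
//
//   if (p > 0)  A[i] = 1;   // domain {p > 0}
//   if (p <= 0) B[i] = 2;   // domain {p <= 0}
//
// Splitting them into separate alias groups avoids emitting a run-time
// check for a pair of accesses that never occurs together.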
3551 #ifndef NDEBUG
3552 static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
3553 auto PhysUse = VirtualUse::create(S, Op, &LI, false);
3554 auto VirtUse = VirtualUse::create(S, Op, &LI, true);
3555 assert(PhysUse.getKind() == VirtUse.getKind());
3558 /// Check the consistency of every statement's MemoryAccesses.
3560 /// The check is carried out by expecting the "physical" kind of use (derived
3561 /// from the BasicBlock an instruction resides in) to be the same as the
3562 /// "virtual" kind of use (derived from a statement's MemoryAccess).
3564 /// The "physical" uses are taken by ensureValueRead to determine whether to
3565 /// create MemoryAccesses. When done, the kind of scalar access should be the
3566 /// same no matter which way it was derived.
3568 /// The MemoryAccesses might be changed by later SCoP-modifying passes and
3569 /// hence can intentionally influence the kind of uses (no longer
3570 /// corresponding to the "physical" ones, hence called "virtual"). The
3571 /// CodeGenerator therefore has to pick up the virtual uses. But here, in the
3572 /// ScopBuilder, this has not happened yet, such that virtual and physical uses are equivalent.
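/// For example (hypothetical), for a value %v defined in statement S1 and
/// used in statement S2, the physical use derived from the IR is
/// inter-statement; the check below then expects S2 to carry a matching
/// MemoryKind::Value read, so that the virtual use kind agrees.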
3573 static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
3574 for (auto *BB : S->getRegion().blocks()) {
3575 for (auto &Inst : *BB) {
3576 auto *Stmt = S->getStmtFor(&Inst);
3577 if (!Stmt)
3578 continue;
3580 if (isIgnoredIntrinsic(&Inst))
3581 continue;
3583 // Branch conditions are encoded in the statement domains.
3584 if (Inst.isTerminator() && Stmt->isBlockStmt())
3585 continue;
3587 // Verify all uses.
3588 for (auto &Op : Inst.operands())
3589 verifyUse(S, Op, LI);
3591 // Stores do not produce values used by other statements.
3592 if (isa<StoreInst>(Inst))
3593 continue;
3595 // For every value defined in the block, also check that a use of that
3596 // value in the same statement would not be an inter-statement use. It can
3597 // still be synthesizable or load-hoisted, but these kinds of instructions
3598 // are not directly copied during code generation.
3599 auto VirtDef =
3600 VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
3601 assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
3602 VirtDef.getKind() == VirtualUse::Intra ||
3603 VirtDef.getKind() == VirtualUse::Hoisted);
3607 if (S->hasSingleExitEdge())
3608 return;
3610 // PHINodes in the SCoP region's exit block are also uses to be checked.
3611 if (!S->getRegion().isTopLevelRegion()) {
3612 for (auto &Inst : *S->getRegion().getExit()) {
3613 if (!isa<PHINode>(Inst))
3614 break;
3616 for (auto &Op : Inst.operands())
3617 verifyUse(S, Op, LI);
3621 #endif
3623 void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
3624 scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE,
3625 SD.getNextID()));
3627 buildStmts(R);
3629 // Create all invariant load instructions first. These are categorized as
3630 // 'synthesizable' and therefore are not part of any ScopStmt, but they
3631 // still need to be created somewhere.
3632 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
3633 for (BasicBlock *BB : scop->getRegion().blocks()) {
3634 if (SD.isErrorBlock(*BB, scop->getRegion()))
3635 continue;
3637 for (Instruction &Inst : *BB) {
3638 LoadInst *Load = dyn_cast<LoadInst>(&Inst);
3639 if (!Load)
3640 continue;
3642 if (!RIL.count(Load))
3643 continue;
3645 // Invariant loads require a MemoryAccess to be created in some statement.
3646 // It is not important to which statement the MemoryAccess is added
3647 // because it will later be removed from the ScopStmt again. We choose the
3648 // first statement of the basic block the LoadInst is in.
3649 ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
3650 assert(!List.empty());
3651 ScopStmt *RILStmt = List.front();
3652 buildMemoryAccess(Load, RILStmt);
3655 buildAccessFunctions();
3657 // In case the region does not have an exiting block, we will later (during
3658 // code generation) split the exit block. This will move potential PHI nodes
3659 // from the current exit block into the new region-exiting block. Hence, PHI
3660 // nodes that are not part of the region at this point will become part of it.
3661 // To handle these PHI nodes later, we now model their operands as scalar
3662 // accesses. Note that we do not model anything in the exit block if we have
3663 // an exiting block in the region, as there will not be any splitting later.
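// Illustrative sketch (hypothetical IR): given
//
//   exit:                                        ; outside the region
//     %phi = phi i32 [ %a, %inside ], [ %b, %other ]
//
// %phi is not part of the region yet, but after exit-block splitting its
// incoming value %a must be communicated out of the region; modeling the
// operands as scalar accesses here makes that possible.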
3664 if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
3665 for (Instruction &Inst : *R.getExit()) {
3666 PHINode *PHI = dyn_cast<PHINode>(&Inst);
3667 if (!PHI)
3668 break;
3670 buildPHIAccesses(nullptr, PHI, nullptr, true);
3674 // Create memory accesses for global reads since all arrays are now known.
3675 const SCEV *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
3676 for (auto GlobalReadPair : GlobalReads) {
3677 ScopStmt *GlobalReadStmt = GlobalReadPair.first;
3678 Instruction *GlobalRead = GlobalReadPair.second;
3679 for (auto *BP : ArrayBasePointers)
3680 addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
3681 BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
3684 buildInvariantEquivalenceClasses();
3686 // A map from basic blocks to their invalid domains.
3687 DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
3689 if (!buildDomains(&R, InvalidDomainMap)) {
3690 POLLY_DEBUG(
3691 dbgs() << "Bailing-out because buildDomains encountered problems\n");
3692 return;
3695 addUserAssumptions(AC, InvalidDomainMap);
3697 // Initialize the invalid domain.
3698 for (ScopStmt &Stmt : scop->Stmts)
3699 if (Stmt.isBlockStmt())
3700 Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
3701 else
3702 Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
3703 Stmt.getRegion()->getNode())]);
3705 // Remove empty statements.
3706 // Exit early in case there are no executable statements left in this scop.
3707 scop->removeStmtNotInDomainMap();
3708 scop->simplifySCoP(false);
3709 if (scop->isEmpty()) {
3710 POLLY_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
3711 return;
3714 // The ScopStmts now have enough information to initialize themselves.
3715 for (ScopStmt &Stmt : *scop) {
3716 collectSurroundingLoops(Stmt);
3718 buildDomain(Stmt);
3719 buildAccessRelations(Stmt);
3721 if (DetectReductions)
3722 checkForReductions(Stmt);
3725 // Check early for a feasible runtime context.
3726 if (!scop->hasFeasibleRuntimeContext()) {
3727 POLLY_DEBUG(
3728 dbgs() << "Bailing-out because of unfeasible context (early)\n");
3729 return;
3732 // Check early for profitability. Afterwards it cannot change anymore,
3733 // only the runtime context could become infeasible.
3734 if (!scop->isProfitable(UnprofitableScalarAccs)) {
3735 scop->invalidate(PROFITABLE, DebugLoc());
3736 POLLY_DEBUG(
3737 dbgs() << "Bailing-out because SCoP is not considered profitable\n");
3738 return;
3741 buildSchedule();
3743 finalizeAccesses();
3745 scop->realignParams();
3746 addUserContext();
3748 // Now that the context is fully constructed and thus contains all our
3749 // knowledge about the parameters, add all recorded assumptions to the
3750 // assumed/invalid context.
3751 addRecordedAssumptions();
3753 scop->simplifyContexts();
3754 if (!buildAliasChecks()) {
3755 POLLY_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
3756 return;
3759 hoistInvariantLoads();
3760 canonicalizeDynamicBasePtrs();
3761 verifyInvariantLoads();
3762 scop->simplifySCoP(true);
3764 // Check late for a feasible runtime context; profitability cannot have
3765 // changed since the early check, only the runtime context can.
3766 if (!scop->hasFeasibleRuntimeContext()) {
3767 POLLY_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
3768 return;
3771 #ifndef NDEBUG
3772 verifyUses(scop.get(), LI, DT);
3773 #endif
3776 ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AAResults &AA,
3777 const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
3778 ScopDetection &SD, ScalarEvolution &SE,
3779 OptimizationRemarkEmitter &ORE)
3780 : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
3781 DebugLoc Beg, End;
3782 auto P = getBBPairForRegion(R);
3783 getDebugLocations(P, Beg, End);
3785 std::string Msg = "SCoP begins here.";
3786 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
3787 << Msg);
3789 buildScop(*R, AC);
3791 POLLY_DEBUG(dbgs() << *scop);
3793 if (!scop->hasFeasibleRuntimeContext()) {
3794 InfeasibleScops++;
3795 Msg = "SCoP ends here but was dismissed.";
3796 POLLY_DEBUG(dbgs() << "SCoP detected but dismissed\n");
3797 RecordedAssumptions.clear();
3798 scop.reset();
3799 } else {
3800 Msg = "SCoP ends here.";
3801 ++ScopFound;
3802 if (scop->getMaxLoopDepth() > 0)
3803 ++RichScopFound;
3806 if (R->isTopLevelRegion())
3807 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
3808 << Msg);
3809 else
3810 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
3811 << Msg);