//===- ScopBuilder.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the
// SCoP detection, derived from their LLVM-IR code.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopBuilder.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopInfo.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLTools.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"

STATISTIC(ScopFound, "Number of valid Scops");
STATISTIC(RichScopFound, "Number of Scops containing a loop");
STATISTIC(InfeasibleScops,
          "Number of SCoPs with statically infeasible context.");

bool polly::ModelReadOnlyScalars;

// The maximal number of dimensions we allow during invariant load construction.
// More complex access ranges will result in very high compile time and are also
// unlikely to result in good code. This value is very high and should only
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
static unsigned const MaxDimensionsInAccessRange = 9;

static cl::opt<bool, true> XModelReadOnlyScalars(
    "polly-analyze-read-only-scalars",
    cl::desc("Model read-only scalar values in the scop description"),
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::init(true),
    cl::cat(PollyCategory));

static cl::opt<int>
    OptComputeOut("polly-analysis-computeout",
                  cl::desc("Bound the scop analysis by a maximal amount of "
                           "computational steps (0 means no bound)"),
                  cl::Hidden, cl::init(800000), cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
    "polly-allow-dereference-of-all-function-parameters",
    cl::desc(
        "Treat all pointer parameters of functions as dereferenceable."
        " This is useful for invariant load hoisting, since we can generate"
        " fewer runtime checks. This is only valid if all pointer parameters"
        " are always initialized, so that Polly can choose to hoist"
        " their loads."),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool>
    PollyIgnoreInbounds("polly-ignore-inbounds",
                        cl::desc("Do not take inbounds assumptions at all"),
                        cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
    "polly-rtc-max-arrays-per-group",
    cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::init(20), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxAccessDisjuncts(
    "polly-rtc-max-array-disjuncts",
    cl::desc("The maximal number of disjuncts allowed in memory accesses to "
             "build RTCs."),
    cl::Hidden, cl::init(8), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxParameters(
    "polly-rtc-max-parameters",
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::init(8), cl::cat(PollyCategory));

static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<std::string> UserContextStr(
    "polly-context", cl::value_desc("isl parameter set"),
    cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));

static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::init(true),
                                      cl::cat(PollyCategory));

// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations
// are, in contrast, pretty stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden,
    cl::cat(PollyCategory));

enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };

static cl::opt<GranularityChoice> StmtGranularity(
    "polly-stmt-granularity",
    cl::desc(
        "Algorithm to use for splitting basic blocks into multiple statements"),
    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
                          "One statement per basic block"),
               clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
                          "Scalar independence heuristic"),
               clEnumValN(GranularityChoice::Stores, "store",
                          "Store-level granularity")),
    cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));

/// Helper to treat non-affine regions and basic blocks the same.
///
///{

/// Return the block that is the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
                           : RN->getNodeAs<BasicBlock>();
}

/// Return the @p idx'th block that is executed after @p RN.
static inline BasicBlock *
getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
  if (RN->isSubRegion()) {
    assert(idx == 0);
    return RN->getNodeAs<Region>()->getExit();
  }
  return TI->getSuccessor(idx);
}

static bool containsErrorBlock(RegionNode *RN, const Region &R,
                               ScopDetection *SD) {
  if (!RN->isSubRegion())
    return SD->isErrorBlock(*RN->getNodeAs<BasicBlock>(), R);
  for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
    if (SD->isErrorBlock(*BB, R))
      return true;
  return false;
}

///}

/// Create a map to map from a given iteration to a subsequent iteration.
///
/// This map maps from SetSpace -> SetSpace where the dimension @p Dim
/// is incremented by one and all other dimensions are equal, e.g.,
///             [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
///
/// if @p Dim is 2 and @p SetSpace has 4 dimensions.
static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
  isl::space MapSpace = SetSpace.map_from_set();
  isl::map NextIterationMap = isl::map::universe(MapSpace);
  for (unsigned u : rangeIslSize(0, NextIterationMap.domain_tuple_dim()))
    if (u != Dim)
      NextIterationMap =
          NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u);
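  // Add the equality 1 + in[Dim] - out[Dim] == 0, i.e., out[Dim] = in[Dim] + 1.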
  isl::constraint C =
      isl::constraint::alloc_equality(isl::local_space(MapSpace));
  C = C.set_constant_si(1);
  C = C.set_coefficient_si(isl::dim::in, Dim, 1);
  C = C.set_coefficient_si(isl::dim::out, Dim, -1);
  NextIterationMap = NextIterationMap.add_constraint(C);
  return NextIterationMap;
}

/// Collect all bounded parts of @p S into a single set.
static isl::set collectBoundedParts(isl::set S) {
  isl::set BoundedParts = isl::set::empty(S.get_space());
  for (isl::basic_set BSet : S.get_basic_set_list())
    if (BSet.is_bounded())
      BoundedParts = BoundedParts.unite(isl::set(BSet));
  return BoundedParts;
}

/// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded then a bounded subset,
///          both with regards to the dimension @p Dim.
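///
/// For illustration (intended semantics, not literally computed this way):
/// for S = { [i,j] : 0 <= i and 0 <= j } and Dim = 1, all of S is unbounded
/// in j; for S = { [i,j] : 0 <= j <= i }, dimension j is (parametrically)
/// bounded, so all of S ends up in the bounded part.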
static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
                                                       unsigned Dim) {
  for (unsigned u : rangeIslSize(0, S.tuple_dim()))
    S = S.lower_bound_si(isl::dim::set, u, 0);

  unsigned NumDimsS = unsignedFromIslSize(S.tuple_dim());
  isl::set OnlyDimS = S;

  // Remove dimensions that are greater than Dim as they are not interesting.
  assert(NumDimsS >= Dim + 1);
  OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Create artificial parametric upper bounds for dimensions smaller than Dim
  // as we are not interested in them.
  OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim);

  for (unsigned u = 0; u < Dim; u++) {
    isl::constraint C = isl::constraint::alloc_inequality(
        isl::local_space(OnlyDimS.get_space()));
    C = C.set_coefficient_si(isl::dim::param, u, 1);
    C = C.set_coefficient_si(isl::dim::set, u, -1);
    OnlyDimS = OnlyDimS.add_constraint(C);
  }

  // Collect all bounded parts of OnlyDimS.
  isl::set BoundedParts = collectBoundedParts(OnlyDimS);

  // Create the dimensions greater than Dim again.
  BoundedParts =
      BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Remove the artificial upper bound parameters again.
  BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim);

  isl::set UnboundedParts = S.subtract(BoundedParts);
  return std::make_pair(UnboundedParts, BoundedParts);
}

/// Create the conditions under which @p L @p Pred @p R is true.
static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
                                  isl::pw_aff R) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    return L.eq_set(R);
  case ICmpInst::ICMP_NE:
    return L.ne_set(R);
  case ICmpInst::ICMP_SLT:
    return L.lt_set(R);
  case ICmpInst::ICMP_SLE:
    return L.le_set(R);
  case ICmpInst::ICMP_SGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_SGE:
    return L.ge_set(R);
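  // The unsigned predicates reuse the signed set operations; callers are
  // expected to have established that the sign bit of neither operand can be
  // set (see buildUnsignedConditionSets) for this to be sound.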
  case ICmpInst::ICMP_ULT:
    return L.lt_set(R);
  case ICmpInst::ICMP_UGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_ULE:
    return L.le_set(R);
  case ICmpInst::ICMP_UGE:
    return L.ge_set(R);
  default:
    llvm_unreachable("Non integer predicate not supported");
  }
}

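/// Adjust the number of dimensions of @p Dom when moving from loop @p OldL to
/// loop @p NewL (a sketch of the semantics; the case distinction in the body
/// is authoritative): e.g., when moving from one loop to a sibling loop at
/// the same depth, the dimension of the old loop is projected out and a fresh
/// dimension for the new loop is added.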
isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL,
                                             Loop *NewL) {
  // If the loops are the same there is nothing to do.
  if (NewL == OldL)
    return Dom;

  int OldDepth = scop->getRelativeLoopDepth(OldL);
  int NewDepth = scop->getRelativeLoopDepth(NewL);
  // If both loops are non-affine loops there is nothing to do.
  if (OldDepth == -1 && NewDepth == -1)
    return Dom;

  // Distinguish three cases:
  //   1) The depth is the same but the loops are not.
  //      => One loop was left, one was entered.
  //   2) The depth increased from OldL to NewL.
  //      => One loop was entered, none was left.
  //   3) The depth decreased from OldL to NewL.
  //      => Loops were left; the difference of the depths defines how many.
  if (OldDepth == NewDepth) {
    assert(OldL->getParentLoop() == NewL->getParentLoop());
    Dom = Dom.project_out(isl::dim::set, NewDepth, 1);
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else if (OldDepth < NewDepth) {
    assert(OldDepth + 1 == NewDepth);
    auto &R = scop->getRegion();
    (void)R;
    assert(NewL->getParentLoop() == OldL ||
           ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else {
    assert(OldDepth > NewDepth);
    unsigned Diff = OldDepth - NewDepth;
    unsigned NumDim = unsignedFromIslSize(Dom.tuple_dim());
    assert(NumDim >= Diff);
    Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff);
  }

  return Dom;
}

/// Compute the isl representation for the SCEV @p E in this BB.
///
/// @param BB               The BB for which the isl representation is to be
///                         computed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate that @p E has to be non-negative.
///
/// Note that this function will also adjust the invalid context accordingly.
__isl_give isl_pw_aff *
ScopBuilder::getPwAff(BasicBlock *BB,
                      DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                      const SCEV *E, bool NonNegative) {
  PWACtx PWAC = scop->getPwAff(E, BB, NonNegative, &RecordedAssumptions);
  InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
  return PWAC.first.release();
}

/// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands: if the MSB (aka the
/// sign bit) of an operand in an unsigned ICmpInst is set, a signed
/// interpretation would wrap it to a negative value.
///
/// @param IsStrictUpperBound holds information on the predicate relation
///        between TestVal and UpperBound, i.e.,
///        TestVal < UpperBound  OR  TestVal <= UpperBound
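///
/// Concretely (from the construction below), the result is the set
///   { 0 <= TestVal and TestVal < UpperBound }
/// (or <= for the non-strict variant), i.e., the unsigned comparison modeled
/// as a signed one plus the requirement that TestVal does not wrap.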
__isl_give isl_set *ScopBuilder::buildUnsignedConditionSets(
    BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain,
    const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    bool IsStrictUpperBound) {
  // Do not take the NonNeg assumption on TestVal
  // as it might have the MSB (sign bit) set.
  isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, SCEV_TestVal, false);
  // Take the NonNeg assumption on UpperBound.
  isl_pw_aff *UpperBound =
      getPwAff(BB, InvalidDomainMap, SCEV_UpperBound, true);

  // 0 <= TestVal
  isl_set *First =
      isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
                            isl_pw_aff_get_domain_space(TestVal))),
                        isl_pw_aff_copy(TestVal));

  isl_set *Second;
  if (IsStrictUpperBound)
    // TestVal < UpperBound
    Second = isl_pw_aff_lt_set(TestVal, UpperBound);
  else
    // TestVal <= UpperBound
    Second = isl_pw_aff_le_set(TestVal, UpperBound);

  isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
  return ConsequenceCondSet;
}

bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  Value *Condition = getConditionFromTerminator(SI);
  assert(Condition && "No condition for switch");

  isl_pw_aff *LHS, *RHS;
  LHS = getPwAff(BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));

  unsigned NumSuccessors = SI->getNumSuccessors();
  ConditionSets.resize(NumSuccessors);
  for (auto &Case : SI->cases()) {
    unsigned Idx = Case.getSuccessorIndex();
    ConstantInt *CaseValue = Case.getCaseValue();

    RHS = getPwAff(BB, InvalidDomainMap, SE.getSCEV(CaseValue));
    isl_set *CaseConditionSet =
        buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS),
                          isl::manage(RHS))
            .release();
    ConditionSets[Idx] = isl_set_coalesce(
        isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
  }
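  // The default successor (index 0) is taken whenever no case matches, so its
  // condition set is the domain minus the union of all case condition sets.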
  assert(ConditionSets[0] == nullptr && "Default condition set was set");
  isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
  for (unsigned u = 2; u < NumSuccessors; u++)
    ConditionSetUnion =
        isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
  ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion);

  isl_pw_aff_free(LHS);

  return true;
}

bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L,
    __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  isl_set *ConsequenceCondSet = nullptr;

  if (auto *Load = dyn_cast<LoadInst>(Condition)) {
    const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
    const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
    bool NonNeg = false;
    isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, LHSSCEV, NonNeg);
    isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, RHSSCEV, NonNeg);
    ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS),
                                           isl::manage(RHS))
                             .release();
  } else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast<ConstantInt>(
        getUniqueNonErrorValue(PHI, &scop->getRegion(), &SD));
    assert(Unique &&
           "A PHINode condition should only be accepted by ScopDetection if "
           "getUniqueNonErrorValue returns non-NULL");

    if (Unique->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
    if (CCond->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    assert(Opcode == Instruction::And || Opcode == Instruction::Or);

    bool Valid = buildConditionSets(BB, BinOp->getOperand(0), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets) &&
                 buildConditionSets(BB, BinOp->getOperand(1), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets);
    if (!Valid) {
      while (!ConditionSets.empty())
        isl_set_free(ConditionSets.pop_back_val());
      return false;
    }

    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart1 = ConditionSets.pop_back_val();

    if (Opcode == Instruction::And)
      ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
    else
      ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
  } else {
    auto *ICond = dyn_cast<ICmpInst>(Condition);
    assert(ICond &&
           "Condition of exiting branch was neither constant nor ICmp!");

    Region &R = scop->getRegion();

    isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assume the sign bit of neither operand
    // to be set. The comparison is equal to a signed comparison under this
    // assumption.
    bool NonNeg = ICond->isUnsigned();
    const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
               *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);

    LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, &SD);
    RightOperand = tryForwardThroughPHI(RightOperand, R, SE, &SD);
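    // Note how the unsigned predicates below pass the operands to
    // buildUnsignedConditionSets as (TestVal, UpperBound): x >u y is simply
    // modeled as y <u x by swapping the operands.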
    switch (ICond->getPredicate()) {
    case ICmpInst::ICMP_ULT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_ULE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, false);
      break;
    case ICmpInst::ICMP_UGT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_UGE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, false);
      break;
    default:
      LHS = getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg);
      RHS = getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg);
      ConsequenceCondSet = buildConditionSet(ICond->getPredicate(),
                                             isl::manage(LHS), isl::manage(RHS))
                               .release();
      break;
    }
  }

  // If no terminator was given we are only looking for parameter constraints
  // under which @p Condition is true/false.
  if (!TI)
    ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
  assert(ConsequenceCondSet);
  ConsequenceCondSet = isl_set_coalesce(
      isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));

  isl_set *AlternativeCondSet = nullptr;
  bool TooComplex =
      isl_set_n_basic_set(ConsequenceCondSet) >= (int)MaxDisjunctsInDomain;

  if (!TooComplex) {
    AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
                                          isl_set_copy(ConsequenceCondSet));
    TooComplex =
        isl_set_n_basic_set(AlternativeCondSet) >= (int)MaxDisjunctsInDomain;
  }

  if (TooComplex) {
    scop->invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
                     TI ? TI->getParent() : nullptr /* BasicBlock */);
    isl_set_free(AlternativeCondSet);
    isl_set_free(ConsequenceCondSet);
    return false;
  }

  ConditionSets.push_back(ConsequenceCondSet);
  ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));

  return true;
}

bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
    return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap,
                              ConditionSets);

  assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");

  if (TI->getNumSuccessors() == 1) {
    ConditionSets.push_back(isl_set_copy(Domain));
    return true;
  }

  Value *Condition = getConditionFromTerminator(TI);
  assert(Condition && "No condition for Terminator");

  return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap,
                            ConditionSets);
}

bool ScopBuilder::propagateDomainConstraints(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Iterate over the region R and propagate the domain constraints from the
  // predecessors to the current node. In contrast to the
  // buildDomainsWithBranchConstraints function, this one will pull the domain
  // information from the predecessors instead of pushing it to the successors.
  // Additionally, we assume the domains to be already present in the domain
  // map here. However, we iterate again in reverse post order so we know all
  // predecessors have been visited before a block or non-affine subregion is
  // visited.

  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        if (!propagateDomainConstraints(SubRegion, InvalidDomainMap))
          return false;
        continue;
      }
    }

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = scop->getOrInitEmptyDomain(BB);
    assert(!Domain.is_null());

    // Under the union of all predecessor conditions we can reach this block.
    isl::set PredDom = getPredecessorDomainConstraints(BB, Domain);
    Domain = Domain.intersect(PredDom).coalesce();
    Domain = Domain.align_params(scop->getParamSpace());

    Loop *BBLoop = getRegionNodeLoop(RN, LI);
    if (BBLoop && BBLoop->getHeader() == BB && scop->contains(BBLoop))
      if (!addLoopBoundsToHeaderDomain(BBLoop, InvalidDomainMap))
        return false;
  }

  return true;
}

void ScopBuilder::propagateDomainConstraintsToRegionExit(
    BasicBlock *BB, Loop *BBLoop,
    SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Check if the block @p BB is the entry of a region. If so we propagate its
  // domain to the exit block of the region. Otherwise we are done.
  auto *RI = scop->getRegion().getRegionInfo();
  auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
  auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
  if (!BBReg || BBReg->getEntry() != BB || !scop->contains(ExitBB))
    return;

  // Do not propagate the domain if there is a loop backedge inside the region
  // that would prevent the exit block from being executed.
  auto *L = BBLoop;
  while (L && scop->contains(L)) {
    SmallVector<BasicBlock *, 4> LatchBBs;
    BBLoop->getLoopLatches(LatchBBs);
    for (auto *LatchBB : LatchBBs)
      if (BB != LatchBB && BBReg->contains(LatchBB))
        return;
    L = L->getParentLoop();
  }

  isl::set Domain = scop->getOrInitEmptyDomain(BB);
  assert(!Domain.is_null() && "Cannot propagate a nullptr");

  Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, scop->getBoxedLoops());

  // Since the dimensions of @p BB and @p ExitBB might be different we have to
  // adjust the domain before we can propagate it.
  isl::set AdjustedDomain = adjustDomainDimensions(Domain, BBLoop, ExitBBLoop);
  isl::set &ExitDomain = scop->getOrInitEmptyDomain(ExitBB);

  // If the exit domain is not yet created we set it, otherwise we "add" the
  // current domain.
  ExitDomain =
      !ExitDomain.is_null() ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;

  // Initialize the invalid domain.
  InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());

  FinishedExitBlocks.insert(ExitBB);
}

isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
                                                      isl::set Domain) {
  // If @p BB is the ScopEntry we are done.
  if (scop->getRegion().getEntry() == BB)
    return isl::set::universe(Domain.get_space());

  // The region info of this function.
  auto &RI = *scop->getRegion().getRegionInfo();

  Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, scop->getBoxedLoops());

  // A domain to collect all predecessor domains, thus all conditions under
  // which the block is executed. To this end we start with the empty domain.
  isl::set PredDom = isl::set::empty(Domain.get_space());

  // Set of regions of which the entry block domain has been propagated to BB.
  // All predecessors inside any of the regions can be skipped.
  SmallSet<Region *, 8> PropagatedRegions;

  for (auto *PredBB : predecessors(BB)) {
    // Skip backedges.
    if (DT.dominates(BB, PredBB))
      continue;

    // If the predecessor is in a region we used for propagation we can skip
    // it.
    auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
    if (llvm::any_of(PropagatedRegions, PredBBInRegion)) {
      continue;
    }

    // Check if there is a valid region we can use for propagation, thus look
    // for a region that contains the predecessor and has @p BB as exit block.
    // FIXME: This was a side-effect-free (and possibly infinite) loop when
    // committed and seems not to be needed.
    auto *PredR = RI.getRegionFor(PredBB);
    while (PredR->getExit() != BB && !PredR->contains(BB))
      PredR = PredR->getParent();

    // If a valid region for propagation was found, use the entry of that
    // region for propagation, otherwise the PredBB directly.
    if (PredR->getExit() == BB) {
      PredBB = PredR->getEntry();
      PropagatedRegions.insert(PredR);
    }

    isl::set PredBBDom = scop->getDomainConditions(PredBB);
    Loop *PredBBLoop =
        getFirstNonBoxedLoopFor(PredBB, LI, scop->getBoxedLoops());
    PredBBDom = adjustDomainDimensions(PredBBDom, PredBBLoop, BBLoop);
    PredDom = PredDom.unite(PredBBDom);
  }

  return PredDom;
}

bool ScopBuilder::addLoopBoundsToHeaderDomain(
    Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  int LoopDepth = scop->getRelativeLoopDepth(L);
  assert(LoopDepth >= 0 && "Loop in region should have at least depth one");

  BasicBlock *HeaderBB = L->getHeader();
  assert(scop->isDomainDefined(HeaderBB));
  isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(HeaderBB);

  isl::map NextIterationMap =
      createNextIterationMap(HeaderBBDom.get_space(), LoopDepth);

  isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());

  SmallVector<BasicBlock *, 4> LatchBlocks;
  L->getLoopLatches(LatchBlocks);

  for (BasicBlock *LatchBB : LatchBlocks) {
    // If the latch is only reachable via error statements we skip it.
    if (!scop->isDomainDefined(LatchBB))
      continue;

    isl::set LatchBBDom = scop->getDomainConditions(LatchBB);

    isl::set BackedgeCondition;

    Instruction *TI = LatchBB->getTerminator();
    BranchInst *BI = dyn_cast<BranchInst>(TI);
    assert(BI && "Only branch instructions allowed in loop latches");

    if (BI->isUnconditional())
      BackedgeCondition = LatchBBDom;
    else {
      SmallVector<isl_set *, 8> ConditionSets;
      int idx = BI->getSuccessor(0) != HeaderBB;
      if (!buildConditionSets(LatchBB, TI, L, LatchBBDom.get(),
                              InvalidDomainMap, ConditionSets))
        return false;

      // Free the non-backedge condition set as we do not need it.
      isl_set_free(ConditionSets[1 - idx]);

      BackedgeCondition = isl::manage(ConditionSets[idx]);
    }

    int LatchLoopDepth = scop->getRelativeLoopDepth(LI.getLoopFor(LatchBB));
    assert(LatchLoopDepth >= LoopDepth);
    BackedgeCondition = BackedgeCondition.project_out(
        isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
    UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
  }

  isl::map ForwardMap = isl::map::lex_le(HeaderBBDom.get_space());
  for (int i = 0; i < LoopDepth; i++)
    ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);
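  // ForwardMap now relates each iteration of the loop at LoopDepth to itself
  // and to every lexicographically later iteration that agrees on all outer
  // loop dimensions; applying it below spreads the "backedge not taken"
  // condition to all subsequent iterations.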
  isl::set UnionBackedgeConditionComplement =
      UnionBackedgeCondition.complement();
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
                                                      0);
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.apply(ForwardMap);
  HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
  HeaderBBDom = HeaderBBDom.apply(NextIterationMap);

  auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
  HeaderBBDom = Parts.second;

  // Check if there is a <nsw> tagged AddRec for this loop and if so do not
  // require a runtime check. The assumption is already implied by the <nsw>
  // tag.
  bool RequiresRTC = !scop->hasNSWAddRecForLoop(L);

  isl::set UnboundedCtx = Parts.first.params();
  recordAssumption(&RecordedAssumptions, INFINITELOOP, UnboundedCtx,
                   HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION,
                   nullptr, RequiresRTC);
  return true;
}

void ScopBuilder::buildInvariantEquivalenceClasses() {
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
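  // Required invariant loads are grouped by (pointer SCEV, loaded type); the
  // first load seen for a key becomes the representative of its class.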
  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : RIL) {
    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());

    Type *Ty = LInst->getType();
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
    if (ClassRep) {
      scop->addInvariantLoadMapping(LInst, ClassRep);
      continue;
    }

    ClassRep = LInst;
    scop->addInvariantEquivClass(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), {}, Ty});
  }
}

bool ScopBuilder::buildDomains(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R);
  auto *EntryBB = R->getEntry();
  auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
  int LD = scop->getRelativeLoopDepth(L);
  auto *S =
      isl_set_universe(isl_space_set_alloc(scop->getIslCtx().get(), 0, LD + 1));

  InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
  isl::set Domain = isl::manage(S);
  scop->setDomain(EntryBB, Domain);

  if (IsOnlyNonAffineRegion)
    return !containsErrorBlock(R->getNode(), *R, &SD);

  if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap))
    return false;

  if (!propagateDomainConstraints(R, InvalidDomainMap))
    return false;

  // Error blocks and blocks dominated by them have been assumed to never be
  // executed. Representing them in the Scop does not add any value. In fact,
  // it is likely to cause issues during construction of the ScopStmts. The
  // contents of error blocks have not been verified to be expressible and
  // will cause problems when building up a ScopStmt for them.
  // Furthermore, basic blocks dominated by error blocks may reference
  // instructions in the error block which, if the error block is not modeled,
  // can themselves not be constructed properly. To this end we will replace
  // the domains of error blocks and those only reachable via error blocks
  // with an empty set. Additionally, we will record for each block under which
  // parameter combination it would be reached via an error block in its
  // InvalidDomain. This information is needed during load hoisting.
  if (!propagateInvalidStmtDomains(R, InvalidDomainMap))
    return false;

  return true;
}

bool ScopBuilder::buildDomainsWithBranchConstraints(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // To create the domain for each block in R we iterate over all blocks and
  // subregions in R and propagate the conditions under which the current
  // region element is executed. To this end we iterate in reverse post order
  // over R as it ensures that we first visit all predecessors of a region
  // node (either a basic block or a subregion) before we visit the region
  // node itself. Initially, only the domain for the SCoP region entry block
  // is set and from there we propagate the current domain to all successors,
  // however we add the condition that the successor is actually executed
  // next. As we are only interested in non-loop carried constraints here we
  // can simply skip loop back edges.

  SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        if (!buildDomainsWithBranchConstraints(SubRegion, InvalidDomainMap))
          return false;
        continue;
      }
    }

    if (containsErrorBlock(RN, scop->getRegion(), &SD))
      scop->notifyErrorBlock();

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    Instruction *TI = BB->getTerminator();

    if (isa<UnreachableInst>(TI))
      continue;

    if (!scop->isDomainDefined(BB))
      continue;
    isl::set Domain = scop->getDomainConditions(BB);

    scop->updateMaxLoopDepth(unsignedFromIslSize(Domain.tuple_dim()));

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    // Propagate the domain from BB directly to blocks that have a superset
    // domain, at the moment only region exit nodes of regions that start in
    // BB.
    propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks,
                                           InvalidDomainMap);

    // If all successors of BB have been assigned a domain through the
    // propagation above we do not need to build condition sets but can just
    // skip this block. However, it is important to note that this is a local
    // property with regards to the region @p R. To this end FinishedExitBlocks
    // is a local variable.
    auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
      return FinishedExitBlocks.count(SuccBB);
    };
    if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
      continue;

    // Build the condition sets for the successor nodes of the current region
    // node. If it is a non-affine subregion we will always execute the single
    // exit node, hence the single entry node domain is the condition set. For
    // basic blocks we use the helper function buildConditionSets.
    SmallVector<isl_set *, 8> ConditionSets;
    if (RN->isSubRegion())
      ConditionSets.push_back(Domain.copy());
    else if (!buildConditionSets(BB, TI, BBLoop, Domain.get(), InvalidDomainMap,
                                 ConditionSets))
      return false;

    // Now iterate over the successors and set their initial domain based on
    // their condition set. We skip back edges here and have to be careful when
    // we leave a loop not to keep constraints over a dimension that doesn't
    // exist anymore.
    assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
    for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
      isl::set CondSet = isl::manage(ConditionSets[u]);
      BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip blocks outside the region.
      if (!scop->contains(SuccBB))
        continue;

      // If we propagate the domain of some block to "SuccBB" we do not have to
      // adjust the domain.
      if (FinishedExitBlocks.count(SuccBB))
        continue;

      // Skip back edges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop =
          getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());

      CondSet = adjustDomainDimensions(CondSet, BBLoop, SuccBBLoop);

      // Set the domain for the successor or merge it with an existing domain
      // in case there are multiple paths (without loop back edges) to the
      // successor block.
      isl::set &SuccDomain = scop->getOrInitEmptyDomain(SuccBB);

      if (!SuccDomain.is_null()) {
        SuccDomain = SuccDomain.unite(CondSet).coalesce();
      } else {
        // Initialize the invalid domain.
        InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
        SuccDomain = CondSet;
      }

      SuccDomain = SuccDomain.detect_equalities();

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will clean up and bail.
      if (unsignedFromIslSize(SuccDomain.n_basic_set()) < MaxDisjunctsInDomain)
        continue;

      scop->invalidate(COMPLEXITY, DebugLoc());
      while (++u < ConditionSets.size())
        isl_set_free(ConditionSets[u]);
      return false;
    }
  }

  return true;
}

bool ScopBuilder::propagateInvalidStmtDomains(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {

    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        propagateInvalidStmtDomains(SubRegion, InvalidDomainMap);
        continue;
      }
    }

    bool ContainsErrorBlock = containsErrorBlock(RN, scop->getRegion(), &SD);
    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = scop->getOrInitEmptyDomain(BB);
    assert(!Domain.is_null() && "Cannot propagate a nullptr");

    isl::set InvalidDomain = InvalidDomainMap[BB];

    bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);

    if (!IsInvalidBlock) {
      InvalidDomain = InvalidDomain.intersect(Domain);
    } else {
      InvalidDomain = Domain;
      isl::set DomPar = Domain.params();
      recordAssumption(&RecordedAssumptions, ERRORBLOCK, DomPar,
                       BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
      Domain = isl::set::empty(Domain.get_space());
    }

    if (InvalidDomain.is_empty()) {
      InvalidDomainMap[BB] = InvalidDomain;
      continue;
    }

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    auto *TI = BB->getTerminator();
    unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
    for (unsigned u = 0; u < NumSuccs; u++) {
      auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip successors outside the SCoP.
      if (!scop->contains(SuccBB))
        continue;

      // Skip backedges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop =
          getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());

      auto AdjustedInvalidDomain =
          adjustDomainDimensions(InvalidDomain, BBLoop, SuccBBLoop);

      isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
      SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain);
      SuccInvalidDomain = SuccInvalidDomain.coalesce();

      InvalidDomainMap[SuccBB] = SuccInvalidDomain;

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will bail.
      if (unsignedFromIslSize(SuccInvalidDomain.n_basic_set()) <
          MaxDisjunctsInDomain)
        continue;

      InvalidDomainMap.erase(BB);
      scop->invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
      return false;
    }

    InvalidDomainMap[BB] = InvalidDomain;
  }

  return true;
}

void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                   Region *NonAffineSubRegion,
                                   bool IsExitBlock) {
  // PHI nodes in the exit block of the region (i.e., when IsExitBlock is
  // true) are not modeled as ordinary PHI nodes as they are not part of the
  // region. However, we model the operands in the predecessor blocks that are
  // part of the region as regular scalar accesses.

  // If we can synthesize a PHI we can skip it, however only if it is in
  // the region. If it is not it can only be in the exit block of the region.
  // In this case we model the operands but not the PHI itself.
  auto *Scope = LI.getLoopFor(PHI->getParent());
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
    return;

  // PHI nodes are modeled as if they had been demoted prior to the SCoP
  // detection. Hence, the PHI is a load of a new memory location in which the
  // incoming value was written at the end of the incoming basic block.
  bool OnlyNonAffineSubRegionOperands = true;
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
    Value *Op = PHI->getIncomingValue(u);
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
    ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));

    // Do not build PHI dependences inside a non-affine subregion, but make
    // sure that the necessary scalar values are still made available.
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
      auto *OpInst = dyn_cast<Instruction>(Op);
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
        ensureValueRead(Op, OpStmt);
      continue;
    }

    OnlyNonAffineSubRegionOperands = false;
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
  }

  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
    addPHIReadAccess(PHIStmt, PHI);
  }
}

void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
                                         Instruction *Inst) {
  assert(!isa<PHINode>(Inst));

  // Pull-in required operands.
  for (Use &Op : Inst->operands())
    ensureValueRead(Op.get(), UserStmt);
}

// Create a sequence of two schedules. Either argument may be null and is
// interpreted as the empty schedule. Can also return null if both schedules
// are empty.
static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
  if (Prev.is_null())
    return Succ;
  if (Succ.is_null())
    return Prev;

  return Prev.sequence(Succ);
}

// Create an isl_multi_union_aff that defines an identity mapping from the
// elements of USet to their N-th dimension.
//
// # Example:
//
//   Domain: { A[i,j]; B[i,j,k] }
//   N: 1
//
// Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
//
// @param USet   A union set describing the elements for which to generate a
//               mapping.
// @param N      The dimension to map to.
// @returns      A mapping from USet to its N-th dimension.
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N) {
  assert(!USet.is_null());
  assert(!USet.is_empty());

  auto Result = isl::union_pw_multi_aff::empty(USet.get_space());

  for (isl::set S : USet.get_set_list()) {
    unsigned Dim = unsignedFromIslSize(S.tuple_dim());
    assert(Dim >= N);
    auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
                                                  N, Dim - N);
    if (N > 1)
      PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);

    Result = Result.add_pw_multi_aff(PMA);
  }

  return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
}

void ScopBuilder::buildSchedule() {
  Loop *L = getLoopSurroundingScop(*scop, LI);
  LoopStackTy LoopStack({LoopStackElementTy(L, {}, 0)});
  buildSchedule(scop->getRegion().getNode(), LoopStack);
  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
  scop->setScheduleTree(LoopStack[0].Schedule);
}

/// To generate a schedule for the elements in a Region we traverse the Region
/// in reverse-post-order and add the contained RegionNodes in traversal order
/// to the schedule of the loop that is currently at the top of the LoopStack.
/// For loop-free codes, this results in a correct sequential ordering.
///
/// Example:
///            bb1(0)
///           /     \.
///       bb2(1)   bb3(2)
///           \   /     \.
///           bb4(3)   bb5(4)
///               \     /
///               bb6(5)
///
/// Including loops requires additional processing. Whenever a loop header is
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing any other nodes. To implement this
/// loop-nodes-first-processing, the reverse post-order traversal is
/// insufficient. Hence, we additionally check if the traversal yields
/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
/// These region-nodes are then queued and only traversed after all nodes
/// within the current loop have been processed.
void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
  Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);

  ReversePostOrderTraversal<Region *> RTraversal(R);
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  std::deque<RegionNode *> DelayList;
  bool LastRNWaiting = false;

  // Iterate over the region @p R in reverse post-order but queue
  // sub-regions/blocks iff they are not part of the last encountered but not
  // completely traversed loop. The variable LastRNWaiting is a flag to
  // indicate that we queued the last sub-region/block from the reverse
  // post-order iterator. If it is set we have to explore the next
  // sub-region/block from the iterator (if any) to guarantee progress. If it
  // is not set we first try the next queued sub-region/blocks.
  while (!WorkList.empty() || !DelayList.empty()) {
    RegionNode *RN;

    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
      RN = WorkList.front();
      WorkList.pop_front();
      LastRNWaiting = false;
    } else {
      RN = DelayList.front();
      DelayList.pop_front();
    }

    Loop *L = getRegionNodeLoop(RN, LI);
    if (!scop->contains(L))
      L = OuterScopLoop;

    Loop *LastLoop = LoopStack.back().L;
    if (LastLoop != L) {
      if (LastLoop && !LastLoop->contains(L)) {
        LastRNWaiting = true;
        DelayList.push_back(RN);
        continue;
      }
      LoopStack.push_back({L, {}, 0});
    }
    buildSchedule(RN, LoopStack);
  }
}

void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
  if (RN->isSubRegion()) {
    auto *LocalRegion = RN->getNodeAs<Region>();
    if (!scop->isNonAffineSubRegion(LocalRegion)) {
      buildSchedule(LocalRegion, LoopStack);
      return;
    }
  }

  assert(LoopStack.rbegin() != LoopStack.rend());
  auto LoopData = LoopStack.rbegin();
  LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);

  for (auto *Stmt : scop->getStmtListFor(RN)) {
    isl::union_set UDomain{Stmt->getDomain()};
    auto StmtSchedule = isl::schedule::from_domain(UDomain);
    LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
  }

  // Check if we just processed the last node in this loop. If we did, finalize
  // the loop by:
  //
  //   - adding new schedule dimensions
  //   - folding the resulting schedule into the parent loop schedule
  //   - dropping the loop schedule from the LoopStack.
  //
  // Then continue to check surrounding loops, which might also have been
  // completed by this node.
  size_t Dimension = LoopStack.size();
  while (LoopData->L &&
         LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
    isl::schedule Schedule = LoopData->Schedule;
    auto NumBlocksProcessed = LoopData->NumBlocksProcessed;

    assert(std::next(LoopData) != LoopStack.rend());
    Loop *L = LoopData->L;
    ++LoopData;
    --Dimension;

    if (!Schedule.is_null()) {
      isl::union_set Domain = Schedule.get_domain();
      isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
      Schedule = Schedule.insert_partial_schedule(MUPA);

      if (hasDisableAllTransformsHint(L)) {
        /// If any of the loops has a disable_nonforced heuristic, mark the
        /// entire SCoP as such. The ISL rescheduler can only reschedule the
        /// SCoP in its entirety.
        /// TODO: ScopDetection could avoid including such loops or wrap them
        /// as boxed loops. It still needs to pass through loops with
        /// user-defined metadata.
        scop->markDisableHeuristics();
      }

      // It is easier to insert the marks here than to do it retroactively.
      isl::id IslLoopId = createIslLoopAttr(scop->getIslCtx(), L);
      if (!IslLoopId.is_null())
        Schedule =
            Schedule.get_root().child(0).insert_mark(IslLoopId).get_schedule();

      LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
    }

    LoopData->NumBlocksProcessed += NumBlocksProcessed;
  }
  // Now pop all loops processed above from the LoopStack.
  LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
}

void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
  // Check for uses of this instruction outside the scop. Because we do not
  // iterate over such instructions and therefore did not "ensure" the
  // existence of a write, we must determine such uses here.
  if (scop->isEscaping(Inst))
    ensureValueWrite(Inst);
}

void ScopBuilder::addRecordedAssumptions() {
  for (auto &AS : llvm::reverse(RecordedAssumptions)) {

    if (!AS.BB) {
      scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
                          nullptr /* BasicBlock */, AS.RequiresRTC);
      continue;
    }

    // If the domain was deleted the assumptions are void.
    isl_set *Dom = scop->getDomainConditions(AS.BB).release();
    if (!Dom)
      continue;

    // If a basic block was given use its domain to simplify the assumption.
    // In case of restrictions we know they only have to hold on the domain,
    // thus we can intersect them with the domain of the block. However, for
    // assumptions the domain has to imply them, thus:
    //
    //   Dom => S  <==>  !Dom v S  <==>  !(Dom - S)
    //
    // To avoid the complement we will register (Dom - S) as a restriction
    // instead of an assumption.
    isl_set *S = AS.Set.copy();
    if (AS.Sign == AS_RESTRICTION)
      S = isl_set_params(isl_set_intersect(S, Dom));
    else /* (AS.Sign == AS_ASSUMPTION) */
      S = isl_set_params(isl_set_subtract(Dom, S));

    scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB,
                        AS.RequiresRTC);
  }
}

void ScopBuilder::addUserAssumptions(
    AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  for (auto &Assumption : AC.assumptions()) {
    auto *CI = dyn_cast_or_null<CallInst>(Assumption);
    if (!CI || CI->arg_size() != 1)
      continue;

    bool InScop = scop->contains(CI);
    if (!InScop && !scop->isDominatedBy(DT, CI->getParent()))
      continue;

    auto *L = LI.getLoopFor(CI->getParent());
    auto *Val = CI->getArgOperand(0);
    ParameterSetTy DetectedParams;
    auto &R = scop->getRegion();
    if (!isAffineConstraint(Val, &R, L, SE, DetectedParams)) {
      ORE.emit(
          OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
          << "Non-affine user assumption ignored.");
      continue;
    }

    // Collect all newly introduced parameters.
    ParameterSetTy NewParams;
    for (auto *Param : DetectedParams) {
      Param = extractConstantFactor(Param, SE).second;
      Param = scop->getRepresentingInvariantLoadSCEV(Param);
      if (scop->isParam(Param))
        continue;
      NewParams.insert(Param);
    }

    SmallVector<isl_set *, 2> ConditionSets;
    auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
    BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
    auto *Dom = InScop ? isl_set_copy(scop->getDomainConditions(BB).get())
                       : isl_set_copy(scop->getContext().get());
    assert(Dom && "Cannot propagate a nullptr.");
    bool Valid = buildConditionSets(BB, Val, TI, L, Dom, InvalidDomainMap,
                                    ConditionSets);
    isl_set_free(Dom);

    if (!Valid)
      continue;
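    // ConditionSets[0] holds the set where the assumed condition is true and
    // ConditionSets[1] the set where it is false; the assumption context is
    // derived by complementing the latter.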
    isl_set *AssumptionCtx = nullptr;
    if (InScop) {
      AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
      isl_set_free(ConditionSets[0]);
    } else {
      AssumptionCtx = isl_set_complement(ConditionSets[1]);
      AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
    }

    // Project out newly introduced parameters as they are not otherwise
    // useful.
    if (!NewParams.empty()) {
      for (isl_size u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
        auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
        auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
        isl_id_free(Id);

        if (!NewParams.count(Param))
          continue;

        AssumptionCtx =
            isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
      }
    }

    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
             << "Use user assumption: "
             << stringFromIslObj(AssumptionCtx, "null"));
    isl::set newContext =
        scop->getContext().intersect(isl::manage(AssumptionCtx));
    scop->setContext(newContext);
  }
}

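/// Try to model @p Inst as a multi-dimensional access with fixed-size
/// dimensions derived from the GEP that computes its address. For
/// illustration: an access `A[i][j]` on a C array `int A[100][100]` would
/// roughly yield the subscripts {i, j} and the sizes {100, 100}.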
bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
  // Memory builtins are not considered in this function.
  if (!Inst.isLoad() && !Inst.isStore())
    return false;

  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  Value *Address = Inst.getPointerOperand();
  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  if (auto *BitCast = dyn_cast<BitCastInst>(Address))
    Address = BitCast->getOperand(0);

  auto *GEP = dyn_cast<GetElementPtrInst>(Address);
  if (!GEP || DL.getTypeAllocSize(GEP->getResultElementType()) !=
                  DL.getTypeAllocSize(ElementType))
    return false;

  SmallVector<const SCEV *, 4> Subscripts;
  SmallVector<int, 4> Sizes;
  getIndexExpressionsFromGEP(SE, GEP, Subscripts, Sizes);
  auto *BasePtr = GEP->getOperand(0);

  if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
    BasePtr = BasePtrCast->getOperand(0);

  // Check for identical base pointers to ensure that we do not miss index
  // offsets that have been added before this GEP is applied.
  if (BasePtr != BasePointer->getValue())
    return false;

  std::vector<const SCEV *> SizesSCEV;

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  for (auto *Subscript : Subscripts) {
    InvariantLoadsSetTy AccessILS;
    if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
                      &AccessILS))
      return false;

    for (LoadInst *LInst : AccessILS)
      if (!ScopRIL.count(LInst))
        return false;
  }

  if (Sizes.empty())
    return false;

  SizesSCEV.push_back(nullptr);

  for (auto V : Sizes)
    SizesSCEV.push_back(SE.getSCEV(
        ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, Subscripts, SizesSCEV, Val);
  return true;
}

bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
  // Memory builtins are not considered by this function.
  if (!Inst.isLoad() && !Inst.isStore())
    return false;

  if (!PollyDelinearize)
    return false;

  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  unsigned ElementSize = DL.getTypeAllocSize(ElementType);
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");

  auto &InsnToMemAcc = scop->getInsnToMemAccMap();
  auto AccItr = InsnToMemAcc.find(Inst);
  if (AccItr == InsnToMemAcc.end())
    return false;

  std::vector<const SCEV *> Sizes = {nullptr};
1536 Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
1537 AccItr->second.Shape->DelinearizedSizes.end());
1539 // In case only the element size is contained in the 'Sizes' array, the
1540 // access does not access a real multi-dimensional array. Hence, we allow
1541 // the normal single-dimensional access construction to handle this.
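// Illustrative example (hypothetical shapes): for an access A[i][j] into a
// parametric 'double A[n][m]', DelinearizedSizes would hold {m, 8}, the
// trailing 8 being sizeof(double); 'Sizes' then has more than one entry and
// the multi-dimensional construction below proceeds.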
1542 if (Sizes.size() == 1)
1543 return false;
1545 // Remove the element size. This information is already provided by the
1546 // ElementSize parameter. In case the element size of this access and the
1547 // element size used for delinearization differ, the delinearization is
1548 // incorrect. Hence, we invalidate the scop.
1550 // TODO: Handle delinearization with differing element sizes.
1551 auto DelinearizedSize =
1552 cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
1553 Sizes.pop_back();
1554 if (ElementSize != DelinearizedSize)
1555 scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());
1557 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1558 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
1559 return true;
1560 }
1562 bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
1563 auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
1565 if (MemIntr == nullptr)
1566 return false;
1568 auto *L = LI.getLoopFor(Inst->getParent());
1569 auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
1570 assert(LengthVal);
1572 // Check if the length value is actually affine or if we overapproximate it.
1573 InvariantLoadsSetTy AccessILS;
1574 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1576 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1577 bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
1578 LengthVal, SE, &AccessILS);
1579 for (LoadInst *LInst : AccessILS)
1580 if (!ScopRIL.count(LInst))
1581 LengthIsAffine = false;
1582 if (!LengthIsAffine)
1583 LengthVal = nullptr;
1585 auto *DestPtrVal = MemIntr->getDest();
1586 assert(DestPtrVal);
1588 auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
1589 assert(DestAccFunc);
1590 // Ignore accesses to "NULL".
1591 // TODO: We could use this to optimize the region further, e.g., intersect
1592 // the context with
1593 // isl_set_complement(isl_set_params(getDomain()))
1594 // as we know it would be undefined to execute this instruction anyway.
1595 if (DestAccFunc->isZero())
1596 return true;
1598 if (auto *U = dyn_cast<SCEVUnknown>(DestAccFunc)) {
1599 if (isa<ConstantPointerNull>(U->getValue()))
1600 return true;
1601 }
1603 auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
1604 assert(DestPtrSCEV);
1605 DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
1606 addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
1607 IntegerType::getInt8Ty(DestPtrVal->getContext()),
1608 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
1609 Inst.getValueOperand());
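// Illustrative example (not from the original source): a call such as
// memset(&A[i], 0, n) is modeled by the call above as a MUST_WRITE of n
// bytes into an i8 array sharing A's base pointer, starting at the byte
// offset of &A[i].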
1611 auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
1612 if (!MemTrans)
1613 return true;
1615 auto *SrcPtrVal = MemTrans->getSource();
1616 assert(SrcPtrVal);
1618 auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
1619 assert(SrcAccFunc);
1620 // Ignore accesses to "NULL".
1621 // TODO: See above TODO
1622 if (SrcAccFunc->isZero())
1623 return true;
1625 auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
1626 assert(SrcPtrSCEV);
1627 SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
1628 addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
1629 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
1630 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
1631 Inst.getValueOperand());
1633 return true;
1634 }
1636 bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
1637 auto *CI = dyn_cast_or_null<CallInst>(Inst);
1639 if (CI == nullptr)
1640 return false;
1642 if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
1643 return true;
1645 auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
1646 auto *CalledFunction = CI->getCalledFunction();
1647 MemoryEffects ME = AA.getMemoryEffects(CalledFunction);
1648 if (ME.doesNotAccessMemory())
1649 return true;
1651 if (ME.onlyAccessesArgPointees()) {
1652 ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
1653 auto AccType =
1654 !isModSet(ArgMR) ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
1655 Loop *L = LI.getLoopFor(Inst->getParent());
1656 for (const auto &Arg : CI->args()) {
1657 if (!Arg->getType()->isPointerTy())
1658 continue;
1660 auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
1661 if (ArgSCEV->isZero())
1662 continue;
1664 if (auto *U = dyn_cast<SCEVUnknown>(ArgSCEV)) {
1665 if (isa<ConstantPointerNull>(U->getValue()))
1666 return true;
1667 }
1669 auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
1670 addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
1671 ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
1672 }
1673 return true;
1674 }
1676 if (ME.onlyReadsMemory()) {
1677 GlobalReads.emplace_back(Stmt, CI);
1678 return true;
1679 }
1680 return false;
1681 }
1683 bool ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
1684 // Memory builtins are not considered by this function.
1685 if (!Inst.isLoad() && !Inst.isStore())
1686 return false;
1688 Value *Address = Inst.getPointerOperand();
1689 Value *Val = Inst.getValueOperand();
1690 Type *ElementType = Val->getType();
1691 enum MemoryAccess::AccessType AccType =
1692 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1694 const SCEV *AccessFunction =
1695 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1696 const SCEVUnknown *BasePointer =
1697 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1699 assert(BasePointer && "Could not find base pointer");
1700 AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);
1702 // Check if the access depends on a loop contained in a non-affine subregion.
1703 bool isVariantInNonAffineLoop = false;
1704 SetVector<const Loop *> Loops;
1705 findLoops(AccessFunction, Loops);
1706 for (const Loop *L : Loops)
1707 if (Stmt->contains(L)) {
1708 isVariantInNonAffineLoop = true;
1709 break;
1710 }
1712 InvariantLoadsSetTy AccessILS;
1714 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1715 bool IsAffine = !isVariantInNonAffineLoop &&
1716 isAffineExpr(&scop->getRegion(), SurroundingLoop,
1717 AccessFunction, SE, &AccessILS);
1719 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1720 for (LoadInst *LInst : AccessILS)
1721 if (!ScopRIL.count(LInst))
1722 IsAffine = false;
1724 if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
1725 AccType = MemoryAccess::MAY_WRITE;
1727 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1728 IsAffine, {AccessFunction}, {nullptr}, Val);
1729 return true;
1730 }
1732 void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
1733 if (buildAccessMemIntrinsic(Inst, Stmt))
1734 return;
1736 if (buildAccessCallInst(Inst, Stmt))
1737 return;
1739 if (buildAccessMultiDimFixed(Inst, Stmt))
1740 return;
1742 if (buildAccessMultiDimParam(Inst, Stmt))
1743 return;
1745 if (buildAccessSingleDim(Inst, Stmt))
1746 return;
1748 llvm_unreachable(
1749 "At least one of the buildAccess functions must handled this access, or "
1750 "ScopDetection should have rejected this SCoP");
1753 void ScopBuilder::buildAccessFunctions() {
1754 for (auto &Stmt : *scop) {
1755 if (Stmt.isBlockStmt()) {
1756 buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
1757 continue;
1758 }
1760 Region *R = Stmt.getRegion();
1761 for (BasicBlock *BB : R->blocks())
1762 buildAccessFunctions(&Stmt, *BB, R);
1763 }
1765 // Build write accesses for values that are used after the SCoP.
1766 // The instructions defining them might be synthesizable and therefore not
1767 // contained in any statement, hence we iterate over the original instructions
1768 // to identify all escaping values.
1769 for (BasicBlock *BB : scop->getRegion().blocks()) {
1770 for (Instruction &Inst : *BB)
1771 buildEscapingDependences(&Inst);
1772 }
1773 }
1775 bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
1776 return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
1777 !canSynthesize(Inst, *scop, &SE, L);
1778 }
1780 /// Generate a name for a statement.
1782 /// @param BB The basic block the statement will represent.
1783 /// @param BBIdx The index of the @p BB relative to other BBs/regions.
1784 /// @param Count The index of the created statement in @p BB.
1785 /// @param IsMain Whether this is the main statement for @p BB. If true,
1786 /// no suffix will be added.
1787 /// @param IsLast Uses a special indicator for the last statement of a BB.
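/// Illustrative examples (assuming instruction names are used): a block
/// %for.body split into three statements could be named Stmt_for_body (the
/// main statement, no suffix), Stmt_for_body_b (Count == 1), and
/// Stmt_for_body_last (IsLast == true).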
1788 static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
1789 bool IsMain, bool IsLast = false) {
1790 std::string Suffix;
1791 if (!IsMain) {
1792 if (UseInstructionNames)
1793 Suffix = '_';
1794 if (IsLast)
1795 Suffix += "last";
1796 else if (Count < 26)
1797 Suffix += 'a' + Count;
1798 else
1799 Suffix += std::to_string(Count);
1800 }
1801 return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
1802 }
1804 /// Generate a name for a statement that represents a non-affine subregion.
1806 /// @param R The region the statement will represent.
1807 /// @param RIdx The index of the @p R relative to other BBs/regions.
1808 static std::string makeStmtName(Region *R, long RIdx) {
1809 return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
1810 UseInstructionNames);
1811 }
1813 void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
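// Illustrative sketch (not from the original source): with SplitOnStore set,
// a block computing 'B[i] = A[i]; D[i] = C[i];' is cut directly after each
// store and thus yields two statements.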
1814 Loop *SurroundingLoop = LI.getLoopFor(BB);
1816 int Count = 0;
1817 long BBIdx = scop->getNextStmtIdx();
1818 std::vector<Instruction *> Instructions;
1819 for (Instruction &Inst : *BB) {
1820 if (shouldModelInst(&Inst, SurroundingLoop))
1821 Instructions.push_back(&Inst);
1822 if (Inst.getMetadata("polly_split_after") ||
1823 (SplitOnStore && isa<StoreInst>(Inst))) {
1824 std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1825 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1826 Count++;
1827 Instructions.clear();
1828 }
1829 }
1831 std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1832 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1833 }
1835 /// Is @p Inst an ordered instruction?
1837 /// An unordered instruction is an instruction such that a sequence of
1838 /// unordered instructions can be permuted without changing semantics. Any
1839 /// instruction for which this is not always the case is ordered.
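/// For example (illustrative), two stores must keep their relative order and
/// are therefore ordered, while two side-effect-free 'add' instructions that
/// only feed them may be permuted and are thus unordered.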
1840 static bool isOrderedInstruction(Instruction *Inst) {
1841 return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
1842 }
1844 /// Join instructions into the same statement if one uses the scalar result of the
1845 /// other.
1846 static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
1847 ArrayRef<Instruction *> ModeledInsts) {
1848 for (Instruction *Inst : ModeledInsts) {
1849 if (isa<PHINode>(Inst))
1850 continue;
1852 for (Use &Op : Inst->operands()) {
1853 Instruction *OpInst = dyn_cast<Instruction>(Op.get());
1854 if (!OpInst)
1855 continue;
1857 // Check if OpInst is in the BB and is a modeled instruction.
1858 auto OpVal = UnionFind.findValue(OpInst);
1859 if (OpVal == UnionFind.end())
1860 continue;
1862 UnionFind.unionSets(Inst, OpInst);
1863 }
1864 }
1865 }
1867 /// Ensure that the order of ordered instructions does not change.
1869 /// If we encounter an ordered instruction enclosed in instructions belonging to
1870 /// a different statement (which might as well contain ordered instructions, but
1871 /// this is not tested here), join them.
1872 static void
1873 joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
1874 ArrayRef<Instruction *> ModeledInsts) {
1875 SetVector<Instruction *> SeenLeaders;
1876 for (Instruction *Inst : ModeledInsts) {
1877 if (!isOrderedInstruction(Inst))
1878 continue;
1880 Instruction *Leader = UnionFind.getLeaderValue(Inst);
1881 // Since previous iterations might have merged sets, some items in
1882 // SeenLeaders are not leaders anymore. However, the new leader of
1883 // previously merged instructions must be one of the former leaders of
1884 // these merged instructions.
1885 bool Inserted = SeenLeaders.insert(Leader);
1886 if (Inserted)
1887 continue;
1889 // Merge statements to close holes. Say, we have already seen statements A
1890 // and B, in this order. Then we see an instruction of A again and we would
1891 // see the pattern "A B A". This function joins all statements back to the
1892 // previous occurrence of A, so that A ends up occurring only once.
1893 for (Instruction *Prev : reverse(SeenLeaders)) {
1894 // We are backtracking from the last element until we see Inst's leader
1895 // in SeenLeaders and merge all into one set. Although leaders of
1896 // instructions change during the execution of this loop, it's irrelevant
1897 // as we are just searching for the element that we already confirmed is
1898 // in the list.
1899 if (Prev == Leader)
1900 break;
1901 UnionFind.unionSets(Prev, Leader);
1902 }
1903 }
1904 }
1906 /// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
1907 /// the incoming values from this block are executed after the PHI READ.
1909 /// Otherwise it could overwrite the incoming value from before the BB with the
1910 /// value for the next execution. This can happen if the PHI WRITE is added to
1911 /// the statement with the instruction that defines the incoming value (instead
1912 /// of the last statement of the same BB). To ensure that the PHI READ and WRITE
1913 /// are in order, we put both into the statement. PHI WRITEs are always executed
1914 /// after PHI READs when they are in the same statement.
1916 /// TODO: This is an overpessimization. We only have to ensure that the PHI
1917 /// WRITE is not put into a statement containing the PHI itself. That could also
1918 /// be done by
1919 /// - having all (strongly connected) PHIs in a single statement,
1920 /// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
1921 /// has a chance of being lifted before a PHI by being in a statement with a
1922 /// PHI that comes before in the basic block), or
1923 /// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
1924 static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
1925 ArrayRef<Instruction *> ModeledInsts) {
1926 for (Instruction *Inst : ModeledInsts) {
1927 PHINode *PHI = dyn_cast<PHINode>(Inst);
1928 if (!PHI)
1929 continue;
1931 int Idx = PHI->getBasicBlockIndex(PHI->getParent());
1932 if (Idx < 0)
1933 continue;
1935 Instruction *IncomingVal =
1936 dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
1937 if (!IncomingVal)
1938 continue;
1940 UnionFind.unionSets(PHI, IncomingVal);
1941 }
1942 }
1944 void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
1945 Loop *L = LI.getLoopFor(BB);
1947 // Extracting out modeled instructions saves us from checking
1948 // shouldModelInst() repeatedly.
1949 SmallVector<Instruction *, 32> ModeledInsts;
1950 EquivalenceClasses<Instruction *> UnionFind;
1951 Instruction *MainInst = nullptr, *MainLeader = nullptr;
1952 for (Instruction &Inst : *BB) {
1953 if (!shouldModelInst(&Inst, L))
1954 continue;
1955 ModeledInsts.push_back(&Inst);
1956 UnionFind.insert(&Inst);
1958 // When a BB is split into multiple statements, the main statement is the
1959 // one containing the 'main' instruction. We select the first instruction
1960 // that is unlikely to be removed (because it has side-effects) as the main
1961 // one. It is used to ensure that at least one statement from the BB has the
1962 // same name as with -polly-stmt-granularity=bb.
1963 if (!MainInst && (isa<StoreInst>(Inst) ||
1964 (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
1965 MainInst = &Inst;
1966 }
1968 joinOperandTree(UnionFind, ModeledInsts);
1969 joinOrderedInstructions(UnionFind, ModeledInsts);
1970 joinOrderedPHIs(UnionFind, ModeledInsts);
1972 // The list of instructions per statement (each statement is represented by
1973 // its leader instruction).
1974 MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
1976 // The order of statements must be preserved w.r.t. their ordered
1977 // instructions. Without this explicit scan, we would also use non-ordered
1978 // instructions (whose order is arbitrary) to determine statement order.
1979 for (Instruction *Inst : ModeledInsts) {
1980 if (!isOrderedInstruction(Inst))
1981 continue;
1983 auto LeaderIt = UnionFind.findLeader(Inst);
1984 if (LeaderIt == UnionFind.member_end())
1985 continue;
1987 // Insert element for the leader instruction.
1988 (void)LeaderToInstList[*LeaderIt];
1989 }
1991 // Collect the instructions of all leaders. UnionFind's member iterators
1992 // are unfortunately not in any specific order.
1993 for (Instruction *Inst : ModeledInsts) {
1994 auto LeaderIt = UnionFind.findLeader(Inst);
1995 if (LeaderIt == UnionFind.member_end())
1996 continue;
1998 if (Inst == MainInst)
1999 MainLeader = *LeaderIt;
2000 std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
2001 InstList.push_back(Inst);
2002 }
2004 // Finally build the statements.
2005 int Count = 0;
2006 long BBIdx = scop->getNextStmtIdx();
2007 for (auto &Instructions : LeaderToInstList) {
2008 std::vector<Instruction *> &InstList = Instructions.second;
2010 // If there is no main instruction, make the first statement the main.
2011 bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);
2013 std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
2014 scop->addScopStmt(BB, Name, L, std::move(InstList));
2015 Count += 1;
2016 }
2018 // Unconditionally add an epilogue (last statement). It contains no
2019 // instructions, but holds the PHI write accesses for successor basic blocks,
2020 // if the incoming value is not defined in another statement of the same BB.
2021 // The epilogue becomes the main statement only if there is no other
2022 // statement that could become main.
2023 // The epilogue will be removed if no PHIWrite is added to it.
2024 std::string EpilogueName = makeStmtName(BB, BBIdx, Count, Count == 0, true);
2025 scop->addScopStmt(BB, EpilogueName, L, {});
2026 }
2028 void ScopBuilder::buildStmts(Region &SR) {
2029 if (scop->isNonAffineSubRegion(&SR)) {
2030 std::vector<Instruction *> Instructions;
2031 Loop *SurroundingLoop =
2032 getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
2033 for (Instruction &Inst : *SR.getEntry())
2034 if (shouldModelInst(&Inst, SurroundingLoop))
2035 Instructions.push_back(&Inst);
2036 long RIdx = scop->getNextStmtIdx();
2037 std::string Name = makeStmtName(&SR, RIdx);
2038 scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
2039 return;
2040 }
2042 for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
2043 if (I->isSubRegion())
2044 buildStmts(*I->getNodeAs<Region>());
2045 else {
2046 BasicBlock *BB = I->getNodeAs<BasicBlock>();
2047 switch (StmtGranularity) {
2048 case GranularityChoice::BasicBlocks:
2049 buildSequentialBlockStmts(BB);
2050 break;
2051 case GranularityChoice::ScalarIndependence:
2052 buildEqivClassBlockStmts(BB);
2053 break;
2054 case GranularityChoice::Stores:
2055 buildSequentialBlockStmts(BB, true);
2056 break;
2057 }
2058 }
2059 }
2061 void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
2062 Region *NonAffineSubRegion) {
2063 assert(
2064 Stmt &&
2065 "The exit BB is the only one that cannot be represented by a statement");
2066 assert(Stmt->represents(&BB));
2068 // We do not build access functions for error blocks, as they may contain
2069 // instructions we can not model.
2070 if (SD.isErrorBlock(BB, scop->getRegion()))
2071 return;
2073 auto BuildAccessesForInst = [this, Stmt,
2074 NonAffineSubRegion](Instruction *Inst) {
2075 PHINode *PHI = dyn_cast<PHINode>(Inst);
2076 if (PHI)
2077 buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);
2079 if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
2080 assert(Stmt && "Cannot build access function in non-existing statement");
2081 buildMemoryAccess(MemInst, Stmt);
2082 }
2084 // PHI nodes have already been modeled above and terminators that are
2085 // not part of a non-affine subregion are fully modeled and regenerated
2086 // from the polyhedral domains. Hence, they do not need to be modeled as
2087 // explicit data dependences.
2088 if (!PHI)
2089 buildScalarDependences(Stmt, Inst);
2090 };
2092 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
2093 bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
2094 if (IsEntryBlock) {
2095 for (Instruction *Inst : Stmt->getInstructions())
2096 BuildAccessesForInst(Inst);
2097 if (Stmt->isRegionStmt())
2098 BuildAccessesForInst(BB.getTerminator());
2099 } else {
2100 for (Instruction &Inst : BB) {
2101 if (isIgnoredIntrinsic(&Inst))
2102 continue;
2104 // Invariant loads already have been processed.
2105 if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
2106 continue;
2108 BuildAccessesForInst(&Inst);
2109 }
2110 }
2111 }
2113 MemoryAccess *ScopBuilder::addMemoryAccess(
2114 ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
2115 Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
2116 ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
2117 MemoryKind Kind) {
2118 bool isKnownMustAccess = false;
2120 // Accesses in single-basic-block statements are always executed.
2121 if (Stmt->isBlockStmt())
2122 isKnownMustAccess = true;
2124 if (Stmt->isRegionStmt()) {
2125 // Accesses that dominate the exit block of a non-affine region are always
2126 // executed. In non-affine regions there may exist accesses that do not
2127 // dominate the exit. MemoryKind::Value writes will always dominate the
2128 // exit, and MemoryKind::PHI writes only if there is at most one PHI_WRITE
2129 // in the non-affine region.
2130 if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
2131 isKnownMustAccess = true;
2132 }
2134 // Non-affine PHI writes do not "happen" at a particular instruction, but
2135 // after exiting the statement. Therefore they are guaranteed to execute and
2136 // overwrite the old value.
2137 if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
2138 isKnownMustAccess = true;
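// Illustrative consequence (not from the original source): a store inside a
// non-affine subregion that does not dominate the subregion's exit might be
// skipped at run time, so it is conservatively demoted to MAY_WRITE below.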
2140 if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
2141 AccType = MemoryAccess::MAY_WRITE;
2143 auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
2144 Affine, Subscripts, Sizes, AccessValue, Kind);
2146 scop->addAccessFunction(Access);
2147 Stmt->addAccess(Access);
2148 return Access;
2149 }
2151 void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
2152 MemoryAccess::AccessType AccType,
2153 Value *BaseAddress, Type *ElementType,
2154 bool IsAffine,
2155 ArrayRef<const SCEV *> Subscripts,
2156 ArrayRef<const SCEV *> Sizes,
2157 Value *AccessValue) {
2158 ArrayBasePointers.insert(BaseAddress);
2159 addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine,
2160 AccessValue, Subscripts, Sizes, MemoryKind::Array);
2161 }
2163 /// Check if @p Expr is divisible by @p Size.
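/// For example (illustrative), (8 + 4 * %i) is divisible by 4 because every
/// operand of the addition is, whereas ((3 * %n) * 4) is divisible by 4
/// because a single divisible factor of a multiplication suffices.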
2164 static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
2165 assert(Size != 0);
2166 if (Size == 1)
2167 return true;
2169 // Only one factor needs to be divisible.
2170 if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
2171 for (auto *FactorExpr : MulExpr->operands())
2172 if (isDivisible(FactorExpr, Size, SE))
2173 return true;
2174 return false;
2175 }
2177 // For other n-ary expressions (Add, AddRec, Max,...) all operands need
2178 // to be divisible.
2179 if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
2180 for (auto *OpExpr : NAryExpr->operands())
2181 if (!isDivisible(OpExpr, Size, SE))
2182 return false;
2183 return true;
2184 }
2186 auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
2187 auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
2188 auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
2189 return MulSCEV == Expr;
2190 }
2192 void ScopBuilder::foldSizeConstantsToRight() {
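// Rough sketch of the idea (not from the original source): if the outer
// dimension of an array is only ever accessed with a constant stride, e.g.
// only as A[3*i][j], the factor 3 can be folded into the size of the
// innermost dimension so that the outer subscripts become dense.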
2193 isl::union_set Accessed = scop->getAccesses().range();
2195 for (auto Array : scop->arrays()) {
2196 if (Array->getNumberOfDimensions() <= 1)
2197 continue;
2199 isl::space Space = Array->getSpace();
2200 Space = Space.align_params(Accessed.get_space());
2202 if (!Accessed.contains(Space))
2203 continue;
2205 isl::set Elements = Accessed.extract_set(Space);
2206 isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());
2208 std::vector<int> Int;
2209 unsigned Dims = unsignedFromIslSize(Elements.tuple_dim());
2210 for (unsigned i = 0; i < Dims; i++) {
2211 isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
2212 DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
2213 DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);
2215 isl::basic_set DimHull = DimOnly.affine_hull();
2217 if (i == Dims - 1) {
2218 Int.push_back(1);
2219 Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
2220 continue;
2221 }
2223 if (unsignedFromIslSize(DimHull.dim(isl::dim::div)) == 1) {
2224 isl::aff Diff = DimHull.get_div(0);
2225 isl::val Val = Diff.get_denominator_val();
2227 int ValInt = 1;
2228 if (Val.is_int()) {
2229 auto ValAPInt = APIntFromVal(Val);
2230 if (ValAPInt.isSignedIntN(32))
2231 ValInt = ValAPInt.getSExtValue();
2232 } else {
2233 }
2235 Int.push_back(ValInt);
2236 isl::constraint C = isl::constraint::alloc_equality(
2237 isl::local_space(Transform.get_space()));
2238 C = C.set_coefficient_si(isl::dim::out, i, ValInt);
2239 C = C.set_coefficient_si(isl::dim::in, i, -1);
2240 Transform = Transform.add_constraint(C);
2241 continue;
2242 }
2244 isl::basic_set ZeroSet = isl::basic_set(DimHull);
2245 ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);
2247 int ValInt = 1;
2248 if (ZeroSet.is_equal(DimHull)) {
2249 ValInt = 0;
2250 }
2252 Int.push_back(ValInt);
2253 Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
2254 }
2256 isl::set MappedElements = isl::map(Transform).domain();
2257 if (!Elements.is_subset(MappedElements))
2258 continue;
2260 bool CanFold = true;
2261 if (Int[0] <= 1)
2262 CanFold = false;
2264 unsigned NumDims = Array->getNumberOfDimensions();
2265 for (unsigned i = 1; i < NumDims - 1; i++)
2266 if (Int[0] != Int[i] && Int[i])
2267 CanFold = false;
2269 if (!CanFold)
2270 continue;
2272 for (auto &Access : scop->access_functions())
2273 if (Access->getScopArrayInfo() == Array)
2274 Access->setAccessRelation(
2275 Access->getAccessRelation().apply_range(Transform));
2277 std::vector<const SCEV *> Sizes;
2278 for (unsigned i = 0; i < NumDims; i++) {
2279 auto Size = Array->getDimensionSize(i);
2281 if (i == NumDims - 1)
2282 Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0]));
2283 Sizes.push_back(Size);
2284 }
2286 Array->updateSizes(Sizes, false /* CheckConsistency */);
2287 }
2288 }
2290 void ScopBuilder::finalizeAccesses() {
2291 updateAccessDimensionality();
2292 foldSizeConstantsToRight();
2293 foldAccessRelations();
2294 assumeNoOutOfBounds();
2295 }
2297 void ScopBuilder::updateAccessDimensionality() {
2298 // Check all array accesses for each base pointer and find a (virtual) element
2299 // size for the base pointer that divides all access functions.
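// Illustrative example (hypothetical values): if an array of i64 elements is
// also accessed at a 4-byte offset, the divisible size is halved from 8 to 4
// and the (virtual) element type becomes i32.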
2300 for (ScopStmt &Stmt : *scop)
2301 for (MemoryAccess *Access : Stmt) {
2302 if (!Access->isArrayKind())
2303 continue;
2304 ScopArrayInfo *Array =
2305 const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
2307 if (Array->getNumberOfDimensions() != 1)
2308 continue;
2309 unsigned DivisibleSize = Array->getElemSizeInBytes();
2310 const SCEV *Subscript = Access->getSubscript(0);
2311 while (!isDivisible(Subscript, DivisibleSize, SE))
2312 DivisibleSize /= 2;
2313 auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8);
2314 Array->updateElementType(Ty);
2315 }
2317 for (auto &Stmt : *scop)
2318 for (auto &Access : Stmt)
2319 Access->updateDimensionality();
2320 }
2322 void ScopBuilder::foldAccessRelations() {
2323 for (auto &Stmt : *scop)
2324 for (auto &Access : Stmt)
2325 Access->foldAccessRelation();
2326 }
2328 void ScopBuilder::assumeNoOutOfBounds() {
2329 if (PollyIgnoreInbounds)
2330 return;
2331 for (auto &Stmt : *scop)
2332 for (auto &Access : Stmt) {
2333 isl::set Outside = Access->assumeNoOutOfBound();
2334 const auto &Loc = Access->getAccessInstruction()
2335 ? Access->getAccessInstruction()->getDebugLoc()
2336 : DebugLoc();
2337 recordAssumption(&RecordedAssumptions, INBOUNDS, Outside, Loc,
2338 AS_ASSUMPTION);
2339 }
2340 }
2342 void ScopBuilder::ensureValueWrite(Instruction *Inst) {
2343 // Find the statement that defines the value of Inst. That statement has to
2344 // write the value to make it available to those statements that read it.
2345 ScopStmt *Stmt = scop->getStmtFor(Inst);
2347 // It is possible that the value is synthesizable within a loop (such that it
2348 // is not part of any statement), but not after the loop (where you need the
2349 // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
2350 // avoid this. In case the IR has no such PHI, use the last statement (where
2351 // the value is synthesizable) to write the value.
2352 if (!Stmt)
2353 Stmt = scop->getLastStmtFor(Inst->getParent());
2355 // Inst not defined within this SCoP.
2356 if (!Stmt)
2357 return;
2359 // Do not process further if the instruction is already written.
2360 if (Stmt->lookupValueWriteOf(Inst))
2361 return;
2363 addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
2364 true, Inst, ArrayRef<const SCEV *>(),
2365 ArrayRef<const SCEV *>(), MemoryKind::Value);
2366 }
2368 void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
2369 // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
2370 // to be able to replace this one. Currently, there is a split responsibility.
2371 // In a first step, the MemoryAccess is created, but without the
2372 // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
2373 // AccessRelation is created. At least for scalar accesses, there is no new
2374 // information available at ScopStmt::buildAccessRelations(), so we could
2375 // create the AccessRelation right away. This is what
2376 // ScopStmt::ensureValueRead(Value*) does.
2378 auto *Scope = UserStmt->getSurroundingLoop();
2379 auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
2380 switch (VUse.getKind()) {
2381 case VirtualUse::Constant:
2382 case VirtualUse::Block:
2383 case VirtualUse::Synthesizable:
2384 case VirtualUse::Hoisted:
2385 case VirtualUse::Intra:
2386 // Uses of these kinds do not need a MemoryAccess.
2387 break;
2389 case VirtualUse::ReadOnly:
2390 // Add MemoryAccess for invariant values only if requested.
2391 if (!ModelReadOnlyScalars)
2392 break;
2394 [[fallthrough]];
2395 case VirtualUse::Inter:
2397 // Do not create another MemoryAccess for reloading the value if one already
2398 // exists.
2399 if (UserStmt->lookupValueReadOf(V))
2400 break;
2402 addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
2403 true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2404 MemoryKind::Value);
2406 // Inter-statement uses need to write the value in their defining statement.
2407 if (VUse.isInter())
2408 ensureValueWrite(cast<Instruction>(V));
2409 break;
2410 }
2411 }
2413 void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
2414 BasicBlock *IncomingBlock,
2415 Value *IncomingValue, bool IsExitBlock) {
2416 // As the incoming block might turn out to be an error statement, ensure we
2417 // will create an exit PHI SAI object. It is needed during code generation
2418 // and would be created later anyway.
2419 if (IsExitBlock)
2420 scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
2421 MemoryKind::ExitPHI);
2423 // This is possible if PHI is in the SCoP's entry block. The incoming blocks
2424 // from outside the SCoP's region have no statement representation.
2425 if (!IncomingStmt)
2426 return;
2428 // Take care that the incoming value is available in the incoming block.
2429 // This must be done before the check for multiple PHI writes because each
2430 // of the multiple exiting edges of a subregion can carry the effectively
2431 // written value of the subregion. As such, all of them must be made
2432 // available in the subregion statement.
2433 ensureValueRead(IncomingValue, IncomingStmt);
2435 // Do not add more than one MemoryAccess per PHINode and ScopStmt.
2436 if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
2437 assert(Acc->getAccessInstruction() == PHI);
2438 Acc->addIncoming(IncomingBlock, IncomingValue);
2439 return;
2440 }
2442 MemoryAccess *Acc = addMemoryAccess(
2443 IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
2444 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2445 IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
2446 assert(Acc);
2447 Acc->addIncoming(IncomingBlock, IncomingValue);
2448 }
2450 void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
2451 addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
2452 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2453 MemoryKind::PHI);
2454 }
2456 void ScopBuilder::buildDomain(ScopStmt &Stmt) {
2457 isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);
2459 Stmt.Domain = scop->getDomainConditions(&Stmt);
2460 Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
2461 }
2463 void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
2464 isl::set Domain = Stmt.getDomain();
2465 BasicBlock *BB = Stmt.getEntryBlock();
2467 Loop *L = LI.getLoopFor(BB);
2469 while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
2470 L = L->getParentLoop();
2472 SmallVector<llvm::Loop *, 8> Loops;
2474 while (L && Stmt.getParent()->getRegion().contains(L)) {
2475 Loops.push_back(L);
2476 L = L->getParentLoop();
2477 }
2479 Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
2480 }
2482 /// Return the reduction type for a given binary operator.
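/// For example (illustrative), a fast-math 'fadd' feeding the store of a
/// 'sum += ...' loop maps to RT_ADD, whereas a strict 'fadd' yields RT_NONE
/// because reassociating it would not be sound.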
2483 static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
2484 const Instruction *Load) {
2485 if (!BinOp)
2486 return MemoryAccess::RT_NONE;
2487 switch (BinOp->getOpcode()) {
2488 case Instruction::FAdd:
2489 if (!BinOp->isFast())
2490 return MemoryAccess::RT_NONE;
2491 [[fallthrough]];
2492 case Instruction::Add:
2493 return MemoryAccess::RT_ADD;
2494 case Instruction::Or:
2495 return MemoryAccess::RT_BOR;
2496 case Instruction::Xor:
2497 return MemoryAccess::RT_BXOR;
2498 case Instruction::And:
2499 return MemoryAccess::RT_BAND;
2500 case Instruction::FMul:
2501 if (!BinOp->isFast())
2502 return MemoryAccess::RT_NONE;
2503 [[fallthrough]];
2504 case Instruction::Mul:
2505 if (DisableMultiplicativeReductions)
2506 return MemoryAccess::RT_NONE;
2507 return MemoryAccess::RT_MUL;
2508 default:
2509 return MemoryAccess::RT_NONE;
2510 }
2511 }
2513 void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
2514 SmallVector<MemoryAccess *, 2> Loads;
2515 SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
2517 // First collect candidate load-store reduction chains by iterating over all
2518 // stores and collecting possible reduction loads.
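// Illustrative example (not from the original source): for a loop body
// performing 'sum = sum + A[i]', the load of 'sum' and the store to 'sum'
// form one candidate pair that the checks below must validate.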
2519 for (MemoryAccess *StoreMA : Stmt) {
2520 if (StoreMA->isRead())
2521 continue;
2523 Loads.clear();
2524 collectCandidateReductionLoads(StoreMA, Loads);
2525 for (MemoryAccess *LoadMA : Loads)
2526 Candidates.push_back(std::make_pair(LoadMA, StoreMA));
2527 }
2529 // Then check each possible candidate pair.
2530 for (const auto &CandidatePair : Candidates) {
2531 bool Valid = true;
2532 isl::map LoadAccs = CandidatePair.first->getAccessRelation();
2533 isl::map StoreAccs = CandidatePair.second->getAccessRelation();
2535 // Skip those with obviously unequal base addresses.
2536 if (!LoadAccs.has_equal_space(StoreAccs)) {
2537 continue;
2538 }
2541 // And check the remaining memory accesses for overlap with the candidate pair.
2541 isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
2542 AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
2543 isl::set AllAccs = AllAccsRel.range();
2545 for (MemoryAccess *MA : Stmt) {
2546 if (MA == CandidatePair.first || MA == CandidatePair.second)
2547 continue;
2549 isl::map AccRel =
2550 MA->getAccessRelation().intersect_domain(Stmt.getDomain());
2551 isl::set Accs = AccRel.range();
2553 if (AllAccs.has_equal_space(Accs)) {
2554 isl::set OverlapAccs = Accs.intersect(AllAccs);
2555 Valid = Valid && OverlapAccs.is_empty();
2556 }
2557 }
2559 if (!Valid)
2560 continue;
2562 const LoadInst *Load =
2563 dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
2564 MemoryAccess::ReductionType RT =
2565 getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
2567 // If no overlapping access was found we mark the load and store as
2568 // reduction like.
2569 CandidatePair.first->markAsReductionLike(RT);
2570 CandidatePair.second->markAsReductionLike(RT);
2571 }
2572 }
2574 void ScopBuilder::verifyInvariantLoads() {
2575 auto &RIL = scop->getRequiredInvariantLoads();
2576 for (LoadInst *LI : RIL) {
2577 assert(LI && scop->contains(LI));
2578 // If there exists a statement in the scop which has a memory access for
2579 // @p LI, then mark this scop as infeasible for optimization.
2580 for (ScopStmt &Stmt : *scop)
2581 if (Stmt.getArrayAccessOrNULLFor(LI)) {
2582 scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
2583 return;
2584 }
2585 }
2586 }
2588 void ScopBuilder::hoistInvariantLoads() {
2589 if (!PollyInvariantLoadHoisting)
2590 return;
2592 isl::union_map Writes = scop->getWrites();
2593 for (ScopStmt &Stmt : *scop) {
2594 InvariantAccessesTy InvariantAccesses;
2596 for (MemoryAccess *Access : Stmt) {
2597 isl::set NHCtx = getNonHoistableCtx(Access, Writes);
2598 if (!NHCtx.is_null())
2599 InvariantAccesses.push_back({Access, NHCtx});
2600 }
2602 // Transfer the memory access from the statement to the SCoP.
2603 for (auto InvMA : InvariantAccesses)
2604 Stmt.removeMemoryAccess(InvMA.MA);
2605 addInvariantLoads(Stmt, InvariantAccesses);
2606 }
2607 }
2609 /// Check if an access range is too complex.
2611 /// An access range is too complex, if it contains either many disjuncts or
2612 /// very complex expressions. As a simple heuristic, we consider a set to
2613 /// be too complex if the sum of existentially quantified dimensions and
2614 /// set dimensions is larger than a threshold. This reliably detects both
2615 /// sets with many disjuncts as well as sets with many divisions as they
2616 /// arise in h264.
2618 /// @param AccessRange The range to check for complexity.
2620 /// @returns True if the access range is too complex.
2621 static bool isAccessRangeTooComplex(isl::set AccessRange) {
2622 unsigned NumTotalDims = 0;
2624 for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
2625 NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::div));
2626 NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::set));
2627 }
2629 if (NumTotalDims > MaxDimensionsInAccessRange)
2630 return true;
2632 return false;
2633 }
2635 bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
2636 isl::union_map Writes) {
2637 if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) {
2638 return getNonHoistableCtx(BasePtrMA, Writes).is_null();
2639 }
2641 Value *BaseAddr = MA->getOriginalBaseAddr();
2642 if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
2643 if (!isa<LoadInst>(BasePtrInst))
2644 return scop->contains(BasePtrInst);
2646 return false;
2647 }
2649 void ScopBuilder::addUserContext() {
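// Illustrative usage (hypothetical parameter names): for a SCoP with
// parameters n and m one could pass
//   -polly-context='[n, m] -> { : n >= 0 and m >= 1024 }'
// which must use exactly the SCoP's parameter space, as verified below.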
2650 if (UserContextStr.empty())
2651 return;
2653 isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str());
2654 isl::space Space = scop->getParamSpace();
2655 isl::size SpaceParams = Space.dim(isl::dim::param);
2656 if (unsignedFromIslSize(SpaceParams) !=
2657 unsignedFromIslSize(UserContext.dim(isl::dim::param))) {
2658 std::string SpaceStr = stringFromIslObj(Space, "null");
2659 errs() << "Error: the context provided in -polly-context has not the same "
2660 << "number of dimensions than the computed context. Due to this "
2661 << "mismatch, the -polly-context option is ignored. Please provide "
2662 << "the context in the parameter space: " << SpaceStr << ".\n";
2663 return;
2664 }
2666 for (auto i : rangeIslSize(0, SpaceParams)) {
2667 std::string NameContext =
2668 scop->getContext().get_dim_name(isl::dim::param, i);
2669 std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);
2671 if (NameContext != NameUserContext) {
2672 std::string SpaceStr = stringFromIslObj(Space, "null");
2673 errs() << "Error: the name of dimension " << i
2674 << " provided in -polly-context " << "is '" << NameUserContext
2675 << "', but the name in the computed " << "context is '"
2676 << NameContext << "'. Due to this name mismatch, "
2677 << "the -polly-context option is ignored. Please provide "
2678 << "the context in the parameter space: " << SpaceStr << ".\n";
2679 return;
2680 }
2682 UserContext = UserContext.set_dim_id(isl::dim::param, i,
2683 Space.get_dim_id(isl::dim::param, i));
2684 }
2685 isl::set newContext = scop->getContext().intersect(UserContext);
2686 scop->setContext(newContext);
2687 }
2689 isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,
2690 isl::union_map Writes) {
2691 // TODO: Loads that are not loop carried, hence are in a statement with
2692 // zero iterators, are by construction invariant, though we
2693 // currently "hoist" them anyway. This is necessary because we allow
2694 // them to be treated as parameters (e.g., in conditions) and our code
2695 // generation would otherwise use the old value.
2697 auto &Stmt = *Access->getStatement();
2698 BasicBlock *BB = Stmt.getEntryBlock();
2700 if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
2701 Access->isMemoryIntrinsic())
2702 return {};
2704 // Skip accesses that have an invariant base pointer which is defined but
2705 // not loaded inside the SCoP. This can happen, e.g., if a readnone call
2706 // returns a pointer that is used as a base address. However, as we want
2707 // to hoist indirect pointers, we allow the base pointer to be defined in
2708 // the region if it is also a memory access. Each ScopArrayInfo object
2709 // that has a base pointer origin has a base pointer that is loaded and
2710 // invariant, thus it will be hoisted too. However, if there is
2711 // no base pointer origin we check that the base pointer is defined
2712 // outside the region.
2713 auto *LI = cast<LoadInst>(Access->getAccessInstruction());
2714 if (hasNonHoistableBasePtrInScop(Access, Writes))
2715 return {};
2717 isl::map AccessRelation = Access->getAccessRelation();
2718 assert(!AccessRelation.is_empty());
2720 if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
2721 return {};
2723 AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
2724 isl::set SafeToLoad;
2726 auto &DL = scop->getFunction().getParent()->getDataLayout();
2727 if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(),
2728 LI->getAlign(), DL)) {
2729 SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
2730 } else if (BB != LI->getParent()) {
2731 // Skip accesses in non-affine subregions as they might not be executed
2732 // under the same condition as the entry of the non-affine subregion.
2733 return {};
2734 } else {
2735 SafeToLoad = AccessRelation.range();
2736 }
2738 if (isAccessRangeTooComplex(AccessRelation.range()))
2739 return {};
2741 isl::union_map Written = Writes.intersect_range(SafeToLoad);
2742 isl::set WrittenCtx = Written.params();
2743 bool IsWritten = !WrittenCtx.is_empty();
2745 if (!IsWritten)
2746 return WrittenCtx;
2748 WrittenCtx = WrittenCtx.remove_divs();
2749 bool TooComplex =
2750 unsignedFromIslSize(WrittenCtx.n_basic_set()) >= MaxDisjunctsInDomain;
2751 if (TooComplex || !isRequiredInvariantLoad(LI))
2752 return {};
2754 scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(),
2755 AS_RESTRICTION, LI->getParent());
2756 return WrittenCtx;
2757 }
2759 static bool isAParameter(llvm::Value *maybeParam, const Function &F) {
2760 for (const llvm::Argument &Arg : F.args())
2761 if (&Arg == maybeParam)
2762 return true;
2764 return false;
2765 }
2767 bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA,
2768 bool StmtInvalidCtxIsEmpty,
2769 bool MAInvalidCtxIsEmpty,
2770 bool NonHoistableCtxIsEmpty) {
2771 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
2772 const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
2773 if (PollyAllowDereferenceOfAllFunctionParams &&
2774 isAParameter(LInst->getPointerOperand(), scop->getFunction()))
2775 return true;
2777 // TODO: We can provide more information for better but more expensive
2778 // results.
2779 if (!isDereferenceableAndAlignedPointer(
2780 LInst->getPointerOperand(), LInst->getType(), LInst->getAlign(), DL))
2781 return false;
2783 // If the location might be overwritten we do not hoist it unconditionally.
2785 // TODO: This is probably too conservative.
2786 if (!NonHoistableCtxIsEmpty)
2787 return false;
2789 // If a dereferenceable load is in a statement that is modeled precisely we
2790 // can hoist it.
2791 if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
2792 return true;
2794 // Even if the statement is not modeled precisely we can hoist the load if it
2795 // does not involve any parameters that might have been specialized by the
2796 // statement domain.
2797 for (const SCEV *Subscript : MA->subscripts())
2798 if (!isa<SCEVConstant>(Subscript))
2799 return false;
2800 return true;
2801 }
2803 void ScopBuilder::addInvariantLoads(ScopStmt &Stmt,
2804 InvariantAccessesTy &InvMAs) {
2805 if (InvMAs.empty())
2806 return;
2808 isl::set StmtInvalidCtx = Stmt.getInvalidContext();
2809 bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
2811 // Get the context under which the statement is executed but remove the error
2812 // context under which this statement is reached.
2813 isl::set DomainCtx = Stmt.getDomain().params();
2814 DomainCtx = DomainCtx.subtract(StmtInvalidCtx);
2816 if (unsignedFromIslSize(DomainCtx.n_basic_set()) >= MaxDisjunctsInDomain) {
2817 auto *AccInst = InvMAs.front().MA->getAccessInstruction();
2818 scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
2819 return;
2820 }
2822 // Project out all parameters that relate to loads in the statement. Otherwise
2823 // we could have cyclic dependences on the constraints under which the
2824 // hoisted loads are executed and we could not determine an order in which to
2825 // pre-load them. This happens because not only lower bounds are part of the
2826 // domain but also upper bounds.
2827 for (auto &InvMA : InvMAs) {
2828 auto *MA = InvMA.MA;
2829 Instruction *AccInst = MA->getAccessInstruction();
2830 if (SE.isSCEVable(AccInst->getType())) {
2831 SetVector<Value *> Values;
2832 for (const SCEV *Parameter : scop->parameters()) {
2833 Values.clear();
2834 findValues(Parameter, SE, Values);
2835 if (!Values.count(AccInst))
2836 continue;
2838 isl::id ParamId = scop->getIdForParam(Parameter);
2839 if (!ParamId.is_null()) {
2840 int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
2841 if (Dim >= 0)
2842 DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
2843 }
2844 }
2845 }
2846 }
2848 for (auto &InvMA : InvMAs) {
2849 auto *MA = InvMA.MA;
2850 isl::set NHCtx = InvMA.NonHoistableCtx;
2852 // Check for another invariant access that accesses the same location as
2853 // MA and if found consolidate them. Otherwise create a new equivalence
2854 // class at the end of InvariantEquivClasses.
2855 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
2856 Type *Ty = LInst->getType();
2857 const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
2859 isl::set MAInvalidCtx = MA->getInvalidContext();
2860 bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
2861 bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
2863 isl::set MACtx;
2864 // Check if we know that this pointer can be speculatively accessed.
2865 if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
2866 NonHoistableCtxIsEmpty)) {
2867 MACtx = isl::set::universe(DomainCtx.get_space());
2868 } else {
2869 MACtx = DomainCtx;
2870 MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
2871 MACtx = MACtx.gist_params(scop->getContext());
2872 }
2874 bool Consolidated = false;
2875 for (auto &IAClass : scop->invariantEquivClasses()) {
2876 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
2877 continue;
2879 // If the pointer and the type are equal, check if the access function
2880 // w.r.t. the domain is equal too. It can happen that the domain fixes
2881 // parameter values and these can be different for distinct part of the
2882 // SCoP. If this happens we cannot consolidate the loads but need to
2883 // create a new invariant load equivalence class.
2884 auto &MAs = IAClass.InvariantAccesses;
2885 if (!MAs.empty()) {
2886 auto *LastMA = MAs.front();
2888 isl::set AR = MA->getAccessRelation().range();
2889 isl::set LastAR = LastMA->getAccessRelation().range();
2890 bool SameAR = AR.is_equal(LastAR);
2892 if (!SameAR)
2893 continue;
2894 }
2896 // Add MA to the list of accesses that are in this class.
2897 MAs.push_front(MA);
2899 Consolidated = true;
2901 // Unify the execution context of the class and this statement.
2902 isl::set IAClassDomainCtx = IAClass.ExecutionContext;
2903 if (!IAClassDomainCtx.is_null())
2904 IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
2905 else
2906 IAClassDomainCtx = MACtx;
2907 IAClass.ExecutionContext = IAClassDomainCtx;
2908 break;
2909 }
2911 if (Consolidated)
2912 continue;
2914 MACtx = MACtx.coalesce();
2916 // If we did not consolidate MA, thus did not find an equivalence class
2917 // for it, we create a new one.
2918 scop->addInvariantEquivClass(
2919 InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
2920 }
2921 }
2923 void ScopBuilder::collectCandidateReductionLoads(
2924 MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
2925 ScopStmt *Stmt = StoreMA->getStatement();
2927 auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
2928 if (!Store)
2929 return;
2931 // Skip if there is not exactly one binary operator between the load and the store.
2932 auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
2933 if (!BinOp)
2934 return;
2937 // Skip if the binary operator has multiple uses.
2937 if (BinOp->getNumUses() != 1)
2938 return;
2940 // Skip if the opcode of the binary operator is not commutative/associative
2941 if (!BinOp->isCommutative() || !BinOp->isAssociative())
2942 return;
2944 // Skip if the binary operator is outside the current SCoP
2945 if (BinOp->getParent() != Store->getParent())
2946 return;
2948 // Skip if it is a multiplicative reduction and we disabled them
2949 if (DisableMultiplicativeReductions &&
2950 (BinOp->getOpcode() == Instruction::Mul ||
2951 BinOp->getOpcode() == Instruction::FMul))
2952 return;
2954 // Check the binary operator operands for a candidate load
2955 auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
2956 auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
2957 if (!PossibleLoad0 && !PossibleLoad1)
2958 return;
2960 // A load is only a candidate if it cannot escape (thus has only this use)
2961 if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
2962 if (PossibleLoad0->getParent() == Store->getParent())
2963 Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
2964 if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
2965 if (PossibleLoad1->getParent() == Store->getParent())
2966 Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
2967 }
2969 /// Find the canonical scop array info object for a set of hoisted
2970 /// invariant loads. The canonical array is the one that corresponds to the
2971 /// first load in the list of accesses which is used as base pointer of a
2972 /// scop array.
2973 static const ScopArrayInfo *findCanonicalArray(Scop &S,
2974 MemoryAccessList &Accesses) {
2975 for (MemoryAccess *Access : Accesses) {
2976 const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
2977 Access->getAccessInstruction(), MemoryKind::Array);
2978 if (CanonicalArray)
2979 return CanonicalArray;
2980 }
2981 return nullptr;
2984 /// Check if @p Array serves as the base array in an invariant load.
2985 static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) {
2986 for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
2987 for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
2988 if (Access2->getScopArrayInfo() == Array)
2989 return true;
2990 return false;
2991 }
2993 /// Replace the base pointer arrays in all memory accesses referencing @p Old
2994 /// with a reference to @p New.
2995 static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
2996 const ScopArrayInfo *New) {
2997 for (ScopStmt &Stmt : S)
2998 for (MemoryAccess *Access : Stmt) {
2999 if (Access->getLatestScopArrayInfo() != Old)
3000 continue;
3002 isl::id Id = New->getBasePtrId();
3003 isl::map Map = Access->getAccessRelation();
3004 Map = Map.set_tuple_id(isl::dim::out, Id);
3005 Access->setAccessRelation(Map);
3006 }
3007 }
3009 void ScopBuilder::canonicalizeDynamicBasePtrs() {
3010 for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
3011 MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
3013 const ScopArrayInfo *CanonicalBasePtrSAI =
3014 findCanonicalArray(*scop, BasePtrAccesses);
3016 if (!CanonicalBasePtrSAI)
3017 continue;
3019 for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
3020 const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
3021 BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
3022 if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
3023 !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
3024 continue;
3026 // We currently do not canonicalize arrays where some accesses are
3027 // hoisted as invariant loads. If we did, we would need to update the access
3028 // function of the invariant loads as well. However, as this is not a
3029 // very common situation, we leave this for now to avoid further
3030 // complexity increases.
3031 if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI))
3032 continue;
3034 replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI);
3035 }
3036 }
3037 }
3039 void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
3040 for (MemoryAccess *Access : Stmt.MemAccs) {
3041 Type *ElementType = Access->getElementType();
3043 MemoryKind Ty;
3044 if (Access->isPHIKind())
3045 Ty = MemoryKind::PHI;
3046 else if (Access->isExitPHIKind())
3047 Ty = MemoryKind::ExitPHI;
3048 else if (Access->isValueKind())
3049 Ty = MemoryKind::Value;
3050 else
3051 Ty = MemoryKind::Array;
3053 // Create isl::pw_aff for SCEVs which describe sizes. Collect all
3054 // assumptions which are taken. isl::pw_aff objects are cached internally
3055 // and they are used later by scop.
3056 for (const SCEV *Size : Access->Sizes) {
3057 if (!Size)
3058 continue;
3059 scop->getPwAff(Size, nullptr, false, &RecordedAssumptions);
3060 }
3061 auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
3062 ElementType, Access->Sizes, Ty);
3064 // Create isl::pw_aff for SCEVs which describe subscripts. Collect all
3065 // assumptions which are taken. isl::pw_aff objects are cached internally
3066 // and they are used later by scop.
3067 for (const SCEV *Subscript : Access->subscripts()) {
3068 if (!Access->isAffine() || !Subscript)
3069 continue;
3070 scop->getPwAff(Subscript, Stmt.getEntryBlock(), false,
3071 &RecordedAssumptions);
3072 }
3073 Access->buildAccessRelation(SAI);
3074 scop->addAccessData(Access);
3075 }
3076 }
3078 /// Add the minimal/maximal access in @p Set to @p MinMaxAccesses.
3080 /// @return True if more accesses should be added, false if we reached the
3081 /// maximal number of run-time checks to be generated.
3082 static bool buildMinMaxAccess(isl::set Set,
3083 Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
3084 isl::pw_multi_aff MinPMA, MaxPMA;
3085 isl::pw_aff LastDimAff;
3086 isl::aff OneAff;
3087 unsigned Pos;
3089 Set = Set.remove_divs();
3090 polly::simplify(Set);
3092 if (unsignedFromIslSize(Set.n_basic_set()) > RunTimeChecksMaxAccessDisjuncts)
3093 Set = Set.simple_hull();
3095 // Restrict the number of parameters involved in the access as the lexmin/
3096 // lexmax computation will take too long if this number is high.
3098 // Experiments with a simple test case using an i7 4800MQ:
3100 // #Parameters involved | Time (in sec)
3101 // 6 | 0.01
3102 // 7 | 0.04
3103 // 8 | 0.12
3104 // 9 | 0.40
3105 // 10 | 1.54
3106 // 11 | 6.78
3107 // 12 | 30.38
3109 if (isl_set_n_param(Set.get()) >
3110 static_cast<isl_size>(RunTimeChecksMaxParameters)) {
3111 unsigned InvolvedParams = 0;
3112 for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
3113 if (Set.involves_dims(isl::dim::param, u, 1))
3114 InvolvedParams++;
3116 if (InvolvedParams > RunTimeChecksMaxParameters)
3117 return false;
3120 MinPMA = Set.lexmin_pw_multi_aff();
3121 MaxPMA = Set.lexmax_pw_multi_aff();
3123 MinPMA = MinPMA.coalesce();
3124 MaxPMA = MaxPMA.coalesce();
3126 if (MaxPMA.is_null())
3127 return false;
3129 unsigned MaxOutputSize = unsignedFromIslSize(MaxPMA.dim(isl::dim::out));
3131 // Adjust the last dimension of the maximal access by one as we want to
3132 // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
3133 // we test during code generation might now point after the end of the
3134 // allocated array but we will never dereference it anyway.
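  // For example (hypothetical): if the accessed region of an array A is
  // A[0..N-1], MinPMA selects A[0] and MaxPMA, after the increment below,
  // A[N]; the run-time check can then compare half-open intervals [Min, Max)
  // of two arrays for overlap without ever dereferencing A[N].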
  assert(MaxOutputSize >= 1 && "Assumed at least one output dimension");

  Pos = MaxOutputSize - 1;
  LastDimAff = MaxPMA.at(Pos);
  OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
  OneAff = OneAff.add_constant_si(1);
  LastDimAff = LastDimAff.add(OneAff);
  MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);

  if (MinPMA.is_null() || MaxPMA.is_null())
    return false;

  MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));

  return true;
}

/// Wrapper function to calculate minimal/maximal accesses to each array.
bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup,
                                        Scop::MinMaxVectorTy &MinMaxAccesses) {
  MinMaxAccesses.reserve(AliasGroup.size());

  isl::union_set Domains = scop->getDomains();
  isl::union_map Accesses = isl::union_map::empty(scop->getIslCtx());

  for (MemoryAccess *MA : AliasGroup)
    Accesses = Accesses.unite(MA->getAccessRelation());

  Accesses = Accesses.intersect_domain(Domains);
  isl::union_set Locations = Accesses.range();

  bool LimitReached = false;
  for (isl::set Set : Locations.get_set_list()) {
    LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop);
    if (LimitReached)
      break;
  }

  return !LimitReached;
}
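
// A sketch of the data flow above (hypothetical accesses): for a group with
// relations { S[i] -> A[i] } and { T[j] -> B[2j] }, restricting to the
// statement domains and taking the range yields one location set per array
// (one for A, one for B), and buildMinMaxAccess contributes one
// minimal/maximal pair for each of them.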

static isl::set getAccessDomain(MemoryAccess *MA) {
  isl::set Domain = MA->getStatement()->getDomain();
  Domain = Domain.project_out(isl::dim::set, 0,
                              unsignedFromIslSize(Domain.tuple_dim()));
  return Domain.reset_tuple_id();
}
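
// Projecting out all set dimensions keeps only the parameter constraints
// under which the statement executes at all. For example (hypothetical), a
// domain { S[i] : 0 <= i < N } becomes the parameter-only set { : N > 0 },
// which is what splitAliasGroupsByDomain below compares for disjointness.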

bool ScopBuilder::buildAliasChecks() {
  if (!PollyUseRuntimeAliasChecks)
    return true;

  if (buildAliasGroups()) {
    // Aliasing assumptions do not go through addAssumption but we still want
    // to collect statistics so we do it here explicitly.
    if (scop->getAliasGroups().size())
      Scop::incrementNumberOfAliasingAssumptions(1);
    return true;
  }

  // If a problem occurs while building the alias groups we need to delete
  // this SCoP and pretend it wasn't valid in the first place. To this end
  // we make the assumed context infeasible.
  scop->invalidate(ALIASING, DebugLoc());

  LLVM_DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr()
                    << " could not be created. This SCoP has been dismissed.");
  return false;
}

std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
ScopBuilder::buildAliasGroupsForAccesses() {
  BatchAAResults BAA(AA);
  AliasSetTracker AST(BAA);

  DenseMap<Value *, MemoryAccess *> PtrToAcc;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;
  for (ScopStmt &Stmt : *scop) {

    isl::set StmtDomain = Stmt.getDomain();
    bool StmtDomainEmpty = StmtDomain.is_empty();

    // Statements with an empty domain will never be executed.
    if (StmtDomainEmpty)
      continue;

    for (MemoryAccess *MA : Stmt) {
      if (MA->isScalarKind())
        continue;
      if (!MA->isRead())
        HasWriteAccess.insert(MA->getScopArrayInfo());
      MemAccInst Acc(MA->getAccessInstruction());
      if (MA->isRead() && isa<MemTransferInst>(Acc))
        PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
      else
        PtrToAcc[Acc.getPointerOperand()] = MA;
      AST.add(Acc);
    }
  }

  AliasGroupVectorTy AliasGroups;
  for (AliasSet &AS : AST) {
    if (AS.isMustAlias() || AS.isForwardingAliasSet())
      continue;
    AliasGroupTy AG;
    for (auto &PR : AS)
      AG.push_back(PtrToAcc[PR.getValue()]);
    if (AG.size() < 2)
      continue;
    AliasGroups.push_back(std::move(AG));
  }

  return std::make_tuple(AliasGroups, HasWriteAccess);
}
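
// Note on the filtering above: sets whose pointers provably alias gain
// nothing from a run-time disjointness check, and forwarding sets have been
// merged into other sets, so only may-alias sets with at least two tracked
// accesses become alias groups. E.g. (hypothetical), loads through %p and %q
// that alias analysis cannot disambiguate end up in one group, while a
// singleton set is dropped.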

bool ScopBuilder::buildAliasGroups() {
  // To create sound alias checks we perform the following steps:
  //   o) We partition each group into read-only and non-read-only accesses.
  //   o) For each group with more than one base pointer we then compute
  //      minimal and maximal accesses to each array of a group in the
  //      read-only and non-read-only partitions separately.
  AliasGroupVectorTy AliasGroups;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;

  std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses();

  splitAliasGroupsByDomain(AliasGroups);

  for (AliasGroupTy &AG : AliasGroups) {
    if (!scop->hasFeasibleRuntimeContext())
      return false;

    {
      IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut);
      bool Valid = buildAliasGroup(AG, HasWriteAccess);
      if (!Valid)
        return false;
    }

    if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) {
      scop->invalidate(COMPLEXITY, DebugLoc());
      return false;
    }
  }

  return true;
}
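
// The IslMaxOperationsGuard above bounds the number of isl computation steps
// (via OptComputeOut); giving it its own scope ends the quota before
// isl_ctx_last_error is queried, so an exhausted budget shows up as
// isl_error_quota and the SCoP is invalidated as too complex rather than
// silently producing partial results.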

bool ScopBuilder::buildAliasGroup(
    AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
  AliasGroupTy ReadOnlyAccesses;
  AliasGroupTy ReadWriteAccesses;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;

  if (AliasGroup.size() < 2)
    return true;

  for (MemoryAccess *Access : AliasGroup) {
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
                                        Access->getAccessInstruction())
             << "Possibly aliasing pointer, use restrict keyword.");
    const ScopArrayInfo *Array = Access->getScopArrayInfo();
    if (HasWriteAccess.count(Array)) {
      ReadWriteArrays.insert(Array);
      ReadWriteAccesses.push_back(Access);
    } else {
      ReadOnlyArrays.insert(Array);
      ReadOnlyAccesses.push_back(Access);
    }
  }

  // If there are no read-only pointers and fewer than two read-write
  // pointers, no alias check is needed.
  if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
    return true;

  // If there is no read-write pointer, no alias check is needed.
  if (ReadWriteArrays.empty())
    return true;
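
  // Spelled out (hypothetical groups): {read A, read B} needs no check since
  // nothing is written; {write A} alone needs none since a single array
  // cannot conflict with itself here; but {write A, read B} or
  // {write A, write B} must be checked for overlap at run time.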

  // For non-affine accesses, no alias check can be generated as we cannot
  // compute a sufficiently tight lower and upper bound: bail out.
  for (MemoryAccess *MA : AliasGroup) {
    if (!MA->isAffine()) {
      scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
                       MA->getAccessInstruction()->getParent());
      return false;
    }
  }

  // Ensure that for all memory accesses for which we generate alias checks,
  // their base pointers are available.
  for (MemoryAccess *MA : AliasGroup) {
    if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
      scop->addRequiredInvariantLoad(
          cast<LoadInst>(BasePtrMA->getAccessInstruction()));
  }

  Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
  Scop::MinMaxVectorTy MinMaxAccessesReadOnly;

  bool Valid;

  Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite);

  if (!Valid)
    return false;

  // Bail out if the number of values we need to compare is too large.
  // This is important as the number of comparisons grows quadratically with
  // the number of values we need to compare.
  if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
      RunTimeChecksMaxArraysPerGroup)
    return false;

  Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);

  scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
  if (!Valid)
    return false;

  return true;
}

void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
  for (unsigned u = 0; u < AliasGroups.size(); u++) {
    AliasGroupTy NewAG;
    AliasGroupTy &AG = AliasGroups[u];
    AliasGroupTy::iterator AGI = AG.begin();
    isl::set AGDomain = getAccessDomain(*AGI);
    while (AGI != AG.end()) {
      MemoryAccess *MA = *AGI;
      isl::set MADomain = getAccessDomain(MA);
      if (AGDomain.is_disjoint(MADomain)) {
        NewAG.push_back(MA);
        AGI = AG.erase(AGI);
      } else {
        AGDomain = AGDomain.unite(MADomain);
        AGI++;
      }
    }
    if (NewAG.size() > 1)
      AliasGroups.push_back(std::move(NewAG));
  }
}
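
// Rationale (hypothetical example): two accesses that are live under mutually
// exclusive parameter conditions, say { : N > 0 } and { : N <= 0 }, can never
// execute together, so checking them against each other would only add
// run-time overhead; they are split into separate groups instead. Split-off
// groups are appended to AliasGroups and revisited by the outer loop.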

#ifndef NDEBUG
static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
  auto PhysUse = VirtualUse::create(S, Op, &LI, false);
  auto VirtUse = VirtualUse::create(S, Op, &LI, true);
  assert(PhysUse.getKind() == VirtUse.getKind());
}

/// Check the consistency of every statement's MemoryAccesses.
///
/// The check is carried out by expecting the "physical" kind of use (derived
/// from the BasicBlock the instruction resides in) to be the same as the
/// "virtual" kind of use (derived from a statement's MemoryAccess).
///
/// The "physical" uses are taken by ensureValueRead to determine whether to
/// create MemoryAccesses. When done, the kind of scalar access should be the
/// same no matter which way it was derived.
///
/// The MemoryAccesses might be changed by later SCoP-modifying passes and
/// hence can intentionally influence the kind of uses (no longer
/// corresponding to the "physical" ones, hence called "virtual"). The
/// CodeGenerator therefore has to pick up the virtual uses. But here in the
/// ScopBuilder, this has not happened yet, such that virtual and physical
/// uses are equivalent.
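///
/// As an illustration (hypothetical): an operand defined in another statement
/// appears as an inter-statement use both from the IR layout and from the
/// MemoryAccesses, so the two VirtualUse::create queries in verifyUse above
/// return the same kind and the assertion holds.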
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
  for (auto *BB : S->getRegion().blocks()) {
    for (auto &Inst : *BB) {
      auto *Stmt = S->getStmtFor(&Inst);
      if (!Stmt)
        continue;

      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Branch conditions are encoded in the statement domains.
      if (Inst.isTerminator() && Stmt->isBlockStmt())
        continue;

      // Verify all uses.
      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);

      // Stores do not produce values used by other statements.
      if (isa<StoreInst>(Inst))
        continue;

      // For every value defined in the block, also check that a use of that
      // value in the same statement would not be an inter-statement use. It
      // can still be synthesizable or load-hoisted, but these kinds of
      // instructions are not directly copied in code generation.
      auto VirtDef =
          VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
      assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
             VirtDef.getKind() == VirtualUse::Intra ||
             VirtDef.getKind() == VirtualUse::Hoisted);
    }
  }

  if (S->hasSingleExitEdge())
    return;

  // PHINodes in the SCoP region's exit block are also uses to be checked.
  if (!S->getRegion().isTopLevelRegion()) {
    for (auto &Inst : *S->getRegion().getExit()) {
      if (!isa<PHINode>(Inst))
        break;

      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);
    }
  }
}
#endif

void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
  scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE,
                      SD.getNextID()));

  buildStmts(R);

  // Create all invariant load instructions first. These are categorized as
  // 'synthesizable', therefore are not part of any ScopStmt but need to be
  // created somewhere.
  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  for (BasicBlock *BB : scop->getRegion().blocks()) {
    if (SD.isErrorBlock(*BB, scop->getRegion()))
      continue;

    for (Instruction &Inst : *BB) {
      LoadInst *Load = dyn_cast<LoadInst>(&Inst);
      if (!Load)
        continue;

      if (!RIL.count(Load))
        continue;

      // Invariant loads require a MemoryAccess to be created in some
      // statement. It is not important to which statement the MemoryAccess is
      // added because it will later be removed from the ScopStmt again. We
      // choose the first statement of the basic block the LoadInst is in.
      ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
      assert(!List.empty());
      ScopStmt *RILStmt = List.front();
      buildMemoryAccess(Load, RILStmt);
    }
  }
  buildAccessFunctions();

  // In case the region does not have an exiting block we will later (during
  // code generation) split the exit block. This will move potential PHI nodes
  // from the current exit block into the new region exiting block. Hence, PHI
  // nodes that are at this point not part of the region will be afterwards.
  // To handle these PHI nodes later we will now model their operands as
  // scalar accesses. Note that we do not model anything in the exit block if
  // we have an exiting block in the region, as there will not be any
  // splitting later.
  if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
    for (Instruction &Inst : *R.getExit()) {
      PHINode *PHI = dyn_cast<PHINode>(&Inst);
      if (!PHI)
        break;

      buildPHIAccesses(nullptr, PHI, nullptr, true);
    }
  }
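
  // Sketch of the situation handled above (hypothetical CFG): with an exit
  // block %exit containing '%phi = phi [ %v, %latch ], ...', splitting the
  // exit during code generation moves %phi into the region, so its incoming
  // operands are modeled as exit-PHI scalar accesses now even though %phi
  // itself is still outside the region at this point.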

  // Create memory accesses for global reads since all arrays are now known.
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
  for (auto GlobalReadPair : GlobalReads) {
    ScopStmt *GlobalReadStmt = GlobalReadPair.first;
    Instruction *GlobalRead = GlobalReadPair.second;
    for (auto *BP : ArrayBasePointers)
      addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead),
                     MemoryAccess::READ, BP, BP->getType(), false, {AF},
                     {nullptr}, GlobalRead);
  }

  buildInvariantEquivalenceClasses();

  // A map from basic blocks to their invalid domains.
  DenseMap<BasicBlock *, isl::set> InvalidDomainMap;

  if (!buildDomains(&R, InvalidDomainMap)) {
    LLVM_DEBUG(
        dbgs() << "Bailing-out because buildDomains encountered problems\n");
    return;
  }

  addUserAssumptions(AC, InvalidDomainMap);

  // Initialize the invalid domain.
  for (ScopStmt &Stmt : scop->Stmts)
    if (Stmt.isBlockStmt())
      Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
    else
      Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
          Stmt.getRegion()->getNode())]);

  // Remove empty statements.
  // Exit early in case there are no executable statements left in this scop.
  scop->removeStmtNotInDomainMap();
  scop->simplifySCoP(false);
  if (scop->isEmpty()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
    return;
  }

  // The ScopStmts now have enough information to initialize themselves.
  for (ScopStmt &Stmt : *scop) {
    collectSurroundingLoops(Stmt);

    buildDomain(Stmt);
    buildAccessRelations(Stmt);

    if (DetectReductions)
      checkForReductions(Stmt);
  }

  // Check early for a feasible runtime context.
  if (!scop->hasFeasibleRuntimeContext()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because of infeasible context (early)\n");
    return;
  }

  // Check early for profitability. Afterwards it cannot change anymore,
  // only the runtime context could become infeasible.
  if (!scop->isProfitable(UnprofitableScalarAccs)) {
    scop->invalidate(PROFITABLE, DebugLoc());
    LLVM_DEBUG(
        dbgs() << "Bailing-out because SCoP is not considered profitable\n");
    return;
  }

  buildSchedule();

  finalizeAccesses();

  scop->realignParams();
  addUserContext();

  // After the context has been fully constructed, and thus all our knowledge
  // about the parameters is in it, we add all recorded assumptions to the
  // assumed/invalid context.
  addRecordedAssumptions();

  scop->simplifyContexts();
  if (!buildAliasChecks()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
    return;
  }

  hoistInvariantLoads();
  canonicalizeDynamicBasePtrs();
  verifyInvariantLoads();
  scop->simplifySCoP(true);

  // Check late for a feasible runtime context because profitability did not
  // change.
  if (!scop->hasFeasibleRuntimeContext()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because of infeasible context (late)\n");
    return;
  }

#ifndef NDEBUG
  verifyUses(scop.get(), LI, DT);
#endif
}

ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AAResults &AA,
                         const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
                         ScopDetection &SD, ScalarEvolution &SE,
                         OptimizationRemarkEmitter &ORE)
    : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
  DebugLoc Beg, End;
  auto P = getBBPairForRegion(R);
  getDebugLocations(P, Beg, End);

  std::string Msg = "SCoP begins here.";
  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
           << Msg);

  buildScop(*R, AC);

  LLVM_DEBUG(dbgs() << *scop);

  if (!scop->hasFeasibleRuntimeContext()) {
    InfeasibleScops++;
    Msg = "SCoP ends here but was dismissed.";
    LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n");
    RecordedAssumptions.clear();
    scop.reset();
  } else {
    Msg = "SCoP ends here.";
    ++ScopFound;
    if (scop->getMaxLoopDepth() > 0)
      ++RichScopFound;
  }

  if (R->isTopLevelRegion())
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
             << Msg);
  else
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
             << Msg);