//===- ScopBuilder.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the SCoP
// detection derived from their LLVM-IR code.
//
//===----------------------------------------------------------------------===//
16 #include "polly/ScopBuilder.h"
17 #include "polly/Options.h"
18 #include "polly/ScopDetection.h"
19 #include "polly/ScopInfo.h"
20 #include "polly/Support/GICHelper.h"
21 #include "polly/Support/ISLTools.h"
22 #include "polly/Support/SCEVValidator.h"
23 #include "polly/Support/ScopHelper.h"
24 #include "polly/Support/VirtualInstruction.h"
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/EquivalenceClasses.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/Sequence.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/Analysis/AliasAnalysis.h"
32 #include "llvm/Analysis/AssumptionCache.h"
33 #include "llvm/Analysis/Delinearization.h"
34 #include "llvm/Analysis/Loads.h"
35 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
37 #include "llvm/Analysis/RegionInfo.h"
38 #include "llvm/Analysis/RegionIterator.h"
39 #include "llvm/Analysis/ScalarEvolution.h"
40 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/DataLayout.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/IR/DerivedTypes.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/InstrTypes.h"
48 #include "llvm/IR/Instruction.h"
49 #include "llvm/IR/Instructions.h"
50 #include "llvm/IR/Type.h"
51 #include "llvm/IR/Use.h"
52 #include "llvm/IR/Value.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
56 #include "llvm/Support/ErrorHandling.h"
57 #include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace polly;

#include "polly/Support/PollyDebug.h"
#define DEBUG_TYPE "polly-scops"
STATISTIC(ScopFound, "Number of valid Scops");
STATISTIC(RichScopFound, "Number of Scops containing a loop");
STATISTIC(InfeasibleScops,
          "Number of SCoPs with statically infeasible context.");
bool polly::ModelReadOnlyScalars;
// The maximal number of dimensions we allow during invariant load construction.
// More complex access ranges will result in very high compile time and are also
// unlikely to result in good code. This value is very high and should only
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
static unsigned const MaxDimensionsInAccessRange = 9;

static cl::opt<bool, true> XModelReadOnlyScalars(
    "polly-analyze-read-only-scalars",
    cl::desc("Model read-only scalar values in the scop description"),
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::init(true),
    cl::cat(PollyCategory));
86 OptComputeOut("polly-analysis-computeout",
87 cl::desc("Bound the scop analysis by a maximal amount of "
88 "computational steps (0 means no bound)"),
89 cl::Hidden
, cl::init(800000), cl::cat(PollyCategory
));
static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
    "polly-allow-dereference-of-all-function-parameters",
    cl::desc(
        "Treat all parameters to functions that are pointers as dereferenceable."
        " This is useful for invariant load hoisting, since we can generate"
        " fewer runtime checks. This is only valid if all pointers to functions"
        " are always initialized, so that Polly can choose to hoist their loads."),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
102 PollyIgnoreInbounds("polly-ignore-inbounds",
103 cl::desc("Do not take inbounds assumptions at all"),
104 cl::Hidden
, cl::init(false), cl::cat(PollyCategory
));
static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
    "polly-rtc-max-arrays-per-group",
    cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::init(20), cl::cat(PollyCategory));
static cl::opt<unsigned> RunTimeChecksMaxAccessDisjuncts(
    "polly-rtc-max-array-disjuncts",
    cl::desc("The maximal number of disjuncts allowed in memory accesses to "
             "different arrays."),
    cl::Hidden, cl::init(8), cl::cat(PollyCategory));
static cl::opt<unsigned> RunTimeChecksMaxParameters(
    "polly-rtc-max-parameters",
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::init(8), cl::cat(PollyCategory));
static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
static cl::opt<std::string> UserContextStr(
    "polly-context", cl::value_desc("isl parameter set"),
    cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));
static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::init(true),
                                      cl::cat(PollyCategory));
// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations
// are, in contrast, pretty stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden,
    cl::cat(PollyCategory));
enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };
static cl::opt<GranularityChoice> StmtGranularity(
    "polly-stmt-granularity",
    cl::desc(
        "Algorithm to use for splitting basic blocks into multiple statements"),
    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
                          "One statement per basic block"),
               clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
                          "Scalar independence heuristic"),
               clEnumValN(GranularityChoice::Stores, "store",
                          "Store-level granularity")),
    cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));
/// Helper to treat non-affine regions and basic blocks the same.
///
/// Return the block that is the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
                           : RN->getNodeAs<BasicBlock>();
}
/// Return the @p idx'th block that is executed after @p RN.
static inline BasicBlock *
getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
  if (RN->isSubRegion()) {
    assert(idx == 0);
    return RN->getNodeAs<Region>()->getExit();
  }
  return TI->getSuccessor(idx);
}
static bool containsErrorBlock(RegionNode *RN, const Region &R,
                               ScopDetection *SD) {
  if (!RN->isSubRegion())
    return SD->isErrorBlock(*RN->getNodeAs<BasicBlock>(), R);
  for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
    if (SD->isErrorBlock(*BB, R))
      return true;
  return false;
}
/// Create a map to map from a given iteration to a subsequent iteration.
///
/// This map maps from SetSpace -> SetSpace where the dimension @p Dim
/// is incremented by one and all other dimensions are equal, e.g.,
///             [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3],
/// if @p Dim is 2 and @p SetSpace has 4 dimensions.
static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
  isl::space MapSpace = SetSpace.map_from_set();
  isl::map NextIterationMap = isl::map::universe(MapSpace);
  for (unsigned u : rangeIslSize(0, NextIterationMap.domain_tuple_dim()))
    if (u != Dim)
      NextIterationMap =
          NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u);
  isl::constraint C =
      isl::constraint::alloc_equality(isl::local_space(MapSpace));
  C = C.set_constant_si(1);
  C = C.set_coefficient_si(isl::dim::in, Dim, 1);
  C = C.set_coefficient_si(isl::dim::out, Dim, -1);
  NextIterationMap = NextIterationMap.add_constraint(C);
  return NextIterationMap;
}
/// Add @p BSet to set @p BoundedParts if @p BSet is bounded.
static isl::set collectBoundedParts(isl::set S) {
  isl::set BoundedParts = isl::set::empty(S.get_space());
  for (isl::basic_set BSet : S.get_basic_set_list())
    if (BSet.is_bounded())
      BoundedParts = BoundedParts.unite(isl::set(BSet));
  return BoundedParts;
}
/// Compute the (un)bounded parts of @p S wrt. dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded then a bounded subset,
///          both with regards to the dimension @p Dim.
static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
                                                       unsigned Dim) {
  for (unsigned u : rangeIslSize(0, S.tuple_dim()))
    S = S.lower_bound_si(isl::dim::set, u, 0);

  unsigned NumDimsS = unsignedFromIslSize(S.tuple_dim());
  isl::set OnlyDimS = S;

  // Remove dimensions that are greater than Dim as they are not interesting.
  assert(NumDimsS >= Dim + 1);
  OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Create artificial parametric upper bounds for dimensions smaller than Dim
  // as we are not interested in them.
  OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim);

  for (unsigned u = 0; u < Dim; u++) {
    isl::constraint C = isl::constraint::alloc_inequality(
        isl::local_space(OnlyDimS.get_space()));
    C = C.set_coefficient_si(isl::dim::param, u, 1);
    C = C.set_coefficient_si(isl::dim::set, u, -1);
    OnlyDimS = OnlyDimS.add_constraint(C);
  }

  // Collect all bounded parts of OnlyDimS.
  isl::set BoundedParts = collectBoundedParts(OnlyDimS);

  // Create the dimensions greater than Dim again.
  BoundedParts =
      BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Remove the artificial upper bound parameters again.
  BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim);

  isl::set UnboundedParts = S.subtract(BoundedParts);
  return std::make_pair(UnboundedParts, BoundedParts);
}
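
// Illustrative sketch (added for clarity, not part of the upstream sources):
// for S = { [i, j] : 0 <= i and 0 <= j <= N } and Dim = 1, dimension j is
// bounded by N for every value of i, so partitionSetParts() returns the pair
// (empty set, S). If the upper bound on j were missing, S itself would come
// back as the unbounded part instead; callers use that part to detect
// potentially infinite loops.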
/// Create the conditions under which @p L @p Pred @p R is true.
static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
                                  isl::pw_aff R) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    return L.eq_set(R);
  case ICmpInst::ICMP_NE:
    return L.ne_set(R);
  case ICmpInst::ICMP_SLT:
    return L.lt_set(R);
  case ICmpInst::ICMP_SLE:
    return L.le_set(R);
  case ICmpInst::ICMP_SGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_SGE:
    return L.ge_set(R);
  case ICmpInst::ICMP_ULT:
    return L.lt_set(R);
  case ICmpInst::ICMP_UGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_ULE:
    return L.le_set(R);
  case ICmpInst::ICMP_UGE:
    return L.ge_set(R);
  default:
    llvm_unreachable("Non integer predicate not supported");
  }
}
isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL,
                                             Loop *NewL) {
  // If the loops are the same there is nothing to do.
  if (NewL == OldL)
    return Dom;

  int OldDepth = scop->getRelativeLoopDepth(OldL);
  int NewDepth = scop->getRelativeLoopDepth(NewL);
  // If both loops are non-affine loops there is nothing to do.
  if (OldDepth == -1 && NewDepth == -1)
    return Dom;

  // Distinguish three cases:
  //   1) The depth is the same but the loops are not.
  //      => One loop was left, one was entered.
  //   2) The depth increased from OldL to NewL.
  //      => One loop was entered, none was left.
  //   3) The depth decreased from OldL to NewL.
  //      => Loops were left; the difference of the depths defines how many.
  if (OldDepth == NewDepth) {
    assert(OldL->getParentLoop() == NewL->getParentLoop());
    Dom = Dom.project_out(isl::dim::set, NewDepth, 1);
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else if (OldDepth < NewDepth) {
    assert(OldDepth + 1 == NewDepth);
    auto &R = scop->getRegion();
    assert(NewL->getParentLoop() == OldL ||
           ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else {
    assert(OldDepth > NewDepth);
    unsigned Diff = OldDepth - NewDepth;
    unsigned NumDim = unsignedFromIslSize(Dom.tuple_dim());
    assert(NumDim >= Diff);
    Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff);
  }

  return Dom;
}
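
// Illustrative sketch (added for clarity, not part of the upstream sources):
// assume OldL is a loop at relative depth 2 and NewL a sibling loop at the
// same depth. A domain { [i0, i1, i2] } may carry constraints on i2 that only
// held inside OldL, so case 1 projects the old innermost dimension out and
// appends a fresh, unconstrained dimension for NewL. Case 2 simply appends a
// dimension, and case 3 drops as many trailing dimensions as loops were left.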
/// Compute the isl representation for the SCEV @p E in this BB.
///
/// @param BB               The BB for which the isl representation is to be
///                         constructed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate the @p E has to be non-negative.
///
/// Note that this function will also adjust the invalid context accordingly.
__isl_give isl_pw_aff *
ScopBuilder::getPwAff(BasicBlock *BB,
                      DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                      const SCEV *E, bool NonNegative) {
  PWACtx PWAC = scop->getPwAff(E, BB, NonNegative, &RecordedAssumptions);
  InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
  return PWAC.first.release();
}
/// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands to ensure that if the
/// MSB (aka the Sign bit) is set for an operand in an unsigned ICmpInst,
/// it wraps around correctly.
///
/// @param IsStrictUpperBound holds information on the predicate relation
/// between TestVal and UpperBound, i.e.,
/// TestVal < UpperBound  OR  TestVal <= UpperBound
__isl_give isl_set *ScopBuilder::buildUnsignedConditionSets(
    BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain,
    const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    bool IsStrictUpperBound) {
  // Do not take NonNeg assumption on TestVal
  // as it might have MSB (Sign bit) set.
  isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, SCEV_TestVal, false);
  // Take NonNeg assumption on UpperBound.
  isl_pw_aff *UpperBound =
      getPwAff(BB, InvalidDomainMap, SCEV_UpperBound, true);

  // 0 <= TestVal
  isl_set *First =
      isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
                            isl_pw_aff_get_domain_space(TestVal))),
                        isl_pw_aff_copy(TestVal));

  isl_set *Second;
  if (IsStrictUpperBound)
    // TestVal < UpperBound
    Second = isl_pw_aff_lt_set(TestVal, UpperBound);
  else
    // TestVal <= UpperBound
    Second = isl_pw_aff_le_set(TestVal, UpperBound);

  isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
  return ConsequenceCondSet;
}
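
// Illustrative sketch (added for clarity, not part of the upstream sources):
// for a branch guarded by "x <u N" this helper returns the set
// { 0 <= x and x < N }. The explicit "0 <= x" part keeps the set empty for
// values of x whose sign bit is set; under unsigned interpretation such values
// are larger than any non-negative N, so silently assuming x to be
// non-negative would be incorrect here.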
bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  Value *Condition = getConditionFromTerminator(SI);
  assert(Condition && "No condition for switch");

  isl_pw_aff *LHS, *RHS;
  LHS = getPwAff(BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));

  unsigned NumSuccessors = SI->getNumSuccessors();
  ConditionSets.resize(NumSuccessors);
  for (auto &Case : SI->cases()) {
    unsigned Idx = Case.getSuccessorIndex();
    ConstantInt *CaseValue = Case.getCaseValue();

    RHS = getPwAff(BB, InvalidDomainMap, SE.getSCEV(CaseValue));
    isl_set *CaseConditionSet =
        buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS),
                          isl::manage(RHS))
            .release();
    ConditionSets[Idx] = isl_set_coalesce(
        isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
  }

  assert(ConditionSets[0] == nullptr && "Default condition set was set");
  isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
  for (unsigned u = 2; u < NumSuccessors; u++)
    ConditionSetUnion =
        isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
  ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion);

  isl_pw_aff_free(LHS);

  return true;
}
bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L,
    __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  isl_set *ConsequenceCondSet = nullptr;

  if (auto Load = dyn_cast<LoadInst>(Condition)) {
    const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
    const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
    bool NonNeg = false;
    isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, LHSSCEV, NonNeg);
    isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, RHSSCEV, NonNeg);
    ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS),
                                           isl::manage(RHS))
                             .release();
  } else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast<ConstantInt>(
        getUniqueNonErrorValue(PHI, &scop->getRegion(), &SD));
    assert(Unique &&
           "A PHINode condition should only be accepted by ScopDetection if "
           "getUniqueNonErrorValue returns non-NULL");

    if (Unique->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
    if (CCond->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    assert(Opcode == Instruction::And || Opcode == Instruction::Or);

    bool Valid = buildConditionSets(BB, BinOp->getOperand(0), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets) &&
                 buildConditionSets(BB, BinOp->getOperand(1), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets);
    if (!Valid) {
      while (!ConditionSets.empty())
        isl_set_free(ConditionSets.pop_back_val());
      return false;
    }

    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart1 = ConditionSets.pop_back_val();

    if (Opcode == Instruction::And)
      ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
    else
      ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
  } else {
    auto *ICond = dyn_cast<ICmpInst>(Condition);
    assert(ICond &&
           "Condition of exiting branch was neither constant nor ICmp!");

    Region &R = scop->getRegion();

    isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assumed the signed bit of neither operand
    // to be set. The comparison is equal to a signed comparison under this
    // assumption.
    bool NonNeg = ICond->isUnsigned();
    const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
               *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);

    LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, &SD);
    RightOperand = tryForwardThroughPHI(RightOperand, R, SE, &SD);

    switch (ICond->getPredicate()) {
    case ICmpInst::ICMP_ULT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_ULE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, false);
      break;
    case ICmpInst::ICMP_UGT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_UGE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, false);
      break;
    default:
      LHS = getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg);
      RHS = getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg);
      ConsequenceCondSet = buildConditionSet(ICond->getPredicate(),
                                             isl::manage(LHS), isl::manage(RHS))
                               .release();
      break;
    }
  }

  // If no terminator was given we are only looking for parameter constraints
  // under which @p Condition is true/false.
  if (!TI)
    ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
  assert(ConsequenceCondSet);
  ConsequenceCondSet = isl_set_coalesce(
      isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));

  isl_set *AlternativeCondSet = nullptr;
  bool TooComplex =
      isl_set_n_basic_set(ConsequenceCondSet) >= (int)MaxDisjunctsInDomain;

  if (!TooComplex) {
    AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
                                          isl_set_copy(ConsequenceCondSet));
    TooComplex =
        isl_set_n_basic_set(AlternativeCondSet) >= (int)MaxDisjunctsInDomain;
  }

  if (TooComplex) {
    scop->invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
                     TI ? TI->getParent() : nullptr /* BasicBlock */);
    isl_set_free(AlternativeCondSet);
    isl_set_free(ConsequenceCondSet);
    return false;
  }

  ConditionSets.push_back(ConsequenceCondSet);
  ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));

  return true;
}
bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
    return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap,
                              ConditionSets);

  assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");

  if (TI->getNumSuccessors() == 1) {
    ConditionSets.push_back(isl_set_copy(Domain));
    return true;
  }

  Value *Condition = getConditionFromTerminator(TI);
  assert(Condition && "No condition for Terminator");

  return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap,
                            ConditionSets);
}
bool ScopBuilder::propagateDomainConstraints(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Iterate over the region R and propagate the domain constraints from the
  // predecessors to the current node. In contrast to the
  // buildDomainsWithBranchConstraints function, this one will pull the domain
  // information from the predecessors instead of pushing it to the successors.
  // Additionally, we assume the domains to be already present in the domain
  // map here. However, we iterate again in reverse post order so we know all
  // predecessors have been visited before a block or non-affine subregion is
  // visited.

  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        if (!propagateDomainConstraints(SubRegion, InvalidDomainMap))
          return false;
        continue;
      }
    }

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = scop->getOrInitEmptyDomain(BB);
    assert(!Domain.is_null());

    // Under the union of all predecessor conditions we can reach this block.
    isl::set PredDom = getPredecessorDomainConstraints(BB, Domain);
    Domain = Domain.intersect(PredDom).coalesce();
    Domain = Domain.align_params(scop->getParamSpace());

    Loop *BBLoop = getRegionNodeLoop(RN, LI);
    if (BBLoop && BBLoop->getHeader() == BB && scop->contains(BBLoop))
      if (!addLoopBoundsToHeaderDomain(BBLoop, InvalidDomainMap))
        return false;
  }

  return true;
}
void ScopBuilder::propagateDomainConstraintsToRegionExit(
    BasicBlock *BB, Loop *BBLoop,
    SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Check if the block @p BB is the entry of a region. If so we propagate its
  // domain to the exit block of the region. Otherwise we are done.
  auto *RI = scop->getRegion().getRegionInfo();
  auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
  auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
  if (!BBReg || BBReg->getEntry() != BB || !scop->contains(ExitBB))
    return;

  // Do not propagate the domain if there is a loop backedge inside the region
  // that would prevent the exit block from being executed.
  auto *L = BBLoop;
  while (L && scop->contains(L)) {
    SmallVector<BasicBlock *, 4> LatchBBs;
    BBLoop->getLoopLatches(LatchBBs);
    for (auto *LatchBB : LatchBBs)
      if (BB != LatchBB && BBReg->contains(LatchBB))
        return;
    L = L->getParentLoop();
  }

  isl::set Domain = scop->getOrInitEmptyDomain(BB);
  assert(!Domain.is_null() && "Cannot propagate a nullptr");

  Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, scop->getBoxedLoops());

  // Since the dimensions of @p BB and @p ExitBB might be different we have to
  // adjust the domain before we can propagate it.
  isl::set AdjustedDomain = adjustDomainDimensions(Domain, BBLoop, ExitBBLoop);
  isl::set &ExitDomain = scop->getOrInitEmptyDomain(ExitBB);

  // If the exit domain is not yet created we set it, otherwise we "add" the
  // current domain.
  ExitDomain =
      !ExitDomain.is_null() ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;

  // Initialize the invalid domain.
  InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());

  FinishedExitBlocks.insert(ExitBB);
}
isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
                                                      isl::set Domain) {
  // If @p BB is the ScopEntry we are done.
  if (scop->getRegion().getEntry() == BB)
    return isl::set::universe(Domain.get_space());

  // The region info of this function.
  auto &RI = *scop->getRegion().getRegionInfo();

  Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, scop->getBoxedLoops());

  // A domain to collect all predecessor domains, thus all conditions under
  // which the block is executed. To this end we start with the empty domain.
  isl::set PredDom = isl::set::empty(Domain.get_space());

  // Set of regions of which the entry block domain has been propagated to BB.
  // All predecessors inside any of the regions can be skipped.
  SmallSet<Region *, 8> PropagatedRegions;

  for (auto *PredBB : predecessors(BB)) {
    // Skip backedges.
    if (DT.dominates(BB, PredBB))
      continue;

    // If the predecessor is in a region we used for propagation we can skip it.
    auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
    if (llvm::any_of(PropagatedRegions, PredBBInRegion)) {
      continue;
    }

    // Check if there is a valid region we can use for propagation, thus look
    // for a region that contains the predecessor and has @p BB as exit block.
    // FIXME: This was a side-effect-free (and possibly infinite) loop when
    // committed and seems not to be needed.
    auto *PredR = RI.getRegionFor(PredBB);
    while (PredR->getExit() != BB && !PredR->contains(BB))
      PredR = PredR->getParent();

    // If a valid region for propagation was found use the entry of that region
    // for propagation, otherwise the PredBB directly.
    if (PredR->getExit() == BB) {
      PredBB = PredR->getEntry();
      PropagatedRegions.insert(PredR);
    }

    isl::set PredBBDom = scop->getDomainConditions(PredBB);
    Loop *PredBBLoop =
        getFirstNonBoxedLoopFor(PredBB, LI, scop->getBoxedLoops());
    PredBBDom = adjustDomainDimensions(PredBBDom, PredBBLoop, BBLoop);
    PredDom = PredDom.unite(PredBBDom);
  }

  return PredDom;
}
bool ScopBuilder::addLoopBoundsToHeaderDomain(
    Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  int LoopDepth = scop->getRelativeLoopDepth(L);
  assert(LoopDepth >= 0 && "Loop in region should have at least depth one");

  BasicBlock *HeaderBB = L->getHeader();
  assert(scop->isDomainDefined(HeaderBB));
  isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(HeaderBB);

  isl::map NextIterationMap =
      createNextIterationMap(HeaderBBDom.get_space(), LoopDepth);

  isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());

  SmallVector<BasicBlock *, 4> LatchBlocks;
  L->getLoopLatches(LatchBlocks);

  for (BasicBlock *LatchBB : LatchBlocks) {
    // If the latch is only reachable via error statements we skip it.
    if (!scop->isDomainDefined(LatchBB))
      continue;

    isl::set LatchBBDom = scop->getDomainConditions(LatchBB);

    isl::set BackedgeCondition;

    Instruction *TI = LatchBB->getTerminator();
    BranchInst *BI = dyn_cast<BranchInst>(TI);
    assert(BI && "Only branch instructions allowed in loop latches");

    if (BI->isUnconditional())
      BackedgeCondition = LatchBBDom;
    else {
      SmallVector<isl_set *, 8> ConditionSets;
      int idx = BI->getSuccessor(0) != HeaderBB;
      if (!buildConditionSets(LatchBB, TI, L, LatchBBDom.get(),
                              InvalidDomainMap, ConditionSets))
        return false;

      // Free the non back edge condition set as we do not need it.
      isl_set_free(ConditionSets[1 - idx]);

      BackedgeCondition = isl::manage(ConditionSets[idx]);
    }

    int LatchLoopDepth = scop->getRelativeLoopDepth(LI.getLoopFor(LatchBB));
    assert(LatchLoopDepth >= LoopDepth);
    BackedgeCondition = BackedgeCondition.project_out(
        isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
    UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
  }

  isl::map ForwardMap = ForwardMap.lex_le(HeaderBBDom.get_space());
  for (int i = 0; i < LoopDepth; i++)
    ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);

  isl::set UnionBackedgeConditionComplement =
      UnionBackedgeCondition.complement();
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
                                                      0);
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.apply(ForwardMap);
  HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
  HeaderBBDom = HeaderBBDom.apply(NextIterationMap);

  auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
  HeaderBBDom = Parts.second;

  // Check if there is a <nsw> tagged AddRec for this loop and if so do not
  // require a runtime check. The assumption is already implied by the <nsw>
  // tag.
  bool RequiresRTC = !scop->hasNSWAddRecForLoop(L);

  isl::set UnboundedCtx = Parts.first.params();
  recordAssumption(&RecordedAssumptions, INFINITELOOP, UnboundedCtx,
                   HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION,
                   nullptr, RequiresRTC);
  return true;
}
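
// Illustrative sketch (added for clarity, not part of the upstream sources):
// for a simple counted loop "for (i = 0; i < N; i++)" the union of the
// backedge conditions describes the iterations after which the loop is
// entered again. Applying NextIterationMap shifts that set by one iteration,
// so together with the initial iteration the header domain becomes roughly
// { [i] : 0 <= i < N }, and partitionSetParts() separates any remaining
// unbounded part, which is recorded as an INFINITELOOP restriction.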
void ScopBuilder::buildInvariantEquivalenceClasses() {
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;

  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : RIL) {
    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());

    Type *Ty = LInst->getType();
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
    if (ClassRep) {
      scop->addInvariantLoadMapping(LInst, ClassRep);
      continue;
    }

    ClassRep = LInst;
    scop->addInvariantEquivClass(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), {}, Ty});
  }
}
bool ScopBuilder::buildDomains(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R);
  auto *EntryBB = R->getEntry();
  auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
  int LD = scop->getRelativeLoopDepth(L);
  auto *S =
      isl_set_universe(isl_space_set_alloc(scop->getIslCtx().get(), 0, LD + 1));

  InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
  isl::set Domain = isl::manage(S);
  scop->setDomain(EntryBB, Domain);

  if (IsOnlyNonAffineRegion)
    return !containsErrorBlock(R->getNode(), *R, &SD);

  if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap))
    return false;

  if (!propagateDomainConstraints(R, InvalidDomainMap))
    return false;

  // Error blocks and blocks dominated by them have been assumed to never be
  // executed. Representing them in the Scop does not add any value. In fact,
  // it is likely to cause issues during construction of the ScopStmts. The
  // contents of error blocks have not been verified to be expressible and
  // will cause problems when building up a ScopStmt for them.
  // Furthermore, basic blocks dominated by error blocks may reference
  // instructions in the error block which, if the error block is not modeled,
  // can themselves not be constructed properly. To this end we will replace
  // the domains of error blocks and those only reachable via error blocks
  // with an empty set. Additionally, we will record for each block under which
  // parameter combination it would be reached via an error block in its
  // InvalidDomain. This information is needed during load hoisting.
  if (!propagateInvalidStmtDomains(R, InvalidDomainMap))
    return false;

  return true;
}
bool ScopBuilder::buildDomainsWithBranchConstraints(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // To create the domain for each block in R we iterate over all blocks and
  // subregions in R and propagate the conditions under which the current region
  // element is executed. To this end we iterate in reverse post order over R as
  // it ensures that we first visit all predecessors of a region node (either a
  // basic block or a subregion) before we visit the region node itself.
  // Initially, only the domain for the SCoP region entry block is set and from
  // there we propagate the current domain to all successors, however we add the
  // condition that the successor is actually executed next.
  // As we are only interested in non-loop carried constraints here we can
  // simply skip loop back edges.

  SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        if (!buildDomainsWithBranchConstraints(SubRegion, InvalidDomainMap))
          return false;
        continue;
      }
    }

    if (containsErrorBlock(RN, scop->getRegion(), &SD))
      scop->notifyErrorBlock();

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    Instruction *TI = BB->getTerminator();

    if (isa<UnreachableInst>(TI))
      continue;

    if (!scop->isDomainDefined(BB))
      continue;
    isl::set Domain = scop->getDomainConditions(BB);

    scop->updateMaxLoopDepth(unsignedFromIslSize(Domain.tuple_dim()));

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    // Propagate the domain from BB directly to blocks that have a superset
    // domain, at the moment only region exit nodes of regions that start in BB.
    propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks,
                                           InvalidDomainMap);

    // If all successors of BB have been set a domain through the propagation
    // above we do not need to build condition sets but can just skip this
    // block. However, it is important to note that this is a local property
    // with regards to the region @p R. To this end FinishedExitBlocks is a
    // local variable.
    auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
      return FinishedExitBlocks.count(SuccBB);
    };
    if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
      continue;

    // Build the condition sets for the successor nodes of the current region
    // node. If it is a non-affine subregion we will always execute the single
    // exit node, hence the single entry node domain is the condition set. For
    // basic blocks we use the helper function buildConditionSets.
    SmallVector<isl_set *, 8> ConditionSets;
    if (RN->isSubRegion())
      ConditionSets.push_back(Domain.copy());
    else if (!buildConditionSets(BB, TI, BBLoop, Domain.get(), InvalidDomainMap,
                                 ConditionSets))
      return false;

    // Now iterate over the successors and set their initial domain based on
    // their condition set. We skip back edges here and have to be careful when
    // we leave a loop not to keep constraints over a dimension that doesn't
    // exist anymore.
    assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
    for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
      isl::set CondSet = isl::manage(ConditionSets[u]);
      BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip blocks outside the region.
      if (!scop->contains(SuccBB))
        continue;

      // If we propagate the domain of some block to "SuccBB" we do not have to
      // adjust the domain.
      if (FinishedExitBlocks.count(SuccBB))
        continue;

      // Skip back edges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop =
          getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());

      CondSet = adjustDomainDimensions(CondSet, BBLoop, SuccBBLoop);

      // Set the domain for the successor or merge it with an existing domain in
      // case there are multiple paths (without loop back edges) to the
      // successor block.
      isl::set &SuccDomain = scop->getOrInitEmptyDomain(SuccBB);

      if (!SuccDomain.is_null()) {
        SuccDomain = SuccDomain.unite(CondSet).coalesce();
      } else {
        // Initialize the invalid domain.
        InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
        SuccDomain = CondSet;
      }

      SuccDomain = SuccDomain.detect_equalities();

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will clean up and bail.
      if (unsignedFromIslSize(SuccDomain.n_basic_set()) < MaxDisjunctsInDomain)
        continue;

      scop->invalidate(COMPLEXITY, DebugLoc());
      while (++u < ConditionSets.size())
        isl_set_free(ConditionSets[u]);
      return false;
    }
  }

  return true;
}
bool ScopBuilder::propagateInvalidStmtDomains(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {

    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        propagateInvalidStmtDomains(SubRegion, InvalidDomainMap);
        continue;
      }
    }

    bool ContainsErrorBlock = containsErrorBlock(RN, scop->getRegion(), &SD);
    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = scop->getOrInitEmptyDomain(BB);
    assert(!Domain.is_null() && "Cannot propagate a nullptr");

    isl::set InvalidDomain = InvalidDomainMap[BB];

    bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);

    if (!IsInvalidBlock) {
      InvalidDomain = InvalidDomain.intersect(Domain);
    } else {
      InvalidDomain = Domain;
      isl::set DomPar = Domain.params();
      recordAssumption(&RecordedAssumptions, ERRORBLOCK, DomPar,
                       BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
      Domain = isl::set::empty(Domain.get_space());
    }

    if (InvalidDomain.is_empty()) {
      InvalidDomainMap[BB] = InvalidDomain;
      continue;
    }

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    auto *TI = BB->getTerminator();
    unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
    for (unsigned u = 0; u < NumSuccs; u++) {
      auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip successors outside the SCoP.
      if (!scop->contains(SuccBB))
        continue;

      // Skip back edges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop =
          getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());

      auto AdjustedInvalidDomain =
          adjustDomainDimensions(InvalidDomain, BBLoop, SuccBBLoop);

      isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
      SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain);
      SuccInvalidDomain = SuccInvalidDomain.coalesce();

      InvalidDomainMap[SuccBB] = SuccInvalidDomain;

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will bail.
      if (unsignedFromIslSize(SuccInvalidDomain.n_basic_set()) <
          MaxDisjunctsInDomain)
        continue;

      InvalidDomainMap.erase(BB);
      scop->invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
      return false;
    }

    InvalidDomainMap[BB] = InvalidDomain;
  }

  return true;
}
void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                   Region *NonAffineSubRegion,
                                   bool IsExitBlock) {
  // PHI nodes that are in the exit block of the region, hence if IsExitBlock is
  // true, are not modeled as ordinary PHI nodes as they are not part of the
  // region. However, we model the operands in the predecessor blocks that are
  // part of the region as regular scalar accesses.

  // If we can synthesize a PHI we can skip it, however only if it is in
  // the region. If it is not it can only be in the exit block of the region.
  // In this case we model the operands but not the PHI itself.
  auto *Scope = LI.getLoopFor(PHI->getParent());
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
    return;

  // PHI nodes are modeled as if they had been demoted prior to the SCoP
  // detection. Hence, the PHI is a load of a new memory location in which the
  // incoming value was written at the end of the incoming basic block.
  bool OnlyNonAffineSubRegionOperands = true;
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
    Value *Op = PHI->getIncomingValue(u);
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
    ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));

    // Do not build PHI dependences inside a non-affine subregion, but make
    // sure that the necessary scalar values are still made available.
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
      auto *OpInst = dyn_cast<Instruction>(Op);
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
        ensureValueRead(Op, OpStmt);
      continue;
    }

    OnlyNonAffineSubRegionOperands = false;
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
  }

  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
    addPHIReadAccess(PHIStmt, PHI);
  }
}
void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
                                         Instruction *Inst) {
  assert(!isa<PHINode>(Inst));

  // Pull-in required operands.
  for (Use &Op : Inst->operands())
    ensureValueRead(Op.get(), UserStmt);
}
// Create a sequence of two schedules. Either argument may be null and is
// interpreted as the empty schedule. Can also return null if both schedules are
// empty.
static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
  if (Prev.is_null())
    return Succ;
  if (Succ.is_null())
    return Prev;

  return Prev.sequence(Succ);
}
// Create an isl_multi_union_aff that defines an identity mapping from the
// elements of USet to their N-th dimension.
//
// # Example:
//
//   Domain: { A[i,j]; B[i,j,k] }
//   N: 1
//
//   Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
//
// @param USet   A union set describing the elements for which to generate a
//               mapping.
// @param N      The dimension to map to.
// @returns      A mapping from USet to its N-th dimension.
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N) {
  assert(!USet.is_null());
  assert(!USet.is_empty());

  auto Result = isl::union_pw_multi_aff::empty(USet.get_space());

  for (isl::set S : USet.get_set_list()) {
    unsigned Dim = unsignedFromIslSize(S.tuple_dim());
    assert(Dim >= N);
    auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
                                                  N, Dim - N);
    if (N > 1)
      PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);

    Result = Result.add_pw_multi_aff(PMA);
  }

  return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
}
void ScopBuilder::buildSchedule() {
  Loop *L = getLoopSurroundingScop(*scop, LI);
  LoopStackTy LoopStack({LoopStackElementTy(L, {}, 0)});
  buildSchedule(scop->getRegion().getNode(), LoopStack);
  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
  scop->setScheduleTree(LoopStack[0].Schedule);
}
/// To generate a schedule for the elements in a Region we traverse the Region
/// in reverse-post-order and add the contained RegionNodes in traversal order
/// to the schedule of the loop that is currently at the top of the LoopStack.
/// For loop-free codes, this results in a correct sequential ordering.
///
/// Including loops requires additional processing. Whenever a loop header is
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing any other nodes. To implement this
/// loop-nodes-first-processing, the reverse post-order traversal is
/// insufficient. Hence, we additionally check if the traversal yields
/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
/// These region-nodes are then queued and only traversed after all nodes
/// within the current loop have been processed.
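///
/// Illustrative sketch (added for clarity, not part of the upstream sources):
/// given "loop L { bb1; bb2; } bb3;", the reverse post-order traversal may
/// reach bb3 while L is still the top of the LoopStack. bb3 is then pushed
/// onto the DelayList and only scheduled after bb1 and bb2 have completed L's
/// partial schedule, which keeps the statements of L contiguous.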
void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
  Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);

  ReversePostOrderTraversal<Region *> RTraversal(R);
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  std::deque<RegionNode *> DelayList;
  bool LastRNWaiting = false;

  // Iterate over the region @p R in reverse post-order but queue
  // sub-regions/blocks iff they are not part of the last encountered but not
  // completely traversed loop. The variable LastRNWaiting is a flag to indicate
  // that we queued the last sub-region/block from the reverse post-order
  // iterator. If it is set we have to explore the next sub-region/block from
  // the iterator (if any) to guarantee progress. If it is not set we first try
  // the next queued sub-region/blocks.
  while (!WorkList.empty() || !DelayList.empty()) {
    RegionNode *RN;

    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
      RN = WorkList.front();
      WorkList.pop_front();
      LastRNWaiting = false;
    } else {
      RN = DelayList.front();
      DelayList.pop_front();
    }

    Loop *L = getRegionNodeLoop(RN, LI);
    if (!scop->contains(L))
      L = OuterScopLoop;

    Loop *LastLoop = LoopStack.back().L;
    if (LastLoop != L) {
      if (LastLoop && !LastLoop->contains(L)) {
        LastRNWaiting = true;
        DelayList.push_back(RN);
        continue;
      }
      LoopStack.push_back({L, {}, 0});
    }
    buildSchedule(RN, LoopStack);
  }
}
void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
  if (RN->isSubRegion()) {
    auto *LocalRegion = RN->getNodeAs<Region>();
    if (!scop->isNonAffineSubRegion(LocalRegion)) {
      buildSchedule(LocalRegion, LoopStack);
      return;
    }
  }

  assert(LoopStack.rbegin() != LoopStack.rend());
  auto LoopData = LoopStack.rbegin();
  LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);

  for (auto *Stmt : scop->getStmtListFor(RN)) {
    isl::union_set UDomain{Stmt->getDomain()};
    auto StmtSchedule = isl::schedule::from_domain(UDomain);
    LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
  }

  // Check if we just processed the last node in this loop. If we did, finalize
  // the loop by:
  //
  //   - adding new schedule dimensions
  //   - folding the resulting schedule into the parent loop schedule
  //   - dropping the loop schedule from the LoopStack.
  //
  // Then continue to check surrounding loops, which might also have been
  // completed by this node.
  size_t Dimension = LoopStack.size();
  while (LoopData->L &&
         LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
    isl::schedule Schedule = LoopData->Schedule;
    auto NumBlocksProcessed = LoopData->NumBlocksProcessed;

    assert(std::next(LoopData) != LoopStack.rend());
    Loop *L = LoopData->L;
    ++LoopData;
    --Dimension;

    if (!Schedule.is_null()) {
      isl::union_set Domain = Schedule.get_domain();
      isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
      Schedule = Schedule.insert_partial_schedule(MUPA);

      if (hasDisableAllTransformsHint(L)) {
        /// If any of the loops has a disable_nonforced heuristic, mark the
        /// entire SCoP as such. The ISL rescheduler can only reschedule the
        /// SCoP in its entirety.
        /// TODO: ScopDetection could avoid including such loops or warp them as
        /// boxed loop. It still needs to pass-through loop with user-defined
        /// metadata.
        scop->markDisableHeuristics();
      }

      // It is easier to insert the marks here than to do it retroactively.
      isl::id IslLoopId = createIslLoopAttr(scop->getIslCtx(), L);
      if (!IslLoopId.is_null())
        Schedule =
            Schedule.get_root().child(0).insert_mark(IslLoopId).get_schedule();

      LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
    }

    LoopData->NumBlocksProcessed += NumBlocksProcessed;
  }
  // Now pop all loops processed up there from the LoopStack
  LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
}
void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
  // Check for uses of this instruction outside the scop. Because we do not
  // iterate over such instructions and therefore did not "ensure" the existence
  // of a write, we must determine such use here.
  if (scop->isEscaping(Inst))
    ensureValueWrite(Inst);
}
void ScopBuilder::addRecordedAssumptions() {
  for (auto &AS : llvm::reverse(RecordedAssumptions)) {
    if (!AS.BB) {
      scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
                          nullptr /* BasicBlock */, AS.RequiresRTC);
      continue;
    }

    // If the domain was deleted the assumptions are void.
    isl_set *Dom = scop->getDomainConditions(AS.BB).release();
    if (!Dom)
      continue;

    // If a basic block was given use its domain to simplify the assumption.
    // In case of restrictions we know they only have to hold on the domain,
    // thus we can intersect them with the domain of the block. However, for
    // assumptions the domain has to imply them, thus:
    //
    //   Dom => S   <==>   !Dom v S   <==>   !(Dom - S)
    //
    // To avoid the complement we will register (Dom - S) as a restriction
    // instead of an assumption.
    isl_set *S = AS.Set.copy();
    if (AS.Sign == AS_RESTRICTION)
      S = isl_set_params(isl_set_intersect(S, Dom));
    else /* (AS.Sign == AS_ASSUMPTION) */
      S = isl_set_params(isl_set_subtract(Dom, S));

    scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB,
                        AS.RequiresRTC);
  }
}
void ScopBuilder::addUserAssumptions(
    AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  for (auto &Assumption : AC.assumptions()) {
    auto *CI = dyn_cast_or_null<CallInst>(Assumption);
    if (!CI || CI->arg_size() != 1)
      continue;

    bool InScop = scop->contains(CI);
    if (!InScop && !scop->isDominatedBy(DT, CI->getParent()))
      continue;

    auto *L = LI.getLoopFor(CI->getParent());
    auto *Val = CI->getArgOperand(0);
    ParameterSetTy DetectedParams;
    auto &R = scop->getRegion();
    if (!isAffineConstraint(Val, &R, L, SE, DetectedParams)) {
      ORE.emit(
          OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
          << "Non-affine user assumption ignored.");
      continue;
    }

    // Collect all newly introduced parameters.
    ParameterSetTy NewParams;
    for (auto *Param : DetectedParams) {
      Param = extractConstantFactor(Param, SE).second;
      Param = scop->getRepresentingInvariantLoadSCEV(Param);
      if (scop->isParam(Param))
        continue;
      NewParams.insert(Param);
    }

    SmallVector<isl_set *, 2> ConditionSets;
    auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
    BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
    auto *Dom = InScop ? isl_set_copy(scop->getDomainConditions(BB).get())
                       : isl_set_copy(scop->getContext().get());
    assert(Dom && "Cannot propagate a nullptr.");
    bool Valid = buildConditionSets(BB, Val, TI, L, Dom, InvalidDomainMap,
                                    ConditionSets);
    isl_set_free(Dom);

    if (!Valid)
      continue;

    isl_set *AssumptionCtx = nullptr;
    if (InScop) {
      AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
      isl_set_free(ConditionSets[0]);
    } else {
      AssumptionCtx = isl_set_complement(ConditionSets[1]);
      AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
    }

    // Project out newly introduced parameters as they are not otherwise useful.
    if (!NewParams.empty()) {
      for (isl_size u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
        auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
        auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
        isl_id_free(Id);

        if (!NewParams.count(Param))
          continue;

        AssumptionCtx =
            isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
      }
    }
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
             << "Use user assumption: "
             << stringFromIslObj(AssumptionCtx, "null"));
    isl::set newContext =
        scop->getContext().intersect(isl::manage(AssumptionCtx));
    scop->setContext(newContext);
  }
}
bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
  // Memory builtins are not considered in this function.
  if (!Inst.isLoad() && !Inst.isStore())
    return false;

  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  Value *Address = Inst.getPointerOperand();
  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  if (auto *BitCast = dyn_cast<BitCastInst>(Address))
    Address = BitCast->getOperand(0);

  auto *GEP = dyn_cast<GetElementPtrInst>(Address);
  if (!GEP || DL.getTypeAllocSize(GEP->getResultElementType()) !=
                  DL.getTypeAllocSize(ElementType))
    return false;

  SmallVector<const SCEV *, 4> Subscripts;
  SmallVector<int, 4> Sizes;
  getIndexExpressionsFromGEP(SE, GEP, Subscripts, Sizes);
  auto *BasePtr = GEP->getOperand(0);

  if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
    BasePtr = BasePtrCast->getOperand(0);

  // Check for identical base pointers to ensure that we do not miss index
  // offsets that have been added before this GEP is applied.
  if (BasePtr != BasePointer->getValue())
    return false;

  std::vector<const SCEV *> SizesSCEV;

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  for (auto *Subscript : Subscripts) {
    InvariantLoadsSetTy AccessILS;
    if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
                      &AccessILS))
      return false;

    for (LoadInst *LInst : AccessILS)
      if (!ScopRIL.count(LInst))
        return false;
  }

  if (Sizes.empty())
    return false;

  SizesSCEV.push_back(nullptr);

  for (auto V : Sizes)
    SizesSCEV.push_back(SE.getSCEV(
        ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, Subscripts, SizesSCEV, Val);

  return true;
}
bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
  // Memory builtins are not considered by this function.
  if (!Inst.isLoad() && !Inst.isStore())
    return false;

  if (!PollyDelinearize)
    return false;

  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  unsigned ElementSize = DL.getTypeAllocSize(ElementType);
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");

  auto &InsnToMemAcc = scop->getInsnToMemAccMap();
  auto AccItr = InsnToMemAcc.find(Inst);
  if (AccItr == InsnToMemAcc.end())
    return false;

  std::vector<const SCEV *> Sizes = {nullptr};

  Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
               AccItr->second.Shape->DelinearizedSizes.end());

  // In case only the element size is contained in the 'Sizes' array, the
  // access does not access a real multi-dimensional array. Hence, we allow
  // the normal single-dimensional access construction to handle this.
  if (Sizes.size() == 1)
    return false;

  // Remove the element size. This information is already provided by the
  // ElementSize parameter. In case the element size of this access and the
  // element size used for delinearization differs the delinearization is
  // incorrect. Hence, we invalidate the scop.
  //
  // TODO: Handle delinearization with differing element sizes.
  auto DelinearizedSize =
      cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
  Sizes.pop_back();
  if (ElementSize != DelinearizedSize)
    scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);

  return true;
}
bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
  auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);

  if (MemIntr == nullptr)
    return false;

  auto *L = LI.getLoopFor(Inst->getParent());
  const SCEV *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
  assert(LengthVal);

  // Check if the length val is actually affine or if we overapproximate it.
  InvariantLoadsSetTy AccessILS;
  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
                                     LengthVal, SE, &AccessILS);
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      LengthIsAffine = false;
  if (!LengthIsAffine)
    LengthVal = nullptr;

  auto *DestPtrVal = MemIntr->getDest();
  assert(DestPtrVal);

  const SCEV *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
  assert(DestAccFunc);
  // Ignore accesses to "NULL".
  // TODO: We could use this to optimize the region further, e.g., intersect
  //       the context with
  //         isl_set_complement(isl_set_params(getDomain()))
  //       as we know it would be undefined to execute this instruction anyway.
  if (DestAccFunc->isZero())
    return true;

  if (auto *U = dyn_cast<SCEVUnknown>(DestAccFunc)) {
    if (isa<ConstantPointerNull>(U->getValue()))
      return true;
  }

  auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
  assert(DestPtrSCEV);
  DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(DestPtrVal->getContext()),
                 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
  if (!MemTrans)
    return true;

  auto *SrcPtrVal = MemTrans->getSource();
  assert(SrcPtrVal);

  const SCEV *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
  assert(SrcAccFunc);
  // Ignore accesses to "NULL".
  // TODO: See above TODO
  if (SrcAccFunc->isZero())
    return true;

  auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
  assert(SrcPtrSCEV);
  SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
                 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  return true;
}
bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
  auto *CI = dyn_cast_or_null<CallInst>(Inst);

  if (CI == nullptr)
    return false;

  if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
    return true;

  const SCEV *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
  auto *CalledFunction = CI->getCalledFunction();
  MemoryEffects ME = AA.getMemoryEffects(CalledFunction);
  if (ME.doesNotAccessMemory())
    return true;

  if (ME.onlyAccessesArgPointees()) {
    ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
    auto AccType =
        !isModSet(ArgMR) ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
    Loop *L = LI.getLoopFor(Inst->getParent());
    for (const auto &Arg : CI->args()) {
      if (!Arg->getType()->isPointerTy())
        continue;

      const SCEV *ArgSCEV = SE.getSCEVAtScope(Arg, L);
      if (ArgSCEV->isZero())
        continue;

      if (auto *U = dyn_cast<SCEVUnknown>(ArgSCEV)) {
        if (isa<ConstantPointerNull>(U->getValue()))
          continue;
      }

      auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
      addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
                     ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
    }
    return true;
  }

  if (ME.onlyReadsMemory()) {
    GlobalReads.emplace_back(Stmt, CI);
    return true;
  }
  return false;
}
bool ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
  // Memory builtins are not considered by this function.
  if (!Inst.isLoad() && !Inst.isStore())
    return false;

  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");
  AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);

  // Check if the access depends on a loop contained in a non-affine subregion.
  bool isVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AccessFunction, Loops);
  for (const Loop *L : Loops)
    if (Stmt->contains(L)) {
      isVariantInNonAffineLoop = true;
      break;
    }

  InvariantLoadsSetTy AccessILS;

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool IsAffine = !isVariantInNonAffineLoop &&
                  isAffineExpr(&scop->getRegion(), SurroundingLoop,
                               AccessFunction, SE, &AccessILS);

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      IsAffine = false;

  if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 IsAffine, {AccessFunction}, {nullptr}, Val);
  return true;
}
void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
  if (buildAccessMemIntrinsic(Inst, Stmt))
    return;

  if (buildAccessCallInst(Inst, Stmt))
    return;

  if (buildAccessMultiDimFixed(Inst, Stmt))
    return;

  if (buildAccessMultiDimParam(Inst, Stmt))
    return;

  if (buildAccessSingleDim(Inst, Stmt))
    return;

  llvm_unreachable(
      "At least one of the buildAccess functions must handled this access, or "
      "ScopDetection should have rejected this SCoP");
}
void ScopBuilder::buildAccessFunctions() {
  for (auto &Stmt : *scop) {
    if (Stmt.isBlockStmt()) {
      buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
      continue;
    }

    Region *R = Stmt.getRegion();
    for (BasicBlock *BB : R->blocks())
      buildAccessFunctions(&Stmt, *BB, R);
  }

  // Build write accesses for values that are used after the SCoP.
  // The instructions defining them might be synthesizable and therefore not
  // contained in any statement, hence we iterate over the original instructions
  // to identify all escaping values.
  for (BasicBlock *BB : scop->getRegion().blocks()) {
    for (Instruction &Inst : *BB)
      buildEscapingDependences(&Inst);
  }
}
bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
  return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
         !canSynthesize(Inst, *scop, &SE, L);
}
/// Generate a name for a statement.
///
/// @param BB     The basic block the statement will represent.
/// @param BBIdx  The index of the @p BB relative to other BBs/regions.
/// @param Count  The index of the created statement in @p BB.
/// @param IsMain Whether this is the main of all statements for @p BB. If
///               true, no suffix will be added.
/// @param IsLast Uses a special indicator for the last statement of a BB.
static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
                                bool IsMain, bool IsLast = false) {
  std::string Suffix;
  if (!IsMain) {
    if (UseInstructionNames)
      Suffix = '_';
    if (IsLast)
      Suffix += "last";
    else if (Count < 26)
      Suffix += 'a' + Count;
    else
      Suffix += std::to_string(Count);
  }
  return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
}
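
// For illustration: with instruction names enabled, the statements of a block
// %for.body are typically named Stmt_for_body (main statement),
// Stmt_for_body_b (second statement), and Stmt_for_body_last (epilogue); the
// exact spelling is produced by getIslCompatibleName().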
/// Generate a name for a statement that represents a non-affine subregion.
///
/// @param R    The region the statement will represent.
/// @param RIdx The index of the @p R relative to other BBs/regions.
static std::string makeStmtName(Region *R, long RIdx) {
  return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
                              UseInstructionNames);
}
void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
  Loop *SurroundingLoop = LI.getLoopFor(BB);

  int Count = 0;
  long BBIdx = scop->getNextStmtIdx();
  std::vector<Instruction *> Instructions;
  for (Instruction &Inst : *BB) {
    if (shouldModelInst(&Inst, SurroundingLoop))
      Instructions.push_back(&Inst);
    if (Inst.getMetadata("polly_split_after") ||
        (SplitOnStore && isa<StoreInst>(Inst))) {
      std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
      scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
      Count += 1;
      Instructions.clear();
    }
  }

  std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
  scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
}
/// Is @p Inst an ordered instruction?
///
/// An unordered instruction is an instruction, such that a sequence of
/// unordered instructions can be permuted without changing semantics. Any
/// instruction for which this is not always the case is ordered.
static bool isOrderedInstruction(Instruction *Inst) {
  return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
}
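
// For example, loads, stores, and most calls are ordered because they may
// read or write memory; a pure arithmetic instruction such as an 'add' with
// no memory effects is unordered and may be placed into any statement.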
/// Join instructions to the same statement if one uses the scalar result of
/// the other.
static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
                            ArrayRef<Instruction *> ModeledInsts) {
  for (Instruction *Inst : ModeledInsts) {
    if (isa<PHINode>(Inst))
      continue;

    for (Use &Op : Inst->operands()) {
      Instruction *OpInst = dyn_cast<Instruction>(Op.get());
      if (!OpInst)
        continue;

      // Check if OpInst is in the BB and is a modeled instruction.
      auto OpVal = UnionFind.findValue(OpInst);
      if (OpVal == UnionFind.end())
        continue;

      UnionFind.unionSets(Inst, OpInst);
    }
  }
}
/// Ensure that the order of ordered instructions does not change.
///
/// If we encounter an ordered instruction enclosed in instructions belonging
/// to a different statement (which might as well contain ordered instructions,
/// but this is not tested here), join them.
static void
joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
                        ArrayRef<Instruction *> ModeledInsts) {
  SetVector<Instruction *> SeenLeaders;
  for (Instruction *Inst : ModeledInsts) {
    if (!isOrderedInstruction(Inst))
      continue;

    Instruction *Leader = UnionFind.getLeaderValue(Inst);
    // Since previous iterations might have merged sets, some items in
    // SeenLeaders are not leaders anymore. However, the new leader of
    // previously merged instructions must be one of the former leaders of
    // these merged instructions.
    bool Inserted = SeenLeaders.insert(Leader);
    if (Inserted)
      continue;

    // Merge statements to close holes. Say, we have already seen statements A
    // and B, in this order. Then we see an instruction of A again and we would
    // see the pattern "A B A". This function joins all statements until the
    // only seen occurrence of A.
    for (Instruction *Prev : reverse(SeenLeaders)) {
      // We are backtracking from the last element until we see Inst's leader
      // in SeenLeaders and merge all into one set. Although leaders of
      // instructions change during the execution of this loop, it's irrelevant
      // as we are just searching for the element that we already confirmed is
      // in the list.
      if (Prev == Leader)
        break;
      UnionFind.unionSets(Prev, Leader);
    }
  }
}
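
// For example, given the ordered sequence store-to-A (leader A), load-from-B
// (leader B), store-to-A (leader A again), the backtracking loop above unions
// B into A so the resulting statements cannot interleave as "A B A".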
/// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
/// the incoming values from this block are executed after the PHI READ.
///
/// Otherwise it could overwrite the incoming value from before the BB with the
/// value for the next execution. This can happen if the PHI WRITE is added to
/// the statement with the instruction that defines the incoming value (instead
/// of the last statement of the same BB). To ensure that the PHI READ and
/// WRITE are in order, we put both into the statement. PHI WRITEs are always
/// executed after PHI READs when they are in the same statement.
///
/// TODO: This is an overpessimization. We only have to ensure that the PHI
/// WRITE is not put into a statement containing the PHI itself. That could
/// also be done by
/// - having all (strongly connected) PHIs in a single statement,
/// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
///   has a chance of being lifted before a PHI by being in a statement with a
///   PHI that comes before in the basic block), or
/// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
                            ArrayRef<Instruction *> ModeledInsts) {
  for (Instruction *Inst : ModeledInsts) {
    PHINode *PHI = dyn_cast<PHINode>(Inst);
    if (!PHI)
      continue;

    int Idx = PHI->getBasicBlockIndex(PHI->getParent());
    if (Idx < 0)
      continue;

    Instruction *IncomingVal =
        dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
    if (!IncomingVal)
      continue;

    UnionFind.unionSets(PHI, IncomingVal);
  }
}
void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
  Loop *L = LI.getLoopFor(BB);

  // Extracting out modeled instructions saves us from checking
  // shouldModelInst() repeatedly.
  SmallVector<Instruction *, 32> ModeledInsts;
  EquivalenceClasses<Instruction *> UnionFind;
  Instruction *MainInst = nullptr, *MainLeader = nullptr;
  for (Instruction &Inst : *BB) {
    if (!shouldModelInst(&Inst, L))
      continue;
    ModeledInsts.push_back(&Inst);
    UnionFind.insert(&Inst);

    // When a BB is split into multiple statements, the main statement is the
    // one containing the 'main' instruction. We select the first instruction
    // that is unlikely to be removed (because it has side-effects) as the main
    // one. It is used to ensure that at least one statement from the bb has the
    // same name as with -polly-stmt-granularity=bb.
    if (!MainInst && (isa<StoreInst>(Inst) ||
                      (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
      MainInst = &Inst;
  }

  joinOperandTree(UnionFind, ModeledInsts);
  joinOrderedInstructions(UnionFind, ModeledInsts);
  joinOrderedPHIs(UnionFind, ModeledInsts);

  // The list of instructions per statement (each statement is represented by
  // its leader instruction).
  MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;

  // The order of statements must be preserved w.r.t. their ordered
  // instructions. Without this explicit scan, we would also use non-ordered
  // instructions (whose order is arbitrary) to determine statement order.
  for (Instruction *Inst : ModeledInsts) {
    if (!isOrderedInstruction(Inst))
      continue;

    auto LeaderIt = UnionFind.findLeader(Inst);
    if (LeaderIt == UnionFind.member_end())
      continue;

    // Insert element for the leader instruction.
    (void)LeaderToInstList[*LeaderIt];
  }

  // Collect the instructions of all leaders. UnionFind's member iterators
  // unfortunately are not in any specific order.
  for (Instruction *Inst : ModeledInsts) {
    auto LeaderIt = UnionFind.findLeader(Inst);
    if (LeaderIt == UnionFind.member_end())
      continue;

    if (Inst == MainInst)
      MainLeader = *LeaderIt;
    std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
    InstList.push_back(Inst);
  }

  // Finally build the statements.
  int Count = 0;
  long BBIdx = scop->getNextStmtIdx();
  for (auto &Instructions : LeaderToInstList) {
    std::vector<Instruction *> &InstList = Instructions.second;

    // If there is no main instruction, make the first statement the main.
    bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);

    std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
    scop->addScopStmt(BB, Name, L, std::move(InstList));
    Count += 1;
  }

  // Unconditionally add an epilogue (last statement). It contains no
  // instructions, but holds the PHI write accesses for successor basic blocks,
  // if the incoming value is not defined in another statement of the same BB.
  // The epilogue becomes the main statement only if there is no other
  // statement that could become main.
  // The epilogue will be removed if no PHIWrite is added to it.
  std::string EpilogueName = makeStmtName(BB, BBIdx, Count, Count == 0, true);
  scop->addScopStmt(BB, EpilogueName, L, {});
}
void ScopBuilder::buildStmts(Region &SR) {
  if (scop->isNonAffineSubRegion(&SR)) {
    std::vector<Instruction *> Instructions;
    Loop *SurroundingLoop =
        getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
    for (Instruction &Inst : *SR.getEntry())
      if (shouldModelInst(&Inst, SurroundingLoop))
        Instructions.push_back(&Inst);
    long RIdx = scop->getNextStmtIdx();
    std::string Name = makeStmtName(&SR, RIdx);
    scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
    return;
  }

  for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
    if (I->isSubRegion())
      buildStmts(*I->getNodeAs<Region>());
    else {
      BasicBlock *BB = I->getNodeAs<BasicBlock>();
      switch (StmtGranularity) {
      case GranularityChoice::BasicBlocks:
        buildSequentialBlockStmts(BB);
        break;
      case GranularityChoice::ScalarIndependence:
        buildEqivClassBlockStmts(BB);
        break;
      case GranularityChoice::Stores:
        buildSequentialBlockStmts(BB, true);
        break;
      }
    }
}
void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
                                       Region *NonAffineSubRegion) {
  assert(
      Stmt &&
      "The exit BB is the only one that cannot be represented by a statement");
  assert(Stmt->represents(&BB));

  // We do not build access functions for error blocks, as they may contain
  // instructions we can not model.
  if (SD.isErrorBlock(BB, scop->getRegion()))
    return;

  auto BuildAccessesForInst = [this, Stmt,
                               NonAffineSubRegion](Instruction *Inst) {
    PHINode *PHI = dyn_cast<PHINode>(Inst);
    if (PHI)
      buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);

    if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
      assert(Stmt && "Cannot build access function in non-existing statement");
      buildMemoryAccess(MemInst, Stmt);
    }

    // PHI nodes have already been modeled above and terminators that are
    // not part of a non-affine subregion are fully modeled and regenerated
    // from the polyhedral domains. Hence, they do not need to be modeled as
    // explicit data dependences.
    if (!PHI)
      buildScalarDependences(Stmt, Inst);
  };

  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
  if (IsEntryBlock) {
    for (Instruction *Inst : Stmt->getInstructions())
      BuildAccessesForInst(Inst);
    if (Stmt->isRegionStmt())
      BuildAccessesForInst(BB.getTerminator());
  } else {
    for (Instruction &Inst : BB) {
      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Invariant loads already have been processed.
      if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
        continue;

      BuildAccessesForInst(&Inst);
    }
  }
}
MemoryAccess *ScopBuilder::addMemoryAccess(
    ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
    Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
    ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
    MemoryKind Kind) {
  bool isKnownMustAccess = false;

  // Accesses in single-basic block statements are always executed.
  if (Stmt->isBlockStmt())
    isKnownMustAccess = true;

  if (Stmt->isRegionStmt()) {
    // Accesses that dominate the exit block of a non-affine region are always
    // executed. In non-affine regions there may exist MemoryKind::Values that
    // do not dominate the exit. MemoryKind::Values will always dominate the
    // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
    // non-affine region.
    if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
      isKnownMustAccess = true;
  }

  // Non-affine PHI writes do not "happen" at a particular instruction, but
  // after exiting the statement. Therefore they are guaranteed to execute and
  // overwrite the old value.
  if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
    isKnownMustAccess = true;

  if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
                                  Affine, Subscripts, Sizes, AccessValue, Kind);

  scop->addAccessFunction(Access);
  Stmt->addAccess(Access);
  return Access;
}
void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
                                 MemoryAccess::AccessType AccType,
                                 Value *BaseAddress, Type *ElementType,
                                 bool IsAffine,
                                 ArrayRef<const SCEV *> Subscripts,
                                 ArrayRef<const SCEV *> Sizes,
                                 Value *AccessValue) {
  ArrayBasePointers.insert(BaseAddress);
  addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine,
                  AccessValue, Subscripts, Sizes, MemoryKind::Array);
}
/// Check if @p Expr is divisible by @p Size.
static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
  assert(Size != 0);
  if (Size == 1)
    return true;

  // Only one factor needs to be divisible.
  if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
    for (const SCEV *FactorExpr : MulExpr->operands())
      if (isDivisible(FactorExpr, Size, SE))
        return true;
    return false;
  }

  // For other n-ary expressions (Add, AddRec, Max,...) all operands need
  // to be divisible.
  if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
    for (const SCEV *OpExpr : NAryExpr->operands())
      if (!isDivisible(OpExpr, Size, SE))
        return false;
    return true;
  }

  const SCEV *SizeSCEV = SE.getConstant(Expr->getType(), Size);
  const SCEV *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
  const SCEV *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
  return MulSCEV == Expr;
}
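
// Worked example: for Size = 4, the expression (4 * %n + 8) is divisible.
// The multiplication contributes the constant factor 4, and for the constant
// operand 8 the udiv/mul round-trip (8 /u 4) * 4 == 8 holds, so every operand
// of the addition passes the check.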
void ScopBuilder::foldSizeConstantsToRight() {
  isl::union_set Accessed = scop->getAccesses().range();

  for (auto Array : scop->arrays()) {
    if (Array->getNumberOfDimensions() <= 1)
      continue;

    isl::space Space = Array->getSpace();
    Space = Space.align_params(Accessed.get_space());

    if (!Accessed.contains(Space))
      continue;

    isl::set Elements = Accessed.extract_set(Space);
    isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());

    std::vector<int> Int;
    unsigned Dims = unsignedFromIslSize(Elements.tuple_dim());
    for (unsigned i = 0; i < Dims; i++) {
      isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
      DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
      DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);

      isl::basic_set DimHull = DimOnly.affine_hull();

      if (i == Dims - 1) {
        Int.push_back(1);
        Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
        continue;
      }

      if (unsignedFromIslSize(DimHull.dim(isl::dim::div)) == 1) {
        isl::aff Diff = DimHull.get_div(0);
        isl::val Val = Diff.get_denominator_val();

        int ValInt = 1;
        if (Val.is_int()) {
          auto ValAPInt = APIntFromVal(Val);
          if (ValAPInt.isSignedIntN(32))
            ValInt = ValAPInt.getSExtValue();
        }

        Int.push_back(ValInt);
        isl::constraint C = isl::constraint::alloc_equality(
            isl::local_space(Transform.get_space()));
        C = C.set_coefficient_si(isl::dim::out, i, ValInt);
        C = C.set_coefficient_si(isl::dim::in, i, -1);
        Transform = Transform.add_constraint(C);
        continue;
      }

      isl::basic_set ZeroSet = isl::basic_set(DimHull);
      ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);

      int ValInt = 1;
      if (ZeroSet.is_equal(DimHull))
        ValInt = 0;

      Int.push_back(ValInt);
      Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
    }

    isl::set MappedElements = isl::map(Transform).domain();
    if (!Elements.is_subset(MappedElements))
      continue;

    bool CanFold = true;
    if (Int[0] <= 1)
      CanFold = false;

    unsigned NumDims = Array->getNumberOfDimensions();
    for (unsigned i = 1; i < NumDims - 1; i++)
      if (Int[0] != Int[i] && Int[i])
        CanFold = false;

    if (!CanFold)
      continue;

    for (auto &Access : scop->access_functions())
      if (Access->getScopArrayInfo() == Array)
        Access->setAccessRelation(
            Access->getAccessRelation().apply_range(Transform));

    std::vector<const SCEV *> Sizes;
    for (unsigned i = 0; i < NumDims; i++) {
      auto Size = Array->getDimensionSize(i);

      if (i == NumDims - 1)
        Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0]));
      Sizes.push_back(Size);
    }

    Array->updateSizes(Sizes, false /* CheckConsistency */);
  }
}
void ScopBuilder::finalizeAccesses() {
  updateAccessDimensionality();
  foldSizeConstantsToRight();
  foldAccessRelations();
  assumeNoOutOfBounds();
}
void ScopBuilder::updateAccessDimensionality() {
  // Check all array accesses for each base pointer and find a (virtual) element
  // size for the base pointer that divides all access functions.
  for (ScopStmt &Stmt : *scop)
    for (MemoryAccess *Access : Stmt) {
      if (!Access->isArrayKind())
        continue;
      ScopArrayInfo *Array =
          const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());

      if (Array->getNumberOfDimensions() != 1)
        continue;
      unsigned DivisibleSize = Array->getElemSizeInBytes();
      const SCEV *Subscript = Access->getSubscript(0);
      while (!isDivisible(Subscript, DivisibleSize, SE))
        DivisibleSize /= 2;
      auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8);
      Array->updateElementType(Ty);
    }

  for (auto &Stmt : *scop)
    for (auto &Access : Stmt)
      Access->updateDimensionality();
}
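
// For instance, an array created with an i64 element type (8 bytes) whose
// byte-offset subscripts have the form 4*i + 2 ends up with a virtual element
// size of 2 bytes, i.e., its element type is updated to i16.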
void ScopBuilder::foldAccessRelations() {
  for (auto &Stmt : *scop)
    for (auto &Access : Stmt)
      Access->foldAccessRelation();
}
void ScopBuilder::assumeNoOutOfBounds() {
  if (PollyIgnoreInbounds)
    return;
  for (auto &Stmt : *scop)
    for (auto &Access : Stmt) {
      isl::set Outside = Access->assumeNoOutOfBound();
      const auto &Loc = Access->getAccessInstruction()
                            ? Access->getAccessInstruction()->getDebugLoc()
                            : DebugLoc();
      recordAssumption(&RecordedAssumptions, INBOUNDS, Outside, Loc,
                       AS_ASSUMPTION);
    }
}
void ScopBuilder::ensureValueWrite(Instruction *Inst) {
  // Find the statement that defines the value of Inst. That statement has to
  // write the value to make it available to those statements that read it.
  ScopStmt *Stmt = scop->getStmtFor(Inst);

  // It is possible that the value is synthesizable within a loop (such that it
  // is not part of any statement), but not after the loop (where you need the
  // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
  // avoid this. In case the IR has no such PHI, use the last statement (where
  // the value is synthesizable) to write the value.
  if (!Stmt)
    Stmt = scop->getLastStmtFor(Inst->getParent());

  // Inst not defined within this SCoP.
  if (!Stmt)
    return;

  // Do not process further if the instruction is already written.
  if (Stmt->lookupValueWriteOf(Inst))
    return;

  addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
                  true, Inst, ArrayRef<const SCEV *>(),
                  ArrayRef<const SCEV *>(), MemoryKind::Value);
}
void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
  // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
  // to be able to replace this one. Currently, there is a split responsibility.
  // In a first step, the MemoryAccess is created, but without the
  // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
  // AccessRelation is created. At least for scalar accesses, there is no new
  // information available at ScopStmt::buildAccessRelations(), so we could
  // create the AccessRelation right away. This is what
  // ScopStmt::ensureValueRead(Value*) does.

  auto *Scope = UserStmt->getSurroundingLoop();
  auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
  switch (VUse.getKind()) {
  case VirtualUse::Constant:
  case VirtualUse::Block:
  case VirtualUse::Synthesizable:
  case VirtualUse::Hoisted:
  case VirtualUse::Intra:
    // Uses of these kinds do not need a MemoryAccess.
    break;

  case VirtualUse::ReadOnly:
    // Add MemoryAccess for invariant values only if requested.
    if (!ModelReadOnlyScalars)
      break;

    [[fallthrough]];
  case VirtualUse::Inter:

    // Do not create another MemoryAccess for reloading the value if one
    // already exists.
    if (UserStmt->lookupValueReadOf(V))
      break;

    addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
                    true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                    MemoryKind::Value);

    // Inter-statement uses need to write the value in their defining statement.
    if (VUse.isInter())
      ensureValueWrite(cast<Instruction>(V));
    break;
  }
}
void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
                                 BasicBlock *IncomingBlock,
                                 Value *IncomingValue, bool IsExitBlock) {
  // As the incoming block might turn out to be an error statement ensure we
  // will create an exit PHI SAI object. It is needed during code generation
  // and would be created later anyway.
  if (IsExitBlock)
    scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
                                   MemoryKind::ExitPHI);

  // This is possible if PHI is in the SCoP's entry block. The incoming blocks
  // from outside the SCoP's region have no statement representation.
  if (!IncomingStmt)
    return;

  // Take care for the incoming value being available in the incoming block.
  // This must be done before the check for multiple PHI writes because multiple
  // exiting edges from a subregion each can be the effective written value of
  // the subregion. As such, all of them must be made available in the subregion
  // statement.
  ensureValueRead(IncomingValue, IncomingStmt);

  // Do not add more than one MemoryAccess per PHINode and ScopStmt.
  if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
    assert(Acc->getAccessInstruction() == PHI);
    Acc->addIncoming(IncomingBlock, IncomingValue);
    return;
  }

  MemoryAccess *Acc = addMemoryAccess(
      IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
      PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
      IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
  assert(Acc);
  Acc->addIncoming(IncomingBlock, IncomingValue);
}
void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
  addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
                  PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                  MemoryKind::PHI);
}
void ScopBuilder::buildDomain(ScopStmt &Stmt) {
  isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);

  Stmt.Domain = scop->getDomainConditions(&Stmt);
  Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
}
void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
  isl::set Domain = Stmt.getDomain();
  BasicBlock *BB = Stmt.getEntryBlock();

  Loop *L = LI.getLoopFor(BB);

  while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
    L = L->getParentLoop();

  SmallVector<llvm::Loop *, 8> Loops;

  while (L && Stmt.getParent()->getRegion().contains(L)) {
    Loops.push_back(L);
    L = L->getParentLoop();
  }

  Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
}
/// Return the reduction type for a given binary operator.
static MemoryAccess::ReductionType
getReductionType(const BinaryOperator *BinOp) {
  if (!BinOp)
    return MemoryAccess::RT_NONE;
  switch (BinOp->getOpcode()) {
  case Instruction::FAdd:
    if (!BinOp->isFast())
      return MemoryAccess::RT_NONE;
    [[fallthrough]];
  case Instruction::Add:
    return MemoryAccess::RT_ADD;
  case Instruction::Or:
    return MemoryAccess::RT_BOR;
  case Instruction::Xor:
    return MemoryAccess::RT_BXOR;
  case Instruction::And:
    return MemoryAccess::RT_BAND;
  case Instruction::FMul:
    if (!BinOp->isFast())
      return MemoryAccess::RT_NONE;
    [[fallthrough]];
  case Instruction::Mul:
    if (DisableMultiplicativeReductions)
      return MemoryAccess::RT_NONE;
    return MemoryAccess::RT_MUL;
  default:
    return MemoryAccess::RT_NONE;
  }
}
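
// For example, the update in 'A[i] += 3' uses an integer 'add' and yields
// RT_ADD; a floating-point 'fadd' is only accepted when fast-math flags are
// set, and operators without a supported reduction semantics map to RT_NONE.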
/// @brief Combine two reduction types
static MemoryAccess::ReductionType
combineReductionType(MemoryAccess::ReductionType RT0,
                     MemoryAccess::ReductionType RT1) {
  if (RT0 == MemoryAccess::RT_BOTTOM)
    return RT1;
  if (RT0 == RT1)
    return RT1;
  return MemoryAccess::RT_NONE;
}
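
// For example, combining RT_ADD with RT_ADD stays RT_ADD, while mixing RT_ADD
// with RT_MUL degrades to RT_NONE; RT_BOTTOM acts as the neutral starting
// value that is assigned to a freshly seen load.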
/// True if @p AllAccs intersects with @p MemAccs except @p LoadMA and @p
/// StoreMA
bool hasIntersectingAccesses(isl::set AllAccs, MemoryAccess *LoadMA,
                             MemoryAccess *StoreMA, isl::set Domain,
                             SmallVector<MemoryAccess *, 8> &MemAccs) {
  bool HasIntersectingAccs = false;
  auto AllAccsNoParams = AllAccs.project_out_all_params();

  for (MemoryAccess *MA : MemAccs) {
    if (MA == LoadMA || MA == StoreMA)
      continue;
    auto AccRel = MA->getAccessRelation().intersect_domain(Domain);
    auto Accs = AccRel.range();
    auto AccsNoParams = Accs.project_out_all_params();

    bool CompatibleSpace = AllAccsNoParams.has_equal_space(AccsNoParams);

    if (CompatibleSpace) {
      auto OverlapAccs = Accs.intersect(AllAccs);
      bool DoesIntersect = !OverlapAccs.is_empty();
      HasIntersectingAccs |= DoesIntersect;
    }
  }
  return HasIntersectingAccs;
}
/// Test if the accesses of @p LoadMA and @p StoreMA can form a reduction
bool checkCandidatePairAccesses(MemoryAccess *LoadMA, MemoryAccess *StoreMA,
                                isl::set Domain,
                                SmallVector<MemoryAccess *, 8> &MemAccs) {
  // First check if the base value is the same.
  isl::map LoadAccs = LoadMA->getAccessRelation();
  isl::map StoreAccs = StoreMA->getAccessRelation();
  bool Valid = LoadAccs.has_equal_space(StoreAccs);
  POLLY_DEBUG(dbgs() << " == The accessed space below is "
                     << (Valid ? "" : "not ") << "equal!\n");
  POLLY_DEBUG(LoadMA->dump(); StoreMA->dump());

  if (Valid) {
    // Then check if they actually access the same memory.
    isl::map R = isl::manage(LoadAccs.copy())
                     .intersect_domain(isl::manage(Domain.copy()));
    isl::map W = isl::manage(StoreAccs.copy())
                     .intersect_domain(isl::manage(Domain.copy()));
    isl::set RS = R.range();
    isl::set WS = W.range();

    isl::set InterAccs =
        isl::manage(RS.copy()).intersect(isl::manage(WS.copy()));
    Valid = !InterAccs.is_empty();
    POLLY_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "" : "not ")
                       << "overlapping!\n");
  }

  if (Valid) {
    // Finally, check that there are no other instructions accessing this memory
    isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
    AllAccsRel = AllAccsRel.intersect_domain(Domain);
    isl::set AllAccs = AllAccsRel.range();
    Valid = !hasIntersectingAccesses(AllAccs, LoadMA, StoreMA, Domain, MemAccs);
    POLLY_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "not " : "")
                       << "accessed by other instructions!\n");
  }

  return Valid;
}
void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
  // Perform a data flow analysis on the current scop statement to propagate the
  // uses of loaded values. Then check and mark the memory accesses which are
  // part of reduction like chains.
  // During the data flow analysis we use the State variable to keep track of
  // the used "load-instructions" for each instruction in the scop statement.
  // This includes the LLVM-IR of the load and the "number of uses" (or the
  // number of paths in the operand tree which end in this load).
  using StatePairTy = std::pair<unsigned, MemoryAccess::ReductionType>;
  using FlowInSetTy = MapVector<const LoadInst *, StatePairTy>;
  using StateTy = MapVector<const Instruction *, FlowInSetTy>;
  StateTy State;

  // Invalid loads are loads which have uses we can't track properly in the
  // state map. This includes loads which:
  //   o do not form a reduction when they flow into a memory location:
  //     (e.g., A[i] = B[i] * 3 and A[i] = A[i] * A[i] + A[i])
  //   o are used by a non binary operator or one which is not commutative
  //     and associative (e.g., A[i] = A[i] % 3)
  //   o might change the control flow (e.g., if (A[i]))
  //   o are used in indirect memory accesses (e.g., A[B[i]])
  //   o are used outside the current scop statement
  SmallPtrSet<const Instruction *, 8> InvalidLoads;
  SmallVector<BasicBlock *, 8> ScopBlocks;
  BasicBlock *BB = Stmt.getBasicBlock();
  if (BB)
    ScopBlocks.push_back(BB);
  else
    for (BasicBlock *Block : Stmt.getRegion()->blocks())
      ScopBlocks.push_back(Block);
  // Run the data flow analysis for all values in the scop statement
  for (BasicBlock *Block : ScopBlocks) {
    for (Instruction &Inst : *Block) {
      if ((Stmt.getParent())->getStmtFor(&Inst) != &Stmt)
        continue;
      bool UsedOutsideStmt = any_of(Inst.users(), [&Stmt](User *U) {
        return (Stmt.getParent())->getStmtFor(cast<Instruction>(U)) != &Stmt;
      });
      // Treat loads and stores specially.
      if (auto *Load = dyn_cast<LoadInst>(&Inst)) {
        // Invalidate all loads used which feed into the address of this load.
        if (auto *Ptr = dyn_cast<Instruction>(Load->getPointerOperand())) {
          const auto &It = State.find(Ptr);
          if (It != State.end())
            for (const auto &FlowInSetElem : It->second)
              InvalidLoads.insert(FlowInSetElem.first);
        }

        // If this load is used outside this stmt, invalidate it.
        if (UsedOutsideStmt)
          InvalidLoads.insert(Load);

        // And indicate that this load uses itself once but without specifying
        // any reduction operator.
        State[Load].insert(
            std::make_pair(Load, std::make_pair(1, MemoryAccess::RT_BOTTOM)));
        continue;
      }

      if (auto *Store = dyn_cast<StoreInst>(&Inst)) {
        // Invalidate all loads which feed into the address of this store.
        if (const Instruction *Ptr =
                dyn_cast<Instruction>(Store->getPointerOperand())) {
          const auto &It = State.find(Ptr);
          if (It != State.end())
            for (const auto &FlowInSetElem : It->second)
              InvalidLoads.insert(FlowInSetElem.first);
        }

        // Propagate the uses of the value operand to the store
        if (auto *ValueInst = dyn_cast<Instruction>(Store->getValueOperand()))
          State.insert(std::make_pair(Store, State[ValueInst]));
        continue;
      }

      // Non load and store instructions are either binary operators or they
      // will invalidate all used loads.
      auto *BinOp = dyn_cast<BinaryOperator>(&Inst);
      MemoryAccess::ReductionType CurRedType = getReductionType(BinOp);
      POLLY_DEBUG(dbgs() << "CurInst: " << Inst << " RT: " << CurRedType
                         << "\n");

      // Iterate over all operands and propagate their input loads to the
      // instruction.
      FlowInSetTy &InstInFlowSet = State[&Inst];
      for (Use &Op : Inst.operands()) {
        auto *OpInst = dyn_cast<Instruction>(Op);
        if (!OpInst)
          continue;

        POLLY_DEBUG(dbgs().indent(4) << "Op Inst: " << *OpInst << "\n");
        const StateTy::iterator &OpInFlowSetIt = State.find(OpInst);
        if (OpInFlowSetIt == State.end())
          continue;

        // Iterate over all the input loads of the operand and combine them
        // with the input loads of current instruction.
        FlowInSetTy &OpInFlowSet = OpInFlowSetIt->second;
        for (auto &OpInFlowPair : OpInFlowSet) {
          unsigned OpFlowIn = OpInFlowPair.second.first;
          unsigned InstFlowIn = InstInFlowSet[OpInFlowPair.first].first;

          MemoryAccess::ReductionType OpRedType = OpInFlowPair.second.second;
          MemoryAccess::ReductionType InstRedType =
              InstInFlowSet[OpInFlowPair.first].second;

          MemoryAccess::ReductionType NewRedType =
              combineReductionType(OpRedType, CurRedType);
          if (InstFlowIn)
            NewRedType = combineReductionType(NewRedType, InstRedType);

          POLLY_DEBUG(dbgs().indent(8) << "OpRedType: " << OpRedType << "\n");
          POLLY_DEBUG(dbgs().indent(8) << "NewRedType: " << NewRedType << "\n");
          InstInFlowSet[OpInFlowPair.first] =
              std::make_pair(OpFlowIn + InstFlowIn, NewRedType);
        }
      }

      // If this operation is used outside the stmt, invalidate all the loads
      // which feed into it.
      if (UsedOutsideStmt)
        for (const auto &FlowInSetElem : InstInFlowSet)
          InvalidLoads.insert(FlowInSetElem.first);
    }
  }

  // All used loads are propagated through the whole basic block; now try to
  // find valid reduction-like candidate pairs. These load-store pairs fulfill
  // all reduction like properties with regards to only this load-store chain.
  // We later have to check if the loaded value was invalidated by an
  // instruction not in that chain.
  using MemAccPair = std::pair<MemoryAccess *, MemoryAccess *>;
  DenseMap<MemAccPair, MemoryAccess::ReductionType> ValidCandidates;

  // Iterate over all write memory accesses and check the loads flowing into
  // it for reduction candidate pairs.
  for (MemoryAccess *WriteMA : Stmt.MemAccs) {
    if (WriteMA->isRead())
      continue;
    StoreInst *St = dyn_cast<StoreInst>(WriteMA->getAccessInstruction());
    if (!St)
      continue;
    assert(!St->isVolatile());

    FlowInSetTy &MaInFlowSet = State[WriteMA->getAccessInstruction()];
    for (auto &MaInFlowSetElem : MaInFlowSet) {
      MemoryAccess *ReadMA = &Stmt.getArrayAccessFor(MaInFlowSetElem.first);
      assert(ReadMA && "Couldn't find memory access for incoming load!");

      POLLY_DEBUG(dbgs() << "'" << *ReadMA->getAccessInstruction()
                         << "'\n\tflows into\n'"
                         << *WriteMA->getAccessInstruction() << "'\n\t #"
                         << MaInFlowSetElem.second.first << " times & RT: "
                         << MaInFlowSetElem.second.second << "\n");

      MemoryAccess::ReductionType RT = MaInFlowSetElem.second.second;
      unsigned NumAllowableInFlow = 1;

      // We allow the load to flow in exactly once for binary reductions
      bool Valid = (MaInFlowSetElem.second.first == NumAllowableInFlow);

      // Check if we saw a valid chain of binary operators.
      Valid = Valid && RT != MemoryAccess::RT_BOTTOM;
      Valid = Valid && RT != MemoryAccess::RT_NONE;

      // Then check if the memory accesses allow a reduction.
      Valid = Valid && checkCandidatePairAccesses(
                           ReadMA, WriteMA, Stmt.getDomain(), Stmt.MemAccs);

      // Finally, mark the pair as a candidate or the load as an invalid one.
      if (Valid)
        ValidCandidates[std::make_pair(ReadMA, WriteMA)] = RT;
      else
        InvalidLoads.insert(ReadMA->getAccessInstruction());
    }
  }

  // In the last step mark the memory accesses of candidate pairs as reduction
  // like if the load wasn't marked invalid in the previous step.
  for (auto &CandidatePair : ValidCandidates) {
    MemoryAccess *LoadMA = CandidatePair.first.first;
    if (InvalidLoads.count(LoadMA->getAccessInstruction()))
      continue;

    POLLY_DEBUG(
        dbgs() << " Load :: "
               << *((CandidatePair.first.first)->getAccessInstruction())
               << "\n Store :: "
               << *((CandidatePair.first.second)->getAccessInstruction())
               << "\n are marked as reduction like\n");
    MemoryAccess::ReductionType RT = CandidatePair.second;
    CandidatePair.first.first->markAsReductionLike(RT);
    CandidatePair.first.second->markAsReductionLike(RT);
  }
}
void ScopBuilder::verifyInvariantLoads() {
  auto &RIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LI : RIL) {
    assert(LI && scop->contains(LI));
    // If there exists a statement in the scop which has a memory access for
    // @p LI, then mark this scop as infeasible for optimization.
    for (ScopStmt &Stmt : *scop)
      if (Stmt.getArrayAccessOrNULLFor(LI)) {
        scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
        return;
      }
  }
}
void ScopBuilder::hoistInvariantLoads() {
  if (!PollyInvariantLoadHoisting)
    return;

  isl::union_map Writes = scop->getWrites();
  for (ScopStmt &Stmt : *scop) {
    InvariantAccessesTy InvariantAccesses;

    for (MemoryAccess *Access : Stmt) {
      isl::set NHCtx = getNonHoistableCtx(Access, Writes);
      if (!NHCtx.is_null())
        InvariantAccesses.push_back({Access, NHCtx});
    }

    // Transfer the memory access from the statement to the SCoP.
    for (auto InvMA : InvariantAccesses)
      Stmt.removeMemoryAccess(InvMA.MA);
    addInvariantLoads(Stmt, InvariantAccesses);
  }
}
/// Check if an access range is too complex.
///
/// An access range is too complex, if it contains either many disjuncts or
/// very complex expressions. As a simple heuristic, we consider a set to be
/// too complex if the sum of existentially quantified dimensions and
/// set dimensions is larger than a threshold. This reliably detects both
/// sets with many disjuncts as well as sets with many divisions as they
/// arise.
///
/// @param AccessRange The range to check for complexity.
///
/// @returns True if the access range is too complex.
static bool isAccessRangeTooComplex(isl::set AccessRange) {
  unsigned NumTotalDims = 0;

  for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
    NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::div));
    NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::set));
  }

  if (NumTotalDims > MaxDimensionsInAccessRange)
    return true;

  return false;
}
bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
                                               isl::union_map Writes) {
  if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) {
    return getNonHoistableCtx(BasePtrMA, Writes).is_null();
  }

  Value *BaseAddr = MA->getOriginalBaseAddr();
  if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
    if (!isa<LoadInst>(BasePtrInst))
      return scop->contains(BasePtrInst);

  return false;
}
void ScopBuilder::addUserContext() {
  if (UserContextStr.empty())
    return;

  isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str());
  isl::space Space = scop->getParamSpace();
  isl::size SpaceParams = Space.dim(isl::dim::param);
  if (unsignedFromIslSize(SpaceParams) !=
      unsignedFromIslSize(UserContext.dim(isl::dim::param))) {
    std::string SpaceStr = stringFromIslObj(Space, "null");
    errs() << "Error: the context provided in -polly-context does not have the "
           << "same number of dimensions as the computed context. Due to this "
           << "mismatch, the -polly-context option is ignored. Please provide "
           << "the context in the parameter space: " << SpaceStr << ".\n";
    return;
  }

  for (auto i : rangeIslSize(0, SpaceParams)) {
    std::string NameContext =
        scop->getContext().get_dim_name(isl::dim::param, i);
    std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);

    if (NameContext != NameUserContext) {
      std::string SpaceStr = stringFromIslObj(Space, "null");
      errs() << "Error: the name of dimension " << i
             << " provided in -polly-context "
             << "is '" << NameUserContext << "', but the name in the computed "
             << "context is '" << NameContext
             << "'. Due to this name mismatch, "
             << "the -polly-context option is ignored. Please provide "
             << "the context in the parameter space: " << SpaceStr << ".\n";
      return;
    }

    UserContext = UserContext.set_dim_id(isl::dim::param, i,
                                         Space.get_dim_id(isl::dim::param, i));
  }

  isl::set newContext = scop->getContext().intersect(UserContext);
  scop->setContext(newContext);
}
isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,
                                         isl::union_map Writes) {
  // TODO: Loads that are not loop carried, hence are in a statement with
  //       zero iterators, are by construction invariant, though we
  //       currently "hoist" them anyway. This is necessary because we allow
  //       them to be treated as parameters (e.g., in conditions) and our code
  //       generation would otherwise use the old value.

  auto &Stmt = *Access->getStatement();
  BasicBlock *BB = Stmt.getEntryBlock();

  if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
      Access->isMemoryIntrinsic())
    return {};

  // Skip accesses that have an invariant base pointer which is defined but
  // not loaded inside the SCoP. This can happen, e.g., if a readnone call
  // returns a pointer that is used as a base address. However, as we want
  // to hoist indirect pointers, we allow the base pointer to be defined in
  // the region if it is also a memory access. Each ScopArrayInfo object
  // that has a base pointer origin has a base pointer that is loaded and
  // that is invariant, thus it will be hoisted too. However, if there is
  // no base pointer origin we check that the base pointer is defined
  // outside the region.
  auto *LI = cast<LoadInst>(Access->getAccessInstruction());
  if (hasNonHoistableBasePtrInScop(Access, Writes))
    return {};

  isl::map AccessRelation = Access->getAccessRelation();
  assert(!AccessRelation.is_empty());

  if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
    return {};

  AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
  isl::set SafeToLoad;

  auto &DL = scop->getFunction().getDataLayout();
  if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(),
                                  LI->getAlign(), DL, nullptr)) {
    SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
  } else if (BB != LI->getParent()) {
    // Skip accesses in non-affine subregions as they might not be executed
    // under the same condition as the entry of the non-affine subregion.
    return {};
  } else {
    SafeToLoad = AccessRelation.range();
  }

  if (isAccessRangeTooComplex(AccessRelation.range()))
    return {};

  isl::union_map Written = Writes.intersect_range(SafeToLoad);
  isl::set WrittenCtx = Written.params();
  bool IsWritten = !WrittenCtx.is_empty();

  if (!IsWritten)
    return WrittenCtx;

  WrittenCtx = WrittenCtx.remove_divs();
  bool TooComplex =
      unsignedFromIslSize(WrittenCtx.n_basic_set()) >= MaxDisjunctsInDomain;
  if (TooComplex || !isRequiredInvariantLoad(LI))
    return {};

  scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(),
                      AS_RESTRICTION, LI->getParent());

  return WrittenCtx;
}
static bool isAParameter(llvm::Value *maybeParam, const Function &F) {
  for (const llvm::Argument &Arg : F.args())
    if (&Arg == maybeParam)
      return true;

  return false;
}
bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA,
                                     bool StmtInvalidCtxIsEmpty,
                                     bool MAInvalidCtxIsEmpty,
                                     bool NonHoistableCtxIsEmpty) {
  LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
  const DataLayout &DL = LInst->getDataLayout();
  if (PollyAllowDereferenceOfAllFunctionParams &&
      isAParameter(LInst->getPointerOperand(), scop->getFunction()))
    return true;

  // TODO: We can provide more information for better but more expensive
  //       results.
  if (!isDereferenceableAndAlignedPointer(
          LInst->getPointerOperand(), LInst->getType(), LInst->getAlign(), DL))
    return false;

  // If the location might be overwritten we do not hoist it unconditionally.
  //
  // TODO: This is probably too conservative.
  if (!NonHoistableCtxIsEmpty)
    return false;

  // If a dereferenceable load is in a statement that is modeled precisely we
  // can hoist it.
  if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
    return true;

  // Even if the statement is not modeled precisely we can hoist the load if it
  // does not involve any parameters that might have been specialized by the
  // statement domain.
  for (const SCEV *Subscript : MA->subscripts())
    if (!isa<SCEVConstant>(Subscript))
      return false;
  return true;
}
void ScopBuilder::addInvariantLoads(ScopStmt &Stmt,
                                    InvariantAccessesTy &InvMAs) {
  if (InvMAs.empty())
    return;

  isl::set StmtInvalidCtx = Stmt.getInvalidContext();
  bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();

  // Get the context under which the statement is executed but remove the error
  // context under which this statement is reached.
  isl::set DomainCtx = Stmt.getDomain().params();
  DomainCtx = DomainCtx.subtract(StmtInvalidCtx);

  if (unsignedFromIslSize(DomainCtx.n_basic_set()) >= MaxDisjunctsInDomain) {
    auto *AccInst = InvMAs.front().MA->getAccessInstruction();
    scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
    return;
  }

  // Project out all parameters that relate to loads in the statement. Otherwise
  // we could have cyclic dependences on the constraints under which the
  // hoisted loads are executed and we could not determine an order in which to
  // pre-load them. This happens because not only lower bounds are part of the
  // domain but also upper bounds.
  for (auto &InvMA : InvMAs) {
    auto *MA = InvMA.MA;
    Instruction *AccInst = MA->getAccessInstruction();
    if (SE.isSCEVable(AccInst->getType())) {
      SetVector<Value *> Values;
      for (const SCEV *Parameter : scop->parameters()) {
        Values.clear();
        findValues(Parameter, SE, Values);
        if (!Values.count(AccInst))
          continue;

        isl::id ParamId = scop->getIdForParam(Parameter);
        if (!ParamId.is_null()) {
          int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
          if (Dim >= 0)
            DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
        }
      }
    }
  }

  for (auto &InvMA : InvMAs) {
    auto *MA = InvMA.MA;
    isl::set NHCtx = InvMA.NonHoistableCtx;

    // Check for another invariant access that accesses the same location as
    // MA and if found consolidate them. Otherwise create a new equivalence
    // class at the end of InvariantEquivClasses.
    LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
    Type *Ty = LInst->getType();
    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());

    isl::set MAInvalidCtx = MA->getInvalidContext();
    bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
    bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();

    isl::set MACtx;
    // Check if we know that this pointer can be speculatively accessed.
    if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
                           NonHoistableCtxIsEmpty)) {
      MACtx = isl::set::universe(DomainCtx.get_space());
    } else {
      MACtx = DomainCtx;
      MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
      MACtx = MACtx.gist_params(scop->getContext());
    }

    bool Consolidated = false;
    for (auto &IAClass : scop->invariantEquivClasses()) {
      if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
        continue;

      // If the pointer and the type is equal check if the access function wrt.
      // to the domain is equal too. It can happen that the domain fixes
      // parameter values and these can be different for distinct parts of the
      // SCoP. If this happens we cannot consolidate the loads but need to
      // create a new invariant load equivalence class.
      auto &MAs = IAClass.InvariantAccesses;
      if (!MAs.empty()) {
        auto *LastMA = MAs.front();

        isl::set AR = MA->getAccessRelation().range();
        isl::set LastAR = LastMA->getAccessRelation().range();
        bool SameAR = AR.is_equal(LastAR);

        if (!SameAR)
          continue;
      }

      // Add MA to the list of accesses that are in this class.
      MAs.push_front(MA);

      Consolidated = true;

      // Unify the execution context of the class and this statement.
      isl::set IAClassDomainCtx = IAClass.ExecutionContext;
      if (!IAClassDomainCtx.is_null())
        IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
      else
        IAClassDomainCtx = MACtx;
      IAClass.ExecutionContext = IAClassDomainCtx;
      break;
    }

    if (Consolidated)
      continue;

    MACtx = MACtx.coalesce();

    // If we did not consolidate MA, thus did not find an equivalence class
    // for it, we create a new one.
    scop->addInvariantEquivClass(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
  }
}
/// Find the canonical scop array info object for a set of invariant load
/// hoisted loads. The canonical array is the one that corresponds to the
/// first load in the list of accesses which is used as base pointer of a
/// scop array.
static const ScopArrayInfo *findCanonicalArray(Scop &S,
                                               MemoryAccessList &Accesses) {
  for (MemoryAccess *Access : Accesses) {
    const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
        Access->getAccessInstruction(), MemoryKind::Array);
    if (CanonicalArray)
      return CanonicalArray;
  }
  return nullptr;
}
/// Check if @p Array serves as base array in an invariant load.
static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) {
  for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
    for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
      if (Access2->getScopArrayInfo() == Array)
        return true;
  return false;
}
/// Replace the base pointer arrays in all memory accesses referencing @p Old,
/// with a reference to @p New.
static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
                                 const ScopArrayInfo *New) {
  for (ScopStmt &Stmt : S)
    for (MemoryAccess *Access : Stmt) {
      if (Access->getLatestScopArrayInfo() != Old)
        continue;

      isl::id Id = New->getBasePtrId();
      isl::map Map = Access->getAccessRelation();
      Map = Map.set_tuple_id(isl::dim::out, Id);
      Access->setAccessRelation(Map);
    }
}
void ScopBuilder::canonicalizeDynamicBasePtrs() {
  for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
    MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;

    const ScopArrayInfo *CanonicalBasePtrSAI =
        findCanonicalArray(*scop, BasePtrAccesses);

    if (!CanonicalBasePtrSAI)
      continue;

    for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
      const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
          BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
      if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
          !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
        continue;

      // We currently do not canonicalize arrays where some accesses are
      // hoisted as invariant loads. If we would, we need to update the access
      // function of the invariant loads as well. However, as this is not a
      // very common situation, we leave this for now to avoid further
      // complexity increases.
      if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI))
        continue;

      replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI);
    }
  }
}
void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
  for (MemoryAccess *Access : Stmt.MemAccs) {
    Type *ElementType = Access->getElementType();

    MemoryKind Ty;
    if (Access->isPHIKind())
      Ty = MemoryKind::PHI;
    else if (Access->isExitPHIKind())
      Ty = MemoryKind::ExitPHI;
    else if (Access->isValueKind())
      Ty = MemoryKind::Value;
    else
      Ty = MemoryKind::Array;

    // Create isl::pw_aff objects for the SCEVs describing the sizes. Collect
    // all assumptions that are taken. isl::pw_aff objects are cached
    // internally and used later by the scop.
    for (const SCEV *Size : Access->Sizes) {
      if (!Size)
        continue;
      scop->getPwAff(Size, nullptr, false, &RecordedAssumptions);
    }
    auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
                                               ElementType, Access->Sizes, Ty);

    // Create isl::pw_aff objects for the SCEVs describing the subscripts.
    // Collect all assumptions that are taken. isl::pw_aff objects are cached
    // internally and used later by the scop.
    for (const SCEV *Subscript : Access->subscripts()) {
      if (!Access->isAffine() || !Subscript)
        continue;
      scop->getPwAff(Subscript, Stmt.getEntryBlock(), false,
                     &RecordedAssumptions);
    }
    Access->buildAccessRelation(SAI);
    scop->addAccessData(Access);
  }
}
/// Add the minimal/maximal access in @p Set to @p MinMaxAccesses.
///
/// @return True if more accesses should be added, false if we reached the
///         maximal number of run-time checks to be generated.
static bool buildMinMaxAccess(isl::set Set,
                              Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
  isl::pw_multi_aff MinPMA, MaxPMA;
  isl::pw_aff LastDimAff;
  isl::aff OneAff;
  unsigned Pos;

  Set = Set.remove_divs();
  polly::simplify(Set);

  if (unsignedFromIslSize(Set.n_basic_set()) > RunTimeChecksMaxAccessDisjuncts)
    Set = Set.simple_hull();

  // Restrict the number of parameters involved in the access as the lexmin/
  // lexmax computation will take too long if this number is high.
  //
  // Experiments with a simple test case using an i7 4800MQ:
  //
  //  #Parameters involved | Time (in sec)
  //
  if (isl_set_n_param(Set.get()) >
      static_cast<isl_size>(RunTimeChecksMaxParameters)) {
    unsigned InvolvedParams = 0;
    for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
      if (Set.involves_dims(isl::dim::param, u, 1))
        InvolvedParams++;

    if (InvolvedParams > RunTimeChecksMaxParameters)
      return false;
  }

  MinPMA = Set.lexmin_pw_multi_aff();
  MaxPMA = Set.lexmax_pw_multi_aff();

  MinPMA = MinPMA.coalesce();
  MaxPMA = MaxPMA.coalesce();

  if (MaxPMA.is_null())
    return false;

  unsigned MaxOutputSize = unsignedFromIslSize(MaxPMA.dim(isl::dim::out));

  // Adjust the last dimension of the maximal access by one as we want to
  // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
  // we test during code generation might now point after the end of the
  // allocated array but we will never dereference it anyway.
  assert(MaxOutputSize >= 1 && "Assumed at least one output dimension");
  Pos = MaxOutputSize - 1;
  LastDimAff = MaxPMA.at(Pos);
  OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
  OneAff = OneAff.add_constant_si(1);
  LastDimAff = LastDimAff.add(OneAff);
  MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);

  if (MinPMA.is_null() || MaxPMA.is_null())
    return false;

  MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));

  return true;
}
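// Sketch of how these pairs are later consumed (simplified; the exact form is
// emitted by the code generator): for two arrays A and B in the same alias
// group, the runtime check is essentially
//
//   max_access(A) <= min_access(B)  ||  max_access(B) <= min_access(A)
//
// The increment of the last output dimension above makes the maximal access
// exclusive, so the accessed regions can be compared as half-open ranges.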
/// Wrapper function to calculate minimal/maximal accesses to each array.
bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup,
                                        Scop::MinMaxVectorTy &MinMaxAccesses) {
  MinMaxAccesses.reserve(AliasGroup.size());

  isl::union_set Domains = scop->getDomains();
  isl::union_map Accesses = isl::union_map::empty(scop->getIslCtx());

  for (MemoryAccess *MA : AliasGroup)
    Accesses = Accesses.unite(MA->getAccessRelation());

  Accesses = Accesses.intersect_domain(Domains);
  isl::union_set Locations = Accesses.range();

  bool LimitReached = false;
  for (isl::set Set : Locations.get_set_list()) {
    LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop);
    if (LimitReached)
      break;
  }

  return !LimitReached;
}
static isl::set getAccessDomain(MemoryAccess *MA) {
  isl::set Domain = MA->getStatement()->getDomain();
  Domain = Domain.project_out(isl::dim::set, 0,
                              unsignedFromIslSize(Domain.tuple_dim()));
  return Domain.reset_tuple_id();
}
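// Note: after projecting out all set dimensions and resetting the tuple id,
// the returned set only constrains parameters (e.g. { [] : n > 0 }), which is
// exactly the "can this access ever execute" information used below when
// splitting alias groups by domain.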
bool ScopBuilder::buildAliasChecks() {
  if (!PollyUseRuntimeAliasChecks)
    return true;

  if (buildAliasGroups()) {
    // Aliasing assumptions do not go through addAssumption but we still want
    // to collect statistics, so we do it here explicitly.
    if (scop->getAliasGroups().size())
      Scop::incrementNumberOfAliasingAssumptions(1);
    return true;
  }

  // If a problem occurs while building the alias groups we need to delete
  // this SCoP and pretend it wasn't valid in the first place. To this end
  // we make the assumed context infeasible.
  scop->invalidate(ALIASING, DebugLoc());

  POLLY_DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr()
                     << " could not be created. This SCoP has been dismissed.");
  return false;
}
std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
ScopBuilder::buildAliasGroupsForAccesses() {
  BatchAAResults BAA(AA);
  AliasSetTracker AST(BAA);

  DenseMap<Value *, MemoryAccess *> PtrToAcc;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;
  for (ScopStmt &Stmt : *scop) {

    isl::set StmtDomain = Stmt.getDomain();
    bool StmtDomainEmpty = StmtDomain.is_empty();

    // Statements with an empty domain will never be executed.
    if (StmtDomainEmpty)
      continue;

    for (MemoryAccess *MA : Stmt) {
      if (MA->isScalarKind())
        continue;
      if (!MA->isRead())
        HasWriteAccess.insert(MA->getScopArrayInfo());
      MemAccInst Acc(MA->getAccessInstruction());
      if (MA->isRead() && isa<MemTransferInst>(Acc))
        PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
      else
        PtrToAcc[Acc.getPointerOperand()] = MA;
      AST.add(Acc);
    }
  }

  AliasGroupVectorTy AliasGroups;
  for (AliasSet &AS : AST) {
    if (AS.isMustAlias() || AS.isForwardingAliasSet())
      continue;
    AliasGroupTy AG;
    for (const Value *Ptr : AS.getPointers())
      AG.push_back(PtrToAcc[const_cast<Value *>(Ptr)]);
    if (AG.size() < 2)
      continue;
    AliasGroups.push_back(std::move(AG));
  }

  return std::make_tuple(AliasGroups, HasWriteAccess);
}
bool ScopBuilder::buildAliasGroups() {
  // To create sound alias checks we perform the following steps:
  //   o) We partition each group into read-only and non-read-only accesses.
  //   o) For each group with more than one base pointer we then compute
  //      minimal and maximal accesses to each array of a group in the
  //      read-only and non-read-only partitions separately.
  AliasGroupVectorTy AliasGroups;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;

  std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses();

  splitAliasGroupsByDomain(AliasGroups);

  for (AliasGroupTy &AG : AliasGroups) {
    if (!scop->hasFeasibleRuntimeContext())
      return false;

    {
      IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut);
      bool Valid = buildAliasGroup(AG, HasWriteAccess);
      if (!Valid)
        return false;
    }
    if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) {
      scop->invalidate(COMPLEXITY, DebugLoc());
      return false;
    }
  }

  return true;
}
bool ScopBuilder::buildAliasGroup(
    AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
  AliasGroupTy ReadOnlyAccesses;
  AliasGroupTy ReadWriteAccesses;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;

  if (AliasGroup.size() < 2)
    return true;

  for (MemoryAccess *Access : AliasGroup) {
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
                                        Access->getAccessInstruction())
             << "Possibly aliasing pointer, use restrict keyword.");
    const ScopArrayInfo *Array = Access->getScopArrayInfo();
    if (HasWriteAccess.count(Array)) {
      ReadWriteArrays.insert(Array);
      ReadWriteAccesses.push_back(Access);
    } else {
      ReadOnlyArrays.insert(Array);
      ReadOnlyAccesses.push_back(Access);
    }
  }

  // If there are no read-only pointers, and fewer than two read-write
  // pointers, no alias check is needed.
  if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
    return true;

  // If there is no read-write pointer, no alias check is needed.
  if (ReadWriteArrays.empty())
    return true;

  // For non-affine accesses, no alias check can be generated as we cannot
  // compute a sufficiently tight lower and upper bound: bail out.
  for (MemoryAccess *MA : AliasGroup) {
    if (!MA->isAffine()) {
      scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
                       MA->getAccessInstruction()->getParent());
      return false;
    }
  }

  // Ensure that for all memory accesses for which we generate alias checks,
  // their base pointers are available.
  for (MemoryAccess *MA : AliasGroup) {
    if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
      scop->addRequiredInvariantLoad(
          cast<LoadInst>(BasePtrMA->getAccessInstruction()));
  }

  //  scop->getAliasGroups().emplace_back();
  //  Scop::MinMaxVectorPairTy &pair = scop->getAliasGroups().back();
  Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
  Scop::MinMaxVectorTy MinMaxAccessesReadOnly;

  bool Valid;

  Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite);

  if (!Valid)
    return false;

  // Bail out if the number of values we need to compare is too large.
  // This is important as the number of comparisons grows quadratically with
  // the number of values we need to compare.
  if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
      RunTimeChecksMaxArraysPerGroup)
    return false;

  Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);

  scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
  if (!Valid)
    return false;

  return true;
}
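// Note on the partitioning above (rough reasoning, not a quote from the code
// generator): read-only arrays only ever need to be compared against the
// read-write ones, never against each other, so the number of generated
// comparisons stays well below a full quadratic blow-up over all accesses in
// the group.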
void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
  for (unsigned u = 0; u < AliasGroups.size(); u++) {
    AliasGroupTy NewAG;
    AliasGroupTy &AG = AliasGroups[u];
    AliasGroupTy::iterator AGI = AG.begin();
    isl::set AGDomain = getAccessDomain(*AGI);
    while (AGI != AG.end()) {
      MemoryAccess *MA = *AGI;
      isl::set MADomain = getAccessDomain(MA);
      if (AGDomain.is_disjoint(MADomain)) {
        NewAG.push_back(MA);
        AGI = AG.erase(AGI);
      } else {
        AGDomain = AGDomain.unite(MADomain);
        AGI++;
      }
    }
    if (NewAG.size() > 1)
      AliasGroups.push_back(std::move(NewAG));
  }
}
static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
  auto PhysUse = VirtualUse::create(S, Op, &LI, false);
  auto VirtUse = VirtualUse::create(S, Op, &LI, true);
  assert(PhysUse.getKind() == VirtUse.getKind());
}
/// Check the consistency of every statement's MemoryAccesses.
///
/// The check is carried out by expecting the "physical" kind of use (derived
/// from the BasicBlock the instruction resides in) to be the same as the
/// "virtual" kind of use (derived from a statement's MemoryAccess).
///
/// The "physical" uses are taken by ensureValueRead to determine whether to
/// create MemoryAccesses. When done, the kind of scalar access should be the
/// same no matter which way it was derived.
///
/// The MemoryAccesses might be changed by later SCoP-modifying passes and
/// hence can intentionally influence the kind of uses (no longer corresponding
/// to the "physical" ones, hence called "virtual"). The CodeGenerator
/// therefore has to pick up the virtual uses. But here, in the ScopBuilder,
/// this has not happened yet, such that virtual and physical uses are
/// equivalent.
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
  for (auto *BB : S->getRegion().blocks()) {
    for (auto &Inst : *BB) {
      auto *Stmt = S->getStmtFor(&Inst);
      if (!Stmt)
        continue;

      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Branch conditions are encoded in the statement domains.
      if (Inst.isTerminator() && Stmt->isBlockStmt())
        continue;

      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);

      // Stores do not produce values used by other statements.
      if (isa<StoreInst>(Inst))
        continue;

      // For every value defined in the block, also check that a use of that
      // value in the same statement would not be an inter-statement use. It
      // can still be synthesizable or load-hoisted, but these kinds of
      // instructions are not directly copied in code-generation.
      auto VirtDef =
          VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
      assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
             VirtDef.getKind() == VirtualUse::Intra ||
             VirtDef.getKind() == VirtualUse::Hoisted);
    }
  }

  if (S->hasSingleExitEdge())
    return;

  // PHINodes in the SCoP region's exit block are also uses to be checked.
  if (!S->getRegion().isTopLevelRegion()) {
    for (auto &Inst : *S->getRegion().getExit()) {
      if (!isa<PHINode>(Inst))
        continue;

      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);
    }
  }
}
void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
  scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE,
                      getNextID((*R.getEntry()->getParent()).getName().str())));

  // Create all invariant load instructions first. These are categorized as
  // 'synthesizable', therefore are not part of any ScopStmt but need to be
  // created somewhere.
  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  for (BasicBlock *BB : scop->getRegion().blocks()) {
    if (SD.isErrorBlock(*BB, scop->getRegion()))
      continue;

    for (Instruction &Inst : *BB) {
      LoadInst *Load = dyn_cast<LoadInst>(&Inst);
      if (!Load)
        continue;

      if (!RIL.count(Load))
        continue;

      // Invariant loads require a MemoryAccess to be created in some
      // statement. It is not important to which statement the MemoryAccess is
      // added because it will later be removed from the ScopStmt again. We
      // choose the first statement of the basic block the LoadInst is in.
      ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
      assert(!List.empty());
      ScopStmt *RILStmt = List.front();
      buildMemoryAccess(Load, RILStmt);
    }
  }
  buildAccessFunctions();

  // In case the region does not have an exiting block we will later (during
  // code generation) split the exit block. This will move potential PHI nodes
  // from the current exit block into the new region exiting block. Hence, PHI
  // nodes that are currently not part of the region will become part of it.
  // To handle these PHI nodes later we will now model their operands as scalar
  // accesses. Note that we do not model anything in the exit block if we have
  // an exiting block in the region, as there will not be any splitting later.
  if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
    for (Instruction &Inst : *R.getExit()) {
      PHINode *PHI = dyn_cast<PHINode>(&Inst);
      if (!PHI)
        break;

      buildPHIAccesses(nullptr, PHI, nullptr, true);
    }
  }

  // Create memory accesses for global reads since all arrays are now known.
  const SCEV *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
  for (auto GlobalReadPair : GlobalReads) {
    ScopStmt *GlobalReadStmt = GlobalReadPair.first;
    Instruction *GlobalRead = GlobalReadPair.second;
    for (auto *BP : ArrayBasePointers)
      addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
                     BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
  }

  buildInvariantEquivalenceClasses();

  /// A map from basic blocks to their invalid domains.
  DenseMap<BasicBlock *, isl::set> InvalidDomainMap;

  if (!buildDomains(&R, InvalidDomainMap)) {
    POLLY_DEBUG(
        dbgs() << "Bailing-out because buildDomains encountered problems\n");
    return;
  }

  addUserAssumptions(AC, InvalidDomainMap);

  // Initialize the invalid domain.
  for (ScopStmt &Stmt : scop->Stmts)
    if (Stmt.isBlockStmt())
      Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
    else
      Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
          Stmt.getRegion()->getNode())]);

  // Remove empty statements.
  // Exit early in case there are no executable statements left in this scop.
  scop->removeStmtNotInDomainMap();
  scop->simplifySCoP(false);
  if (scop->isEmpty()) {
    POLLY_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
    return;
  }

  // The ScopStmts now have enough information to initialize themselves.
  for (ScopStmt &Stmt : *scop) {
    collectSurroundingLoops(Stmt);

    buildDomain(Stmt);
    buildAccessRelations(Stmt);

    if (DetectReductions)
      checkForReductions(Stmt);
  }

  // Check early for a feasible runtime context.
  if (!scop->hasFeasibleRuntimeContext()) {
    POLLY_DEBUG(
        dbgs() << "Bailing-out because of unfeasible context (early)\n");
    return;
  }

  // Check early for profitability. Afterwards it cannot change anymore,
  // only the runtime context could become infeasible.
  if (!scop->isProfitable(UnprofitableScalarAccs)) {
    scop->invalidate(PROFITABLE, DebugLoc());
    POLLY_DEBUG(
        dbgs() << "Bailing-out because SCoP is not considered profitable\n");
    return;
  }

  buildSchedule();

  finalizeAccesses();

  scop->realignParams();
  addUserContext();

  // After the context was fully constructed, thus all our knowledge about
  // the parameters is in there, we add all recorded assumptions to the
  // assumed/invalid context.
  addRecordedAssumptions();

  scop->simplifyContexts();
  if (!buildAliasChecks()) {
    POLLY_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
    return;
  }

  hoistInvariantLoads();
  canonicalizeDynamicBasePtrs();
  verifyInvariantLoads();
  scop->simplifySCoP(true);

  // Check late for a feasible runtime context because profitability did not
  // change.
  if (!scop->hasFeasibleRuntimeContext()) {
    POLLY_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
    return;
  }

#ifndef NDEBUG
  verifyUses(scop.get(), LI, DT);
#endif
}
ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AAResults &AA,
                         const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
                         ScopDetection &SD, ScalarEvolution &SE,
                         OptimizationRemarkEmitter &ORE)
    : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
  DebugLoc Beg, End;
  auto P = getBBPairForRegion(R);
  getDebugLocations(P, Beg, End);

  std::string Msg = "SCoP begins here.";
  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
           << Msg);

  buildScop(*R, AC);

  POLLY_DEBUG(dbgs() << *scop);

  if (!scop->hasFeasibleRuntimeContext()) {
    InfeasibleScops++;
    Msg = "SCoP ends here but was dismissed.";
    POLLY_DEBUG(dbgs() << "SCoP detected but dismissed\n");
    RecordedAssumptions.clear();
    scop.reset();
  } else {
    Msg = "SCoP ends here.";
    ++ScopFound;
    if (scop->getMaxLoopDepth() > 0)
      ++RichScopFound;
  }

  if (R->isTopLevelRegion())
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
             << Msg);
  else
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
             << Msg);
}