//===- ScopBuilder.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the SCoP
// detection derived from their LLVM-IR code.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopBuilder.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopInfo.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLTools.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"

STATISTIC(ScopFound, "Number of valid Scops");
STATISTIC(RichScopFound, "Number of Scops containing a loop");
STATISTIC(InfeasibleScops,
          "Number of SCoPs with statically infeasible context.");

bool polly::ModelReadOnlyScalars;
// The maximal number of dimensions we allow during invariant load construction.
// More complex access ranges will result in very high compile time and are also
// unlikely to result in good code. This value is very high and should only
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
static unsigned const MaxDimensionsInAccessRange = 9;

static cl::opt<bool, true> XModelReadOnlyScalars(
    "polly-analyze-read-only-scalars",
    cl::desc("Model read-only scalar values in the scop description"),
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::init(true),
    cl::cat(PollyCategory));
static cl::opt<int>
    OptComputeOut("polly-analysis-computeout",
                  cl::desc("Bound the scop analysis by a maximal amount of "
                           "computational steps (0 means no bound)"),
                  cl::Hidden, cl::init(800000), cl::cat(PollyCategory));
static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
    "polly-allow-dereference-of-all-function-parameters",
    cl::desc(
        "Treat all parameters to functions that are pointers as dereferencible."
        " This is useful for invariant load hoisting, since we can generate"
        " less runtime checks. This is only valid if all pointers to functions"
        " are always initialized, so that Polly can choose to hoist"
        " their loads."),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
static cl::opt<bool>
    PollyIgnoreInbounds("polly-ignore-inbounds",
                        cl::desc("Do not take inbounds assumptions at all"),
                        cl::Hidden, cl::init(false), cl::cat(PollyCategory));
static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
    "polly-rtc-max-arrays-per-group",
    cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::init(20), cl::cat(PollyCategory));
static cl::opt<unsigned> RunTimeChecksMaxAccessDisjuncts(
    "polly-rtc-max-array-disjuncts",
    cl::desc("The maximal number of disjuncts allowed in memory accesses to "
             "describe a SCoP."),
    cl::Hidden, cl::init(8), cl::cat(PollyCategory));
static cl::opt<unsigned> RunTimeChecksMaxParameters(
    "polly-rtc-max-parameters",
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::init(8), cl::cat(PollyCategory));
static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
static cl::opt<std::string> UserContextStr(
    "polly-context", cl::value_desc("isl parameter set"),
    cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));
static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::init(true),
                                      cl::cat(PollyCategory));
// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations
// are, in contrast, pretty stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden,
    cl::cat(PollyCategory));
enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };

static cl::opt<GranularityChoice> StmtGranularity(
    "polly-stmt-granularity",
    cl::desc(
        "Algorithm to use for splitting basic blocks into multiple statements"),
    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
                          "One statement per basic block"),
               clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
                          "Scalar independence heuristic"),
               clEnumValN(GranularityChoice::Stores, "store",
                          "Store-level granularity")),
    cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));
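// Illustrative example of the granularity choices above (a sketch, roughly):
// for a basic block containing the two unrelated stores
//
//   A[i] = x + 1;
//   B[i] = y * 2;
//
// "bb" keeps both stores in a single ScopStmt, "store" gives each store its
// own ScopStmt, and "scalar-indep" splits only where the computations share
// no scalar dependences (here likewise into two statements).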
/// Helper to treat non-affine regions and basic blocks the same.

/// Return the block that is the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
                           : RN->getNodeAs<BasicBlock>();
}
/// Return the @p idx'th block that is executed after @p RN.
static inline BasicBlock *
getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
  if (RN->isSubRegion()) {
    assert(idx == 0);
    return RN->getNodeAs<Region>()->getExit();
  }
  return TI->getSuccessor(idx);
}
static bool containsErrorBlock(RegionNode *RN, const Region &R,
                               ScopDetection *SD) {
  if (!RN->isSubRegion())
    return SD->isErrorBlock(*RN->getNodeAs<BasicBlock>(), R);
  for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
    if (SD->isErrorBlock(*BB, R))
      return true;
  return false;
}
/// Create a map to map from a given iteration to a subsequent iteration.
///
/// This map maps from SetSpace -> SetSpace where the dimension @p Dim
/// is incremented by one and all other dimensions are equal, e.g.,
///             [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
///
/// if @p Dim is 2 and @p SetSpace has 4 dimensions.
static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
  isl::space MapSpace = SetSpace.map_from_set();
  isl::map NextIterationMap = isl::map::universe(MapSpace);
  for (unsigned u : rangeIslSize(0, NextIterationMap.domain_tuple_dim()))
    if (u != Dim)
      NextIterationMap =
          NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u);
  isl::constraint C =
      isl::constraint::alloc_equality(isl::local_space(MapSpace));
  C = C.set_constant_si(1);
  C = C.set_coefficient_si(isl::dim::in, Dim, 1);
  C = C.set_coefficient_si(isl::dim::out, Dim, -1);
  NextIterationMap = NextIterationMap.add_constraint(C);
  return NextIterationMap;
}
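// For illustration: with a 2-dimensional SetSpace and Dim = 1, the equality
// "1 + in_1 - out_1 = 0" built above yields the map
//   { [i0, i1] -> [i0, i1 + 1] }
// i.e., exactly the "next iteration" relation of the inner dimension.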
/// Add @p BSet to set @p BoundedParts if @p BSet is bounded.
static isl::set collectBoundedParts(isl::set S) {
  isl::set BoundedParts = isl::set::empty(S.get_space());
  for (isl::basic_set BSet : S.get_basic_set_list())
    if (BSet.is_bounded())
      BoundedParts = BoundedParts.unite(isl::set(BSet));
  return BoundedParts;
}
/// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded then a bounded subset,
///          both with regards to the dimension @p Dim.
static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
                                                       unsigned Dim) {
  for (unsigned u : rangeIslSize(0, S.tuple_dim()))
    S = S.lower_bound_si(isl::dim::set, u, 0);

  unsigned NumDimsS = unsignedFromIslSize(S.tuple_dim());
  isl::set OnlyDimS = S;

  // Remove dimensions that are greater than Dim as they are not interesting.
  assert(NumDimsS >= Dim + 1);
  OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Create artificial parametric upper bounds for dimensions smaller than Dim
  // as we are not interested in them.
  OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim);

  for (unsigned u = 0; u < Dim; u++) {
    isl::constraint C = isl::constraint::alloc_inequality(
        isl::local_space(OnlyDimS.get_space()));
    C = C.set_coefficient_si(isl::dim::param, u, 1);
    C = C.set_coefficient_si(isl::dim::set, u, -1);
    OnlyDimS = OnlyDimS.add_constraint(C);
  }

  // Collect all bounded parts of OnlyDimS.
  isl::set BoundedParts = collectBoundedParts(OnlyDimS);

  // Create the dimensions greater than Dim again.
  BoundedParts =
      BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Remove the artificial upper bound parameters again.
  BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim);

  isl::set UnboundedParts = S.subtract(BoundedParts);
  return std::make_pair(UnboundedParts, BoundedParts);
}
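// For illustration: for S = { [i] : i >= 0 } and Dim = 0 the bounded part is
// empty and the unbounded part is all of S, whereas S = { [i] : 0 <= i < N }
// is entirely bounded in dimension 0 (by the parameter N) and the unbounded
// part is empty. addLoopBoundsToHeaderDomain uses the unbounded part to record
// the assumption that loops terminate.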
/// Create the conditions under which @p L @p Pred @p R is true.
static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
                                  isl::pw_aff R) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    return L.eq_set(R);
  case ICmpInst::ICMP_NE:
    return L.ne_set(R);
  case ICmpInst::ICMP_SLT:
    return L.lt_set(R);
  case ICmpInst::ICMP_SLE:
    return L.le_set(R);
  case ICmpInst::ICMP_SGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_SGE:
    return L.ge_set(R);
  case ICmpInst::ICMP_ULT:
    return L.lt_set(R);
  case ICmpInst::ICMP_UGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_ULE:
    return L.le_set(R);
  case ICmpInst::ICMP_UGE:
    return L.ge_set(R);
  default:
    llvm_unreachable("Non integer predicate not supported");
  }
}
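// For illustration: with L = { [i] -> [(i)] } and R = { [i] -> [(N)] }, the
// predicate ICMP_SLT yields the condition set { [i] : i < N }. The unsigned
// predicates map to the same signed relations here; the callers establish the
// required non-negativity (see buildUnsignedConditionSets and the NonNeg flag
// passed to getPwAff).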
isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL,
                                             Loop *NewL) {
  // If the loops are the same there is nothing to do.
  if (NewL == OldL)
    return Dom;

  int OldDepth = scop->getRelativeLoopDepth(OldL);
  int NewDepth = scop->getRelativeLoopDepth(NewL);
  // If both loops are non-affine loops there is nothing to do.
  if (OldDepth == -1 && NewDepth == -1)
    return Dom;

  // Distinguish three cases:
  //   1) The depth is the same but the loops are not.
  //      => One loop was left one was entered.
  //   2) The depth increased from OldL to NewL.
  //      => One loop was entered, none was left.
  //   3) The depth decreased from OldL to NewL.
  //      => Loops were left; the difference of the depths defines how many.
  if (OldDepth == NewDepth) {
    assert(OldL->getParentLoop() == NewL->getParentLoop());
    Dom = Dom.project_out(isl::dim::set, NewDepth, 1);
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else if (OldDepth < NewDepth) {
    assert(OldDepth + 1 == NewDepth);
    auto &R = scop->getRegion();
    (void)R;
    assert(NewL->getParentLoop() == OldL ||
           ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else {
    assert(OldDepth > NewDepth);
    unsigned Diff = OldDepth - NewDepth;
    unsigned NumDim = unsignedFromIslSize(Dom.tuple_dim());
    assert(NumDim >= Diff);
    Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff);
  }

  return Dom;
}
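// For illustration: when control flows from a block inside a 2-deep loop nest
// to a block in the surrounding 1-deep nest, the innermost dimension is
// projected out, e.g. { [i, j] : 0 <= i < N and 0 <= j < M } becomes
// { [i] : 0 <= i < N }; entering one additional loop instead appends a fresh,
// unconstrained dimension.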
/// Compute the isl representation for the SCEV @p E in this BB.
///
/// @param BB               The BB for which isl representation is to be
///                         computed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate that @p E has to be non-negative.
///
/// Note that this function will also adjust the invalid context accordingly.

__isl_give isl_pw_aff *
ScopBuilder::getPwAff(BasicBlock *BB,
                      DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                      const SCEV *E, bool NonNegative) {
  PWACtx PWAC = scop->getPwAff(E, BB, NonNegative, &RecordedAssumptions);
  InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
  return PWAC.first.release();
}
/// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands, since an operand with
/// the MSB (aka the sign bit) set would wrap around to a negative value under
/// the signed interpretation used for the other predicates.
///
/// @param IsStrictUpperBound holds information on the predicate relation
/// between TestVal and UpperBound, i.e.,
/// TestVal < UpperBound  OR  TestVal <= UpperBound
__isl_give isl_set *ScopBuilder::buildUnsignedConditionSets(
    BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain,
    const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    bool IsStrictUpperBound) {
  // Do not take NonNeg assumption on TestVal
  // as it might have MSB (Sign bit) set.
  isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, SCEV_TestVal, false);
  // Take NonNeg assumption on UpperBound.
  isl_pw_aff *UpperBound =
      getPwAff(BB, InvalidDomainMap, SCEV_UpperBound, true);

  // 0 <= TestVal
  isl_set *First =
      isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
                            isl_pw_aff_get_domain_space(TestVal))),
                        isl_pw_aff_copy(TestVal));

  isl_set *Second;
  if (IsStrictUpperBound)
    // TestVal < UpperBound
    Second = isl_pw_aff_lt_set(TestVal, UpperBound);
  else
    // TestVal <= UpperBound
    Second = isl_pw_aff_le_set(TestVal, UpperBound);

  isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
  return ConsequenceCondSet;
}
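// For illustration: for a branch on "i <u N" this returns the set
//   { [...] : 0 <= i and i < N }
// i.e. the strict upper bound combined with the explicit lower bound 0 that
// replaces the NonNeg assumption which is deliberately not taken on TestVal.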
bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  Value *Condition = getConditionFromTerminator(SI);
  assert(Condition && "No condition for switch");

  isl_pw_aff *LHS, *RHS;
  LHS = getPwAff(BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));

  unsigned NumSuccessors = SI->getNumSuccessors();
  ConditionSets.resize(NumSuccessors);
  for (auto &Case : SI->cases()) {
    unsigned Idx = Case.getSuccessorIndex();
    ConstantInt *CaseValue = Case.getCaseValue();

    RHS = getPwAff(BB, InvalidDomainMap, SE.getSCEV(CaseValue));
    isl_set *CaseConditionSet =
        buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS),
                          isl::manage(RHS))
            .release();
    ConditionSets[Idx] = isl_set_coalesce(
        isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
  }

  assert(ConditionSets[0] == nullptr && "Default condition set was set");
  isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
  for (unsigned u = 2; u < NumSuccessors; u++)
    ConditionSetUnion =
        isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
  ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion);

  isl_pw_aff_free(LHS);
  return true;
}
bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L,
    __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  isl_set *ConsequenceCondSet = nullptr;

  if (auto Load = dyn_cast<LoadInst>(Condition)) {
    const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
    const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
    bool NonNeg = false;
    isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, LHSSCEV, NonNeg);
    isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, RHSSCEV, NonNeg);
    ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS),
                                           isl::manage(RHS))
                             .release();
  } else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast<ConstantInt>(
        getUniqueNonErrorValue(PHI, &scop->getRegion(), &SD));
    assert(Unique &&
           "A PHINode condition should only be accepted by ScopDetection if "
           "getUniqueNonErrorValue returns non-NULL");

    if (Unique->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
    if (CCond->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    assert(Opcode == Instruction::And || Opcode == Instruction::Or);

    bool Valid = buildConditionSets(BB, BinOp->getOperand(0), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets) &&
                 buildConditionSets(BB, BinOp->getOperand(1), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets);
    if (!Valid) {
      while (!ConditionSets.empty())
        isl_set_free(ConditionSets.pop_back_val());
      return false;
    }

    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart1 = ConditionSets.pop_back_val();

    if (Opcode == Instruction::And)
      ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
    else
      ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
  } else {
    auto *ICond = dyn_cast<ICmpInst>(Condition);
    assert(ICond &&
           "Condition of exiting branch was neither constant nor ICmp!");

    Region &R = scop->getRegion();

    isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assumed the signed bit of neither operand
    // to be set. The comparison is equal to a signed comparison under this
    // assumption.
    bool NonNeg = ICond->isUnsigned();
    const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
               *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);

    LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, &SD);
    RightOperand = tryForwardThroughPHI(RightOperand, R, SE, &SD);

    switch (ICond->getPredicate()) {
    case ICmpInst::ICMP_ULT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_ULE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, false);
      break;
    case ICmpInst::ICMP_UGT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_UGE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, false);
      break;
    default:
      LHS = getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg);
      RHS = getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg);
      ConsequenceCondSet = buildConditionSet(ICond->getPredicate(),
                                             isl::manage(LHS), isl::manage(RHS))
                               .release();
      break;
    }
  }

  // If no terminator was given we are only looking for parameter constraints
  // under which @p Condition is true/false.
  if (!TI)
    ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
  assert(ConsequenceCondSet);
  ConsequenceCondSet = isl_set_coalesce(
      isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));

  isl_set *AlternativeCondSet = nullptr;
  bool TooComplex =
      isl_set_n_basic_set(ConsequenceCondSet) >= (int)MaxDisjunctsInDomain;

  if (!TooComplex) {
    AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
                                          isl_set_copy(ConsequenceCondSet));
    TooComplex =
        isl_set_n_basic_set(AlternativeCondSet) >= (int)MaxDisjunctsInDomain;
  }

  if (TooComplex) {
    scop->invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
                     TI ? TI->getParent() : nullptr /* BasicBlock */);
    isl_set_free(AlternativeCondSet);
    isl_set_free(ConsequenceCondSet);
    return false;
  }

  ConditionSets.push_back(ConsequenceCondSet);
  ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));

  return true;
}
bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
    return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap,
                              ConditionSets);

  assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");

  if (TI->getNumSuccessors() == 1) {
    ConditionSets.push_back(isl_set_copy(Domain));
    return true;
  }

  Value *Condition = getConditionFromTerminator(TI);
  assert(Condition && "No condition for Terminator");

  return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap,
                            ConditionSets);
}
bool ScopBuilder::propagateDomainConstraints(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Iterate over the region R and propagate the domain constraints from the
  // predecessors to the current node. In contrast to the
  // buildDomainsWithBranchConstraints function, this one will pull the domain
  // information from the predecessors instead of pushing it to the successors.
  // Additionally, we assume the domains to be already present in the domain
  // map here. However, we iterate again in reverse post order so we know all
  // predecessors have been visited before a block or non-affine subregion is
  // visited.

  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        if (!propagateDomainConstraints(SubRegion, InvalidDomainMap))
          return false;
        continue;
      }
    }

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = scop->getOrInitEmptyDomain(BB);
    assert(!Domain.is_null());

    // Under the union of all predecessor conditions we can reach this block.
    isl::set PredDom = getPredecessorDomainConstraints(BB, Domain);
    Domain = Domain.intersect(PredDom).coalesce();
    Domain = Domain.align_params(scop->getParamSpace());

    Loop *BBLoop = getRegionNodeLoop(RN, LI);
    if (BBLoop && BBLoop->getHeader() == BB && scop->contains(BBLoop))
      if (!addLoopBoundsToHeaderDomain(BBLoop, InvalidDomainMap))
        return false;
  }

  return true;
}
void ScopBuilder::propagateDomainConstraintsToRegionExit(
    BasicBlock *BB, Loop *BBLoop,
    SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Check if the block @p BB is the entry of a region. If so we propagate its
  // domain to the exit block of the region. Otherwise we are done.
  auto *RI = scop->getRegion().getRegionInfo();
  auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
  auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
  if (!BBReg || BBReg->getEntry() != BB || !scop->contains(ExitBB))
    return;

  // Do not propagate the domain if there is a loop backedge inside the region
  // that would prevent the exit block from being executed.
  auto *L = BBLoop;
  while (L && scop->contains(L)) {
    SmallVector<BasicBlock *, 4> LatchBBs;
    BBLoop->getLoopLatches(LatchBBs);
    for (auto *LatchBB : LatchBBs)
      if (BB != LatchBB && BBReg->contains(LatchBB))
        return;
    L = L->getParentLoop();
  }

  isl::set Domain = scop->getOrInitEmptyDomain(BB);
  assert(!Domain.is_null() && "Cannot propagate a nullptr");

  Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, scop->getBoxedLoops());

  // Since the dimensions of @p BB and @p ExitBB might be different we have to
  // adjust the domain before we can propagate it.
  isl::set AdjustedDomain = adjustDomainDimensions(Domain, BBLoop, ExitBBLoop);
  isl::set &ExitDomain = scop->getOrInitEmptyDomain(ExitBB);

  // If the exit domain is not yet created we set it, otherwise we "add" the
  // current domain.
  ExitDomain =
      !ExitDomain.is_null() ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;

  // Initialize the invalid domain.
  InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());

  FinishedExitBlocks.insert(ExitBB);
}
isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
                                                      isl::set Domain) {
  // If @p BB is the ScopEntry we are done
  if (scop->getRegion().getEntry() == BB)
    return isl::set::universe(Domain.get_space());

  // The region info of this function.
  auto &RI = *scop->getRegion().getRegionInfo();

  Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, scop->getBoxedLoops());

  // A domain to collect all predecessor domains, thus all conditions under
  // which the block is executed. To this end we start with the empty domain.
  isl::set PredDom = isl::set::empty(Domain.get_space());

  // Set of regions of which the entry block domain has been propagated to BB.
  // All predecessors inside any of the regions can be skipped.
  SmallSet<Region *, 8> PropagatedRegions;

  for (auto *PredBB : predecessors(BB)) {
    // Skip backedges.
    if (DT.dominates(BB, PredBB))
      continue;

    // If the predecessor is in a region we used for propagation we can skip it.
    auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
    if (llvm::any_of(PropagatedRegions, PredBBInRegion)) {
      continue;
    }

    // Check if there is a valid region we can use for propagation, thus look
    // for a region that contains the predecessor and has @p BB as exit block.
    // FIXME: This was a side-effect-free (and possibly infinite) loop when
    // committed and seems not to be needed.
    auto *PredR = RI.getRegionFor(PredBB);
    while (PredR->getExit() != BB && !PredR->contains(BB))
      PredR = PredR->getParent();

    // If a valid region for propagation was found use the entry of that region
    // for propagation, otherwise the PredBB directly.
    if (PredR->getExit() == BB) {
      PredBB = PredR->getEntry();
      PropagatedRegions.insert(PredR);
    }

    isl::set PredBBDom = scop->getDomainConditions(PredBB);
    Loop *PredBBLoop =
        getFirstNonBoxedLoopFor(PredBB, LI, scop->getBoxedLoops());
    PredBBDom = adjustDomainDimensions(PredBBDom, PredBBLoop, BBLoop);
    PredDom = PredDom.unite(PredBBDom);
  }

  return PredDom;
}
bool ScopBuilder::addLoopBoundsToHeaderDomain(
    Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  int LoopDepth = scop->getRelativeLoopDepth(L);
  assert(LoopDepth >= 0 && "Loop in region should have at least depth one");

  BasicBlock *HeaderBB = L->getHeader();
  assert(scop->isDomainDefined(HeaderBB));
  isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(HeaderBB);

  isl::map NextIterationMap =
      createNextIterationMap(HeaderBBDom.get_space(), LoopDepth);

  isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());

  SmallVector<BasicBlock *, 4> LatchBlocks;
  L->getLoopLatches(LatchBlocks);

  for (BasicBlock *LatchBB : LatchBlocks) {
    // If the latch is only reachable via error statements we skip it.
    if (!scop->isDomainDefined(LatchBB))
      continue;

    isl::set LatchBBDom = scop->getDomainConditions(LatchBB);

    isl::set BackedgeCondition;

    Instruction *TI = LatchBB->getTerminator();
    BranchInst *BI = dyn_cast<BranchInst>(TI);
    assert(BI && "Only branch instructions allowed in loop latches");

    if (BI->isUnconditional())
      BackedgeCondition = LatchBBDom;
    else {
      SmallVector<isl_set *, 8> ConditionSets;
      int idx = BI->getSuccessor(0) != HeaderBB;
      if (!buildConditionSets(LatchBB, TI, L, LatchBBDom.get(),
                              InvalidDomainMap, ConditionSets))
        return false;

      // Free the non back edge condition set as we do not need it.
      isl_set_free(ConditionSets[1 - idx]);

      BackedgeCondition = isl::manage(ConditionSets[idx]);
    }

    int LatchLoopDepth = scop->getRelativeLoopDepth(LI.getLoopFor(LatchBB));
    assert(LatchLoopDepth >= LoopDepth);
    BackedgeCondition = BackedgeCondition.project_out(
        isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
    UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
  }

  isl::map ForwardMap = isl::map::lex_le(HeaderBBDom.get_space());
  for (int i = 0; i < LoopDepth; i++)
    ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);

  isl::set UnionBackedgeConditionComplement =
      UnionBackedgeCondition.complement();
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
                                                      0);
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.apply(ForwardMap);
  HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
  HeaderBBDom = HeaderBBDom.apply(NextIterationMap);

  auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
  HeaderBBDom = Parts.second;

  // Check if there is a <nsw> tagged AddRec for this loop and if so do not
  // require a runtime check. The assumption is already implied by the <nsw>
  // tag.
  bool RequiresRTC = !scop->hasNSWAddRecForLoop(L);

  isl::set UnboundedCtx = Parts.first.params();
  recordAssumption(&RecordedAssumptions, INFINITELOOP, UnboundedCtx,
                   HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION,
                   nullptr, RequiresRTC);
  return true;
}
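// For illustration (rough sketch): for a loop "for (i = 0; i < N; i++)" the
// latch condition propagated through the next-iteration map bounds the header
// domain to { [i] : 0 <= i < N }; the part of the header domain that remains
// unbounded in the loop dimension (reachable only if the loop never
// terminates) is recorded as an INFINITELOOP restriction, with no runtime
// check required when an <nsw> AddRec already rules it out.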
void ScopBuilder::buildInvariantEquivalenceClasses() {
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;

  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : RIL) {
    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());

    Type *Ty = LInst->getType();
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
    if (ClassRep) {
      scop->addInvariantLoadMapping(LInst, ClassRep);
      continue;
    }

    ClassRep = LInst;
    scop->addInvariantEquivClass(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), {}, Ty});
  }
}
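// For illustration: two required invariant loads of the same pointer SCEV and
// type, e.g. two hoisted loads of "*N", end up in one equivalence class; the
// second load is merely mapped to the class representative instead of
// creating a second invariant-load class.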
bool ScopBuilder::buildDomains(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R);
  auto *EntryBB = R->getEntry();
  auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
  int LD = scop->getRelativeLoopDepth(L);
  auto *S =
      isl_set_universe(isl_space_set_alloc(scop->getIslCtx().get(), 0, LD + 1));

  InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
  isl::set Domain = isl::manage(S);
  scop->setDomain(EntryBB, Domain);

  if (IsOnlyNonAffineRegion)
    return !containsErrorBlock(R->getNode(), *R, &SD);

  if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap))
    return false;

  if (!propagateDomainConstraints(R, InvalidDomainMap))
    return false;

  // Error blocks and blocks dominated by them have been assumed to never be
  // executed. Representing them in the Scop does not add any value. In fact,
  // it is likely to cause issues during construction of the ScopStmts. The
  // contents of error blocks have not been verified to be expressible and
  // will cause problems when building up a ScopStmt for them.
  // Furthermore, basic blocks dominated by error blocks may reference
  // instructions in the error block which, if the error block is not modeled,
  // can themselves not be constructed properly. To this end we will replace
  // the domains of error blocks and those only reachable via error blocks
  // with an empty set. Additionally, we will record for each block under which
  // parameter combination it would be reached via an error block in its
  // InvalidDomain. This information is needed during load hoisting.
  if (!propagateInvalidStmtDomains(R, InvalidDomainMap))
    return false;

  return true;
}
bool ScopBuilder::buildDomainsWithBranchConstraints(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // To create the domain for each block in R we iterate over all blocks and
  // subregions in R and propagate the conditions under which the current region
  // element is executed. To this end we iterate in reverse post order over R as
  // it ensures that we first visit all predecessors of a region node (either a
  // basic block or a subregion) before we visit the region node itself.
  // Initially, only the domain for the SCoP region entry block is set and from
  // there we propagate the current domain to all successors, however we add the
  // condition that the successor is actually executed next.
  // As we are only interested in non-loop carried constraints here we can
  // simply skip loop back edges.

  SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        if (!buildDomainsWithBranchConstraints(SubRegion, InvalidDomainMap))
          return false;
        continue;
      }
    }

    if (containsErrorBlock(RN, scop->getRegion(), &SD))
      scop->notifyErrorBlock();

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    Instruction *TI = BB->getTerminator();

    if (isa<UnreachableInst>(TI))
      continue;

    if (!scop->isDomainDefined(BB))
      continue;
    isl::set Domain = scop->getDomainConditions(BB);

    scop->updateMaxLoopDepth(unsignedFromIslSize(Domain.tuple_dim()));

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    // Propagate the domain from BB directly to blocks that have a superset
    // domain, at the moment only region exit nodes of regions that start in BB.
    propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks,
                                           InvalidDomainMap);

    // If all successors of BB have been set a domain through the propagation
    // above we do not need to build condition sets but can just skip this
    // block. However, it is important to note that this is a local property
    // with regards to the region @p R. To this end FinishedExitBlocks is a
    // local variable.
    auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
      return FinishedExitBlocks.count(SuccBB);
    };
    if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
      continue;

    // Build the condition sets for the successor nodes of the current region
    // node. If it is a non-affine subregion we will always execute the single
    // exit node, hence the single entry node domain is the condition set. For
    // basic blocks we use the helper function buildConditionSets.
    SmallVector<isl_set *, 8> ConditionSets;
    if (RN->isSubRegion())
      ConditionSets.push_back(Domain.copy());
    else if (!buildConditionSets(BB, TI, BBLoop, Domain.get(), InvalidDomainMap,
                                 ConditionSets))
      return false;

    // Now iterate over the successors and set their initial domain based on
    // their condition set. We skip back edges here and have to be careful when
    // we leave a loop not to keep constraints over a dimension that doesn't
    // exist anymore.
    assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
    for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
      isl::set CondSet = isl::manage(ConditionSets[u]);
      BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip blocks outside the region.
      if (!scop->contains(SuccBB))
        continue;

      // If we propagate the domain of some block to "SuccBB" we do not have to
      // adjust the domain.
      if (FinishedExitBlocks.count(SuccBB))
        continue;

      // Skip back edges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop =
          getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());

      CondSet = adjustDomainDimensions(CondSet, BBLoop, SuccBBLoop);

      // Set the domain for the successor or merge it with an existing domain in
      // case there are multiple paths (without loop back edges) to the
      // successor block.
      isl::set &SuccDomain = scop->getOrInitEmptyDomain(SuccBB);

      if (!SuccDomain.is_null()) {
        SuccDomain = SuccDomain.unite(CondSet).coalesce();
      } else {
        // Initialize the invalid domain.
        InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
        SuccDomain = CondSet;
      }

      SuccDomain = SuccDomain.detect_equalities();

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will clean up and bail.
      if (unsignedFromIslSize(SuccDomain.n_basic_set()) < MaxDisjunctsInDomain)
        continue;

      scop->invalidate(COMPLEXITY, DebugLoc());
      while (++u < ConditionSets.size())
        isl_set_free(ConditionSets[u]);
      return false;
    }
  }

  return true;
}
bool ScopBuilder::propagateInvalidStmtDomains(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        propagateInvalidStmtDomains(SubRegion, InvalidDomainMap);
        continue;
      }
    }

    bool ContainsErrorBlock = containsErrorBlock(RN, scop->getRegion(), &SD);
    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = scop->getOrInitEmptyDomain(BB);
    assert(!Domain.is_null() && "Cannot propagate a nullptr");

    isl::set InvalidDomain = InvalidDomainMap[BB];

    bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);

    if (!IsInvalidBlock) {
      InvalidDomain = InvalidDomain.intersect(Domain);
    } else {
      InvalidDomain = Domain;
      isl::set DomPar = Domain.params();
      recordAssumption(&RecordedAssumptions, ERRORBLOCK, DomPar,
                       BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
      Domain = isl::set::empty(Domain.get_space());
    }

    if (InvalidDomain.is_empty()) {
      InvalidDomainMap[BB] = InvalidDomain;
      continue;
    }

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    auto *TI = BB->getTerminator();
    unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
    for (unsigned u = 0; u < NumSuccs; u++) {
      auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip successors outside the SCoP.
      if (!scop->contains(SuccBB))
        continue;

      // Skip backedges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop =
          getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());

      auto AdjustedInvalidDomain =
          adjustDomainDimensions(InvalidDomain, BBLoop, SuccBBLoop);

      isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
      SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain);
      SuccInvalidDomain = SuccInvalidDomain.coalesce();

      InvalidDomainMap[SuccBB] = SuccInvalidDomain;

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will bail.
      if (unsignedFromIslSize(SuccInvalidDomain.n_basic_set()) <
          MaxDisjunctsInDomain)
        continue;

      InvalidDomainMap.erase(BB);
      scop->invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
      return false;
    }

    InvalidDomainMap[BB] = InvalidDomain;
  }

  return true;
}
void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                   Region *NonAffineSubRegion,
                                   bool IsExitBlock) {
  // PHI nodes that are in the exit block of the region, hence if IsExitBlock is
  // true, are not modeled as ordinary PHI nodes as they are not part of the
  // region. However, we model the operands in the predecessor blocks that are
  // part of the region as regular scalar accesses.

  // If we can synthesize a PHI we can skip it, however only if it is in
  // the region. If it is not it can only be in the exit block of the region.
  // In this case we model the operands but not the PHI itself.
  auto *Scope = LI.getLoopFor(PHI->getParent());
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
    return;

  // PHI nodes are modeled as if they had been demoted prior to the SCoP
  // detection. Hence, the PHI is a load of a new memory location in which the
  // incoming value was written at the end of the incoming basic block.
  bool OnlyNonAffineSubRegionOperands = true;
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
    Value *Op = PHI->getIncomingValue(u);
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
    ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));

    // Do not build PHI dependences inside a non-affine subregion, but make
    // sure that the necessary scalar values are still made available.
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
      auto *OpInst = dyn_cast<Instruction>(Op);
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
        ensureValueRead(Op, OpStmt);
      continue;
    }

    OnlyNonAffineSubRegionOperands = false;
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
  }

  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
    addPHIReadAccess(PHIStmt, PHI);
  }
}
void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
                                         Instruction *Inst) {
  assert(!isa<PHINode>(Inst));

  // Pull-in required operands.
  for (Use &Op : Inst->operands())
    ensureValueRead(Op.get(), UserStmt);
}
// Create a sequence of two schedules. Either argument may be null and is
// interpreted as the empty schedule. Can also return null if both schedules are
// empty.
static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
  if (Prev.is_null())
    return Succ;
  if (Succ.is_null())
    return Prev;

  return Prev.sequence(Succ);
}
// Create an isl_multi_union_aff that defines an identity mapping from the
// elements of USet to their N-th dimension.
//
// Example:
//
//   Domain: { A[i,j]; B[i,j,k] }
//   N: 1
//
//   Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
//
// @param USet   A union set describing the elements for which to generate a
//               mapping.
// @param N      The dimension to map to.
// @returns      A mapping from USet to its N-th dimension.
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N) {
  assert(!USet.is_null());
  assert(!USet.is_empty());

  auto Result = isl::union_pw_multi_aff::empty(USet.get_space());

  for (isl::set S : USet.get_set_list()) {
    unsigned Dim = unsignedFromIslSize(S.tuple_dim());
    assert(Dim >= N);
    auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
                                                  N, Dim - N);
    if (N > 1)
      PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);

    Result = Result.add_pw_multi_aff(PMA);
  }

  return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
}
void ScopBuilder::buildSchedule() {
  Loop *L = getLoopSurroundingScop(*scop, LI);
  LoopStackTy LoopStack({LoopStackElementTy(L, {}, 0)});
  buildSchedule(scop->getRegion().getNode(), LoopStack);
  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
  scop->setScheduleTree(LoopStack[0].Schedule);
}
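// For illustration (rough sketch): for a SCoP with a single loop containing
// the statements Stmt_A and Stmt_B, the construction below yields a schedule
// tree of roughly the form
//
//   domain: { Stmt_A[i]; Stmt_B[i] }
//     band:  [{ Stmt_A[i] -> [(i)]; Stmt_B[i] -> [(i)] }]
//       sequence:
//         filter: { Stmt_A[i] }
//         filter: { Stmt_B[i] }
//
// i.e. one band dimension per loop (inserted via mapToDimension) and a
// sequence node for the textual order within the loop body.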
/// To generate a schedule for the elements in a Region we traverse the Region
/// in reverse-post-order and add the contained RegionNodes in traversal order
/// to the schedule of the loop that is currently at the top of the LoopStack.
/// For loop-free codes, this results in a correct sequential ordering.
///
/// Including loops requires additional processing. Whenever a loop header is
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing any other nodes. To implement this loop-nodes-first-processing,
/// the reverse post-order traversal is insufficient. Hence, we additionally
/// check if the traversal yields sub-regions or blocks that are outside the
/// last loop on the @p LoopStack. These region-nodes are then queued and only
/// traversed after all nodes within the current loop have been processed.
void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
  Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);

  ReversePostOrderTraversal<Region *> RTraversal(R);
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  std::deque<RegionNode *> DelayList;
  bool LastRNWaiting = false;

  // Iterate over the region @p R in reverse post-order but queue
  // sub-regions/blocks iff they are not part of the last encountered but not
  // completely traversed loop. The variable LastRNWaiting is a flag to indicate
  // that we queued the last sub-region/block from the reverse post-order
  // iterator. If it is set we have to explore the next sub-region/block from
  // the iterator (if any) to guarantee progress. If it is not set we first try
  // the next queued sub-region/blocks.
  while (!WorkList.empty() || !DelayList.empty()) {
    RegionNode *RN;

    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
      RN = WorkList.front();
      WorkList.pop_front();
      LastRNWaiting = false;
    } else {
      RN = DelayList.front();
      DelayList.pop_front();
    }

    Loop *L = getRegionNodeLoop(RN, LI);
    if (!scop->contains(L))
      L = OuterScopLoop;

    Loop *LastLoop = LoopStack.back().L;
    if (LastLoop != L) {
      if (LastLoop && !LastLoop->contains(L)) {
        LastRNWaiting = true;
        DelayList.push_back(RN);
        continue;
      }
      LoopStack.push_back({L, {}, 0});
    }
    buildSchedule(RN, LoopStack);
  }
}
void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
  if (RN->isSubRegion()) {
    auto *LocalRegion = RN->getNodeAs<Region>();
    if (!scop->isNonAffineSubRegion(LocalRegion)) {
      buildSchedule(LocalRegion, LoopStack);
      return;
    }
  }

  assert(LoopStack.rbegin() != LoopStack.rend());
  auto LoopData = LoopStack.rbegin();
  LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);

  for (auto *Stmt : scop->getStmtListFor(RN)) {
    isl::union_set UDomain{Stmt->getDomain()};
    auto StmtSchedule = isl::schedule::from_domain(UDomain);
    LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
  }

  // Check if we just processed the last node in this loop. If we did, finalize
  // the loop by:
  //
  //   - adding new schedule dimensions
  //   - folding the resulting schedule into the parent loop schedule
  //   - dropping the loop schedule from the LoopStack.
  //
  // Then continue to check surrounding loops, which might also have been
  // completed by this node.
  size_t Dimension = LoopStack.size();
  while (LoopData->L &&
         LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
    isl::schedule Schedule = LoopData->Schedule;
    auto NumBlocksProcessed = LoopData->NumBlocksProcessed;

    assert(std::next(LoopData) != LoopStack.rend());
    Loop *L = LoopData->L;
    ++LoopData;
    --Dimension;

    if (!Schedule.is_null()) {
      isl::union_set Domain = Schedule.get_domain();
      isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
      Schedule = Schedule.insert_partial_schedule(MUPA);

      if (hasDisableAllTransformsHint(L)) {
        /// If any of the loops has a disable_nonforced heuristic, mark the
        /// entire SCoP as such. The ISL rescheduler can only reschedule the
        /// SCoP in its entirety.
        /// TODO: ScopDetection could avoid including such loops or wrap them
        /// as boxed loops. It still needs to pass through loops with
        /// user-defined metadata.
        scop->markDisableHeuristics();
      }

      // It is easier to insert the marks here than do it retroactively.
      isl::id IslLoopId = createIslLoopAttr(scop->getIslCtx(), L);
      if (!IslLoopId.is_null())
        Schedule =
            Schedule.get_root().child(0).insert_mark(IslLoopId).get_schedule();

      LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
    }

    LoopData->NumBlocksProcessed += NumBlocksProcessed;
  }
  // Now pop all loops processed up there from the LoopStack
  LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
}
void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
  // Check for uses of this instruction outside the scop. Because we do not
  // iterate over such instructions and therefore did not "ensure" the
  // existence of a write, we must determine such use here.
  if (scop->isEscaping(Inst))
    ensureValueWrite(Inst);
}
void ScopBuilder::addRecordedAssumptions() {
  for (auto &AS : llvm::reverse(RecordedAssumptions)) {
    if (!AS.BB) {
      scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
                          nullptr /* BasicBlock */, AS.RequiresRTC);
      continue;
    }

    // If the domain was deleted the assumptions are void.
    isl_set *Dom = scop->getDomainConditions(AS.BB).release();
    if (!Dom)
      continue;

    // If a basic block was given use its domain to simplify the assumption.
    // In case of restrictions we know they only have to hold on the domain,
    // thus we can intersect them with the domain of the block. However, for
    // assumptions the domain has to imply them, thus:
    //
    //   Dom => S   <==>   A v B   <==>   A - B
    //
    // To avoid the complement we will register A - B as a restriction not an
    // assumption.
    isl_set *S = AS.Set.copy();
    if (AS.Sign == AS_RESTRICTION)
      S = isl_set_params(isl_set_intersect(S, Dom));
    else /* (AS.Sign == AS_ASSUMPTION) */
      S = isl_set_params(isl_set_subtract(Dom, S));

    scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB,
                        AS.RequiresRTC);
  }
}
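// For illustration of the restriction/assumption asymmetry above (roughly):
// with Dom = { [i] : 0 <= i < N } and an assumption S = { : N <= 100 }, the
// registered restriction is params(Dom - S) = { : N > 100 }, i.e. exactly the
// parameter values for which an executed iteration would violate the
// assumption.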
void ScopBuilder::addUserAssumptions(
    AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  for (auto &Assumption : AC.assumptions()) {
    auto *CI = dyn_cast_or_null<CallInst>(Assumption);
    if (!CI || CI->arg_size() != 1)
      continue;

    bool InScop = scop->contains(CI);
    if (!InScop && !scop->isDominatedBy(DT, CI->getParent()))
      continue;

    auto *L = LI.getLoopFor(CI->getParent());
    auto *Val = CI->getArgOperand(0);
    ParameterSetTy DetectedParams;
    auto &R = scop->getRegion();
    if (!isAffineConstraint(Val, &R, L, SE, DetectedParams)) {
      ORE.emit(
          OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
          << "Non-affine user assumption ignored.");
      continue;
    }

    // Collect all newly introduced parameters.
    ParameterSetTy NewParams;
    for (auto *Param : DetectedParams) {
      Param = extractConstantFactor(Param, SE).second;
      Param = scop->getRepresentingInvariantLoadSCEV(Param);
      if (scop->isParam(Param))
        continue;
      NewParams.insert(Param);
    }

    SmallVector<isl_set *, 2> ConditionSets;
    auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
    BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
    auto *Dom = InScop ? isl_set_copy(scop->getDomainConditions(BB).get())
                       : isl_set_copy(scop->getContext().get());
    assert(Dom && "Cannot propagate a nullptr.");
    bool Valid = buildConditionSets(BB, Val, TI, L, Dom, InvalidDomainMap,
                                    ConditionSets);
    isl_set_free(Dom);

    if (!Valid)
      continue;

    isl_set *AssumptionCtx = nullptr;
    if (InScop) {
      AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
      isl_set_free(ConditionSets[0]);
    } else {
      AssumptionCtx = isl_set_complement(ConditionSets[1]);
      AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
    }

    // Project out newly introduced parameters as they are not otherwise useful.
    if (!NewParams.empty()) {
      for (isl_size u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
        auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
        auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
        isl_id_free(Id);

        if (!NewParams.count(Param))
          continue;

        AssumptionCtx =
            isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
      }
    }
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
             << "Use user assumption: "
             << stringFromIslObj(AssumptionCtx, "null"));
    isl::set newContext =
        scop->getContext().intersect(isl::manage(AssumptionCtx));
    scop->setContext(newContext);
  }
}
bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
  // Memory builtins are not considered in this function.
  if (!Inst.isLoad() && !Inst.isStore())
    return false;

  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  Value *Address = Inst.getPointerOperand();
  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  if (auto *BitCast = dyn_cast<BitCastInst>(Address))
    Address = BitCast->getOperand(0);

  auto *GEP = dyn_cast<GetElementPtrInst>(Address);
  if (!GEP || DL.getTypeAllocSize(GEP->getResultElementType()) !=
                  DL.getTypeAllocSize(ElementType))
    return false;

  SmallVector<const SCEV *, 4> Subscripts;
  SmallVector<int, 4> Sizes;
  getIndexExpressionsFromGEP(SE, GEP, Subscripts, Sizes);
  auto *BasePtr = GEP->getOperand(0);

  if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
    BasePtr = BasePtrCast->getOperand(0);

  // Check for identical base pointers to ensure that we do not miss index
  // offsets that have been added before this GEP is applied.
  if (BasePtr != BasePointer->getValue())
    return false;

  std::vector<const SCEV *> SizesSCEV;

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  for (auto *Subscript : Subscripts) {
    InvariantLoadsSetTy AccessILS;
    if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
                      &AccessILS))
      return false;

    for (LoadInst *LInst : AccessILS)
      if (!ScopRIL.count(LInst))
        return false;
  }

  if (Sizes.empty())
    return false;

  SizesSCEV.push_back(nullptr);

  for (auto V : Sizes)
    SizesSCEV.push_back(SE.getSCEV(
        ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, Subscripts, SizesSCEV, Val);
  return true;
}
bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
  // Memory builtins are not considered by this function.
  if (!Inst.isLoad() && !Inst.isStore())
    return false;

  if (!PollyDelinearize)
    return false;

  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  unsigned ElementSize = DL.getTypeAllocSize(ElementType);
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");

  auto &InsnToMemAcc = scop->getInsnToMemAccMap();
  auto AccItr = InsnToMemAcc.find(Inst);
  if (AccItr == InsnToMemAcc.end())
    return false;

  std::vector<const SCEV *> Sizes = {nullptr};

  Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
               AccItr->second.Shape->DelinearizedSizes.end());

  // In case only the element size is contained in the 'Sizes' array, the
  // access does not access a real multi-dimensional array. Hence, we allow
  // the normal single-dimensional access construction to handle this.
  if (Sizes.size() == 1)
    return false;

  // Remove the element size. This information is already provided by the
  // ElementSize parameter. In case the element size of this access and the
  // element size used for delinearization differs the delinearization is
  // incorrect. Hence, we invalidate the scop.
  //
  // TODO: Handle delinearization with differing element sizes.
  auto DelinearizedSize =
      cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
  Sizes.pop_back();
  if (ElementSize != DelinearizedSize)
    scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
  return true;
}
bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
  auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);

  if (MemIntr == nullptr)
    return false;

  auto *L = LI.getLoopFor(Inst->getParent());
  auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
  assert(LengthVal);

  // Check if the length val is actually affine or if we overapproximate it
  InvariantLoadsSetTy AccessILS;
  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
                                     LengthVal, SE, &AccessILS);
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      LengthIsAffine = false;
  if (!LengthIsAffine)
    LengthVal = nullptr;

  auto *DestPtrVal = MemIntr->getDest();
  assert(DestPtrVal);

  auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
  assert(DestAccFunc);
  // Ignore accesses to "NULL".
  // TODO: We could use this to optimize the region further, e.g., intersect
  //       the context with
  //          isl_set_complement(isl_set_params(getDomain()))
  //       as we know it would be undefined to execute this instruction anyway.
  if (DestAccFunc->isZero())
    return true;

  if (auto *U = dyn_cast<SCEVUnknown>(DestAccFunc)) {
    if (isa<ConstantPointerNull>(U->getValue()))
      return true;
  }

  auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
  assert(DestPtrSCEV);
  DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(DestPtrVal->getContext()),
                 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
  if (!MemTrans)
    return true;

  auto *SrcPtrVal = MemTrans->getSource();
  assert(SrcPtrVal);

  auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
  assert(SrcAccFunc);
  // Ignore accesses to "NULL".
  // TODO: See above TODO
  if (SrcAccFunc->isZero())
    return true;

  auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
  assert(SrcPtrSCEV);
  SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
                 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  return true;
}
bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
  auto *CI = dyn_cast_or_null<CallInst>(Inst);

  if (CI == nullptr)
    return false;

  if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
    return true;

  auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
  auto *CalledFunction = CI->getCalledFunction();
  MemoryEffects ME = AA.getMemoryEffects(CalledFunction);
  if (ME.doesNotAccessMemory())
    return true;

  if (ME.onlyAccessesArgPointees()) {
    ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
    auto AccType =
        !isModSet(ArgMR) ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
    Loop *L = LI.getLoopFor(Inst->getParent());
    for (const auto &Arg : CI->args()) {
      if (!Arg->getType()->isPointerTy())
        continue;

      auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
      if (ArgSCEV->isZero())
        continue;

      if (auto *U = dyn_cast<SCEVUnknown>(ArgSCEV)) {
        if (isa<ConstantPointerNull>(U->getValue()))
          continue;
      }

      auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
      addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
                     ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
    }
    return true;
  }

  if (ME.onlyReadsMemory()) {
    GlobalReads.emplace_back(Stmt, CI);
    return true;
  }
  return false;
}
bool ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
  // Memory builtins are not considered by this function.
  if (!Inst.isLoad() && !Inst.isStore())
    return false;

  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");
  AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);

  // Check if the access depends on a loop contained in a non-affine subregion.
  bool isVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AccessFunction, Loops);
  for (const Loop *L : Loops)
    if (Stmt->contains(L)) {
      isVariantInNonAffineLoop = true;
      break;
    }

  InvariantLoadsSetTy AccessILS;

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool IsAffine = !isVariantInNonAffineLoop &&
                  isAffineExpr(&scop->getRegion(), SurroundingLoop,
                               AccessFunction, SE, &AccessILS);

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      IsAffine = false;

  if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 IsAffine, {AccessFunction}, {nullptr}, Val);
  return true;
}
void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
  if (buildAccessMemIntrinsic(Inst, Stmt))
    return;

  if (buildAccessCallInst(Inst, Stmt))
    return;

  if (buildAccessMultiDimFixed(Inst, Stmt))
    return;

  if (buildAccessMultiDimParam(Inst, Stmt))
    return;

  if (buildAccessSingleDim(Inst, Stmt))
    return;

  llvm_unreachable(
      "At least one of the buildAccess functions must handled this access, or "
      "ScopDetection should have rejected this SCoP");
}
void ScopBuilder::buildAccessFunctions() {
  for (auto &Stmt : *scop) {
    if (Stmt.isBlockStmt()) {
      buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
      continue;
    }

    Region *R = Stmt.getRegion();
    for (BasicBlock *BB : R->blocks())
      buildAccessFunctions(&Stmt, *BB, R);
  }

  // Build write accesses for values that are used after the SCoP.
  // The instructions defining them might be synthesizable and therefore not
  // contained in any statement, hence we iterate over the original instructions
  // to identify all escaping values.
  for (BasicBlock *BB : scop->getRegion().blocks()) {
    for (Instruction &Inst : *BB)
      buildEscapingDependences(&Inst);
  }
}
bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
  return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
         !canSynthesize(Inst, *scop, &SE, L);
}
/// Generate a name for a statement.
///
/// @param BB     The basic block the statement will represent.
/// @param BBIdx  The index of the @p BB relative to other BBs/regions.
/// @param Count  The index of the created statement in @p BB.
/// @param IsMain Whether this is the main of all statements for @p BB. If
///               true, no suffix will be added.
/// @param IsLast Uses a special indicator for the last statement of a BB.
static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
                                bool IsMain, bool IsLast = false) {
  std::string Suffix;
  if (!IsMain) {
    if (UseInstructionNames)
      Suffix = '_';
    if (IsLast)
      Suffix += "last";
    else if (Count < 26)
      Suffix += 'a' + Count;
    else
      Suffix += std::to_string(Count);
  }
  return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
}
/// Generate a name for a statement that represents a non-affine subregion.
///
/// @param R    The region the statement will represent.
/// @param RIdx The index of the @p R relative to other BBs/regions.
static std::string makeStmtName(Region *R, long RIdx) {
  return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
                              UseInstructionNames);
}
void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
  Loop *SurroundingLoop = LI.getLoopFor(BB);

  int Count = 0;
  long BBIdx = scop->getNextStmtIdx();
  std::vector<Instruction *> Instructions;
  for (Instruction &Inst : *BB) {
    if (shouldModelInst(&Inst, SurroundingLoop))
      Instructions.push_back(&Inst);
    if (Inst.getMetadata("polly_split_after") ||
        (SplitOnStore && isa<StoreInst>(Inst))) {
      std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
      scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
      Count++;
      Instructions.clear();
    }
  }

  std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
  scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
}
/// Is @p Inst an ordered instruction?
///
/// An unordered instruction is an instruction, such that a sequence of
/// unordered instructions can be permuted without changing semantics. Any
/// instruction for which this is not always the case is ordered.
static bool isOrderedInstruction(Instruction *Inst) {
  return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
}
/// Join instructions to the same statement if one uses the scalar result of
/// the other.
static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
                            ArrayRef<Instruction *> ModeledInsts) {
  for (Instruction *Inst : ModeledInsts) {
    if (isa<PHINode>(Inst))
      continue;

    for (Use &Op : Inst->operands()) {
      Instruction *OpInst = dyn_cast<Instruction>(Op.get());
      if (!OpInst)
        continue;

      // Check if OpInst is in the BB and is a modeled instruction.
      auto OpVal = UnionFind.findValue(OpInst);
      if (OpVal == UnionFind.end())
        continue;

      UnionFind.unionSets(Inst, OpInst);
    }
  }
}
/// Ensure that the order of ordered instructions does not change.
///
/// If we encounter an ordered instruction enclosed in instructions belonging
/// to a different statement (which might as well contain ordered instructions,
/// but this is not tested here), join them.
static void
joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
                        ArrayRef<Instruction *> ModeledInsts) {
  SetVector<Instruction *> SeenLeaders;
  for (Instruction *Inst : ModeledInsts) {
    if (!isOrderedInstruction(Inst))
      continue;

    Instruction *Leader = UnionFind.getLeaderValue(Inst);
    // Since previous iterations might have merged sets, some items in
    // SeenLeaders are not leaders anymore. However, the new leader of
    // previously merged instructions must be one of the former leaders of
    // these merged instructions.
    bool Inserted = SeenLeaders.insert(Leader);
    if (Inserted)
      continue;

    // Merge statements to close holes. Say, we have already seen statements A
    // and B, in this order. Then we see an instruction of A again and we would
    // see the pattern "A B A". This function joins all statements until the
    // only seen occurrence of A.
    for (Instruction *Prev : reverse(SeenLeaders)) {
      // We are backtracking from the last element until we see Inst's leader
      // in SeenLeaders and merge all into one set. Although leaders of
      // instructions change during the execution of this loop, it's irrelevant
      // as we are just searching for the element that we already confirmed is
      // in the list.
      if (Prev == Leader)
        break;
      UnionFind.unionSets(Prev, Leader);
    }
  }
}
/// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
/// the incoming values from this block are executed after the PHI READ.
///
/// Otherwise it could overwrite the incoming value from before the BB with the
/// value for the next execution. This can happen if the PHI WRITE is added to
/// the statement with the instruction that defines the incoming value (instead
/// of the last statement of the same BB). To ensure that the PHI READ and WRITE
/// are in order, we put both into the statement. PHI WRITEs are always executed
/// after PHI READs when they are in the same statement.
///
/// TODO: This is an overpessimization. We only have to ensure that the PHI
/// WRITE is not put into a statement containing the PHI itself. That could also
/// be done by
/// - having all (strongly connected) PHIs in a single statement,
/// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
///   has a chance of being lifted before a PHI by being in a statement with a
///   PHI that comes before in the basic block), or
/// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
                            ArrayRef<Instruction *> ModeledInsts) {
  for (Instruction *Inst : ModeledInsts) {
    PHINode *PHI = dyn_cast<PHINode>(Inst);
    if (!PHI)
      continue;

    int Idx = PHI->getBasicBlockIndex(PHI->getParent());
    if (Idx < 0)
      continue;

    Instruction *IncomingVal =
        dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
    if (!IncomingVal)
      continue;

    UnionFind.unionSets(PHI, IncomingVal);
  }
}
void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
  Loop *L = LI.getLoopFor(BB);

  // Extracting out modeled instructions saves us from checking
  // shouldModelInst() repeatedly.
  SmallVector<Instruction *, 32> ModeledInsts;
  EquivalenceClasses<Instruction *> UnionFind;
  Instruction *MainInst = nullptr, *MainLeader = nullptr;
  for (Instruction &Inst : *BB) {
    if (!shouldModelInst(&Inst, L))
      continue;
    ModeledInsts.push_back(&Inst);
    UnionFind.insert(&Inst);

    // When a BB is split into multiple statements, the main statement is the
    // one containing the 'main' instruction. We select the first instruction
    // that is unlikely to be removed (because it has side-effects) as the main
    // one. It is used to ensure that at least one statement from the bb has the
    // same name as with -polly-stmt-granularity=bb.
    if (!MainInst && (isa<StoreInst>(Inst) ||
                      (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
      MainInst = &Inst;
  }

  joinOperandTree(UnionFind, ModeledInsts);
  joinOrderedInstructions(UnionFind, ModeledInsts);
  joinOrderedPHIs(UnionFind, ModeledInsts);

  // The list of instructions for statement (statement represented by the
  // leader instruction).
  MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;

  // The order of statements must be preserved w.r.t. their ordered
  // instructions. Without this explicit scan, we would also use non-ordered
  // instructions (whose order is arbitrary) to determine statement order.
  for (Instruction *Inst : ModeledInsts) {
    if (!isOrderedInstruction(Inst))
      continue;

    auto LeaderIt = UnionFind.findLeader(Inst);
    if (LeaderIt == UnionFind.member_end())
      continue;

    // Insert element for the leader instruction.
    (void)LeaderToInstList[*LeaderIt];
  }

  // Collect the instructions of all leaders. UnionFind's member iterators
  // unfortunately are not in any specific order.
  for (Instruction *Inst : ModeledInsts) {
    auto LeaderIt = UnionFind.findLeader(Inst);
    if (LeaderIt == UnionFind.member_end())
      continue;

    if (Inst == MainInst)
      MainLeader = *LeaderIt;
    std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
    InstList.push_back(Inst);
  }

  // Finally build the statements.
  int Count = 0;
  long BBIdx = scop->getNextStmtIdx();
  for (auto &Instructions : LeaderToInstList) {
    std::vector<Instruction *> &InstList = Instructions.second;

    // If there is no main instruction, make the first statement the main.
    bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);

    std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
    scop->addScopStmt(BB, Name, L, std::move(InstList));
    Count += 1;
  }

  // Unconditionally add an epilogue (last statement). It contains no
  // instructions, but holds the PHI write accesses for successor basic blocks,
  // if the incoming value is not defined in another statement of the same BB.
  // The epilogue becomes the main statement only if there is no other
  // statement that could become main.
  // The epilogue will be removed if no PHIWrite is added to it.
  std::string EpilogueName = makeStmtName(BB, BBIdx, Count, Count == 0, true);
  scop->addScopStmt(BB, EpilogueName, L, {});
}
void ScopBuilder::buildStmts(Region &SR) {
  if (scop->isNonAffineSubRegion(&SR)) {
    std::vector<Instruction *> Instructions;
    Loop *SurroundingLoop =
        getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
    for (Instruction &Inst : *SR.getEntry())
      if (shouldModelInst(&Inst, SurroundingLoop))
        Instructions.push_back(&Inst);
    long RIdx = scop->getNextStmtIdx();
    std::string Name = makeStmtName(&SR, RIdx);
    scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
    return;
  }

  for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
    if (I->isSubRegion())
      buildStmts(*I->getNodeAs<Region>());
    else {
      BasicBlock *BB = I->getNodeAs<BasicBlock>();
      switch (StmtGranularity) {
      case GranularityChoice::BasicBlocks:
        buildSequentialBlockStmts(BB);
        break;
      case GranularityChoice::ScalarIndependence:
        buildEqivClassBlockStmts(BB);
        break;
      case GranularityChoice::Stores:
        buildSequentialBlockStmts(BB, true);
        break;
      }
    }
}
void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
                                       Region *NonAffineSubRegion) {
  assert(
      Stmt &&
      "The exit BB is the only one that cannot be represented by a statement");
  assert(Stmt->represents(&BB));

  // We do not build access functions for error blocks, as they may contain
  // instructions we can not model.
  if (SD.isErrorBlock(BB, scop->getRegion()))
    return;

  auto BuildAccessesForInst = [this, Stmt,
                               NonAffineSubRegion](Instruction *Inst) {
    PHINode *PHI = dyn_cast<PHINode>(Inst);
    if (PHI)
      buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);

    if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
      assert(Stmt && "Cannot build access function in non-existing statement");
      buildMemoryAccess(MemInst, Stmt);
    }

    // PHI nodes have already been modeled above and terminators that are
    // not part of a non-affine subregion are fully modeled and regenerated
    // from the polyhedral domains. Hence, they do not need to be modeled as
    // explicit data dependences.
    if (!PHI)
      buildScalarDependences(Stmt, Inst);
  };

  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
  if (IsEntryBlock) {
    for (Instruction *Inst : Stmt->getInstructions())
      BuildAccessesForInst(Inst);
    if (Stmt->isRegionStmt())
      BuildAccessesForInst(BB.getTerminator());
  } else {
    for (Instruction &Inst : BB) {
      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Invariant loads already have been processed.
      if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
        continue;

      BuildAccessesForInst(&Inst);
    }
  }
}
MemoryAccess *ScopBuilder::addMemoryAccess(
    ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
    Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
    ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
    MemoryKind Kind) {
  bool isKnownMustAccess = false;

  // Accesses in single-basic block statements are always executed.
  if (Stmt->isBlockStmt())
    isKnownMustAccess = true;

  if (Stmt->isRegionStmt()) {
    // Accesses that dominate the exit block of a non-affine region are always
    // executed. In non-affine regions there may exist MemoryKind::Values that
    // do not dominate the exit. MemoryKind::Values will always dominate the
    // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
    // non-affine region.
    if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
      isKnownMustAccess = true;
  }

  // Non-affine PHI writes do not "happen" at a particular instruction, but
  // after exiting the statement. Therefore they are guaranteed to execute and
  // overwrite the old value.
  if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
    isKnownMustAccess = true;

  if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
                                  Affine, Subscripts, Sizes, AccessValue, Kind);

  scop->addAccessFunction(Access);
  Stmt->addAccess(Access);
  return Access;
}
void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
                                 MemoryAccess::AccessType AccType,
                                 Value *BaseAddress, Type *ElementType,
                                 bool IsAffine,
                                 ArrayRef<const SCEV *> Subscripts,
                                 ArrayRef<const SCEV *> Sizes,
                                 Value *AccessValue) {
  ArrayBasePointers.insert(BaseAddress);
  addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine,
                  AccessValue, Subscripts, Sizes, MemoryKind::Array);
}
/// Check if @p Expr is divisible by @p Size.
static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
  // Only one factor needs to be divisible.
  if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
    for (auto *FactorExpr : MulExpr->operands())
      if (isDivisible(FactorExpr, Size, SE))
        return true;
    return false;
  }

  // For other n-ary expressions (Add, AddRec, Max,...) all operands need
  // to be divisible.
  if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
    for (auto *OpExpr : NAryExpr->operands())
      if (!isDivisible(OpExpr, Size, SE))
        return false;
    return true;
  }

  auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
  auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
  auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
  return MulSCEV == Expr;
}
void ScopBuilder::foldSizeConstantsToRight() {
  isl::union_set Accessed = scop->getAccesses().range();

  for (auto Array : scop->arrays()) {
    if (Array->getNumberOfDimensions() <= 1)
      continue;

    isl::space Space = Array->getSpace();
    Space = Space.align_params(Accessed.get_space());

    if (!Accessed.contains(Space))
      continue;

    isl::set Elements = Accessed.extract_set(Space);
    isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());

    std::vector<int> Int;
    unsigned Dims = unsignedFromIslSize(Elements.tuple_dim());
    for (unsigned i = 0; i < Dims; i++) {
      isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
      DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
      DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);

      isl::basic_set DimHull = DimOnly.affine_hull();

      if (i == Dims - 1) {
        Int.push_back(1);
        Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
        continue;
      }

      if (unsignedFromIslSize(DimHull.dim(isl::dim::div)) == 1) {
        isl::aff Diff = DimHull.get_div(0);
        isl::val Val = Diff.get_denominator_val();

        int ValInt = 1;
        if (Val.is_int()) {
          auto ValAPInt = APIntFromVal(Val);
          if (ValAPInt.isSignedIntN(32))
            ValInt = ValAPInt.getSExtValue();
        }

        Int.push_back(ValInt);
        isl::constraint C = isl::constraint::alloc_equality(
            isl::local_space(Transform.get_space()));
        C = C.set_coefficient_si(isl::dim::out, i, ValInt);
        C = C.set_coefficient_si(isl::dim::in, i, -1);
        Transform = Transform.add_constraint(C);
        continue;
      }

      isl::basic_set ZeroSet = isl::basic_set(DimHull);
      ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);

      int ValInt = 1;
      if (ZeroSet.is_equal(DimHull))
        ValInt = 0;

      Int.push_back(ValInt);
      Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
    }

    isl::set MappedElements = isl::map(Transform).domain();
    if (!Elements.is_subset(MappedElements))
      continue;

    bool CanFold = true;
    if (Int[0] <= 1)
      CanFold = false;

    unsigned NumDims = Array->getNumberOfDimensions();
    for (unsigned i = 1; i < NumDims - 1; i++)
      if (Int[0] != Int[i] && Int[i])
        CanFold = false;

    if (!CanFold)
      continue;

    for (auto &Access : scop->access_functions())
      if (Access->getScopArrayInfo() == Array)
        Access->setAccessRelation(
            Access->getAccessRelation().apply_range(Transform));

    std::vector<const SCEV *> Sizes;
    for (unsigned i = 0; i < NumDims; i++) {
      auto Size = Array->getDimensionSize(i);

      if (i == NumDims - 1)
        Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0]));
      Sizes.push_back(Size);
    }

    Array->updateSizes(Sizes, false /* CheckConsistency */);
  }
}
void ScopBuilder::finalizeAccesses() {
  updateAccessDimensionality();
  foldSizeConstantsToRight();
  foldAccessRelations();
  assumeNoOutOfBounds();
}
void ScopBuilder::updateAccessDimensionality() {
  // Check all array accesses for each base pointer and find a (virtual) element
  // size for the base pointer that divides all access functions.
  for (ScopStmt &Stmt : *scop)
    for (MemoryAccess *Access : Stmt) {
      if (!Access->isArrayKind())
        continue;
      ScopArrayInfo *Array =
          const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());

      if (Array->getNumberOfDimensions() != 1)
        continue;
      unsigned DivisibleSize = Array->getElemSizeInBytes();
      const SCEV *Subscript = Access->getSubscript(0);
      while (!isDivisible(Subscript, DivisibleSize, SE))
        DivisibleSize /= 2;
      auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8);
      Array->updateElementType(Ty);
    }

  for (auto &Stmt : *scop)
    for (auto &Access : Stmt)
      Access->updateDimensionality();
}
void ScopBuilder::foldAccessRelations() {
  for (auto &Stmt : *scop)
    for (auto &Access : Stmt)
      Access->foldAccessRelation();
}
void ScopBuilder::assumeNoOutOfBounds() {
  if (PollyIgnoreInbounds)
    return;
  for (auto &Stmt : *scop)
    for (auto &Access : Stmt) {
      isl::set Outside = Access->assumeNoOutOfBound();
      const auto &Loc = Access->getAccessInstruction()
                            ? Access->getAccessInstruction()->getDebugLoc()
                            : DebugLoc();
      recordAssumption(&RecordedAssumptions, INBOUNDS, Outside, Loc,
                       AS_ASSUMPTION);
    }
}
void ScopBuilder::ensureValueWrite(Instruction *Inst) {
  // Find the statement that defines the value of Inst. That statement has to
  // write the value to make it available to those statements that read it.
  ScopStmt *Stmt = scop->getStmtFor(Inst);

  // It is possible that the value is synthesizable within a loop (such that it
  // is not part of any statement), but not after the loop (where you need the
  // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
  // avoid this. In case the IR has no such PHI, use the last statement (where
  // the value is synthesizable) to write the value.
  if (!Stmt)
    Stmt = scop->getLastStmtFor(Inst->getParent());

  // Inst not defined within this SCoP.
  if (!Stmt)
    return;

  // Do not process further if the instruction is already written.
  if (Stmt->lookupValueWriteOf(Inst))
    return;

  addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
                  true, Inst, ArrayRef<const SCEV *>(),
                  ArrayRef<const SCEV *>(), MemoryKind::Value);
}
void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
  // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
  // to be able to replace this one. Currently, there is a split responsibility.
  // In a first step, the MemoryAccess is created, but without the
  // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
  // AccessRelation is created. At least for scalar accesses, there is no new
  // information available at ScopStmt::buildAccessRelations(), so we could
  // create the AccessRelation right away. This is what
  // ScopStmt::ensureValueRead(Value*) does.

  auto *Scope = UserStmt->getSurroundingLoop();
  auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
  switch (VUse.getKind()) {
  case VirtualUse::Constant:
  case VirtualUse::Block:
  case VirtualUse::Synthesizable:
  case VirtualUse::Hoisted:
  case VirtualUse::Intra:
    // Uses of these kinds do not need a MemoryAccess.
    break;

  case VirtualUse::ReadOnly:
    // Add MemoryAccess for invariant values only if requested.
    if (!ModelReadOnlyScalars)
      break;

    [[fallthrough]];
  case VirtualUse::Inter:

    // Do not create another MemoryAccess for reloading the value if one already
    // exists.
    if (UserStmt->lookupValueReadOf(V))
      break;

    addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
                    true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                    MemoryKind::Value);

    // Inter-statement uses need to write the value in their defining statement.
    if (VUse.isInter())
      ensureValueWrite(cast<Instruction>(V));
    break;
  }
}
void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
                                 BasicBlock *IncomingBlock,
                                 Value *IncomingValue, bool IsExitBlock) {
  // As the incoming block might turn out to be an error statement ensure we
  // will create an exit PHI SAI object. It is needed during code generation
  // and would be created later anyway.
  if (IsExitBlock)
    scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
                                   MemoryKind::ExitPHI);

  // This is possible if PHI is in the SCoP's entry block. The incoming blocks
  // from outside the SCoP's region have no statement representation.
  if (!IncomingStmt)
    return;

  // Take care for the incoming value being available in the incoming block.
  // This must be done before the check for multiple PHI writes because multiple
  // exiting edges from subregion each can be the effective written value of the
  // subregion. As such, all of them must be made available in the subregion
  // statement.
  ensureValueRead(IncomingValue, IncomingStmt);

  // Do not add more than one MemoryAccess per PHINode and ScopStmt.
  if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
    assert(Acc->getAccessInstruction() == PHI);
    Acc->addIncoming(IncomingBlock, IncomingValue);
    return;
  }

  MemoryAccess *Acc = addMemoryAccess(
      IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
      PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
      IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
  Acc->addIncoming(IncomingBlock, IncomingValue);
}
void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
  addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
                  PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                  MemoryKind::PHI);
}
void ScopBuilder::buildDomain(ScopStmt &Stmt) {
  isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);

  Stmt.Domain = scop->getDomainConditions(&Stmt);
  Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
}
void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
  isl::set Domain = Stmt.getDomain();
  BasicBlock *BB = Stmt.getEntryBlock();

  Loop *L = LI.getLoopFor(BB);

  while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
    L = L->getParentLoop();

  SmallVector<llvm::Loop *, 8> Loops;

  while (L && Stmt.getParent()->getRegion().contains(L)) {
    Loops.push_back(L);
    L = L->getParentLoop();
  }

  Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
}
/// Return the reduction type for a given binary operator.
static MemoryAccess::ReductionType
getReductionType(const BinaryOperator *BinOp, const Instruction *Load) {
  if (!BinOp)
    return MemoryAccess::RT_NONE;
  switch (BinOp->getOpcode()) {
  case Instruction::FAdd:
    if (!BinOp->isFast())
      return MemoryAccess::RT_NONE;
    [[fallthrough]];
  case Instruction::Add:
    return MemoryAccess::RT_ADD;
  case Instruction::Or:
    return MemoryAccess::RT_BOR;
  case Instruction::Xor:
    return MemoryAccess::RT_BXOR;
  case Instruction::And:
    return MemoryAccess::RT_BAND;
  case Instruction::FMul:
    if (!BinOp->isFast())
      return MemoryAccess::RT_NONE;
    [[fallthrough]];
  case Instruction::Mul:
    if (DisableMultiplicativeReductions)
      return MemoryAccess::RT_NONE;
    return MemoryAccess::RT_MUL;
  default:
    return MemoryAccess::RT_NONE;
  }
}
void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
  SmallVector<MemoryAccess *, 2> Loads;
  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;

  // First collect candidate load-store reduction chains by iterating over all
  // stores and collecting possible reduction loads.
  for (MemoryAccess *StoreMA : Stmt) {
    if (StoreMA->isRead())
      continue;

    Loads.clear();
    collectCandidateReductionLoads(StoreMA, Loads);
    for (MemoryAccess *LoadMA : Loads)
      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
  }

  // Then check each possible candidate pair.
  for (const auto &CandidatePair : Candidates) {
    bool Valid = true;
    isl::map LoadAccs = CandidatePair.first->getAccessRelation();
    isl::map StoreAccs = CandidatePair.second->getAccessRelation();

    // Skip those with obviously unequal base addresses.
    if (!LoadAccs.has_equal_space(StoreAccs)) {
      continue;
    }

    // And check if the remaining for overlap with other memory accesses.
    isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
    AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
    isl::set AllAccs = AllAccsRel.range();

    for (MemoryAccess *MA : Stmt) {
      if (MA == CandidatePair.first || MA == CandidatePair.second)
        continue;

      isl::map AccRel =
          MA->getAccessRelation().intersect_domain(Stmt.getDomain());
      isl::set Accs = AccRel.range();

      if (AllAccs.has_equal_space(Accs)) {
        isl::set OverlapAccs = Accs.intersect(AllAccs);
        Valid = Valid && OverlapAccs.is_empty();
      }
    }

    if (!Valid)
      continue;

    const LoadInst *Load =
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
    MemoryAccess::ReductionType RT =
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);

    // If no overlapping access was found we mark the load and store as
    // reduction like.
    CandidatePair.first->markAsReductionLike(RT);
    CandidatePair.second->markAsReductionLike(RT);
  }
}
void ScopBuilder::verifyInvariantLoads() {
  auto &RIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LI : RIL) {
    assert(LI && scop->contains(LI));
    // If there exists a statement in the scop which has a memory access for
    // @p LI, then mark this scop as infeasible for optimization.
    for (ScopStmt &Stmt : *scop)
      if (Stmt.getArrayAccessOrNULLFor(LI)) {
        scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
        return;
      }
  }
}
void ScopBuilder::hoistInvariantLoads() {
  if (!PollyInvariantLoadHoisting)
    return;

  isl::union_map Writes = scop->getWrites();
  for (ScopStmt &Stmt : *scop) {
    InvariantAccessesTy InvariantAccesses;

    for (MemoryAccess *Access : Stmt) {
      isl::set NHCtx = getNonHoistableCtx(Access, Writes);
      if (!NHCtx.is_null())
        InvariantAccesses.push_back({Access, NHCtx});
    }

    // Transfer the memory access from the statement to the SCoP.
    for (auto InvMA : InvariantAccesses)
      Stmt.removeMemoryAccess(InvMA.MA);
    addInvariantLoads(Stmt, InvariantAccesses);
  }
}
/// Check if an access range is too complex.
///
/// An access range is too complex, if it contains either many disjuncts or
/// very complex expressions. As a simple heuristic, we assume a set to
/// be too complex if the sum of existentially quantified dimensions and
/// set dimensions is larger than a threshold. This reliably detects both
/// sets with many disjuncts as well as sets with many divisions as they
/// arise.
///
/// @param AccessRange The range to check for complexity.
///
/// @returns True if the access range is too complex.
static bool isAccessRangeTooComplex(isl::set AccessRange) {
  unsigned NumTotalDims = 0;

  for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
    NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::div));
    NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::set));
  }

  if (NumTotalDims > MaxDimensionsInAccessRange)
    return true;

  return false;
}
bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
                                               isl::union_map Writes) {
  if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) {
    return getNonHoistableCtx(BasePtrMA, Writes).is_null();
  }

  Value *BaseAddr = MA->getOriginalBaseAddr();
  if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
    if (!isa<LoadInst>(BasePtrInst))
      return scop->contains(BasePtrInst);

  return false;
}
void ScopBuilder::addUserContext() {
  if (UserContextStr.empty())
    return;

  isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str());
  isl::space Space = scop->getParamSpace();
  isl::size SpaceParams = Space.dim(isl::dim::param);
  if (unsignedFromIslSize(SpaceParams) !=
      unsignedFromIslSize(UserContext.dim(isl::dim::param))) {
    std::string SpaceStr = stringFromIslObj(Space, "null");
    errs() << "Error: the context provided in -polly-context has not the same "
           << "number of dimensions than the computed context. Due to this "
           << "mismatch, the -polly-context option is ignored. Please provide "
           << "the context in the parameter space: " << SpaceStr << ".\n";
    return;
  }

  for (auto i : rangeIslSize(0, SpaceParams)) {
    std::string NameContext =
        scop->getContext().get_dim_name(isl::dim::param, i);
    std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);

    if (NameContext != NameUserContext) {
      std::string SpaceStr = stringFromIslObj(Space, "null");
      errs() << "Error: the name of dimension " << i
             << " provided in -polly-context " << "is '" << NameUserContext
             << "', but the name in the computed " << "context is '"
             << NameContext << "'. Due to this name mismatch, "
             << "the -polly-context option is ignored. Please provide "
             << "the context in the parameter space: " << SpaceStr << ".\n";
      return;
    }

    UserContext = UserContext.set_dim_id(isl::dim::param, i,
                                         Space.get_dim_id(isl::dim::param, i));
  }

  isl::set newContext = scop->getContext().intersect(UserContext);
  scop->setContext(newContext);
}
isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,
                                         isl::union_map Writes) {
  // TODO: Loads that are not loop carried, hence are in a statement with
  //       zero iterators, are by construction invariant, though we
  //       currently "hoist" them anyway. This is necessary because we allow
  //       them to be treated as parameters (e.g., in conditions) and our code
  //       generation would otherwise use the old value.

  auto &Stmt = *Access->getStatement();
  BasicBlock *BB = Stmt.getEntryBlock();

  if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
      Access->isMemoryIntrinsic())
    return {};

  // Skip accesses that have an invariant base pointer which is defined but
  // not loaded inside the SCoP. This can happen e.g., if a readnone call
  // returns a pointer that is used as a base address. However, as we want
  // to hoist indirect pointers, we allow the base pointer to be defined in
  // the region if it is also a memory access. Each ScopArrayInfo object
  // that has a base pointer origin has a base pointer that is loaded and
  // that is invariant, thus it will be hoisted too. However, if there is
  // no base pointer origin we check that the base pointer is defined
  // outside the region.
  auto *LI = cast<LoadInst>(Access->getAccessInstruction());
  if (hasNonHoistableBasePtrInScop(Access, Writes))
    return {};

  isl::map AccessRelation = Access->getAccessRelation();
  assert(!AccessRelation.is_empty());

  if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
    return {};

  AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
  isl::set SafeToLoad;

  auto &DL = scop->getFunction().getParent()->getDataLayout();
  if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(),
                                  LI->getAlign(), DL)) {
    SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
  } else if (BB != LI->getParent()) {
    // Skip accesses in non-affine subregions as they might not be executed
    // under the same condition as the entry of the non-affine subregion.
    return {};
  } else {
    SafeToLoad = AccessRelation.range();
  }

  if (isAccessRangeTooComplex(AccessRelation.range()))
    return {};

  isl::union_map Written = Writes.intersect_range(SafeToLoad);
  isl::set WrittenCtx = Written.params();
  bool IsWritten = !WrittenCtx.is_empty();

  if (!IsWritten)
    return WrittenCtx;

  WrittenCtx = WrittenCtx.remove_divs();
  bool TooComplex =
      unsignedFromIslSize(WrittenCtx.n_basic_set()) >= MaxDisjunctsInDomain;
  if (TooComplex || !isRequiredInvariantLoad(LI))
    return {};

  scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(),
                      AS_RESTRICTION, LI->getParent());

  return WrittenCtx;
}
static bool isAParameter(llvm::Value *maybeParam, const Function &F) {
  for (const llvm::Argument &Arg : F.args())
    if (&Arg == maybeParam)
      return true;

  return false;
}
bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA,
                                     bool StmtInvalidCtxIsEmpty,
                                     bool MAInvalidCtxIsEmpty,
                                     bool NonHoistableCtxIsEmpty) {
  LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
  const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
  if (PollyAllowDereferenceOfAllFunctionParams &&
      isAParameter(LInst->getPointerOperand(), scop->getFunction()))
    return true;

  // TODO: We can provide more information for better but more expensive
  //       results.
  if (!isDereferenceableAndAlignedPointer(
          LInst->getPointerOperand(), LInst->getType(), LInst->getAlign(), DL))
    return false;

  // If the location might be overwritten we do not hoist it unconditionally.
  //
  // TODO: This is probably too conservative.
  if (!NonHoistableCtxIsEmpty)
    return false;

  // If a dereferenceable load is in a statement that is modeled precisely we
  // can hoist it.
  if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
    return true;

  // Even if the statement is not modeled precisely we can hoist the load if it
  // does not involve any parameters that might have been specialized by the
  // statement domain.
  for (const SCEV *Subscript : MA->subscripts())
    if (!isa<SCEVConstant>(Subscript))
      return false;
  return true;
}
void ScopBuilder::addInvariantLoads(ScopStmt &Stmt,
                                    InvariantAccessesTy &InvMAs) {
  if (InvMAs.empty())
    return;

  isl::set StmtInvalidCtx = Stmt.getInvalidContext();
  bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();

  // Get the context under which the statement is executed but remove the error
  // context under which this statement is reached.
  isl::set DomainCtx = Stmt.getDomain().params();
  DomainCtx = DomainCtx.subtract(StmtInvalidCtx);

  if (unsignedFromIslSize(DomainCtx.n_basic_set()) >= MaxDisjunctsInDomain) {
    auto *AccInst = InvMAs.front().MA->getAccessInstruction();
    scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
    return;
  }

  // Project out all parameters that relate to loads in the statement. Otherwise
  // we could have cyclic dependences on the constraints under which the
  // hoisted loads are executed and we could not determine an order in which to
  // pre-load them. This happens because not only lower bounds are part of the
  // domain but also upper bounds.
  for (auto &InvMA : InvMAs) {
    auto *MA = InvMA.MA;
    Instruction *AccInst = MA->getAccessInstruction();
    if (SE.isSCEVable(AccInst->getType())) {
      SetVector<Value *> Values;
      for (const SCEV *Parameter : scop->parameters()) {
        Values.clear();
        findValues(Parameter, SE, Values);
        if (!Values.count(AccInst))
          continue;

        isl::id ParamId = scop->getIdForParam(Parameter);
        if (!ParamId.is_null()) {
          int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
          if (Dim >= 0)
            DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
        }
      }
    }
  }

  for (auto &InvMA : InvMAs) {
    auto *MA = InvMA.MA;
    isl::set NHCtx = InvMA.NonHoistableCtx;

    // Check for another invariant access that accesses the same location as
    // MA and if found consolidate them. Otherwise create a new equivalence
    // class at the end of InvariantEquivClasses.
    LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
    Type *Ty = LInst->getType();
    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());

    isl::set MAInvalidCtx = MA->getInvalidContext();
    bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
    bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();

    isl::set MACtx;
    // Check if we know that this pointer can be speculatively accessed.
    if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
                           NonHoistableCtxIsEmpty)) {
      MACtx = isl::set::universe(DomainCtx.get_space());
    } else {
      MACtx = DomainCtx;
      MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
      MACtx = MACtx.gist_params(scop->getContext());
    }

    bool Consolidated = false;
    for (auto &IAClass : scop->invariantEquivClasses()) {
      if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
        continue;

      // If the pointer and the type is equal check if the access function wrt.
      // to the domain is equal too. It can happen that the domain fixes
      // parameter values and these can be different for distinct part of the
      // SCoP. If this happens we cannot consolidate the loads but need to
      // create a new invariant load equivalence class.
      auto &MAs = IAClass.InvariantAccesses;
      if (!MAs.empty()) {
        auto *LastMA = MAs.front();

        isl::set AR = MA->getAccessRelation().range();
        isl::set LastAR = LastMA->getAccessRelation().range();
        bool SameAR = AR.is_equal(LastAR);

        if (!SameAR)
          continue;
      }

      // Add MA to the list of accesses that are in this class.
      MAs.push_front(MA);

      Consolidated = true;

      // Unify the execution context of the class and this statement.
      isl::set IAClassDomainCtx = IAClass.ExecutionContext;
      if (!IAClassDomainCtx.is_null())
        IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
      else
        IAClassDomainCtx = MACtx;
      IAClass.ExecutionContext = IAClassDomainCtx;
      break;
    }

    if (Consolidated)
      continue;

    MACtx = MACtx.coalesce();

    // If we did not consolidate MA, thus did not find an equivalence class
    // for it, we create a new one.
    scop->addInvariantEquivClass(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
  }
}
void ScopBuilder::collectCandidateReductionLoads(
    MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
  ScopStmt *Stmt = StoreMA->getStatement();

  auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
  if (!Store)
    return;

  // Skip if there is not one binary operator between the load and the store
  auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
  if (!BinOp)
    return;

  // Skip if the binary operator has multiple uses
  if (BinOp->getNumUses() != 1)
    return;

  // Skip if the opcode of the binary operator is not commutative/associative
  if (!BinOp->isCommutative() || !BinOp->isAssociative())
    return;

  // Skip if the binary operator is outside the current SCoP
  if (BinOp->getParent() != Store->getParent())
    return;

  // Skip if it is a multiplicative reduction and we disabled them
  if (DisableMultiplicativeReductions &&
      (BinOp->getOpcode() == Instruction::Mul ||
       BinOp->getOpcode() == Instruction::FMul))
    return;

  // Check the binary operator operands for a candidate load
  auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
  auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
  if (!PossibleLoad0 && !PossibleLoad1)
    return;

  // A load is only a candidate if it cannot escape (thus has only this use)
  if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
    if (PossibleLoad0->getParent() == Store->getParent())
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
  if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
    if (PossibleLoad1->getParent() == Store->getParent())
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
}
/// Find the canonical scop array info object for a set of invariant load
/// hoisted loads. The canonical array is the one that corresponds to the
/// first load in the list of accesses which is used as base pointer of a
/// memory access.
static const ScopArrayInfo *findCanonicalArray(Scop &S,
                                               MemoryAccessList &Accesses) {
  for (MemoryAccess *Access : Accesses) {
    const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
        Access->getAccessInstruction(), MemoryKind::Array);
    if (CanonicalArray)
      return CanonicalArray;
  }
  return nullptr;
}
/// Check if @p Array serves as base array in an invariant load.
static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) {
  for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
    for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
      if (Access2->getScopArrayInfo() == Array)
        return true;
  return false;
}
/// Replace the base pointer arrays in all memory accesses referencing @p Old,
/// with a reference to @p New.
static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
                                 const ScopArrayInfo *New) {
  for (ScopStmt &Stmt : S)
    for (MemoryAccess *Access : Stmt) {
      if (Access->getLatestScopArrayInfo() != Old)
        continue;

      isl::id Id = New->getBasePtrId();
      isl::map Map = Access->getAccessRelation();
      Map = Map.set_tuple_id(isl::dim::out, Id);
      Access->setAccessRelation(Map);
    }
}
void ScopBuilder::canonicalizeDynamicBasePtrs() {
  for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
    MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;

    const ScopArrayInfo *CanonicalBasePtrSAI =
        findCanonicalArray(*scop, BasePtrAccesses);

    if (!CanonicalBasePtrSAI)
      continue;

    for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
      const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
          BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
      if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
          !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
        continue;

      // We currently do not canonicalize arrays where some accesses are
      // hoisted as invariant loads. If we would, we need to update the access
      // function of the invariant loads as well. However, as this is not a
      // very common situation, we leave this for now to avoid further
      // complexity increases.
      if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI))
        continue;

      replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI);
    }
  }
}
void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
  for (MemoryAccess *Access : Stmt.MemAccs) {
    Type *ElementType = Access->getElementType();

    MemoryKind Ty;
    if (Access->isPHIKind())
      Ty = MemoryKind::PHI;
    else if (Access->isExitPHIKind())
      Ty = MemoryKind::ExitPHI;
    else if (Access->isValueKind())
      Ty = MemoryKind::Value;
    else
      Ty = MemoryKind::Array;

    // Create isl::pw_aff for SCEVs which describe sizes. Collect all
    // assumptions which are taken. isl::pw_aff objects are cached internally
    // and they are used later by scop.
    for (const SCEV *Size : Access->Sizes) {
      if (!Size)
        continue;
      scop->getPwAff(Size, nullptr, false, &RecordedAssumptions);
    }
    auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
                                               ElementType, Access->Sizes, Ty);

    // Create isl::pw_aff for SCEVs which describe subscripts. Collect all
    // assumptions which are taken. isl::pw_aff objects are cached internally
    // and they are used later by scop.
    for (const SCEV *Subscript : Access->subscripts()) {
      if (!Access->isAffine() || !Subscript)
        continue;
      scop->getPwAff(Subscript, Stmt.getEntryBlock(), false,
                     &RecordedAssumptions);
    }
    Access->buildAccessRelation(SAI);
    scop->addAccessData(Access);
  }
}
/// Add the minimal/maximal access in @p Set to @p User.
///
/// @return True if more accesses should be added, false if we reached the
///         maximal number of run-time checks to be generated.
static bool buildMinMaxAccess(isl::set Set,
                              Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
  isl::pw_multi_aff MinPMA, MaxPMA;
  isl::pw_aff LastDimAff;
  isl::aff OneAff;
  unsigned Pos;

  Set = Set.remove_divs();
  polly::simplify(Set);

  if (unsignedFromIslSize(Set.n_basic_set()) > RunTimeChecksMaxAccessDisjuncts)
    Set = Set.simple_hull();

  // Restrict the number of parameters involved in the access as the lexmin/
  // lexmax computation will take too long if this number is high.
  //
  // Experiments with a simple test case using an i7 4800MQ:
  //
  //  #Parameters involved | Time (in sec)
  //
  if (isl_set_n_param(Set.get()) >
      static_cast<isl_size>(RunTimeChecksMaxParameters)) {
    unsigned InvolvedParams = 0;
    for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
      if (Set.involves_dims(isl::dim::param, u, 1))
        InvolvedParams++;

    if (InvolvedParams > RunTimeChecksMaxParameters)
      return false;
  }

  MinPMA = Set.lexmin_pw_multi_aff();
  MaxPMA = Set.lexmax_pw_multi_aff();

  MinPMA = MinPMA.coalesce();
  MaxPMA = MaxPMA.coalesce();

  if (MaxPMA.is_null())
    return false;

  unsigned MaxOutputSize = unsignedFromIslSize(MaxPMA.dim(isl::dim::out));

  // Adjust the last dimension of the maximal access by one as we want to
  // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
  // we test during code generation might now point after the end of the
  // allocated array but we will never dereference it anyway.
  assert(MaxOutputSize >= 1 && "Assumed at least one output dimension");
  Pos = MaxOutputSize - 1;
  LastDimAff = MaxPMA.at(Pos);
  OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
  OneAff = OneAff.add_constant_si(1);
  LastDimAff = LastDimAff.add(OneAff);
  MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);

  if (MinPMA.is_null() || MaxPMA.is_null())
    return false;

  MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));

  return true;
}
/// Wrapper function to calculate minimal/maximal accesses to each array.
bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup,
                                        Scop::MinMaxVectorTy &MinMaxAccesses) {
  MinMaxAccesses.reserve(AliasGroup.size());

  isl::union_set Domains = scop->getDomains();
  isl::union_map Accesses = isl::union_map::empty(scop->getIslCtx());

  for (MemoryAccess *MA : AliasGroup)
    Accesses = Accesses.unite(MA->getAccessRelation());

  Accesses = Accesses.intersect_domain(Domains);
  isl::union_set Locations = Accesses.range();

  bool LimitReached = false;
  for (isl::set Set : Locations.get_set_list()) {
    LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop);
    if (LimitReached)
      break;
  }

  return !LimitReached;
}
static isl::set getAccessDomain(MemoryAccess *MA) {
  isl::set Domain = MA->getStatement()->getDomain();
  Domain = Domain.project_out(isl::dim::set, 0,
                              unsignedFromIslSize(Domain.tuple_dim()));
  return Domain.reset_tuple_id();
}
bool ScopBuilder::buildAliasChecks() {
  if (!PollyUseRuntimeAliasChecks)
    return true;

  if (buildAliasGroups()) {
    // Aliasing assumptions do not go through addAssumption but we still want to
    // collect statistics so we do it here explicitly.
    if (scop->getAliasGroups().size())
      Scop::incrementNumberOfAliasingAssumptions(1);
    return true;
  }

  // If a problem occurs while building the alias groups we need to delete
  // this SCoP and pretend it wasn't valid in the first place. To this end
  // we make the assumed context infeasible.
  scop->invalidate(ALIASING, DebugLoc());

  LLVM_DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr()
                    << " could not be created. This SCoP has been dismissed.");
  return false;
}
std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
ScopBuilder::buildAliasGroupsForAccesses() {
  BatchAAResults BAA(AA);
  AliasSetTracker AST(BAA);

  DenseMap<Value *, MemoryAccess *> PtrToAcc;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;
  for (ScopStmt &Stmt : *scop) {

    isl::set StmtDomain = Stmt.getDomain();
    bool StmtDomainEmpty = StmtDomain.is_empty();

    // Statements with an empty domain will never be executed.
    if (StmtDomainEmpty)
      continue;

    for (MemoryAccess *MA : Stmt) {
      if (MA->isScalarKind())
        continue;
      if (!MA->isRead())
        HasWriteAccess.insert(MA->getScopArrayInfo());
      MemAccInst Acc(MA->getAccessInstruction());
      if (MA->isRead() && isa<MemTransferInst>(Acc))
        PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
      else
        PtrToAcc[Acc.getPointerOperand()] = MA;
      AST.add(Acc);
    }
  }

  AliasGroupVectorTy AliasGroups;
  for (AliasSet &AS : AST) {
    if (AS.isMustAlias() || AS.isForwardingAliasSet())
      continue;
    AliasGroupTy AG;
    for (auto &PR : AS)
      AG.push_back(PtrToAcc[PR.getValue()]);
    if (AG.size() < 2)
      continue;
    AliasGroups.push_back(std::move(AG));
  }

  return std::make_tuple(AliasGroups, HasWriteAccess);
}
bool ScopBuilder::buildAliasGroups() {
  // To create sound alias checks we perform the following steps:
  //   o) We partition each group into read only and non read only accesses.
  //   o) For each group with more than one base pointer we then compute minimal
  //      and maximal accesses to each array of a group in read only and non
  //      read only partitions separately.
  AliasGroupVectorTy AliasGroups;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;

  std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses();

  splitAliasGroupsByDomain(AliasGroups);

  for (AliasGroupTy &AG : AliasGroups) {
    if (!scop->hasFeasibleRuntimeContext())
      return false;

    {
      IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut);
      bool Valid = buildAliasGroup(AG, HasWriteAccess);
      if (!Valid)
        return false;
    }
    if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) {
      scop->invalidate(COMPLEXITY, DebugLoc());
      return false;
    }
  }

  return true;
}

bool ScopBuilder::buildAliasGroup(
    AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
  AliasGroupTy ReadOnlyAccesses;
  AliasGroupTy ReadWriteAccesses;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;

  if (AliasGroup.size() < 2)
    return true;

  for (MemoryAccess *Access : AliasGroup) {
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
                                        Access->getAccessInstruction())
             << "Possibly aliasing pointer, use restrict keyword.");
    const ScopArrayInfo *Array = Access->getScopArrayInfo();
    if (HasWriteAccess.count(Array)) {
      ReadWriteArrays.insert(Array);
      ReadWriteAccesses.push_back(Access);
    } else {
      ReadOnlyArrays.insert(Array);
      ReadOnlyAccesses.push_back(Access);
    }
  }

  // If there are no read-only pointers, and less than two read-write pointers,
  // no alias check is needed.
  if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
    return true;

  // If there is no read-write pointer, no alias check is needed.
  if (ReadWriteArrays.empty())
    return true;

  // For non-affine accesses, no alias check can be generated as we cannot
  // compute a sufficiently tight lower and upper bound: bail out.
  for (MemoryAccess *MA : AliasGroup) {
    if (!MA->isAffine()) {
      scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
                       MA->getAccessInstruction()->getParent());
      return false;
    }
  }

  // Ensure that for all memory accesses for which we generate alias checks,
  // their base pointers are available.
  for (MemoryAccess *MA : AliasGroup) {
    if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
      scop->addRequiredInvariantLoad(
          cast<LoadInst>(BasePtrMA->getAccessInstruction()));
  }

  //  scop->getAliasGroups().emplace_back();
  //  Scop::MinMaxVectorPairTy &pair = scop->getAliasGroups().back();
  Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
  Scop::MinMaxVectorTy MinMaxAccessesReadOnly;

  bool Valid;

  Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite);

  if (!Valid)
    return false;

  // Bail out if the number of values we need to compare is too large.
  // This is important as the number of comparisons grows quadratically with
  // the number of values we need to compare.
  if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
      RunTimeChecksMaxArraysPerGroup)
    return false;

  Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);

  scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
  if (!Valid)
    return false;

  return true;
}
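
/// Split alias groups by the execution domains of their accesses.
///
/// Accesses whose (parameter-only) domains are disjoint can never be executed
/// together, so no runtime alias check between them is needed. Such accesses
/// are moved into a new group, which is appended to AliasGroups and processed
/// again by the enclosing loop.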
void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
  for (unsigned u = 0; u < AliasGroups.size(); u++) {
    AliasGroupTy NewAG;
    AliasGroupTy &AG = AliasGroups[u];
    AliasGroupTy::iterator AGI = AG.begin();
    isl::set AGDomain = getAccessDomain(*AGI);
    while (AGI != AG.end()) {
      MemoryAccess *MA = *AGI;
      isl::set MADomain = getAccessDomain(MA);
      if (AGDomain.is_disjoint(MADomain)) {
        NewAG.push_back(MA);
        AGI = AG.erase(AGI);
      } else {
        AGDomain = AGDomain.unite(MADomain);
        AGI++;
      }
    }
    if (NewAG.size() > 1)
      AliasGroups.push_back(std::move(NewAG));
  }
}
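
/// Check that the "physical" and the "virtual" kind of a single use agree;
/// see verifyUses below for the rationale.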
static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
  auto PhysUse = VirtualUse::create(S, Op, &LI, false);
  auto VirtUse = VirtualUse::create(S, Op, &LI, true);
  assert(PhysUse.getKind() == VirtUse.getKind());
}

/// Check the consistency of every statement's MemoryAccesses.
///
/// The check is carried out by expecting the "physical" kind of use (derived
/// from the BasicBlock the instruction resides in) to be the same as the
/// "virtual" kind of use (derived from a statement's MemoryAccess).
///
/// The "physical" uses are taken by ensureValueRead to determine whether to
/// create MemoryAccesses. When done, the kind of scalar access should be the
/// same no matter which way it was derived.
///
/// The MemoryAccesses might be changed by later SCoP-modifying passes and
/// hence can intentionally influence the kind of uses (no longer corresponding
/// to the "physical" ones, hence called "virtual"). The CodeGenerator
/// therefore has to pick up the virtual uses. But here, right after the SCoP
/// has been built, no such modification has happened yet, such that virtual
/// and physical uses are equivalent.
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
  for (auto *BB : S->getRegion().blocks()) {
    for (auto &Inst : *BB) {
      auto *Stmt = S->getStmtFor(&Inst);
      if (!Stmt)
        continue;

      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Branch conditions are encoded in the statement domains.
      if (Inst.isTerminator() && Stmt->isBlockStmt())
        continue;

      // Verify all uses.
      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);

      // Stores do not produce values used by other statements.
      if (isa<StoreInst>(Inst))
        continue;

      // For every value defined in the block, also check that a use of that
      // value in the same statement would not be an inter-statement use. It can
      // still be synthesizable or load-hoisted, but these kinds of instructions
      // are not directly copied in code-generation.
      auto VirtDef =
          VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
      assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
             VirtDef.getKind() == VirtualUse::Intra ||
             VirtDef.getKind() == VirtualUse::Hoisted);
    }
  }

  if (S->hasSingleExitEdge())
    return;

  // PHINodes in the SCoP region's exit block are also uses to be checked.
  if (!S->getRegion().isTopLevelRegion()) {
    for (auto &Inst : *S->getRegion().getExit()) {
      if (!isa<PHINode>(Inst))
        continue;

      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);
    }
  }
}

void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
  scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE,
                      SD.getNextID()));

  buildStmts(R);

  // Create all invariant load instructions first. These are categorized as
  // 'synthesizable', therefore are not part of any ScopStmt but need to be
  // created somewhere.
  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  for (BasicBlock *BB : scop->getRegion().blocks()) {
    if (SD.isErrorBlock(*BB, scop->getRegion()))
      continue;

    for (Instruction &Inst : *BB) {
      LoadInst *Load = dyn_cast<LoadInst>(&Inst);
      if (!Load)
        continue;

      if (!RIL.count(Load))
        continue;

      // Invariant loads require a MemoryAccess to be created in some statement.
      // It is not important to which statement the MemoryAccess is added
      // because it will later be removed from the ScopStmt again. We chose the
      // first statement of the basic block the LoadInst is in.
      ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
      assert(!List.empty());
      ScopStmt *RILStmt = List.front();
      buildMemoryAccess(Load, RILStmt);
    }
  }
  buildAccessFunctions();

  // In case the region does not have an exiting block we will later (during
  // code generation) split the exit block. This will move potential PHI nodes
  // from the current exit block into the new region exiting block. Hence, PHI
  // nodes that are at this point not part of the region will be.
  // To handle these PHI nodes later we will now model their operands as scalar
  // accesses. Note that we do not model anything in the exit block if we have
  // an exiting block in the region, as there will not be any splitting later.
  if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
    for (Instruction &Inst : *R.getExit()) {
      PHINode *PHI = dyn_cast<PHINode>(&Inst);
      if (!PHI)
        break;

      buildPHIAccesses(nullptr, PHI, nullptr, true);
    }
  }

  // Create memory accesses for global reads since all arrays are now known.
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
  for (auto GlobalReadPair : GlobalReads) {
    ScopStmt *GlobalReadStmt = GlobalReadPair.first;
    Instruction *GlobalRead = GlobalReadPair.second;
    for (auto *BP : ArrayBasePointers)
      addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
                     BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
  }

  buildInvariantEquivalenceClasses();

  /// A map from basic blocks to their invalid domains.
  DenseMap<BasicBlock *, isl::set> InvalidDomainMap;

  if (!buildDomains(&R, InvalidDomainMap)) {
    LLVM_DEBUG(
        dbgs() << "Bailing-out because buildDomains encountered problems\n");
    return;
  }

  addUserAssumptions(AC, InvalidDomainMap);

  // Initialize the invalid domain.
  for (ScopStmt &Stmt : scop->Stmts)
    if (Stmt.isBlockStmt())
      Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
    else
      Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
          Stmt.getRegion()->getNode())]);

  // Remove empty statements.
  // Exit early in case there are no executable statements left in this scop.
  scop->removeStmtNotInDomainMap();
  scop->simplifySCoP(false);
  if (scop->isEmpty()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
    return;
  }

  // The ScopStmts now have enough information to initialize themselves.
  for (ScopStmt &Stmt : *scop) {
    collectSurroundingLoops(Stmt);

    buildDomain(Stmt);
    buildAccessRelations(Stmt);

    if (DetectReductions)
      checkForReductions(Stmt);
  }

  // Check early for a feasible runtime context.
  if (!scop->hasFeasibleRuntimeContext()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
    return;
  }

  // Check early for profitability. Afterwards it cannot change anymore,
  // only the runtime context could become infeasible.
  if (!scop->isProfitable(UnprofitableScalarAccs)) {
    scop->invalidate(PROFITABLE, DebugLoc());
    LLVM_DEBUG(
        dbgs() << "Bailing-out because SCoP is not considered profitable\n");
    return;
  }

  buildSchedule();

  finalizeAccesses();

  scop->realignParams();
  addUserContext();

  // After the context was fully constructed, thus all our knowledge about
  // the parameters is in there, we add all recorded assumptions to the
  // assumed/invalid context.
  addRecordedAssumptions();

  scop->simplifyContexts();
  if (!buildAliasChecks()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
    return;
  }

  hoistInvariantLoads();
  canonicalizeDynamicBasePtrs();
  verifyInvariantLoads();
  scop->simplifySCoP(true);

  // Check late for a feasible runtime context because profitability did not
  // change.
  if (!scop->hasFeasibleRuntimeContext()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
    return;
  }

#ifndef NDEBUG
  verifyUses(scop.get(), LI, DT);
#endif
}
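
// The constructor drives the whole construction: it emits the "SCoP begins
// here" remark, runs buildScop, and discards the constructed Scop again if no
// feasible runtime context remains.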
ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AAResults &AA,
                         const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
                         ScopDetection &SD, ScalarEvolution &SE,
                         OptimizationRemarkEmitter &ORE)
    : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
  DebugLoc Beg, End;
  auto P = getBBPairForRegion(R);
  getDebugLocations(P, Beg, End);

  std::string Msg = "SCoP begins here.";
  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
           << Msg);

  buildScop(*R, AC);

  LLVM_DEBUG(dbgs() << *scop);

  if (!scop->hasFeasibleRuntimeContext()) {
    InfeasibleScops++;
    Msg = "SCoP ends here but was dismissed.";
    LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n");
    RecordedAssumptions.clear();
    scop.reset();
  } else {
    Msg = "SCoP ends here.";
    ++ScopFound;
    if (scop->getMaxLoopDepth() > 0)
      ++RichScopFound;
  }

  if (R->isTopLevelRegion())
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
             << Msg);
  else
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
             << Msg);
}