1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // DependenceAnalysis is an LLVM pass that analyses dependences between memory
10 // accesses. Currently, it is an (incomplete) implementation of the approach
13 // Practical Dependence Testing
14 // Goff, Kennedy, Tseng
17 // There's a single entry point that analyzes the dependence between a pair
18 // of memory references in a function, returning either NULL, for no dependence,
19 // or a more-or-less detailed description of the dependence between them.
21 // Currently, the implementation cannot propagate constraints between
22 // coupled RDIV subscripts and lacks a multi-subscript MIV test.
23 // Both of these are conservative weaknesses;
24 // that is, not a source of correctness problems.
26 // Since Clang linearizes some array subscripts, the dependence
27 // analysis is using SCEV->delinearize to recover the representation of multiple
28 // subscripts, and thus avoid the more expensive and less precise MIV tests. The
29 // delinearization is controlled by the flag -da-delinearize.
31 // We should pay some careful attention to the possibility of integer overflow
32 // in the implementation of the various tests. This could happen with Add,
33 // Subtract, or Multiply, with both APInt's and SCEV's.
35 // Some non-linear subscript pairs can be handled by the GCD test
36 // (and perhaps other tests).
37 // Should explore how often these things occur.
39 // Finally, it seems like certain test cases expose weaknesses in the SCEV
40 // simplification, especially in the handling of sign and zero extensions.
41 // It could be useful to spend time exploring these.
43 // Please note that this is work in progress and the interface is subject to
46 //===----------------------------------------------------------------------===//
48 // In memory of Ken Kennedy, 1945 - 2007 //
50 //===----------------------------------------------------------------------===//
52 #include "llvm/Analysis/DependenceAnalysis.h"
53 #include "llvm/ADT/STLExtras.h"
54 #include "llvm/ADT/Statistic.h"
55 #include "llvm/Analysis/AliasAnalysis.h"
56 #include "llvm/Analysis/LoopInfo.h"
57 #include "llvm/Analysis/ScalarEvolution.h"
58 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
59 #include "llvm/Analysis/ValueTracking.h"
60 #include "llvm/Config/llvm-config.h"
61 #include "llvm/IR/InstIterator.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/Operator.h"
64 #include "llvm/InitializePasses.h"
65 #include "llvm/Support/CommandLine.h"
66 #include "llvm/Support/Debug.h"
67 #include "llvm/Support/ErrorHandling.h"
68 #include "llvm/Support/raw_ostream.h"
72 #define DEBUG_TYPE "da"
74 //===----------------------------------------------------------------------===//
77 STATISTIC(TotalArrayPairs
, "Array pairs tested");
78 STATISTIC(SeparableSubscriptPairs
, "Separable subscript pairs");
79 STATISTIC(CoupledSubscriptPairs
, "Coupled subscript pairs");
80 STATISTIC(NonlinearSubscriptPairs
, "Nonlinear subscript pairs");
81 STATISTIC(ZIVapplications
, "ZIV applications");
82 STATISTIC(ZIVindependence
, "ZIV independence");
83 STATISTIC(StrongSIVapplications
, "Strong SIV applications");
84 STATISTIC(StrongSIVsuccesses
, "Strong SIV successes");
85 STATISTIC(StrongSIVindependence
, "Strong SIV independence");
86 STATISTIC(WeakCrossingSIVapplications
, "Weak-Crossing SIV applications");
87 STATISTIC(WeakCrossingSIVsuccesses
, "Weak-Crossing SIV successes");
88 STATISTIC(WeakCrossingSIVindependence
, "Weak-Crossing SIV independence");
89 STATISTIC(ExactSIVapplications
, "Exact SIV applications");
90 STATISTIC(ExactSIVsuccesses
, "Exact SIV successes");
91 STATISTIC(ExactSIVindependence
, "Exact SIV independence");
92 STATISTIC(WeakZeroSIVapplications
, "Weak-Zero SIV applications");
93 STATISTIC(WeakZeroSIVsuccesses
, "Weak-Zero SIV successes");
94 STATISTIC(WeakZeroSIVindependence
, "Weak-Zero SIV independence");
95 STATISTIC(ExactRDIVapplications
, "Exact RDIV applications");
96 STATISTIC(ExactRDIVindependence
, "Exact RDIV independence");
97 STATISTIC(SymbolicRDIVapplications
, "Symbolic RDIV applications");
98 STATISTIC(SymbolicRDIVindependence
, "Symbolic RDIV independence");
99 STATISTIC(DeltaApplications
, "Delta applications");
100 STATISTIC(DeltaSuccesses
, "Delta successes");
101 STATISTIC(DeltaIndependence
, "Delta independence");
102 STATISTIC(DeltaPropagations
, "Delta propagations");
103 STATISTIC(GCDapplications
, "GCD applications");
104 STATISTIC(GCDsuccesses
, "GCD successes");
105 STATISTIC(GCDindependence
, "GCD independence");
106 STATISTIC(BanerjeeApplications
, "Banerjee applications");
107 STATISTIC(BanerjeeIndependence
, "Banerjee independence");
108 STATISTIC(BanerjeeSuccesses
, "Banerjee successes");
111 Delinearize("da-delinearize", cl::init(true), cl::Hidden
, cl::ZeroOrMore
,
112 cl::desc("Try to delinearize array references."));
113 static cl::opt
<bool> DisableDelinearizationChecks(
114 "da-disable-delinearization-checks", cl::init(false), cl::Hidden
,
117 "Disable checks that try to statically verify validity of "
118 "delinearized subscripts. Enabling this option may result in incorrect "
119 "dependence vectors for languages that allow the subscript of one "
120 "dimension to underflow or overflow into another dimension."));
122 static cl::opt
<unsigned> MIVMaxLevelThreshold(
123 "da-miv-max-level-threshold", cl::init(7), cl::Hidden
, cl::ZeroOrMore
,
124 cl::desc("Maximum depth allowed for the recursive algorithm used to "
125 "explore MIV direction vectors."));
127 //===----------------------------------------------------------------------===//
130 DependenceAnalysis::Result
131 DependenceAnalysis::run(Function
&F
, FunctionAnalysisManager
&FAM
) {
132 auto &AA
= FAM
.getResult
<AAManager
>(F
);
133 auto &SE
= FAM
.getResult
<ScalarEvolutionAnalysis
>(F
);
134 auto &LI
= FAM
.getResult
<LoopAnalysis
>(F
);
135 return DependenceInfo(&F
, &AA
, &SE
, &LI
);
138 AnalysisKey
DependenceAnalysis::Key
;
140 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass
, "da",
141 "Dependence Analysis", true, true)
142 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
)
143 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
)
144 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
)
145 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass
, "da", "Dependence Analysis",
148 char DependenceAnalysisWrapperPass::ID
= 0;
150 DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass()
152 initializeDependenceAnalysisWrapperPassPass(*PassRegistry::getPassRegistry());
155 FunctionPass
*llvm::createDependenceAnalysisWrapperPass() {
156 return new DependenceAnalysisWrapperPass();
159 bool DependenceAnalysisWrapperPass::runOnFunction(Function
&F
) {
160 auto &AA
= getAnalysis
<AAResultsWrapperPass
>().getAAResults();
161 auto &SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
162 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
163 info
.reset(new DependenceInfo(&F
, &AA
, &SE
, &LI
));
167 DependenceInfo
&DependenceAnalysisWrapperPass::getDI() const { return *info
; }
169 void DependenceAnalysisWrapperPass::releaseMemory() { info
.reset(); }
171 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
172 AU
.setPreservesAll();
173 AU
.addRequiredTransitive
<AAResultsWrapperPass
>();
174 AU
.addRequiredTransitive
<ScalarEvolutionWrapperPass
>();
175 AU
.addRequiredTransitive
<LoopInfoWrapperPass
>();
178 // Used to test the dependence analyzer.
179 // Looks through the function, noting instructions that may access memory.
180 // Calls depends() on every possible pair and prints out the result.
181 // Ignores all other instructions.
182 static void dumpExampleDependence(raw_ostream
&OS
, DependenceInfo
*DA
) {
183 auto *F
= DA
->getFunction();
184 for (inst_iterator SrcI
= inst_begin(F
), SrcE
= inst_end(F
); SrcI
!= SrcE
;
186 if (SrcI
->mayReadOrWriteMemory()) {
187 for (inst_iterator DstI
= SrcI
, DstE
= inst_end(F
);
188 DstI
!= DstE
; ++DstI
) {
189 if (DstI
->mayReadOrWriteMemory()) {
190 OS
<< "Src:" << *SrcI
<< " --> Dst:" << *DstI
<< "\n";
191 OS
<< " da analyze - ";
192 if (auto D
= DA
->depends(&*SrcI
, &*DstI
, true)) {
194 for (unsigned Level
= 1; Level
<= D
->getLevels(); Level
++) {
195 if (D
->isSplitable(Level
)) {
196 OS
<< " da analyze - split level = " << Level
;
197 OS
<< ", iteration = " << *DA
->getSplitIteration(*D
, Level
);
210 void DependenceAnalysisWrapperPass::print(raw_ostream
&OS
,
211 const Module
*) const {
212 dumpExampleDependence(OS
, info
.get());
216 DependenceAnalysisPrinterPass::run(Function
&F
, FunctionAnalysisManager
&FAM
) {
217 OS
<< "'Dependence Analysis' for function '" << F
.getName() << "':\n";
218 dumpExampleDependence(OS
, &FAM
.getResult
<DependenceAnalysis
>(F
));
219 return PreservedAnalyses::all();
222 //===----------------------------------------------------------------------===//
223 // Dependence methods
225 // Returns true if this is an input dependence.
226 bool Dependence::isInput() const {
227 return Src
->mayReadFromMemory() && Dst
->mayReadFromMemory();
231 // Returns true if this is an output dependence.
232 bool Dependence::isOutput() const {
233 return Src
->mayWriteToMemory() && Dst
->mayWriteToMemory();
237 // Returns true if this is an flow (aka true) dependence.
238 bool Dependence::isFlow() const {
239 return Src
->mayWriteToMemory() && Dst
->mayReadFromMemory();
243 // Returns true if this is an anti dependence.
244 bool Dependence::isAnti() const {
245 return Src
->mayReadFromMemory() && Dst
->mayWriteToMemory();
249 // Returns true if a particular level is scalar; that is,
250 // if no subscript in the source or destination mention the induction
251 // variable associated with the loop at this level.
252 // Leave this out of line, so it will serve as a virtual method anchor
253 bool Dependence::isScalar(unsigned level
) const {
258 //===----------------------------------------------------------------------===//
259 // FullDependence methods
261 FullDependence::FullDependence(Instruction
*Source
, Instruction
*Destination
,
262 bool PossiblyLoopIndependent
,
263 unsigned CommonLevels
)
264 : Dependence(Source
, Destination
), Levels(CommonLevels
),
265 LoopIndependent(PossiblyLoopIndependent
) {
268 DV
= std::make_unique
<DVEntry
[]>(CommonLevels
);
271 // The rest are simple getters that hide the implementation.
273 // getDirection - Returns the direction associated with a particular level.
274 unsigned FullDependence::getDirection(unsigned Level
) const {
275 assert(0 < Level
&& Level
<= Levels
&& "Level out of range");
276 return DV
[Level
- 1].Direction
;
280 // Returns the distance (or NULL) associated with a particular level.
281 const SCEV
*FullDependence::getDistance(unsigned Level
) const {
282 assert(0 < Level
&& Level
<= Levels
&& "Level out of range");
283 return DV
[Level
- 1].Distance
;
287 // Returns true if a particular level is scalar; that is,
288 // if no subscript in the source or destination mention the induction
289 // variable associated with the loop at this level.
290 bool FullDependence::isScalar(unsigned Level
) const {
291 assert(0 < Level
&& Level
<= Levels
&& "Level out of range");
292 return DV
[Level
- 1].Scalar
;
296 // Returns true if peeling the first iteration from this loop
297 // will break this dependence.
298 bool FullDependence::isPeelFirst(unsigned Level
) const {
299 assert(0 < Level
&& Level
<= Levels
&& "Level out of range");
300 return DV
[Level
- 1].PeelFirst
;
304 // Returns true if peeling the last iteration from this loop
305 // will break this dependence.
306 bool FullDependence::isPeelLast(unsigned Level
) const {
307 assert(0 < Level
&& Level
<= Levels
&& "Level out of range");
308 return DV
[Level
- 1].PeelLast
;
312 // Returns true if splitting this loop will break the dependence.
313 bool FullDependence::isSplitable(unsigned Level
) const {
314 assert(0 < Level
&& Level
<= Levels
&& "Level out of range");
315 return DV
[Level
- 1].Splitable
;
319 //===----------------------------------------------------------------------===//
320 // DependenceInfo::Constraint methods
322 // If constraint is a point <X, Y>, returns X.
324 const SCEV
*DependenceInfo::Constraint::getX() const {
325 assert(Kind
== Point
&& "Kind should be Point");
330 // If constraint is a point <X, Y>, returns Y.
332 const SCEV
*DependenceInfo::Constraint::getY() const {
333 assert(Kind
== Point
&& "Kind should be Point");
338 // If constraint is a line AX + BY = C, returns A.
340 const SCEV
*DependenceInfo::Constraint::getA() const {
341 assert((Kind
== Line
|| Kind
== Distance
) &&
342 "Kind should be Line (or Distance)");
347 // If constraint is a line AX + BY = C, returns B.
349 const SCEV
*DependenceInfo::Constraint::getB() const {
350 assert((Kind
== Line
|| Kind
== Distance
) &&
351 "Kind should be Line (or Distance)");
356 // If constraint is a line AX + BY = C, returns C.
358 const SCEV
*DependenceInfo::Constraint::getC() const {
359 assert((Kind
== Line
|| Kind
== Distance
) &&
360 "Kind should be Line (or Distance)");
365 // If constraint is a distance, returns D.
367 const SCEV
*DependenceInfo::Constraint::getD() const {
368 assert(Kind
== Distance
&& "Kind should be Distance");
369 return SE
->getNegativeSCEV(C
);
373 // Returns the loop associated with this constraint.
374 const Loop
*DependenceInfo::Constraint::getAssociatedLoop() const {
375 assert((Kind
== Distance
|| Kind
== Line
|| Kind
== Point
) &&
376 "Kind should be Distance, Line, or Point");
377 return AssociatedLoop
;
380 void DependenceInfo::Constraint::setPoint(const SCEV
*X
, const SCEV
*Y
,
381 const Loop
*CurLoop
) {
385 AssociatedLoop
= CurLoop
;
388 void DependenceInfo::Constraint::setLine(const SCEV
*AA
, const SCEV
*BB
,
389 const SCEV
*CC
, const Loop
*CurLoop
) {
394 AssociatedLoop
= CurLoop
;
397 void DependenceInfo::Constraint::setDistance(const SCEV
*D
,
398 const Loop
*CurLoop
) {
400 A
= SE
->getOne(D
->getType());
401 B
= SE
->getNegativeSCEV(A
);
402 C
= SE
->getNegativeSCEV(D
);
403 AssociatedLoop
= CurLoop
;
406 void DependenceInfo::Constraint::setEmpty() { Kind
= Empty
; }
408 void DependenceInfo::Constraint::setAny(ScalarEvolution
*NewSE
) {
413 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
414 // For debugging purposes. Dumps the constraint out to OS.
415 LLVM_DUMP_METHOD
void DependenceInfo::Constraint::dump(raw_ostream
&OS
) const {
421 OS
<< " Point is <" << *getX() << ", " << *getY() << ">\n";
422 else if (isDistance())
423 OS
<< " Distance is " << *getD() <<
424 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n";
426 OS
<< " Line is " << *getA() << "*X + " <<
427 *getB() << "*Y = " << *getC() << "\n";
429 llvm_unreachable("unknown constraint type in Constraint::dump");
434 // Updates X with the intersection
435 // of the Constraints X and Y. Returns true if X has changed.
436 // Corresponds to Figure 4 from the paper
438 // Practical Dependence Testing
439 // Goff, Kennedy, Tseng
441 bool DependenceInfo::intersectConstraints(Constraint
*X
, const Constraint
*Y
) {
443 LLVM_DEBUG(dbgs() << "\tintersect constraints\n");
444 LLVM_DEBUG(dbgs() << "\t X ="; X
->dump(dbgs()));
445 LLVM_DEBUG(dbgs() << "\t Y ="; Y
->dump(dbgs()));
446 assert(!Y
->isPoint() && "Y must not be a Point");
460 if (X
->isDistance() && Y
->isDistance()) {
461 LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n");
462 if (isKnownPredicate(CmpInst::ICMP_EQ
, X
->getD(), Y
->getD()))
464 if (isKnownPredicate(CmpInst::ICMP_NE
, X
->getD(), Y
->getD())) {
469 // Hmmm, interesting situation.
470 // I guess if either is constant, keep it and ignore the other.
471 if (isa
<SCEVConstant
>(Y
->getD())) {
478 // At this point, the pseudo-code in Figure 4 of the paper
479 // checks if (X->isPoint() && Y->isPoint()).
480 // This case can't occur in our implementation,
481 // since a Point can only arise as the result of intersecting
482 // two Line constraints, and the right-hand value, Y, is never
483 // the result of an intersection.
484 assert(!(X
->isPoint() && Y
->isPoint()) &&
485 "We shouldn't ever see X->isPoint() && Y->isPoint()");
487 if (X
->isLine() && Y
->isLine()) {
488 LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n");
489 const SCEV
*Prod1
= SE
->getMulExpr(X
->getA(), Y
->getB());
490 const SCEV
*Prod2
= SE
->getMulExpr(X
->getB(), Y
->getA());
491 if (isKnownPredicate(CmpInst::ICMP_EQ
, Prod1
, Prod2
)) {
492 // slopes are equal, so lines are parallel
493 LLVM_DEBUG(dbgs() << "\t\tsame slope\n");
494 Prod1
= SE
->getMulExpr(X
->getC(), Y
->getB());
495 Prod2
= SE
->getMulExpr(X
->getB(), Y
->getC());
496 if (isKnownPredicate(CmpInst::ICMP_EQ
, Prod1
, Prod2
))
498 if (isKnownPredicate(CmpInst::ICMP_NE
, Prod1
, Prod2
)) {
505 if (isKnownPredicate(CmpInst::ICMP_NE
, Prod1
, Prod2
)) {
506 // slopes differ, so lines intersect
507 LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n");
508 const SCEV
*C1B2
= SE
->getMulExpr(X
->getC(), Y
->getB());
509 const SCEV
*C1A2
= SE
->getMulExpr(X
->getC(), Y
->getA());
510 const SCEV
*C2B1
= SE
->getMulExpr(Y
->getC(), X
->getB());
511 const SCEV
*C2A1
= SE
->getMulExpr(Y
->getC(), X
->getA());
512 const SCEV
*A1B2
= SE
->getMulExpr(X
->getA(), Y
->getB());
513 const SCEV
*A2B1
= SE
->getMulExpr(Y
->getA(), X
->getB());
514 const SCEVConstant
*C1A2_C2A1
=
515 dyn_cast
<SCEVConstant
>(SE
->getMinusSCEV(C1A2
, C2A1
));
516 const SCEVConstant
*C1B2_C2B1
=
517 dyn_cast
<SCEVConstant
>(SE
->getMinusSCEV(C1B2
, C2B1
));
518 const SCEVConstant
*A1B2_A2B1
=
519 dyn_cast
<SCEVConstant
>(SE
->getMinusSCEV(A1B2
, A2B1
));
520 const SCEVConstant
*A2B1_A1B2
=
521 dyn_cast
<SCEVConstant
>(SE
->getMinusSCEV(A2B1
, A1B2
));
522 if (!C1B2_C2B1
|| !C1A2_C2A1
||
523 !A1B2_A2B1
|| !A2B1_A1B2
)
525 APInt Xtop
= C1B2_C2B1
->getAPInt();
526 APInt Xbot
= A1B2_A2B1
->getAPInt();
527 APInt Ytop
= C1A2_C2A1
->getAPInt();
528 APInt Ybot
= A2B1_A1B2
->getAPInt();
529 LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop
<< "\n");
530 LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot
<< "\n");
531 LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop
<< "\n");
532 LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot
<< "\n");
533 APInt Xq
= Xtop
; // these need to be initialized, even
534 APInt Xr
= Xtop
; // though they're just going to be overwritten
535 APInt::sdivrem(Xtop
, Xbot
, Xq
, Xr
);
538 APInt::sdivrem(Ytop
, Ybot
, Yq
, Yr
);
539 if (Xr
!= 0 || Yr
!= 0) {
544 LLVM_DEBUG(dbgs() << "\t\tX = " << Xq
<< ", Y = " << Yq
<< "\n");
545 if (Xq
.slt(0) || Yq
.slt(0)) {
550 if (const SCEVConstant
*CUB
=
551 collectConstantUpperBound(X
->getAssociatedLoop(), Prod1
->getType())) {
552 const APInt
&UpperBound
= CUB
->getAPInt();
553 LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound
<< "\n");
554 if (Xq
.sgt(UpperBound
) || Yq
.sgt(UpperBound
)) {
560 X
->setPoint(SE
->getConstant(Xq
),
562 X
->getAssociatedLoop());
569 // if (X->isLine() && Y->isPoint()) This case can't occur.
570 assert(!(X
->isLine() && Y
->isPoint()) && "This case should never occur");
572 if (X
->isPoint() && Y
->isLine()) {
573 LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n");
574 const SCEV
*A1X1
= SE
->getMulExpr(Y
->getA(), X
->getX());
575 const SCEV
*B1Y1
= SE
->getMulExpr(Y
->getB(), X
->getY());
576 const SCEV
*Sum
= SE
->getAddExpr(A1X1
, B1Y1
);
577 if (isKnownPredicate(CmpInst::ICMP_EQ
, Sum
, Y
->getC()))
579 if (isKnownPredicate(CmpInst::ICMP_NE
, Sum
, Y
->getC())) {
587 llvm_unreachable("shouldn't reach the end of Constraint intersection");
592 //===----------------------------------------------------------------------===//
593 // DependenceInfo methods
595 // For debugging purposes. Dumps a dependence to OS.
596 void Dependence::dump(raw_ostream
&OS
) const {
597 bool Splitable
= false;
611 unsigned Levels
= getLevels();
613 for (unsigned II
= 1; II
<= Levels
; ++II
) {
618 const SCEV
*Distance
= getDistance(II
);
621 else if (isScalar(II
))
624 unsigned Direction
= getDirection(II
);
625 if (Direction
== DVEntry::ALL
)
628 if (Direction
& DVEntry::LT
)
630 if (Direction
& DVEntry::EQ
)
632 if (Direction
& DVEntry::GT
)
641 if (isLoopIndependent())
650 // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their
651 // underlaying objects. If LocA and LocB are known to not alias (for any reason:
652 // tbaa, non-overlapping regions etc), then it is known there is no dependecy.
653 // Otherwise the underlying objects are checked to see if they point to
654 // different identifiable objects.
655 static AliasResult
underlyingObjectsAlias(AAResults
*AA
,
656 const DataLayout
&DL
,
657 const MemoryLocation
&LocA
,
658 const MemoryLocation
&LocB
) {
659 // Check the original locations (minus size) for noalias, which can happen for
660 // tbaa, incompatible underlying object locations, etc.
661 MemoryLocation LocAS
=
662 MemoryLocation::getBeforeOrAfter(LocA
.Ptr
, LocA
.AATags
);
663 MemoryLocation LocBS
=
664 MemoryLocation::getBeforeOrAfter(LocB
.Ptr
, LocB
.AATags
);
665 if (AA
->isNoAlias(LocAS
, LocBS
))
666 return AliasResult::NoAlias
;
668 // Check the underlying objects are the same
669 const Value
*AObj
= getUnderlyingObject(LocA
.Ptr
);
670 const Value
*BObj
= getUnderlyingObject(LocB
.Ptr
);
672 // If the underlying objects are the same, they must alias
674 return AliasResult::MustAlias
;
676 // We may have hit the recursion limit for underlying objects, or have
677 // underlying objects where we don't know they will alias.
678 if (!isIdentifiedObject(AObj
) || !isIdentifiedObject(BObj
))
679 return AliasResult::MayAlias
;
681 // Otherwise we know the objects are different and both identified objects so
683 return AliasResult::NoAlias
;
687 // Returns true if the load or store can be analyzed. Atomic and volatile
688 // operations have properties which this analysis does not understand.
690 bool isLoadOrStore(const Instruction
*I
) {
691 if (const LoadInst
*LI
= dyn_cast
<LoadInst
>(I
))
692 return LI
->isUnordered();
693 else if (const StoreInst
*SI
= dyn_cast
<StoreInst
>(I
))
694 return SI
->isUnordered();
699 // Examines the loop nesting of the Src and Dst
700 // instructions and establishes their shared loops. Sets the variables
701 // CommonLevels, SrcLevels, and MaxLevels.
702 // The source and destination instructions needn't be contained in the same
703 // loop. The routine establishNestingLevels finds the level of most deeply
704 // nested loop that contains them both, CommonLevels. An instruction that's
705 // not contained in a loop is at level = 0. MaxLevels is equal to the level
706 // of the source plus the level of the destination, minus CommonLevels.
707 // This lets us allocate vectors MaxLevels in length, with room for every
708 // distinct loop referenced in both the source and destination subscripts.
709 // The variable SrcLevels is the nesting depth of the source instruction.
710 // It's used to help calculate distinct loops referenced by the destination.
711 // Here's the map from loops to levels:
713 // 1 - outermost common loop
714 // ... - other common loops
715 // CommonLevels - innermost common loop
716 // ... - loops containing Src but not Dst
717 // SrcLevels - innermost loop containing Src but not Dst
718 // ... - loops containing Dst but not Src
719 // MaxLevels - innermost loops containing Dst but not Src
720 // Consider the follow code fragment:
737 // If we're looking at the possibility of a dependence between the store
738 // to A (the Src) and the load from A (the Dst), we'll note that they
739 // have 2 loops in common, so CommonLevels will equal 2 and the direction
740 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
741 // A map from loop names to loop numbers would look like
743 // b - 2 = CommonLevels
749 void DependenceInfo::establishNestingLevels(const Instruction
*Src
,
750 const Instruction
*Dst
) {
751 const BasicBlock
*SrcBlock
= Src
->getParent();
752 const BasicBlock
*DstBlock
= Dst
->getParent();
753 unsigned SrcLevel
= LI
->getLoopDepth(SrcBlock
);
754 unsigned DstLevel
= LI
->getLoopDepth(DstBlock
);
755 const Loop
*SrcLoop
= LI
->getLoopFor(SrcBlock
);
756 const Loop
*DstLoop
= LI
->getLoopFor(DstBlock
);
757 SrcLevels
= SrcLevel
;
758 MaxLevels
= SrcLevel
+ DstLevel
;
759 while (SrcLevel
> DstLevel
) {
760 SrcLoop
= SrcLoop
->getParentLoop();
763 while (DstLevel
> SrcLevel
) {
764 DstLoop
= DstLoop
->getParentLoop();
767 while (SrcLoop
!= DstLoop
) {
768 SrcLoop
= SrcLoop
->getParentLoop();
769 DstLoop
= DstLoop
->getParentLoop();
772 CommonLevels
= SrcLevel
;
773 MaxLevels
-= CommonLevels
;
777 // Given one of the loops containing the source, return
778 // its level index in our numbering scheme.
779 unsigned DependenceInfo::mapSrcLoop(const Loop
*SrcLoop
) const {
780 return SrcLoop
->getLoopDepth();
784 // Given one of the loops containing the destination,
785 // return its level index in our numbering scheme.
786 unsigned DependenceInfo::mapDstLoop(const Loop
*DstLoop
) const {
787 unsigned D
= DstLoop
->getLoopDepth();
788 if (D
> CommonLevels
)
789 return D
- CommonLevels
+ SrcLevels
;
795 // Returns true if Expression is loop invariant in LoopNest.
796 bool DependenceInfo::isLoopInvariant(const SCEV
*Expression
,
797 const Loop
*LoopNest
) const {
800 return SE
->isLoopInvariant(Expression
, LoopNest
) &&
801 isLoopInvariant(Expression
, LoopNest
->getParentLoop());
806 // Finds the set of loops from the LoopNest that
807 // have a level <= CommonLevels and are referred to by the SCEV Expression.
808 void DependenceInfo::collectCommonLoops(const SCEV
*Expression
,
809 const Loop
*LoopNest
,
810 SmallBitVector
&Loops
) const {
812 unsigned Level
= LoopNest
->getLoopDepth();
813 if (Level
<= CommonLevels
&& !SE
->isLoopInvariant(Expression
, LoopNest
))
815 LoopNest
= LoopNest
->getParentLoop();
819 void DependenceInfo::unifySubscriptType(ArrayRef
<Subscript
*> Pairs
) {
821 unsigned widestWidthSeen
= 0;
824 // Go through each pair and find the widest bit to which we need
825 // to extend all of them.
826 for (Subscript
*Pair
: Pairs
) {
827 const SCEV
*Src
= Pair
->Src
;
828 const SCEV
*Dst
= Pair
->Dst
;
829 IntegerType
*SrcTy
= dyn_cast
<IntegerType
>(Src
->getType());
830 IntegerType
*DstTy
= dyn_cast
<IntegerType
>(Dst
->getType());
831 if (SrcTy
== nullptr || DstTy
== nullptr) {
832 assert(SrcTy
== DstTy
&& "This function only unify integer types and "
833 "expect Src and Dst share the same type "
837 if (SrcTy
->getBitWidth() > widestWidthSeen
) {
838 widestWidthSeen
= SrcTy
->getBitWidth();
841 if (DstTy
->getBitWidth() > widestWidthSeen
) {
842 widestWidthSeen
= DstTy
->getBitWidth();
848 assert(widestWidthSeen
> 0);
850 // Now extend each pair to the widest seen.
851 for (Subscript
*Pair
: Pairs
) {
852 const SCEV
*Src
= Pair
->Src
;
853 const SCEV
*Dst
= Pair
->Dst
;
854 IntegerType
*SrcTy
= dyn_cast
<IntegerType
>(Src
->getType());
855 IntegerType
*DstTy
= dyn_cast
<IntegerType
>(Dst
->getType());
856 if (SrcTy
== nullptr || DstTy
== nullptr) {
857 assert(SrcTy
== DstTy
&& "This function only unify integer types and "
858 "expect Src and Dst share the same type "
862 if (SrcTy
->getBitWidth() < widestWidthSeen
)
863 // Sign-extend Src to widestType
864 Pair
->Src
= SE
->getSignExtendExpr(Src
, widestType
);
865 if (DstTy
->getBitWidth() < widestWidthSeen
) {
866 // Sign-extend Dst to widestType
867 Pair
->Dst
= SE
->getSignExtendExpr(Dst
, widestType
);
872 // removeMatchingExtensions - Examines a subscript pair.
873 // If the source and destination are identically sign (or zero)
874 // extended, it strips off the extension in an effect to simplify
875 // the actual analysis.
876 void DependenceInfo::removeMatchingExtensions(Subscript
*Pair
) {
877 const SCEV
*Src
= Pair
->Src
;
878 const SCEV
*Dst
= Pair
->Dst
;
879 if ((isa
<SCEVZeroExtendExpr
>(Src
) && isa
<SCEVZeroExtendExpr
>(Dst
)) ||
880 (isa
<SCEVSignExtendExpr
>(Src
) && isa
<SCEVSignExtendExpr
>(Dst
))) {
881 const SCEVIntegralCastExpr
*SrcCast
= cast
<SCEVIntegralCastExpr
>(Src
);
882 const SCEVIntegralCastExpr
*DstCast
= cast
<SCEVIntegralCastExpr
>(Dst
);
883 const SCEV
*SrcCastOp
= SrcCast
->getOperand();
884 const SCEV
*DstCastOp
= DstCast
->getOperand();
885 if (SrcCastOp
->getType() == DstCastOp
->getType()) {
886 Pair
->Src
= SrcCastOp
;
887 Pair
->Dst
= DstCastOp
;
892 // Examine the scev and return true iff it's linear.
893 // Collect any loops mentioned in the set of "Loops".
894 bool DependenceInfo::checkSubscript(const SCEV
*Expr
, const Loop
*LoopNest
,
895 SmallBitVector
&Loops
, bool IsSrc
) {
896 const SCEVAddRecExpr
*AddRec
= dyn_cast
<SCEVAddRecExpr
>(Expr
);
898 return isLoopInvariant(Expr
, LoopNest
);
899 const SCEV
*Start
= AddRec
->getStart();
900 const SCEV
*Step
= AddRec
->getStepRecurrence(*SE
);
901 const SCEV
*UB
= SE
->getBackedgeTakenCount(AddRec
->getLoop());
902 if (!isa
<SCEVCouldNotCompute
>(UB
)) {
903 if (SE
->getTypeSizeInBits(Start
->getType()) <
904 SE
->getTypeSizeInBits(UB
->getType())) {
905 if (!AddRec
->getNoWrapFlags())
909 if (!isLoopInvariant(Step
, LoopNest
))
912 Loops
.set(mapSrcLoop(AddRec
->getLoop()));
914 Loops
.set(mapDstLoop(AddRec
->getLoop()));
915 return checkSubscript(Start
, LoopNest
, Loops
, IsSrc
);
918 // Examine the scev and return true iff it's linear.
919 // Collect any loops mentioned in the set of "Loops".
920 bool DependenceInfo::checkSrcSubscript(const SCEV
*Src
, const Loop
*LoopNest
,
921 SmallBitVector
&Loops
) {
922 return checkSubscript(Src
, LoopNest
, Loops
, true);
925 // Examine the scev and return true iff it's linear.
926 // Collect any loops mentioned in the set of "Loops".
927 bool DependenceInfo::checkDstSubscript(const SCEV
*Dst
, const Loop
*LoopNest
,
928 SmallBitVector
&Loops
) {
929 return checkSubscript(Dst
, LoopNest
, Loops
, false);
933 // Examines the subscript pair (the Src and Dst SCEVs)
934 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
935 // Collects the associated loops in a set.
936 DependenceInfo::Subscript::ClassificationKind
937 DependenceInfo::classifyPair(const SCEV
*Src
, const Loop
*SrcLoopNest
,
938 const SCEV
*Dst
, const Loop
*DstLoopNest
,
939 SmallBitVector
&Loops
) {
940 SmallBitVector
SrcLoops(MaxLevels
+ 1);
941 SmallBitVector
DstLoops(MaxLevels
+ 1);
942 if (!checkSrcSubscript(Src
, SrcLoopNest
, SrcLoops
))
943 return Subscript::NonLinear
;
944 if (!checkDstSubscript(Dst
, DstLoopNest
, DstLoops
))
945 return Subscript::NonLinear
;
948 unsigned N
= Loops
.count();
950 return Subscript::ZIV
;
952 return Subscript::SIV
;
953 if (N
== 2 && (SrcLoops
.count() == 0 ||
954 DstLoops
.count() == 0 ||
955 (SrcLoops
.count() == 1 && DstLoops
.count() == 1)))
956 return Subscript::RDIV
;
957 return Subscript::MIV
;
961 // A wrapper around SCEV::isKnownPredicate.
962 // Looks for cases where we're interested in comparing for equality.
963 // If both X and Y have been identically sign or zero extended,
964 // it strips off the (confusing) extensions before invoking
965 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package
966 // will be similarly updated.
968 // If SCEV::isKnownPredicate can't prove the predicate,
969 // we try simple subtraction, which seems to help in some cases
970 // involving symbolics.
971 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred
, const SCEV
*X
,
972 const SCEV
*Y
) const {
973 if (Pred
== CmpInst::ICMP_EQ
||
974 Pred
== CmpInst::ICMP_NE
) {
975 if ((isa
<SCEVSignExtendExpr
>(X
) &&
976 isa
<SCEVSignExtendExpr
>(Y
)) ||
977 (isa
<SCEVZeroExtendExpr
>(X
) &&
978 isa
<SCEVZeroExtendExpr
>(Y
))) {
979 const SCEVIntegralCastExpr
*CX
= cast
<SCEVIntegralCastExpr
>(X
);
980 const SCEVIntegralCastExpr
*CY
= cast
<SCEVIntegralCastExpr
>(Y
);
981 const SCEV
*Xop
= CX
->getOperand();
982 const SCEV
*Yop
= CY
->getOperand();
983 if (Xop
->getType() == Yop
->getType()) {
989 if (SE
->isKnownPredicate(Pred
, X
, Y
))
991 // If SE->isKnownPredicate can't prove the condition,
992 // we try the brute-force approach of subtracting
993 // and testing the difference.
994 // By testing with SE->isKnownPredicate first, we avoid
995 // the possibility of overflow when the arguments are constants.
996 const SCEV
*Delta
= SE
->getMinusSCEV(X
, Y
);
998 case CmpInst::ICMP_EQ
:
999 return Delta
->isZero();
1000 case CmpInst::ICMP_NE
:
1001 return SE
->isKnownNonZero(Delta
);
1002 case CmpInst::ICMP_SGE
:
1003 return SE
->isKnownNonNegative(Delta
);
1004 case CmpInst::ICMP_SLE
:
1005 return SE
->isKnownNonPositive(Delta
);
1006 case CmpInst::ICMP_SGT
:
1007 return SE
->isKnownPositive(Delta
);
1008 case CmpInst::ICMP_SLT
:
1009 return SE
->isKnownNegative(Delta
);
1011 llvm_unreachable("unexpected predicate in isKnownPredicate");
1015 /// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1))
1016 /// with some extra checking if S is an AddRec and we can prove less-than using
1017 /// the loop bounds.
1018 bool DependenceInfo::isKnownLessThan(const SCEV
*S
, const SCEV
*Size
) const {
1019 // First unify to the same type
1020 auto *SType
= dyn_cast
<IntegerType
>(S
->getType());
1021 auto *SizeType
= dyn_cast
<IntegerType
>(Size
->getType());
1022 if (!SType
|| !SizeType
)
1025 (SType
->getBitWidth() >= SizeType
->getBitWidth()) ? SType
: SizeType
;
1026 S
= SE
->getTruncateOrZeroExtend(S
, MaxType
);
1027 Size
= SE
->getTruncateOrZeroExtend(Size
, MaxType
);
1029 // Special check for addrecs using BE taken count
1030 const SCEV
*Bound
= SE
->getMinusSCEV(S
, Size
);
1031 if (const SCEVAddRecExpr
*AddRec
= dyn_cast
<SCEVAddRecExpr
>(Bound
)) {
1032 if (AddRec
->isAffine()) {
1033 const SCEV
*BECount
= SE
->getBackedgeTakenCount(AddRec
->getLoop());
1034 if (!isa
<SCEVCouldNotCompute
>(BECount
)) {
1035 const SCEV
*Limit
= AddRec
->evaluateAtIteration(BECount
, *SE
);
1036 if (SE
->isKnownNegative(Limit
))
1042 // Check using normal isKnownNegative
1043 const SCEV
*LimitedBound
=
1044 SE
->getMinusSCEV(S
, SE
->getSMaxExpr(Size
, SE
->getOne(Size
->getType())));
1045 return SE
->isKnownNegative(LimitedBound
);
1048 bool DependenceInfo::isKnownNonNegative(const SCEV
*S
, const Value
*Ptr
) const {
1049 bool Inbounds
= false;
1050 if (auto *SrcGEP
= dyn_cast
<GetElementPtrInst
>(Ptr
))
1051 Inbounds
= SrcGEP
->isInBounds();
1053 if (const SCEVAddRecExpr
*AddRec
= dyn_cast
<SCEVAddRecExpr
>(S
)) {
1054 if (AddRec
->isAffine()) {
1055 // We know S is for Ptr, the operand on a load/store, so doesn't wrap.
1056 // If both parts are NonNegative, the end result will be NonNegative
1057 if (SE
->isKnownNonNegative(AddRec
->getStart()) &&
1058 SE
->isKnownNonNegative(AddRec
->getOperand(1)))
1064 return SE
->isKnownNonNegative(S
);
1067 // All subscripts are all the same type.
1068 // Loop bound may be smaller (e.g., a char).
1069 // Should zero extend loop bound, since it's always >= 0.
1070 // This routine collects upper bound and extends or truncates if needed.
1071 // Truncating is safe when subscripts are known not to wrap. Cases without
1072 // nowrap flags should have been rejected earlier.
1073 // Return null if no bound available.
1074 const SCEV
*DependenceInfo::collectUpperBound(const Loop
*L
, Type
*T
) const {
1075 if (SE
->hasLoopInvariantBackedgeTakenCount(L
)) {
1076 const SCEV
*UB
= SE
->getBackedgeTakenCount(L
);
1077 return SE
->getTruncateOrZeroExtend(UB
, T
);
1083 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
1084 // If the cast fails, returns NULL.
1085 const SCEVConstant
*DependenceInfo::collectConstantUpperBound(const Loop
*L
,
1087 if (const SCEV
*UB
= collectUpperBound(L
, T
))
1088 return dyn_cast
<SCEVConstant
>(UB
);
1094 // When we have a pair of subscripts of the form [c1] and [c2],
1095 // where c1 and c2 are both loop invariant, we attack it using
1096 // the ZIV test. Basically, we test by comparing the two values,
1097 // but there are actually three possible results:
1098 // 1) the values are equal, so there's a dependence
1099 // 2) the values are different, so there's no dependence
1100 // 3) the values might be equal, so we have to assume a dependence.
1102 // Return true if dependence disproved.
1103 bool DependenceInfo::testZIV(const SCEV
*Src
, const SCEV
*Dst
,
1104 FullDependence
&Result
) const {
1105 LLVM_DEBUG(dbgs() << " src = " << *Src
<< "\n");
1106 LLVM_DEBUG(dbgs() << " dst = " << *Dst
<< "\n");
1108 if (isKnownPredicate(CmpInst::ICMP_EQ
, Src
, Dst
)) {
1109 LLVM_DEBUG(dbgs() << " provably dependent\n");
1110 return false; // provably dependent
1112 if (isKnownPredicate(CmpInst::ICMP_NE
, Src
, Dst
)) {
1113 LLVM_DEBUG(dbgs() << " provably independent\n");
1115 return true; // provably independent
1117 LLVM_DEBUG(dbgs() << " possibly dependent\n");
1118 Result
.Consistent
= false;
1119 return false; // possibly dependent
1124 // From the paper, Practical Dependence Testing, Section 4.2.1
1126 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
1127 // where i is an induction variable, c1 and c2 are loop invariant,
1128 // and a is a constant, we can solve it exactly using the Strong SIV test.
1130 // Can prove independence. Failing that, can compute distance (and direction).
1131 // In the presence of symbolic terms, we can sometimes make progress.
1133 // If there's a dependence,
1135 // c1 + a*i = c2 + a*i'
1137 // The dependence distance is
1139 // d = i' - i = (c1 - c2)/a
1141 // A dependence only exists if d is an integer and abs(d) <= U, where U is the
1142 // loop's upper bound. If a dependence exists, the dependence direction is
1146 // direction = { = if d = 0
1149 // Return true if dependence disproved.
1150 bool DependenceInfo::strongSIVtest(const SCEV
*Coeff
, const SCEV
*SrcConst
,
1151 const SCEV
*DstConst
, const Loop
*CurLoop
,
1152 unsigned Level
, FullDependence
&Result
,
1153 Constraint
&NewConstraint
) const {
1154 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
1155 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff
);
1156 LLVM_DEBUG(dbgs() << ", " << *Coeff
->getType() << "\n");
1157 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst
);
1158 LLVM_DEBUG(dbgs() << ", " << *SrcConst
->getType() << "\n");
1159 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst
);
1160 LLVM_DEBUG(dbgs() << ", " << *DstConst
->getType() << "\n");
1161 ++StrongSIVapplications
;
1162 assert(0 < Level
&& Level
<= CommonLevels
&& "level out of range");
1165 const SCEV
*Delta
= SE
->getMinusSCEV(SrcConst
, DstConst
);
1166 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta
);
1167 LLVM_DEBUG(dbgs() << ", " << *Delta
->getType() << "\n");
1169 // check that |Delta| < iteration count
1170 if (const SCEV
*UpperBound
= collectUpperBound(CurLoop
, Delta
->getType())) {
1171 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound
);
1172 LLVM_DEBUG(dbgs() << ", " << *UpperBound
->getType() << "\n");
1173 const SCEV
*AbsDelta
=
1174 SE
->isKnownNonNegative(Delta
) ? Delta
: SE
->getNegativeSCEV(Delta
);
1175 const SCEV
*AbsCoeff
=
1176 SE
->isKnownNonNegative(Coeff
) ? Coeff
: SE
->getNegativeSCEV(Coeff
);
1177 const SCEV
*Product
= SE
->getMulExpr(UpperBound
, AbsCoeff
);
1178 if (isKnownPredicate(CmpInst::ICMP_SGT
, AbsDelta
, Product
)) {
1179 // Distance greater than trip count - no dependence
1180 ++StrongSIVindependence
;
1181 ++StrongSIVsuccesses
;
1186 // Can we compute distance?
1187 if (isa
<SCEVConstant
>(Delta
) && isa
<SCEVConstant
>(Coeff
)) {
1188 APInt ConstDelta
= cast
<SCEVConstant
>(Delta
)->getAPInt();
1189 APInt ConstCoeff
= cast
<SCEVConstant
>(Coeff
)->getAPInt();
1190 APInt Distance
= ConstDelta
; // these need to be initialized
1191 APInt Remainder
= ConstDelta
;
1192 APInt::sdivrem(ConstDelta
, ConstCoeff
, Distance
, Remainder
);
1193 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance
<< "\n");
1194 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder
<< "\n");
1195 // Make sure Coeff divides Delta exactly
1196 if (Remainder
!= 0) {
1197 // Coeff doesn't divide Distance, no dependence
1198 ++StrongSIVindependence
;
1199 ++StrongSIVsuccesses
;
1202 Result
.DV
[Level
].Distance
= SE
->getConstant(Distance
);
1203 NewConstraint
.setDistance(SE
->getConstant(Distance
), CurLoop
);
1204 if (Distance
.sgt(0))
1205 Result
.DV
[Level
].Direction
&= Dependence::DVEntry::LT
;
1206 else if (Distance
.slt(0))
1207 Result
.DV
[Level
].Direction
&= Dependence::DVEntry::GT
;
1209 Result
.DV
[Level
].Direction
&= Dependence::DVEntry::EQ
;
1210 ++StrongSIVsuccesses
;
1212 else if (Delta
->isZero()) {
1214 Result
.DV
[Level
].Distance
= Delta
;
1215 NewConstraint
.setDistance(Delta
, CurLoop
);
1216 Result
.DV
[Level
].Direction
&= Dependence::DVEntry::EQ
;
1217 ++StrongSIVsuccesses
;
1220 if (Coeff
->isOne()) {
1221 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta
<< "\n");
1222 Result
.DV
[Level
].Distance
= Delta
; // since X/1 == X
1223 NewConstraint
.setDistance(Delta
, CurLoop
);
1226 Result
.Consistent
= false;
1227 NewConstraint
.setLine(Coeff
,
1228 SE
->getNegativeSCEV(Coeff
),
1229 SE
->getNegativeSCEV(Delta
), CurLoop
);
1232 // maybe we can get a useful direction
1233 bool DeltaMaybeZero
= !SE
->isKnownNonZero(Delta
);
1234 bool DeltaMaybePositive
= !SE
->isKnownNonPositive(Delta
);
1235 bool DeltaMaybeNegative
= !SE
->isKnownNonNegative(Delta
);
1236 bool CoeffMaybePositive
= !SE
->isKnownNonPositive(Coeff
);
1237 bool CoeffMaybeNegative
= !SE
->isKnownNonNegative(Coeff
);
1238 // The double negatives above are confusing.
1239 // It helps to read !SE->isKnownNonZero(Delta)
1240 // as "Delta might be Zero"
1241 unsigned NewDirection
= Dependence::DVEntry::NONE
;
1242 if ((DeltaMaybePositive
&& CoeffMaybePositive
) ||
1243 (DeltaMaybeNegative
&& CoeffMaybeNegative
))
1244 NewDirection
= Dependence::DVEntry::LT
;
1246 NewDirection
|= Dependence::DVEntry::EQ
;
1247 if ((DeltaMaybeNegative
&& CoeffMaybePositive
) ||
1248 (DeltaMaybePositive
&& CoeffMaybeNegative
))
1249 NewDirection
|= Dependence::DVEntry::GT
;
1250 if (NewDirection
< Result
.DV
[Level
].Direction
)
1251 ++StrongSIVsuccesses
;
1252 Result
.DV
[Level
].Direction
&= NewDirection
;
1258 // weakCrossingSIVtest -
1259 // From the paper, Practical Dependence Testing, Section 4.2.2
1261 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
1262 // where i is an induction variable, c1 and c2 are loop invariant,
1263 // and a is a constant, we can solve it exactly using the
1264 // Weak-Crossing SIV test.
1266 // Given c1 + a*i = c2 - a*i', we can look for the intersection of
1267 // the two lines, where i = i', yielding
1269 // c1 + a*i = c2 - a*i
1273 // If i < 0, there is no dependence.
1274 // If i > upperbound, there is no dependence.
1275 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
1276 // If i = upperbound, there's a dependence with distance = 0.
1277 // If i is integral, there's a dependence (all directions).
1278 // If the non-integer part = 1/2, there's a dependence (<> directions).
1279 // Otherwise, there's no dependence.
1281 // Can prove independence. Failing that,
1282 // can sometimes refine the directions.
1283 // Can determine iteration for splitting.
1285 // Return true if dependence disproved.
1286 bool DependenceInfo::weakCrossingSIVtest(
1287 const SCEV
*Coeff
, const SCEV
*SrcConst
, const SCEV
*DstConst
,
1288 const Loop
*CurLoop
, unsigned Level
, FullDependence
&Result
,
1289 Constraint
&NewConstraint
, const SCEV
*&SplitIter
) const {
1290 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
1291 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff
<< "\n");
1292 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst
<< "\n");
1293 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst
<< "\n");
1294 ++WeakCrossingSIVapplications
;
1295 assert(0 < Level
&& Level
<= CommonLevels
&& "Level out of range");
1297 Result
.Consistent
= false;
1298 const SCEV
*Delta
= SE
->getMinusSCEV(DstConst
, SrcConst
);
1299 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta
<< "\n");
1300 NewConstraint
.setLine(Coeff
, Coeff
, Delta
, CurLoop
);
1301 if (Delta
->isZero()) {
1302 Result
.DV
[Level
].Direction
&= unsigned(~Dependence::DVEntry::LT
);
1303 Result
.DV
[Level
].Direction
&= unsigned(~Dependence::DVEntry::GT
);
1304 ++WeakCrossingSIVsuccesses
;
1305 if (!Result
.DV
[Level
].Direction
) {
1306 ++WeakCrossingSIVindependence
;
1309 Result
.DV
[Level
].Distance
= Delta
; // = 0
1312 const SCEVConstant
*ConstCoeff
= dyn_cast
<SCEVConstant
>(Coeff
);
1316 Result
.DV
[Level
].Splitable
= true;
1317 if (SE
->isKnownNegative(ConstCoeff
)) {
1318 ConstCoeff
= dyn_cast
<SCEVConstant
>(SE
->getNegativeSCEV(ConstCoeff
));
1319 assert(ConstCoeff
&&
1320 "dynamic cast of negative of ConstCoeff should yield constant");
1321 Delta
= SE
->getNegativeSCEV(Delta
);
1323 assert(SE
->isKnownPositive(ConstCoeff
) && "ConstCoeff should be positive");
1325 // compute SplitIter for use by DependenceInfo::getSplitIteration()
1326 SplitIter
= SE
->getUDivExpr(
1327 SE
->getSMaxExpr(SE
->getZero(Delta
->getType()), Delta
),
1328 SE
->getMulExpr(SE
->getConstant(Delta
->getType(), 2), ConstCoeff
));
1329 LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter
<< "\n");
1331 const SCEVConstant
*ConstDelta
= dyn_cast
<SCEVConstant
>(Delta
);
1335 // We're certain that ConstCoeff > 0; therefore,
1336 // if Delta < 0, then no dependence.
1337 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta
<< "\n");
1338 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff
<< "\n");
1339 if (SE
->isKnownNegative(Delta
)) {
1340 // No dependence, Delta < 0
1341 ++WeakCrossingSIVindependence
;
1342 ++WeakCrossingSIVsuccesses
;
1346 // We're certain that Delta > 0 and ConstCoeff > 0.
1347 // Check Delta/(2*ConstCoeff) against upper loop bound
1348 if (const SCEV
*UpperBound
= collectUpperBound(CurLoop
, Delta
->getType())) {
1349 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound
<< "\n");
1350 const SCEV
*ConstantTwo
= SE
->getConstant(UpperBound
->getType(), 2);
1351 const SCEV
*ML
= SE
->getMulExpr(SE
->getMulExpr(ConstCoeff
, UpperBound
),
1353 LLVM_DEBUG(dbgs() << "\t ML = " << *ML
<< "\n");
1354 if (isKnownPredicate(CmpInst::ICMP_SGT
, Delta
, ML
)) {
1355 // Delta too big, no dependence
1356 ++WeakCrossingSIVindependence
;
1357 ++WeakCrossingSIVsuccesses
;
1360 if (isKnownPredicate(CmpInst::ICMP_EQ
, Delta
, ML
)) {
1362 Result
.DV
[Level
].Direction
&= unsigned(~Dependence::DVEntry::LT
);
1363 Result
.DV
[Level
].Direction
&= unsigned(~Dependence::DVEntry::GT
);
1364 ++WeakCrossingSIVsuccesses
;
1365 if (!Result
.DV
[Level
].Direction
) {
1366 ++WeakCrossingSIVindependence
;
1369 Result
.DV
[Level
].Splitable
= false;
1370 Result
.DV
[Level
].Distance
= SE
->getZero(Delta
->getType());
1375 // check that Coeff divides Delta
1376 APInt APDelta
= ConstDelta
->getAPInt();
1377 APInt APCoeff
= ConstCoeff
->getAPInt();
1378 APInt Distance
= APDelta
; // these need to be initialzed
1379 APInt Remainder
= APDelta
;
1380 APInt::sdivrem(APDelta
, APCoeff
, Distance
, Remainder
);
1381 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder
<< "\n");
1382 if (Remainder
!= 0) {
1383 // Coeff doesn't divide Delta, no dependence
1384 ++WeakCrossingSIVindependence
;
1385 ++WeakCrossingSIVsuccesses
;
1388 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance
<< "\n");
1390 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
1391 APInt Two
= APInt(Distance
.getBitWidth(), 2, true);
1392 Remainder
= Distance
.srem(Two
);
1393 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder
<< "\n");
1394 if (Remainder
!= 0) {
1395 // Equal direction isn't possible
1396 Result
.DV
[Level
].Direction
&= unsigned(~Dependence::DVEntry::EQ
);
1397 ++WeakCrossingSIVsuccesses
;
1403 // Kirch's algorithm, from
1405 // Optimizing Supercompilers for Supercomputers
1409 // Program 2.1, page 29.
1410 // Computes the GCD of AM and BM.
1411 // Also finds a solution to the equation ax - by = gcd(a, b).
1412 // Returns true if dependence disproved; i.e., gcd does not divide Delta.
1413 static bool findGCD(unsigned Bits
, const APInt
&AM
, const APInt
&BM
,
1414 const APInt
&Delta
, APInt
&G
, APInt
&X
, APInt
&Y
) {
1415 APInt
A0(Bits
, 1, true), A1(Bits
, 0, true);
1416 APInt
B0(Bits
, 0, true), B1(Bits
, 1, true);
1417 APInt G0
= AM
.abs();
1418 APInt G1
= BM
.abs();
1419 APInt Q
= G0
; // these need to be initialized
1421 APInt::sdivrem(G0
, G1
, Q
, R
);
1423 APInt A2
= A0
- Q
*A1
; A0
= A1
; A1
= A2
;
1424 APInt B2
= B0
- Q
*B1
; B0
= B1
; B1
= B2
;
1426 APInt::sdivrem(G0
, G1
, Q
, R
);
1429 LLVM_DEBUG(dbgs() << "\t GCD = " << G
<< "\n");
1430 X
= AM
.slt(0) ? -A1
: A1
;
1431 Y
= BM
.slt(0) ? B1
: -B1
;
1433 // make sure gcd divides Delta
1436 return true; // gcd doesn't divide Delta, no dependence
1441 static APInt
floorOfQuotient(const APInt
&A
, const APInt
&B
) {
1442 APInt Q
= A
; // these need to be initialized
1444 APInt::sdivrem(A
, B
, Q
, R
);
1447 if ((A
.sgt(0) && B
.sgt(0)) ||
1448 (A
.slt(0) && B
.slt(0)))
1454 static APInt
ceilingOfQuotient(const APInt
&A
, const APInt
&B
) {
1455 APInt Q
= A
; // these need to be initialized
1457 APInt::sdivrem(A
, B
, Q
, R
);
1460 if ((A
.sgt(0) && B
.sgt(0)) ||
1461 (A
.slt(0) && B
.slt(0)))
1468 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
1469 // where i is an induction variable, c1 and c2 are loop invariant, and a1
1470 // and a2 are constant, we can solve it exactly using an algorithm developed
1471 // by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in:
1473 // Dependence Analysis for Supercomputing
1475 // Kluwer Academic Publishers, 1988
1477 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
1478 // so use them if possible. They're also a bit better with symbolics and,
1479 // in the case of the strong SIV test, can compute Distances.
1481 // Return true if dependence disproved.
1483 // This is a modified version of the original Banerjee algorithm. The original
1484 // only tested whether Dst depends on Src. This algorithm extends that and
1485 // returns all the dependencies that exist between Dst and Src.
1486 bool DependenceInfo::exactSIVtest(const SCEV
*SrcCoeff
, const SCEV
*DstCoeff
,
1487 const SCEV
*SrcConst
, const SCEV
*DstConst
,
1488 const Loop
*CurLoop
, unsigned Level
,
1489 FullDependence
&Result
,
1490 Constraint
&NewConstraint
) const {
1491 LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
1492 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff
<< " = AM\n");
1493 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff
<< " = BM\n");
1494 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst
<< "\n");
1495 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst
<< "\n");
1496 ++ExactSIVapplications
;
1497 assert(0 < Level
&& Level
<= CommonLevels
&& "Level out of range");
1499 Result
.Consistent
= false;
1500 const SCEV
*Delta
= SE
->getMinusSCEV(DstConst
, SrcConst
);
1501 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta
<< "\n");
1502 NewConstraint
.setLine(SrcCoeff
, SE
->getNegativeSCEV(DstCoeff
), Delta
,
1504 const SCEVConstant
*ConstDelta
= dyn_cast
<SCEVConstant
>(Delta
);
1505 const SCEVConstant
*ConstSrcCoeff
= dyn_cast
<SCEVConstant
>(SrcCoeff
);
1506 const SCEVConstant
*ConstDstCoeff
= dyn_cast
<SCEVConstant
>(DstCoeff
);
1507 if (!ConstDelta
|| !ConstSrcCoeff
|| !ConstDstCoeff
)
1512 APInt AM
= ConstSrcCoeff
->getAPInt();
1513 APInt BM
= ConstDstCoeff
->getAPInt();
1514 APInt CM
= ConstDelta
->getAPInt();
1515 unsigned Bits
= AM
.getBitWidth();
1516 if (findGCD(Bits
, AM
, BM
, CM
, G
, X
, Y
)) {
1517 // gcd doesn't divide Delta, no dependence
1518 ++ExactSIVindependence
;
1519 ++ExactSIVsuccesses
;
1523 LLVM_DEBUG(dbgs() << "\t X = " << X
<< ", Y = " << Y
<< "\n");
1525 // since SCEV construction normalizes, LM = 0
1526 APInt
UM(Bits
, 1, true);
1527 bool UMValid
= false;
1528 // UM is perhaps unavailable, let's check
1529 if (const SCEVConstant
*CUB
=
1530 collectConstantUpperBound(CurLoop
, Delta
->getType())) {
1531 UM
= CUB
->getAPInt();
1532 LLVM_DEBUG(dbgs() << "\t UM = " << UM
<< "\n");
1536 APInt
TU(APInt::getSignedMaxValue(Bits
));
1537 APInt
TL(APInt::getSignedMinValue(Bits
));
1538 APInt TC
= CM
.sdiv(G
);
1541 LLVM_DEBUG(dbgs() << "\t TC = " << TC
<< "\n");
1542 LLVM_DEBUG(dbgs() << "\t TX = " << TX
<< "\n");
1543 LLVM_DEBUG(dbgs() << "\t TY = " << TY
<< "\n");
1545 SmallVector
<APInt
, 2> TLVec
, TUVec
;
1546 APInt TB
= BM
.sdiv(G
);
1548 TLVec
.push_back(ceilingOfQuotient(-TX
, TB
));
1549 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec
.back() << "\n");
1550 // New bound check - modification to Banerjee's e3 check
1552 TUVec
.push_back(floorOfQuotient(UM
- TX
, TB
));
1553 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec
.back() << "\n");
1556 TUVec
.push_back(floorOfQuotient(-TX
, TB
));
1557 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec
.back() << "\n");
1558 // New bound check - modification to Banerjee's e3 check
1560 TLVec
.push_back(ceilingOfQuotient(UM
- TX
, TB
));
1561 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec
.back() << "\n");
1565 APInt TA
= AM
.sdiv(G
);
1568 TUVec
.push_back(floorOfQuotient(UM
- TY
, TA
));
1569 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec
.back() << "\n");
1571 // New bound check - modification to Banerjee's e3 check
1572 TLVec
.push_back(ceilingOfQuotient(-TY
, TA
));
1573 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec
.back() << "\n");
1576 TLVec
.push_back(ceilingOfQuotient(UM
- TY
, TA
));
1577 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec
.back() << "\n");
1579 // New bound check - modification to Banerjee's e3 check
1580 TUVec
.push_back(floorOfQuotient(-TY
, TA
));
1581 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec
.back() << "\n");
1584 LLVM_DEBUG(dbgs() << "\t TA = " << TA
<< "\n");
1585 LLVM_DEBUG(dbgs() << "\t TB = " << TB
<< "\n");
1587 if (TLVec
.empty() || TUVec
.empty())
1589 TL
= APIntOps::smax(TLVec
.front(), TLVec
.back());
1590 TU
= APIntOps::smin(TUVec
.front(), TUVec
.back());
1591 LLVM_DEBUG(dbgs() << "\t TL = " << TL
<< "\n");
1592 LLVM_DEBUG(dbgs() << "\t TU = " << TU
<< "\n");
1595 ++ExactSIVindependence
;
1596 ++ExactSIVsuccesses
;
1600 // explore directions
1601 unsigned NewDirection
= Dependence::DVEntry::NONE
;
1602 APInt LowerDistance
, UpperDistance
;
1604 LowerDistance
= (TY
- TX
) + (TA
- TB
) * TL
;
1605 UpperDistance
= (TY
- TX
) + (TA
- TB
) * TU
;
1607 LowerDistance
= (TY
- TX
) + (TA
- TB
) * TU
;
1608 UpperDistance
= (TY
- TX
) + (TA
- TB
) * TL
;
1611 LLVM_DEBUG(dbgs() << "\t LowerDistance = " << LowerDistance
<< "\n");
1612 LLVM_DEBUG(dbgs() << "\t UpperDistance = " << UpperDistance
<< "\n");
1614 APInt
Zero(Bits
, 0, true);
1615 if (LowerDistance
.sle(Zero
) && UpperDistance
.sge(Zero
)) {
1616 NewDirection
|= Dependence::DVEntry::EQ
;
1617 ++ExactSIVsuccesses
;
1619 if (LowerDistance
.slt(0)) {
1620 NewDirection
|= Dependence::DVEntry::GT
;
1621 ++ExactSIVsuccesses
;
1623 if (UpperDistance
.sgt(0)) {
1624 NewDirection
|= Dependence::DVEntry::LT
;
1625 ++ExactSIVsuccesses
;
1629 Result
.DV
[Level
].Direction
&= NewDirection
;
1630 if (Result
.DV
[Level
].Direction
== Dependence::DVEntry::NONE
)
1631 ++ExactSIVindependence
;
1632 LLVM_DEBUG(dbgs() << "\t Result = ");
1633 LLVM_DEBUG(Result
.dump(dbgs()));
1634 return Result
.DV
[Level
].Direction
== Dependence::DVEntry::NONE
;
1638 // Return true if the divisor evenly divides the dividend.
1640 bool isRemainderZero(const SCEVConstant
*Dividend
,
1641 const SCEVConstant
*Divisor
) {
1642 const APInt
&ConstDividend
= Dividend
->getAPInt();
1643 const APInt
&ConstDivisor
= Divisor
->getAPInt();
1644 return ConstDividend
.srem(ConstDivisor
) == 0;
1648 // weakZeroSrcSIVtest -
1649 // From the paper, Practical Dependence Testing, Section 4.2.2
1651 // When we have a pair of subscripts of the form [c1] and [c2 + a*i],
1652 // where i is an induction variable, c1 and c2 are loop invariant,
1653 // and a is a constant, we can solve it exactly using the
1654 // Weak-Zero SIV test.
1664 // If i is not an integer, there's no dependence.
1665 // If i < 0 or > UB, there's no dependence.
1666 // If i = 0, the direction is >= and peeling the
1667 // 1st iteration will break the dependence.
1668 // If i = UB, the direction is <= and peeling the
1669 // last iteration will break the dependence.
1670 // Otherwise, the direction is *.
1672 // Can prove independence. Failing that, we can sometimes refine
1673 // the directions. Can sometimes show that first or last
1674 // iteration carries all the dependences (so worth peeling).
1676 // (see also weakZeroDstSIVtest)
1678 // Return true if dependence disproved.
1679 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV
*DstCoeff
,
1680 const SCEV
*SrcConst
,
1681 const SCEV
*DstConst
,
1682 const Loop
*CurLoop
, unsigned Level
,
1683 FullDependence
&Result
,
1684 Constraint
&NewConstraint
) const {
1685 // For the WeakSIV test, it's possible the loop isn't common to
1686 // the Src and Dst loops. If it isn't, then there's no need to
1687 // record a direction.
1688 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
1689 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff
<< "\n");
1690 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst
<< "\n");
1691 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst
<< "\n");
1692 ++WeakZeroSIVapplications
;
1693 assert(0 < Level
&& Level
<= MaxLevels
&& "Level out of range");
1695 Result
.Consistent
= false;
1696 const SCEV
*Delta
= SE
->getMinusSCEV(SrcConst
, DstConst
);
1697 NewConstraint
.setLine(SE
->getZero(Delta
->getType()), DstCoeff
, Delta
,
1699 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta
<< "\n");
1700 if (isKnownPredicate(CmpInst::ICMP_EQ
, SrcConst
, DstConst
)) {
1701 if (Level
< CommonLevels
) {
1702 Result
.DV
[Level
].Direction
&= Dependence::DVEntry::GE
;
1703 Result
.DV
[Level
].PeelFirst
= true;
1704 ++WeakZeroSIVsuccesses
;
1706 return false; // dependences caused by first iteration
1708 const SCEVConstant
*ConstCoeff
= dyn_cast
<SCEVConstant
>(DstCoeff
);
1711 const SCEV
*AbsCoeff
=
1712 SE
->isKnownNegative(ConstCoeff
) ?
1713 SE
->getNegativeSCEV(ConstCoeff
) : ConstCoeff
;
1714 const SCEV
*NewDelta
=
1715 SE
->isKnownNegative(ConstCoeff
) ? SE
->getNegativeSCEV(Delta
) : Delta
;
1717 // check that Delta/SrcCoeff < iteration count
1718 // really check NewDelta < count*AbsCoeff
1719 if (const SCEV
*UpperBound
= collectUpperBound(CurLoop
, Delta
->getType())) {
1720 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound
<< "\n");
1721 const SCEV
*Product
= SE
->getMulExpr(AbsCoeff
, UpperBound
);
1722 if (isKnownPredicate(CmpInst::ICMP_SGT
, NewDelta
, Product
)) {
1723 ++WeakZeroSIVindependence
;
1724 ++WeakZeroSIVsuccesses
;
1727 if (isKnownPredicate(CmpInst::ICMP_EQ
, NewDelta
, Product
)) {
1728 // dependences caused by last iteration
1729 if (Level
< CommonLevels
) {
1730 Result
.DV
[Level
].Direction
&= Dependence::DVEntry::LE
;
1731 Result
.DV
[Level
].PeelLast
= true;
1732 ++WeakZeroSIVsuccesses
;
1738 // check that Delta/SrcCoeff >= 0
1739 // really check that NewDelta >= 0
1740 if (SE
->isKnownNegative(NewDelta
)) {
1741 // No dependence, newDelta < 0
1742 ++WeakZeroSIVindependence
;
1743 ++WeakZeroSIVsuccesses
;
1747 // if SrcCoeff doesn't divide Delta, then no dependence
1748 if (isa
<SCEVConstant
>(Delta
) &&
1749 !isRemainderZero(cast
<SCEVConstant
>(Delta
), ConstCoeff
)) {
1750 ++WeakZeroSIVindependence
;
1751 ++WeakZeroSIVsuccesses
;
1758 // weakZeroDstSIVtest -
1759 // From the paper, Practical Dependence Testing, Section 4.2.2
1761 // When we have a pair of subscripts of the form [c1 + a*i] and [c2],
1762 // where i is an induction variable, c1 and c2 are loop invariant,
1763 // and a is a constant, we can solve it exactly using the
1764 // Weak-Zero SIV test.
1774 // If i is not an integer, there's no dependence.
1775 // If i < 0 or > UB, there's no dependence.
1776 // If i = 0, the direction is <= and peeling the
1777 // 1st iteration will break the dependence.
1778 // If i = UB, the direction is >= and peeling the
1779 // last iteration will break the dependence.
1780 // Otherwise, the direction is *.
1782 // Can prove independence. Failing that, we can sometimes refine
1783 // the directions. Can sometimes show that first or last
1784 // iteration carries all the dependences (so worth peeling).
1786 // (see also weakZeroSrcSIVtest)
1788 // Return true if dependence disproved.
1789 bool DependenceInfo::weakZeroDstSIVtest(const SCEV
*SrcCoeff
,
1790 const SCEV
*SrcConst
,
1791 const SCEV
*DstConst
,
1792 const Loop
*CurLoop
, unsigned Level
,
1793 FullDependence
&Result
,
1794 Constraint
&NewConstraint
) const {
1795 // For the WeakSIV test, it's possible the loop isn't common to the
1796 // Src and Dst loops. If it isn't, then there's no need to record a direction.
1797 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
1798 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff
<< "\n");
1799 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst
<< "\n");
1800 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst
<< "\n");
1801 ++WeakZeroSIVapplications
;
1802 assert(0 < Level
&& Level
<= SrcLevels
&& "Level out of range");
1804 Result
.Consistent
= false;
1805 const SCEV
*Delta
= SE
->getMinusSCEV(DstConst
, SrcConst
);
1806 NewConstraint
.setLine(SrcCoeff
, SE
->getZero(Delta
->getType()), Delta
,
1808 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta
<< "\n");
1809 if (isKnownPredicate(CmpInst::ICMP_EQ
, DstConst
, SrcConst
)) {
1810 if (Level
< CommonLevels
) {
1811 Result
.DV
[Level
].Direction
&= Dependence::DVEntry::LE
;
1812 Result
.DV
[Level
].PeelFirst
= true;
1813 ++WeakZeroSIVsuccesses
;
1815 return false; // dependences caused by first iteration
1817 const SCEVConstant
*ConstCoeff
= dyn_cast
<SCEVConstant
>(SrcCoeff
);
1820 const SCEV
*AbsCoeff
=
1821 SE
->isKnownNegative(ConstCoeff
) ?
1822 SE
->getNegativeSCEV(ConstCoeff
) : ConstCoeff
;
1823 const SCEV
*NewDelta
=
1824 SE
->isKnownNegative(ConstCoeff
) ? SE
->getNegativeSCEV(Delta
) : Delta
;
1826 // check that Delta/SrcCoeff < iteration count
1827 // really check NewDelta < count*AbsCoeff
1828 if (const SCEV
*UpperBound
= collectUpperBound(CurLoop
, Delta
->getType())) {
1829 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound
<< "\n");
1830 const SCEV
*Product
= SE
->getMulExpr(AbsCoeff
, UpperBound
);
1831 if (isKnownPredicate(CmpInst::ICMP_SGT
, NewDelta
, Product
)) {
1832 ++WeakZeroSIVindependence
;
1833 ++WeakZeroSIVsuccesses
;
1836 if (isKnownPredicate(CmpInst::ICMP_EQ
, NewDelta
, Product
)) {
1837 // dependences caused by last iteration
1838 if (Level
< CommonLevels
) {
1839 Result
.DV
[Level
].Direction
&= Dependence::DVEntry::GE
;
1840 Result
.DV
[Level
].PeelLast
= true;
1841 ++WeakZeroSIVsuccesses
;
1847 // check that Delta/SrcCoeff >= 0
1848 // really check that NewDelta >= 0
1849 if (SE
->isKnownNegative(NewDelta
)) {
1850 // No dependence, newDelta < 0
1851 ++WeakZeroSIVindependence
;
1852 ++WeakZeroSIVsuccesses
;
1856 // if SrcCoeff doesn't divide Delta, then no dependence
1857 if (isa
<SCEVConstant
>(Delta
) &&
1858 !isRemainderZero(cast
<SCEVConstant
>(Delta
), ConstCoeff
)) {
1859 ++WeakZeroSIVindependence
;
1860 ++WeakZeroSIVsuccesses
;
1867 // exactRDIVtest - Tests the RDIV subscript pair for dependence.
1868 // Things of the form [c1 + a*i] and [c2 + b*j],
1869 // where i and j are induction variable, c1 and c2 are loop invariant,
1870 // and a and b are constants.
1871 // Returns true if any possible dependence is disproved.
1872 // Marks the result as inconsistent.
1873 // Works in some cases that symbolicRDIVtest doesn't, and vice versa.
1874 bool DependenceInfo::exactRDIVtest(const SCEV
*SrcCoeff
, const SCEV
*DstCoeff
,
1875 const SCEV
*SrcConst
, const SCEV
*DstConst
,
1876 const Loop
*SrcLoop
, const Loop
*DstLoop
,
1877 FullDependence
&Result
) const {
1878 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n");
1879 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff
<< " = AM\n");
1880 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff
<< " = BM\n");
1881 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst
<< "\n");
1882 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst
<< "\n");
1883 ++ExactRDIVapplications
;
1884 Result
.Consistent
= false;
1885 const SCEV
*Delta
= SE
->getMinusSCEV(DstConst
, SrcConst
);
1886 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta
<< "\n");
1887 const SCEVConstant
*ConstDelta
= dyn_cast
<SCEVConstant
>(Delta
);
1888 const SCEVConstant
*ConstSrcCoeff
= dyn_cast
<SCEVConstant
>(SrcCoeff
);
1889 const SCEVConstant
*ConstDstCoeff
= dyn_cast
<SCEVConstant
>(DstCoeff
);
1890 if (!ConstDelta
|| !ConstSrcCoeff
|| !ConstDstCoeff
)
1895 APInt AM
= ConstSrcCoeff
->getAPInt();
1896 APInt BM
= ConstDstCoeff
->getAPInt();
1897 APInt CM
= ConstDelta
->getAPInt();
1898 unsigned Bits
= AM
.getBitWidth();
1899 if (findGCD(Bits
, AM
, BM
, CM
, G
, X
, Y
)) {
1900 // gcd doesn't divide Delta, no dependence
1901 ++ExactRDIVindependence
;
1905 LLVM_DEBUG(dbgs() << "\t X = " << X
<< ", Y = " << Y
<< "\n");
1907 // since SCEV construction seems to normalize, LM = 0
1908 APInt
SrcUM(Bits
, 1, true);
1909 bool SrcUMvalid
= false;
1910 // SrcUM is perhaps unavailable, let's check
1911 if (const SCEVConstant
*UpperBound
=
1912 collectConstantUpperBound(SrcLoop
, Delta
->getType())) {
1913 SrcUM
= UpperBound
->getAPInt();
1914 LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM
<< "\n");
1918 APInt
DstUM(Bits
, 1, true);
1919 bool DstUMvalid
= false;
1920 // UM is perhaps unavailable, let's check
1921 if (const SCEVConstant
*UpperBound
=
1922 collectConstantUpperBound(DstLoop
, Delta
->getType())) {
1923 DstUM
= UpperBound
->getAPInt();
1924 LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM
<< "\n");
1928 APInt
TU(APInt::getSignedMaxValue(Bits
));
1929 APInt
TL(APInt::getSignedMinValue(Bits
));
1930 APInt TC
= CM
.sdiv(G
);
1933 LLVM_DEBUG(dbgs() << "\t TC = " << TC
<< "\n");
1934 LLVM_DEBUG(dbgs() << "\t TX = " << TX
<< "\n");
1935 LLVM_DEBUG(dbgs() << "\t TY = " << TY
<< "\n");
1937 SmallVector
<APInt
, 2> TLVec
, TUVec
;
1938 APInt TB
= BM
.sdiv(G
);
1940 TLVec
.push_back(ceilingOfQuotient(-TX
, TB
));
1941 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec
.back() << "\n");
1943 TUVec
.push_back(floorOfQuotient(SrcUM
- TX
, TB
));
1944 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec
.back() << "\n");
1947 TUVec
.push_back(floorOfQuotient(-TX
, TB
));
1948 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec
.back() << "\n");
1950 TLVec
.push_back(ceilingOfQuotient(SrcUM
- TX
, TB
));
1951 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec
.back() << "\n");
1955 APInt TA
= AM
.sdiv(G
);
1957 TLVec
.push_back(ceilingOfQuotient(-TY
, TA
));
1958 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec
.back() << "\n");
1960 TUVec
.push_back(floorOfQuotient(DstUM
- TY
, TA
));
1961 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec
.back() << "\n");
1964 TUVec
.push_back(floorOfQuotient(-TY
, TA
));
1965 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec
.back() << "\n");
1967 TLVec
.push_back(ceilingOfQuotient(DstUM
- TY
, TA
));
1968 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec
.back() << "\n");
1972 if (TLVec
.empty() || TUVec
.empty())
1975 LLVM_DEBUG(dbgs() << "\t TA = " << TA
<< "\n");
1976 LLVM_DEBUG(dbgs() << "\t TB = " << TB
<< "\n");
1978 TL
= APIntOps::smax(TLVec
.front(), TLVec
.back());
1979 TU
= APIntOps::smin(TUVec
.front(), TUVec
.back());
1980 LLVM_DEBUG(dbgs() << "\t TL = " << TL
<< "\n");
1981 LLVM_DEBUG(dbgs() << "\t TU = " << TU
<< "\n");
1984 ++ExactRDIVindependence
;
1989 // symbolicRDIVtest -
1990 // In Section 4.5 of the Practical Dependence Testing paper,the authors
1991 // introduce a special case of Banerjee's Inequalities (also called the
1992 // Extreme-Value Test) that can handle some of the SIV and RDIV cases,
1993 // particularly cases with symbolics. Since it's only able to disprove
1994 // dependence (not compute distances or directions), we'll use it as a
1995 // fall back for the other tests.
1997 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
1998 // where i and j are induction variables and c1 and c2 are loop invariants,
1999 // we can use the symbolic tests to disprove some dependences, serving as a
2000 // backup for the RDIV test. Note that i and j can be the same variable,
2001 // letting this test serve as a backup for the various SIV tests.
2003 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
2004 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
2005 // loop bounds for the i and j loops, respectively. So, ...
2007 // c1 + a1*i = c2 + a2*j
2008 // a1*i - a2*j = c2 - c1
2010 // To test for a dependence, we compute c2 - c1 and make sure it's in the
2011 // range of the maximum and minimum possible values of a1*i - a2*j.
2012 // Considering the signs of a1 and a2, we have 4 possible cases:
2014 // 1) If a1 >= 0 and a2 >= 0, then
2015 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
2016 // -a2*N2 <= c2 - c1 <= a1*N1
2018 // 2) If a1 >= 0 and a2 <= 0, then
2019 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
2020 // 0 <= c2 - c1 <= a1*N1 - a2*N2
2022 // 3) If a1 <= 0 and a2 >= 0, then
2023 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
2024 // a1*N1 - a2*N2 <= c2 - c1 <= 0
2026 // 4) If a1 <= 0 and a2 <= 0, then
2027 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2
2028 // a1*N1 <= c2 - c1 <= -a2*N2
2030 // return true if dependence disproved
2031 bool DependenceInfo::symbolicRDIVtest(const SCEV
*A1
, const SCEV
*A2
,
2032 const SCEV
*C1
, const SCEV
*C2
,
2034 const Loop
*Loop2
) const {
2035 ++SymbolicRDIVapplications
;
2036 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
2037 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1
);
2038 LLVM_DEBUG(dbgs() << ", type = " << *A1
->getType() << "\n");
2039 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2
<< "\n");
2040 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1
<< "\n");
2041 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2
<< "\n");
2042 const SCEV
*N1
= collectUpperBound(Loop1
, A1
->getType());
2043 const SCEV
*N2
= collectUpperBound(Loop2
, A1
->getType());
2044 LLVM_DEBUG(if (N1
) dbgs() << "\t N1 = " << *N1
<< "\n");
2045 LLVM_DEBUG(if (N2
) dbgs() << "\t N2 = " << *N2
<< "\n");
2046 const SCEV
*C2_C1
= SE
->getMinusSCEV(C2
, C1
);
2047 const SCEV
*C1_C2
= SE
->getMinusSCEV(C1
, C2
);
2048 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1
<< "\n");
2049 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2
<< "\n");
2050 if (SE
->isKnownNonNegative(A1
)) {
2051 if (SE
->isKnownNonNegative(A2
)) {
2052 // A1 >= 0 && A2 >= 0
2054 // make sure that c2 - c1 <= a1*N1
2055 const SCEV
*A1N1
= SE
->getMulExpr(A1
, N1
);
2056 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1
<< "\n");
2057 if (isKnownPredicate(CmpInst::ICMP_SGT
, C2_C1
, A1N1
)) {
2058 ++SymbolicRDIVindependence
;
2063 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
2064 const SCEV
*A2N2
= SE
->getMulExpr(A2
, N2
);
2065 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2
<< "\n");
2066 if (isKnownPredicate(CmpInst::ICMP_SLT
, A2N2
, C1_C2
)) {
2067 ++SymbolicRDIVindependence
;
2072 else if (SE
->isKnownNonPositive(A2
)) {
2073 // a1 >= 0 && a2 <= 0
2075 // make sure that c2 - c1 <= a1*N1 - a2*N2
2076 const SCEV
*A1N1
= SE
->getMulExpr(A1
, N1
);
2077 const SCEV
*A2N2
= SE
->getMulExpr(A2
, N2
);
2078 const SCEV
*A1N1_A2N2
= SE
->getMinusSCEV(A1N1
, A2N2
);
2079 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2
<< "\n");
2080 if (isKnownPredicate(CmpInst::ICMP_SGT
, C2_C1
, A1N1_A2N2
)) {
2081 ++SymbolicRDIVindependence
;
2085 // make sure that 0 <= c2 - c1
2086 if (SE
->isKnownNegative(C2_C1
)) {
2087 ++SymbolicRDIVindependence
;
2092 else if (SE
->isKnownNonPositive(A1
)) {
2093 if (SE
->isKnownNonNegative(A2
)) {
2094 // a1 <= 0 && a2 >= 0
2096 // make sure that a1*N1 - a2*N2 <= c2 - c1
2097 const SCEV
*A1N1
= SE
->getMulExpr(A1
, N1
);
2098 const SCEV
*A2N2
= SE
->getMulExpr(A2
, N2
);
2099 const SCEV
*A1N1_A2N2
= SE
->getMinusSCEV(A1N1
, A2N2
);
2100 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2
<< "\n");
2101 if (isKnownPredicate(CmpInst::ICMP_SGT
, A1N1_A2N2
, C2_C1
)) {
2102 ++SymbolicRDIVindependence
;
2106 // make sure that c2 - c1 <= 0
2107 if (SE
->isKnownPositive(C2_C1
)) {
2108 ++SymbolicRDIVindependence
;
2112 else if (SE
->isKnownNonPositive(A2
)) {
2113 // a1 <= 0 && a2 <= 0
2115 // make sure that a1*N1 <= c2 - c1
2116 const SCEV
*A1N1
= SE
->getMulExpr(A1
, N1
);
2117 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1
<< "\n");
2118 if (isKnownPredicate(CmpInst::ICMP_SGT
, A1N1
, C2_C1
)) {
2119 ++SymbolicRDIVindependence
;
2124 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
2125 const SCEV
*A2N2
= SE
->getMulExpr(A2
, N2
);
2126 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2
<< "\n");
2127 if (isKnownPredicate(CmpInst::ICMP_SLT
, C1_C2
, A2N2
)) {
2128 ++SymbolicRDIVindependence
;
2139 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
2140 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and
2141 // a2 are constant, we attack it with an SIV test. While they can all be
2142 // solved with the Exact SIV test, it's worthwhile to use simpler tests when
2143 // they apply; they're cheaper and sometimes more precise.
2145 // Return true if dependence disproved.
2146 bool DependenceInfo::testSIV(const SCEV
*Src
, const SCEV
*Dst
, unsigned &Level
,
2147 FullDependence
&Result
, Constraint
&NewConstraint
,
2148 const SCEV
*&SplitIter
) const {
2149 LLVM_DEBUG(dbgs() << " src = " << *Src
<< "\n");
2150 LLVM_DEBUG(dbgs() << " dst = " << *Dst
<< "\n");
2151 const SCEVAddRecExpr
*SrcAddRec
= dyn_cast
<SCEVAddRecExpr
>(Src
);
2152 const SCEVAddRecExpr
*DstAddRec
= dyn_cast
<SCEVAddRecExpr
>(Dst
);
2153 if (SrcAddRec
&& DstAddRec
) {
2154 const SCEV
*SrcConst
= SrcAddRec
->getStart();
2155 const SCEV
*DstConst
= DstAddRec
->getStart();
2156 const SCEV
*SrcCoeff
= SrcAddRec
->getStepRecurrence(*SE
);
2157 const SCEV
*DstCoeff
= DstAddRec
->getStepRecurrence(*SE
);
2158 const Loop
*CurLoop
= SrcAddRec
->getLoop();
2159 assert(CurLoop
== DstAddRec
->getLoop() &&
2160 "both loops in SIV should be same");
2161 Level
= mapSrcLoop(CurLoop
);
2163 if (SrcCoeff
== DstCoeff
)
2164 disproven
= strongSIVtest(SrcCoeff
, SrcConst
, DstConst
, CurLoop
,
2165 Level
, Result
, NewConstraint
);
2166 else if (SrcCoeff
== SE
->getNegativeSCEV(DstCoeff
))
2167 disproven
= weakCrossingSIVtest(SrcCoeff
, SrcConst
, DstConst
, CurLoop
,
2168 Level
, Result
, NewConstraint
, SplitIter
);
2170 disproven
= exactSIVtest(SrcCoeff
, DstCoeff
, SrcConst
, DstConst
, CurLoop
,
2171 Level
, Result
, NewConstraint
);
2173 gcdMIVtest(Src
, Dst
, Result
) ||
2174 symbolicRDIVtest(SrcCoeff
, DstCoeff
, SrcConst
, DstConst
, CurLoop
, CurLoop
);
2177 const SCEV
*SrcConst
= SrcAddRec
->getStart();
2178 const SCEV
*SrcCoeff
= SrcAddRec
->getStepRecurrence(*SE
);
2179 const SCEV
*DstConst
= Dst
;
2180 const Loop
*CurLoop
= SrcAddRec
->getLoop();
2181 Level
= mapSrcLoop(CurLoop
);
2182 return weakZeroDstSIVtest(SrcCoeff
, SrcConst
, DstConst
, CurLoop
,
2183 Level
, Result
, NewConstraint
) ||
2184 gcdMIVtest(Src
, Dst
, Result
);
2187 const SCEV
*DstConst
= DstAddRec
->getStart();
2188 const SCEV
*DstCoeff
= DstAddRec
->getStepRecurrence(*SE
);
2189 const SCEV
*SrcConst
= Src
;
2190 const Loop
*CurLoop
= DstAddRec
->getLoop();
2191 Level
= mapDstLoop(CurLoop
);
2192 return weakZeroSrcSIVtest(DstCoeff
, SrcConst
, DstConst
,
2193 CurLoop
, Level
, Result
, NewConstraint
) ||
2194 gcdMIVtest(Src
, Dst
, Result
);
2196 llvm_unreachable("SIV test expected at least one AddRec");
2202 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
2203 // where i and j are induction variables, c1 and c2 are loop invariant,
2204 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation
2205 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
2206 // It doesn't make sense to talk about distance or direction in this case,
2207 // so there's no point in making special versions of the Strong SIV test or
2208 // the Weak-crossing SIV test.
2210 // With minor algebra, this test can also be used for things like
2211 // [c1 + a1*i + a2*j][c2].
2213 // Return true if dependence disproved.
2214 bool DependenceInfo::testRDIV(const SCEV
*Src
, const SCEV
*Dst
,
2215 FullDependence
&Result
) const {
2216 // we have 3 possible situations here:
2217 // 1) [a*i + b] and [c*j + d]
2218 // 2) [a*i + c*j + b] and [d]
2219 // 3) [b] and [a*i + c*j + d]
2220 // We need to find what we've got and get organized
2222 const SCEV
*SrcConst
, *DstConst
;
2223 const SCEV
*SrcCoeff
, *DstCoeff
;
2224 const Loop
*SrcLoop
, *DstLoop
;
2226 LLVM_DEBUG(dbgs() << " src = " << *Src
<< "\n");
2227 LLVM_DEBUG(dbgs() << " dst = " << *Dst
<< "\n");
2228 const SCEVAddRecExpr
*SrcAddRec
= dyn_cast
<SCEVAddRecExpr
>(Src
);
2229 const SCEVAddRecExpr
*DstAddRec
= dyn_cast
<SCEVAddRecExpr
>(Dst
);
2230 if (SrcAddRec
&& DstAddRec
) {
2231 SrcConst
= SrcAddRec
->getStart();
2232 SrcCoeff
= SrcAddRec
->getStepRecurrence(*SE
);
2233 SrcLoop
= SrcAddRec
->getLoop();
2234 DstConst
= DstAddRec
->getStart();
2235 DstCoeff
= DstAddRec
->getStepRecurrence(*SE
);
2236 DstLoop
= DstAddRec
->getLoop();
2238 else if (SrcAddRec
) {
2239 if (const SCEVAddRecExpr
*tmpAddRec
=
2240 dyn_cast
<SCEVAddRecExpr
>(SrcAddRec
->getStart())) {
2241 SrcConst
= tmpAddRec
->getStart();
2242 SrcCoeff
= tmpAddRec
->getStepRecurrence(*SE
);
2243 SrcLoop
= tmpAddRec
->getLoop();
2245 DstCoeff
= SE
->getNegativeSCEV(SrcAddRec
->getStepRecurrence(*SE
));
2246 DstLoop
= SrcAddRec
->getLoop();
2249 llvm_unreachable("RDIV reached by surprising SCEVs");
2251 else if (DstAddRec
) {
2252 if (const SCEVAddRecExpr
*tmpAddRec
=
2253 dyn_cast
<SCEVAddRecExpr
>(DstAddRec
->getStart())) {
2254 DstConst
= tmpAddRec
->getStart();
2255 DstCoeff
= tmpAddRec
->getStepRecurrence(*SE
);
2256 DstLoop
= tmpAddRec
->getLoop();
2258 SrcCoeff
= SE
->getNegativeSCEV(DstAddRec
->getStepRecurrence(*SE
));
2259 SrcLoop
= DstAddRec
->getLoop();
2262 llvm_unreachable("RDIV reached by surprising SCEVs");
2265 llvm_unreachable("RDIV expected at least one AddRec");
2266 return exactRDIVtest(SrcCoeff
, DstCoeff
,
2270 gcdMIVtest(Src
, Dst
, Result
) ||
2271 symbolicRDIVtest(SrcCoeff
, DstCoeff
,
2277 // Tests the single-subscript MIV pair (Src and Dst) for dependence.
2278 // Return true if dependence disproved.
2279 // Can sometimes refine direction vectors.
2280 bool DependenceInfo::testMIV(const SCEV
*Src
, const SCEV
*Dst
,
2281 const SmallBitVector
&Loops
,
2282 FullDependence
&Result
) const {
2283 LLVM_DEBUG(dbgs() << " src = " << *Src
<< "\n");
2284 LLVM_DEBUG(dbgs() << " dst = " << *Dst
<< "\n");
2285 Result
.Consistent
= false;
2286 return gcdMIVtest(Src
, Dst
, Result
) ||
2287 banerjeeMIVtest(Src
, Dst
, Loops
, Result
);
2291 // Given a product, e.g., 10*X*Y, returns the first constant operand,
2292 // in this case 10. If there is no constant part, returns NULL.
2294 const SCEVConstant
*getConstantPart(const SCEV
*Expr
) {
2295 if (const auto *Constant
= dyn_cast
<SCEVConstant
>(Expr
))
2297 else if (const auto *Product
= dyn_cast
<SCEVMulExpr
>(Expr
))
2298 if (const auto *Constant
= dyn_cast
<SCEVConstant
>(Product
->getOperand(0)))
2304 //===----------------------------------------------------------------------===//
2306 // Tests an MIV subscript pair for dependence.
2307 // Returns true if any possible dependence is disproved.
2308 // Marks the result as inconsistent.
2309 // Can sometimes disprove the equal direction for 1 or more loops,
2310 // as discussed in Michael Wolfe's book,
2311 // High Performance Compilers for Parallel Computing, page 235.
2313 // We spend some effort (code!) to handle cases like
2314 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
2315 // but M and N are just loop-invariant variables.
2316 // This should help us handle linearized subscripts;
2317 // also makes this test a useful backup to the various SIV tests.
2319 // It occurs to me that the presence of loop-invariant variables
2320 // changes the nature of the test from "greatest common divisor"
2321 // to "a common divisor".
2322 bool DependenceInfo::gcdMIVtest(const SCEV
*Src
, const SCEV
*Dst
,
2323 FullDependence
&Result
) const {
2324 LLVM_DEBUG(dbgs() << "starting gcd\n");
2326 unsigned BitWidth
= SE
->getTypeSizeInBits(Src
->getType());
2327 APInt RunningGCD
= APInt::getNullValue(BitWidth
);
2329 // Examine Src coefficients.
2330 // Compute running GCD and record source constant.
2331 // Because we're looking for the constant at the end of the chain,
2332 // we can't quit the loop just because the GCD == 1.
2333 const SCEV
*Coefficients
= Src
;
2334 while (const SCEVAddRecExpr
*AddRec
=
2335 dyn_cast
<SCEVAddRecExpr
>(Coefficients
)) {
2336 const SCEV
*Coeff
= AddRec
->getStepRecurrence(*SE
);
2337 // If the coefficient is the product of a constant and other stuff,
2338 // we can use the constant in the GCD computation.
2339 const auto *Constant
= getConstantPart(Coeff
);
2342 APInt ConstCoeff
= Constant
->getAPInt();
2343 RunningGCD
= APIntOps::GreatestCommonDivisor(RunningGCD
, ConstCoeff
.abs());
2344 Coefficients
= AddRec
->getStart();
2346 const SCEV
*SrcConst
= Coefficients
;
2348 // Examine Dst coefficients.
2349 // Compute running GCD and record destination constant.
2350 // Because we're looking for the constant at the end of the chain,
2351 // we can't quit the loop just because the GCD == 1.
2353 while (const SCEVAddRecExpr
*AddRec
=
2354 dyn_cast
<SCEVAddRecExpr
>(Coefficients
)) {
2355 const SCEV
*Coeff
= AddRec
->getStepRecurrence(*SE
);
2356 // If the coefficient is the product of a constant and other stuff,
2357 // we can use the constant in the GCD computation.
2358 const auto *Constant
= getConstantPart(Coeff
);
2361 APInt ConstCoeff
= Constant
->getAPInt();
2362 RunningGCD
= APIntOps::GreatestCommonDivisor(RunningGCD
, ConstCoeff
.abs());
2363 Coefficients
= AddRec
->getStart();
2365 const SCEV
*DstConst
= Coefficients
;
2367 APInt ExtraGCD
= APInt::getNullValue(BitWidth
);
2368 const SCEV
*Delta
= SE
->getMinusSCEV(DstConst
, SrcConst
);
2369 LLVM_DEBUG(dbgs() << " Delta = " << *Delta
<< "\n");
2370 const SCEVConstant
*Constant
= dyn_cast
<SCEVConstant
>(Delta
);
2371 if (const SCEVAddExpr
*Sum
= dyn_cast
<SCEVAddExpr
>(Delta
)) {
2372 // If Delta is a sum of products, we may be able to make further progress.
2373 for (unsigned Op
= 0, Ops
= Sum
->getNumOperands(); Op
< Ops
; Op
++) {
2374 const SCEV
*Operand
= Sum
->getOperand(Op
);
2375 if (isa
<SCEVConstant
>(Operand
)) {
2376 assert(!Constant
&& "Surprised to find multiple constants");
2377 Constant
= cast
<SCEVConstant
>(Operand
);
2379 else if (const SCEVMulExpr
*Product
= dyn_cast
<SCEVMulExpr
>(Operand
)) {
2380 // Search for constant operand to participate in GCD;
2381 // If none found; return false.
2382 const SCEVConstant
*ConstOp
= getConstantPart(Product
);
2385 APInt ConstOpValue
= ConstOp
->getAPInt();
2386 ExtraGCD
= APIntOps::GreatestCommonDivisor(ExtraGCD
,
2387 ConstOpValue
.abs());
2395 APInt ConstDelta
= cast
<SCEVConstant
>(Constant
)->getAPInt();
2396 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta
<< "\n");
2397 if (ConstDelta
== 0)
2399 RunningGCD
= APIntOps::GreatestCommonDivisor(RunningGCD
, ExtraGCD
);
2400 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD
<< "\n");
2401 APInt Remainder
= ConstDelta
.srem(RunningGCD
);
2402 if (Remainder
!= 0) {
2407 // Try to disprove equal directions.
2408 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
2409 // the code above can't disprove the dependence because the GCD = 1.
2410 // So we consider what happen if i = i' and what happens if j = j'.
2411 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
2412 // which is infeasible, so we can disallow the = direction for the i level.
2413 // Setting j = j' doesn't help matters, so we end up with a direction vector
2416 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
2417 // we need to remember that the constant part is 5 and the RunningGCD should
2418 // be initialized to ExtraGCD = 30.
2419 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD
<< '\n');
2421 bool Improved
= false;
2423 while (const SCEVAddRecExpr
*AddRec
=
2424 dyn_cast
<SCEVAddRecExpr
>(Coefficients
)) {
2425 Coefficients
= AddRec
->getStart();
2426 const Loop
*CurLoop
= AddRec
->getLoop();
2427 RunningGCD
= ExtraGCD
;
2428 const SCEV
*SrcCoeff
= AddRec
->getStepRecurrence(*SE
);
2429 const SCEV
*DstCoeff
= SE
->getMinusSCEV(SrcCoeff
, SrcCoeff
);
2430 const SCEV
*Inner
= Src
;
2431 while (RunningGCD
!= 1 && isa
<SCEVAddRecExpr
>(Inner
)) {
2432 AddRec
= cast
<SCEVAddRecExpr
>(Inner
);
2433 const SCEV
*Coeff
= AddRec
->getStepRecurrence(*SE
);
2434 if (CurLoop
== AddRec
->getLoop())
2435 ; // SrcCoeff == Coeff
2437 // If the coefficient is the product of a constant and other stuff,
2438 // we can use the constant in the GCD computation.
2439 Constant
= getConstantPart(Coeff
);
2442 APInt ConstCoeff
= Constant
->getAPInt();
2443 RunningGCD
= APIntOps::GreatestCommonDivisor(RunningGCD
, ConstCoeff
.abs());
2445 Inner
= AddRec
->getStart();
2448 while (RunningGCD
!= 1 && isa
<SCEVAddRecExpr
>(Inner
)) {
2449 AddRec
= cast
<SCEVAddRecExpr
>(Inner
);
2450 const SCEV
*Coeff
= AddRec
->getStepRecurrence(*SE
);
2451 if (CurLoop
== AddRec
->getLoop())
2454 // If the coefficient is the product of a constant and other stuff,
2455 // we can use the constant in the GCD computation.
2456 Constant
= getConstantPart(Coeff
);
2459 APInt ConstCoeff
= Constant
->getAPInt();
2460 RunningGCD
= APIntOps::GreatestCommonDivisor(RunningGCD
, ConstCoeff
.abs());
2462 Inner
= AddRec
->getStart();
2464 Delta
= SE
->getMinusSCEV(SrcCoeff
, DstCoeff
);
2465 // If the coefficient is the product of a constant and other stuff,
2466 // we can use the constant in the GCD computation.
2467 Constant
= getConstantPart(Delta
);
2469 // The difference of the two coefficients might not be a product
2470 // or constant, in which case we give up on this direction.
2472 APInt ConstCoeff
= Constant
->getAPInt();
2473 RunningGCD
= APIntOps::GreatestCommonDivisor(RunningGCD
, ConstCoeff
.abs());
2474 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD
<< "\n");
2475 if (RunningGCD
!= 0) {
2476 Remainder
= ConstDelta
.srem(RunningGCD
);
2477 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder
<< "\n");
2478 if (Remainder
!= 0) {
2479 unsigned Level
= mapSrcLoop(CurLoop
);
2480 Result
.DV
[Level
- 1].Direction
&= unsigned(~Dependence::DVEntry::EQ
);
2487 LLVM_DEBUG(dbgs() << "all done\n");
2492 //===----------------------------------------------------------------------===//
2493 // banerjeeMIVtest -
2494 // Use Banerjee's Inequalities to test an MIV subscript pair.
2495 // (Wolfe, in the race-car book, calls this the Extreme Value Test.)
2496 // Generally follows the discussion in Section 2.5.2 of
2498 // Optimizing Supercompilers for Supercomputers
2501 // The inequalities given on page 25 are simplified in that loops are
2502 // normalized so that the lower bound is always 0 and the stride is always 1.
2503 // For example, Wolfe gives
2505 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2507 // where A_k is the coefficient of the kth index in the source subscript,
2508 // B_k is the coefficient of the kth index in the destination subscript,
2509 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
2510 // index, and N_k is the stride of the kth index. Since all loops are normalized
2511 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
2514 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
2515 // = (A^-_k - B_k)^- (U_k - 1) - B_k
2517 // Similar simplifications are possible for the other equations.
2519 // When we can't determine the number of iterations for a loop,
2520 // we use NULL as an indicator for the worst case, infinity.
2521 // When computing the upper bound, NULL denotes +inf;
2522 // for the lower bound, NULL denotes -inf.
2524 // Return true if dependence disproved.
2525 bool DependenceInfo::banerjeeMIVtest(const SCEV
*Src
, const SCEV
*Dst
,
2526 const SmallBitVector
&Loops
,
2527 FullDependence
&Result
) const {
2528 LLVM_DEBUG(dbgs() << "starting Banerjee\n");
2529 ++BanerjeeApplications
;
2530 LLVM_DEBUG(dbgs() << " Src = " << *Src
<< '\n');
2532 CoefficientInfo
*A
= collectCoeffInfo(Src
, true, A0
);
2533 LLVM_DEBUG(dbgs() << " Dst = " << *Dst
<< '\n');
2535 CoefficientInfo
*B
= collectCoeffInfo(Dst
, false, B0
);
2536 BoundInfo
*Bound
= new BoundInfo
[MaxLevels
+ 1];
2537 const SCEV
*Delta
= SE
->getMinusSCEV(B0
, A0
);
2538 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta
<< '\n');
2540 // Compute bounds for all the * directions.
2541 LLVM_DEBUG(dbgs() << "\tBounds[*]\n");
2542 for (unsigned K
= 1; K
<= MaxLevels
; ++K
) {
2543 Bound
[K
].Iterations
= A
[K
].Iterations
? A
[K
].Iterations
: B
[K
].Iterations
;
2544 Bound
[K
].Direction
= Dependence::DVEntry::ALL
;
2545 Bound
[K
].DirSet
= Dependence::DVEntry::NONE
;
2546 findBoundsALL(A
, B
, Bound
, K
);
2548 LLVM_DEBUG(dbgs() << "\t " << K
<< '\t');
2549 if (Bound
[K
].Lower
[Dependence::DVEntry::ALL
])
2550 LLVM_DEBUG(dbgs() << *Bound
[K
].Lower
[Dependence::DVEntry::ALL
] << '\t');
2552 LLVM_DEBUG(dbgs() << "-inf\t");
2553 if (Bound
[K
].Upper
[Dependence::DVEntry::ALL
])
2554 LLVM_DEBUG(dbgs() << *Bound
[K
].Upper
[Dependence::DVEntry::ALL
] << '\n');
2556 LLVM_DEBUG(dbgs() << "+inf\n");
2560 // Test the *, *, *, ... case.
2561 bool Disproved
= false;
2562 if (testBounds(Dependence::DVEntry::ALL
, 0, Bound
, Delta
)) {
2563 // Explore the direction vector hierarchy.
2564 unsigned DepthExpanded
= 0;
2565 unsigned NewDeps
= exploreDirections(1, A
, B
, Bound
,
2566 Loops
, DepthExpanded
, Delta
);
2568 bool Improved
= false;
2569 for (unsigned K
= 1; K
<= CommonLevels
; ++K
) {
2571 unsigned Old
= Result
.DV
[K
- 1].Direction
;
2572 Result
.DV
[K
- 1].Direction
= Old
& Bound
[K
].DirSet
;
2573 Improved
|= Old
!= Result
.DV
[K
- 1].Direction
;
2574 if (!Result
.DV
[K
- 1].Direction
) {
2582 ++BanerjeeSuccesses
;
2585 ++BanerjeeIndependence
;
2590 ++BanerjeeIndependence
;
2600 // Hierarchically expands the direction vector
2601 // search space, combining the directions of discovered dependences
2602 // in the DirSet field of Bound. Returns the number of distinct
2603 // dependences discovered. If the dependence is disproved,
2604 // it will return 0.
2605 unsigned DependenceInfo::exploreDirections(unsigned Level
, CoefficientInfo
*A
,
2606 CoefficientInfo
*B
, BoundInfo
*Bound
,
2607 const SmallBitVector
&Loops
,
2608 unsigned &DepthExpanded
,
2609 const SCEV
*Delta
) const {
2610 // This algorithm has worst case complexity of O(3^n), where 'n' is the number
2611 // of common loop levels. To avoid excessive compile-time, pessimize all the
2612 // results and immediately return when the number of common levels is beyond
2613 // the given threshold.
2614 if (CommonLevels
> MIVMaxLevelThreshold
) {
2615 LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV "
2616 "direction exploration is terminated.\n");
2617 for (unsigned K
= 1; K
<= CommonLevels
; ++K
)
2619 Bound
[K
].DirSet
= Dependence::DVEntry::ALL
;
2623 if (Level
> CommonLevels
) {
2625 LLVM_DEBUG(dbgs() << "\t[");
2626 for (unsigned K
= 1; K
<= CommonLevels
; ++K
) {
2628 Bound
[K
].DirSet
|= Bound
[K
].Direction
;
2630 switch (Bound
[K
].Direction
) {
2631 case Dependence::DVEntry::LT
:
2632 LLVM_DEBUG(dbgs() << " <");
2634 case Dependence::DVEntry::EQ
:
2635 LLVM_DEBUG(dbgs() << " =");
2637 case Dependence::DVEntry::GT
:
2638 LLVM_DEBUG(dbgs() << " >");
2640 case Dependence::DVEntry::ALL
:
2641 LLVM_DEBUG(dbgs() << " *");
2644 llvm_unreachable("unexpected Bound[K].Direction");
2649 LLVM_DEBUG(dbgs() << " ]\n");
2653 if (Level
> DepthExpanded
) {
2654 DepthExpanded
= Level
;
2655 // compute bounds for <, =, > at current level
2656 findBoundsLT(A
, B
, Bound
, Level
);
2657 findBoundsGT(A
, B
, Bound
, Level
);
2658 findBoundsEQ(A
, B
, Bound
, Level
);
2660 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level
<< '\n');
2661 LLVM_DEBUG(dbgs() << "\t <\t");
2662 if (Bound
[Level
].Lower
[Dependence::DVEntry::LT
])
2663 LLVM_DEBUG(dbgs() << *Bound
[Level
].Lower
[Dependence::DVEntry::LT
]
2666 LLVM_DEBUG(dbgs() << "-inf\t");
2667 if (Bound
[Level
].Upper
[Dependence::DVEntry::LT
])
2668 LLVM_DEBUG(dbgs() << *Bound
[Level
].Upper
[Dependence::DVEntry::LT
]
2671 LLVM_DEBUG(dbgs() << "+inf\n");
2672 LLVM_DEBUG(dbgs() << "\t =\t");
2673 if (Bound
[Level
].Lower
[Dependence::DVEntry::EQ
])
2674 LLVM_DEBUG(dbgs() << *Bound
[Level
].Lower
[Dependence::DVEntry::EQ
]
2677 LLVM_DEBUG(dbgs() << "-inf\t");
2678 if (Bound
[Level
].Upper
[Dependence::DVEntry::EQ
])
2679 LLVM_DEBUG(dbgs() << *Bound
[Level
].Upper
[Dependence::DVEntry::EQ
]
2682 LLVM_DEBUG(dbgs() << "+inf\n");
2683 LLVM_DEBUG(dbgs() << "\t >\t");
2684 if (Bound
[Level
].Lower
[Dependence::DVEntry::GT
])
2685 LLVM_DEBUG(dbgs() << *Bound
[Level
].Lower
[Dependence::DVEntry::GT
]
2688 LLVM_DEBUG(dbgs() << "-inf\t");
2689 if (Bound
[Level
].Upper
[Dependence::DVEntry::GT
])
2690 LLVM_DEBUG(dbgs() << *Bound
[Level
].Upper
[Dependence::DVEntry::GT
]
2693 LLVM_DEBUG(dbgs() << "+inf\n");
2697 unsigned NewDeps
= 0;
2699 // test bounds for <, *, *, ...
2700 if (testBounds(Dependence::DVEntry::LT
, Level
, Bound
, Delta
))
2701 NewDeps
+= exploreDirections(Level
+ 1, A
, B
, Bound
,
2702 Loops
, DepthExpanded
, Delta
);
2704 // Test bounds for =, *, *, ...
2705 if (testBounds(Dependence::DVEntry::EQ
, Level
, Bound
, Delta
))
2706 NewDeps
+= exploreDirections(Level
+ 1, A
, B
, Bound
,
2707 Loops
, DepthExpanded
, Delta
);
2709 // test bounds for >, *, *, ...
2710 if (testBounds(Dependence::DVEntry::GT
, Level
, Bound
, Delta
))
2711 NewDeps
+= exploreDirections(Level
+ 1, A
, B
, Bound
,
2712 Loops
, DepthExpanded
, Delta
);
2714 Bound
[Level
].Direction
= Dependence::DVEntry::ALL
;
2718 return exploreDirections(Level
+ 1, A
, B
, Bound
, Loops
, DepthExpanded
, Delta
);
2722 // Returns true iff the current bounds are plausible.
2723 bool DependenceInfo::testBounds(unsigned char DirKind
, unsigned Level
,
2724 BoundInfo
*Bound
, const SCEV
*Delta
) const {
2725 Bound
[Level
].Direction
= DirKind
;
2726 if (const SCEV
*LowerBound
= getLowerBound(Bound
))
2727 if (isKnownPredicate(CmpInst::ICMP_SGT
, LowerBound
, Delta
))
2729 if (const SCEV
*UpperBound
= getUpperBound(Bound
))
2730 if (isKnownPredicate(CmpInst::ICMP_SGT
, Delta
, UpperBound
))
2736 // Computes the upper and lower bounds for level K
2737 // using the * direction. Records them in Bound.
2738 // Wolfe gives the equations
2740 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
2741 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
2743 // Since we normalize loops, we can simplify these equations to
2745 // LB^*_k = (A^-_k - B^+_k)U_k
2746 // UB^*_k = (A^+_k - B^-_k)U_k
2748 // We must be careful to handle the case where the upper bound is unknown.
2749 // Note that the lower bound is always <= 0
2750 // and the upper bound is always >= 0.
2751 void DependenceInfo::findBoundsALL(CoefficientInfo
*A
, CoefficientInfo
*B
,
2752 BoundInfo
*Bound
, unsigned K
) const {
2753 Bound
[K
].Lower
[Dependence::DVEntry::ALL
] = nullptr; // Default value = -infinity.
2754 Bound
[K
].Upper
[Dependence::DVEntry::ALL
] = nullptr; // Default value = +infinity.
2755 if (Bound
[K
].Iterations
) {
2756 Bound
[K
].Lower
[Dependence::DVEntry::ALL
] =
2757 SE
->getMulExpr(SE
->getMinusSCEV(A
[K
].NegPart
, B
[K
].PosPart
),
2758 Bound
[K
].Iterations
);
2759 Bound
[K
].Upper
[Dependence::DVEntry::ALL
] =
2760 SE
->getMulExpr(SE
->getMinusSCEV(A
[K
].PosPart
, B
[K
].NegPart
),
2761 Bound
[K
].Iterations
);
2764 // If the difference is 0, we won't need to know the number of iterations.
2765 if (isKnownPredicate(CmpInst::ICMP_EQ
, A
[K
].NegPart
, B
[K
].PosPart
))
2766 Bound
[K
].Lower
[Dependence::DVEntry::ALL
] =
2767 SE
->getZero(A
[K
].Coeff
->getType());
2768 if (isKnownPredicate(CmpInst::ICMP_EQ
, A
[K
].PosPart
, B
[K
].NegPart
))
2769 Bound
[K
].Upper
[Dependence::DVEntry::ALL
] =
2770 SE
->getZero(A
[K
].Coeff
->getType());
2775 // Computes the upper and lower bounds for level K
2776 // using the = direction. Records them in Bound.
2777 // Wolfe gives the equations
2779 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
2780 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
2782 // Since we normalize loops, we can simplify these equations to
2784 // LB^=_k = (A_k - B_k)^- U_k
2785 // UB^=_k = (A_k - B_k)^+ U_k
2787 // We must be careful to handle the case where the upper bound is unknown.
2788 // Note that the lower bound is always <= 0
2789 // and the upper bound is always >= 0.
2790 void DependenceInfo::findBoundsEQ(CoefficientInfo
*A
, CoefficientInfo
*B
,
2791 BoundInfo
*Bound
, unsigned K
) const {
2792 Bound
[K
].Lower
[Dependence::DVEntry::EQ
] = nullptr; // Default value = -infinity.
2793 Bound
[K
].Upper
[Dependence::DVEntry::EQ
] = nullptr; // Default value = +infinity.
2794 if (Bound
[K
].Iterations
) {
2795 const SCEV
*Delta
= SE
->getMinusSCEV(A
[K
].Coeff
, B
[K
].Coeff
);
2796 const SCEV
*NegativePart
= getNegativePart(Delta
);
2797 Bound
[K
].Lower
[Dependence::DVEntry::EQ
] =
2798 SE
->getMulExpr(NegativePart
, Bound
[K
].Iterations
);
2799 const SCEV
*PositivePart
= getPositivePart(Delta
);
2800 Bound
[K
].Upper
[Dependence::DVEntry::EQ
] =
2801 SE
->getMulExpr(PositivePart
, Bound
[K
].Iterations
);
2804 // If the positive/negative part of the difference is 0,
2805 // we won't need to know the number of iterations.
2806 const SCEV
*Delta
= SE
->getMinusSCEV(A
[K
].Coeff
, B
[K
].Coeff
);
2807 const SCEV
*NegativePart
= getNegativePart(Delta
);
2808 if (NegativePart
->isZero())
2809 Bound
[K
].Lower
[Dependence::DVEntry::EQ
] = NegativePart
; // Zero
2810 const SCEV
*PositivePart
= getPositivePart(Delta
);
2811 if (PositivePart
->isZero())
2812 Bound
[K
].Upper
[Dependence::DVEntry::EQ
] = PositivePart
; // Zero
2817 // Computes the upper and lower bounds for level K
2818 // using the < direction. Records them in Bound.
2819 // Wolfe gives the equations
2821 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2822 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2824 // Since we normalize loops, we can simplify these equations to
2826 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
2827 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
2829 // We must be careful to handle the case where the upper bound is unknown.
2830 void DependenceInfo::findBoundsLT(CoefficientInfo
*A
, CoefficientInfo
*B
,
2831 BoundInfo
*Bound
, unsigned K
) const {
2832 Bound
[K
].Lower
[Dependence::DVEntry::LT
] = nullptr; // Default value = -infinity.
2833 Bound
[K
].Upper
[Dependence::DVEntry::LT
] = nullptr; // Default value = +infinity.
2834 if (Bound
[K
].Iterations
) {
2835 const SCEV
*Iter_1
= SE
->getMinusSCEV(
2836 Bound
[K
].Iterations
, SE
->getOne(Bound
[K
].Iterations
->getType()));
2837 const SCEV
*NegPart
=
2838 getNegativePart(SE
->getMinusSCEV(A
[K
].NegPart
, B
[K
].Coeff
));
2839 Bound
[K
].Lower
[Dependence::DVEntry::LT
] =
2840 SE
->getMinusSCEV(SE
->getMulExpr(NegPart
, Iter_1
), B
[K
].Coeff
);
2841 const SCEV
*PosPart
=
2842 getPositivePart(SE
->getMinusSCEV(A
[K
].PosPart
, B
[K
].Coeff
));
2843 Bound
[K
].Upper
[Dependence::DVEntry::LT
] =
2844 SE
->getMinusSCEV(SE
->getMulExpr(PosPart
, Iter_1
), B
[K
].Coeff
);
2847 // If the positive/negative part of the difference is 0,
2848 // we won't need to know the number of iterations.
2849 const SCEV
*NegPart
=
2850 getNegativePart(SE
->getMinusSCEV(A
[K
].NegPart
, B
[K
].Coeff
));
2851 if (NegPart
->isZero())
2852 Bound
[K
].Lower
[Dependence::DVEntry::LT
] = SE
->getNegativeSCEV(B
[K
].Coeff
);
2853 const SCEV
*PosPart
=
2854 getPositivePart(SE
->getMinusSCEV(A
[K
].PosPart
, B
[K
].Coeff
));
2855 if (PosPart
->isZero())
2856 Bound
[K
].Upper
[Dependence::DVEntry::LT
] = SE
->getNegativeSCEV(B
[K
].Coeff
);
2861 // Computes the upper and lower bounds for level K
2862 // using the > direction. Records them in Bound.
2863 // Wolfe gives the equations
2865 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2866 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2868 // Since we normalize loops, we can simplify these equations to
2870 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
2871 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
2873 // We must be careful to handle the case where the upper bound is unknown.
2874 void DependenceInfo::findBoundsGT(CoefficientInfo
*A
, CoefficientInfo
*B
,
2875 BoundInfo
*Bound
, unsigned K
) const {
2876 Bound
[K
].Lower
[Dependence::DVEntry::GT
] = nullptr; // Default value = -infinity.
2877 Bound
[K
].Upper
[Dependence::DVEntry::GT
] = nullptr; // Default value = +infinity.
2878 if (Bound
[K
].Iterations
) {
2879 const SCEV
*Iter_1
= SE
->getMinusSCEV(
2880 Bound
[K
].Iterations
, SE
->getOne(Bound
[K
].Iterations
->getType()));
2881 const SCEV
*NegPart
=
2882 getNegativePart(SE
->getMinusSCEV(A
[K
].Coeff
, B
[K
].PosPart
));
2883 Bound
[K
].Lower
[Dependence::DVEntry::GT
] =
2884 SE
->getAddExpr(SE
->getMulExpr(NegPart
, Iter_1
), A
[K
].Coeff
);
2885 const SCEV
*PosPart
=
2886 getPositivePart(SE
->getMinusSCEV(A
[K
].Coeff
, B
[K
].NegPart
));
2887 Bound
[K
].Upper
[Dependence::DVEntry::GT
] =
2888 SE
->getAddExpr(SE
->getMulExpr(PosPart
, Iter_1
), A
[K
].Coeff
);
2891 // If the positive/negative part of the difference is 0,
2892 // we won't need to know the number of iterations.
2893 const SCEV
*NegPart
= getNegativePart(SE
->getMinusSCEV(A
[K
].Coeff
, B
[K
].PosPart
));
2894 if (NegPart
->isZero())
2895 Bound
[K
].Lower
[Dependence::DVEntry::GT
] = A
[K
].Coeff
;
2896 const SCEV
*PosPart
= getPositivePart(SE
->getMinusSCEV(A
[K
].Coeff
, B
[K
].NegPart
));
2897 if (PosPart
->isZero())
2898 Bound
[K
].Upper
[Dependence::DVEntry::GT
] = A
[K
].Coeff
;
2904 const SCEV
*DependenceInfo::getPositivePart(const SCEV
*X
) const {
2905 return SE
->getSMaxExpr(X
, SE
->getZero(X
->getType()));
2910 const SCEV
*DependenceInfo::getNegativePart(const SCEV
*X
) const {
2911 return SE
->getSMinExpr(X
, SE
->getZero(X
->getType()));
2915 // Walks through the subscript,
2916 // collecting each coefficient, the associated loop bounds,
2917 // and recording its positive and negative parts for later use.
2918 DependenceInfo::CoefficientInfo
*
2919 DependenceInfo::collectCoeffInfo(const SCEV
*Subscript
, bool SrcFlag
,
2920 const SCEV
*&Constant
) const {
2921 const SCEV
*Zero
= SE
->getZero(Subscript
->getType());
2922 CoefficientInfo
*CI
= new CoefficientInfo
[MaxLevels
+ 1];
2923 for (unsigned K
= 1; K
<= MaxLevels
; ++K
) {
2925 CI
[K
].PosPart
= Zero
;
2926 CI
[K
].NegPart
= Zero
;
2927 CI
[K
].Iterations
= nullptr;
2929 while (const SCEVAddRecExpr
*AddRec
= dyn_cast
<SCEVAddRecExpr
>(Subscript
)) {
2930 const Loop
*L
= AddRec
->getLoop();
2931 unsigned K
= SrcFlag
? mapSrcLoop(L
) : mapDstLoop(L
);
2932 CI
[K
].Coeff
= AddRec
->getStepRecurrence(*SE
);
2933 CI
[K
].PosPart
= getPositivePart(CI
[K
].Coeff
);
2934 CI
[K
].NegPart
= getNegativePart(CI
[K
].Coeff
);
2935 CI
[K
].Iterations
= collectUpperBound(L
, Subscript
->getType());
2936 Subscript
= AddRec
->getStart();
2938 Constant
= Subscript
;
2940 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n");
2941 for (unsigned K
= 1; K
<= MaxLevels
; ++K
) {
2942 LLVM_DEBUG(dbgs() << "\t " << K
<< "\t" << *CI
[K
].Coeff
);
2943 LLVM_DEBUG(dbgs() << "\tPos Part = ");
2944 LLVM_DEBUG(dbgs() << *CI
[K
].PosPart
);
2945 LLVM_DEBUG(dbgs() << "\tNeg Part = ");
2946 LLVM_DEBUG(dbgs() << *CI
[K
].NegPart
);
2947 LLVM_DEBUG(dbgs() << "\tUpper Bound = ");
2948 if (CI
[K
].Iterations
)
2949 LLVM_DEBUG(dbgs() << *CI
[K
].Iterations
);
2951 LLVM_DEBUG(dbgs() << "+inf");
2952 LLVM_DEBUG(dbgs() << '\n');
2954 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript
<< '\n');
2960 // Looks through all the bounds info and
2961 // computes the lower bound given the current direction settings
2962 // at each level. If the lower bound for any level is -inf,
2963 // the result is -inf.
2964 const SCEV
*DependenceInfo::getLowerBound(BoundInfo
*Bound
) const {
2965 const SCEV
*Sum
= Bound
[1].Lower
[Bound
[1].Direction
];
2966 for (unsigned K
= 2; Sum
&& K
<= MaxLevels
; ++K
) {
2967 if (Bound
[K
].Lower
[Bound
[K
].Direction
])
2968 Sum
= SE
->getAddExpr(Sum
, Bound
[K
].Lower
[Bound
[K
].Direction
]);
2976 // Looks through all the bounds info and
2977 // computes the upper bound given the current direction settings
2978 // at each level. If the upper bound at any level is +inf,
2979 // the result is +inf.
2980 const SCEV
*DependenceInfo::getUpperBound(BoundInfo
*Bound
) const {
2981 const SCEV
*Sum
= Bound
[1].Upper
[Bound
[1].Direction
];
2982 for (unsigned K
= 2; Sum
&& K
<= MaxLevels
; ++K
) {
2983 if (Bound
[K
].Upper
[Bound
[K
].Direction
])
2984 Sum
= SE
->getAddExpr(Sum
, Bound
[K
].Upper
[Bound
[K
].Direction
]);
2992 //===----------------------------------------------------------------------===//
2993 // Constraint manipulation for Delta test.
2995 // Given a linear SCEV,
2996 // return the coefficient (the step)
2997 // corresponding to the specified loop.
2998 // If there isn't one, return 0.
2999 // For example, given a*i + b*j + c*k, finding the coefficient
3000 // corresponding to the j loop would yield b.
3001 const SCEV
*DependenceInfo::findCoefficient(const SCEV
*Expr
,
3002 const Loop
*TargetLoop
) const {
3003 const SCEVAddRecExpr
*AddRec
= dyn_cast
<SCEVAddRecExpr
>(Expr
);
3005 return SE
->getZero(Expr
->getType());
3006 if (AddRec
->getLoop() == TargetLoop
)
3007 return AddRec
->getStepRecurrence(*SE
);
3008 return findCoefficient(AddRec
->getStart(), TargetLoop
);
3012 // Given a linear SCEV,
3013 // return the SCEV given by zeroing out the coefficient
3014 // corresponding to the specified loop.
3015 // For example, given a*i + b*j + c*k, zeroing the coefficient
3016 // corresponding to the j loop would yield a*i + c*k.
3017 const SCEV
*DependenceInfo::zeroCoefficient(const SCEV
*Expr
,
3018 const Loop
*TargetLoop
) const {
3019 const SCEVAddRecExpr
*AddRec
= dyn_cast
<SCEVAddRecExpr
>(Expr
);
3021 return Expr
; // ignore
3022 if (AddRec
->getLoop() == TargetLoop
)
3023 return AddRec
->getStart();
3024 return SE
->getAddRecExpr(zeroCoefficient(AddRec
->getStart(), TargetLoop
),
3025 AddRec
->getStepRecurrence(*SE
),
3027 AddRec
->getNoWrapFlags());
3031 // Given a linear SCEV Expr,
3032 // return the SCEV given by adding some Value to the
3033 // coefficient corresponding to the specified TargetLoop.
3034 // For example, given a*i + b*j + c*k, adding 1 to the coefficient
3035 // corresponding to the j loop would yield a*i + (b+1)*j + c*k.
3036 const SCEV
*DependenceInfo::addToCoefficient(const SCEV
*Expr
,
3037 const Loop
*TargetLoop
,
3038 const SCEV
*Value
) const {
3039 const SCEVAddRecExpr
*AddRec
= dyn_cast
<SCEVAddRecExpr
>(Expr
);
3040 if (!AddRec
) // create a new addRec
3041 return SE
->getAddRecExpr(Expr
,
3044 SCEV::FlagAnyWrap
); // Worst case, with no info.
3045 if (AddRec
->getLoop() == TargetLoop
) {
3046 const SCEV
*Sum
= SE
->getAddExpr(AddRec
->getStepRecurrence(*SE
), Value
);
3048 return AddRec
->getStart();
3049 return SE
->getAddRecExpr(AddRec
->getStart(),
3052 AddRec
->getNoWrapFlags());
3054 if (SE
->isLoopInvariant(AddRec
, TargetLoop
))
3055 return SE
->getAddRecExpr(AddRec
, Value
, TargetLoop
, SCEV::FlagAnyWrap
);
3056 return SE
->getAddRecExpr(
3057 addToCoefficient(AddRec
->getStart(), TargetLoop
, Value
),
3058 AddRec
->getStepRecurrence(*SE
), AddRec
->getLoop(),
3059 AddRec
->getNoWrapFlags());
3063 // Review the constraints, looking for opportunities
3064 // to simplify a subscript pair (Src and Dst).
3065 // Return true if some simplification occurs.
3066 // If the simplification isn't exact (that is, if it is conservative
3067 // in terms of dependence), set consistent to false.
3068 // Corresponds to Figure 5 from the paper
3070 // Practical Dependence Testing
3071 // Goff, Kennedy, Tseng
3073 bool DependenceInfo::propagate(const SCEV
*&Src
, const SCEV
*&Dst
,
3074 SmallBitVector
&Loops
,
3075 SmallVectorImpl
<Constraint
> &Constraints
,
3077 bool Result
= false;
3078 for (unsigned LI
: Loops
.set_bits()) {
3079 LLVM_DEBUG(dbgs() << "\t Constraint[" << LI
<< "] is");
3080 LLVM_DEBUG(Constraints
[LI
].dump(dbgs()));
3081 if (Constraints
[LI
].isDistance())
3082 Result
|= propagateDistance(Src
, Dst
, Constraints
[LI
], Consistent
);
3083 else if (Constraints
[LI
].isLine())
3084 Result
|= propagateLine(Src
, Dst
, Constraints
[LI
], Consistent
);
3085 else if (Constraints
[LI
].isPoint())
3086 Result
|= propagatePoint(Src
, Dst
, Constraints
[LI
]);
3092 // Attempt to propagate a distance
3093 // constraint into a subscript pair (Src and Dst).
3094 // Return true if some simplification occurs.
3095 // If the simplification isn't exact (that is, if it is conservative
3096 // in terms of dependence), set consistent to false.
3097 bool DependenceInfo::propagateDistance(const SCEV
*&Src
, const SCEV
*&Dst
,
3098 Constraint
&CurConstraint
,
3100 const Loop
*CurLoop
= CurConstraint
.getAssociatedLoop();
3101 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src
<< "\n");
3102 const SCEV
*A_K
= findCoefficient(Src
, CurLoop
);
3105 const SCEV
*DA_K
= SE
->getMulExpr(A_K
, CurConstraint
.getD());
3106 Src
= SE
->getMinusSCEV(Src
, DA_K
);
3107 Src
= zeroCoefficient(Src
, CurLoop
);
3108 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src
<< "\n");
3109 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst
<< "\n");
3110 Dst
= addToCoefficient(Dst
, CurLoop
, SE
->getNegativeSCEV(A_K
));
3111 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst
<< "\n");
3112 if (!findCoefficient(Dst
, CurLoop
)->isZero())
3118 // Attempt to propagate a line
3119 // constraint into a subscript pair (Src and Dst).
3120 // Return true if some simplification occurs.
3121 // If the simplification isn't exact (that is, if it is conservative
3122 // in terms of dependence), set consistent to false.
3123 bool DependenceInfo::propagateLine(const SCEV
*&Src
, const SCEV
*&Dst
,
3124 Constraint
&CurConstraint
,
3126 const Loop
*CurLoop
= CurConstraint
.getAssociatedLoop();
3127 const SCEV
*A
= CurConstraint
.getA();
3128 const SCEV
*B
= CurConstraint
.getB();
3129 const SCEV
*C
= CurConstraint
.getC();
3130 LLVM_DEBUG(dbgs() << "\t\tA = " << *A
<< ", B = " << *B
<< ", C = " << *C
3132 LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src
<< "\n");
3133 LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst
<< "\n");
3135 const SCEVConstant
*Bconst
= dyn_cast
<SCEVConstant
>(B
);
3136 const SCEVConstant
*Cconst
= dyn_cast
<SCEVConstant
>(C
);
3137 if (!Bconst
|| !Cconst
) return false;
3138 APInt Beta
= Bconst
->getAPInt();
3139 APInt Charlie
= Cconst
->getAPInt();
3140 APInt CdivB
= Charlie
.sdiv(Beta
);
3141 assert(Charlie
.srem(Beta
) == 0 && "C should be evenly divisible by B");
3142 const SCEV
*AP_K
= findCoefficient(Dst
, CurLoop
);
3143 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
3144 Src
= SE
->getMinusSCEV(Src
, SE
->getMulExpr(AP_K
, SE
->getConstant(CdivB
)));
3145 Dst
= zeroCoefficient(Dst
, CurLoop
);
3146 if (!findCoefficient(Src
, CurLoop
)->isZero())
3149 else if (B
->isZero()) {
3150 const SCEVConstant
*Aconst
= dyn_cast
<SCEVConstant
>(A
);
3151 const SCEVConstant
*Cconst
= dyn_cast
<SCEVConstant
>(C
);
3152 if (!Aconst
|| !Cconst
) return false;
3153 APInt Alpha
= Aconst
->getAPInt();
3154 APInt Charlie
= Cconst
->getAPInt();
3155 APInt CdivA
= Charlie
.sdiv(Alpha
);
3156 assert(Charlie
.srem(Alpha
) == 0 && "C should be evenly divisible by A");
3157 const SCEV
*A_K
= findCoefficient(Src
, CurLoop
);
3158 Src
= SE
->getAddExpr(Src
, SE
->getMulExpr(A_K
, SE
->getConstant(CdivA
)));
3159 Src
= zeroCoefficient(Src
, CurLoop
);
3160 if (!findCoefficient(Dst
, CurLoop
)->isZero())
3163 else if (isKnownPredicate(CmpInst::ICMP_EQ
, A
, B
)) {
3164 const SCEVConstant
*Aconst
= dyn_cast
<SCEVConstant
>(A
);
3165 const SCEVConstant
*Cconst
= dyn_cast
<SCEVConstant
>(C
);
3166 if (!Aconst
|| !Cconst
) return false;
3167 APInt Alpha
= Aconst
->getAPInt();
3168 APInt Charlie
= Cconst
->getAPInt();
3169 APInt CdivA
= Charlie
.sdiv(Alpha
);
3170 assert(Charlie
.srem(Alpha
) == 0 && "C should be evenly divisible by A");
3171 const SCEV
*A_K
= findCoefficient(Src
, CurLoop
);
3172 Src
= SE
->getAddExpr(Src
, SE
->getMulExpr(A_K
, SE
->getConstant(CdivA
)));
3173 Src
= zeroCoefficient(Src
, CurLoop
);
3174 Dst
= addToCoefficient(Dst
, CurLoop
, A_K
);
3175 if (!findCoefficient(Dst
, CurLoop
)->isZero())
3179 // paper is incorrect here, or perhaps just misleading
3180 const SCEV
*A_K
= findCoefficient(Src
, CurLoop
);
3181 Src
= SE
->getMulExpr(Src
, A
);
3182 Dst
= SE
->getMulExpr(Dst
, A
);
3183 Src
= SE
->getAddExpr(Src
, SE
->getMulExpr(A_K
, C
));
3184 Src
= zeroCoefficient(Src
, CurLoop
);
3185 Dst
= addToCoefficient(Dst
, CurLoop
, SE
->getMulExpr(A_K
, B
));
3186 if (!findCoefficient(Dst
, CurLoop
)->isZero())
3189 LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src
<< "\n");
3190 LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst
<< "\n");
3195 // Attempt to propagate a point
3196 // constraint into a subscript pair (Src and Dst).
3197 // Return true if some simplification occurs.
3198 bool DependenceInfo::propagatePoint(const SCEV
*&Src
, const SCEV
*&Dst
,
3199 Constraint
&CurConstraint
) {
3200 const Loop
*CurLoop
= CurConstraint
.getAssociatedLoop();
3201 const SCEV
*A_K
= findCoefficient(Src
, CurLoop
);
3202 const SCEV
*AP_K
= findCoefficient(Dst
, CurLoop
);
3203 const SCEV
*XA_K
= SE
->getMulExpr(A_K
, CurConstraint
.getX());
3204 const SCEV
*YAP_K
= SE
->getMulExpr(AP_K
, CurConstraint
.getY());
3205 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src
<< "\n");
3206 Src
= SE
->getAddExpr(Src
, SE
->getMinusSCEV(XA_K
, YAP_K
));
3207 Src
= zeroCoefficient(Src
, CurLoop
);
3208 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src
<< "\n");
3209 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst
<< "\n");
3210 Dst
= zeroCoefficient(Dst
, CurLoop
);
3211 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst
<< "\n");
3216 // Update direction vector entry based on the current constraint.
3217 void DependenceInfo::updateDirection(Dependence::DVEntry
&Level
,
3218 const Constraint
&CurConstraint
) const {
3219 LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint =");
3220 LLVM_DEBUG(CurConstraint
.dump(dbgs()));
3221 if (CurConstraint
.isAny())
3223 else if (CurConstraint
.isDistance()) {
3224 // this one is consistent, the others aren't
3225 Level
.Scalar
= false;
3226 Level
.Distance
= CurConstraint
.getD();
3227 unsigned NewDirection
= Dependence::DVEntry::NONE
;
3228 if (!SE
->isKnownNonZero(Level
.Distance
)) // if may be zero
3229 NewDirection
= Dependence::DVEntry::EQ
;
3230 if (!SE
->isKnownNonPositive(Level
.Distance
)) // if may be positive
3231 NewDirection
|= Dependence::DVEntry::LT
;
3232 if (!SE
->isKnownNonNegative(Level
.Distance
)) // if may be negative
3233 NewDirection
|= Dependence::DVEntry::GT
;
3234 Level
.Direction
&= NewDirection
;
3236 else if (CurConstraint
.isLine()) {
3237 Level
.Scalar
= false;
3238 Level
.Distance
= nullptr;
3239 // direction should be accurate
3241 else if (CurConstraint
.isPoint()) {
3242 Level
.Scalar
= false;
3243 Level
.Distance
= nullptr;
3244 unsigned NewDirection
= Dependence::DVEntry::NONE
;
3245 if (!isKnownPredicate(CmpInst::ICMP_NE
,
3246 CurConstraint
.getY(),
3247 CurConstraint
.getX()))
3249 NewDirection
|= Dependence::DVEntry::EQ
;
3250 if (!isKnownPredicate(CmpInst::ICMP_SLE
,
3251 CurConstraint
.getY(),
3252 CurConstraint
.getX()))
3254 NewDirection
|= Dependence::DVEntry::LT
;
3255 if (!isKnownPredicate(CmpInst::ICMP_SGE
,
3256 CurConstraint
.getY(),
3257 CurConstraint
.getX()))
3259 NewDirection
|= Dependence::DVEntry::GT
;
3260 Level
.Direction
&= NewDirection
;
3263 llvm_unreachable("constraint has unexpected kind");
3266 /// Check if we can delinearize the subscripts. If the SCEVs representing the
3267 /// source and destination array references are recurrences on a nested loop,
3268 /// this function flattens the nested recurrences into separate recurrences
3269 /// for each loop level.
3270 bool DependenceInfo::tryDelinearize(Instruction
*Src
, Instruction
*Dst
,
3271 SmallVectorImpl
<Subscript
> &Pair
) {
3272 assert(isLoadOrStore(Src
) && "instruction is not load or store");
3273 assert(isLoadOrStore(Dst
) && "instruction is not load or store");
3274 Value
*SrcPtr
= getLoadStorePointerOperand(Src
);
3275 Value
*DstPtr
= getLoadStorePointerOperand(Dst
);
3276 Loop
*SrcLoop
= LI
->getLoopFor(Src
->getParent());
3277 Loop
*DstLoop
= LI
->getLoopFor(Dst
->getParent());
3278 const SCEV
*SrcAccessFn
= SE
->getSCEVAtScope(SrcPtr
, SrcLoop
);
3279 const SCEV
*DstAccessFn
= SE
->getSCEVAtScope(DstPtr
, DstLoop
);
3280 const SCEVUnknown
*SrcBase
=
3281 dyn_cast
<SCEVUnknown
>(SE
->getPointerBase(SrcAccessFn
));
3282 const SCEVUnknown
*DstBase
=
3283 dyn_cast
<SCEVUnknown
>(SE
->getPointerBase(DstAccessFn
));
3285 if (!SrcBase
|| !DstBase
|| SrcBase
!= DstBase
)
3288 SmallVector
<const SCEV
*, 4> SrcSubscripts
, DstSubscripts
;
3290 if (!tryDelinearizeFixedSize(Src
, Dst
, SrcAccessFn
, DstAccessFn
,
3291 SrcSubscripts
, DstSubscripts
) &&
3292 !tryDelinearizeParametricSize(Src
, Dst
, SrcAccessFn
, DstAccessFn
,
3293 SrcSubscripts
, DstSubscripts
))
3296 int Size
= SrcSubscripts
.size();
3298 dbgs() << "\nSrcSubscripts: ";
3299 for (int I
= 0; I
< Size
; I
++)
3300 dbgs() << *SrcSubscripts
[I
];
3301 dbgs() << "\nDstSubscripts: ";
3302 for (int I
= 0; I
< Size
; I
++)
3303 dbgs() << *DstSubscripts
[I
];
3306 // The delinearization transforms a single-subscript MIV dependence test into
3307 // a multi-subscript SIV dependence test that is easier to compute. So we
3308 // resize Pair to contain as many pairs of subscripts as the delinearization
3309 // has found, and then initialize the pairs following the delinearization.
3311 for (int I
= 0; I
< Size
; ++I
) {
3312 Pair
[I
].Src
= SrcSubscripts
[I
];
3313 Pair
[I
].Dst
= DstSubscripts
[I
];
3314 unifySubscriptType(&Pair
[I
]);
3320 bool DependenceInfo::tryDelinearizeFixedSize(
3321 Instruction
*Src
, Instruction
*Dst
, const SCEV
*SrcAccessFn
,
3322 const SCEV
*DstAccessFn
, SmallVectorImpl
<const SCEV
*> &SrcSubscripts
,
3323 SmallVectorImpl
<const SCEV
*> &DstSubscripts
) {
3325 Value
*SrcPtr
= getLoadStorePointerOperand(Src
);
3326 Value
*DstPtr
= getLoadStorePointerOperand(Dst
);
3327 const SCEVUnknown
*SrcBase
=
3328 dyn_cast
<SCEVUnknown
>(SE
->getPointerBase(SrcAccessFn
));
3329 const SCEVUnknown
*DstBase
=
3330 dyn_cast
<SCEVUnknown
>(SE
->getPointerBase(DstAccessFn
));
3331 assert(SrcBase
&& DstBase
&& SrcBase
== DstBase
&&
3332 "expected src and dst scev unknowns to be equal");
3334 // Check the simple case where the array dimensions are fixed size.
3335 auto *SrcGEP
= dyn_cast
<GetElementPtrInst
>(SrcPtr
);
3336 auto *DstGEP
= dyn_cast
<GetElementPtrInst
>(DstPtr
);
3337 if (!SrcGEP
|| !DstGEP
)
3340 SmallVector
<int, 4> SrcSizes
, DstSizes
;
3341 SE
->getIndexExpressionsFromGEP(SrcGEP
, SrcSubscripts
, SrcSizes
);
3342 SE
->getIndexExpressionsFromGEP(DstGEP
, DstSubscripts
, DstSizes
);
3344 // Check that the two size arrays are non-empty and equal in length and
3346 if (SrcSizes
.empty() || SrcSubscripts
.size() <= 1 ||
3347 SrcSizes
.size() != DstSizes
.size() ||
3348 !std::equal(SrcSizes
.begin(), SrcSizes
.end(), DstSizes
.begin())) {
3349 SrcSubscripts
.clear();
3350 DstSubscripts
.clear();
3354 Value
*SrcBasePtr
= SrcGEP
->getOperand(0);
3355 Value
*DstBasePtr
= DstGEP
->getOperand(0);
3356 while (auto *PCast
= dyn_cast
<BitCastInst
>(SrcBasePtr
))
3357 SrcBasePtr
= PCast
->getOperand(0);
3358 while (auto *PCast
= dyn_cast
<BitCastInst
>(DstBasePtr
))
3359 DstBasePtr
= PCast
->getOperand(0);
3361 // Check that for identical base pointers we do not miss index offsets
3362 // that have been added before this GEP is applied.
3363 if (SrcBasePtr
!= SrcBase
->getValue() || DstBasePtr
!= DstBase
->getValue()) {
3364 SrcSubscripts
.clear();
3365 DstSubscripts
.clear();
3369 assert(SrcSubscripts
.size() == DstSubscripts
.size() &&
3370 SrcSubscripts
.size() == SrcSizes
.size() + 1 &&
3371 "Expected equal number of entries in the list of sizes and "
3374 // In general we cannot safely assume that the subscripts recovered from GEPs
3375 // are in the range of values defined for their corresponding array
3376 // dimensions. For example some C language usage/interpretation make it
3377 // impossible to verify this at compile-time. As such we can only delinearize
3378 // iff the subscripts are positive and are less than the range of the
3380 if (!DisableDelinearizationChecks
) {
3381 auto AllIndiciesInRange
= [&](SmallVector
<int, 4> &DimensionSizes
,
3382 SmallVectorImpl
<const SCEV
*> &Subscripts
,
3384 size_t SSize
= Subscripts
.size();
3385 for (size_t I
= 1; I
< SSize
; ++I
) {
3386 const SCEV
*S
= Subscripts
[I
];
3387 if (!isKnownNonNegative(S
, Ptr
))
3389 if (auto *SType
= dyn_cast
<IntegerType
>(S
->getType())) {
3390 const SCEV
*Range
= SE
->getConstant(
3391 ConstantInt::get(SType
, DimensionSizes
[I
- 1], false));
3392 if (!isKnownLessThan(S
, Range
))
3399 if (!AllIndiciesInRange(SrcSizes
, SrcSubscripts
, SrcPtr
) ||
3400 !AllIndiciesInRange(DstSizes
, DstSubscripts
, DstPtr
)) {
3401 SrcSubscripts
.clear();
3402 DstSubscripts
.clear();
3407 dbgs() << "Delinearized subscripts of fixed-size array\n"
3408 << "SrcGEP:" << *SrcGEP
<< "\n"
3409 << "DstGEP:" << *DstGEP
<< "\n";
3414 bool DependenceInfo::tryDelinearizeParametricSize(
3415 Instruction
*Src
, Instruction
*Dst
, const SCEV
*SrcAccessFn
,
3416 const SCEV
*DstAccessFn
, SmallVectorImpl
<const SCEV
*> &SrcSubscripts
,
3417 SmallVectorImpl
<const SCEV
*> &DstSubscripts
) {
3419 Value
*SrcPtr
= getLoadStorePointerOperand(Src
);
3420 Value
*DstPtr
= getLoadStorePointerOperand(Dst
);
3421 const SCEVUnknown
*SrcBase
=
3422 dyn_cast
<SCEVUnknown
>(SE
->getPointerBase(SrcAccessFn
));
3423 const SCEVUnknown
*DstBase
=
3424 dyn_cast
<SCEVUnknown
>(SE
->getPointerBase(DstAccessFn
));
3425 assert(SrcBase
&& DstBase
&& SrcBase
== DstBase
&&
3426 "expected src and dst scev unknowns to be equal");
3428 const SCEV
*ElementSize
= SE
->getElementSize(Src
);
3429 if (ElementSize
!= SE
->getElementSize(Dst
))
3432 const SCEV
*SrcSCEV
= SE
->getMinusSCEV(SrcAccessFn
, SrcBase
);
3433 const SCEV
*DstSCEV
= SE
->getMinusSCEV(DstAccessFn
, DstBase
);
3435 const SCEVAddRecExpr
*SrcAR
= dyn_cast
<SCEVAddRecExpr
>(SrcSCEV
);
3436 const SCEVAddRecExpr
*DstAR
= dyn_cast
<SCEVAddRecExpr
>(DstSCEV
);
3437 if (!SrcAR
|| !DstAR
|| !SrcAR
->isAffine() || !DstAR
->isAffine())
3440 // First step: collect parametric terms in both array references.
3441 SmallVector
<const SCEV
*, 4> Terms
;
3442 SE
->collectParametricTerms(SrcAR
, Terms
);
3443 SE
->collectParametricTerms(DstAR
, Terms
);
3445 // Second step: find subscript sizes.
3446 SmallVector
<const SCEV
*, 4> Sizes
;
3447 SE
->findArrayDimensions(Terms
, Sizes
, ElementSize
);
3449 // Third step: compute the access functions for each subscript.
3450 SE
->computeAccessFunctions(SrcAR
, SrcSubscripts
, Sizes
);
3451 SE
->computeAccessFunctions(DstAR
, DstSubscripts
, Sizes
);
3453 // Fail when there is only a subscript: that's a linearized access function.
3454 if (SrcSubscripts
.size() < 2 || DstSubscripts
.size() < 2 ||
3455 SrcSubscripts
.size() != DstSubscripts
.size())
3458 size_t Size
= SrcSubscripts
.size();
3460 // Statically check that the array bounds are in-range. The first subscript we
3461 // don't have a size for and it cannot overflow into another subscript, so is
3462 // always safe. The others need to be 0 <= subscript[i] < bound, for both src
3464 // FIXME: It may be better to record these sizes and add them as constraints
3465 // to the dependency checks.
3466 if (!DisableDelinearizationChecks
)
3467 for (size_t I
= 1; I
< Size
; ++I
) {
3468 if (!isKnownNonNegative(SrcSubscripts
[I
], SrcPtr
))
3471 if (!isKnownLessThan(SrcSubscripts
[I
], Sizes
[I
- 1]))
3474 if (!isKnownNonNegative(DstSubscripts
[I
], DstPtr
))
3477 if (!isKnownLessThan(DstSubscripts
[I
], Sizes
[I
- 1]))
3484 //===----------------------------------------------------------------------===//
3487 // For debugging purposes, dump a small bit vector to dbgs().
3488 static void dumpSmallBitVector(SmallBitVector
&BV
) {
3490 for (unsigned VI
: BV
.set_bits()) {
3492 if (BV
.find_next(VI
) >= 0)
3499 bool DependenceInfo::invalidate(Function
&F
, const PreservedAnalyses
&PA
,
3500 FunctionAnalysisManager::Invalidator
&Inv
) {
3501 // Check if the analysis itself has been invalidated.
3502 auto PAC
= PA
.getChecker
<DependenceAnalysis
>();
3503 if (!PAC
.preserved() && !PAC
.preservedSet
<AllAnalysesOn
<Function
>>())
3506 // Check transitive dependencies.
3507 return Inv
.invalidate
<AAManager
>(F
, PA
) ||
3508 Inv
.invalidate
<ScalarEvolutionAnalysis
>(F
, PA
) ||
3509 Inv
.invalidate
<LoopAnalysis
>(F
, PA
);
3513 // Returns NULL if there is no dependence.
3514 // Otherwise, return a Dependence with as many details as possible.
3515 // Corresponds to Section 3.1 in the paper
3517 // Practical Dependence Testing
3518 // Goff, Kennedy, Tseng
3521 // Care is required to keep the routine below, getSplitIteration(),
3522 // up to date with respect to this routine.
3523 std::unique_ptr
<Dependence
>
3524 DependenceInfo::depends(Instruction
*Src
, Instruction
*Dst
,
3525 bool PossiblyLoopIndependent
) {
3527 PossiblyLoopIndependent
= false;
3529 if (!(Src
->mayReadOrWriteMemory() && Dst
->mayReadOrWriteMemory()))
3530 // if both instructions don't reference memory, there's no dependence
3533 if (!isLoadOrStore(Src
) || !isLoadOrStore(Dst
)) {
3534 // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
3535 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
3536 return std::make_unique
<Dependence
>(Src
, Dst
);
3539 assert(isLoadOrStore(Src
) && "instruction is not load or store");
3540 assert(isLoadOrStore(Dst
) && "instruction is not load or store");
3541 Value
*SrcPtr
= getLoadStorePointerOperand(Src
);
3542 Value
*DstPtr
= getLoadStorePointerOperand(Dst
);
3544 switch (underlyingObjectsAlias(AA
, F
->getParent()->getDataLayout(),
3545 MemoryLocation::get(Dst
),
3546 MemoryLocation::get(Src
))) {
3547 case AliasResult::MayAlias
:
3548 case AliasResult::PartialAlias
:
3549 // cannot analyse objects if we don't understand their aliasing.
3550 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
3551 return std::make_unique
<Dependence
>(Src
, Dst
);
3552 case AliasResult::NoAlias
:
3553 // If the objects noalias, they are distinct, accesses are independent.
3554 LLVM_DEBUG(dbgs() << "no alias\n");
3556 case AliasResult::MustAlias
:
3557 break; // The underlying objects alias; test accesses for dependence.
3560 // establish loop nesting levels
3561 establishNestingLevels(Src
, Dst
);
3562 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels
<< "\n");
3563 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels
<< "\n");
3565 FullDependence
Result(Src
, Dst
, PossiblyLoopIndependent
, CommonLevels
);
3569 SmallVector
<Subscript
, 2> Pair(Pairs
);
3570 const SCEV
*SrcSCEV
= SE
->getSCEV(SrcPtr
);
3571 const SCEV
*DstSCEV
= SE
->getSCEV(DstPtr
);
3572 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV
<< "\n");
3573 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV
<< "\n");
3574 if (SE
->getPointerBase(SrcSCEV
) != SE
->getPointerBase(DstSCEV
)) {
3575 // If two pointers have different bases, trying to analyze indexes won't
3576 // work; we can't compare them to each other. This can happen, for example,
3577 // if one is produced by an LCSSA PHI node.
3579 // We check this upfront so we don't crash in cases where getMinusSCEV()
3580 // returns a SCEVCouldNotCompute.
3581 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
3582 return std::make_unique
<Dependence
>(Src
, Dst
);
3584 Pair
[0].Src
= SrcSCEV
;
3585 Pair
[0].Dst
= DstSCEV
;
3588 if (tryDelinearize(Src
, Dst
, Pair
)) {
3589 LLVM_DEBUG(dbgs() << " delinearized\n");
3590 Pairs
= Pair
.size();
3594 for (unsigned P
= 0; P
< Pairs
; ++P
) {
3595 Pair
[P
].Loops
.resize(MaxLevels
+ 1);
3596 Pair
[P
].GroupLoops
.resize(MaxLevels
+ 1);
3597 Pair
[P
].Group
.resize(Pairs
);
3598 removeMatchingExtensions(&Pair
[P
]);
3599 Pair
[P
].Classification
=
3600 classifyPair(Pair
[P
].Src
, LI
->getLoopFor(Src
->getParent()),
3601 Pair
[P
].Dst
, LI
->getLoopFor(Dst
->getParent()),
3603 Pair
[P
].GroupLoops
= Pair
[P
].Loops
;
3604 Pair
[P
].Group
.set(P
);
3605 LLVM_DEBUG(dbgs() << " subscript " << P
<< "\n");
3606 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair
[P
].Src
<< "\n");
3607 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair
[P
].Dst
<< "\n");
3608 LLVM_DEBUG(dbgs() << "\tclass = " << Pair
[P
].Classification
<< "\n");
3609 LLVM_DEBUG(dbgs() << "\tloops = ");
3610 LLVM_DEBUG(dumpSmallBitVector(Pair
[P
].Loops
));
3613 SmallBitVector
Separable(Pairs
);
3614 SmallBitVector
Coupled(Pairs
);
3616 // Partition subscripts into separable and minimally-coupled groups
3617 // Algorithm in paper is algorithmically better;
3618 // this may be faster in practice. Check someday.
3620 // Here's an example of how it works. Consider this code:
3627 // A[i][j][k][m] = ...;
3628 // ... = A[0][j][l][i + j];
3635 // There are 4 subscripts here:
3639 // 3 [m] and [i + j]
3641 // We've already classified each subscript pair as ZIV, SIV, etc.,
3642 // and collected all the loops mentioned by pair P in Pair[P].Loops.
3643 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops
3644 // and set Pair[P].Group = {P}.
3646 // Src Dst Classification Loops GroupLoops Group
3647 // 0 [i] [0] SIV {1} {1} {0}
3648 // 1 [j] [j] SIV {2} {2} {1}
3649 // 2 [k] [l] RDIV {3,4} {3,4} {2}
3650 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3}
3652 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.
3653 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.
3655 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.
3656 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty.
3657 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty,
3658 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added
3659 // to either Separable or Coupled).
3661 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
3662 // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty,
3663 // so Pair[3].Group = {0, 1, 3} and Done = false.
3665 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
3666 // Since Done remains true, we add 2 to the set of Separable pairs.
3668 // Finally, we consider 3. There's nothing to compare it with,
3669 // so Done remains true and we add it to the Coupled set.
3670 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.
3672 // In the end, we've got 1 separable subscript and 1 coupled group.
3673 for (unsigned SI
= 0; SI
< Pairs
; ++SI
) {
3674 if (Pair
[SI
].Classification
== Subscript::NonLinear
) {
3675 // ignore these, but collect loops for later
3676 ++NonlinearSubscriptPairs
;
3677 collectCommonLoops(Pair
[SI
].Src
,
3678 LI
->getLoopFor(Src
->getParent()),
3680 collectCommonLoops(Pair
[SI
].Dst
,
3681 LI
->getLoopFor(Dst
->getParent()),
3683 Result
.Consistent
= false;
3684 } else if (Pair
[SI
].Classification
== Subscript::ZIV
) {
3689 // SIV, RDIV, or MIV, so check for coupled group
3691 for (unsigned SJ
= SI
+ 1; SJ
< Pairs
; ++SJ
) {
3692 SmallBitVector Intersection
= Pair
[SI
].GroupLoops
;
3693 Intersection
&= Pair
[SJ
].GroupLoops
;
3694 if (Intersection
.any()) {
3695 // accumulate set of all the loops in group
3696 Pair
[SJ
].GroupLoops
|= Pair
[SI
].GroupLoops
;
3697 // accumulate set of all subscripts in group
3698 Pair
[SJ
].Group
|= Pair
[SI
].Group
;
3703 if (Pair
[SI
].Group
.count() == 1) {
3705 ++SeparableSubscriptPairs
;
3709 ++CoupledSubscriptPairs
;
3715 LLVM_DEBUG(dbgs() << " Separable = ");
3716 LLVM_DEBUG(dumpSmallBitVector(Separable
));
3717 LLVM_DEBUG(dbgs() << " Coupled = ");
3718 LLVM_DEBUG(dumpSmallBitVector(Coupled
));
3720 Constraint NewConstraint
;
3721 NewConstraint
.setAny(SE
);
3723 // test separable subscripts
3724 for (unsigned SI
: Separable
.set_bits()) {
3725 LLVM_DEBUG(dbgs() << "testing subscript " << SI
);
3726 switch (Pair
[SI
].Classification
) {
3727 case Subscript::ZIV
:
3728 LLVM_DEBUG(dbgs() << ", ZIV\n");
3729 if (testZIV(Pair
[SI
].Src
, Pair
[SI
].Dst
, Result
))
3732 case Subscript::SIV
: {
3733 LLVM_DEBUG(dbgs() << ", SIV\n");
3735 const SCEV
*SplitIter
= nullptr;
3736 if (testSIV(Pair
[SI
].Src
, Pair
[SI
].Dst
, Level
, Result
, NewConstraint
,
3741 case Subscript::RDIV
:
3742 LLVM_DEBUG(dbgs() << ", RDIV\n");
3743 if (testRDIV(Pair
[SI
].Src
, Pair
[SI
].Dst
, Result
))
3746 case Subscript::MIV
:
3747 LLVM_DEBUG(dbgs() << ", MIV\n");
3748 if (testMIV(Pair
[SI
].Src
, Pair
[SI
].Dst
, Pair
[SI
].Loops
, Result
))
3752 llvm_unreachable("subscript has unexpected classification");
3756 if (Coupled
.count()) {
3757 // test coupled subscript groups
3758 LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n");
3759 LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels
+ 1 << "\n");
3760 SmallVector
<Constraint
, 4> Constraints(MaxLevels
+ 1);
3761 for (unsigned II
= 0; II
<= MaxLevels
; ++II
)
3762 Constraints
[II
].setAny(SE
);
3763 for (unsigned SI
: Coupled
.set_bits()) {
3764 LLVM_DEBUG(dbgs() << "testing subscript group " << SI
<< " { ");
3765 SmallBitVector
Group(Pair
[SI
].Group
);
3766 SmallBitVector
Sivs(Pairs
);
3767 SmallBitVector
Mivs(Pairs
);
3768 SmallBitVector
ConstrainedLevels(MaxLevels
+ 1);
3769 SmallVector
<Subscript
*, 4> PairsInGroup
;
3770 for (unsigned SJ
: Group
.set_bits()) {
3771 LLVM_DEBUG(dbgs() << SJ
<< " ");
3772 if (Pair
[SJ
].Classification
== Subscript::SIV
)
3776 PairsInGroup
.push_back(&Pair
[SJ
]);
3778 unifySubscriptType(PairsInGroup
);
3779 LLVM_DEBUG(dbgs() << "}\n");
3780 while (Sivs
.any()) {
3781 bool Changed
= false;
3782 for (unsigned SJ
: Sivs
.set_bits()) {
3783 LLVM_DEBUG(dbgs() << "testing subscript " << SJ
<< ", SIV\n");
3784 // SJ is an SIV subscript that's part of the current coupled group
3786 const SCEV
*SplitIter
= nullptr;
3787 LLVM_DEBUG(dbgs() << "SIV\n");
3788 if (testSIV(Pair
[SJ
].Src
, Pair
[SJ
].Dst
, Level
, Result
, NewConstraint
,
3791 ConstrainedLevels
.set(Level
);
3792 if (intersectConstraints(&Constraints
[Level
], &NewConstraint
)) {
3793 if (Constraints
[Level
].isEmpty()) {
3794 ++DeltaIndependence
;
3802 // propagate, possibly creating new SIVs and ZIVs
3803 LLVM_DEBUG(dbgs() << " propagating\n");
3804 LLVM_DEBUG(dbgs() << "\tMivs = ");
3805 LLVM_DEBUG(dumpSmallBitVector(Mivs
));
3806 for (unsigned SJ
: Mivs
.set_bits()) {
3807 // SJ is an MIV subscript that's part of the current coupled group
3808 LLVM_DEBUG(dbgs() << "\tSJ = " << SJ
<< "\n");
3809 if (propagate(Pair
[SJ
].Src
, Pair
[SJ
].Dst
, Pair
[SJ
].Loops
,
3810 Constraints
, Result
.Consistent
)) {
3811 LLVM_DEBUG(dbgs() << "\t Changed\n");
3812 ++DeltaPropagations
;
3813 Pair
[SJ
].Classification
=
3814 classifyPair(Pair
[SJ
].Src
, LI
->getLoopFor(Src
->getParent()),
3815 Pair
[SJ
].Dst
, LI
->getLoopFor(Dst
->getParent()),
3817 switch (Pair
[SJ
].Classification
) {
3818 case Subscript::ZIV
:
3819 LLVM_DEBUG(dbgs() << "ZIV\n");
3820 if (testZIV(Pair
[SJ
].Src
, Pair
[SJ
].Dst
, Result
))
3824 case Subscript::SIV
:
3828 case Subscript::RDIV
:
3829 case Subscript::MIV
:
3832 llvm_unreachable("bad subscript classification");
3839 // test & propagate remaining RDIVs
3840 for (unsigned SJ
: Mivs
.set_bits()) {
3841 if (Pair
[SJ
].Classification
== Subscript::RDIV
) {
3842 LLVM_DEBUG(dbgs() << "RDIV test\n");
3843 if (testRDIV(Pair
[SJ
].Src
, Pair
[SJ
].Dst
, Result
))
3845 // I don't yet understand how to propagate RDIV results
3850 // test remaining MIVs
3851 // This code is temporary.
3852 // Better to somehow test all remaining subscripts simultaneously.
3853 for (unsigned SJ
: Mivs
.set_bits()) {
3854 if (Pair
[SJ
].Classification
== Subscript::MIV
) {
3855 LLVM_DEBUG(dbgs() << "MIV test\n");
3856 if (testMIV(Pair
[SJ
].Src
, Pair
[SJ
].Dst
, Pair
[SJ
].Loops
, Result
))
3860 llvm_unreachable("expected only MIV subscripts at this point");
3863 // update Result.DV from constraint vector
3864 LLVM_DEBUG(dbgs() << " updating\n");
3865 for (unsigned SJ
: ConstrainedLevels
.set_bits()) {
3866 if (SJ
> CommonLevels
)
3868 updateDirection(Result
.DV
[SJ
- 1], Constraints
[SJ
]);
3869 if (Result
.DV
[SJ
- 1].Direction
== Dependence::DVEntry::NONE
)
3875 // Make sure the Scalar flags are set correctly.
3876 SmallBitVector
CompleteLoops(MaxLevels
+ 1);
3877 for (unsigned SI
= 0; SI
< Pairs
; ++SI
)
3878 CompleteLoops
|= Pair
[SI
].Loops
;
3879 for (unsigned II
= 1; II
<= CommonLevels
; ++II
)
3880 if (CompleteLoops
[II
])
3881 Result
.DV
[II
- 1].Scalar
= false;
3883 if (PossiblyLoopIndependent
) {
3884 // Make sure the LoopIndependent flag is set correctly.
3885 // All directions must include equal, otherwise no
3886 // loop-independent dependence is possible.
3887 for (unsigned II
= 1; II
<= CommonLevels
; ++II
) {
3888 if (!(Result
.getDirection(II
) & Dependence::DVEntry::EQ
)) {
3889 Result
.LoopIndependent
= false;
3895 // On the other hand, if all directions are equal and there's no
3896 // loop-independent dependence possible, then no dependence exists.
3897 bool AllEqual
= true;
3898 for (unsigned II
= 1; II
<= CommonLevels
; ++II
) {
3899 if (Result
.getDirection(II
) != Dependence::DVEntry::EQ
) {
3908 return std::make_unique
<FullDependence
>(std::move(Result
));
3911 //===----------------------------------------------------------------------===//
3912 // getSplitIteration -
3913 // Rather than spend rarely-used space recording the splitting iteration
3914 // during the Weak-Crossing SIV test, we re-compute it on demand.
3915 // The re-computation is basically a repeat of the entire dependence test,
3916 // though simplified since we know that the dependence exists.
3917 // It's tedious, since we must go through all propagations, etc.
3919 // Care is required to keep this code up to date with respect to the routine
3920 // above, depends().
3922 // Generally, the dependence analyzer will be used to build
3923 // a dependence graph for a function (basically a map from instructions
3924 // to dependences). Looking for cycles in the graph shows us loops
3925 // that cannot be trivially vectorized/parallelized.
3927 // We can try to improve the situation by examining all the dependences
3928 // that make up the cycle, looking for ones we can break.
3929 // Sometimes, peeling the first or last iteration of a loop will break
3930 // dependences, and we've got flags for those possibilities.
3931 // Sometimes, splitting a loop at some other iteration will do the trick,
3932 // and we've got a flag for that case. Rather than waste the space to
3933 // record the exact iteration (since we rarely know), we provide
3934 // a method that calculates the iteration. It's a drag that it must work
3935 // from scratch, but wonderful in that it's possible.
3937 // Here's an example:
3939 // for (i = 0; i < 10; i++)
3943 // There's a loop-carried flow dependence from the store to the load,
3944 // found by the weak-crossing SIV test. The dependence will have a flag,
3945 // indicating that the dependence can be broken by splitting the loop.
3946 // Calling getSplitIteration will return 5.
3947 // Splitting the loop breaks the dependence, like so:
3949 // for (i = 0; i <= 5; i++)
3952 // for (i = 6; i < 10; i++)
3956 // breaks the dependence and allows us to vectorize/parallelize
3958 const SCEV
*DependenceInfo::getSplitIteration(const Dependence
&Dep
,
3959 unsigned SplitLevel
) {
3960 assert(Dep
.isSplitable(SplitLevel
) &&
3961 "Dep should be splitable at SplitLevel");
3962 Instruction
*Src
= Dep
.getSrc();
3963 Instruction
*Dst
= Dep
.getDst();
3964 assert(Src
->mayReadFromMemory() || Src
->mayWriteToMemory());
3965 assert(Dst
->mayReadFromMemory() || Dst
->mayWriteToMemory());
3966 assert(isLoadOrStore(Src
));
3967 assert(isLoadOrStore(Dst
));
3968 Value
*SrcPtr
= getLoadStorePointerOperand(Src
);
3969 Value
*DstPtr
= getLoadStorePointerOperand(Dst
);
3970 assert(underlyingObjectsAlias(
3971 AA
, F
->getParent()->getDataLayout(), MemoryLocation::get(Dst
),
3972 MemoryLocation::get(Src
)) == AliasResult::MustAlias
);
3974 // establish loop nesting levels
3975 establishNestingLevels(Src
, Dst
);
3977 FullDependence
Result(Src
, Dst
, false, CommonLevels
);
3980 SmallVector
<Subscript
, 2> Pair(Pairs
);
3981 const SCEV
*SrcSCEV
= SE
->getSCEV(SrcPtr
);
3982 const SCEV
*DstSCEV
= SE
->getSCEV(DstPtr
);
3983 Pair
[0].Src
= SrcSCEV
;
3984 Pair
[0].Dst
= DstSCEV
;
3987 if (tryDelinearize(Src
, Dst
, Pair
)) {
3988 LLVM_DEBUG(dbgs() << " delinearized\n");
3989 Pairs
= Pair
.size();
3993 for (unsigned P
= 0; P
< Pairs
; ++P
) {
3994 Pair
[P
].Loops
.resize(MaxLevels
+ 1);
3995 Pair
[P
].GroupLoops
.resize(MaxLevels
+ 1);
3996 Pair
[P
].Group
.resize(Pairs
);
3997 removeMatchingExtensions(&Pair
[P
]);
3998 Pair
[P
].Classification
=
3999 classifyPair(Pair
[P
].Src
, LI
->getLoopFor(Src
->getParent()),
4000 Pair
[P
].Dst
, LI
->getLoopFor(Dst
->getParent()),
4002 Pair
[P
].GroupLoops
= Pair
[P
].Loops
;
4003 Pair
[P
].Group
.set(P
);
4006 SmallBitVector
Separable(Pairs
);
4007 SmallBitVector
Coupled(Pairs
);
4009 // partition subscripts into separable and minimally-coupled groups
4010 for (unsigned SI
= 0; SI
< Pairs
; ++SI
) {
4011 if (Pair
[SI
].Classification
== Subscript::NonLinear
) {
4012 // ignore these, but collect loops for later
4013 collectCommonLoops(Pair
[SI
].Src
,
4014 LI
->getLoopFor(Src
->getParent()),
4016 collectCommonLoops(Pair
[SI
].Dst
,
4017 LI
->getLoopFor(Dst
->getParent()),
4019 Result
.Consistent
= false;
4021 else if (Pair
[SI
].Classification
== Subscript::ZIV
)
4024 // SIV, RDIV, or MIV, so check for coupled group
4026 for (unsigned SJ
= SI
+ 1; SJ
< Pairs
; ++SJ
) {
4027 SmallBitVector Intersection
= Pair
[SI
].GroupLoops
;
4028 Intersection
&= Pair
[SJ
].GroupLoops
;
4029 if (Intersection
.any()) {
4030 // accumulate set of all the loops in group
4031 Pair
[SJ
].GroupLoops
|= Pair
[SI
].GroupLoops
;
4032 // accumulate set of all subscripts in group
4033 Pair
[SJ
].Group
|= Pair
[SI
].Group
;
4038 if (Pair
[SI
].Group
.count() == 1)
4046 Constraint NewConstraint
;
4047 NewConstraint
.setAny(SE
);
4049 // test separable subscripts
4050 for (unsigned SI
: Separable
.set_bits()) {
4051 switch (Pair
[SI
].Classification
) {
4052 case Subscript::SIV
: {
4054 const SCEV
*SplitIter
= nullptr;
4055 (void) testSIV(Pair
[SI
].Src
, Pair
[SI
].Dst
, Level
,
4056 Result
, NewConstraint
, SplitIter
);
4057 if (Level
== SplitLevel
) {
4058 assert(SplitIter
!= nullptr);
4063 case Subscript::ZIV
:
4064 case Subscript::RDIV
:
4065 case Subscript::MIV
:
4068 llvm_unreachable("subscript has unexpected classification");
4072 if (Coupled
.count()) {
4073 // test coupled subscript groups
4074 SmallVector
<Constraint
, 4> Constraints(MaxLevels
+ 1);
4075 for (unsigned II
= 0; II
<= MaxLevels
; ++II
)
4076 Constraints
[II
].setAny(SE
);
4077 for (unsigned SI
: Coupled
.set_bits()) {
4078 SmallBitVector
Group(Pair
[SI
].Group
);
4079 SmallBitVector
Sivs(Pairs
);
4080 SmallBitVector
Mivs(Pairs
);
4081 SmallBitVector
ConstrainedLevels(MaxLevels
+ 1);
4082 for (unsigned SJ
: Group
.set_bits()) {
4083 if (Pair
[SJ
].Classification
== Subscript::SIV
)
4088 while (Sivs
.any()) {
4089 bool Changed
= false;
4090 for (unsigned SJ
: Sivs
.set_bits()) {
4091 // SJ is an SIV subscript that's part of the current coupled group
4093 const SCEV
*SplitIter
= nullptr;
4094 (void) testSIV(Pair
[SJ
].Src
, Pair
[SJ
].Dst
, Level
,
4095 Result
, NewConstraint
, SplitIter
);
4096 if (Level
== SplitLevel
&& SplitIter
)
4098 ConstrainedLevels
.set(Level
);
4099 if (intersectConstraints(&Constraints
[Level
], &NewConstraint
))
4104 // propagate, possibly creating new SIVs and ZIVs
4105 for (unsigned SJ
: Mivs
.set_bits()) {
4106 // SJ is an MIV subscript that's part of the current coupled group
4107 if (propagate(Pair
[SJ
].Src
, Pair
[SJ
].Dst
,
4108 Pair
[SJ
].Loops
, Constraints
, Result
.Consistent
)) {
4109 Pair
[SJ
].Classification
=
4110 classifyPair(Pair
[SJ
].Src
, LI
->getLoopFor(Src
->getParent()),
4111 Pair
[SJ
].Dst
, LI
->getLoopFor(Dst
->getParent()),
4113 switch (Pair
[SJ
].Classification
) {
4114 case Subscript::ZIV
:
4117 case Subscript::SIV
:
4121 case Subscript::RDIV
:
4122 case Subscript::MIV
:
4125 llvm_unreachable("bad subscript classification");
4133 llvm_unreachable("somehow reached end of routine");