//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution analysis
// engine, which is used primarily to analyze expressions involving induction
// variables in loops.
//
// There are several aspects to this library.  First is the representation of
// scalar expressions, which are represented as subclasses of the SCEV class.
// These classes are used to represent certain types of subexpressions that we
// can handle.  We only create one SCEV of a particular shape, so
// pointer-comparisons for equality are legal.
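// (As an illustration of that uniquing guarantee: two separate requests for
// the same expression, e.g. getAddExpr on identical operands, return the same
// SCEV pointer, so clients can test structural equality with a plain pointer
// compare rather than a deep walk.)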
//
// One important aspect of the SCEV objects is that they are never cyclic, even
// if there is a cycle in the dataflow for an expression (ie, a PHI node).  If
// the PHI node is one of the idioms that we can represent (e.g., a polynomial
// recurrence) then we represent it directly as a recurrence node, otherwise we
// represent it as a SCEVUnknown node.
//
// In addition to being able to represent expressions of various types, we also
// have folders that are used to build the *canonical* representation for a
// particular expression.  These folders are capable of using a variety of
// rewrite rules to simplify the expressions.
//
// Once the folders are defined, we can implement the more interesting
// higher-level code, such as the code that recognizes PHI nodes of various
// types, computes the execution count of a loop, etc.
//
// TODO: We should use these routines and value representations to implement
// dependence analysis!
//
//===----------------------------------------------------------------------===//
//
// There are several good references for the techniques used in this analysis.
//
//  Chains of recurrences -- a method to expedite the evaluation
//  of closed-form functions
//  Olaf Bachmann, Paul S. Wang, Eugene V. Zima
//
//  On computational properties of chains of recurrences
//      Eugene V. Zima
//
//  Symbolic Evaluation of Chains of Recurrences for Loop Optimization
//      Robert A. van Engelen
//
//  Efficient Symbolic Analysis for Optimizing Compilers
//      Robert A. van Engelen
//
//  Using the chains of recurrences algebra for data dependence testing and
//  induction variable substitution
//      MS Thesis, Johnie Birch
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "scalar-evolution"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/GlobalAlias.h"
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <algorithm>
using namespace llvm;

STATISTIC(NumArrayLenItCounts,
          "Number of trip counts computed with array length");
STATISTIC(NumTripCountsComputed,
          "Number of loops with predictable loop counts");
STATISTIC(NumTripCountsNotComputed,
          "Number of loops without predictable loop counts");
STATISTIC(NumBruteForceTripCountsComputed,
          "Number of loops with trip counts computed by force");

static cl::opt<unsigned>
MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
                        cl::desc("Maximum number of iterations SCEV will "
                                 "symbolically execute a constant "
                                 "derived loop"),
                        cl::init(100));

INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
                "Scalar Evolution Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
                "Scalar Evolution Analysis", false, true)
char ScalarEvolution::ID = 0;

//===----------------------------------------------------------------------===//
//                           SCEV class definitions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Implementation of the SCEV class.
//

void SCEV::dump() const {
  print(dbgs());
  dbgs() << '\n';
}

bool SCEV::isZero() const {
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    return SC->getValue()->isZero();
  return false;
}

bool SCEV::isOne() const {
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    return SC->getValue()->isOne();
  return false;
}

bool SCEV::isAllOnesValue() const {
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    return SC->getValue()->isAllOnesValue();
  return false;
}

SCEVCouldNotCompute::SCEVCouldNotCompute() :
  SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}

bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const {
  llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  return false;
}

const Type *SCEVCouldNotCompute::getType() const {
  llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  return 0;
}

bool SCEVCouldNotCompute::hasComputableLoopEvolution(const Loop *L) const {
  llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  return false;
}

bool SCEVCouldNotCompute::hasOperand(const SCEV *) const {
  llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  return false;
}

void SCEVCouldNotCompute::print(raw_ostream &OS) const {
  OS << "***COULDNOTCOMPUTE***";
}

bool SCEVCouldNotCompute::classof(const SCEV *S) {
  return S->getSCEVType() == scCouldNotCompute;
}

const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
  FoldingSetNodeID ID;
  ID.AddInteger(scConstant);
  ID.AddPointer(V);
  void *IP = 0;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
  return getConstant(ConstantInt::get(getContext(), Val));
}

const SCEV *
ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) {
  const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
  return getConstant(ConstantInt::get(ITy, V, isSigned));
}

const Type *SCEVConstant::getType() const { return V->getType(); }

void SCEVConstant::print(raw_ostream &OS) const {
  WriteAsOperand(OS, V, false);
}

SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
                           unsigned SCEVTy, const SCEV *op, const Type *ty)
  : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}

bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
  return Op->dominates(BB, DT);
}

bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
  return Op->properlyDominates(BB, DT);
}

SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
                                   const SCEV *op, const Type *ty)
  : SCEVCastExpr(ID, scTruncate, op, ty) {
  assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot truncate non-integer value!");
}

void SCEVTruncateExpr::print(raw_ostream &OS) const {
  OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
}

SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
                                       const SCEV *op, const Type *ty)
  : SCEVCastExpr(ID, scZeroExtend, op, ty) {
  assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot zero extend non-integer value!");
}

void SCEVZeroExtendExpr::print(raw_ostream &OS) const {
  OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
}

SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
                                       const SCEV *op, const Type *ty)
  : SCEVCastExpr(ID, scSignExtend, op, ty) {
  assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot sign extend non-integer value!");
}

void SCEVSignExtendExpr::print(raw_ostream &OS) const {
  OS << "(sext " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
}

void SCEVCommutativeExpr::print(raw_ostream &OS) const {
  const char *OpStr = getOperationStr();
  OS << "(";
  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
    OS << **I;
    if (llvm::next(I) != E)
      OS << OpStr;
  }
  OS << ")";
}

bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
    if (!(*I)->dominates(BB, DT))
      return false;
  return true;
}

bool SCEVNAryExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
    if (!(*I)->properlyDominates(BB, DT))
      return false;
  return true;
}

bool SCEVNAryExpr::isLoopInvariant(const Loop *L) const {
  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
    if (!(*I)->isLoopInvariant(L))
      return false;
  return true;
}

// hasComputableLoopEvolution - N-ary expressions have computable loop
// evolutions iff they have at least one operand that varies with the loop
// and all varying operands are computable.
bool SCEVNAryExpr::hasComputableLoopEvolution(const Loop *L) const {
  bool HasVarying = false;
  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
    const SCEV *S = *I;
    if (!S->isLoopInvariant(L)) {
      if (S->hasComputableLoopEvolution(L))
        HasVarying = true;
      else
        return false;
    }
  }
  return HasVarying;
}

bool SCEVNAryExpr::hasOperand(const SCEV *O) const {
  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
    const SCEV *S = *I;
    if (O == S || S->hasOperand(O))
      return true;
  }
  return false;
}

bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
  return LHS->dominates(BB, DT) && RHS->dominates(BB, DT);
}

bool SCEVUDivExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
  return LHS->properlyDominates(BB, DT) && RHS->properlyDominates(BB, DT);
}

void SCEVUDivExpr::print(raw_ostream &OS) const {
  OS << "(" << *LHS << " /u " << *RHS << ")";
}

const Type *SCEVUDivExpr::getType() const {
  // In most cases the types of LHS and RHS will be the same, but in some
  // crazy cases one or the other may be a pointer. ScalarEvolution doesn't
  // depend on the type for correctness, but handling types carefully can
  // avoid extra casts in the SCEVExpander. The LHS is more likely to be
  // a pointer type than the RHS, so use the RHS' type here.
  return RHS->getType();
}

bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const {
  // Add recurrences are never invariant in the function-body (null loop).
  if (!QueryLoop)
    return false;

  // This recurrence is variant w.r.t. QueryLoop if QueryLoop contains L.
  if (QueryLoop->contains(L))
    return false;

  // This recurrence is invariant w.r.t. QueryLoop if L contains QueryLoop.
  if (L->contains(QueryLoop))
    return true;

  // This recurrence is variant w.r.t. QueryLoop if any of its operands
  // are variant.
  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
    if (!(*I)->isLoopInvariant(QueryLoop))
      return false;

  // Otherwise it's loop-invariant.
  return true;
}

bool
SCEVAddRecExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
  return DT->dominates(L->getHeader(), BB) &&
         SCEVNAryExpr::dominates(BB, DT);
}

bool
SCEVAddRecExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
  // This uses a "dominates" query instead of a "properly dominates" query
  // because the instruction which produces the addrec's value is a PHI, and
  // a PHI effectively properly dominates its entire containing block.
  return DT->dominates(L->getHeader(), BB) &&
         SCEVNAryExpr::properlyDominates(BB, DT);
}

void SCEVAddRecExpr::print(raw_ostream &OS) const {
  OS << "{" << *Operands[0];
  for (unsigned i = 1, e = NumOperands; i != e; ++i)
    OS << ",+," << *Operands[i];
  OS << "}<";
  WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
  OS << ">";
}

void SCEVUnknown::deleted() {
  // Clear this SCEVUnknown from ValuesAtScopes.
  SE->ValuesAtScopes.erase(this);

  // Remove this SCEVUnknown from the uniquing map.
  SE->UniqueSCEVs.RemoveNode(this);

  // Release the value.
  setValPtr(0);
}

void SCEVUnknown::allUsesReplacedWith(Value *New) {
  // Clear this SCEVUnknown from ValuesAtScopes.
  SE->ValuesAtScopes.erase(this);

  // Remove this SCEVUnknown from the uniquing map.
  SE->UniqueSCEVs.RemoveNode(this);

  // Update this SCEVUnknown to point to the new value. This is needed
  // because there may still be outstanding SCEVs which still point to
  // this SCEVUnknown.
  setValPtr(New);
}

bool SCEVUnknown::isLoopInvariant(const Loop *L) const {
  // All non-instruction values are loop invariant.  All instructions are loop
  // invariant if they are not contained in the specified loop.
  // Instructions are never considered invariant in the function body
  // (null loop) because they are defined within the "loop".
  if (Instruction *I = dyn_cast<Instruction>(getValue()))
    return L && !L->contains(I);
  return true;
}

bool SCEVUnknown::dominates(BasicBlock *BB, DominatorTree *DT) const {
  if (Instruction *I = dyn_cast<Instruction>(getValue()))
    return DT->dominates(I->getParent(), BB);
  return true;
}

bool SCEVUnknown::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
  if (Instruction *I = dyn_cast<Instruction>(getValue()))
    return DT->properlyDominates(I->getParent(), BB);
  return true;
}

const Type *SCEVUnknown::getType() const {
  return getValue()->getType();
}
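
// The following three predicates recognize the target-independent idioms that
// sizeof, alignof, and offsetof are lowered to as constant expressions: for
// example, sizeof(T) typically appears as
//   ptrtoint (T* getelementptr (T* null, i32 1) to iN)
// and offsetof(S, Field) as a getelementptr of a null S* with a field index.
// The exact shapes accepted are the ones checked in the code below.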

bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getOperand(0)->isNullValue() &&
            CE->getNumOperands() == 2)
          if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
            if (CI->isOne()) {
              AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
                                 ->getElementType();
              return true;
            }

  return false;
}

bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getOperand(0)->isNullValue()) {
          const Type *Ty =
            cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
          if (const StructType *STy = dyn_cast<StructType>(Ty))
            if (!STy->isPacked() &&
                CE->getNumOperands() == 3 &&
                CE->getOperand(1)->isNullValue()) {
              if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
                if (CI->isOne() &&
                    STy->getNumElements() == 2 &&
                    STy->getElementType(0)->isIntegerTy(1)) {
                  AllocTy = STy->getElementType(1);
                  return true;
                }
            }
        }

  return false;
}

bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getNumOperands() == 3 &&
            CE->getOperand(0)->isNullValue() &&
            CE->getOperand(1)->isNullValue()) {
          const Type *Ty =
            cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
          // Ignore vector types here so that ScalarEvolutionExpander doesn't
          // emit getelementptrs that index into vectors.
          if (Ty->isStructTy() || Ty->isArrayTy()) {
            CTy = Ty;
            FieldNo = CE->getOperand(2);
            return true;
          }
        }

  return false;
}

void SCEVUnknown::print(raw_ostream &OS) const {
  const Type *AllocTy;
  if (isSizeOf(AllocTy)) {
    OS << "sizeof(" << *AllocTy << ")";
    return;
  }
  if (isAlignOf(AllocTy)) {
    OS << "alignof(" << *AllocTy << ")";
    return;
  }

  const Type *CTy;
  Constant *FieldNo;
  if (isOffsetOf(CTy, FieldNo)) {
    OS << "offsetof(" << *CTy << ", ";
    WriteAsOperand(OS, FieldNo, false);
    OS << ")";
    return;
  }

  // Otherwise just print it normally.
  WriteAsOperand(OS, getValue(), false);
}

//===----------------------------------------------------------------------===//
//                               SCEV Utilities
//===----------------------------------------------------------------------===//

namespace {
  /// SCEVComplexityCompare - Return true if the complexity of the LHS is less
  /// than the complexity of the RHS.  This comparator is used to canonicalize
  /// expressions.
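  ///
  /// For example, because constants have the lowest getSCEVType() value, an
  /// add expression over {%x, 42} is canonicalized (and printed) with the
  /// constant first, as (42 + %x), regardless of the order the operands were
  /// supplied in.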
  class SCEVComplexityCompare {
    const LoopInfo *const LI;
  public:
    explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}

    // Return true if LHS is less than RHS, false otherwise.
    bool operator()(const SCEV *LHS, const SCEV *RHS) const {
      return compare(LHS, RHS) < 0;
    }

    // Return negative, zero, or positive, if LHS is less than, equal to, or
    // greater than RHS, respectively. A three-way result allows recursive
    // comparisons to be more efficient.
    int compare(const SCEV *LHS, const SCEV *RHS) const {
      // Fast-path: SCEVs are uniqued so we can do a quick equality check.
      if (LHS == RHS)
        return 0;

      // Primarily, sort the SCEVs by their getSCEVType().
      unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
      if (LType != RType)
        return (int)LType - (int)RType;

      // Aside from the getSCEVType() ordering, the particular ordering
      // isn't very important except that it's beneficial to be consistent,
      // so that (a + b) and (b + a) don't end up as different expressions.
      switch (LType) {
      case scUnknown: {
        const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
        const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);

        // Sort SCEVUnknown values with some loose heuristics. TODO: This is
        // not as complete as it could be.
        const Value *LV = LU->getValue(), *RV = RU->getValue();

        // Order pointer values after integer values. This helps SCEVExpander
        // form GEPs.
        bool LIsPointer = LV->getType()->isPointerTy(),
             RIsPointer = RV->getType()->isPointerTy();
        if (LIsPointer != RIsPointer)
          return (int)LIsPointer - (int)RIsPointer;

        // Compare getValueID values.
        unsigned LID = LV->getValueID(),
                 RID = RV->getValueID();
        if (LID != RID)
          return (int)LID - (int)RID;

        // Sort arguments by their position.
        if (const Argument *LA = dyn_cast<Argument>(LV)) {
          const Argument *RA = cast<Argument>(RV);
          unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
          return (int)LArgNo - (int)RArgNo;
        }

        // For instructions, compare their loop depth, and their operand
        // count.  This is pretty loose.
        if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
          const Instruction *RInst = cast<Instruction>(RV);

          // Compare loop depths.
          const BasicBlock *LParent = LInst->getParent(),
                           *RParent = RInst->getParent();
          if (LParent != RParent) {
            unsigned LDepth = LI->getLoopDepth(LParent),
                     RDepth = LI->getLoopDepth(RParent);
            if (LDepth != RDepth)
              return (int)LDepth - (int)RDepth;
          }

          // Compare the number of operands.
          unsigned LNumOps = LInst->getNumOperands(),
                   RNumOps = RInst->getNumOperands();
          return (int)LNumOps - (int)RNumOps;
        }

        return 0;
      }

      case scConstant: {
        const SCEVConstant *LC = cast<SCEVConstant>(LHS);
        const SCEVConstant *RC = cast<SCEVConstant>(RHS);

        // Compare constant values.
        const APInt &LA = LC->getValue()->getValue();
        const APInt &RA = RC->getValue()->getValue();
        unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
        if (LBitWidth != RBitWidth)
          return (int)LBitWidth - (int)RBitWidth;
        return LA.ult(RA) ? -1 : 1;
      }

      case scAddRecExpr: {
        const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
        const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);

        // Compare addrec loop depths.
        const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
        if (LLoop != RLoop) {
          unsigned LDepth = LLoop->getLoopDepth(),
                   RDepth = RLoop->getLoopDepth();
          if (LDepth != RDepth)
            return (int)LDepth - (int)RDepth;
        }

        // Addrec complexity grows with operand count.
        unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
        if (LNumOps != RNumOps)
          return (int)LNumOps - (int)RNumOps;

        // Lexicographically compare.
        for (unsigned i = 0; i != LNumOps; ++i) {
          long X = compare(LA->getOperand(i), RA->getOperand(i));
          if (X != 0)
            return X;
        }

        return 0;
      }

      case scAddExpr:
      case scMulExpr:
      case scSMaxExpr:
      case scUMaxExpr: {
        const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
        const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);

        // Lexicographically compare n-ary expressions.
        unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
        for (unsigned i = 0; i != LNumOps; ++i) {
          if (i >= RNumOps)
            return 1;
          long X = compare(LC->getOperand(i), RC->getOperand(i));
          if (X != 0)
            return X;
        }
        return (int)LNumOps - (int)RNumOps;
      }

      case scUDivExpr: {
        const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
        const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);

        // Lexicographically compare udiv expressions.
        long X = compare(LC->getLHS(), RC->getLHS());
        if (X != 0)
          return X;
        return compare(LC->getRHS(), RC->getRHS());
      }

      case scTruncate:
      case scZeroExtend:
      case scSignExtend: {
        const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
        const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);

        // Compare cast expressions by operand.
        return compare(LC->getOperand(), RC->getOperand());
      }

      default:
        break;
      }

      llvm_unreachable("Unknown SCEV kind!");
      return 0;
    }
  };
}

/// GroupByComplexity - Given a list of SCEV objects, order them by their
/// complexity, and group objects of the same complexity together by value.
/// When this routine is finished, we know that any duplicates in the vector
/// are consecutive and that complexity is monotonically increasing.
///
/// Note that we take special precautions to ensure that we get deterministic
/// results from this routine.  In other words, we don't want the results of
/// this to depend on where the addresses of various SCEV objects happened to
/// land in memory.
///
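/// For example, given operands {%x, 2, %y, %x} in some initial order, the
/// constant 2 is moved to the front (constants have the lowest complexity)
/// and the two occurrences of %x become adjacent.
///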
static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
                              LoopInfo *LI) {
  if (Ops.size() < 2) return;  // Noop
  if (Ops.size() == 2) {
    // This is the common case, which also happens to be trivially simple.
    // Special case it.
    const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
    if (SCEVComplexityCompare(LI)(RHS, LHS))
      std::swap(LHS, RHS);
    return;
  }

  // Do the rough sort by complexity.
  std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));

  // Now that we are sorted by complexity, group elements of the same
  // complexity.  Note that this is, at worst, N^2, but the vector is likely
  // to be extremely short in practice.  Note that we take this approach
  // because we do not want to depend on the addresses of the objects we are
  // grouping.
  for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
    const SCEV *S = Ops[i];
    unsigned Complexity = S->getSCEVType();

    // If there are any objects of the same complexity and same value as this
    // one, group them.
    for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
      if (Ops[j] == S) { // Found a duplicate.
        // Move it to immediately after i'th element.
        std::swap(Ops[i+1], Ops[j]);
        ++i;   // no need to rescan it.
        if (i == e-2) return;  // Done!
      }
    }
  }
}

//===----------------------------------------------------------------------===//
//                      Simple SCEV method implementations
//===----------------------------------------------------------------------===//

/// BinomialCoefficient - Compute BC(It, K).  The result has width W.
/// Assume, K > 0.
static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
                                       ScalarEvolution &SE,
                                       const Type* ResultTy) {
  // Handle the simplest case efficiently.
  if (K == 1)
    return SE.getTruncateOrZeroExtend(It, ResultTy);

  // We are using the following formula for BC(It, K):
  //
  //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
  //
  // Suppose, W is the bitwidth of the return value.  We must be prepared for
  // overflow.  Hence, we must assure that the result of our computation is
  // equal to the accurate one modulo 2^W.  Unfortunately, division isn't
  // safe in modular arithmetic.
  //
  // However, this code doesn't use exactly that formula; the formula it uses
  // is something like the following, where T is the number of factors of 2 in
  // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
  // exponentiation:
  //
  //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
  //
  // This formula is trivially equivalent to the previous formula.  However,
  // this formula can be implemented much more efficiently.  The trick is that
  // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
  // arithmetic.  To do exact division in modular arithmetic, all we have
  // to do is multiply by the inverse.  Therefore, this step can be done at
  // width W.
  //
  // The next issue is how to safely do the division by 2^T.  The way this
  // is done is by doing the multiplication step at a width of at least W + T
  // bits.  This way, the bottom W+T bits of the product are accurate.  Then,
  // when we perform the division by 2^T (which is equivalent to a right shift
  // by T), the bottom W bits are accurate.  Extra bits are okay; they'll get
  // truncated out after the division by 2^T.
  //
  // In comparison to just directly using the first formula, this technique
  // is much more efficient; using the first formula requires W * K bits,
  // but this formula needs less than W + K bits.  Also, the first formula
  // requires a division step, whereas this formula only requires multiplies
  // and shifts.
  //
  // It doesn't matter whether the subtraction step is done in the calculation
  // width or the input iteration count's width; if the subtraction overflows,
  // the result must be zero anyway.  We prefer here to do it in the width of
  // the induction variable because it helps a lot for certain cases; CodeGen
  // isn't smart enough to ignore the overflow, which leads to much less
  // efficient code if the width of the subtraction is wider than the native
  // machine width.
  //
  // (It's possible to not widen at all by pulling out factors of 2 before
  // the multiplication; for example, K=2 can be calculated as
  // It/2*(It+(It*INT_MIN/INT_MIN)+-1).  However, it requires
  // extra arithmetic, so it's not an obvious win, and it gets
  // much more complicated for K > 3.)
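  //
  // As a concrete illustration of the scheme: for K = 3, K! = 6, so T = 1 and
  // K! / 2^T = 3.  The product It * (It - 1) * (It - 2) is computed at width
  // W + 1, divided by 2^T (a right shift by one bit), truncated to W bits,
  // and then multiplied by the modular inverse of 3 (mod 2^W) to complete the
  // exact division by K!.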

  // Protection from insane SCEVs; this bound is conservative,
  // but it probably doesn't matter.
  if (K > 1000)
    return SE.getCouldNotCompute();

  unsigned W = SE.getTypeSizeInBits(ResultTy);

  // Calculate K! / 2^T and T; we divide out the factors of two before
  // multiplying for calculating K! / 2^T to avoid overflow.
  // Other overflow doesn't matter because we only care about the bottom
  // W bits of the result.
  APInt OddFactorial(W, 1);
  unsigned T = 1;
  for (unsigned i = 3; i <= K; ++i) {
    APInt Mult(W, i);
    unsigned TwoFactors = Mult.countTrailingZeros();
    T += TwoFactors;
    Mult = Mult.lshr(TwoFactors);
    OddFactorial *= Mult;
  }

  // We need at least W + T bits for the multiplication step
  unsigned CalculationBits = W + T;

  // Calculate 2^T, at width T+W.
  APInt DivFactor = APInt(CalculationBits, 1).shl(T);

  // Calculate the multiplicative inverse of K! / 2^T;
  // this multiplication factor will perform the exact division by
  // K! / 2^T.
  APInt Mod = APInt::getSignedMinValue(W+1);
  APInt MultiplyFactor = OddFactorial.zext(W+1);
  MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
  MultiplyFactor = MultiplyFactor.trunc(W);

  // Calculate the product, at width T+W
  const IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
                                                      CalculationBits);
  const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
  for (unsigned i = 1; i != K; ++i) {
    const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
    Dividend = SE.getMulExpr(Dividend,
                             SE.getTruncateOrZeroExtend(S, CalculationTy));
  }

  // Divide by 2^T
  const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));

  // Truncate the result, and divide by K! / 2^T.
  return SE.getMulExpr(SE.getConstant(MultiplyFactor),
                       SE.getTruncateOrZeroExtend(DivResult, ResultTy));
}

/// evaluateAtIteration - Return the value of this chain of recurrences at
/// the specified iteration number.  We can evaluate this recurrence by
/// multiplying each element in the chain by the binomial coefficient
/// corresponding to it.  In other words, we can evaluate {A,+,B,+,C,+,D} as:
///
///   A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
///
/// where BC(It, k) stands for binomial coefficient.
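///
/// For example, the recurrence {0,+,1,+,1} evaluates at iteration It to
/// 0*BC(It,0) + 1*BC(It,1) + 1*BC(It,2), i.e. It + It*(It-1)/2.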
const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
                                                ScalarEvolution &SE) const {
  const SCEV *Result = getStart();
  for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
    // The computation is correct in the face of overflow provided that the
    // multiplication is performed _after_ the evaluation of the binomial
    // coefficient.
    const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
    if (isa<SCEVCouldNotCompute>(Coeff))
      return Coeff;

    Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
  }
  return Result;
}

//===----------------------------------------------------------------------===//
//                    SCEV Expression folder implementations
//===----------------------------------------------------------------------===//

const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
                                             const Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
         "This is not a truncating conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  FoldingSetNodeID ID;
  ID.AddInteger(scTruncate);
  ID.AddPointer(Op);
  ID.AddPointer(Ty);
  void *IP = 0;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;

  // Fold if the operand is constant.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    return getConstant(
      cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(),
                                               getEffectiveSCEVType(Ty))));

  // trunc(trunc(x)) --> trunc(x)
  if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
    return getTruncateExpr(ST->getOperand(), Ty);

  // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
  if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
    return getTruncateOrSignExtend(SS->getOperand(), Ty);

  // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
  if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    return getTruncateOrZeroExtend(SZ->getOperand(), Ty);

  // If the input value is a chrec scev, truncate the chrec's operands.
  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
    SmallVector<const SCEV *, 4> Operands;
    for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
      Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
    return getAddRecExpr(Operands, AddRec->getLoop());
  }

  // As a special case, fold trunc(undef) to undef. We don't want to
  // know too much about SCEVUnknowns, but this special case is handy
  // and harmless.
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
    if (isa<UndefValue>(U->getValue()))
      return getSCEV(UndefValue::get(Ty));

  // The cast wasn't folded; create an explicit cast node.  We can reuse
  // the existing insert position since if we get here, we won't have
  // made any changes which would invalidate it.
  SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
                                                 Op, Ty);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
                                               const Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
         "This is not an extending conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Fold if the operand is constant.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    return getConstant(
      cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(),
                                              getEffectiveSCEVType(Ty))));

  // zext(zext(x)) --> zext(x)
  if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    return getZeroExtendExpr(SZ->getOperand(), Ty);

  // Before doing any expensive analysis, check to see if we've already
  // computed a SCEV for this Op and Ty.
  FoldingSetNodeID ID;
  ID.AddInteger(scZeroExtend);
  ID.AddPointer(Op);
  ID.AddPointer(Ty);
  void *IP = 0;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;

  // If the input value is a chrec scev, and we can prove that the value
  // did not overflow the old, smaller, value, we can zero extend all of the
  // operands (often constants).  This allows analysis of something like
  // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
    if (AR->isAffine()) {
      const SCEV *Start = AR->getStart();
      const SCEV *Step = AR->getStepRecurrence(*this);
      unsigned BitWidth = getTypeSizeInBits(AR->getType());
      const Loop *L = AR->getLoop();

      // If we have special knowledge that this addrec won't overflow,
      // we don't need to do any further analysis.
      if (AR->hasNoUnsignedWrap())
        return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                             getZeroExtendExpr(Step, Ty),
                             L);

      // Check whether the backedge-taken count is SCEVCouldNotCompute.
      // Note that this serves two purposes: It filters out loops that are
      // simply not analyzable, and it covers the case where this code is
      // being called from within backedge-taken count analysis, such that
      // attempting to ask for the backedge-taken count would likely result
      // in infinite recursion.  In the latter case, the analysis code will
      // cope with a conservative value, and it will take care to purge
      // that value once it has finished.
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
      if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
        // Manually compute the final value for AR, checking for
        // overflow.

        // Check whether the backedge-taken count can be losslessly casted to
        // the addrec's type.  The count is always unsigned.
        const SCEV *CastedMaxBECount =
          getTruncateOrZeroExtend(MaxBECount, Start->getType());
        const SCEV *RecastedMaxBECount =
          getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
        if (MaxBECount == RecastedMaxBECount) {
          const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
          // Check whether Start+Step*MaxBECount has no unsigned overflow.
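          // (The test below works by recomputing the sum with every operand
          // zero-extended into a type twice as wide; if zero-extending the
          // narrow sum yields the same wide value, the narrow computation
          // could not have wrapped.)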
          const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
          const SCEV *Add = getAddExpr(Start, ZMul);
          const SCEV *OperandExtendedAdd =
            getAddExpr(getZeroExtendExpr(Start, WideTy),
                       getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
                                  getZeroExtendExpr(Step, WideTy)));
          if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd)
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                                 getZeroExtendExpr(Step, Ty),
                                 L);

          // Similar to above, only this time treat the step value as signed.
          // This covers loops that count down.
          const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
          Add = getAddExpr(Start, SMul);
          OperandExtendedAdd =
            getAddExpr(getZeroExtendExpr(Start, WideTy),
                       getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
                                  getSignExtendExpr(Step, WideTy)));
          if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd)
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                                 getSignExtendExpr(Step, Ty),
                                 L);
        }

        // If the backedge is guarded by a comparison with the pre-inc value
        // the addrec is safe.  Also, if the entry is guarded by a comparison
        // with the start value and the backedge is guarded by a comparison
        // with the post-inc value, the addrec is safe.
        if (isKnownPositive(Step)) {
          const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
                                      getUnsignedRange(Step).getUnsignedMax());
          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
              (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
                                           AR->getPostIncExpr(*this), N)))
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                                 getZeroExtendExpr(Step, Ty),
                                 L);
        } else if (isKnownNegative(Step)) {
          const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
                                      getSignedRange(Step).getSignedMin());
          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
              (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
                                           AR->getPostIncExpr(*this), N)))
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                                 getSignExtendExpr(Step, Ty),
                                 L);
        }
      }
    }

  // The cast wasn't folded; create an explicit cast node.
  // Recompute the insert position, as it may have been invalidated.
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
                                                   Op, Ty);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
                                               const Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
         "This is not an extending conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Fold if the operand is constant.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    return getConstant(
      cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(),
                                              getEffectiveSCEVType(Ty))));

  // sext(sext(x)) --> sext(x)
  if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
    return getSignExtendExpr(SS->getOperand(), Ty);

  // Before doing any expensive analysis, check to see if we've already
  // computed a SCEV for this Op and Ty.
  FoldingSetNodeID ID;
  ID.AddInteger(scSignExtend);
  ID.AddPointer(Op);
  ID.AddPointer(Ty);
  void *IP = 0;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;

  // If the input value is a chrec scev, and we can prove that the value
  // did not overflow the old, smaller, value, we can sign extend all of the
  // operands (often constants).  This allows analysis of something like
  // this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
    if (AR->isAffine()) {
      const SCEV *Start = AR->getStart();
      const SCEV *Step = AR->getStepRecurrence(*this);
      unsigned BitWidth = getTypeSizeInBits(AR->getType());
      const Loop *L = AR->getLoop();

      // If we have special knowledge that this addrec won't overflow,
      // we don't need to do any further analysis.
      if (AR->hasNoSignedWrap())
        return getAddRecExpr(getSignExtendExpr(Start, Ty),
                             getSignExtendExpr(Step, Ty),
                             L);

      // Check whether the backedge-taken count is SCEVCouldNotCompute.
      // Note that this serves two purposes: It filters out loops that are
      // simply not analyzable, and it covers the case where this code is
      // being called from within backedge-taken count analysis, such that
      // attempting to ask for the backedge-taken count would likely result
      // in infinite recursion.  In the latter case, the analysis code will
      // cope with a conservative value, and it will take care to purge
      // that value once it has finished.
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
      if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
        // Manually compute the final value for AR, checking for
        // overflow.

        // Check whether the backedge-taken count can be losslessly casted to
        // the addrec's type.  The count is always unsigned.
        const SCEV *CastedMaxBECount =
          getTruncateOrZeroExtend(MaxBECount, Start->getType());
        const SCEV *RecastedMaxBECount =
          getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
        if (MaxBECount == RecastedMaxBECount) {
          const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
          // Check whether Start+Step*MaxBECount has no signed overflow.
          const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
          const SCEV *Add = getAddExpr(Start, SMul);
          const SCEV *OperandExtendedAdd =
            getAddExpr(getSignExtendExpr(Start, WideTy),
                       getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
                                  getSignExtendExpr(Step, WideTy)));
          if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd)
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getSignExtendExpr(Start, Ty),
                                 getSignExtendExpr(Step, Ty),
                                 L);

          // Similar to above, only this time treat the step value as unsigned.
          // This covers loops that count up with an unsigned step.
          const SCEV *UMul = getMulExpr(CastedMaxBECount, Step);
          Add = getAddExpr(Start, UMul);
          OperandExtendedAdd =
            getAddExpr(getSignExtendExpr(Start, WideTy),
                       getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
                                  getZeroExtendExpr(Step, WideTy)));
          if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd)
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getSignExtendExpr(Start, Ty),
                                 getZeroExtendExpr(Step, Ty),
                                 L);
        }

        // If the backedge is guarded by a comparison with the pre-inc value
        // the addrec is safe.  Also, if the entry is guarded by a comparison
        // with the start value and the backedge is guarded by a comparison
        // with the post-inc value, the addrec is safe.
        if (isKnownPositive(Step)) {
          const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) -
                                      getSignedRange(Step).getSignedMax());
          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) ||
              (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) &&
               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT,
                                           AR->getPostIncExpr(*this), N)))
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getSignExtendExpr(Start, Ty),
                                 getSignExtendExpr(Step, Ty),
                                 L);
        } else if (isKnownNegative(Step)) {
          const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) -
                                      getSignedRange(Step).getSignedMin());
          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) ||
              (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) &&
               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT,
                                           AR->getPostIncExpr(*this), N)))
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getSignExtendExpr(Start, Ty),
                                 getSignExtendExpr(Step, Ty),
                                 L);
        }
      }
    }

  // The cast wasn't folded; create an explicit cast node.
  // Recompute the insert position, as it may have been invalidated.
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
                                                   Op, Ty);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

/// getAnyExtendExpr - Return a SCEV for the given operand extended with
/// unspecified bits out to the given type.
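///
/// The strategy below is heuristic: peel off truncates, prefer whichever of
/// zext/sext folds away (negative constants are sign-extended), push the
/// extension into the operands of an addrec, and fall back to a zero extend
/// when nothing simpler applies.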
const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
                                              const Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
         "This is not an extending conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Sign-extend negative constants.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    if (SC->getValue()->getValue().isNegative())
      return getSignExtendExpr(Op, Ty);

  // Peel off a truncate cast.
  if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
    const SCEV *NewOp = T->getOperand();
    if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
      return getAnyExtendExpr(NewOp, Ty);
    return getTruncateOrNoop(NewOp, Ty);
  }

  // Next try a zext cast.  If the cast is folded, use it.
  const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
  if (!isa<SCEVZeroExtendExpr>(ZExt))
    return ZExt;

  // Next try a sext cast.  If the cast is folded, use it.
  const SCEV *SExt = getSignExtendExpr(Op, Ty);
  if (!isa<SCEVSignExtendExpr>(SExt))
    return SExt;

  // Force the cast to be folded into the operands of an addrec.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
    SmallVector<const SCEV *, 4> Ops;
    for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
         I != E; ++I)
      Ops.push_back(getAnyExtendExpr(*I, Ty));
    return getAddRecExpr(Ops, AR->getLoop());
  }

  // As a special case, fold anyext(undef) to undef.  We don't want to
  // know too much about SCEVUnknowns, but this special case is handy
  // and harmless.
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
    if (isa<UndefValue>(U->getValue()))
      return getSCEV(UndefValue::get(Ty));

  // If the expression is obviously signed, use the sext cast value.
  if (isa<SCEVSMaxExpr>(Op))
    return SExt;

  // Absent any other information, use the zext cast value.
  return ZExt;
}

/// CollectAddOperandsWithScales - Process the given Ops list, which is
/// a list of operands to be added under the given scale, and update the given
/// map.  This is a helper function for getAddExpr.  As an example of
/// what it does, given a sequence of operands that would form an add
/// expression like this:
///
///    m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r)
///
/// where A and B are constants, update the map with these values:
///
///    (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
///
/// and add 13 + A*B*29 to AccumulatedConstant.
/// This will allow getAddExpr to produce this:
///
///    13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
///
/// This form often exposes folding opportunities that are hidden in
/// the original operand list.
///
/// Return true iff it appears that any interesting folding opportunities
/// may be exposed.  This helps getAddExpr short-circuit extra work in
/// the common case where no interesting opportunities are present, and
/// is also used as a check to avoid infinite recursion.
///
static bool
CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
                             SmallVector<const SCEV *, 8> &NewOps,
                             APInt &AccumulatedConstant,
                             const SCEV *const *Ops, size_t NumOperands,
                             const APInt &Scale,
                             ScalarEvolution &SE) {
  bool Interesting = false;

  // Iterate over the add operands.  They are sorted, with constants first.
  unsigned i = 0;
  while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
    ++i;
    // Pull a buried constant out to the outside.
    if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
      Interesting = true;
    AccumulatedConstant += Scale * C->getValue()->getValue();
  }

  // Next comes everything else.  We're especially interested in multiplies
  // here, but they're in the middle, so just visit the rest with one loop.
  for (; i != NumOperands; ++i) {
    const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
    if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
      APInt NewScale =
        Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
      if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
        // A multiplication of a constant with another add; recurse.
        const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
        Interesting |=
          CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
                                       Add->op_begin(), Add->getNumOperands(),
                                       NewScale, SE);
      } else {
        // A multiplication of a constant with some other value.  Update
        // the map.
        SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
        const SCEV *Key = SE.getMulExpr(MulOps);
        std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
          M.insert(std::make_pair(Key, NewScale));
        if (Pair.second) {
          NewOps.push_back(Pair.first->first);
        } else {
          Pair.first->second += NewScale;
          // The map already had an entry for this value, which may indicate
          // a folding opportunity.
          Interesting = true;
        }
      }
    } else {
      // An ordinary operand.  Update the map.
      std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
        M.insert(std::make_pair(Ops[i], Scale));
      if (Pair.second) {
        NewOps.push_back(Pair.first->first);
      } else {
        Pair.first->second += Scale;
        // The map already had an entry for this value, which may indicate
        // a folding opportunity.
        Interesting = true;
      }
    }
  }

  return Interesting;
}

namespace {
  struct APIntCompare {
    bool operator()(const APInt &LHS, const APInt &RHS) const {
      return LHS.ult(RHS);
    }
  };
}

/// getAddExpr - Get a canonical add expression, or something simpler if
/// possible.
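///
/// For example, an operand list containing the constants 2 and 3 plus a value
/// %x folds the constants together so the result is (5 + %x), and repeated
/// operands such as %y + %y become (2 * %y).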
1371 const SCEV
*ScalarEvolution::getAddExpr(SmallVectorImpl
<const SCEV
*> &Ops
,
1372 bool HasNUW
, bool HasNSW
) {
1373 assert(!Ops
.empty() && "Cannot get empty add!");
1374 if (Ops
.size() == 1) return Ops
[0];
1376 const Type
*ETy
= getEffectiveSCEVType(Ops
[0]->getType());
1377 for (unsigned i
= 1, e
= Ops
.size(); i
!= e
; ++i
)
1378 assert(getEffectiveSCEVType(Ops
[i
]->getType()) == ETy
&&
1379 "SCEVAddExpr operand types don't match!");
1382 // If HasNSW is true and all the operands are non-negative, infer HasNUW.
1383 if (!HasNUW
&& HasNSW
) {
1385 for (SmallVectorImpl
<const SCEV
*>::const_iterator I
= Ops
.begin(),
1386 E
= Ops
.end(); I
!= E
; ++I
)
1387 if (!isKnownNonNegative(*I
)) {
1391 if (All
) HasNUW
= true;
1394 // Sort by complexity, this groups all similar expression types together.
1395 GroupByComplexity(Ops
, LI
);
1397 // If there are any constants, fold them together.
1399 if (const SCEVConstant
*LHSC
= dyn_cast
<SCEVConstant
>(Ops
[0])) {
1401 assert(Idx
< Ops
.size());
1402 while (const SCEVConstant
*RHSC
= dyn_cast
<SCEVConstant
>(Ops
[Idx
])) {
1403 // We found two constants, fold them together!
1404 Ops
[0] = getConstant(LHSC
->getValue()->getValue() +
1405 RHSC
->getValue()->getValue());
1406 if (Ops
.size() == 2) return Ops
[0];
1407 Ops
.erase(Ops
.begin()+1); // Erase the folded element
1408 LHSC
= cast
<SCEVConstant
>(Ops
[0]);
1411 // If we are left with a constant zero being added, strip it off.
1412 if (LHSC
->getValue()->isZero()) {
1413 Ops
.erase(Ops
.begin());
1417 if (Ops
.size() == 1) return Ops
[0];
1420 // Okay, check to see if the same value occurs in the operand list more than
1421 // once. If so, merge them together into an multiply expression. Since we
1422 // sorted the list, these values are required to be adjacent.
1423 const Type
*Ty
= Ops
[0]->getType();
1424 bool FoundMatch
= false;
1425 for (unsigned i
= 0, e
= Ops
.size(); i
!= e
-1; ++i
)
1426 if (Ops
[i
] == Ops
[i
+1]) { // X + Y + Y --> X + Y*2
1427 // Scan ahead to count how many equal operands there are.
1429 while (i
+Count
!= e
&& Ops
[i
+Count
] == Ops
[i
])
1431 // Merge the values into a multiply.
1432 const SCEV
*Scale
= getConstant(Ty
, Count
);
1433 const SCEV
*Mul
= getMulExpr(Scale
, Ops
[i
]);
1434 if (Ops
.size() == Count
)
1437 Ops
.erase(Ops
.begin()+i
+1, Ops
.begin()+i
+Count
);
1438 --i
; e
-= Count
- 1;
1442 return getAddExpr(Ops
, HasNUW
, HasNSW
);
1444 // Check for truncates. If all the operands are truncated from the same
1445 // type, see if factoring out the truncate would permit the result to be
1446 // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
1447 // if the contents of the resulting outer trunc fold to something simple.
1448 for (; Idx
< Ops
.size() && isa
<SCEVTruncateExpr
>(Ops
[Idx
]); ++Idx
) {
1449 const SCEVTruncateExpr
*Trunc
= cast
<SCEVTruncateExpr
>(Ops
[Idx
]);
1450 const Type
*DstType
= Trunc
->getType();
1451 const Type
*SrcType
= Trunc
->getOperand()->getType();
1452 SmallVector
<const SCEV
*, 8> LargeOps
;
1454 // Check all the operands to see if they can be represented in the
1455 // source type of the truncate.
1456 for (unsigned i
= 0, e
= Ops
.size(); i
!= e
; ++i
) {
1457 if (const SCEVTruncateExpr
*T
= dyn_cast
<SCEVTruncateExpr
>(Ops
[i
])) {
1458 if (T
->getOperand()->getType() != SrcType
) {
1462 LargeOps
.push_back(T
->getOperand());
1463 } else if (const SCEVConstant
*C
= dyn_cast
<SCEVConstant
>(Ops
[i
])) {
1464 LargeOps
.push_back(getAnyExtendExpr(C
, SrcType
));
1465 } else if (const SCEVMulExpr
*M
= dyn_cast
<SCEVMulExpr
>(Ops
[i
])) {
1466 SmallVector
<const SCEV
*, 8> LargeMulOps
;
1467 for (unsigned j
= 0, f
= M
->getNumOperands(); j
!= f
&& Ok
; ++j
) {
1468 if (const SCEVTruncateExpr
*T
=
1469 dyn_cast
<SCEVTruncateExpr
>(M
->getOperand(j
))) {
1470 if (T
->getOperand()->getType() != SrcType
) {
1474 LargeMulOps
.push_back(T
->getOperand());
1475 } else if (const SCEVConstant
*C
=
1476 dyn_cast
<SCEVConstant
>(M
->getOperand(j
))) {
1477 LargeMulOps
.push_back(getAnyExtendExpr(C
, SrcType
));
1484 LargeOps
.push_back(getMulExpr(LargeMulOps
));
1491 // Evaluate the expression in the larger type.
1492 const SCEV
*Fold
= getAddExpr(LargeOps
, HasNUW
, HasNSW
);
1493 // If it folds to something simple, use it. Otherwise, don't.
1494 if (isa
<SCEVConstant
>(Fold
) || isa
<SCEVUnknown
>(Fold
))
1495 return getTruncateExpr(Fold
, DstType
);
1499 // Skip past any other cast SCEVs.
1500 while (Idx
< Ops
.size() && Ops
[Idx
]->getSCEVType() < scAddExpr
)
1503 // If there are add operands they would be next.
1504 if (Idx
< Ops
.size()) {
1505 bool DeletedAdd
= false;
1506 while (const SCEVAddExpr
*Add
= dyn_cast
<SCEVAddExpr
>(Ops
[Idx
])) {
1507 // If we have an add, expand the add operands onto the end of the operands
1509 Ops
.erase(Ops
.begin()+Idx
);
1510 Ops
.append(Add
->op_begin(), Add
->op_end());
1514 // If we deleted at least one add, we added operands to the end of the list,
1515 // and they are not necessarily sorted. Recurse to resort and resimplify
1516 // any operands we just acquired.
1518 return getAddExpr(Ops
);
1521 // Skip over the add expression until we get to a multiply.
1522 while (Idx
< Ops
.size() && Ops
[Idx
]->getSCEVType() < scMulExpr
)
1525 // Check to see if there are any folding opportunities present with
1526 // operands multiplied by constant values.
1527 if (Idx
< Ops
.size() && isa
<SCEVMulExpr
>(Ops
[Idx
])) {
1528 uint64_t BitWidth
= getTypeSizeInBits(Ty
);
1529 DenseMap
<const SCEV
*, APInt
> M
;
1530 SmallVector
<const SCEV
*, 8> NewOps
;
1531 APInt
AccumulatedConstant(BitWidth
, 0);
1532 if (CollectAddOperandsWithScales(M
, NewOps
, AccumulatedConstant
,
1533 Ops
.data(), Ops
.size(),
1534 APInt(BitWidth
, 1), *this)) {
1535 // Some interesting folding opportunity is present, so its worthwhile to
1536 // re-generate the operands list. Group the operands by constant scale,
1537 // to avoid multiplying by the same constant scale multiple times.
1538 std::map
<APInt
, SmallVector
<const SCEV
*, 4>, APIntCompare
> MulOpLists
;
1539 for (SmallVector
<const SCEV
*, 8>::const_iterator I
= NewOps
.begin(),
1540 E
= NewOps
.end(); I
!= E
; ++I
)
1541 MulOpLists
[M
.find(*I
)->second
].push_back(*I
);
1542 // Re-generate the operands list.
1544 if (AccumulatedConstant
!= 0)
1545 Ops
.push_back(getConstant(AccumulatedConstant
));
1546 for (std::map
<APInt
, SmallVector
<const SCEV
*, 4>, APIntCompare
>::iterator
1547 I
= MulOpLists
.begin(), E
= MulOpLists
.end(); I
!= E
; ++I
)
1549 Ops
.push_back(getMulExpr(getConstant(I
->first
),
1550 getAddExpr(I
->second
)));
1552 return getConstant(Ty
, 0);
1553 if (Ops
.size() == 1)
1555 return getAddExpr(Ops
);
1559   // If we are adding something to a multiply expression, make sure the
1560   // something is not already an operand of the multiply. If so, merge it into
1562   for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
1563     const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
1564     for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
1565       const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
1566       if (isa<SCEVConstant>(MulOpSCEV))
1568       for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
1569         if (MulOpSCEV == Ops[AddOp]) {
1570           // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
1571           const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
1572           if (Mul->getNumOperands() != 2) {
1573             // If the multiply has more than two operands, we must get the
1575             SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
1576                                                 Mul->op_begin()+MulOp);
1577             MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
1578             InnerMul = getMulExpr(MulOps);
1580           const SCEV *One = getConstant(Ty, 1);
1581           const SCEV *AddOne = getAddExpr(One, InnerMul);
1582           const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
1583           if (Ops.size() == 2) return OuterMul;
1585             Ops.erase(Ops.begin()+AddOp);
1586             Ops.erase(Ops.begin()+Idx-1);
1588             Ops.erase(Ops.begin()+Idx);
1589             Ops.erase(Ops.begin()+AddOp-1);
1591           Ops.push_back(OuterMul);
1592           return getAddExpr(Ops);
1595           // Check this multiply against other multiplies being added together.
1596           for (unsigned OtherMulIdx = Idx+1;
1597                OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
1599             const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
1600             // If MulOp occurs in OtherMul, we can fold the two multiplies
1602             for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
1603                  OMulOp != e; ++OMulOp)
1604               if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
1605                 // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
1606                 const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
1607                 if (Mul->getNumOperands() != 2) {
1608                   SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
1609                                                       Mul->op_begin()+MulOp);
1610                   MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
1611                   InnerMul1 = getMulExpr(MulOps);
1613                 const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
1614                 if (OtherMul->getNumOperands() != 2) {
1615                   SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
1616                                                       OtherMul->op_begin()+OMulOp);
1617                   MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
1618                   InnerMul2 = getMulExpr(MulOps);
1620                 const SCEV *InnerMulSum = getAddExpr(InnerMul1, InnerMul2);
1621                 const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
1622                 if (Ops.size() == 2) return OuterMul;
1623                 Ops.erase(Ops.begin()+Idx);
1624                 Ops.erase(Ops.begin()+OtherMulIdx-1);
1625                 Ops.push_back(OuterMul);
1626                 return getAddExpr(Ops);
1632   // If there are any add recurrences in the operands list, see if any other
1633   // added values are loop invariant. If so, we can fold them into the
1635   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
1638   // Scan over all recurrences, trying to fold loop invariants into them.
1639   for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
1640     // Scan all of the other operands to this add and add them to the vector if
1641     // they are loop invariant w.r.t. the recurrence.
1642     SmallVector<const SCEV *, 8> LIOps;
1643     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
1644     const Loop *AddRecLoop = AddRec->getLoop();
1645     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
1646       if (Ops[i]->isLoopInvariant(AddRecLoop)) {
1647         LIOps.push_back(Ops[i]);
1648         Ops.erase(Ops.begin()+i);
1652     // If we found some loop invariants, fold them into the recurrence.
1653     if (!LIOps.empty()) {
1654       // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
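      // (e.g. if x is invariant in the addrec's loop L:
      //  x + 3 + {0,+,1}<L> --> {x+3,+,1}<L>.)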
1655       LIOps.push_back(AddRec->getStart());
1657       SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
1659       AddRecOps[0] = getAddExpr(LIOps);
1661       // Build the new addrec. Propagate the NUW and NSW flags if both the
1662       // outer add and the inner addrec are guaranteed to have no overflow.
1663       const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop,
1664                                          HasNUW && AddRec->hasNoUnsignedWrap(),
1665                                          HasNSW && AddRec->hasNoSignedWrap());
1667       // If all of the other operands were loop invariant, we are done.
1668       if (Ops.size() == 1) return NewRec;
1670       // Otherwise, add the folded AddRec to the non-loop-invariant parts.
1671       for (unsigned i = 0;; ++i)
1672         if (Ops[i] == AddRec) {
1676       return getAddExpr(Ops);
1679     // Okay, if there weren't any loop invariants to be folded, check to see if
1680     // there are multiple AddRec's with the same loop induction variable being
1681     // added together. If so, we can fold them.
1682     for (unsigned OtherIdx = Idx+1;
1683          OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
1685       if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
1686         // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
1687         SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
1689         for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
1691           if (const SCEVAddRecExpr *OtherAddRec =
1692                 dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
1693             if (OtherAddRec->getLoop() == AddRecLoop) {
1694               for (unsigned i = 0, e = OtherAddRec->getNumOperands();
1696                 if (i >= AddRecOps.size()) {
1697                   AddRecOps.append(OtherAddRec->op_begin()+i,
1698                                    OtherAddRec->op_end());
1701                 AddRecOps[i] = getAddExpr(AddRecOps[i],
1702                                           OtherAddRec->getOperand(i));
1704               Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
1706         Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop);
1707         return getAddExpr(Ops);
1710     // Otherwise couldn't fold anything into this recurrence. Move on to the
1714   // Okay, it looks like we really DO need an add expr. Check to see if we
1715   // already have one, otherwise create a new one.
1716   FoldingSetNodeID ID;
1717   ID.AddInteger(scAddExpr);
1718   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
1719     ID.AddPointer(Ops[i]);
1722     static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
1724     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
1725     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
1726     S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
1728     UniqueSCEVs.InsertNode(S, IP);
1730   if (HasNUW) S->setHasNoUnsignedWrap(true);
1731   if (HasNSW) S->setHasNoSignedWrap(true);
1735 /// getMulExpr - Get a canonical multiply expression, or something simpler if
1737 const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
1738                                         bool HasNUW, bool HasNSW) {
1739   assert(!Ops.empty() && "Cannot get empty mul!");
1740   if (Ops.size() == 1) return Ops[0];
1742   const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
1743   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
1744     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
1745            "SCEVMulExpr operand types don't match!");
1748   // If HasNSW is true and all the operands are non-negative, infer HasNUW.
1749   if (!HasNUW && HasNSW) {
1751     for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
1752          E = Ops.end(); I != E; ++I)
1753       if (!isKnownNonNegative(*I)) {
1757     if (All) HasNUW = true;
1760   // Sort by complexity, this groups all similar expression types together.
1761   GroupByComplexity(Ops, LI);
1763   // If there are any constants, fold them together.
1765   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
1767     // C1*(C2+V) -> C1*C2 + C1*V
1768     if (Ops.size() == 2)
1769       if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
1770         if (Add->getNumOperands() == 2 &&
1771             isa<SCEVConstant>(Add->getOperand(0)))
1772           return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
1773                             getMulExpr(LHSC, Add->getOperand(1)));
1776     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
1777       // We found two constants, fold them together!
1778       ConstantInt *Fold = ConstantInt::get(getContext(),
1779                                            LHSC->getValue()->getValue() *
1780                                            RHSC->getValue()->getValue());
1781       Ops[0] = getConstant(Fold);
1782       Ops.erase(Ops.begin()+1);  // Erase the folded element
1783       if (Ops.size() == 1) return Ops[0];
1784       LHSC = cast<SCEVConstant>(Ops[0]);
1787     // If we are left with a constant one being multiplied, strip it off.
1788     if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
1789       Ops.erase(Ops.begin());
1791     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
1792       // If we have a multiply of zero, it will always be zero.
1794     } else if (Ops[0]->isAllOnesValue()) {
1795       // If we have a mul by -1 of an add, try distributing the -1 among the
1797       if (Ops.size() == 2)
1798         if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
1799           SmallVector<const SCEV *, 4> NewOps;
1800           bool AnyFolded = false;
1801           for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
1803             const SCEV *Mul = getMulExpr(Ops[0], *I);
1804             if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
1805             NewOps.push_back(Mul);
1808             return getAddExpr(NewOps);
1812   if (Ops.size() == 1)
1816   // Skip over the add expression until we get to a multiply.
1817   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
1820   // If there are mul operands, inline them all into this expression.
1821   if (Idx < Ops.size()) {
1822     bool DeletedMul = false;
1823     while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
1824       // If we have a mul, expand the mul operands onto the end of the operands
1826       Ops.erase(Ops.begin()+Idx);
1827       Ops.append(Mul->op_begin(), Mul->op_end());
1831     // If we deleted at least one mul, we added operands to the end of the list,
1832     // and they are not necessarily sorted. Recurse to resort and resimplify
1833     // any operands we just acquired.
1835       return getMulExpr(Ops);
1838   // If there are any add recurrences in the operands list, see if any other
1839   // added values are loop invariant. If so, we can fold them into the
1841   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
1844   // Scan over all recurrences, trying to fold loop invariants into them.
1845   for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
1846     // Scan all of the other operands to this mul and add them to the vector if
1847     // they are loop invariant w.r.t. the recurrence.
1848     SmallVector<const SCEV *, 8> LIOps;
1849     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
1850     const Loop *AddRecLoop = AddRec->getLoop();
1851     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
1852       if (Ops[i]->isLoopInvariant(AddRecLoop)) {
1853         LIOps.push_back(Ops[i]);
1854         Ops.erase(Ops.begin()+i);
1858     // If we found some loop invariants, fold them into the recurrence.
1859     if (!LIOps.empty()) {
1860       // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
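      // (e.g. if x is invariant in the addrec's loop L:
      //  x * {2,+,3}<L> --> {2*x,+,3*x}<L>.)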
1861       SmallVector<const SCEV *, 4> NewOps;
1862       NewOps.reserve(AddRec->getNumOperands());
1863       const SCEV *Scale = getMulExpr(LIOps);
1864       for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
1865         NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
1867       // Build the new addrec. Propagate the NUW and NSW flags if both the
1868       // outer mul and the inner addrec are guaranteed to have no overflow.
1869       const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop,
1870                                          HasNUW && AddRec->hasNoUnsignedWrap(),
1871                                          HasNSW && AddRec->hasNoSignedWrap());
1873       // If all of the other operands were loop invariant, we are done.
1874       if (Ops.size() == 1) return NewRec;
1876       // Otherwise, multiply the folded AddRec by the non-loop-invariant parts.
1877       for (unsigned i = 0;; ++i)
1878         if (Ops[i] == AddRec) {
1882       return getMulExpr(Ops);
1885     // Okay, if there weren't any loop invariants to be folded, check to see if
1886     // there are multiple AddRec's with the same loop induction variable being
1887     // multiplied together. If so, we can fold them.
1888     for (unsigned OtherIdx = Idx+1;
1889          OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
1891       if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
1892         // F * G, where F = {A,+,B}<L> and G = {C,+,D}<L> -->
1893         // {A*C,+,F*D + G*B + B*D}<L>
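        // (The step follows from (F+B)*(G+D) - F*G = F*D + G*B + B*D, the
        //  change in the product on each iteration of L.)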
1894         for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
1896           if (const SCEVAddRecExpr *OtherAddRec =
1897                 dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
1898             if (OtherAddRec->getLoop() == AddRecLoop) {
1899               const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
1900               const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart());
1901               const SCEV *B = F->getStepRecurrence(*this);
1902               const SCEV *D = G->getStepRecurrence(*this);
1903               const SCEV *NewStep = getAddExpr(getMulExpr(F, D),
1906               const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep,
1908               if (Ops.size() == 2) return NewAddRec;
1909               Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec);
1910               Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
1912         return getMulExpr(Ops);
1915     // Otherwise couldn't fold anything into this recurrence. Move on to the
1919   // Okay, it looks like we really DO need a mul expr. Check to see if we
1920   // already have one, otherwise create a new one.
1921   FoldingSetNodeID ID;
1922   ID.AddInteger(scMulExpr);
1923   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
1924     ID.AddPointer(Ops[i]);
1927     static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
1929     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
1930     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
1931     S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
1933     UniqueSCEVs.InsertNode(S, IP);
1935   if (HasNUW) S->setHasNoUnsignedWrap(true);
1936   if (HasNSW) S->setHasNoSignedWrap(true);
1940 /// getUDivExpr - Get a canonical unsigned division expression, or something
1941 /// simpler if possible.
1942 const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
1944   assert(getEffectiveSCEVType(LHS->getType()) ==
1945          getEffectiveSCEVType(RHS->getType()) &&
1946          "SCEVUDivExpr operand types don't match!");
1948   if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
1949     if (RHSC->getValue()->equalsInt(1))
1950       return LHS;  // X udiv 1 --> x
1951     // If the denominator is zero, the result of the udiv is undefined. Don't
1952     // try to analyze it, because the resolution chosen here may differ from
1953     // the resolution chosen in other parts of the compiler.
1954     if (!RHSC->getValue()->isZero()) {
1955       // Determine if the division can be folded into the operands of
1957       // TODO: Generalize this to non-constants by using known-bits information.
1958       const Type *Ty = LHS->getType();
1959       unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
1960       unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
1961       // For non-power-of-two values, effectively round the value up to the
1962       // nearest power of two.
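      // (e.g. a divisor of 6 is treated like 8 here, so one extra bit of
      //  headroom is reserved in the extended type.)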
1963       if (!RHSC->getValue()->getValue().isPowerOf2())
1965       const IntegerType *ExtTy =
1966         IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
1967       // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
1968       if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
1969         if (const SCEVConstant *Step =
1970               dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this)))
1971           if (!Step->getValue()->getValue()
1972                 .urem(RHSC->getValue()->getValue()) &&
1973               getZeroExtendExpr(AR, ExtTy) ==
1974               getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
1975                             getZeroExtendExpr(Step, ExtTy),
1977             SmallVector<const SCEV *, 4> Operands;
1978             for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
1979               Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
1980             return getAddRecExpr(Operands, AR->getLoop());
1982       // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
1983       if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
1984         SmallVector<const SCEV *, 4> Operands;
1985         for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
1986           Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
1987         if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
1988           // Find an operand that's safely divisible.
1989           for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
1990             const SCEV *Op = M->getOperand(i);
1991             const SCEV *Div = getUDivExpr(Op, RHSC);
1992             if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
1993               Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
1996               return getMulExpr(Operands);
2000       // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
2001       if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) {
2002         SmallVector<const SCEV *, 4> Operands;
2003         for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
2004           Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
2005         if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
2007           for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
2008             const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
2009             if (isa<SCEVUDivExpr>(Op) ||
2010                 getMulExpr(Op, RHS) != A->getOperand(i))
2012             Operands.push_back(Op);
2014           if (Operands.size() == A->getNumOperands())
2015             return getAddExpr(Operands);
2019       // Fold if both operands are constant.
2020       if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
2021         Constant *LHSCV = LHSC->getValue();
2022         Constant *RHSCV = RHSC->getValue();
2023         return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
2029   FoldingSetNodeID ID;
2030   ID.AddInteger(scUDivExpr);
2034   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
2035   SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
2037   UniqueSCEVs.InsertNode(S, IP);
2042 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
2043 /// Simplify the expression as much as possible.
2044 const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start,
2045                                            const SCEV *Step, const Loop *L,
2046                                            bool HasNUW, bool HasNSW) {
2047   SmallVector<const SCEV *, 4> Operands;
2048   Operands.push_back(Start);
2049   if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
2050     if (StepChrec->getLoop() == L) {
2051       Operands.append(StepChrec->op_begin(), StepChrec->op_end());
2052       return getAddRecExpr(Operands, L);
2055   Operands.push_back(Step);
2056   return getAddRecExpr(Operands, L, HasNUW, HasNSW);
2059 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
2060 /// Simplify the expression as much as possible.
2062 ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
2064                                bool HasNUW, bool HasNSW) {
2065   if (Operands.size() == 1) return Operands[0];
2067   const Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
2068   for (unsigned i = 1, e = Operands.size(); i != e; ++i)
2069     assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
2070            "SCEVAddRecExpr operand types don't match!");
2073   if (Operands.back()->isZero()) {
2074     Operands.pop_back();
2075     return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0} --> X
2078   // It's tempting to want to call getMaxBackedgeTakenCount here and
2079   // use that information to infer NUW and NSW flags. However, computing a
2080   // BE count requires calling getAddRecExpr, so we may not yet have a
2081   // meaningful BE count at this point (and if we don't, we'd be stuck
2082   // with a SCEVCouldNotCompute as the cached BE count).
2084   // If HasNSW is true and all the operands are non-negative, infer HasNUW.
2085   if (!HasNUW && HasNSW) {
2087     for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(),
2088          E = Operands.end(); I != E; ++I)
2089       if (!isKnownNonNegative(*I)) {
2093     if (All) HasNUW = true;
2096   // Canonicalize nested AddRecs by nesting them in order of loop depth.
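  // (When the start operand is itself an addrec for a different loop, the two
  //  recurrences are re-nested into the canonical order, but only if every
  //  operand stays loop-invariant where the addrec requires it.)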
2097   if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
2098     const Loop *NestedLoop = NestedAR->getLoop();
2099     if (L->contains(NestedLoop) ?
2100         (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
2101         (!NestedLoop->contains(L) &&
2102          DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
2103       SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
2104                                                   NestedAR->op_end());
2105       Operands[0] = NestedAR->getStart();
2106       // AddRecs require their operands be loop-invariant with respect to their
2107       // loops. Don't perform this transformation if it would break this
2109       bool AllInvariant = true;
2110       for (unsigned i = 0, e = Operands.size(); i != e; ++i)
2111         if (!Operands[i]->isLoopInvariant(L)) {
2112           AllInvariant = false;
2116         NestedOperands[0] = getAddRecExpr(Operands, L);
2117         AllInvariant = true;
2118         for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
2119           if (!NestedOperands[i]->isLoopInvariant(NestedLoop)) {
2120             AllInvariant = false;
2124           // Ok, both add recurrences are valid after the transformation.
2125           return getAddRecExpr(NestedOperands, NestedLoop, HasNUW, HasNSW);
2127     // Reset Operands to its original state.
2128     Operands[0] = NestedAR;
2132   // Okay, it looks like we really DO need an addrec expr. Check to see if we
2133   // already have one, otherwise create a new one.
2134   FoldingSetNodeID ID;
2135   ID.AddInteger(scAddRecExpr);
2136   for (unsigned i = 0, e = Operands.size(); i != e; ++i)
2137     ID.AddPointer(Operands[i]);
2141     static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
2143     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
2144     std::uninitialized_copy(Operands.begin(), Operands.end(), O);
2145     S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
2146                                            O, Operands.size(), L);
2147     UniqueSCEVs.InsertNode(S, IP);
2149   if (HasNUW) S->setHasNoUnsignedWrap(true);
2150   if (HasNSW) S->setHasNoSignedWrap(true);
2154 const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
2156   SmallVector<const SCEV *, 2> Ops;
2159   return getSMaxExpr(Ops);
2163 ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
2164   assert(!Ops.empty() && "Cannot get empty smax!");
2165   if (Ops.size() == 1) return Ops[0];
2167   const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
2168   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
2169     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
2170            "SCEVSMaxExpr operand types don't match!");
2173   // Sort by complexity, this groups all similar expression types together.
2174   GroupByComplexity(Ops, LI);
2176   // If there are any constants, fold them together.
2178   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
2180     assert(Idx < Ops.size());
2181     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
2182       // We found two constants, fold them together!
2183       ConstantInt *Fold = ConstantInt::get(getContext(),
2184                              APIntOps::smax(LHSC->getValue()->getValue(),
2185                                             RHSC->getValue()->getValue()));
2186       Ops[0] = getConstant(Fold);
2187       Ops.erase(Ops.begin()+1);  // Erase the folded element
2188       if (Ops.size() == 1) return Ops[0];
2189       LHSC = cast<SCEVConstant>(Ops[0]);
2192     // If we are left with a constant minimum-int, strip it off.
2193     if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
2194       Ops.erase(Ops.begin());
2196     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
2197       // If we have an smax with a constant maximum-int, it will always be
2202   if (Ops.size() == 1) return Ops[0];
2205   // Find the first SMax
2206   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
2209   // Check to see if one of the operands is an SMax. If so, expand its operands
2210   // onto our operand list, and recurse to simplify.
2211   if (Idx < Ops.size()) {
2212     bool DeletedSMax = false;
2213     while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
2214       Ops.erase(Ops.begin()+Idx);
2215       Ops.append(SMax->op_begin(), SMax->op_end());
2220       return getSMaxExpr(Ops);
2223   // Okay, check to see if the same value occurs in the operand list twice. If
2224   // so, delete one. Since we sorted the list, these values are required to
2226   for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
2227     // X smax Y smax Y --> X smax Y
2228     // X smax Y --> X, if X is always greater than Y
2229     if (Ops[i] == Ops[i+1] ||
2230         isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
2231       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
2233     } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
2234       Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
2238   if (Ops.size() == 1) return Ops[0];
2240   assert(!Ops.empty() && "Reduced smax down to nothing!");
2242   // Okay, it looks like we really DO need an smax expr. Check to see if we
2243   // already have one, otherwise create a new one.
2244   FoldingSetNodeID ID;
2245   ID.AddInteger(scSMaxExpr);
2246   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2247     ID.AddPointer(Ops[i]);
2249   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
2250   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
2251   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
2252   SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
2254   UniqueSCEVs.InsertNode(S, IP);
2258 const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
2260   SmallVector<const SCEV *, 2> Ops;
2263   return getUMaxExpr(Ops);
2267 ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
2268   assert(!Ops.empty() && "Cannot get empty umax!");
2269   if (Ops.size() == 1) return Ops[0];
2271   const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
2272   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
2273     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
2274            "SCEVUMaxExpr operand types don't match!");
2277   // Sort by complexity, this groups all similar expression types together.
2278   GroupByComplexity(Ops, LI);
2280   // If there are any constants, fold them together.
2282   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
2284     assert(Idx < Ops.size());
2285     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
2286       // We found two constants, fold them together!
2287       ConstantInt *Fold = ConstantInt::get(getContext(),
2288                              APIntOps::umax(LHSC->getValue()->getValue(),
2289                                             RHSC->getValue()->getValue()));
2290       Ops[0] = getConstant(Fold);
2291       Ops.erase(Ops.begin()+1);  // Erase the folded element
2292       if (Ops.size() == 1) return Ops[0];
2293       LHSC = cast<SCEVConstant>(Ops[0]);
2296     // If we are left with a constant minimum-int, strip it off.
2297     if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
2298       Ops.erase(Ops.begin());
2300     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
2301       // If we have an umax with a constant maximum-int, it will always be
2306   if (Ops.size() == 1) return Ops[0];
2309   // Find the first UMax
2310   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
2313   // Check to see if one of the operands is a UMax. If so, expand its operands
2314   // onto our operand list, and recurse to simplify.
2315   if (Idx < Ops.size()) {
2316     bool DeletedUMax = false;
2317     while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
2318       Ops.erase(Ops.begin()+Idx);
2319       Ops.append(UMax->op_begin(), UMax->op_end());
2324       return getUMaxExpr(Ops);
2327   // Okay, check to see if the same value occurs in the operand list twice. If
2328   // so, delete one. Since we sorted the list, these values are required to
2330   for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
2331     // X umax Y umax Y --> X umax Y
2332     // X umax Y --> X, if X is always greater than Y
2333     if (Ops[i] == Ops[i+1] ||
2334         isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
2335       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
2337     } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
2338       Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
2342   if (Ops.size() == 1) return Ops[0];
2344   assert(!Ops.empty() && "Reduced umax down to nothing!");
2346   // Okay, it looks like we really DO need a umax expr. Check to see if we
2347   // already have one, otherwise create a new one.
2348   FoldingSetNodeID ID;
2349   ID.AddInteger(scUMaxExpr);
2350   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2351     ID.AddPointer(Ops[i]);
2353   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
2354   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
2355   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
2356   SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
2358   UniqueSCEVs.InsertNode(S, IP);
2362 const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
2364   // ~smax(~x, ~y) == smin(x, y).
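  // (Since ~z = -1 - z reverses the signed order, taking the smax of the
  //  complements and complementing the result yields the smin.)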
2365   return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
2368 const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
2370   // ~umax(~x, ~y) == umin(x, y)
2371   return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
2374 const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) {
2375   // If we have TargetData, we can bypass creating a target-independent
2376   // constant expression and then folding it back into a ConstantInt.
2377   // This is just a compile-time optimization.
2379     return getConstant(TD->getIntPtrType(getContext()),
2380                        TD->getTypeAllocSize(AllocTy));
2382   Constant *C = ConstantExpr::getSizeOf(AllocTy);
2383   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
2384     if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
2386   const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
2387   return getTruncateOrZeroExtend(getSCEV(C), Ty);
2390 const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) {
2391   Constant *C = ConstantExpr::getAlignOf(AllocTy);
2392   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
2393     if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
2395   const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
2396   return getTruncateOrZeroExtend(getSCEV(C), Ty);
2399 const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy,
2401   // If we have TargetData, we can bypass creating a target-independent
2402   // constant expression and then folding it back into a ConstantInt.
2403   // This is just a compile-time optimization.
2405     return getConstant(TD->getIntPtrType(getContext()),
2406                        TD->getStructLayout(STy)->getElementOffset(FieldNo));
2408   Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
2409   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
2410     if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
2412   const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
2413   return getTruncateOrZeroExtend(getSCEV(C), Ty);
2416 const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy,
2417                                              Constant *FieldNo) {
2418   Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
2419   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
2420     if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
2422   const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
2423   return getTruncateOrZeroExtend(getSCEV(C), Ty);
2426 const SCEV *ScalarEvolution::getUnknown(Value *V) {
2427   // Don't attempt to do anything other than create a SCEVUnknown object
2428   // here. createSCEV only calls getUnknown after checking for all other
2429   // interesting possibilities, and any other code that calls getUnknown
2430   // is doing so in order to hide a value from SCEV canonicalization.
2432   FoldingSetNodeID ID;
2433   ID.AddInteger(scUnknown);
2436   if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
2437     assert(cast<SCEVUnknown>(S)->getValue() == V &&
2438            "Stale SCEVUnknown in uniquing map!");
2441   SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
2443   FirstUnknown = cast<SCEVUnknown>(S);
2444   UniqueSCEVs.InsertNode(S, IP);
2448 //===----------------------------------------------------------------------===//
2449 //            Basic SCEV Analysis and PHI Idiom Recognition Code
2452 /// isSCEVable - Test if values of the given type are analyzable within
2453 /// the SCEV framework. This primarily includes integer types, and it
2454 /// can optionally include pointer types if the ScalarEvolution class
2455 /// has access to target-specific information.
2456 bool ScalarEvolution::isSCEVable(const Type *Ty) const {
2457   // Integers and pointers are always SCEVable.
2458   return Ty->isIntegerTy() || Ty->isPointerTy();
2461 /// getTypeSizeInBits - Return the size in bits of the specified type,
2462 /// for which isSCEVable must return true.
2463 uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
2464   assert(isSCEVable(Ty) && "Type is not SCEVable!");
2466   // If we have a TargetData, use it!
2468     return TD->getTypeSizeInBits(Ty);
2470   // Integer types have fixed sizes.
2471   if (Ty->isIntegerTy())
2472     return Ty->getPrimitiveSizeInBits();
2474   // The only other supported type is pointer. Without TargetData, conservatively
2475   // assume pointers are 64-bit.
2476   assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
2480 /// getEffectiveSCEVType - Return a type with the same bitwidth as
2481 /// the given type and which represents how SCEV will treat the given
2482 /// type, for which isSCEVable must return true. For pointer types,
2483 /// this is the pointer-sized integer type.
2484 const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
2485   assert(isSCEVable(Ty) && "Type is not SCEVable!");
2487   if (Ty->isIntegerTy())
2490   // The only other supported type is pointer.
2491   assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
2492   if (TD) return TD->getIntPtrType(getContext());
2494   // Without TargetData, conservatively assume pointers are 64-bit.
2495   return Type::getInt64Ty(getContext());
2498 const SCEV *ScalarEvolution::getCouldNotCompute() {
2499   return &CouldNotCompute;
2502 /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
2503 /// expression and create a new one.
2504 const SCEV *ScalarEvolution::getSCEV(Value *V) {
2505   assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
2507   ValueExprMapType::const_iterator I = ValueExprMap.find(V);
2508   if (I != ValueExprMap.end()) return I->second;
2509   const SCEV *S = createSCEV(V);
2511   // The process of creating a SCEV for V may have caused other SCEVs
2512   // to have been created, so it's necessary to insert the new entry
2513   // from scratch, rather than trying to remember the insert position
2515   ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
2519 /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
2521 const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
2522   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
2524       cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
2526   const Type *Ty = V->getType();
2527   Ty = getEffectiveSCEVType(Ty);
2528   return getMulExpr(V,
2529                     getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
2532 /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
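/// (This follows from two's complement arithmetic: -V = ~V + 1, so ~V = -1 - V.)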
2533 const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
2534   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
2536       cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
2538   const Type *Ty = V->getType();
2539   Ty = getEffectiveSCEVType(Ty);
2540   const SCEV *AllOnes =
2541     getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
2542   return getMinusSCEV(AllOnes, V);
2545 /// getMinusSCEV - Return a SCEV corresponding to LHS - RHS.
2547 const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS,
2549   // Fast path: X - X --> 0.
2551     return getConstant(LHS->getType(), 0);
2554   return getAddExpr(LHS, getNegativeSCEV(RHS));
2557 /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
2558 /// input value to the specified type. If the type must be extended, it is zero
2561 ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V,
2563   const Type *SrcTy = V->getType();
2564   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
2565          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
2566          "Cannot truncate or zero extend with non-integer arguments!");
2567   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
2568     return V;  // No conversion
2569   if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
2570     return getTruncateExpr(V, Ty);
2571   return getZeroExtendExpr(V, Ty);
2574 /// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
2575 /// input value to the specified type. If the type must be extended, it is sign
2578 ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
2580   const Type *SrcTy = V->getType();
2581   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
2582          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
2583          "Cannot truncate or sign extend with non-integer arguments!");
2584   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
2585     return V;  // No conversion
2586   if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
2587     return getTruncateExpr(V, Ty);
2588   return getSignExtendExpr(V, Ty);
2591 /// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
2592 /// input value to the specified type. If the type must be extended, it is zero
2593 /// extended. The conversion must not be narrowing.
2595 ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
2596   const Type *SrcTy = V->getType();
2597   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
2598          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
2599          "Cannot noop or zero extend with non-integer arguments!");
2600   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
2601          "getNoopOrZeroExtend cannot truncate!");
2602   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
2603     return V;  // No conversion
2604   return getZeroExtendExpr(V, Ty);
2607 /// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
2608 /// input value to the specified type. If the type must be extended, it is sign
2609 /// extended. The conversion must not be narrowing.
2611 ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
2612   const Type *SrcTy = V->getType();
2613   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
2614          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
2615          "Cannot noop or sign extend with non-integer arguments!");
2616   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
2617          "getNoopOrSignExtend cannot truncate!");
2618   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
2619     return V;  // No conversion
2620   return getSignExtendExpr(V, Ty);
2623 /// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
2624 /// the input value to the specified type. If the type must be extended,
2625 /// it is extended with unspecified bits. The conversion must not be
2628 ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
2629   const Type *SrcTy = V->getType();
2630   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
2631          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
2632          "Cannot noop or any extend with non-integer arguments!");
2633   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
2634          "getNoopOrAnyExtend cannot truncate!");
2635   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
2636     return V;  // No conversion
2637   return getAnyExtendExpr(V, Ty);
2640 /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
2641 /// input value to the specified type. The conversion must not be widening.
2643 ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) {
2644   const Type *SrcTy = V->getType();
2645   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
2646          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
2647          "Cannot truncate or noop with non-integer arguments!");
2648   assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
2649          "getTruncateOrNoop cannot extend!");
2650   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
2651     return V;  // No conversion
2652   return getTruncateExpr(V, Ty);
2655 /// getUMaxFromMismatchedTypes - Promote the operands to the wider of
2656 /// the types using zero-extension, and then perform a umax operation
2658 const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
2660   const SCEV *PromotedLHS = LHS;
2661   const SCEV *PromotedRHS = RHS;
2663   if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
2664     PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
2666     PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
2668   return getUMaxExpr(PromotedLHS, PromotedRHS);
2671 /// getUMinFromMismatchedTypes - Promote the operands to the wider of
2672 /// the types using zero-extension, and then perform a umin operation
2674 const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
2676   const SCEV *PromotedLHS = LHS;
2677   const SCEV *PromotedRHS = RHS;
2679   if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
2680     PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
2682     PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
2684   return getUMinExpr(PromotedLHS, PromotedRHS);
2687 /// PushDefUseChildren - Push users of the given Instruction
2688 /// onto the given Worklist.
2690 PushDefUseChildren(Instruction *I,
2691                    SmallVectorImpl<Instruction *> &Worklist) {
2692   // Push the def-use children onto the Worklist stack.
2693   for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
2695     Worklist.push_back(cast<Instruction>(*UI));
2698 /// ForgetSymbolicName - This looks up computed SCEV values for all
2699 /// instructions that depend on the given instruction and removes them from
2700 /// the ValueExprMapType map if they reference SymName. This is used during PHI
2703 ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
2704   SmallVector<Instruction *, 16> Worklist;
2705   PushDefUseChildren(PN, Worklist);
2707   SmallPtrSet<Instruction *, 8> Visited;
2709   while (!Worklist.empty()) {
2710     Instruction *I = Worklist.pop_back_val();
2711     if (!Visited.insert(I)) continue;
2713     ValueExprMapType::iterator It =
2714       ValueExprMap.find(static_cast<Value *>(I));
2715     if (It != ValueExprMap.end()) {
2716       // Short-circuit the def-use traversal if the symbolic name
2717       // ceases to appear in expressions.
2718       if (It->second != SymName && !It->second->hasOperand(SymName))
2721       // SCEVUnknown for a PHI either means that it has an unrecognized
2722       // structure, it's a PHI that's in the process of being computed
2723       // by createNodeForPHI, or it's a single-value PHI. In the first case,
2724       // additional loop trip count information isn't going to change anything.
2725       // In the second case, createNodeForPHI will perform the necessary
2726       // updates on its own when it gets to that point. In the third, we do
2727       // want to forget the SCEVUnknown.
2728       if (!isa<PHINode>(I) ||
2729           !isa<SCEVUnknown>(It->second) ||
2730           (I != PN && It->second == SymName)) {
2731         ValuesAtScopes.erase(It->second);
2732         ValueExprMap.erase(It);
2736     PushDefUseChildren(I, Worklist);
2740 /// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in
2741 /// a loop header, making it a potential recurrence, or it doesn't.
2743 const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
2744   if (const Loop *L = LI->getLoopFor(PN->getParent()))
2745     if (L->getHeader() == PN->getParent()) {
2746       // The loop may have multiple entrances or multiple exits; we can analyze
2747       // this phi as an addrec if it has a unique entry value and a unique
2749       Value *BEValueV = 0, *StartValueV = 0;
2750       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
2751         Value *V = PN->getIncomingValue(i);
2752         if (L->contains(PN->getIncomingBlock(i))) {
2755           } else if (BEValueV != V) {
2759         } else if (!StartValueV) {
2761         } else if (StartValueV != V) {
2766       if (BEValueV && StartValueV) {
2767         // While we are analyzing this PHI node, handle its value symbolically.
2768         const SCEV *SymbolicName = getUnknown(PN);
2769         assert(ValueExprMap.find(PN) == ValueExprMap.end() &&
2770                "PHI node already processed?");
2771         ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
2773         // Using this symbolic name for the PHI, analyze the value coming around
2775         const SCEV *BEValue = getSCEV(BEValueV);
2777         // NOTE: If BEValue is loop invariant, we know that the PHI node just
2778         // has a special value for the first iteration of the loop.
2780         // If the value coming around the backedge is an add with the symbolic
2781         // value we just inserted, then we found a simple induction variable!
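        // (e.g. %iv = phi [ %start, %preheader ], [ %iv.next, %latch ] with
        //  %iv.next = add %iv, %step is recognized as {%start,+,%step}<L>
        //  when %step is invariant in L.)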
2782         if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
2783           // If there is a single occurrence of the symbolic value, replace it
2784           // with a recurrence.
2785           unsigned FoundIndex = Add->getNumOperands();
2786           for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
2787             if (Add->getOperand(i) == SymbolicName)
2788               if (FoundIndex == e) {
2793           if (FoundIndex != Add->getNumOperands()) {
2794             // Create an add with everything but the specified operand.
2795             SmallVector<const SCEV *, 8> Ops;
2796             for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
2797               if (i != FoundIndex)
2798                 Ops.push_back(Add->getOperand(i));
2799             const SCEV *Accum = getAddExpr(Ops);
2801             // This is not a valid addrec if the step amount is varying each
2802             // loop iteration, but is not itself an addrec in this loop.
2803             if (Accum->isLoopInvariant(L) ||
2804                 (isa<SCEVAddRecExpr>(Accum) &&
2805                  cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
2806               bool HasNUW = false;
2807               bool HasNSW = false;
2809               // If the increment doesn't overflow, then neither the addrec nor
2810               // the post-increment will overflow.
2811               if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
2812                 if (OBO->hasNoUnsignedWrap())
2814                 if (OBO->hasNoSignedWrap())
2818               const SCEV *StartVal = getSCEV(StartValueV);
2819               const SCEV *PHISCEV =
2820                 getAddRecExpr(StartVal, Accum, L, HasNUW, HasNSW);
2822               // Since the no-wrap flags are on the increment, they apply to the
2823               // post-incremented value as well.
2824               if (Accum->isLoopInvariant(L))
2825                 (void)getAddRecExpr(getAddExpr(StartVal, Accum),
2826                                     Accum, L, HasNUW, HasNSW);
2828               // Okay, for the entire analysis of this edge we assumed the PHI
2829               // to be symbolic. We now need to go back and purge all of the
2830               // entries for the scalars that use the symbolic expression.
2831               ForgetSymbolicName(PN, SymbolicName);
2832               ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
2836         } else if (const SCEVAddRecExpr *AddRec =
2837                      dyn_cast<SCEVAddRecExpr>(BEValue)) {
2838           // Otherwise, this could be a loop like this:
2839           //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
2840           // In this case, j = {1,+,1} and BEValue is j.
2841           // Because the other in-value of i (0) fits the evolution of BEValue
2842           // i really is an addrec evolution.
2843           if (AddRec->getLoop() == L && AddRec->isAffine()) {
2844             const SCEV *StartVal = getSCEV(StartValueV);
2846             // If StartVal = j.start - j.stride, we can use StartVal as the
2847             // initial step of the addrec evolution.
2848             if (StartVal == getMinusSCEV(AddRec->getOperand(0),
2849                                          AddRec->getOperand(1))) {
2850               const SCEV *PHISCEV =
2851                 getAddRecExpr(StartVal, AddRec->getOperand(1), L);
2853               // Okay, for the entire analysis of this edge we assumed the PHI
2854               // to be symbolic. We now need to go back and purge all of the
2855               // entries for the scalars that use the symbolic expression.
2856               ForgetSymbolicName(PN, SymbolicName);
2857               ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
2865   // If the PHI has a single incoming value, follow that value, unless the
2866   // PHI's incoming blocks are in a different loop, in which case doing so
2867   // risks breaking LCSSA form. Instcombine would normally zap these, but
2868   // it doesn't have DominatorTree information, so it may miss cases.
2869   if (Value *V = PN->hasConstantValue(DT)) {
2870     bool AllSameLoop = true;
2871     Loop *PNLoop = LI->getLoopFor(PN->getParent());
2872     for (size_t i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
2873       if (LI->getLoopFor(PN->getIncomingBlock(i)) != PNLoop) {
2874         AllSameLoop = false;
2881   // If it's not a loop phi, we can't handle it yet.
2882   return getUnknown(PN);
2885 /// createNodeForGEP - Expand GEP instructions into add and multiply
2886 /// operations. This allows them to be analyzed by regular SCEV code.
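/// (For example, a GEP like "getelementptr [10 x i32]* %A, i64 0, i64 %i" is
/// modeled as %A + 4*%i when i32 occupies four bytes on the target.)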
2888 const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
2890   // Don't blindly transfer the inbounds flag from the GEP instruction to the
2891   // Add expression, because the Instruction may be guarded by control flow
2892   // and the no-overflow bits may not be valid for the expression in any
2895   const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
2896   Value *Base = GEP->getOperand(0);
2897   // Don't attempt to analyze GEPs over unsized objects.
2898   if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
2899     return getUnknown(GEP);
2900   const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
2901   gep_type_iterator GTI = gep_type_begin(GEP);
2902   for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
2906     // Compute the (potentially symbolic) offset in bytes for this index.
2907     if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
2908       // For a struct, add the member offset.
2909       unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
2910       const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
2912       // Add the field offset to the running total offset.
2913       TotalOffset = getAddExpr(TotalOffset, FieldOffset);
2915       // For an array, add the element offset, explicitly scaled.
2916       const SCEV *ElementSize = getSizeOfExpr(*GTI);
2917       const SCEV *IndexS = getSCEV(Index);
2918       // Getelementptr indices are signed.
2919       IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
2921       // Multiply the index by the element size to compute the element offset.
2922       const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize);
2924       // Add the element offset to the running total offset.
2925       TotalOffset = getAddExpr(TotalOffset, LocalOffset);
2929   // Get the SCEV for the GEP base.
2930   const SCEV *BaseS = getSCEV(Base);
2932   // Add the total offset from all the GEP indices to the base.
2933   return getAddExpr(BaseS, TotalOffset);
2936 /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
2937 /// guaranteed to end in (at every loop iteration). It is, at the same time,
2938 /// the minimum number of times S is divisible by 2. For example, given {4,+,8}
2939 /// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S.
2941 ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
2942   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
2943     return C->getValue()->getValue().countTrailingZeros();
2945   if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
2946     return std::min(GetMinTrailingZeros(T->getOperand()),
2947                     (uint32_t)getTypeSizeInBits(T->getType()));
2949   if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
2950     uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
2951     return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
2952            getTypeSizeInBits(E->getType()) : OpRes;
2955   if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
2956     uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
2957     return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
2958            getTypeSizeInBits(E->getType()) : OpRes;
2961   if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
2962     // The result is the min of all operands results.
2963     uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
2964     for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
2965       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
2969   if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
2970 // The result is the sum of all operands results.
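    // (e.g. multiplying a value divisible by 4 by one divisible by 8 yields a
    //  value divisible by 32: 2 + 3 = 5 guaranteed trailing zeros.)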
2971     uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
2972     uint32_t BitWidth = getTypeSizeInBits(M->getType());
2973     for (unsigned i = 1, e = M->getNumOperands();
2974          SumOpRes != BitWidth && i != e; ++i)
2975       SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
2980   if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
2981     // The result is the min of all operands results.
2982     uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
2983     for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
2984       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
2988   if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
2989     // The result is the min of all operands results.
2990     uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
2991     for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
2992       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
2996   if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
2997     // The result is the min of all operands results.
2998     uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
2999     for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
3000       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
3004   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
3005     // For a SCEVUnknown, ask ValueTracking.
3006     unsigned BitWidth = getTypeSizeInBits(U->getType());
3007     APInt Mask = APInt::getAllOnesValue(BitWidth);
3008     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
3009     ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones);
3010     return Zeros.countTrailingOnes();
3017 /// getUnsignedRange - Determine the unsigned range for a particular SCEV.
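/// (e.g. the unsigned range of a zero-extension from i8 to i32 is [0, 256).)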
3020 ScalarEvolution::getUnsignedRange(const SCEV
*S
) {
3022 if (const SCEVConstant
*C
= dyn_cast
<SCEVConstant
>(S
))
3023 return ConstantRange(C
->getValue()->getValue());
3025 unsigned BitWidth
= getTypeSizeInBits(S
->getType());
3026 ConstantRange
ConservativeResult(BitWidth
, /*isFullSet=*/true);
3028 // If the value has known zeros, the maximum unsigned value will have those
3029 // known zeros as well.
3030 uint32_t TZ
= GetMinTrailingZeros(S
);
3032 ConservativeResult
=
3033 ConstantRange(APInt::getMinValue(BitWidth
),
3034 APInt::getMaxValue(BitWidth
).lshr(TZ
).shl(TZ
) + 1);
3036 if (const SCEVAddExpr
*Add
= dyn_cast
<SCEVAddExpr
>(S
)) {
3037 ConstantRange X
= getUnsignedRange(Add
->getOperand(0));
3038 for (unsigned i
= 1, e
= Add
->getNumOperands(); i
!= e
; ++i
)
3039 X
= X
.add(getUnsignedRange(Add
->getOperand(i
)));
3040 return ConservativeResult
.intersectWith(X
);
3043 if (const SCEVMulExpr
*Mul
= dyn_cast
<SCEVMulExpr
>(S
)) {
3044 ConstantRange X
= getUnsignedRange(Mul
->getOperand(0));
3045 for (unsigned i
= 1, e
= Mul
->getNumOperands(); i
!= e
; ++i
)
3046 X
= X
.multiply(getUnsignedRange(Mul
->getOperand(i
)));
3047 return ConservativeResult
.intersectWith(X
);
3050 if (const SCEVSMaxExpr
*SMax
= dyn_cast
<SCEVSMaxExpr
>(S
)) {
3051 ConstantRange X
= getUnsignedRange(SMax
->getOperand(0));
3052 for (unsigned i
= 1, e
= SMax
->getNumOperands(); i
!= e
; ++i
)
3053 X
= X
.smax(getUnsignedRange(SMax
->getOperand(i
)));
3054 return ConservativeResult
.intersectWith(X
);
3057 if (const SCEVUMaxExpr
*UMax
= dyn_cast
<SCEVUMaxExpr
>(S
)) {
3058 ConstantRange X
= getUnsignedRange(UMax
->getOperand(0));
3059 for (unsigned i
= 1, e
= UMax
->getNumOperands(); i
!= e
; ++i
)
3060 X
= X
.umax(getUnsignedRange(UMax
->getOperand(i
)));
3061 return ConservativeResult
.intersectWith(X
);
3064 if (const SCEVUDivExpr
*UDiv
= dyn_cast
<SCEVUDivExpr
>(S
)) {
3065 ConstantRange X
= getUnsignedRange(UDiv
->getLHS());
3066 ConstantRange Y
= getUnsignedRange(UDiv
->getRHS());
3067 return ConservativeResult
.intersectWith(X
.udiv(Y
));
3070 if (const SCEVZeroExtendExpr
*ZExt
= dyn_cast
<SCEVZeroExtendExpr
>(S
)) {
3071 ConstantRange X
= getUnsignedRange(ZExt
->getOperand());
3072 return ConservativeResult
.intersectWith(X
.zeroExtend(BitWidth
));
3075 if (const SCEVSignExtendExpr
*SExt
= dyn_cast
<SCEVSignExtendExpr
>(S
)) {
3076 ConstantRange X
= getUnsignedRange(SExt
->getOperand());
3077 return ConservativeResult
.intersectWith(X
.signExtend(BitWidth
));
3080 if (const SCEVTruncateExpr
*Trunc
= dyn_cast
<SCEVTruncateExpr
>(S
)) {
3081 ConstantRange X
= getUnsignedRange(Trunc
->getOperand());
3082 return ConservativeResult
.intersectWith(X
.truncate(BitWidth
));
  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
    // If there's no unsigned wrap, the value will never be less than its
    // initial value.
    if (AddRec->hasNoUnsignedWrap())
      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
        if (!C->getValue()->isZero())
          ConservativeResult =
            ConservativeResult.intersectWith(
              ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));

    // TODO: non-affine addrec
    if (AddRec->isAffine()) {
      const Type *Ty = AddRec->getType();
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
        MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);

        const SCEV *Start = AddRec->getStart();
        const SCEV *Step = AddRec->getStepRecurrence(*this);

        ConstantRange StartRange = getUnsignedRange(Start);
        ConstantRange StepRange = getSignedRange(Step);
        ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
        ConstantRange EndRange =
          StartRange.add(MaxBECountRange.multiply(StepRange));
        // Check for overflow. This must be done with ConstantRange arithmetic
        // because we could be called from within the ScalarEvolution overflow
        // checking code.
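        // For example, with BitWidth == 8, Start == 200, Step == 100 and a
        // max backedge-taken count of 1, EndRange wraps to 44 in 8 bits while
        // the widened recomputation gives 300; the mismatch below makes us
        // fall back to the conservative result.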
        ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1);
        ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
        ConstantRange ExtMaxBECountRange =
          MaxBECountRange.zextOrTrunc(BitWidth*2+1);
        ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
        if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
            ExtEndRange)
          return ConservativeResult;
        APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
                                   EndRange.getUnsignedMin());
        APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
                                   EndRange.getUnsignedMax());
        if (Min.isMinValue() && Max.isMaxValue())
          return ConservativeResult;
        return ConservativeResult.intersectWith(ConstantRange(Min, Max+1));
      }
    }

    return ConservativeResult;
  }
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    // For a SCEVUnknown, ask ValueTracking.
    APInt Mask = APInt::getAllOnesValue(BitWidth);
    APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
    ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
    if (Ones == ~Zeros + 1)
      return ConservativeResult;
    return ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
  }

  return ConservativeResult;
}
/// getSignedRange - Determine the signed range for a particular SCEV.
///
ConstantRange
ScalarEvolution::getSignedRange(const SCEV *S) {
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
    return ConstantRange(C->getValue()->getValue());

  unsigned BitWidth = getTypeSizeInBits(S->getType());
  ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);

  // If the value has known zeros, the maximum signed value will have those
  // known zeros as well.
  uint32_t TZ = GetMinTrailingZeros(S);
  if (TZ != 0)
    ConservativeResult =
      ConstantRange(APInt::getSignedMinValue(BitWidth),
                    APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    ConstantRange X = getSignedRange(Add->getOperand(0));
    for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
      X = X.add(getSignedRange(Add->getOperand(i)));
    return ConservativeResult.intersectWith(X);
  }

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    ConstantRange X = getSignedRange(Mul->getOperand(0));
    for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
      X = X.multiply(getSignedRange(Mul->getOperand(i)));
    return ConservativeResult.intersectWith(X);
  }

  if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
    ConstantRange X = getSignedRange(SMax->getOperand(0));
    for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
      X = X.smax(getSignedRange(SMax->getOperand(i)));
    return ConservativeResult.intersectWith(X);
  }

  if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
    ConstantRange X = getSignedRange(UMax->getOperand(0));
    for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
      X = X.umax(getSignedRange(UMax->getOperand(i)));
    return ConservativeResult.intersectWith(X);
  }

  if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
    ConstantRange X = getSignedRange(UDiv->getLHS());
    ConstantRange Y = getSignedRange(UDiv->getRHS());
    return ConservativeResult.intersectWith(X.udiv(Y));
  }

  if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
    ConstantRange X = getSignedRange(ZExt->getOperand());
    return ConservativeResult.intersectWith(X.zeroExtend(BitWidth));
  }

  if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
    ConstantRange X = getSignedRange(SExt->getOperand());
    return ConservativeResult.intersectWith(X.signExtend(BitWidth));
  }

  if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
    ConstantRange X = getSignedRange(Trunc->getOperand());
    return ConservativeResult.intersectWith(X.truncate(BitWidth));
  }
  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
    // If there's no signed wrap, and all the operands have the same sign or
    // zero, the value won't ever change sign.
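    // For example, every operand of {0,+,1}<nsw> is known non-negative, so
    // the recurrence's range can be intersected with the non-negative half
    // of the range, [0, signed-min), below.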
    if (AddRec->hasNoSignedWrap()) {
      bool AllNonNeg = true;
      bool AllNonPos = true;
      for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
        if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
        if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
      }
      if (AllNonNeg)
        ConservativeResult = ConservativeResult.intersectWith(
          ConstantRange(APInt(BitWidth, 0),
                        APInt::getSignedMinValue(BitWidth)));
      if (AllNonPos)
        ConservativeResult = ConservativeResult.intersectWith(
          ConstantRange(APInt::getSignedMinValue(BitWidth),
                        APInt(BitWidth, 1)));
    }
    // TODO: non-affine addrec
    if (AddRec->isAffine()) {
      const Type *Ty = AddRec->getType();
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
        MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);

        const SCEV *Start = AddRec->getStart();
        const SCEV *Step = AddRec->getStepRecurrence(*this);

        ConstantRange StartRange = getSignedRange(Start);
        ConstantRange StepRange = getSignedRange(Step);
        ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
        ConstantRange EndRange =
          StartRange.add(MaxBECountRange.multiply(StepRange));
        // Check for overflow. This must be done with ConstantRange arithmetic
        // because we could be called from within the ScalarEvolution overflow
        // checking code.
        ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1);
        ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
        ConstantRange ExtMaxBECountRange =
          MaxBECountRange.zextOrTrunc(BitWidth*2+1);
        ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
        if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
            ExtEndRange)
          return ConservativeResult;

        APInt Min = APIntOps::smin(StartRange.getSignedMin(),
                                   EndRange.getSignedMin());
        APInt Max = APIntOps::smax(StartRange.getSignedMax(),
                                   EndRange.getSignedMax());
        if (Min.isMinSignedValue() && Max.isMaxSignedValue())
          return ConservativeResult;
        return ConservativeResult.intersectWith(ConstantRange(Min, Max+1));
      }
    }

    return ConservativeResult;
  }
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    // For a SCEVUnknown, ask ValueTracking.
    if (!U->getValue()->getType()->isIntegerTy() && !TD)
      return ConservativeResult;
    unsigned NS = ComputeNumSignBits(U->getValue(), TD);
    if (NS == 1)
      return ConservativeResult;
    return ConservativeResult.intersectWith(
      ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
                    APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1));
  }

  return ConservativeResult;
}
/// createSCEV - We know that there is no SCEV for the specified value.
/// Analyze the expression.
///
const SCEV *ScalarEvolution::createSCEV(Value *V) {
  if (!isSCEVable(V->getType()))
    return getUnknown(V);

  unsigned Opcode = Instruction::UserOp1;
  if (Instruction *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();

    // Don't attempt to analyze instructions in blocks that aren't
    // reachable. Such instructions don't matter, and they aren't required
    // to obey basic rules for definitions dominating uses which this
    // analysis depends on.
    if (!DT->isReachableFromEntry(I->getParent()))
      return getUnknown(V);
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    Opcode = CE->getOpcode();
  else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
    return getConstant(CI);
  else if (isa<ConstantPointerNull>(V))
    return getConstant(V->getType(), 0);
  else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
    return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
  else
    return getUnknown(V);

  Operator *U = cast<Operator>(V);
  switch (Opcode) {
  case Instruction::Add: {
    // The simple thing to do would be to just call getSCEV on both operands
    // and call getAddExpr with the result. However if we're looking at a
    // bunch of things all added together, this can be quite inefficient,
    // because it leads to N-1 getAddExpr calls for N ultimate operands.
    // Instead, gather up all the operands and make a single getAddExpr call.
    // LLVM IR canonical form means we need only traverse the left operands.
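    //
    // For example, for IR such as
    //   %a = add i32 %x, %y
    //   %b = add i32 %a, %z
    // the loop below gathers {%z, %y, %x} and makes a single getAddExpr call
    // instead of nesting two of them.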
    SmallVector<const SCEV *, 4> AddOps;
    AddOps.push_back(getSCEV(U->getOperand(1)));
    for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
      unsigned Opcode = Op->getValueID() - Value::InstructionVal;
      if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
        break;
      U = cast<Operator>(Op);
      const SCEV *Op1 = getSCEV(U->getOperand(1));
      if (Opcode == Instruction::Sub)
        AddOps.push_back(getNegativeSCEV(Op1));
      else
        AddOps.push_back(Op1);
    }
    AddOps.push_back(getSCEV(U->getOperand(0)));
    return getAddExpr(AddOps);
  }
  case Instruction::Mul: {
    // See the Add code above.
    SmallVector<const SCEV *, 4> MulOps;
    MulOps.push_back(getSCEV(U->getOperand(1)));
    for (Value *Op = U->getOperand(0);
         Op->getValueID() == Instruction::Mul + Value::InstructionVal;
         Op = U->getOperand(0)) {
      U = cast<Operator>(Op);
      MulOps.push_back(getSCEV(U->getOperand(1)));
    }
    MulOps.push_back(getSCEV(U->getOperand(0)));
    return getMulExpr(MulOps);
  }
  case Instruction::UDiv:
    return getUDivExpr(getSCEV(U->getOperand(0)),
                       getSCEV(U->getOperand(1)));
  case Instruction::Sub:
    return getMinusSCEV(getSCEV(U->getOperand(0)),
                        getSCEV(U->getOperand(1)));
  case Instruction::And:
    // For an expression like x&255 that merely masks off the high bits,
    // use zext(trunc(x)) as the SCEV expression.
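    // For instance, "and i32 %x, 255" can be modeled as
    // (zext i8 (trunc i32 %x to i8) to i32).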
    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      if (CI->isNullValue())
        return getSCEV(U->getOperand(1));
      if (CI->isAllOnesValue())
        return getSCEV(U->getOperand(0));
      const APInt &A = CI->getValue();

      // Instcombine's ShrinkDemandedConstant may strip bits out of
      // constants, obscuring what would otherwise be a low-bits mask.
      // Use ComputeMaskedBits to compute what ShrinkDemandedConstant
      // knew about to reconstruct a low-bits mask value.
      unsigned LZ = A.countLeadingZeros();
      unsigned BitWidth = A.getBitWidth();
      APInt AllOnes = APInt::getAllOnesValue(BitWidth);
      APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
      ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD);

      APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);

      if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
        return
          getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
                                IntegerType::get(getContext(), BitWidth - LZ)),
                            U->getType());
    }
    break;
  case Instruction::Or:
    // If the RHS of the Or is a constant, we may have something like:
    // X*4+1 which got turned into X*4|1. Handle this as an Add so loop
    // optimizations will transparently handle this case.
    //
    // In order for this transformation to be safe, the LHS must be of the
    // form X*(2^n) and the Or constant must be less than 2^n.
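    //
    // For example, if %x is known to be a multiple of 4, "or i32 %x, 1" has
    // the same value as "add i32 %x, 1", so {0,+,4} | 1 becomes {1,+,4}.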
    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      const SCEV *LHS = getSCEV(U->getOperand(0));
      const APInt &CIVal = CI->getValue();
      if (GetMinTrailingZeros(LHS) >=
          (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
        // Build a plain add SCEV.
        const SCEV *S = getAddExpr(LHS, getSCEV(CI));
        // If the LHS of the add was an addrec and it has no-wrap flags,
        // transfer the no-wrap flags, since an or won't introduce a wrap.
        if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
          const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
          if (OldAR->hasNoUnsignedWrap())
            const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoUnsignedWrap(true);
          if (OldAR->hasNoSignedWrap())
            const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoSignedWrap(true);
        }
        return S;
      }
    }
    break;
  case Instruction::Xor:
    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      // If the RHS of the xor is a signbit, then this is just an add.
      // Instcombine turns add of signbit into xor as a strength reduction step.
      if (CI->getValue().isSignBit())
        return getAddExpr(getSCEV(U->getOperand(0)),
                          getSCEV(U->getOperand(1)));

      // If the RHS of xor is -1, then this is a not operation.
      if (CI->isAllOnesValue())
        return getNotSCEV(getSCEV(U->getOperand(0)));

      // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
      // This is a variant of the check for xor with -1, and it handles
      // the case where instcombine has trimmed non-demanded bits out
      // of an xor with -1.
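      // For instance, with C == 15, (x & 15) ^ 15 flips exactly the low four
      // bits, so it equals (~x) & 15 and is re-expressed below as a zext of
      // the complemented, truncated operand.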
      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0)))
        if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1)))
          if (BO->getOpcode() == Instruction::And &&
              LCI->getValue() == CI->getValue())
            if (const SCEVZeroExtendExpr *Z =
                  dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
              const Type *UTy = U->getType();
              const SCEV *Z0 = Z->getOperand();
              const Type *Z0Ty = Z0->getType();
              unsigned Z0TySize = getTypeSizeInBits(Z0Ty);

              // If C is a low-bits mask, the zero extend is serving to
              // mask off the high bits. Complement the operand and
              // re-apply the zext.
              if (APIntOps::isMask(Z0TySize, CI->getValue()))
                return getZeroExtendExpr(getNotSCEV(Z0), UTy);

              // If C is a single bit, it may be in the sign-bit position
              // before the zero-extend. In this case, represent the xor
              // using an add, which is equivalent, and re-apply the zext.
              APInt Trunc = APInt(CI->getValue()).trunc(Z0TySize);
              if (APInt(Trunc).zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
                  Trunc.isSignBit())
                return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
                                         UTy);
            }
    }
    break;
  case Instruction::Shl:
    // Turn shift left of a constant amount into a multiply.
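    // For example, "shl i32 %x, 3" is modeled as %x * 8.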
    if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();

      // If the shift count is not less than the bitwidth, the result of
      // the shift is undefined. Don't try to analyze it, because the
      // resolution chosen here may differ from the resolution chosen in
      // other parts of the compiler.
      if (SA->getValue().uge(BitWidth))
        break;

      Constant *X = ConstantInt::get(getContext(),
        APInt(BitWidth, 1).shl(SA->getZExtValue()));
      return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
    }
    break;
  case Instruction::LShr:
    // Turn logical shift right of a constant into an unsigned divide.
    if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();

      // If the shift count is not less than the bitwidth, the result of
      // the shift is undefined. Don't try to analyze it, because the
      // resolution chosen here may differ from the resolution chosen in
      // other parts of the compiler.
      if (SA->getValue().uge(BitWidth))
        break;

      Constant *X = ConstantInt::get(getContext(),
        APInt(BitWidth, 1).shl(SA->getZExtValue()));
      return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
    }
    break;
  case Instruction::AShr:
    // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
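    // For instance, (ashr (shl %x, 24), 24) on i32 sign-extends the low 8
    // bits, so it can be modeled as (sext i8 (trunc i32 %x to i8) to i32).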
    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
      if (Operator *L = dyn_cast<Operator>(U->getOperand(0)))
        if (L->getOpcode() == Instruction::Shl &&
            L->getOperand(1) == U->getOperand(1)) {
          uint64_t BitWidth = getTypeSizeInBits(U->getType());

          // If the shift count is not less than the bitwidth, the result of
          // the shift is undefined. Don't try to analyze it, because the
          // resolution chosen here may differ from the resolution chosen in
          // other parts of the compiler.
          if (CI->getValue().uge(BitWidth))
            break;

          uint64_t Amt = BitWidth - CI->getZExtValue();
          if (Amt == BitWidth)
            return getSCEV(L->getOperand(0)); // shift by zero --> noop
          return
            getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
                                              IntegerType::get(getContext(),
                                                               Amt)),
                              U->getType());
        }
    break;
  case Instruction::Trunc:
    return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());

  case Instruction::ZExt:
    return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());

  case Instruction::SExt:
    return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());

  case Instruction::BitCast:
    // BitCasts are no-op casts so we just eliminate the cast.
    if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
      return getSCEV(U->getOperand(0));
    break;
  // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
  // lead to pointer expressions which cannot safely be expanded to GEPs,
  // because ScalarEvolution doesn't respect the GEP aliasing rules when
  // simplifying integer expressions.

  case Instruction::GetElementPtr:
    return createNodeForGEP(cast<GEPOperator>(U));

  case Instruction::PHI:
    return createNodeForPHI(cast<PHINode>(U));
  case Instruction::Select:
    // This could be a smax or umax that was lowered earlier.
    // Try to recover it.
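    // For example, with "%c = icmp sgt i32 %a, %b" the instruction
    // "%s = select i1 %c, i32 %a, i32 %b" is recognized below as smax(%a, %b).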
    if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) {
      Value *LHS = ICI->getOperand(0);
      Value *RHS = ICI->getOperand(1);
      switch (ICI->getPredicate()) {
      case ICmpInst::ICMP_SLT:
      case ICmpInst::ICMP_SLE:
        std::swap(LHS, RHS);
        // fall through
      case ICmpInst::ICMP_SGT:
      case ICmpInst::ICMP_SGE:
        // a >s b ? a+x : b+x  ->  smax(a, b)+x
        // a >s b ? b+x : a+x  ->  smin(a, b)+x
        if (LHS->getType() == U->getType()) {
          const SCEV *LS = getSCEV(LHS);
          const SCEV *RS = getSCEV(RHS);
          const SCEV *LA = getSCEV(U->getOperand(1));
          const SCEV *RA = getSCEV(U->getOperand(2));
          const SCEV *LDiff = getMinusSCEV(LA, LS);
          const SCEV *RDiff = getMinusSCEV(RA, RS);
          if (LDiff == RDiff)
            return getAddExpr(getSMaxExpr(LS, RS), LDiff);
          LDiff = getMinusSCEV(LA, RS);
          RDiff = getMinusSCEV(RA, LS);
          if (LDiff == RDiff)
            return getAddExpr(getSMinExpr(LS, RS), LDiff);
        }
        break;
      case ICmpInst::ICMP_ULT:
      case ICmpInst::ICMP_ULE:
        std::swap(LHS, RHS);
        // fall through
      case ICmpInst::ICMP_UGT:
      case ICmpInst::ICMP_UGE:
        // a >u b ? a+x : b+x  ->  umax(a, b)+x
        // a >u b ? b+x : a+x  ->  umin(a, b)+x
        if (LHS->getType() == U->getType()) {
          const SCEV *LS = getSCEV(LHS);
          const SCEV *RS = getSCEV(RHS);
          const SCEV *LA = getSCEV(U->getOperand(1));
          const SCEV *RA = getSCEV(U->getOperand(2));
          const SCEV *LDiff = getMinusSCEV(LA, LS);
          const SCEV *RDiff = getMinusSCEV(RA, RS);
          if (LDiff == RDiff)
            return getAddExpr(getUMaxExpr(LS, RS), LDiff);
          LDiff = getMinusSCEV(LA, RS);
          RDiff = getMinusSCEV(RA, LS);
          if (LDiff == RDiff)
            return getAddExpr(getUMinExpr(LS, RS), LDiff);
        }
        break;
      case ICmpInst::ICMP_NE:
        // n != 0 ? n+x : 1+x  ->  umax(n, 1)+x
        if (LHS->getType() == U->getType() &&
            isa<ConstantInt>(RHS) &&
            cast<ConstantInt>(RHS)->isZero()) {
          const SCEV *One = getConstant(LHS->getType(), 1);
          const SCEV *LS = getSCEV(LHS);
          const SCEV *LA = getSCEV(U->getOperand(1));
          const SCEV *RA = getSCEV(U->getOperand(2));
          const SCEV *LDiff = getMinusSCEV(LA, LS);
          const SCEV *RDiff = getMinusSCEV(RA, One);
          if (LDiff == RDiff)
            return getAddExpr(getUMaxExpr(One, LS), LDiff);
        }
        break;
      case ICmpInst::ICMP_EQ:
        // n == 0 ? 1+x : n+x  ->  umax(n, 1)+x
        if (LHS->getType() == U->getType() &&
            isa<ConstantInt>(RHS) &&
            cast<ConstantInt>(RHS)->isZero()) {
          const SCEV *One = getConstant(LHS->getType(), 1);
          const SCEV *LS = getSCEV(LHS);
          const SCEV *LA = getSCEV(U->getOperand(1));
          const SCEV *RA = getSCEV(U->getOperand(2));
          const SCEV *LDiff = getMinusSCEV(LA, One);
          const SCEV *RDiff = getMinusSCEV(RA, LS);
          if (LDiff == RDiff)
            return getAddExpr(getUMaxExpr(One, LS), LDiff);
        }
        break;
      default:
        break;
      }
    }
    break;

  default: // We cannot analyze this expression.
    break;
  }

  return getUnknown(V);
}
//===----------------------------------------------------------------------===//
//                   Iteration Count Computation Code
//

/// getBackedgeTakenCount - If the specified loop has a predictable
/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
/// object. The backedge-taken count is the number of times the loop header
/// will be branched to from within the loop. This is one less than the
/// trip count of the loop, since it doesn't count the first iteration,
/// when the header is branched to from outside the loop.
///
/// Note that it is not valid to call this method on a loop without a
/// loop-invariant backedge-taken count (see
/// hasLoopInvariantBackedgeTakenCount).
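///
/// For example, a loop of the form "for (i = 0; i != 16; ++i)" has a
/// backedge-taken count of 15 and a trip count of 16.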
const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
  return getBackedgeTakenInfo(L).Exact;
}

/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
/// return the least SCEV value that is known never to be less than the
/// actual backedge taken count.
const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
  return getBackedgeTakenInfo(L).Max;
}
/// PushLoopPHIs - Push PHI nodes in the header of the given loop
/// onto the given Worklist.
static void
PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
  BasicBlock *Header = L->getHeader();

  // Push all Loop-header PHIs onto the Worklist stack.
  for (BasicBlock::iterator I = Header->begin();
       PHINode *PN = dyn_cast<PHINode>(I); ++I)
    Worklist.push_back(PN);
}
const ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
  // Initially insert a CouldNotCompute for this loop. If the insertion
  // succeeds, proceed to actually compute a backedge-taken count and
  // update the value. The temporary CouldNotCompute value tells SCEV
  // code elsewhere that it shouldn't attempt to request a new
  // backedge-taken count, which could result in infinite recursion.
  std::pair<std::map<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
    BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
  if (Pair.second) {
    BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L);
    if (BECount.Exact != getCouldNotCompute()) {
      assert(BECount.Exact->isLoopInvariant(L) &&
             BECount.Max->isLoopInvariant(L) &&
             "Computed backedge-taken count isn't loop invariant for loop!");
      ++NumTripCountsComputed;

      // Update the value in the map.
      Pair.first->second = BECount;
    } else {
      if (BECount.Max != getCouldNotCompute())
        // Update the value in the map.
        Pair.first->second = BECount;
      if (isa<PHINode>(L->getHeader()->begin()))
        // Only count loops that have phi nodes as not being computable.
        ++NumTripCountsNotComputed;
    }
    // Now that we know more about the trip count for this loop, forget any
    // existing SCEV values for PHI nodes in this loop since they are only
    // conservative estimates made without the benefit of trip count
    // information. This is similar to the code in forgetLoop, except that
    // it handles SCEVUnknown PHI nodes specially.
    if (BECount.hasAnyInfo()) {
      SmallVector<Instruction *, 16> Worklist;
      PushLoopPHIs(L, Worklist);

      SmallPtrSet<Instruction *, 8> Visited;
      while (!Worklist.empty()) {
        Instruction *I = Worklist.pop_back_val();
        if (!Visited.insert(I)) continue;

        ValueExprMapType::iterator It =
          ValueExprMap.find(static_cast<Value *>(I));
        if (It != ValueExprMap.end()) {
          // SCEVUnknown for a PHI either means that it has an unrecognized
          // structure, or it's a PHI that's in the progress of being computed
          // by createNodeForPHI. In the former case, additional loop trip
          // count information isn't going to change anything. In the latter
          // case, createNodeForPHI will perform the necessary updates on its
          // own when it gets to that point.
          if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) {
            ValuesAtScopes.erase(It->second);
            ValueExprMap.erase(It);
          }
          if (PHINode *PN = dyn_cast<PHINode>(I))
            ConstantEvolutionLoopExitValue.erase(PN);
        }

        PushDefUseChildren(I, Worklist);
      }
    }
  }

  return Pair.first->second;
}
/// forgetLoop - This method should be called by the client when it has
/// changed a loop in a way that may affect ScalarEvolution's ability to
/// compute a trip count, or if the loop is deleted.
void ScalarEvolution::forgetLoop(const Loop *L) {
  // Drop any stored trip count value.
  BackedgeTakenCounts.erase(L);

  // Drop information about expressions based on loop-header PHIs.
  SmallVector<Instruction *, 16> Worklist;
  PushLoopPHIs(L, Worklist);

  SmallPtrSet<Instruction *, 8> Visited;
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    if (!Visited.insert(I)) continue;

    ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
    if (It != ValueExprMap.end()) {
      ValuesAtScopes.erase(It->second);
      ValueExprMap.erase(It);
      if (PHINode *PN = dyn_cast<PHINode>(I))
        ConstantEvolutionLoopExitValue.erase(PN);
    }

    PushDefUseChildren(I, Worklist);
  }

  // Forget all contained loops too, to avoid dangling entries in the
  // ValuesAtScopes map.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
    forgetLoop(*I);
}
/// forgetValue - This method should be called by the client when it has
/// changed a value in a way that may affect its value, or which may
/// disconnect it from a def-use chain linking it to a loop.
void ScalarEvolution::forgetValue(Value *V) {
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return;

  // Drop information about expressions based on loop-header PHIs.
  SmallVector<Instruction *, 16> Worklist;
  Worklist.push_back(I);

  SmallPtrSet<Instruction *, 8> Visited;
  while (!Worklist.empty()) {
    I = Worklist.pop_back_val();
    if (!Visited.insert(I)) continue;

    ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
    if (It != ValueExprMap.end()) {
      ValuesAtScopes.erase(It->second);
      ValueExprMap.erase(It);
      if (PHINode *PN = dyn_cast<PHINode>(I))
        ConstantEvolutionLoopExitValue.erase(PN);
    }

    PushDefUseChildren(I, Worklist);
  }
}
/// ComputeBackedgeTakenCount - Compute the number of times the backedge
/// of the specified loop will execute.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
  SmallVector<BasicBlock *, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Examine all exits and pick the most conservative values.
  const SCEV *BECount = getCouldNotCompute();
  const SCEV *MaxBECount = getCouldNotCompute();
  bool CouldNotComputeBECount = false;
  for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
    BackedgeTakenInfo NewBTI =
      ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);

    if (NewBTI.Exact == getCouldNotCompute()) {
      // We couldn't compute an exact value for this exit, so
      // we won't be able to compute an exact value for the loop.
      CouldNotComputeBECount = true;
      BECount = getCouldNotCompute();
    } else if (!CouldNotComputeBECount) {
      if (BECount == getCouldNotCompute())
        BECount = NewBTI.Exact;
      else
        BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact);
    }
    if (MaxBECount == getCouldNotCompute())
      MaxBECount = NewBTI.Max;
    else if (NewBTI.Max != getCouldNotCompute())
      MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max);
  }

  return BackedgeTakenInfo(BECount, MaxBECount);
}
/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge
/// of the specified loop will execute if it exits via the specified block.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
                                                   BasicBlock *ExitingBlock) {
  // Okay, we've chosen an exiting block. See what condition causes us to
  // exit at this block.
  //
  // FIXME: we should be able to handle switch instructions (with a single exit)
  BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
  if (ExitBr == 0) return getCouldNotCompute();
  assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");

  // At this point, we know we have a conditional branch that determines whether
  // the loop is exited. However, we don't know if the branch is executed each
  // time through the loop. If not, then the execution count of the branch will
  // not be equal to the trip count of the loop.
  //
  // Currently we check for this by checking to see if the Exit branch goes to
  // the loop header. If so, we know it will always execute the same number of
  // times as the loop. We also handle the case where the exit block *is* the
  // loop header. This is common for un-rotated loops.
  //
  // If both of those tests fail, walk up the unique predecessor chain to the
  // header, stopping if there is an edge that doesn't exit the loop. If the
  // header is reached, the execution count of the branch will be equal to the
  // trip count of the loop.
  //
  // More extensive analysis could be done to handle more cases here.
  //
  if (ExitBr->getSuccessor(0) != L->getHeader() &&
      ExitBr->getSuccessor(1) != L->getHeader() &&
      ExitBr->getParent() != L->getHeader()) {
    // The simple checks failed, try climbing the unique predecessor chain
    // up to the header.
    bool Ok = false;
    for (BasicBlock *BB = ExitBr->getParent(); BB; ) {
      BasicBlock *Pred = BB->getUniquePredecessor();
      if (!Pred)
        return getCouldNotCompute();
      TerminatorInst *PredTerm = Pred->getTerminator();
      for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
        BasicBlock *PredSucc = PredTerm->getSuccessor(i);
        if (PredSucc == BB)
          continue;
        // If the predecessor has a successor that isn't BB and isn't
        // outside the loop, assume the worst.
        if (L->contains(PredSucc))
          return getCouldNotCompute();
      }
      if (Pred == L->getHeader()) {
        Ok = true;
        break;
      }
      BB = Pred;
    }
    if (!Ok)
      return getCouldNotCompute();
  }

  // Proceed to the next level to examine the exit condition expression.
  return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(),
                                               ExitBr->getSuccessor(0),
                                               ExitBr->getSuccessor(1));
}
/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the
/// backedge of the specified loop will execute if its exit condition
/// were a conditional branch of ExitCond, TBB, and FBB.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
                                                       Value *ExitCond,
                                                       BasicBlock *TBB,
                                                       BasicBlock *FBB) {
  // Check if the controlling expression for this loop is an And or Or.
  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
    if (BO->getOpcode() == Instruction::And) {
      // Recurse on the operands of the and.
      BackedgeTakenInfo BTI0 =
        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
      BackedgeTakenInfo BTI1 =
        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
      const SCEV *BECount = getCouldNotCompute();
      const SCEV *MaxBECount = getCouldNotCompute();
      if (L->contains(TBB)) {
        // Both conditions must be true for the loop to continue executing.
        // Choose the less conservative count.
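        // For example, if one operand yields an exact count of 10 and the
        // other yields 7, the loop exits as soon as either condition fails,
        // so the combined backedge-taken count is umin(10, 7) == 7.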
        if (BTI0.Exact == getCouldNotCompute() ||
            BTI1.Exact == getCouldNotCompute())
          BECount = getCouldNotCompute();
        else
          BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
        if (BTI0.Max == getCouldNotCompute())
          MaxBECount = BTI1.Max;
        else if (BTI1.Max == getCouldNotCompute())
          MaxBECount = BTI0.Max;
        else
          MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
      } else {
        // Both conditions must be true at the same time for the loop to exit.
        // For now, be conservative.
        assert(L->contains(FBB) && "Loop block has no successor in loop!");
        if (BTI0.Max == BTI1.Max)
          MaxBECount = BTI0.Max;
        if (BTI0.Exact == BTI1.Exact)
          BECount = BTI0.Exact;
      }

      return BackedgeTakenInfo(BECount, MaxBECount);
    }
    if (BO->getOpcode() == Instruction::Or) {
      // Recurse on the operands of the or.
      BackedgeTakenInfo BTI0 =
        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
      BackedgeTakenInfo BTI1 =
        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
      const SCEV *BECount = getCouldNotCompute();
      const SCEV *MaxBECount = getCouldNotCompute();
      if (L->contains(FBB)) {
        // Both conditions must be false for the loop to continue executing.
        // Choose the less conservative count.
        if (BTI0.Exact == getCouldNotCompute() ||
            BTI1.Exact == getCouldNotCompute())
          BECount = getCouldNotCompute();
        else
          BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
        if (BTI0.Max == getCouldNotCompute())
          MaxBECount = BTI1.Max;
        else if (BTI1.Max == getCouldNotCompute())
          MaxBECount = BTI0.Max;
        else
          MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
      } else {
        // Both conditions must be false at the same time for the loop to exit.
        // For now, be conservative.
        assert(L->contains(TBB) && "Loop block has no successor in loop!");
        if (BTI0.Max == BTI1.Max)
          MaxBECount = BTI0.Max;
        if (BTI0.Exact == BTI1.Exact)
          BECount = BTI0.Exact;
      }

      return BackedgeTakenInfo(BECount, MaxBECount);
    }
  }
  // With an icmp, it may be feasible to compute an exact backedge-taken count.
  // Proceed to the next level to examine the icmp.
  if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
    return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB);

  // Check for a constant condition. These are normally stripped out by
  // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
  // preserve the CFG and is temporarily leaving constant conditions
  // in place.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
    if (L->contains(FBB) == !CI->getZExtValue())
      // The backedge is always taken.
      return getCouldNotCompute();
    else
      // The backedge is never taken.
      return getConstant(CI->getType(), 0);
  }

  // If it's not an integer or pointer comparison then compute it the hard way.
  return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
}
/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the
/// backedge of the specified loop will execute if its exit condition
/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
                                                           ICmpInst *ExitCond,
                                                           BasicBlock *TBB,
                                                           BasicBlock *FBB) {
  // If the condition was exit on true, convert the condition to exit on false
  ICmpInst::Predicate Cond;
  if (!L->contains(FBB))
    Cond = ExitCond->getPredicate();
  else
    Cond = ExitCond->getInversePredicate();

  // Handle common loops like: for (X = "string"; *X; ++X)
  if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
    if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
      BackedgeTakenInfo ItCnt =
        ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
      if (ItCnt.hasAnyInfo())
        return ItCnt;
    }

  const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
  const SCEV *RHS = getSCEV(ExitCond->getOperand(1));

  // Try to evaluate any dependencies out of the loop.
  LHS = getSCEVAtScope(LHS, L);
  RHS = getSCEVAtScope(RHS, L);

  // At this point, we would like to compute how many iterations of the
  // loop the predicate will return true for these inputs.
  if (LHS->isLoopInvariant(L) && !RHS->isLoopInvariant(L)) {
    // If there is a loop-invariant, force it into the RHS.
    std::swap(LHS, RHS);
    Cond = ICmpInst::getSwappedPredicate(Cond);
  }

  // Simplify the operands before analyzing them.
  (void)SimplifyICmpOperands(Cond, LHS, RHS);

  // If we have a comparison of a chrec against a constant, try to use value
  // ranges to answer this query.
  if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
    if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
      if (AddRec->getLoop() == L) {
        // Form the constant range.
        ConstantRange CompRange(
            ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));

        const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
        if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
      }
  switch (Cond) {
  case ICmpInst::ICMP_NE: {                     // while (X != Y)
    // Convert to: while (X-Y != 0)
    BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L);
    if (BTI.hasAnyInfo()) return BTI;
    break;
  }
  case ICmpInst::ICMP_EQ: {                     // while (X == Y)
    // Convert to: while (X-Y == 0)
    BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
    if (BTI.hasAnyInfo()) return BTI;
    break;
  }
  case ICmpInst::ICMP_SLT: {
    BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, true);
    if (BTI.hasAnyInfo()) return BTI;
    break;
  }
  case ICmpInst::ICMP_SGT: {
    BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
                                             getNotSCEV(RHS), L, true);
    if (BTI.hasAnyInfo()) return BTI;
    break;
  }
  case ICmpInst::ICMP_ULT: {
    BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, false);
    if (BTI.hasAnyInfo()) return BTI;
    break;
  }
  case ICmpInst::ICMP_UGT: {
    BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
                                             getNotSCEV(RHS), L, false);
    if (BTI.hasAnyInfo()) return BTI;
    break;
  }
  default:
#if 0
    dbgs() << "ComputeBackedgeTakenCount ";
    if (ExitCond->getOperand(0)->getType()->isUnsigned())
      dbgs() << "[unsigned] ";
    dbgs() << *LHS << " "
           << Instruction::getOpcodeName(Instruction::ICmp)
           << " " << *RHS << "\n";
#endif
    break;
  }
  return
    ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
}
static ConstantInt *
EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
                                ScalarEvolution &SE) {
  const SCEV *InVal = SE.getConstant(C);
  const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
  assert(isa<SCEVConstant>(Val) &&
         "Evaluation of SCEV at constant didn't fold correctly?");
  return cast<SCEVConstant>(Val)->getValue();
}
/// GetAddressedElementFromGlobal - Given a global variable with an initializer
/// and a GEP expression (missing the pointer index) indexing into it, return
/// the addressed element of the initializer or null if the index expression is
/// not in range.
static Constant *
GetAddressedElementFromGlobal(GlobalVariable *GV,
                              const std::vector<ConstantInt *> &Indices) {
  Constant *Init = GV->getInitializer();
  for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
    uint64_t Idx = Indices[i]->getZExtValue();
    if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
      assert(Idx < CS->getNumOperands() && "Bad struct index!");
      Init = cast<Constant>(CS->getOperand(Idx));
    } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
      if (Idx >= CA->getNumOperands()) return 0; // Bogus program
      Init = cast<Constant>(CA->getOperand(Idx));
    } else if (isa<ConstantAggregateZero>(Init)) {
      if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
        assert(Idx < STy->getNumElements() && "Bad struct index!");
        Init = Constant::getNullValue(STy->getElementType(Idx));
      } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) {
        if (Idx >= ATy->getNumElements()) return 0; // Bogus program
        Init = Constant::getNullValue(ATy->getElementType());
      } else {
        llvm_unreachable("Unknown constant aggregate type!");
      }
    } else {
      return 0; // Unknown initializer type
    }
  }
  return Init;
}
/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of
/// 'icmp op load X, cst', try to see if we can compute the backedge
/// execution count.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
                                                LoadInst *LI,
                                                Constant *RHS,
                                                const Loop *L,
                                                ICmpInst::Predicate predicate) {
  if (LI->isVolatile()) return getCouldNotCompute();

  // Check to see if the loaded pointer is a getelementptr of a global.
  // TODO: Use SCEV instead of manually grubbing with GEPs.
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
  if (!GEP) return getCouldNotCompute();

  // Make sure that it is really a constant global we are gepping, with an
  // initializer, and make sure the first IDX is really 0.
  GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
      GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
      !cast<Constant>(GEP->getOperand(1))->isNullValue())
    return getCouldNotCompute();

  // Okay, we allow one non-constant index into the GEP instruction.
  Value *VarIdx = 0;
  std::vector<ConstantInt *> Indexes;
  unsigned VarIdxNum = 0;
  for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
    if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
      Indexes.push_back(CI);
    } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
      if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's.
      VarIdx = GEP->getOperand(i);
      VarIdxNum = i-2;
      Indexes.push_back(0);
    }

  // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
  // Check to see if X is a loop variant variable value now.
  const SCEV *Idx = getSCEV(VarIdx);
  Idx = getSCEVAtScope(Idx, L);

  // We can only recognize very limited forms of loop index expressions, in
  // particular, only affine AddRec's like {C1,+,C2}.
  const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
  if (!IdxExpr || !IdxExpr->isAffine() || IdxExpr->isLoopInvariant(L) ||
      !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
      !isa<SCEVConstant>(IdxExpr->getOperand(1)))
    return getCouldNotCompute();

  unsigned MaxSteps = MaxBruteForceIterations;
  for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
    ConstantInt *ItCst = ConstantInt::get(
                           cast<IntegerType>(IdxExpr->getType()), IterationNum);
    ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);

    // Form the GEP offset.
    Indexes[VarIdxNum] = Val;

    Constant *Result = GetAddressedElementFromGlobal(GV, Indexes);
    if (Result == 0) break; // Cannot compute!

    // Evaluate the condition for this iteration.
    Result = ConstantExpr::getICmp(predicate, Result, RHS);
    if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure
    if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
#if 0
      dbgs() << "\n***\n*** Computed loop count " << *ItCst
             << "\n*** From global " << *GV << "*** BB: " << *L->getHeader()
             << "***\n";
#endif
      ++NumArrayLenItCounts;
      return getConstant(ItCst); // Found terminating iteration!
    }
  }
  return getCouldNotCompute();
}
/// CanConstantFold - Return true if we can constant fold an instruction of the
/// specified type, assuming that all operands were constants.
static bool CanConstantFold(const Instruction *I) {
  if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
      isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I))
    return true;

  if (const CallInst *CI = dyn_cast<CallInst>(I))
    if (const Function *F = CI->getCalledFunction())
      return canConstantFoldCallTo(F);
  return false;
}
/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
/// in the loop that V is derived from. We allow arbitrary operations along the
/// way, but the operands of an operation must either be constants or a value
/// derived from a constant PHI. If this expression does not fit with these
/// constraints, return null.
static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
  // If this is not an instruction, or if this is an instruction outside of the
  // loop, it can't be derived from a loop PHI.
  Instruction *I = dyn_cast<Instruction>(V);
  if (I == 0 || !L->contains(I)) return 0;

  if (PHINode *PN = dyn_cast<PHINode>(I)) {
    if (L->getHeader() == I->getParent())
      return PN;
    // We don't currently keep track of the control flow needed to evaluate
    // PHIs, so we cannot handle PHIs inside of loops.
    return 0;
  }

  // If we won't be able to constant fold this expression even if the operands
  // are constants, return early.
  if (!CanConstantFold(I)) return 0;

  // Otherwise, we can evaluate this instruction if all of its operands are
  // constant or derived from a PHI node themselves.
  PHINode *PHI = 0;
  for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op)
    if (!isa<Constant>(I->getOperand(Op))) {
      PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L);
      if (P == 0) return 0; // Not evolving from PHI
      if (PHI == 0)
        PHI = P;
      else if (PHI != P)
        return 0; // Evolving from multiple different PHIs.
    }

  // This is an expression evolving from a constant PHI!
  return PHI;
}
/// EvaluateExpression - Given an expression that passes the
/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
/// in the loop has the value PHIVal. If we can't fold this expression for some
/// reason, return null.
static Constant *EvaluateExpression(Value *V, Constant *PHIVal,
                                    const TargetData *TD) {
  if (isa<PHINode>(V)) return PHIVal;
  if (Constant *C = dyn_cast<Constant>(V)) return C;
  Instruction *I = cast<Instruction>(V);

  std::vector<Constant *> Operands(I->getNumOperands());

  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
    Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD);
    if (Operands[i] == 0) return 0;
  }

  if (const CmpInst *CI = dyn_cast<CmpInst>(I))
    return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
                                           Operands[1], TD);
  return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
                                  &Operands[0], Operands.size(), TD);
}
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
/// in the header of its containing loop, we know the loop executes a
/// constant number of times, and the PHI node is just a recurrence
/// involving constants, fold it.
Constant *
ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
                                                   const APInt &BEs,
                                                   const Loop *L) {
  std::map<PHINode *, Constant *>::const_iterator I =
    ConstantEvolutionLoopExitValue.find(PN);
  if (I != ConstantEvolutionLoopExitValue.end())
    return I->second;

  if (BEs.ugt(MaxBruteForceIterations))
    return ConstantEvolutionLoopExitValue[PN] = 0; // Not going to evaluate it.

  Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];

  // Since the loop is canonicalized, the PHI node must have two entries. One
  // entry must be a constant (coming in from outside of the loop), and the
  // second must be derived from the same PHI.
  bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
  Constant *StartCST =
    dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
  if (StartCST == 0)
    return RetVal = 0; // Must be a constant.

  Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
  if (getConstantEvolvingPHI(BEValue, L) != PN &&
      !isa<Constant>(BEValue))
    return RetVal = 0; // Not derived from same PHI.

  // Execute the loop symbolically to determine the exit value.
  if (BEs.getActiveBits() >= 32)
    return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!

  unsigned NumIterations = BEs.getZExtValue(); // must be in range
  unsigned IterationNum = 0;
  for (Constant *PHIVal = StartCST; ; ++IterationNum) {
    if (IterationNum == NumIterations)
      return RetVal = PHIVal; // Got exit value!

    // Compute the value of the PHI node for the next iteration.
    Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
    if (NextPHI == PHIVal)
      return RetVal = NextPHI; // Stopped evolving!
    if (NextPHI == 0)
      return 0; // Couldn't evaluate!
    PHIVal = NextPHI;
  }
}
/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a
/// constant number of times (the condition evolves only from constants),
/// try to evaluate a few iterations of the loop until the exit condition
/// gets a value of ExitWhen (true or false). If we cannot evaluate the
/// trip count of the loop, return getCouldNotCompute().
const SCEV *
ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
                                                       Value *Cond,
                                                       bool ExitWhen) {
  PHINode *PN = getConstantEvolvingPHI(Cond, L);
  if (PN == 0) return getCouldNotCompute();

  // If the loop is canonicalized, the PHI will have exactly two entries.
  // That's the only form we support here.
  if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();

  // One entry must be a constant (coming in from outside of the loop), and the
  // second must be derived from the same PHI.
  bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
  Constant *StartCST =
    dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
  if (StartCST == 0) return getCouldNotCompute(); // Must be a constant.

  Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
  if (getConstantEvolvingPHI(BEValue, L) != PN &&
      !isa<Constant>(BEValue))
    return getCouldNotCompute(); // Not derived from same PHI.

  // Okay, we found a PHI node that defines the trip count of this loop. Execute
  // the loop symbolically to determine when the condition gets a value of
  // ExitWhen.
  unsigned IterationNum = 0;
  unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
  for (Constant *PHIVal = StartCST;
       IterationNum != MaxIterations; ++IterationNum) {
    ConstantInt *CondVal =
      dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD));

    // Couldn't symbolically evaluate.
    if (!CondVal) return getCouldNotCompute();

    if (CondVal->getValue() == uint64_t(ExitWhen)) {
      ++NumBruteForceTripCountsComputed;
      return getConstant(Type::getInt32Ty(getContext()), IterationNum);
    }

    // Compute the value of the PHI node for the next iteration.
    Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
    if (NextPHI == 0 || NextPHI == PHIVal)
      return getCouldNotCompute(); // Couldn't evaluate or not making progress...
    PHIVal = NextPHI;
  }

  // Too many iterations were needed to evaluate.
  return getCouldNotCompute();
}
/// getSCEVAtScope - Return a SCEV expression for the specified value
/// at the specified scope in the program. The L value specifies a loop
/// nest to evaluate the expression at, where null is the top-level or a
/// specified loop is immediately inside of the loop.
///
/// This method can be used to compute the exit value for a variable defined
/// in a loop by querying what the value will hold in the parent loop.
///
/// In the case that a relevant loop exit value cannot be computed, the
/// original value V is returned.
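///
/// For example, evaluating the recurrence {0,+,1}<L> at L's parent scope
/// yields L's backedge-taken count, i.e. the recurrence's value on the
/// final iteration.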
const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
  // Check to see if we've folded this expression at this loop before.
  std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
  std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
    Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
  if (!Pair.second)
    return Pair.first->second ? Pair.first->second : V;

  // Otherwise compute it.
  const SCEV *C = computeSCEVAtScope(V, L);
  ValuesAtScopes[V][L] = C;
  return C;
}
const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
  if (isa<SCEVConstant>(V)) return V;

  // If this instruction is evolved from a constant-evolving PHI, compute the
  // exit value from the loop without using SCEVs.
  if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
    if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
      const Loop *LI = (*this->LI)[I->getParent()];
      if (LI && LI->getParentLoop() == L) // Looking for loop exit value.
        if (PHINode *PN = dyn_cast<PHINode>(I))
          if (PN->getParent() == LI->getHeader()) {
            // Okay, there is no closed form solution for the PHI node. Check
            // to see if the loop that contains it has a known backedge-taken
            // count. If so, we may be able to force computation of the exit
            // value.
            const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
            if (const SCEVConstant *BTCC =
                  dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
              // Okay, we know how many times the containing loop executes. If
              // this is a constant evolving PHI node, get the final value at
              // the specified iteration number.
              Constant *RV = getConstantEvolutionLoopExitValue(PN,
                                            BTCC->getValue()->getValue(),
                                            LI);
              if (RV) return getSCEV(RV);
            }
          }
      // Okay, this is an expression that we cannot symbolically evaluate
      // into a SCEV. Check to see if it's possible to symbolically evaluate
      // the arguments into constants, and if so, try to constant propagate the
      // result. This is particularly useful for computing loop exit values.
      if (CanConstantFold(I)) {
        SmallVector<Constant *, 4> Operands;
        bool MadeImprovement = false;
        for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
          Value *Op = I->getOperand(i);
          if (Constant *C = dyn_cast<Constant>(Op)) {
            Operands.push_back(C);
            continue;
          }

          // If any of the operands is non-constant and if they are
          // non-integer and non-pointer, don't even try to analyze them
          // with scev techniques.
          if (!isSCEVable(Op->getType()))
            return V;

          const SCEV *OrigV = getSCEV(Op);
          const SCEV *OpV = getSCEVAtScope(OrigV, L);
          MadeImprovement |= OrigV != OpV;

          Constant *C = 0;
          if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV))
            C = SC->getValue();
          if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV))
            C = dyn_cast<Constant>(SU->getValue());
          if (!C) return V;
          if (C->getType() != Op->getType())
            C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
                                                              Op->getType(),
                                                              false),
                                      C, Op->getType());
          Operands.push_back(C);
        }

        // Check to see if getSCEVAtScope actually made an improvement.
        if (MadeImprovement) {
          Constant *C = 0;
          if (const CmpInst *CI = dyn_cast<CmpInst>(I))
            C = ConstantFoldCompareInstOperands(CI->getPredicate(),
                                                Operands[0], Operands[1], TD);
          else
            C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
                                         &Operands[0], Operands.size(), TD);
          if (!C) return V;
          return getSCEV(C);
        }
      }
    }

    // This is some other type of SCEVUnknown, just return it.
    return V;
  }
  if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
    // Avoid performing the look-up in the common case where the specified
    // expression has no loop-variant portions.
    for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
      const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
      if (OpAtScope != Comm->getOperand(i)) {
        // Okay, at least one of these operands is loop variant but might be
        // foldable. Build a new instance of the folded commutative expression.
        SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
                                            Comm->op_begin()+i);
        NewOps.push_back(OpAtScope);

        for (++i; i != e; ++i) {
          OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
          NewOps.push_back(OpAtScope);
        }
        if (isa<SCEVAddExpr>(Comm))
          return getAddExpr(NewOps);
        if (isa<SCEVMulExpr>(Comm))
          return getMulExpr(NewOps);
        if (isa<SCEVSMaxExpr>(Comm))
          return getSMaxExpr(NewOps);
        if (isa<SCEVUMaxExpr>(Comm))
          return getUMaxExpr(NewOps);
        llvm_unreachable("Unknown commutative SCEV type!");
      }
    }
    // If we got here, all operands are loop invariant.
    return Comm;
  }
4580 if (const SCEVUDivExpr
*Div
= dyn_cast
<SCEVUDivExpr
>(V
)) {
4581 const SCEV
*LHS
= getSCEVAtScope(Div
->getLHS(), L
);
4582 const SCEV
*RHS
= getSCEVAtScope(Div
->getRHS(), L
);
4583 if (LHS
== Div
->getLHS() && RHS
== Div
->getRHS())
4584 return Div
; // must be loop invariant
4585 return getUDivExpr(LHS
, RHS
);
4588 // If this is a loop recurrence for a loop that does not contain L, then we
4589 // are dealing with the final value computed by the loop.
4590 if (const SCEVAddRecExpr
*AddRec
= dyn_cast
<SCEVAddRecExpr
>(V
)) {
4591 // First, attempt to evaluate each operand.
4592 // Avoid performing the look-up in the common case where the specified
4593 // expression has no loop-variant portions.
4594 for (unsigned i
= 0, e
= AddRec
->getNumOperands(); i
!= e
; ++i
) {
4595 const SCEV
*OpAtScope
= getSCEVAtScope(AddRec
->getOperand(i
), L
);
4596 if (OpAtScope
== AddRec
->getOperand(i
))
4599 // Okay, at least one of these operands is loop variant but might be
4600 // foldable. Build a new instance of the folded commutative expression.
4601 SmallVector
<const SCEV
*, 8> NewOps(AddRec
->op_begin(),
4602 AddRec
->op_begin()+i
);
4603 NewOps
.push_back(OpAtScope
);
4604 for (++i
; i
!= e
; ++i
)
4605 NewOps
.push_back(getSCEVAtScope(AddRec
->getOperand(i
), L
));
4607 AddRec
= cast
<SCEVAddRecExpr
>(getAddRecExpr(NewOps
, AddRec
->getLoop()));
4611 // If the scope is outside the addrec's loop, evaluate it by using the
4612 // loop exit value of the addrec.
4613 if (!AddRec
->getLoop()->contains(L
)) {
4614 // To evaluate this recurrence, we need to know how many times the AddRec
4615 // loop iterates. Compute this now.
4616 const SCEV
*BackedgeTakenCount
= getBackedgeTakenCount(AddRec
->getLoop());
4617 if (BackedgeTakenCount
== getCouldNotCompute()) return AddRec
;
4619 // Then, evaluate the AddRec.
4620 return AddRec
->evaluateAtIteration(BackedgeTakenCount
, *this);
4626 if (const SCEVZeroExtendExpr
*Cast
= dyn_cast
<SCEVZeroExtendExpr
>(V
)) {
4627 const SCEV
*Op
= getSCEVAtScope(Cast
->getOperand(), L
);
4628 if (Op
== Cast
->getOperand())
4629 return Cast
; // must be loop invariant
4630 return getZeroExtendExpr(Op
, Cast
->getType());
4633 if (const SCEVSignExtendExpr
*Cast
= dyn_cast
<SCEVSignExtendExpr
>(V
)) {
4634 const SCEV
*Op
= getSCEVAtScope(Cast
->getOperand(), L
);
4635 if (Op
== Cast
->getOperand())
4636 return Cast
; // must be loop invariant
4637 return getSignExtendExpr(Op
, Cast
->getType());
4640 if (const SCEVTruncateExpr
*Cast
= dyn_cast
<SCEVTruncateExpr
>(V
)) {
4641 const SCEV
*Op
= getSCEVAtScope(Cast
->getOperand(), L
);
4642 if (Op
== Cast
->getOperand())
4643 return Cast
; // must be loop invariant
4644 return getTruncateExpr(Op
, Cast
->getType());
4647 llvm_unreachable("Unknown SCEV type!");
/// getSCEVAtScope - This is a convenience function which does
/// getSCEVAtScope(getSCEV(V), L).
const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
  return getSCEVAtScope(getSCEV(V), L);
}
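// Illustrative use of the interface above (a sketch, not called from this
// file): given an instruction IndVar whose SCEV is the recurrence
// {Start,+,Step}<L>, its value after L exits can be queried as
//
//   const SCEV *ExitValue = SE.getSCEVAtScope(SE.getSCEV(IndVar),
//                                             L->getParentLoop());
//
// computeSCEVAtScope then folds the recurrence with evaluateAtIteration
// using L's backedge-taken count, when that count is computable.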
/// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the
/// following equation:
///
///     A * X = B (mod N)
///
/// where N = 2^BW and BW is the common bit width of A and B. The signedness of
/// A and B isn't important.
///
/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
                                                ScalarEvolution &SE) {
  uint32_t BW = A.getBitWidth();
  assert(BW == B.getBitWidth() && "Bit widths must be the same.");
  assert(A != 0 && "A must be non-zero.");

  // 1. D = gcd(A, N)
  //
  // The gcd of A and N may have only one prime factor: 2. The number of
  // trailing zeros in A is its multiplicity.
  uint32_t Mult2 = A.countTrailingZeros();
  // D = 2^Mult2

  // 2. Check if B is divisible by D.
  //
  // B is divisible by D if and only if the multiplicity of prime factor 2 for B
  // is not less than multiplicity of this prime factor for D.
  if (B.countTrailingZeros() < Mult2)
    return SE.getCouldNotCompute();

  // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
  // modulo (N / D).
  //
  // (N / D) may need BW+1 bits in its representation.  Hence, we'll use this
  // bit width during computations.
  APInt AD = A.lshr(Mult2).zext(BW + 1);  // AD = A / D
  APInt Mod(BW + 1, 0);
  Mod.set(BW - Mult2);  // Mod = N / D
  APInt I = AD.multiplicativeInverse(Mod);

  // 4. Compute the minimum unsigned root of the equation:
  // I * (B / D) mod (N / D)
  APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod);

  // The result is guaranteed to be less than 2^BW so we may truncate it to BW
  // bits.
  return SE.getConstant(Result.trunc(BW));
}
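// Worked example for the algorithm above (illustration only): solve
// 4*X == 12 (mod 256), i.e. A = 4, B = 12, BW = 8.  D = gcd(4, 256) = 4,
// so Mult2 = 2; B also has 2 trailing zeros, so a solution exists.  Then
// AD = 4>>2 = 1, Mod = 2^(8-2) = 64, I = 1 (the inverse of 1 mod 64), and
// Result = (1 * (12>>2)) mod 64 = 3.  Indeed 4*3 == 12 (mod 256), and 3 is
// the minimum unsigned root (the others are 3+64, 3+128, ...).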
/// SolveQuadraticEquation - Find the roots of the quadratic equation for the
/// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which
/// might be the same) or two SCEVCouldNotCompute objects.
///
static std::pair<const SCEV *,const SCEV *>
SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
  assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
  const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
  const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
  const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));

  // We currently can only solve this if the coefficients are constants.
  if (!LC || !MC || !NC) {
    const SCEV *CNC = SE.getCouldNotCompute();
    return std::make_pair(CNC, CNC);
  }

  uint32_t BitWidth = LC->getValue()->getValue().getBitWidth();
  const APInt &L = LC->getValue()->getValue();
  const APInt &M = MC->getValue()->getValue();
  const APInt &N = NC->getValue()->getValue();
  APInt Two(BitWidth, 2);
  APInt Four(BitWidth, 4);

  {
    using namespace APIntOps;
    const APInt &C = L;
    // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
    // The B coefficient is M-N/2
    APInt B(M);
    B -= sdiv(N, Two);

    // The A coefficient is N/2
    APInt A(N.sdiv(Two));

    // Compute the B^2-4ac term.
    APInt SqrtTerm(B);
    SqrtTerm *= B;
    SqrtTerm -= Four * (A * C);

    // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
    // integer value or else APInt::sqrt() will assert.
    APInt SqrtVal(SqrtTerm.sqrt());

    // Compute the two solutions for the quadratic formula.
    // The divisions must be performed as signed divisions.
    APInt NegB(-B);
    APInt TwoA(A << 1);
    if (TwoA.isMinValue()) {
      const SCEV *CNC = SE.getCouldNotCompute();
      return std::make_pair(CNC, CNC);
    }

    LLVMContext &Context = SE.getContext();

    ConstantInt *Solution1 =
      ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
    ConstantInt *Solution2 =
      ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));

    return std::make_pair(SE.getConstant(Solution1),
                          SE.getConstant(Solution2));
  } // end APIntOps namespace
}
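// Worked example for the conversion above (illustration only): the chrec
// {-10,+,4,+,2} evaluates at iteration x to -10 + 4*x + 2*(x*(x-1)/2),
// i.e. the polynomial x^2 + 3x - 10 with A = N/2 = 1, B = M - N/2 = 3,
// C = L = -10.  B^2-4AC = 49, its square root is 7, so the two returned
// roots are (-3+7)/2 = 2 and (-3-7)/2 = -5.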
/// HowFarToZero - Return the number of times a backedge comparing the specified
/// value to zero will execute.  If not computable, return CouldNotCompute.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
  // If the value is a constant
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
    // If the value is already zero, the branch will execute zero times.
    if (C->getValue()->isZero()) return C;
    return getCouldNotCompute();  // Otherwise it will loop infinitely.
  }

  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
  if (!AddRec || AddRec->getLoop() != L)
    return getCouldNotCompute();

  if (AddRec->isAffine()) {
    // If this is an affine expression, the execution count of this branch is
    // the minimum unsigned root of the following equation:
    //
    //     Start + Step*N = 0 (mod 2^BW)
    //
    // equivalent to:
    //
    //             Step*N = -Start (mod 2^BW)
    //
    // where BW is the common bit width of Start and Step.

    // Get the initial value for the loop.
    const SCEV *Start = getSCEVAtScope(AddRec->getStart(),
                                       L->getParentLoop());
    const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1),
                                      L->getParentLoop());

    if (const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step)) {
      // For now we handle only constant steps.

      // First, handle unitary steps.
      if (StepC->getValue()->equalsInt(1))      // 1*N = -Start (mod 2^BW), so:
        return getNegativeSCEV(Start);          //   N = -Start (as unsigned)
      if (StepC->getValue()->isAllOnesValue())  // -1*N = -Start (mod 2^BW), so:
        return Start;                           //   N = Start (as unsigned)

      // Then, try to solve the above equation provided that Start is constant.
      if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
        return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
                                            -StartC->getValue()->getValue(),
                                            *this);
    }
  } else if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
    // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
    // the quadratic equation to solve it.
    std::pair<const SCEV *,const SCEV *> Roots = SolveQuadraticEquation(AddRec,
                                                                        *this);
    const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
    const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
    if (R1 && R2) {
#if 0
      dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1
             << "  sol#2: " << *R2 << "\n";
#endif
      // Pick the smallest positive root value.
      if (ConstantInt *CB =
          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
                                     R1->getValue(), R2->getValue()))) {
        if (CB->getZExtValue() == false)
          std::swap(R1, R2);   // R1 is the minimum root now.

        // We can only use this value if the chrec ends up with an exact zero
        // value at this index.  When solving for "X*X != 5", for example, we
        // should not accept a root of 2.
        const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
        if (Val->isZero())
          return R1;  // We found a quadratic root!
      }
    }
  }

  return getCouldNotCompute();
}
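// Example of the affine case above (illustration only): for {-20,+,4} the
// equation is 4*N == 20 (mod 2^BW), whose minimum unsigned root is N = 5;
// the chrec walks -20, -16, -12, -8, -4 and reaches zero on iteration 5.
// The unit-stride shortcuts handle {Start,+,1} (N = -Start) and
// {Start,+,-1} (N = Start) without calling the solver.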
/// HowFarToNonZero - Return the number of times a backedge checking the
/// specified value for nonzero will execute.  If not computable, return
/// CouldNotCompute.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
  // Loops that look like: while (X == 0) are very strange indeed.  We don't
  // handle them yet except for the trivial case.  This could be expanded in the
  // future as needed.

  // If the value is a constant, check to see if it is known to be non-zero
  // already.  If so, the backedge will execute zero times.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
    if (!C->getValue()->isNullValue())
      return getConstant(C->getType(), 0);
    return getCouldNotCompute();  // Otherwise it will loop infinitely.
  }

  // We could implement others, but I really doubt anyone writes loops like
  // this, and if they did, they would already be constant folded.
  return getCouldNotCompute();
}
/// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
/// (which may not be an immediate predecessor) which has exactly one
/// successor from which BB is reachable, or null if no such block is
/// found.
///
std::pair<BasicBlock *, BasicBlock *>
ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
  // If the block has a unique predecessor, then there is no path from the
  // predecessor to the block that does not go through the direct edge
  // from the predecessor to the block.
  if (BasicBlock *Pred = BB->getSinglePredecessor())
    return std::make_pair(Pred, BB);

  // A loop's header is defined to be a block that dominates the loop.
  // If the header has a unique predecessor outside the loop, it must be
  // a block that has exactly one successor that can reach the loop.
  if (Loop *L = LI->getLoopFor(BB))
    return std::make_pair(L->getLoopPredecessor(), L->getHeader());

  return std::pair<BasicBlock *, BasicBlock *>();
}
/// HasSameValue - SCEV structural equivalence is usually sufficient for
/// testing whether two expressions are equal, however for the purposes of
/// looking for a condition guarding a loop, it can be useful to be a little
/// more general, since a front-end may have replicated the controlling
/// expression.
///
static bool HasSameValue(const SCEV *A, const SCEV *B) {
  // Quick check to see if they are the same SCEV.
  if (A == B) return true;

  // Otherwise, if they're both SCEVUnknown, it's possible that they hold
  // two different instructions with the same value. Check for this case.
  if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
    if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
      if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
        if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
          if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory())
            return true;

  // Otherwise assume they may have a different value.
  return false;
}
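// For instance (hypothetical IR, for illustration): if a front-end emitted
// two syntactically identical instructions such as "%a = and i32 %x, %y"
// and "%b = and i32 %x, %y", both are typically modeled as distinct
// SCEVUnknowns, but isIdenticalTo lets HasSameValue treat them as equal
// since neither reads memory.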
/// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
/// predicate Pred. Return true iff any changes were made.
///
bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
                                           const SCEV *&LHS, const SCEV *&RHS) {
  bool Changed = false;

  // Canonicalize a constant to the right side.
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
    // Check for both operands constant.
    if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
      if (ConstantExpr::getICmp(Pred,
                                LHSC->getValue(),
                                RHSC->getValue())->isNullValue())
        goto trivially_false;
      else
        goto trivially_true;
    }
    // Otherwise swap the operands to put the constant on the right.
    std::swap(LHS, RHS);
    Pred = ICmpInst::getSwappedPredicate(Pred);
    Changed = true;
  }

  // If we're comparing an addrec with a value which is loop-invariant in the
  // addrec's loop, put the addrec on the left. Also make a dominance check,
  // as both operands could be addrecs loop-invariant in each other's loop.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
    const Loop *L = AR->getLoop();
    if (LHS->isLoopInvariant(L) && LHS->properlyDominates(L->getHeader(), DT)) {
      std::swap(LHS, RHS);
      Pred = ICmpInst::getSwappedPredicate(Pred);
      Changed = true;
    }
  }

  // If there's a constant operand, canonicalize comparisons with boundary
  // cases, and canonicalize *-or-equal comparisons to regular comparisons.
  if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
    const APInt &RA = RC->getValue()->getValue();
    switch (Pred) {
    default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
    case ICmpInst::ICMP_EQ:
    case ICmpInst::ICMP_NE:
      break;
    case ICmpInst::ICMP_UGE:
      if ((RA - 1).isMinValue()) {
        Pred = ICmpInst::ICMP_NE;
        RHS = getConstant(RA - 1);
        Changed = true;
        break;
      }
      if (RA.isMaxValue()) {
        Pred = ICmpInst::ICMP_EQ;
        Changed = true;
        break;
      }
      if (RA.isMinValue()) goto trivially_true;

      Pred = ICmpInst::ICMP_UGT;
      RHS = getConstant(RA - 1);
      Changed = true;
      break;
    case ICmpInst::ICMP_ULE:
      if ((RA + 1).isMaxValue()) {
        Pred = ICmpInst::ICMP_NE;
        RHS = getConstant(RA + 1);
        Changed = true;
        break;
      }
      if (RA.isMinValue()) {
        Pred = ICmpInst::ICMP_EQ;
        Changed = true;
        break;
      }
      if (RA.isMaxValue()) goto trivially_true;

      Pred = ICmpInst::ICMP_ULT;
      RHS = getConstant(RA + 1);
      Changed = true;
      break;
    case ICmpInst::ICMP_SGE:
      if ((RA - 1).isMinSignedValue()) {
        Pred = ICmpInst::ICMP_NE;
        RHS = getConstant(RA - 1);
        Changed = true;
        break;
      }
      if (RA.isMaxSignedValue()) {
        Pred = ICmpInst::ICMP_EQ;
        Changed = true;
        break;
      }
      if (RA.isMinSignedValue()) goto trivially_true;

      Pred = ICmpInst::ICMP_SGT;
      RHS = getConstant(RA - 1);
      Changed = true;
      break;
    case ICmpInst::ICMP_SLE:
      if ((RA + 1).isMaxSignedValue()) {
        Pred = ICmpInst::ICMP_NE;
        RHS = getConstant(RA + 1);
        Changed = true;
        break;
      }
      if (RA.isMinSignedValue()) {
        Pred = ICmpInst::ICMP_EQ;
        Changed = true;
        break;
      }
      if (RA.isMaxSignedValue()) goto trivially_true;

      Pred = ICmpInst::ICMP_SLT;
      RHS = getConstant(RA + 1);
      Changed = true;
      break;
    case ICmpInst::ICMP_UGT:
      if (RA.isMinValue()) {
        Pred = ICmpInst::ICMP_NE;
        Changed = true;
        break;
      }
      if ((RA + 1).isMaxValue()) {
        Pred = ICmpInst::ICMP_EQ;
        RHS = getConstant(RA + 1);
        Changed = true;
        break;
      }
      if (RA.isMaxValue()) goto trivially_false;
      break;
    case ICmpInst::ICMP_ULT:
      if (RA.isMaxValue()) {
        Pred = ICmpInst::ICMP_NE;
        Changed = true;
        break;
      }
      if ((RA - 1).isMinValue()) {
        Pred = ICmpInst::ICMP_EQ;
        RHS = getConstant(RA - 1);
        Changed = true;
        break;
      }
      if (RA.isMinValue()) goto trivially_false;
      break;
    case ICmpInst::ICMP_SGT:
      if (RA.isMinSignedValue()) {
        Pred = ICmpInst::ICMP_NE;
        Changed = true;
        break;
      }
      if ((RA + 1).isMaxSignedValue()) {
        Pred = ICmpInst::ICMP_EQ;
        RHS = getConstant(RA + 1);
        Changed = true;
        break;
      }
      if (RA.isMaxSignedValue()) goto trivially_false;
      break;
    case ICmpInst::ICMP_SLT:
      if (RA.isMaxSignedValue()) {
        Pred = ICmpInst::ICMP_NE;
        Changed = true;
        break;
      }
      if ((RA - 1).isMinSignedValue()) {
        Pred = ICmpInst::ICMP_EQ;
        RHS = getConstant(RA - 1);
        Changed = true;
        break;
      }
      if (RA.isMinSignedValue()) goto trivially_false;
      break;
    }
  }

  // Check for obvious equality.
  if (HasSameValue(LHS, RHS)) {
    if (ICmpInst::isTrueWhenEqual(Pred))
      goto trivially_true;
    if (ICmpInst::isFalseWhenEqual(Pred))
      goto trivially_false;
  }

  // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
  // adding or subtracting 1 from one of the operands.
  switch (Pred) {
  case ICmpInst::ICMP_SLE:
    if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
                       /*HasNUW=*/false, /*HasNSW=*/true);
      Pred = ICmpInst::ICMP_SLT;
      Changed = true;
    } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
                       /*HasNUW=*/false, /*HasNSW=*/true);
      Pred = ICmpInst::ICMP_SLT;
      Changed = true;
    }
    break;
  case ICmpInst::ICMP_SGE:
    if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
                       /*HasNUW=*/false, /*HasNSW=*/true);
      Pred = ICmpInst::ICMP_SGT;
      Changed = true;
    } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
                       /*HasNUW=*/false, /*HasNSW=*/true);
      Pred = ICmpInst::ICMP_SGT;
      Changed = true;
    }
    break;
  case ICmpInst::ICMP_ULE:
    if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
                       /*HasNUW=*/true, /*HasNSW=*/false);
      Pred = ICmpInst::ICMP_ULT;
      Changed = true;
    } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
                       /*HasNUW=*/true, /*HasNSW=*/false);
      Pred = ICmpInst::ICMP_ULT;
      Changed = true;
    }
    break;
  case ICmpInst::ICMP_UGE:
    if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
                       /*HasNUW=*/true, /*HasNSW=*/false);
      Pred = ICmpInst::ICMP_UGT;
      Changed = true;
    } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
                       /*HasNUW=*/true, /*HasNSW=*/false);
      Pred = ICmpInst::ICMP_UGT;
      Changed = true;
    }
    break;
  default:
    break;
  }

  // TODO: More simplifications are possible here.

  return Changed;

trivially_true:
  // Return 0 == 0.
  LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0);
  Pred = ICmpInst::ICMP_EQ;
  return true;

trivially_false:
  // Return 0 != 0.
  LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0);
  Pred = ICmpInst::ICMP_NE;
  return true;
}
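// A few examples of the canonicalizations above (illustration only):
//   (5 s> %x)   becomes (%x s< 5)   - constant moved to the right side
//   (%x u<= 41) becomes (%x u< 42)  - *-or-equal turned into a strict compare
//   (%x u>= 1)  becomes (%x != 0)   - boundary case rewritten as NE
// In each case the function returns true to signal that Pred/LHS/RHS were
// changed.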
bool ScalarEvolution::isKnownNegative(const SCEV *S) {
  return getSignedRange(S).getSignedMax().isNegative();
}

bool ScalarEvolution::isKnownPositive(const SCEV *S) {
  return getSignedRange(S).getSignedMin().isStrictlyPositive();
}

bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
  return !getSignedRange(S).getSignedMin().isNegative();
}

bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
  return !getSignedRange(S).getSignedMax().isStrictlyPositive();
}

bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
  return isKnownNegative(S) || isKnownPositive(S);
}
bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
                                       const SCEV *LHS, const SCEV *RHS) {
  // Canonicalize the inputs first.
  (void)SimplifyICmpOperands(Pred, LHS, RHS);

  // If LHS or RHS is an addrec, check to see if the condition is true in
  // every iteration of the loop.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
    if (isLoopEntryGuardedByCond(
          AR->getLoop(), Pred, AR->getStart(), RHS) &&
        isLoopBackedgeGuardedByCond(
          AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS))
      return true;
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS))
    if (isLoopEntryGuardedByCond(
          AR->getLoop(), Pred, LHS, AR->getStart()) &&
        isLoopBackedgeGuardedByCond(
          AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this)))
      return true;

  // Otherwise see what can be done with known constant ranges.
  return isKnownPredicateWithRanges(Pred, LHS, RHS);
}
bool
ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
                                            const SCEV *LHS, const SCEV *RHS) {
  if (HasSameValue(LHS, RHS))
    return ICmpInst::isTrueWhenEqual(Pred);

  // This code is split out from isKnownPredicate because it is called from
  // within isLoopEntryGuardedByCond.
  switch (Pred) {
  default:
    llvm_unreachable("Unexpected ICmpInst::Predicate value!");
    break;
  case ICmpInst::ICMP_SGT:
    Pred = ICmpInst::ICMP_SLT;
    std::swap(LHS, RHS);
  case ICmpInst::ICMP_SLT: {
    ConstantRange LHSRange = getSignedRange(LHS);
    ConstantRange RHSRange = getSignedRange(RHS);
    if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin()))
      return true;
    if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax()))
      return false;
    break;
  }
  case ICmpInst::ICMP_SGE:
    Pred = ICmpInst::ICMP_SLE;
    std::swap(LHS, RHS);
  case ICmpInst::ICMP_SLE: {
    ConstantRange LHSRange = getSignedRange(LHS);
    ConstantRange RHSRange = getSignedRange(RHS);
    if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin()))
      return true;
    if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax()))
      return false;
    break;
  }
  case ICmpInst::ICMP_UGT:
    Pred = ICmpInst::ICMP_ULT;
    std::swap(LHS, RHS);
  case ICmpInst::ICMP_ULT: {
    ConstantRange LHSRange = getUnsignedRange(LHS);
    ConstantRange RHSRange = getUnsignedRange(RHS);
    if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin()))
      return true;
    if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax()))
      return false;
    break;
  }
  case ICmpInst::ICMP_UGE:
    Pred = ICmpInst::ICMP_ULE;
    std::swap(LHS, RHS);
  case ICmpInst::ICMP_ULE: {
    ConstantRange LHSRange = getUnsignedRange(LHS);
    ConstantRange RHSRange = getUnsignedRange(RHS);
    if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin()))
      return true;
    if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax()))
      return false;
    break;
  }
  case ICmpInst::ICMP_NE: {
    if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet())
      return true;
    if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet())
      return true;

    const SCEV *Diff = getMinusSCEV(LHS, RHS);
    if (isKnownNonZero(Diff))
      return true;
    break;
  }
  case ICmpInst::ICMP_EQ:
    // The check at the top of the function catches the case where
    // the values are known to be equal.
    break;
  }
  return false;
}
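// Example of the range reasoning above (illustration only): if
// getUnsignedRange(LHS) is [0,10) and getUnsignedRange(RHS) is [16,32),
// then LHS's unsigned max (9) is ult RHS's unsigned min (16), so ICMP_ULT
// is known to hold and the function returns true; when neither range test
// fires, the function conservatively reports "not known" by returning
// false.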
/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
/// protected by a conditional between LHS and RHS.  This is used to
/// eliminate casts.
bool
ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
                                             ICmpInst::Predicate Pred,
                                             const SCEV *LHS, const SCEV *RHS) {
  // Interpret a null as meaning no loop, where there is obviously no guard
  // (interprocedural conditions notwithstanding).
  if (!L) return true;

  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return false;

  BranchInst *LoopContinuePredicate =
    dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LoopContinuePredicate ||
      LoopContinuePredicate->isUnconditional())
    return false;

  return isImpliedCond(Pred, LHS, RHS,
                       LoopContinuePredicate->getCondition(),
                       LoopContinuePredicate->getSuccessor(0) != L->getHeader());
}
/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
/// by a conditional between LHS and RHS.  This is used to help avoid max
/// expressions in loop trip counts, and to eliminate casts.
bool
ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
                                          ICmpInst::Predicate Pred,
                                          const SCEV *LHS, const SCEV *RHS) {
  // Interpret a null as meaning no loop, where there is obviously no guard
  // (interprocedural conditions notwithstanding).
  if (!L) return false;

  // Starting at the loop predecessor, climb up the predecessor chain, as long
  // as there are predecessors that can be found that have unique successors
  // leading to the original header.
  for (std::pair<BasicBlock *, BasicBlock *>
         Pair(L->getLoopPredecessor(), L->getHeader());
       Pair.first;
       Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {

    BranchInst *LoopEntryPredicate =
      dyn_cast<BranchInst>(Pair.first->getTerminator());
    if (!LoopEntryPredicate ||
        LoopEntryPredicate->isUnconditional())
      continue;

    if (isImpliedCond(Pred, LHS, RHS,
                      LoopEntryPredicate->getCondition(),
                      LoopEntryPredicate->getSuccessor(0) != Pair.second))
      return true;
  }

  return false;
}
/// isImpliedCond - Test whether the condition described by Pred, LHS,
/// and RHS is true whenever the given Cond value evaluates to true.
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
                                    const SCEV *LHS, const SCEV *RHS,
                                    Value *FoundCondValue,
                                    bool Inverse) {
  // Recursively handle And and Or conditions.
  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
    if (BO->getOpcode() == Instruction::And) {
      if (!Inverse)
        return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
               isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
    } else if (BO->getOpcode() == Instruction::Or) {
      if (Inverse)
        return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
               isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
    }
  }

  ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
  if (!ICI) return false;

  // Bail if the ICmp's operands' types are wider than the needed type
  // before attempting to call getSCEV on them. This avoids infinite
  // recursion, since the analysis of widening casts can require loop
  // exit condition information for overflow checking, which would
  // lead back here.
  if (getTypeSizeInBits(LHS->getType()) <
      getTypeSizeInBits(ICI->getOperand(0)->getType()))
    return false;

  // Now that we found a conditional branch that dominates the loop, check to
  // see if it is the comparison we are looking for.
  ICmpInst::Predicate FoundPred;
  if (Inverse)
    FoundPred = ICI->getInversePredicate();
  else
    FoundPred = ICI->getPredicate();

  const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
  const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));

  // Balance the types. The case where FoundLHS' type is wider than
  // LHS' type is checked for above.
  if (getTypeSizeInBits(LHS->getType()) >
      getTypeSizeInBits(FoundLHS->getType())) {
    if (CmpInst::isSigned(Pred)) {
      FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
      FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
    } else {
      FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
      FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
    }
  }

  // Canonicalize the query to match the way instcombine will have
  // canonicalized the comparison.
  if (SimplifyICmpOperands(Pred, LHS, RHS))
    if (LHS == RHS)
      return CmpInst::isTrueWhenEqual(Pred);
  if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
    if (FoundLHS == FoundRHS)
      return CmpInst::isFalseWhenEqual(Pred);

  // Check to see if we can make the LHS or RHS match.
  if (LHS == FoundRHS || RHS == FoundLHS) {
    if (isa<SCEVConstant>(RHS)) {
      std::swap(FoundLHS, FoundRHS);
      FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
    } else {
      std::swap(LHS, RHS);
      Pred = ICmpInst::getSwappedPredicate(Pred);
    }
  }

  // Check whether the found predicate is the same as the desired predicate.
  if (FoundPred == Pred)
    return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);

  // Check whether swapping the found predicate makes it the same as the
  // desired predicate.
  if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
    if (isa<SCEVConstant>(RHS))
      return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
    else
      return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
                                   RHS, LHS, FoundLHS, FoundRHS);
  }

  // Check whether the actual condition is beyond sufficient.
  if (FoundPred == ICmpInst::ICMP_EQ)
    if (ICmpInst::isTrueWhenEqual(Pred))
      if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
        return true;
  if (Pred == ICmpInst::ICMP_NE)
    if (!ICmpInst::isTrueWhenEqual(FoundPred))
      if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
        return true;

  // Otherwise assume the worst.
  return false;
}
/// isImpliedCondOperands - Test whether the condition described by Pred,
/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
/// and FoundRHS is true.
bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
                                            const SCEV *LHS, const SCEV *RHS,
                                            const SCEV *FoundLHS,
                                            const SCEV *FoundRHS) {
  return isImpliedCondOperandsHelper(Pred, LHS, RHS,
                                     FoundLHS, FoundRHS) ||
         // ~x < ~y --> x > y
         isImpliedCondOperandsHelper(Pred, LHS, RHS,
                                     getNotSCEV(FoundRHS),
                                     getNotSCEV(FoundLHS));
}
/// isImpliedCondOperandsHelper - Test whether the condition described by
/// Pred, LHS, and RHS is true whenever the condition described by Pred,
/// FoundLHS, and FoundRHS is true.
bool
ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
                                             const SCEV *LHS, const SCEV *RHS,
                                             const SCEV *FoundLHS,
                                             const SCEV *FoundRHS) {
  switch (Pred) {
  default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
  case ICmpInst::ICMP_EQ:
  case ICmpInst::ICMP_NE:
    if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
      return true;
    break;
  case ICmpInst::ICMP_SLT:
  case ICmpInst::ICMP_SLE:
    if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
        isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS))
      return true;
    break;
  case ICmpInst::ICMP_SGT:
  case ICmpInst::ICMP_SGE:
    if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
        isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS))
      return true;
    break;
  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_ULE:
    if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
        isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS))
      return true;
    break;
  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_UGE:
    if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
        isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS))
      return true;
    break;
  }

  return false;
}
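// Example of the reasoning above (illustration only): to show "%x u< 20"
// from a known "%x u< 10", take LHS = FoundLHS = %x, RHS = 20 and
// FoundRHS = 10; the ULT case requires LHS u<= FoundLHS (trivially true,
// same value) and RHS u>= FoundRHS (20 u>= 10, proved by constant ranges),
// so the implication is accepted.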
/// getBECount - Subtract the end and start values and divide by the step,
/// rounding up, to get the number of times the backedge is executed. Return
/// CouldNotCompute if an intermediate computation overflows.
const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
                                        const SCEV *End,
                                        const SCEV *Step,
                                        bool NoWrap) {
  assert(!isKnownNegative(Step) &&
         "This code doesn't handle negative strides yet!");

  const Type *Ty = Start->getType();
  const SCEV *NegOne = getConstant(Ty, (uint64_t)-1);
  const SCEV *Diff = getMinusSCEV(End, Start);
  const SCEV *RoundUp = getAddExpr(Step, NegOne);

  // Add an adjustment to the difference between End and Start so that
  // the division will effectively round up.
  const SCEV *Add = getAddExpr(Diff, RoundUp);

  if (!NoWrap) {
    // Check Add for unsigned overflow.
    // TODO: More sophisticated things could be done here.
    const Type *WideTy = IntegerType::get(getContext(),
                                          getTypeSizeInBits(Ty) + 1);
    const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
    const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
    const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp);
    if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
      return getCouldNotCompute();
  }

  return getUDivExpr(Add, Step);
}
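// Worked example for the computation above (illustration only): with
// Start = 5, End = 17 and Step = 4, Add = (17 - 5) + (4 - 1) = 15 and the
// backedge count is 15 /u 4 = 3; a loop running i = 5, 9, 13 takes its
// backedge exactly 3 times before i reaches 17.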
/// HowManyLessThans - Return the number of times a backedge containing the
/// specified less-than comparison will execute.  If not computable, return
/// CouldNotCompute.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
                                  const Loop *L, bool isSigned) {
  // Only handle:  "ADDREC < LoopInvariant".
  if (!RHS->isLoopInvariant(L)) return getCouldNotCompute();

  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
  if (!AddRec || AddRec->getLoop() != L)
    return getCouldNotCompute();

  // Check to see if we have a flag which makes analysis easy.
  bool NoWrap = isSigned ? AddRec->hasNoSignedWrap() :
                           AddRec->hasNoUnsignedWrap();

  if (AddRec->isAffine()) {
    unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
    const SCEV *Step = AddRec->getStepRecurrence(*this);

    if (Step->isZero())
      return getCouldNotCompute();
    if (Step->isOne()) {
      // With unit stride, the iteration never steps past the limit value.
    } else if (isKnownPositive(Step)) {
      // Test whether a positive iteration can step past the limit
      // value and past the maximum value for its type in a single step.
      // Note that it's not sufficient to check NoWrap here, because even
      // though the value after a wrap is undefined, it's not undefined
      // behavior, so if wrap does occur, the loop could either terminate or
      // loop infinitely, but in either case, the loop is guaranteed to
      // iterate at least until the iteration where the wrapping occurs.
      const SCEV *One = getConstant(Step->getType(), 1);
      if (isSigned) {
        APInt Max = APInt::getSignedMaxValue(BitWidth);
        if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
              .slt(getSignedRange(RHS).getSignedMax()))
          return getCouldNotCompute();
      } else {
        APInt Max = APInt::getMaxValue(BitWidth);
        if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
              .ult(getUnsignedRange(RHS).getUnsignedMax()))
          return getCouldNotCompute();
      }
    } else
      // TODO: Handle negative strides here and below.
      return getCouldNotCompute();

    // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
    // m.  So, we count the number of iterations in which {n,+,s} < m is true.
    // Note that we cannot simply return max(m-n,0)/s because it's not safe to
    // treat m-n as signed nor unsigned due to overflow possibility.

    // First, we get the value of the LHS in the first iteration: n
    const SCEV *Start = AddRec->getOperand(0);

    // Determine the minimum constant start value.
    const SCEV *MinStart = getConstant(isSigned ?
      getSignedRange(Start).getSignedMin() :
      getUnsignedRange(Start).getUnsignedMin());

    // If we know that the condition is true in order to enter the loop,
    // then we know that it will run exactly (m-n)/s times. Otherwise, we
    // only know that it will execute (max(m,n)-n)/s times. In both cases,
    // the division must round up.
    const SCEV *End = RHS;
    if (!isLoopEntryGuardedByCond(L,
                                  isSigned ? ICmpInst::ICMP_SLT :
                                             ICmpInst::ICMP_ULT,
                                  getMinusSCEV(Start, Step), RHS))
      End = isSigned ? getSMaxExpr(RHS, Start)
                     : getUMaxExpr(RHS, Start);

    // Determine the maximum constant end value.
    const SCEV *MaxEnd = getConstant(isSigned ?
      getSignedRange(End).getSignedMax() :
      getUnsignedRange(End).getUnsignedMax());

    // If MaxEnd is within a step of the maximum integer value in its type,
    // adjust it down to the minimum value which would produce the same effect.
    // This allows the subsequent ceiling division of (N+(step-1))/step to
    // compute the correct value.
    const SCEV *StepMinusOne = getMinusSCEV(Step,
                                            getConstant(Step->getType(), 1));
    MaxEnd = isSigned ?
      getSMinExpr(MaxEnd,
                  getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)),
                               StepMinusOne)) :
      getUMinExpr(MaxEnd,
                  getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)),
                               StepMinusOne));

    // Finally, we subtract these two values and divide, rounding up, to get
    // the number of times the backedge is executed.
    const SCEV *BECount = getBECount(Start, End, Step, NoWrap);

    // The maximum backedge count is similar, except using the minimum start
    // value and the maximum end value.
    const SCEV *MaxBECount = getBECount(MinStart, MaxEnd, Step, NoWrap);

    return BackedgeTakenInfo(BECount, MaxBECount);
  }

  return getCouldNotCompute();
}
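// Sketch of the affine case above (illustration only): for a loop of the
// form "for (i = n; i < m; i += s)" with a known-positive s, LHS is the
// chrec {n,+,s} and RHS is m, so the backedge-taken count comes out of
// getBECount as (End - n + s - 1) /u s, where End is m when the entry
// guard proves the condition holds on entry and smax/umax(m, n) otherwise.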
/// getNumIterationsInRange - Return the number of iterations of this loop that
/// produce values in the specified constant range.  Another way of looking at
/// this is that it returns the first iteration number where the value is no
/// longer in the range, thus computing the exit count. If the iteration count
/// can't be computed, an instance of SCEVCouldNotCompute is returned.
const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
                                                    ScalarEvolution &SE) const {
  if (Range.isFullSet())  // Infinite loop.
    return SE.getCouldNotCompute();

  // If the start is a non-zero constant, shift the range to simplify things.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
    if (!SC->getValue()->isZero()) {
      SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
      Operands[0] = SE.getConstant(SC->getType(), 0);
      const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop());
      if (const SCEVAddRecExpr *ShiftedAddRec =
            dyn_cast<SCEVAddRecExpr>(Shifted))
        return ShiftedAddRec->getNumIterationsInRange(
                           Range.subtract(SC->getValue()->getValue()), SE);
      // This is strange and shouldn't happen.
      return SE.getCouldNotCompute();
    }

  // The only time we can solve this is when we have all constant indices.
  // Otherwise, we cannot determine the overflow conditions.
  for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
    if (!isa<SCEVConstant>(getOperand(i)))
      return SE.getCouldNotCompute();

  // Okay at this point we know that all elements of the chrec are constants and
  // that the start element is zero.

  // First check to see if the range contains zero.  If not, the first
  // iteration exits.
  unsigned BitWidth = SE.getTypeSizeInBits(getType());
  if (!Range.contains(APInt(BitWidth, 0)))
    return SE.getConstant(getType(), 0);

  if (isAffine()) {
    // If this is an affine expression then we have this situation:
    //   Solve {0,+,A} in Range  ===  Ax in Range

    // We know that zero is in the range.  If A is positive then we know that
    // the upper value of the range must be the first possible exit value.
    // If A is negative then the lower of the range is the last possible loop
    // value.  Also note that we already checked for a full range.
    APInt One(BitWidth, 1);
    APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue();
    APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();

    // The exit value should be (End+A)/A.
    APInt ExitVal = (End + A).udiv(A);
    ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);

    // Evaluate at the exit value.  If we really did fall out of the valid
    // range, then we computed our trip count, otherwise wrap around or other
    // things must have happened.
    ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
    if (Range.contains(Val->getValue()))
      return SE.getCouldNotCompute();  // Something strange happened

    // Ensure that the previous value is in the range.  This is a sanity check.
    assert(Range.contains(
           EvaluateConstantChrecAtConstant(this,
           ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) &&
           "Linear scev computation is off in a bad way!");
    return SE.getConstant(ExitValue);
  } else if (isQuadratic()) {
    // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
    // quadratic equation to solve it.  To do this, we must frame our problem in
    // terms of figuring out when zero is crossed, instead of when
    // Range.getUpper() is crossed.
    SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
    NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
    const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop());

    // Next, solve the constructed addrec
    std::pair<const SCEV *,const SCEV *> Roots =
      SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
    const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
    const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
    if (R1) {
      // Pick the smallest positive root value.
      if (ConstantInt *CB =
          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
                                     R1->getValue(), R2->getValue()))) {
        if (CB->getZExtValue() == false)
          std::swap(R1, R2);   // R1 is the minimum root now.

        // Make sure the root is not off by one.  The returned iteration should
        // not be in the range, but the previous one should be.  When solving
        // for "X*X < 5", for example, we should not return a root of 2.
        ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this,
                                                             R1->getValue(),
                                                             SE);
        if (Range.contains(R1Val->getValue())) {
          // The next iteration must be out of the range...
          ConstantInt *NextVal =
                ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1);

          R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
          if (!Range.contains(R1Val->getValue()))
            return SE.getConstant(NextVal);
          return SE.getCouldNotCompute();  // Something strange happened
        }

        // If R1 was not in the range, then it is a good return value.  Make
        // sure that R1-1 WAS in the range though, just in case.
        ConstantInt *NextVal =
               ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1);
        R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
        if (Range.contains(R1Val->getValue()))
          return SE.getConstant(R1);
        return SE.getCouldNotCompute();  // Something strange happened
      }
    }
  }

  return SE.getCouldNotCompute();
}
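// Worked example of the affine case above (illustration only): for the
// chrec {0,+,3} and the range [0,10), A = 3 and End = 10 - 1 = 9, so
// ExitVal = (9 + 3) /u 3 = 4; iteration 4 produces 12, which is outside
// the range, while iteration 3 produces 9, which is inside, so the result
// is 4.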
//===----------------------------------------------------------------------===//
//                   SCEVCallbackVH Class Implementation
//===----------------------------------------------------------------------===//

void ScalarEvolution::SCEVCallbackVH::deleted() {
  assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
  if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
    SE->ConstantEvolutionLoopExitValue.erase(PN);
  SE->ValueExprMap.erase(getValPtr());
  // this now dangles!
}
void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
  assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");

  // Forget all the expressions associated with users of the old value,
  // so that future queries will recompute the expressions using the new
  // value.
  Value *Old = getValPtr();
  SmallVector<User *, 16> Worklist;
  SmallPtrSet<User *, 8> Visited;
  for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
       UI != UE; ++UI)
    Worklist.push_back(*UI);
  while (!Worklist.empty()) {
    User *U = Worklist.pop_back_val();
    // Deleting the Old value will cause this to dangle. Postpone
    // that until everything else is done.
    if (U == Old)
      continue;
    if (!Visited.insert(U))
      continue;
    if (PHINode *PN = dyn_cast<PHINode>(U))
      SE->ConstantEvolutionLoopExitValue.erase(PN);
    SE->ValueExprMap.erase(U);
    for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
         UI != UE; ++UI)
      Worklist.push_back(*UI);
  }
  // Delete the Old value.
  if (PHINode *PN = dyn_cast<PHINode>(Old))
    SE->ConstantEvolutionLoopExitValue.erase(PN);
  SE->ValueExprMap.erase(Old);
  // this now dangles!
}

ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
  : CallbackVH(V), SE(se) {}
//===----------------------------------------------------------------------===//
//                   ScalarEvolution Class Implementation
//===----------------------------------------------------------------------===//

ScalarEvolution::ScalarEvolution()
  : FunctionPass(ID), FirstUnknown(0) {
  initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
}

bool ScalarEvolution::runOnFunction(Function &F) {
  this->F = &F;
  LI = &getAnalysis<LoopInfo>();
  TD = getAnalysisIfAvailable<TargetData>();
  DT = &getAnalysis<DominatorTree>();
  return false;
}
void ScalarEvolution::releaseMemory() {
  // Iterate through all the SCEVUnknown instances and call their
  // destructors, so that they release their references to their values.
  for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
    U->~SCEVUnknown();
  FirstUnknown = 0;

  ValueExprMap.clear();
  BackedgeTakenCounts.clear();
  ConstantEvolutionLoopExitValue.clear();
  ValuesAtScopes.clear();
  UniqueSCEVs.clear();
  SCEVAllocator.Reset();
}
void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequiredTransitive<LoopInfo>();
  AU.addRequiredTransitive<DominatorTree>();
}

bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
  return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
}
static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
                          const Loop *L) {
  // Print all inner loops first
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
    PrintLoopInfo(OS, SE, *I);

  OS << "Loop ";
  WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
  OS << ": ";

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1)
    OS << "<multiple exits> ";

  if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
    OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
  } else {
    OS << "Unpredictable backedge-taken count. ";
  }

  OS << "\n"
        "Loop ";
  WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
  OS << ": ";

  if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
    OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
  } else {
    OS << "Unpredictable max backedge-taken count. ";
  }

  OS << "\n";
}
void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
  // ScalarEvolution's implementation of the print method is to print
  // out SCEV values of all instructions that are interesting. Doing
  // this potentially causes it to create new SCEV objects though,
  // which technically conflicts with the const qualifier. This isn't
  // observable from outside the class though, so casting away the
  // const isn't dangerous.
  ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);

  OS << "Classifying expressions for: ";
  WriteAsOperand(OS, F, /*PrintType=*/false);
  OS << "\n";
  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
    if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
      OS << *I << '\n';
      OS << "  -->  ";
      const SCEV *SV = SE.getSCEV(&*I);
      SV->print(OS);

      const Loop *L = LI->getLoopFor((*I).getParent());

      const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
      if (AtUse != SV) {
        OS << "  -->  ";
        AtUse->print(OS);
      }

      if (L) {
        OS << "\t\t" "Exits: ";
        const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
        if (!ExitValue->isLoopInvariant(L)) {
          OS << "<<Unknown>>";
        } else {
          OS << *ExitValue;
        }
      }

      OS << "\n";
    }

  OS << "Determining loop execution counts for: ";
  WriteAsOperand(OS, F, /*PrintType=*/false);
  OS << "\n";
  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
    PrintLoopInfo(OS, &SE, *I);
}