1 //== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines RangeConstraintManager, a class that tracks simple
10 // equality and inequality constraints on symbolic values of ProgramState.
12 //===----------------------------------------------------------------------===//
14 #include "clang/Basic/JsonSupport.h"
15 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
16 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
17 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
18 #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
19 #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
20 #include "llvm/ADT/FoldingSet.h"
21 #include "llvm/ADT/ImmutableSet.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/Support/Compiler.h"
26 #include "llvm/Support/raw_ostream.h"
31 using namespace clang
;
34 // This class can be extended with other tables which will help to reason
35 // about ranges more precisely.
36 class OperatorRelationsTable
{
37 static_assert(BO_LT
< BO_GT
&& BO_GT
< BO_LE
&& BO_LE
< BO_GE
&&
38 BO_GE
< BO_EQ
&& BO_EQ
< BO_NE
,
39 "This class relies on operators order. Rework it otherwise.");
49 // CmpOpTable holds states which represent the corresponding range for
50 // branching an exploded graph. We can reason about the branch if there is
51 // a previously known fact of the existence of a comparison expression with
52 // operands used in the current expression.
53 // E.g. assuming (x < y) is true that means (x != y) is surely true.
54 // if (x previous_operation y) // < | != | >
55 // if (x operation y) // != | > | <
56 // tristate // True | Unknown | False
58 // CmpOpTable represents next:
59 // __|< |> |<=|>=|==|!=|UnknownX2|
60 // < |1 |0 |* |0 |0 |* |1 |
61 // > |0 |1 |0 |* |0 |* |1 |
62 // <=|1 |0 |1 |* |1 |* |0 |
63 // >=|0 |1 |* |1 |1 |* |0 |
64 // ==|0 |0 |* |* |1 |0 |1 |
65 // !=|1 |1 |* |* |0 |1 |0 |
67 // Columns stands for a previous operator.
68 // Rows stands for a current operator.
69 // Each row has exactly two `Unknown` cases.
70 // UnknownX2 means that both `Unknown` previous operators are met in code,
71 // and there is a special column for that, for example:
76 static constexpr size_t CmpOpCount
= BO_NE
- BO_LT
+ 1;
77 const TriStateKind CmpOpTable
[CmpOpCount
][CmpOpCount
+ 1] = {
78 // < > <= >= == != UnknownX2
79 {True
, False
, Unknown
, False
, False
, Unknown
, True
}, // <
80 {False
, True
, False
, Unknown
, False
, Unknown
, True
}, // >
81 {True
, False
, True
, Unknown
, True
, Unknown
, False
}, // <=
82 {False
, True
, Unknown
, True
, True
, Unknown
, False
}, // >=
83 {False
, False
, Unknown
, Unknown
, True
, False
, True
}, // ==
84 {True
, True
, Unknown
, Unknown
, False
, True
, False
}, // !=
87 static size_t getIndexFromOp(BinaryOperatorKind OP
) {
88 return static_cast<size_t>(OP
- BO_LT
);
92 constexpr size_t getCmpOpCount() const { return CmpOpCount
; }
94 static BinaryOperatorKind
getOpFromIndex(size_t Index
) {
95 return static_cast<BinaryOperatorKind
>(Index
+ BO_LT
);
98 TriStateKind
getCmpOpState(BinaryOperatorKind CurrentOP
,
99 BinaryOperatorKind QueriedOP
) const {
100 return CmpOpTable
[getIndexFromOp(CurrentOP
)][getIndexFromOp(QueriedOP
)];
103 TriStateKind
getCmpOpStateForUnknownX2(BinaryOperatorKind CurrentOP
) const {
104 return CmpOpTable
[getIndexFromOp(CurrentOP
)][CmpOpCount
];
108 //===----------------------------------------------------------------------===//
109 // RangeSet implementation
110 //===----------------------------------------------------------------------===//
112 RangeSet::ContainerType
RangeSet::Factory::EmptySet
{};
114 RangeSet
RangeSet::Factory::add(RangeSet LHS
, RangeSet RHS
) {
115 ContainerType Result
;
116 Result
.reserve(LHS
.size() + RHS
.size());
117 std::merge(LHS
.begin(), LHS
.end(), RHS
.begin(), RHS
.end(),
118 std::back_inserter(Result
));
119 return makePersistent(std::move(Result
));
122 RangeSet
RangeSet::Factory::add(RangeSet Original
, Range Element
) {
123 ContainerType Result
;
124 Result
.reserve(Original
.size() + 1);
126 const_iterator Lower
= llvm::lower_bound(Original
, Element
);
127 Result
.insert(Result
.end(), Original
.begin(), Lower
);
128 Result
.push_back(Element
);
129 Result
.insert(Result
.end(), Lower
, Original
.end());
131 return makePersistent(std::move(Result
));
134 RangeSet
RangeSet::Factory::add(RangeSet Original
, const llvm::APSInt
&Point
) {
135 return add(Original
, Range(Point
));
138 RangeSet
RangeSet::Factory::unite(RangeSet LHS
, RangeSet RHS
) {
139 ContainerType Result
= unite(*LHS
.Impl
, *RHS
.Impl
);
140 return makePersistent(std::move(Result
));
143 RangeSet
RangeSet::Factory::unite(RangeSet Original
, Range R
) {
144 ContainerType Result
;
146 Result
= unite(*Original
.Impl
, Result
);
147 return makePersistent(std::move(Result
));
150 RangeSet
RangeSet::Factory::unite(RangeSet Original
, llvm::APSInt Point
) {
151 return unite(Original
, Range(ValueFactory
.getValue(Point
)));
154 RangeSet
RangeSet::Factory::unite(RangeSet Original
, llvm::APSInt From
,
156 return unite(Original
,
157 Range(ValueFactory
.getValue(From
), ValueFactory
.getValue(To
)));
160 template <typename T
>
161 void swapIterators(T
&First
, T
&FirstEnd
, T
&Second
, T
&SecondEnd
) {
162 std::swap(First
, Second
);
163 std::swap(FirstEnd
, SecondEnd
);
166 RangeSet::ContainerType
RangeSet::Factory::unite(const ContainerType
&LHS
,
167 const ContainerType
&RHS
) {
174 using iterator
= ContainerType::const_iterator
;
176 iterator First
= LHS
.begin();
177 iterator FirstEnd
= LHS
.end();
178 iterator Second
= RHS
.begin();
179 iterator SecondEnd
= RHS
.end();
180 APSIntType Ty
= APSIntType(First
->From());
181 const APSInt Min
= Ty
.getMinValue();
183 // Handle a corner case first when both range sets start from MIN.
184 // This helps to avoid complicated conditions below. Specifically, this
185 // particular check for `MIN` is not needed in the loop below every time
186 // when we do `Second->From() - One` operation.
187 if (Min
== First
->From() && Min
== Second
->From()) {
188 if (First
->To() > Second
->To()) {
192 // The Second range is entirely inside the First one.
194 // Check if Second is the last in its RangeSet.
195 if (++Second
== SecondEnd
)
196 // [ First ]--[ First + 1 ]--->
197 // [ Second ]--------------------->
199 // The Union is equal to First's RangeSet.
202 // case 1: [ First ]----->
203 // case 2: [ First ]--->
206 // The First range is entirely inside or equal to the Second one.
208 // Check if First is the last in its RangeSet.
209 if (++First
== FirstEnd
)
210 // [ First ]----------------------->
211 // [ Second ]--[ Second + 1 ]---->
213 // The Union is equal to Second's RangeSet.
218 const APSInt One
= Ty
.getValue(1);
219 ContainerType Result
;
221 // This is called when there are no ranges left in one of the ranges.
222 // Append the rest of the ranges from another range set to the Result
223 // and return with that.
224 const auto AppendTheRest
= [&Result
](iterator I
, iterator E
) {
230 // We want to keep the following invariant at all times:
231 // ---[ First ------>
232 // -----[ Second --->
233 if (First
->From() > Second
->From())
234 swapIterators(First
, FirstEnd
, Second
, SecondEnd
);
236 // The Union definitely starts with First->From().
237 // ----------[ First ------>
238 // ------------[ Second --->
239 // ----------[ Union ------>
241 const llvm::APSInt
&UnionStart
= First
->From();
243 // Loop where the invariant holds.
245 // Skip all enclosed ranges.
247 // -----[ Second ]--[ Second + 1 ]--[ Second + N ]----->
248 while (First
->To() >= Second
->To()) {
249 // Check if Second is the last in its RangeSet.
250 if (++Second
== SecondEnd
) {
253 // -----[ Second ]----->
254 // --------[ First ]--->
256 Result
.emplace_back(UnionStart
, First
->To());
257 // ---[ Union ]----------------->
258 // --------------[ First + 1]--->
259 // Append all remaining ranges from the First's RangeSet.
260 return AppendTheRest(++First
, FirstEnd
);
264 // Check if First and Second are disjoint. It means that we find
265 // the end of the Union. Exit the loop and append the Union.
266 // ---[ First ]=------------->
267 // ------------=[ Second ]--->
269 if (First
->To() < Second
->From() - One
)
272 // First is entirely inside the Union. Go next.
273 // ---[ Union ----------->
274 // ---- [ First ]-------->
275 // -------[ Second ]----->
276 // Check if First is the last in its RangeSet.
277 if (++First
== FirstEnd
) {
280 // -----[ First ]------->
281 // --------[ Second ]--->
283 Result
.emplace_back(UnionStart
, Second
->To());
284 // ---[ Union ]------------------>
285 // --------------[ Second + 1]--->
286 // Append all remaining ranges from the Second's RangeSet.
287 return AppendTheRest(++Second
, SecondEnd
);
290 // We know that we are at one of the two cases:
291 // case 1: --[ First ]--------->
292 // case 2: ----[ First ]------->
293 // --------[ Second ]---------->
294 // In both cases First starts after Second->From().
295 // Make sure that the loop invariant holds.
296 swapIterators(First
, FirstEnd
, Second
, SecondEnd
);
299 // Here First and Second are disjoint.
301 // ---[ Union ]--------------->
302 // -----------------[ Second ]--->
303 // ------[ First ]--------------->
305 Result
.emplace_back(UnionStart
, First
->To());
307 // Check if First is the last in its RangeSet.
308 if (++First
== FirstEnd
)
309 // ---[ Union ]--------------->
310 // --------------[ Second ]--->
311 // Append all remaining ranges from the Second's RangeSet.
312 return AppendTheRest(Second
, SecondEnd
);
315 llvm_unreachable("Normally, we should not reach here");
318 RangeSet
RangeSet::Factory::getRangeSet(Range From
) {
319 ContainerType Result
;
320 Result
.push_back(From
);
321 return makePersistent(std::move(Result
));
324 RangeSet
RangeSet::Factory::makePersistent(ContainerType
&&From
) {
325 llvm::FoldingSetNodeID ID
;
329 ContainerType
*Result
= Cache
.FindNodeOrInsertPos(ID
, InsertPos
);
332 // It is cheaper to fully construct the resulting range on stack
333 // and move it to the freshly allocated buffer if we don't have
334 // a set like this already.
335 Result
= construct(std::move(From
));
336 Cache
.InsertNode(Result
, InsertPos
);
342 RangeSet::ContainerType
*RangeSet::Factory::construct(ContainerType
&&From
) {
343 void *Buffer
= Arena
.Allocate();
344 return new (Buffer
) ContainerType(std::move(From
));
347 const llvm::APSInt
&RangeSet::getMinValue() const {
349 return begin()->From();
352 const llvm::APSInt
&RangeSet::getMaxValue() const {
354 return std::prev(end())->To();
357 bool clang::ento::RangeSet::isUnsigned() const {
359 return begin()->From().isUnsigned();
362 uint32_t clang::ento::RangeSet::getBitWidth() const {
364 return begin()->From().getBitWidth();
367 APSIntType
clang::ento::RangeSet::getAPSIntType() const {
369 return APSIntType(begin()->From());
372 bool RangeSet::containsImpl(llvm::APSInt
&Point
) const {
373 if (isEmpty() || !pin(Point
))
377 const_iterator It
= llvm::upper_bound(*this, Dummy
);
381 return std::prev(It
)->Includes(Point
);
384 bool RangeSet::pin(llvm::APSInt
&Point
) const {
385 APSIntType
Type(getMinValue());
386 if (Type
.testInRange(Point
, true) != APSIntType::RTR_Within
)
393 bool RangeSet::pin(llvm::APSInt
&Lower
, llvm::APSInt
&Upper
) const {
394 // This function has nine cases, the cartesian product of range-testing
395 // both the upper and lower bounds against the symbol's type.
396 // Each case requires a different pinning operation.
397 // The function returns false if the described range is entirely outside
398 // the range of values for the associated symbol.
399 APSIntType
Type(getMinValue());
400 APSIntType::RangeTestResultKind LowerTest
= Type
.testInRange(Lower
, true);
401 APSIntType::RangeTestResultKind UpperTest
= Type
.testInRange(Upper
, true);
404 case APSIntType::RTR_Below
:
406 case APSIntType::RTR_Below
:
407 // The entire range is outside the symbol's set of possible values.
408 // If this is a conventionally-ordered range, the state is infeasible.
412 // However, if the range wraps around, it spans all possible values.
413 Lower
= Type
.getMinValue();
414 Upper
= Type
.getMaxValue();
416 case APSIntType::RTR_Within
:
417 // The range starts below what's possible but ends within it. Pin.
418 Lower
= Type
.getMinValue();
421 case APSIntType::RTR_Above
:
422 // The range spans all possible values for the symbol. Pin.
423 Lower
= Type
.getMinValue();
424 Upper
= Type
.getMaxValue();
428 case APSIntType::RTR_Within
:
430 case APSIntType::RTR_Below
:
431 // The range wraps around, but all lower values are not possible.
433 Upper
= Type
.getMaxValue();
435 case APSIntType::RTR_Within
:
436 // The range may or may not wrap around, but both limits are valid.
440 case APSIntType::RTR_Above
:
441 // The range starts within what's possible but ends above it. Pin.
443 Upper
= Type
.getMaxValue();
447 case APSIntType::RTR_Above
:
449 case APSIntType::RTR_Below
:
450 // The range wraps but is outside the symbol's set of possible values.
452 case APSIntType::RTR_Within
:
453 // The range starts above what's possible but ends within it (wrap).
454 Lower
= Type
.getMinValue();
457 case APSIntType::RTR_Above
:
458 // The entire range is outside the symbol's set of possible values.
459 // If this is a conventionally-ordered range, the state is infeasible.
463 // However, if the range wraps around, it spans all possible values.
464 Lower
= Type
.getMinValue();
465 Upper
= Type
.getMaxValue();
474 RangeSet
RangeSet::Factory::intersect(RangeSet What
, llvm::APSInt Lower
,
475 llvm::APSInt Upper
) {
476 if (What
.isEmpty() || !What
.pin(Lower
, Upper
))
477 return getEmptySet();
479 ContainerType DummyContainer
;
481 if (Lower
<= Upper
) {
482 // [Lower, Upper] is a regular range.
484 // Shortcut: check that there is even a possibility of the intersection
485 // by checking the two following situations:
487 // <---[ What ]---[------]------>
490 // <----[------]----[ What ]---->
492 if (What
.getMaxValue() < Lower
|| Upper
< What
.getMinValue())
493 return getEmptySet();
495 DummyContainer
.push_back(
496 Range(ValueFactory
.getValue(Lower
), ValueFactory
.getValue(Upper
)));
498 // [Lower, Upper] is an inverted range, i.e. [MIN, Upper] U [Lower, MAX]
500 // Shortcut: check that there is even a possibility of the intersection
501 // by checking the following situation:
503 // <------]---[ What ]---[------>
505 if (What
.getMaxValue() < Lower
&& Upper
< What
.getMinValue())
506 return getEmptySet();
508 DummyContainer
.push_back(
509 Range(ValueFactory
.getMinValue(Upper
), ValueFactory
.getValue(Upper
)));
510 DummyContainer
.push_back(
511 Range(ValueFactory
.getValue(Lower
), ValueFactory
.getMaxValue(Lower
)));
514 return intersect(*What
.Impl
, DummyContainer
);
517 RangeSet
RangeSet::Factory::intersect(const RangeSet::ContainerType
&LHS
,
518 const RangeSet::ContainerType
&RHS
) {
519 ContainerType Result
;
520 Result
.reserve(std::max(LHS
.size(), RHS
.size()));
522 const_iterator First
= LHS
.begin(), Second
= RHS
.begin(),
523 FirstEnd
= LHS
.end(), SecondEnd
= RHS
.end();
525 // If we ran out of ranges in one set, but not in the other,
526 // it means that those elements are definitely not in the
528 while (First
!= FirstEnd
&& Second
!= SecondEnd
) {
529 // We want to keep the following invariant at all times:
531 // ----[ First ---------------------->
532 // --------[ Second ----------------->
533 if (Second
->From() < First
->From())
534 swapIterators(First
, FirstEnd
, Second
, SecondEnd
);
536 // Loop where the invariant holds:
538 // Check for the following situation:
540 // ----[ First ]--------------------->
541 // ---------------[ Second ]--------->
543 // which means that...
544 if (Second
->From() > First
->To()) {
545 // ...First is not in the intersection.
547 // We should move on to the next range after First and break out of the
548 // loop because the invariant might not be true.
553 // We have a guaranteed intersection at this point!
554 // And this is the current situation:
556 // ----[ First ]----------------->
557 // -------[ Second ------------------>
559 // Additionally, it definitely starts with Second->From().
560 const llvm::APSInt
&IntersectionStart
= Second
->From();
562 // It is important to know which of the two ranges' ends
563 // is greater. That "longer" range might have some other
564 // intersections, while the "shorter" range might not.
565 if (Second
->To() > First
->To()) {
566 // Here we make a decision to keep First as the "longer"
568 swapIterators(First
, FirstEnd
, Second
, SecondEnd
);
571 // At this point, we have the following situation:
573 // ---- First ]-------------------->
574 // ---- Second ]--[ Second+1 ---------->
576 // We don't know the relationship between First->From and
577 // Second->From and we don't know whether Second+1 intersects
580 // However, we know that [IntersectionStart, Second->To] is
581 // a part of the intersection...
582 Result
.push_back(Range(IntersectionStart
, Second
->To()));
584 // ...and that the invariant will hold for a valid Second+1
585 // because First->From <= Second->To < (Second+1)->From.
586 } while (Second
!= SecondEnd
);
590 return getEmptySet();
592 return makePersistent(std::move(Result
));
595 RangeSet
RangeSet::Factory::intersect(RangeSet LHS
, RangeSet RHS
) {
596 // Shortcut: let's see if the intersection is even possible.
597 if (LHS
.isEmpty() || RHS
.isEmpty() || LHS
.getMaxValue() < RHS
.getMinValue() ||
598 RHS
.getMaxValue() < LHS
.getMinValue())
599 return getEmptySet();
601 return intersect(*LHS
.Impl
, *RHS
.Impl
);
604 RangeSet
RangeSet::Factory::intersect(RangeSet LHS
, llvm::APSInt Point
) {
605 if (LHS
.containsImpl(Point
))
606 return getRangeSet(ValueFactory
.getValue(Point
));
608 return getEmptySet();
611 RangeSet
RangeSet::Factory::negate(RangeSet What
) {
613 return getEmptySet();
615 const llvm::APSInt SampleValue
= What
.getMinValue();
616 const llvm::APSInt
&MIN
= ValueFactory
.getMinValue(SampleValue
);
617 const llvm::APSInt
&MAX
= ValueFactory
.getMaxValue(SampleValue
);
619 ContainerType Result
;
620 Result
.reserve(What
.size() + (SampleValue
== MIN
));
622 // Handle a special case for MIN value.
623 const_iterator It
= What
.begin();
624 const_iterator End
= What
.end();
626 const llvm::APSInt
&From
= It
->From();
627 const llvm::APSInt
&To
= It
->To();
630 // If the range [From, To] is [MIN, MAX], then result is also [MIN, MAX].
635 const_iterator Last
= std::prev(End
);
637 // Try to find and unite the following ranges:
638 // [MIN, MIN] & [MIN + 1, N] => [MIN, N].
639 if (Last
->To() == MAX
) {
640 // It means that in the original range we have ranges
641 // [MIN, A], ... , [B, MAX]
642 // And the result should be [MIN, -B], ..., [-A, MAX]
643 Result
.emplace_back(MIN
, ValueFactory
.getValue(-Last
->From()));
644 // We already negated Last, so we can skip it.
647 // Add a separate range for the lowest value.
648 Result
.emplace_back(MIN
, MIN
);
651 // Skip adding the second range in case when [From, To] are [MIN, MIN].
653 Result
.emplace_back(ValueFactory
.getValue(-To
), MAX
);
656 // Skip the first range in the loop.
660 // Negate all other ranges.
661 for (; It
!= End
; ++It
) {
662 // Negate int values.
663 const llvm::APSInt
&NewFrom
= ValueFactory
.getValue(-It
->To());
664 const llvm::APSInt
&NewTo
= ValueFactory
.getValue(-It
->From());
666 // Add a negated range.
667 Result
.emplace_back(NewFrom
, NewTo
);
671 return makePersistent(std::move(Result
));
674 // Convert range set to the given integral type using truncation and promotion.
675 // This works similar to APSIntType::apply function but for the range set.
676 RangeSet
RangeSet::Factory::castTo(RangeSet What
, APSIntType Ty
) {
677 // Set is empty or NOOP (aka cast to the same type).
678 if (What
.isEmpty() || What
.getAPSIntType() == Ty
)
681 const bool IsConversion
= What
.isUnsigned() != Ty
.isUnsigned();
682 const bool IsTruncation
= What
.getBitWidth() > Ty
.getBitWidth();
683 const bool IsPromotion
= What
.getBitWidth() < Ty
.getBitWidth();
686 return makePersistent(truncateTo(What
, Ty
));
688 // Here we handle 2 cases:
689 // - IsConversion && !IsPromotion.
690 // In this case we handle changing a sign with same bitwidth: char -> uchar,
691 // uint -> int. Here we convert negatives to positives and positives which
692 // is out of range to negatives. We use convertTo function for that.
693 // - IsConversion && IsPromotion && !What.isUnsigned().
694 // In this case we handle changing a sign from signeds to unsigneds with
695 // higher bitwidth: char -> uint, int-> uint64. The point is that we also
696 // need convert negatives to positives and use convertTo function as well.
697 // For example, we don't need such a convertion when converting unsigned to
698 // signed with higher bitwidth, because all the values of unsigned is valid
699 // for the such signed.
700 if (IsConversion
&& (!IsPromotion
|| !What
.isUnsigned()))
701 return makePersistent(convertTo(What
, Ty
));
703 assert(IsPromotion
&& "Only promotion operation from unsigneds left.");
704 return makePersistent(promoteTo(What
, Ty
));
707 RangeSet
RangeSet::Factory::castTo(RangeSet What
, QualType T
) {
708 assert(T
->isIntegralOrEnumerationType() && "T shall be an integral type.");
709 return castTo(What
, ValueFactory
.getAPSIntType(T
));
712 RangeSet::ContainerType
RangeSet::Factory::truncateTo(RangeSet What
,
716 ContainerType Result
;
718 // CastRangeSize is an amount of all possible values of cast type.
719 // Example: `char` has 256 values; `short` has 65536 values.
720 // But in fact we use `amount of values` - 1, because
721 // we can't keep `amount of values of UINT64` inside uint64_t.
722 // E.g. 256 is an amount of all possible values of `char` and we can't keep
724 // And it's OK, it's enough to do correct calculations.
725 uint64_t CastRangeSize
= APInt::getMaxValue(Ty
.getBitWidth()).getZExtValue();
726 for (const Range
&R
: What
) {
727 // Get bounds of the given range.
728 APSInt FromInt
= R
.From();
729 APSInt ToInt
= R
.To();
730 // CurrentRangeSize is an amount of all possible values of the current
732 uint64_t CurrentRangeSize
= (ToInt
- FromInt
).getZExtValue();
733 // This is an optimization for a specific case when this Range covers
734 // the whole range of the target type.
736 if (CurrentRangeSize
>= CastRangeSize
) {
737 Dummy
.emplace_back(ValueFactory
.getMinValue(Ty
),
738 ValueFactory
.getMaxValue(Ty
));
739 Result
= std::move(Dummy
);
745 const APSInt
&PersistentFrom
= ValueFactory
.getValue(FromInt
);
746 const APSInt
&PersistentTo
= ValueFactory
.getValue(ToInt
);
747 if (FromInt
> ToInt
) {
748 Dummy
.emplace_back(ValueFactory
.getMinValue(Ty
), PersistentTo
);
749 Dummy
.emplace_back(PersistentFrom
, ValueFactory
.getMaxValue(Ty
));
751 Dummy
.emplace_back(PersistentFrom
, PersistentTo
);
752 // Every range retrieved after truncation potentialy has garbage values.
753 // So, we have to unite every next range with the previouses.
754 Result
= unite(Result
, Dummy
);
760 // Divide the convertion into two phases (presented as loops here).
761 // First phase(loop) works when casted values go in ascending order.
762 // E.g. char{1,3,5,127} -> uint{1,3,5,127}
763 // Interrupt the first phase and go to second one when casted values start
764 // go in descending order. That means that we crossed over the middle of
765 // the type value set (aka 0 for signeds and MAX/2+1 for unsigneds).
767 // 1: uchar{1,3,5,128,255} -> char{1,3,5,-128,-1}
768 // Here we put {1,3,5} to one array and {-128, -1} to another
769 // 2: char{-128,-127,-1,0,1,2} -> uchar{128,129,255,0,1,3}
770 // Here we put {128,129,255} to one array and {0,1,3} to another.
771 // After that we unite both arrays.
772 // NOTE: We don't just concatenate the arrays, because they may have
773 // adjacent ranges, e.g.:
774 // 1: char(-128, 127) -> uchar -> arr1(128, 255), arr2(0, 127) ->
775 // unite -> uchar(0, 255)
776 // 2: uchar(0, 1)U(254, 255) -> char -> arr1(0, 1), arr2(-2, -1) ->
777 // unite -> uchar(-2, 1)
778 RangeSet::ContainerType
RangeSet::Factory::convertTo(RangeSet What
,
782 using Bounds
= std::pair
<const APSInt
&, const APSInt
&>;
783 ContainerType AscendArray
;
784 ContainerType DescendArray
;
785 auto CastRange
= [Ty
, &VF
= ValueFactory
](const Range
&R
) -> Bounds
{
786 // Get bounds of the given range.
787 APSInt FromInt
= R
.From();
788 APSInt ToInt
= R
.To();
792 return {VF
.getValue(FromInt
), VF
.getValue(ToInt
)};
794 // Phase 1. Fill the first array.
795 APSInt LastConvertedInt
= Ty
.getMinValue();
796 const auto *It
= What
.begin();
797 const auto *E
= What
.end();
799 Bounds NewBounds
= CastRange(*(It
++));
800 // If values stop going acsending order, go to the second phase(loop).
801 if (NewBounds
.first
< LastConvertedInt
) {
802 DescendArray
.emplace_back(NewBounds
.first
, NewBounds
.second
);
805 // If the range contains a midpoint, then split the range.
806 // E.g. char(-5, 5) -> uchar(251, 5)
807 // Here we shall add a range (251, 255) to the first array and (0, 5) to the
809 if (NewBounds
.first
> NewBounds
.second
) {
810 DescendArray
.emplace_back(ValueFactory
.getMinValue(Ty
), NewBounds
.second
);
811 AscendArray
.emplace_back(NewBounds
.first
, ValueFactory
.getMaxValue(Ty
));
813 // Values are going acsending order.
814 AscendArray
.emplace_back(NewBounds
.first
, NewBounds
.second
);
815 LastConvertedInt
= NewBounds
.first
;
817 // Phase 2. Fill the second array.
819 Bounds NewBounds
= CastRange(*(It
++));
820 DescendArray
.emplace_back(NewBounds
.first
, NewBounds
.second
);
822 // Unite both arrays.
823 return unite(AscendArray
, DescendArray
);
826 /// Promotion from unsigneds to signeds/unsigneds left.
827 RangeSet::ContainerType
RangeSet::Factory::promoteTo(RangeSet What
,
829 ContainerType Result
;
830 // We definitely know the size of the result set.
831 Result
.reserve(What
.size());
833 // Each unsigned value fits every larger type without any changes,
834 // whether the larger type is signed or unsigned. So just promote and push
835 // back each range one by one.
836 for (const Range
&R
: What
) {
837 // Get bounds of the given range.
838 llvm::APSInt FromInt
= R
.From();
839 llvm::APSInt ToInt
= R
.To();
843 Result
.emplace_back(ValueFactory
.getValue(FromInt
),
844 ValueFactory
.getValue(ToInt
));
849 RangeSet
RangeSet::Factory::deletePoint(RangeSet From
,
850 const llvm::APSInt
&Point
) {
851 if (!From
.contains(Point
))
854 llvm::APSInt Upper
= Point
;
855 llvm::APSInt Lower
= Point
;
860 // Notice that the lower bound is greater than the upper bound.
861 return intersect(From
, Upper
, Lower
);
864 LLVM_DUMP_METHOD
void Range::dump(raw_ostream
&OS
) const {
865 OS
<< '[' << toString(From(), 10) << ", " << toString(To(), 10) << ']';
867 LLVM_DUMP_METHOD
void Range::dump() const { dump(llvm::errs()); }
869 LLVM_DUMP_METHOD
void RangeSet::dump(raw_ostream
&OS
) const {
871 llvm::interleaveComma(*this, OS
, [&OS
](const Range
&R
) { R
.dump(OS
); });
874 LLVM_DUMP_METHOD
void RangeSet::dump() const { dump(llvm::errs()); }
876 REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(SymbolSet
, SymbolRef
)
879 class EquivalenceClass
;
880 } // end anonymous namespace
882 REGISTER_MAP_WITH_PROGRAMSTATE(ClassMap
, SymbolRef
, EquivalenceClass
)
883 REGISTER_MAP_WITH_PROGRAMSTATE(ClassMembers
, EquivalenceClass
, SymbolSet
)
884 REGISTER_MAP_WITH_PROGRAMSTATE(ConstraintRange
, EquivalenceClass
, RangeSet
)
886 REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ClassSet
, EquivalenceClass
)
887 REGISTER_MAP_WITH_PROGRAMSTATE(DisequalityMap
, EquivalenceClass
, ClassSet
)
890 /// This class encapsulates a set of symbols equal to each other.
892 /// The main idea of the approach requiring such classes is in narrowing
893 /// and sharing constraints between symbols within the class. Also we can
894 /// conclude that there is no practical need in storing constraints for
895 /// every member of the class separately.
897 /// Main terminology:
899 /// * "Equivalence class" is an object of this class, which can be efficiently
900 /// compared to other classes. It represents the whole class without
901 /// storing the actual in it. The members of the class however can be
902 /// retrieved from the state.
904 /// * "Class members" are the symbols corresponding to the class. This means
905 /// that A == B for every member symbols A and B from the class. Members of
906 /// each class are stored in the state.
908 /// * "Trivial class" is a class that has and ever had only one same symbol.
910 /// * "Merge operation" merges two classes into one. It is the main operation
911 /// to produce non-trivial classes.
912 /// If, at some point, we can assume that two symbols from two distinct
913 /// classes are equal, we can merge these classes.
914 class EquivalenceClass
: public llvm::FoldingSetNode
{
916 /// Find equivalence class for the given symbol in the given state.
917 [[nodiscard
]] static inline EquivalenceClass
find(ProgramStateRef State
,
920 /// Merge classes for the given symbols and return a new state.
921 [[nodiscard
]] static inline ProgramStateRef
merge(RangeSet::Factory
&F
,
922 ProgramStateRef State
,
925 // Merge this class with the given class and return a new state.
926 [[nodiscard
]] inline ProgramStateRef
927 merge(RangeSet::Factory
&F
, ProgramStateRef State
, EquivalenceClass Other
);
929 /// Return a set of class members for the given state.
930 [[nodiscard
]] inline SymbolSet
getClassMembers(ProgramStateRef State
) const;
932 /// Return true if the current class is trivial in the given state.
933 /// A class is trivial if and only if there is not any member relations stored
934 /// to it in State/ClassMembers.
935 /// An equivalence class with one member might seem as it does not hold any
936 /// meaningful information, i.e. that is a tautology. However, during the
937 /// removal of dead symbols we do not remove classes with one member for
938 /// resource and performance reasons. Consequently, a class with one member is
939 /// not necessarily trivial. It could happen that we have a class with two
940 /// members and then during the removal of dead symbols we remove one of its
941 /// members. In this case, the class is still non-trivial (it still has the
942 /// mappings in ClassMembers), even though it has only one member.
943 [[nodiscard
]] inline bool isTrivial(ProgramStateRef State
) const;
945 /// Return true if the current class is trivial and its only member is dead.
946 [[nodiscard
]] inline bool isTriviallyDead(ProgramStateRef State
,
947 SymbolReaper
&Reaper
) const;
949 [[nodiscard
]] static inline ProgramStateRef
950 markDisequal(RangeSet::Factory
&F
, ProgramStateRef State
, SymbolRef First
,
952 [[nodiscard
]] static inline ProgramStateRef
953 markDisequal(RangeSet::Factory
&F
, ProgramStateRef State
,
954 EquivalenceClass First
, EquivalenceClass Second
);
955 [[nodiscard
]] inline ProgramStateRef
956 markDisequal(RangeSet::Factory
&F
, ProgramStateRef State
,
957 EquivalenceClass Other
) const;
958 [[nodiscard
]] static inline ClassSet
getDisequalClasses(ProgramStateRef State
,
960 [[nodiscard
]] inline ClassSet
getDisequalClasses(ProgramStateRef State
) const;
961 [[nodiscard
]] inline ClassSet
962 getDisequalClasses(DisequalityMapTy Map
, ClassSet::Factory
&Factory
) const;
964 [[nodiscard
]] static inline std::optional
<bool>
965 areEqual(ProgramStateRef State
, EquivalenceClass First
,
966 EquivalenceClass Second
);
967 [[nodiscard
]] static inline std::optional
<bool>
968 areEqual(ProgramStateRef State
, SymbolRef First
, SymbolRef Second
);
970 /// Remove one member from the class.
971 [[nodiscard
]] ProgramStateRef
removeMember(ProgramStateRef State
,
972 const SymbolRef Old
);
974 /// Iterate over all symbols and try to simplify them.
975 [[nodiscard
]] static inline ProgramStateRef
simplify(SValBuilder
&SVB
,
976 RangeSet::Factory
&F
,
977 ProgramStateRef State
,
978 EquivalenceClass Class
);
980 void dumpToStream(ProgramStateRef State
, raw_ostream
&os
) const;
981 LLVM_DUMP_METHOD
void dump(ProgramStateRef State
) const {
982 dumpToStream(State
, llvm::errs());
985 /// Check equivalence data for consistency.
986 [[nodiscard
]] LLVM_ATTRIBUTE_UNUSED
static bool
987 isClassDataConsistent(ProgramStateRef State
);
989 [[nodiscard
]] QualType
getType() const {
990 return getRepresentativeSymbol()->getType();
993 EquivalenceClass() = delete;
994 EquivalenceClass(const EquivalenceClass
&) = default;
995 EquivalenceClass
&operator=(const EquivalenceClass
&) = delete;
996 EquivalenceClass(EquivalenceClass
&&) = default;
997 EquivalenceClass
&operator=(EquivalenceClass
&&) = delete;
999 bool operator==(const EquivalenceClass
&Other
) const {
1000 return ID
== Other
.ID
;
1002 bool operator<(const EquivalenceClass
&Other
) const { return ID
< Other
.ID
; }
1003 bool operator!=(const EquivalenceClass
&Other
) const {
1004 return !operator==(Other
);
1007 static void Profile(llvm::FoldingSetNodeID
&ID
, uintptr_t CID
) {
1011 void Profile(llvm::FoldingSetNodeID
&ID
) const { Profile(ID
, this->ID
); }
1014 /* implicit */ EquivalenceClass(SymbolRef Sym
)
1015 : ID(reinterpret_cast<uintptr_t>(Sym
)) {}
1017 /// This function is intended to be used ONLY within the class.
1018 /// The fact that ID is a pointer to a symbol is an implementation detail
1019 /// and should stay that way.
1020 /// In the current implementation, we use it to retrieve the only member
1021 /// of the trivial class.
1022 SymbolRef
getRepresentativeSymbol() const {
1023 return reinterpret_cast<SymbolRef
>(ID
);
1025 static inline SymbolSet::Factory
&getMembersFactory(ProgramStateRef State
);
1027 inline ProgramStateRef
mergeImpl(RangeSet::Factory
&F
, ProgramStateRef State
,
1028 SymbolSet Members
, EquivalenceClass Other
,
1029 SymbolSet OtherMembers
);
1032 addToDisequalityInfo(DisequalityMapTy
&Info
, ConstraintRangeTy
&Constraints
,
1033 RangeSet::Factory
&F
, ProgramStateRef State
,
1034 EquivalenceClass First
, EquivalenceClass Second
);
1036 /// This is a unique identifier of the class.
1040 //===----------------------------------------------------------------------===//
1041 // Constraint functions
1042 //===----------------------------------------------------------------------===//
1044 [[nodiscard
]] LLVM_ATTRIBUTE_UNUSED
bool
1045 areFeasible(ConstraintRangeTy Constraints
) {
1046 return llvm::none_of(
1048 [](const std::pair
<EquivalenceClass
, RangeSet
> &ClassConstraint
) {
1049 return ClassConstraint
.second
.isEmpty();
1053 [[nodiscard
]] inline const RangeSet
*getConstraint(ProgramStateRef State
,
1054 EquivalenceClass Class
) {
1055 return State
->get
<ConstraintRange
>(Class
);
1058 [[nodiscard
]] inline const RangeSet
*getConstraint(ProgramStateRef State
,
1060 return getConstraint(State
, EquivalenceClass::find(State
, Sym
));
1063 [[nodiscard
]] ProgramStateRef
setConstraint(ProgramStateRef State
,
1064 EquivalenceClass Class
,
1065 RangeSet Constraint
) {
1066 return State
->set
<ConstraintRange
>(Class
, Constraint
);
1069 [[nodiscard
]] ProgramStateRef
setConstraints(ProgramStateRef State
,
1070 ConstraintRangeTy Constraints
) {
1071 return State
->set
<ConstraintRange
>(Constraints
);
1074 //===----------------------------------------------------------------------===//
1075 // Equality/diseqiality abstraction
1076 //===----------------------------------------------------------------------===//
1078 /// A small helper function for detecting symbolic (dis)equality.
1080 /// Equality check can have different forms (like a == b or a - b) and this
1081 /// class encapsulates those away if the only thing the user wants to check -
1082 /// whether it's equality/diseqiality or not.
1084 /// \returns true if assuming this Sym to be true means equality of operands
1085 /// false if it means disequality of operands
1086 /// std::nullopt otherwise
1087 std::optional
<bool> meansEquality(const SymSymExpr
*Sym
) {
1088 switch (Sym
->getOpcode()) {
1090 // This case is: A - B != 0 -> disequality check.
1093 // This case is: A == B != 0 -> equality check.
1096 // This case is: A != B != 0 -> diseqiality check.
1099 return std::nullopt
;
1103 //===----------------------------------------------------------------------===//
1104 // Intersection functions
1105 //===----------------------------------------------------------------------===//
1107 template <class SecondTy
, class... RestTy
>
1108 [[nodiscard
]] inline RangeSet
intersect(RangeSet::Factory
&F
, RangeSet Head
,
1109 SecondTy Second
, RestTy
... Tail
);
1111 template <class... RangeTy
> struct IntersectionTraits
;
1113 template <class... TailTy
> struct IntersectionTraits
<RangeSet
, TailTy
...> {
1114 // Found RangeSet, no need to check any further
1115 using Type
= RangeSet
;
1118 template <> struct IntersectionTraits
<> {
1119 // We ran out of types, and we didn't find any RangeSet, so the result should
1121 using Type
= std::optional
<RangeSet
>;
1124 template <class OptionalOrPointer
, class... TailTy
>
1125 struct IntersectionTraits
<OptionalOrPointer
, TailTy
...> {
1126 // If current type is Optional or a raw pointer, we should keep looking.
1127 using Type
= typename IntersectionTraits
<TailTy
...>::Type
;
1130 template <class EndTy
>
1131 [[nodiscard
]] inline EndTy
intersect(RangeSet::Factory
&F
, EndTy End
) {
1132 // If the list contains only RangeSet or std::optional<RangeSet>, simply
1133 // return that range set.
1137 [[nodiscard
]] LLVM_ATTRIBUTE_UNUSED
inline std::optional
<RangeSet
>
1138 intersect(RangeSet::Factory
&F
, const RangeSet
*End
) {
1139 // This is an extraneous conversion from a raw pointer into
1140 // std::optional<RangeSet>
1144 return std::nullopt
;
1147 template <class... RestTy
>
1148 [[nodiscard
]] inline RangeSet
intersect(RangeSet::Factory
&F
, RangeSet Head
,
1149 RangeSet Second
, RestTy
... Tail
) {
1150 // Here we call either the <RangeSet,RangeSet,...> or <RangeSet,...> version
1151 // of the function and can be sure that the result is RangeSet.
1152 return intersect(F
, F
.intersect(Head
, Second
), Tail
...);
1155 template <class SecondTy
, class... RestTy
>
1156 [[nodiscard
]] inline RangeSet
intersect(RangeSet::Factory
&F
, RangeSet Head
,
1157 SecondTy Second
, RestTy
... Tail
) {
1159 // Here we call the <RangeSet,RangeSet,...> version of the function...
1160 return intersect(F
, Head
, *Second
, Tail
...);
1162 // ...and here it is either <RangeSet,RangeSet,...> or <RangeSet,...>, which
1163 // means that the result is definitely RangeSet.
1164 return intersect(F
, Head
, Tail
...);
1167 /// Main generic intersect function.
1168 /// It intersects all of the given range sets. If some of the given arguments
1169 /// don't hold a range set (nullptr or std::nullopt), the function will skip
1172 /// Available representations for the arguments are:
1174 /// * std::optional<RangeSet>
1176 /// Pointer to a RangeSet is automatically assumed to be nullable and will get
1177 /// checked as well as the optional version. If this behaviour is undesired,
1178 /// please dereference the pointer in the call.
1180 /// Return type depends on the arguments' types. If we can be sure in compile
1181 /// time that there will be a range set as a result, the returning type is
1182 /// simply RangeSet, in other cases we have to back off to
1183 /// std::optional<RangeSet>.
1185 /// Please, prefer optional range sets to raw pointers. If the last argument is
1186 /// a raw pointer and all previous arguments are std::nullopt, it will cost one
1187 /// additional check to convert RangeSet * into std::optional<RangeSet>.
1188 template <class HeadTy
, class SecondTy
, class... RestTy
>
1189 [[nodiscard
]] inline
1190 typename IntersectionTraits
<HeadTy
, SecondTy
, RestTy
...>::Type
1191 intersect(RangeSet::Factory
&F
, HeadTy Head
, SecondTy Second
,
1194 return intersect(F
, *Head
, Second
, Tail
...);
1196 return intersect(F
, Second
, Tail
...);
1199 //===----------------------------------------------------------------------===//
1200 // Symbolic reasoning logic
1201 //===----------------------------------------------------------------------===//
1203 /// A little component aggregating all of the reasoning we have about
1204 /// the ranges of symbolic expressions.
1206 /// Even when we don't know the exact values of the operands, we still
1207 /// can get a pretty good estimate of the result's range.
1208 class SymbolicRangeInferrer
1209 : public SymExprVisitor
<SymbolicRangeInferrer
, RangeSet
> {
1211 template <class SourceType
>
1212 static RangeSet
inferRange(RangeSet::Factory
&F
, ProgramStateRef State
,
1213 SourceType Origin
) {
1214 SymbolicRangeInferrer
Inferrer(F
, State
);
1215 return Inferrer
.infer(Origin
);
1218 RangeSet
VisitSymExpr(SymbolRef Sym
) {
1219 if (std::optional
<RangeSet
> RS
= getRangeForNegatedSym(Sym
))
1221 // If we've reached this line, the actual type of the symbolic
1222 // expression is not supported for advanced inference.
1223 // In this case, we simply backoff to the default "let's simply
1224 // infer the range from the expression's type".
1225 return infer(Sym
->getType());
1228 RangeSet
VisitUnarySymExpr(const UnarySymExpr
*USE
) {
1229 if (std::optional
<RangeSet
> RS
= getRangeForNegatedUnarySym(USE
))
1231 return infer(USE
->getType());
1234 RangeSet
VisitSymIntExpr(const SymIntExpr
*Sym
) {
1235 return VisitBinaryOperator(Sym
);
1238 RangeSet
VisitIntSymExpr(const IntSymExpr
*Sym
) {
1239 return VisitBinaryOperator(Sym
);
1242 RangeSet
VisitSymSymExpr(const SymSymExpr
*SSE
) {
1245 // If Sym is a difference of symbols A - B, then maybe we have range
1246 // set stored for B - A.
1248 // If we have range set stored for both A - B and B - A then
1249 // calculate the effective range set by intersecting the range set
1250 // for A - B and the negated range set of B - A.
1251 getRangeForNegatedSymSym(SSE
),
1252 // If Sym is a comparison expression (except <=>),
1253 // find any other comparisons with the same operands.
1254 // See function description.
1255 getRangeForComparisonSymbol(SSE
),
1256 // If Sym is (dis)equality, we might have some information
1257 // on that in our equality classes data structure.
1258 getRangeForEqualities(SSE
),
1259 // And we should always check what we can get from the operands.
1260 VisitBinaryOperator(SSE
));
1264 SymbolicRangeInferrer(RangeSet::Factory
&F
, ProgramStateRef S
)
1265 : ValueFactory(F
.getValueFactory()), RangeFactory(F
), State(S
) {}
1267 /// Infer range information from the given integer constant.
1269 /// It's not a real "inference", but is here for operating with
1270 /// sub-expressions in a more polymorphic manner.
1271 RangeSet
inferAs(const llvm::APSInt
&Val
, QualType
) {
1272 return {RangeFactory
, Val
};
1275 /// Infer range information from symbol in the context of the given type.
1276 RangeSet
inferAs(SymbolRef Sym
, QualType DestType
) {
1277 QualType ActualType
= Sym
->getType();
1278 // Check that we can reason about the symbol at all.
1279 if (ActualType
->isIntegralOrEnumerationType() ||
1280 Loc::isLocType(ActualType
)) {
1283 // Otherwise, let's simply infer from the destination type.
1284 // We couldn't figure out nothing else about that expression.
1285 return infer(DestType
);
1288 RangeSet
infer(SymbolRef Sym
) {
1289 return intersect(RangeFactory
,
1290 // Of course, we should take the constraint directly
1291 // associated with this symbol into consideration.
1292 getConstraint(State
, Sym
),
1293 // Apart from the Sym itself, we can infer quite a lot if
1294 // we look into subexpressions of Sym.
1298 RangeSet
infer(EquivalenceClass Class
) {
1299 if (const RangeSet
*AssociatedConstraint
= getConstraint(State
, Class
))
1300 return *AssociatedConstraint
;
1302 return infer(Class
.getType());
1305 /// Infer range information solely from the type.
1306 RangeSet
infer(QualType T
) {
1307 // Lazily generate a new RangeSet representing all possible values for the
1308 // given symbol type.
1309 RangeSet
Result(RangeFactory
, ValueFactory
.getMinValue(T
),
1310 ValueFactory
.getMaxValue(T
));
1312 // References are known to be non-zero.
1313 if (T
->isReferenceType())
1314 return assumeNonZero(Result
, T
);
1319 template <class BinarySymExprTy
>
1320 RangeSet
VisitBinaryOperator(const BinarySymExprTy
*Sym
) {
1321 // TODO #1: VisitBinaryOperator implementation might not make a good
1322 // use of the inferred ranges. In this case, we might be calculating
1323 // everything for nothing. This being said, we should introduce some
1324 // sort of laziness mechanism here.
1326 // TODO #2: We didn't go into the nested expressions before, so it
1327 // might cause us spending much more time doing the inference.
1328 // This can be a problem for deeply nested expressions that are
1329 // involved in conditions and get tested continuously. We definitely
1330 // need to address this issue and introduce some sort of caching
1332 QualType ResultType
= Sym
->getType();
1333 return VisitBinaryOperator(inferAs(Sym
->getLHS(), ResultType
),
1335 inferAs(Sym
->getRHS(), ResultType
), ResultType
);
1338 RangeSet
VisitBinaryOperator(RangeSet LHS
, BinaryOperator::Opcode Op
,
1339 RangeSet RHS
, QualType T
);
1341 //===----------------------------------------------------------------------===//
1342 // Ranges and operators
1343 //===----------------------------------------------------------------------===//
1345 /// Return a rough approximation of the given range set.
1347 /// For the range set:
1348 /// { [x_0, y_0], [x_1, y_1], ... , [x_N, y_N] }
1349 /// it will return the range [x_0, y_N].
1350 static Range
fillGaps(RangeSet Origin
) {
1351 assert(!Origin
.isEmpty());
1352 return {Origin
.getMinValue(), Origin
.getMaxValue()};
1355 /// Try to convert given range into the given type.
1357 /// It will return std::nullopt only when the trivial conversion is possible.
1358 std::optional
<Range
> convert(const Range
&Origin
, APSIntType To
) {
1359 if (To
.testInRange(Origin
.From(), false) != APSIntType::RTR_Within
||
1360 To
.testInRange(Origin
.To(), false) != APSIntType::RTR_Within
) {
1361 return std::nullopt
;
1363 return Range(ValueFactory
.Convert(To
, Origin
.From()),
1364 ValueFactory
.Convert(To
, Origin
.To()));
1367 template <BinaryOperator::Opcode Op
>
1368 RangeSet
VisitBinaryOperator(RangeSet LHS
, RangeSet RHS
, QualType T
) {
1369 assert(!LHS
.isEmpty() && !RHS
.isEmpty());
1371 Range CoarseLHS
= fillGaps(LHS
);
1372 Range CoarseRHS
= fillGaps(RHS
);
1374 APSIntType ResultType
= ValueFactory
.getAPSIntType(T
);
1376 // We need to convert ranges to the resulting type, so we can compare values
1377 // and combine them in a meaningful (in terms of the given operation) way.
1378 auto ConvertedCoarseLHS
= convert(CoarseLHS
, ResultType
);
1379 auto ConvertedCoarseRHS
= convert(CoarseRHS
, ResultType
);
1381 // It is hard to reason about ranges when conversion changes
1382 // borders of the ranges.
1383 if (!ConvertedCoarseLHS
|| !ConvertedCoarseRHS
) {
1387 return VisitBinaryOperator
<Op
>(*ConvertedCoarseLHS
, *ConvertedCoarseRHS
, T
);
1390 template <BinaryOperator::Opcode Op
>
1391 RangeSet
VisitBinaryOperator(Range LHS
, Range RHS
, QualType T
) {
1395 /// Return a symmetrical range for the given range and type.
1397 /// If T is signed, return the smallest range [-x..x] that covers the original
1398 /// range, or [-min(T), max(T)] if the aforementioned symmetric range doesn't
1399 /// exist due to original range covering min(T)).
1401 /// If T is unsigned, return the smallest range [0..x] that covers the
1403 Range
getSymmetricalRange(Range Origin
, QualType T
) {
1404 APSIntType RangeType
= ValueFactory
.getAPSIntType(T
);
1406 if (RangeType
.isUnsigned()) {
1407 return Range(ValueFactory
.getMinValue(RangeType
), Origin
.To());
1410 if (Origin
.From().isMinSignedValue()) {
1411 // If mini is a minimal signed value, absolute value of it is greater
1412 // than the maximal signed value. In order to avoid these
1413 // complications, we simply return the whole range.
1414 return {ValueFactory
.getMinValue(RangeType
),
1415 ValueFactory
.getMaxValue(RangeType
)};
1418 // At this point, we are sure that the type is signed and we can safely
1419 // use unary - operator.
1421 // While calculating absolute maximum, we can use the following formula
1422 // because of these reasons:
1423 // * If From >= 0 then To >= From and To >= -From.
1424 // AbsMax == To == max(To, -From)
1425 // * If To <= 0 then -From >= -To and -From >= From.
1426 // AbsMax == -From == max(-From, To)
1427 // * Otherwise, From <= 0, To >= 0, and
1428 // AbsMax == max(abs(From), abs(To))
1429 llvm::APSInt AbsMax
= std::max(-Origin
.From(), Origin
.To());
1431 // Intersection is guaranteed to be non-empty.
1432 return {ValueFactory
.getValue(-AbsMax
), ValueFactory
.getValue(AbsMax
)};
1435 /// Return a range set subtracting zero from \p Domain.
1436 RangeSet
assumeNonZero(RangeSet Domain
, QualType T
) {
1437 APSIntType IntType
= ValueFactory
.getAPSIntType(T
);
1438 return RangeFactory
.deletePoint(Domain
, IntType
.getZeroValue());
1441 template <typename ProduceNegatedSymFunc
>
1442 std::optional
<RangeSet
> getRangeForNegatedExpr(ProduceNegatedSymFunc F
,
1444 // Do not negate if the type cannot be meaningfully negated.
1445 if (!T
->isUnsignedIntegerOrEnumerationType() &&
1446 !T
->isSignedIntegerOrEnumerationType())
1447 return std::nullopt
;
1449 if (SymbolRef NegatedSym
= F())
1450 if (const RangeSet
*NegatedRange
= getConstraint(State
, NegatedSym
))
1451 return RangeFactory
.negate(*NegatedRange
);
1453 return std::nullopt
;
1456 std::optional
<RangeSet
> getRangeForNegatedUnarySym(const UnarySymExpr
*USE
) {
1457 // Just get the operand when we negate a symbol that is already negated.
1459 return getRangeForNegatedExpr(
1460 [USE
]() -> SymbolRef
{
1461 if (USE
->getOpcode() == UO_Minus
)
1462 return USE
->getOperand();
1468 std::optional
<RangeSet
> getRangeForNegatedSymSym(const SymSymExpr
*SSE
) {
1469 return getRangeForNegatedExpr(
1470 [SSE
, State
= this->State
]() -> SymbolRef
{
1471 if (SSE
->getOpcode() == BO_Sub
)
1472 return State
->getSymbolManager().getSymSymExpr(
1473 SSE
->getRHS(), BO_Sub
, SSE
->getLHS(), SSE
->getType());
1479 std::optional
<RangeSet
> getRangeForNegatedSym(SymbolRef Sym
) {
1480 return getRangeForNegatedExpr(
1481 [Sym
, State
= this->State
]() {
1482 return State
->getSymbolManager().getUnarySymExpr(Sym
, UO_Minus
,
1488 // Returns ranges only for binary comparison operators (except <=>)
1489 // when left and right operands are symbolic values.
1490 // Finds any other comparisons with the same operands.
1491 // Then do logical calculations and refuse impossible branches.
1492 // E.g. (x < y) and (x > y) at the same time are impossible.
1493 // E.g. (x >= y) and (x != y) at the same time makes (x > y) true only.
1494 // E.g. (x == y) and (y == x) are just reversed but the same.
1495 // It covers all possible combinations (see CmpOpTable description).
1496 // Note that `x` and `y` can also stand for subexpressions,
1497 // not only for actual symbols.
1498 std::optional
<RangeSet
> getRangeForComparisonSymbol(const SymSymExpr
*SSE
) {
1499 const BinaryOperatorKind CurrentOP
= SSE
->getOpcode();
1501 // We currently do not support <=> (C++20).
1502 if (!BinaryOperator::isComparisonOp(CurrentOP
) || (CurrentOP
== BO_Cmp
))
1503 return std::nullopt
;
1505 static const OperatorRelationsTable CmpOpTable
{};
1507 const SymExpr
*LHS
= SSE
->getLHS();
1508 const SymExpr
*RHS
= SSE
->getRHS();
1509 QualType T
= SSE
->getType();
1511 SymbolManager
&SymMgr
= State
->getSymbolManager();
1513 // We use this variable to store the last queried operator (`QueriedOP`)
1514 // for which the `getCmpOpState` returned with `Unknown`. If there are two
1515 // different OPs that returned `Unknown` then we have to query the special
1516 // `UnknownX2` column. We assume that `getCmpOpState(CurrentOP, CurrentOP)`
1517 // never returns `Unknown`, so `CurrentOP` is a good initial value.
1518 BinaryOperatorKind LastQueriedOpToUnknown
= CurrentOP
;
1520 // Loop goes through all of the columns exept the last one ('UnknownX2').
1521 // We treat `UnknownX2` column separately at the end of the loop body.
1522 for (size_t i
= 0; i
< CmpOpTable
.getCmpOpCount(); ++i
) {
1524 // Let's find an expression e.g. (x < y).
1525 BinaryOperatorKind QueriedOP
= OperatorRelationsTable::getOpFromIndex(i
);
1526 const SymSymExpr
*SymSym
= SymMgr
.getSymSymExpr(LHS
, QueriedOP
, RHS
, T
);
1527 const RangeSet
*QueriedRangeSet
= getConstraint(State
, SymSym
);
1529 // If ranges were not previously found,
1530 // try to find a reversed expression (y > x).
1531 if (!QueriedRangeSet
) {
1532 const BinaryOperatorKind ROP
=
1533 BinaryOperator::reverseComparisonOp(QueriedOP
);
1534 SymSym
= SymMgr
.getSymSymExpr(RHS
, ROP
, LHS
, T
);
1535 QueriedRangeSet
= getConstraint(State
, SymSym
);
1538 if (!QueriedRangeSet
|| QueriedRangeSet
->isEmpty())
1541 const llvm::APSInt
*ConcreteValue
= QueriedRangeSet
->getConcreteValue();
1542 const bool isInFalseBranch
=
1543 ConcreteValue
? (*ConcreteValue
== 0) : false;
1545 // If it is a false branch, we shall be guided by opposite operator,
1546 // because the table is made assuming we are in the true branch.
1547 // E.g. when (x <= y) is false, then (x > y) is true.
1548 if (isInFalseBranch
)
1549 QueriedOP
= BinaryOperator::negateComparisonOp(QueriedOP
);
1551 OperatorRelationsTable::TriStateKind BranchState
=
1552 CmpOpTable
.getCmpOpState(CurrentOP
, QueriedOP
);
1554 if (BranchState
== OperatorRelationsTable::Unknown
) {
1555 if (LastQueriedOpToUnknown
!= CurrentOP
&&
1556 LastQueriedOpToUnknown
!= QueriedOP
) {
1557 // If we got the Unknown state for both different operators.
1558 // if (x <= y) // assume true
1559 // if (x != y) // assume true
1560 // if (x < y) // would be also true
1561 // Get a state from `UnknownX2` column.
1562 BranchState
= CmpOpTable
.getCmpOpStateForUnknownX2(CurrentOP
);
1564 LastQueriedOpToUnknown
= QueriedOP
;
1569 return (BranchState
== OperatorRelationsTable::True
) ? getTrueRange(T
)
1573 return std::nullopt
;
1576 std::optional
<RangeSet
> getRangeForEqualities(const SymSymExpr
*Sym
) {
1577 std::optional
<bool> Equality
= meansEquality(Sym
);
1580 return std::nullopt
;
1582 if (std::optional
<bool> AreEqual
=
1583 EquivalenceClass::areEqual(State
, Sym
->getLHS(), Sym
->getRHS())) {
1584 // Here we cover two cases at once:
1585 // * if Sym is equality and its operands are known to be equal -> true
1586 // * if Sym is disequality and its operands are disequal -> true
1587 if (*AreEqual
== *Equality
) {
1588 return getTrueRange(Sym
->getType());
1590 // Opposite combinations result in false.
1591 return getFalseRange(Sym
->getType());
1594 return std::nullopt
;
1597 RangeSet
getTrueRange(QualType T
) {
1598 RangeSet TypeRange
= infer(T
);
1599 return assumeNonZero(TypeRange
, T
);
1602 RangeSet
getFalseRange(QualType T
) {
1603 const llvm::APSInt
&Zero
= ValueFactory
.getValue(0, T
);
1604 return RangeSet(RangeFactory
, Zero
);
1607 BasicValueFactory
&ValueFactory
;
1608 RangeSet::Factory
&RangeFactory
;
1609 ProgramStateRef State
;
1612 //===----------------------------------------------------------------------===//
1613 // Range-based reasoning about symbolic operations
1614 //===----------------------------------------------------------------------===//
1617 RangeSet
SymbolicRangeInferrer::VisitBinaryOperator
<BO_NE
>(RangeSet LHS
,
1620 assert(!LHS
.isEmpty() && !RHS
.isEmpty());
1622 if (LHS
.getAPSIntType() == RHS
.getAPSIntType()) {
1623 if (intersect(RangeFactory
, LHS
, RHS
).isEmpty())
1624 return getTrueRange(T
);
1627 // We can only lose information if we are casting smaller signed type to
1628 // bigger unsigned type. For e.g.,
1629 // LHS (unsigned short): [2, USHRT_MAX]
1630 // RHS (signed short): [SHRT_MIN, 0]
1632 // Casting RHS to LHS type will leave us with overlapping values
1633 // CastedRHS : [0, 0] U [SHRT_MAX + 1, USHRT_MAX]
1635 // We can avoid this by checking if signed type's maximum value is lesser
1636 // than unsigned type's minimum value.
1638 // If both have different signs then only we can get more information.
1639 if (LHS
.isUnsigned() != RHS
.isUnsigned()) {
1640 if (LHS
.isUnsigned() && (LHS
.getBitWidth() >= RHS
.getBitWidth())) {
1641 if (RHS
.getMaxValue().isNegative() ||
1642 LHS
.getAPSIntType().convert(RHS
.getMaxValue()) < LHS
.getMinValue())
1643 return getTrueRange(T
);
1645 } else if (RHS
.isUnsigned() && (LHS
.getBitWidth() <= RHS
.getBitWidth())) {
1646 if (LHS
.getMaxValue().isNegative() ||
1647 RHS
.getAPSIntType().convert(LHS
.getMaxValue()) < RHS
.getMinValue())
1648 return getTrueRange(T
);
1652 // Both RangeSets should be casted to bigger unsigned type.
1653 APSIntType
CastingType(std::max(LHS
.getBitWidth(), RHS
.getBitWidth()),
1654 LHS
.isUnsigned() || RHS
.isUnsigned());
1656 RangeSet CastedLHS
= RangeFactory
.castTo(LHS
, CastingType
);
1657 RangeSet CastedRHS
= RangeFactory
.castTo(RHS
, CastingType
);
1659 if (intersect(RangeFactory
, CastedLHS
, CastedRHS
).isEmpty())
1660 return getTrueRange(T
);
1663 // In all other cases, the resulting range cannot be deduced.
1668 RangeSet
SymbolicRangeInferrer::VisitBinaryOperator
<BO_Or
>(Range LHS
, Range RHS
,
1670 APSIntType ResultType
= ValueFactory
.getAPSIntType(T
);
1671 llvm::APSInt Zero
= ResultType
.getZeroValue();
1673 bool IsLHSPositiveOrZero
= LHS
.From() >= Zero
;
1674 bool IsRHSPositiveOrZero
= RHS
.From() >= Zero
;
1676 bool IsLHSNegative
= LHS
.To() < Zero
;
1677 bool IsRHSNegative
= RHS
.To() < Zero
;
1679 // Check if both ranges have the same sign.
1680 if ((IsLHSPositiveOrZero
&& IsRHSPositiveOrZero
) ||
1681 (IsLHSNegative
&& IsRHSNegative
)) {
1682 // The result is definitely greater or equal than any of the operands.
1683 const llvm::APSInt
&Min
= std::max(LHS
.From(), RHS
.From());
1685 // We estimate maximal value for positives as the maximal value for the
1686 // given type. For negatives, we estimate it with -1 (e.g. 0x11111111).
1688 // TODO: We basically, limit the resulting range from below, but don't do
1689 // anything with the upper bound.
1691 // For positive operands, it can be done as follows: for the upper
1692 // bound of LHS and RHS we calculate the most significant bit set.
1693 // Let's call it the N-th bit. Then we can estimate the maximal
1694 // number to be 2^(N+1)-1, i.e. the number with all the bits up to
1695 // the N-th bit set.
1696 const llvm::APSInt
&Max
= IsLHSNegative
1697 ? ValueFactory
.getValue(--Zero
)
1698 : ValueFactory
.getMaxValue(ResultType
);
1700 return {RangeFactory
, ValueFactory
.getValue(Min
), Max
};
1703 // Otherwise, let's check if at least one of the operands is negative.
1704 if (IsLHSNegative
|| IsRHSNegative
) {
1705 // This means that the result is definitely negative as well.
1706 return {RangeFactory
, ValueFactory
.getMinValue(ResultType
),
1707 ValueFactory
.getValue(--Zero
)};
1710 RangeSet DefaultRange
= infer(T
);
1712 // It is pretty hard to reason about operands with different signs
1713 // (and especially with possibly different signs). We simply check if it
1714 // can be zero. In order to conclude that the result could not be zero,
1715 // at least one of the operands should be definitely not zero itself.
1716 if (!LHS
.Includes(Zero
) || !RHS
.Includes(Zero
)) {
1717 return assumeNonZero(DefaultRange
, T
);
1720 // Nothing much else to do here.
1721 return DefaultRange
;
1725 RangeSet
SymbolicRangeInferrer::VisitBinaryOperator
<BO_And
>(Range LHS
,
1728 APSIntType ResultType
= ValueFactory
.getAPSIntType(T
);
1729 llvm::APSInt Zero
= ResultType
.getZeroValue();
1731 bool IsLHSPositiveOrZero
= LHS
.From() >= Zero
;
1732 bool IsRHSPositiveOrZero
= RHS
.From() >= Zero
;
1734 bool IsLHSNegative
= LHS
.To() < Zero
;
1735 bool IsRHSNegative
= RHS
.To() < Zero
;
1737 // Check if both ranges have the same sign.
1738 if ((IsLHSPositiveOrZero
&& IsRHSPositiveOrZero
) ||
1739 (IsLHSNegative
&& IsRHSNegative
)) {
1740 // The result is definitely less or equal than any of the operands.
1741 const llvm::APSInt
&Max
= std::min(LHS
.To(), RHS
.To());
1743 // We conservatively estimate lower bound to be the smallest positive
1744 // or negative value corresponding to the sign of the operands.
1745 const llvm::APSInt
&Min
= IsLHSNegative
1746 ? ValueFactory
.getMinValue(ResultType
)
1747 : ValueFactory
.getValue(Zero
);
1749 return {RangeFactory
, Min
, Max
};
1752 // Otherwise, let's check if at least one of the operands is positive.
1753 if (IsLHSPositiveOrZero
|| IsRHSPositiveOrZero
) {
1754 // This makes result definitely positive.
1756 // We can also reason about a maximal value by finding the maximal
1757 // value of the positive operand.
1758 const llvm::APSInt
&Max
= IsLHSPositiveOrZero
? LHS
.To() : RHS
.To();
1760 // The minimal value on the other hand is much harder to reason about.
1761 // The only thing we know for sure is that the result is positive.
1762 return {RangeFactory
, ValueFactory
.getValue(Zero
),
1763 ValueFactory
.getValue(Max
)};
1766 // Nothing much else to do here.
1771 RangeSet
SymbolicRangeInferrer::VisitBinaryOperator
<BO_Rem
>(Range LHS
,
1774 llvm::APSInt Zero
= ValueFactory
.getAPSIntType(T
).getZeroValue();
1776 Range ConservativeRange
= getSymmetricalRange(RHS
, T
);
1778 llvm::APSInt Max
= ConservativeRange
.To();
1779 llvm::APSInt Min
= ConservativeRange
.From();
1782 // It's an undefined behaviour to divide by 0 and it seems like we know
1783 // for sure that RHS is 0. Let's say that the resulting range is
1784 // simply infeasible for that matter.
1785 return RangeFactory
.getEmptySet();
1788 // At this point, our conservative range is closed. The result, however,
1789 // couldn't be greater than the RHS' maximal absolute value. Because of
1790 // this reason, we turn the range into open (or half-open in case of
1791 // unsigned integers).
1793 // While we operate on integer values, an open interval (a, b) can be easily
1794 // represented by the closed interval [a + 1, b - 1]. And this is exactly
1797 // If we are dealing with unsigned case, we shouldn't move the lower bound.
1798 if (Min
.isSigned()) {
1803 bool IsLHSPositiveOrZero
= LHS
.From() >= Zero
;
1804 bool IsRHSPositiveOrZero
= RHS
.From() >= Zero
;
1806 // Remainder operator results with negative operands is implementation
1807 // defined. Positive cases are much easier to reason about though.
1808 if (IsLHSPositiveOrZero
&& IsRHSPositiveOrZero
) {
1809 // If maximal value of LHS is less than maximal value of RHS,
1810 // the result won't get greater than LHS.To().
1811 Max
= std::min(LHS
.To(), Max
);
1812 // We want to check if it is a situation similar to the following:
1814 // <------------|---[ LHS ]--------[ RHS ]----->
1817 // In this situation, we can conclude that (LHS / RHS) == 0 and
1818 // (LHS % RHS) == LHS.
1819 Min
= LHS
.To() < RHS
.From() ? LHS
.From() : Zero
;
1822 // Nevertheless, the symmetrical range for RHS is a conservative estimate
1823 // for any sign of either LHS, or RHS.
1824 return {RangeFactory
, ValueFactory
.getValue(Min
), ValueFactory
.getValue(Max
)};
1827 RangeSet
SymbolicRangeInferrer::VisitBinaryOperator(RangeSet LHS
,
1828 BinaryOperator::Opcode Op
,
1829 RangeSet RHS
, QualType T
) {
1830 // We should propagate information about unfeasbility of one of the
1831 // operands to the resulting range.
1832 if (LHS
.isEmpty() || RHS
.isEmpty()) {
1833 return RangeFactory
.getEmptySet();
1838 return VisitBinaryOperator
<BO_NE
>(LHS
, RHS
, T
);
1840 return VisitBinaryOperator
<BO_Or
>(LHS
, RHS
, T
);
1842 return VisitBinaryOperator
<BO_And
>(LHS
, RHS
, T
);
1844 return VisitBinaryOperator
<BO_Rem
>(LHS
, RHS
, T
);
1850 //===----------------------------------------------------------------------===//
1851 // Constraint manager implementation details
1852 //===----------------------------------------------------------------------===//
1854 class RangeConstraintManager
: public RangedConstraintManager
{
1856 RangeConstraintManager(ExprEngine
*EE
, SValBuilder
&SVB
)
1857 : RangedConstraintManager(EE
, SVB
), F(getBasicVals()) {}
1859 //===------------------------------------------------------------------===//
1860 // Implementation for interface from ConstraintManager.
1861 //===------------------------------------------------------------------===//
1863 bool haveEqualConstraints(ProgramStateRef S1
,
1864 ProgramStateRef S2
) const override
{
1865 // NOTE: ClassMembers are as simple as back pointers for ClassMap,
1866 // so comparing constraint ranges and class maps should be
1868 return S1
->get
<ConstraintRange
>() == S2
->get
<ConstraintRange
>() &&
1869 S1
->get
<ClassMap
>() == S2
->get
<ClassMap
>();
1872 bool canReasonAbout(SVal X
) const override
;
1874 ConditionTruthVal
checkNull(ProgramStateRef State
, SymbolRef Sym
) override
;
1876 const llvm::APSInt
*getSymVal(ProgramStateRef State
,
1877 SymbolRef Sym
) const override
;
1879 const llvm::APSInt
*getSymMinVal(ProgramStateRef State
,
1880 SymbolRef Sym
) const override
;
1882 const llvm::APSInt
*getSymMaxVal(ProgramStateRef State
,
1883 SymbolRef Sym
) const override
;
1885 ProgramStateRef
removeDeadBindings(ProgramStateRef State
,
1886 SymbolReaper
&SymReaper
) override
;
1888 void printJson(raw_ostream
&Out
, ProgramStateRef State
, const char *NL
= "\n",
1889 unsigned int Space
= 0, bool IsDot
= false) const override
;
1890 void printValue(raw_ostream
&Out
, ProgramStateRef State
,
1891 SymbolRef Sym
) override
;
1892 void printConstraints(raw_ostream
&Out
, ProgramStateRef State
,
1893 const char *NL
= "\n", unsigned int Space
= 0,
1894 bool IsDot
= false) const;
1895 void printEquivalenceClasses(raw_ostream
&Out
, ProgramStateRef State
,
1896 const char *NL
= "\n", unsigned int Space
= 0,
1897 bool IsDot
= false) const;
1898 void printDisequalities(raw_ostream
&Out
, ProgramStateRef State
,
1899 const char *NL
= "\n", unsigned int Space
= 0,
1900 bool IsDot
= false) const;
1902 //===------------------------------------------------------------------===//
1903 // Implementation for interface from RangedConstraintManager.
1904 //===------------------------------------------------------------------===//
1906 ProgramStateRef
assumeSymNE(ProgramStateRef State
, SymbolRef Sym
,
1907 const llvm::APSInt
&V
,
1908 const llvm::APSInt
&Adjustment
) override
;
1910 ProgramStateRef
assumeSymEQ(ProgramStateRef State
, SymbolRef Sym
,
1911 const llvm::APSInt
&V
,
1912 const llvm::APSInt
&Adjustment
) override
;
1914 ProgramStateRef
assumeSymLT(ProgramStateRef State
, SymbolRef Sym
,
1915 const llvm::APSInt
&V
,
1916 const llvm::APSInt
&Adjustment
) override
;
1918 ProgramStateRef
assumeSymGT(ProgramStateRef State
, SymbolRef Sym
,
1919 const llvm::APSInt
&V
,
1920 const llvm::APSInt
&Adjustment
) override
;
1922 ProgramStateRef
assumeSymLE(ProgramStateRef State
, SymbolRef Sym
,
1923 const llvm::APSInt
&V
,
1924 const llvm::APSInt
&Adjustment
) override
;
1926 ProgramStateRef
assumeSymGE(ProgramStateRef State
, SymbolRef Sym
,
1927 const llvm::APSInt
&V
,
1928 const llvm::APSInt
&Adjustment
) override
;
1930 ProgramStateRef
assumeSymWithinInclusiveRange(
1931 ProgramStateRef State
, SymbolRef Sym
, const llvm::APSInt
&From
,
1932 const llvm::APSInt
&To
, const llvm::APSInt
&Adjustment
) override
;
1934 ProgramStateRef
assumeSymOutsideInclusiveRange(
1935 ProgramStateRef State
, SymbolRef Sym
, const llvm::APSInt
&From
,
1936 const llvm::APSInt
&To
, const llvm::APSInt
&Adjustment
) override
;
1939 RangeSet::Factory F
;
1941 RangeSet
getRange(ProgramStateRef State
, SymbolRef Sym
);
1942 RangeSet
getRange(ProgramStateRef State
, EquivalenceClass Class
);
1943 ProgramStateRef
setRange(ProgramStateRef State
, SymbolRef Sym
,
1945 ProgramStateRef
setRange(ProgramStateRef State
, EquivalenceClass Class
,
1948 RangeSet
getSymLTRange(ProgramStateRef St
, SymbolRef Sym
,
1949 const llvm::APSInt
&Int
,
1950 const llvm::APSInt
&Adjustment
);
1951 RangeSet
getSymGTRange(ProgramStateRef St
, SymbolRef Sym
,
1952 const llvm::APSInt
&Int
,
1953 const llvm::APSInt
&Adjustment
);
1954 RangeSet
getSymLERange(ProgramStateRef St
, SymbolRef Sym
,
1955 const llvm::APSInt
&Int
,
1956 const llvm::APSInt
&Adjustment
);
1957 RangeSet
getSymLERange(llvm::function_ref
<RangeSet()> RS
,
1958 const llvm::APSInt
&Int
,
1959 const llvm::APSInt
&Adjustment
);
1960 RangeSet
getSymGERange(ProgramStateRef St
, SymbolRef Sym
,
1961 const llvm::APSInt
&Int
,
1962 const llvm::APSInt
&Adjustment
);
1965 //===----------------------------------------------------------------------===//
1966 // Constraint assignment logic
1967 //===----------------------------------------------------------------------===//
1969 /// ConstraintAssignorBase is a small utility class that unifies visitor
1970 /// for ranges with a visitor for constraints (rangeset/range/constant).
1972 /// It is designed to have one derived class, but generally it can have more.
1973 /// Derived class can control which types we handle by defining methods of the
1976 /// bool handle${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym,
1977 /// CONSTRAINT Constraint);
1979 /// where SYMBOL is the type of the symbol (e.g. SymSymExpr, SymbolCast, etc.)
1980 /// CONSTRAINT is the type of constraint (RangeSet/Range/Const)
1981 /// return value signifies whether we should try other handle methods
1982 /// (i.e. false would mean to stop right after calling this method)
1983 template <class Derived
> class ConstraintAssignorBase
{
1985 using Const
= const llvm::APSInt
&;
1987 #define DISPATCH(CLASS) return assign##CLASS##Impl(cast<CLASS>(Sym), Constraint)
1989 #define ASSIGN(CLASS, TO, SYM, CONSTRAINT) \
1990 if (!static_cast<Derived *>(this)->assign##CLASS##To##TO(SYM, CONSTRAINT)) \
1993 void assign(SymbolRef Sym
, RangeSet Constraint
) {
1994 assignImpl(Sym
, Constraint
);
1997 bool assignImpl(SymbolRef Sym
, RangeSet Constraint
) {
1998 switch (Sym
->getKind()) {
1999 #define SYMBOL(Id, Parent) \
2000 case SymExpr::Id##Kind: \
2002 #include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
2004 llvm_unreachable("Unknown SymExpr kind!");
2007 #define DEFAULT_ASSIGN(Id) \
2008 bool assign##Id##To##RangeSet(const Id *Sym, RangeSet Constraint) { \
2011 bool assign##Id##To##Range(const Id *Sym, Range Constraint) { return true; } \
2012 bool assign##Id##To##Const(const Id *Sym, Const Constraint) { return true; }
2014 // When we dispatch for constraint types, we first try to check
2015 // if the new constraint is the constant and try the corresponding
2016 // assignor methods. If it didn't interrupt, we can proceed to the
2017 // range, and finally to the range set.
2018 #define CONSTRAINT_DISPATCH(Id) \
2019 if (const llvm::APSInt *Const = Constraint.getConcreteValue()) { \
2020 ASSIGN(Id, Const, Sym, *Const); \
2022 if (Constraint.size() == 1) { \
2023 ASSIGN(Id, Range, Sym, *Constraint.begin()); \
2025 ASSIGN(Id, RangeSet, Sym, Constraint)
2027 // Our internal assign method first tries to call assignor methods for all
2028 // constraint types that apply. And if not interrupted, continues with its
2030 #define SYMBOL(Id, Parent) \
2031 bool assign##Id##Impl(const Id *Sym, RangeSet Constraint) { \
2032 CONSTRAINT_DISPATCH(Id); \
2036 #define ABSTRACT_SYMBOL(Id, Parent) SYMBOL(Id, Parent)
2037 #include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
2039 // Default implementations for the top class that doesn't have parents.
2040 bool assignSymExprImpl(const SymExpr
*Sym
, RangeSet Constraint
) {
2041 CONSTRAINT_DISPATCH(SymExpr
);
2044 DEFAULT_ASSIGN(SymExpr
);
2047 #undef CONSTRAINT_DISPATCH
2048 #undef DEFAULT_ASSIGN
2052 /// A little component aggregating all of the reasoning we have about
2053 /// assigning new constraints to symbols.
2055 /// The main purpose of this class is to associate constraints to symbols,
2056 /// and impose additional constraints on other symbols, when we can imply
2059 /// It has a nice symmetry with SymbolicRangeInferrer. When the latter
2060 /// can provide more precise ranges by looking into the operands of the
2061 /// expression in question, ConstraintAssignor looks into the operands
2062 /// to see if we can imply more from the new constraint.
2063 class ConstraintAssignor
: public ConstraintAssignorBase
<ConstraintAssignor
> {
2065 template <class ClassOrSymbol
>
2066 [[nodiscard
]] static ProgramStateRef
2067 assign(ProgramStateRef State
, SValBuilder
&Builder
, RangeSet::Factory
&F
,
2068 ClassOrSymbol CoS
, RangeSet NewConstraint
) {
2069 if (!State
|| NewConstraint
.isEmpty())
2072 ConstraintAssignor Assignor
{State
, Builder
, F
};
2073 return Assignor
.assign(CoS
, NewConstraint
);
2076 /// Handle expressions like: a % b != 0.
2077 template <typename SymT
>
2078 bool handleRemainderOp(const SymT
*Sym
, RangeSet Constraint
) {
2079 if (Sym
->getOpcode() != BO_Rem
)
2081 // a % b != 0 implies that a != 0.
2082 if (!Constraint
.containsZero()) {
2083 SVal SymSVal
= Builder
.makeSymbolVal(Sym
->getLHS());
2084 if (auto NonLocSymSVal
= SymSVal
.getAs
<nonloc::SymbolVal
>()) {
2085 State
= State
->assume(*NonLocSymSVal
, true);
2093 inline bool assignSymExprToConst(const SymExpr
*Sym
, Const Constraint
);
2094 inline bool assignSymIntExprToRangeSet(const SymIntExpr
*Sym
,
2095 RangeSet Constraint
) {
2096 return handleRemainderOp(Sym
, Constraint
);
2098 inline bool assignSymSymExprToRangeSet(const SymSymExpr
*Sym
,
2099 RangeSet Constraint
);
2102 ConstraintAssignor(ProgramStateRef State
, SValBuilder
&Builder
,
2103 RangeSet::Factory
&F
)
2104 : State(State
), Builder(Builder
), RangeFactory(F
) {}
2105 using Base
= ConstraintAssignorBase
<ConstraintAssignor
>;
2107 /// Base method for handling new constraints for symbols.
2108 [[nodiscard
]] ProgramStateRef
assign(SymbolRef Sym
, RangeSet NewConstraint
) {
2109 // All constraints are actually associated with equivalence classes, and
2110 // that's what we are going to do first.
2111 State
= assign(EquivalenceClass::find(State
, Sym
), NewConstraint
);
2115 // And after that we can check what other things we can get from this
2117 Base::assign(Sym
, NewConstraint
);
2121 /// Base method for handling new constraints for classes.
2122 [[nodiscard
]] ProgramStateRef
assign(EquivalenceClass Class
,
2123 RangeSet NewConstraint
) {
2124 // There is a chance that we might need to update constraints for the
2125 // classes that are known to be disequal to Class.
2127 // In order for this to be even possible, the new constraint should
2128 // be simply a constant because we can't reason about range disequalities.
2129 if (const llvm::APSInt
*Point
= NewConstraint
.getConcreteValue()) {
2131 ConstraintRangeTy Constraints
= State
->get
<ConstraintRange
>();
2132 ConstraintRangeTy::Factory
&CF
= State
->get_context
<ConstraintRange
>();
2134 // Add new constraint.
2135 Constraints
= CF
.add(Constraints
, Class
, NewConstraint
);
2137 for (EquivalenceClass DisequalClass
: Class
.getDisequalClasses(State
)) {
2138 RangeSet UpdatedConstraint
= SymbolicRangeInferrer::inferRange(
2139 RangeFactory
, State
, DisequalClass
);
2141 UpdatedConstraint
= RangeFactory
.deletePoint(UpdatedConstraint
, *Point
);
2143 // If we end up with at least one of the disequal classes to be
2144 // constrained with an empty range-set, the state is infeasible.
2145 if (UpdatedConstraint
.isEmpty())
2148 Constraints
= CF
.add(Constraints
, DisequalClass
, UpdatedConstraint
);
2150 assert(areFeasible(Constraints
) && "Constraint manager shouldn't produce "
2151 "a state with infeasible constraints");
2153 return setConstraints(State
, Constraints
);
2156 return setConstraint(State
, Class
, NewConstraint
);
2159 ProgramStateRef
trackDisequality(ProgramStateRef State
, SymbolRef LHS
,
2161 return EquivalenceClass::markDisequal(RangeFactory
, State
, LHS
, RHS
);
2164 ProgramStateRef
trackEquality(ProgramStateRef State
, SymbolRef LHS
,
2166 return EquivalenceClass::merge(RangeFactory
, State
, LHS
, RHS
);
2169 [[nodiscard
]] std::optional
<bool> interpreteAsBool(RangeSet Constraint
) {
2170 assert(!Constraint
.isEmpty() && "Empty ranges shouldn't get here");
2172 if (Constraint
.getConcreteValue())
2173 return !Constraint
.getConcreteValue()->isZero();
2175 if (!Constraint
.containsZero())
2178 return std::nullopt
;
2181 ProgramStateRef State
;
2182 SValBuilder
&Builder
;
2183 RangeSet::Factory
&RangeFactory
;
2186 bool ConstraintAssignor::assignSymExprToConst(const SymExpr
*Sym
,
2187 const llvm::APSInt
&Constraint
) {
2188 llvm::SmallSet
<EquivalenceClass
, 4> SimplifiedClasses
;
2189 // Iterate over all equivalence classes and try to simplify them.
2190 ClassMembersTy Members
= State
->get
<ClassMembers
>();
2191 for (std::pair
<EquivalenceClass
, SymbolSet
> ClassToSymbolSet
: Members
) {
2192 EquivalenceClass Class
= ClassToSymbolSet
.first
;
2193 State
= EquivalenceClass::simplify(Builder
, RangeFactory
, State
, Class
);
2196 SimplifiedClasses
.insert(Class
);
2199 // Trivial equivalence classes (those that have only one symbol member) are
2200 // not stored in the State. Thus, we must skim through the constraints as
2201 // well. And we try to simplify symbols in the constraints.
2202 ConstraintRangeTy Constraints
= State
->get
<ConstraintRange
>();
2203 for (std::pair
<EquivalenceClass
, RangeSet
> ClassConstraint
: Constraints
) {
2204 EquivalenceClass Class
= ClassConstraint
.first
;
2205 if (SimplifiedClasses
.count(Class
)) // Already simplified.
2207 State
= EquivalenceClass::simplify(Builder
, RangeFactory
, State
, Class
);
2212 // We may have trivial equivalence classes in the disequality info as
2213 // well, and we need to simplify them.
2214 DisequalityMapTy DisequalityInfo
= State
->get
<DisequalityMap
>();
2215 for (std::pair
<EquivalenceClass
, ClassSet
> DisequalityEntry
:
2217 EquivalenceClass Class
= DisequalityEntry
.first
;
2218 ClassSet DisequalClasses
= DisequalityEntry
.second
;
2219 State
= EquivalenceClass::simplify(Builder
, RangeFactory
, State
, Class
);
2227 bool ConstraintAssignor::assignSymSymExprToRangeSet(const SymSymExpr
*Sym
,
2228 RangeSet Constraint
) {
2229 if (!handleRemainderOp(Sym
, Constraint
))
2232 std::optional
<bool> ConstraintAsBool
= interpreteAsBool(Constraint
);
2234 if (!ConstraintAsBool
)
2237 if (std::optional
<bool> Equality
= meansEquality(Sym
)) {
2238 // Here we cover two cases:
2239 // * if Sym is equality and the new constraint is true -> Sym's operands
2240 // should be marked as equal
2241 // * if Sym is disequality and the new constraint is false -> Sym's
2242 // operands should be also marked as equal
2243 if (*Equality
== *ConstraintAsBool
) {
2244 State
= trackEquality(State
, Sym
->getLHS(), Sym
->getRHS());
2246 // Other combinations leave as with disequal operands.
2247 State
= trackDisequality(State
, Sym
->getLHS(), Sym
->getRHS());
2257 } // end anonymous namespace
2259 std::unique_ptr
<ConstraintManager
>
2260 ento::CreateRangeConstraintManager(ProgramStateManager
&StMgr
,
2262 return std::make_unique
<RangeConstraintManager
>(Eng
, StMgr
.getSValBuilder());
2265 ConstraintMap
ento::getConstraintMap(ProgramStateRef State
) {
2266 ConstraintMap::Factory
&F
= State
->get_context
<ConstraintMap
>();
2267 ConstraintMap Result
= F
.getEmptyMap();
2269 ConstraintRangeTy Constraints
= State
->get
<ConstraintRange
>();
2270 for (std::pair
<EquivalenceClass
, RangeSet
> ClassConstraint
: Constraints
) {
2271 EquivalenceClass Class
= ClassConstraint
.first
;
2272 SymbolSet ClassMembers
= Class
.getClassMembers(State
);
2273 assert(!ClassMembers
.isEmpty() &&
2274 "Class must always have at least one member!");
2276 SymbolRef Representative
= *ClassMembers
.begin();
2277 Result
= F
.add(Result
, Representative
, ClassConstraint
.second
);
2283 //===----------------------------------------------------------------------===//
2284 // EqualityClass implementation details
2285 //===----------------------------------------------------------------------===//
2287 LLVM_DUMP_METHOD
void EquivalenceClass::dumpToStream(ProgramStateRef State
,
2288 raw_ostream
&os
) const {
2289 SymbolSet ClassMembers
= getClassMembers(State
);
2290 for (const SymbolRef
&MemberSym
: ClassMembers
) {
2296 inline EquivalenceClass
EquivalenceClass::find(ProgramStateRef State
,
2298 assert(State
&& "State should not be null");
2299 assert(Sym
&& "Symbol should not be null");
2300 // We store far from all Symbol -> Class mappings
2301 if (const EquivalenceClass
*NontrivialClass
= State
->get
<ClassMap
>(Sym
))
2302 return *NontrivialClass
;
2304 // This is a trivial class of Sym.
2308 inline ProgramStateRef
EquivalenceClass::merge(RangeSet::Factory
&F
,
2309 ProgramStateRef State
,
2312 EquivalenceClass FirstClass
= find(State
, First
);
2313 EquivalenceClass SecondClass
= find(State
, Second
);
2315 return FirstClass
.merge(F
, State
, SecondClass
);
2318 inline ProgramStateRef
EquivalenceClass::merge(RangeSet::Factory
&F
,
2319 ProgramStateRef State
,
2320 EquivalenceClass Other
) {
2321 // It is already the same class.
2325 // FIXME: As of now, we support only equivalence classes of the same type.
2326 // This limitation is connected to the lack of explicit casts in
2327 // our symbolic expression model.
2329 // That means that for `int x` and `char y` we don't distinguish
2330 // between these two very different cases:
2334 // The moment we introduce symbolic casts, this restriction can be
2336 if (getType() != Other
.getType())
2339 SymbolSet Members
= getClassMembers(State
);
2340 SymbolSet OtherMembers
= Other
.getClassMembers(State
);
2342 // We estimate the size of the class by the height of tree containing
2343 // its members. Merging is not a trivial operation, so it's easier to
2344 // merge the smaller class into the bigger one.
2345 if (Members
.getHeight() >= OtherMembers
.getHeight()) {
2346 return mergeImpl(F
, State
, Members
, Other
, OtherMembers
);
2348 return Other
.mergeImpl(F
, State
, OtherMembers
, *this, Members
);
2352 inline ProgramStateRef
2353 EquivalenceClass::mergeImpl(RangeSet::Factory
&RangeFactory
,
2354 ProgramStateRef State
, SymbolSet MyMembers
,
2355 EquivalenceClass Other
, SymbolSet OtherMembers
) {
2356 // Essentially what we try to recreate here is some kind of union-find
2357 // data structure. It does have certain limitations due to persistence
2358 // and the need to remove elements from classes.
2360 // In this setting, EquialityClass object is the representative of the class
2361 // or the parent element. ClassMap is a mapping of class members to their
2362 // parent. Unlike the union-find structure, they all point directly to the
2363 // class representative because we don't have an opportunity to actually do
2364 // path compression when dealing with immutability. This means that we
2365 // compress paths every time we do merges. It also means that we lose
2366 // the main amortized complexity benefit from the original data structure.
2367 ConstraintRangeTy Constraints
= State
->get
<ConstraintRange
>();
2368 ConstraintRangeTy::Factory
&CRF
= State
->get_context
<ConstraintRange
>();
2370 // 1. If the merged classes have any constraints associated with them, we
2371 // need to transfer them to the class we have left.
2373 // Intersection here makes perfect sense because both of these constraints
2374 // must hold for the whole new class.
2375 if (std::optional
<RangeSet
> NewClassConstraint
=
2376 intersect(RangeFactory
, getConstraint(State
, *this),
2377 getConstraint(State
, Other
))) {
2378 // NOTE: Essentially, NewClassConstraint should NEVER be infeasible because
2379 // range inferrer shouldn't generate ranges incompatible with
2380 // equivalence classes. However, at the moment, due to imperfections
2381 // in the solver, it is possible and the merge function can also
2382 // return infeasible states aka null states.
2383 if (NewClassConstraint
->isEmpty())
2387 // No need in tracking constraints of a now-dissolved class.
2388 Constraints
= CRF
.remove(Constraints
, Other
);
2389 // Assign new constraints for this class.
2390 Constraints
= CRF
.add(Constraints
, *this, *NewClassConstraint
);
2392 assert(areFeasible(Constraints
) && "Constraint manager shouldn't produce "
2393 "a state with infeasible constraints");
2395 State
= State
->set
<ConstraintRange
>(Constraints
);
2398 // 2. Get ALL equivalence-related maps
2399 ClassMapTy Classes
= State
->get
<ClassMap
>();
2400 ClassMapTy::Factory
&CMF
= State
->get_context
<ClassMap
>();
2402 ClassMembersTy Members
= State
->get
<ClassMembers
>();
2403 ClassMembersTy::Factory
&MF
= State
->get_context
<ClassMembers
>();
2405 DisequalityMapTy DisequalityInfo
= State
->get
<DisequalityMap
>();
2406 DisequalityMapTy::Factory
&DF
= State
->get_context
<DisequalityMap
>();
2408 ClassSet::Factory
&CF
= State
->get_context
<ClassSet
>();
2409 SymbolSet::Factory
&F
= getMembersFactory(State
);
2411 // 2. Merge members of the Other class into the current class.
2412 SymbolSet NewClassMembers
= MyMembers
;
2413 for (SymbolRef Sym
: OtherMembers
) {
2414 NewClassMembers
= F
.add(NewClassMembers
, Sym
);
2415 // *this is now the class for all these new symbols.
2416 Classes
= CMF
.add(Classes
, Sym
, *this);
2419 // 3. Adjust member mapping.
2421 // No need in tracking members of a now-dissolved class.
2422 Members
= MF
.remove(Members
, Other
);
2423 // Now only the current class is mapped to all the symbols.
2424 Members
= MF
.add(Members
, *this, NewClassMembers
);
2426 // 4. Update disequality relations
2427 ClassSet DisequalToOther
= Other
.getDisequalClasses(DisequalityInfo
, CF
);
2428 // We are about to merge two classes but they are already known to be
2429 // non-equal. This is a contradiction.
2430 if (DisequalToOther
.contains(*this))
2433 if (!DisequalToOther
.isEmpty()) {
2434 ClassSet DisequalToThis
= getDisequalClasses(DisequalityInfo
, CF
);
2435 DisequalityInfo
= DF
.remove(DisequalityInfo
, Other
);
2437 for (EquivalenceClass DisequalClass
: DisequalToOther
) {
2438 DisequalToThis
= CF
.add(DisequalToThis
, DisequalClass
);
2440 // Disequality is a symmetric relation meaning that if
2441 // DisequalToOther not null then the set for DisequalClass is not
2442 // empty and has at least Other.
2443 ClassSet OriginalSetLinkedToOther
=
2444 *DisequalityInfo
.lookup(DisequalClass
);
2446 // Other will be eliminated and we should replace it with the bigger
2448 ClassSet NewSet
= CF
.remove(OriginalSetLinkedToOther
, Other
);
2449 NewSet
= CF
.add(NewSet
, *this);
2451 DisequalityInfo
= DF
.add(DisequalityInfo
, DisequalClass
, NewSet
);
2454 DisequalityInfo
= DF
.add(DisequalityInfo
, *this, DisequalToThis
);
2455 State
= State
->set
<DisequalityMap
>(DisequalityInfo
);
2458 // 5. Update the state
2459 State
= State
->set
<ClassMap
>(Classes
);
2460 State
= State
->set
<ClassMembers
>(Members
);
2465 inline SymbolSet::Factory
&
2466 EquivalenceClass::getMembersFactory(ProgramStateRef State
) {
2467 return State
->get_context
<SymbolSet
>();
2470 SymbolSet
EquivalenceClass::getClassMembers(ProgramStateRef State
) const {
2471 if (const SymbolSet
*Members
= State
->get
<ClassMembers
>(*this))
2474 // This class is trivial, so we need to construct a set
2475 // with just that one symbol from the class.
2476 SymbolSet::Factory
&F
= getMembersFactory(State
);
2477 return F
.add(F
.getEmptySet(), getRepresentativeSymbol());
2480 bool EquivalenceClass::isTrivial(ProgramStateRef State
) const {
2481 return State
->get
<ClassMembers
>(*this) == nullptr;
2484 bool EquivalenceClass::isTriviallyDead(ProgramStateRef State
,
2485 SymbolReaper
&Reaper
) const {
2486 return isTrivial(State
) && Reaper
.isDead(getRepresentativeSymbol());
2489 inline ProgramStateRef
EquivalenceClass::markDisequal(RangeSet::Factory
&RF
,
2490 ProgramStateRef State
,
2493 return markDisequal(RF
, State
, find(State
, First
), find(State
, Second
));
2496 inline ProgramStateRef
EquivalenceClass::markDisequal(RangeSet::Factory
&RF
,
2497 ProgramStateRef State
,
2498 EquivalenceClass First
,
2499 EquivalenceClass Second
) {
2500 return First
.markDisequal(RF
, State
, Second
);
2503 inline ProgramStateRef
2504 EquivalenceClass::markDisequal(RangeSet::Factory
&RF
, ProgramStateRef State
,
2505 EquivalenceClass Other
) const {
2506 // If we know that two classes are equal, we can only produce an infeasible
2508 if (*this == Other
) {
2512 DisequalityMapTy DisequalityInfo
= State
->get
<DisequalityMap
>();
2513 ConstraintRangeTy Constraints
= State
->get
<ConstraintRange
>();
2515 // Disequality is a symmetric relation, so if we mark A as disequal to B,
2516 // we should also mark B as disequalt to A.
2517 if (!addToDisequalityInfo(DisequalityInfo
, Constraints
, RF
, State
, *this,
2519 !addToDisequalityInfo(DisequalityInfo
, Constraints
, RF
, State
, Other
,
2523 assert(areFeasible(Constraints
) && "Constraint manager shouldn't produce "
2524 "a state with infeasible constraints");
2526 State
= State
->set
<DisequalityMap
>(DisequalityInfo
);
2527 State
= State
->set
<ConstraintRange
>(Constraints
);
2532 inline bool EquivalenceClass::addToDisequalityInfo(
2533 DisequalityMapTy
&Info
, ConstraintRangeTy
&Constraints
,
2534 RangeSet::Factory
&RF
, ProgramStateRef State
, EquivalenceClass First
,
2535 EquivalenceClass Second
) {
2537 // 1. Get all of the required factories.
2538 DisequalityMapTy::Factory
&F
= State
->get_context
<DisequalityMap
>();
2539 ClassSet::Factory
&CF
= State
->get_context
<ClassSet
>();
2540 ConstraintRangeTy::Factory
&CRF
= State
->get_context
<ConstraintRange
>();
2542 // 2. Add Second to the set of classes disequal to First.
2543 const ClassSet
*CurrentSet
= Info
.lookup(First
);
2544 ClassSet NewSet
= CurrentSet
? *CurrentSet
: CF
.getEmptySet();
2545 NewSet
= CF
.add(NewSet
, Second
);
2547 Info
= F
.add(Info
, First
, NewSet
);
2549 // 3. If Second is known to be a constant, we can delete this point
2550 // from the constraint asociated with First.
2552 // So, if Second == 10, it means that First != 10.
2553 // At the same time, the same logic does not apply to ranges.
2554 if (const RangeSet
*SecondConstraint
= Constraints
.lookup(Second
))
2555 if (const llvm::APSInt
*Point
= SecondConstraint
->getConcreteValue()) {
2557 RangeSet FirstConstraint
= SymbolicRangeInferrer::inferRange(
2558 RF
, State
, First
.getRepresentativeSymbol());
2560 FirstConstraint
= RF
.deletePoint(FirstConstraint
, *Point
);
2562 // If the First class is about to be constrained with an empty
2563 // range-set, the state is infeasible.
2564 if (FirstConstraint
.isEmpty())
2567 Constraints
= CRF
.add(Constraints
, First
, FirstConstraint
);
2573 inline std::optional
<bool> EquivalenceClass::areEqual(ProgramStateRef State
,
2575 SymbolRef SecondSym
) {
2576 return EquivalenceClass::areEqual(State
, find(State
, FirstSym
),
2577 find(State
, SecondSym
));
2580 inline std::optional
<bool> EquivalenceClass::areEqual(ProgramStateRef State
,
2581 EquivalenceClass First
,
2582 EquivalenceClass Second
) {
2583 // The same equivalence class => symbols are equal.
2584 if (First
== Second
)
2587 // Let's check if we know anything about these two classes being not equal to
2589 ClassSet DisequalToFirst
= First
.getDisequalClasses(State
);
2590 if (DisequalToFirst
.contains(Second
))
2594 return std::nullopt
;
2597 [[nodiscard
]] ProgramStateRef
2598 EquivalenceClass::removeMember(ProgramStateRef State
, const SymbolRef Old
) {
2600 SymbolSet ClsMembers
= getClassMembers(State
);
2601 assert(ClsMembers
.contains(Old
));
2603 // Remove `Old`'s Class->Sym relation.
2604 SymbolSet::Factory
&F
= getMembersFactory(State
);
2605 ClassMembersTy::Factory
&EMFactory
= State
->get_context
<ClassMembers
>();
2606 ClsMembers
= F
.remove(ClsMembers
, Old
);
2607 // Ensure another precondition of the removeMember function (we can check
2608 // this only with isEmpty, thus we have to do the remove first).
2609 assert(!ClsMembers
.isEmpty() &&
2610 "Class should have had at least two members before member removal");
2611 // Overwrite the existing members assigned to this class.
2612 ClassMembersTy ClassMembersMap
= State
->get
<ClassMembers
>();
2613 ClassMembersMap
= EMFactory
.add(ClassMembersMap
, *this, ClsMembers
);
2614 State
= State
->set
<ClassMembers
>(ClassMembersMap
);
2616 // Remove `Old`'s Sym->Class relation.
2617 ClassMapTy Classes
= State
->get
<ClassMap
>();
2618 ClassMapTy::Factory
&CMF
= State
->get_context
<ClassMap
>();
2619 Classes
= CMF
.remove(Classes
, Old
);
2620 State
= State
->set
<ClassMap
>(Classes
);
2625 // Re-evaluate an SVal with top-level `State->assume` logic.
2626 [[nodiscard
]] ProgramStateRef
2627 reAssume(ProgramStateRef State
, const RangeSet
*Constraint
, SVal TheValue
) {
2631 const auto DefinedVal
= TheValue
.castAs
<DefinedSVal
>();
2633 // If the SVal is 0, we can simply interpret that as `false`.
2634 if (Constraint
->encodesFalseRange())
2635 return State
->assume(DefinedVal
, false);
2637 // If the constraint does not encode 0 then we can interpret that as `true`
2638 // AND as a Range(Set).
2639 if (Constraint
->encodesTrueRange()) {
2640 State
= State
->assume(DefinedVal
, true);
2643 // Fall through, re-assume based on the range values as well.
2645 // Overestimate the individual Ranges with the RangeSet' lowest and
2647 return State
->assumeInclusiveRange(DefinedVal
, Constraint
->getMinValue(),
2648 Constraint
->getMaxValue(), true);
2651 // Iterate over all symbols and try to simplify them. Once a symbol is
2652 // simplified then we check if we can merge the simplified symbol's equivalence
2653 // class to this class. This way, we simplify not just the symbols but the
2654 // classes as well: we strive to keep the number of the classes to be the
2655 // absolute minimum.
2656 [[nodiscard
]] ProgramStateRef
2657 EquivalenceClass::simplify(SValBuilder
&SVB
, RangeSet::Factory
&F
,
2658 ProgramStateRef State
, EquivalenceClass Class
) {
2659 SymbolSet ClassMembers
= Class
.getClassMembers(State
);
2660 for (const SymbolRef
&MemberSym
: ClassMembers
) {
2662 const SVal SimplifiedMemberVal
= simplifyToSVal(State
, MemberSym
);
2663 const SymbolRef SimplifiedMemberSym
= SimplifiedMemberVal
.getAsSymbol();
2665 // The symbol is collapsed to a constant, check if the current State is
2667 if (const auto CI
= SimplifiedMemberVal
.getAs
<nonloc::ConcreteInt
>()) {
2668 const llvm::APSInt
&SV
= CI
->getValue();
2669 const RangeSet
*ClassConstraint
= getConstraint(State
, Class
);
2670 // We have found a contradiction.
2671 if (ClassConstraint
&& !ClassConstraint
->contains(SV
))
2675 if (SimplifiedMemberSym
&& MemberSym
!= SimplifiedMemberSym
) {
2676 // The simplified symbol should be the member of the original Class,
2677 // however, it might be in another existing class at the moment. We
2678 // have to merge these classes.
2679 ProgramStateRef OldState
= State
;
2680 State
= merge(F
, State
, MemberSym
, SimplifiedMemberSym
);
2683 // No state change, no merge happened actually.
2684 if (OldState
== State
)
2687 // Be aware that `SimplifiedMemberSym` might refer to an already dead
2688 // symbol. In that case, the eqclass of that might not be the same as the
2689 // eqclass of `MemberSym`. This is because the dead symbols are not
2690 // preserved in the `ClassMap`, hence
2691 // `find(State, SimplifiedMemberSym)` will result in a trivial eqclass
2692 // compared to the eqclass of `MemberSym`.
2693 // These eqclasses should be the same if `SimplifiedMemberSym` is alive.
2694 // --> assert(find(State, MemberSym) == find(State, SimplifiedMemberSym))
2696 // Note that `MemberSym` must be alive here since that is from the
2697 // `ClassMembers` where all the symbols are alive.
2699 // Remove the old and more complex symbol.
2700 State
= find(State
, MemberSym
).removeMember(State
, MemberSym
);
2702 // Query the class constraint again b/c that may have changed during the
2704 const RangeSet
*ClassConstraint
= getConstraint(State
, Class
);
2706 // Re-evaluate an SVal with top-level `State->assume`, this ignites
2707 // a RECURSIVE algorithm that will reach a FIXPOINT.
2709 // About performance and complexity: Let us assume that in a State we
2710 // have N non-trivial equivalence classes and that all constraints and
2711 // disequality info is related to non-trivial classes. In the worst case,
2712 // we can simplify only one symbol of one class in each iteration. The
2713 // number of symbols in one class cannot grow b/c we replace the old
2714 // symbol with the simplified one. Also, the number of the equivalence
2715 // classes can decrease only, b/c the algorithm does a merge operation
2716 // optionally. We need N iterations in this case to reach the fixpoint.
2717 // Thus, the steps needed to be done in the worst case is proportional to
2720 // This worst case scenario can be extended to that case when we have
2721 // trivial classes in the constraints and in the disequality map. This
2722 // case can be reduced to the case with a State where there are only
2723 // non-trivial classes. This is because a merge operation on two trivial
2724 // classes results in one non-trivial class.
2725 State
= reAssume(State
, ClassConstraint
, SimplifiedMemberVal
);
2733 inline ClassSet
EquivalenceClass::getDisequalClasses(ProgramStateRef State
,
2735 return find(State
, Sym
).getDisequalClasses(State
);
2739 EquivalenceClass::getDisequalClasses(ProgramStateRef State
) const {
2740 return getDisequalClasses(State
->get
<DisequalityMap
>(),
2741 State
->get_context
<ClassSet
>());
2745 EquivalenceClass::getDisequalClasses(DisequalityMapTy Map
,
2746 ClassSet::Factory
&Factory
) const {
2747 if (const ClassSet
*DisequalClasses
= Map
.lookup(*this))
2748 return *DisequalClasses
;
2750 return Factory
.getEmptySet();
2753 bool EquivalenceClass::isClassDataConsistent(ProgramStateRef State
) {
2754 ClassMembersTy Members
= State
->get
<ClassMembers
>();
2756 for (std::pair
<EquivalenceClass
, SymbolSet
> ClassMembersPair
: Members
) {
2757 for (SymbolRef Member
: ClassMembersPair
.second
) {
2758 // Every member of the class should have a mapping back to the class.
2759 if (find(State
, Member
) == ClassMembersPair
.first
) {
2767 DisequalityMapTy Disequalities
= State
->get
<DisequalityMap
>();
2768 for (std::pair
<EquivalenceClass
, ClassSet
> DisequalityInfo
: Disequalities
) {
2769 EquivalenceClass Class
= DisequalityInfo
.first
;
2770 ClassSet DisequalClasses
= DisequalityInfo
.second
;
2772 // There is no use in keeping empty sets in the map.
2773 if (DisequalClasses
.isEmpty())
2776 // Disequality is symmetrical, i.e. for every Class A and B that A != B,
2777 // B != A should also be true.
2778 for (EquivalenceClass DisequalClass
: DisequalClasses
) {
2779 const ClassSet
*DisequalToDisequalClasses
=
2780 Disequalities
.lookup(DisequalClass
);
2782 // It should be a set of at least one element: Class
2783 if (!DisequalToDisequalClasses
||
2784 !DisequalToDisequalClasses
->contains(Class
))
2792 //===----------------------------------------------------------------------===//
2793 // RangeConstraintManager implementation
2794 //===----------------------------------------------------------------------===//
2796 bool RangeConstraintManager::canReasonAbout(SVal X
) const {
2797 std::optional
<nonloc::SymbolVal
> SymVal
= X
.getAs
<nonloc::SymbolVal
>();
2798 if (SymVal
&& SymVal
->isExpression()) {
2799 const SymExpr
*SE
= SymVal
->getSymbol();
2801 if (const SymIntExpr
*SIE
= dyn_cast
<SymIntExpr
>(SE
)) {
2802 switch (SIE
->getOpcode()) {
2803 // We don't reason yet about bitwise-constraints on symbolic values.
2808 // We don't reason yet about these arithmetic constraints on
2822 if (const SymSymExpr
*SSE
= dyn_cast
<SymSymExpr
>(SE
)) {
2823 // FIXME: Handle <=> here.
2824 if (BinaryOperator::isEqualityOp(SSE
->getOpcode()) ||
2825 BinaryOperator::isRelationalOp(SSE
->getOpcode())) {
2826 // We handle Loc <> Loc comparisons, but not (yet) NonLoc <> NonLoc.
2827 // We've recently started producing Loc <> NonLoc comparisons (that
2828 // result from casts of one of the operands between eg. intptr_t and
2829 // void *), but we can't reason about them yet.
2830 if (Loc::isLocType(SSE
->getLHS()->getType())) {
2831 return Loc::isLocType(SSE
->getRHS()->getType());
2842 ConditionTruthVal
RangeConstraintManager::checkNull(ProgramStateRef State
,
2844 const RangeSet
*Ranges
= getConstraint(State
, Sym
);
2846 // If we don't have any information about this symbol, it's underconstrained.
2848 return ConditionTruthVal();
2850 // If we have a concrete value, see if it's zero.
2851 if (const llvm::APSInt
*Value
= Ranges
->getConcreteValue())
2854 BasicValueFactory
&BV
= getBasicVals();
2855 APSIntType IntType
= BV
.getAPSIntType(Sym
->getType());
2856 llvm::APSInt Zero
= IntType
.getZeroValue();
2858 // Check if zero is in the set of possible values.
2859 if (!Ranges
->contains(Zero
))
2862 // Zero is a possible value, but it is not the /only/ possible value.
2863 return ConditionTruthVal();
2866 const llvm::APSInt
*RangeConstraintManager::getSymVal(ProgramStateRef St
,
2867 SymbolRef Sym
) const {
2868 const RangeSet
*T
= getConstraint(St
, Sym
);
2869 return T
? T
->getConcreteValue() : nullptr;
2872 const llvm::APSInt
*RangeConstraintManager::getSymMinVal(ProgramStateRef St
,
2873 SymbolRef Sym
) const {
2874 const RangeSet
*T
= getConstraint(St
, Sym
);
2875 if (!T
|| T
->isEmpty())
2877 return &T
->getMinValue();
2880 const llvm::APSInt
*RangeConstraintManager::getSymMaxVal(ProgramStateRef St
,
2881 SymbolRef Sym
) const {
2882 const RangeSet
*T
= getConstraint(St
, Sym
);
2883 if (!T
|| T
->isEmpty())
2885 return &T
->getMaxValue();
2888 //===----------------------------------------------------------------------===//
2889 // Remove dead symbols from existing constraints
2890 //===----------------------------------------------------------------------===//
2892 /// Scan all symbols referenced by the constraints. If the symbol is not alive
2893 /// as marked in LSymbols, mark it as dead in DSymbols.
2895 RangeConstraintManager::removeDeadBindings(ProgramStateRef State
,
2896 SymbolReaper
&SymReaper
) {
2897 ClassMembersTy ClassMembersMap
= State
->get
<ClassMembers
>();
2898 ClassMembersTy NewClassMembersMap
= ClassMembersMap
;
2899 ClassMembersTy::Factory
&EMFactory
= State
->get_context
<ClassMembers
>();
2900 SymbolSet::Factory
&SetFactory
= State
->get_context
<SymbolSet
>();
2902 ConstraintRangeTy Constraints
= State
->get
<ConstraintRange
>();
2903 ConstraintRangeTy NewConstraints
= Constraints
;
2904 ConstraintRangeTy::Factory
&ConstraintFactory
=
2905 State
->get_context
<ConstraintRange
>();
2907 ClassMapTy Map
= State
->get
<ClassMap
>();
2908 ClassMapTy NewMap
= Map
;
2909 ClassMapTy::Factory
&ClassFactory
= State
->get_context
<ClassMap
>();
2911 DisequalityMapTy Disequalities
= State
->get
<DisequalityMap
>();
2912 DisequalityMapTy::Factory
&DisequalityFactory
=
2913 State
->get_context
<DisequalityMap
>();
2914 ClassSet::Factory
&ClassSetFactory
= State
->get_context
<ClassSet
>();
2916 bool ClassMapChanged
= false;
2917 bool MembersMapChanged
= false;
2918 bool ConstraintMapChanged
= false;
2919 bool DisequalitiesChanged
= false;
2921 auto removeDeadClass
= [&](EquivalenceClass Class
) {
2922 // Remove associated constraint ranges.
2923 Constraints
= ConstraintFactory
.remove(Constraints
, Class
);
2924 ConstraintMapChanged
= true;
2926 // Update disequality information to not hold any information on the
2928 ClassSet DisequalClasses
=
2929 Class
.getDisequalClasses(Disequalities
, ClassSetFactory
);
2930 if (!DisequalClasses
.isEmpty()) {
2931 for (EquivalenceClass DisequalClass
: DisequalClasses
) {
2932 ClassSet DisequalToDisequalSet
=
2933 DisequalClass
.getDisequalClasses(Disequalities
, ClassSetFactory
);
2934 // DisequalToDisequalSet is guaranteed to be non-empty for consistent
2935 // disequality info.
2936 assert(!DisequalToDisequalSet
.isEmpty());
2937 ClassSet NewSet
= ClassSetFactory
.remove(DisequalToDisequalSet
, Class
);
2939 // No need in keeping an empty set.
2940 if (NewSet
.isEmpty()) {
2942 DisequalityFactory
.remove(Disequalities
, DisequalClass
);
2945 DisequalityFactory
.add(Disequalities
, DisequalClass
, NewSet
);
2948 // Remove the data for the class
2949 Disequalities
= DisequalityFactory
.remove(Disequalities
, Class
);
2950 DisequalitiesChanged
= true;
2954 // 1. Let's see if dead symbols are trivial and have associated constraints.
2955 for (std::pair
<EquivalenceClass
, RangeSet
> ClassConstraintPair
:
2957 EquivalenceClass Class
= ClassConstraintPair
.first
;
2958 if (Class
.isTriviallyDead(State
, SymReaper
)) {
2959 // If this class is trivial, we can remove its constraints right away.
2960 removeDeadClass(Class
);
2964 // 2. We don't need to track classes for dead symbols.
2965 for (std::pair
<SymbolRef
, EquivalenceClass
> SymbolClassPair
: Map
) {
2966 SymbolRef Sym
= SymbolClassPair
.first
;
2968 if (SymReaper
.isDead(Sym
)) {
2969 ClassMapChanged
= true;
2970 NewMap
= ClassFactory
.remove(NewMap
, Sym
);
2974 // 3. Remove dead members from classes and remove dead non-trivial classes
2975 // and their constraints.
2976 for (std::pair
<EquivalenceClass
, SymbolSet
> ClassMembersPair
:
2978 EquivalenceClass Class
= ClassMembersPair
.first
;
2979 SymbolSet LiveMembers
= ClassMembersPair
.second
;
2980 bool MembersChanged
= false;
2982 for (SymbolRef Member
: ClassMembersPair
.second
) {
2983 if (SymReaper
.isDead(Member
)) {
2984 MembersChanged
= true;
2985 LiveMembers
= SetFactory
.remove(LiveMembers
, Member
);
2989 // Check if the class changed.
2990 if (!MembersChanged
)
2993 MembersMapChanged
= true;
2995 if (LiveMembers
.isEmpty()) {
2996 // The class is dead now, we need to wipe it out of the members map...
2997 NewClassMembersMap
= EMFactory
.remove(NewClassMembersMap
, Class
);
2999 // ...and remove all of its constraints.
3000 removeDeadClass(Class
);
3002 // We need to change the members associated with the class.
3003 NewClassMembersMap
=
3004 EMFactory
.add(NewClassMembersMap
, Class
, LiveMembers
);
3008 // 4. Update the state with new maps.
3010 // Here we try to be humble and update a map only if it really changed.
3011 if (ClassMapChanged
)
3012 State
= State
->set
<ClassMap
>(NewMap
);
3014 if (MembersMapChanged
)
3015 State
= State
->set
<ClassMembers
>(NewClassMembersMap
);
3017 if (ConstraintMapChanged
)
3018 State
= State
->set
<ConstraintRange
>(Constraints
);
3020 if (DisequalitiesChanged
)
3021 State
= State
->set
<DisequalityMap
>(Disequalities
);
3023 assert(EquivalenceClass::isClassDataConsistent(State
));
3028 RangeSet
RangeConstraintManager::getRange(ProgramStateRef State
,
3030 return SymbolicRangeInferrer::inferRange(F
, State
, Sym
);
3033 ProgramStateRef
RangeConstraintManager::setRange(ProgramStateRef State
,
3036 return ConstraintAssignor::assign(State
, getSValBuilder(), F
, Sym
, Range
);
3039 //===------------------------------------------------------------------------===
3040 // assumeSymX methods: protected interface for RangeConstraintManager.
3041 //===------------------------------------------------------------------------===/
3043 // The syntax for ranges below is mathematical, using [x, y] for closed ranges
3044 // and (x, y) for open ranges. These ranges are modular, corresponding with
3045 // a common treatment of C integer overflow. This means that these methods
3046 // do not have to worry about overflow; RangeSet::Intersect can handle such a
3047 // "wraparound" range.
3048 // As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
3049 // UINT_MAX, 0, 1, and 2.
3052 RangeConstraintManager::assumeSymNE(ProgramStateRef St
, SymbolRef Sym
,
3053 const llvm::APSInt
&Int
,
3054 const llvm::APSInt
&Adjustment
) {
3055 // Before we do any real work, see if the value can even show up.
3056 APSIntType
AdjustmentType(Adjustment
);
3057 if (AdjustmentType
.testInRange(Int
, true) != APSIntType::RTR_Within
)
3060 llvm::APSInt Point
= AdjustmentType
.convert(Int
) - Adjustment
;
3061 RangeSet New
= getRange(St
, Sym
);
3062 New
= F
.deletePoint(New
, Point
);
3064 return setRange(St
, Sym
, New
);
3068 RangeConstraintManager::assumeSymEQ(ProgramStateRef St
, SymbolRef Sym
,
3069 const llvm::APSInt
&Int
,
3070 const llvm::APSInt
&Adjustment
) {
3071 // Before we do any real work, see if the value can even show up.
3072 APSIntType
AdjustmentType(Adjustment
);
3073 if (AdjustmentType
.testInRange(Int
, true) != APSIntType::RTR_Within
)
3076 // [Int-Adjustment, Int-Adjustment]
3077 llvm::APSInt AdjInt
= AdjustmentType
.convert(Int
) - Adjustment
;
3078 RangeSet New
= getRange(St
, Sym
);
3079 New
= F
.intersect(New
, AdjInt
);
3081 return setRange(St
, Sym
, New
);
3084 RangeSet
RangeConstraintManager::getSymLTRange(ProgramStateRef St
,
3086 const llvm::APSInt
&Int
,
3087 const llvm::APSInt
&Adjustment
) {
3088 // Before we do any real work, see if the value can even show up.
3089 APSIntType
AdjustmentType(Adjustment
);
3090 switch (AdjustmentType
.testInRange(Int
, true)) {
3091 case APSIntType::RTR_Below
:
3092 return F
.getEmptySet();
3093 case APSIntType::RTR_Within
:
3095 case APSIntType::RTR_Above
:
3096 return getRange(St
, Sym
);
3099 // Special case for Int == Min. This is always false.
3100 llvm::APSInt ComparisonVal
= AdjustmentType
.convert(Int
);
3101 llvm::APSInt Min
= AdjustmentType
.getMinValue();
3102 if (ComparisonVal
== Min
)
3103 return F
.getEmptySet();
3105 llvm::APSInt Lower
= Min
- Adjustment
;
3106 llvm::APSInt Upper
= ComparisonVal
- Adjustment
;
3109 RangeSet Result
= getRange(St
, Sym
);
3110 return F
.intersect(Result
, Lower
, Upper
);
3114 RangeConstraintManager::assumeSymLT(ProgramStateRef St
, SymbolRef Sym
,
3115 const llvm::APSInt
&Int
,
3116 const llvm::APSInt
&Adjustment
) {
3117 RangeSet New
= getSymLTRange(St
, Sym
, Int
, Adjustment
);
3118 return setRange(St
, Sym
, New
);
3121 RangeSet
RangeConstraintManager::getSymGTRange(ProgramStateRef St
,
3123 const llvm::APSInt
&Int
,
3124 const llvm::APSInt
&Adjustment
) {
3125 // Before we do any real work, see if the value can even show up.
3126 APSIntType
AdjustmentType(Adjustment
);
3127 switch (AdjustmentType
.testInRange(Int
, true)) {
3128 case APSIntType::RTR_Below
:
3129 return getRange(St
, Sym
);
3130 case APSIntType::RTR_Within
:
3132 case APSIntType::RTR_Above
:
3133 return F
.getEmptySet();
3136 // Special case for Int == Max. This is always false.
3137 llvm::APSInt ComparisonVal
= AdjustmentType
.convert(Int
);
3138 llvm::APSInt Max
= AdjustmentType
.getMaxValue();
3139 if (ComparisonVal
== Max
)
3140 return F
.getEmptySet();
3142 llvm::APSInt Lower
= ComparisonVal
- Adjustment
;
3143 llvm::APSInt Upper
= Max
- Adjustment
;
3146 RangeSet SymRange
= getRange(St
, Sym
);
3147 return F
.intersect(SymRange
, Lower
, Upper
);
3151 RangeConstraintManager::assumeSymGT(ProgramStateRef St
, SymbolRef Sym
,
3152 const llvm::APSInt
&Int
,
3153 const llvm::APSInt
&Adjustment
) {
3154 RangeSet New
= getSymGTRange(St
, Sym
, Int
, Adjustment
);
3155 return setRange(St
, Sym
, New
);
3158 RangeSet
RangeConstraintManager::getSymGERange(ProgramStateRef St
,
3160 const llvm::APSInt
&Int
,
3161 const llvm::APSInt
&Adjustment
) {
3162 // Before we do any real work, see if the value can even show up.
3163 APSIntType
AdjustmentType(Adjustment
);
3164 switch (AdjustmentType
.testInRange(Int
, true)) {
3165 case APSIntType::RTR_Below
:
3166 return getRange(St
, Sym
);
3167 case APSIntType::RTR_Within
:
3169 case APSIntType::RTR_Above
:
3170 return F
.getEmptySet();
3173 // Special case for Int == Min. This is always feasible.
3174 llvm::APSInt ComparisonVal
= AdjustmentType
.convert(Int
);
3175 llvm::APSInt Min
= AdjustmentType
.getMinValue();
3176 if (ComparisonVal
== Min
)
3177 return getRange(St
, Sym
);
3179 llvm::APSInt Max
= AdjustmentType
.getMaxValue();
3180 llvm::APSInt Lower
= ComparisonVal
- Adjustment
;
3181 llvm::APSInt Upper
= Max
- Adjustment
;
3183 RangeSet SymRange
= getRange(St
, Sym
);
3184 return F
.intersect(SymRange
, Lower
, Upper
);
3188 RangeConstraintManager::assumeSymGE(ProgramStateRef St
, SymbolRef Sym
,
3189 const llvm::APSInt
&Int
,
3190 const llvm::APSInt
&Adjustment
) {
3191 RangeSet New
= getSymGERange(St
, Sym
, Int
, Adjustment
);
3192 return setRange(St
, Sym
, New
);
3196 RangeConstraintManager::getSymLERange(llvm::function_ref
<RangeSet()> RS
,
3197 const llvm::APSInt
&Int
,
3198 const llvm::APSInt
&Adjustment
) {
3199 // Before we do any real work, see if the value can even show up.
3200 APSIntType
AdjustmentType(Adjustment
);
3201 switch (AdjustmentType
.testInRange(Int
, true)) {
3202 case APSIntType::RTR_Below
:
3203 return F
.getEmptySet();
3204 case APSIntType::RTR_Within
:
3206 case APSIntType::RTR_Above
:
3210 // Special case for Int == Max. This is always feasible.
3211 llvm::APSInt ComparisonVal
= AdjustmentType
.convert(Int
);
3212 llvm::APSInt Max
= AdjustmentType
.getMaxValue();
3213 if (ComparisonVal
== Max
)
3216 llvm::APSInt Min
= AdjustmentType
.getMinValue();
3217 llvm::APSInt Lower
= Min
- Adjustment
;
3218 llvm::APSInt Upper
= ComparisonVal
- Adjustment
;
3220 RangeSet Default
= RS();
3221 return F
.intersect(Default
, Lower
, Upper
);
3224 RangeSet
RangeConstraintManager::getSymLERange(ProgramStateRef St
,
3226 const llvm::APSInt
&Int
,
3227 const llvm::APSInt
&Adjustment
) {
3228 return getSymLERange([&] { return getRange(St
, Sym
); }, Int
, Adjustment
);
3232 RangeConstraintManager::assumeSymLE(ProgramStateRef St
, SymbolRef Sym
,
3233 const llvm::APSInt
&Int
,
3234 const llvm::APSInt
&Adjustment
) {
3235 RangeSet New
= getSymLERange(St
, Sym
, Int
, Adjustment
);
3236 return setRange(St
, Sym
, New
);
3239 ProgramStateRef
RangeConstraintManager::assumeSymWithinInclusiveRange(
3240 ProgramStateRef State
, SymbolRef Sym
, const llvm::APSInt
&From
,
3241 const llvm::APSInt
&To
, const llvm::APSInt
&Adjustment
) {
3242 RangeSet New
= getSymGERange(State
, Sym
, From
, Adjustment
);
3245 RangeSet Out
= getSymLERange([&] { return New
; }, To
, Adjustment
);
3246 return setRange(State
, Sym
, Out
);
3249 ProgramStateRef
RangeConstraintManager::assumeSymOutsideInclusiveRange(
3250 ProgramStateRef State
, SymbolRef Sym
, const llvm::APSInt
&From
,
3251 const llvm::APSInt
&To
, const llvm::APSInt
&Adjustment
) {
3252 RangeSet RangeLT
= getSymLTRange(State
, Sym
, From
, Adjustment
);
3253 RangeSet RangeGT
= getSymGTRange(State
, Sym
, To
, Adjustment
);
3254 RangeSet
New(F
.add(RangeLT
, RangeGT
));
3255 return setRange(State
, Sym
, New
);
3258 //===----------------------------------------------------------------------===//
3260 //===----------------------------------------------------------------------===//
3262 void RangeConstraintManager::printJson(raw_ostream
&Out
, ProgramStateRef State
,
3263 const char *NL
, unsigned int Space
,
3265 printConstraints(Out
, State
, NL
, Space
, IsDot
);
3266 printEquivalenceClasses(Out
, State
, NL
, Space
, IsDot
);
3267 printDisequalities(Out
, State
, NL
, Space
, IsDot
);
3270 void RangeConstraintManager::printValue(raw_ostream
&Out
, ProgramStateRef State
,
3272 const RangeSet RS
= getRange(State
, Sym
);
3273 Out
<< RS
.getBitWidth() << (RS
.isUnsigned() ? "u:" : "s:");
3277 static std::string
toString(const SymbolRef
&Sym
) {
3279 llvm::raw_string_ostream
O(S
);
3280 Sym
->dumpToStream(O
);
3284 void RangeConstraintManager::printConstraints(raw_ostream
&Out
,
3285 ProgramStateRef State
,
3289 ConstraintRangeTy Constraints
= State
->get
<ConstraintRange
>();
3291 Indent(Out
, Space
, IsDot
) << "\"constraints\": ";
3292 if (Constraints
.isEmpty()) {
3293 Out
<< "null," << NL
;
3297 std::map
<std::string
, RangeSet
> OrderedConstraints
;
3298 for (std::pair
<EquivalenceClass
, RangeSet
> P
: Constraints
) {
3299 SymbolSet ClassMembers
= P
.first
.getClassMembers(State
);
3300 for (const SymbolRef
&ClassMember
: ClassMembers
) {
3301 bool insertion_took_place
;
3302 std::tie(std::ignore
, insertion_took_place
) =
3303 OrderedConstraints
.insert({toString(ClassMember
), P
.second
});
3304 assert(insertion_took_place
&&
3305 "two symbols should not have the same dump");
3312 for (std::pair
<std::string
, RangeSet
> P
: OrderedConstraints
) {
3319 Indent(Out
, Space
, IsDot
)
3320 << "{ \"symbol\": \"" << P
.first
<< "\", \"range\": \"";
3327 Indent(Out
, Space
, IsDot
) << "]," << NL
;
3330 static std::string
toString(ProgramStateRef State
, EquivalenceClass Class
) {
3331 SymbolSet ClassMembers
= Class
.getClassMembers(State
);
3332 llvm::SmallVector
<SymbolRef
, 8> ClassMembersSorted(ClassMembers
.begin(),
3333 ClassMembers
.end());
3334 llvm::sort(ClassMembersSorted
,
3335 [](const SymbolRef
&LHS
, const SymbolRef
&RHS
) {
3336 return toString(LHS
) < toString(RHS
);
3339 bool FirstMember
= true;
3342 llvm::raw_string_ostream
Out(Str
);
3344 for (SymbolRef ClassMember
: ClassMembersSorted
) {
3346 FirstMember
= false;
3349 Out
<< "\"" << ClassMember
<< "\"";
3355 void RangeConstraintManager::printEquivalenceClasses(raw_ostream
&Out
,
3356 ProgramStateRef State
,
3360 ClassMembersTy Members
= State
->get
<ClassMembers
>();
3362 Indent(Out
, Space
, IsDot
) << "\"equivalence_classes\": ";
3363 if (Members
.isEmpty()) {
3364 Out
<< "null," << NL
;
3368 std::set
<std::string
> MembersStr
;
3369 for (std::pair
<EquivalenceClass
, SymbolSet
> ClassToSymbolSet
: Members
)
3370 MembersStr
.insert(toString(State
, ClassToSymbolSet
.first
));
3374 bool FirstClass
= true;
3375 for (const std::string
&Str
: MembersStr
) {
3382 Indent(Out
, Space
, IsDot
);
3388 Indent(Out
, Space
, IsDot
) << "]," << NL
;
3391 void RangeConstraintManager::printDisequalities(raw_ostream
&Out
,
3392 ProgramStateRef State
,
3396 DisequalityMapTy Disequalities
= State
->get
<DisequalityMap
>();
3398 Indent(Out
, Space
, IsDot
) << "\"disequality_info\": ";
3399 if (Disequalities
.isEmpty()) {
3400 Out
<< "null," << NL
;
3404 // Transform the disequality info to an ordered map of
3405 // [string -> (ordered set of strings)]
3406 using EqClassesStrTy
= std::set
<std::string
>;
3407 using DisequalityInfoStrTy
= std::map
<std::string
, EqClassesStrTy
>;
3408 DisequalityInfoStrTy DisequalityInfoStr
;
3409 for (std::pair
<EquivalenceClass
, ClassSet
> ClassToDisEqSet
: Disequalities
) {
3410 EquivalenceClass Class
= ClassToDisEqSet
.first
;
3411 ClassSet DisequalClasses
= ClassToDisEqSet
.second
;
3412 EqClassesStrTy MembersStr
;
3413 for (EquivalenceClass DisEqClass
: DisequalClasses
)
3414 MembersStr
.insert(toString(State
, DisEqClass
));
3415 DisequalityInfoStr
.insert({toString(State
, Class
), MembersStr
});
3420 bool FirstClass
= true;
3421 for (std::pair
<std::string
, EqClassesStrTy
> ClassToDisEqSet
:
3422 DisequalityInfoStr
) {
3423 const std::string
&Class
= ClassToDisEqSet
.first
;
3430 Indent(Out
, Space
, IsDot
) << "{" << NL
;
3431 unsigned int DisEqSpace
= Space
+ 1;
3432 Indent(Out
, DisEqSpace
, IsDot
) << "\"class\": ";
3434 const EqClassesStrTy
&DisequalClasses
= ClassToDisEqSet
.second
;
3435 if (!DisequalClasses
.empty()) {
3437 Indent(Out
, DisEqSpace
, IsDot
) << "\"disequal_to\": [" << NL
;
3438 unsigned int DisEqClassSpace
= DisEqSpace
+ 1;
3439 Indent(Out
, DisEqClassSpace
, IsDot
);
3440 bool FirstDisEqClass
= true;
3441 for (const std::string
&DisEqClass
: DisequalClasses
) {
3442 if (FirstDisEqClass
) {
3443 FirstDisEqClass
= false;
3446 Indent(Out
, DisEqClassSpace
, IsDot
);
3452 Indent(Out
, Space
, IsDot
) << "}";
3457 Indent(Out
, Space
, IsDot
) << "]," << NL
;