1 //== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines ArrayBoundCheckerV2, which is a path-sensitive check
10 // which looks for an out-of-bound array element access.
12 //===----------------------------------------------------------------------===//
14 #include "clang/AST/CharUnits.h"
15 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
16 #include "clang/StaticAnalyzer/Checkers/Taint.h"
17 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
18 #include "clang/StaticAnalyzer/Core/Checker.h"
19 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/Support/raw_ostream.h"
28 using namespace clang
;
30 using namespace taint
;
33 class ArrayBoundCheckerV2
:
34 public Checker
<check::Location
> {
35 BugType BT
{this, "Out-of-bound access"};
36 BugType TaintBT
{this, "Out-of-bound access", categories::TaintedData
};
38 enum OOB_Kind
{ OOB_Precedes
, OOB_Exceeds
, OOB_Taint
};
40 void reportOOB(CheckerContext
&C
, ProgramStateRef ErrorState
, OOB_Kind Kind
,
41 SVal TaintedSVal
= UnknownVal()) const;
43 static bool isFromCtypeMacro(const Stmt
*S
, ASTContext
&AC
);
46 void checkLocation(SVal l
, bool isLoad
, const Stmt
*S
,
47 CheckerContext
&C
) const;
49 } // anonymous namespace
51 /// For a given Location that can be represented as a symbolic expression
52 /// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
53 /// Arr and the distance of Location from the beginning of Arr (expressed in a
54 /// NonLoc that specifies the number of CharUnits). Returns nullopt when these
55 /// cannot be determined.
56 std::optional
<std::pair
<const SubRegion
*, NonLoc
>>
57 computeOffset(ProgramStateRef State
, SValBuilder
&SVB
, SVal Location
) {
58 QualType T
= SVB
.getArrayIndexType();
59 auto EvalBinOp
= [&SVB
, State
, T
](BinaryOperatorKind Op
, NonLoc L
, NonLoc R
) {
60 // We will use this utility to add and multiply values.
61 return SVB
.evalBinOpNN(State
, Op
, L
, R
, T
).getAs
<NonLoc
>();
64 const SubRegion
*OwnerRegion
= nullptr;
65 std::optional
<NonLoc
> Offset
= SVB
.makeZeroArrayIndex();
67 const ElementRegion
*CurRegion
=
68 dyn_cast_or_null
<ElementRegion
>(Location
.getAsRegion());
71 const auto Index
= CurRegion
->getIndex().getAs
<NonLoc
>();
75 QualType ElemType
= CurRegion
->getElementType();
77 // FIXME: The following early return was presumably added to safeguard the
78 // getTypeSizeInChars() call (which doesn't accept an incomplete type), but
79 // it seems that `ElemType` cannot be incomplete at this point.
80 if (ElemType
->isIncompleteType())
83 // Calculate Delta = Index * sizeof(ElemType).
84 NonLoc Size
= SVB
.makeArrayIndex(
85 SVB
.getContext().getTypeSizeInChars(ElemType
).getQuantity());
86 auto Delta
= EvalBinOp(BO_Mul
, *Index
, Size
);
90 // Perform Offset += Delta.
91 Offset
= EvalBinOp(BO_Add
, *Offset
, *Delta
);
95 OwnerRegion
= CurRegion
->getSuperRegion()->getAs
<SubRegion
>();
96 // When this is just another ElementRegion layer, we need to continue the
97 // offset calculations:
98 CurRegion
= dyn_cast_or_null
<ElementRegion
>(OwnerRegion
);
102 return std::make_pair(OwnerRegion
, *Offset
);
107 // TODO: once the constraint manager is smart enough to handle non simplified
108 // symbolic expressions remove this function. Note that this can not be used in
109 // the constraint manager as is, since this does not handle overflows. It is
110 // safe to assume, however, that memory offsets will not overflow.
111 // NOTE: callers of this function need to be aware of the effects of overflows
112 // and signed<->unsigned conversions!
113 static std::pair
<NonLoc
, nonloc::ConcreteInt
>
114 getSimplifiedOffsets(NonLoc offset
, nonloc::ConcreteInt extent
,
115 SValBuilder
&svalBuilder
) {
116 std::optional
<nonloc::SymbolVal
> SymVal
= offset
.getAs
<nonloc::SymbolVal
>();
117 if (SymVal
&& SymVal
->isExpression()) {
118 if (const SymIntExpr
*SIE
= dyn_cast
<SymIntExpr
>(SymVal
->getSymbol())) {
119 llvm::APSInt constant
=
120 APSIntType(extent
.getValue()).convert(SIE
->getRHS());
121 switch (SIE
->getOpcode()) {
123 // The constant should never be 0 here, becasue multiplication by zero
124 // is simplified by the engine.
125 if ((extent
.getValue() % constant
) != 0)
126 return std::pair
<NonLoc
, nonloc::ConcreteInt
>(offset
, extent
);
128 return getSimplifiedOffsets(
129 nonloc::SymbolVal(SIE
->getLHS()),
130 svalBuilder
.makeIntVal(extent
.getValue() / constant
),
133 return getSimplifiedOffsets(
134 nonloc::SymbolVal(SIE
->getLHS()),
135 svalBuilder
.makeIntVal(extent
.getValue() - constant
), svalBuilder
);
142 return std::pair
<NonLoc
, nonloc::ConcreteInt
>(offset
, extent
);
145 // Evaluate the comparison Value < Threshold with the help of the custom
146 // simplification algorithm defined for this checker. Return a pair of states,
147 // where the first one corresponds to "value below threshold" and the second
148 // corresponds to "value at or above threshold". Returns {nullptr, nullptr} in
149 // the case when the evaluation fails.
150 static std::pair
<ProgramStateRef
, ProgramStateRef
>
151 compareValueToThreshold(ProgramStateRef State
, NonLoc Value
, NonLoc Threshold
,
153 if (auto ConcreteThreshold
= Threshold
.getAs
<nonloc::ConcreteInt
>()) {
154 std::tie(Value
, Threshold
) = getSimplifiedOffsets(Value
, *ConcreteThreshold
, SVB
);
156 if (auto ConcreteThreshold
= Threshold
.getAs
<nonloc::ConcreteInt
>()) {
157 QualType T
= Value
.getType(SVB
.getContext());
158 if (T
->isUnsignedIntegerType() && ConcreteThreshold
->getValue().isNegative()) {
159 // In this case we reduced the bound check to a comparison of the form
160 // (symbol or value with unsigned type) < (negative number)
161 // which is always false. We are handling these cases separately because
162 // evalBinOpNN can perform a signed->unsigned conversion that turns the
163 // negative number into a huge positive value and leads to wildly
164 // inaccurate conclusions.
165 return {nullptr, State
};
168 auto BelowThreshold
=
169 SVB
.evalBinOpNN(State
, BO_LT
, Value
, Threshold
, SVB
.getConditionType()).getAs
<NonLoc
>();
172 return State
->assume(*BelowThreshold
);
174 return {nullptr, nullptr};
177 void ArrayBoundCheckerV2::checkLocation(SVal location
, bool isLoad
,
179 CheckerContext
&checkerContext
) const {
181 // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
182 // some new logic here that reasons directly about memory region extents.
183 // Once that logic is more mature, we can bring it back to assumeInBound()
184 // for all clients to use.
186 // The algorithm we are using here for bounds checking is to see if the
187 // memory access is within the extent of the base region. Since we
188 // have some flexibility in defining the base region, we can achieve
189 // various levels of conservatism in our buffer overflow checking.
191 // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
192 // #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX)
193 // and incomplete analysis of these leads to false positives. As even
194 // accurate reports would be confusing for the users, just disable reports
195 // from these macros:
196 if (isFromCtypeMacro(LoadS
, checkerContext
.getASTContext()))
199 ProgramStateRef state
= checkerContext
.getState();
200 SValBuilder
&svalBuilder
= checkerContext
.getSValBuilder();
202 const std::optional
<std::pair
<const SubRegion
*, NonLoc
>> &RawOffset
=
203 computeOffset(state
, svalBuilder
, location
);
208 auto [Reg
, ByteOffset
] = *RawOffset
;
211 const MemSpaceRegion
*Space
= Reg
->getMemorySpace();
212 if (!(isa
<SymbolicRegion
>(Reg
) && isa
<UnknownSpaceRegion
>(Space
))) {
213 // A symbolic region in unknown space represents an unknown pointer that
214 // may point into the middle of an array, so we don't look for underflows.
215 // Both conditions are significant because we want to check underflows in
216 // symbolic regions on the heap (which may be introduced by checkers like
217 // MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and
218 // non-symbolic regions (e.g. a field subregion of a symbolic region) in
220 auto [state_precedesLowerBound
, state_withinLowerBound
] =
221 compareValueToThreshold(state
, ByteOffset
,
222 svalBuilder
.makeZeroArrayIndex(), svalBuilder
);
224 if (state_precedesLowerBound
&& !state_withinLowerBound
) {
225 // We know that the index definitely precedes the lower bound.
226 reportOOB(checkerContext
, state_precedesLowerBound
, OOB_Precedes
);
230 if (state_withinLowerBound
)
231 state
= state_withinLowerBound
;
235 DefinedOrUnknownSVal Size
= getDynamicExtent(state
, Reg
, svalBuilder
);
236 if (auto KnownSize
= Size
.getAs
<NonLoc
>()) {
237 auto [state_withinUpperBound
, state_exceedsUpperBound
] =
238 compareValueToThreshold(state
, ByteOffset
, *KnownSize
, svalBuilder
);
240 if (state_exceedsUpperBound
) {
241 if (!state_withinUpperBound
) {
242 // We know that the index definitely exceeds the upper bound.
243 reportOOB(checkerContext
, state_exceedsUpperBound
, OOB_Exceeds
);
246 if (isTainted(state
, ByteOffset
)) {
247 // Both cases are possible, but the index is tainted, so report.
248 reportOOB(checkerContext
, state_exceedsUpperBound
, OOB_Taint
,
254 if (state_withinUpperBound
)
255 state
= state_withinUpperBound
;
258 checkerContext
.addTransition(state
);
261 void ArrayBoundCheckerV2::reportOOB(CheckerContext
&C
,
262 ProgramStateRef ErrorState
, OOB_Kind Kind
,
263 SVal TaintedSVal
) const {
265 ExplodedNode
*ErrorNode
= C
.generateErrorNode(ErrorState
);
269 // FIXME: These diagnostics are preliminary, and they'll be replaced soon by
270 // a follow-up commit.
272 SmallString
<128> Buf
;
273 llvm::raw_svector_ostream
Out(Buf
);
274 Out
<< "Out of bound memory access ";
278 Out
<< "(accessed memory precedes memory block)";
281 Out
<< "(access exceeds upper limit of memory block)";
284 Out
<< "(index is tainted)";
287 auto BR
= std::make_unique
<PathSensitiveBugReport
>(
288 Kind
== OOB_Taint
? TaintBT
: BT
, Out
.str(), ErrorNode
);
289 // Track back the propagation of taintedness, or do nothing if TaintedSVal is
290 // the default UnknownVal().
291 for (SymbolRef Sym
: getTaintedSymbols(ErrorState
, TaintedSVal
)) {
292 BR
->markInteresting(Sym
);
294 C
.emitReport(std::move(BR
));
297 bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt
*S
, ASTContext
&ACtx
) {
298 SourceLocation Loc
= S
->getBeginLoc();
299 if (!Loc
.isMacroID())
302 StringRef MacroName
= Lexer::getImmediateMacroName(
303 Loc
, ACtx
.getSourceManager(), ACtx
.getLangOpts());
305 if (MacroName
.size() < 7 || MacroName
[0] != 'i' || MacroName
[1] != 's')
308 return ((MacroName
== "isalnum") || (MacroName
== "isalpha") ||
309 (MacroName
== "isblank") || (MacroName
== "isdigit") ||
310 (MacroName
== "isgraph") || (MacroName
== "islower") ||
311 (MacroName
== "isnctrl") || (MacroName
== "isprint") ||
312 (MacroName
== "ispunct") || (MacroName
== "isspace") ||
313 (MacroName
== "isupper") || (MacroName
== "isxdigit"));
316 void ento::registerArrayBoundCheckerV2(CheckerManager
&mgr
) {
317 mgr
.registerChecker
<ArrayBoundCheckerV2
>();
320 bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager
&mgr
) {