[DFAJumpThreading] Remove incoming StartBlock from all phis when unfolding select...
[llvm-project.git] / clang / lib / StaticAnalyzer / Checkers / ArrayBoundCheckerV2.cpp
blobe80a06e385875200c5605655402dafa2b7e18873
1 //== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines ArrayBoundCheckerV2, which is a path-sensitive check
10 // which looks for an out-of-bound array element access.
12 //===----------------------------------------------------------------------===//
14 #include "clang/AST/CharUnits.h"
15 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
16 #include "clang/StaticAnalyzer/Checkers/Taint.h"
17 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
18 #include "clang/StaticAnalyzer/Core/Checker.h"
19 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <optional>
28 using namespace clang;
29 using namespace ento;
30 using namespace taint;
32 namespace {
33 class ArrayBoundCheckerV2 :
34 public Checker<check::Location> {
35 BugType BT{this, "Out-of-bound access"};
36 BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
38 enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
40 void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
41 SVal TaintedSVal = UnknownVal()) const;
43 static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
45 public:
46 void checkLocation(SVal l, bool isLoad, const Stmt *S,
47 CheckerContext &C) const;
49 } // anonymous namespace
51 /// For a given Location that can be represented as a symbolic expression
52 /// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
53 /// Arr and the distance of Location from the beginning of Arr (expressed in a
54 /// NonLoc that specifies the number of CharUnits). Returns nullopt when these
55 /// cannot be determined.
56 std::optional<std::pair<const SubRegion *, NonLoc>>
57 computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
58 QualType T = SVB.getArrayIndexType();
59 auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) {
60 // We will use this utility to add and multiply values.
61 return SVB.evalBinOpNN(State, Op, L, R, T).getAs<NonLoc>();
64 const SubRegion *OwnerRegion = nullptr;
65 std::optional<NonLoc> Offset = SVB.makeZeroArrayIndex();
67 const ElementRegion *CurRegion =
68 dyn_cast_or_null<ElementRegion>(Location.getAsRegion());
70 while (CurRegion) {
71 const auto Index = CurRegion->getIndex().getAs<NonLoc>();
72 if (!Index)
73 return std::nullopt;
75 QualType ElemType = CurRegion->getElementType();
77 // FIXME: The following early return was presumably added to safeguard the
78 // getTypeSizeInChars() call (which doesn't accept an incomplete type), but
79 // it seems that `ElemType` cannot be incomplete at this point.
80 if (ElemType->isIncompleteType())
81 return std::nullopt;
83 // Calculate Delta = Index * sizeof(ElemType).
84 NonLoc Size = SVB.makeArrayIndex(
85 SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
86 auto Delta = EvalBinOp(BO_Mul, *Index, Size);
87 if (!Delta)
88 return std::nullopt;
90 // Perform Offset += Delta.
91 Offset = EvalBinOp(BO_Add, *Offset, *Delta);
92 if (!Offset)
93 return std::nullopt;
95 OwnerRegion = CurRegion->getSuperRegion()->getAs<SubRegion>();
96 // When this is just another ElementRegion layer, we need to continue the
97 // offset calculations:
98 CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion);
101 if (OwnerRegion)
102 return std::make_pair(OwnerRegion, *Offset);
104 return std::nullopt;
107 // TODO: once the constraint manager is smart enough to handle non simplified
108 // symbolic expressions remove this function. Note that this can not be used in
109 // the constraint manager as is, since this does not handle overflows. It is
110 // safe to assume, however, that memory offsets will not overflow.
111 // NOTE: callers of this function need to be aware of the effects of overflows
112 // and signed<->unsigned conversions!
113 static std::pair<NonLoc, nonloc::ConcreteInt>
114 getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
115 SValBuilder &svalBuilder) {
116 std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();
117 if (SymVal && SymVal->isExpression()) {
118 if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
119 llvm::APSInt constant =
120 APSIntType(extent.getValue()).convert(SIE->getRHS());
121 switch (SIE->getOpcode()) {
122 case BO_Mul:
123 // The constant should never be 0 here, becasue multiplication by zero
124 // is simplified by the engine.
125 if ((extent.getValue() % constant) != 0)
126 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
127 else
128 return getSimplifiedOffsets(
129 nonloc::SymbolVal(SIE->getLHS()),
130 svalBuilder.makeIntVal(extent.getValue() / constant),
131 svalBuilder);
132 case BO_Add:
133 return getSimplifiedOffsets(
134 nonloc::SymbolVal(SIE->getLHS()),
135 svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder);
136 default:
137 break;
142 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
145 // Evaluate the comparison Value < Threshold with the help of the custom
146 // simplification algorithm defined for this checker. Return a pair of states,
147 // where the first one corresponds to "value below threshold" and the second
148 // corresponds to "value at or above threshold". Returns {nullptr, nullptr} in
149 // the case when the evaluation fails.
150 static std::pair<ProgramStateRef, ProgramStateRef>
151 compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
152 SValBuilder &SVB) {
153 if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
154 std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB);
156 if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
157 QualType T = Value.getType(SVB.getContext());
158 if (T->isUnsignedIntegerType() && ConcreteThreshold->getValue().isNegative()) {
159 // In this case we reduced the bound check to a comparison of the form
160 // (symbol or value with unsigned type) < (negative number)
161 // which is always false. We are handling these cases separately because
162 // evalBinOpNN can perform a signed->unsigned conversion that turns the
163 // negative number into a huge positive value and leads to wildly
164 // inaccurate conclusions.
165 return {nullptr, State};
168 auto BelowThreshold =
169 SVB.evalBinOpNN(State, BO_LT, Value, Threshold, SVB.getConditionType()).getAs<NonLoc>();
171 if (BelowThreshold)
172 return State->assume(*BelowThreshold);
174 return {nullptr, nullptr};
177 void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
178 const Stmt* LoadS,
179 CheckerContext &checkerContext) const {
181 // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
182 // some new logic here that reasons directly about memory region extents.
183 // Once that logic is more mature, we can bring it back to assumeInBound()
184 // for all clients to use.
186 // The algorithm we are using here for bounds checking is to see if the
187 // memory access is within the extent of the base region. Since we
188 // have some flexibility in defining the base region, we can achieve
189 // various levels of conservatism in our buffer overflow checking.
191 // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
192 // #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX)
193 // and incomplete analysis of these leads to false positives. As even
194 // accurate reports would be confusing for the users, just disable reports
195 // from these macros:
196 if (isFromCtypeMacro(LoadS, checkerContext.getASTContext()))
197 return;
199 ProgramStateRef state = checkerContext.getState();
200 SValBuilder &svalBuilder = checkerContext.getSValBuilder();
202 const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
203 computeOffset(state, svalBuilder, location);
205 if (!RawOffset)
206 return;
208 auto [Reg, ByteOffset] = *RawOffset;
210 // CHECK LOWER BOUND
211 const MemSpaceRegion *Space = Reg->getMemorySpace();
212 if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
213 // A symbolic region in unknown space represents an unknown pointer that
214 // may point into the middle of an array, so we don't look for underflows.
215 // Both conditions are significant because we want to check underflows in
216 // symbolic regions on the heap (which may be introduced by checkers like
217 // MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and
218 // non-symbolic regions (e.g. a field subregion of a symbolic region) in
219 // unknown space.
220 auto [state_precedesLowerBound, state_withinLowerBound] =
221 compareValueToThreshold(state, ByteOffset,
222 svalBuilder.makeZeroArrayIndex(), svalBuilder);
224 if (state_precedesLowerBound && !state_withinLowerBound) {
225 // We know that the index definitely precedes the lower bound.
226 reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
227 return;
230 if (state_withinLowerBound)
231 state = state_withinLowerBound;
234 // CHECK UPPER BOUND
235 DefinedOrUnknownSVal Size = getDynamicExtent(state, Reg, svalBuilder);
236 if (auto KnownSize = Size.getAs<NonLoc>()) {
237 auto [state_withinUpperBound, state_exceedsUpperBound] =
238 compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder);
240 if (state_exceedsUpperBound) {
241 if (!state_withinUpperBound) {
242 // We know that the index definitely exceeds the upper bound.
243 reportOOB(checkerContext, state_exceedsUpperBound, OOB_Exceeds);
244 return;
246 if (isTainted(state, ByteOffset)) {
247 // Both cases are possible, but the index is tainted, so report.
248 reportOOB(checkerContext, state_exceedsUpperBound, OOB_Taint,
249 ByteOffset);
250 return;
254 if (state_withinUpperBound)
255 state = state_withinUpperBound;
258 checkerContext.addTransition(state);
261 void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
262 ProgramStateRef ErrorState, OOB_Kind Kind,
263 SVal TaintedSVal) const {
265 ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
266 if (!ErrorNode)
267 return;
269 // FIXME: These diagnostics are preliminary, and they'll be replaced soon by
270 // a follow-up commit.
272 SmallString<128> Buf;
273 llvm::raw_svector_ostream Out(Buf);
274 Out << "Out of bound memory access ";
276 switch (Kind) {
277 case OOB_Precedes:
278 Out << "(accessed memory precedes memory block)";
279 break;
280 case OOB_Exceeds:
281 Out << "(access exceeds upper limit of memory block)";
282 break;
283 case OOB_Taint:
284 Out << "(index is tainted)";
285 break;
287 auto BR = std::make_unique<PathSensitiveBugReport>(
288 Kind == OOB_Taint ? TaintBT : BT, Out.str(), ErrorNode);
289 // Track back the propagation of taintedness, or do nothing if TaintedSVal is
290 // the default UnknownVal().
291 for (SymbolRef Sym : getTaintedSymbols(ErrorState, TaintedSVal)) {
292 BR->markInteresting(Sym);
294 C.emitReport(std::move(BR));
297 bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
298 SourceLocation Loc = S->getBeginLoc();
299 if (!Loc.isMacroID())
300 return false;
302 StringRef MacroName = Lexer::getImmediateMacroName(
303 Loc, ACtx.getSourceManager(), ACtx.getLangOpts());
305 if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's')
306 return false;
308 return ((MacroName == "isalnum") || (MacroName == "isalpha") ||
309 (MacroName == "isblank") || (MacroName == "isdigit") ||
310 (MacroName == "isgraph") || (MacroName == "islower") ||
311 (MacroName == "isnctrl") || (MacroName == "isprint") ||
312 (MacroName == "ispunct") || (MacroName == "isspace") ||
313 (MacroName == "isupper") || (MacroName == "isxdigit"));
316 void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
317 mgr.registerChecker<ArrayBoundCheckerV2>();
320 bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) {
321 return true;