[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / clang / lib / StaticAnalyzer / Checkers / CStringChecker.cpp
blobb1bc98e93a2799596c06f7daba7bff8923c5dda2
1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This defines CStringChecker, which is an assortment of checks on calls
10 // to functions in <string.h>.
12 //===----------------------------------------------------------------------===//
14 #include "InterCheckerAPI.h"
15 #include "clang/Basic/Builtins.h"
16 #include "clang/Basic/CharInfo.h"
17 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
19 #include "clang/StaticAnalyzer/Core/Checker.h"
20 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/StringExtras.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <functional>
31 #include <optional>
33 using namespace clang;
34 using namespace ento;
35 using namespace std::placeholders;
37 namespace {
38 struct AnyArgExpr {
39 const Expr *Expression;
40 unsigned ArgumentIndex;
42 struct SourceArgExpr : AnyArgExpr {};
43 struct DestinationArgExpr : AnyArgExpr {};
44 struct SizeArgExpr : AnyArgExpr {};
46 using ErrorMessage = SmallString<128>;
47 enum class AccessKind { write, read };
49 static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,
50 AccessKind Access) {
51 ErrorMessage Message;
52 llvm::raw_svector_ostream Os(Message);
54 // Function classification like: Memory copy function
55 Os << toUppercase(FunctionDescription.front())
56 << &FunctionDescription.data()[1];
58 if (Access == AccessKind::write) {
59 Os << " overflows the destination buffer";
60 } else { // read access
61 Os << " accesses out-of-bound array element";
64 return Message;
/// Which concatenation flavour (if any) a copy-style evaluation models.
enum class ConcatFnKind {
  none = 0,
  strcat = 1,
  strlcat = 2,
};

/// Character width handled by a modeled function: 'char' or 'wchar_t'.
enum class CharKind {
  Regular = 0,
  Wide,
};

// Short aliases used when registering the call handlers.
constexpr CharKind CK_Regular = CharKind::Regular;
constexpr CharKind CK_Wide = CharKind::Wide;
73 static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
74 return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy
75 : Ctx.WideCharTy);
78 class CStringChecker : public Checker< eval::Call,
79 check::PreStmt<DeclStmt>,
80 check::LiveSymbols,
81 check::DeadSymbols,
82 check::RegionChanges
83 > {
84 mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
85 BT_NotCString, BT_AdditionOverflow, BT_UninitRead;
87 mutable const char *CurrentFunctionDescription = nullptr;
89 public:
90 /// The filter is used to filter out the diagnostics which are not enabled by
91 /// the user.
92 struct CStringChecksFilter {
93 bool CheckCStringNullArg = false;
94 bool CheckCStringOutOfBounds = false;
95 bool CheckCStringBufferOverlap = false;
96 bool CheckCStringNotNullTerm = false;
97 bool CheckCStringUninitializedRead = false;
99 CheckerNameRef CheckNameCStringNullArg;
100 CheckerNameRef CheckNameCStringOutOfBounds;
101 CheckerNameRef CheckNameCStringBufferOverlap;
102 CheckerNameRef CheckNameCStringNotNullTerm;
103 CheckerNameRef CheckNameCStringUninitializedRead;
106 CStringChecksFilter Filter;
108 static void *getTag() { static int tag; return &tag; }
110 bool evalCall(const CallEvent &Call, CheckerContext &C) const;
111 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
112 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
113 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
115 ProgramStateRef
116 checkRegionChanges(ProgramStateRef state,
117 const InvalidatedSymbols *,
118 ArrayRef<const MemRegion *> ExplicitRegions,
119 ArrayRef<const MemRegion *> Regions,
120 const LocationContext *LCtx,
121 const CallEvent *Call) const;
123 using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,
124 const CallExpr *)>;
126 CallDescriptionMap<FnCheck> Callbacks = {
127 {{CDF_MaybeBuiltin, {"memcpy"}, 3},
128 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)},
129 {{CDF_MaybeBuiltin, {"wmemcpy"}, 3},
130 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)},
131 {{CDF_MaybeBuiltin, {"mempcpy"}, 3},
132 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)},
133 {{CDF_None, {"wmempcpy"}, 3},
134 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)},
135 {{CDF_MaybeBuiltin, {"memcmp"}, 3},
136 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
137 {{CDF_MaybeBuiltin, {"wmemcmp"}, 3},
138 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)},
139 {{CDF_MaybeBuiltin, {"memmove"}, 3},
140 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)},
141 {{CDF_MaybeBuiltin, {"wmemmove"}, 3},
142 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)},
143 {{CDF_MaybeBuiltin, {"memset"}, 3}, &CStringChecker::evalMemset},
144 {{CDF_MaybeBuiltin, {"explicit_memset"}, 3}, &CStringChecker::evalMemset},
145 {{CDF_MaybeBuiltin, {"strcpy"}, 2}, &CStringChecker::evalStrcpy},
146 {{CDF_MaybeBuiltin, {"strncpy"}, 3}, &CStringChecker::evalStrncpy},
147 {{CDF_MaybeBuiltin, {"stpcpy"}, 2}, &CStringChecker::evalStpcpy},
148 {{CDF_MaybeBuiltin, {"strlcpy"}, 3}, &CStringChecker::evalStrlcpy},
149 {{CDF_MaybeBuiltin, {"strcat"}, 2}, &CStringChecker::evalStrcat},
150 {{CDF_MaybeBuiltin, {"strncat"}, 3}, &CStringChecker::evalStrncat},
151 {{CDF_MaybeBuiltin, {"strlcat"}, 3}, &CStringChecker::evalStrlcat},
152 {{CDF_MaybeBuiltin, {"strlen"}, 1}, &CStringChecker::evalstrLength},
153 {{CDF_MaybeBuiltin, {"wcslen"}, 1}, &CStringChecker::evalstrLength},
154 {{CDF_MaybeBuiltin, {"strnlen"}, 2}, &CStringChecker::evalstrnLength},
155 {{CDF_MaybeBuiltin, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength},
156 {{CDF_MaybeBuiltin, {"strcmp"}, 2}, &CStringChecker::evalStrcmp},
157 {{CDF_MaybeBuiltin, {"strncmp"}, 3}, &CStringChecker::evalStrncmp},
158 {{CDF_MaybeBuiltin, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp},
159 {{CDF_MaybeBuiltin, {"strncasecmp"}, 3},
160 &CStringChecker::evalStrncasecmp},
161 {{CDF_MaybeBuiltin, {"strsep"}, 2}, &CStringChecker::evalStrsep},
162 {{CDF_MaybeBuiltin, {"bcopy"}, 3}, &CStringChecker::evalBcopy},
163 {{CDF_MaybeBuiltin, {"bcmp"}, 3},
164 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
165 {{CDF_MaybeBuiltin, {"bzero"}, 2}, &CStringChecker::evalBzero},
166 {{CDF_MaybeBuiltin, {"explicit_bzero"}, 2}, &CStringChecker::evalBzero},
167 {{CDF_MaybeBuiltin, {"sprintf"}, 2}, &CStringChecker::evalSprintf},
168 {{CDF_MaybeBuiltin, {"snprintf"}, 2}, &CStringChecker::evalSnprintf},
171 // These require a bit of special handling.
172 CallDescription StdCopy{{"std", "copy"}, 3},
173 StdCopyBackward{{"std", "copy_backward"}, 3};
175 FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
176 void evalMemcpy(CheckerContext &C, const CallExpr *CE, CharKind CK) const;
177 void evalMempcpy(CheckerContext &C, const CallExpr *CE, CharKind CK) const;
178 void evalMemmove(CheckerContext &C, const CallExpr *CE, CharKind CK) const;
179 void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
180 void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
181 ProgramStateRef state, SizeArgExpr Size,
182 DestinationArgExpr Dest, SourceArgExpr Source,
183 bool Restricted, bool IsMempcpy, CharKind CK) const;
185 void evalMemcmp(CheckerContext &C, const CallExpr *CE, CharKind CK) const;
187 void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
188 void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
189 void evalstrLengthCommon(CheckerContext &C,
190 const CallExpr *CE,
191 bool IsStrnlen = false) const;
193 void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
194 void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
195 void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
196 void evalStrlcpy(CheckerContext &C, const CallExpr *CE) const;
197 void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool ReturnEnd,
198 bool IsBounded, ConcatFnKind appendK,
199 bool returnPtr = true) const;
201 void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
202 void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
203 void evalStrlcat(CheckerContext &C, const CallExpr *CE) const;
205 void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
206 void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
207 void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
208 void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
209 void evalStrcmpCommon(CheckerContext &C,
210 const CallExpr *CE,
211 bool IsBounded = false,
212 bool IgnoreCase = false) const;
214 void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
216 void evalStdCopy(CheckerContext &C, const CallExpr *CE) const;
217 void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const;
218 void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const;
219 void evalMemset(CheckerContext &C, const CallExpr *CE) const;
220 void evalBzero(CheckerContext &C, const CallExpr *CE) const;
222 void evalSprintf(CheckerContext &C, const CallExpr *CE) const;
223 void evalSnprintf(CheckerContext &C, const CallExpr *CE) const;
224 void evalSprintfCommon(CheckerContext &C, const CallExpr *CE, bool IsBounded,
225 bool IsBuiltin) const;
227 // Utility methods
228 std::pair<ProgramStateRef , ProgramStateRef >
229 static assumeZero(CheckerContext &C,
230 ProgramStateRef state, SVal V, QualType Ty);
232 static ProgramStateRef setCStringLength(ProgramStateRef state,
233 const MemRegion *MR,
234 SVal strLength);
235 static SVal getCStringLengthForRegion(CheckerContext &C,
236 ProgramStateRef &state,
237 const Expr *Ex,
238 const MemRegion *MR,
239 bool hypothetical);
240 SVal getCStringLength(CheckerContext &C,
241 ProgramStateRef &state,
242 const Expr *Ex,
243 SVal Buf,
244 bool hypothetical = false) const;
246 const StringLiteral *getCStringLiteral(CheckerContext &C,
247 ProgramStateRef &state,
248 const Expr *expr,
249 SVal val) const;
251 /// Invalidate the destination buffer determined by characters copied.
252 static ProgramStateRef
253 invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,
254 const Expr *BufE, SVal BufV, SVal SizeV,
255 QualType SizeTy);
257 /// Operation never overflows, do not invalidate the super region.
258 static ProgramStateRef invalidateDestinationBufferNeverOverflows(
259 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);
261 /// We do not know whether the operation can overflow (e.g. size is unknown),
262 /// invalidate the super region and escape related pointers.
263 static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion(
264 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);
266 /// Invalidate the source buffer for escaping pointers.
267 static ProgramStateRef invalidateSourceBuffer(CheckerContext &C,
268 ProgramStateRef S,
269 const Expr *BufE, SVal BufV);
271 /// @param InvalidationTraitOperations Determine how to invlidate the
272 /// MemRegion by setting the invalidation traits. Return true to cause pointer
273 /// escape, or false otherwise.
274 static ProgramStateRef invalidateBufferAux(
275 CheckerContext &C, ProgramStateRef State, const Expr *Ex, SVal V,
276 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
277 const MemRegion *)>
278 InvalidationTraitOperations);
280 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
281 const MemRegion *MR);
283 static bool memsetAux(const Expr *DstBuffer, SVal CharE,
284 const Expr *Size, CheckerContext &C,
285 ProgramStateRef &State);
287 // Re-usable checks
288 ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
289 AnyArgExpr Arg, SVal l) const;
290 ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
291 AnyArgExpr Buffer, SVal Element,
292 AccessKind Access,
293 CharKind CK = CharKind::Regular) const;
294 ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
295 AnyArgExpr Buffer, SizeArgExpr Size,
296 AccessKind Access,
297 CharKind CK = CharKind::Regular) const;
298 ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
299 SizeArgExpr Size, AnyArgExpr First,
300 AnyArgExpr Second,
301 CharKind CK = CharKind::Regular) const;
302 void emitOverlapBug(CheckerContext &C,
303 ProgramStateRef state,
304 const Stmt *First,
305 const Stmt *Second) const;
307 void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
308 StringRef WarningMsg) const;
309 void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
310 const Stmt *S, StringRef WarningMsg) const;
311 void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
312 const Stmt *S, StringRef WarningMsg) const;
313 void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
314 void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
315 const Expr *E) const;
316 ProgramStateRef checkAdditionOverflow(CheckerContext &C,
317 ProgramStateRef state,
318 NonLoc left,
319 NonLoc right) const;
321 // Return true if the destination buffer of the copy function may be in bound.
322 // Expects SVal of Size to be positive and unsigned.
323 // Expects SVal of FirstBuf to be a FieldRegion.
324 static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
325 SVal BufVal, QualType BufTy, SVal LengthVal,
326 QualType LengthTy);
329 } //end anonymous namespace
331 REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
333 //===----------------------------------------------------------------------===//
334 // Individual checks and utility methods.
335 //===----------------------------------------------------------------------===//
337 std::pair<ProgramStateRef , ProgramStateRef >
338 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
339 QualType Ty) {
340 std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
341 if (!val)
342 return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
344 SValBuilder &svalBuilder = C.getSValBuilder();
345 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
346 return state->assume(svalBuilder.evalEQ(state, *val, zero));
349 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
350 ProgramStateRef State,
351 AnyArgExpr Arg, SVal l) const {
352 // If a previous check has failed, propagate the failure.
353 if (!State)
354 return nullptr;
356 ProgramStateRef stateNull, stateNonNull;
357 std::tie(stateNull, stateNonNull) =
358 assumeZero(C, State, l, Arg.Expression->getType());
360 if (stateNull && !stateNonNull) {
361 if (Filter.CheckCStringNullArg) {
362 SmallString<80> buf;
363 llvm::raw_svector_ostream OS(buf);
364 assert(CurrentFunctionDescription);
365 OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1)
366 << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to "
367 << CurrentFunctionDescription;
369 emitNullArgBug(C, stateNull, Arg.Expression, OS.str());
371 return nullptr;
374 // From here on, assume that the value is non-null.
375 assert(stateNonNull);
376 return stateNonNull;
379 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
380 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
381 ProgramStateRef state,
382 AnyArgExpr Buffer, SVal Element,
383 AccessKind Access,
384 CharKind CK) const {
386 // If a previous check has failed, propagate the failure.
387 if (!state)
388 return nullptr;
390 // Check for out of bound array element access.
391 const MemRegion *R = Element.getAsRegion();
392 if (!R)
393 return state;
395 const auto *ER = dyn_cast<ElementRegion>(R);
396 if (!ER)
397 return state;
399 SValBuilder &svalBuilder = C.getSValBuilder();
400 ASTContext &Ctx = svalBuilder.getContext();
402 // Get the index of the accessed element.
403 NonLoc Idx = ER->getIndex();
405 if (CK == CharKind::Regular) {
406 if (ER->getValueType() != Ctx.CharTy)
407 return state;
408 } else {
409 if (ER->getValueType() != Ctx.WideCharTy)
410 return state;
412 QualType SizeTy = Ctx.getSizeType();
413 NonLoc WideSize =
414 svalBuilder
415 .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
416 SizeTy)
417 .castAs<NonLoc>();
418 SVal Offset = svalBuilder.evalBinOpNN(state, BO_Mul, Idx, WideSize, SizeTy);
419 if (Offset.isUnknown())
420 return state;
421 Idx = Offset.castAs<NonLoc>();
424 // Get the size of the array.
425 const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
426 DefinedOrUnknownSVal Size =
427 getDynamicExtent(state, superReg, C.getSValBuilder());
429 ProgramStateRef StInBound, StOutBound;
430 std::tie(StInBound, StOutBound) = state->assumeInBoundDual(Idx, Size);
431 if (StOutBound && !StInBound) {
432 // These checks are either enabled by the CString out-of-bounds checker
433 // explicitly or implicitly by the Malloc checker.
434 // In the latter case we only do modeling but do not emit warning.
435 if (!Filter.CheckCStringOutOfBounds)
436 return nullptr;
438 // Emit a bug report.
439 ErrorMessage Message =
440 createOutOfBoundErrorMsg(CurrentFunctionDescription, Access);
441 emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message);
442 return nullptr;
445 // Ensure that we wouldn't read uninitialized value.
446 if (Access == AccessKind::read) {
447 if (Filter.CheckCStringUninitializedRead &&
448 StInBound->getSVal(ER).isUndef()) {
449 emitUninitializedReadBug(C, StInBound, Buffer.Expression);
450 return nullptr;
454 // Array bound check succeeded. From this point forward the array bound
455 // should always succeed.
456 return StInBound;
459 ProgramStateRef
460 CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
461 AnyArgExpr Buffer, SizeArgExpr Size,
462 AccessKind Access, CharKind CK) const {
463 // If a previous check has failed, propagate the failure.
464 if (!State)
465 return nullptr;
467 SValBuilder &svalBuilder = C.getSValBuilder();
468 ASTContext &Ctx = svalBuilder.getContext();
470 QualType SizeTy = Size.Expression->getType();
471 QualType PtrTy = getCharPtrType(Ctx, CK);
473 // Check that the first buffer is non-null.
474 SVal BufVal = C.getSVal(Buffer.Expression);
475 State = checkNonNull(C, State, Buffer, BufVal);
476 if (!State)
477 return nullptr;
479 // If out-of-bounds checking is turned off, skip the rest.
480 if (!Filter.CheckCStringOutOfBounds)
481 return State;
483 SVal BufStart =
484 svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType());
486 // Check if the first byte of the buffer is accessible.
487 State = CheckLocation(C, State, Buffer, BufStart, Access, CK);
488 if (!State)
489 return nullptr;
491 // Get the access length and make sure it is known.
492 // FIXME: This assumes the caller has already checked that the access length
493 // is positive. And that it's unsigned.
494 SVal LengthVal = C.getSVal(Size.Expression);
495 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
496 if (!Length)
497 return State;
499 // Compute the offset of the last element to be accessed: size-1.
500 NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>();
501 SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy);
502 if (Offset.isUnknown())
503 return nullptr;
504 NonLoc LastOffset = Offset.castAs<NonLoc>();
506 // Check that the first buffer is sufficiently long.
507 if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
509 SVal BufEnd =
510 svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
511 State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
513 // If the buffer isn't large enough, abort.
514 if (!State)
515 return nullptr;
518 // Large enough or not, return this state!
519 return State;
522 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
523 ProgramStateRef state,
524 SizeArgExpr Size, AnyArgExpr First,
525 AnyArgExpr Second,
526 CharKind CK) const {
527 if (!Filter.CheckCStringBufferOverlap)
528 return state;
530 // Do a simple check for overlap: if the two arguments are from the same
531 // buffer, see if the end of the first is greater than the start of the second
532 // or vice versa.
534 // If a previous check has failed, propagate the failure.
535 if (!state)
536 return nullptr;
538 ProgramStateRef stateTrue, stateFalse;
540 // Assume different address spaces cannot overlap.
541 if (First.Expression->getType()->getPointeeType().getAddressSpace() !=
542 Second.Expression->getType()->getPointeeType().getAddressSpace())
543 return state;
545 // Get the buffer values and make sure they're known locations.
546 const LocationContext *LCtx = C.getLocationContext();
547 SVal firstVal = state->getSVal(First.Expression, LCtx);
548 SVal secondVal = state->getSVal(Second.Expression, LCtx);
550 std::optional<Loc> firstLoc = firstVal.getAs<Loc>();
551 if (!firstLoc)
552 return state;
554 std::optional<Loc> secondLoc = secondVal.getAs<Loc>();
555 if (!secondLoc)
556 return state;
558 // Are the two values the same?
559 SValBuilder &svalBuilder = C.getSValBuilder();
560 std::tie(stateTrue, stateFalse) =
561 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
563 if (stateTrue && !stateFalse) {
564 // If the values are known to be equal, that's automatically an overlap.
565 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
566 return nullptr;
569 // assume the two expressions are not equal.
570 assert(stateFalse);
571 state = stateFalse;
573 // Which value comes first?
574 QualType cmpTy = svalBuilder.getConditionType();
575 SVal reverse =
576 svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy);
577 std::optional<DefinedOrUnknownSVal> reverseTest =
578 reverse.getAs<DefinedOrUnknownSVal>();
579 if (!reverseTest)
580 return state;
582 std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
583 if (stateTrue) {
584 if (stateFalse) {
585 // If we don't know which one comes first, we can't perform this test.
586 return state;
587 } else {
588 // Switch the values so that firstVal is before secondVal.
589 std::swap(firstLoc, secondLoc);
591 // Switch the Exprs as well, so that they still correspond.
592 std::swap(First, Second);
596 // Get the length, and make sure it too is known.
597 SVal LengthVal = state->getSVal(Size.Expression, LCtx);
598 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
599 if (!Length)
600 return state;
602 // Convert the first buffer's start address to char*.
603 // Bail out if the cast fails.
604 ASTContext &Ctx = svalBuilder.getContext();
605 QualType CharPtrTy = getCharPtrType(Ctx, CK);
606 SVal FirstStart =
607 svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType());
608 std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
609 if (!FirstStartLoc)
610 return state;
612 // Compute the end of the first buffer. Bail out if THAT fails.
613 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc,
614 *Length, CharPtrTy);
615 std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
616 if (!FirstEndLoc)
617 return state;
619 // Is the end of the first buffer past the start of the second buffer?
620 SVal Overlap =
621 svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy);
622 std::optional<DefinedOrUnknownSVal> OverlapTest =
623 Overlap.getAs<DefinedOrUnknownSVal>();
624 if (!OverlapTest)
625 return state;
627 std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
629 if (stateTrue && !stateFalse) {
630 // Overlap!
631 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
632 return nullptr;
635 // assume the two expressions don't overlap.
636 assert(stateFalse);
637 return stateFalse;
640 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
641 const Stmt *First, const Stmt *Second) const {
642 ExplodedNode *N = C.generateErrorNode(state);
643 if (!N)
644 return;
646 if (!BT_Overlap)
647 BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
648 categories::UnixAPI, "Improper arguments"));
650 // Generate a report for this bug.
651 auto report = std::make_unique<PathSensitiveBugReport>(
652 *BT_Overlap, "Arguments must not be overlapping buffers", N);
653 report->addRange(First->getSourceRange());
654 report->addRange(Second->getSourceRange());
656 C.emitReport(std::move(report));
659 void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
660 const Stmt *S, StringRef WarningMsg) const {
661 if (ExplodedNode *N = C.generateErrorNode(State)) {
662 if (!BT_Null) {
663 // FIXME: This call uses the string constant 'categories::UnixAPI' as the
664 // description of the bug; it should be replaced by a real description.
665 BT_Null.reset(
666 new BugType(Filter.CheckNameCStringNullArg, categories::UnixAPI));
669 auto Report =
670 std::make_unique<PathSensitiveBugReport>(*BT_Null, WarningMsg, N);
671 Report->addRange(S->getSourceRange());
672 if (const auto *Ex = dyn_cast<Expr>(S))
673 bugreporter::trackExpressionValue(N, Ex, *Report);
674 C.emitReport(std::move(Report));
678 void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
679 ProgramStateRef State,
680 const Expr *E) const {
681 if (ExplodedNode *N = C.generateErrorNode(State)) {
682 const char *Msg =
683 "Bytes string function accesses uninitialized/garbage values";
684 if (!BT_UninitRead)
685 BT_UninitRead.reset(new BugType(Filter.CheckNameCStringUninitializedRead,
686 "Accessing unitialized/garbage values"));
688 auto Report =
689 std::make_unique<PathSensitiveBugReport>(*BT_UninitRead, Msg, N);
690 Report->addRange(E->getSourceRange());
691 bugreporter::trackExpressionValue(N, E, *Report);
692 C.emitReport(std::move(Report));
696 void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
697 ProgramStateRef State, const Stmt *S,
698 StringRef WarningMsg) const {
699 if (ExplodedNode *N = C.generateErrorNode(State)) {
700 if (!BT_Bounds)
701 BT_Bounds.reset(new BugType(Filter.CheckCStringOutOfBounds
702 ? Filter.CheckNameCStringOutOfBounds
703 : Filter.CheckNameCStringNullArg,
704 "Out-of-bound array access"));
706 // FIXME: It would be nice to eventually make this diagnostic more clear,
707 // e.g., by referencing the original declaration or by saying *why* this
708 // reference is outside the range.
709 auto Report =
710 std::make_unique<PathSensitiveBugReport>(*BT_Bounds, WarningMsg, N);
711 Report->addRange(S->getSourceRange());
712 C.emitReport(std::move(Report));
716 void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
717 const Stmt *S,
718 StringRef WarningMsg) const {
719 if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
720 if (!BT_NotCString) {
721 // FIXME: This call uses the string constant 'categories::UnixAPI' as the
722 // description of the bug; it should be replaced by a real description.
723 BT_NotCString.reset(
724 new BugType(Filter.CheckNameCStringNotNullTerm, categories::UnixAPI));
727 auto Report =
728 std::make_unique<PathSensitiveBugReport>(*BT_NotCString, WarningMsg, N);
730 Report->addRange(S->getSourceRange());
731 C.emitReport(std::move(Report));
735 void CStringChecker::emitAdditionOverflowBug(CheckerContext &C,
736 ProgramStateRef State) const {
737 if (ExplodedNode *N = C.generateErrorNode(State)) {
738 if (!BT_AdditionOverflow) {
739 // FIXME: This call uses the word "API" as the description of the bug;
740 // it should be replaced by a better error message (if this unlikely
741 // situation continues to exist as a separate bug type).
742 BT_AdditionOverflow.reset(
743 new BugType(Filter.CheckNameCStringOutOfBounds, "API"));
746 // This isn't a great error message, but this should never occur in real
747 // code anyway -- you'd have to create a buffer longer than a size_t can
748 // represent, which is sort of a contradiction.
749 const char *WarningMsg =
750 "This expression will create a string whose length is too big to "
751 "be represented as a size_t";
753 auto Report = std::make_unique<PathSensitiveBugReport>(*BT_AdditionOverflow,
754 WarningMsg, N);
755 C.emitReport(std::move(Report));
759 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
760 ProgramStateRef state,
761 NonLoc left,
762 NonLoc right) const {
763 // If out-of-bounds checking is turned off, skip the rest.
764 if (!Filter.CheckCStringOutOfBounds)
765 return state;
767 // If a previous check has failed, propagate the failure.
768 if (!state)
769 return nullptr;
771 SValBuilder &svalBuilder = C.getSValBuilder();
772 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
774 QualType sizeTy = svalBuilder.getContext().getSizeType();
775 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
776 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
778 SVal maxMinusRight;
779 if (isa<nonloc::ConcreteInt>(right)) {
780 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
781 sizeTy);
782 } else {
783 // Try switching the operands. (The order of these two assignments is
784 // important!)
785 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
786 sizeTy);
787 left = right;
790 if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
791 QualType cmpTy = svalBuilder.getConditionType();
792 // If left > max - right, we have an overflow.
793 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
794 *maxMinusRightNL, cmpTy);
796 ProgramStateRef stateOverflow, stateOkay;
797 std::tie(stateOverflow, stateOkay) =
798 state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
800 if (stateOverflow && !stateOkay) {
801 // We have an overflow. Emit a bug report.
802 emitAdditionOverflowBug(C, stateOverflow);
803 return nullptr;
806 // From now on, assume an overflow didn't occur.
807 assert(stateOkay);
808 state = stateOkay;
811 return state;
814 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
815 const MemRegion *MR,
816 SVal strLength) {
817 assert(!strLength.isUndef() && "Attempt to set an undefined string length");
819 MR = MR->StripCasts();
821 switch (MR->getKind()) {
822 case MemRegion::StringRegionKind:
823 // FIXME: This can happen if we strcpy() into a string region. This is
824 // undefined [C99 6.4.5p6], but we should still warn about it.
825 return state;
827 case MemRegion::SymbolicRegionKind:
828 case MemRegion::AllocaRegionKind:
829 case MemRegion::NonParamVarRegionKind:
830 case MemRegion::ParamVarRegionKind:
831 case MemRegion::FieldRegionKind:
832 case MemRegion::ObjCIvarRegionKind:
833 // These are the types we can currently track string lengths for.
834 break;
836 case MemRegion::ElementRegionKind:
837 // FIXME: Handle element regions by upper-bounding the parent region's
838 // string length.
839 return state;
841 default:
842 // Other regions (mostly non-data) can't have a reliable C string length.
843 // For now, just ignore the change.
844 // FIXME: These are rare but not impossible. We should output some kind of
845 // warning for things like strcpy((char[]){'a', 0}, "b");
846 return state;
849 if (strLength.isUnknown())
850 return state->remove<CStringLength>(MR);
852 return state->set<CStringLength>(MR, strLength);
855 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
856 ProgramStateRef &state,
857 const Expr *Ex,
858 const MemRegion *MR,
859 bool hypothetical) {
860 if (!hypothetical) {
861 // If there's a recorded length, go ahead and return it.
862 const SVal *Recorded = state->get<CStringLength>(MR);
863 if (Recorded)
864 return *Recorded;
867 // Otherwise, get a new symbol and update the state.
868 SValBuilder &svalBuilder = C.getSValBuilder();
869 QualType sizeTy = svalBuilder.getContext().getSizeType();
870 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
871 MR, Ex, sizeTy,
872 C.getLocationContext(),
873 C.blockCount());
875 if (!hypothetical) {
876 if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
877 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
878 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
879 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
880 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
881 const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
882 fourInt);
883 NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
884 SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn,
885 maxLength, sizeTy);
886 state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
888 state = state->set<CStringLength>(MR, strLength);
891 return strLength;
894 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
895 const Expr *Ex, SVal Buf,
896 bool hypothetical) const {
897 const MemRegion *MR = Buf.getAsRegion();
898 if (!MR) {
899 // If we can't get a region, see if it's something we /know/ isn't a
900 // C string. In the context of locations, the only time we can issue such
901 // a warning is for labels.
902 if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
903 if (Filter.CheckCStringNotNullTerm) {
904 SmallString<120> buf;
905 llvm::raw_svector_ostream os(buf);
906 assert(CurrentFunctionDescription);
907 os << "Argument to " << CurrentFunctionDescription
908 << " is the address of the label '" << Label->getLabel()->getName()
909 << "', which is not a null-terminated string";
911 emitNotCStringBug(C, state, Ex, os.str());
913 return UndefinedVal();
916 // If it's not a region and not a label, give up.
917 return UnknownVal();
920 // If we have a region, strip casts from it and see if we can figure out
921 // its length. For anything we can't figure out, just return UnknownVal.
922 MR = MR->StripCasts();
924 switch (MR->getKind()) {
925 case MemRegion::StringRegionKind: {
926 // Modifying the contents of string regions is undefined [C99 6.4.5p6],
927 // so we can assume that the byte length is the correct C string length.
928 SValBuilder &svalBuilder = C.getSValBuilder();
929 QualType sizeTy = svalBuilder.getContext().getSizeType();
930 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
931 return svalBuilder.makeIntVal(strLit->getLength(), sizeTy);
933 case MemRegion::NonParamVarRegionKind: {
934 // If we have a global constant with a string literal initializer,
935 // compute the initializer's length.
936 const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
937 if (Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) {
938 if (const Expr *Init = Decl->getInit()) {
939 if (auto *StrLit = dyn_cast<StringLiteral>(Init)) {
940 SValBuilder &SvalBuilder = C.getSValBuilder();
941 QualType SizeTy = SvalBuilder.getContext().getSizeType();
942 return SvalBuilder.makeIntVal(StrLit->getLength(), SizeTy);
946 [[fallthrough]];
948 case MemRegion::SymbolicRegionKind:
949 case MemRegion::AllocaRegionKind:
950 case MemRegion::ParamVarRegionKind:
951 case MemRegion::FieldRegionKind:
952 case MemRegion::ObjCIvarRegionKind:
953 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
954 case MemRegion::CompoundLiteralRegionKind:
955 // FIXME: Can we track this? Is it necessary?
956 return UnknownVal();
957 case MemRegion::ElementRegionKind:
958 // FIXME: How can we handle this? It's not good enough to subtract the
959 // offset from the base string length; consider "123\x00567" and &a[5].
960 return UnknownVal();
961 default:
962 // Other regions (mostly non-data) can't have a reliable C string length.
963 // In this case, an error is emitted and UndefinedVal is returned.
964 // The caller should always be prepared to handle this case.
965 if (Filter.CheckCStringNotNullTerm) {
966 SmallString<120> buf;
967 llvm::raw_svector_ostream os(buf);
969 assert(CurrentFunctionDescription);
970 os << "Argument to " << CurrentFunctionDescription << " is ";
972 if (SummarizeRegion(os, C.getASTContext(), MR))
973 os << ", which is not a null-terminated string";
974 else
975 os << "not a null-terminated string";
977 emitNotCStringBug(C, state, Ex, os.str());
979 return UndefinedVal();
983 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
984 ProgramStateRef &state, const Expr *expr, SVal val) const {
986 // Get the memory region pointed to by the val.
987 const MemRegion *bufRegion = val.getAsRegion();
988 if (!bufRegion)
989 return nullptr;
991 // Strip casts off the memory region.
992 bufRegion = bufRegion->StripCasts();
994 // Cast the memory region to a string region.
995 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
996 if (!strRegion)
997 return nullptr;
999 // Return the actual string in the string region.
1000 return strRegion->getStringLiteral();
1003 bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
1004 SVal BufVal, QualType BufTy,
1005 SVal LengthVal, QualType LengthTy) {
1006 // If we do not know that the buffer is long enough we return 'true'.
1007 // Otherwise the parent region of this field region would also get
1008 // invalidated, which would lead to warnings based on an unknown state.
1010 if (LengthVal.isUnknown())
1011 return false;
1013 // Originally copied from CheckBufferAccess and CheckLocation.
1014 SValBuilder &SB = C.getSValBuilder();
1015 ASTContext &Ctx = C.getASTContext();
1017 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
1019 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
1020 if (!Length)
1021 return true; // cf top comment.
1023 // Compute the offset of the last element to be accessed: size-1.
1024 NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>();
1025 SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy);
1026 if (Offset.isUnknown())
1027 return true; // cf top comment
1028 NonLoc LastOffset = Offset.castAs<NonLoc>();
1030 // Check that the first buffer is sufficiently long.
1031 SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy);
1032 std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
1033 if (!BufLoc)
1034 return true; // cf top comment.
1036 SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
1038 // Check for out of bound array element access.
1039 const MemRegion *R = BufEnd.getAsRegion();
1040 if (!R)
1041 return true; // cf top comment.
1043 const ElementRegion *ER = dyn_cast<ElementRegion>(R);
1044 if (!ER)
1045 return true; // cf top comment.
1047 // FIXME: Does this crash when a non-standard definition
1048 // of a library function is encountered?
1049 assert(ER->getValueType() == C.getASTContext().CharTy &&
1050 "isFirstBufInBound should only be called with char* ElementRegions");
1052 // Get the size of the array.
1053 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
1054 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB);
1056 // Get the index of the accessed element.
1057 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
1059 ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true);
1061 return static_cast<bool>(StInBound);
1064 ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
1065 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV,
1066 SVal SizeV, QualType SizeTy) {
1067 auto InvalidationTraitOperations =
1068 [&C, S, BufTy = BufE->getType(), BufV, SizeV,
1069 SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1070 // If destination buffer is a field region and access is in bound, do
1071 // not invalidate its super region.
1072 if (MemRegion::FieldRegionKind == R->getKind() &&
1073 isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
1074 ITraits.setTrait(
1076 RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1078 return false;
1081 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1084 ProgramStateRef
1085 CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
1086 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
1087 auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
1088 const MemRegion *R) {
1089 return isa<FieldRegion>(R);
1092 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1095 ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
1096 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
1097 auto InvalidationTraitOperations =
1098 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1099 if (MemRegion::FieldRegionKind == R->getKind())
1100 ITraits.setTrait(
1102 RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1103 return false;
1106 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1109 ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C,
1110 ProgramStateRef S,
1111 const Expr *BufE,
1112 SVal BufV) {
1113 auto InvalidationTraitOperations =
1114 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1115 ITraits.setTrait(
1116 R->getBaseRegion(),
1117 RegionAndSymbolInvalidationTraits::TK_PreserveContents);
1118 ITraits.setTrait(R,
1119 RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
1120 return true;
1123 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1126 ProgramStateRef CStringChecker::invalidateBufferAux(
1127 CheckerContext &C, ProgramStateRef State, const Expr *E, SVal V,
1128 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
1129 const MemRegion *)>
1130 InvalidationTraitOperations) {
1131 std::optional<Loc> L = V.getAs<Loc>();
1132 if (!L)
1133 return State;
1135 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
1136 // some assumptions about the value that CFRefCount can't. Even so, it should
1137 // probably be refactored.
1138 if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
1139 const MemRegion *R = MR->getRegion()->StripCasts();
1141 // Are we dealing with an ElementRegion? If so, we should be invalidating
1142 // the super-region.
1143 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
1144 R = ER->getSuperRegion();
1145 // FIXME: What about layers of ElementRegions?
1148 // Invalidate this region.
1149 const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
1150 RegionAndSymbolInvalidationTraits ITraits;
1151 bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R);
1153 return State->invalidateRegions(R, E, C.blockCount(), LCtx,
1154 CausesPointerEscape, nullptr, nullptr,
1155 &ITraits);
1158 // If we have a non-region value by chance, just remove the binding.
1159 // FIXME: is this necessary or correct? This handles the non-Region
1160 // cases. Is it ever valid to store to these?
1161 return State->killBinding(*L);
1164 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
1165 const MemRegion *MR) {
1166 switch (MR->getKind()) {
1167 case MemRegion::FunctionCodeRegionKind: {
1168 if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl())
1169 os << "the address of the function '" << *FD << '\'';
1170 else
1171 os << "the address of a function";
1172 return true;
1174 case MemRegion::BlockCodeRegionKind:
1175 os << "block text";
1176 return true;
1177 case MemRegion::BlockDataRegionKind:
1178 os << "a block";
1179 return true;
1180 case MemRegion::CXXThisRegionKind:
1181 case MemRegion::CXXTempObjectRegionKind:
1182 os << "a C++ temp object of type "
1183 << cast<TypedValueRegion>(MR)->getValueType();
1184 return true;
1185 case MemRegion::NonParamVarRegionKind:
1186 os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType();
1187 return true;
1188 case MemRegion::ParamVarRegionKind:
1189 os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType();
1190 return true;
1191 case MemRegion::FieldRegionKind:
1192 os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType();
1193 return true;
1194 case MemRegion::ObjCIvarRegionKind:
1195 os << "an instance variable of type "
1196 << cast<TypedValueRegion>(MR)->getValueType();
1197 return true;
1198 default:
1199 return false;
1203 bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
1204 const Expr *Size, CheckerContext &C,
1205 ProgramStateRef &State) {
1206 SVal MemVal = C.getSVal(DstBuffer);
1207 SVal SizeVal = C.getSVal(Size);
1208 const MemRegion *MR = MemVal.getAsRegion();
1209 if (!MR)
1210 return false;
1212 // We're about to model memset by producing a "default binding" in the Store.
1213 // Our current implementation - RegionStore - doesn't support default bindings
1214 // that don't cover the whole base region. So we should first get the offset
1215 // and the base region to figure out whether the offset of buffer is 0.
1216 RegionOffset Offset = MR->getAsOffset();
1217 const MemRegion *BR = Offset.getRegion();
1219 std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>();
1220 if (!SizeNL)
1221 return false;
1223 SValBuilder &svalBuilder = C.getSValBuilder();
1224 ASTContext &Ctx = C.getASTContext();
1226 // void *memset(void *dest, int ch, size_t count);
1227 // For now we can only handle the case of offset is 0 and concrete char value.
1228 if (Offset.isValid() && !Offset.hasSymbolicOffset() &&
1229 Offset.getOffset() == 0) {
1230 // Get the base region's size.
1231 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder);
1233 ProgramStateRef StateWholeReg, StateNotWholeReg;
1234 std::tie(StateWholeReg, StateNotWholeReg) =
1235 State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL));
1237 // With the semantic of 'memset()', we should convert the CharVal to
1238 // unsigned char.
1239 CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy);
1241 ProgramStateRef StateNullChar, StateNonNullChar;
1242 std::tie(StateNullChar, StateNonNullChar) =
1243 assumeZero(C, State, CharVal, Ctx.UnsignedCharTy);
1245 if (StateWholeReg && !StateNotWholeReg && StateNullChar &&
1246 !StateNonNullChar) {
1247 // If the 'memset()' acts on the whole region of destination buffer and
1248 // the value of the second argument of 'memset()' is zero, bind the second
1249 // argument's value to the destination buffer with 'default binding'.
1250 // FIXME: Since there is no perfect way to bind the non-zero character, we
1251 // can only deal with zero value here. In the future, we need to deal with
1252 // the binding of non-zero value in the case of whole region.
1253 State = State->bindDefaultZero(svalBuilder.makeLoc(BR),
1254 C.getLocationContext());
1255 } else {
1256 // If the destination buffer's extent is not equal to the value of
1257 // third argument, just invalidate buffer.
1258 State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
1259 SizeVal, Size->getType());
1262 if (StateNullChar && !StateNonNullChar) {
1263 // If the value of the second argument of 'memset()' is zero, set the
1264 // string length of destination buffer to 0 directly.
1265 State = setCStringLength(State, MR,
1266 svalBuilder.makeZeroVal(Ctx.getSizeType()));
1267 } else if (!StateNullChar && StateNonNullChar) {
1268 SVal NewStrLen = svalBuilder.getMetadataSymbolVal(
1269 CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(),
1270 C.getLocationContext(), C.blockCount());
1272 // If the value of second argument is not zero, then the string length
1273 // is at least the size argument.
1274 SVal NewStrLenGESize = svalBuilder.evalBinOp(
1275 State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType());
1277 State = setCStringLength(
1278 State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true),
1279 MR, NewStrLen);
1281 } else {
1282 // If the offset is not zero and char value is not concrete, we can do
1283 // nothing but invalidate the buffer.
1284 State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
1285 SizeVal, Size->getType());
1287 return true;
1290 //===----------------------------------------------------------------------===//
1291 // evaluation of individual function calls.
1292 //===----------------------------------------------------------------------===//
1294 void CStringChecker::evalCopyCommon(CheckerContext &C, const CallExpr *CE,
1295 ProgramStateRef state, SizeArgExpr Size,
1296 DestinationArgExpr Dest,
1297 SourceArgExpr Source, bool Restricted,
1298 bool IsMempcpy, CharKind CK) const {
1299 CurrentFunctionDescription = "memory copy function";
1301 // See if the size argument is zero.
1302 const LocationContext *LCtx = C.getLocationContext();
1303 SVal sizeVal = state->getSVal(Size.Expression, LCtx);
1304 QualType sizeTy = Size.Expression->getType();
1306 ProgramStateRef stateZeroSize, stateNonZeroSize;
1307 std::tie(stateZeroSize, stateNonZeroSize) =
1308 assumeZero(C, state, sizeVal, sizeTy);
1310 // Get the value of the Dest.
1311 SVal destVal = state->getSVal(Dest.Expression, LCtx);
1313 // If the size is zero, there won't be any actual memory access, so
1314 // just bind the return value to the destination buffer and return.
1315 if (stateZeroSize && !stateNonZeroSize) {
1316 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
1317 C.addTransition(stateZeroSize);
1318 return;
1321 // If the size can be nonzero, we have to check the other arguments.
1322 if (stateNonZeroSize) {
1323 state = stateNonZeroSize;
1325 // Ensure the destination is not null. If it is NULL there will be a
1326 // NULL pointer dereference.
1327 state = checkNonNull(C, state, Dest, destVal);
1328 if (!state)
1329 return;
1331 // Get the value of the Src.
1332 SVal srcVal = state->getSVal(Source.Expression, LCtx);
1334 // Ensure the source is not null. If it is NULL there will be a
1335 // NULL pointer dereference.
1336 state = checkNonNull(C, state, Source, srcVal);
1337 if (!state)
1338 return;
1340 // Ensure the accesses are valid and that the buffers do not overlap.
1341 state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK);
1342 state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK);
1344 if (Restricted)
1345 state = CheckOverlap(C, state, Size, Dest, Source, CK);
1347 if (!state)
1348 return;
1350 // If this is mempcpy, get the byte after the last byte copied and
1351 // bind the expr.
1352 if (IsMempcpy) {
1353 // Get the byte after the last byte copied.
1354 SValBuilder &SvalBuilder = C.getSValBuilder();
1355 ASTContext &Ctx = SvalBuilder.getContext();
1356 QualType CharPtrTy = getCharPtrType(Ctx, CK);
1357 SVal DestRegCharVal =
1358 SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType());
1359 SVal lastElement = C.getSValBuilder().evalBinOp(
1360 state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType());
1361 // If we don't know how much we copied, we can at least
1362 // conjure a return value for later.
1363 if (lastElement.isUnknown())
1364 lastElement = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1365 C.blockCount());
1367 // The byte after the last byte copied is the return value.
1368 state = state->BindExpr(CE, LCtx, lastElement);
1369 } else {
1370 // All other copies return the destination buffer.
1371 // (Well, bcopy() has a void return type, but this won't hurt.)
1372 state = state->BindExpr(CE, LCtx, destVal);
1375 // Invalidate the destination (regular invalidation without pointer-escaping
1376 // the address of the top-level region).
1377 // FIXME: Even if we can't perfectly model the copy, we should see if we
1378 // can use LazyCompoundVals to copy the source values into the destination.
1379 // This would probably remove any existing bindings past the end of the
1380 // copied region, but that's still an improvement over blank invalidation.
1381 state = invalidateDestinationBufferBySize(
1382 C, state, Dest.Expression, C.getSVal(Dest.Expression), sizeVal,
1383 Size.Expression->getType());
1385 // Invalidate the source (const-invalidation without const-pointer-escaping
1386 // the address of the top-level region).
1387 state = invalidateSourceBuffer(C, state, Source.Expression,
1388 C.getSVal(Source.Expression));
1390 C.addTransition(state);
1394 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE,
1395 CharKind CK) const {
1396 // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1397 // The return value is the address of the destination buffer.
1398 DestinationArgExpr Dest = {{CE->getArg(0), 0}};
1399 SourceArgExpr Src = {{CE->getArg(1), 1}};
1400 SizeArgExpr Size = {{CE->getArg(2), 2}};
1402 ProgramStateRef State = C.getState();
1404 constexpr bool IsRestricted = true;
1405 constexpr bool IsMempcpy = false;
1406 evalCopyCommon(C, CE, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK);
1409 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE,
1410 CharKind CK) const {
1411 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1412 // The return value is a pointer to the byte following the last written byte.
1413 DestinationArgExpr Dest = {{CE->getArg(0), 0}};
1414 SourceArgExpr Src = {{CE->getArg(1), 1}};
1415 SizeArgExpr Size = {{CE->getArg(2), 2}};
1417 constexpr bool IsRestricted = true;
1418 constexpr bool IsMempcpy = true;
1419 evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy,
1420 CK);
1423 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE,
1424 CharKind CK) const {
1425 // void *memmove(void *dst, const void *src, size_t n);
1426 // The return value is the address of the destination buffer.
1427 DestinationArgExpr Dest = {{CE->getArg(0), 0}};
1428 SourceArgExpr Src = {{CE->getArg(1), 1}};
1429 SizeArgExpr Size = {{CE->getArg(2), 2}};
1431 constexpr bool IsRestricted = false;
1432 constexpr bool IsMempcpy = false;
1433 evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy,
1434 CK);
1437 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1438 // void bcopy(const void *src, void *dst, size_t n);
1439 SourceArgExpr Src{{CE->getArg(0), 0}};
1440 DestinationArgExpr Dest = {{CE->getArg(1), 1}};
1441 SizeArgExpr Size = {{CE->getArg(2), 2}};
1443 constexpr bool IsRestricted = false;
1444 constexpr bool IsMempcpy = false;
1445 evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy,
1446 CharKind::Regular);
1449 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE,
1450 CharKind CK) const {
1451 // int memcmp(const void *s1, const void *s2, size_t n);
1452 CurrentFunctionDescription = "memory comparison function";
1454 AnyArgExpr Left = {CE->getArg(0), 0};
1455 AnyArgExpr Right = {CE->getArg(1), 1};
1456 SizeArgExpr Size = {{CE->getArg(2), 2}};
1458 ProgramStateRef State = C.getState();
1459 SValBuilder &Builder = C.getSValBuilder();
1460 const LocationContext *LCtx = C.getLocationContext();
1462 // See if the size argument is zero.
1463 SVal sizeVal = State->getSVal(Size.Expression, LCtx);
1464 QualType sizeTy = Size.Expression->getType();
1466 ProgramStateRef stateZeroSize, stateNonZeroSize;
1467 std::tie(stateZeroSize, stateNonZeroSize) =
1468 assumeZero(C, State, sizeVal, sizeTy);
1470 // If the size can be zero, the result will be 0 in that case, and we don't
1471 // have to check either of the buffers.
1472 if (stateZeroSize) {
1473 State = stateZeroSize;
1474 State = State->BindExpr(CE, LCtx, Builder.makeZeroVal(CE->getType()));
1475 C.addTransition(State);
1478 // If the size can be nonzero, we have to check the other arguments.
1479 if (stateNonZeroSize) {
1480 State = stateNonZeroSize;
1481 // If we know the two buffers are the same, we know the result is 0.
1482 // First, get the two buffers' addresses. Another checker will have already
1483 // made sure they're not undefined.
1484 DefinedOrUnknownSVal LV =
1485 State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1486 DefinedOrUnknownSVal RV =
1487 State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1489 // See if they are the same.
1490 ProgramStateRef SameBuffer, NotSameBuffer;
1491 std::tie(SameBuffer, NotSameBuffer) =
1492 State->assume(Builder.evalEQ(State, LV, RV));
1494 // If the two arguments are the same buffer, we know the result is 0,
1495 // and we only need to check one size.
1496 if (SameBuffer && !NotSameBuffer) {
1497 State = SameBuffer;
1498 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read);
1499 if (State) {
1500 State =
1501 SameBuffer->BindExpr(CE, LCtx, Builder.makeZeroVal(CE->getType()));
1502 C.addTransition(State);
1504 return;
1507 // If the two arguments might be different buffers, we have to check
1508 // the size of both of them.
1509 assert(NotSameBuffer);
1510 State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK);
1511 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK);
1512 if (State) {
1513 // The return value is the comparison result, which we don't know.
1514 SVal CmpV = Builder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1515 State = State->BindExpr(CE, LCtx, CmpV);
1516 C.addTransition(State);
1521 void CStringChecker::evalstrLength(CheckerContext &C,
1522 const CallExpr *CE) const {
1523 // size_t strlen(const char *s);
1524 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1527 void CStringChecker::evalstrnLength(CheckerContext &C,
1528 const CallExpr *CE) const {
1529 // size_t strnlen(const char *s, size_t maxlen);
1530 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1533 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1534 bool IsStrnlen) const {
1535 CurrentFunctionDescription = "string length function";
1536 ProgramStateRef state = C.getState();
1537 const LocationContext *LCtx = C.getLocationContext();
1539 if (IsStrnlen) {
1540 const Expr *maxlenExpr = CE->getArg(1);
1541 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1543 ProgramStateRef stateZeroSize, stateNonZeroSize;
1544 std::tie(stateZeroSize, stateNonZeroSize) =
1545 assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1547 // If the size can be zero, the result will be 0 in that case, and we don't
1548 // have to check the string itself.
1549 if (stateZeroSize) {
1550 SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1551 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1552 C.addTransition(stateZeroSize);
1555 // If the size is GUARANTEED to be zero, we're done!
1556 if (!stateNonZeroSize)
1557 return;
1559 // Otherwise, record the assumption that the size is nonzero.
1560 state = stateNonZeroSize;
1563 // Check that the string argument is non-null.
1564 AnyArgExpr Arg = {CE->getArg(0), 0};
1565 SVal ArgVal = state->getSVal(Arg.Expression, LCtx);
1566 state = checkNonNull(C, state, Arg, ArgVal);
1568 if (!state)
1569 return;
1571 SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal);
1573 // If the argument isn't a valid C string, there's no valid state to
1574 // transition to.
1575 if (strLength.isUndef())
1576 return;
1578 DefinedOrUnknownSVal result = UnknownVal();
1580 // If the check is for strnlen() then bind the return value to no more than
1581 // the maxlen value.
1582 if (IsStrnlen) {
1583 QualType cmpTy = C.getSValBuilder().getConditionType();
1585 // It's a little unfortunate to be getting this again,
1586 // but it's not that expensive...
1587 const Expr *maxlenExpr = CE->getArg(1);
1588 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1590 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1591 std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1593 if (strLengthNL && maxlenValNL) {
1594 ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1596 // Check if the strLength is greater than the maxlen.
1597 std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1598 C.getSValBuilder()
1599 .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1600 .castAs<DefinedOrUnknownSVal>());
1602 if (stateStringTooLong && !stateStringNotTooLong) {
1603 // If the string is longer than maxlen, return maxlen.
1604 result = *maxlenValNL;
1605 } else if (stateStringNotTooLong && !stateStringTooLong) {
1606 // If the string is shorter than maxlen, return its length.
1607 result = *strLengthNL;
1611 if (result.isUnknown()) {
1612 // If we don't have enough information for a comparison, there's
1613 // no guarantee the full string length will actually be returned.
1614 // All we know is the return value is the min of the string length
1615 // and the limit. This is better than nothing.
1616 result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1617 C.blockCount());
1618 NonLoc resultNL = result.castAs<NonLoc>();
1620 if (strLengthNL) {
1621 state = state->assume(C.getSValBuilder().evalBinOpNN(
1622 state, BO_LE, resultNL, *strLengthNL, cmpTy)
1623 .castAs<DefinedOrUnknownSVal>(), true);
1626 if (maxlenValNL) {
1627 state = state->assume(C.getSValBuilder().evalBinOpNN(
1628 state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1629 .castAs<DefinedOrUnknownSVal>(), true);
1633 } else {
1634 // This is a plain strlen(), not strnlen().
1635 result = strLength.castAs<DefinedOrUnknownSVal>();
1637 // If we don't know the length of the string, conjure a return
1638 // value, so it can be used in constraints, at least.
1639 if (result.isUnknown()) {
1640 result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1641 C.blockCount());
1645 // Bind the return value.
1646 assert(!result.isUnknown() && "Should have conjured a value by now");
1647 state = state->BindExpr(CE, LCtx, result);
1648 C.addTransition(state);
1651 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1652 // char *strcpy(char *restrict dst, const char *restrict src);
1653 evalStrcpyCommon(C, CE,
1654 /* ReturnEnd = */ false,
1655 /* IsBounded = */ false,
1656 /* appendK = */ ConcatFnKind::none);
1659 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1660 // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1661 evalStrcpyCommon(C, CE,
1662 /* ReturnEnd = */ false,
1663 /* IsBounded = */ true,
1664 /* appendK = */ ConcatFnKind::none);
1667 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1668 // char *stpcpy(char *restrict dst, const char *restrict src);
1669 evalStrcpyCommon(C, CE,
1670 /* ReturnEnd = */ true,
1671 /* IsBounded = */ false,
1672 /* appendK = */ ConcatFnKind::none);
1675 void CStringChecker::evalStrlcpy(CheckerContext &C, const CallExpr *CE) const {
1676 // size_t strlcpy(char *dest, const char *src, size_t size);
1677 evalStrcpyCommon(C, CE,
1678 /* ReturnEnd = */ true,
1679 /* IsBounded = */ true,
1680 /* appendK = */ ConcatFnKind::none,
1681 /* returnPtr = */ false);
1684 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1685 // char *strcat(char *restrict s1, const char *restrict s2);
1686 evalStrcpyCommon(C, CE,
1687 /* ReturnEnd = */ false,
1688 /* IsBounded = */ false,
1689 /* appendK = */ ConcatFnKind::strcat);
1692 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1693 // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1694 evalStrcpyCommon(C, CE,
1695 /* ReturnEnd = */ false,
1696 /* IsBounded = */ true,
1697 /* appendK = */ ConcatFnKind::strcat);
1700 void CStringChecker::evalStrlcat(CheckerContext &C, const CallExpr *CE) const {
1701 // size_t strlcat(char *dst, const char *src, size_t size);
1702 // It will append at most size - strlen(dst) - 1 bytes,
1703 // NULL-terminating the result.
1704 evalStrcpyCommon(C, CE,
1705 /* ReturnEnd = */ false,
1706 /* IsBounded = */ true,
1707 /* appendK = */ ConcatFnKind::strlcat,
1708 /* returnPtr = */ false);
// Shared modeling for the copy/concatenation family that is dispatched here
// by evalStrcpy, evalStrncpy, evalStpcpy, evalStrlcpy, evalStrcat,
// evalStrncat and evalStrlcat.
//
// Argument 0 of the call is the destination, argument 1 is the source and,
// when IsBounded is set, argument 2 is the size.
//
//   ReturnEnd - only consulted when returnPtr is set: the call then evaluates
//               to the end of the copied string instead of the destination.
//   IsBounded - the call carries a size argument that limits the copy.
//   appendK   - ConcatFnKind::none for plain copies, otherwise which
//               concatenation flavour (strcat / strlcat) is being modeled.
//   returnPtr - the call evaluates to a pointer; when false (strlcpy and
//               strlcat) it evaluates to a length instead.
//
// Whenever one of the checks yields a null state the function returns early
// without adding a transition. Otherwise it invalidates the destination and
// source buffers, records the new destination string length, binds the value
// of the call expression and adds the resulting transition.
1711 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1712 bool ReturnEnd, bool IsBounded,
1713 ConcatFnKind appendK,
1714 bool returnPtr) const {
1715 if (appendK == ConcatFnKind::none)
1716 CurrentFunctionDescription = "string copy function";
1717 else
1718 CurrentFunctionDescription = "string concatenation function";
1720 ProgramStateRef state = C.getState();
1721 const LocationContext *LCtx = C.getLocationContext();
1723 // Check that the destination is non-null.
1724 DestinationArgExpr Dst = {{CE->getArg(0), 0}};
1725 SVal DstVal = state->getSVal(Dst.Expression, LCtx);
1726 state = checkNonNull(C, state, Dst, DstVal);
1727 if (!state)
1728 return;
1730 // Check that the source is non-null.
1731 SourceArgExpr srcExpr = {{CE->getArg(1), 1}};
1732 SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
1733 state = checkNonNull(C, state, srcExpr, srcVal);
1734 if (!state)
1735 return;
1737 // Get the string length of the source.
1738 SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
1739 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1741 // Get the string length of the destination buffer.
1742 SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
1743 std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1745 // If the source isn't a valid C string, give up.
1746 if (strLength.isUndef())
1747 return;
1749 SValBuilder &svalBuilder = C.getSValBuilder();
1750 QualType cmpTy = svalBuilder.getConditionType();
1751 QualType sizeTy = svalBuilder.getContext().getSizeType();
1753 // These two values allow checking two kinds of errors:
1754 // - actual overflows caused by a source that doesn't fit in the destination
1755 // - potential overflows caused by a bound that could exceed the destination
1756 SVal amountCopied = UnknownVal();
1757 SVal maxLastElementIndex = UnknownVal();
// Stays null unless a bound-based overflow check is set up below; it is later
// used to decide whether the actual copy still has to be checked.
1758 const char *boundWarning = nullptr;
1760 // FIXME: Why do we choose the srcExpr if the access has no size?
1761 // Note that the 3rd argument of the call would be the size parameter.
1762 SizeArgExpr SrcExprAsSizeDummy = {
1763 {srcExpr.Expression, srcExpr.ArgumentIndex}};
1764 state = CheckOverlap(
1765 C, state,
1766 (IsBounded ? SizeArgExpr{{CE->getArg(2), 2}} : SrcExprAsSizeDummy), Dst,
1767 srcExpr);
1769 if (!state)
1770 return;
1772 // If the function is strncpy, strncat, etc... it is bounded.
1773 if (IsBounded) {
1774 // Get the max number of characters to copy.
1775 SizeArgExpr lenExpr = {{CE->getArg(2), 2}};
1776 SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);
1778 // Protect against misdeclared strncpy().
1779 lenVal =
1780 svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());
1782 std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1784 // If we know both values, we might be able to figure out how much
1785 // we're copying.
1786 if (strLengthNL && lenValNL) {
1787 switch (appendK) {
1788 case ConcatFnKind::none:
1789 case ConcatFnKind::strcat: {
1790 ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1791 // Check if the max number to copy is less than the length of the src.
1792 // If the bound is equal to the source length, strncpy won't null-
1793 // terminate the result!
1794 std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1795 svalBuilder
1796 .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1797 .castAs<DefinedOrUnknownSVal>());
1799 if (stateSourceTooLong && !stateSourceNotTooLong) {
1800 // Max number to copy is less than the length of the src, so the
1801 // actual strLength copied is the max number arg.
1802 state = stateSourceTooLong;
1803 amountCopied = lenVal;
1805 } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1806 // The source buffer entirely fits in the bound.
1807 state = stateSourceNotTooLong;
1808 amountCopied = strLength;
1810 break;
1812 case ConcatFnKind::strlcat:
1813 if (!dstStrLengthNL)
1814 return;
1816 // amountCopied = min (size - dstLen - 1 , srcLen)
1817 SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1818 *dstStrLengthNL, sizeTy);
1819 if (!isa<NonLoc>(freeSpace))
1820 return;
1821 freeSpace =
1822 svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
1823 svalBuilder.makeIntVal(1, sizeTy), sizeTy);
1824 std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();
1826 // While unlikely, it is possible that the subtraction is
1827 // too complex to compute, let's check whether it succeeded.
1828 if (!freeSpaceNL)
1829 return;
1830 SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
1831 state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);
1833 ProgramStateRef TrueState, FalseState;
1834 std::tie(TrueState, FalseState) =
1835 state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());
1837 // srcStrLength <= size - dstStrLength -1
1838 if (TrueState && !FalseState) {
1839 amountCopied = strLength;
1842 // srcStrLength > size - dstStrLength -1
1843 if (!TrueState && FalseState) {
1844 amountCopied = freeSpace;
1847 if (TrueState && FalseState)
1848 amountCopied = UnknownVal();
1849 break;
1852 // We still want to know if the bound is known to be too large.
1853 if (lenValNL) {
1854 switch (appendK) {
1855 case ConcatFnKind::strcat:
1856 // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1858 // Get the string length of the destination. If the destination is
1859 // memory that can't have a string length, we shouldn't be copying
1860 // into it anyway.
1861 if (dstStrLength.isUndef())
1862 return;
1864 if (dstStrLengthNL) {
1865 maxLastElementIndex = svalBuilder.evalBinOpNN(
1866 state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);
1868 boundWarning = "Size argument is greater than the free space in the "
1869 "destination buffer";
1871 break;
1872 case ConcatFnKind::none:
1873 case ConcatFnKind::strlcat:
1874 // For strncpy and strlcat, this is just checking
1875 // that lenVal <= sizeof(dst).
1876 // (Yes, strncpy and strncat differ in how they treat termination.
1877 // strncat ALWAYS terminates, but strncpy doesn't.)
1879 // We need a special case for when the copy size is zero, in which
1880 // case strncpy will do no work at all. Our bounds check uses n-1
1881 // as the last element accessed, so n == 0 is problematic.
1882 ProgramStateRef StateZeroSize, StateNonZeroSize;
1883 std::tie(StateZeroSize, StateNonZeroSize) =
1884 assumeZero(C, state, *lenValNL, sizeTy);
1886 // If the size is known to be zero, we're done.
1887 if (StateZeroSize && !StateNonZeroSize) {
1888 if (returnPtr) {
1889 StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1890 } else {
1891 if (appendK == ConcatFnKind::none) {
1892 // strlcpy returns strlen(src)
1893 StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, strLength);
1894 } else {
1895 // strlcat returns strlen(src) + strlen(dst)
1896 SVal retSize = svalBuilder.evalBinOp(
1897 state, BO_Add, strLength, dstStrLength, sizeTy);
1898 StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, retSize);
1901 C.addTransition(StateZeroSize);
1902 return;
1905 // Otherwise, go ahead and figure out the last element we'll touch.
1906 // We don't record the non-zero assumption here because we can't
1907 // be sure. We won't warn on a possible zero.
1908 NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
1909 maxLastElementIndex =
1910 svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
1911 boundWarning = "Size argument is greater than the length of the "
1912 "destination buffer";
1913 break;
1916 } else {
1917 // The function isn't bounded. The amount copied should match the length
1918 // of the source buffer.
1919 amountCopied = strLength;
1922 assert(state);
1924 // This represents the number of characters copied into the destination
1925 // buffer. (It may not actually be the strlen if the destination buffer
1926 // is not terminated.)
1927 SVal finalStrLength = UnknownVal();
// Value bound to the call when it evaluates to a size (returnPtr == false).
1928 SVal strlRetVal = UnknownVal();
1930 if (appendK == ConcatFnKind::none && !returnPtr) {
1931 // strlcpy returns strlen(src)
1932 strlRetVal = strLength;
1935 // If this is an appending function (strcat, strncat...) then set the
1936 // string length to strlen(src) + strlen(dst) since the buffer will
1937 // ultimately contain both.
1938 if (appendK != ConcatFnKind::none) {
1939 // Get the string length of the destination. If the destination is memory
1940 // that can't have a string length, we shouldn't be copying into it anyway.
1941 if (dstStrLength.isUndef())
1942 return;
1944 if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
1945 strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
1946 *dstStrLengthNL, sizeTy);
1949 std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();
1951 // If we know both string lengths, we might know the final string length.
1952 if (amountCopiedNL && dstStrLengthNL) {
1953 // Make sure the two lengths together don't overflow a size_t.
1954 state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
1955 if (!state)
1956 return;
1958 finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
1959 *dstStrLengthNL, sizeTy);
1962 // If we couldn't get a single value for the final string length,
1963 // we can at least bound it by the individual lengths.
1964 if (finalStrLength.isUnknown()) {
1965 // Try to get a "hypothetical" string length symbol, which we can later
1966 // set as a real value if that turns out to be the case.
1967 finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1968 assert(!finalStrLength.isUndef());
1970 if (std::optional<NonLoc> finalStrLengthNL =
1971 finalStrLength.getAs<NonLoc>()) {
1972 if (amountCopiedNL && appendK == ConcatFnKind::none) {
1973 // we overwrite dst string with the src
1974 // finalStrLength >= srcStrLength
1975 SVal sourceInResult = svalBuilder.evalBinOpNN(
1976 state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
1977 state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
1978 true);
1979 if (!state)
1980 return;
1983 if (dstStrLengthNL && appendK != ConcatFnKind::none) {
1984 // we extend the dst string with the src
1985 // finalStrLength >= dstStrLength
1986 SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1987 *finalStrLengthNL,
1988 *dstStrLengthNL,
1989 cmpTy);
1990 state =
1991 state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
1992 if (!state)
1993 return;
1998 } else {
1999 // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
2000 // the final string length will match the input string length.
2001 finalStrLength = amountCopied;
// Result is the value the call expression is finally bound to.
2004 SVal Result;
2006 if (returnPtr) {
2007 // The final result of the function will either be a pointer past the last
2008 // copied element, or a pointer to the start of the destination buffer.
2009 Result = (ReturnEnd ? UnknownVal() : DstVal);
2010 } else {
2011 if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
2012 //strlcpy, strlcat
2013 Result = strlRetVal;
2014 else
2015 Result = finalStrLength;
2018 assert(state);
2020 // If the destination is a MemRegion, try to check for a buffer overflow and
2021 // record the new string length.
2022 if (std::optional<loc::MemRegionVal> dstRegVal =
2023 DstVal.getAs<loc::MemRegionVal>()) {
2024 QualType ptrTy = Dst.Expression->getType();
2026 // If we have an exact value on a bounded copy, use that to check for
2027 // overflows, rather than our estimate about how much is actually copied.
2028 if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
2029 SVal maxLastElement =
2030 svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);
2032 // Check if the first byte of the destination is writable.
2033 state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2034 if (!state)
2035 return;
2036 // Check if the last byte of the destination is writable.
2037 state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
2038 if (!state)
2039 return;
2042 // Then, if the final length is known...
2043 if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
2044 SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
2045 *knownStrLength, ptrTy);
2047 // ...and we haven't checked the bound, we'll check the actual copy.
2048 if (!boundWarning) {
2049 // Check if the first byte of the destination is writable.
2050 state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2051 if (!state)
2052 return;
2053 // Check if the last byte of the destination is writable.
2054 state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
2055 if (!state)
2056 return;
2059 // If this is a stpcpy-style copy, the last element is the return value.
2060 if (returnPtr && ReturnEnd)
2061 Result = lastElement;
2064 // Invalidate the destination (regular invalidation without pointer-escaping
2065 // the address of the top-level region). This must happen before we set the
2066 // C string length because invalidation will clear the length.
2067 // FIXME: Even if we can't perfectly model the copy, we should see if we
2068 // can use LazyCompoundVals to copy the source values into the destination.
2069 // This would probably remove any existing bindings past the end of the
2070 // string, but that's still an improvement over blank invalidation.
2071 state = invalidateDestinationBufferBySize(C, state, Dst.Expression,
2072 *dstRegVal, amountCopied,
2073 C.getASTContext().getSizeType());
2075 // Invalidate the source (const-invalidation without const-pointer-escaping
2076 // the address of the top-level region).
2077 state = invalidateSourceBuffer(C, state, srcExpr.Expression, srcVal);
2079 // Set the C string length of the destination, if we know it.
2080 if (IsBounded && (appendK == ConcatFnKind::none)) {
2081 // strncpy is annoying in that it doesn't guarantee to null-terminate
2082 // the result string. If the original string didn't fit entirely inside
2083 // the bound (including the null-terminator), we don't know how long the
2084 // result is.
2085 if (amountCopied != strLength)
2086 finalStrLength = UnknownVal();
2088 state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
2091 assert(state);
2093 if (returnPtr) {
2094 // If this is a stpcpy-style copy, but we were unable to check for a buffer
2095 // overflow, we still need a result. Conjure a return value.
2096 if (ReturnEnd && Result.isUnknown()) {
2097 Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
2100 // Set the return value.
2101 state = state->BindExpr(CE, LCtx, Result);
2102 C.addTransition(state);
2105 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
2106 //int strcmp(const char *s1, const char *s2);
2107 evalStrcmpCommon(C, CE, /* IsBounded = */ false, /* IgnoreCase = */ false);
2110 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
2111 //int strncmp(const char *s1, const char *s2, size_t n);
2112 evalStrcmpCommon(C, CE, /* IsBounded = */ true, /* IgnoreCase = */ false);
2115 void CStringChecker::evalStrcasecmp(CheckerContext &C,
2116 const CallExpr *CE) const {
2117 //int strcasecmp(const char *s1, const char *s2);
2118 evalStrcmpCommon(C, CE, /* IsBounded = */ false, /* IgnoreCase = */ true);
2121 void CStringChecker::evalStrncasecmp(CheckerContext &C,
2122 const CallExpr *CE) const {
2123 //int strncasecmp(const char *s1, const char *s2, size_t n);
2124 evalStrcmpCommon(C, CE, /* IsBounded = */ true, /* IgnoreCase = */ true);
2127 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
2128 bool IsBounded, bool IgnoreCase) const {
2129 CurrentFunctionDescription = "string comparison function";
2130 ProgramStateRef state = C.getState();
2131 const LocationContext *LCtx = C.getLocationContext();
2133 // Check that the first string is non-null
2134 AnyArgExpr Left = {CE->getArg(0), 0};
2135 SVal LeftVal = state->getSVal(Left.Expression, LCtx);
2136 state = checkNonNull(C, state, Left, LeftVal);
2137 if (!state)
2138 return;
2140 // Check that the second string is non-null.
2141 AnyArgExpr Right = {CE->getArg(1), 1};
2142 SVal RightVal = state->getSVal(Right.Expression, LCtx);
2143 state = checkNonNull(C, state, Right, RightVal);
2144 if (!state)
2145 return;
2147 // Get the string length of the first string or give up.
2148 SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
2149 if (LeftLength.isUndef())
2150 return;
2152 // Get the string length of the second string or give up.
2153 SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
2154 if (RightLength.isUndef())
2155 return;
2157 // If we know the two buffers are the same, we know the result is 0.
2158 // First, get the two buffers' addresses. Another checker will have already
2159 // made sure they're not undefined.
2160 DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
2161 DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();
2163 // See if they are the same.
2164 SValBuilder &svalBuilder = C.getSValBuilder();
2165 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
2166 ProgramStateRef StSameBuf, StNotSameBuf;
2167 std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
2169 // If the two arguments might be the same buffer, we know the result is 0,
2170 // and we only need to check one size.
2171 if (StSameBuf) {
2172 StSameBuf = StSameBuf->BindExpr(CE, LCtx,
2173 svalBuilder.makeZeroVal(CE->getType()));
2174 C.addTransition(StSameBuf);
2176 // If the two arguments are GUARANTEED to be the same, we're done!
2177 if (!StNotSameBuf)
2178 return;
2181 assert(StNotSameBuf);
2182 state = StNotSameBuf;
2184 // At this point we can go about comparing the two buffers.
2185 // For now, we only do this if they're both known string literals.
2187 // Attempt to extract string literals from both expressions.
2188 const StringLiteral *LeftStrLiteral =
2189 getCStringLiteral(C, state, Left.Expression, LeftVal);
2190 const StringLiteral *RightStrLiteral =
2191 getCStringLiteral(C, state, Right.Expression, RightVal);
2192 bool canComputeResult = false;
2193 SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
2194 C.blockCount());
2196 if (LeftStrLiteral && RightStrLiteral) {
2197 StringRef LeftStrRef = LeftStrLiteral->getString();
2198 StringRef RightStrRef = RightStrLiteral->getString();
2200 if (IsBounded) {
2201 // Get the max number of characters to compare.
2202 const Expr *lenExpr = CE->getArg(2);
2203 SVal lenVal = state->getSVal(lenExpr, LCtx);
2205 // If the length is known, we can get the right substrings.
2206 if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
2207 // Create substrings of each to compare the prefix.
2208 LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
2209 RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
2210 canComputeResult = true;
2212 } else {
2213 // This is a normal, unbounded strcmp.
2214 canComputeResult = true;
2217 if (canComputeResult) {
2218 // Real strcmp stops at null characters.
2219 size_t s1Term = LeftStrRef.find('\0');
2220 if (s1Term != StringRef::npos)
2221 LeftStrRef = LeftStrRef.substr(0, s1Term);
2223 size_t s2Term = RightStrRef.find('\0');
2224 if (s2Term != StringRef::npos)
2225 RightStrRef = RightStrRef.substr(0, s2Term);
2227 // Use StringRef's comparison methods to compute the actual result.
2228 int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
2229 : LeftStrRef.compare(RightStrRef);
2231 // The strcmp function returns an integer greater than, equal to, or less
2232 // than zero, [c11, p7.24.4.2].
2233 if (compareRes == 0) {
2234 resultVal = svalBuilder.makeIntVal(compareRes, CE->getType());
2236 else {
2237 DefinedSVal zeroVal = svalBuilder.makeIntVal(0, CE->getType());
2238 // Constrain strcmp's result range based on the result of StringRef's
2239 // comparison methods.
2240 BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
2241 SVal compareWithZero =
2242 svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
2243 svalBuilder.getConditionType());
2244 DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
2245 state = state->assume(compareWithZeroVal, true);
2250 state = state->BindExpr(CE, LCtx, resultVal);
2252 // Record this as a possible path.
2253 C.addTransition(state);
2256 void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
2257 // char *strsep(char **stringp, const char *delim);
2258 // Verify whether the search string parameter matches the return type.
2259 SourceArgExpr SearchStrPtr = {{CE->getArg(0), 0}};
2261 QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
2262 if (CharPtrTy.isNull() ||
2263 CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
2264 return;
2266 CurrentFunctionDescription = "strsep()";
2267 ProgramStateRef State = C.getState();
2268 const LocationContext *LCtx = C.getLocationContext();
2270 // Check that the search string pointer is non-null (though it may point to
2271 // a null string).
2272 SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
2273 State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
2274 if (!State)
2275 return;
2277 // Check that the delimiter string is non-null.
2278 AnyArgExpr DelimStr = {CE->getArg(1), 1};
2279 SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
2280 State = checkNonNull(C, State, DelimStr, DelimStrVal);
2281 if (!State)
2282 return;
2284 SValBuilder &SVB = C.getSValBuilder();
2285 SVal Result;
2286 if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
2287 // Get the current value of the search string pointer, as a char*.
2288 Result = State->getSVal(*SearchStrLoc, CharPtrTy);
2290 // Invalidate the search string, representing the change of one delimiter
2291 // character to NUL.
2292 // As the replacement never overflows, do not invalidate its super region.
2293 State = invalidateDestinationBufferNeverOverflows(
2294 C, State, SearchStrPtr.Expression, Result);
2296 // Overwrite the search string pointer. The new value is either an address
2297 // further along in the same string, or NULL if there are no more tokens.
2298 State = State->bindLoc(*SearchStrLoc,
2299 SVB.conjureSymbolVal(getTag(),
2301 LCtx,
2302 CharPtrTy,
2303 C.blockCount()),
2304 LCtx);
2305 } else {
2306 assert(SearchStrVal.isUnknown());
2307 // Conjure a symbolic value. It's the best we can do.
2308 Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
2311 // Set the return value, and finish.
2312 State = State->BindExpr(CE, LCtx, Result);
2313 C.addTransition(State);
2316 // These should probably be moved into a C++ standard library checker.
2317 void CStringChecker::evalStdCopy(CheckerContext &C, const CallExpr *CE) const {
2318 evalStdCopyCommon(C, CE);
2321 void CStringChecker::evalStdCopyBackward(CheckerContext &C,
2322 const CallExpr *CE) const {
2323 evalStdCopyCommon(C, CE);
2326 void CStringChecker::evalStdCopyCommon(CheckerContext &C,
2327 const CallExpr *CE) const {
2328 if (!CE->getArg(2)->getType()->isPointerType())
2329 return;
2331 ProgramStateRef State = C.getState();
2333 const LocationContext *LCtx = C.getLocationContext();
2335 // template <class _InputIterator, class _OutputIterator>
2336 // _OutputIterator
2337 // copy(_InputIterator __first, _InputIterator __last,
2338 // _OutputIterator __result)
2340 // Invalidate the destination buffer
2341 const Expr *Dst = CE->getArg(2);
2342 SVal DstVal = State->getSVal(Dst, LCtx);
2343 // FIXME: As we do not know how many items are copied, we also invalidate the
2344 // super region containing the target location.
2345 State =
2346 invalidateDestinationBufferAlwaysEscapeSuperRegion(C, State, Dst, DstVal);
2348 SValBuilder &SVB = C.getSValBuilder();
2350 SVal ResultVal = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
2351 State = State->BindExpr(CE, LCtx, ResultVal);
2353 C.addTransition(State);
2356 void CStringChecker::evalMemset(CheckerContext &C, const CallExpr *CE) const {
2357 // void *memset(void *s, int c, size_t n);
2358 CurrentFunctionDescription = "memory set function";
2360 DestinationArgExpr Buffer = {{CE->getArg(0), 0}};
2361 AnyArgExpr CharE = {CE->getArg(1), 1};
2362 SizeArgExpr Size = {{CE->getArg(2), 2}};
2364 ProgramStateRef State = C.getState();
2366 // See if the size argument is zero.
2367 const LocationContext *LCtx = C.getLocationContext();
2368 SVal SizeVal = C.getSVal(Size.Expression);
2369 QualType SizeTy = Size.Expression->getType();
2371 ProgramStateRef ZeroSize, NonZeroSize;
2372 std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy);
2374 // Get the value of the memory area.
2375 SVal BufferPtrVal = C.getSVal(Buffer.Expression);
2377 // If the size is zero, there won't be any actual memory access, so
2378 // just bind the return value to the buffer and return.
2379 if (ZeroSize && !NonZeroSize) {
2380 ZeroSize = ZeroSize->BindExpr(CE, LCtx, BufferPtrVal);
2381 C.addTransition(ZeroSize);
2382 return;
2385 // Ensure the memory area is not null.
2386 // If it is NULL there will be a NULL pointer dereference.
2387 State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal);
2388 if (!State)
2389 return;
2391 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2392 if (!State)
2393 return;
2395 // According to the values of the arguments, bind the value of the second
2396 // argument to the destination buffer and set string length, or just
2397 // invalidate the destination buffer.
2398 if (!memsetAux(Buffer.Expression, C.getSVal(CharE.Expression),
2399 Size.Expression, C, State))
2400 return;
2402 State = State->BindExpr(CE, LCtx, BufferPtrVal);
2403 C.addTransition(State);
2406 void CStringChecker::evalBzero(CheckerContext &C, const CallExpr *CE) const {
2407 CurrentFunctionDescription = "memory clearance function";
2409 DestinationArgExpr Buffer = {{CE->getArg(0), 0}};
2410 SizeArgExpr Size = {{CE->getArg(1), 1}};
2411 SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy);
2413 ProgramStateRef State = C.getState();
2415 // See if the size argument is zero.
2416 SVal SizeVal = C.getSVal(Size.Expression);
2417 QualType SizeTy = Size.Expression->getType();
2419 ProgramStateRef StateZeroSize, StateNonZeroSize;
2420 std::tie(StateZeroSize, StateNonZeroSize) =
2421 assumeZero(C, State, SizeVal, SizeTy);
2423 // If the size is zero, there won't be any actual memory access,
2424 // In this case we just return.
2425 if (StateZeroSize && !StateNonZeroSize) {
2426 C.addTransition(StateZeroSize);
2427 return;
2430 // Get the value of the memory area.
2431 SVal MemVal = C.getSVal(Buffer.Expression);
2433 // Ensure the memory area is not null.
2434 // If it is NULL there will be a NULL pointer dereference.
2435 State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal);
2436 if (!State)
2437 return;
2439 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2440 if (!State)
2441 return;
2443 if (!memsetAux(Buffer.Expression, Zero, Size.Expression, C, State))
2444 return;
2446 C.addTransition(State);
2449 void CStringChecker::evalSprintf(CheckerContext &C, const CallExpr *CE) const {
2450 CurrentFunctionDescription = "'sprintf'";
2451 bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___sprintf_chk;
2452 evalSprintfCommon(C, CE, /* IsBounded */ false, IsBI);
2455 void CStringChecker::evalSnprintf(CheckerContext &C, const CallExpr *CE) const {
2456 CurrentFunctionDescription = "'snprintf'";
2457 bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___snprintf_chk;
2458 evalSprintfCommon(C, CE, /* IsBounded */ true, IsBI);
2461 void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallExpr *CE,
2462 bool IsBounded, bool IsBuiltin) const {
2463 ProgramStateRef State = C.getState();
2464 DestinationArgExpr Dest = {{CE->getArg(0), 0}};
2466 const auto NumParams = CE->getCalleeDecl()->getAsFunction()->getNumParams();
2467 assert(CE->getNumArgs() >= NumParams);
2469 const auto AllArguments =
2470 llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());
2471 const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams);
2473 for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) {
2474 // We consider only string buffers
2475 if (const QualType type = ArgExpr->getType();
2476 !type->isAnyPointerType() ||
2477 !type->getPointeeType()->isAnyCharacterType())
2478 continue;
2479 SourceArgExpr Source = {{ArgExpr, unsigned(ArgIdx)}};
2481 // Ensure the buffers do not overlap.
2482 SizeArgExpr SrcExprAsSizeDummy = {
2483 {Source.Expression, Source.ArgumentIndex}};
2484 State = CheckOverlap(
2485 C, State,
2486 (IsBounded ? SizeArgExpr{{CE->getArg(1), 1}} : SrcExprAsSizeDummy),
2487 Dest, Source);
2488 if (!State)
2489 return;
2492 C.addTransition(State);
2495 //===----------------------------------------------------------------------===//
2496 // The driver method, and other Checker callbacks.
2497 //===----------------------------------------------------------------------===//
2499 CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,
2500 CheckerContext &C) const {
2501 const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
2502 if (!CE)
2503 return nullptr;
2505 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
2506 if (!FD)
2507 return nullptr;
2509 if (StdCopy.matches(Call))
2510 return &CStringChecker::evalStdCopy;
2511 if (StdCopyBackward.matches(Call))
2512 return &CStringChecker::evalStdCopyBackward;
2514 // Pro-actively check that argument types are safe to do arithmetic upon.
2515 // We do not want to crash if someone accidentally passes a structure
2516 // into, say, a C++ overload of any of these functions. We could not check
2517 // that for std::copy because they may have arguments of other types.
2518 for (auto I : CE->arguments()) {
2519 QualType T = I->getType();
2520 if (!T->isIntegralOrEnumerationType() && !T->isPointerType())
2521 return nullptr;
2524 const FnCheck *Callback = Callbacks.lookup(Call);
2525 if (Callback)
2526 return *Callback;
2528 return nullptr;
2531 bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
2532 FnCheck Callback = identifyCall(Call, C);
2534 // If the callee isn't a string function, let another checker handle it.
2535 if (!Callback)
2536 return false;
2538 // Check and evaluate the call.
2539 const auto *CE = cast<CallExpr>(Call.getOriginExpr());
2540 Callback(this, C, CE);
2542 // If the evaluate call resulted in no change, chain to the next eval call
2543 // handler.
2544 // Note, the custom CString evaluation calls assume that basic safety
2545 // properties are held. However, if the user chooses to turn off some of these
2546 // checks, we ignore the issues and leave the call evaluation to a generic
2547 // handler.
2548 return C.isDifferent();
2551 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2552 // Record string length for char a[] = "abc";
2553 ProgramStateRef state = C.getState();
2555 for (const auto *I : DS->decls()) {
2556 const VarDecl *D = dyn_cast<VarDecl>(I);
2557 if (!D)
2558 continue;
2560 // FIXME: Handle array fields of structs.
2561 if (!D->getType()->isArrayType())
2562 continue;
2564 const Expr *Init = D->getInit();
2565 if (!Init)
2566 continue;
2567 if (!isa<StringLiteral>(Init))
2568 continue;
2570 Loc VarLoc = state->getLValue(D, C.getLocationContext());
2571 const MemRegion *MR = VarLoc.getAsRegion();
2572 if (!MR)
2573 continue;
2575 SVal StrVal = C.getSVal(Init);
2576 assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2577 DefinedOrUnknownSVal strLength =
2578 getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2580 state = state->set<CStringLength>(MR, strLength);
2583 C.addTransition(state);
2586 ProgramStateRef
2587 CStringChecker::checkRegionChanges(ProgramStateRef state,
2588 const InvalidatedSymbols *,
2589 ArrayRef<const MemRegion *> ExplicitRegions,
2590 ArrayRef<const MemRegion *> Regions,
2591 const LocationContext *LCtx,
2592 const CallEvent *Call) const {
2593 CStringLengthTy Entries = state->get<CStringLength>();
2594 if (Entries.isEmpty())
2595 return state;
2597 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2598 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2600 // First build sets for the changed regions and their super-regions.
2601 for (const MemRegion *MR : Regions) {
2602 Invalidated.insert(MR);
2604 SuperRegions.insert(MR);
2605 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2606 MR = SR->getSuperRegion();
2607 SuperRegions.insert(MR);
2611 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2613 // Then loop over the entries in the current state.
2614 for (const MemRegion *MR : llvm::make_first_range(Entries)) {
2615 // Is this entry for a super-region of a changed region?
2616 if (SuperRegions.count(MR)) {
2617 Entries = F.remove(Entries, MR);
2618 continue;
2621 // Is this entry for a sub-region of a changed region?
2622 const MemRegion *Super = MR;
2623 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2624 Super = SR->getSuperRegion();
2625 if (Invalidated.count(Super)) {
2626 Entries = F.remove(Entries, MR);
2627 break;
2632 return state->set<CStringLength>(Entries);
2635 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2636 SymbolReaper &SR) const {
2637 // Mark all symbols in our string length map as valid.
2638 CStringLengthTy Entries = state->get<CStringLength>();
2640 for (SVal Len : llvm::make_second_range(Entries)) {
2641 for (SymbolRef Sym : Len.symbols())
2642 SR.markInUse(Sym);
2646 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2647 CheckerContext &C) const {
2648 ProgramStateRef state = C.getState();
2649 CStringLengthTy Entries = state->get<CStringLength>();
2650 if (Entries.isEmpty())
2651 return;
2653 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2654 for (auto [Reg, Len] : Entries) {
2655 if (SymbolRef Sym = Len.getAsSymbol()) {
2656 if (SR.isDead(Sym))
2657 Entries = F.remove(Entries, Reg);
2661 state = state->set<CStringLength>(Entries);
2662 C.addTransition(state);
2665 void ento::registerCStringModeling(CheckerManager &Mgr) {
2666 Mgr.registerChecker<CStringChecker>();
2669 bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) {
2670 return true;
2673 #define REGISTER_CHECKER(name) \
2674 void ento::register##name(CheckerManager &mgr) { \
2675 CStringChecker *checker = mgr.getChecker<CStringChecker>(); \
2676 checker->Filter.Check##name = true; \
2677 checker->Filter.CheckName##name = mgr.getCurrentCheckerName(); \
2680 bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
2682 REGISTER_CHECKER(CStringNullArg)
2683 REGISTER_CHECKER(CStringOutOfBounds)
2684 REGISTER_CHECKER(CStringBufferOverlap)
2685 REGISTER_CHECKER(CStringNotNullTerm)
2686 REGISTER_CHECKER(CStringUninitializedRead)