//== CStringSyntaxChecker.cpp - C string API syntax checks ----*- C++ -*-==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // An AST checker that looks for common pitfalls when using C string APIs.
// - Identifies erroneous patterns in the last argument to strncat - the number
//   of bytes to copy.
13 //===----------------------------------------------------------------------===//
14 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
15 #include "clang/AST/Expr.h"
16 #include "clang/AST/OperationKinds.h"
17 #include "clang/AST/StmtVisitor.h"
18 #include "clang/Analysis/AnalysisDeclContext.h"
19 #include "clang/Basic/TargetInfo.h"
20 #include "clang/Basic/TypeTraits.h"
21 #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
22 #include "clang/StaticAnalyzer/Core/Checker.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
25 #include "llvm/ADT/SmallString.h"
26 #include "llvm/Support/raw_ostream.h"
28 using namespace clang
;
32 class WalkAST
: public StmtVisitor
<WalkAST
> {
33 const CheckerBase
*Checker
;
35 AnalysisDeclContext
* AC
;
37 /// Check if two expressions refer to the same declaration.
38 bool sameDecl(const Expr
*A1
, const Expr
*A2
) {
39 if (const auto *D1
= dyn_cast
<DeclRefExpr
>(A1
->IgnoreParenCasts()))
40 if (const auto *D2
= dyn_cast
<DeclRefExpr
>(A2
->IgnoreParenCasts()))
41 return D1
->getDecl() == D2
->getDecl();
45 /// Check if the expression E is a sizeof(WithArg).
46 bool isSizeof(const Expr
*E
, const Expr
*WithArg
) {
47 if (const auto *UE
= dyn_cast
<UnaryExprOrTypeTraitExpr
>(E
))
48 if (UE
->getKind() == UETT_SizeOf
&& !UE
->isArgumentType())
49 return sameDecl(UE
->getArgumentExpr(), WithArg
);
53 /// Check if the expression E is a strlen(WithArg).
54 bool isStrlen(const Expr
*E
, const Expr
*WithArg
) {
55 if (const auto *CE
= dyn_cast
<CallExpr
>(E
)) {
56 const FunctionDecl
*FD
= CE
->getDirectCallee();
59 return (CheckerContext::isCLibraryFunction(FD
, "strlen") &&
60 sameDecl(CE
->getArg(0), WithArg
));
65 /// Check if the expression is an integer literal with value 1.
66 bool isOne(const Expr
*E
) {
67 if (const auto *IL
= dyn_cast
<IntegerLiteral
>(E
))
68 return (IL
->getValue().isIntN(1));
72 StringRef
getPrintableName(const Expr
*E
) {
73 if (const auto *D
= dyn_cast
<DeclRefExpr
>(E
->IgnoreParenCasts()))
74 return D
->getDecl()->getName();
/// Identify erroneous patterns in the last argument to strncat - the number
/// of bytes to copy.
80 bool containsBadStrncatPattern(const CallExpr
*CE
);
/// Identify erroneous patterns in the last argument to strlcpy - the number
/// of bytes to copy.
/// The bad pattern checked is when the size is known
/// to be larger than the destination can handle, e.g.:
///   strlcpy(dst, "abcd", sizeof("abcd") - 1);
///   strlcpy(dst, "abcd", 4);
///   strlcpy(dst + 3, "abcd", 2);
///   strlcpy(dst, "abcd", cpy);
/// Identify erroneous patterns in the last argument to strlcat - the number
/// of bytes to copy.
/// The bad pattern checked is when the last argument is basically
/// pointing to the destination buffer size or argument larger or
/// equal to it, e.g.:
///   strlcat(dst, src2, sizeof(dst));
///   strlcat(dst, src2, 2);
///   strlcat(dst, src2, 10);
101 bool containsBadStrlcpyStrlcatPattern(const CallExpr
*CE
);
104 WalkAST(const CheckerBase
*Checker
, BugReporter
&BR
, AnalysisDeclContext
*AC
)
105 : Checker(Checker
), BR(BR
), AC(AC
) {}
107 // Statement visitor methods.
108 void VisitChildren(Stmt
*S
);
109 void VisitStmt(Stmt
*S
) {
112 void VisitCallExpr(CallExpr
*CE
);
114 } // end anonymous namespace
116 // The correct size argument should look like following:
117 // strncat(dst, src, sizeof(dst) - strlen(dest) - 1);
118 // We look for the following anti-patterns:
119 // - strncat(dst, src, sizeof(dst) - strlen(dst));
120 // - strncat(dst, src, sizeof(dst) - 1);
121 // - strncat(dst, src, sizeof(dst));
122 bool WalkAST::containsBadStrncatPattern(const CallExpr
*CE
) {
123 if (CE
->getNumArgs() != 3)
125 const Expr
*DstArg
= CE
->getArg(0);
126 const Expr
*SrcArg
= CE
->getArg(1);
127 const Expr
*LenArg
= CE
->getArg(2);
129 // Identify wrong size expressions, which are commonly used instead.
130 if (const auto *BE
= dyn_cast
<BinaryOperator
>(LenArg
->IgnoreParenCasts())) {
131 // - sizeof(dst) - strlen(dst)
132 if (BE
->getOpcode() == BO_Sub
) {
133 const Expr
*L
= BE
->getLHS();
134 const Expr
*R
= BE
->getRHS();
135 if (isSizeof(L
, DstArg
) && isStrlen(R
, DstArg
))
139 if (isSizeof(L
, DstArg
) && isOne(R
->IgnoreParenCasts()))
144 if (isSizeof(LenArg
, DstArg
))
148 if (isSizeof(LenArg
, SrcArg
))
153 bool WalkAST::containsBadStrlcpyStrlcatPattern(const CallExpr
*CE
) {
154 if (CE
->getNumArgs() != 3)
156 const Expr
*DstArg
= CE
->getArg(0);
157 const Expr
*LenArg
= CE
->getArg(2);
159 const auto *DstArgDRE
= dyn_cast
<DeclRefExpr
>(DstArg
->IgnoreParenImpCasts());
160 const auto *LenArgDRE
=
161 dyn_cast
<DeclRefExpr
>(LenArg
->IgnoreParenLValueCasts());
163 if (isSizeof(LenArg
, DstArg
))
166 // - size_t dstlen = sizeof(dst)
168 const auto *LenArgVal
= dyn_cast
<VarDecl
>(LenArgDRE
->getDecl());
169 // If it's an EnumConstantDecl instead, then we're missing out on something.
171 assert(isa
<EnumConstantDecl
>(LenArgDRE
->getDecl()));
174 if (LenArgVal
->getInit())
175 LenArg
= LenArgVal
->getInit();
179 // We try to figure out if the last argument is possibly longer
180 // than the destination can possibly handle if its size can be defined.
181 if (const auto *IL
= dyn_cast
<IntegerLiteral
>(LenArg
->IgnoreParenImpCasts())) {
182 uint64_t ILRawVal
= IL
->getValue().getZExtValue();
184 // Case when there is pointer arithmetic on the destination buffer
185 // especially when we offset from the base decreasing the
186 // buffer length accordingly.
189 dyn_cast
<BinaryOperator
>(DstArg
->IgnoreParenImpCasts())) {
190 DstArgDRE
= dyn_cast
<DeclRefExpr
>(BE
->getLHS()->IgnoreParenImpCasts());
191 if (BE
->getOpcode() == BO_Add
) {
192 if ((IL
= dyn_cast
<IntegerLiteral
>(BE
->getRHS()->IgnoreParenImpCasts()))) {
193 DstOff
= IL
->getValue().getZExtValue();
199 if (const auto *Buffer
=
200 dyn_cast
<ConstantArrayType
>(DstArgDRE
->getType())) {
201 ASTContext
&C
= BR
.getContext();
202 uint64_t BufferLen
= C
.getTypeSize(Buffer
) / 8;
203 auto RemainingBufferLen
= BufferLen
- DstOff
;
204 if (RemainingBufferLen
< ILRawVal
)
213 void WalkAST::VisitCallExpr(CallExpr
*CE
) {
214 const FunctionDecl
*FD
= CE
->getDirectCallee();
218 if (CheckerContext::isCLibraryFunction(FD
, "strncat")) {
219 if (containsBadStrncatPattern(CE
)) {
220 const Expr
*DstArg
= CE
->getArg(0);
221 const Expr
*LenArg
= CE
->getArg(2);
222 PathDiagnosticLocation Loc
=
223 PathDiagnosticLocation::createBegin(LenArg
, BR
.getSourceManager(), AC
);
225 StringRef DstName
= getPrintableName(DstArg
);
228 llvm::raw_svector_ostream
os(S
);
229 os
<< "Potential buffer overflow. ";
230 if (!DstName
.empty()) {
231 os
<< "Replace with 'sizeof(" << DstName
<< ") "
232 "- strlen(" << DstName
<<") - 1'";
236 os
<< "se a safer 'strlcat' API";
238 BR
.EmitBasicReport(FD
, Checker
, "Anti-pattern in the argument",
239 "C String API", os
.str(), Loc
,
240 LenArg
->getSourceRange());
242 } else if (CheckerContext::isCLibraryFunction(FD
, "strlcpy") ||
243 CheckerContext::isCLibraryFunction(FD
, "strlcat")) {
244 if (containsBadStrlcpyStrlcatPattern(CE
)) {
245 const Expr
*DstArg
= CE
->getArg(0);
246 const Expr
*LenArg
= CE
->getArg(2);
247 PathDiagnosticLocation Loc
=
248 PathDiagnosticLocation::createBegin(LenArg
, BR
.getSourceManager(), AC
);
250 StringRef DstName
= getPrintableName(DstArg
);
253 llvm::raw_svector_ostream
os(S
);
254 os
<< "The third argument allows to potentially copy more bytes than it should. ";
255 os
<< "Replace with the value ";
256 if (!DstName
.empty())
257 os
<< "sizeof(" << DstName
<< ")";
259 os
<< "sizeof(<destination buffer>)";
262 BR
.EmitBasicReport(FD
, Checker
, "Anti-pattern in the argument",
263 "C String API", os
.str(), Loc
,
264 LenArg
->getSourceRange());
268 // Recurse and check children.
272 void WalkAST::VisitChildren(Stmt
*S
) {
273 for (Stmt
*Child
: S
->children())
279 class CStringSyntaxChecker
: public Checker
<check::ASTCodeBody
> {
282 void checkASTCodeBody(const Decl
*D
, AnalysisManager
& Mgr
,
283 BugReporter
&BR
) const {
284 WalkAST
walker(this, BR
, Mgr
.getAnalysisDeclContext(D
));
285 walker
.Visit(D
->getBody());
290 void ento::registerCStringSyntaxChecker(CheckerManager
&mgr
) {
291 mgr
.registerChecker
<CStringSyntaxChecker
>();
294 bool ento::shouldRegisterCStringSyntaxChecker(const CheckerManager
&mgr
) {