1 //===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/AST/Decl.h"
12 #include "clang/AST/DynamicRecursiveASTVisitor.h"
13 #include "clang/AST/Expr.h"
14 #include "clang/AST/FormatString.h"
15 #include "clang/AST/Stmt.h"
16 #include "clang/AST/StmtVisitor.h"
17 #include "clang/AST/Type.h"
18 #include "clang/ASTMatchers/ASTMatchFinder.h"
19 #include "clang/ASTMatchers/ASTMatchers.h"
20 #include "clang/Basic/SourceLocation.h"
21 #include "clang/Lex/Lexer.h"
22 #include "clang/Lex/Preprocessor.h"
23 #include "llvm/ADT/APSInt.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/Support/Casting.h"
33 using namespace clang
;
34 using namespace ast_matchers
;
38 class StmtDebugPrinter
39 : public ConstStmtVisitor
<StmtDebugPrinter
, std::string
> {
41 std::string
VisitStmt(const Stmt
*S
) { return S
->getStmtClassName(); }
43 std::string
VisitBinaryOperator(const BinaryOperator
*BO
) {
44 return "BinaryOperator(" + BO
->getOpcodeStr().str() + ")";
47 std::string
VisitUnaryOperator(const UnaryOperator
*UO
) {
48 return "UnaryOperator(" + UO
->getOpcodeStr(UO
->getOpcode()).str() + ")";
51 std::string
VisitImplicitCastExpr(const ImplicitCastExpr
*ICE
) {
52 return "ImplicitCastExpr(" + std::string(ICE
->getCastKindName()) + ")";
56 // Returns a string of ancestor `Stmt`s of the given `DRE` in such a form:
57 // "DRE ==> parent-of-DRE ==> grandparent-of-DRE ==> ...".
58 static std::string
getDREAncestorString(const DeclRefExpr
*DRE
,
62 StmtDebugPrinter StmtPriner
;
65 SS
<< StmtPriner
.Visit(St
);
67 DynTypedNodeList StParents
= Ctx
.getParents(*St
);
69 if (StParents
.size() > 1)
70 return "unavailable due to multiple parents";
71 if (StParents
.size() == 0)
73 St
= StParents
.begin()->get
<Stmt
>();
82 namespace clang::ast_matchers
{
83 // A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
84 // except for those belonging to a different callable of "n".
85 class MatchDescendantVisitor
: public DynamicRecursiveASTVisitor
{
87 // Creates an AST visitor that matches `Matcher` on all
88 // descendants of a given node "n" except for the ones
89 // belonging to a different callable of "n".
90 MatchDescendantVisitor(const internal::DynTypedMatcher
*Matcher
,
91 internal::ASTMatchFinder
*Finder
,
92 internal::BoundNodesTreeBuilder
*Builder
,
93 internal::ASTMatchFinder::BindKind Bind
,
94 const bool ignoreUnevaluatedContext
)
95 : Matcher(Matcher
), Finder(Finder
), Builder(Builder
), Bind(Bind
),
96 Matches(false), ignoreUnevaluatedContext(ignoreUnevaluatedContext
) {
97 ShouldVisitTemplateInstantiations
= true;
98 ShouldVisitImplicitCode
= false; // TODO: let's ignore implicit code for now
101 // Returns true if a match is found in a subtree of `DynNode`, which belongs
102 // to the same callable of `DynNode`.
103 bool findMatch(const DynTypedNode
&DynNode
) {
105 if (const Stmt
*StmtNode
= DynNode
.get
<Stmt
>()) {
106 TraverseStmt(const_cast<Stmt
*>(StmtNode
));
107 *Builder
= ResultBindings
;
113 // The following are overriding methods from the base visitor class.
114 // They are public only to allow CRTP to work. They are *not* part
115 // of the public API of this class.
117 // For the matchers so far used in safe buffers, we only need to match
118 // `Stmt`s. To override more as needed.
120 bool TraverseDecl(Decl
*Node
) override
{
125 // To skip callables:
126 if (isa
<FunctionDecl
, BlockDecl
, ObjCMethodDecl
>(Node
))
128 // Traverse descendants
129 return DynamicRecursiveASTVisitor::TraverseDecl(Node
);
132 bool TraverseGenericSelectionExpr(GenericSelectionExpr
*Node
) override
{
133 // These are unevaluated, except the result expression.
134 if (ignoreUnevaluatedContext
)
135 return TraverseStmt(Node
->getResultExpr());
136 return DynamicRecursiveASTVisitor::TraverseGenericSelectionExpr(Node
);
140 TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr
*Node
) override
{
141 // Unevaluated context.
142 if (ignoreUnevaluatedContext
)
144 return DynamicRecursiveASTVisitor::TraverseUnaryExprOrTypeTraitExpr(Node
);
147 bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node
) override
{
148 // Unevaluated context.
149 if (ignoreUnevaluatedContext
)
151 return DynamicRecursiveASTVisitor::TraverseTypeOfExprTypeLoc(Node
);
154 bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node
) override
{
155 // Unevaluated context.
156 if (ignoreUnevaluatedContext
)
158 return DynamicRecursiveASTVisitor::TraverseDecltypeTypeLoc(Node
);
161 bool TraverseCXXNoexceptExpr(CXXNoexceptExpr
*Node
) override
{
162 // Unevaluated context.
163 if (ignoreUnevaluatedContext
)
165 return DynamicRecursiveASTVisitor::TraverseCXXNoexceptExpr(Node
);
168 bool TraverseCXXTypeidExpr(CXXTypeidExpr
*Node
) override
{
169 // Unevaluated context.
170 if (ignoreUnevaluatedContext
)
172 return DynamicRecursiveASTVisitor::TraverseCXXTypeidExpr(Node
);
175 bool TraverseStmt(Stmt
*Node
) override
{
180 return DynamicRecursiveASTVisitor::TraverseStmt(Node
);
184 // Sets 'Matched' to true if 'Matcher' matches 'Node'
186 // Returns 'true' if traversal should continue after this function
187 // returns, i.e. if no match is found or 'Bind' is 'BK_All'.
188 template <typename T
> bool match(const T
&Node
) {
189 internal::BoundNodesTreeBuilder
RecursiveBuilder(*Builder
);
191 if (Matcher
->matches(DynTypedNode::create(Node
), Finder
,
192 &RecursiveBuilder
)) {
193 ResultBindings
.addMatch(RecursiveBuilder
);
195 if (Bind
!= internal::ASTMatchFinder::BK_All
)
196 return false; // Abort as soon as a match is found.
201 const internal::DynTypedMatcher
*const Matcher
;
202 internal::ASTMatchFinder
*const Finder
;
203 internal::BoundNodesTreeBuilder
*const Builder
;
204 internal::BoundNodesTreeBuilder ResultBindings
;
205 const internal::ASTMatchFinder::BindKind Bind
;
207 bool ignoreUnevaluatedContext
;
210 // Because we're dealing with raw pointers, let's define what we mean by that.
211 static auto hasPointerType() {
212 return hasType(hasCanonicalType(pointerType()));
215 static auto hasArrayType() { return hasType(hasCanonicalType(arrayType())); }
217 AST_MATCHER_P(Stmt
, forEachDescendantEvaluatedStmt
, internal::Matcher
<Stmt
>,
219 const DynTypedMatcher
&DTM
= static_cast<DynTypedMatcher
>(innerMatcher
);
221 MatchDescendantVisitor
Visitor(&DTM
, Finder
, Builder
, ASTMatchFinder::BK_All
,
223 return Visitor
.findMatch(DynTypedNode::create(Node
));
226 AST_MATCHER_P(Stmt
, forEachDescendantStmt
, internal::Matcher
<Stmt
>,
228 const DynTypedMatcher
&DTM
= static_cast<DynTypedMatcher
>(innerMatcher
);
230 MatchDescendantVisitor
Visitor(&DTM
, Finder
, Builder
, ASTMatchFinder::BK_All
,
232 return Visitor
.findMatch(DynTypedNode::create(Node
));
235 // Matches a `Stmt` node iff the node is in a safe-buffer opt-out region
236 AST_MATCHER_P(Stmt
, notInSafeBufferOptOut
, const UnsafeBufferUsageHandler
*,
238 return !Handler
->isSafeBufferOptOut(Node
.getBeginLoc());
241 AST_MATCHER_P(Stmt
, ignoreUnsafeBufferInContainer
,
242 const UnsafeBufferUsageHandler
*, Handler
) {
243 return Handler
->ignoreUnsafeBufferInContainer(Node
.getBeginLoc());
246 AST_MATCHER_P(Stmt
, ignoreUnsafeLibcCall
, const UnsafeBufferUsageHandler
*,
248 if (Finder
->getASTContext().getLangOpts().CPlusPlus
)
249 return Handler
->ignoreUnsafeBufferInLibcCall(Node
.getBeginLoc());
250 return true; /* Only warn about libc calls for C++ */
253 AST_MATCHER_P(CastExpr
, castSubExpr
, internal::Matcher
<Expr
>, innerMatcher
) {
254 return innerMatcher
.matches(*Node
.getSubExpr(), Finder
, Builder
);
257 // Matches a `UnaryOperator` whose operator is pre-increment:
258 AST_MATCHER(UnaryOperator
, isPreInc
) {
259 return Node
.getOpcode() == UnaryOperator::Opcode::UO_PreInc
;
262 // Returns a matcher that matches any expression 'e' such that `innerMatcher`
263 // matches 'e' and 'e' is in an Unspecified Lvalue Context.
264 static auto isInUnspecifiedLvalueContext(internal::Matcher
<Expr
> innerMatcher
) {
269 hasCastKind(CastKind::CK_LValueToRValue
),
270 castSubExpr(innerMatcher
)),
272 hasAnyOperatorName("="),
279 // Returns a matcher that matches any expression `e` such that `InnerMatcher`
280 // matches `e` and `e` is in an Unspecified Pointer Context (UPC).
281 static internal::Matcher
<Stmt
>
282 isInUnspecifiedPointerContext(internal::Matcher
<Stmt
> InnerMatcher
) {
284 // 1. an argument of a function call (except the callee has [[unsafe_...]]
286 // 2. the operand of a pointer-to-(integer or bool) cast operation; or
287 // 3. the operand of a comparator operation; or
288 // 4. the operand of a pointer subtraction operation
289 // (i.e., computing the distance between two pointers); or ...
292 auto CallArgMatcher
= callExpr(
293 forEachArgumentWithParamType(
295 isAnyPointer() /* array also decays to pointer type*/),
297 functionDecl(hasAttr(attr::UnsafeBufferUsage
)))));
299 auto CastOperandMatcher
=
300 castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral
),
301 hasCastKind(CastKind::CK_PointerToBoolean
)),
302 castSubExpr(allOf(hasPointerType(), InnerMatcher
)));
304 auto CompOperandMatcher
=
305 binaryOperator(hasAnyOperatorName("!=", "==", "<", "<=", ">", ">="),
306 eachOf(hasLHS(allOf(hasPointerType(), InnerMatcher
)),
307 hasRHS(allOf(hasPointerType(), InnerMatcher
))));
309 // A matcher that matches pointer subtractions:
310 auto PtrSubtractionMatcher
=
311 binaryOperator(hasOperatorName("-"),
312 // Note that here we need both LHS and RHS to be
313 // pointer. Then the inner matcher can match any of
315 allOf(hasLHS(hasPointerType()),
316 hasRHS(hasPointerType())),
317 eachOf(hasLHS(InnerMatcher
),
318 hasRHS(InnerMatcher
)));
321 return stmt(anyOf(CallArgMatcher
, CastOperandMatcher
, CompOperandMatcher
,
322 PtrSubtractionMatcher
));
323 // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we
324 // don't have to check that.)
327 // Returns a matcher that matches any expression 'e' such that `innerMatcher`
328 // matches 'e' and 'e' is in an unspecified untyped context (i.e., the expression
329 // 'e' isn't evaluated to an RValue). For example, consider the following code:
330 // int *p = new int[4];
331 // int *q = new int[4];
334 // The expression `p = q` in the conditional of the `if` statement
335 // `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;`
336 // in the assignment statement is in an untyped context.
337 static internal::Matcher
<Stmt
>
338 isInUnspecifiedUntypedContext(internal::Matcher
<Stmt
> InnerMatcher
) {
339 // An unspecified context can be
340 // 1. A compound statement,
341 // 2. The body of an if statement
343 auto CompStmt
= compoundStmt(forEach(InnerMatcher
));
344 auto IfStmtThen
= ifStmt(hasThen(InnerMatcher
));
345 auto IfStmtElse
= ifStmt(hasElse(InnerMatcher
));
346 // FIXME: Handle loop bodies.
347 return stmt(anyOf(CompStmt
, IfStmtThen
, IfStmtElse
));
350 // Given a two-param std::span construct call, matches iff the call has the
352 // 1. `std::span<T>{new T[n], n}`, where `n` is a literal or a DRE
353 // 2. `std::span<T>{new T, 1}`
354 // 3. `std::span<T>{&var, 1}`
355 // 4. `std::span<T>{a, n}`, where `a` is of an array-of-T with constant size
357 // 5. `std::span<T>{any, 0}`
358 // 6. `std::span<T>{std::addressof(...), 1}`
359 AST_MATCHER(CXXConstructExpr
, isSafeSpanTwoParamConstruct
) {
360 assert(Node
.getNumArgs() == 2 &&
361 "expecting a two-parameter std::span constructor");
362 const Expr
*Arg0
= Node
.getArg(0)->IgnoreImplicit();
363 const Expr
*Arg1
= Node
.getArg(1)->IgnoreImplicit();
364 auto HaveEqualConstantValues
= [&Finder
](const Expr
*E0
, const Expr
*E1
) {
365 if (auto E0CV
= E0
->getIntegerConstantExpr(Finder
->getASTContext()))
366 if (auto E1CV
= E1
->getIntegerConstantExpr(Finder
->getASTContext())) {
367 return APSInt::compareValues(*E0CV
, *E1CV
) == 0;
371 auto AreSameDRE
= [](const Expr
*E0
, const Expr
*E1
) {
372 if (auto *DRE0
= dyn_cast
<DeclRefExpr
>(E0
))
373 if (auto *DRE1
= dyn_cast
<DeclRefExpr
>(E1
)) {
374 return DRE0
->getDecl() == DRE1
->getDecl();
378 std::optional
<APSInt
> Arg1CV
=
379 Arg1
->getIntegerConstantExpr(Finder
->getASTContext());
381 if (Arg1CV
&& Arg1CV
->isZero())
384 switch (Arg0
->IgnoreImplicit()->getStmtClass()) {
385 case Stmt::CXXNewExprClass
:
386 if (auto Size
= cast
<CXXNewExpr
>(Arg0
)->getArraySize()) {
388 return AreSameDRE((*Size
)->IgnoreImplicit(), Arg1
) ||
389 HaveEqualConstantValues(*Size
, Arg1
);
391 // TODO: what's placeholder type? avoid it for now.
392 if (!cast
<CXXNewExpr
>(Arg0
)->hasPlaceholderType()) {
394 return Arg1CV
&& Arg1CV
->isOne();
397 case Stmt::UnaryOperatorClass
:
398 if (cast
<UnaryOperator
>(Arg0
)->getOpcode() ==
399 UnaryOperator::Opcode::UO_AddrOf
)
401 return Arg1CV
&& Arg1CV
->isOne();
403 case Stmt::CallExprClass
:
404 if (const auto *CE
= dyn_cast
<CallExpr
>(Arg0
)) {
405 const auto FnDecl
= CE
->getDirectCallee();
406 if (FnDecl
&& FnDecl
->getNameAsString() == "addressof" &&
407 FnDecl
->isInStdNamespace()) {
408 return Arg1CV
&& Arg1CV
->isOne();
416 QualType Arg0Ty
= Arg0
->IgnoreImplicit()->getType();
418 if (auto *ConstArrTy
=
419 Finder
->getASTContext().getAsConstantArrayType(Arg0Ty
)) {
420 const APSInt ConstArrSize
= APSInt(ConstArrTy
->getSize());
423 return Arg1CV
&& APSInt::compareValues(ConstArrSize
, *Arg1CV
) == 0;
428 AST_MATCHER(ArraySubscriptExpr
, isSafeArraySubscript
) {
429 // FIXME: Proper solution:
430 // - refactor Sema::CheckArrayAccess
431 // - split safe/OOB/unknown decision logic from diagnostics emitting code
432 // - e. g. "Try harder to find a NamedDecl to point at in the note."
433 // already duplicated
434 // - call both from Sema and from here
436 const auto *BaseDRE
=
437 dyn_cast
<DeclRefExpr
>(Node
.getBase()->IgnoreParenImpCasts());
438 const auto *SLiteral
=
439 dyn_cast
<StringLiteral
>(Node
.getBase()->IgnoreParenImpCasts());
442 if (!BaseDRE
&& !SLiteral
)
446 if (!BaseDRE
->getDecl())
448 const auto *CATy
= Finder
->getASTContext().getAsConstantArrayType(
449 BaseDRE
->getDecl()->getType());
453 size
= CATy
->getLimitedSize();
454 } else if (SLiteral
) {
455 size
= SLiteral
->getLength() + 1;
458 if (const auto *IdxLit
= dyn_cast
<IntegerLiteral
>(Node
.getIdx())) {
459 const APInt ArrIdx
= IdxLit
->getValue();
460 // FIXME: ArrIdx.isNegative() we could immediately emit an error as that's a
462 if (ArrIdx
.isNonNegative() && ArrIdx
.getLimitedValue() < size
)
469 AST_MATCHER_P(CallExpr
, hasNumArgs
, unsigned, Num
) {
470 return Node
.getNumArgs() == Num
;
473 namespace libc_func_matchers
{
474 // Under `libc_func_matchers`, define a set of matchers that match unsafe
475 // functions in libc and unsafe calls to them.
477 // A tiny parser to strip off common prefix and suffix of libc function names
480 // Given a function name, `matchName` returns `CoreName` according to the
481 // following grammar:
483 // LibcName := CoreName | CoreName + "_s"
484 // MatchingName := "__builtin_" + LibcName |
485 // "__builtin___" + LibcName + "_chk" |
486 // "__asan_" + LibcName
488 struct LibcFunNamePrefixSuffixParser
{
489 StringRef
matchName(StringRef FunName
, bool isBuiltin
) {
490 // Try to match __builtin_:
491 if (isBuiltin
&& FunName
.starts_with("__builtin_"))
492 // Then either it is __builtin_LibcName or __builtin___LibcName_chk or
494 return matchLibcNameOrBuiltinChk(
495 FunName
.drop_front(10 /* truncate "__builtin_" */));
496 // Try to match __asan_:
497 if (FunName
.starts_with("__asan_"))
498 return matchLibcName(FunName
.drop_front(7 /* truncate of "__asan_" */));
499 return matchLibcName(FunName
);
502 // Parameter `Name` is the substring after stripping off the prefix
504 StringRef
matchLibcNameOrBuiltinChk(StringRef Name
) {
505 if (Name
.starts_with("__") && Name
.ends_with("_chk"))
506 return matchLibcName(
507 Name
.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */);
508 return matchLibcName(Name
);
511 StringRef
matchLibcName(StringRef Name
) {
512 if (Name
.ends_with("_s"))
513 return Name
.drop_back(2 /* truncate "_s" */);
518 // A pointer type expression is known to be null-terminated, if it has the
519 // form: E.c_str(), for any expression E of `std::string` type.
520 static bool isNullTermPointer(const Expr
*Ptr
) {
521 if (isa
<StringLiteral
>(Ptr
->IgnoreParenImpCasts()))
523 if (isa
<PredefinedExpr
>(Ptr
->IgnoreParenImpCasts()))
525 if (auto *MCE
= dyn_cast
<CXXMemberCallExpr
>(Ptr
->IgnoreParenImpCasts())) {
526 const CXXMethodDecl
*MD
= MCE
->getMethodDecl();
527 const CXXRecordDecl
*RD
= MCE
->getRecordDecl()->getCanonicalDecl();
529 if (MD
&& RD
&& RD
->isInStdNamespace())
530 if (MD
->getName() == "c_str" && RD
->getName() == "basic_string")
536 // Return true iff at least one of following cases holds:
537 // 1. Format string is a literal and there is an unsafe pointer argument
538 // corresponding to an `s` specifier;
539 // 2. Format string is not a literal and there is at least one unsafe pointer
540 // argument (including the formatter argument).
542 // `UnsafeArg` is the output argument that will be set only if this function
544 static bool hasUnsafeFormatOrSArg(const CallExpr
*Call
, const Expr
*&UnsafeArg
,
545 const unsigned FmtArgIdx
, ASTContext
&Ctx
,
546 bool isKprintf
= false) {
547 class StringFormatStringHandler
548 : public analyze_format_string::FormatStringHandler
{
549 const CallExpr
*Call
;
551 const Expr
*&UnsafeArg
;
554 StringFormatStringHandler(const CallExpr
*Call
, unsigned FmtArgIdx
,
555 const Expr
*&UnsafeArg
)
556 : Call(Call
), FmtArgIdx(FmtArgIdx
), UnsafeArg(UnsafeArg
) {}
558 bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier
&FS
,
559 const char *startSpecifier
,
560 unsigned specifierLen
,
561 const TargetInfo
&Target
) override
{
562 if (FS
.getConversionSpecifier().getKind() ==
563 analyze_printf::PrintfConversionSpecifier::sArg
) {
564 unsigned ArgIdx
= FS
.getPositionalArgIndex() + FmtArgIdx
;
566 if (0 < ArgIdx
&& ArgIdx
< Call
->getNumArgs())
567 if (!isNullTermPointer(Call
->getArg(ArgIdx
))) {
568 UnsafeArg
= Call
->getArg(ArgIdx
); // output
569 // returning false stops parsing immediately
573 return true; // continue parsing
577 const Expr
*Fmt
= Call
->getArg(FmtArgIdx
);
579 if (auto *SL
= dyn_cast
<StringLiteral
>(Fmt
->IgnoreParenImpCasts())) {
582 if (SL
->getCharByteWidth() == 1)
583 FmtStr
= SL
->getString();
584 else if (auto EvaledFmtStr
= SL
->tryEvaluateString(Ctx
))
585 FmtStr
= *EvaledFmtStr
;
587 goto CHECK_UNSAFE_PTR
;
589 StringFormatStringHandler
Handler(Call
, FmtArgIdx
, UnsafeArg
);
591 return analyze_format_string::ParsePrintfString(
592 Handler
, FmtStr
.begin(), FmtStr
.end(), Ctx
.getLangOpts(),
593 Ctx
.getTargetInfo(), isKprintf
);
596 // If format is not a string literal, we cannot analyze the format string.
597 // In this case, this call is considered unsafe if at least one argument
598 // (including the format argument) is unsafe pointer.
600 llvm::make_range(Call
->arg_begin() + FmtArgIdx
, Call
->arg_end()),
601 [&UnsafeArg
](const Expr
*Arg
) -> bool {
602 if (Arg
->getType()->isPointerType() && !isNullTermPointer(Arg
)) {
610 // Matches a FunctionDecl node such that
611 // 1. Its name, after stripping off predefined prefix and suffix, is
613 // 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
614 // is a set of libc function names.
616 // Note: For predefined prefix and suffix, see `LibcFunNamePrefixSuffixParser`.
617 // The notation `CoreName[str/wcs]` means a new name obtained by replacing
618 // the string "wcs" with "str" in `CoreName`.
619 AST_MATCHER(FunctionDecl
, isPredefinedUnsafeLibcFunc
) {
620 static std::unique_ptr
<std::set
<StringRef
>> PredefinedNames
= nullptr;
621 if (!PredefinedNames
)
623 std::make_unique
<std::set
<StringRef
>, std::set
<StringRef
>>({
624 // numeric conversion:
638 // "strfromf", "strfromd", "strfroml", // C23?
639 // string manipulation:
649 // string examination:
695 auto *II
= Node
.getIdentifier();
700 StringRef Name
= LibcFunNamePrefixSuffixParser().matchName(
701 II
->getName(), Node
.getBuiltinID());
703 // Match predefined names:
704 if (PredefinedNames
->find(Name
) != PredefinedNames
->end())
707 std::string NameWCS
= Name
.str();
708 size_t WcsPos
= NameWCS
.find("wcs");
710 while (WcsPos
!= std::string::npos
) {
711 NameWCS
[WcsPos
++] = 's';
712 NameWCS
[WcsPos
++] = 't';
713 NameWCS
[WcsPos
++] = 'r';
714 WcsPos
= NameWCS
.find("wcs", WcsPos
);
716 if (PredefinedNames
->find(NameWCS
) != PredefinedNames
->end())
718 // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.). They
719 // all should end with "scanf":
720 return Name
.ends_with("scanf");
723 // Match a call to one of the `v*printf` functions taking `va_list`. We cannot
724 // check safety for these functions so they should be changed to their
725 // non-va_list versions.
726 AST_MATCHER(FunctionDecl
, isUnsafeVaListPrintfFunc
) {
727 auto *II
= Node
.getIdentifier();
732 StringRef Name
= LibcFunNamePrefixSuffixParser().matchName(
733 II
->getName(), Node
.getBuiltinID());
735 if (!Name
.ends_with("printf"))
736 return false; // neither printf nor scanf
737 return Name
.starts_with("v");
740 // Matches a call to one of the `sprintf` functions as they are always unsafe
741 // and should be changed to `snprintf`.
742 AST_MATCHER(FunctionDecl
, isUnsafeSprintfFunc
) {
743 auto *II
= Node
.getIdentifier();
748 StringRef Name
= LibcFunNamePrefixSuffixParser().matchName(
749 II
->getName(), Node
.getBuiltinID());
751 if (!Name
.ends_with("printf") ||
752 // Let `isUnsafeVaListPrintfFunc` check for cases with va-list:
753 Name
.starts_with("v"))
756 StringRef Prefix
= Name
.drop_back(6);
758 if (Prefix
.ends_with("w"))
759 Prefix
= Prefix
.drop_back(1);
760 return Prefix
== "s";
763 // Match function declarations of `printf`, `fprintf`, `snprintf` and their wide
764 // character versions. Calls to these functions can be safe if their arguments
765 // are carefully made safe.
766 AST_MATCHER(FunctionDecl
, isNormalPrintfFunc
) {
767 auto *II
= Node
.getIdentifier();
772 StringRef Name
= LibcFunNamePrefixSuffixParser().matchName(
773 II
->getName(), Node
.getBuiltinID());
775 if (!Name
.ends_with("printf") || Name
.starts_with("v"))
778 StringRef Prefix
= Name
.drop_back(6);
780 if (Prefix
.ends_with("w"))
781 Prefix
= Prefix
.drop_back(1);
783 return Prefix
.empty() || Prefix
== "k" || Prefix
== "f" || Prefix
== "sn";
786 // This matcher requires that it is known that the callee `isNormalPrintf`.
787 // Then if the format string is a string literal, this matcher matches when at
788 // least one string argument is unsafe. If the format is not a string literal,
789 // this matcher matches when at least one pointer type argument is unsafe.
790 AST_MATCHER_P(CallExpr
, hasUnsafePrintfStringArg
,
791 clang::ast_matchers::internal::Matcher
<Expr
>,
792 UnsafeStringArgMatcher
) {
793 // Determine what printf it is by examining formal parameters:
794 const FunctionDecl
*FD
= Node
.getDirectCallee();
796 assert(FD
&& "It should have been checked that FD is non-null.");
798 unsigned NumParms
= FD
->getNumParams();
801 return false; // possibly some user-defined printf function
803 ASTContext
&Ctx
= Finder
->getASTContext();
804 QualType FirstParmTy
= FD
->getParamDecl(0)->getType();
806 if (!FirstParmTy
->isPointerType())
807 return false; // possibly some user-defined printf function
809 QualType FirstPteTy
= FirstParmTy
->castAs
<PointerType
>()->getPointeeType();
811 if (!Ctx
.getFILEType()
812 .isNull() && //`FILE *` must be in the context if it is fprintf
813 FirstPteTy
.getCanonicalType() == Ctx
.getFILEType().getCanonicalType()) {
815 const Expr
*UnsafeArg
;
817 if (hasUnsafeFormatOrSArg(&Node
, UnsafeArg
, 1, Ctx
, false))
818 return UnsafeStringArgMatcher
.matches(*UnsafeArg
, Finder
, Builder
);
822 if (FirstPteTy
.isConstQualified()) {
823 // If the first parameter is a `const char *`, it is a printf/kprintf:
824 bool isKprintf
= false;
825 const Expr
*UnsafeArg
;
827 if (auto *II
= FD
->getIdentifier())
828 isKprintf
= II
->getName() == "kprintf";
829 if (hasUnsafeFormatOrSArg(&Node
, UnsafeArg
, 0, Ctx
, isKprintf
))
830 return UnsafeStringArgMatcher
.matches(*UnsafeArg
, Finder
, Builder
);
835 QualType SecondParmTy
= FD
->getParamDecl(1)->getType();
837 if (!FirstPteTy
.isConstQualified() && SecondParmTy
->isIntegerType()) {
838 // If the first parameter type is non-const qualified `char *` and the
839 // second is an integer, it is a snprintf:
840 const Expr
*UnsafeArg
;
842 if (hasUnsafeFormatOrSArg(&Node
, UnsafeArg
, 2, Ctx
, false))
843 return UnsafeStringArgMatcher
.matches(*UnsafeArg
, Finder
, Builder
);
847 // We don't really recognize this "normal" printf, the only thing we
848 // can do is to require all pointers to be null-terminated:
849 for (auto Arg
: Node
.arguments())
850 if (Arg
->getType()->isPointerType() && !isNullTermPointer(Arg
))
851 if (UnsafeStringArgMatcher
.matches(*Arg
, Finder
, Builder
))
856 // This matcher requires that it is known that the callee `isNormalPrintf`.
857 // Then it matches if the first two arguments of the call are a pointer and an
858 // integer and they are not in a safe pattern.
860 // For the first two arguments: `ptr` and `size`, they are safe if in the
861 // following patterns:
864 // ptr := DRE.data();
865 // size:= DRE.size()/DRE.size_bytes()
866 // And DRE is a hardened container or view.
869 // ptr := Constant-Array-DRE;
870 // size:= any expression that has compile-time constant value equivalent to
871 // sizeof (Constant-Array-DRE)
872 AST_MATCHER(CallExpr
, hasUnsafeSnprintfBuffer
) {
873 const FunctionDecl
*FD
= Node
.getDirectCallee();
875 assert(FD
&& "It should have been checked that FD is non-null.");
877 if (FD
->getNumParams() < 3)
878 return false; // Not an snprint
880 QualType FirstParmTy
= FD
->getParamDecl(0)->getType();
882 if (!FirstParmTy
->isPointerType())
883 return false; // Not an snprint
885 QualType FirstPteTy
= FirstParmTy
->castAs
<PointerType
>()->getPointeeType();
886 const Expr
*Buf
= Node
.getArg(0), *Size
= Node
.getArg(1);
888 if (FirstPteTy
.isConstQualified() || !Buf
->getType()->isPointerType() ||
889 !Size
->getType()->isIntegerType())
890 return false; // not an snprintf call
893 static StringRef SizedObjs
[] = {"span", "array", "vector",
894 "basic_string_view", "basic_string"};
895 Buf
= Buf
->IgnoreParenImpCasts();
896 Size
= Size
->IgnoreParenImpCasts();
897 if (auto *MCEPtr
= dyn_cast
<CXXMemberCallExpr
>(Buf
))
898 if (auto *MCESize
= dyn_cast
<CXXMemberCallExpr
>(Size
)) {
899 auto *DREOfPtr
= dyn_cast
<DeclRefExpr
>(
900 MCEPtr
->getImplicitObjectArgument()->IgnoreParenImpCasts());
901 auto *DREOfSize
= dyn_cast
<DeclRefExpr
>(
902 MCESize
->getImplicitObjectArgument()->IgnoreParenImpCasts());
904 if (!DREOfPtr
|| !DREOfSize
)
905 return true; // not in safe pattern
906 if (DREOfPtr
->getDecl() != DREOfSize
->getDecl())
907 return true; // not in safe pattern
908 if (MCEPtr
->getMethodDecl()->getName() != "data")
909 return true; // not in safe pattern
911 if (MCESize
->getMethodDecl()->getName() == "size_bytes" ||
912 // Note here the pointer must be a pointer-to-char type unless there
913 // is explicit casting. If there is explicit casting, this branch
914 // is unreachable. Thus, at this branch "size" and "size_bytes" are
915 // equivalent as the pointer is a char pointer:
916 MCESize
->getMethodDecl()->getName() == "size")
917 for (StringRef SizedObj
: SizedObjs
)
918 if (MCEPtr
->getRecordDecl()->isInStdNamespace() &&
919 MCEPtr
->getRecordDecl()->getCanonicalDecl()->getName() ==
921 return false; // It is in fact safe
925 if (auto *DRE
= dyn_cast
<DeclRefExpr
>(Buf
->IgnoreParenImpCasts())) {
926 ASTContext
&Ctx
= Finder
->getASTContext();
928 if (auto *CAT
= Ctx
.getAsConstantArrayType(DRE
->getType())) {
930 // The array element type must be compatible with `char` otherwise an
931 // explicit cast will be needed, which will make this check unreachable.
932 // Therefore, the array extent is the same as its bytewise size.
933 if (Size
->EvaluateAsConstantExpr(ER
, Ctx
)) {
934 APSInt EVal
= ER
.Val
.getInt(); // Size must have integer type
936 return APSInt::compareValues(EVal
, APSInt(CAT
->getSize(), true)) != 0;
940 return true; // ptr and size are not in safe pattern
942 } // namespace libc_func_matchers
943 } // namespace clang::ast_matchers
946 // Because the analysis revolves around variables and their types, we'll need to
947 // track uses of variables (aka DeclRefExprs).
948 using DeclUseList
= SmallVector
<const DeclRefExpr
*, 1>;
950 // Convenience typedef.
951 using FixItList
= SmallVector
<FixItHint
, 4>;
955 /// Gadget is an individual operation in the code that may be of interest to
956 /// this analysis. Each (non-abstract) subclass corresponds to a specific
957 /// rigid AST structure that constitutes an operation on a pointer-type object.
958 /// Discovery of a gadget in the code corresponds to claiming that we understand
959 /// what this part of code is doing well enough to potentially improve it.
960 /// Gadgets can be warning (immediately deserving a warning) or fixable (not
961 /// always deserving a warning per se, but requires our attention to identify
962 /// it warrants a fixit).
967 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
970 /// Common type of ASTMatchers used for discovering gadgets.
971 /// Useful for implementing the static matcher() methods
972 /// that are expected from all non-abstract subclasses.
973 using Matcher
= decltype(stmt());
975 Gadget(Kind K
) : K(K
) {}
977 Kind
getKind() const { return K
; }
980 StringRef
getDebugName() const {
985 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
987 llvm_unreachable("Unhandled Gadget::Kind enum");
991 virtual bool isWarningGadget() const = 0;
992 // TODO remove this method from WarningGadget interface. It's only used for
993 // debug prints in FixableGadget.
994 virtual SourceLocation
getSourceLoc() const = 0;
996 /// Returns the list of pointer-type variables on which this gadget performs
997 /// its operation. Typically, there's only one variable. This isn't a list
998 /// of all DeclRefExprs in the gadget's AST!
999 virtual DeclUseList
getClaimedVarUseSites() const = 0;
1001 virtual ~Gadget() = default;
1007 /// Warning gadgets correspond to unsafe code patterns that warrants
1008 /// an immediate warning.
1009 class WarningGadget
: public Gadget
{
1011 WarningGadget(Kind K
) : Gadget(K
) {}
1013 static bool classof(const Gadget
*G
) { return G
->isWarningGadget(); }
1014 bool isWarningGadget() const final
{ return true; }
1016 virtual void handleUnsafeOperation(UnsafeBufferUsageHandler
&Handler
,
1017 bool IsRelatedToDecl
,
1018 ASTContext
&Ctx
) const = 0;
1021 /// Fixable gadgets correspond to code patterns that aren't always unsafe but
1022 /// need to be properly recognized in order to emit fixes. For example, if a raw
1023 /// pointer-type variable is replaced by a safe C++ container, every use of such
1024 /// variable must be carefully considered and possibly updated.
1025 class FixableGadget
: public Gadget
{
1027 FixableGadget(Kind K
) : Gadget(K
) {}
1029 static bool classof(const Gadget
*G
) { return !G
->isWarningGadget(); }
1030 bool isWarningGadget() const final
{ return false; }
1032 /// Returns a fixit that would fix the current gadget according to
1033 /// the current strategy. Returns std::nullopt if the fix cannot be produced;
1034 /// returns an empty list if no fixes are necessary.
1035 virtual std::optional
<FixItList
> getFixits(const FixitStrategy
&) const {
1036 return std::nullopt
;
1039 /// Returns a list of two elements where the first element is the LHS of a
1040 /// pointer assignment statement and the second element is the RHS. This
1041 /// two-element list represents the fact that the LHS buffer gets its bounds
1042 /// information from the RHS buffer. This information will be used later to
1043 /// group all those variables whose types must be modified together to prevent
1044 /// type mismatches.
1045 virtual std::optional
<std::pair
<const VarDecl
*, const VarDecl
*>>
1046 getStrategyImplications() const {
1047 return std::nullopt
;
1051 static auto toSupportedVariable() { return to(varDecl()); }
1053 using FixableGadgetList
= std::vector
<std::unique_ptr
<FixableGadget
>>;
1054 using WarningGadgetList
= std::vector
<std::unique_ptr
<WarningGadget
>>;
1056 /// An increment of a pointer-type value is unsafe as it may run the pointer
1058 class IncrementGadget
: public WarningGadget
{
1059 static constexpr const char *const OpTag
= "op";
1060 const UnaryOperator
*Op
;
1063 IncrementGadget(const MatchFinder::MatchResult
&Result
)
1064 : WarningGadget(Kind::Increment
),
1065 Op(Result
.Nodes
.getNodeAs
<UnaryOperator
>(OpTag
)) {}
1067 static bool classof(const Gadget
*G
) {
1068 return G
->getKind() == Kind::Increment
;
1071 static Matcher
matcher() {
1073 unaryOperator(hasOperatorName("++"),
1074 hasUnaryOperand(ignoringParenImpCasts(hasPointerType())))
1078 void handleUnsafeOperation(UnsafeBufferUsageHandler
&Handler
,
1079 bool IsRelatedToDecl
,
1080 ASTContext
&Ctx
) const override
{
1081 Handler
.handleUnsafeOperation(Op
, IsRelatedToDecl
, Ctx
);
1083 SourceLocation
getSourceLoc() const override
{ return Op
->getBeginLoc(); }
1085 DeclUseList
getClaimedVarUseSites() const override
{
1086 SmallVector
<const DeclRefExpr
*, 2> Uses
;
1087 if (const auto *DRE
=
1088 dyn_cast
<DeclRefExpr
>(Op
->getSubExpr()->IgnoreParenImpCasts())) {
1089 Uses
.push_back(DRE
);
1092 return std::move(Uses
);
1096 /// A decrement of a pointer-type value is unsafe as it may run the pointer
1098 class DecrementGadget
: public WarningGadget
{
1099 static constexpr const char *const OpTag
= "op";
1100 const UnaryOperator
*Op
;
1103 DecrementGadget(const MatchFinder::MatchResult
&Result
)
1104 : WarningGadget(Kind::Decrement
),
1105 Op(Result
.Nodes
.getNodeAs
<UnaryOperator
>(OpTag
)) {}
1107 static bool classof(const Gadget
*G
) {
1108 return G
->getKind() == Kind::Decrement
;
1111 static Matcher
matcher() {
1113 unaryOperator(hasOperatorName("--"),
1114 hasUnaryOperand(ignoringParenImpCasts(hasPointerType())))
1118 void handleUnsafeOperation(UnsafeBufferUsageHandler
&Handler
,
1119 bool IsRelatedToDecl
,
1120 ASTContext
&Ctx
) const override
{
1121 Handler
.handleUnsafeOperation(Op
, IsRelatedToDecl
, Ctx
);
1123 SourceLocation
getSourceLoc() const override
{ return Op
->getBeginLoc(); }
1125 DeclUseList
getClaimedVarUseSites() const override
{
1126 if (const auto *DRE
=
1127 dyn_cast
<DeclRefExpr
>(Op
->getSubExpr()->IgnoreParenImpCasts())) {
1135 /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
1136 /// it doesn't have any bounds checks for the array.
1137 class ArraySubscriptGadget
: public WarningGadget
{
1138 static constexpr const char *const ArraySubscrTag
= "ArraySubscript";
1139 const ArraySubscriptExpr
*ASE
;
1142 ArraySubscriptGadget(const MatchFinder::MatchResult
&Result
)
1143 : WarningGadget(Kind::ArraySubscript
),
1144 ASE(Result
.Nodes
.getNodeAs
<ArraySubscriptExpr
>(ArraySubscrTag
)) {}
1146 static bool classof(const Gadget
*G
) {
1147 return G
->getKind() == Kind::ArraySubscript
;
1150 static Matcher
matcher() {
1152 return stmt(arraySubscriptExpr(
1153 hasBase(ignoringParenImpCasts(
1154 anyOf(hasPointerType(), hasArrayType()))),
1156 isSafeArraySubscript(),
1158 anyOf(integerLiteral(equals(0)), arrayInitIndexExpr())
1160 ))).bind(ArraySubscrTag
));
1164 void handleUnsafeOperation(UnsafeBufferUsageHandler
&Handler
,
1165 bool IsRelatedToDecl
,
1166 ASTContext
&Ctx
) const override
{
1167 Handler
.handleUnsafeOperation(ASE
, IsRelatedToDecl
, Ctx
);
1169 SourceLocation
getSourceLoc() const override
{ return ASE
->getBeginLoc(); }
1171 DeclUseList
getClaimedVarUseSites() const override
{
1172 if (const auto *DRE
=
1173 dyn_cast
<DeclRefExpr
>(ASE
->getBase()->IgnoreParenImpCasts())) {
1181 /// A pointer arithmetic expression of one of the forms:
1183 /// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n
1185 class PointerArithmeticGadget
: public WarningGadget
{
1186 static constexpr const char *const PointerArithmeticTag
= "ptrAdd";
1187 static constexpr const char *const PointerArithmeticPointerTag
= "ptrAddPtr";
1188 const BinaryOperator
*PA
; // pointer arithmetic expression
1189 const Expr
*Ptr
; // the pointer expression in `PA`
1192 PointerArithmeticGadget(const MatchFinder::MatchResult
&Result
)
1193 : WarningGadget(Kind::PointerArithmetic
),
1194 PA(Result
.Nodes
.getNodeAs
<BinaryOperator
>(PointerArithmeticTag
)),
1195 Ptr(Result
.Nodes
.getNodeAs
<Expr
>(PointerArithmeticPointerTag
)) {}
1197 static bool classof(const Gadget
*G
) {
1198 return G
->getKind() == Kind::PointerArithmetic
;
1201 static Matcher
matcher() {
1202 auto HasIntegerType
= anyOf(hasType(isInteger()), hasType(enumType()));
1204 allOf(hasOperatorName("+"),
1205 hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag
)),
1206 hasLHS(HasIntegerType
));
1208 allOf(anyOf(hasOperatorName("+"), hasOperatorName("-"),
1209 hasOperatorName("+="), hasOperatorName("-=")),
1210 hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag
)),
1211 hasRHS(HasIntegerType
));
1213 return stmt(binaryOperator(anyOf(PtrAtLeft
, PtrAtRight
))
1214 .bind(PointerArithmeticTag
));
1217 void handleUnsafeOperation(UnsafeBufferUsageHandler
&Handler
,
1218 bool IsRelatedToDecl
,
1219 ASTContext
&Ctx
) const override
{
1220 Handler
.handleUnsafeOperation(PA
, IsRelatedToDecl
, Ctx
);
1222 SourceLocation
getSourceLoc() const override
{ return PA
->getBeginLoc(); }
1224 DeclUseList
getClaimedVarUseSites() const override
{
1225 if (const auto *DRE
= dyn_cast
<DeclRefExpr
>(Ptr
->IgnoreParenImpCasts())) {
1231 // FIXME: pointer adding zero should be fine
1232 // FIXME: this gadget will need a fix-it
1235 class SpanTwoParamConstructorGadget
: public WarningGadget
{
1236 static constexpr const char *const SpanTwoParamConstructorTag
=
1237 "spanTwoParamConstructor";
1238 const CXXConstructExpr
*Ctor
; // the span constructor expression
1241 SpanTwoParamConstructorGadget(const MatchFinder::MatchResult
&Result
)
1242 : WarningGadget(Kind::SpanTwoParamConstructor
),
1243 Ctor(Result
.Nodes
.getNodeAs
<CXXConstructExpr
>(
1244 SpanTwoParamConstructorTag
)) {}
1246 static bool classof(const Gadget
*G
) {
1247 return G
->getKind() == Kind::SpanTwoParamConstructor
;
1250 static Matcher
matcher() {
1251 auto HasTwoParamSpanCtorDecl
= hasDeclaration(
1252 cxxConstructorDecl(hasDeclContext(isInStdNamespace()), hasName("span"),
1253 parameterCountIs(2)));
1255 return stmt(cxxConstructExpr(HasTwoParamSpanCtorDecl
,
1256 unless(isSafeSpanTwoParamConstruct()))
1257 .bind(SpanTwoParamConstructorTag
));
1260 static Matcher
matcher(const UnsafeBufferUsageHandler
*Handler
) {
1261 return stmt(unless(ignoreUnsafeBufferInContainer(Handler
)), matcher());
1264 void handleUnsafeOperation(UnsafeBufferUsageHandler
&Handler
,
1265 bool IsRelatedToDecl
,
1266 ASTContext
&Ctx
) const override
{
1267 Handler
.handleUnsafeOperationInContainer(Ctor
, IsRelatedToDecl
, Ctx
);
1269 SourceLocation
getSourceLoc() const override
{ return Ctor
->getBeginLoc(); }
1271 DeclUseList
getClaimedVarUseSites() const override
{
1272 // If the constructor call is of the form `std::span{var, n}`, `var` is
1273 // considered an unsafe variable.
1274 if (auto *DRE
= dyn_cast
<DeclRefExpr
>(Ctor
->getArg(0))) {
1275 if (isa
<VarDecl
>(DRE
->getDecl()))
1282 /// A pointer initialization expression of the form:
1286 class PointerInitGadget
: public FixableGadget
{
1288 static constexpr const char *const PointerInitLHSTag
= "ptrInitLHS";
1289 static constexpr const char *const PointerInitRHSTag
= "ptrInitRHS";
1290 const VarDecl
*PtrInitLHS
; // the LHS pointer expression in `PI`
1291 const DeclRefExpr
*PtrInitRHS
; // the RHS pointer expression in `PI`
1294 PointerInitGadget(const MatchFinder::MatchResult
&Result
)
1295 : FixableGadget(Kind::PointerInit
),
1296 PtrInitLHS(Result
.Nodes
.getNodeAs
<VarDecl
>(PointerInitLHSTag
)),
1297 PtrInitRHS(Result
.Nodes
.getNodeAs
<DeclRefExpr
>(PointerInitRHSTag
)) {}
1299 static bool classof(const Gadget
*G
) {
1300 return G
->getKind() == Kind::PointerInit
;
1303 static Matcher
matcher() {
1304 auto PtrInitStmt
= declStmt(hasSingleDecl(
1305 varDecl(hasInitializer(ignoringImpCasts(
1306 declRefExpr(hasPointerType(), toSupportedVariable())
1307 .bind(PointerInitRHSTag
))))
1308 .bind(PointerInitLHSTag
)));
1310 return stmt(PtrInitStmt
);
1313 virtual std::optional
<FixItList
>
1314 getFixits(const FixitStrategy
&S
) const override
;
1315 SourceLocation
getSourceLoc() const override
{
1316 return PtrInitRHS
->getBeginLoc();
1319 virtual DeclUseList
getClaimedVarUseSites() const override
{
1320 return DeclUseList
{PtrInitRHS
};
1323 virtual std::optional
<std::pair
<const VarDecl
*, const VarDecl
*>>
1324 getStrategyImplications() const override
{
1325 return std::make_pair(PtrInitLHS
, cast
<VarDecl
>(PtrInitRHS
->getDecl()));
1329 /// A pointer assignment expression of the form:
1333 /// where both `p` and `q` are pointers.
1334 class PtrToPtrAssignmentGadget
: public FixableGadget
{
1336 static constexpr const char *const PointerAssignLHSTag
= "ptrLHS";
1337 static constexpr const char *const PointerAssignRHSTag
= "ptrRHS";
1338 const DeclRefExpr
*PtrLHS
; // the LHS pointer expression in `PA`
1339 const DeclRefExpr
*PtrRHS
; // the RHS pointer expression in `PA`
1342 PtrToPtrAssignmentGadget(const MatchFinder::MatchResult
&Result
)
1343 : FixableGadget(Kind::PtrToPtrAssignment
),
1344 PtrLHS(Result
.Nodes
.getNodeAs
<DeclRefExpr
>(PointerAssignLHSTag
)),
1345 PtrRHS(Result
.Nodes
.getNodeAs
<DeclRefExpr
>(PointerAssignRHSTag
)) {}
1347 static bool classof(const Gadget
*G
) {
1348 return G
->getKind() == Kind::PtrToPtrAssignment
;
1351 static Matcher
matcher() {
1352 auto PtrAssignExpr
= binaryOperator(
1353 allOf(hasOperatorName("="),
1354 hasRHS(ignoringParenImpCasts(
1355 declRefExpr(hasPointerType(), toSupportedVariable())
1356 .bind(PointerAssignRHSTag
))),
1357 hasLHS(declRefExpr(hasPointerType(), toSupportedVariable())
1358 .bind(PointerAssignLHSTag
))));
1360 return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr
));
1363 virtual std::optional
<FixItList
>
1364 getFixits(const FixitStrategy
&S
) const override
;
1365 SourceLocation
getSourceLoc() const override
{ return PtrLHS
->getBeginLoc(); }
1367 virtual DeclUseList
getClaimedVarUseSites() const override
{
1368 return DeclUseList
{PtrLHS
, PtrRHS
};
1371 virtual std::optional
<std::pair
<const VarDecl
*, const VarDecl
*>>
1372 getStrategyImplications() const override
{
1373 return std::make_pair(cast
<VarDecl
>(PtrLHS
->getDecl()),
1374 cast
<VarDecl
>(PtrRHS
->getDecl()));
1378 /// An assignment expression of the form:
1382 /// where `p` is a pointer and `array` is a constant size array.
1383 class CArrayToPtrAssignmentGadget
: public FixableGadget
{
1385 static constexpr const char *const PointerAssignLHSTag
= "ptrLHS";
1386 static constexpr const char *const PointerAssignRHSTag
= "ptrRHS";
1387 const DeclRefExpr
*PtrLHS
; // the LHS pointer expression in `PA`
1388 const DeclRefExpr
*PtrRHS
; // the RHS pointer expression in `PA`
1391 CArrayToPtrAssignmentGadget(const MatchFinder::MatchResult
&Result
)
1392 : FixableGadget(Kind::CArrayToPtrAssignment
),
1393 PtrLHS(Result
.Nodes
.getNodeAs
<DeclRefExpr
>(PointerAssignLHSTag
)),
1394 PtrRHS(Result
.Nodes
.getNodeAs
<DeclRefExpr
>(PointerAssignRHSTag
)) {}
1396 static bool classof(const Gadget
*G
) {
1397 return G
->getKind() == Kind::CArrayToPtrAssignment
;
1400 static Matcher
matcher() {
1401 auto PtrAssignExpr
= binaryOperator(
1402 allOf(hasOperatorName("="),
1403 hasRHS(ignoringParenImpCasts(
1404 declRefExpr(hasType(hasCanonicalType(constantArrayType())),
1405 toSupportedVariable())
1406 .bind(PointerAssignRHSTag
))),
1407 hasLHS(declRefExpr(hasPointerType(), toSupportedVariable())
1408 .bind(PointerAssignLHSTag
))));
1410 return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr
));
1413 virtual std::optional
<FixItList
>
1414 getFixits(const FixitStrategy
&S
) const override
;
1415 SourceLocation
getSourceLoc() const override
{ return PtrLHS
->getBeginLoc(); }
1417 virtual DeclUseList
getClaimedVarUseSites() const override
{
1418 return DeclUseList
{PtrLHS
, PtrRHS
};
1421 virtual std::optional
<std::pair
<const VarDecl
*, const VarDecl
*>>
1422 getStrategyImplications() const override
{
1427 /// A call of a function or method that performs unchecked buffer operations
1428 /// over one of its pointer parameters.
1429 class UnsafeBufferUsageAttrGadget
: public WarningGadget
{
1430 constexpr static const char *const OpTag
= "attr_expr";
1434 UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult
&Result
)
1435 : WarningGadget(Kind::UnsafeBufferUsageAttr
),
1436 Op(Result
.Nodes
.getNodeAs
<Expr
>(OpTag
)) {}
1438 static bool classof(const Gadget
*G
) {
1439 return G
->getKind() == Kind::UnsafeBufferUsageAttr
;
1442 static Matcher
matcher() {
1443 auto HasUnsafeFieldDecl
=
1444 member(fieldDecl(hasAttr(attr::UnsafeBufferUsage
)));
1446 auto HasUnsafeFnDecl
=
1447 callee(functionDecl(hasAttr(attr::UnsafeBufferUsage
)));
1449 return stmt(anyOf(callExpr(HasUnsafeFnDecl
).bind(OpTag
),
1450 memberExpr(HasUnsafeFieldDecl
).bind(OpTag
)));
1453 void handleUnsafeOperation(UnsafeBufferUsageHandler
&Handler
,
1454 bool IsRelatedToDecl
,
1455 ASTContext
&Ctx
) const override
{
1456 Handler
.handleUnsafeOperation(Op
, IsRelatedToDecl
, Ctx
);
1458 SourceLocation
getSourceLoc() const override
{ return Op
->getBeginLoc(); }
1460 DeclUseList
getClaimedVarUseSites() const override
{ return {}; }
1463 /// A call of a constructor that performs unchecked buffer operations
1464 /// over one of its pointer parameters, or constructs a class object that will
1465 /// perform buffer operations that depend on the correctness of the parameters.
1466 class UnsafeBufferUsageCtorAttrGadget
: public WarningGadget
{
1467 constexpr static const char *const OpTag
= "cxx_construct_expr";
1468 const CXXConstructExpr
*Op
;
1471 UnsafeBufferUsageCtorAttrGadget(const MatchFinder::MatchResult
&Result
)
1472 : WarningGadget(Kind::UnsafeBufferUsageCtorAttr
),
1473 Op(Result
.Nodes
.getNodeAs
<CXXConstructExpr
>(OpTag
)) {}
1475 static bool classof(const Gadget
*G
) {
1476 return G
->getKind() == Kind::UnsafeBufferUsageCtorAttr
;
1479 static Matcher
matcher() {
1480 auto HasUnsafeCtorDecl
=
1481 hasDeclaration(cxxConstructorDecl(hasAttr(attr::UnsafeBufferUsage
)));
1482 // std::span(ptr, size) ctor is handled by SpanTwoParamConstructorGadget.
1483 auto HasTwoParamSpanCtorDecl
= SpanTwoParamConstructorGadget::matcher();
1485 cxxConstructExpr(HasUnsafeCtorDecl
, unless(HasTwoParamSpanCtorDecl
))
1489 void handleUnsafeOperation(UnsafeBufferUsageHandler
&Handler
,
1490 bool IsRelatedToDecl
,
1491 ASTContext
&Ctx
) const override
{
1492 Handler
.handleUnsafeOperation(Op
, IsRelatedToDecl
, Ctx
);
1494 SourceLocation
getSourceLoc() const override
{ return Op
->getBeginLoc(); }
1496 DeclUseList
getClaimedVarUseSites() const override
{ return {}; }
1499 // Warning gadget for unsafe invocation of span::data method.
1500 // Triggers when the pointer returned by the invocation is immediately
1501 // cast to a larger type.
1503 class DataInvocationGadget
: public WarningGadget
{
1504 constexpr static const char *const OpTag
= "data_invocation_expr";
1505 const ExplicitCastExpr
*Op
;
1508 DataInvocationGadget(const MatchFinder::MatchResult
&Result
)
1509 : WarningGadget(Kind::DataInvocation
),
1510 Op(Result
.Nodes
.getNodeAs
<ExplicitCastExpr
>(OpTag
)) {}
1512 static bool classof(const Gadget
*G
) {
1513 return G
->getKind() == Kind::DataInvocation
;
1516 static Matcher
matcher() {
1518 Matcher callExpr
= cxxMemberCallExpr(callee(
1519 cxxMethodDecl(hasName("data"),
1520 ofClass(anyOf(hasName("std::span"), hasName("std::array"),
1521 hasName("std::vector"))))));
1523 explicitCastExpr(anyOf(has(callExpr
), has(parenExpr(has(callExpr
)))))
1527 void handleUnsafeOperation(UnsafeBufferUsageHandler
&Handler
,
1528 bool IsRelatedToDecl
,
1529 ASTContext
&Ctx
) const override
{
1530 Handler
.handleUnsafeOperation(Op
, IsRelatedToDecl
, Ctx
);
1532 SourceLocation
getSourceLoc() const override
{ return Op
->getBeginLoc(); }
1534 DeclUseList
getClaimedVarUseSites() const override
{ return {}; }
1537 class UnsafeLibcFunctionCallGadget
: public WarningGadget
{
1538 const CallExpr
*const Call
;
1539 const Expr
*UnsafeArg
= nullptr;
1540 constexpr static const char *const Tag
= "UnsafeLibcFunctionCall";
1541 // Extra tags for additional information:
1542 constexpr static const char *const UnsafeSprintfTag
=
1543 "UnsafeLibcFunctionCall_sprintf";
1544 constexpr static const char *const UnsafeSizedByTag
=
1545 "UnsafeLibcFunctionCall_sized_by";
1546 constexpr static const char *const UnsafeStringTag
=
1547 "UnsafeLibcFunctionCall_string";
1548 constexpr static const char *const UnsafeVaListTag
=
1549 "UnsafeLibcFunctionCall_va_list";
1552 OTHERS
= 0, // no specific information, the callee function is unsafe
1553 SPRINTF
= 1, // never call `-sprintf`s, call `-snprintf`s instead.
1555 2, // the first two arguments of `snprintf` function have
1556 // "__sized_by" relation but they do not conform to safe patterns
1557 STRING
= 3, // an argument is a pointer-to-char-as-string but does not
1558 // guarantee null-termination
1559 VA_LIST
= 4, // one of the `-printf`s function that take va_list, which is
1560 // considered unsafe as it cannot be checked at compile time
1561 } WarnedFunKind
= OTHERS
;
1564 UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult
&Result
)
1565 : WarningGadget(Kind::UnsafeLibcFunctionCall
),
1566 Call(Result
.Nodes
.getNodeAs
<CallExpr
>(Tag
)) {
1567 if (Result
.Nodes
.getNodeAs
<Decl
>(UnsafeSprintfTag
))
1568 WarnedFunKind
= SPRINTF
;
1569 else if (auto *E
= Result
.Nodes
.getNodeAs
<Expr
>(UnsafeStringTag
)) {
1570 WarnedFunKind
= STRING
;
1572 } else if (Result
.Nodes
.getNodeAs
<CallExpr
>(UnsafeSizedByTag
)) {
1573 WarnedFunKind
= SIZED_BY
;
1574 UnsafeArg
= Call
->getArg(0);
1575 } else if (Result
.Nodes
.getNodeAs
<Decl
>(UnsafeVaListTag
))
1576 WarnedFunKind
= VA_LIST
;
1579 static Matcher
matcher(const UnsafeBufferUsageHandler
*Handler
) {
1580 return stmt(unless(ignoreUnsafeLibcCall(Handler
)),
1583 callee(functionDecl(anyOf(
1584 // Match a predefined unsafe libc
1586 functionDecl(libc_func_matchers::isPredefinedUnsafeLibcFunc()),
1587 // Match a call to one of the `v*printf` functions
1588 // taking va-list, which cannot be checked at
1590 functionDecl(libc_func_matchers::isUnsafeVaListPrintfFunc())
1591 .bind(UnsafeVaListTag
),
1592 // Match a call to a `sprintf` function, which is never
1594 functionDecl(libc_func_matchers::isUnsafeSprintfFunc())
1595 .bind(UnsafeSprintfTag
)))),
1596 // (unless the call has a sole string literal argument):
1598 allOf(hasArgument(0, expr(stringLiteral())), hasNumArgs(1)))),
1600 // The following two cases require checking against actual
1601 // arguments of the call:
1603 // Match a call to an `snprintf` function. And first two
1604 // arguments of the call (that describe a buffer) are not in
1606 callExpr(callee(functionDecl(libc_func_matchers::isNormalPrintfFunc())),
1607 libc_func_matchers::hasUnsafeSnprintfBuffer())
1608 .bind(UnsafeSizedByTag
),
1609 // Match a call to a `printf` function, which can be safe if
1610 // all arguments are null-terminated:
1611 callExpr(callee(functionDecl(libc_func_matchers::isNormalPrintfFunc())),
1612 libc_func_matchers::hasUnsafePrintfStringArg(
1613 expr().bind(UnsafeStringTag
)))));
1616 const Stmt
*getBaseStmt() const { return Call
; }
1618 SourceLocation
getSourceLoc() const override
{ return Call
->getBeginLoc(); }
1620 void handleUnsafeOperation(UnsafeBufferUsageHandler
&Handler
,
1621 bool IsRelatedToDecl
,
1622 ASTContext
&Ctx
) const override
{
1623 Handler
.handleUnsafeLibcCall(Call
, WarnedFunKind
, Ctx
, UnsafeArg
);
1626 DeclUseList
getClaimedVarUseSites() const override
{ return {}; }
1629 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
1630 // Context (see `isInUnspecifiedLvalueContext`).
1631 // Note here `[]` is the built-in subscript operator.
1632 class ULCArraySubscriptGadget
: public FixableGadget
{
1634 static constexpr const char *const ULCArraySubscriptTag
=
1635 "ArraySubscriptUnderULC";
1636 const ArraySubscriptExpr
*Node
;
1639 ULCArraySubscriptGadget(const MatchFinder::MatchResult
&Result
)
1640 : FixableGadget(Kind::ULCArraySubscript
),
1641 Node(Result
.Nodes
.getNodeAs
<ArraySubscriptExpr
>(ULCArraySubscriptTag
)) {
1642 assert(Node
!= nullptr && "Expecting a non-null matching result");
1645 static bool classof(const Gadget
*G
) {
1646 return G
->getKind() == Kind::ULCArraySubscript
;
1649 static Matcher
matcher() {
1650 auto ArrayOrPtr
= anyOf(hasPointerType(), hasArrayType());
1651 auto BaseIsArrayOrPtrDRE
= hasBase(
1652 ignoringParenImpCasts(declRefExpr(ArrayOrPtr
, toSupportedVariable())));
1654 arraySubscriptExpr(BaseIsArrayOrPtrDRE
).bind(ULCArraySubscriptTag
);
1656 return expr(isInUnspecifiedLvalueContext(Target
));
1659 virtual std::optional
<FixItList
>
1660 getFixits(const FixitStrategy
&S
) const override
;
1661 SourceLocation
getSourceLoc() const override
{ return Node
->getBeginLoc(); }
1663 virtual DeclUseList
getClaimedVarUseSites() const override
{
1664 if (const auto *DRE
=
1665 dyn_cast
<DeclRefExpr
>(Node
->getBase()->IgnoreImpCasts())) {
1672 // Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the
1673 // unspecified pointer context (isInUnspecifiedPointerContext). The gadget emits
1674 // fixit of the form `UPC(DRE.data())`.
1675 class UPCStandalonePointerGadget
: public FixableGadget
{
1677 static constexpr const char *const DeclRefExprTag
= "StandalonePointer";
1678 const DeclRefExpr
*Node
;
1681 UPCStandalonePointerGadget(const MatchFinder::MatchResult
&Result
)
1682 : FixableGadget(Kind::UPCStandalonePointer
),
1683 Node(Result
.Nodes
.getNodeAs
<DeclRefExpr
>(DeclRefExprTag
)) {
1684 assert(Node
!= nullptr && "Expecting a non-null matching result");
1687 static bool classof(const Gadget
*G
) {
1688 return G
->getKind() == Kind::UPCStandalonePointer
;
1691 static Matcher
matcher() {
1692 auto ArrayOrPtr
= anyOf(hasPointerType(), hasArrayType());
1693 auto target
= expr(ignoringParenImpCasts(
1694 declRefExpr(allOf(ArrayOrPtr
, toSupportedVariable()))
1695 .bind(DeclRefExprTag
)));
1696 return stmt(isInUnspecifiedPointerContext(target
));
1699 virtual std::optional
<FixItList
>
1700 getFixits(const FixitStrategy
&S
) const override
;
1701 SourceLocation
getSourceLoc() const override
{ return Node
->getBeginLoc(); }
1703 virtual DeclUseList
getClaimedVarUseSites() const override
{ return {Node
}; }
1706 class PointerDereferenceGadget
: public FixableGadget
{
1707 static constexpr const char *const BaseDeclRefExprTag
= "BaseDRE";
1708 static constexpr const char *const OperatorTag
= "op";
1710 const DeclRefExpr
*BaseDeclRefExpr
= nullptr;
1711 const UnaryOperator
*Op
= nullptr;
1714 PointerDereferenceGadget(const MatchFinder::MatchResult
&Result
)
1715 : FixableGadget(Kind::PointerDereference
),
1717 Result
.Nodes
.getNodeAs
<DeclRefExpr
>(BaseDeclRefExprTag
)),
1718 Op(Result
.Nodes
.getNodeAs
<UnaryOperator
>(OperatorTag
)) {}
1720 static bool classof(const Gadget
*G
) {
1721 return G
->getKind() == Kind::PointerDereference
;
1724 static Matcher
matcher() {
1727 hasOperatorName("*"),
1728 has(expr(ignoringParenImpCasts(
1729 declRefExpr(toSupportedVariable()).bind(BaseDeclRefExprTag
)))))
1732 return expr(isInUnspecifiedLvalueContext(Target
));
1735 DeclUseList
getClaimedVarUseSites() const override
{
1736 return {BaseDeclRefExpr
};
1739 virtual std::optional
<FixItList
>
1740 getFixits(const FixitStrategy
&S
) const override
;
1741 SourceLocation
getSourceLoc() const override
{ return Op
->getBeginLoc(); }
1744 // Represents expressions of the form `&DRE[any]` in the Unspecified Pointer
1745 // Context (see `isInUnspecifiedPointerContext`).
1746 // Note here `[]` is the built-in subscript operator.
1747 class UPCAddressofArraySubscriptGadget
: public FixableGadget
{
1749 static constexpr const char *const UPCAddressofArraySubscriptTag
=
1750 "AddressofArraySubscriptUnderUPC";
1751 const UnaryOperator
*Node
; // the `&DRE[any]` node
1754 UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult
&Result
)
1755 : FixableGadget(Kind::ULCArraySubscript
),
1756 Node(Result
.Nodes
.getNodeAs
<UnaryOperator
>(
1757 UPCAddressofArraySubscriptTag
)) {
1758 assert(Node
!= nullptr && "Expecting a non-null matching result");
1761 static bool classof(const Gadget
*G
) {
1762 return G
->getKind() == Kind::UPCAddressofArraySubscript
;
1765 static Matcher
matcher() {
1766 return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
1768 hasOperatorName("&"),
1769 hasUnaryOperand(arraySubscriptExpr(hasBase(
1770 ignoringParenImpCasts(declRefExpr(toSupportedVariable()))))))
1771 .bind(UPCAddressofArraySubscriptTag
)))));
1774 virtual std::optional
<FixItList
>
1775 getFixits(const FixitStrategy
&) const override
;
1776 SourceLocation
getSourceLoc() const override
{ return Node
->getBeginLoc(); }
1778 virtual DeclUseList
getClaimedVarUseSites() const override
{
1779 const auto *ArraySubst
= cast
<ArraySubscriptExpr
>(Node
->getSubExpr());
1781 cast
<DeclRefExpr
>(ArraySubst
->getBase()->IgnoreParenImpCasts());
1788 // An auxiliary tracking facility for the fixit analysis. It helps connect
1789 // declarations to their uses and make sure we've covered all uses with our
1790 // analysis before we try to fix the declaration.
1791 class DeclUseTracker
{
1792 using UseSetTy
= SmallSet
<const DeclRefExpr
*, 16>;
1793 using DefMapTy
= DenseMap
<const VarDecl
*, const DeclStmt
*>;
1795 // Allocate on the heap for easier move.
1796 std::unique_ptr
<UseSetTy
> Uses
{std::make_unique
<UseSetTy
>()};
1800 DeclUseTracker() = default;
1801 DeclUseTracker(const DeclUseTracker
&) = delete; // Let's avoid copies.
1802 DeclUseTracker
&operator=(const DeclUseTracker
&) = delete;
1803 DeclUseTracker(DeclUseTracker
&&) = default;
1804 DeclUseTracker
&operator=(DeclUseTracker
&&) = default;
1806 // Start tracking a freshly discovered DRE.
1807 void discoverUse(const DeclRefExpr
*DRE
) { Uses
->insert(DRE
); }
1809 // Stop tracking the DRE as it's been fully figured out.
1810 void claimUse(const DeclRefExpr
*DRE
) {
1811 assert(Uses
->count(DRE
) &&
1812 "DRE not found or claimed by multiple matchers!");
1816 // A variable is unclaimed if at least one use is unclaimed.
1817 bool hasUnclaimedUses(const VarDecl
*VD
) const {
1818 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
1819 return any_of(*Uses
, [VD
](const DeclRefExpr
*DRE
) {
1820 return DRE
->getDecl()->getCanonicalDecl() == VD
->getCanonicalDecl();
1824 UseSetTy
getUnclaimedUses(const VarDecl
*VD
) const {
1826 for (auto use
: *Uses
) {
1827 if (use
->getDecl()->getCanonicalDecl() == VD
->getCanonicalDecl()) {
1828 ReturnSet
.insert(use
);
1834 void discoverDecl(const DeclStmt
*DS
) {
1835 for (const Decl
*D
: DS
->decls()) {
1836 if (const auto *VD
= dyn_cast
<VarDecl
>(D
)) {
1837 // FIXME: Assertion temporarily disabled due to a bug in
1838 // ASTMatcher internal behavior in presence of GNU
1839 // statement-expressions. We need to properly investigate this
1840 // because it can screw up our algorithm in other ways.
1841 // assert(Defs.count(VD) == 0 && "Definition already discovered!");
1847 const DeclStmt
*lookupDecl(const VarDecl
*VD
) const {
1848 return Defs
.lookup(VD
);
1853 // Representing a pointer type expression of the form `++Ptr` in an Unspecified
1854 // Pointer Context (UPC):
1855 class UPCPreIncrementGadget
: public FixableGadget
{
1857 static constexpr const char *const UPCPreIncrementTag
=
1858 "PointerPreIncrementUnderUPC";
1859 const UnaryOperator
*Node
; // the `++Ptr` node
1862 UPCPreIncrementGadget(const MatchFinder::MatchResult
&Result
)
1863 : FixableGadget(Kind::UPCPreIncrement
),
1864 Node(Result
.Nodes
.getNodeAs
<UnaryOperator
>(UPCPreIncrementTag
)) {
1865 assert(Node
!= nullptr && "Expecting a non-null matching result");
1868 static bool classof(const Gadget
*G
) {
1869 return G
->getKind() == Kind::UPCPreIncrement
;
1872 static Matcher
matcher() {
1873 // Note here we match `++Ptr` for any expression `Ptr` of pointer type.
1874 // Although currently we can only provide fix-its when `Ptr` is a DRE, we
1875 // can have the matcher be general, so long as `getClaimedVarUseSites` does
1877 return stmt(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
1878 unaryOperator(isPreInc(),
1879 hasUnaryOperand(declRefExpr(toSupportedVariable())))
1880 .bind(UPCPreIncrementTag
)))));
1883 virtual std::optional
<FixItList
>
1884 getFixits(const FixitStrategy
&S
) const override
;
1885 SourceLocation
getSourceLoc() const override
{ return Node
->getBeginLoc(); }
1887 virtual DeclUseList
getClaimedVarUseSites() const override
{
1888 return {dyn_cast
<DeclRefExpr
>(Node
->getSubExpr())};
1892 // Representing a pointer type expression of the form `Ptr += n` in an
1893 // Unspecified Untyped Context (UUC):
1894 class UUCAddAssignGadget
: public FixableGadget
{
1896 static constexpr const char *const UUCAddAssignTag
=
1897 "PointerAddAssignUnderUUC";
1898 static constexpr const char *const OffsetTag
= "Offset";
1900 const BinaryOperator
*Node
; // the `Ptr += n` node
1901 const Expr
*Offset
= nullptr;
1904 UUCAddAssignGadget(const MatchFinder::MatchResult
&Result
)
1905 : FixableGadget(Kind::UUCAddAssign
),
1906 Node(Result
.Nodes
.getNodeAs
<BinaryOperator
>(UUCAddAssignTag
)),
1907 Offset(Result
.Nodes
.getNodeAs
<Expr
>(OffsetTag
)) {
1908 assert(Node
!= nullptr && "Expecting a non-null matching result");
1911 static bool classof(const Gadget
*G
) {
1912 return G
->getKind() == Kind::UUCAddAssign
;
1915 static Matcher
matcher() {
1917 return stmt(isInUnspecifiedUntypedContext(expr(ignoringImpCasts(
1918 binaryOperator(hasOperatorName("+="),
1922 toSupportedVariable())),
1923 hasRHS(expr().bind(OffsetTag
)))
1924 .bind(UUCAddAssignTag
)))));
1928 virtual std::optional
<FixItList
>
1929 getFixits(const FixitStrategy
&S
) const override
;
1930 SourceLocation
getSourceLoc() const override
{ return Node
->getBeginLoc(); }
1932 virtual DeclUseList
getClaimedVarUseSites() const override
{
1933 return {dyn_cast
<DeclRefExpr
>(Node
->getLHS())};
1937 // Representing a fixable expression of the form `*(ptr + 123)` or `*(123 +
1939 class DerefSimplePtrArithFixableGadget
: public FixableGadget
{
1940 static constexpr const char *const BaseDeclRefExprTag
= "BaseDRE";
1941 static constexpr const char *const DerefOpTag
= "DerefOp";
1942 static constexpr const char *const AddOpTag
= "AddOp";
1943 static constexpr const char *const OffsetTag
= "Offset";
1945 const DeclRefExpr
*BaseDeclRefExpr
= nullptr;
1946 const UnaryOperator
*DerefOp
= nullptr;
1947 const BinaryOperator
*AddOp
= nullptr;
1948 const IntegerLiteral
*Offset
= nullptr;
1951 DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult
&Result
)
1952 : FixableGadget(Kind::DerefSimplePtrArithFixable
),
1954 Result
.Nodes
.getNodeAs
<DeclRefExpr
>(BaseDeclRefExprTag
)),
1955 DerefOp(Result
.Nodes
.getNodeAs
<UnaryOperator
>(DerefOpTag
)),
1956 AddOp(Result
.Nodes
.getNodeAs
<BinaryOperator
>(AddOpTag
)),
1957 Offset(Result
.Nodes
.getNodeAs
<IntegerLiteral
>(OffsetTag
)) {}
1959 static Matcher
matcher() {
1961 auto ThePtr
= expr(hasPointerType(),
1962 ignoringImpCasts(declRefExpr(toSupportedVariable()).
1963 bind(BaseDeclRefExprTag
)));
1964 auto PlusOverPtrAndInteger
= expr(anyOf(
1965 binaryOperator(hasOperatorName("+"), hasLHS(ThePtr
),
1966 hasRHS(integerLiteral().bind(OffsetTag
)))
1968 binaryOperator(hasOperatorName("+"), hasRHS(ThePtr
),
1969 hasLHS(integerLiteral().bind(OffsetTag
)))
1971 return isInUnspecifiedLvalueContext(unaryOperator(
1972 hasOperatorName("*"),
1973 hasUnaryOperand(ignoringParens(PlusOverPtrAndInteger
)))
1978 virtual std::optional
<FixItList
>
1979 getFixits(const FixitStrategy
&s
) const final
;
1980 SourceLocation
getSourceLoc() const override
{
1981 return DerefOp
->getBeginLoc();
1984 virtual DeclUseList
getClaimedVarUseSites() const final
{
1985 return {BaseDeclRefExpr
};
1989 /// Scan the function and return a list of gadgets found with provided kits.
1990 static std::tuple
<FixableGadgetList
, WarningGadgetList
, DeclUseTracker
>
1991 findGadgets(const Decl
*D
, const UnsafeBufferUsageHandler
&Handler
,
1992 bool EmitSuggestions
) {
1994 struct GadgetFinderCallback
: MatchFinder::MatchCallback
{
1995 FixableGadgetList FixableGadgets
;
1996 WarningGadgetList WarningGadgets
;
1997 DeclUseTracker Tracker
;
1999 void run(const MatchFinder::MatchResult
&Result
) override
{
2000 // In debug mode, assert that we've found exactly one gadget.
2001 // This helps us avoid conflicts in .bind() tags.
2005 [[maybe_unused
]] int numFound
= 0;
2006 #define NEXT ++numFound
2009 if (const auto *DRE
= Result
.Nodes
.getNodeAs
<DeclRefExpr
>("any_dre")) {
2010 Tracker
.discoverUse(DRE
);
2014 if (const auto *DS
= Result
.Nodes
.getNodeAs
<DeclStmt
>("any_ds")) {
2015 Tracker
.discoverDecl(DS
);
2019 // Figure out which matcher we've found, and call the appropriate
2020 // subclass constructor.
2021 // FIXME: Can we do this more logarithmically?
2022 #define FIXABLE_GADGET(name) \
2023 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
2024 FixableGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
2027 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
2028 #define WARNING_GADGET(name) \
2029 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
2030 WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
2033 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
2035 assert(numFound
>= 1 && "Gadgets not found in match result!");
2036 assert(numFound
<= 1 && "Conflicting bind tags in gadgets!");
2041 GadgetFinderCallback CB
;
2046 forEachDescendantEvaluatedStmt(stmt(anyOf(
2047 // Add Gadget::matcher() for every gadget in the registry.
2048 #define WARNING_GADGET(x) \
2049 allOf(x ## Gadget::matcher().bind(#x), \
2050 notInSafeBufferOptOut(&Handler)),
2051 #define WARNING_OPTIONAL_GADGET(x) \
2052 allOf(x ## Gadget::matcher(&Handler).bind(#x), \
2053 notInSafeBufferOptOut(&Handler)),
2054 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
2055 // Avoid a hanging comma.
2063 if (EmitSuggestions
) {
2067 forEachDescendantStmt(stmt(eachOf(
2068 #define FIXABLE_GADGET(x) \
2069 x ## Gadget::matcher().bind(#x),
2070 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
2071 // In parallel, match all DeclRefExprs so that to find out
2072 // whether there are any uncovered by gadgets.
2073 declRefExpr(anyOf(hasPointerType(), hasArrayType()),
2074 to(anyOf(varDecl(), bindingDecl()))).bind("any_dre"),
2075 // Also match DeclStmts because we'll need them when fixing
2076 // their underlying VarDecls that otherwise don't have
2077 // any backreferences to DeclStmts.
2078 declStmt().bind("any_ds")
2086 M
.match(*D
->getBody(), D
->getASTContext());
2087 return {std::move(CB
.FixableGadgets
), std::move(CB
.WarningGadgets
),
2088 std::move(CB
.Tracker
)};
// Strict ordering of AST nodes by the raw encoding of their begin locations.
template <typename NodeTy> struct CompareNode {
  bool operator()(const NodeTy *N1, const NodeTy *N2) const {
    const auto FirstKey = N1->getBeginLoc().getRawEncoding();
    const auto SecondKey = N2->getBeginLoc().getRawEncoding();
    return FirstKey < SecondKey;
  }
};
2099 struct WarningGadgetSets
{
2100 std::map
<const VarDecl
*, std::set
<const WarningGadget
*>,
2101 // To keep keys sorted by their locations in the map so that the
2102 // order is deterministic:
2103 CompareNode
<VarDecl
>>
2105 // These Gadgets are not related to pointer variables (e. g. temporaries).
2106 llvm::SmallVector
<const WarningGadget
*, 16> noVar
;
2109 static WarningGadgetSets
2110 groupWarningGadgetsByVar(const WarningGadgetList
&AllUnsafeOperations
) {
2111 WarningGadgetSets result
;
2112 // If some gadgets cover more than one
2113 // variable, they'll appear more than once in the map.
2114 for (auto &G
: AllUnsafeOperations
) {
2115 DeclUseList ClaimedVarUseSites
= G
->getClaimedVarUseSites();
2117 bool AssociatedWithVarDecl
= false;
2118 for (const DeclRefExpr
*DRE
: ClaimedVarUseSites
) {
2119 if (const auto *VD
= dyn_cast
<VarDecl
>(DRE
->getDecl())) {
2120 result
.byVar
[VD
].insert(G
.get());
2121 AssociatedWithVarDecl
= true;
2125 if (!AssociatedWithVarDecl
) {
2126 result
.noVar
.push_back(G
.get());
2133 struct FixableGadgetSets
{
2134 std::map
<const VarDecl
*, std::set
<const FixableGadget
*>,
2135 // To keep keys sorted by their locations in the map so that the
2136 // order is deterministic:
2137 CompareNode
<VarDecl
>>
2141 static FixableGadgetSets
2142 groupFixablesByVar(FixableGadgetList
&&AllFixableOperations
) {
2143 FixableGadgetSets FixablesForUnsafeVars
;
2144 for (auto &F
: AllFixableOperations
) {
2145 DeclUseList DREs
= F
->getClaimedVarUseSites();
2147 for (const DeclRefExpr
*DRE
: DREs
) {
2148 if (const auto *VD
= dyn_cast
<VarDecl
>(DRE
->getDecl())) {
2149 FixablesForUnsafeVars
.byVar
[VD
].insert(F
.get());
2153 return FixablesForUnsafeVars
;
2156 bool clang::internal::anyConflict(const SmallVectorImpl
<FixItHint
> &FixIts
,
2157 const SourceManager
&SM
) {
2158 // A simple interval overlap detection algorithm. Sorts all ranges by their
2159 // begin location then finds the first overlap in one pass.
2160 std::vector
<const FixItHint
*> All
; // a copy of `FixIts`
2162 for (const FixItHint
&H
: FixIts
)
2164 std::sort(All
.begin(), All
.end(),
2165 [&SM
](const FixItHint
*H1
, const FixItHint
*H2
) {
2166 return SM
.isBeforeInTranslationUnit(H1
->RemoveRange
.getBegin(),
2167 H2
->RemoveRange
.getBegin());
2170 const FixItHint
*CurrHint
= nullptr;
2172 for (const FixItHint
*Hint
: All
) {
2174 SM
.isBeforeInTranslationUnit(CurrHint
->RemoveRange
.getEnd(),
2175 Hint
->RemoveRange
.getBegin())) {
2176 // Either to initialize `CurrHint` or `CurrHint` does not
2177 // overlap with `Hint`:
2180 // In case `Hint` overlaps the `CurrHint`, we found at least one
2187 std::optional
<FixItList
>
2188 PtrToPtrAssignmentGadget::getFixits(const FixitStrategy
&S
) const {
2189 const auto *LeftVD
= cast
<VarDecl
>(PtrLHS
->getDecl());
2190 const auto *RightVD
= cast
<VarDecl
>(PtrRHS
->getDecl());
2191 switch (S
.lookup(LeftVD
)) {
2192 case FixitStrategy::Kind::Span
:
2193 if (S
.lookup(RightVD
) == FixitStrategy::Kind::Span
)
2195 return std::nullopt
;
2196 case FixitStrategy::Kind::Wontfix
:
2197 return std::nullopt
;
2198 case FixitStrategy::Kind::Iterator
:
2199 case FixitStrategy::Kind::Array
:
2200 return std::nullopt
;
2201 case FixitStrategy::Kind::Vector
:
2202 llvm_unreachable("unsupported strategies for FixableGadgets");
2204 return std::nullopt
;
2207 /// \returns fixit that adds .data() call after \DRE.
2208 static inline std::optional
<FixItList
> createDataFixit(const ASTContext
&Ctx
,
2209 const DeclRefExpr
*DRE
);
2211 std::optional
<FixItList
>
2212 CArrayToPtrAssignmentGadget::getFixits(const FixitStrategy
&S
) const {
2213 const auto *LeftVD
= cast
<VarDecl
>(PtrLHS
->getDecl());
2214 const auto *RightVD
= cast
<VarDecl
>(PtrRHS
->getDecl());
2215 // TLDR: Implementing fixits for non-Wontfix strategy on both LHS and RHS is
2218 // CArrayToPtrAssignmentGadget doesn't have strategy implications because
2219 // constant size array propagates its bounds. Because of that LHS and RHS are
2220 // addressed by two different fixits.
2222 // At the same time FixitStrategy S doesn't reflect what group a fixit belongs
2223 // to and can't be generally relied on in multi-variable Fixables!
2225 // E. g. If an instance of this gadget is fixing variable on LHS then the
2226 // variable on RHS is fixed by a different fixit and its strategy for LHS
2227 // fixit is as if Wontfix.
2229 // The only exception is Wontfix strategy for a given variable as that is
2230 // valid for any fixit produced for the given input source code.
2231 if (S
.lookup(LeftVD
) == FixitStrategy::Kind::Span
) {
2232 if (S
.lookup(RightVD
) == FixitStrategy::Kind::Wontfix
) {
2235 } else if (S
.lookup(LeftVD
) == FixitStrategy::Kind::Wontfix
) {
2236 if (S
.lookup(RightVD
) == FixitStrategy::Kind::Array
) {
2237 return createDataFixit(RightVD
->getASTContext(), PtrRHS
);
2240 return std::nullopt
;
2243 std::optional
<FixItList
>
2244 PointerInitGadget::getFixits(const FixitStrategy
&S
) const {
2245 const auto *LeftVD
= PtrInitLHS
;
2246 const auto *RightVD
= cast
<VarDecl
>(PtrInitRHS
->getDecl());
2247 switch (S
.lookup(LeftVD
)) {
2248 case FixitStrategy::Kind::Span
:
2249 if (S
.lookup(RightVD
) == FixitStrategy::Kind::Span
)
2251 return std::nullopt
;
2252 case FixitStrategy::Kind::Wontfix
:
2253 return std::nullopt
;
2254 case FixitStrategy::Kind::Iterator
:
2255 case FixitStrategy::Kind::Array
:
2256 return std::nullopt
;
2257 case FixitStrategy::Kind::Vector
:
2258 llvm_unreachable("unsupported strategies for FixableGadgets");
2260 return std::nullopt
;
2263 static bool isNonNegativeIntegerExpr(const Expr
*Expr
, const VarDecl
*VD
,
2264 const ASTContext
&Ctx
) {
2265 if (auto ConstVal
= Expr
->getIntegerConstantExpr(Ctx
)) {
2266 if (ConstVal
->isNegative())
2268 } else if (!Expr
->getType()->isUnsignedIntegerType())
2273 std::optional
<FixItList
>
2274 ULCArraySubscriptGadget::getFixits(const FixitStrategy
&S
) const {
2275 if (const auto *DRE
=
2276 dyn_cast
<DeclRefExpr
>(Node
->getBase()->IgnoreImpCasts()))
2277 if (const auto *VD
= dyn_cast
<VarDecl
>(DRE
->getDecl())) {
2278 switch (S
.lookup(VD
)) {
2279 case FixitStrategy::Kind::Span
: {
2281 // If the index has a negative constant value, we give up as no valid
2282 // fix-it can be generated:
2283 const ASTContext
&Ctx
= // FIXME: we need ASTContext to be passed in!
2284 VD
->getASTContext();
2285 if (!isNonNegativeIntegerExpr(Node
->getIdx(), VD
, Ctx
))
2286 return std::nullopt
;
2287 // no-op is a good fix-it, otherwise
2290 case FixitStrategy::Kind::Array
:
2292 case FixitStrategy::Kind::Wontfix
:
2293 case FixitStrategy::Kind::Iterator
:
2294 case FixitStrategy::Kind::Vector
:
2295 llvm_unreachable("unsupported strategies for FixableGadgets");
2298 return std::nullopt
;
2301 static std::optional
<FixItList
> // forward declaration
2302 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator
*Node
);
2304 std::optional
<FixItList
>
2305 UPCAddressofArraySubscriptGadget::getFixits(const FixitStrategy
&S
) const {
2306 auto DREs
= getClaimedVarUseSites();
2307 const auto *VD
= cast
<VarDecl
>(DREs
.front()->getDecl());
2309 switch (S
.lookup(VD
)) {
2310 case FixitStrategy::Kind::Span
:
2311 return fixUPCAddressofArraySubscriptWithSpan(Node
);
2312 case FixitStrategy::Kind::Wontfix
:
2313 case FixitStrategy::Kind::Iterator
:
2314 case FixitStrategy::Kind::Array
:
2315 return std::nullopt
;
2316 case FixitStrategy::Kind::Vector
:
2317 llvm_unreachable("unsupported strategies for FixableGadgets");
2319 return std::nullopt
; // something went wrong, no fix-it
2322 // FIXME: this function should be customizable through format
2323 static StringRef
getEndOfLine() {
2324 static const char *const EOL
= "\n";
2328 // Returns the text indicating that the user needs to provide input there:
2330 getUserFillPlaceHolder(StringRef HintTextToUser
= "placeholder") {
2331 std::string s
= std::string("<# ");
2332 s
+= HintTextToUser
;
2337 // Return the source location of the last character of the AST `Node`.
2338 template <typename NodeTy
>
2339 static std::optional
<SourceLocation
>
2340 getEndCharLoc(const NodeTy
*Node
, const SourceManager
&SM
,
2341 const LangOptions
&LangOpts
) {
2342 unsigned TkLen
= Lexer::MeasureTokenLength(Node
->getEndLoc(), SM
, LangOpts
);
2343 SourceLocation Loc
= Node
->getEndLoc().getLocWithOffset(TkLen
- 1);
2348 return std::nullopt
;
2351 // Return the source location just past the last character of the AST `Node`.
2352 template <typename NodeTy
>
2353 static std::optional
<SourceLocation
> getPastLoc(const NodeTy
*Node
,
2354 const SourceManager
&SM
,
2355 const LangOptions
&LangOpts
) {
2356 SourceLocation Loc
=
2357 Lexer::getLocForEndOfToken(Node
->getEndLoc(), 0, SM
, LangOpts
);
2360 return std::nullopt
;
2363 // Return text representation of an `Expr`.
2364 static std::optional
<StringRef
> getExprText(const Expr
*E
,
2365 const SourceManager
&SM
,
2366 const LangOptions
&LangOpts
) {
2367 std::optional
<SourceLocation
> LastCharLoc
= getPastLoc(E
, SM
, LangOpts
);
2370 return Lexer::getSourceText(
2371 CharSourceRange::getCharRange(E
->getBeginLoc(), *LastCharLoc
), SM
,
2374 return std::nullopt
;
2377 // Returns the literal text in `SourceRange SR`, if `SR` is a valid range.
2378 static std::optional
<StringRef
> getRangeText(SourceRange SR
,
2379 const SourceManager
&SM
,
2380 const LangOptions
&LangOpts
) {
2381 bool Invalid
= false;
2382 CharSourceRange CSR
= CharSourceRange::getCharRange(SR
);
2383 StringRef Text
= Lexer::getSourceText(CSR
, SM
, LangOpts
, &Invalid
);
2387 return std::nullopt
;
2390 // Returns the begin location of the identifier of the given variable
2392 static SourceLocation
getVarDeclIdentifierLoc(const VarDecl
*VD
) {
2393 // According to the implementation of `VarDecl`, `VD->getLocation()` actually
2394 // returns the begin location of the identifier of the declaration:
2395 return VD
->getLocation();
2398 // Returns the literal text of the identifier of the given variable declaration.
2399 static std::optional
<StringRef
>
2400 getVarDeclIdentifierText(const VarDecl
*VD
, const SourceManager
&SM
,
2401 const LangOptions
&LangOpts
) {
2402 SourceLocation ParmIdentBeginLoc
= getVarDeclIdentifierLoc(VD
);
2403 SourceLocation ParmIdentEndLoc
=
2404 Lexer::getLocForEndOfToken(ParmIdentBeginLoc
, 0, SM
, LangOpts
);
2406 if (ParmIdentEndLoc
.isMacroID() &&
2407 !Lexer::isAtEndOfMacroExpansion(ParmIdentEndLoc
, SM
, LangOpts
))
2408 return std::nullopt
;
2409 return getRangeText({ParmIdentBeginLoc
, ParmIdentEndLoc
}, SM
, LangOpts
);
2412 // We cannot fix a variable declaration if it has some other specifiers than the
2413 // type specifier. Because the source ranges of those specifiers could overlap
2414 // with the source range that is being replaced using fix-its. Especially when
2415 // we often cannot obtain accurate source ranges of cv-qualified type
2417 // FIXME: also deal with type attributes
2418 static bool hasUnsupportedSpecifiers(const VarDecl
*VD
,
2419 const SourceManager
&SM
) {
2420 // AttrRangeOverlapping: true if at least one attribute of `VD` overlaps the
2421 // source range of `VD`:
2422 bool AttrRangeOverlapping
= llvm::any_of(VD
->attrs(), [&](Attr
*At
) -> bool {
2423 return !(SM
.isBeforeInTranslationUnit(At
->getRange().getEnd(),
2424 VD
->getBeginLoc())) &&
2425 !(SM
.isBeforeInTranslationUnit(VD
->getEndLoc(),
2426 At
->getRange().getBegin()));
2428 return VD
->isInlineSpecified() || VD
->isConstexpr() ||
2429 VD
->hasConstantInitialization() || !VD
->hasLocalStorage() ||
2430 AttrRangeOverlapping
;
2433 // Returns the `SourceRange` of `D`. The reason why this function exists is
2434 // that `D->getSourceRange()` may return a range where the end location is the
2435 // starting location of the last token. The end location of the source range
2436 // returned by this function is the last location of the last token.
2437 static SourceRange
getSourceRangeToTokenEnd(const Decl
*D
,
2438 const SourceManager
&SM
,
2439 const LangOptions
&LangOpts
) {
2440 SourceLocation Begin
= D
->getBeginLoc();
2442 End
= // `D->getEndLoc` should always return the starting location of the
2443 // last token, so we should get the end of the token
2444 Lexer::getLocForEndOfToken(D
->getEndLoc(), 0, SM
, LangOpts
);
2446 return SourceRange(Begin
, End
);
2449 // Returns the text of the pointee type of `T` from a `VarDecl` of a pointer
2450 // type. The text is obtained through from `TypeLoc`s. Since `TypeLoc` does not
2451 // have source ranges of qualifiers ( The `QualifiedTypeLoc` looks hacky too me
2452 // :( ), `Qualifiers` of the pointee type is returned separately through the
2453 // output parameter `QualifiersToAppend`.
2454 static std::optional
<std::string
>
2455 getPointeeTypeText(const VarDecl
*VD
, const SourceManager
&SM
,
2456 const LangOptions
&LangOpts
,
2457 std::optional
<Qualifiers
> *QualifiersToAppend
) {
2458 QualType Ty
= VD
->getType();
2461 assert(Ty
->isPointerType() && !Ty
->isFunctionPointerType() &&
2462 "Expecting a VarDecl of type of pointer to object type");
2463 PteTy
= Ty
->getPointeeType();
2465 TypeLoc TyLoc
= VD
->getTypeSourceInfo()->getTypeLoc().getUnqualifiedLoc();
2468 // We only deal with the cases that we know `TypeLoc::getNextTypeLoc` returns
2469 // the `TypeLoc` of the pointee type:
2470 switch (TyLoc
.getTypeLocClass()) {
2471 case TypeLoc::ConstantArray
:
2472 case TypeLoc::IncompleteArray
:
2473 case TypeLoc::VariableArray
:
2474 case TypeLoc::DependentSizedArray
:
2475 case TypeLoc::Decayed
:
2476 assert(isa
<ParmVarDecl
>(VD
) && "An array type shall not be treated as a "
2477 "pointer type unless it decays.");
2478 PteTyLoc
= TyLoc
.getNextTypeLoc();
2480 case TypeLoc::Pointer
:
2481 PteTyLoc
= TyLoc
.castAs
<PointerTypeLoc
>().getPointeeLoc();
2484 return std::nullopt
;
2486 if (PteTyLoc
.isNull())
2487 // Sometimes we cannot get a useful `TypeLoc` for the pointee type, e.g.,
2488 // when the pointer type is `auto`.
2489 return std::nullopt
;
2491 SourceLocation IdentLoc
= getVarDeclIdentifierLoc(VD
);
2493 if (!(IdentLoc
.isValid() && PteTyLoc
.getSourceRange().isValid())) {
2494 // We are expecting these locations to be valid. But in some cases, they are
2495 // not all valid. It is a Clang bug to me and we are not responsible for
2496 // fixing it. So we will just give up for now when it happens.
2497 return std::nullopt
;
2500 // Note that TypeLoc.getEndLoc() returns the begin location of the last token:
2501 SourceLocation PteEndOfTokenLoc
=
2502 Lexer::getLocForEndOfToken(PteTyLoc
.getEndLoc(), 0, SM
, LangOpts
);
2504 if (!PteEndOfTokenLoc
.isValid())
2505 // Sometimes we cannot get the end location of the pointee type, e.g., when
2506 // there are macros involved.
2507 return std::nullopt
;
2508 if (!SM
.isBeforeInTranslationUnit(PteEndOfTokenLoc
, IdentLoc
)) {
2509 // We only deal with the cases where the source text of the pointee type
2510 // appears on the left-hand side of the variable identifier completely,
2511 // including the following forms:
2513 // `T ident[]`, where `T` is any type.
2514 // Examples of excluded cases are `T (*ident)[]` or `T ident[][n]`.
2515 return std::nullopt
;
2517 if (PteTy
.hasQualifiers()) {
2518 // TypeLoc does not provide source ranges for qualifiers (it says it's
2519 // intentional but seems fishy to me), so we cannot get the full text
2520 // `PteTy` via source ranges.
2521 *QualifiersToAppend
= PteTy
.getQualifiers();
2523 return getRangeText({PteTyLoc
.getBeginLoc(), PteEndOfTokenLoc
}, SM
, LangOpts
)
2527 // Returns the text of the name (with qualifiers) of a `FunctionDecl`.
2528 static std::optional
<StringRef
> getFunNameText(const FunctionDecl
*FD
,
2529 const SourceManager
&SM
,
2530 const LangOptions
&LangOpts
) {
2531 SourceLocation BeginLoc
= FD
->getQualifier()
2532 ? FD
->getQualifierLoc().getBeginLoc()
2533 : FD
->getNameInfo().getBeginLoc();
2534 // Note that `FD->getNameInfo().getEndLoc()` returns the begin location of the
2536 SourceLocation EndLoc
= Lexer::getLocForEndOfToken(
2537 FD
->getNameInfo().getEndLoc(), 0, SM
, LangOpts
);
2538 SourceRange NameRange
{BeginLoc
, EndLoc
};
2540 return getRangeText(NameRange
, SM
, LangOpts
);
2543 // Returns the text representing a `std::span` type where the element type is
2544 // represented by `EltTyText`.
2546 // Note the optional parameter `Qualifiers`: one needs to pass qualifiers
2547 // explicitly if the element type needs to be qualified.
2549 getSpanTypeText(StringRef EltTyText
,
2550 std::optional
<Qualifiers
> Quals
= std::nullopt
) {
2551 const char *const SpanOpen
= "std::span<";
2554 return SpanOpen
+ EltTyText
.str() + ' ' + Quals
->getAsString() + '>';
2555 return SpanOpen
+ EltTyText
.str() + '>';
2558 std::optional
<FixItList
>
2559 DerefSimplePtrArithFixableGadget::getFixits(const FixitStrategy
&s
) const {
2560 const VarDecl
*VD
= dyn_cast
<VarDecl
>(BaseDeclRefExpr
->getDecl());
2562 if (VD
&& s
.lookup(VD
) == FixitStrategy::Kind::Span
) {
2563 ASTContext
&Ctx
= VD
->getASTContext();
2564 // std::span can't represent elements before its begin()
2565 if (auto ConstVal
= Offset
->getIntegerConstantExpr(Ctx
))
2566 if (ConstVal
->isNegative())
2567 return std::nullopt
;
2569 // note that the expr may (oddly) has multiple layers of parens
2571 // *((..(pointer + 123)..))
2576 // replace ' + ' with '['
2577 // replace ')' with ']'
2580 // *((..(123 + pointer)..))
2585 // replace ' + ' with '['
2586 // replace ')' with ']'
2588 const Expr
*LHS
= AddOp
->getLHS(), *RHS
= AddOp
->getRHS();
2589 const SourceManager
&SM
= Ctx
.getSourceManager();
2590 const LangOptions
&LangOpts
= Ctx
.getLangOpts();
2591 CharSourceRange StarWithTrailWhitespace
=
2592 clang::CharSourceRange::getCharRange(DerefOp
->getOperatorLoc(),
2593 LHS
->getBeginLoc());
2595 std::optional
<SourceLocation
> LHSLocation
= getPastLoc(LHS
, SM
, LangOpts
);
2597 return std::nullopt
;
2599 CharSourceRange PlusWithSurroundingWhitespace
=
2600 clang::CharSourceRange::getCharRange(*LHSLocation
, RHS
->getBeginLoc());
2602 std::optional
<SourceLocation
> AddOpLocation
=
2603 getPastLoc(AddOp
, SM
, LangOpts
);
2604 std::optional
<SourceLocation
> DerefOpLocation
=
2605 getPastLoc(DerefOp
, SM
, LangOpts
);
2607 if (!AddOpLocation
|| !DerefOpLocation
)
2608 return std::nullopt
;
2610 CharSourceRange ClosingParenWithPrecWhitespace
=
2611 clang::CharSourceRange::getCharRange(*AddOpLocation
, *DerefOpLocation
);
2614 {FixItHint::CreateRemoval(StarWithTrailWhitespace
),
2615 FixItHint::CreateReplacement(PlusWithSurroundingWhitespace
, "["),
2616 FixItHint::CreateReplacement(ClosingParenWithPrecWhitespace
, "]")}};
2618 return std::nullopt
; // something wrong or unsupported, give up
2621 std::optional
<FixItList
>
2622 PointerDereferenceGadget::getFixits(const FixitStrategy
&S
) const {
2623 const VarDecl
*VD
= cast
<VarDecl
>(BaseDeclRefExpr
->getDecl());
2624 switch (S
.lookup(VD
)) {
2625 case FixitStrategy::Kind::Span
: {
2626 ASTContext
&Ctx
= VD
->getASTContext();
2627 SourceManager
&SM
= Ctx
.getSourceManager();
2628 // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0]
2629 // Deletes the *operand
2630 CharSourceRange derefRange
= clang::CharSourceRange::getCharRange(
2631 Op
->getBeginLoc(), Op
->getBeginLoc().getLocWithOffset(1));
2633 if (auto LocPastOperand
=
2634 getPastLoc(BaseDeclRefExpr
, SM
, Ctx
.getLangOpts())) {
2635 return FixItList
{{FixItHint::CreateRemoval(derefRange
),
2636 FixItHint::CreateInsertion(*LocPastOperand
, "[0]")}};
2640 case FixitStrategy::Kind::Iterator
:
2641 case FixitStrategy::Kind::Array
:
2642 return std::nullopt
;
2643 case FixitStrategy::Kind::Vector
:
2644 llvm_unreachable("FixitStrategy not implemented yet!");
2645 case FixitStrategy::Kind::Wontfix
:
2646 llvm_unreachable("Invalid strategy!");
2649 return std::nullopt
;
2652 static inline std::optional
<FixItList
> createDataFixit(const ASTContext
&Ctx
,
2653 const DeclRefExpr
*DRE
) {
2654 const SourceManager
&SM
= Ctx
.getSourceManager();
2655 // Inserts the .data() after the DRE
2656 std::optional
<SourceLocation
> EndOfOperand
=
2657 getPastLoc(DRE
, SM
, Ctx
.getLangOpts());
2660 return FixItList
{{FixItHint::CreateInsertion(*EndOfOperand
, ".data()")}};
2662 return std::nullopt
;
2665 // Generates fix-its replacing an expression of the form UPC(DRE) with
2667 std::optional
<FixItList
>
2668 UPCStandalonePointerGadget::getFixits(const FixitStrategy
&S
) const {
2669 const auto VD
= cast
<VarDecl
>(Node
->getDecl());
2670 switch (S
.lookup(VD
)) {
2671 case FixitStrategy::Kind::Array
:
2672 case FixitStrategy::Kind::Span
: {
2673 return createDataFixit(VD
->getASTContext(), Node
);
2674 // FIXME: Points inside a macro expansion.
2677 case FixitStrategy::Kind::Wontfix
:
2678 case FixitStrategy::Kind::Iterator
:
2679 return std::nullopt
;
2680 case FixitStrategy::Kind::Vector
:
2681 llvm_unreachable("unsupported strategies for FixableGadgets");
2684 return std::nullopt
;
2687 // Generates fix-its replacing an expression of the form `&DRE[e]` with
2688 // `&DRE.data()[e]`:
2689 static std::optional
<FixItList
>
2690 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator
*Node
) {
2691 const auto *ArraySub
= cast
<ArraySubscriptExpr
>(Node
->getSubExpr());
2692 const auto *DRE
= cast
<DeclRefExpr
>(ArraySub
->getBase()->IgnoreImpCasts());
2693 // FIXME: this `getASTContext` call is costly, we should pass the
2695 const ASTContext
&Ctx
= DRE
->getDecl()->getASTContext();
2696 const Expr
*Idx
= ArraySub
->getIdx();
2697 const SourceManager
&SM
= Ctx
.getSourceManager();
2698 const LangOptions
&LangOpts
= Ctx
.getLangOpts();
2699 std::stringstream SS
;
2700 bool IdxIsLitZero
= false;
2702 if (auto ICE
= Idx
->getIntegerConstantExpr(Ctx
))
2703 if ((*ICE
).isZero())
2704 IdxIsLitZero
= true;
2705 std::optional
<StringRef
> DreString
= getExprText(DRE
, SM
, LangOpts
);
2707 return std::nullopt
;
2710 // If the index is literal zero, we produce the most concise fix-it:
2711 SS
<< (*DreString
).str() << ".data()";
2713 std::optional
<StringRef
> IndexString
= getExprText(Idx
, SM
, LangOpts
);
2715 return std::nullopt
;
2717 SS
<< "&" << (*DreString
).str() << ".data()"
2718 << "[" << (*IndexString
).str() << "]";
2721 FixItHint::CreateReplacement(Node
->getSourceRange(), SS
.str())};
2724 std::optional
<FixItList
>
2725 UUCAddAssignGadget::getFixits(const FixitStrategy
&S
) const {
2726 DeclUseList DREs
= getClaimedVarUseSites();
2728 if (DREs
.size() != 1)
2729 return std::nullopt
; // In cases of `Ptr += n` where `Ptr` is not a DRE, we
2731 if (const VarDecl
*VD
= dyn_cast
<VarDecl
>(DREs
.front()->getDecl())) {
2732 if (S
.lookup(VD
) == FixitStrategy::Kind::Span
) {
2735 const Stmt
*AddAssignNode
= Node
;
2736 StringRef varName
= VD
->getName();
2737 const ASTContext
&Ctx
= VD
->getASTContext();
2739 if (!isNonNegativeIntegerExpr(Offset
, VD
, Ctx
))
2740 return std::nullopt
;
2742 // To transform UUC(p += n) to UUC(p = p.subspan(..)):
2744 (Offset
->IgnoreParens()->getBeginLoc() == Offset
->getBeginLoc());
2745 std::string SS
= varName
.str() + " = " + varName
.str() + ".subspan";
2749 std::optional
<SourceLocation
> AddAssignLocation
= getEndCharLoc(
2750 AddAssignNode
, Ctx
.getSourceManager(), Ctx
.getLangOpts());
2751 if (!AddAssignLocation
)
2752 return std::nullopt
;
2754 Fixes
.push_back(FixItHint::CreateReplacement(
2755 SourceRange(AddAssignNode
->getBeginLoc(), Node
->getOperatorLoc()),
2758 Fixes
.push_back(FixItHint::CreateInsertion(
2759 Offset
->getEndLoc().getLocWithOffset(1), ")"));
2763 return std::nullopt
; // Not in the cases that we can handle for now, give up.
2766 std::optional
<FixItList
>
2767 UPCPreIncrementGadget::getFixits(const FixitStrategy
&S
) const {
2768 DeclUseList DREs
= getClaimedVarUseSites();
2770 if (DREs
.size() != 1)
2771 return std::nullopt
; // In cases of `++Ptr` where `Ptr` is not a DRE, we
2773 if (const VarDecl
*VD
= dyn_cast
<VarDecl
>(DREs
.front()->getDecl())) {
2774 if (S
.lookup(VD
) == FixitStrategy::Kind::Span
) {
2776 std::stringstream SS
;
2777 StringRef varName
= VD
->getName();
2778 const ASTContext
&Ctx
= VD
->getASTContext();
2780 // To transform UPC(++p) to UPC((p = p.subspan(1)).data()):
2781 SS
<< "(" << varName
.data() << " = " << varName
.data()
2782 << ".subspan(1)).data()";
2783 std::optional
<SourceLocation
> PreIncLocation
=
2784 getEndCharLoc(Node
, Ctx
.getSourceManager(), Ctx
.getLangOpts());
2785 if (!PreIncLocation
)
2786 return std::nullopt
;
2788 Fixes
.push_back(FixItHint::CreateReplacement(
2789 SourceRange(Node
->getBeginLoc(), *PreIncLocation
), SS
.str()));
2793 return std::nullopt
; // Not in the cases that we can handle for now, give up.
2796 // For a non-null initializer `Init` of `T *` type, this function returns
2797 // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it
2798 // to output stream.
2799 // In many cases, this function cannot figure out the actual extent `S`. It
2800 // then will use a place holder to replace `S` to ask users to fill `S` in. The
2801 // initializer shall be used to initialize a variable of type `std::span<T>`.
2802 // In some cases (e. g. constant size array) the initializer should remain
2803 // unchanged and the function returns empty list. In case the function can't
2804 // provide the right fixit it will return nullopt.
2806 // FIXME: Support multi-level pointers
2809 // `Init` a pointer to the initializer expression
2810 // `Ctx` a reference to the ASTContext
2811 static std::optional
<FixItList
>
2812 FixVarInitializerWithSpan(const Expr
*Init
, ASTContext
&Ctx
,
2813 const StringRef UserFillPlaceHolder
) {
2814 const SourceManager
&SM
= Ctx
.getSourceManager();
2815 const LangOptions
&LangOpts
= Ctx
.getLangOpts();
2817 // If `Init` has a constant value that is (or equivalent to) a
2818 // NULL pointer, we use the default constructor to initialize the span
2819 // object, i.e., a `std:span` variable declaration with no initializer.
2820 // So the fix-it is just to remove the initializer.
2821 if (Init
->isNullPointerConstant(
2823 // FIXME: Why does this function not ask for `const ASTContext
2824 // &`? It should. Maybe worth an NFC patch later.
2825 Expr::NullPointerConstantValueDependence::
2826 NPC_ValueDependentIsNotNull
)) {
2827 std::optional
<SourceLocation
> InitLocation
=
2828 getEndCharLoc(Init
, SM
, LangOpts
);
2830 return std::nullopt
;
2832 SourceRange
SR(Init
->getBeginLoc(), *InitLocation
);
2834 return FixItList
{FixItHint::CreateRemoval(SR
)};
2838 std::string ExtentText
= UserFillPlaceHolder
.data();
2839 StringRef One
= "1";
2841 // Insert `{` before `Init`:
2842 FixIts
.push_back(FixItHint::CreateInsertion(Init
->getBeginLoc(), "{"));
2843 // Try to get the data extent. Break into different cases:
2844 if (auto CxxNew
= dyn_cast
<CXXNewExpr
>(Init
->IgnoreImpCasts())) {
2845 // In cases `Init` is `new T[n]` and there is no explicit cast over
2846 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects
2847 // of `T`. So the extent is `n` unless `n` has side effects. Similar but
2848 // simpler for the case where `Init` is `new T`.
2849 if (const Expr
*Ext
= CxxNew
->getArraySize().value_or(nullptr)) {
2850 if (!Ext
->HasSideEffects(Ctx
)) {
2851 std::optional
<StringRef
> ExtentString
= getExprText(Ext
, SM
, LangOpts
);
2853 return std::nullopt
;
2854 ExtentText
= *ExtentString
;
2856 } else if (!CxxNew
->isArray())
2857 // Although the initializer is not allocating a buffer, the pointer
2858 // variable could still be used in buffer access operations.
2860 } else if (Ctx
.getAsConstantArrayType(Init
->IgnoreImpCasts()->getType())) {
2861 // std::span has a single parameter constructor for initialization with
2862 // constant size array. The size is auto-deduced as the constructor is a
2863 // function template. The correct fixit is empty - no changes should happen.
2866 // In cases `Init` is of the form `&Var` after stripping of implicit
2867 // casts, where `&` is the built-in operator, the extent is 1.
2868 if (auto AddrOfExpr
= dyn_cast
<UnaryOperator
>(Init
->IgnoreImpCasts()))
2869 if (AddrOfExpr
->getOpcode() == UnaryOperatorKind::UO_AddrOf
&&
2870 isa_and_present
<DeclRefExpr
>(AddrOfExpr
->getSubExpr()))
2872 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`,
2873 // and explicit casting, etc. etc.
2876 SmallString
<32> StrBuffer
{};
2877 std::optional
<SourceLocation
> LocPassInit
= getPastLoc(Init
, SM
, LangOpts
);
2880 return std::nullopt
;
2882 StrBuffer
.append(", ");
2883 StrBuffer
.append(ExtentText
);
2884 StrBuffer
.append("}");
2885 FixIts
.push_back(FixItHint::CreateInsertion(*LocPassInit
, StrBuffer
.str()));
2890 #define DEBUG_NOTE_DECL_FAIL(D, Msg) \
2891 Handler.addDebugNoteForVar((D), (D)->getBeginLoc(), \
2892 "failed to produce fixit for declaration '" + \
2893 (D)->getNameAsString() + "'" + (Msg))
2895 #define DEBUG_NOTE_DECL_FAIL(D, Msg)
2898 // For the given variable declaration with a pointer-to-T type, returns the text
2899 // `std::span<T>`. If it is unable to generate the text, returns
2901 static std::optional
<std::string
>
2902 createSpanTypeForVarDecl(const VarDecl
*VD
, const ASTContext
&Ctx
) {
2903 assert(VD
->getType()->isPointerType());
2905 std::optional
<Qualifiers
> PteTyQualifiers
= std::nullopt
;
2906 std::optional
<std::string
> PteTyText
= getPointeeTypeText(
2907 VD
, Ctx
.getSourceManager(), Ctx
.getLangOpts(), &PteTyQualifiers
);
2910 return std::nullopt
;
2912 std::string SpanTyText
= "std::span<";
2914 SpanTyText
.append(*PteTyText
);
2915 // Append qualifiers to span element type if any:
2916 if (PteTyQualifiers
) {
2917 SpanTyText
.append(" ");
2918 SpanTyText
.append(PteTyQualifiers
->getAsString());
2920 SpanTyText
.append(">");
2924 // For a `VarDecl` of the form `T * var (= Init)?`, this
2925 // function generates fix-its that
2926 // 1) replace `T * var` with `std::span<T> var`; and
2927 // 2) change `Init` accordingly to a span constructor, if it exists.
2929 // FIXME: support Multi-level pointers
2932 // `D` a pointer the variable declaration node
2933 // `Ctx` a reference to the ASTContext
2934 // `UserFillPlaceHolder` the user-input placeholder text
2936 // the non-empty fix-it list, if fix-its are successfuly generated; empty
2938 static FixItList
fixLocalVarDeclWithSpan(const VarDecl
*D
, ASTContext
&Ctx
,
2939 const StringRef UserFillPlaceHolder
,
2940 UnsafeBufferUsageHandler
&Handler
) {
2941 if (hasUnsupportedSpecifiers(D
, Ctx
.getSourceManager()))
2945 std::optional
<std::string
> SpanTyText
= createSpanTypeForVarDecl(D
, Ctx
);
2948 DEBUG_NOTE_DECL_FAIL(D
, " : failed to generate 'std::span' type");
2952 // Will hold the text for `std::span<T> Ident`:
2953 std::stringstream SS
;
2956 // Fix the initializer if it exists:
2957 if (const Expr
*Init
= D
->getInit()) {
2958 std::optional
<FixItList
> InitFixIts
=
2959 FixVarInitializerWithSpan(Init
, Ctx
, UserFillPlaceHolder
);
2962 FixIts
.insert(FixIts
.end(), std::make_move_iterator(InitFixIts
->begin()),
2963 std::make_move_iterator(InitFixIts
->end()));
2965 // For declaration of the form `T * ident = init;`, we want to replace
2966 // `T * ` with `std::span<T>`.
2967 // We ignore CV-qualifiers so for `T * const ident;` we also want to replace
2968 // just `T *` with `std::span<T>`.
2969 const SourceLocation EndLocForReplacement
= D
->getTypeSpecEndLoc();
2970 if (!EndLocForReplacement
.isValid()) {
2971 DEBUG_NOTE_DECL_FAIL(D
, " : failed to locate the end of the declaration");
2974 // The only exception is that for `T *ident` we'll add a single space between
2975 // "std::span<T>" and "ident".
2976 // FIXME: The condition is false for identifiers expended from macros.
2977 if (EndLocForReplacement
.getLocWithOffset(1) == getVarDeclIdentifierLoc(D
))
2980 FixIts
.push_back(FixItHint::CreateReplacement(
2981 SourceRange(D
->getBeginLoc(), EndLocForReplacement
), SS
.str()));
2985 static bool hasConflictingOverload(const FunctionDecl
*FD
) {
2986 return !FD
->getDeclContext()->lookup(FD
->getDeclName()).isSingleResult();
2989 // For a `FunctionDecl`, whose `ParmVarDecl`s are being changed to have new
2990 // types, this function produces fix-its to make the change self-contained. Let
2991 // 'F' be the entity defined by the original `FunctionDecl` and "NewF" be the
2992 // entity defined by the `FunctionDecl` after the change to the parameters.
2993 // Fix-its produced by this function are
2994 // 1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration
2996 // 2. Create a declaration of "NewF" next to each declaration of `F`;
2997 // 3. Create a definition of "F" (as its' original definition is now belongs
2998 // to "NewF") next to its original definition. The body of the creating
2999 // definition calls to "NewF".
3003 // void f(int *p); // original declaration
3004 // void f(int *p) { // original definition
3008 // To change the parameter `p` to be of `std::span<int>` type, we
3009 // also add overloads:
3011 // [[clang::unsafe_buffer_usage]] void f(int *p); // original decl
3012 // void f(std::span<int> p); // added overload decl
3013 // void f(std::span<int> p) { // original def where param is changed
3016 // [[clang::unsafe_buffer_usage]] void f(int *p) { // added def
3017 // return f(std::span(p, <# size #>));
3020 static std::optional
<FixItList
>
3021 createOverloadsForFixedParams(const FixitStrategy
&S
, const FunctionDecl
*FD
,
3022 const ASTContext
&Ctx
,
3023 UnsafeBufferUsageHandler
&Handler
) {
3024 // FIXME: need to make this conflict checking better:
3025 if (hasConflictingOverload(FD
))
3026 return std::nullopt
;
3028 const SourceManager
&SM
= Ctx
.getSourceManager();
3029 const LangOptions
&LangOpts
= Ctx
.getLangOpts();
3030 const unsigned NumParms
= FD
->getNumParams();
3031 std::vector
<std::string
> NewTysTexts(NumParms
);
3032 std::vector
<bool> ParmsMask(NumParms
, false);
3033 bool AtLeastOneParmToFix
= false;
3035 for (unsigned i
= 0; i
< NumParms
; i
++) {
3036 const ParmVarDecl
*PVD
= FD
->getParamDecl(i
);
3038 if (S
.lookup(PVD
) == FixitStrategy::Kind::Wontfix
)
3040 if (S
.lookup(PVD
) != FixitStrategy::Kind::Span
)
3041 // Not supported, not suppose to happen:
3042 return std::nullopt
;
3044 std::optional
<Qualifiers
> PteTyQuals
= std::nullopt
;
3045 std::optional
<std::string
> PteTyText
=
3046 getPointeeTypeText(PVD
, SM
, LangOpts
, &PteTyQuals
);
3049 // something wrong in obtaining the text of the pointee type, give up
3050 return std::nullopt
;
3051 // FIXME: whether we should create std::span type depends on the
3053 NewTysTexts
[i
] = getSpanTypeText(*PteTyText
, PteTyQuals
);
3054 ParmsMask
[i
] = true;
3055 AtLeastOneParmToFix
= true;
3057 if (!AtLeastOneParmToFix
)
3058 // No need to create function overloads:
3060 // FIXME Respect indentation of the original code.
3062 // A lambda that creates the text representation of a function declaration
3063 // with the new type signatures:
3064 const auto NewOverloadSignatureCreator
=
3065 [&SM
, &LangOpts
, &NewTysTexts
,
3066 &ParmsMask
](const FunctionDecl
*FD
) -> std::optional
<std::string
> {
3067 std::stringstream SS
;
3070 SS
<< getEndOfLine().str();
3071 // Append: ret-type func-name "("
3072 if (auto Prefix
= getRangeText(
3073 SourceRange(FD
->getBeginLoc(), (*FD
->param_begin())->getBeginLoc()),
3075 SS
<< Prefix
->str();
3077 return std::nullopt
; // give up
3078 // Append: parameter-type-list
3079 const unsigned NumParms
= FD
->getNumParams();
3081 for (unsigned i
= 0; i
< NumParms
; i
++) {
3082 const ParmVarDecl
*Parm
= FD
->getParamDecl(i
);
3084 if (Parm
->isImplicit())
3087 // This `i`-th parameter will be fixed with `NewTysTexts[i]` being its
3089 SS
<< NewTysTexts
[i
];
3090 // print parameter name if provided:
3091 if (IdentifierInfo
*II
= Parm
->getIdentifier())
3092 SS
<< ' ' << II
->getName().str();
3093 } else if (auto ParmTypeText
=
3094 getRangeText(getSourceRangeToTokenEnd(Parm
, SM
, LangOpts
),
3096 // print the whole `Parm` without modification:
3097 SS
<< ParmTypeText
->str();
3099 return std::nullopt
; // something wrong, give up
3100 if (i
!= NumParms
- 1)
3107 // A lambda that creates the text representation of a function definition with
3108 // the original signature:
3109 const auto OldOverloadDefCreator
=
3110 [&Handler
, &SM
, &LangOpts
, &NewTysTexts
,
3111 &ParmsMask
](const FunctionDecl
*FD
) -> std::optional
<std::string
> {
3112 std::stringstream SS
;
3114 SS
<< getEndOfLine().str();
3115 // Append: attr-name ret-type func-name "(" param-list ")" "{"
3116 if (auto FDPrefix
= getRangeText(
3117 SourceRange(FD
->getBeginLoc(), FD
->getBody()->getBeginLoc()), SM
,
3119 SS
<< Handler
.getUnsafeBufferUsageAttributeTextAt(FD
->getBeginLoc(), " ")
3120 << FDPrefix
->str() << "{";
3122 return std::nullopt
;
3123 // Append: "return" func-name "("
3124 if (auto FunQualName
= getFunNameText(FD
, SM
, LangOpts
))
3125 SS
<< "return " << FunQualName
->str() << "(";
3127 return std::nullopt
;
3130 const unsigned NumParms
= FD
->getNumParams();
3131 for (unsigned i
= 0; i
< NumParms
; i
++) {
3132 const ParmVarDecl
*Parm
= FD
->getParamDecl(i
);
3134 if (Parm
->isImplicit())
3136 // FIXME: If a parameter has no name, it is unused in the
3137 // definition. So we could just leave it as it is.
3138 if (!Parm
->getIdentifier())
3139 // If a parameter of a function definition has no name:
3140 return std::nullopt
;
3142 // This is our spanified paramter!
3143 SS
<< NewTysTexts
[i
] << "(" << Parm
->getIdentifier()->getName().str()
3144 << ", " << getUserFillPlaceHolder("size") << ")";
3146 SS
<< Parm
->getIdentifier()->getName().str();
3147 if (i
!= NumParms
- 1)
3150 // finish call and the body
3151 SS
<< ");}" << getEndOfLine().str();
3152 // FIXME: 80-char line formatting?
3157 for (FunctionDecl
*FReDecl
: FD
->redecls()) {
3158 std::optional
<SourceLocation
> Loc
= getPastLoc(FReDecl
, SM
, LangOpts
);
3162 if (FReDecl
->isThisDeclarationADefinition()) {
3163 assert(FReDecl
== FD
&& "inconsistent function definition");
3164 // Inserts a definition with the old signature to the end of
3166 if (auto OldOverloadDef
= OldOverloadDefCreator(FReDecl
))
3167 FixIts
.emplace_back(FixItHint::CreateInsertion(*Loc
, *OldOverloadDef
));
3169 return {}; // give up
3171 // Adds the unsafe-buffer attribute (if not already there) to `FReDecl`:
3172 if (!FReDecl
->hasAttr
<UnsafeBufferUsageAttr
>()) {
3173 FixIts
.emplace_back(FixItHint::CreateInsertion(
3174 FReDecl
->getBeginLoc(), Handler
.getUnsafeBufferUsageAttributeTextAt(
3175 FReDecl
->getBeginLoc(), " ")));
3177 // Inserts a declaration with the new signature to the end of `FReDecl`:
3178 if (auto NewOverloadDecl
= NewOverloadSignatureCreator(FReDecl
))
3179 FixIts
.emplace_back(FixItHint::CreateInsertion(*Loc
, *NewOverloadDecl
));
3187 // To fix a `ParmVarDecl` to be of `std::span` type.
3188 static FixItList
fixParamWithSpan(const ParmVarDecl
*PVD
, const ASTContext
&Ctx
,
3189 UnsafeBufferUsageHandler
&Handler
) {
3190 if (hasUnsupportedSpecifiers(PVD
, Ctx
.getSourceManager())) {
3191 DEBUG_NOTE_DECL_FAIL(PVD
, " : has unsupport specifier(s)");
3194 if (PVD
->hasDefaultArg()) {
3195 // FIXME: generate fix-its for default values:
3196 DEBUG_NOTE_DECL_FAIL(PVD
, " : has default arg");
3200 std::optional
<Qualifiers
> PteTyQualifiers
= std::nullopt
;
3201 std::optional
<std::string
> PteTyText
= getPointeeTypeText(
3202 PVD
, Ctx
.getSourceManager(), Ctx
.getLangOpts(), &PteTyQualifiers
);
3205 DEBUG_NOTE_DECL_FAIL(PVD
, " : invalid pointee type");
3209 std::optional
<StringRef
> PVDNameText
= PVD
->getIdentifier()->getName();
3212 DEBUG_NOTE_DECL_FAIL(PVD
, " : invalid identifier name");
3216 std::stringstream SS
;
3217 std::optional
<std::string
> SpanTyText
= createSpanTypeForVarDecl(PVD
, Ctx
);
3219 if (PteTyQualifiers
)
3220 // Append qualifiers if they exist:
3221 SS
<< getSpanTypeText(*PteTyText
, PteTyQualifiers
);
3223 SS
<< getSpanTypeText(*PteTyText
);
3224 // Append qualifiers to the type of the parameter:
3225 if (PVD
->getType().hasQualifiers())
3226 SS
<< ' ' << PVD
->getType().getQualifiers().getAsString();
3227 // Append parameter's name:
3228 SS
<< ' ' << PVDNameText
->str();
3229 // Add replacement fix-it:
3230 return {FixItHint::CreateReplacement(PVD
->getSourceRange(), SS
.str())};
3233 static FixItList
fixVariableWithSpan(const VarDecl
*VD
,
3234 const DeclUseTracker
&Tracker
,
3236 UnsafeBufferUsageHandler
&Handler
) {
3237 const DeclStmt
*DS
= Tracker
.lookupDecl(VD
);
3239 DEBUG_NOTE_DECL_FAIL(VD
,
3240 " : variables declared this way not implemented yet");
3243 if (!DS
->isSingleDecl()) {
3244 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
3245 DEBUG_NOTE_DECL_FAIL(VD
, " : multiple VarDecls");
3248 // Currently DS is an unused variable but we'll need it when
3249 // non-single decls are implemented, where the pointee type name
3250 // and the '*' are spread around the place.
3253 // FIXME: handle cases where DS has multiple declarations
3254 return fixLocalVarDeclWithSpan(VD
, Ctx
, getUserFillPlaceHolder(), Handler
);
3257 static FixItList
fixVarDeclWithArray(const VarDecl
*D
, const ASTContext
&Ctx
,
3258 UnsafeBufferUsageHandler
&Handler
) {
3261 // Note: the code below expects the declaration to not use any type sugar like
3263 if (auto CAT
= Ctx
.getAsConstantArrayType(D
->getType())) {
3264 const QualType
&ArrayEltT
= CAT
->getElementType();
3265 assert(!ArrayEltT
.isNull() && "Trying to fix a non-array type variable!");
3266 // FIXME: support multi-dimensional arrays
3267 if (isa
<clang::ArrayType
>(ArrayEltT
.getCanonicalType()))
3270 const SourceLocation IdentifierLoc
= getVarDeclIdentifierLoc(D
);
3272 // Get the spelling of the element type as written in the source file
3273 // (including macros, etc.).
3274 auto MaybeElemTypeTxt
=
3275 getRangeText({D
->getBeginLoc(), IdentifierLoc
}, Ctx
.getSourceManager(),
3277 if (!MaybeElemTypeTxt
)
3279 const llvm::StringRef ElemTypeTxt
= MaybeElemTypeTxt
->trim();
3281 // Find the '[' token.
3282 std::optional
<Token
> NextTok
= Lexer::findNextToken(
3283 IdentifierLoc
, Ctx
.getSourceManager(), Ctx
.getLangOpts());
3284 while (NextTok
&& !NextTok
->is(tok::l_square
) &&
3285 NextTok
->getLocation() <= D
->getSourceRange().getEnd())
3286 NextTok
= Lexer::findNextToken(NextTok
->getLocation(),
3287 Ctx
.getSourceManager(), Ctx
.getLangOpts());
3290 const SourceLocation LSqBracketLoc
= NextTok
->getLocation();
3292 // Get the spelling of the array size as written in the source file
3293 // (including macros, etc.).
3294 auto MaybeArraySizeTxt
= getRangeText(
3295 {LSqBracketLoc
.getLocWithOffset(1), D
->getTypeSpecEndLoc()},
3296 Ctx
.getSourceManager(), Ctx
.getLangOpts());
3297 if (!MaybeArraySizeTxt
)
3299 const llvm::StringRef ArraySizeTxt
= MaybeArraySizeTxt
->trim();
3300 if (ArraySizeTxt
.empty()) {
3301 // FIXME: Support array size getting determined from the initializer.
3303 // int arr1[] = {0, 1, 2};
3304 // int arr2{3, 4, 5};
3305 // We might be able to preserve the non-specified size with `auto` and
3307 // auto arr1 = std::to_array<int>({0, 1, 2});
3311 std::optional
<StringRef
> IdentText
=
3312 getVarDeclIdentifierText(D
, Ctx
.getSourceManager(), Ctx
.getLangOpts());
3315 DEBUG_NOTE_DECL_FAIL(D
, " : failed to locate the identifier");
3319 SmallString
<32> Replacement
;
3320 raw_svector_ostream
OS(Replacement
);
3321 OS
<< "std::array<" << ElemTypeTxt
<< ", " << ArraySizeTxt
<< "> "
3322 << IdentText
->str();
3324 FixIts
.push_back(FixItHint::CreateReplacement(
3325 SourceRange
{D
->getBeginLoc(), D
->getTypeSpecEndLoc()}, OS
.str()));
3331 static FixItList
fixVariableWithArray(const VarDecl
*VD
,
3332 const DeclUseTracker
&Tracker
,
3333 const ASTContext
&Ctx
,
3334 UnsafeBufferUsageHandler
&Handler
) {
3335 const DeclStmt
*DS
= Tracker
.lookupDecl(VD
);
3336 assert(DS
&& "Fixing non-local variables not implemented yet!");
3337 if (!DS
->isSingleDecl()) {
3338 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
3341 // Currently DS is an unused variable but we'll need it when
3342 // non-single decls are implemented, where the pointee type name
3343 // and the '*' are spread around the place.
3346 // FIXME: handle cases where DS has multiple declarations
3347 return fixVarDeclWithArray(VD
, Ctx
, Handler
);
3350 // TODO: we should be consistent to use `std::nullopt` to represent no-fix due
3351 // to any unexpected problem.
3353 fixVariable(const VarDecl
*VD
, FixitStrategy::Kind K
,
3354 /* The function decl under analysis */ const Decl
*D
,
3355 const DeclUseTracker
&Tracker
, ASTContext
&Ctx
,
3356 UnsafeBufferUsageHandler
&Handler
) {
3357 if (const auto *PVD
= dyn_cast
<ParmVarDecl
>(VD
)) {
3358 auto *FD
= dyn_cast
<clang::FunctionDecl
>(PVD
->getDeclContext());
3359 if (!FD
|| FD
!= D
) {
3360 // `FD != D` means that `PVD` belongs to a function that is not being
3361 // analyzed currently. Thus `FD` may not be complete.
3362 DEBUG_NOTE_DECL_FAIL(VD
, " : function not currently analyzed");
3366 // TODO If function has a try block we can't change params unless we check
3367 // also its catch block for their use.
3368 // FIXME We might support static class methods, some select methods,
3369 // operators and possibly lamdas.
3370 if (FD
->isMain() || FD
->isConstexpr() ||
3371 FD
->getTemplatedKind() != FunctionDecl::TemplatedKind::TK_NonTemplate
||
3373 // also covers call-operator of lamdas
3374 isa
<CXXMethodDecl
>(FD
) ||
3375 // skip when the function body is a try-block
3376 (FD
->hasBody() && isa
<CXXTryStmt
>(FD
->getBody())) ||
3377 FD
->isOverloadedOperator()) {
3378 DEBUG_NOTE_DECL_FAIL(VD
, " : unsupported function decl");
3379 return {}; // TODO test all these cases
3384 case FixitStrategy::Kind::Span
: {
3385 if (VD
->getType()->isPointerType()) {
3386 if (const auto *PVD
= dyn_cast
<ParmVarDecl
>(VD
))
3387 return fixParamWithSpan(PVD
, Ctx
, Handler
);
3389 if (VD
->isLocalVarDecl())
3390 return fixVariableWithSpan(VD
, Tracker
, Ctx
, Handler
);
3392 DEBUG_NOTE_DECL_FAIL(VD
, " : not a pointer");
3395 case FixitStrategy::Kind::Array
: {
3396 if (VD
->isLocalVarDecl() && Ctx
.getAsConstantArrayType(VD
->getType()))
3397 return fixVariableWithArray(VD
, Tracker
, Ctx
, Handler
);
3399 DEBUG_NOTE_DECL_FAIL(VD
, " : not a local const-size array");
3402 case FixitStrategy::Kind::Iterator
:
3403 case FixitStrategy::Kind::Vector
:
3404 llvm_unreachable("FixitStrategy not implemented yet!");
3405 case FixitStrategy::Kind::Wontfix
:
3406 llvm_unreachable("Invalid strategy!");
3408 llvm_unreachable("Unknown strategy!");
3411 // Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the
3412 // `RemoveRange` of 'h' overlaps with a macro use.
3413 static bool overlapWithMacro(const FixItList
&FixIts
) {
3414 // FIXME: For now we only check if the range (or the first token) is (part of)
3415 // a macro expansion. Ideally, we want to check for all tokens in the range.
3416 return llvm::any_of(FixIts
, [](const FixItHint
&Hint
) {
3417 auto Range
= Hint
.RemoveRange
;
3418 if (Range
.getBegin().isMacroID() || Range
.getEnd().isMacroID())
3419 // If the range (or the first token) is (part of) a macro expansion:
3425 // Returns true iff `VD` is a parameter of the declaration `D`:
3426 static bool isParameterOf(const VarDecl
*VD
, const Decl
*D
) {
3427 return isa
<ParmVarDecl
>(VD
) &&
3428 VD
->getDeclContext() == dyn_cast
<DeclContext
>(D
);
3431 // Erases variables in `FixItsForVariable`, if such a variable has an unfixable
3432 // group mate. A variable `v` is unfixable iff `FixItsForVariable` does not
3434 static void eraseVarsForUnfixableGroupMates(
3435 std::map
<const VarDecl
*, FixItList
> &FixItsForVariable
,
3436 const VariableGroupsManager
&VarGrpMgr
) {
3437 // Variables will be removed from `FixItsForVariable`:
3438 SmallVector
<const VarDecl
*, 8> ToErase
;
3440 for (const auto &[VD
, Ignore
] : FixItsForVariable
) {
3441 VarGrpRef Grp
= VarGrpMgr
.getGroupOfVar(VD
);
3442 if (llvm::any_of(Grp
,
3443 [&FixItsForVariable
](const VarDecl
*GrpMember
) -> bool {
3444 return !FixItsForVariable
.count(GrpMember
);
3446 // At least one group member cannot be fixed, so we have to erase the
3448 for (const VarDecl
*Member
: Grp
)
3449 ToErase
.push_back(Member
);
3452 for (auto *VarToErase
: ToErase
)
3453 FixItsForVariable
.erase(VarToErase
);
3456 // Returns the fix-its that create bounds-safe function overloads for the
3457 // function `D`, if `D`'s parameters will be changed to safe-types through
3458 // fix-its in `FixItsForVariable`.
3460 // NOTE: In case `D`'s parameters will be changed but bounds-safe function
3461 // overloads cannot created, the whole group that contains the parameters will
3462 // be erased from `FixItsForVariable`.
3463 static FixItList
createFunctionOverloadsForParms(
3464 std::map
<const VarDecl
*, FixItList
> &FixItsForVariable
/* mutable */,
3465 const VariableGroupsManager
&VarGrpMgr
, const FunctionDecl
*FD
,
3466 const FixitStrategy
&S
, ASTContext
&Ctx
,
3467 UnsafeBufferUsageHandler
&Handler
) {
3468 FixItList FixItsSharedByParms
{};
3470 std::optional
<FixItList
> OverloadFixes
=
3471 createOverloadsForFixedParams(S
, FD
, Ctx
, Handler
);
3473 if (OverloadFixes
) {
3474 FixItsSharedByParms
.append(*OverloadFixes
);
3476 // Something wrong in generating `OverloadFixes`, need to remove the
3477 // whole group, where parameters are in, from `FixItsForVariable` (Note
3478 // that all parameters should be in the same group):
3479 for (auto *Member
: VarGrpMgr
.getGroupOfParms())
3480 FixItsForVariable
.erase(Member
);
3482 return FixItsSharedByParms
;
3485 // Constructs self-contained fix-its for each variable in `FixablesForAllVars`.
3486 static std::map
<const VarDecl
*, FixItList
>
3487 getFixIts(FixableGadgetSets
&FixablesForAllVars
, const FixitStrategy
&S
,
3489 /* The function decl under analysis */ const Decl
*D
,
3490 const DeclUseTracker
&Tracker
, UnsafeBufferUsageHandler
&Handler
,
3491 const VariableGroupsManager
&VarGrpMgr
) {
3492 // `FixItsForVariable` will map each variable to a set of fix-its directly
3493 // associated to the variable itself. Fix-its of distinct variables in
3494 // `FixItsForVariable` are disjoint.
3495 std::map
<const VarDecl
*, FixItList
> FixItsForVariable
;
3497 // Populate `FixItsForVariable` with fix-its directly associated with each
3498 // variable. Fix-its directly associated to a variable 'v' are the ones
3499 // produced by the `FixableGadget`s whose claimed variable is 'v'.
3500 for (const auto &[VD
, Fixables
] : FixablesForAllVars
.byVar
) {
3501 FixItsForVariable
[VD
] =
3502 fixVariable(VD
, S
.lookup(VD
), D
, Tracker
, Ctx
, Handler
);
3503 // If we fail to produce Fix-It for the declaration we have to skip the
3504 // variable entirely.
3505 if (FixItsForVariable
[VD
].empty()) {
3506 FixItsForVariable
.erase(VD
);
3509 for (const auto &F
: Fixables
) {
3510 std::optional
<FixItList
> Fixits
= F
->getFixits(S
);
3513 FixItsForVariable
[VD
].insert(FixItsForVariable
[VD
].end(),
3514 Fixits
->begin(), Fixits
->end());
3518 Handler
.addDebugNoteForVar(
3519 VD
, F
->getSourceLoc(),
3520 ("gadget '" + F
->getDebugName() + "' refused to produce a fix")
3523 FixItsForVariable
.erase(VD
);
3528 // `FixItsForVariable` now contains only variables that can be
3529 // fixed. A variable can be fixed if its' declaration and all Fixables
3530 // associated to it can all be fixed.
3532 // To further remove from `FixItsForVariable` variables whose group mates
3533 // cannot be fixed...
3534 eraseVarsForUnfixableGroupMates(FixItsForVariable
, VarGrpMgr
);
3535 // Now `FixItsForVariable` gets further reduced: a variable is in
3536 // `FixItsForVariable` iff it can be fixed and all its group mates can be
3539 // Fix-its of bounds-safe overloads of `D` are shared by parameters of `D`.
3540 // That is, when fixing multiple parameters in one step, these fix-its will
3541 // be applied only once (instead of being applied per parameter).
3542 FixItList FixItsSharedByParms
{};
3544 if (auto *FD
= dyn_cast
<FunctionDecl
>(D
))
3545 FixItsSharedByParms
= createFunctionOverloadsForParms(
3546 FixItsForVariable
, VarGrpMgr
, FD
, S
, Ctx
, Handler
);
3548 // The map that maps each variable `v` to fix-its for the whole group where
3550 std::map
<const VarDecl
*, FixItList
> FinalFixItsForVariable
{
3553 for (auto &[Var
, Ignore
] : FixItsForVariable
) {
3554 bool AnyParm
= false;
3555 const auto VarGroupForVD
= VarGrpMgr
.getGroupOfVar(Var
, &AnyParm
);
3557 for (const VarDecl
*GrpMate
: VarGroupForVD
) {
3560 if (FixItsForVariable
.count(GrpMate
))
3561 FinalFixItsForVariable
[Var
].append(FixItsForVariable
[GrpMate
]);
3564 // This assertion should never fail. Otherwise we have a bug.
3565 assert(!FixItsSharedByParms
.empty() &&
3566 "Should not try to fix a parameter that does not belong to a "
3568 FinalFixItsForVariable
[Var
].append(FixItsSharedByParms
);
3571 // Fix-its that will be applied in one step shall NOT:
3572 // 1. overlap with macros or/and templates; or
3573 // 2. conflict with each other.
3574 // Otherwise, the fix-its will be dropped.
3575 for (auto Iter
= FinalFixItsForVariable
.begin();
3576 Iter
!= FinalFixItsForVariable
.end();)
3577 if (overlapWithMacro(Iter
->second
) ||
3578 clang::internal::anyConflict(Iter
->second
, Ctx
.getSourceManager())) {
3579 Iter
= FinalFixItsForVariable
.erase(Iter
);
3582 return FinalFixItsForVariable
;
3585 template <typename VarDeclIterTy
>
3586 static FixitStrategy
3587 getNaiveStrategy(llvm::iterator_range
<VarDeclIterTy
> UnsafeVars
) {
3589 for (const VarDecl
*VD
: UnsafeVars
) {
3590 if (isa
<ConstantArrayType
>(VD
->getType().getCanonicalType()))
3591 S
.set(VD
, FixitStrategy::Kind::Array
);
3593 S
.set(VD
, FixitStrategy::Kind::Span
);
3598 // Manages variable groups:
3599 class VariableGroupsManagerImpl
: public VariableGroupsManager
{
3600 const std::vector
<VarGrpTy
> Groups
;
3601 const std::map
<const VarDecl
*, unsigned> &VarGrpMap
;
3602 const llvm::SetVector
<const VarDecl
*> &GrpsUnionForParms
;
3605 VariableGroupsManagerImpl(
3606 const std::vector
<VarGrpTy
> &Groups
,
3607 const std::map
<const VarDecl
*, unsigned> &VarGrpMap
,
3608 const llvm::SetVector
<const VarDecl
*> &GrpsUnionForParms
)
3609 : Groups(Groups
), VarGrpMap(VarGrpMap
),
3610 GrpsUnionForParms(GrpsUnionForParms
) {}
3612 VarGrpRef
getGroupOfVar(const VarDecl
*Var
, bool *HasParm
) const override
{
3613 if (GrpsUnionForParms
.contains(Var
)) {
3616 return GrpsUnionForParms
.getArrayRef();
3621 auto It
= VarGrpMap
.find(Var
);
3623 if (It
== VarGrpMap
.end())
3625 return Groups
[It
->second
];
3628 VarGrpRef
getGroupOfParms() const override
{
3629 return GrpsUnionForParms
.getArrayRef();
3633 void clang::checkUnsafeBufferUsage(const Decl
*D
,
3634 UnsafeBufferUsageHandler
&Handler
,
3635 bool EmitSuggestions
) {
3637 Handler
.clearDebugNotes();
3640 assert(D
&& D
->getBody());
3641 // We do not want to visit a Lambda expression defined inside a method
3642 // independently. Instead, it should be visited along with the outer method.
3643 // FIXME: do we want to do the same thing for `BlockDecl`s?
3644 if (const auto *fd
= dyn_cast
<CXXMethodDecl
>(D
)) {
3645 if (fd
->getParent()->isLambda() && fd
->getParent()->isLocalClass())
3649 // Do not emit fixit suggestions for functions declared in an
3650 // extern "C" block.
3651 if (const auto *FD
= dyn_cast
<FunctionDecl
>(D
)) {
3652 for (FunctionDecl
*FReDecl
: FD
->redecls()) {
3653 if (FReDecl
->isExternC()) {
3654 EmitSuggestions
= false;
3660 WarningGadgetSets UnsafeOps
;
3661 FixableGadgetSets FixablesForAllVars
;
3663 auto [FixableGadgets
, WarningGadgets
, Tracker
] =
3664 findGadgets(D
, Handler
, EmitSuggestions
);
3666 if (!EmitSuggestions
) {
3667 // Our job is very easy without suggestions. Just warn about
3668 // every problematic operation and consider it done. No need to deal
3669 // with fixable gadgets, no need to group operations by variable.
3670 for (const auto &G
: WarningGadgets
) {
3671 G
->handleUnsafeOperation(Handler
, /*IsRelatedToDecl=*/false,
3672 D
->getASTContext());
3675 // This return guarantees that most of the machine doesn't run when
3676 // suggestions aren't requested.
3677 assert(FixableGadgets
.size() == 0 &&
3678 "Fixable gadgets found but suggestions not requested!");
3682 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
3683 // function under the analysis. No need to fix any Fixables.
3684 if (!WarningGadgets
.empty()) {
3685 // Gadgets "claim" variables they're responsible for. Once this loop
3686 // finishes, the tracker will only track DREs that weren't claimed by any
3687 // gadgets, i.e. not understood by the analysis.
3688 for (const auto &G
: FixableGadgets
) {
3689 for (const auto *DRE
: G
->getClaimedVarUseSites()) {
3690 Tracker
.claimUse(DRE
);
3695 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
3696 // function under the analysis. Thus, it early returns here as there is
3697 // nothing needs to be fixed.
3699 // Note this claim is based on the assumption that there is no unsafe
3700 // variable whose declaration is invisible from the analyzing function.
2701 // Otherwise, we need to consider if the uses of those unsafe variables need
3703 // So far, we are not fixing any global variables or class members. And,
3704 // lambdas will be analyzed along with the enclosing function. So this early
3705 // return is correct for now.
3706 if (WarningGadgets
.empty())
3709 UnsafeOps
= groupWarningGadgetsByVar(std::move(WarningGadgets
));
3710 FixablesForAllVars
= groupFixablesByVar(std::move(FixableGadgets
));
3712 std::map
<const VarDecl
*, FixItList
> FixItsForVariableGroup
;
3714 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s.
3715 for (auto it
= FixablesForAllVars
.byVar
.cbegin();
3716 it
!= FixablesForAllVars
.byVar
.cend();) {
3717 // FIXME: need to deal with global variables later
3718 if ((!it
->first
->isLocalVarDecl() && !isa
<ParmVarDecl
>(it
->first
))) {
3720 Handler
.addDebugNoteForVar(it
->first
, it
->first
->getBeginLoc(),
3721 ("failed to produce fixit for '" +
3722 it
->first
->getNameAsString() +
3723 "' : neither local nor a parameter"));
3725 it
= FixablesForAllVars
.byVar
.erase(it
);
3726 } else if (it
->first
->getType().getCanonicalType()->isReferenceType()) {
3728 Handler
.addDebugNoteForVar(it
->first
, it
->first
->getBeginLoc(),
3729 ("failed to produce fixit for '" +
3730 it
->first
->getNameAsString() +
3731 "' : has a reference type"));
3733 it
= FixablesForAllVars
.byVar
.erase(it
);
3734 } else if (Tracker
.hasUnclaimedUses(it
->first
)) {
3735 it
= FixablesForAllVars
.byVar
.erase(it
);
3736 } else if (it
->first
->isInitCapture()) {
3738 Handler
.addDebugNoteForVar(it
->first
, it
->first
->getBeginLoc(),
3739 ("failed to produce fixit for '" +
3740 it
->first
->getNameAsString() +
3741 "' : init capture"));
3743 it
= FixablesForAllVars
.byVar
.erase(it
);
3750 for (const auto &it
: UnsafeOps
.byVar
) {
3751 const VarDecl
*const UnsafeVD
= it
.first
;
3752 auto UnclaimedDREs
= Tracker
.getUnclaimedUses(UnsafeVD
);
3753 if (UnclaimedDREs
.empty())
3755 const auto UnfixedVDName
= UnsafeVD
->getNameAsString();
3756 for (const clang::DeclRefExpr
*UnclaimedDRE
: UnclaimedDREs
) {
3757 std::string UnclaimedUseTrace
=
3758 getDREAncestorString(UnclaimedDRE
, D
->getASTContext());
3760 Handler
.addDebugNoteForVar(
3761 UnsafeVD
, UnclaimedDRE
->getBeginLoc(),
3762 ("failed to produce fixit for '" + UnfixedVDName
+
3763 "' : has an unclaimed use\nThe unclaimed DRE trace: " +
3764 UnclaimedUseTrace
));
3769 // Fixpoint iteration for pointer assignments
3770 using DepMapTy
= DenseMap
<const VarDecl
*, llvm::SetVector
<const VarDecl
*>>;
3771 DepMapTy DependenciesMap
{};
3772 DepMapTy PtrAssignmentGraph
{};
3774 for (auto it
: FixablesForAllVars
.byVar
) {
3775 for (const FixableGadget
*fixable
: it
.second
) {
3776 std::optional
<std::pair
<const VarDecl
*, const VarDecl
*>> ImplPair
=
3777 fixable
->getStrategyImplications();
3779 std::pair
<const VarDecl
*, const VarDecl
*> Impl
= std::move(*ImplPair
);
3780 PtrAssignmentGraph
[Impl
.first
].insert(Impl
.second
);
3786 The following code does a BFS traversal of the `PtrAssignmentGraph`
3787 considering all unsafe vars as starting nodes and constructs an undirected
3788 graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner
3789 eliminates all variables that are unreachable from any unsafe var. In other
3790 words, this removes all dependencies that don't include any unsafe variable
3791 and consequently don't need any fixit generation.
3792 Note: A careful reader would observe that the code traverses
3793 `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and
3794 `Adj` and not between `CurrentVar` and `Adj`. Both approaches would
3795 achieve the same result but the one used here dramatically cuts the
3796 number of hoops the second part of the algorithm needs to jump through, given that
3797 a lot of these connections become "direct". The reader is advised not to
3798 imagine how the graph is transformed because of using `Var` instead of
3799 `CurrentVar`. The reader can continue reading as if `CurrentVar` was used,
3800 and think about why it's equivalent later.
3802 std::set
<const VarDecl
*> VisitedVarsDirected
{};
3803 for (const auto &[Var
, ignore
] : UnsafeOps
.byVar
) {
3804 if (VisitedVarsDirected
.find(Var
) == VisitedVarsDirected
.end()) {
3806 std::queue
<const VarDecl
*> QueueDirected
{};
3807 QueueDirected
.push(Var
);
3808 while (!QueueDirected
.empty()) {
3809 const VarDecl
*CurrentVar
= QueueDirected
.front();
3810 QueueDirected
.pop();
3811 VisitedVarsDirected
.insert(CurrentVar
);
3812 auto AdjacentNodes
= PtrAssignmentGraph
[CurrentVar
];
3813 for (const VarDecl
*Adj
: AdjacentNodes
) {
3814 if (VisitedVarsDirected
.find(Adj
) == VisitedVarsDirected
.end()) {
3815 QueueDirected
.push(Adj
);
3817 DependenciesMap
[Var
].insert(Adj
);
3818 DependenciesMap
[Adj
].insert(Var
);
3824 // `Groups` stores the set of Connected Components in the graph.
3825 std::vector
<VarGrpTy
> Groups
;
3826 // `VarGrpMap` maps variables that need fix to the groups (indexes) that the
3827 // variables belong to. Group indexes refer to the elements in `Groups`.
3828 // `VarGrpMap` is complete in that every variable that needs fix is in it.
3829 std::map
<const VarDecl
*, unsigned> VarGrpMap
;
3830 // The union group over the ones in "Groups" that contain parameters of `D`:
3831 llvm::SetVector
<const VarDecl
*>
3832 GrpsUnionForParms
; // these variables need to be fixed in one step
3834 // Group Connected Components for Unsafe Vars
3835 // (Dependencies based on pointer assignments)
3836 std::set
<const VarDecl
*> VisitedVars
{};
3837 for (const auto &[Var
, ignore
] : UnsafeOps
.byVar
) {
3838 if (VisitedVars
.find(Var
) == VisitedVars
.end()) {
3839 VarGrpTy
&VarGroup
= Groups
.emplace_back();
3840 std::queue
<const VarDecl
*> Queue
{};
3843 while (!Queue
.empty()) {
3844 const VarDecl
*CurrentVar
= Queue
.front();
3846 VisitedVars
.insert(CurrentVar
);
3847 VarGroup
.push_back(CurrentVar
);
3848 auto AdjacentNodes
= DependenciesMap
[CurrentVar
];
3849 for (const VarDecl
*Adj
: AdjacentNodes
) {
3850 if (VisitedVars
.find(Adj
) == VisitedVars
.end()) {
3856 bool HasParm
= false;
3857 unsigned GrpIdx
= Groups
.size() - 1;
3859 for (const VarDecl
*V
: VarGroup
) {
3860 VarGrpMap
[V
] = GrpIdx
;
3861 if (!HasParm
&& isParameterOf(V
, D
))
3865 GrpsUnionForParms
.insert(VarGroup
.begin(), VarGroup
.end());
3869 // Remove a `FixableGadget` if the associated variable is not in the graph
3870 // computed above. We do not want to generate fix-its for such variables,
3871 // since they are neither warned nor reachable from a warned one.
3873 // Note a variable is not warned if it is not directly used in any unsafe
3874 // operation. A variable `v` is NOT reachable from an unsafe variable, if there
3875 // does not exist another variable `u` such that `u` is warned and fixing `u`
3876 // (transitively) implies fixing `v`.
3880 // void f(int * p) {
3881 // int * a = p; *p = 0;
3884 // `*p = 0` is a fixable gadget associated with a variable `p` that is neither
3885 // warned nor reachable from a warned one. If we add `a[5] = 0` to the end of
3886 // the function above, `p` becomes reachable from a warned variable.
3887 for (auto I
= FixablesForAllVars
.byVar
.begin();
3888 I
!= FixablesForAllVars
.byVar
.end();) {
3889 // Note `VisitedVars` contains all the variables in the graph:
3890 if (!VisitedVars
.count((*I
).first
)) {
3891 // no such var in graph:
3892 I
= FixablesForAllVars
.byVar
.erase(I
);
3897 // We assign strategies to variables that are 1) in the graph and 2) can be
3898 // fixed. Other variables have the default "Won't fix" strategy.
3899 FixitStrategy NaiveStrategy
= getNaiveStrategy(llvm::make_filter_range(
3900 VisitedVars
, [&FixablesForAllVars
](const VarDecl
*V
) {
3901 // If a warned variable has no "Fixable", it is considered unfixable:
3902 return FixablesForAllVars
.byVar
.count(V
);
3904 VariableGroupsManagerImpl
VarGrpMgr(Groups
, VarGrpMap
, GrpsUnionForParms
);
3906 if (isa
<NamedDecl
>(D
))
3907 // The only case where `D` is not a `NamedDecl` is when `D` is a
3908 // `BlockDecl`. Let's not fix variables in blocks for now
3909 FixItsForVariableGroup
=
3910 getFixIts(FixablesForAllVars
, NaiveStrategy
, D
->getASTContext(), D
,
3911 Tracker
, Handler
, VarGrpMgr
);
3913 for (const auto &G
: UnsafeOps
.noVar
) {
3914 G
->handleUnsafeOperation(Handler
, /*IsRelatedToDecl=*/false,
3915 D
->getASTContext());
3918 for (const auto &[VD
, WarningGadgets
] : UnsafeOps
.byVar
) {
3919 auto FixItsIt
= FixItsForVariableGroup
.find(VD
);
3920 Handler
.handleUnsafeVariableGroup(VD
, VarGrpMgr
,
3921 FixItsIt
!= FixItsForVariableGroup
.end()
3922 ? std::move(FixItsIt
->second
)
3925 for (const auto &G
: WarningGadgets
) {
3926 G
->handleUnsafeOperation(Handler
, /*IsRelatedToDecl=*/true,
3927 D
->getASTContext());