1 //===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
10 #include "clang/AST/RecursiveASTVisitor.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Lex/Preprocessor.h"
13 #include "clang/Lex/Lexer.h"
14 #include "llvm/ADT/SmallVector.h"
19 using namespace clang
;
20 using namespace ast_matchers
;
22 namespace clang::ast_matchers
{
23 // A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
24 // except for those belonging to a different callable of "n".
25 class MatchDescendantVisitor
26 : public RecursiveASTVisitor
<MatchDescendantVisitor
> {
28 typedef RecursiveASTVisitor
<MatchDescendantVisitor
> VisitorBase
;
30 // Creates an AST visitor that matches `Matcher` on all
31 // descendants of a given node "n" except for the ones
32 // belonging to a different callable of "n".
33 MatchDescendantVisitor(const internal::DynTypedMatcher
*Matcher
,
34 internal::ASTMatchFinder
*Finder
,
35 internal::BoundNodesTreeBuilder
*Builder
,
36 internal::ASTMatchFinder::BindKind Bind
)
37 : Matcher(Matcher
), Finder(Finder
), Builder(Builder
), Bind(Bind
),
40 // Returns true if a match is found in a subtree of `DynNode`, which belongs
41 // to the same callable of `DynNode`.
42 bool findMatch(const DynTypedNode
&DynNode
) {
44 if (const Stmt
*StmtNode
= DynNode
.get
<Stmt
>()) {
45 TraverseStmt(const_cast<Stmt
*>(StmtNode
));
46 *Builder
= ResultBindings
;
52 // The following are overriding methods from the base visitor class.
53 // They are public only to allow CRTP to work. They are *not *part
54 // of the public API of this class.
56 // For the matchers so far used in safe buffers, we only need to match
57 // `Stmt`s. To override more as needed.
59 bool TraverseDecl(Decl
*Node
) {
65 if (isa
<FunctionDecl
, BlockDecl
, ObjCMethodDecl
>(Node
))
67 // Traverse descendants
68 return VisitorBase::TraverseDecl(Node
);
71 bool TraverseStmt(Stmt
*Node
, DataRecursionQueue
*Queue
= nullptr) {
77 if (isa
<LambdaExpr
>(Node
))
79 return VisitorBase::TraverseStmt(Node
);
82 bool shouldVisitTemplateInstantiations() const { return true; }
83 bool shouldVisitImplicitCode() const {
84 // TODO: let's ignore implicit code for now
89 // Sets 'Matched' to true if 'Matcher' matches 'Node'
91 // Returns 'true' if traversal should continue after this function
92 // returns, i.e. if no match is found or 'Bind' is 'BK_All'.
93 template <typename T
> bool match(const T
&Node
) {
94 internal::BoundNodesTreeBuilder
RecursiveBuilder(*Builder
);
96 if (Matcher
->matches(DynTypedNode::create(Node
), Finder
,
98 ResultBindings
.addMatch(RecursiveBuilder
);
100 if (Bind
!= internal::ASTMatchFinder::BK_All
)
101 return false; // Abort as soon as a match is found.
106 const internal::DynTypedMatcher
*const Matcher
;
107 internal::ASTMatchFinder
*const Finder
;
108 internal::BoundNodesTreeBuilder
*const Builder
;
109 internal::BoundNodesTreeBuilder ResultBindings
;
110 const internal::ASTMatchFinder::BindKind Bind
;
114 AST_MATCHER_P(Stmt
, forEveryDescendant
, internal::Matcher
<Stmt
>, innerMatcher
) {
115 const DynTypedMatcher
&DTM
= static_cast<DynTypedMatcher
>(innerMatcher
);
117 MatchDescendantVisitor
Visitor(&DTM
, Finder
, Builder
, ASTMatchFinder::BK_All
);
118 return Visitor
.findMatch(DynTypedNode::create(Node
));
121 // Matches a `Stmt` node iff the node is in a safe-buffer opt-out region
122 AST_MATCHER_P(Stmt
, notInSafeBufferOptOut
, const UnsafeBufferUsageHandler
*, Handler
) {
123 return !Handler
->isSafeBufferOptOut(Node
.getBeginLoc());
126 AST_MATCHER_P(CastExpr
, castSubExpr
, internal::Matcher
<Expr
>, innerMatcher
) {
127 return innerMatcher
.matches(*Node
.getSubExpr(), Finder
, Builder
);
130 // Returns a matcher that matches any expression 'e' such that `innerMatcher`
131 // matches 'e' and 'e' is in an Unspecified Lvalue Context.
132 static internal::Matcher
<Stmt
>
133 isInUnspecifiedLvalueContext(internal::Matcher
<Expr
> innerMatcher
) {
134 return implicitCastExpr(hasCastKind(CastKind::CK_LValueToRValue
),
135 castSubExpr(innerMatcher
));
136 // FIXME: add assignmentTo context...
138 } // namespace clang::ast_matchers
141 // Because the analysis revolves around variables and their types, we'll need to
142 // track uses of variables (aka DeclRefExprs).
143 using DeclUseList
= SmallVector
<const DeclRefExpr
*, 1>;
145 // Convenience typedef.
146 using FixItList
= SmallVector
<FixItHint
, 4>;
152 // Because we're dealing with raw pointers, let's define what we mean by that.
153 static auto hasPointerType() {
154 return hasType(hasCanonicalType(pointerType()));
157 static auto hasArrayType() {
158 return hasType(hasCanonicalType(arrayType()));
162 /// Gadget is an individual operation in the code that may be of interest to
163 /// this analysis. Each (non-abstract) subclass corresponds to a specific
164 /// rigid AST structure that constitutes an operation on a pointer-type object.
165 /// Discovery of a gadget in the code corresponds to claiming that we understand
166 /// what this part of code is doing well enough to potentially improve it.
167 /// Gadgets can be warning (immediately deserving a warning) or fixable (not
168 /// always deserving a warning per se, but requires our attention to identify
169 /// it warrants a fixit).
174 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
177 /// Common type of ASTMatchers used for discovering gadgets.
178 /// Useful for implementing the static matcher() methods
179 /// that are expected from all non-abstract subclasses.
180 using Matcher
= decltype(stmt());
182 Gadget(Kind K
) : K(K
) {}
184 Kind
getKind() const { return K
; }
186 virtual bool isWarningGadget() const = 0;
187 virtual const Stmt
*getBaseStmt() const = 0;
189 /// Returns the list of pointer-type variables on which this gadget performs
190 /// its operation. Typically, there's only one variable. This isn't a list
191 /// of all DeclRefExprs in the gadget's AST!
192 virtual DeclUseList
getClaimedVarUseSites() const = 0;
194 virtual ~Gadget() = default;
201 /// Warning gadgets correspond to unsafe code patterns that warrants
202 /// an immediate warning.
203 class WarningGadget
: public Gadget
{
205 WarningGadget(Kind K
) : Gadget(K
) {}
207 static bool classof(const Gadget
*G
) { return G
->isWarningGadget(); }
208 bool isWarningGadget() const final
{ return true; }
211 /// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be
212 /// properly recognized in order to emit fixes. For example, if a raw pointer-type
213 /// variable is replaced by a safe C++ container, every use of such variable must be
214 /// carefully considered and possibly updated.
215 class FixableGadget
: public Gadget
{
217 FixableGadget(Kind K
) : Gadget(K
) {}
219 static bool classof(const Gadget
*G
) { return !G
->isWarningGadget(); }
220 bool isWarningGadget() const final
{ return false; }
222 /// Returns a fixit that would fix the current gadget according to
223 /// the current strategy. Returns None if the fix cannot be produced;
224 /// returns an empty list if no fixes are necessary.
225 virtual std::optional
<FixItList
> getFixits(const Strategy
&) const {
230 using FixableGadgetList
= std::vector
<std::unique_ptr
<FixableGadget
>>;
231 using WarningGadgetList
= std::vector
<std::unique_ptr
<WarningGadget
>>;
233 /// An increment of a pointer-type value is unsafe as it may run the pointer
235 class IncrementGadget
: public WarningGadget
{
236 static constexpr const char *const OpTag
= "op";
237 const UnaryOperator
*Op
;
240 IncrementGadget(const MatchFinder::MatchResult
&Result
)
241 : WarningGadget(Kind::Increment
),
242 Op(Result
.Nodes
.getNodeAs
<UnaryOperator
>(OpTag
)) {}
244 static bool classof(const Gadget
*G
) {
245 return G
->getKind() == Kind::Increment
;
248 static Matcher
matcher() {
249 return stmt(unaryOperator(
250 hasOperatorName("++"),
251 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
255 const UnaryOperator
*getBaseStmt() const override
{ return Op
; }
257 DeclUseList
getClaimedVarUseSites() const override
{
258 SmallVector
<const DeclRefExpr
*, 2> Uses
;
259 if (const auto *DRE
=
260 dyn_cast
<DeclRefExpr
>(Op
->getSubExpr()->IgnoreParenImpCasts())) {
264 return std::move(Uses
);
268 /// A decrement of a pointer-type value is unsafe as it may run the pointer
270 class DecrementGadget
: public WarningGadget
{
271 static constexpr const char *const OpTag
= "op";
272 const UnaryOperator
*Op
;
275 DecrementGadget(const MatchFinder::MatchResult
&Result
)
276 : WarningGadget(Kind::Decrement
),
277 Op(Result
.Nodes
.getNodeAs
<UnaryOperator
>(OpTag
)) {}
279 static bool classof(const Gadget
*G
) {
280 return G
->getKind() == Kind::Decrement
;
283 static Matcher
matcher() {
284 return stmt(unaryOperator(
285 hasOperatorName("--"),
286 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
290 const UnaryOperator
*getBaseStmt() const override
{ return Op
; }
292 DeclUseList
getClaimedVarUseSites() const override
{
293 if (const auto *DRE
=
294 dyn_cast
<DeclRefExpr
>(Op
->getSubExpr()->IgnoreParenImpCasts())) {
302 /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
303 /// it doesn't have any bounds checks for the array.
304 class ArraySubscriptGadget
: public WarningGadget
{
305 static constexpr const char *const ArraySubscrTag
= "ArraySubscript";
306 const ArraySubscriptExpr
*ASE
;
309 ArraySubscriptGadget(const MatchFinder::MatchResult
&Result
)
310 : WarningGadget(Kind::ArraySubscript
),
311 ASE(Result
.Nodes
.getNodeAs
<ArraySubscriptExpr
>(ArraySubscrTag
)) {}
313 static bool classof(const Gadget
*G
) {
314 return G
->getKind() == Kind::ArraySubscript
;
317 static Matcher
matcher() {
318 // FIXME: What if the index is integer literal 0? Should this be
319 // a safe gadget in this case?
321 return stmt(arraySubscriptExpr(
322 hasBase(ignoringParenImpCasts(
323 anyOf(hasPointerType(), hasArrayType()))),
324 unless(hasIndex(integerLiteral(equals(0)))))
325 .bind(ArraySubscrTag
));
329 const ArraySubscriptExpr
*getBaseStmt() const override
{ return ASE
; }
331 DeclUseList
getClaimedVarUseSites() const override
{
332 if (const auto *DRE
=
333 dyn_cast
<DeclRefExpr
>(ASE
->getBase()->IgnoreParenImpCasts())) {
341 /// A pointer arithmetic expression of one of the forms:
343 /// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n
345 class PointerArithmeticGadget
: public WarningGadget
{
346 static constexpr const char *const PointerArithmeticTag
= "ptrAdd";
347 static constexpr const char *const PointerArithmeticPointerTag
= "ptrAddPtr";
348 const BinaryOperator
*PA
; // pointer arithmetic expression
349 const Expr
* Ptr
; // the pointer expression in `PA`
352 PointerArithmeticGadget(const MatchFinder::MatchResult
&Result
)
353 : WarningGadget(Kind::PointerArithmetic
),
354 PA(Result
.Nodes
.getNodeAs
<BinaryOperator
>(PointerArithmeticTag
)),
355 Ptr(Result
.Nodes
.getNodeAs
<Expr
>(PointerArithmeticPointerTag
)) {}
357 static bool classof(const Gadget
*G
) {
358 return G
->getKind() == Kind::PointerArithmetic
;
361 static Matcher
matcher() {
362 auto HasIntegerType
= anyOf(
363 hasType(isInteger()), hasType(enumType()));
364 auto PtrAtRight
= allOf(hasOperatorName("+"),
365 hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag
)),
366 hasLHS(HasIntegerType
));
367 auto PtrAtLeft
= allOf(
368 anyOf(hasOperatorName("+"), hasOperatorName("-"),
369 hasOperatorName("+="), hasOperatorName("-=")),
370 hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag
)),
371 hasRHS(HasIntegerType
));
373 return stmt(binaryOperator(anyOf(PtrAtLeft
, PtrAtRight
)).bind(PointerArithmeticTag
));
376 const Stmt
*getBaseStmt() const override
{ return PA
; }
378 DeclUseList
getClaimedVarUseSites() const override
{
379 if (const auto *DRE
=
380 dyn_cast
<DeclRefExpr
>(Ptr
->IgnoreParenImpCasts())) {
386 // FIXME: pointer adding zero should be fine
387 //FIXME: this gadge will need a fix-it
391 /// A call of a function or method that performs unchecked buffer operations
392 /// over one of its pointer parameters.
393 class UnsafeBufferUsageAttrGadget
: public WarningGadget
{
394 constexpr static const char *const OpTag
= "call_expr";
398 UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult
&Result
)
399 : WarningGadget(Kind::UnsafeBufferUsageAttr
),
400 Op(Result
.Nodes
.getNodeAs
<CallExpr
>(OpTag
)) {}
402 static bool classof(const Gadget
*G
) {
403 return G
->getKind() == Kind::UnsafeBufferUsageAttr
;
406 static Matcher
matcher() {
407 return stmt(callExpr(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage
))))
410 const Stmt
*getBaseStmt() const override
{ return Op
; }
412 DeclUseList
getClaimedVarUseSites() const override
{
418 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
419 // Context (see `isInUnspecifiedLvalueContext`).
420 // Note here `[]` is the built-in subscript operator.
421 class ULCArraySubscriptGadget
: public FixableGadget
{
423 static constexpr const char *const ULCArraySubscriptTag
= "ArraySubscriptUnderULC";
424 const ArraySubscriptExpr
*Node
;
427 ULCArraySubscriptGadget(const MatchFinder::MatchResult
&Result
)
428 : FixableGadget(Kind::ULCArraySubscript
),
429 Node(Result
.Nodes
.getNodeAs
<ArraySubscriptExpr
>(ULCArraySubscriptTag
)) {
430 assert(Node
!= nullptr && "Expecting a non-null matching result");
433 static bool classof(const Gadget
*G
) {
434 return G
->getKind() == Kind::ULCArraySubscript
;
437 static Matcher
matcher() {
438 auto ArrayOrPtr
= anyOf(hasPointerType(), hasArrayType());
439 auto BaseIsArrayOrPtrDRE
=
440 hasBase(ignoringParenImpCasts(declRefExpr(ArrayOrPtr
)));
442 arraySubscriptExpr(BaseIsArrayOrPtrDRE
).bind(ULCArraySubscriptTag
);
444 return expr(isInUnspecifiedLvalueContext(Target
));
447 virtual std::optional
<FixItList
> getFixits(const Strategy
&S
) const override
;
449 virtual const Stmt
*getBaseStmt() const override
{ return Node
; }
451 virtual DeclUseList
getClaimedVarUseSites() const override
{
452 if (const auto *DRE
= dyn_cast
<DeclRefExpr
>(Node
->getBase()->IgnoreImpCasts())) {
461 // An auxiliary tracking facility for the fixit analysis. It helps connect
462 // declarations to its and make sure we've covered all uses with our analysis
463 // before we try to fix the declaration.
464 class DeclUseTracker
{
465 using UseSetTy
= SmallSet
<const DeclRefExpr
*, 16>;
466 using DefMapTy
= DenseMap
<const VarDecl
*, const DeclStmt
*>;
468 // Allocate on the heap for easier move.
469 std::unique_ptr
<UseSetTy
> Uses
{std::make_unique
<UseSetTy
>()};
473 DeclUseTracker() = default;
474 DeclUseTracker(const DeclUseTracker
&) = delete; // Let's avoid copies.
475 DeclUseTracker(DeclUseTracker
&&) = default;
476 DeclUseTracker
&operator=(DeclUseTracker
&&) = default;
478 // Start tracking a freshly discovered DRE.
479 void discoverUse(const DeclRefExpr
*DRE
) { Uses
->insert(DRE
); }
481 // Stop tracking the DRE as it's been fully figured out.
482 void claimUse(const DeclRefExpr
*DRE
) {
483 assert(Uses
->count(DRE
) &&
484 "DRE not found or claimed by multiple matchers!");
488 // A variable is unclaimed if at least one use is unclaimed.
489 bool hasUnclaimedUses(const VarDecl
*VD
) const {
490 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
491 return any_of(*Uses
, [VD
](const DeclRefExpr
*DRE
) {
492 return DRE
->getDecl()->getCanonicalDecl() == VD
->getCanonicalDecl();
496 void discoverDecl(const DeclStmt
*DS
) {
497 for (const Decl
*D
: DS
->decls()) {
498 if (const auto *VD
= dyn_cast
<VarDecl
>(D
)) {
499 // FIXME: Assertion temporarily disabled due to a bug in
500 // ASTMatcher internal behavior in presence of GNU
501 // statement-expressions. We need to properly investigate this
502 // because it can screw up our algorithm in other ways.
503 // assert(Defs.count(VD) == 0 && "Definition already discovered!");
509 const DeclStmt
*lookupDecl(const VarDecl
*VD
) const {
510 auto It
= Defs
.find(VD
);
511 assert(It
!= Defs
.end() && "Definition never discovered!");
518 // Strategy is a map from variables to the way we plan to emit fixes for
519 // these variables. It is figured out gradually by trying different fixes
520 // for different variables depending on gadgets in which these variables
525 Wontfix
, // We don't plan to emit a fixit for this variable.
526 Span
, // We recommend replacing the variable with std::span.
527 Iterator
, // We recommend replacing the variable with std::span::iterator.
528 Array
, // We recommend replacing the variable with std::array.
529 Vector
// We recommend replacing the variable with std::vector.
533 using MapTy
= llvm::DenseMap
<const VarDecl
*, Kind
>;
538 Strategy() = default;
539 Strategy(const Strategy
&) = delete; // Let's avoid copies.
540 Strategy(Strategy
&&) = default;
542 void set(const VarDecl
*VD
, Kind K
) {
546 Kind
lookup(const VarDecl
*VD
) const {
547 auto I
= Map
.find(VD
);
549 return Kind::Wontfix
;
556 /// Scan the function and return a list of gadgets found with provided kits.
557 static std::tuple
<FixableGadgetList
, WarningGadgetList
, DeclUseTracker
>
558 findGadgets(const Decl
*D
, const UnsafeBufferUsageHandler
&Handler
) {
560 struct GadgetFinderCallback
: MatchFinder::MatchCallback
{
561 FixableGadgetList FixableGadgets
;
562 WarningGadgetList WarningGadgets
;
563 DeclUseTracker Tracker
;
565 void run(const MatchFinder::MatchResult
&Result
) override
{
566 // In debug mode, assert that we've found exactly one gadget.
567 // This helps us avoid conflicts in .bind() tags.
571 [[maybe_unused
]] int numFound
= 0;
572 #define NEXT ++numFound
575 if (const auto *DRE
= Result
.Nodes
.getNodeAs
<DeclRefExpr
>("any_dre")) {
576 Tracker
.discoverUse(DRE
);
580 if (const auto *DS
= Result
.Nodes
.getNodeAs
<DeclStmt
>("any_ds")) {
581 Tracker
.discoverDecl(DS
);
585 // Figure out which matcher we've found, and call the appropriate
586 // subclass constructor.
587 // FIXME: Can we do this more logarithmically?
588 #define FIXABLE_GADGET(name) \
589 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
590 FixableGadgets.push_back(std::make_unique<name ## Gadget>(Result)); \
593 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
594 #define WARNING_GADGET(name) \
595 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
596 WarningGadgets.push_back(std::make_unique<name ## Gadget>(Result)); \
599 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
601 assert(numFound
>= 1 && "Gadgets not found in match result!");
602 assert(numFound
<= 1 && "Conflicting bind tags in gadgets!");
607 GadgetFinderCallback CB
;
611 stmt(forEveryDescendant(
613 // A `FixableGadget` matcher and a `WarningGadget` matcher should not disable
614 // each other (they could if they were put in the same `anyOf` group).
615 // We also should make sure no two `FixableGadget` (resp. `WarningGadget`) matchers
616 // match for the same node, so that we can group them
617 // in one `anyOf` group (for better performance via short-circuiting).
619 #define FIXABLE_GADGET(x) \
620 x ## Gadget::matcher().bind(#x),
621 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
622 // Also match DeclStmts because we'll need them when fixing
623 // their underlying VarDecls that otherwise don't have
624 // any backreferences to DeclStmts.
625 declStmt().bind("any_ds")
628 // Add Gadget::matcher() for every gadget in the registry.
629 #define WARNING_GADGET(x) \
630 allOf(x ## Gadget::matcher().bind(#x), notInSafeBufferOptOut(&Handler)),
631 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
632 // In parallel, match all DeclRefExprs so that to find out
633 // whether there are any uncovered by gadgets.
634 declRefExpr(anyOf(hasPointerType(), hasArrayType()), to(varDecl())).bind("any_dre")
641 M
.match(*D
->getBody(), D
->getASTContext());
643 // Gadgets "claim" variables they're responsible for. Once this loop finishes,
644 // the tracker will only track DREs that weren't claimed by any gadgets,
645 // i.e. not understood by the analysis.
646 for (const auto &G
: CB
.FixableGadgets
) {
647 for (const auto *DRE
: G
->getClaimedVarUseSites()) {
648 CB
.Tracker
.claimUse(DRE
);
652 return {std::move(CB
.FixableGadgets
), std::move(CB
.WarningGadgets
), std::move(CB
.Tracker
)};
655 struct WarningGadgetSets
{
656 std::map
<const VarDecl
*, std::set
<std::unique_ptr
<WarningGadget
>>> byVar
;
657 // These Gadgets are not related to pointer variables (e. g. temporaries).
658 llvm::SmallVector
<std::unique_ptr
<WarningGadget
>, 16> noVar
;
661 static WarningGadgetSets
662 groupWarningGadgetsByVar(WarningGadgetList
&&AllUnsafeOperations
) {
663 WarningGadgetSets result
;
664 // If some gadgets cover more than one
665 // variable, they'll appear more than once in the map.
666 for (auto &G
: AllUnsafeOperations
) {
667 DeclUseList ClaimedVarUseSites
= G
->getClaimedVarUseSites();
669 bool AssociatedWithVarDecl
= false;
670 for (const DeclRefExpr
*DRE
: ClaimedVarUseSites
) {
671 if (const auto *VD
= dyn_cast
<VarDecl
>(DRE
->getDecl())) {
672 result
.byVar
[VD
].emplace(std::move(G
));
673 AssociatedWithVarDecl
= true;
677 if (!AssociatedWithVarDecl
) {
678 result
.noVar
.emplace_back(std::move(G
));
685 struct FixableGadgetSets
{
686 std::map
<const VarDecl
*, std::set
<std::unique_ptr
<FixableGadget
>>> byVar
;
689 static FixableGadgetSets
690 groupFixablesByVar(FixableGadgetList
&&AllFixableOperations
) {
691 FixableGadgetSets FixablesForUnsafeVars
;
692 for (auto &F
: AllFixableOperations
) {
693 DeclUseList DREs
= F
->getClaimedVarUseSites();
695 for (const DeclRefExpr
*DRE
: DREs
) {
696 if (const auto *VD
= dyn_cast
<VarDecl
>(DRE
->getDecl())) {
697 FixablesForUnsafeVars
.byVar
[VD
].emplace(std::move(F
));
701 return FixablesForUnsafeVars
;
704 bool clang::internal::anyConflict(
705 const SmallVectorImpl
<FixItHint
> &FixIts
, const SourceManager
&SM
) {
706 // A simple interval overlap detection algorithm. Sorts all ranges by their
707 // begin location then finds the first overlap in one pass.
708 std::vector
<const FixItHint
*> All
; // a copy of `FixIts`
710 for (const FixItHint
&H
: FixIts
)
712 std::sort(All
.begin(), All
.end(),
713 [&SM
](const FixItHint
*H1
, const FixItHint
*H2
) {
714 return SM
.isBeforeInTranslationUnit(H1
->RemoveRange
.getBegin(),
715 H2
->RemoveRange
.getBegin());
718 const FixItHint
*CurrHint
= nullptr;
720 for (const FixItHint
*Hint
: All
) {
722 SM
.isBeforeInTranslationUnit(CurrHint
->RemoveRange
.getEnd(),
723 Hint
->RemoveRange
.getBegin())) {
724 // Either to initialize `CurrHint` or `CurrHint` does not
725 // overlap with `Hint`:
728 // In case `Hint` overlaps the `CurrHint`, we found at least one
735 std::optional
<FixItList
>
736 ULCArraySubscriptGadget::getFixits(const Strategy
&S
) const {
737 if (const auto *DRE
= dyn_cast
<DeclRefExpr
>(Node
->getBase()->IgnoreImpCasts()))
738 if (const auto *VD
= dyn_cast
<VarDecl
>(DRE
->getDecl())) {
739 switch (S
.lookup(VD
)) {
740 case Strategy::Kind::Span
: {
741 // If the index has a negative constant value, we give up as no valid
742 // fix-it can be generated:
743 const ASTContext
&Ctx
= // FIXME: we need ASTContext to be passed in!
745 if (auto ConstVal
= Node
->getIdx()->getIntegerConstantExpr(Ctx
)) {
746 if (ConstVal
->isNegative())
748 } else if (!Node
->getIdx()->getType()->isUnsignedIntegerType())
750 // no-op is a good fix-it, otherwise
753 case Strategy::Kind::Wontfix
:
754 case Strategy::Kind::Iterator
:
755 case Strategy::Kind::Array
:
756 case Strategy::Kind::Vector
:
757 llvm_unreachable("unsupported strategies for FixableGadgets");
763 // Return the text representation of the given `APInt Val`:
764 static std::string
getAPIntText(APInt Val
) {
765 SmallVector
<char> Txt
;
766 Val
.toString(Txt
, 10, true);
767 // APInt::toString does not add '\0' to the end of the string for us:
772 // Return the source location of the last character of the AST `Node`.
773 template <typename NodeTy
>
774 static SourceLocation
getEndCharLoc(const NodeTy
*Node
, const SourceManager
&SM
,
775 const LangOptions
&LangOpts
) {
776 return Lexer::getLocForEndOfToken(Node
->getEndLoc(), 1, SM
, LangOpts
);
779 // Return the source location just past the last character of the AST `Node`.
780 template <typename NodeTy
>
781 static SourceLocation
getPastLoc(const NodeTy
*Node
, const SourceManager
&SM
,
782 const LangOptions
&LangOpts
) {
783 return Lexer::getLocForEndOfToken(Node
->getEndLoc(), 0, SM
, LangOpts
);
786 // Return text representation of an `Expr`.
787 static StringRef
getExprText(const Expr
*E
, const SourceManager
&SM
,
788 const LangOptions
&LangOpts
) {
789 SourceLocation LastCharLoc
= getPastLoc(E
, SM
, LangOpts
);
791 return Lexer::getSourceText(
792 CharSourceRange::getCharRange(E
->getBeginLoc(), LastCharLoc
), SM
,
796 // For a non-null initializer `Init` of `T *` type, this function returns
797 // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it
799 // In many cases, this function cannot figure out the actual extent `S`. It
800 // then will use a place holder to replace `S` to ask users to fill `S` in. The
801 // initializer shall be used to initialize a variable of type `std::span<T>`.
803 // FIXME: Support multi-level pointers
806 // `Init` a pointer to the initializer expression
807 // `Ctx` a reference to the ASTContext
809 populateInitializerFixItWithSpan(const Expr
*Init
, const ASTContext
&Ctx
,
810 const StringRef UserFillPlaceHolder
) {
812 const SourceManager
&SM
= Ctx
.getSourceManager();
813 const LangOptions
&LangOpts
= Ctx
.getLangOpts();
814 std::string ExtentText
= UserFillPlaceHolder
.data();
817 // Insert `{` before `Init`:
818 FixIts
.push_back(FixItHint::CreateInsertion(Init
->getBeginLoc(), "{"));
819 // Try to get the data extent. Break into different cases:
820 if (auto CxxNew
= dyn_cast
<CXXNewExpr
>(Init
->IgnoreImpCasts())) {
821 // In cases `Init` is `new T[n]` and there is no explicit cast over
822 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects
823 // of `T`. So the extent is `n` unless `n` has side effects. Similar but
824 // simpler for the case where `Init` is `new T`.
825 if (const Expr
*Ext
= CxxNew
->getArraySize().value_or(nullptr)) {
826 if (!Ext
->HasSideEffects(Ctx
))
827 ExtentText
= getExprText(Ext
, SM
, LangOpts
);
828 } else if (!CxxNew
->isArray())
829 // Although the initializer is not allocating a buffer, the pointer
830 // variable could still be used in buffer access operations.
832 } else if (const auto *CArrTy
= Ctx
.getAsConstantArrayType(
833 Init
->IgnoreImpCasts()->getType())) {
834 // In cases `Init` is of an array type after stripping off implicit casts,
835 // the extent is the array size. Note that if the array size is not a
836 // constant, we cannot use it as the extent.
837 ExtentText
= getAPIntText(CArrTy
->getSize());
839 // In cases `Init` is of the form `&Var` after stripping of implicit
840 // casts, where `&` is the built-in operator, the extent is 1.
841 if (auto AddrOfExpr
= dyn_cast
<UnaryOperator
>(Init
->IgnoreImpCasts()))
842 if (AddrOfExpr
->getOpcode() == UnaryOperatorKind::UO_AddrOf
&&
843 isa_and_present
<DeclRefExpr
>(AddrOfExpr
->getSubExpr()))
845 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`, and explicit casting, etc.
849 SmallString
<32> StrBuffer
{};
850 SourceLocation LocPassInit
= getPastLoc(Init
, SM
, LangOpts
);
852 StrBuffer
.append(", ");
853 StrBuffer
.append(ExtentText
);
854 StrBuffer
.append("}");
855 FixIts
.push_back(FixItHint::CreateInsertion(LocPassInit
, StrBuffer
.str()));
859 // For a `VarDecl` of the form `T * var (= Init)?`, this
860 // function generates a fix-it for the declaration, which re-declares `var` to
861 // be of `span<T>` type and transforms the initializer, if present, to a span
862 // constructor---`span<T> var {Init, Extent}`, where `Extent` may need the user
865 // FIXME: support Multi-level pointers
868 // `D` a pointer the variable declaration node
869 // `Ctx` a reference to the ASTContext
871 // the generated fix-it
872 static FixItList
fixVarDeclWithSpan(const VarDecl
*D
, const ASTContext
&Ctx
,
873 const StringRef UserFillPlaceHolder
) {
874 const QualType
&SpanEltT
= D
->getType()->getPointeeType();
875 assert(!SpanEltT
.isNull() && "Trying to fix a non-pointer type variable!");
879 ReplacementLastLoc
; // the inclusive end location of the replacement
880 const SourceManager
&SM
= Ctx
.getSourceManager();
882 if (const Expr
*Init
= D
->getInit()) {
883 FixItList InitFixIts
=
884 populateInitializerFixItWithSpan(Init
, Ctx
, UserFillPlaceHolder
);
886 if (InitFixIts
.empty())
887 return {}; // Something wrong with fixing initializer, give up
888 // The loc right before the initializer:
889 ReplacementLastLoc
= Init
->getBeginLoc().getLocWithOffset(-1);
890 FixIts
.insert(FixIts
.end(), std::make_move_iterator(InitFixIts
.begin()),
891 std::make_move_iterator(InitFixIts
.end()));
893 ReplacementLastLoc
= getEndCharLoc(D
, SM
, Ctx
.getLangOpts());
895 // Producing fix-it for variable declaration---make `D` to be of span type:
896 SmallString
<32> Replacement
;
897 raw_svector_ostream
OS(Replacement
);
899 OS
<< "std::span<" << SpanEltT
.getAsString() << "> " << D
->getName();
900 FixIts
.push_back(FixItHint::CreateReplacement(
901 SourceRange
{D
->getBeginLoc(), ReplacementLastLoc
}, OS
.str()));
905 static FixItList
fixVariableWithSpan(const VarDecl
*VD
,
906 const DeclUseTracker
&Tracker
,
907 const ASTContext
&Ctx
,
908 UnsafeBufferUsageHandler
&Handler
) {
909 const DeclStmt
*DS
= Tracker
.lookupDecl(VD
);
910 assert(DS
&& "Fixing non-local variables not implemented yet!");
911 if (!DS
->isSingleDecl()) {
912 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
915 // Currently DS is an unused variable but we'll need it when
916 // non-single decls are implemented, where the pointee type name
917 // and the '*' are spread around the place.
920 // FIXME: handle cases where DS has multiple declarations
921 return fixVarDeclWithSpan(VD
, Ctx
, Handler
.getUserFillPlaceHolder());
924 static FixItList
fixVariable(const VarDecl
*VD
, Strategy::Kind K
,
925 const DeclUseTracker
&Tracker
,
926 const ASTContext
&Ctx
,
927 UnsafeBufferUsageHandler
&Handler
) {
929 case Strategy::Kind::Span
: {
930 if (VD
->getType()->isPointerType() && VD
->isLocalVarDecl())
931 return fixVariableWithSpan(VD
, Tracker
, Ctx
, Handler
);
934 case Strategy::Kind::Iterator
:
935 case Strategy::Kind::Array
:
936 case Strategy::Kind::Vector
:
937 llvm_unreachable("Strategy not implemented yet!");
938 case Strategy::Kind::Wontfix
:
939 llvm_unreachable("Invalid strategy!");
941 llvm_unreachable("Unknown strategy!");
944 // Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the
945 // `RemoveRange` of 'h' overlaps with a macro use.
946 static bool overlapWithMacro(const FixItList
&FixIts
) {
947 // FIXME: For now we only check if the range (or the first token) is (part of)
948 // a macro expansion. Ideally, we want to check for all tokens in the range.
949 return llvm::any_of(FixIts
, [](const FixItHint
&Hint
) {
950 auto BeginLoc
= Hint
.RemoveRange
.getBegin();
951 if (BeginLoc
.isMacroID())
952 // If the range (or the first token) is (part of) a macro expansion:
958 static std::map
<const VarDecl
*, FixItList
>
959 getFixIts(FixableGadgetSets
&FixablesForUnsafeVars
, const Strategy
&S
,
960 const DeclUseTracker
&Tracker
, const ASTContext
&Ctx
,
961 UnsafeBufferUsageHandler
&Handler
) {
962 std::map
<const VarDecl
*, FixItList
> FixItsForVariable
;
963 for (const auto &[VD
, Fixables
] : FixablesForUnsafeVars
.byVar
) {
964 FixItsForVariable
[VD
] = fixVariable(VD
, S
.lookup(VD
), Tracker
, Ctx
, Handler
);
965 // If we fail to produce Fix-It for the declaration we have to skip the
966 // variable entirely.
967 if (FixItsForVariable
[VD
].empty()) {
968 FixItsForVariable
.erase(VD
);
971 bool ImpossibleToFix
= false;
972 llvm::SmallVector
<FixItHint
, 16> FixItsForVD
;
973 for (const auto &F
: Fixables
) {
974 std::optional
<FixItList
> Fixits
= F
->getFixits(S
);
976 ImpossibleToFix
= true;
979 const FixItList CorrectFixes
= Fixits
.value();
980 FixItsForVD
.insert(FixItsForVD
.end(), CorrectFixes
.begin(),
985 FixItsForVariable
.erase(VD
);
987 FixItsForVariable
[VD
].insert(FixItsForVariable
[VD
].end(),
988 FixItsForVD
.begin(), FixItsForVD
.end());
989 // We conservatively discard fix-its of a variable if
990 // a fix-it overlaps with macros; or
991 // a fix-it conflicts with another one
992 if (overlapWithMacro(FixItsForVariable
[VD
]) ||
993 clang::internal::anyConflict(FixItsForVariable
[VD
],
994 Ctx
.getSourceManager())) {
995 FixItsForVariable
.erase(VD
);
998 return FixItsForVariable
;
1002 getNaiveStrategy(const llvm::SmallVectorImpl
<const VarDecl
*> &UnsafeVars
) {
1004 for (const VarDecl
*VD
: UnsafeVars
) {
1005 S
.set(VD
, Strategy::Kind::Span
);
1010 void clang::checkUnsafeBufferUsage(const Decl
*D
,
1011 UnsafeBufferUsageHandler
&Handler
,
1013 assert(D
&& D
->getBody());
1015 WarningGadgetSets UnsafeOps
;
1016 FixableGadgetSets FixablesForUnsafeVars
;
1017 DeclUseTracker Tracker
;
1020 // FIXME: We could skip even matching Fixables' matchers if EmitFixits ==
1022 auto [FixableGadgets
, WarningGadgets
, TrackerRes
] = findGadgets(D
, Handler
);
1023 UnsafeOps
= groupWarningGadgetsByVar(std::move(WarningGadgets
));
1024 FixablesForUnsafeVars
= groupFixablesByVar(std::move(FixableGadgets
));
1025 Tracker
= std::move(TrackerRes
);
1028 std::map
<const VarDecl
*, FixItList
> FixItsForVariable
;
1031 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s.
1032 for (auto it
= FixablesForUnsafeVars
.byVar
.cbegin();
1033 it
!= FixablesForUnsafeVars
.byVar
.cend();) {
1034 // FIXME: Support ParmVarDecl as well.
1035 if (!it
->first
->isLocalVarDecl() || Tracker
.hasUnclaimedUses(it
->first
)) {
1036 it
= FixablesForUnsafeVars
.byVar
.erase(it
);
1042 llvm::SmallVector
<const VarDecl
*, 16> UnsafeVars
;
1043 for (const auto &[VD
, ignore
] : FixablesForUnsafeVars
.byVar
)
1044 UnsafeVars
.push_back(VD
);
1046 Strategy NaiveStrategy
= getNaiveStrategy(UnsafeVars
);
1047 FixItsForVariable
= getFixIts(FixablesForUnsafeVars
, NaiveStrategy
, Tracker
,
1048 D
->getASTContext(), Handler
);
1050 // FIXME Detect overlapping FixIts.
1053 for (const auto &G
: UnsafeOps
.noVar
) {
1054 Handler
.handleUnsafeOperation(G
->getBaseStmt(), /*IsRelatedToDecl=*/false);
1057 for (const auto &[VD
, WarningGadgets
] : UnsafeOps
.byVar
) {
1059 EmitFixits
? FixItsForVariable
.find(VD
) : FixItsForVariable
.end();
1060 Handler
.handleFixableVariable(VD
, FixItsIt
!= FixItsForVariable
.end()
1061 ? std::move(FixItsIt
->second
)
1063 for (const auto &G
: WarningGadgets
) {
1064 Handler
.handleUnsafeOperation(G
->getBaseStmt(), /*IsRelatedToDecl=*/true);