1 //===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
10 #include "clang/AST/Decl.h"
11 #include "clang/AST/Expr.h"
12 #include "clang/AST/RecursiveASTVisitor.h"
13 #include "clang/AST/StmtVisitor.h"
14 #include "clang/ASTMatchers/ASTMatchFinder.h"
15 #include "clang/Lex/Lexer.h"
16 #include "clang/Lex/Preprocessor.h"
17 #include "llvm/ADT/SmallVector.h"
24 using namespace clang
;
25 using namespace ast_matchers
;
29 class StmtDebugPrinter
30 : public ConstStmtVisitor
<StmtDebugPrinter
, std::string
> {
32 std::string
VisitStmt(const Stmt
*S
) { return S
->getStmtClassName(); }
34 std::string
VisitBinaryOperator(const BinaryOperator
*BO
) {
35 return "BinaryOperator(" + BO
->getOpcodeStr().str() + ")";
38 std::string
VisitUnaryOperator(const UnaryOperator
*UO
) {
39 return "UnaryOperator(" + UO
->getOpcodeStr(UO
->getOpcode()).str() + ")";
42 std::string
VisitImplicitCastExpr(const ImplicitCastExpr
*ICE
) {
43 return "ImplicitCastExpr(" + std::string(ICE
->getCastKindName()) + ")";
47 // Returns a string of ancestor `Stmt`s of the given `DRE` in such a form:
48 // "DRE ==> parent-of-DRE ==> grandparent-of-DRE ==> ...".
49 static std::string
getDREAncestorString(const DeclRefExpr
*DRE
,
53 StmtDebugPrinter StmtPriner
;
56 SS
<< StmtPriner
.Visit(St
);
58 DynTypedNodeList StParents
= Ctx
.getParents(*St
);
60 if (StParents
.size() > 1)
61 return "unavailable due to multiple parents";
62 if (StParents
.size() == 0)
64 St
= StParents
.begin()->get
<Stmt
>();
73 namespace clang::ast_matchers
{
74 // A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
75 // except for those belonging to a different callable of "n".
76 class MatchDescendantVisitor
77 : public RecursiveASTVisitor
<MatchDescendantVisitor
> {
79 typedef RecursiveASTVisitor
<MatchDescendantVisitor
> VisitorBase
;
81 // Creates an AST visitor that matches `Matcher` on all
82 // descendants of a given node "n" except for the ones
83 // belonging to a different callable of "n".
84 MatchDescendantVisitor(const internal::DynTypedMatcher
*Matcher
,
85 internal::ASTMatchFinder
*Finder
,
86 internal::BoundNodesTreeBuilder
*Builder
,
87 internal::ASTMatchFinder::BindKind Bind
,
88 const bool ignoreUnevaluatedContext
)
89 : Matcher(Matcher
), Finder(Finder
), Builder(Builder
), Bind(Bind
),
90 Matches(false), ignoreUnevaluatedContext(ignoreUnevaluatedContext
) {}
92 // Returns true if a match is found in a subtree of `DynNode`, which belongs
93 // to the same callable of `DynNode`.
94 bool findMatch(const DynTypedNode
&DynNode
) {
96 if (const Stmt
*StmtNode
= DynNode
.get
<Stmt
>()) {
97 TraverseStmt(const_cast<Stmt
*>(StmtNode
));
98 *Builder
= ResultBindings
;
104 // The following are overriding methods from the base visitor class.
105 // They are public only to allow CRTP to work. They are *not *part
106 // of the public API of this class.
108 // For the matchers so far used in safe buffers, we only need to match
109 // `Stmt`s. To override more as needed.
111 bool TraverseDecl(Decl
*Node
) {
116 // To skip callables:
117 if (isa
<FunctionDecl
, BlockDecl
, ObjCMethodDecl
>(Node
))
119 // Traverse descendants
120 return VisitorBase::TraverseDecl(Node
);
123 bool TraverseGenericSelectionExpr(GenericSelectionExpr
*Node
) {
124 // These are unevaluated, except the result expression.
125 if(ignoreUnevaluatedContext
)
126 return TraverseStmt(Node
->getResultExpr());
127 return VisitorBase::TraverseGenericSelectionExpr(Node
);
130 bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr
*Node
) {
131 // Unevaluated context.
132 if(ignoreUnevaluatedContext
)
134 return VisitorBase::TraverseUnaryExprOrTypeTraitExpr(Node
);
137 bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node
) {
138 // Unevaluated context.
139 if(ignoreUnevaluatedContext
)
141 return VisitorBase::TraverseTypeOfExprTypeLoc(Node
);
144 bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node
) {
145 // Unevaluated context.
146 if(ignoreUnevaluatedContext
)
148 return VisitorBase::TraverseDecltypeTypeLoc(Node
);
151 bool TraverseCXXNoexceptExpr(CXXNoexceptExpr
*Node
) {
152 // Unevaluated context.
153 if(ignoreUnevaluatedContext
)
155 return VisitorBase::TraverseCXXNoexceptExpr(Node
);
158 bool TraverseCXXTypeidExpr(CXXTypeidExpr
*Node
) {
159 // Unevaluated context.
160 if(ignoreUnevaluatedContext
)
162 return VisitorBase::TraverseCXXTypeidExpr(Node
);
165 bool TraverseStmt(Stmt
*Node
, DataRecursionQueue
*Queue
= nullptr) {
170 return VisitorBase::TraverseStmt(Node
);
173 bool shouldVisitTemplateInstantiations() const { return true; }
174 bool shouldVisitImplicitCode() const {
175 // TODO: let's ignore implicit code for now
180 // Sets 'Matched' to true if 'Matcher' matches 'Node'
182 // Returns 'true' if traversal should continue after this function
183 // returns, i.e. if no match is found or 'Bind' is 'BK_All'.
184 template <typename T
> bool match(const T
&Node
) {
185 internal::BoundNodesTreeBuilder
RecursiveBuilder(*Builder
);
187 if (Matcher
->matches(DynTypedNode::create(Node
), Finder
,
188 &RecursiveBuilder
)) {
189 ResultBindings
.addMatch(RecursiveBuilder
);
191 if (Bind
!= internal::ASTMatchFinder::BK_All
)
192 return false; // Abort as soon as a match is found.
197 const internal::DynTypedMatcher
*const Matcher
;
198 internal::ASTMatchFinder
*const Finder
;
199 internal::BoundNodesTreeBuilder
*const Builder
;
200 internal::BoundNodesTreeBuilder ResultBindings
;
201 const internal::ASTMatchFinder::BindKind Bind
;
203 bool ignoreUnevaluatedContext
;
206 // Because we're dealing with raw pointers, let's define what we mean by that.
207 static auto hasPointerType() {
208 return hasType(hasCanonicalType(pointerType()));
211 static auto hasArrayType() {
212 return hasType(hasCanonicalType(arrayType()));
215 AST_MATCHER_P(Stmt
, forEachDescendantEvaluatedStmt
, internal::Matcher
<Stmt
>, innerMatcher
) {
216 const DynTypedMatcher
&DTM
= static_cast<DynTypedMatcher
>(innerMatcher
);
218 MatchDescendantVisitor
Visitor(&DTM
, Finder
, Builder
, ASTMatchFinder::BK_All
, true);
219 return Visitor
.findMatch(DynTypedNode::create(Node
));
222 AST_MATCHER_P(Stmt
, forEachDescendantStmt
, internal::Matcher
<Stmt
>, innerMatcher
) {
223 const DynTypedMatcher
&DTM
= static_cast<DynTypedMatcher
>(innerMatcher
);
225 MatchDescendantVisitor
Visitor(&DTM
, Finder
, Builder
, ASTMatchFinder::BK_All
, false);
226 return Visitor
.findMatch(DynTypedNode::create(Node
));
229 // Matches a `Stmt` node iff the node is in a safe-buffer opt-out region
230 AST_MATCHER_P(Stmt
, notInSafeBufferOptOut
, const UnsafeBufferUsageHandler
*,
232 return !Handler
->isSafeBufferOptOut(Node
.getBeginLoc());
235 AST_MATCHER_P(CastExpr
, castSubExpr
, internal::Matcher
<Expr
>, innerMatcher
) {
236 return innerMatcher
.matches(*Node
.getSubExpr(), Finder
, Builder
);
239 // Matches a `UnaryOperator` whose operator is pre-increment:
240 AST_MATCHER(UnaryOperator
, isPreInc
) {
241 return Node
.getOpcode() == UnaryOperator::Opcode::UO_PreInc
;
244 // Returns a matcher that matches any expression 'e' such that `innerMatcher`
245 // matches 'e' and 'e' is in an Unspecified Lvalue Context.
246 static auto isInUnspecifiedLvalueContext(internal::Matcher
<Expr
> innerMatcher
) {
251 hasCastKind(CastKind::CK_LValueToRValue
),
252 castSubExpr(innerMatcher
)),
254 hasAnyOperatorName("="),
262 // Returns a matcher that matches any expression `e` such that `InnerMatcher`
263 // matches `e` and `e` is in an Unspecified Pointer Context (UPC).
264 static internal::Matcher
<Stmt
>
265 isInUnspecifiedPointerContext(internal::Matcher
<Stmt
> InnerMatcher
) {
267 // 1. an argument of a function call (except the callee has [[unsafe_...]]
269 // 2. the operand of a pointer-to-(integer or bool) cast operation; or
270 // 3. the operand of a comparator operation; or
271 // 4. the operand of a pointer subtraction operation
272 // (i.e., computing the distance between two pointers); or ...
274 auto CallArgMatcher
=
275 callExpr(forEachArgumentWithParam(InnerMatcher
,
276 hasPointerType() /* array also decays to pointer type*/),
277 unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage
)))));
279 auto CastOperandMatcher
=
280 castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral
),
281 hasCastKind(CastKind::CK_PointerToBoolean
)),
282 castSubExpr(allOf(hasPointerType(), InnerMatcher
)));
284 auto CompOperandMatcher
=
285 binaryOperator(hasAnyOperatorName("!=", "==", "<", "<=", ">", ">="),
286 eachOf(hasLHS(allOf(hasPointerType(), InnerMatcher
)),
287 hasRHS(allOf(hasPointerType(), InnerMatcher
))));
289 // A matcher that matches pointer subtractions:
290 auto PtrSubtractionMatcher
=
291 binaryOperator(hasOperatorName("-"),
292 // Note that here we need both LHS and RHS to be
293 // pointer. Then the inner matcher can match any of
295 allOf(hasLHS(hasPointerType()),
296 hasRHS(hasPointerType())),
297 eachOf(hasLHS(InnerMatcher
),
298 hasRHS(InnerMatcher
)));
300 return stmt(anyOf(CallArgMatcher
, CastOperandMatcher
, CompOperandMatcher
,
301 PtrSubtractionMatcher
));
302 // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we
303 // don't have to check that.)
306 // Returns a matcher that matches any expression 'e' such that `innerMatcher`
307 // matches 'e' and 'e' is in an unspecified untyped context (i.e the expression
308 // 'e' isn't evaluated to an RValue). For example, consider the following code:
309 // int *p = new int[4];
310 // int *q = new int[4];
313 // The expression `p = q` in the conditional of the `if` statement
314 // `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;`
315 // in the assignment statement is in an untyped context.
316 static internal::Matcher
<Stmt
>
317 isInUnspecifiedUntypedContext(internal::Matcher
<Stmt
> InnerMatcher
) {
318 // An unspecified context can be
319 // 1. A compound statement,
320 // 2. The body of an if statement
322 auto CompStmt
= compoundStmt(forEach(InnerMatcher
));
323 auto IfStmtThen
= ifStmt(hasThen(InnerMatcher
));
324 auto IfStmtElse
= ifStmt(hasElse(InnerMatcher
));
325 // FIXME: Handle loop bodies.
326 return stmt(anyOf(CompStmt
, IfStmtThen
, IfStmtElse
));
328 } // namespace clang::ast_matchers
331 // Because the analysis revolves around variables and their types, we'll need to
332 // track uses of variables (aka DeclRefExprs).
333 using DeclUseList
= SmallVector
<const DeclRefExpr
*, 1>;
335 // Convenience typedef.
336 using FixItList
= SmallVector
<FixItHint
, 4>;
343 /// Gadget is an individual operation in the code that may be of interest to
344 /// this analysis. Each (non-abstract) subclass corresponds to a specific
345 /// rigid AST structure that constitutes an operation on a pointer-type object.
346 /// Discovery of a gadget in the code corresponds to claiming that we understand
347 /// what this part of code is doing well enough to potentially improve it.
348 /// Gadgets can be warning (immediately deserving a warning) or fixable (not
349 /// always deserving a warning per se, but requires our attention to identify
350 /// it warrants a fixit).
355 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
358 /// Common type of ASTMatchers used for discovering gadgets.
359 /// Useful for implementing the static matcher() methods
360 /// that are expected from all non-abstract subclasses.
361 using Matcher
= decltype(stmt());
363 Gadget(Kind K
) : K(K
) {}
365 Kind
getKind() const { return K
; }
368 StringRef
getDebugName() const {
370 #define GADGET(x) case Kind::x: return #x;
371 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
373 llvm_unreachable("Unhandled Gadget::Kind enum");
377 virtual bool isWarningGadget() const = 0;
378 virtual const Stmt
*getBaseStmt() const = 0;
380 /// Returns the list of pointer-type variables on which this gadget performs
381 /// its operation. Typically, there's only one variable. This isn't a list
382 /// of all DeclRefExprs in the gadget's AST!
383 virtual DeclUseList
getClaimedVarUseSites() const = 0;
385 virtual ~Gadget() = default;
392 /// Warning gadgets correspond to unsafe code patterns that warrants
393 /// an immediate warning.
394 class WarningGadget
: public Gadget
{
396 WarningGadget(Kind K
) : Gadget(K
) {}
398 static bool classof(const Gadget
*G
) { return G
->isWarningGadget(); }
399 bool isWarningGadget() const final
{ return true; }
402 /// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be
403 /// properly recognized in order to emit fixes. For example, if a raw pointer-type
404 /// variable is replaced by a safe C++ container, every use of such variable must be
405 /// carefully considered and possibly updated.
406 class FixableGadget
: public Gadget
{
408 FixableGadget(Kind K
) : Gadget(K
) {}
410 static bool classof(const Gadget
*G
) { return !G
->isWarningGadget(); }
411 bool isWarningGadget() const final
{ return false; }
413 /// Returns a fixit that would fix the current gadget according to
414 /// the current strategy. Returns std::nullopt if the fix cannot be produced;
415 /// returns an empty list if no fixes are necessary.
416 virtual std::optional
<FixItList
> getFixits(const Strategy
&) const {
420 /// Returns a list of two elements where the first element is the LHS of a pointer assignment
421 /// statement and the second element is the RHS. This two-element list represents the fact that
422 /// the LHS buffer gets its bounds information from the RHS buffer. This information will be used
423 /// later to group all those variables whose types must be modified together to prevent type
425 virtual std::optional
<std::pair
<const VarDecl
*, const VarDecl
*>>
426 getStrategyImplications() const {
431 static auto toSupportedVariable() {
432 return to(varDecl());
435 using FixableGadgetList
= std::vector
<std::unique_ptr
<FixableGadget
>>;
436 using WarningGadgetList
= std::vector
<std::unique_ptr
<WarningGadget
>>;
438 /// An increment of a pointer-type value is unsafe as it may run the pointer
440 class IncrementGadget
: public WarningGadget
{
441 static constexpr const char *const OpTag
= "op";
442 const UnaryOperator
*Op
;
445 IncrementGadget(const MatchFinder::MatchResult
&Result
)
446 : WarningGadget(Kind::Increment
),
447 Op(Result
.Nodes
.getNodeAs
<UnaryOperator
>(OpTag
)) {}
449 static bool classof(const Gadget
*G
) {
450 return G
->getKind() == Kind::Increment
;
453 static Matcher
matcher() {
454 return stmt(unaryOperator(
455 hasOperatorName("++"),
456 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
460 const UnaryOperator
*getBaseStmt() const override
{ return Op
; }
462 DeclUseList
getClaimedVarUseSites() const override
{
463 SmallVector
<const DeclRefExpr
*, 2> Uses
;
464 if (const auto *DRE
=
465 dyn_cast
<DeclRefExpr
>(Op
->getSubExpr()->IgnoreParenImpCasts())) {
469 return std::move(Uses
);
473 /// A decrement of a pointer-type value is unsafe as it may run the pointer
475 class DecrementGadget
: public WarningGadget
{
476 static constexpr const char *const OpTag
= "op";
477 const UnaryOperator
*Op
;
480 DecrementGadget(const MatchFinder::MatchResult
&Result
)
481 : WarningGadget(Kind::Decrement
),
482 Op(Result
.Nodes
.getNodeAs
<UnaryOperator
>(OpTag
)) {}
484 static bool classof(const Gadget
*G
) {
485 return G
->getKind() == Kind::Decrement
;
488 static Matcher
matcher() {
489 return stmt(unaryOperator(
490 hasOperatorName("--"),
491 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
495 const UnaryOperator
*getBaseStmt() const override
{ return Op
; }
497 DeclUseList
getClaimedVarUseSites() const override
{
498 if (const auto *DRE
=
499 dyn_cast
<DeclRefExpr
>(Op
->getSubExpr()->IgnoreParenImpCasts())) {
507 /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
508 /// it doesn't have any bounds checks for the array.
509 class ArraySubscriptGadget
: public WarningGadget
{
510 static constexpr const char *const ArraySubscrTag
= "ArraySubscript";
511 const ArraySubscriptExpr
*ASE
;
514 ArraySubscriptGadget(const MatchFinder::MatchResult
&Result
)
515 : WarningGadget(Kind::ArraySubscript
),
516 ASE(Result
.Nodes
.getNodeAs
<ArraySubscriptExpr
>(ArraySubscrTag
)) {}
518 static bool classof(const Gadget
*G
) {
519 return G
->getKind() == Kind::ArraySubscript
;
522 static Matcher
matcher() {
523 // FIXME: What if the index is integer literal 0? Should this be
524 // a safe gadget in this case?
526 return stmt(arraySubscriptExpr(
527 hasBase(ignoringParenImpCasts(
528 anyOf(hasPointerType(), hasArrayType()))),
530 anyOf(integerLiteral(equals(0)), arrayInitIndexExpr())
532 .bind(ArraySubscrTag
));
536 const ArraySubscriptExpr
*getBaseStmt() const override
{ return ASE
; }
538 DeclUseList
getClaimedVarUseSites() const override
{
539 if (const auto *DRE
=
540 dyn_cast
<DeclRefExpr
>(ASE
->getBase()->IgnoreParenImpCasts())) {
548 /// A pointer arithmetic expression of one of the forms:
550 /// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n
552 class PointerArithmeticGadget
: public WarningGadget
{
553 static constexpr const char *const PointerArithmeticTag
= "ptrAdd";
554 static constexpr const char *const PointerArithmeticPointerTag
= "ptrAddPtr";
555 const BinaryOperator
*PA
; // pointer arithmetic expression
556 const Expr
*Ptr
; // the pointer expression in `PA`
559 PointerArithmeticGadget(const MatchFinder::MatchResult
&Result
)
560 : WarningGadget(Kind::PointerArithmetic
),
561 PA(Result
.Nodes
.getNodeAs
<BinaryOperator
>(PointerArithmeticTag
)),
562 Ptr(Result
.Nodes
.getNodeAs
<Expr
>(PointerArithmeticPointerTag
)) {}
564 static bool classof(const Gadget
*G
) {
565 return G
->getKind() == Kind::PointerArithmetic
;
568 static Matcher
matcher() {
569 auto HasIntegerType
= anyOf(hasType(isInteger()), hasType(enumType()));
571 allOf(hasOperatorName("+"),
572 hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag
)),
573 hasLHS(HasIntegerType
));
575 allOf(anyOf(hasOperatorName("+"), hasOperatorName("-"),
576 hasOperatorName("+="), hasOperatorName("-=")),
577 hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag
)),
578 hasRHS(HasIntegerType
));
580 return stmt(binaryOperator(anyOf(PtrAtLeft
, PtrAtRight
))
581 .bind(PointerArithmeticTag
));
584 const Stmt
*getBaseStmt() const override
{ return PA
; }
586 DeclUseList
getClaimedVarUseSites() const override
{
587 if (const auto *DRE
= dyn_cast
<DeclRefExpr
>(Ptr
->IgnoreParenImpCasts())) {
593 // FIXME: pointer adding zero should be fine
594 // FIXME: this gadge will need a fix-it
597 /// A pointer initialization expression of the form:
601 class PointerInitGadget
: public FixableGadget
{
603 static constexpr const char *const PointerInitLHSTag
= "ptrInitLHS";
604 static constexpr const char *const PointerInitRHSTag
= "ptrInitRHS";
605 const VarDecl
* PtrInitLHS
; // the LHS pointer expression in `PI`
606 const DeclRefExpr
* PtrInitRHS
; // the RHS pointer expression in `PI`
609 PointerInitGadget(const MatchFinder::MatchResult
&Result
)
610 : FixableGadget(Kind::PointerInit
),
611 PtrInitLHS(Result
.Nodes
.getNodeAs
<VarDecl
>(PointerInitLHSTag
)),
612 PtrInitRHS(Result
.Nodes
.getNodeAs
<DeclRefExpr
>(PointerInitRHSTag
)) {}
614 static bool classof(const Gadget
*G
) {
615 return G
->getKind() == Kind::PointerInit
;
618 static Matcher
matcher() {
619 auto PtrInitStmt
= declStmt(hasSingleDecl(varDecl(
620 hasInitializer(ignoringImpCasts(declRefExpr(
622 toSupportedVariable()).
623 bind(PointerInitRHSTag
)))).
624 bind(PointerInitLHSTag
)));
626 return stmt(PtrInitStmt
);
629 virtual std::optional
<FixItList
> getFixits(const Strategy
&S
) const override
;
631 virtual const Stmt
*getBaseStmt() const override
{
632 // FIXME: This needs to be the entire DeclStmt, assuming that this method
633 // makes sense at all on a FixableGadget.
637 virtual DeclUseList
getClaimedVarUseSites() const override
{
638 return DeclUseList
{PtrInitRHS
};
641 virtual std::optional
<std::pair
<const VarDecl
*, const VarDecl
*>>
642 getStrategyImplications() const override
{
643 return std::make_pair(PtrInitLHS
,
644 cast
<VarDecl
>(PtrInitRHS
->getDecl()));
648 /// A pointer assignment expression of the form:
652 class PointerAssignmentGadget
: public FixableGadget
{
654 static constexpr const char *const PointerAssignLHSTag
= "ptrLHS";
655 static constexpr const char *const PointerAssignRHSTag
= "ptrRHS";
656 const DeclRefExpr
* PtrLHS
; // the LHS pointer expression in `PA`
657 const DeclRefExpr
* PtrRHS
; // the RHS pointer expression in `PA`
660 PointerAssignmentGadget(const MatchFinder::MatchResult
&Result
)
661 : FixableGadget(Kind::PointerAssignment
),
662 PtrLHS(Result
.Nodes
.getNodeAs
<DeclRefExpr
>(PointerAssignLHSTag
)),
663 PtrRHS(Result
.Nodes
.getNodeAs
<DeclRefExpr
>(PointerAssignRHSTag
)) {}
665 static bool classof(const Gadget
*G
) {
666 return G
->getKind() == Kind::PointerAssignment
;
669 static Matcher
matcher() {
670 auto PtrAssignExpr
= binaryOperator(allOf(hasOperatorName("="),
671 hasRHS(ignoringParenImpCasts(declRefExpr(hasPointerType(),
672 toSupportedVariable()).
673 bind(PointerAssignRHSTag
))),
674 hasLHS(declRefExpr(hasPointerType(),
675 toSupportedVariable()).
676 bind(PointerAssignLHSTag
))));
678 return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr
));
681 virtual std::optional
<FixItList
> getFixits(const Strategy
&S
) const override
;
683 virtual const Stmt
*getBaseStmt() const override
{
684 // FIXME: This should be the binary operator, assuming that this method
685 // makes sense at all on a FixableGadget.
689 virtual DeclUseList
getClaimedVarUseSites() const override
{
690 return DeclUseList
{PtrLHS
, PtrRHS
};
693 virtual std::optional
<std::pair
<const VarDecl
*, const VarDecl
*>>
694 getStrategyImplications() const override
{
695 return std::make_pair(cast
<VarDecl
>(PtrLHS
->getDecl()),
696 cast
<VarDecl
>(PtrRHS
->getDecl()));
700 /// A call of a function or method that performs unchecked buffer operations
701 /// over one of its pointer parameters.
702 class UnsafeBufferUsageAttrGadget
: public WarningGadget
{
703 constexpr static const char *const OpTag
= "call_expr";
707 UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult
&Result
)
708 : WarningGadget(Kind::UnsafeBufferUsageAttr
),
709 Op(Result
.Nodes
.getNodeAs
<CallExpr
>(OpTag
)) {}
711 static bool classof(const Gadget
*G
) {
712 return G
->getKind() == Kind::UnsafeBufferUsageAttr
;
715 static Matcher
matcher() {
716 return stmt(callExpr(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage
))))
719 const Stmt
*getBaseStmt() const override
{ return Op
; }
721 DeclUseList
getClaimedVarUseSites() const override
{ return {}; }
724 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
725 // Context (see `isInUnspecifiedLvalueContext`).
726 // Note here `[]` is the built-in subscript operator.
727 class ULCArraySubscriptGadget
: public FixableGadget
{
729 static constexpr const char *const ULCArraySubscriptTag
=
730 "ArraySubscriptUnderULC";
731 const ArraySubscriptExpr
*Node
;
734 ULCArraySubscriptGadget(const MatchFinder::MatchResult
&Result
)
735 : FixableGadget(Kind::ULCArraySubscript
),
736 Node(Result
.Nodes
.getNodeAs
<ArraySubscriptExpr
>(ULCArraySubscriptTag
)) {
737 assert(Node
!= nullptr && "Expecting a non-null matching result");
740 static bool classof(const Gadget
*G
) {
741 return G
->getKind() == Kind::ULCArraySubscript
;
744 static Matcher
matcher() {
745 auto ArrayOrPtr
= anyOf(hasPointerType(), hasArrayType());
746 auto BaseIsArrayOrPtrDRE
=
747 hasBase(ignoringParenImpCasts(declRefExpr(ArrayOrPtr
,
748 toSupportedVariable())));
750 arraySubscriptExpr(BaseIsArrayOrPtrDRE
).bind(ULCArraySubscriptTag
);
752 return expr(isInUnspecifiedLvalueContext(Target
));
755 virtual std::optional
<FixItList
> getFixits(const Strategy
&S
) const override
;
757 virtual const Stmt
*getBaseStmt() const override
{ return Node
; }
759 virtual DeclUseList
getClaimedVarUseSites() const override
{
760 if (const auto *DRE
=
761 dyn_cast
<DeclRefExpr
>(Node
->getBase()->IgnoreImpCasts())) {
768 // Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the
769 // unspecified pointer context (isInUnspecifiedPointerContext). The gadget emits
770 // fixit of the form `UPC(DRE.data())`.
771 class UPCStandalonePointerGadget
: public FixableGadget
{
773 static constexpr const char *const DeclRefExprTag
= "StandalonePointer";
774 const DeclRefExpr
*Node
;
777 UPCStandalonePointerGadget(const MatchFinder::MatchResult
&Result
)
778 : FixableGadget(Kind::UPCStandalonePointer
),
779 Node(Result
.Nodes
.getNodeAs
<DeclRefExpr
>(DeclRefExprTag
)) {
780 assert(Node
!= nullptr && "Expecting a non-null matching result");
783 static bool classof(const Gadget
*G
) {
784 return G
->getKind() == Kind::UPCStandalonePointer
;
787 static Matcher
matcher() {
788 auto ArrayOrPtr
= anyOf(hasPointerType(), hasArrayType());
790 ignoringParenImpCasts(declRefExpr(allOf(ArrayOrPtr
,
791 toSupportedVariable())).bind(DeclRefExprTag
)));
792 return stmt(isInUnspecifiedPointerContext(target
));
795 virtual std::optional
<FixItList
> getFixits(const Strategy
&S
) const override
;
797 virtual const Stmt
*getBaseStmt() const override
{ return Node
; }
799 virtual DeclUseList
getClaimedVarUseSites() const override
{
804 class PointerDereferenceGadget
: public FixableGadget
{
805 static constexpr const char *const BaseDeclRefExprTag
= "BaseDRE";
806 static constexpr const char *const OperatorTag
= "op";
808 const DeclRefExpr
*BaseDeclRefExpr
= nullptr;
809 const UnaryOperator
*Op
= nullptr;
812 PointerDereferenceGadget(const MatchFinder::MatchResult
&Result
)
813 : FixableGadget(Kind::PointerDereference
),
815 Result
.Nodes
.getNodeAs
<DeclRefExpr
>(BaseDeclRefExprTag
)),
816 Op(Result
.Nodes
.getNodeAs
<UnaryOperator
>(OperatorTag
)) {}
818 static bool classof(const Gadget
*G
) {
819 return G
->getKind() == Kind::PointerDereference
;
822 static Matcher
matcher() {
825 hasOperatorName("*"),
826 has(expr(ignoringParenImpCasts(
827 declRefExpr(toSupportedVariable()).bind(BaseDeclRefExprTag
)))))
830 return expr(isInUnspecifiedLvalueContext(Target
));
833 DeclUseList
getClaimedVarUseSites() const override
{
834 return {BaseDeclRefExpr
};
837 virtual const Stmt
*getBaseStmt() const final
{ return Op
; }
839 virtual std::optional
<FixItList
> getFixits(const Strategy
&S
) const override
;
842 // Represents expressions of the form `&DRE[any]` in the Unspecified Pointer
843 // Context (see `isInUnspecifiedPointerContext`).
844 // Note here `[]` is the built-in subscript operator.
845 class UPCAddressofArraySubscriptGadget
: public FixableGadget
{
847 static constexpr const char *const UPCAddressofArraySubscriptTag
=
848 "AddressofArraySubscriptUnderUPC";
849 const UnaryOperator
*Node
; // the `&DRE[any]` node
852 UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult
&Result
)
853 : FixableGadget(Kind::ULCArraySubscript
),
854 Node(Result
.Nodes
.getNodeAs
<UnaryOperator
>(
855 UPCAddressofArraySubscriptTag
)) {
856 assert(Node
!= nullptr && "Expecting a non-null matching result");
859 static bool classof(const Gadget
*G
) {
860 return G
->getKind() == Kind::UPCAddressofArraySubscript
;
863 static Matcher
matcher() {
864 return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
865 unaryOperator(hasOperatorName("&"),
866 hasUnaryOperand(arraySubscriptExpr(
867 hasBase(ignoringParenImpCasts(declRefExpr(
868 toSupportedVariable()))))))
869 .bind(UPCAddressofArraySubscriptTag
)))));
872 virtual std::optional
<FixItList
> getFixits(const Strategy
&) const override
;
874 virtual const Stmt
*getBaseStmt() const override
{ return Node
; }
876 virtual DeclUseList
getClaimedVarUseSites() const override
{
877 const auto *ArraySubst
= cast
<ArraySubscriptExpr
>(Node
->getSubExpr());
879 cast
<DeclRefExpr
>(ArraySubst
->getBase()->IgnoreImpCasts());
886 // An auxiliary tracking facility for the fixit analysis. It helps connect
887 // declarations to its uses and make sure we've covered all uses with our
888 // analysis before we try to fix the declaration.
889 class DeclUseTracker
{
890 using UseSetTy
= SmallSet
<const DeclRefExpr
*, 16>;
891 using DefMapTy
= DenseMap
<const VarDecl
*, const DeclStmt
*>;
893 // Allocate on the heap for easier move.
894 std::unique_ptr
<UseSetTy
> Uses
{std::make_unique
<UseSetTy
>()};
898 DeclUseTracker() = default;
899 DeclUseTracker(const DeclUseTracker
&) = delete; // Let's avoid copies.
900 DeclUseTracker
&operator=(const DeclUseTracker
&) = delete;
901 DeclUseTracker(DeclUseTracker
&&) = default;
902 DeclUseTracker
&operator=(DeclUseTracker
&&) = default;
904 // Start tracking a freshly discovered DRE.
905 void discoverUse(const DeclRefExpr
*DRE
) { Uses
->insert(DRE
); }
907 // Stop tracking the DRE as it's been fully figured out.
908 void claimUse(const DeclRefExpr
*DRE
) {
909 assert(Uses
->count(DRE
) &&
910 "DRE not found or claimed by multiple matchers!");
914 // A variable is unclaimed if at least one use is unclaimed.
915 bool hasUnclaimedUses(const VarDecl
*VD
) const {
916 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
917 return any_of(*Uses
, [VD
](const DeclRefExpr
*DRE
) {
918 return DRE
->getDecl()->getCanonicalDecl() == VD
->getCanonicalDecl();
922 UseSetTy
getUnclaimedUses(const VarDecl
*VD
) const {
924 for (auto use
: *Uses
) {
925 if (use
->getDecl()->getCanonicalDecl() == VD
->getCanonicalDecl()) {
926 ReturnSet
.insert(use
);
932 void discoverDecl(const DeclStmt
*DS
) {
933 for (const Decl
*D
: DS
->decls()) {
934 if (const auto *VD
= dyn_cast
<VarDecl
>(D
)) {
935 // FIXME: Assertion temporarily disabled due to a bug in
936 // ASTMatcher internal behavior in presence of GNU
937 // statement-expressions. We need to properly investigate this
938 // because it can screw up our algorithm in other ways.
939 // assert(Defs.count(VD) == 0 && "Definition already discovered!");
945 const DeclStmt
*lookupDecl(const VarDecl
*VD
) const {
946 return Defs
.lookup(VD
);
952 // Strategy is a map from variables to the way we plan to emit fixes for
953 // these variables. It is figured out gradually by trying different fixes
954 // for different variables depending on gadgets in which these variables
959 Wontfix
, // We don't plan to emit a fixit for this variable.
960 Span
, // We recommend replacing the variable with std::span.
961 Iterator
, // We recommend replacing the variable with std::span::iterator.
962 Array
, // We recommend replacing the variable with std::array.
963 Vector
// We recommend replacing the variable with std::vector.
967 using MapTy
= llvm::DenseMap
<const VarDecl
*, Kind
>;
972 Strategy() = default;
973 Strategy(const Strategy
&) = delete; // Let's avoid copies.
974 Strategy
&operator=(const Strategy
&) = delete;
975 Strategy(Strategy
&&) = default;
976 Strategy
&operator=(Strategy
&&) = default;
978 void set(const VarDecl
*VD
, Kind K
) { Map
[VD
] = K
; }
980 Kind
lookup(const VarDecl
*VD
) const {
981 auto I
= Map
.find(VD
);
983 return Kind::Wontfix
;
991 // Representing a pointer type expression of the form `++Ptr` in an Unspecified
992 // Pointer Context (UPC):
993 class UPCPreIncrementGadget
: public FixableGadget
{
995 static constexpr const char *const UPCPreIncrementTag
=
996 "PointerPreIncrementUnderUPC";
997 const UnaryOperator
*Node
; // the `++Ptr` node
1000 UPCPreIncrementGadget(const MatchFinder::MatchResult
&Result
)
1001 : FixableGadget(Kind::UPCPreIncrement
),
1002 Node(Result
.Nodes
.getNodeAs
<UnaryOperator
>(UPCPreIncrementTag
)) {
1003 assert(Node
!= nullptr && "Expecting a non-null matching result");
1006 static bool classof(const Gadget
*G
) {
1007 return G
->getKind() == Kind::UPCPreIncrement
;
1010 static Matcher
matcher() {
1011 // Note here we match `++Ptr` for any expression `Ptr` of pointer type.
1012 // Although currently we can only provide fix-its when `Ptr` is a DRE, we
1013 // can have the matcher be general, so long as `getClaimedVarUseSites` does
1015 return stmt(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
1016 unaryOperator(isPreInc(),
1017 hasUnaryOperand(declRefExpr(
1018 toSupportedVariable()))
1019 ).bind(UPCPreIncrementTag
)))));
1022 virtual std::optional
<FixItList
> getFixits(const Strategy
&S
) const override
;
1024 virtual const Stmt
*getBaseStmt() const override
{ return Node
; }
1026 virtual DeclUseList
getClaimedVarUseSites() const override
{
1027 return {dyn_cast
<DeclRefExpr
>(Node
->getSubExpr())};
1031 // Representing a fixable expression of the form `*(ptr + 123)` or `*(123 +
1033 class DerefSimplePtrArithFixableGadget
: public FixableGadget
{
1034 static constexpr const char *const BaseDeclRefExprTag
= "BaseDRE";
1035 static constexpr const char *const DerefOpTag
= "DerefOp";
1036 static constexpr const char *const AddOpTag
= "AddOp";
1037 static constexpr const char *const OffsetTag
= "Offset";
1039 const DeclRefExpr
*BaseDeclRefExpr
= nullptr;
1040 const UnaryOperator
*DerefOp
= nullptr;
1041 const BinaryOperator
*AddOp
= nullptr;
1042 const IntegerLiteral
*Offset
= nullptr;
1045 DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult
&Result
)
1046 : FixableGadget(Kind::DerefSimplePtrArithFixable
),
1048 Result
.Nodes
.getNodeAs
<DeclRefExpr
>(BaseDeclRefExprTag
)),
1049 DerefOp(Result
.Nodes
.getNodeAs
<UnaryOperator
>(DerefOpTag
)),
1050 AddOp(Result
.Nodes
.getNodeAs
<BinaryOperator
>(AddOpTag
)),
1051 Offset(Result
.Nodes
.getNodeAs
<IntegerLiteral
>(OffsetTag
)) {}
1053 static Matcher
matcher() {
1055 auto ThePtr
= expr(hasPointerType(),
1056 ignoringImpCasts(declRefExpr(toSupportedVariable()).
1057 bind(BaseDeclRefExprTag
)));
1058 auto PlusOverPtrAndInteger
= expr(anyOf(
1059 binaryOperator(hasOperatorName("+"), hasLHS(ThePtr
),
1060 hasRHS(integerLiteral().bind(OffsetTag
)))
1062 binaryOperator(hasOperatorName("+"), hasRHS(ThePtr
),
1063 hasLHS(integerLiteral().bind(OffsetTag
)))
1065 return isInUnspecifiedLvalueContext(unaryOperator(
1066 hasOperatorName("*"),
1067 hasUnaryOperand(ignoringParens(PlusOverPtrAndInteger
)))
1072 virtual std::optional
<FixItList
> getFixits(const Strategy
&s
) const final
;
1074 // TODO remove this method from FixableGadget interface
1075 virtual const Stmt
*getBaseStmt() const final
{ return nullptr; }
1077 virtual DeclUseList
getClaimedVarUseSites() const final
{
1078 return {BaseDeclRefExpr
};
1082 /// Scan the function and return a list of gadgets found with provided kits.
1083 static std::tuple
<FixableGadgetList
, WarningGadgetList
, DeclUseTracker
>
1084 findGadgets(const Decl
*D
, const UnsafeBufferUsageHandler
&Handler
,
1085 bool EmitSuggestions
) {
1087 struct GadgetFinderCallback
: MatchFinder::MatchCallback
{
1088 FixableGadgetList FixableGadgets
;
1089 WarningGadgetList WarningGadgets
;
1090 DeclUseTracker Tracker
;
1092 void run(const MatchFinder::MatchResult
&Result
) override
{
1093 // In debug mode, assert that we've found exactly one gadget.
1094 // This helps us avoid conflicts in .bind() tags.
1098 [[maybe_unused
]] int numFound
= 0;
1099 #define NEXT ++numFound
1102 if (const auto *DRE
= Result
.Nodes
.getNodeAs
<DeclRefExpr
>("any_dre")) {
1103 Tracker
.discoverUse(DRE
);
1107 if (const auto *DS
= Result
.Nodes
.getNodeAs
<DeclStmt
>("any_ds")) {
1108 Tracker
.discoverDecl(DS
);
1112 // Figure out which matcher we've found, and call the appropriate
1113 // subclass constructor.
1114 // FIXME: Can we do this more logarithmically?
1115 #define FIXABLE_GADGET(name) \
1116 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
1117 FixableGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
1120 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1121 #define WARNING_GADGET(name) \
1122 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
1123 WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
1126 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1128 assert(numFound
>= 1 && "Gadgets not found in match result!");
1129 assert(numFound
<= 1 && "Conflicting bind tags in gadgets!");
1134 GadgetFinderCallback CB
;
1139 forEachDescendantEvaluatedStmt(stmt(anyOf(
1140 // Add Gadget::matcher() for every gadget in the registry.
1141 #define WARNING_GADGET(x) \
1142 allOf(x ## Gadget::matcher().bind(#x), \
1143 notInSafeBufferOptOut(&Handler)),
1144 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1145 // Avoid a hanging comma.
1153 if (EmitSuggestions
) {
1157 forEachDescendantStmt(stmt(eachOf(
1158 #define FIXABLE_GADGET(x) \
1159 x ## Gadget::matcher().bind(#x),
1160 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1161 // In parallel, match all DeclRefExprs so that to find out
1162 // whether there are any uncovered by gadgets.
1163 declRefExpr(anyOf(hasPointerType(), hasArrayType()),
1164 to(anyOf(varDecl(), bindingDecl()))).bind("any_dre"),
1165 // Also match DeclStmts because we'll need them when fixing
1166 // their underlying VarDecls that otherwise don't have
1167 // any backreferences to DeclStmts.
1168 declStmt().bind("any_ds")
1176 M
.match(*D
->getBody(), D
->getASTContext());
1177 return {std::move(CB
.FixableGadgets
), std::move(CB
.WarningGadgets
),
1178 std::move(CB
.Tracker
)};
1181 // Compares AST nodes by source locations.
1182 template <typename NodeTy
> struct CompareNode
{
1183 bool operator()(const NodeTy
*N1
, const NodeTy
*N2
) const {
1184 return N1
->getBeginLoc().getRawEncoding() <
1185 N2
->getBeginLoc().getRawEncoding();
1189 struct WarningGadgetSets
{
1190 std::map
<const VarDecl
*, std::set
<const WarningGadget
*>,
1191 // To keep keys sorted by their locations in the map so that the
1192 // order is deterministic:
1193 CompareNode
<VarDecl
>>
1195 // These Gadgets are not related to pointer variables (e. g. temporaries).
1196 llvm::SmallVector
<const WarningGadget
*, 16> noVar
;
1199 static WarningGadgetSets
1200 groupWarningGadgetsByVar(const WarningGadgetList
&AllUnsafeOperations
) {
1201 WarningGadgetSets result
;
1202 // If some gadgets cover more than one
1203 // variable, they'll appear more than once in the map.
1204 for (auto &G
: AllUnsafeOperations
) {
1205 DeclUseList ClaimedVarUseSites
= G
->getClaimedVarUseSites();
1207 bool AssociatedWithVarDecl
= false;
1208 for (const DeclRefExpr
*DRE
: ClaimedVarUseSites
) {
1209 if (const auto *VD
= dyn_cast
<VarDecl
>(DRE
->getDecl())) {
1210 result
.byVar
[VD
].insert(G
.get());
1211 AssociatedWithVarDecl
= true;
1215 if (!AssociatedWithVarDecl
) {
1216 result
.noVar
.push_back(G
.get());
1223 struct FixableGadgetSets
{
1224 std::map
<const VarDecl
*, std::set
<const FixableGadget
*>,
1225 // To keep keys sorted by their locations in the map so that the
1226 // order is deterministic:
1227 CompareNode
<VarDecl
>>
1231 static FixableGadgetSets
1232 groupFixablesByVar(FixableGadgetList
&&AllFixableOperations
) {
1233 FixableGadgetSets FixablesForUnsafeVars
;
1234 for (auto &F
: AllFixableOperations
) {
1235 DeclUseList DREs
= F
->getClaimedVarUseSites();
1237 for (const DeclRefExpr
*DRE
: DREs
) {
1238 if (const auto *VD
= dyn_cast
<VarDecl
>(DRE
->getDecl())) {
1239 FixablesForUnsafeVars
.byVar
[VD
].insert(F
.get());
1243 return FixablesForUnsafeVars
;
1246 bool clang::internal::anyConflict(const SmallVectorImpl
<FixItHint
> &FixIts
,
1247 const SourceManager
&SM
) {
1248 // A simple interval overlap detection algorithm. Sorts all ranges by their
1249 // begin location then finds the first overlap in one pass.
1250 std::vector
<const FixItHint
*> All
; // a copy of `FixIts`
1252 for (const FixItHint
&H
: FixIts
)
1254 std::sort(All
.begin(), All
.end(),
1255 [&SM
](const FixItHint
*H1
, const FixItHint
*H2
) {
1256 return SM
.isBeforeInTranslationUnit(H1
->RemoveRange
.getBegin(),
1257 H2
->RemoveRange
.getBegin());
1260 const FixItHint
*CurrHint
= nullptr;
1262 for (const FixItHint
*Hint
: All
) {
1264 SM
.isBeforeInTranslationUnit(CurrHint
->RemoveRange
.getEnd(),
1265 Hint
->RemoveRange
.getBegin())) {
1266 // Either to initialize `CurrHint` or `CurrHint` does not
1267 // overlap with `Hint`:
1270 // In case `Hint` overlaps the `CurrHint`, we found at least one
1277 std::optional
<FixItList
>
1278 PointerAssignmentGadget::getFixits(const Strategy
&S
) const {
1279 const auto *LeftVD
= cast
<VarDecl
>(PtrLHS
->getDecl());
1280 const auto *RightVD
= cast
<VarDecl
>(PtrRHS
->getDecl());
1281 switch (S
.lookup(LeftVD
)) {
1282 case Strategy::Kind::Span
:
1283 if (S
.lookup(RightVD
) == Strategy::Kind::Span
)
1285 return std::nullopt
;
1286 case Strategy::Kind::Wontfix
:
1287 return std::nullopt
;
1288 case Strategy::Kind::Iterator
:
1289 case Strategy::Kind::Array
:
1290 case Strategy::Kind::Vector
:
1291 llvm_unreachable("unsupported strategies for FixableGadgets");
1293 return std::nullopt
;
1296 std::optional
<FixItList
>
1297 PointerInitGadget::getFixits(const Strategy
&S
) const {
1298 const auto *LeftVD
= PtrInitLHS
;
1299 const auto *RightVD
= cast
<VarDecl
>(PtrInitRHS
->getDecl());
1300 switch (S
.lookup(LeftVD
)) {
1301 case Strategy::Kind::Span
:
1302 if (S
.lookup(RightVD
) == Strategy::Kind::Span
)
1304 return std::nullopt
;
1305 case Strategy::Kind::Wontfix
:
1306 return std::nullopt
;
1307 case Strategy::Kind::Iterator
:
1308 case Strategy::Kind::Array
:
1309 case Strategy::Kind::Vector
:
1310 llvm_unreachable("unsupported strategies for FixableGadgets");
1312 return std::nullopt
;
1315 std::optional
<FixItList
>
1316 ULCArraySubscriptGadget::getFixits(const Strategy
&S
) const {
1317 if (const auto *DRE
=
1318 dyn_cast
<DeclRefExpr
>(Node
->getBase()->IgnoreImpCasts()))
1319 if (const auto *VD
= dyn_cast
<VarDecl
>(DRE
->getDecl())) {
1320 switch (S
.lookup(VD
)) {
1321 case Strategy::Kind::Span
: {
1322 // If the index has a negative constant value, we give up as no valid
1323 // fix-it can be generated:
1324 const ASTContext
&Ctx
= // FIXME: we need ASTContext to be passed in!
1325 VD
->getASTContext();
1326 if (auto ConstVal
= Node
->getIdx()->getIntegerConstantExpr(Ctx
)) {
1327 if (ConstVal
->isNegative())
1328 return std::nullopt
;
1329 } else if (!Node
->getIdx()->getType()->isUnsignedIntegerType())
1330 return std::nullopt
;
1331 // no-op is a good fix-it, otherwise
1334 case Strategy::Kind::Wontfix
:
1335 case Strategy::Kind::Iterator
:
1336 case Strategy::Kind::Array
:
1337 case Strategy::Kind::Vector
:
1338 llvm_unreachable("unsupported strategies for FixableGadgets");
1341 return std::nullopt
;
1344 static std::optional
<FixItList
> // forward declaration
1345 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator
*Node
);
1347 std::optional
<FixItList
>
1348 UPCAddressofArraySubscriptGadget::getFixits(const Strategy
&S
) const {
1349 auto DREs
= getClaimedVarUseSites();
1350 const auto *VD
= cast
<VarDecl
>(DREs
.front()->getDecl());
1352 switch (S
.lookup(VD
)) {
1353 case Strategy::Kind::Span
:
1354 return fixUPCAddressofArraySubscriptWithSpan(Node
);
1355 case Strategy::Kind::Wontfix
:
1356 case Strategy::Kind::Iterator
:
1357 case Strategy::Kind::Array
:
1358 case Strategy::Kind::Vector
:
1359 llvm_unreachable("unsupported strategies for FixableGadgets");
1361 return std::nullopt
; // something went wrong, no fix-it
1364 // FIXME: this function should be customizable through format
1365 static StringRef
getEndOfLine() {
1366 static const char *const EOL
= "\n";
1370 // Returns the text indicating that the user needs to provide input there:
1371 std::string
getUserFillPlaceHolder(StringRef HintTextToUser
= "placeholder") {
1372 std::string s
= std::string("<# ");
1373 s
+= HintTextToUser
;
1378 // Return the text representation of the given `APInt Val`:
1379 static std::string
getAPIntText(APInt Val
) {
1380 SmallVector
<char> Txt
;
1381 Val
.toString(Txt
, 10, true);
1382 // APInt::toString does not add '\0' to the end of the string for us:
1383 Txt
.push_back('\0');
1387 // Return the source location of the last character of the AST `Node`.
1388 template <typename NodeTy
>
1389 static std::optional
<SourceLocation
>
1390 getEndCharLoc(const NodeTy
*Node
, const SourceManager
&SM
,
1391 const LangOptions
&LangOpts
) {
1392 unsigned TkLen
= Lexer::MeasureTokenLength(Node
->getEndLoc(), SM
, LangOpts
);
1393 SourceLocation Loc
= Node
->getEndLoc().getLocWithOffset(TkLen
- 1);
1398 return std::nullopt
;
1401 // Return the source location just past the last character of the AST `Node`.
1402 template <typename NodeTy
>
1403 static std::optional
<SourceLocation
> getPastLoc(const NodeTy
*Node
,
1404 const SourceManager
&SM
,
1405 const LangOptions
&LangOpts
) {
1406 SourceLocation Loc
=
1407 Lexer::getLocForEndOfToken(Node
->getEndLoc(), 0, SM
, LangOpts
);
1412 return std::nullopt
;
1415 // Return text representation of an `Expr`.
1416 static std::optional
<StringRef
> getExprText(const Expr
*E
,
1417 const SourceManager
&SM
,
1418 const LangOptions
&LangOpts
) {
1419 std::optional
<SourceLocation
> LastCharLoc
= getPastLoc(E
, SM
, LangOpts
);
1422 return Lexer::getSourceText(
1423 CharSourceRange::getCharRange(E
->getBeginLoc(), *LastCharLoc
), SM
,
1426 return std::nullopt
;
1429 // Returns the literal text in `SourceRange SR`, if `SR` is a valid range.
1430 static std::optional
<StringRef
> getRangeText(SourceRange SR
,
1431 const SourceManager
&SM
,
1432 const LangOptions
&LangOpts
) {
1433 bool Invalid
= false;
1434 CharSourceRange CSR
= CharSourceRange::getCharRange(SR
);
1435 StringRef Text
= Lexer::getSourceText(CSR
, SM
, LangOpts
, &Invalid
);
1439 return std::nullopt
;
1442 // Returns the begin location of the identifier of the given variable
1444 static SourceLocation
getVarDeclIdentifierLoc(const VarDecl
*VD
) {
1445 // According to the implementation of `VarDecl`, `VD->getLocation()` actually
1446 // returns the begin location of the identifier of the declaration:
1447 return VD
->getLocation();
1450 // Returns the literal text of the identifier of the given variable declaration.
1451 static std::optional
<StringRef
>
1452 getVarDeclIdentifierText(const VarDecl
*VD
, const SourceManager
&SM
,
1453 const LangOptions
&LangOpts
) {
1454 SourceLocation ParmIdentBeginLoc
= getVarDeclIdentifierLoc(VD
);
1455 SourceLocation ParmIdentEndLoc
=
1456 Lexer::getLocForEndOfToken(ParmIdentBeginLoc
, 0, SM
, LangOpts
);
1458 if (ParmIdentEndLoc
.isMacroID() &&
1459 !Lexer::isAtEndOfMacroExpansion(ParmIdentEndLoc
, SM
, LangOpts
))
1460 return std::nullopt
;
1461 return getRangeText({ParmIdentBeginLoc
, ParmIdentEndLoc
}, SM
, LangOpts
);
1464 // We cannot fix a variable declaration if it has some other specifiers than the
1465 // type specifier. Because the source ranges of those specifiers could overlap
1466 // with the source range that is being replaced using fix-its. Especially when
1467 // we often cannot obtain accurate source ranges of cv-qualified type
1469 // FIXME: also deal with type attributes
1470 static bool hasUnsupportedSpecifiers(const VarDecl
*VD
,
1471 const SourceManager
&SM
) {
1472 // AttrRangeOverlapping: true if at least one attribute of `VD` overlaps the
1473 // source range of `VD`:
1474 bool AttrRangeOverlapping
= llvm::any_of(VD
->attrs(), [&](Attr
*At
) -> bool {
1475 return !(SM
.isBeforeInTranslationUnit(At
->getRange().getEnd(),
1476 VD
->getBeginLoc())) &&
1477 !(SM
.isBeforeInTranslationUnit(VD
->getEndLoc(),
1478 At
->getRange().getBegin()));
1480 return VD
->isInlineSpecified() || VD
->isConstexpr() ||
1481 VD
->hasConstantInitialization() || !VD
->hasLocalStorage() ||
1482 AttrRangeOverlapping
;
1485 // Returns the `SourceRange` of `D`. The reason why this function exists is
1486 // that `D->getSourceRange()` may return a range where the end location is the
1487 // starting location of the last token. The end location of the source range
1488 // returned by this function is the last location of the last token.
1489 static SourceRange
getSourceRangeToTokenEnd(const Decl
*D
,
1490 const SourceManager
&SM
,
1491 LangOptions LangOpts
) {
1492 SourceLocation Begin
= D
->getBeginLoc();
1494 End
= // `D->getEndLoc` should always return the starting location of the
1495 // last token, so we should get the end of the token
1496 Lexer::getLocForEndOfToken(D
->getEndLoc(), 0, SM
, LangOpts
);
1498 return SourceRange(Begin
, End
);
1501 // Returns the text of the pointee type of `T` from a `VarDecl` of a pointer
1502 // type. The text is obtained through from `TypeLoc`s. Since `TypeLoc` does not
1503 // have source ranges of qualifiers ( The `QualifiedTypeLoc` looks hacky too me
1504 // :( ), `Qualifiers` of the pointee type is returned separately through the
1505 // output parameter `QualifiersToAppend`.
1506 static std::optional
<std::string
>
1507 getPointeeTypeText(const VarDecl
*VD
, const SourceManager
&SM
,
1508 const LangOptions
&LangOpts
,
1509 std::optional
<Qualifiers
> *QualifiersToAppend
) {
1510 QualType Ty
= VD
->getType();
1513 assert(Ty
->isPointerType() && !Ty
->isFunctionPointerType() &&
1514 "Expecting a VarDecl of type of pointer to object type");
1515 PteTy
= Ty
->getPointeeType();
1517 TypeLoc TyLoc
= VD
->getTypeSourceInfo()->getTypeLoc().getUnqualifiedLoc();
1520 // We only deal with the cases that we know `TypeLoc::getNextTypeLoc` returns
1521 // the `TypeLoc` of the pointee type:
1522 switch (TyLoc
.getTypeLocClass()) {
1523 case TypeLoc::ConstantArray
:
1524 case TypeLoc::IncompleteArray
:
1525 case TypeLoc::VariableArray
:
1526 case TypeLoc::DependentSizedArray
:
1527 case TypeLoc::Decayed
:
1528 assert(isa
<ParmVarDecl
>(VD
) && "An array type shall not be treated as a "
1529 "pointer type unless it decays.");
1530 PteTyLoc
= TyLoc
.getNextTypeLoc();
1532 case TypeLoc::Pointer
:
1533 PteTyLoc
= TyLoc
.castAs
<PointerTypeLoc
>().getPointeeLoc();
1536 return std::nullopt
;
1538 if (PteTyLoc
.isNull())
1539 // Sometimes we cannot get a useful `TypeLoc` for the pointee type, e.g.,
1540 // when the pointer type is `auto`.
1541 return std::nullopt
;
1543 SourceLocation IdentLoc
= getVarDeclIdentifierLoc(VD
);
1545 if (!(IdentLoc
.isValid() && PteTyLoc
.getSourceRange().isValid())) {
1546 // We are expecting these locations to be valid. But in some cases, they are
1547 // not all valid. It is a Clang bug to me and we are not responsible for
1548 // fixing it. So we will just give up for now when it happens.
1549 return std::nullopt
;
1552 // Note that TypeLoc.getEndLoc() returns the begin location of the last token:
1553 SourceLocation PteEndOfTokenLoc
=
1554 Lexer::getLocForEndOfToken(PteTyLoc
.getEndLoc(), 0, SM
, LangOpts
);
1556 if (!PteEndOfTokenLoc
.isValid())
1557 // Sometimes we cannot get the end location of the pointee type, e.g., when
1558 // there are macros involved.
1559 return std::nullopt
;
1560 if (!SM
.isBeforeInTranslationUnit(PteEndOfTokenLoc
, IdentLoc
)) {
1561 // We only deal with the cases where the source text of the pointee type
1562 // appears on the left-hand side of the variable identifier completely,
1563 // including the following forms:
1565 // `T ident[]`, where `T` is any type.
1566 // Examples of excluded cases are `T (*ident)[]` or `T ident[][n]`.
1567 return std::nullopt
;
1569 if (PteTy
.hasQualifiers()) {
1570 // TypeLoc does not provide source ranges for qualifiers (it says it's
1571 // intentional but seems fishy to me), so we cannot get the full text
1572 // `PteTy` via source ranges.
1573 *QualifiersToAppend
= PteTy
.getQualifiers();
1575 return getRangeText({PteTyLoc
.getBeginLoc(), PteEndOfTokenLoc
}, SM
, LangOpts
)
1579 // Returns the text of the name (with qualifiers) of a `FunctionDecl`.
1580 static std::optional
<StringRef
> getFunNameText(const FunctionDecl
*FD
,
1581 const SourceManager
&SM
,
1582 const LangOptions
&LangOpts
) {
1583 SourceLocation BeginLoc
= FD
->getQualifier()
1584 ? FD
->getQualifierLoc().getBeginLoc()
1585 : FD
->getNameInfo().getBeginLoc();
1586 // Note that `FD->getNameInfo().getEndLoc()` returns the begin location of the
1588 SourceLocation EndLoc
= Lexer::getLocForEndOfToken(
1589 FD
->getNameInfo().getEndLoc(), 0, SM
, LangOpts
);
1590 SourceRange NameRange
{BeginLoc
, EndLoc
};
1592 return getRangeText(NameRange
, SM
, LangOpts
);
1595 // Returns the text representing a `std::span` type where the element type is
1596 // represented by `EltTyText`.
1598 // Note the optional parameter `Qualifiers`: one needs to pass qualifiers
1599 // explicitly if the element type needs to be qualified.
1601 getSpanTypeText(StringRef EltTyText
,
1602 std::optional
<Qualifiers
> Quals
= std::nullopt
) {
1603 const char *const SpanOpen
= "std::span<";
1606 return SpanOpen
+ EltTyText
.str() + ' ' + Quals
->getAsString() + '>';
1607 return SpanOpen
+ EltTyText
.str() + '>';
1610 std::optional
<FixItList
>
1611 DerefSimplePtrArithFixableGadget::getFixits(const Strategy
&s
) const {
1612 const VarDecl
*VD
= dyn_cast
<VarDecl
>(BaseDeclRefExpr
->getDecl());
1614 if (VD
&& s
.lookup(VD
) == Strategy::Kind::Span
) {
1615 ASTContext
&Ctx
= VD
->getASTContext();
1616 // std::span can't represent elements before its begin()
1617 if (auto ConstVal
= Offset
->getIntegerConstantExpr(Ctx
))
1618 if (ConstVal
->isNegative())
1619 return std::nullopt
;
1621 // note that the expr may (oddly) has multiple layers of parens
1623 // *((..(pointer + 123)..))
1628 // replace ' + ' with '['
1629 // replace ')' with ']'
1632 // *((..(123 + pointer)..))
1637 // replace ' + ' with '['
1638 // replace ')' with ']'
1640 const Expr
*LHS
= AddOp
->getLHS(), *RHS
= AddOp
->getRHS();
1641 const SourceManager
&SM
= Ctx
.getSourceManager();
1642 const LangOptions
&LangOpts
= Ctx
.getLangOpts();
1643 CharSourceRange StarWithTrailWhitespace
=
1644 clang::CharSourceRange::getCharRange(DerefOp
->getOperatorLoc(),
1645 LHS
->getBeginLoc());
1647 std::optional
<SourceLocation
> LHSLocation
= getPastLoc(LHS
, SM
, LangOpts
);
1649 return std::nullopt
;
1651 CharSourceRange PlusWithSurroundingWhitespace
=
1652 clang::CharSourceRange::getCharRange(*LHSLocation
, RHS
->getBeginLoc());
1654 std::optional
<SourceLocation
> AddOpLocation
=
1655 getPastLoc(AddOp
, SM
, LangOpts
);
1656 std::optional
<SourceLocation
> DerefOpLocation
=
1657 getPastLoc(DerefOp
, SM
, LangOpts
);
1659 if (!AddOpLocation
|| !DerefOpLocation
)
1660 return std::nullopt
;
1662 CharSourceRange ClosingParenWithPrecWhitespace
=
1663 clang::CharSourceRange::getCharRange(*AddOpLocation
, *DerefOpLocation
);
1666 {FixItHint::CreateRemoval(StarWithTrailWhitespace
),
1667 FixItHint::CreateReplacement(PlusWithSurroundingWhitespace
, "["),
1668 FixItHint::CreateReplacement(ClosingParenWithPrecWhitespace
, "]")}};
1670 return std::nullopt
; // something wrong or unsupported, give up
1673 std::optional
<FixItList
>
1674 PointerDereferenceGadget::getFixits(const Strategy
&S
) const {
1675 const VarDecl
*VD
= cast
<VarDecl
>(BaseDeclRefExpr
->getDecl());
1676 switch (S
.lookup(VD
)) {
1677 case Strategy::Kind::Span
: {
1678 ASTContext
&Ctx
= VD
->getASTContext();
1679 SourceManager
&SM
= Ctx
.getSourceManager();
1680 // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0]
1681 // Deletes the *operand
1682 CharSourceRange derefRange
= clang::CharSourceRange::getCharRange(
1683 Op
->getBeginLoc(), Op
->getBeginLoc().getLocWithOffset(1));
1685 if (auto LocPastOperand
=
1686 getPastLoc(BaseDeclRefExpr
, SM
, Ctx
.getLangOpts())) {
1687 return FixItList
{{FixItHint::CreateRemoval(derefRange
),
1688 FixItHint::CreateInsertion(*LocPastOperand
, "[0]")}};
1692 case Strategy::Kind::Iterator
:
1693 case Strategy::Kind::Array
:
1694 case Strategy::Kind::Vector
:
1695 llvm_unreachable("Strategy not implemented yet!");
1696 case Strategy::Kind::Wontfix
:
1697 llvm_unreachable("Invalid strategy!");
1700 return std::nullopt
;
1703 // Generates fix-its replacing an expression of the form UPC(DRE) with
1705 std::optional
<FixItList
> UPCStandalonePointerGadget::getFixits(const Strategy
&S
)
1707 const auto VD
= cast
<VarDecl
>(Node
->getDecl());
1708 switch (S
.lookup(VD
)) {
1709 case Strategy::Kind::Span
: {
1710 ASTContext
&Ctx
= VD
->getASTContext();
1711 SourceManager
&SM
= Ctx
.getSourceManager();
1712 // Inserts the .data() after the DRE
1713 std::optional
<SourceLocation
> EndOfOperand
=
1714 getPastLoc(Node
, SM
, Ctx
.getLangOpts());
1717 return FixItList
{{FixItHint::CreateInsertion(
1718 *EndOfOperand
, ".data()")}};
1719 // FIXME: Points inside a macro expansion.
1722 case Strategy::Kind::Wontfix
:
1723 case Strategy::Kind::Iterator
:
1724 case Strategy::Kind::Array
:
1725 case Strategy::Kind::Vector
:
1726 llvm_unreachable("unsupported strategies for FixableGadgets");
1729 return std::nullopt
;
1732 // Generates fix-its replacing an expression of the form `&DRE[e]` with
1733 // `&DRE.data()[e]`:
1734 static std::optional
<FixItList
>
1735 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator
*Node
) {
1736 const auto *ArraySub
= cast
<ArraySubscriptExpr
>(Node
->getSubExpr());
1737 const auto *DRE
= cast
<DeclRefExpr
>(ArraySub
->getBase()->IgnoreImpCasts());
1738 // FIXME: this `getASTContext` call is costly, we should pass the
1740 const ASTContext
&Ctx
= DRE
->getDecl()->getASTContext();
1741 const Expr
*Idx
= ArraySub
->getIdx();
1742 const SourceManager
&SM
= Ctx
.getSourceManager();
1743 const LangOptions
&LangOpts
= Ctx
.getLangOpts();
1744 std::stringstream SS
;
1745 bool IdxIsLitZero
= false;
1747 if (auto ICE
= Idx
->getIntegerConstantExpr(Ctx
))
1748 if ((*ICE
).isZero())
1749 IdxIsLitZero
= true;
1750 std::optional
<StringRef
> DreString
= getExprText(DRE
, SM
, LangOpts
);
1752 return std::nullopt
;
1755 // If the index is literal zero, we produce the most concise fix-it:
1756 SS
<< (*DreString
).str() << ".data()";
1758 std::optional
<StringRef
> IndexString
= getExprText(Idx
, SM
, LangOpts
);
1760 return std::nullopt
;
1762 SS
<< "&" << (*DreString
).str() << ".data()"
1763 << "[" << (*IndexString
).str() << "]";
1766 FixItHint::CreateReplacement(Node
->getSourceRange(), SS
.str())};
1770 std::optional
<FixItList
> UPCPreIncrementGadget::getFixits(const Strategy
&S
) const {
1771 DeclUseList DREs
= getClaimedVarUseSites();
1773 if (DREs
.size() != 1)
1774 return std::nullopt
; // In cases of `++Ptr` where `Ptr` is not a DRE, we
1776 if (const VarDecl
*VD
= dyn_cast
<VarDecl
>(DREs
.front()->getDecl())) {
1777 if (S
.lookup(VD
) == Strategy::Kind::Span
) {
1779 std::stringstream SS
;
1780 const Stmt
*PreIncNode
= getBaseStmt();
1781 StringRef varName
= VD
->getName();
1782 const ASTContext
&Ctx
= VD
->getASTContext();
1784 // To transform UPC(++p) to UPC((p = p.subspan(1)).data()):
1785 SS
<< "(" << varName
.data() << " = " << varName
.data()
1786 << ".subspan(1)).data()";
1787 std::optional
<SourceLocation
> PreIncLocation
=
1788 getEndCharLoc(PreIncNode
, Ctx
.getSourceManager(), Ctx
.getLangOpts());
1789 if (!PreIncLocation
)
1790 return std::nullopt
;
1792 Fixes
.push_back(FixItHint::CreateReplacement(
1793 SourceRange(PreIncNode
->getBeginLoc(), *PreIncLocation
), SS
.str()));
1797 return std::nullopt
; // Not in the cases that we can handle for now, give up.
1801 // For a non-null initializer `Init` of `T *` type, this function returns
1802 // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it
1803 // to output stream.
1804 // In many cases, this function cannot figure out the actual extent `S`. It
1805 // then will use a place holder to replace `S` to ask users to fill `S` in. The
1806 // initializer shall be used to initialize a variable of type `std::span<T>`.
1808 // FIXME: Support multi-level pointers
1811 // `Init` a pointer to the initializer expression
1812 // `Ctx` a reference to the ASTContext
1814 FixVarInitializerWithSpan(const Expr
*Init
, ASTContext
&Ctx
,
1815 const StringRef UserFillPlaceHolder
) {
1816 const SourceManager
&SM
= Ctx
.getSourceManager();
1817 const LangOptions
&LangOpts
= Ctx
.getLangOpts();
1819 // If `Init` has a constant value that is (or equivalent to) a
1820 // NULL pointer, we use the default constructor to initialize the span
1821 // object, i.e., a `std:span` variable declaration with no initializer.
1822 // So the fix-it is just to remove the initializer.
1823 if (Init
->isNullPointerConstant(Ctx
,
1824 // FIXME: Why does this function not ask for `const ASTContext
1825 // &`? It should. Maybe worth an NFC patch later.
1826 Expr::NullPointerConstantValueDependence::
1827 NPC_ValueDependentIsNotNull
)) {
1828 std::optional
<SourceLocation
> InitLocation
=
1829 getEndCharLoc(Init
, SM
, LangOpts
);
1833 SourceRange
SR(Init
->getBeginLoc(), *InitLocation
);
1835 return {FixItHint::CreateRemoval(SR
)};
1839 std::string ExtentText
= UserFillPlaceHolder
.data();
1840 StringRef One
= "1";
1842 // Insert `{` before `Init`:
1843 FixIts
.push_back(FixItHint::CreateInsertion(Init
->getBeginLoc(), "{"));
1844 // Try to get the data extent. Break into different cases:
1845 if (auto CxxNew
= dyn_cast
<CXXNewExpr
>(Init
->IgnoreImpCasts())) {
1846 // In cases `Init` is `new T[n]` and there is no explicit cast over
1847 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects
1848 // of `T`. So the extent is `n` unless `n` has side effects. Similar but
1849 // simpler for the case where `Init` is `new T`.
1850 if (const Expr
*Ext
= CxxNew
->getArraySize().value_or(nullptr)) {
1851 if (!Ext
->HasSideEffects(Ctx
)) {
1852 std::optional
<StringRef
> ExtentString
= getExprText(Ext
, SM
, LangOpts
);
1855 ExtentText
= *ExtentString
;
1857 } else if (!CxxNew
->isArray())
1858 // Although the initializer is not allocating a buffer, the pointer
1859 // variable could still be used in buffer access operations.
1861 } else if (const auto *CArrTy
= Ctx
.getAsConstantArrayType(
1862 Init
->IgnoreImpCasts()->getType())) {
1863 // In cases `Init` is of an array type after stripping off implicit casts,
1864 // the extent is the array size. Note that if the array size is not a
1865 // constant, we cannot use it as the extent.
1866 ExtentText
= getAPIntText(CArrTy
->getSize());
1868 // In cases `Init` is of the form `&Var` after stripping of implicit
1869 // casts, where `&` is the built-in operator, the extent is 1.
1870 if (auto AddrOfExpr
= dyn_cast
<UnaryOperator
>(Init
->IgnoreImpCasts()))
1871 if (AddrOfExpr
->getOpcode() == UnaryOperatorKind::UO_AddrOf
&&
1872 isa_and_present
<DeclRefExpr
>(AddrOfExpr
->getSubExpr()))
1874 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`,
1875 // and explicit casting, etc. etc.
1878 SmallString
<32> StrBuffer
{};
1879 std::optional
<SourceLocation
> LocPassInit
= getPastLoc(Init
, SM
, LangOpts
);
1884 StrBuffer
.append(", ");
1885 StrBuffer
.append(ExtentText
);
1886 StrBuffer
.append("}");
1887 FixIts
.push_back(FixItHint::CreateInsertion(*LocPassInit
, StrBuffer
.str()));
1892 #define DEBUG_NOTE_DECL_FAIL(D, Msg) \
1893 Handler.addDebugNoteForVar((D), (D)->getBeginLoc(), "failed to produce fixit for declaration '" + (D)->getNameAsString() + "'" + (Msg))
1895 #define DEBUG_NOTE_DECL_FAIL(D, Msg)
1898 // For the given variable declaration with a pointer-to-T type, returns the text
1899 // `std::span<T>`. If it is unable to generate the text, returns
1901 static std::optional
<std::string
> createSpanTypeForVarDecl(const VarDecl
*VD
,
1902 const ASTContext
&Ctx
) {
1903 assert(VD
->getType()->isPointerType());
1905 std::optional
<Qualifiers
> PteTyQualifiers
= std::nullopt
;
1906 std::optional
<std::string
> PteTyText
= getPointeeTypeText(
1907 VD
, Ctx
.getSourceManager(), Ctx
.getLangOpts(), &PteTyQualifiers
);
1910 return std::nullopt
;
1912 std::string SpanTyText
= "std::span<";
1914 SpanTyText
.append(*PteTyText
);
1915 // Append qualifiers to span element type if any:
1916 if (PteTyQualifiers
) {
1917 SpanTyText
.append(" ");
1918 SpanTyText
.append(PteTyQualifiers
->getAsString());
1920 SpanTyText
.append(">");
1924 // For a `VarDecl` of the form `T * var (= Init)?`, this
1925 // function generates fix-its that
1926 // 1) replace `T * var` with `std::span<T> var`; and
1927 // 2) change `Init` accordingly to a span constructor, if it exists.
1929 // FIXME: support Multi-level pointers
1932 // `D` a pointer the variable declaration node
1933 // `Ctx` a reference to the ASTContext
1934 // `UserFillPlaceHolder` the user-input placeholder text
1936 // the non-empty fix-it list, if fix-its are successfuly generated; empty
1938 static FixItList
fixLocalVarDeclWithSpan(const VarDecl
*D
, ASTContext
&Ctx
,
1939 const StringRef UserFillPlaceHolder
,
1940 UnsafeBufferUsageHandler
&Handler
) {
1941 if (hasUnsupportedSpecifiers(D
, Ctx
.getSourceManager()))
1945 std::optional
<std::string
> SpanTyText
= createSpanTypeForVarDecl(D
, Ctx
);
1948 DEBUG_NOTE_DECL_FAIL(D
, " : failed to generate 'std::span' type");
1952 // Will hold the text for `std::span<T> Ident`:
1953 std::stringstream SS
;
1956 // Append qualifiers to the type of `D`, if any:
1957 if (D
->getType().hasQualifiers())
1958 SS
<< " " << D
->getType().getQualifiers().getAsString();
1960 // The end of the range of the original source that will be replaced
1961 // by `std::span<T> ident`:
1962 SourceLocation EndLocForReplacement
= D
->getEndLoc();
1963 std::optional
<StringRef
> IdentText
=
1964 getVarDeclIdentifierText(D
, Ctx
.getSourceManager(), Ctx
.getLangOpts());
1967 DEBUG_NOTE_DECL_FAIL(D
, " : failed to locate the identifier");
1970 // Fix the initializer if it exists:
1971 if (const Expr
*Init
= D
->getInit()) {
1972 FixItList InitFixIts
=
1973 FixVarInitializerWithSpan(Init
, Ctx
, UserFillPlaceHolder
);
1974 if (InitFixIts
.empty())
1976 FixIts
.insert(FixIts
.end(), std::make_move_iterator(InitFixIts
.begin()),
1977 std::make_move_iterator(InitFixIts
.end()));
1978 // If the declaration has the form `T *ident = init`, we want to replace
1979 // `T *ident = ` with `std::span<T> ident`:
1980 EndLocForReplacement
= Init
->getBeginLoc().getLocWithOffset(-1);
1982 SS
<< " " << IdentText
->str();
1983 if (!EndLocForReplacement
.isValid()) {
1984 DEBUG_NOTE_DECL_FAIL(D
, " : failed to locate the end of the declaration");
1987 FixIts
.push_back(FixItHint::CreateReplacement(
1988 SourceRange(D
->getBeginLoc(), EndLocForReplacement
), SS
.str()));
1992 static bool hasConflictingOverload(const FunctionDecl
*FD
) {
1993 return !FD
->getDeclContext()->lookup(FD
->getDeclName()).isSingleResult();
1996 // For a `FunctionDecl`, whose `ParmVarDecl`s are being changed to have new
1997 // types, this function produces fix-its to make the change self-contained. Let
1998 // 'F' be the entity defined by the original `FunctionDecl` and "NewF" be the
1999 // entity defined by the `FunctionDecl` after the change to the parameters.
2000 // Fix-its produced by this function are
2001 // 1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration
2003 // 2. Create a declaration of "NewF" next to each declaration of `F`;
2004 // 3. Create a definition of "F" (as its' original definition is now belongs
2005 // to "NewF") next to its original definition. The body of the creating
2006 // definition calls to "NewF".
2010 // void f(int *p); // original declaration
2011 // void f(int *p) { // original definition
2015 // To change the parameter `p` to be of `std::span<int>` type, we
2016 // also add overloads:
2018 // [[clang::unsafe_buffer_usage]] void f(int *p); // original decl
2019 // void f(std::span<int> p); // added overload decl
2020 // void f(std::span<int> p) { // original def where param is changed
2023 // [[clang::unsafe_buffer_usage]] void f(int *p) { // added def
2024 // return f(std::span(p, <# size #>));
2027 static std::optional
<FixItList
>
2028 createOverloadsForFixedParams(const Strategy
&S
, const FunctionDecl
*FD
,
2029 const ASTContext
&Ctx
,
2030 UnsafeBufferUsageHandler
&Handler
) {
2031 // FIXME: need to make this conflict checking better:
2032 if (hasConflictingOverload(FD
))
2033 return std::nullopt
;
2035 const SourceManager
&SM
= Ctx
.getSourceManager();
2036 const LangOptions
&LangOpts
= Ctx
.getLangOpts();
2037 const unsigned NumParms
= FD
->getNumParams();
2038 std::vector
<std::string
> NewTysTexts(NumParms
);
2039 std::vector
<bool> ParmsMask(NumParms
, false);
2040 bool AtLeastOneParmToFix
= false;
2042 for (unsigned i
= 0; i
< NumParms
; i
++) {
2043 const ParmVarDecl
*PVD
= FD
->getParamDecl(i
);
2045 if (S
.lookup(PVD
) == Strategy::Kind::Wontfix
)
2047 if (S
.lookup(PVD
) != Strategy::Kind::Span
)
2048 // Not supported, not suppose to happen:
2049 return std::nullopt
;
2051 std::optional
<Qualifiers
> PteTyQuals
= std::nullopt
;
2052 std::optional
<std::string
> PteTyText
=
2053 getPointeeTypeText(PVD
, SM
, LangOpts
, &PteTyQuals
);
2056 // something wrong in obtaining the text of the pointee type, give up
2057 return std::nullopt
;
2058 // FIXME: whether we should create std::span type depends on the Strategy.
2059 NewTysTexts
[i
] = getSpanTypeText(*PteTyText
, PteTyQuals
);
2060 ParmsMask
[i
] = true;
2061 AtLeastOneParmToFix
= true;
2063 if (!AtLeastOneParmToFix
)
2064 // No need to create function overloads:
2066 // FIXME Respect indentation of the original code.
2068 // A lambda that creates the text representation of a function declaration
2069 // with the new type signatures:
2070 const auto NewOverloadSignatureCreator
=
2071 [&SM
, &LangOpts
, &NewTysTexts
,
2072 &ParmsMask
](const FunctionDecl
*FD
) -> std::optional
<std::string
> {
2073 std::stringstream SS
;
2076 SS
<< getEndOfLine().str();
2077 // Append: ret-type func-name "("
2078 if (auto Prefix
= getRangeText(
2079 SourceRange(FD
->getBeginLoc(), (*FD
->param_begin())->getBeginLoc()),
2081 SS
<< Prefix
->str();
2083 return std::nullopt
; // give up
2084 // Append: parameter-type-list
2085 const unsigned NumParms
= FD
->getNumParams();
2087 for (unsigned i
= 0; i
< NumParms
; i
++) {
2088 const ParmVarDecl
*Parm
= FD
->getParamDecl(i
);
2090 if (Parm
->isImplicit())
2093 // This `i`-th parameter will be fixed with `NewTysTexts[i]` being its
2095 SS
<< NewTysTexts
[i
];
2096 // print parameter name if provided:
2097 if (IdentifierInfo
*II
= Parm
->getIdentifier())
2098 SS
<< ' ' << II
->getName().str();
2099 } else if (auto ParmTypeText
= getRangeText(
2100 getSourceRangeToTokenEnd(Parm
, SM
, LangOpts
),
2102 // print the whole `Parm` without modification:
2103 SS
<< ParmTypeText
->str();
2105 return std::nullopt
; // something wrong, give up
2106 if (i
!= NumParms
- 1)
2113 // A lambda that creates the text representation of a function definition with
2114 // the original signature:
2115 const auto OldOverloadDefCreator
=
2116 [&Handler
, &SM
, &LangOpts
, &NewTysTexts
,
2117 &ParmsMask
](const FunctionDecl
*FD
) -> std::optional
<std::string
> {
2118 std::stringstream SS
;
2120 SS
<< getEndOfLine().str();
2121 // Append: attr-name ret-type func-name "(" param-list ")" "{"
2122 if (auto FDPrefix
= getRangeText(
2123 SourceRange(FD
->getBeginLoc(), FD
->getBody()->getBeginLoc()), SM
,
2125 SS
<< Handler
.getUnsafeBufferUsageAttributeTextAt(FD
->getBeginLoc(), " ")
2126 << FDPrefix
->str() << "{";
2128 return std::nullopt
;
2129 // Append: "return" func-name "("
2130 if (auto FunQualName
= getFunNameText(FD
, SM
, LangOpts
))
2131 SS
<< "return " << FunQualName
->str() << "(";
2133 return std::nullopt
;
2136 const unsigned NumParms
= FD
->getNumParams();
2137 for (unsigned i
= 0; i
< NumParms
; i
++) {
2138 const ParmVarDecl
*Parm
= FD
->getParamDecl(i
);
2140 if (Parm
->isImplicit())
2142 // FIXME: If a parameter has no name, it is unused in the
2143 // definition. So we could just leave it as it is.
2144 if (!Parm
->getIdentifier())
2145 // If a parameter of a function definition has no name:
2146 return std::nullopt
;
2148 // This is our spanified paramter!
2149 SS
<< NewTysTexts
[i
] << "(" << Parm
->getIdentifier()->getName().str()
2150 << ", " << getUserFillPlaceHolder("size") << ")";
2152 SS
<< Parm
->getIdentifier()->getName().str();
2153 if (i
!= NumParms
- 1)
2156 // finish call and the body
2157 SS
<< ");}" << getEndOfLine().str();
2158 // FIXME: 80-char line formatting?
2163 for (FunctionDecl
*FReDecl
: FD
->redecls()) {
2164 std::optional
<SourceLocation
> Loc
= getPastLoc(FReDecl
, SM
, LangOpts
);
2168 if (FReDecl
->isThisDeclarationADefinition()) {
2169 assert(FReDecl
== FD
&& "inconsistent function definition");
2170 // Inserts a definition with the old signature to the end of
2172 if (auto OldOverloadDef
= OldOverloadDefCreator(FReDecl
))
2173 FixIts
.emplace_back(FixItHint::CreateInsertion(*Loc
, *OldOverloadDef
));
2175 return {}; // give up
2177 // Adds the unsafe-buffer attribute (if not already there) to `FReDecl`:
2178 if (!FReDecl
->hasAttr
<UnsafeBufferUsageAttr
>()) {
2179 FixIts
.emplace_back(FixItHint::CreateInsertion(
2180 FReDecl
->getBeginLoc(), Handler
.getUnsafeBufferUsageAttributeTextAt(
2181 FReDecl
->getBeginLoc(), " ")));
2183 // Inserts a declaration with the new signature to the end of `FReDecl`:
2184 if (auto NewOverloadDecl
= NewOverloadSignatureCreator(FReDecl
))
2185 FixIts
.emplace_back(FixItHint::CreateInsertion(*Loc
, *NewOverloadDecl
));
2193 // To fix a `ParmVarDecl` to be of `std::span` type.
2194 static FixItList
fixParamWithSpan(const ParmVarDecl
*PVD
, const ASTContext
&Ctx
,
2195 UnsafeBufferUsageHandler
&Handler
) {
2196 if (hasUnsupportedSpecifiers(PVD
, Ctx
.getSourceManager())) {
2197 DEBUG_NOTE_DECL_FAIL(PVD
, " : has unsupport specifier(s)");
2200 if (PVD
->hasDefaultArg()) {
2201 // FIXME: generate fix-its for default values:
2202 DEBUG_NOTE_DECL_FAIL(PVD
, " : has default arg");
2206 std::optional
<Qualifiers
> PteTyQualifiers
= std::nullopt
;
2207 std::optional
<std::string
> PteTyText
= getPointeeTypeText(
2208 PVD
, Ctx
.getSourceManager(), Ctx
.getLangOpts(), &PteTyQualifiers
);
2211 DEBUG_NOTE_DECL_FAIL(PVD
, " : invalid pointee type");
2215 std::optional
<StringRef
> PVDNameText
= PVD
->getIdentifier()->getName();
2218 DEBUG_NOTE_DECL_FAIL(PVD
, " : invalid identifier name");
2222 std::stringstream SS
;
2223 std::optional
<std::string
> SpanTyText
= createSpanTypeForVarDecl(PVD
, Ctx
);
2225 if (PteTyQualifiers
)
2226 // Append qualifiers if they exist:
2227 SS
<< getSpanTypeText(*PteTyText
, PteTyQualifiers
);
2229 SS
<< getSpanTypeText(*PteTyText
);
2230 // Append qualifiers to the type of the parameter:
2231 if (PVD
->getType().hasQualifiers())
2232 SS
<< ' ' << PVD
->getType().getQualifiers().getAsString();
2233 // Append parameter's name:
2234 SS
<< ' ' << PVDNameText
->str();
2235 // Add replacement fix-it:
2236 return {FixItHint::CreateReplacement(PVD
->getSourceRange(), SS
.str())};
2239 static FixItList
fixVariableWithSpan(const VarDecl
*VD
,
2240 const DeclUseTracker
&Tracker
,
2242 UnsafeBufferUsageHandler
&Handler
) {
2243 const DeclStmt
*DS
= Tracker
.lookupDecl(VD
);
2245 DEBUG_NOTE_DECL_FAIL(VD
, " : variables declared this way not implemented yet");
2248 if (!DS
->isSingleDecl()) {
2249 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
2250 DEBUG_NOTE_DECL_FAIL(VD
, " : multiple VarDecls");
2253 // Currently DS is an unused variable but we'll need it when
2254 // non-single decls are implemented, where the pointee type name
2255 // and the '*' are spread around the place.
2258 // FIXME: handle cases where DS has multiple declarations
2259 return fixLocalVarDeclWithSpan(VD
, Ctx
, getUserFillPlaceHolder(), Handler
);
2262 // TODO: we should be consistent to use `std::nullopt` to represent no-fix due
2263 // to any unexpected problem.
2265 fixVariable(const VarDecl
*VD
, Strategy::Kind K
,
2266 /* The function decl under analysis */ const Decl
*D
,
2267 const DeclUseTracker
&Tracker
, ASTContext
&Ctx
,
2268 UnsafeBufferUsageHandler
&Handler
) {
2269 if (const auto *PVD
= dyn_cast
<ParmVarDecl
>(VD
)) {
2270 auto *FD
= dyn_cast
<clang::FunctionDecl
>(PVD
->getDeclContext());
2271 if (!FD
|| FD
!= D
) {
2272 // `FD != D` means that `PVD` belongs to a function that is not being
2273 // analyzed currently. Thus `FD` may not be complete.
2274 DEBUG_NOTE_DECL_FAIL(VD
, " : function not currently analyzed");
2278 // TODO If function has a try block we can't change params unless we check
2279 // also its catch block for their use.
2280 // FIXME We might support static class methods, some select methods,
2281 // operators and possibly lamdas.
2282 if (FD
->isMain() || FD
->isConstexpr() ||
2283 FD
->getTemplatedKind() != FunctionDecl::TemplatedKind::TK_NonTemplate
||
2285 // also covers call-operator of lamdas
2286 isa
<CXXMethodDecl
>(FD
) ||
2287 // skip when the function body is a try-block
2288 (FD
->hasBody() && isa
<CXXTryStmt
>(FD
->getBody())) ||
2289 FD
->isOverloadedOperator()) {
2290 DEBUG_NOTE_DECL_FAIL(VD
, " : unsupported function decl");
2291 return {}; // TODO test all these cases
2296 case Strategy::Kind::Span
: {
2297 if (VD
->getType()->isPointerType()) {
2298 if (const auto *PVD
= dyn_cast
<ParmVarDecl
>(VD
))
2299 return fixParamWithSpan(PVD
, Ctx
, Handler
);
2301 if (VD
->isLocalVarDecl())
2302 return fixVariableWithSpan(VD
, Tracker
, Ctx
, Handler
);
2304 DEBUG_NOTE_DECL_FAIL(VD
, " : not a pointer");
2307 case Strategy::Kind::Iterator
:
2308 case Strategy::Kind::Array
:
2309 case Strategy::Kind::Vector
:
2310 llvm_unreachable("Strategy not implemented yet!");
2311 case Strategy::Kind::Wontfix
:
2312 llvm_unreachable("Invalid strategy!");
2314 llvm_unreachable("Unknown strategy!");
2317 // Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the
2318 // `RemoveRange` of 'h' overlaps with a macro use.
2319 static bool overlapWithMacro(const FixItList
&FixIts
) {
2320 // FIXME: For now we only check if the range (or the first token) is (part of)
2321 // a macro expansion. Ideally, we want to check for all tokens in the range.
2322 return llvm::any_of(FixIts
, [](const FixItHint
&Hint
) {
2323 auto Range
= Hint
.RemoveRange
;
2324 if (Range
.getBegin().isMacroID() || Range
.getEnd().isMacroID())
2325 // If the range (or the first token) is (part of) a macro expansion:
2331 // Returns true iff `VD` is a parameter of the declaration `D`:
2332 static bool isParameterOf(const VarDecl
*VD
, const Decl
*D
) {
2333 return isa
<ParmVarDecl
>(VD
) &&
2334 VD
->getDeclContext() == dyn_cast
<DeclContext
>(D
);
2337 // Erases variables in `FixItsForVariable`, if such a variable has an unfixable
2338 // group mate. A variable `v` is unfixable iff `FixItsForVariable` does not
2340 static void eraseVarsForUnfixableGroupMates(
2341 std::map
<const VarDecl
*, FixItList
> &FixItsForVariable
,
2342 const VariableGroupsManager
&VarGrpMgr
) {
2343 // Variables will be removed from `FixItsForVariable`:
2344 SmallVector
<const VarDecl
*, 8> ToErase
;
2346 for (const auto &[VD
, Ignore
] : FixItsForVariable
) {
2347 VarGrpRef Grp
= VarGrpMgr
.getGroupOfVar(VD
);
2348 if (llvm::any_of(Grp
,
2349 [&FixItsForVariable
](const VarDecl
*GrpMember
) -> bool {
2350 return !FixItsForVariable
.count(GrpMember
);
2352 // At least one group member cannot be fixed, so we have to erase the
2354 for (const VarDecl
*Member
: Grp
)
2355 ToErase
.push_back(Member
);
2358 for (auto *VarToErase
: ToErase
)
2359 FixItsForVariable
.erase(VarToErase
);
2362 // Returns the fix-its that create bounds-safe function overloads for the
2363 // function `D`, if `D`'s parameters will be changed to safe-types through
2364 // fix-its in `FixItsForVariable`.
2366 // NOTE: In case `D`'s parameters will be changed but bounds-safe function
2367 // overloads cannot created, the whole group that contains the parameters will
2368 // be erased from `FixItsForVariable`.
2369 static FixItList
createFunctionOverloadsForParms(
2370 std::map
<const VarDecl
*, FixItList
> &FixItsForVariable
/* mutable */,
2371 const VariableGroupsManager
&VarGrpMgr
, const FunctionDecl
*FD
,
2372 const Strategy
&S
, ASTContext
&Ctx
, UnsafeBufferUsageHandler
&Handler
) {
2373 FixItList FixItsSharedByParms
{};
2375 std::optional
<FixItList
> OverloadFixes
=
2376 createOverloadsForFixedParams(S
, FD
, Ctx
, Handler
);
2378 if (OverloadFixes
) {
2379 FixItsSharedByParms
.append(*OverloadFixes
);
2381 // Something wrong in generating `OverloadFixes`, need to remove the
2382 // whole group, where parameters are in, from `FixItsForVariable` (Note
2383 // that all parameters should be in the same group):
2384 for (auto *Member
: VarGrpMgr
.getGroupOfParms())
2385 FixItsForVariable
.erase(Member
);
2387 return FixItsSharedByParms
;
2390 // Constructs self-contained fix-its for each variable in `FixablesForAllVars`.
2391 static std::map
<const VarDecl
*, FixItList
>
2392 getFixIts(FixableGadgetSets
&FixablesForAllVars
, const Strategy
&S
,
2394 /* The function decl under analysis */ const Decl
*D
,
2395 const DeclUseTracker
&Tracker
, UnsafeBufferUsageHandler
&Handler
,
2396 const VariableGroupsManager
&VarGrpMgr
) {
2397 // `FixItsForVariable` will map each variable to a set of fix-its directly
2398 // associated to the variable itself. Fix-its of distinct variables in
2399 // `FixItsForVariable` are disjoint.
2400 std::map
<const VarDecl
*, FixItList
> FixItsForVariable
;
2402 // Populate `FixItsForVariable` with fix-its directly associated with each
2403 // variable. Fix-its directly associated to a variable 'v' are the ones
2404 // produced by the `FixableGadget`s whose claimed variable is 'v'.
2405 for (const auto &[VD
, Fixables
] : FixablesForAllVars
.byVar
) {
2406 FixItsForVariable
[VD
] =
2407 fixVariable(VD
, S
.lookup(VD
), D
, Tracker
, Ctx
, Handler
);
2408 // If we fail to produce Fix-It for the declaration we have to skip the
2409 // variable entirely.
2410 if (FixItsForVariable
[VD
].empty()) {
2411 FixItsForVariable
.erase(VD
);
2414 for (const auto &F
: Fixables
) {
2415 std::optional
<FixItList
> Fixits
= F
->getFixits(S
);
2418 FixItsForVariable
[VD
].insert(FixItsForVariable
[VD
].end(),
2419 Fixits
->begin(), Fixits
->end());
2423 Handler
.addDebugNoteForVar(
2424 VD
, F
->getBaseStmt()->getBeginLoc(),
2425 ("gadget '" + F
->getDebugName() + "' refused to produce a fix")
2428 FixItsForVariable
.erase(VD
);
2433 // `FixItsForVariable` now contains only variables that can be
2434 // fixed. A variable can be fixed if its' declaration and all Fixables
2435 // associated to it can all be fixed.
2437 // To further remove from `FixItsForVariable` variables whose group mates
2438 // cannot be fixed...
2439 eraseVarsForUnfixableGroupMates(FixItsForVariable
, VarGrpMgr
);
2440 // Now `FixItsForVariable` gets further reduced: a variable is in
2441 // `FixItsForVariable` iff it can be fixed and all its group mates can be
2444 // Fix-its of bounds-safe overloads of `D` are shared by parameters of `D`.
2445 // That is, when fixing multiple parameters in one step, these fix-its will
2446 // be applied only once (instead of being applied per parameter).
2447 FixItList FixItsSharedByParms
{};
2449 if (auto *FD
= dyn_cast
<FunctionDecl
>(D
))
2450 FixItsSharedByParms
= createFunctionOverloadsForParms(
2451 FixItsForVariable
, VarGrpMgr
, FD
, S
, Ctx
, Handler
);
2453 // The map that maps each variable `v` to fix-its for the whole group where
2455 std::map
<const VarDecl
*, FixItList
> FinalFixItsForVariable
{
2458 for (auto &[Var
, Ignore
] : FixItsForVariable
) {
2459 bool AnyParm
= false;
2460 const auto VarGroupForVD
= VarGrpMgr
.getGroupOfVar(Var
, &AnyParm
);
2462 for (const VarDecl
*GrpMate
: VarGroupForVD
) {
2465 if (FixItsForVariable
.count(GrpMate
))
2466 FinalFixItsForVariable
[Var
].append(FixItsForVariable
[GrpMate
]);
2469 // This assertion should never fail. Otherwise we have a bug.
2470 assert(!FixItsSharedByParms
.empty() &&
2471 "Should not try to fix a parameter that does not belong to a "
2473 FinalFixItsForVariable
[Var
].append(FixItsSharedByParms
);
2476 // Fix-its that will be applied in one step shall NOT:
2477 // 1. overlap with macros or/and templates; or
2478 // 2. conflict with each other.
2479 // Otherwise, the fix-its will be dropped.
2480 for (auto Iter
= FinalFixItsForVariable
.begin();
2481 Iter
!= FinalFixItsForVariable
.end();)
2482 if (overlapWithMacro(Iter
->second
) ||
2483 clang::internal::anyConflict(Iter
->second
, Ctx
.getSourceManager())) {
2484 Iter
= FinalFixItsForVariable
.erase(Iter
);
2487 return FinalFixItsForVariable
;
2490 template <typename VarDeclIterTy
>
2492 getNaiveStrategy(llvm::iterator_range
<VarDeclIterTy
> UnsafeVars
) {
2494 for (const VarDecl
*VD
: UnsafeVars
) {
2495 S
.set(VD
, Strategy::Kind::Span
);
2500 // Manages variable groups:
2501 class VariableGroupsManagerImpl
: public VariableGroupsManager
{
2502 const std::vector
<VarGrpTy
> Groups
;
2503 const std::map
<const VarDecl
*, unsigned> &VarGrpMap
;
2504 const llvm::SetVector
<const VarDecl
*> &GrpsUnionForParms
;
2507 VariableGroupsManagerImpl(
2508 const std::vector
<VarGrpTy
> &Groups
,
2509 const std::map
<const VarDecl
*, unsigned> &VarGrpMap
,
2510 const llvm::SetVector
<const VarDecl
*> &GrpsUnionForParms
)
2511 : Groups(Groups
), VarGrpMap(VarGrpMap
),
2512 GrpsUnionForParms(GrpsUnionForParms
) {}
2514 VarGrpRef
getGroupOfVar(const VarDecl
*Var
, bool *HasParm
) const override
{
2515 if (GrpsUnionForParms
.contains(Var
)) {
2518 return GrpsUnionForParms
.getArrayRef();
2523 auto It
= VarGrpMap
.find(Var
);
2525 if (It
== VarGrpMap
.end())
2526 return std::nullopt
;
2527 return Groups
[It
->second
];
2530 VarGrpRef
getGroupOfParms() const override
{
2531 return GrpsUnionForParms
.getArrayRef();
2535 void clang::checkUnsafeBufferUsage(const Decl
*D
,
2536 UnsafeBufferUsageHandler
&Handler
,
2537 bool EmitSuggestions
) {
2539 Handler
.clearDebugNotes();
2542 assert(D
&& D
->getBody());
2543 // We do not want to visit a Lambda expression defined inside a method independently.
2544 // Instead, it should be visited along with the outer method.
2545 // FIXME: do we want to do the same thing for `BlockDecl`s?
2546 if (const auto *fd
= dyn_cast
<CXXMethodDecl
>(D
)) {
2547 if (fd
->getParent()->isLambda() && fd
->getParent()->isLocalClass())
2551 // Do not emit fixit suggestions for functions declared in an
2552 // extern "C" block.
2553 if (const auto *FD
= dyn_cast
<FunctionDecl
>(D
)) {
2554 for (FunctionDecl
*FReDecl
: FD
->redecls()) {
2555 if (FReDecl
->isExternC()) {
2556 EmitSuggestions
= false;
2562 WarningGadgetSets UnsafeOps
;
2563 FixableGadgetSets FixablesForAllVars
;
2565 auto [FixableGadgets
, WarningGadgets
, Tracker
] =
2566 findGadgets(D
, Handler
, EmitSuggestions
);
2568 if (!EmitSuggestions
) {
2569 // Our job is very easy without suggestions. Just warn about
2570 // every problematic operation and consider it done. No need to deal
2571 // with fixable gadgets, no need to group operations by variable.
2572 for (const auto &G
: WarningGadgets
) {
2573 Handler
.handleUnsafeOperation(G
->getBaseStmt(),
2574 /*IsRelatedToDecl=*/false);
2577 // This return guarantees that most of the machine doesn't run when
2578 // suggestions aren't requested.
2579 assert(FixableGadgets
.size() == 0 &&
2580 "Fixable gadgets found but suggestions not requested!");
2584 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
2585 // function under the analysis. No need to fix any Fixables.
2586 if (!WarningGadgets
.empty()) {
2587 // Gadgets "claim" variables they're responsible for. Once this loop
2588 // finishes, the tracker will only track DREs that weren't claimed by any
2589 // gadgets, i.e. not understood by the analysis.
2590 for (const auto &G
: FixableGadgets
) {
2591 for (const auto *DRE
: G
->getClaimedVarUseSites()) {
2592 Tracker
.claimUse(DRE
);
2597 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
2598 // function under the analysis. Thus, it early returns here as there is
2599 // nothing needs to be fixed.
2601 // Note this claim is based on the assumption that there is no unsafe
2602 // variable whose declaration is invisible from the analyzing function.
2603 // Otherwise, we need to consider if the uses of those unsafe varuables needs
2605 // So far, we are not fixing any global variables or class members. And,
2606 // lambdas will be analyzed along with the enclosing function. So this early
2607 // return is correct for now.
2608 if (WarningGadgets
.empty())
2611 UnsafeOps
= groupWarningGadgetsByVar(std::move(WarningGadgets
));
2612 FixablesForAllVars
= groupFixablesByVar(std::move(FixableGadgets
));
2614 std::map
<const VarDecl
*, FixItList
> FixItsForVariableGroup
;
2616 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s.
2617 for (auto it
= FixablesForAllVars
.byVar
.cbegin();
2618 it
!= FixablesForAllVars
.byVar
.cend();) {
2619 // FIXME: need to deal with global variables later
2620 if ((!it
->first
->isLocalVarDecl() && !isa
<ParmVarDecl
>(it
->first
))) {
2622 Handler
.addDebugNoteForVar(
2623 it
->first
, it
->first
->getBeginLoc(),
2624 ("failed to produce fixit for '" + it
->first
->getNameAsString() +
2625 "' : neither local nor a parameter"));
2627 it
= FixablesForAllVars
.byVar
.erase(it
);
2628 } else if (it
->first
->getType().getCanonicalType()->isReferenceType()) {
2630 Handler
.addDebugNoteForVar(it
->first
, it
->first
->getBeginLoc(),
2631 ("failed to produce fixit for '" +
2632 it
->first
->getNameAsString() +
2633 "' : has a reference type"));
2635 it
= FixablesForAllVars
.byVar
.erase(it
);
2636 } else if (Tracker
.hasUnclaimedUses(it
->first
)) {
2638 auto AllUnclaimed
= Tracker
.getUnclaimedUses(it
->first
);
2639 for (auto UnclaimedDRE
: AllUnclaimed
) {
2640 std::string UnclaimedUseTrace
=
2641 getDREAncestorString(UnclaimedDRE
, D
->getASTContext());
2643 Handler
.addDebugNoteForVar(
2644 it
->first
, UnclaimedDRE
->getBeginLoc(),
2645 ("failed to produce fixit for '" + it
->first
->getNameAsString() +
2646 "' : has an unclaimed use\nThe unclaimed DRE trace: " +
2647 UnclaimedUseTrace
));
2650 it
= FixablesForAllVars
.byVar
.erase(it
);
2651 } else if (it
->first
->isInitCapture()) {
2653 Handler
.addDebugNoteForVar(
2654 it
->first
, it
->first
->getBeginLoc(),
2655 ("failed to produce fixit for '" + it
->first
->getNameAsString() +
2656 "' : init capture"));
2658 it
= FixablesForAllVars
.byVar
.erase(it
);
2664 // Fixpoint iteration for pointer assignments
2665 using DepMapTy
= DenseMap
<const VarDecl
*, llvm::SetVector
<const VarDecl
*>>;
2666 DepMapTy DependenciesMap
{};
2667 DepMapTy PtrAssignmentGraph
{};
2669 for (auto it
: FixablesForAllVars
.byVar
) {
2670 for (const FixableGadget
*fixable
: it
.second
) {
2671 std::optional
<std::pair
<const VarDecl
*, const VarDecl
*>> ImplPair
=
2672 fixable
->getStrategyImplications();
2674 std::pair
<const VarDecl
*, const VarDecl
*> Impl
= std::move(*ImplPair
);
2675 PtrAssignmentGraph
[Impl
.first
].insert(Impl
.second
);
2681 The following code does a BFS traversal of the `PtrAssignmentGraph`
2682 considering all unsafe vars as starting nodes and constructs an undirected
2683 graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner
2684 elimiates all variables that are unreachable from any unsafe var. In other
2685 words, this removes all dependencies that don't include any unsafe variable
2686 and consequently don't need any fixit generation.
2687 Note: A careful reader would observe that the code traverses
2688 `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and
2689 `Adj` and not between `CurrentVar` and `Adj`. Both approaches would
2690 achieve the same result but the one used here dramatically cuts the
2691 amount of hoops the second part of the algorithm needs to jump, given that
2692 a lot of these connections become "direct". The reader is advised not to
2693 imagine how the graph is transformed because of using `Var` instead of
2694 `CurrentVar`. The reader can continue reading as if `CurrentVar` was used,
2695 and think about why it's equivalent later.
2697 std::set
<const VarDecl
*> VisitedVarsDirected
{};
2698 for (const auto &[Var
, ignore
] : UnsafeOps
.byVar
) {
2699 if (VisitedVarsDirected
.find(Var
) == VisitedVarsDirected
.end()) {
2701 std::queue
<const VarDecl
*> QueueDirected
{};
2702 QueueDirected
.push(Var
);
2703 while(!QueueDirected
.empty()) {
2704 const VarDecl
* CurrentVar
= QueueDirected
.front();
2705 QueueDirected
.pop();
2706 VisitedVarsDirected
.insert(CurrentVar
);
2707 auto AdjacentNodes
= PtrAssignmentGraph
[CurrentVar
];
2708 for (const VarDecl
*Adj
: AdjacentNodes
) {
2709 if (VisitedVarsDirected
.find(Adj
) == VisitedVarsDirected
.end()) {
2710 QueueDirected
.push(Adj
);
2712 DependenciesMap
[Var
].insert(Adj
);
2713 DependenciesMap
[Adj
].insert(Var
);
2719 // `Groups` stores the set of Connected Components in the graph.
2720 std::vector
<VarGrpTy
> Groups
;
2721 // `VarGrpMap` maps variables that need fix to the groups (indexes) that the
2722 // variables belong to. Group indexes refer to the elements in `Groups`.
2723 // `VarGrpMap` is complete in that every variable that needs fix is in it.
2724 std::map
<const VarDecl
*, unsigned> VarGrpMap
;
2725 // The union group over the ones in "Groups" that contain parameters of `D`:
2726 llvm::SetVector
<const VarDecl
*>
2727 GrpsUnionForParms
; // these variables need to be fixed in one step
2729 // Group Connected Components for Unsafe Vars
2730 // (Dependencies based on pointer assignments)
2731 std::set
<const VarDecl
*> VisitedVars
{};
2732 for (const auto &[Var
, ignore
] : UnsafeOps
.byVar
) {
2733 if (VisitedVars
.find(Var
) == VisitedVars
.end()) {
2734 VarGrpTy
&VarGroup
= Groups
.emplace_back();
2735 std::queue
<const VarDecl
*> Queue
{};
2738 while(!Queue
.empty()) {
2739 const VarDecl
* CurrentVar
= Queue
.front();
2741 VisitedVars
.insert(CurrentVar
);
2742 VarGroup
.push_back(CurrentVar
);
2743 auto AdjacentNodes
= DependenciesMap
[CurrentVar
];
2744 for (const VarDecl
*Adj
: AdjacentNodes
) {
2745 if (VisitedVars
.find(Adj
) == VisitedVars
.end()) {
2751 bool HasParm
= false;
2752 unsigned GrpIdx
= Groups
.size() - 1;
2754 for (const VarDecl
*V
: VarGroup
) {
2755 VarGrpMap
[V
] = GrpIdx
;
2756 if (!HasParm
&& isParameterOf(V
, D
))
2760 GrpsUnionForParms
.insert(VarGroup
.begin(), VarGroup
.end());
2764 // Remove a `FixableGadget` if the associated variable is not in the graph
2765 // computed above. We do not want to generate fix-its for such variables,
2766 // since they are neither warned nor reachable from a warned one.
2768 // Note a variable is not warned if it is not directly used in any unsafe
2769 // operation. A variable `v` is NOT reachable from an unsafe variable, if it
2770 // does not exist another variable `u` such that `u` is warned and fixing `u`
2771 // (transitively) implicates fixing `v`.
2775 // void f(int * p) {
2776 // int * a = p; *p = 0;
2779 // `*p = 0` is a fixable gadget associated with a variable `p` that is neither
2780 // warned nor reachable from a warned one. If we add `a[5] = 0` to the end of
2781 // the function above, `p` becomes reachable from a warned variable.
2782 for (auto I
= FixablesForAllVars
.byVar
.begin();
2783 I
!= FixablesForAllVars
.byVar
.end();) {
2784 // Note `VisitedVars` contain all the variables in the graph:
2785 if (!VisitedVars
.count((*I
).first
)) {
2786 // no such var in graph:
2787 I
= FixablesForAllVars
.byVar
.erase(I
);
2792 // We assign strategies to variables that are 1) in the graph and 2) can be
2793 // fixed. Other variables have the default "Won't fix" strategy.
2794 Strategy NaiveStrategy
= getNaiveStrategy(llvm::make_filter_range(
2795 VisitedVars
, [&FixablesForAllVars
](const VarDecl
*V
) {
2796 // If a warned variable has no "Fixable", it is considered unfixable:
2797 return FixablesForAllVars
.byVar
.count(V
);
2799 VariableGroupsManagerImpl
VarGrpMgr(Groups
, VarGrpMap
, GrpsUnionForParms
);
2801 if (isa
<NamedDecl
>(D
))
2802 // The only case where `D` is not a `NamedDecl` is when `D` is a
2803 // `BlockDecl`. Let's not fix variables in blocks for now
2804 FixItsForVariableGroup
=
2805 getFixIts(FixablesForAllVars
, NaiveStrategy
, D
->getASTContext(), D
,
2806 Tracker
, Handler
, VarGrpMgr
);
2808 for (const auto &G
: UnsafeOps
.noVar
) {
2809 Handler
.handleUnsafeOperation(G
->getBaseStmt(), /*IsRelatedToDecl=*/false);
2812 for (const auto &[VD
, WarningGadgets
] : UnsafeOps
.byVar
) {
2813 auto FixItsIt
= FixItsForVariableGroup
.find(VD
);
2814 Handler
.handleUnsafeVariableGroup(VD
, VarGrpMgr
,
2815 FixItsIt
!= FixItsForVariableGroup
.end()
2816 ? std::move(FixItsIt
->second
)
2819 for (const auto &G
: WarningGadgets
) {
2820 Handler
.handleUnsafeOperation(G
->getBaseStmt(), /*IsRelatedToDecl=*/true);