1 //===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
10 #include "clang/AST/Decl.h"
11 #include "clang/AST/Expr.h"
12 #include "clang/AST/RecursiveASTVisitor.h"
13 #include "clang/AST/StmtVisitor.h"
14 #include "clang/ASTMatchers/ASTMatchFinder.h"
15 #include "clang/Lex/Lexer.h"
16 #include "clang/Lex/Preprocessor.h"
17 #include "llvm/ADT/SmallVector.h"
24 using namespace clang
;
25 using namespace ast_matchers
;
29 class StmtDebugPrinter
30 : public ConstStmtVisitor
<StmtDebugPrinter
, std::string
> {
32 std::string
VisitStmt(const Stmt
*S
) { return S
->getStmtClassName(); }
34 std::string
VisitBinaryOperator(const BinaryOperator
*BO
) {
35 return "BinaryOperator(" + BO
->getOpcodeStr().str() + ")";
38 std::string
VisitUnaryOperator(const UnaryOperator
*UO
) {
39 return "UnaryOperator(" + UO
->getOpcodeStr(UO
->getOpcode()).str() + ")";
42 std::string
VisitImplicitCastExpr(const ImplicitCastExpr
*ICE
) {
43 return "ImplicitCastExpr(" + std::string(ICE
->getCastKindName()) + ")";
47 // Returns a string of ancestor `Stmt`s of the given `DRE` in such a form:
48 // "DRE ==> parent-of-DRE ==> grandparent-of-DRE ==> ...".
49 static std::string
getDREAncestorString(const DeclRefExpr
*DRE
,
53 StmtDebugPrinter StmtPriner
;
56 SS
<< StmtPriner
.Visit(St
);
58 DynTypedNodeList StParents
= Ctx
.getParents(*St
);
60 if (StParents
.size() > 1)
61 return "unavailable due to multiple parents";
62 if (StParents
.size() == 0)
64 St
= StParents
.begin()->get
<Stmt
>();
73 namespace clang::ast_matchers
{
74 // A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
75 // except for those belonging to a different callable of "n".
76 class MatchDescendantVisitor
77 : public RecursiveASTVisitor
<MatchDescendantVisitor
> {
79 typedef RecursiveASTVisitor
<MatchDescendantVisitor
> VisitorBase
;
81 // Creates an AST visitor that matches `Matcher` on all
82 // descendants of a given node "n" except for the ones
83 // belonging to a different callable of "n".
84 MatchDescendantVisitor(const internal::DynTypedMatcher
*Matcher
,
85 internal::ASTMatchFinder
*Finder
,
86 internal::BoundNodesTreeBuilder
*Builder
,
87 internal::ASTMatchFinder::BindKind Bind
,
88 const bool ignoreUnevaluatedContext
)
89 : Matcher(Matcher
), Finder(Finder
), Builder(Builder
), Bind(Bind
),
90 Matches(false), ignoreUnevaluatedContext(ignoreUnevaluatedContext
) {}
92 // Returns true if a match is found in a subtree of `DynNode`, which belongs
93 // to the same callable of `DynNode`.
94 bool findMatch(const DynTypedNode
&DynNode
) {
96 if (const Stmt
*StmtNode
= DynNode
.get
<Stmt
>()) {
97 TraverseStmt(const_cast<Stmt
*>(StmtNode
));
98 *Builder
= ResultBindings
;
104 // The following are overriding methods from the base visitor class.
105 // They are public only to allow CRTP to work. They are *not* part
106 // of the public API of this class.
108 // For the matchers so far used in safe buffers, we only need to match
109 // `Stmt`s. To override more as needed.
111 bool TraverseDecl(Decl
*Node
) {
116 // To skip callables:
117 if (isa
<FunctionDecl
, BlockDecl
, ObjCMethodDecl
>(Node
))
119 // Traverse descendants
120 return VisitorBase::TraverseDecl(Node
);
123 bool TraverseGenericSelectionExpr(GenericSelectionExpr
*Node
) {
124 // These are unevaluated, except the result expression.
125 if(ignoreUnevaluatedContext
)
126 return TraverseStmt(Node
->getResultExpr());
127 return VisitorBase::TraverseGenericSelectionExpr(Node
);
130 bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr
*Node
) {
131 // Unevaluated context.
132 if(ignoreUnevaluatedContext
)
134 return VisitorBase::TraverseUnaryExprOrTypeTraitExpr(Node
);
137 bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node
) {
138 // Unevaluated context.
139 if(ignoreUnevaluatedContext
)
141 return VisitorBase::TraverseTypeOfExprTypeLoc(Node
);
144 bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node
) {
145 // Unevaluated context.
146 if(ignoreUnevaluatedContext
)
148 return VisitorBase::TraverseDecltypeTypeLoc(Node
);
151 bool TraverseCXXNoexceptExpr(CXXNoexceptExpr
*Node
) {
152 // Unevaluated context.
153 if(ignoreUnevaluatedContext
)
155 return VisitorBase::TraverseCXXNoexceptExpr(Node
);
158 bool TraverseCXXTypeidExpr(CXXTypeidExpr
*Node
) {
159 // Unevaluated context.
160 if(ignoreUnevaluatedContext
)
162 return VisitorBase::TraverseCXXTypeidExpr(Node
);
165 bool TraverseStmt(Stmt
*Node
, DataRecursionQueue
*Queue
= nullptr) {
170 return VisitorBase::TraverseStmt(Node
);
173 bool shouldVisitTemplateInstantiations() const { return true; }
174 bool shouldVisitImplicitCode() const {
175 // TODO: let's ignore implicit code for now
180 // Sets 'Matched' to true if 'Matcher' matches 'Node'
182 // Returns 'true' if traversal should continue after this function
183 // returns, i.e. if no match is found or 'Bind' is 'BK_All'.
184 template <typename T
> bool match(const T
&Node
) {
185 internal::BoundNodesTreeBuilder
RecursiveBuilder(*Builder
);
187 if (Matcher
->matches(DynTypedNode::create(Node
), Finder
,
188 &RecursiveBuilder
)) {
189 ResultBindings
.addMatch(RecursiveBuilder
);
191 if (Bind
!= internal::ASTMatchFinder::BK_All
)
192 return false; // Abort as soon as a match is found.
197 const internal::DynTypedMatcher
*const Matcher
;
198 internal::ASTMatchFinder
*const Finder
;
199 internal::BoundNodesTreeBuilder
*const Builder
;
200 internal::BoundNodesTreeBuilder ResultBindings
;
201 const internal::ASTMatchFinder::BindKind Bind
;
203 bool ignoreUnevaluatedContext
;
206 // Because we're dealing with raw pointers, let's define what we mean by that.
207 static auto hasPointerType() {
208 return hasType(hasCanonicalType(pointerType()));
211 static auto hasArrayType() {
212 return hasType(hasCanonicalType(arrayType()));
215 AST_MATCHER_P(Stmt
, forEachDescendantEvaluatedStmt
, internal::Matcher
<Stmt
>, innerMatcher
) {
216 const DynTypedMatcher
&DTM
= static_cast<DynTypedMatcher
>(innerMatcher
);
218 MatchDescendantVisitor
Visitor(&DTM
, Finder
, Builder
, ASTMatchFinder::BK_All
, true);
219 return Visitor
.findMatch(DynTypedNode::create(Node
));
222 AST_MATCHER_P(Stmt
, forEachDescendantStmt
, internal::Matcher
<Stmt
>, innerMatcher
) {
223 const DynTypedMatcher
&DTM
= static_cast<DynTypedMatcher
>(innerMatcher
);
225 MatchDescendantVisitor
Visitor(&DTM
, Finder
, Builder
, ASTMatchFinder::BK_All
, false);
226 return Visitor
.findMatch(DynTypedNode::create(Node
));
229 // Matches a `Stmt` node iff the node is not in a safe-buffer opt-out region.
230 AST_MATCHER_P(Stmt
, notInSafeBufferOptOut
, const UnsafeBufferUsageHandler
*,
232 return !Handler
->isSafeBufferOptOut(Node
.getBeginLoc());
235 AST_MATCHER_P(CastExpr
, castSubExpr
, internal::Matcher
<Expr
>, innerMatcher
) {
236 return innerMatcher
.matches(*Node
.getSubExpr(), Finder
, Builder
);
239 // Matches a `UnaryOperator` whose operator is pre-increment:
240 AST_MATCHER(UnaryOperator
, isPreInc
) {
241 return Node
.getOpcode() == UnaryOperator::Opcode::UO_PreInc
;
244 // Returns a matcher that matches any expression 'e' such that `innerMatcher`
245 // matches 'e' and 'e' is in an Unspecified Lvalue Context.
246 static auto isInUnspecifiedLvalueContext(internal::Matcher
<Expr
> innerMatcher
) {
251 hasCastKind(CastKind::CK_LValueToRValue
),
252 castSubExpr(innerMatcher
)),
254 hasAnyOperatorName("="),
262 // Returns a matcher that matches any expression `e` such that `InnerMatcher`
263 // matches `e` and `e` is in an Unspecified Pointer Context (UPC).
264 static internal::Matcher
<Stmt
>
265 isInUnspecifiedPointerContext(internal::Matcher
<Stmt
> InnerMatcher
) {
267 // 1. an argument of a function call (except the callee has [[unsafe_...]]
269 // 2. the operand of a pointer-to-(integer or bool) cast operation; or
270 // 3. the operand of a comparator operation; or
271 // 4. the operand of a pointer subtraction operation
272 // (i.e., computing the distance between two pointers); or ...
274 auto CallArgMatcher
=
275 callExpr(forEachArgumentWithParam(InnerMatcher
,
276 hasPointerType() /* array also decays to pointer type*/),
277 unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage
)))));
279 auto CastOperandMatcher
=
280 castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral
),
281 hasCastKind(CastKind::CK_PointerToBoolean
)),
282 castSubExpr(allOf(hasPointerType(), InnerMatcher
)));
284 auto CompOperandMatcher
=
285 binaryOperator(hasAnyOperatorName("!=", "==", "<", "<=", ">", ">="),
286 eachOf(hasLHS(allOf(hasPointerType(), InnerMatcher
)),
287 hasRHS(allOf(hasPointerType(), InnerMatcher
))));
289 // A matcher that matches pointer subtractions:
290 auto PtrSubtractionMatcher
=
291 binaryOperator(hasOperatorName("-"),
292 // Note that here we need both LHS and RHS to be
293 // pointer. Then the inner matcher can match any of
295 allOf(hasLHS(hasPointerType()),
296 hasRHS(hasPointerType())),
297 eachOf(hasLHS(InnerMatcher
),
298 hasRHS(InnerMatcher
)));
300 return stmt(anyOf(CallArgMatcher
, CastOperandMatcher
, CompOperandMatcher
,
301 PtrSubtractionMatcher
));
302 // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we
303 // don't have to check that.)
306 // Returns a matcher that matches any expression 'e' such that `innerMatcher`
307 // matches 'e' and 'e' is in an unspecified untyped context (i.e the expression
308 // 'e' isn't evaluated to an RValue). For example, consider the following code:
309 // int *p = new int[4];
310 // int *q = new int[4];
313 // The expression `p = q` in the conditional of the `if` statement
314 // `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;`
315 // in the assignment statement is in an untyped context.
316 static internal::Matcher
<Stmt
>
317 isInUnspecifiedUntypedContext(internal::Matcher
<Stmt
> InnerMatcher
) {
318 // An unspecified context can be
319 // 1. A compound statement,
320 // 2. The body of an if statement
322 auto CompStmt
= compoundStmt(forEach(InnerMatcher
));
323 auto IfStmtThen
= ifStmt(hasThen(InnerMatcher
));
324 auto IfStmtElse
= ifStmt(hasElse(InnerMatcher
));
325 // FIXME: Handle loop bodies.
326 return stmt(anyOf(CompStmt
, IfStmtThen
, IfStmtElse
));
328 } // namespace clang::ast_matchers
331 // Because the analysis revolves around variables and their types, we'll need to
332 // track uses of variables (aka DeclRefExprs).
333 using DeclUseList
= SmallVector
<const DeclRefExpr
*, 1>;
335 // Convenience typedef.
336 using FixItList
= SmallVector
<FixItHint
, 4>;
343 /// Gadget is an individual operation in the code that may be of interest to
344 /// this analysis. Each (non-abstract) subclass corresponds to a specific
345 /// rigid AST structure that constitutes an operation on a pointer-type object.
346 /// Discovery of a gadget in the code corresponds to claiming that we understand
347 /// what this part of code is doing well enough to potentially improve it.
348 /// Gadgets can be warning (immediately deserving a warning) or fixable (not
349 /// always deserving a warning per se, but require our attention to decide
350 /// whether they warrant a fixit).
355 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
358 /// Common type of ASTMatchers used for discovering gadgets.
359 /// Useful for implementing the static matcher() methods
360 /// that are expected from all non-abstract subclasses.
361 using Matcher
= decltype(stmt());
363 Gadget(Kind K
) : K(K
) {}
365 Kind
getKind() const { return K
; }
368 StringRef
getDebugName() const {
370 #define GADGET(x) case Kind::x: return #x;
371 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
373 llvm_unreachable("Unhandled Gadget::Kind enum");
377 virtual bool isWarningGadget() const = 0;
378 virtual const Stmt
*getBaseStmt() const = 0;
380 /// Returns the list of pointer-type variables on which this gadget performs
381 /// its operation. Typically, there's only one variable. This isn't a list
382 /// of all DeclRefExprs in the gadget's AST!
383 virtual DeclUseList
getClaimedVarUseSites() const = 0;
385 virtual ~Gadget() = default;
392 /// Warning gadgets correspond to unsafe code patterns that warrant
393 /// an immediate warning.
394 class WarningGadget
: public Gadget
{
396 WarningGadget(Kind K
) : Gadget(K
) {}
398 static bool classof(const Gadget
*G
) { return G
->isWarningGadget(); }
399 bool isWarningGadget() const final
{ return true; }
402 /// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be
403 /// properly recognized in order to emit fixes. For example, if a raw pointer-type
404 /// variable is replaced by a safe C++ container, every use of such variable must be
405 /// carefully considered and possibly updated.
406 class FixableGadget
: public Gadget
{
408 FixableGadget(Kind K
) : Gadget(K
) {}
410 static bool classof(const Gadget
*G
) { return !G
->isWarningGadget(); }
411 bool isWarningGadget() const final
{ return false; }
413 /// Returns a fixit that would fix the current gadget according to
414 /// the current strategy. Returns std::nullopt if the fix cannot be produced;
415 /// returns an empty list if no fixes are necessary.
416 virtual std::optional
<FixItList
> getFixits(const Strategy
&) const {
420 /// Returns a list of two elements where the first element is the LHS of a pointer assignment
421 /// statement and the second element is the RHS. This two-element list represents the fact that
422 /// the LHS buffer gets its bounds information from the RHS buffer. This information will be used
423 /// later to group all those variables whose types must be modified together to prevent type
425 virtual std::optional
<std::pair
<const VarDecl
*, const VarDecl
*>>
426 getStrategyImplications() const {
431 static auto toSupportedVariable() {
432 return to(varDecl());
435 using FixableGadgetList
= std::vector
<std::unique_ptr
<FixableGadget
>>;
436 using WarningGadgetList
= std::vector
<std::unique_ptr
<WarningGadget
>>;
438 /// An increment of a pointer-type value is unsafe as it may run the pointer
440 class IncrementGadget
: public WarningGadget
{
441 static constexpr const char *const OpTag
= "op";
442 const UnaryOperator
*Op
;
445 IncrementGadget(const MatchFinder::MatchResult
&Result
)
446 : WarningGadget(Kind::Increment
),
447 Op(Result
.Nodes
.getNodeAs
<UnaryOperator
>(OpTag
)) {}
449 static bool classof(const Gadget
*G
) {
450 return G
->getKind() == Kind::Increment
;
453 static Matcher
matcher() {
454 return stmt(unaryOperator(
455 hasOperatorName("++"),
456 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
460 const UnaryOperator
*getBaseStmt() const override
{ return Op
; }
462 DeclUseList
getClaimedVarUseSites() const override
{
463 SmallVector
<const DeclRefExpr
*, 2> Uses
;
464 if (const auto *DRE
=
465 dyn_cast
<DeclRefExpr
>(Op
->getSubExpr()->IgnoreParenImpCasts())) {
469 return std::move(Uses
);
473 /// A decrement of a pointer-type value is unsafe as it may run the pointer
475 class DecrementGadget
: public WarningGadget
{
476 static constexpr const char *const OpTag
= "op";
477 const UnaryOperator
*Op
;
480 DecrementGadget(const MatchFinder::MatchResult
&Result
)
481 : WarningGadget(Kind::Decrement
),
482 Op(Result
.Nodes
.getNodeAs
<UnaryOperator
>(OpTag
)) {}
484 static bool classof(const Gadget
*G
) {
485 return G
->getKind() == Kind::Decrement
;
488 static Matcher
matcher() {
489 return stmt(unaryOperator(
490 hasOperatorName("--"),
491 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
495 const UnaryOperator
*getBaseStmt() const override
{ return Op
; }
497 DeclUseList
getClaimedVarUseSites() const override
{
498 if (const auto *DRE
=
499 dyn_cast
<DeclRefExpr
>(Op
->getSubExpr()->IgnoreParenImpCasts())) {
507 /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
508 /// it doesn't have any bounds checks for the array.
509 class ArraySubscriptGadget
: public WarningGadget
{
510 static constexpr const char *const ArraySubscrTag
= "ArraySubscript";
511 const ArraySubscriptExpr
*ASE
;
514 ArraySubscriptGadget(const MatchFinder::MatchResult
&Result
)
515 : WarningGadget(Kind::ArraySubscript
),
516 ASE(Result
.Nodes
.getNodeAs
<ArraySubscriptExpr
>(ArraySubscrTag
)) {}
518 static bool classof(const Gadget
*G
) {
519 return G
->getKind() == Kind::ArraySubscript
;
522 static Matcher
matcher() {
523 // FIXME: What if the index is integer literal 0? Should this be
524 // a safe gadget in this case?
526 return stmt(arraySubscriptExpr(
527 hasBase(ignoringParenImpCasts(
528 anyOf(hasPointerType(), hasArrayType()))),
530 anyOf(integerLiteral(equals(0)), arrayInitIndexExpr())
532 .bind(ArraySubscrTag
));
536 const ArraySubscriptExpr
*getBaseStmt() const override
{ return ASE
; }
538 DeclUseList
getClaimedVarUseSites() const override
{
539 if (const auto *DRE
=
540 dyn_cast
<DeclRefExpr
>(ASE
->getBase()->IgnoreParenImpCasts())) {
548 /// A pointer arithmetic expression of one of the forms:
550 /// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n
552 class PointerArithmeticGadget
: public WarningGadget
{
553 static constexpr const char *const PointerArithmeticTag
= "ptrAdd";
554 static constexpr const char *const PointerArithmeticPointerTag
= "ptrAddPtr";
555 const BinaryOperator
*PA
; // pointer arithmetic expression
556 const Expr
*Ptr
; // the pointer expression in `PA`
559 PointerArithmeticGadget(const MatchFinder::MatchResult
&Result
)
560 : WarningGadget(Kind::PointerArithmetic
),
561 PA(Result
.Nodes
.getNodeAs
<BinaryOperator
>(PointerArithmeticTag
)),
562 Ptr(Result
.Nodes
.getNodeAs
<Expr
>(PointerArithmeticPointerTag
)) {}
564 static bool classof(const Gadget
*G
) {
565 return G
->getKind() == Kind::PointerArithmetic
;
568 static Matcher
matcher() {
569 auto HasIntegerType
= anyOf(hasType(isInteger()), hasType(enumType()));
571 allOf(hasOperatorName("+"),
572 hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag
)),
573 hasLHS(HasIntegerType
));
575 allOf(anyOf(hasOperatorName("+"), hasOperatorName("-"),
576 hasOperatorName("+="), hasOperatorName("-=")),
577 hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag
)),
578 hasRHS(HasIntegerType
));
580 return stmt(binaryOperator(anyOf(PtrAtLeft
, PtrAtRight
))
581 .bind(PointerArithmeticTag
));
584 const Stmt
*getBaseStmt() const override
{ return PA
; }
586 DeclUseList
getClaimedVarUseSites() const override
{
587 if (const auto *DRE
= dyn_cast
<DeclRefExpr
>(Ptr
->IgnoreParenImpCasts())) {
593 // FIXME: pointer adding zero should be fine
594 // FIXME: this gadget will need a fix-it
597 /// A pointer initialization expression of the form:
601 class PointerInitGadget
: public FixableGadget
{
603 static constexpr const char *const PointerInitLHSTag
= "ptrInitLHS";
604 static constexpr const char *const PointerInitRHSTag
= "ptrInitRHS";
605 const VarDecl
* PtrInitLHS
; // the LHS pointer expression in `PI`
606 const DeclRefExpr
* PtrInitRHS
; // the RHS pointer expression in `PI`
609 PointerInitGadget(const MatchFinder::MatchResult
&Result
)
610 : FixableGadget(Kind::PointerInit
),
611 PtrInitLHS(Result
.Nodes
.getNodeAs
<VarDecl
>(PointerInitLHSTag
)),
612 PtrInitRHS(Result
.Nodes
.getNodeAs
<DeclRefExpr
>(PointerInitRHSTag
)) {}
614 static bool classof(const Gadget
*G
) {
615 return G
->getKind() == Kind::PointerInit
;
618 static Matcher
matcher() {
619 auto PtrInitStmt
= declStmt(hasSingleDecl(varDecl(
620 hasInitializer(ignoringImpCasts(declRefExpr(
622 toSupportedVariable()).
623 bind(PointerInitRHSTag
)))).
624 bind(PointerInitLHSTag
)));
626 return stmt(PtrInitStmt
);
629 virtual std::optional
<FixItList
> getFixits(const Strategy
&S
) const override
;
631 virtual const Stmt
*getBaseStmt() const override
{
632 // FIXME: This needs to be the entire DeclStmt, assuming that this method
633 // makes sense at all on a FixableGadget.
637 virtual DeclUseList
getClaimedVarUseSites() const override
{
638 return DeclUseList
{PtrInitRHS
};
641 virtual std::optional
<std::pair
<const VarDecl
*, const VarDecl
*>>
642 getStrategyImplications() const override
{
643 return std::make_pair(PtrInitLHS
,
644 cast
<VarDecl
>(PtrInitRHS
->getDecl()));
648 /// A pointer assignment expression of the form:
652 class PointerAssignmentGadget
: public FixableGadget
{
654 static constexpr const char *const PointerAssignLHSTag
= "ptrLHS";
655 static constexpr const char *const PointerAssignRHSTag
= "ptrRHS";
656 const DeclRefExpr
* PtrLHS
; // the LHS pointer expression in `PA`
657 const DeclRefExpr
* PtrRHS
; // the RHS pointer expression in `PA`
660 PointerAssignmentGadget(const MatchFinder::MatchResult
&Result
)
661 : FixableGadget(Kind::PointerAssignment
),
662 PtrLHS(Result
.Nodes
.getNodeAs
<DeclRefExpr
>(PointerAssignLHSTag
)),
663 PtrRHS(Result
.Nodes
.getNodeAs
<DeclRefExpr
>(PointerAssignRHSTag
)) {}
665 static bool classof(const Gadget
*G
) {
666 return G
->getKind() == Kind::PointerAssignment
;
669 static Matcher
matcher() {
670 auto PtrAssignExpr
= binaryOperator(allOf(hasOperatorName("="),
671 hasRHS(ignoringParenImpCasts(declRefExpr(hasPointerType(),
672 toSupportedVariable()).
673 bind(PointerAssignRHSTag
))),
674 hasLHS(declRefExpr(hasPointerType(),
675 toSupportedVariable()).
676 bind(PointerAssignLHSTag
))));
678 return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr
));
681 virtual std::optional
<FixItList
> getFixits(const Strategy
&S
) const override
;
683 virtual const Stmt
*getBaseStmt() const override
{
684 // FIXME: This should be the binary operator, assuming that this method
685 // makes sense at all on a FixableGadget.
689 virtual DeclUseList
getClaimedVarUseSites() const override
{
690 return DeclUseList
{PtrLHS
, PtrRHS
};
693 virtual std::optional
<std::pair
<const VarDecl
*, const VarDecl
*>>
694 getStrategyImplications() const override
{
695 return std::make_pair(cast
<VarDecl
>(PtrLHS
->getDecl()),
696 cast
<VarDecl
>(PtrRHS
->getDecl()));
700 /// A call of a function or method that performs unchecked buffer operations
701 /// over one of its pointer parameters.
702 class UnsafeBufferUsageAttrGadget
: public WarningGadget
{
703 constexpr static const char *const OpTag
= "call_expr";
707 UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult
&Result
)
708 : WarningGadget(Kind::UnsafeBufferUsageAttr
),
709 Op(Result
.Nodes
.getNodeAs
<CallExpr
>(OpTag
)) {}
711 static bool classof(const Gadget
*G
) {
712 return G
->getKind() == Kind::UnsafeBufferUsageAttr
;
715 static Matcher
matcher() {
716 return stmt(callExpr(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage
))))
719 const Stmt
*getBaseStmt() const override
{ return Op
; }
721 DeclUseList
getClaimedVarUseSites() const override
{ return {}; }
724 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
725 // Context (see `isInUnspecifiedLvalueContext`).
726 // Note here `[]` is the built-in subscript operator.
727 class ULCArraySubscriptGadget
: public FixableGadget
{
729 static constexpr const char *const ULCArraySubscriptTag
=
730 "ArraySubscriptUnderULC";
731 const ArraySubscriptExpr
*Node
;
734 ULCArraySubscriptGadget(const MatchFinder::MatchResult
&Result
)
735 : FixableGadget(Kind::ULCArraySubscript
),
736 Node(Result
.Nodes
.getNodeAs
<ArraySubscriptExpr
>(ULCArraySubscriptTag
)) {
737 assert(Node
!= nullptr && "Expecting a non-null matching result");
740 static bool classof(const Gadget
*G
) {
741 return G
->getKind() == Kind::ULCArraySubscript
;
744 static Matcher
matcher() {
745 auto ArrayOrPtr
= anyOf(hasPointerType(), hasArrayType());
746 auto BaseIsArrayOrPtrDRE
=
747 hasBase(ignoringParenImpCasts(declRefExpr(ArrayOrPtr
,
748 toSupportedVariable())));
750 arraySubscriptExpr(BaseIsArrayOrPtrDRE
).bind(ULCArraySubscriptTag
);
752 return expr(isInUnspecifiedLvalueContext(Target
));
755 virtual std::optional
<FixItList
> getFixits(const Strategy
&S
) const override
;
757 virtual const Stmt
*getBaseStmt() const override
{ return Node
; }
759 virtual DeclUseList
getClaimedVarUseSites() const override
{
760 if (const auto *DRE
=
761 dyn_cast
<DeclRefExpr
>(Node
->getBase()->IgnoreImpCasts())) {
768 // Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the
769 // unspecified pointer context (isInUnspecifiedPointerContext). The gadget emits
770 // fixit of the form `UPC(DRE.data())`.
771 class UPCStandalonePointerGadget
: public FixableGadget
{
773 static constexpr const char *const DeclRefExprTag
= "StandalonePointer";
774 const DeclRefExpr
*Node
;
777 UPCStandalonePointerGadget(const MatchFinder::MatchResult
&Result
)
778 : FixableGadget(Kind::UPCStandalonePointer
),
779 Node(Result
.Nodes
.getNodeAs
<DeclRefExpr
>(DeclRefExprTag
)) {
780 assert(Node
!= nullptr && "Expecting a non-null matching result");
783 static bool classof(const Gadget
*G
) {
784 return G
->getKind() == Kind::UPCStandalonePointer
;
787 static Matcher
matcher() {
788 auto ArrayOrPtr
= anyOf(hasPointerType(), hasArrayType());
790 ignoringParenImpCasts(declRefExpr(allOf(ArrayOrPtr
,
791 toSupportedVariable())).bind(DeclRefExprTag
)));
792 return stmt(isInUnspecifiedPointerContext(target
));
795 virtual std::optional
<FixItList
> getFixits(const Strategy
&S
) const override
;
797 virtual const Stmt
*getBaseStmt() const override
{ return Node
; }
799 virtual DeclUseList
getClaimedVarUseSites() const override
{
804 class PointerDereferenceGadget
: public FixableGadget
{
805 static constexpr const char *const BaseDeclRefExprTag
= "BaseDRE";
806 static constexpr const char *const OperatorTag
= "op";
808 const DeclRefExpr
*BaseDeclRefExpr
= nullptr;
809 const UnaryOperator
*Op
= nullptr;
812 PointerDereferenceGadget(const MatchFinder::MatchResult
&Result
)
813 : FixableGadget(Kind::PointerDereference
),
815 Result
.Nodes
.getNodeAs
<DeclRefExpr
>(BaseDeclRefExprTag
)),
816 Op(Result
.Nodes
.getNodeAs
<UnaryOperator
>(OperatorTag
)) {}
818 static bool classof(const Gadget
*G
) {
819 return G
->getKind() == Kind::PointerDereference
;
822 static Matcher
matcher() {
825 hasOperatorName("*"),
826 has(expr(ignoringParenImpCasts(
827 declRefExpr(toSupportedVariable()).bind(BaseDeclRefExprTag
)))))
830 return expr(isInUnspecifiedLvalueContext(Target
));
833 DeclUseList
getClaimedVarUseSites() const override
{
834 return {BaseDeclRefExpr
};
837 virtual const Stmt
*getBaseStmt() const final
{ return Op
; }
839 virtual std::optional
<FixItList
> getFixits(const Strategy
&S
) const override
;
842 // Represents expressions of the form `&DRE[any]` in the Unspecified Pointer
843 // Context (see `isInUnspecifiedPointerContext`).
844 // Note here `[]` is the built-in subscript operator.
845 class UPCAddressofArraySubscriptGadget
: public FixableGadget
{
847 static constexpr const char *const UPCAddressofArraySubscriptTag
=
848 "AddressofArraySubscriptUnderUPC";
849 const UnaryOperator
*Node
; // the `&DRE[any]` node
852 UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult
&Result
)
853 : FixableGadget(Kind::ULCArraySubscript
),
854 Node(Result
.Nodes
.getNodeAs
<UnaryOperator
>(
855 UPCAddressofArraySubscriptTag
)) {
856 assert(Node
!= nullptr && "Expecting a non-null matching result");
859 static bool classof(const Gadget
*G
) {
860 return G
->getKind() == Kind::UPCAddressofArraySubscript
;
863 static Matcher
matcher() {
864 return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
865 unaryOperator(hasOperatorName("&"),
866 hasUnaryOperand(arraySubscriptExpr(
867 hasBase(ignoringParenImpCasts(declRefExpr(
868 toSupportedVariable()))))))
869 .bind(UPCAddressofArraySubscriptTag
)))));
872 virtual std::optional
<FixItList
> getFixits(const Strategy
&) const override
;
874 virtual const Stmt
*getBaseStmt() const override
{ return Node
; }
876 virtual DeclUseList
getClaimedVarUseSites() const override
{
877 const auto *ArraySubst
= cast
<ArraySubscriptExpr
>(Node
->getSubExpr());
879 cast
<DeclRefExpr
>(ArraySubst
->getBase()->IgnoreImpCasts());
886 // An auxiliary tracking facility for the fixit analysis. It helps connect
887 // declarations to their uses and make sure we've covered all uses with our
888 // analysis before we try to fix the declaration.
889 class DeclUseTracker
{
890 using UseSetTy
= SmallSet
<const DeclRefExpr
*, 16>;
891 using DefMapTy
= DenseMap
<const VarDecl
*, const DeclStmt
*>;
893 // Allocate on the heap for easier move.
894 std::unique_ptr
<UseSetTy
> Uses
{std::make_unique
<UseSetTy
>()};
898 DeclUseTracker() = default;
899 DeclUseTracker(const DeclUseTracker
&) = delete; // Let's avoid copies.
900 DeclUseTracker
&operator=(const DeclUseTracker
&) = delete;
901 DeclUseTracker(DeclUseTracker
&&) = default;
902 DeclUseTracker
&operator=(DeclUseTracker
&&) = default;
904 // Start tracking a freshly discovered DRE.
905 void discoverUse(const DeclRefExpr
*DRE
) { Uses
->insert(DRE
); }
907 // Stop tracking the DRE as it's been fully figured out.
908 void claimUse(const DeclRefExpr
*DRE
) {
909 assert(Uses
->count(DRE
) &&
910 "DRE not found or claimed by multiple matchers!");
914 // A variable is unclaimed if at least one use is unclaimed.
915 bool hasUnclaimedUses(const VarDecl
*VD
) const {
916 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
917 return any_of(*Uses
, [VD
](const DeclRefExpr
*DRE
) {
918 return DRE
->getDecl()->getCanonicalDecl() == VD
->getCanonicalDecl();
922 UseSetTy
getUnclaimedUses(const VarDecl
*VD
) const {
924 for (auto use
: *Uses
) {
925 if (use
->getDecl()->getCanonicalDecl() == VD
->getCanonicalDecl()) {
926 ReturnSet
.insert(use
);
932 void discoverDecl(const DeclStmt
*DS
) {
933 for (const Decl
*D
: DS
->decls()) {
934 if (const auto *VD
= dyn_cast
<VarDecl
>(D
)) {
935 // FIXME: Assertion temporarily disabled due to a bug in
936 // ASTMatcher internal behavior in presence of GNU
937 // statement-expressions. We need to properly investigate this
938 // because it can screw up our algorithm in other ways.
939 // assert(Defs.count(VD) == 0 && "Definition already discovered!");
945 const DeclStmt
*lookupDecl(const VarDecl
*VD
) const {
946 return Defs
.lookup(VD
);
952 // Strategy is a map from variables to the way we plan to emit fixes for
953 // these variables. It is figured out gradually by trying different fixes
954 // for different variables depending on gadgets in which these variables
959 Wontfix
, // We don't plan to emit a fixit for this variable.
960 Span
, // We recommend replacing the variable with std::span.
961 Iterator
, // We recommend replacing the variable with std::span::iterator.
962 Array
, // We recommend replacing the variable with std::array.
963 Vector
// We recommend replacing the variable with std::vector.
967 using MapTy
= llvm::DenseMap
<const VarDecl
*, Kind
>;
972 Strategy() = default;
973 Strategy(const Strategy
&) = delete; // Let's avoid copies.
974 Strategy
&operator=(const Strategy
&) = delete;
975 Strategy(Strategy
&&) = default;
976 Strategy
&operator=(Strategy
&&) = default;
978 void set(const VarDecl
*VD
, Kind K
) { Map
[VD
] = K
; }
980 Kind
lookup(const VarDecl
*VD
) const {
981 auto I
= Map
.find(VD
);
983 return Kind::Wontfix
;
991 // Representing a pointer type expression of the form `++Ptr` in an Unspecified
992 // Pointer Context (UPC):
993 class UPCPreIncrementGadget
: public FixableGadget
{
995 static constexpr const char *const UPCPreIncrementTag
=
996 "PointerPreIncrementUnderUPC";
997 const UnaryOperator
*Node
; // the `++Ptr` node
1000 UPCPreIncrementGadget(const MatchFinder::MatchResult
&Result
)
1001 : FixableGadget(Kind::UPCPreIncrement
),
1002 Node(Result
.Nodes
.getNodeAs
<UnaryOperator
>(UPCPreIncrementTag
)) {
1003 assert(Node
!= nullptr && "Expecting a non-null matching result");
1006 static bool classof(const Gadget
*G
) {
1007 return G
->getKind() == Kind::UPCPreIncrement
;
1010 static Matcher
matcher() {
1011 // Note here we match `++Ptr` for any expression `Ptr` of pointer type.
1012 // Although currently we can only provide fix-its when `Ptr` is a DRE, we
1013 // can have the matcher be general, so long as `getClaimedVarUseSites` does
1015 return stmt(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
1016 unaryOperator(isPreInc(),
1017 hasUnaryOperand(declRefExpr(
1018 toSupportedVariable()))
1019 ).bind(UPCPreIncrementTag
)))));
1022 virtual std::optional
<FixItList
> getFixits(const Strategy
&S
) const override
;
1024 virtual const Stmt
*getBaseStmt() const override
{ return Node
; }
1026 virtual DeclUseList
getClaimedVarUseSites() const override
{
1027 return {dyn_cast
<DeclRefExpr
>(Node
->getSubExpr())};
1031 // Representing a pointer type expression of the form `Ptr += n` in an
1032 // Unspecified Untyped Context (UUC):
1033 class UUCAddAssignGadget
: public FixableGadget
{
1035 static constexpr const char *const UUCAddAssignTag
=
1036 "PointerAddAssignUnderUUC";
1037 static constexpr const char *const OffsetTag
= "Offset";
1039 const BinaryOperator
*Node
; // the `Ptr += n` node
1040 const Expr
*Offset
= nullptr;
1043 UUCAddAssignGadget(const MatchFinder::MatchResult
&Result
)
1044 : FixableGadget(Kind::UUCAddAssign
),
1045 Node(Result
.Nodes
.getNodeAs
<BinaryOperator
>(UUCAddAssignTag
)),
1046 Offset(Result
.Nodes
.getNodeAs
<Expr
>(OffsetTag
)) {
1047 assert(Node
!= nullptr && "Expecting a non-null matching result");
1050 static bool classof(const Gadget
*G
) {
1051 return G
->getKind() == Kind::UUCAddAssign
;
1054 static Matcher
matcher() {
1055 return stmt(isInUnspecifiedUntypedContext(expr(ignoringImpCasts(
1056 binaryOperator(hasOperatorName("+="),
1057 hasLHS(declRefExpr(toSupportedVariable())),
1058 hasRHS(expr().bind(OffsetTag
)))
1059 .bind(UUCAddAssignTag
)))));
1062 virtual std::optional
<FixItList
> getFixits(const Strategy
&S
) const override
;
1064 virtual const Stmt
*getBaseStmt() const override
{ return Node
; }
1066 virtual DeclUseList
getClaimedVarUseSites() const override
{
1067 return {dyn_cast
<DeclRefExpr
>(Node
->getLHS())};
1071 // Representing a fixable expression of the form `*(ptr + 123)` or `*(123 +
1073 class DerefSimplePtrArithFixableGadget
: public FixableGadget
{
1074 static constexpr const char *const BaseDeclRefExprTag
= "BaseDRE";
1075 static constexpr const char *const DerefOpTag
= "DerefOp";
1076 static constexpr const char *const AddOpTag
= "AddOp";
1077 static constexpr const char *const OffsetTag
= "Offset";
1079 const DeclRefExpr
*BaseDeclRefExpr
= nullptr;
1080 const UnaryOperator
*DerefOp
= nullptr;
1081 const BinaryOperator
*AddOp
= nullptr;
1082 const IntegerLiteral
*Offset
= nullptr;
1085 DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult
&Result
)
1086 : FixableGadget(Kind::DerefSimplePtrArithFixable
),
1088 Result
.Nodes
.getNodeAs
<DeclRefExpr
>(BaseDeclRefExprTag
)),
1089 DerefOp(Result
.Nodes
.getNodeAs
<UnaryOperator
>(DerefOpTag
)),
1090 AddOp(Result
.Nodes
.getNodeAs
<BinaryOperator
>(AddOpTag
)),
1091 Offset(Result
.Nodes
.getNodeAs
<IntegerLiteral
>(OffsetTag
)) {}
1093 static Matcher
matcher() {
1095 auto ThePtr
= expr(hasPointerType(),
1096 ignoringImpCasts(declRefExpr(toSupportedVariable()).
1097 bind(BaseDeclRefExprTag
)));
1098 auto PlusOverPtrAndInteger
= expr(anyOf(
1099 binaryOperator(hasOperatorName("+"), hasLHS(ThePtr
),
1100 hasRHS(integerLiteral().bind(OffsetTag
)))
1102 binaryOperator(hasOperatorName("+"), hasRHS(ThePtr
),
1103 hasLHS(integerLiteral().bind(OffsetTag
)))
1105 return isInUnspecifiedLvalueContext(unaryOperator(
1106 hasOperatorName("*"),
1107 hasUnaryOperand(ignoringParens(PlusOverPtrAndInteger
)))
1112 virtual std::optional
<FixItList
> getFixits(const Strategy
&s
) const final
;
1114 // TODO remove this method from FixableGadget interface
1115 virtual const Stmt
*getBaseStmt() const final
{ return nullptr; }
1117 virtual DeclUseList
getClaimedVarUseSites() const final
{
1118 return {BaseDeclRefExpr
};
/// Scan the function and return a list of gadgets found with provided kits.
///
/// Runs AST matchers over the body of `D` and returns, in this order, the
/// fixable gadgets found, the warning gadgets found, and the tracker holding
/// the discovered variable uses and declaration statements.
///
/// \param D the declaration whose body (`D->getBody()`) is scanned.
/// \param Handler passed to `notInSafeBufferOptOut` to filter warning gadgets.
/// \param EmitSuggestions when false, only warning gadgets are matched; the
///        fixable-gadget, "any_dre" and "any_ds" matchers are not registered.
static std::tuple<FixableGadgetList, WarningGadgetList, DeclUseTracker>
findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler,
            bool EmitSuggestions) {

  // Callback that turns every match result into a gadget or a tracker entry.
  struct GadgetFinderCallback : MatchFinder::MatchCallback {
    FixableGadgetList FixableGadgets;
    WarningGadgetList WarningGadgets;
    DeclUseTracker Tracker;

    void run(const MatchFinder::MatchResult &Result) override {
      // In debug mode, assert that we've found exactly one gadget.
      // This helps us avoid conflicts in .bind() tags.
      // With NDEBUG, NEXT instead returns right after the first hit.
#if NDEBUG
#define NEXT return
#else
      [[maybe_unused]] int numFound = 0;
#define NEXT ++numFound
#endif

      // A pointer/array DRE matched by the catch-all "any_dre" matcher.
      if (const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>("any_dre")) {
        Tracker.discoverUse(DRE);
        NEXT;
      }

      // A declaration statement matched by the catch-all "any_ds" matcher.
      if (const auto *DS = Result.Nodes.getNodeAs<DeclStmt>("any_ds")) {
        Tracker.discoverDecl(DS);
        NEXT;
      }

      // Figure out which matcher we've found, and call the appropriate
      // subclass constructor.
      // FIXME: Can we do this more logarithmically?
      // Each gadget's matcher is bound under the gadget's own name (see the
      // `.bind(#x)` calls below), so the bound tag identifies the gadget.
#define FIXABLE_GADGET(name)                                                   \
  if (Result.Nodes.getNodeAs<Stmt>(#name)) {                                   \
    FixableGadgets.push_back(std::make_unique<name##Gadget>(Result));          \
    NEXT;                                                                      \
  }
#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
#define WARNING_GADGET(name)                                                   \
  if (Result.Nodes.getNodeAs<Stmt>(#name)) {                                   \
    WarningGadgets.push_back(std::make_unique<name##Gadget>(Result));          \
    NEXT;                                                                      \
  }
#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"

      assert(numFound >= 1 && "Gadgets not found in match result!");
      assert(numFound <= 1 && "Conflicting bind tags in gadgets!");
    }
  };

  MatchFinder M;
  GadgetFinderCallback CB;

  // Warning gadgets are always matched, except where `notInSafeBufferOptOut`
  // rejects the node.
  // clang-format off
  M.addMatcher(
      stmt(
        forEachDescendantEvaluatedStmt(stmt(anyOf(
          // Add Gadget::matcher() for every gadget in the registry.
#define WARNING_GADGET(x)                                                      \
          allOf(x ## Gadget::matcher().bind(#x),                               \
                notInSafeBufferOptOut(&Handler)),
#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
            // Avoid a hanging comma.
            unless(stmt())
        )))
    ),
    &CB
  );
  // clang-format on

  // Fixable gadgets, DREs and DeclStmts are only needed for suggestions.
  if (EmitSuggestions) {
    // clang-format off
    M.addMatcher(
        stmt(
          forEachDescendantStmt(stmt(eachOf(
#define FIXABLE_GADGET(x)                                                      \
            x ## Gadget::matcher().bind(#x),
#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
            // In parallel, match all DeclRefExprs so that to find out
            // whether there are any uncovered by gadgets.
            declRefExpr(anyOf(hasPointerType(), hasArrayType()),
                        to(anyOf(varDecl(), bindingDecl()))).bind("any_dre"),
            // Also match DeclStmts because we'll need them when fixing
            // their underlying VarDecls that otherwise don't have
            // any backreferences to DeclStmts.
            declStmt().bind("any_ds")
          )))
      ),
      &CB
    );
    // clang-format on
  }

  M.match(*D->getBody(), D->getASTContext());
  // Hand the accumulated results over to the caller without copying.
  return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets),
          std::move(CB.Tracker)};
}
// Strict-weak ordering over AST node pointers that compares the raw encoding
// of each node's begin location.
template <typename NodeTy> struct CompareNode {
  bool operator()(const NodeTy *N1, const NodeTy *N2) const {
    const auto FirstKey = N1->getBeginLoc().getRawEncoding();
    const auto SecondKey = N2->getBeginLoc().getRawEncoding();
    return FirstKey < SecondKey;
  }
};
// Warning gadgets grouped by the variable they are associated with, plus the
// gadgets that are not associated with any variable.
struct WarningGadgetSets {
  // Maps a variable to the set of warning gadgets associated with it.
  std::map<const VarDecl *, std::set<const WarningGadget *>,
           // To keep keys sorted by their locations in the map so that the
           // order is deterministic:
           CompareNode<VarDecl>>
      byVar;
  // These Gadgets are not related to pointer variables (e. g. temporaries).
  llvm::SmallVector<const WarningGadget *, 16> noVar;
};
1239 static WarningGadgetSets
1240 groupWarningGadgetsByVar(const WarningGadgetList
&AllUnsafeOperations
) {
1241 WarningGadgetSets result
;
1242 // If some gadgets cover more than one
1243 // variable, they'll appear more than once in the map.
1244 for (auto &G
: AllUnsafeOperations
) {
1245 DeclUseList ClaimedVarUseSites
= G
->getClaimedVarUseSites();
1247 bool AssociatedWithVarDecl
= false;
1248 for (const DeclRefExpr
*DRE
: ClaimedVarUseSites
) {
1249 if (const auto *VD
= dyn_cast
<VarDecl
>(DRE
->getDecl())) {
1250 result
.byVar
[VD
].insert(G
.get());
1251 AssociatedWithVarDecl
= true;
1255 if (!AssociatedWithVarDecl
) {
1256 result
.noVar
.push_back(G
.get());
// Fixable gadgets grouped by the variable they are associated with.
struct FixableGadgetSets {
  // Maps a variable to the set of fixable gadgets associated with it.
  std::map<const VarDecl *, std::set<const FixableGadget *>,
           // To keep keys sorted by their locations in the map so that the
           // order is deterministic:
           CompareNode<VarDecl>>
      byVar;
};
1271 static FixableGadgetSets
1272 groupFixablesByVar(FixableGadgetList
&&AllFixableOperations
) {
1273 FixableGadgetSets FixablesForUnsafeVars
;
1274 for (auto &F
: AllFixableOperations
) {
1275 DeclUseList DREs
= F
->getClaimedVarUseSites();
1277 for (const DeclRefExpr
*DRE
: DREs
) {
1278 if (const auto *VD
= dyn_cast
<VarDecl
>(DRE
->getDecl())) {
1279 FixablesForUnsafeVars
.byVar
[VD
].insert(F
.get());
1283 return FixablesForUnsafeVars
;
1286 bool clang::internal::anyConflict(const SmallVectorImpl
<FixItHint
> &FixIts
,
1287 const SourceManager
&SM
) {
1288 // A simple interval overlap detection algorithm. Sorts all ranges by their
1289 // begin location then finds the first overlap in one pass.
1290 std::vector
<const FixItHint
*> All
; // a copy of `FixIts`
1292 for (const FixItHint
&H
: FixIts
)
1294 std::sort(All
.begin(), All
.end(),
1295 [&SM
](const FixItHint
*H1
, const FixItHint
*H2
) {
1296 return SM
.isBeforeInTranslationUnit(H1
->RemoveRange
.getBegin(),
1297 H2
->RemoveRange
.getBegin());
1300 const FixItHint
*CurrHint
= nullptr;
1302 for (const FixItHint
*Hint
: All
) {
1304 SM
.isBeforeInTranslationUnit(CurrHint
->RemoveRange
.getEnd(),
1305 Hint
->RemoveRange
.getBegin())) {
1306 // Either to initialize `CurrHint` or `CurrHint` does not
1307 // overlap with `Hint`:
1310 // In case `Hint` overlaps the `CurrHint`, we found at least one
1317 std::optional
<FixItList
>
1318 PointerAssignmentGadget::getFixits(const Strategy
&S
) const {
1319 const auto *LeftVD
= cast
<VarDecl
>(PtrLHS
->getDecl());
1320 const auto *RightVD
= cast
<VarDecl
>(PtrRHS
->getDecl());
1321 switch (S
.lookup(LeftVD
)) {
1322 case Strategy::Kind::Span
:
1323 if (S
.lookup(RightVD
) == Strategy::Kind::Span
)
1325 return std::nullopt
;
1326 case Strategy::Kind::Wontfix
:
1327 return std::nullopt
;
1328 case Strategy::Kind::Iterator
:
1329 case Strategy::Kind::Array
:
1330 case Strategy::Kind::Vector
:
1331 llvm_unreachable("unsupported strategies for FixableGadgets");
1333 return std::nullopt
;
1336 std::optional
<FixItList
>
1337 PointerInitGadget::getFixits(const Strategy
&S
) const {
1338 const auto *LeftVD
= PtrInitLHS
;
1339 const auto *RightVD
= cast
<VarDecl
>(PtrInitRHS
->getDecl());
1340 switch (S
.lookup(LeftVD
)) {
1341 case Strategy::Kind::Span
:
1342 if (S
.lookup(RightVD
) == Strategy::Kind::Span
)
1344 return std::nullopt
;
1345 case Strategy::Kind::Wontfix
:
1346 return std::nullopt
;
1347 case Strategy::Kind::Iterator
:
1348 case Strategy::Kind::Array
:
1349 case Strategy::Kind::Vector
:
1350 llvm_unreachable("unsupported strategies for FixableGadgets");
1352 return std::nullopt
;
1355 static bool isNonNegativeIntegerExpr(const Expr
*Expr
, const VarDecl
*VD
,
1356 const ASTContext
&Ctx
) {
1357 if (auto ConstVal
= Expr
->getIntegerConstantExpr(Ctx
)) {
1358 if (ConstVal
->isNegative())
1360 } else if (!Expr
->getType()->isUnsignedIntegerType())
1365 std::optional
<FixItList
>
1366 ULCArraySubscriptGadget::getFixits(const Strategy
&S
) const {
1367 if (const auto *DRE
=
1368 dyn_cast
<DeclRefExpr
>(Node
->getBase()->IgnoreImpCasts()))
1369 if (const auto *VD
= dyn_cast
<VarDecl
>(DRE
->getDecl())) {
1370 switch (S
.lookup(VD
)) {
1371 case Strategy::Kind::Span
: {
1373 // If the index has a negative constant value, we give up as no valid
1374 // fix-it can be generated:
1375 const ASTContext
&Ctx
= // FIXME: we need ASTContext to be passed in!
1376 VD
->getASTContext();
1377 if (!isNonNegativeIntegerExpr(Node
->getIdx(), VD
, Ctx
))
1378 return std::nullopt
;
1379 // no-op is a good fix-it, otherwise
1382 case Strategy::Kind::Wontfix
:
1383 case Strategy::Kind::Iterator
:
1384 case Strategy::Kind::Array
:
1385 case Strategy::Kind::Vector
:
1386 llvm_unreachable("unsupported strategies for FixableGadgets");
1389 return std::nullopt
;
// Forward declaration; the definition appears further down in this file. It
// is needed here because `UPCAddressofArraySubscriptGadget::getFixits` calls
// it before the definition is seen.
static std::optional<FixItList> // forward declaration
fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node);
1395 std::optional
<FixItList
>
1396 UPCAddressofArraySubscriptGadget::getFixits(const Strategy
&S
) const {
1397 auto DREs
= getClaimedVarUseSites();
1398 const auto *VD
= cast
<VarDecl
>(DREs
.front()->getDecl());
1400 switch (S
.lookup(VD
)) {
1401 case Strategy::Kind::Span
:
1402 return fixUPCAddressofArraySubscriptWithSpan(Node
);
1403 case Strategy::Kind::Wontfix
:
1404 case Strategy::Kind::Iterator
:
1405 case Strategy::Kind::Array
:
1406 case Strategy::Kind::Vector
:
1407 llvm_unreachable("unsupported strategies for FixableGadgets");
1409 return std::nullopt
; // something went wrong, no fix-it
1412 // FIXME: this function should be customizable through format
1413 static StringRef
getEndOfLine() {
1414 static const char *const EOL
= "\n";
1418 // Returns the text indicating that the user needs to provide input there:
1419 std::string
getUserFillPlaceHolder(StringRef HintTextToUser
= "placeholder") {
1420 std::string s
= std::string("<# ");
1421 s
+= HintTextToUser
;
1426 // Return the text representation of the given `APInt Val`:
1427 static std::string
getAPIntText(APInt Val
) {
1428 SmallVector
<char> Txt
;
1429 Val
.toString(Txt
, 10, true);
1430 // APInt::toString does not add '\0' to the end of the string for us:
1431 Txt
.push_back('\0');
1435 // Return the source location of the last character of the AST `Node`.
1436 template <typename NodeTy
>
1437 static std::optional
<SourceLocation
>
1438 getEndCharLoc(const NodeTy
*Node
, const SourceManager
&SM
,
1439 const LangOptions
&LangOpts
) {
1440 unsigned TkLen
= Lexer::MeasureTokenLength(Node
->getEndLoc(), SM
, LangOpts
);
1441 SourceLocation Loc
= Node
->getEndLoc().getLocWithOffset(TkLen
- 1);
1446 return std::nullopt
;
1449 // Return the source location just past the last character of the AST `Node`.
1450 template <typename NodeTy
>
1451 static std::optional
<SourceLocation
> getPastLoc(const NodeTy
*Node
,
1452 const SourceManager
&SM
,
1453 const LangOptions
&LangOpts
) {
1454 SourceLocation Loc
=
1455 Lexer::getLocForEndOfToken(Node
->getEndLoc(), 0, SM
, LangOpts
);
1458 return std::nullopt
;
1461 // Return text representation of an `Expr`.
1462 static std::optional
<StringRef
> getExprText(const Expr
*E
,
1463 const SourceManager
&SM
,
1464 const LangOptions
&LangOpts
) {
1465 std::optional
<SourceLocation
> LastCharLoc
= getPastLoc(E
, SM
, LangOpts
);
1468 return Lexer::getSourceText(
1469 CharSourceRange::getCharRange(E
->getBeginLoc(), *LastCharLoc
), SM
,
1472 return std::nullopt
;
1475 // Returns the literal text in `SourceRange SR`, if `SR` is a valid range.
1476 static std::optional
<StringRef
> getRangeText(SourceRange SR
,
1477 const SourceManager
&SM
,
1478 const LangOptions
&LangOpts
) {
1479 bool Invalid
= false;
1480 CharSourceRange CSR
= CharSourceRange::getCharRange(SR
);
1481 StringRef Text
= Lexer::getSourceText(CSR
, SM
, LangOpts
, &Invalid
);
1485 return std::nullopt
;
1488 // Returns the begin location of the identifier of the given variable
1490 static SourceLocation
getVarDeclIdentifierLoc(const VarDecl
*VD
) {
1491 // According to the implementation of `VarDecl`, `VD->getLocation()` actually
1492 // returns the begin location of the identifier of the declaration:
1493 return VD
->getLocation();
1496 // Returns the literal text of the identifier of the given variable declaration.
1497 static std::optional
<StringRef
>
1498 getVarDeclIdentifierText(const VarDecl
*VD
, const SourceManager
&SM
,
1499 const LangOptions
&LangOpts
) {
1500 SourceLocation ParmIdentBeginLoc
= getVarDeclIdentifierLoc(VD
);
1501 SourceLocation ParmIdentEndLoc
=
1502 Lexer::getLocForEndOfToken(ParmIdentBeginLoc
, 0, SM
, LangOpts
);
1504 if (ParmIdentEndLoc
.isMacroID() &&
1505 !Lexer::isAtEndOfMacroExpansion(ParmIdentEndLoc
, SM
, LangOpts
))
1506 return std::nullopt
;
1507 return getRangeText({ParmIdentBeginLoc
, ParmIdentEndLoc
}, SM
, LangOpts
);
1510 // We cannot fix a variable declaration if it has some other specifiers than the
1511 // type specifier. Because the source ranges of those specifiers could overlap
1512 // with the source range that is being replaced using fix-its. Especially when
1513 // we often cannot obtain accurate source ranges of cv-qualified type
1515 // FIXME: also deal with type attributes
1516 static bool hasUnsupportedSpecifiers(const VarDecl
*VD
,
1517 const SourceManager
&SM
) {
1518 // AttrRangeOverlapping: true if at least one attribute of `VD` overlaps the
1519 // source range of `VD`:
1520 bool AttrRangeOverlapping
= llvm::any_of(VD
->attrs(), [&](Attr
*At
) -> bool {
1521 return !(SM
.isBeforeInTranslationUnit(At
->getRange().getEnd(),
1522 VD
->getBeginLoc())) &&
1523 !(SM
.isBeforeInTranslationUnit(VD
->getEndLoc(),
1524 At
->getRange().getBegin()));
1526 return VD
->isInlineSpecified() || VD
->isConstexpr() ||
1527 VD
->hasConstantInitialization() || !VD
->hasLocalStorage() ||
1528 AttrRangeOverlapping
;
1531 // Returns the `SourceRange` of `D`. The reason why this function exists is
1532 // that `D->getSourceRange()` may return a range where the end location is the
1533 // starting location of the last token. The end location of the source range
1534 // returned by this function is the last location of the last token.
1535 static SourceRange
getSourceRangeToTokenEnd(const Decl
*D
,
1536 const SourceManager
&SM
,
1537 const LangOptions
&LangOpts
) {
1538 SourceLocation Begin
= D
->getBeginLoc();
1540 End
= // `D->getEndLoc` should always return the starting location of the
1541 // last token, so we should get the end of the token
1542 Lexer::getLocForEndOfToken(D
->getEndLoc(), 0, SM
, LangOpts
);
1544 return SourceRange(Begin
, End
);
1547 // Returns the text of the pointee type of `T` from a `VarDecl` of a pointer
1548 // type. The text is obtained through from `TypeLoc`s. Since `TypeLoc` does not
1549 // have source ranges of qualifiers ( The `QualifiedTypeLoc` looks hacky too me
1550 // :( ), `Qualifiers` of the pointee type is returned separately through the
1551 // output parameter `QualifiersToAppend`.
1552 static std::optional
<std::string
>
1553 getPointeeTypeText(const VarDecl
*VD
, const SourceManager
&SM
,
1554 const LangOptions
&LangOpts
,
1555 std::optional
<Qualifiers
> *QualifiersToAppend
) {
1556 QualType Ty
= VD
->getType();
1559 assert(Ty
->isPointerType() && !Ty
->isFunctionPointerType() &&
1560 "Expecting a VarDecl of type of pointer to object type");
1561 PteTy
= Ty
->getPointeeType();
1563 TypeLoc TyLoc
= VD
->getTypeSourceInfo()->getTypeLoc().getUnqualifiedLoc();
1566 // We only deal with the cases that we know `TypeLoc::getNextTypeLoc` returns
1567 // the `TypeLoc` of the pointee type:
1568 switch (TyLoc
.getTypeLocClass()) {
1569 case TypeLoc::ConstantArray
:
1570 case TypeLoc::IncompleteArray
:
1571 case TypeLoc::VariableArray
:
1572 case TypeLoc::DependentSizedArray
:
1573 case TypeLoc::Decayed
:
1574 assert(isa
<ParmVarDecl
>(VD
) && "An array type shall not be treated as a "
1575 "pointer type unless it decays.");
1576 PteTyLoc
= TyLoc
.getNextTypeLoc();
1578 case TypeLoc::Pointer
:
1579 PteTyLoc
= TyLoc
.castAs
<PointerTypeLoc
>().getPointeeLoc();
1582 return std::nullopt
;
1584 if (PteTyLoc
.isNull())
1585 // Sometimes we cannot get a useful `TypeLoc` for the pointee type, e.g.,
1586 // when the pointer type is `auto`.
1587 return std::nullopt
;
1589 SourceLocation IdentLoc
= getVarDeclIdentifierLoc(VD
);
1591 if (!(IdentLoc
.isValid() && PteTyLoc
.getSourceRange().isValid())) {
1592 // We are expecting these locations to be valid. But in some cases, they are
1593 // not all valid. It is a Clang bug to me and we are not responsible for
1594 // fixing it. So we will just give up for now when it happens.
1595 return std::nullopt
;
1598 // Note that TypeLoc.getEndLoc() returns the begin location of the last token:
1599 SourceLocation PteEndOfTokenLoc
=
1600 Lexer::getLocForEndOfToken(PteTyLoc
.getEndLoc(), 0, SM
, LangOpts
);
1602 if (!PteEndOfTokenLoc
.isValid())
1603 // Sometimes we cannot get the end location of the pointee type, e.g., when
1604 // there are macros involved.
1605 return std::nullopt
;
1606 if (!SM
.isBeforeInTranslationUnit(PteEndOfTokenLoc
, IdentLoc
)) {
1607 // We only deal with the cases where the source text of the pointee type
1608 // appears on the left-hand side of the variable identifier completely,
1609 // including the following forms:
1611 // `T ident[]`, where `T` is any type.
1612 // Examples of excluded cases are `T (*ident)[]` or `T ident[][n]`.
1613 return std::nullopt
;
1615 if (PteTy
.hasQualifiers()) {
1616 // TypeLoc does not provide source ranges for qualifiers (it says it's
1617 // intentional but seems fishy to me), so we cannot get the full text
1618 // `PteTy` via source ranges.
1619 *QualifiersToAppend
= PteTy
.getQualifiers();
1621 return getRangeText({PteTyLoc
.getBeginLoc(), PteEndOfTokenLoc
}, SM
, LangOpts
)
1625 // Returns the text of the name (with qualifiers) of a `FunctionDecl`.
1626 static std::optional
<StringRef
> getFunNameText(const FunctionDecl
*FD
,
1627 const SourceManager
&SM
,
1628 const LangOptions
&LangOpts
) {
1629 SourceLocation BeginLoc
= FD
->getQualifier()
1630 ? FD
->getQualifierLoc().getBeginLoc()
1631 : FD
->getNameInfo().getBeginLoc();
1632 // Note that `FD->getNameInfo().getEndLoc()` returns the begin location of the
1634 SourceLocation EndLoc
= Lexer::getLocForEndOfToken(
1635 FD
->getNameInfo().getEndLoc(), 0, SM
, LangOpts
);
1636 SourceRange NameRange
{BeginLoc
, EndLoc
};
1638 return getRangeText(NameRange
, SM
, LangOpts
);
1641 // Returns the text representing a `std::span` type where the element type is
1642 // represented by `EltTyText`.
1644 // Note the optional parameter `Qualifiers`: one needs to pass qualifiers
1645 // explicitly if the element type needs to be qualified.
1647 getSpanTypeText(StringRef EltTyText
,
1648 std::optional
<Qualifiers
> Quals
= std::nullopt
) {
1649 const char *const SpanOpen
= "std::span<";
1652 return SpanOpen
+ EltTyText
.str() + ' ' + Quals
->getAsString() + '>';
1653 return SpanOpen
+ EltTyText
.str() + '>';
1656 std::optional
<FixItList
>
1657 DerefSimplePtrArithFixableGadget::getFixits(const Strategy
&s
) const {
1658 const VarDecl
*VD
= dyn_cast
<VarDecl
>(BaseDeclRefExpr
->getDecl());
1660 if (VD
&& s
.lookup(VD
) == Strategy::Kind::Span
) {
1661 ASTContext
&Ctx
= VD
->getASTContext();
1662 // std::span can't represent elements before its begin()
1663 if (auto ConstVal
= Offset
->getIntegerConstantExpr(Ctx
))
1664 if (ConstVal
->isNegative())
1665 return std::nullopt
;
1667 // note that the expr may (oddly) has multiple layers of parens
1669 // *((..(pointer + 123)..))
1674 // replace ' + ' with '['
1675 // replace ')' with ']'
1678 // *((..(123 + pointer)..))
1683 // replace ' + ' with '['
1684 // replace ')' with ']'
1686 const Expr
*LHS
= AddOp
->getLHS(), *RHS
= AddOp
->getRHS();
1687 const SourceManager
&SM
= Ctx
.getSourceManager();
1688 const LangOptions
&LangOpts
= Ctx
.getLangOpts();
1689 CharSourceRange StarWithTrailWhitespace
=
1690 clang::CharSourceRange::getCharRange(DerefOp
->getOperatorLoc(),
1691 LHS
->getBeginLoc());
1693 std::optional
<SourceLocation
> LHSLocation
= getPastLoc(LHS
, SM
, LangOpts
);
1695 return std::nullopt
;
1697 CharSourceRange PlusWithSurroundingWhitespace
=
1698 clang::CharSourceRange::getCharRange(*LHSLocation
, RHS
->getBeginLoc());
1700 std::optional
<SourceLocation
> AddOpLocation
=
1701 getPastLoc(AddOp
, SM
, LangOpts
);
1702 std::optional
<SourceLocation
> DerefOpLocation
=
1703 getPastLoc(DerefOp
, SM
, LangOpts
);
1705 if (!AddOpLocation
|| !DerefOpLocation
)
1706 return std::nullopt
;
1708 CharSourceRange ClosingParenWithPrecWhitespace
=
1709 clang::CharSourceRange::getCharRange(*AddOpLocation
, *DerefOpLocation
);
1712 {FixItHint::CreateRemoval(StarWithTrailWhitespace
),
1713 FixItHint::CreateReplacement(PlusWithSurroundingWhitespace
, "["),
1714 FixItHint::CreateReplacement(ClosingParenWithPrecWhitespace
, "]")}};
1716 return std::nullopt
; // something wrong or unsupported, give up
1719 std::optional
<FixItList
>
1720 PointerDereferenceGadget::getFixits(const Strategy
&S
) const {
1721 const VarDecl
*VD
= cast
<VarDecl
>(BaseDeclRefExpr
->getDecl());
1722 switch (S
.lookup(VD
)) {
1723 case Strategy::Kind::Span
: {
1724 ASTContext
&Ctx
= VD
->getASTContext();
1725 SourceManager
&SM
= Ctx
.getSourceManager();
1726 // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0]
1727 // Deletes the *operand
1728 CharSourceRange derefRange
= clang::CharSourceRange::getCharRange(
1729 Op
->getBeginLoc(), Op
->getBeginLoc().getLocWithOffset(1));
1731 if (auto LocPastOperand
=
1732 getPastLoc(BaseDeclRefExpr
, SM
, Ctx
.getLangOpts())) {
1733 return FixItList
{{FixItHint::CreateRemoval(derefRange
),
1734 FixItHint::CreateInsertion(*LocPastOperand
, "[0]")}};
1738 case Strategy::Kind::Iterator
:
1739 case Strategy::Kind::Array
:
1740 case Strategy::Kind::Vector
:
1741 llvm_unreachable("Strategy not implemented yet!");
1742 case Strategy::Kind::Wontfix
:
1743 llvm_unreachable("Invalid strategy!");
1746 return std::nullopt
;
1749 // Generates fix-its replacing an expression of the form UPC(DRE) with
1751 std::optional
<FixItList
> UPCStandalonePointerGadget::getFixits(const Strategy
&S
)
1753 const auto VD
= cast
<VarDecl
>(Node
->getDecl());
1754 switch (S
.lookup(VD
)) {
1755 case Strategy::Kind::Span
: {
1756 ASTContext
&Ctx
= VD
->getASTContext();
1757 SourceManager
&SM
= Ctx
.getSourceManager();
1758 // Inserts the .data() after the DRE
1759 std::optional
<SourceLocation
> EndOfOperand
=
1760 getPastLoc(Node
, SM
, Ctx
.getLangOpts());
1763 return FixItList
{{FixItHint::CreateInsertion(
1764 *EndOfOperand
, ".data()")}};
1765 // FIXME: Points inside a macro expansion.
1768 case Strategy::Kind::Wontfix
:
1769 case Strategy::Kind::Iterator
:
1770 case Strategy::Kind::Array
:
1771 case Strategy::Kind::Vector
:
1772 llvm_unreachable("unsupported strategies for FixableGadgets");
1775 return std::nullopt
;
1778 // Generates fix-its replacing an expression of the form `&DRE[e]` with
1779 // `&DRE.data()[e]`:
1780 static std::optional
<FixItList
>
1781 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator
*Node
) {
1782 const auto *ArraySub
= cast
<ArraySubscriptExpr
>(Node
->getSubExpr());
1783 const auto *DRE
= cast
<DeclRefExpr
>(ArraySub
->getBase()->IgnoreImpCasts());
1784 // FIXME: this `getASTContext` call is costly, we should pass the
1786 const ASTContext
&Ctx
= DRE
->getDecl()->getASTContext();
1787 const Expr
*Idx
= ArraySub
->getIdx();
1788 const SourceManager
&SM
= Ctx
.getSourceManager();
1789 const LangOptions
&LangOpts
= Ctx
.getLangOpts();
1790 std::stringstream SS
;
1791 bool IdxIsLitZero
= false;
1793 if (auto ICE
= Idx
->getIntegerConstantExpr(Ctx
))
1794 if ((*ICE
).isZero())
1795 IdxIsLitZero
= true;
1796 std::optional
<StringRef
> DreString
= getExprText(DRE
, SM
, LangOpts
);
1798 return std::nullopt
;
1801 // If the index is literal zero, we produce the most concise fix-it:
1802 SS
<< (*DreString
).str() << ".data()";
1804 std::optional
<StringRef
> IndexString
= getExprText(Idx
, SM
, LangOpts
);
1806 return std::nullopt
;
1808 SS
<< "&" << (*DreString
).str() << ".data()"
1809 << "[" << (*IndexString
).str() << "]";
1812 FixItHint::CreateReplacement(Node
->getSourceRange(), SS
.str())};
1815 std::optional
<FixItList
>
1816 UUCAddAssignGadget::getFixits(const Strategy
&S
) const {
1817 DeclUseList DREs
= getClaimedVarUseSites();
1819 if (DREs
.size() != 1)
1820 return std::nullopt
; // In cases of `Ptr += n` where `Ptr` is not a DRE, we
1822 if (const VarDecl
*VD
= dyn_cast
<VarDecl
>(DREs
.front()->getDecl())) {
1823 if (S
.lookup(VD
) == Strategy::Kind::Span
) {
1826 const Stmt
*AddAssignNode
= getBaseStmt();
1827 StringRef varName
= VD
->getName();
1828 const ASTContext
&Ctx
= VD
->getASTContext();
1830 if (!isNonNegativeIntegerExpr(Offset
, VD
, Ctx
))
1831 return std::nullopt
;
1833 // To transform UUC(p += n) to UUC(p = p.subspan(..)):
1835 (Offset
->IgnoreParens()->getBeginLoc() == Offset
->getBeginLoc());
1836 std::string SS
= varName
.str() + " = " + varName
.str() + ".subspan";
1840 std::optional
<SourceLocation
> AddAssignLocation
= getEndCharLoc(
1841 AddAssignNode
, Ctx
.getSourceManager(), Ctx
.getLangOpts());
1842 if (!AddAssignLocation
)
1843 return std::nullopt
;
1845 Fixes
.push_back(FixItHint::CreateReplacement(
1846 SourceRange(AddAssignNode
->getBeginLoc(), Node
->getOperatorLoc()),
1849 Fixes
.push_back(FixItHint::CreateInsertion(
1850 Offset
->getEndLoc().getLocWithOffset(1), ")"));
1854 return std::nullopt
; // Not in the cases that we can handle for now, give up.
1857 std::optional
<FixItList
> UPCPreIncrementGadget::getFixits(const Strategy
&S
) const {
1858 DeclUseList DREs
= getClaimedVarUseSites();
1860 if (DREs
.size() != 1)
1861 return std::nullopt
; // In cases of `++Ptr` where `Ptr` is not a DRE, we
1863 if (const VarDecl
*VD
= dyn_cast
<VarDecl
>(DREs
.front()->getDecl())) {
1864 if (S
.lookup(VD
) == Strategy::Kind::Span
) {
1866 std::stringstream SS
;
1867 const Stmt
*PreIncNode
= getBaseStmt();
1868 StringRef varName
= VD
->getName();
1869 const ASTContext
&Ctx
= VD
->getASTContext();
1871 // To transform UPC(++p) to UPC((p = p.subspan(1)).data()):
1872 SS
<< "(" << varName
.data() << " = " << varName
.data()
1873 << ".subspan(1)).data()";
1874 std::optional
<SourceLocation
> PreIncLocation
=
1875 getEndCharLoc(PreIncNode
, Ctx
.getSourceManager(), Ctx
.getLangOpts());
1876 if (!PreIncLocation
)
1877 return std::nullopt
;
1879 Fixes
.push_back(FixItHint::CreateReplacement(
1880 SourceRange(PreIncNode
->getBeginLoc(), *PreIncLocation
), SS
.str()));
1884 return std::nullopt
; // Not in the cases that we can handle for now, give up.
1888 // For a non-null initializer `Init` of `T *` type, this function returns
1889 // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it
1890 // to output stream.
1891 // In many cases, this function cannot figure out the actual extent `S`. It
1892 // then will use a place holder to replace `S` to ask users to fill `S` in. The
1893 // initializer shall be used to initialize a variable of type `std::span<T>`.
1895 // FIXME: Support multi-level pointers
1898 // `Init` a pointer to the initializer expression
1899 // `Ctx` a reference to the ASTContext
1901 FixVarInitializerWithSpan(const Expr
*Init
, ASTContext
&Ctx
,
1902 const StringRef UserFillPlaceHolder
) {
1903 const SourceManager
&SM
= Ctx
.getSourceManager();
1904 const LangOptions
&LangOpts
= Ctx
.getLangOpts();
1906 // If `Init` has a constant value that is (or equivalent to) a
1907 // NULL pointer, we use the default constructor to initialize the span
1908 // object, i.e., a `std:span` variable declaration with no initializer.
1909 // So the fix-it is just to remove the initializer.
1910 if (Init
->isNullPointerConstant(Ctx
,
1911 // FIXME: Why does this function not ask for `const ASTContext
1912 // &`? It should. Maybe worth an NFC patch later.
1913 Expr::NullPointerConstantValueDependence::
1914 NPC_ValueDependentIsNotNull
)) {
1915 std::optional
<SourceLocation
> InitLocation
=
1916 getEndCharLoc(Init
, SM
, LangOpts
);
1920 SourceRange
SR(Init
->getBeginLoc(), *InitLocation
);
1922 return {FixItHint::CreateRemoval(SR
)};
1926 std::string ExtentText
= UserFillPlaceHolder
.data();
1927 StringRef One
= "1";
1929 // Insert `{` before `Init`:
1930 FixIts
.push_back(FixItHint::CreateInsertion(Init
->getBeginLoc(), "{"));
1931 // Try to get the data extent. Break into different cases:
1932 if (auto CxxNew
= dyn_cast
<CXXNewExpr
>(Init
->IgnoreImpCasts())) {
1933 // In cases `Init` is `new T[n]` and there is no explicit cast over
1934 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects
1935 // of `T`. So the extent is `n` unless `n` has side effects. Similar but
1936 // simpler for the case where `Init` is `new T`.
1937 if (const Expr
*Ext
= CxxNew
->getArraySize().value_or(nullptr)) {
1938 if (!Ext
->HasSideEffects(Ctx
)) {
1939 std::optional
<StringRef
> ExtentString
= getExprText(Ext
, SM
, LangOpts
);
1942 ExtentText
= *ExtentString
;
1944 } else if (!CxxNew
->isArray())
1945 // Although the initializer is not allocating a buffer, the pointer
1946 // variable could still be used in buffer access operations.
1948 } else if (const auto *CArrTy
= Ctx
.getAsConstantArrayType(
1949 Init
->IgnoreImpCasts()->getType())) {
1950 // In cases `Init` is of an array type after stripping off implicit casts,
1951 // the extent is the array size. Note that if the array size is not a
1952 // constant, we cannot use it as the extent.
1953 ExtentText
= getAPIntText(CArrTy
->getSize());
1955 // In cases `Init` is of the form `&Var` after stripping of implicit
1956 // casts, where `&` is the built-in operator, the extent is 1.
1957 if (auto AddrOfExpr
= dyn_cast
<UnaryOperator
>(Init
->IgnoreImpCasts()))
1958 if (AddrOfExpr
->getOpcode() == UnaryOperatorKind::UO_AddrOf
&&
1959 isa_and_present
<DeclRefExpr
>(AddrOfExpr
->getSubExpr()))
1961 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`,
1962 // and explicit casting, etc. etc.
1965 SmallString
<32> StrBuffer
{};
1966 std::optional
<SourceLocation
> LocPassInit
= getPastLoc(Init
, SM
, LangOpts
);
1971 StrBuffer
.append(", ");
1972 StrBuffer
.append(ExtentText
);
1973 StrBuffer
.append("}");
1974 FixIts
.push_back(FixItHint::CreateInsertion(*LocPassInit
, StrBuffer
.str()));
// Emits a debug note explaining why no fix-it could be produced for the
// declaration `D`.  Expects a variable named `Handler` to be in scope at the
// point of use.  The second definition expands to nothing.
#ifndef NDEBUG
#define DEBUG_NOTE_DECL_FAIL(D, Msg)                                           \
  Handler.addDebugNoteForVar(                                                  \
      (D), (D)->getBeginLoc(),                                                 \
      "failed to produce fixit for declaration '" + (D)->getNameAsString() +   \
          "'" + (Msg))
#else
#define DEBUG_NOTE_DECL_FAIL(D, Msg)
#endif
1985 // For the given variable declaration with a pointer-to-T type, returns the text
1986 // `std::span<T>`. If it is unable to generate the text, returns
1988 static std::optional
<std::string
> createSpanTypeForVarDecl(const VarDecl
*VD
,
1989 const ASTContext
&Ctx
) {
1990 assert(VD
->getType()->isPointerType());
1992 std::optional
<Qualifiers
> PteTyQualifiers
= std::nullopt
;
1993 std::optional
<std::string
> PteTyText
= getPointeeTypeText(
1994 VD
, Ctx
.getSourceManager(), Ctx
.getLangOpts(), &PteTyQualifiers
);
1997 return std::nullopt
;
1999 std::string SpanTyText
= "std::span<";
2001 SpanTyText
.append(*PteTyText
);
2002 // Append qualifiers to span element type if any:
2003 if (PteTyQualifiers
) {
2004 SpanTyText
.append(" ");
2005 SpanTyText
.append(PteTyQualifiers
->getAsString());
2007 SpanTyText
.append(">");
2011 // For a `VarDecl` of the form `T * var (= Init)?`, this
2012 // function generates fix-its that
2013 // 1) replace `T * var` with `std::span<T> var`; and
2014 // 2) change `Init` accordingly to a span constructor, if it exists.
2016 // FIXME: support Multi-level pointers
2019 // `D` a pointer the variable declaration node
2020 // `Ctx` a reference to the ASTContext
2021 // `UserFillPlaceHolder` the user-input placeholder text
2023 // the non-empty fix-it list, if fix-its are successfuly generated; empty
2025 static FixItList
fixLocalVarDeclWithSpan(const VarDecl
*D
, ASTContext
&Ctx
,
2026 const StringRef UserFillPlaceHolder
,
2027 UnsafeBufferUsageHandler
&Handler
) {
2028 if (hasUnsupportedSpecifiers(D
, Ctx
.getSourceManager()))
2032 std::optional
<std::string
> SpanTyText
= createSpanTypeForVarDecl(D
, Ctx
);
2035 DEBUG_NOTE_DECL_FAIL(D
, " : failed to generate 'std::span' type");
2039 // Will hold the text for `std::span<T> Ident`:
2040 std::stringstream SS
;
2043 // Append qualifiers to the type of `D`, if any:
2044 if (D
->getType().hasQualifiers())
2045 SS
<< " " << D
->getType().getQualifiers().getAsString();
2047 // The end of the range of the original source that will be replaced
2048 // by `std::span<T> ident`:
2049 SourceLocation EndLocForReplacement
= D
->getEndLoc();
2050 std::optional
<StringRef
> IdentText
=
2051 getVarDeclIdentifierText(D
, Ctx
.getSourceManager(), Ctx
.getLangOpts());
2054 DEBUG_NOTE_DECL_FAIL(D
, " : failed to locate the identifier");
2057 // Fix the initializer if it exists:
2058 if (const Expr
*Init
= D
->getInit()) {
2059 FixItList InitFixIts
=
2060 FixVarInitializerWithSpan(Init
, Ctx
, UserFillPlaceHolder
);
2061 if (InitFixIts
.empty())
2063 FixIts
.insert(FixIts
.end(), std::make_move_iterator(InitFixIts
.begin()),
2064 std::make_move_iterator(InitFixIts
.end()));
2065 // If the declaration has the form `T *ident = init`, we want to replace
2066 // `T *ident = ` with `std::span<T> ident`:
2067 EndLocForReplacement
= Init
->getBeginLoc().getLocWithOffset(-1);
2069 SS
<< " " << IdentText
->str();
2070 if (!EndLocForReplacement
.isValid()) {
2071 DEBUG_NOTE_DECL_FAIL(D
, " : failed to locate the end of the declaration");
2074 FixIts
.push_back(FixItHint::CreateReplacement(
2075 SourceRange(D
->getBeginLoc(), EndLocForReplacement
), SS
.str()));
2079 static bool hasConflictingOverload(const FunctionDecl
*FD
) {
2080 return !FD
->getDeclContext()->lookup(FD
->getDeclName()).isSingleResult();
2083 // For a `FunctionDecl`, whose `ParmVarDecl`s are being changed to have new
2084 // types, this function produces fix-its to make the change self-contained. Let
2085 // 'F' be the entity defined by the original `FunctionDecl` and "NewF" be the
2086 // entity defined by the `FunctionDecl` after the change to the parameters.
2087 // Fix-its produced by this function are
2088 // 1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration
2090 // 2. Create a declaration of "NewF" next to each declaration of `F`;
2091 // 3. Create a definition of "F" (as its' original definition is now belongs
2092 // to "NewF") next to its original definition. The body of the creating
2093 // definition calls to "NewF".
2097 // void f(int *p); // original declaration
2098 // void f(int *p) { // original definition
2102 // To change the parameter `p` to be of `std::span<int>` type, we
2103 // also add overloads:
2105 // [[clang::unsafe_buffer_usage]] void f(int *p); // original decl
2106 // void f(std::span<int> p); // added overload decl
2107 // void f(std::span<int> p) { // original def where param is changed
2110 // [[clang::unsafe_buffer_usage]] void f(int *p) { // added def
2111 // return f(std::span(p, <# size #>));
2114 static std::optional
<FixItList
>
2115 createOverloadsForFixedParams(const Strategy
&S
, const FunctionDecl
*FD
,
2116 const ASTContext
&Ctx
,
2117 UnsafeBufferUsageHandler
&Handler
) {
2118 // FIXME: need to make this conflict checking better:
2119 if (hasConflictingOverload(FD
))
2120 return std::nullopt
;
2122 const SourceManager
&SM
= Ctx
.getSourceManager();
2123 const LangOptions
&LangOpts
= Ctx
.getLangOpts();
2124 const unsigned NumParms
= FD
->getNumParams();
2125 std::vector
<std::string
> NewTysTexts(NumParms
);
2126 std::vector
<bool> ParmsMask(NumParms
, false);
2127 bool AtLeastOneParmToFix
= false;
2129 for (unsigned i
= 0; i
< NumParms
; i
++) {
2130 const ParmVarDecl
*PVD
= FD
->getParamDecl(i
);
2132 if (S
.lookup(PVD
) == Strategy::Kind::Wontfix
)
2134 if (S
.lookup(PVD
) != Strategy::Kind::Span
)
2135 // Not supported, not suppose to happen:
2136 return std::nullopt
;
2138 std::optional
<Qualifiers
> PteTyQuals
= std::nullopt
;
2139 std::optional
<std::string
> PteTyText
=
2140 getPointeeTypeText(PVD
, SM
, LangOpts
, &PteTyQuals
);
2143 // something wrong in obtaining the text of the pointee type, give up
2144 return std::nullopt
;
2145 // FIXME: whether we should create std::span type depends on the Strategy.
2146 NewTysTexts
[i
] = getSpanTypeText(*PteTyText
, PteTyQuals
);
2147 ParmsMask
[i
] = true;
2148 AtLeastOneParmToFix
= true;
2150 if (!AtLeastOneParmToFix
)
2151 // No need to create function overloads:
2153 // FIXME Respect indentation of the original code.
2155 // A lambda that creates the text representation of a function declaration
2156 // with the new type signatures:
2157 const auto NewOverloadSignatureCreator
=
2158 [&SM
, &LangOpts
, &NewTysTexts
,
2159 &ParmsMask
](const FunctionDecl
*FD
) -> std::optional
<std::string
> {
2160 std::stringstream SS
;
2163 SS
<< getEndOfLine().str();
2164 // Append: ret-type func-name "("
2165 if (auto Prefix
= getRangeText(
2166 SourceRange(FD
->getBeginLoc(), (*FD
->param_begin())->getBeginLoc()),
2168 SS
<< Prefix
->str();
2170 return std::nullopt
; // give up
2171 // Append: parameter-type-list
2172 const unsigned NumParms
= FD
->getNumParams();
2174 for (unsigned i
= 0; i
< NumParms
; i
++) {
2175 const ParmVarDecl
*Parm
= FD
->getParamDecl(i
);
2177 if (Parm
->isImplicit())
2180 // This `i`-th parameter will be fixed with `NewTysTexts[i]` being its
2182 SS
<< NewTysTexts
[i
];
2183 // print parameter name if provided:
2184 if (IdentifierInfo
*II
= Parm
->getIdentifier())
2185 SS
<< ' ' << II
->getName().str();
2186 } else if (auto ParmTypeText
= getRangeText(
2187 getSourceRangeToTokenEnd(Parm
, SM
, LangOpts
),
2189 // print the whole `Parm` without modification:
2190 SS
<< ParmTypeText
->str();
2192 return std::nullopt
; // something wrong, give up
2193 if (i
!= NumParms
- 1)
2200 // A lambda that creates the text representation of a function definition with
2201 // the original signature:
2202 const auto OldOverloadDefCreator
=
2203 [&Handler
, &SM
, &LangOpts
, &NewTysTexts
,
2204 &ParmsMask
](const FunctionDecl
*FD
) -> std::optional
<std::string
> {
2205 std::stringstream SS
;
2207 SS
<< getEndOfLine().str();
2208 // Append: attr-name ret-type func-name "(" param-list ")" "{"
2209 if (auto FDPrefix
= getRangeText(
2210 SourceRange(FD
->getBeginLoc(), FD
->getBody()->getBeginLoc()), SM
,
2212 SS
<< Handler
.getUnsafeBufferUsageAttributeTextAt(FD
->getBeginLoc(), " ")
2213 << FDPrefix
->str() << "{";
2215 return std::nullopt
;
2216 // Append: "return" func-name "("
2217 if (auto FunQualName
= getFunNameText(FD
, SM
, LangOpts
))
2218 SS
<< "return " << FunQualName
->str() << "(";
2220 return std::nullopt
;
2223 const unsigned NumParms
= FD
->getNumParams();
2224 for (unsigned i
= 0; i
< NumParms
; i
++) {
2225 const ParmVarDecl
*Parm
= FD
->getParamDecl(i
);
2227 if (Parm
->isImplicit())
2229 // FIXME: If a parameter has no name, it is unused in the
2230 // definition. So we could just leave it as it is.
2231 if (!Parm
->getIdentifier())
2232 // If a parameter of a function definition has no name:
2233 return std::nullopt
;
2235 // This is our spanified paramter!
2236 SS
<< NewTysTexts
[i
] << "(" << Parm
->getIdentifier()->getName().str()
2237 << ", " << getUserFillPlaceHolder("size") << ")";
2239 SS
<< Parm
->getIdentifier()->getName().str();
2240 if (i
!= NumParms
- 1)
2243 // finish call and the body
2244 SS
<< ");}" << getEndOfLine().str();
2245 // FIXME: 80-char line formatting?
2250 for (FunctionDecl
*FReDecl
: FD
->redecls()) {
2251 std::optional
<SourceLocation
> Loc
= getPastLoc(FReDecl
, SM
, LangOpts
);
2255 if (FReDecl
->isThisDeclarationADefinition()) {
2256 assert(FReDecl
== FD
&& "inconsistent function definition");
2257 // Inserts a definition with the old signature to the end of
2259 if (auto OldOverloadDef
= OldOverloadDefCreator(FReDecl
))
2260 FixIts
.emplace_back(FixItHint::CreateInsertion(*Loc
, *OldOverloadDef
));
2262 return {}; // give up
2264 // Adds the unsafe-buffer attribute (if not already there) to `FReDecl`:
2265 if (!FReDecl
->hasAttr
<UnsafeBufferUsageAttr
>()) {
2266 FixIts
.emplace_back(FixItHint::CreateInsertion(
2267 FReDecl
->getBeginLoc(), Handler
.getUnsafeBufferUsageAttributeTextAt(
2268 FReDecl
->getBeginLoc(), " ")));
2270 // Inserts a declaration with the new signature to the end of `FReDecl`:
2271 if (auto NewOverloadDecl
= NewOverloadSignatureCreator(FReDecl
))
2272 FixIts
.emplace_back(FixItHint::CreateInsertion(*Loc
, *NewOverloadDecl
));
2280 // To fix a `ParmVarDecl` to be of `std::span` type.
2281 static FixItList
fixParamWithSpan(const ParmVarDecl
*PVD
, const ASTContext
&Ctx
,
2282 UnsafeBufferUsageHandler
&Handler
) {
2283 if (hasUnsupportedSpecifiers(PVD
, Ctx
.getSourceManager())) {
2284 DEBUG_NOTE_DECL_FAIL(PVD
, " : has unsupport specifier(s)");
2287 if (PVD
->hasDefaultArg()) {
2288 // FIXME: generate fix-its for default values:
2289 DEBUG_NOTE_DECL_FAIL(PVD
, " : has default arg");
2293 std::optional
<Qualifiers
> PteTyQualifiers
= std::nullopt
;
2294 std::optional
<std::string
> PteTyText
= getPointeeTypeText(
2295 PVD
, Ctx
.getSourceManager(), Ctx
.getLangOpts(), &PteTyQualifiers
);
2298 DEBUG_NOTE_DECL_FAIL(PVD
, " : invalid pointee type");
2302 std::optional
<StringRef
> PVDNameText
= PVD
->getIdentifier()->getName();
2305 DEBUG_NOTE_DECL_FAIL(PVD
, " : invalid identifier name");
2309 std::stringstream SS
;
2310 std::optional
<std::string
> SpanTyText
= createSpanTypeForVarDecl(PVD
, Ctx
);
2312 if (PteTyQualifiers
)
2313 // Append qualifiers if they exist:
2314 SS
<< getSpanTypeText(*PteTyText
, PteTyQualifiers
);
2316 SS
<< getSpanTypeText(*PteTyText
);
2317 // Append qualifiers to the type of the parameter:
2318 if (PVD
->getType().hasQualifiers())
2319 SS
<< ' ' << PVD
->getType().getQualifiers().getAsString();
2320 // Append parameter's name:
2321 SS
<< ' ' << PVDNameText
->str();
2322 // Add replacement fix-it:
2323 return {FixItHint::CreateReplacement(PVD
->getSourceRange(), SS
.str())};
2326 static FixItList
fixVariableWithSpan(const VarDecl
*VD
,
2327 const DeclUseTracker
&Tracker
,
2329 UnsafeBufferUsageHandler
&Handler
) {
2330 const DeclStmt
*DS
= Tracker
.lookupDecl(VD
);
2332 DEBUG_NOTE_DECL_FAIL(VD
, " : variables declared this way not implemented yet");
2335 if (!DS
->isSingleDecl()) {
2336 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
2337 DEBUG_NOTE_DECL_FAIL(VD
, " : multiple VarDecls");
2340 // Currently DS is an unused variable but we'll need it when
2341 // non-single decls are implemented, where the pointee type name
2342 // and the '*' are spread around the place.
2345 // FIXME: handle cases where DS has multiple declarations
2346 return fixLocalVarDeclWithSpan(VD
, Ctx
, getUserFillPlaceHolder(), Handler
);
2349 // TODO: we should be consistent to use `std::nullopt` to represent no-fix due
2350 // to any unexpected problem.
2352 fixVariable(const VarDecl
*VD
, Strategy::Kind K
,
2353 /* The function decl under analysis */ const Decl
*D
,
2354 const DeclUseTracker
&Tracker
, ASTContext
&Ctx
,
2355 UnsafeBufferUsageHandler
&Handler
) {
2356 if (const auto *PVD
= dyn_cast
<ParmVarDecl
>(VD
)) {
2357 auto *FD
= dyn_cast
<clang::FunctionDecl
>(PVD
->getDeclContext());
2358 if (!FD
|| FD
!= D
) {
2359 // `FD != D` means that `PVD` belongs to a function that is not being
2360 // analyzed currently. Thus `FD` may not be complete.
2361 DEBUG_NOTE_DECL_FAIL(VD
, " : function not currently analyzed");
2365 // TODO If function has a try block we can't change params unless we check
2366 // also its catch block for their use.
2367 // FIXME We might support static class methods, some select methods,
2368 // operators and possibly lamdas.
2369 if (FD
->isMain() || FD
->isConstexpr() ||
2370 FD
->getTemplatedKind() != FunctionDecl::TemplatedKind::TK_NonTemplate
||
2372 // also covers call-operator of lamdas
2373 isa
<CXXMethodDecl
>(FD
) ||
2374 // skip when the function body is a try-block
2375 (FD
->hasBody() && isa
<CXXTryStmt
>(FD
->getBody())) ||
2376 FD
->isOverloadedOperator()) {
2377 DEBUG_NOTE_DECL_FAIL(VD
, " : unsupported function decl");
2378 return {}; // TODO test all these cases
2383 case Strategy::Kind::Span
: {
2384 if (VD
->getType()->isPointerType()) {
2385 if (const auto *PVD
= dyn_cast
<ParmVarDecl
>(VD
))
2386 return fixParamWithSpan(PVD
, Ctx
, Handler
);
2388 if (VD
->isLocalVarDecl())
2389 return fixVariableWithSpan(VD
, Tracker
, Ctx
, Handler
);
2391 DEBUG_NOTE_DECL_FAIL(VD
, " : not a pointer");
2394 case Strategy::Kind::Iterator
:
2395 case Strategy::Kind::Array
:
2396 case Strategy::Kind::Vector
:
2397 llvm_unreachable("Strategy not implemented yet!");
2398 case Strategy::Kind::Wontfix
:
2399 llvm_unreachable("Invalid strategy!");
2401 llvm_unreachable("Unknown strategy!");
2404 // Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the
2405 // `RemoveRange` of 'h' overlaps with a macro use.
2406 static bool overlapWithMacro(const FixItList
&FixIts
) {
2407 // FIXME: For now we only check if the range (or the first token) is (part of)
2408 // a macro expansion. Ideally, we want to check for all tokens in the range.
2409 return llvm::any_of(FixIts
, [](const FixItHint
&Hint
) {
2410 auto Range
= Hint
.RemoveRange
;
2411 if (Range
.getBegin().isMacroID() || Range
.getEnd().isMacroID())
2412 // If the range (or the first token) is (part of) a macro expansion:
2418 // Returns true iff `VD` is a parameter of the declaration `D`:
2419 static bool isParameterOf(const VarDecl
*VD
, const Decl
*D
) {
2420 return isa
<ParmVarDecl
>(VD
) &&
2421 VD
->getDeclContext() == dyn_cast
<DeclContext
>(D
);
2424 // Erases variables in `FixItsForVariable`, if such a variable has an unfixable
2425 // group mate. A variable `v` is unfixable iff `FixItsForVariable` does not
2427 static void eraseVarsForUnfixableGroupMates(
2428 std::map
<const VarDecl
*, FixItList
> &FixItsForVariable
,
2429 const VariableGroupsManager
&VarGrpMgr
) {
2430 // Variables will be removed from `FixItsForVariable`:
2431 SmallVector
<const VarDecl
*, 8> ToErase
;
2433 for (const auto &[VD
, Ignore
] : FixItsForVariable
) {
2434 VarGrpRef Grp
= VarGrpMgr
.getGroupOfVar(VD
);
2435 if (llvm::any_of(Grp
,
2436 [&FixItsForVariable
](const VarDecl
*GrpMember
) -> bool {
2437 return !FixItsForVariable
.count(GrpMember
);
2439 // At least one group member cannot be fixed, so we have to erase the
2441 for (const VarDecl
*Member
: Grp
)
2442 ToErase
.push_back(Member
);
2445 for (auto *VarToErase
: ToErase
)
2446 FixItsForVariable
.erase(VarToErase
);
2449 // Returns the fix-its that create bounds-safe function overloads for the
2450 // function `D`, if `D`'s parameters will be changed to safe-types through
2451 // fix-its in `FixItsForVariable`.
2453 // NOTE: In case `D`'s parameters will be changed but bounds-safe function
2454 // overloads cannot created, the whole group that contains the parameters will
2455 // be erased from `FixItsForVariable`.
2456 static FixItList
createFunctionOverloadsForParms(
2457 std::map
<const VarDecl
*, FixItList
> &FixItsForVariable
/* mutable */,
2458 const VariableGroupsManager
&VarGrpMgr
, const FunctionDecl
*FD
,
2459 const Strategy
&S
, ASTContext
&Ctx
, UnsafeBufferUsageHandler
&Handler
) {
2460 FixItList FixItsSharedByParms
{};
2462 std::optional
<FixItList
> OverloadFixes
=
2463 createOverloadsForFixedParams(S
, FD
, Ctx
, Handler
);
2465 if (OverloadFixes
) {
2466 FixItsSharedByParms
.append(*OverloadFixes
);
2468 // Something wrong in generating `OverloadFixes`, need to remove the
2469 // whole group, where parameters are in, from `FixItsForVariable` (Note
2470 // that all parameters should be in the same group):
2471 for (auto *Member
: VarGrpMgr
.getGroupOfParms())
2472 FixItsForVariable
.erase(Member
);
2474 return FixItsSharedByParms
;
2477 // Constructs self-contained fix-its for each variable in `FixablesForAllVars`.
2478 static std::map
<const VarDecl
*, FixItList
>
2479 getFixIts(FixableGadgetSets
&FixablesForAllVars
, const Strategy
&S
,
2481 /* The function decl under analysis */ const Decl
*D
,
2482 const DeclUseTracker
&Tracker
, UnsafeBufferUsageHandler
&Handler
,
2483 const VariableGroupsManager
&VarGrpMgr
) {
2484 // `FixItsForVariable` will map each variable to a set of fix-its directly
2485 // associated to the variable itself. Fix-its of distinct variables in
2486 // `FixItsForVariable` are disjoint.
2487 std::map
<const VarDecl
*, FixItList
> FixItsForVariable
;
2489 // Populate `FixItsForVariable` with fix-its directly associated with each
2490 // variable. Fix-its directly associated to a variable 'v' are the ones
2491 // produced by the `FixableGadget`s whose claimed variable is 'v'.
2492 for (const auto &[VD
, Fixables
] : FixablesForAllVars
.byVar
) {
2493 FixItsForVariable
[VD
] =
2494 fixVariable(VD
, S
.lookup(VD
), D
, Tracker
, Ctx
, Handler
);
2495 // If we fail to produce Fix-It for the declaration we have to skip the
2496 // variable entirely.
2497 if (FixItsForVariable
[VD
].empty()) {
2498 FixItsForVariable
.erase(VD
);
2501 for (const auto &F
: Fixables
) {
2502 std::optional
<FixItList
> Fixits
= F
->getFixits(S
);
2505 FixItsForVariable
[VD
].insert(FixItsForVariable
[VD
].end(),
2506 Fixits
->begin(), Fixits
->end());
2510 Handler
.addDebugNoteForVar(
2511 VD
, F
->getBaseStmt()->getBeginLoc(),
2512 ("gadget '" + F
->getDebugName() + "' refused to produce a fix")
2515 FixItsForVariable
.erase(VD
);
2520 // `FixItsForVariable` now contains only variables that can be
2521 // fixed. A variable can be fixed if its' declaration and all Fixables
2522 // associated to it can all be fixed.
2524 // To further remove from `FixItsForVariable` variables whose group mates
2525 // cannot be fixed...
2526 eraseVarsForUnfixableGroupMates(FixItsForVariable
, VarGrpMgr
);
2527 // Now `FixItsForVariable` gets further reduced: a variable is in
2528 // `FixItsForVariable` iff it can be fixed and all its group mates can be
2531 // Fix-its of bounds-safe overloads of `D` are shared by parameters of `D`.
2532 // That is, when fixing multiple parameters in one step, these fix-its will
2533 // be applied only once (instead of being applied per parameter).
2534 FixItList FixItsSharedByParms
{};
2536 if (auto *FD
= dyn_cast
<FunctionDecl
>(D
))
2537 FixItsSharedByParms
= createFunctionOverloadsForParms(
2538 FixItsForVariable
, VarGrpMgr
, FD
, S
, Ctx
, Handler
);
2540 // The map that maps each variable `v` to fix-its for the whole group where
2542 std::map
<const VarDecl
*, FixItList
> FinalFixItsForVariable
{
2545 for (auto &[Var
, Ignore
] : FixItsForVariable
) {
2546 bool AnyParm
= false;
2547 const auto VarGroupForVD
= VarGrpMgr
.getGroupOfVar(Var
, &AnyParm
);
2549 for (const VarDecl
*GrpMate
: VarGroupForVD
) {
2552 if (FixItsForVariable
.count(GrpMate
))
2553 FinalFixItsForVariable
[Var
].append(FixItsForVariable
[GrpMate
]);
2556 // This assertion should never fail. Otherwise we have a bug.
2557 assert(!FixItsSharedByParms
.empty() &&
2558 "Should not try to fix a parameter that does not belong to a "
2560 FinalFixItsForVariable
[Var
].append(FixItsSharedByParms
);
2563 // Fix-its that will be applied in one step shall NOT:
2564 // 1. overlap with macros or/and templates; or
2565 // 2. conflict with each other.
2566 // Otherwise, the fix-its will be dropped.
2567 for (auto Iter
= FinalFixItsForVariable
.begin();
2568 Iter
!= FinalFixItsForVariable
.end();)
2569 if (overlapWithMacro(Iter
->second
) ||
2570 clang::internal::anyConflict(Iter
->second
, Ctx
.getSourceManager())) {
2571 Iter
= FinalFixItsForVariable
.erase(Iter
);
2574 return FinalFixItsForVariable
;
2577 template <typename VarDeclIterTy
>
2579 getNaiveStrategy(llvm::iterator_range
<VarDeclIterTy
> UnsafeVars
) {
2581 for (const VarDecl
*VD
: UnsafeVars
) {
2582 S
.set(VD
, Strategy::Kind::Span
);
2587 // Manages variable groups:
2588 class VariableGroupsManagerImpl
: public VariableGroupsManager
{
2589 const std::vector
<VarGrpTy
> Groups
;
2590 const std::map
<const VarDecl
*, unsigned> &VarGrpMap
;
2591 const llvm::SetVector
<const VarDecl
*> &GrpsUnionForParms
;
2594 VariableGroupsManagerImpl(
2595 const std::vector
<VarGrpTy
> &Groups
,
2596 const std::map
<const VarDecl
*, unsigned> &VarGrpMap
,
2597 const llvm::SetVector
<const VarDecl
*> &GrpsUnionForParms
)
2598 : Groups(Groups
), VarGrpMap(VarGrpMap
),
2599 GrpsUnionForParms(GrpsUnionForParms
) {}
2601 VarGrpRef
getGroupOfVar(const VarDecl
*Var
, bool *HasParm
) const override
{
2602 if (GrpsUnionForParms
.contains(Var
)) {
2605 return GrpsUnionForParms
.getArrayRef();
2610 auto It
= VarGrpMap
.find(Var
);
2612 if (It
== VarGrpMap
.end())
2613 return std::nullopt
;
2614 return Groups
[It
->second
];
2617 VarGrpRef
getGroupOfParms() const override
{
2618 return GrpsUnionForParms
.getArrayRef();
2622 void clang::checkUnsafeBufferUsage(const Decl
*D
,
2623 UnsafeBufferUsageHandler
&Handler
,
2624 bool EmitSuggestions
) {
2626 Handler
.clearDebugNotes();
2629 assert(D
&& D
->getBody());
2630 // We do not want to visit a Lambda expression defined inside a method independently.
2631 // Instead, it should be visited along with the outer method.
2632 // FIXME: do we want to do the same thing for `BlockDecl`s?
2633 if (const auto *fd
= dyn_cast
<CXXMethodDecl
>(D
)) {
2634 if (fd
->getParent()->isLambda() && fd
->getParent()->isLocalClass())
2638 // Do not emit fixit suggestions for functions declared in an
2639 // extern "C" block.
2640 if (const auto *FD
= dyn_cast
<FunctionDecl
>(D
)) {
2641 for (FunctionDecl
*FReDecl
: FD
->redecls()) {
2642 if (FReDecl
->isExternC()) {
2643 EmitSuggestions
= false;
2649 WarningGadgetSets UnsafeOps
;
2650 FixableGadgetSets FixablesForAllVars
;
2652 auto [FixableGadgets
, WarningGadgets
, Tracker
] =
2653 findGadgets(D
, Handler
, EmitSuggestions
);
2655 if (!EmitSuggestions
) {
2656 // Our job is very easy without suggestions. Just warn about
2657 // every problematic operation and consider it done. No need to deal
2658 // with fixable gadgets, no need to group operations by variable.
2659 for (const auto &G
: WarningGadgets
) {
2660 Handler
.handleUnsafeOperation(G
->getBaseStmt(),
2661 /*IsRelatedToDecl=*/false);
2664 // This return guarantees that most of the machine doesn't run when
2665 // suggestions aren't requested.
2666 assert(FixableGadgets
.size() == 0 &&
2667 "Fixable gadgets found but suggestions not requested!");
2671 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
2672 // function under the analysis. No need to fix any Fixables.
2673 if (!WarningGadgets
.empty()) {
2674 // Gadgets "claim" variables they're responsible for. Once this loop
2675 // finishes, the tracker will only track DREs that weren't claimed by any
2676 // gadgets, i.e. not understood by the analysis.
2677 for (const auto &G
: FixableGadgets
) {
2678 for (const auto *DRE
: G
->getClaimedVarUseSites()) {
2679 Tracker
.claimUse(DRE
);
2684 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
2685 // function under the analysis. Thus, it early returns here as there is
2686 // nothing needs to be fixed.
2688 // Note this claim is based on the assumption that there is no unsafe
2689 // variable whose declaration is invisible from the analyzing function.
2690 // Otherwise, we need to consider if the uses of those unsafe variables need
2692 // So far, we are not fixing any global variables or class members. And,
2693 // lambdas will be analyzed along with the enclosing function. So this early
2694 // return is correct for now.
2695 if (WarningGadgets
.empty())
2698 UnsafeOps
= groupWarningGadgetsByVar(std::move(WarningGadgets
));
2699 FixablesForAllVars
= groupFixablesByVar(std::move(FixableGadgets
));
2701 std::map
<const VarDecl
*, FixItList
> FixItsForVariableGroup
;
2703 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s.
2704 for (auto it
= FixablesForAllVars
.byVar
.cbegin();
2705 it
!= FixablesForAllVars
.byVar
.cend();) {
2706 // FIXME: need to deal with global variables later
2707 if ((!it
->first
->isLocalVarDecl() && !isa
<ParmVarDecl
>(it
->first
))) {
2709 Handler
.addDebugNoteForVar(
2710 it
->first
, it
->first
->getBeginLoc(),
2711 ("failed to produce fixit for '" + it
->first
->getNameAsString() +
2712 "' : neither local nor a parameter"));
2714 it
= FixablesForAllVars
.byVar
.erase(it
);
2715 } else if (it
->first
->getType().getCanonicalType()->isReferenceType()) {
2717 Handler
.addDebugNoteForVar(it
->first
, it
->first
->getBeginLoc(),
2718 ("failed to produce fixit for '" +
2719 it
->first
->getNameAsString() +
2720 "' : has a reference type"));
2722 it
= FixablesForAllVars
.byVar
.erase(it
);
2723 } else if (Tracker
.hasUnclaimedUses(it
->first
)) {
2725 auto AllUnclaimed
= Tracker
.getUnclaimedUses(it
->first
);
2726 for (auto UnclaimedDRE
: AllUnclaimed
) {
2727 std::string UnclaimedUseTrace
=
2728 getDREAncestorString(UnclaimedDRE
, D
->getASTContext());
2730 Handler
.addDebugNoteForVar(
2731 it
->first
, UnclaimedDRE
->getBeginLoc(),
2732 ("failed to produce fixit for '" + it
->first
->getNameAsString() +
2733 "' : has an unclaimed use\nThe unclaimed DRE trace: " +
2734 UnclaimedUseTrace
));
2737 it
= FixablesForAllVars
.byVar
.erase(it
);
2738 } else if (it
->first
->isInitCapture()) {
2740 Handler
.addDebugNoteForVar(
2741 it
->first
, it
->first
->getBeginLoc(),
2742 ("failed to produce fixit for '" + it
->first
->getNameAsString() +
2743 "' : init capture"));
2745 it
= FixablesForAllVars
.byVar
.erase(it
);
2751 // Fixpoint iteration for pointer assignments
2752 using DepMapTy
= DenseMap
<const VarDecl
*, llvm::SetVector
<const VarDecl
*>>;
2753 DepMapTy DependenciesMap
{};
2754 DepMapTy PtrAssignmentGraph
{};
2756 for (auto it
: FixablesForAllVars
.byVar
) {
2757 for (const FixableGadget
*fixable
: it
.second
) {
2758 std::optional
<std::pair
<const VarDecl
*, const VarDecl
*>> ImplPair
=
2759 fixable
->getStrategyImplications();
2761 std::pair
<const VarDecl
*, const VarDecl
*> Impl
= std::move(*ImplPair
);
2762 PtrAssignmentGraph
[Impl
.first
].insert(Impl
.second
);
2768 The following code does a BFS traversal of the `PtrAssignmentGraph`
2769 considering all unsafe vars as starting nodes and constructs an undirected
2770 graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner
2771 eliminates all variables that are unreachable from any unsafe var. In other
2772 words, this removes all dependencies that don't include any unsafe variable
2773 and consequently don't need any fixit generation.
2774 Note: A careful reader would observe that the code traverses
2775 `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and
2776 `Adj` and not between `CurrentVar` and `Adj`. Both approaches would
2777 achieve the same result but the one used here dramatically cuts the
2778 amount of hoops the second part of the algorithm needs to jump, given that
2779 a lot of these connections become "direct". The reader is advised not to
2780 imagine how the graph is transformed because of using `Var` instead of
2781 `CurrentVar`. The reader can continue reading as if `CurrentVar` was used,
2782 and think about why it's equivalent later.
2784 std::set
<const VarDecl
*> VisitedVarsDirected
{};
2785 for (const auto &[Var
, ignore
] : UnsafeOps
.byVar
) {
2786 if (VisitedVarsDirected
.find(Var
) == VisitedVarsDirected
.end()) {
2788 std::queue
<const VarDecl
*> QueueDirected
{};
2789 QueueDirected
.push(Var
);
2790 while(!QueueDirected
.empty()) {
2791 const VarDecl
* CurrentVar
= QueueDirected
.front();
2792 QueueDirected
.pop();
2793 VisitedVarsDirected
.insert(CurrentVar
);
2794 auto AdjacentNodes
= PtrAssignmentGraph
[CurrentVar
];
2795 for (const VarDecl
*Adj
: AdjacentNodes
) {
2796 if (VisitedVarsDirected
.find(Adj
) == VisitedVarsDirected
.end()) {
2797 QueueDirected
.push(Adj
);
2799 DependenciesMap
[Var
].insert(Adj
);
2800 DependenciesMap
[Adj
].insert(Var
);
2806 // `Groups` stores the set of Connected Components in the graph.
2807 std::vector
<VarGrpTy
> Groups
;
2808 // `VarGrpMap` maps variables that need fix to the groups (indexes) that the
2809 // variables belong to. Group indexes refer to the elements in `Groups`.
2810 // `VarGrpMap` is complete in that every variable that needs fix is in it.
2811 std::map
<const VarDecl
*, unsigned> VarGrpMap
;
2812 // The union group over the ones in "Groups" that contain parameters of `D`:
2813 llvm::SetVector
<const VarDecl
*>
2814 GrpsUnionForParms
; // these variables need to be fixed in one step
2816 // Group Connected Components for Unsafe Vars
2817 // (Dependencies based on pointer assignments)
2818 std::set
<const VarDecl
*> VisitedVars
{};
2819 for (const auto &[Var
, ignore
] : UnsafeOps
.byVar
) {
2820 if (VisitedVars
.find(Var
) == VisitedVars
.end()) {
2821 VarGrpTy
&VarGroup
= Groups
.emplace_back();
2822 std::queue
<const VarDecl
*> Queue
{};
2825 while(!Queue
.empty()) {
2826 const VarDecl
* CurrentVar
= Queue
.front();
2828 VisitedVars
.insert(CurrentVar
);
2829 VarGroup
.push_back(CurrentVar
);
2830 auto AdjacentNodes
= DependenciesMap
[CurrentVar
];
2831 for (const VarDecl
*Adj
: AdjacentNodes
) {
2832 if (VisitedVars
.find(Adj
) == VisitedVars
.end()) {
2838 bool HasParm
= false;
2839 unsigned GrpIdx
= Groups
.size() - 1;
2841 for (const VarDecl
*V
: VarGroup
) {
2842 VarGrpMap
[V
] = GrpIdx
;
2843 if (!HasParm
&& isParameterOf(V
, D
))
2847 GrpsUnionForParms
.insert(VarGroup
.begin(), VarGroup
.end());
2851 // Remove a `FixableGadget` if the associated variable is not in the graph
2852 // computed above. We do not want to generate fix-its for such variables,
2853 // since they are neither warned nor reachable from a warned one.
2855 // Note a variable is not warned if it is not directly used in any unsafe
2856 // operation. A variable `v` is NOT reachable from an unsafe variable, if there
2857 // does not exist another variable `u` such that `u` is warned and fixing `u`
2858 // (transitively) implicates fixing `v`.
2862 // void f(int * p) {
2863 // int * a = p; *p = 0;
2866 // `*p = 0` is a fixable gadget associated with a variable `p` that is neither
2867 // warned nor reachable from a warned one. If we add `a[5] = 0` to the end of
2868 // the function above, `p` becomes reachable from a warned variable.
2869 for (auto I
= FixablesForAllVars
.byVar
.begin();
2870 I
!= FixablesForAllVars
.byVar
.end();) {
2871 // Note `VisitedVars` contains all the variables in the graph:
2872 if (!VisitedVars
.count((*I
).first
)) {
2873 // no such var in graph:
2874 I
= FixablesForAllVars
.byVar
.erase(I
);
2879 // We assign strategies to variables that are 1) in the graph and 2) can be
2880 // fixed. Other variables have the default "Won't fix" strategy.
2881 Strategy NaiveStrategy
= getNaiveStrategy(llvm::make_filter_range(
2882 VisitedVars
, [&FixablesForAllVars
](const VarDecl
*V
) {
2883 // If a warned variable has no "Fixable", it is considered unfixable:
2884 return FixablesForAllVars
.byVar
.count(V
);
2886 VariableGroupsManagerImpl
VarGrpMgr(Groups
, VarGrpMap
, GrpsUnionForParms
);
2888 if (isa
<NamedDecl
>(D
))
2889 // The only case where `D` is not a `NamedDecl` is when `D` is a
2890 // `BlockDecl`. Let's not fix variables in blocks for now
2891 FixItsForVariableGroup
=
2892 getFixIts(FixablesForAllVars
, NaiveStrategy
, D
->getASTContext(), D
,
2893 Tracker
, Handler
, VarGrpMgr
);
2895 for (const auto &G
: UnsafeOps
.noVar
) {
2896 Handler
.handleUnsafeOperation(G
->getBaseStmt(), /*IsRelatedToDecl=*/false);
2899 for (const auto &[VD
, WarningGadgets
] : UnsafeOps
.byVar
) {
2900 auto FixItsIt
= FixItsForVariableGroup
.find(VD
);
2901 Handler
.handleUnsafeVariableGroup(VD
, VarGrpMgr
,
2902 FixItsIt
!= FixItsForVariableGroup
.end()
2903 ? std::move(FixItsIt
->second
)
2906 for (const auto &G
: WarningGadgets
) {
2907 Handler
.handleUnsafeOperation(G
->getBaseStmt(), /*IsRelatedToDecl=*/true);