[Flang] remove whole-archive option for AIX linker (#76039)
[llvm-project.git] / clang / lib / Analysis / UnsafeBufferUsage.cpp
blob70eec1cee57f8e1d52263999dbf55c5e9375d963
1 //===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
10 #include "clang/AST/Decl.h"
11 #include "clang/AST/Expr.h"
12 #include "clang/AST/RecursiveASTVisitor.h"
13 #include "clang/AST/StmtVisitor.h"
14 #include "clang/ASTMatchers/ASTMatchFinder.h"
15 #include "clang/Lex/Lexer.h"
16 #include "clang/Lex/Preprocessor.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include <memory>
19 #include <optional>
20 #include <sstream>
21 #include <queue>
23 using namespace llvm;
24 using namespace clang;
25 using namespace ast_matchers;
27 #ifndef NDEBUG
28 namespace {
29 class StmtDebugPrinter
30 : public ConstStmtVisitor<StmtDebugPrinter, std::string> {
31 public:
32 std::string VisitStmt(const Stmt *S) { return S->getStmtClassName(); }
34 std::string VisitBinaryOperator(const BinaryOperator *BO) {
35 return "BinaryOperator(" + BO->getOpcodeStr().str() + ")";
38 std::string VisitUnaryOperator(const UnaryOperator *UO) {
39 return "UnaryOperator(" + UO->getOpcodeStr(UO->getOpcode()).str() + ")";
42 std::string VisitImplicitCastExpr(const ImplicitCastExpr *ICE) {
43 return "ImplicitCastExpr(" + std::string(ICE->getCastKindName()) + ")";
47 // Returns a string of ancestor `Stmt`s of the given `DRE` in such a form:
48 // "DRE ==> parent-of-DRE ==> grandparent-of-DRE ==> ...".
49 static std::string getDREAncestorString(const DeclRefExpr *DRE,
50 ASTContext &Ctx) {
51 std::stringstream SS;
52 const Stmt *St = DRE;
53 StmtDebugPrinter StmtPriner;
55 do {
56 SS << StmtPriner.Visit(St);
58 DynTypedNodeList StParents = Ctx.getParents(*St);
60 if (StParents.size() > 1)
61 return "unavailable due to multiple parents";
62 if (StParents.size() == 0)
63 break;
64 St = StParents.begin()->get<Stmt>();
65 if (St)
66 SS << " ==> ";
67 } while (St);
68 return SS.str();
70 } // namespace
71 #endif /* NDEBUG */
73 namespace clang::ast_matchers {
74 // A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
75 // except for those belonging to a different callable of "n".
76 class MatchDescendantVisitor
77 : public RecursiveASTVisitor<MatchDescendantVisitor> {
78 public:
79 typedef RecursiveASTVisitor<MatchDescendantVisitor> VisitorBase;
81 // Creates an AST visitor that matches `Matcher` on all
82 // descendants of a given node "n" except for the ones
83 // belonging to a different callable of "n".
84 MatchDescendantVisitor(const internal::DynTypedMatcher *Matcher,
85 internal::ASTMatchFinder *Finder,
86 internal::BoundNodesTreeBuilder *Builder,
87 internal::ASTMatchFinder::BindKind Bind,
88 const bool ignoreUnevaluatedContext)
89 : Matcher(Matcher), Finder(Finder), Builder(Builder), Bind(Bind),
90 Matches(false), ignoreUnevaluatedContext(ignoreUnevaluatedContext) {}
92 // Returns true if a match is found in a subtree of `DynNode`, which belongs
93 // to the same callable of `DynNode`.
94 bool findMatch(const DynTypedNode &DynNode) {
95 Matches = false;
96 if (const Stmt *StmtNode = DynNode.get<Stmt>()) {
97 TraverseStmt(const_cast<Stmt *>(StmtNode));
98 *Builder = ResultBindings;
99 return Matches;
101 return false;
104 // The following are overriding methods from the base visitor class.
105 // They are public only to allow CRTP to work. They are *not *part
106 // of the public API of this class.
108 // For the matchers so far used in safe buffers, we only need to match
109 // `Stmt`s. To override more as needed.
111 bool TraverseDecl(Decl *Node) {
112 if (!Node)
113 return true;
114 if (!match(*Node))
115 return false;
116 // To skip callables:
117 if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Node))
118 return true;
119 // Traverse descendants
120 return VisitorBase::TraverseDecl(Node);
123 bool TraverseGenericSelectionExpr(GenericSelectionExpr *Node) {
124 // These are unevaluated, except the result expression.
125 if(ignoreUnevaluatedContext)
126 return TraverseStmt(Node->getResultExpr());
127 return VisitorBase::TraverseGenericSelectionExpr(Node);
130 bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node) {
131 // Unevaluated context.
132 if(ignoreUnevaluatedContext)
133 return true;
134 return VisitorBase::TraverseUnaryExprOrTypeTraitExpr(Node);
137 bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node) {
138 // Unevaluated context.
139 if(ignoreUnevaluatedContext)
140 return true;
141 return VisitorBase::TraverseTypeOfExprTypeLoc(Node);
144 bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node) {
145 // Unevaluated context.
146 if(ignoreUnevaluatedContext)
147 return true;
148 return VisitorBase::TraverseDecltypeTypeLoc(Node);
151 bool TraverseCXXNoexceptExpr(CXXNoexceptExpr *Node) {
152 // Unevaluated context.
153 if(ignoreUnevaluatedContext)
154 return true;
155 return VisitorBase::TraverseCXXNoexceptExpr(Node);
158 bool TraverseCXXTypeidExpr(CXXTypeidExpr *Node) {
159 // Unevaluated context.
160 if(ignoreUnevaluatedContext)
161 return true;
162 return VisitorBase::TraverseCXXTypeidExpr(Node);
165 bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue = nullptr) {
166 if (!Node)
167 return true;
168 if (!match(*Node))
169 return false;
170 return VisitorBase::TraverseStmt(Node);
173 bool shouldVisitTemplateInstantiations() const { return true; }
174 bool shouldVisitImplicitCode() const {
175 // TODO: let's ignore implicit code for now
176 return false;
179 private:
180 // Sets 'Matched' to true if 'Matcher' matches 'Node'
182 // Returns 'true' if traversal should continue after this function
183 // returns, i.e. if no match is found or 'Bind' is 'BK_All'.
184 template <typename T> bool match(const T &Node) {
185 internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder);
187 if (Matcher->matches(DynTypedNode::create(Node), Finder,
188 &RecursiveBuilder)) {
189 ResultBindings.addMatch(RecursiveBuilder);
190 Matches = true;
191 if (Bind != internal::ASTMatchFinder::BK_All)
192 return false; // Abort as soon as a match is found.
194 return true;
197 const internal::DynTypedMatcher *const Matcher;
198 internal::ASTMatchFinder *const Finder;
199 internal::BoundNodesTreeBuilder *const Builder;
200 internal::BoundNodesTreeBuilder ResultBindings;
201 const internal::ASTMatchFinder::BindKind Bind;
202 bool Matches;
203 bool ignoreUnevaluatedContext;
206 // Because we're dealing with raw pointers, let's define what we mean by that.
207 static auto hasPointerType() {
208 return hasType(hasCanonicalType(pointerType()));
211 static auto hasArrayType() {
212 return hasType(hasCanonicalType(arrayType()));
215 AST_MATCHER_P(Stmt, forEachDescendantEvaluatedStmt, internal::Matcher<Stmt>, innerMatcher) {
216 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
218 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, true);
219 return Visitor.findMatch(DynTypedNode::create(Node));
222 AST_MATCHER_P(Stmt, forEachDescendantStmt, internal::Matcher<Stmt>, innerMatcher) {
223 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
225 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, false);
226 return Visitor.findMatch(DynTypedNode::create(Node));
229 // Matches a `Stmt` node iff the node is in a safe-buffer opt-out region
230 AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const UnsafeBufferUsageHandler *,
231 Handler) {
232 return !Handler->isSafeBufferOptOut(Node.getBeginLoc());
235 AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) {
236 return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder);
239 // Matches a `UnaryOperator` whose operator is pre-increment:
240 AST_MATCHER(UnaryOperator, isPreInc) {
241 return Node.getOpcode() == UnaryOperator::Opcode::UO_PreInc;
244 // Returns a matcher that matches any expression 'e' such that `innerMatcher`
245 // matches 'e' and 'e' is in an Unspecified Lvalue Context.
246 static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) {
247 // clang-format off
248 return
249 expr(anyOf(
250 implicitCastExpr(
251 hasCastKind(CastKind::CK_LValueToRValue),
252 castSubExpr(innerMatcher)),
253 binaryOperator(
254 hasAnyOperatorName("="),
255 hasLHS(innerMatcher)
258 // clang-format on
262 // Returns a matcher that matches any expression `e` such that `InnerMatcher`
263 // matches `e` and `e` is in an Unspecified Pointer Context (UPC).
264 static internal::Matcher<Stmt>
265 isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) {
266 // A UPC can be
267 // 1. an argument of a function call (except the callee has [[unsafe_...]]
268 // attribute), or
269 // 2. the operand of a pointer-to-(integer or bool) cast operation; or
270 // 3. the operand of a comparator operation; or
271 // 4. the operand of a pointer subtraction operation
272 // (i.e., computing the distance between two pointers); or ...
274 auto CallArgMatcher =
275 callExpr(forEachArgumentWithParam(InnerMatcher,
276 hasPointerType() /* array also decays to pointer type*/),
277 unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage)))));
279 auto CastOperandMatcher =
280 castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral),
281 hasCastKind(CastKind::CK_PointerToBoolean)),
282 castSubExpr(allOf(hasPointerType(), InnerMatcher)));
284 auto CompOperandMatcher =
285 binaryOperator(hasAnyOperatorName("!=", "==", "<", "<=", ">", ">="),
286 eachOf(hasLHS(allOf(hasPointerType(), InnerMatcher)),
287 hasRHS(allOf(hasPointerType(), InnerMatcher))));
289 // A matcher that matches pointer subtractions:
290 auto PtrSubtractionMatcher =
291 binaryOperator(hasOperatorName("-"),
292 // Note that here we need both LHS and RHS to be
293 // pointer. Then the inner matcher can match any of
294 // them:
295 allOf(hasLHS(hasPointerType()),
296 hasRHS(hasPointerType())),
297 eachOf(hasLHS(InnerMatcher),
298 hasRHS(InnerMatcher)));
300 return stmt(anyOf(CallArgMatcher, CastOperandMatcher, CompOperandMatcher,
301 PtrSubtractionMatcher));
302 // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we
303 // don't have to check that.)
306 // Returns a matcher that matches any expression 'e' such that `innerMatcher`
307 // matches 'e' and 'e' is in an unspecified untyped context (i.e the expression
308 // 'e' isn't evaluated to an RValue). For example, consider the following code:
309 // int *p = new int[4];
310 // int *q = new int[4];
311 // if ((p = q)) {}
312 // p = q;
313 // The expression `p = q` in the conditional of the `if` statement
314 // `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;`
315 // in the assignment statement is in an untyped context.
316 static internal::Matcher<Stmt>
317 isInUnspecifiedUntypedContext(internal::Matcher<Stmt> InnerMatcher) {
318 // An unspecified context can be
319 // 1. A compound statement,
320 // 2. The body of an if statement
321 // 3. Body of a loop
322 auto CompStmt = compoundStmt(forEach(InnerMatcher));
323 auto IfStmtThen = ifStmt(hasThen(InnerMatcher));
324 auto IfStmtElse = ifStmt(hasElse(InnerMatcher));
325 // FIXME: Handle loop bodies.
326 return stmt(anyOf(CompStmt, IfStmtThen, IfStmtElse));
328 } // namespace clang::ast_matchers
330 namespace {
331 // Because the analysis revolves around variables and their types, we'll need to
332 // track uses of variables (aka DeclRefExprs).
333 using DeclUseList = SmallVector<const DeclRefExpr *, 1>;
335 // Convenience typedef.
336 using FixItList = SmallVector<FixItHint, 4>;
338 // Defined below.
339 class Strategy;
340 } // namespace
342 namespace {
343 /// Gadget is an individual operation in the code that may be of interest to
344 /// this analysis. Each (non-abstract) subclass corresponds to a specific
345 /// rigid AST structure that constitutes an operation on a pointer-type object.
346 /// Discovery of a gadget in the code corresponds to claiming that we understand
347 /// what this part of code is doing well enough to potentially improve it.
348 /// Gadgets can be warning (immediately deserving a warning) or fixable (not
349 /// always deserving a warning per se, but requires our attention to identify
350 /// it warrants a fixit).
351 class Gadget {
352 public:
353 enum class Kind {
354 #define GADGET(x) x,
355 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
358 /// Common type of ASTMatchers used for discovering gadgets.
359 /// Useful for implementing the static matcher() methods
360 /// that are expected from all non-abstract subclasses.
361 using Matcher = decltype(stmt());
363 Gadget(Kind K) : K(K) {}
365 Kind getKind() const { return K; }
367 #ifndef NDEBUG
368 StringRef getDebugName() const {
369 switch (K) {
370 #define GADGET(x) case Kind::x: return #x;
371 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
373 llvm_unreachable("Unhandled Gadget::Kind enum");
375 #endif
377 virtual bool isWarningGadget() const = 0;
378 virtual const Stmt *getBaseStmt() const = 0;
380 /// Returns the list of pointer-type variables on which this gadget performs
381 /// its operation. Typically, there's only one variable. This isn't a list
382 /// of all DeclRefExprs in the gadget's AST!
383 virtual DeclUseList getClaimedVarUseSites() const = 0;
385 virtual ~Gadget() = default;
387 private:
388 Kind K;
392 /// Warning gadgets correspond to unsafe code patterns that warrants
393 /// an immediate warning.
394 class WarningGadget : public Gadget {
395 public:
396 WarningGadget(Kind K) : Gadget(K) {}
398 static bool classof(const Gadget *G) { return G->isWarningGadget(); }
399 bool isWarningGadget() const final { return true; }
402 /// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be
403 /// properly recognized in order to emit fixes. For example, if a raw pointer-type
404 /// variable is replaced by a safe C++ container, every use of such variable must be
405 /// carefully considered and possibly updated.
406 class FixableGadget : public Gadget {
407 public:
408 FixableGadget(Kind K) : Gadget(K) {}
410 static bool classof(const Gadget *G) { return !G->isWarningGadget(); }
411 bool isWarningGadget() const final { return false; }
413 /// Returns a fixit that would fix the current gadget according to
414 /// the current strategy. Returns std::nullopt if the fix cannot be produced;
415 /// returns an empty list if no fixes are necessary.
416 virtual std::optional<FixItList> getFixits(const Strategy &) const {
417 return std::nullopt;
420 /// Returns a list of two elements where the first element is the LHS of a pointer assignment
421 /// statement and the second element is the RHS. This two-element list represents the fact that
422 /// the LHS buffer gets its bounds information from the RHS buffer. This information will be used
423 /// later to group all those variables whose types must be modified together to prevent type
424 /// mismatches.
425 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
426 getStrategyImplications() const {
427 return std::nullopt;
431 static auto toSupportedVariable() {
432 return to(varDecl());
435 using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>;
436 using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>;
438 /// An increment of a pointer-type value is unsafe as it may run the pointer
439 /// out of bounds.
440 class IncrementGadget : public WarningGadget {
441 static constexpr const char *const OpTag = "op";
442 const UnaryOperator *Op;
444 public:
445 IncrementGadget(const MatchFinder::MatchResult &Result)
446 : WarningGadget(Kind::Increment),
447 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {}
449 static bool classof(const Gadget *G) {
450 return G->getKind() == Kind::Increment;
453 static Matcher matcher() {
454 return stmt(unaryOperator(
455 hasOperatorName("++"),
456 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
457 ).bind(OpTag));
460 const UnaryOperator *getBaseStmt() const override { return Op; }
462 DeclUseList getClaimedVarUseSites() const override {
463 SmallVector<const DeclRefExpr *, 2> Uses;
464 if (const auto *DRE =
465 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) {
466 Uses.push_back(DRE);
469 return std::move(Uses);
473 /// A decrement of a pointer-type value is unsafe as it may run the pointer
474 /// out of bounds.
475 class DecrementGadget : public WarningGadget {
476 static constexpr const char *const OpTag = "op";
477 const UnaryOperator *Op;
479 public:
480 DecrementGadget(const MatchFinder::MatchResult &Result)
481 : WarningGadget(Kind::Decrement),
482 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {}
484 static bool classof(const Gadget *G) {
485 return G->getKind() == Kind::Decrement;
488 static Matcher matcher() {
489 return stmt(unaryOperator(
490 hasOperatorName("--"),
491 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
492 ).bind(OpTag));
495 const UnaryOperator *getBaseStmt() const override { return Op; }
497 DeclUseList getClaimedVarUseSites() const override {
498 if (const auto *DRE =
499 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) {
500 return {DRE};
503 return {};
507 /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
508 /// it doesn't have any bounds checks for the array.
509 class ArraySubscriptGadget : public WarningGadget {
510 static constexpr const char *const ArraySubscrTag = "ArraySubscript";
511 const ArraySubscriptExpr *ASE;
513 public:
514 ArraySubscriptGadget(const MatchFinder::MatchResult &Result)
515 : WarningGadget(Kind::ArraySubscript),
516 ASE(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ArraySubscrTag)) {}
518 static bool classof(const Gadget *G) {
519 return G->getKind() == Kind::ArraySubscript;
522 static Matcher matcher() {
523 // FIXME: What if the index is integer literal 0? Should this be
524 // a safe gadget in this case?
525 // clang-format off
526 return stmt(arraySubscriptExpr(
527 hasBase(ignoringParenImpCasts(
528 anyOf(hasPointerType(), hasArrayType()))),
529 unless(hasIndex(
530 anyOf(integerLiteral(equals(0)), arrayInitIndexExpr())
532 .bind(ArraySubscrTag));
533 // clang-format on
536 const ArraySubscriptExpr *getBaseStmt() const override { return ASE; }
538 DeclUseList getClaimedVarUseSites() const override {
539 if (const auto *DRE =
540 dyn_cast<DeclRefExpr>(ASE->getBase()->IgnoreParenImpCasts())) {
541 return {DRE};
544 return {};
548 /// A pointer arithmetic expression of one of the forms:
549 /// \code
550 /// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n
551 /// \endcode
552 class PointerArithmeticGadget : public WarningGadget {
553 static constexpr const char *const PointerArithmeticTag = "ptrAdd";
554 static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr";
555 const BinaryOperator *PA; // pointer arithmetic expression
556 const Expr *Ptr; // the pointer expression in `PA`
558 public:
559 PointerArithmeticGadget(const MatchFinder::MatchResult &Result)
560 : WarningGadget(Kind::PointerArithmetic),
561 PA(Result.Nodes.getNodeAs<BinaryOperator>(PointerArithmeticTag)),
562 Ptr(Result.Nodes.getNodeAs<Expr>(PointerArithmeticPointerTag)) {}
564 static bool classof(const Gadget *G) {
565 return G->getKind() == Kind::PointerArithmetic;
568 static Matcher matcher() {
569 auto HasIntegerType = anyOf(hasType(isInteger()), hasType(enumType()));
570 auto PtrAtRight =
571 allOf(hasOperatorName("+"),
572 hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)),
573 hasLHS(HasIntegerType));
574 auto PtrAtLeft =
575 allOf(anyOf(hasOperatorName("+"), hasOperatorName("-"),
576 hasOperatorName("+="), hasOperatorName("-=")),
577 hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)),
578 hasRHS(HasIntegerType));
580 return stmt(binaryOperator(anyOf(PtrAtLeft, PtrAtRight))
581 .bind(PointerArithmeticTag));
584 const Stmt *getBaseStmt() const override { return PA; }
586 DeclUseList getClaimedVarUseSites() const override {
587 if (const auto *DRE = dyn_cast<DeclRefExpr>(Ptr->IgnoreParenImpCasts())) {
588 return {DRE};
591 return {};
593 // FIXME: pointer adding zero should be fine
594 // FIXME: this gadge will need a fix-it
597 /// A pointer initialization expression of the form:
598 /// \code
599 /// int *p = q;
600 /// \endcode
601 class PointerInitGadget : public FixableGadget {
602 private:
603 static constexpr const char *const PointerInitLHSTag = "ptrInitLHS";
604 static constexpr const char *const PointerInitRHSTag = "ptrInitRHS";
605 const VarDecl * PtrInitLHS; // the LHS pointer expression in `PI`
606 const DeclRefExpr * PtrInitRHS; // the RHS pointer expression in `PI`
608 public:
609 PointerInitGadget(const MatchFinder::MatchResult &Result)
610 : FixableGadget(Kind::PointerInit),
611 PtrInitLHS(Result.Nodes.getNodeAs<VarDecl>(PointerInitLHSTag)),
612 PtrInitRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerInitRHSTag)) {}
614 static bool classof(const Gadget *G) {
615 return G->getKind() == Kind::PointerInit;
618 static Matcher matcher() {
619 auto PtrInitStmt = declStmt(hasSingleDecl(varDecl(
620 hasInitializer(ignoringImpCasts(declRefExpr(
621 hasPointerType(),
622 toSupportedVariable()).
623 bind(PointerInitRHSTag)))).
624 bind(PointerInitLHSTag)));
626 return stmt(PtrInitStmt);
629 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
631 virtual const Stmt *getBaseStmt() const override {
632 // FIXME: This needs to be the entire DeclStmt, assuming that this method
633 // makes sense at all on a FixableGadget.
634 return PtrInitRHS;
637 virtual DeclUseList getClaimedVarUseSites() const override {
638 return DeclUseList{PtrInitRHS};
641 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
642 getStrategyImplications() const override {
643 return std::make_pair(PtrInitLHS,
644 cast<VarDecl>(PtrInitRHS->getDecl()));
648 /// A pointer assignment expression of the form:
649 /// \code
650 /// p = q;
651 /// \endcode
652 class PointerAssignmentGadget : public FixableGadget {
653 private:
654 static constexpr const char *const PointerAssignLHSTag = "ptrLHS";
655 static constexpr const char *const PointerAssignRHSTag = "ptrRHS";
656 const DeclRefExpr * PtrLHS; // the LHS pointer expression in `PA`
657 const DeclRefExpr * PtrRHS; // the RHS pointer expression in `PA`
659 public:
660 PointerAssignmentGadget(const MatchFinder::MatchResult &Result)
661 : FixableGadget(Kind::PointerAssignment),
662 PtrLHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignLHSTag)),
663 PtrRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignRHSTag)) {}
665 static bool classof(const Gadget *G) {
666 return G->getKind() == Kind::PointerAssignment;
669 static Matcher matcher() {
670 auto PtrAssignExpr = binaryOperator(allOf(hasOperatorName("="),
671 hasRHS(ignoringParenImpCasts(declRefExpr(hasPointerType(),
672 toSupportedVariable()).
673 bind(PointerAssignRHSTag))),
674 hasLHS(declRefExpr(hasPointerType(),
675 toSupportedVariable()).
676 bind(PointerAssignLHSTag))));
678 return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr));
681 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
683 virtual const Stmt *getBaseStmt() const override {
684 // FIXME: This should be the binary operator, assuming that this method
685 // makes sense at all on a FixableGadget.
686 return PtrLHS;
689 virtual DeclUseList getClaimedVarUseSites() const override {
690 return DeclUseList{PtrLHS, PtrRHS};
693 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
694 getStrategyImplications() const override {
695 return std::make_pair(cast<VarDecl>(PtrLHS->getDecl()),
696 cast<VarDecl>(PtrRHS->getDecl()));
700 /// A call of a function or method that performs unchecked buffer operations
701 /// over one of its pointer parameters.
702 class UnsafeBufferUsageAttrGadget : public WarningGadget {
703 constexpr static const char *const OpTag = "call_expr";
704 const CallExpr *Op;
706 public:
707 UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult &Result)
708 : WarningGadget(Kind::UnsafeBufferUsageAttr),
709 Op(Result.Nodes.getNodeAs<CallExpr>(OpTag)) {}
711 static bool classof(const Gadget *G) {
712 return G->getKind() == Kind::UnsafeBufferUsageAttr;
715 static Matcher matcher() {
716 return stmt(callExpr(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))))
717 .bind(OpTag));
719 const Stmt *getBaseStmt() const override { return Op; }
721 DeclUseList getClaimedVarUseSites() const override { return {}; }
724 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
725 // Context (see `isInUnspecifiedLvalueContext`).
726 // Note here `[]` is the built-in subscript operator.
727 class ULCArraySubscriptGadget : public FixableGadget {
728 private:
729 static constexpr const char *const ULCArraySubscriptTag =
730 "ArraySubscriptUnderULC";
731 const ArraySubscriptExpr *Node;
733 public:
734 ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result)
735 : FixableGadget(Kind::ULCArraySubscript),
736 Node(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ULCArraySubscriptTag)) {
737 assert(Node != nullptr && "Expecting a non-null matching result");
740 static bool classof(const Gadget *G) {
741 return G->getKind() == Kind::ULCArraySubscript;
744 static Matcher matcher() {
745 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
746 auto BaseIsArrayOrPtrDRE =
747 hasBase(ignoringParenImpCasts(declRefExpr(ArrayOrPtr,
748 toSupportedVariable())));
749 auto Target =
750 arraySubscriptExpr(BaseIsArrayOrPtrDRE).bind(ULCArraySubscriptTag);
752 return expr(isInUnspecifiedLvalueContext(Target));
755 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
757 virtual const Stmt *getBaseStmt() const override { return Node; }
759 virtual DeclUseList getClaimedVarUseSites() const override {
760 if (const auto *DRE =
761 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) {
762 return {DRE};
764 return {};
768 // Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the
769 // unspecified pointer context (isInUnspecifiedPointerContext). The gadget emits
770 // fixit of the form `UPC(DRE.data())`.
771 class UPCStandalonePointerGadget : public FixableGadget {
772 private:
773 static constexpr const char *const DeclRefExprTag = "StandalonePointer";
774 const DeclRefExpr *Node;
776 public:
777 UPCStandalonePointerGadget(const MatchFinder::MatchResult &Result)
778 : FixableGadget(Kind::UPCStandalonePointer),
779 Node(Result.Nodes.getNodeAs<DeclRefExpr>(DeclRefExprTag)) {
780 assert(Node != nullptr && "Expecting a non-null matching result");
783 static bool classof(const Gadget *G) {
784 return G->getKind() == Kind::UPCStandalonePointer;
787 static Matcher matcher() {
788 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
789 auto target = expr(
790 ignoringParenImpCasts(declRefExpr(allOf(ArrayOrPtr,
791 toSupportedVariable())).bind(DeclRefExprTag)));
792 return stmt(isInUnspecifiedPointerContext(target));
795 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
797 virtual const Stmt *getBaseStmt() const override { return Node; }
799 virtual DeclUseList getClaimedVarUseSites() const override {
800 return {Node};
804 class PointerDereferenceGadget : public FixableGadget {
805 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
806 static constexpr const char *const OperatorTag = "op";
808 const DeclRefExpr *BaseDeclRefExpr = nullptr;
809 const UnaryOperator *Op = nullptr;
811 public:
812 PointerDereferenceGadget(const MatchFinder::MatchResult &Result)
813 : FixableGadget(Kind::PointerDereference),
814 BaseDeclRefExpr(
815 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)),
816 Op(Result.Nodes.getNodeAs<UnaryOperator>(OperatorTag)) {}
818 static bool classof(const Gadget *G) {
819 return G->getKind() == Kind::PointerDereference;
822 static Matcher matcher() {
823 auto Target =
824 unaryOperator(
825 hasOperatorName("*"),
826 has(expr(ignoringParenImpCasts(
827 declRefExpr(toSupportedVariable()).bind(BaseDeclRefExprTag)))))
828 .bind(OperatorTag);
830 return expr(isInUnspecifiedLvalueContext(Target));
833 DeclUseList getClaimedVarUseSites() const override {
834 return {BaseDeclRefExpr};
837 virtual const Stmt *getBaseStmt() const final { return Op; }
839 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
842 // Represents expressions of the form `&DRE[any]` in the Unspecified Pointer
843 // Context (see `isInUnspecifiedPointerContext`).
844 // Note here `[]` is the built-in subscript operator.
845 class UPCAddressofArraySubscriptGadget : public FixableGadget {
846 private:
847 static constexpr const char *const UPCAddressofArraySubscriptTag =
848 "AddressofArraySubscriptUnderUPC";
849 const UnaryOperator *Node; // the `&DRE[any]` node
851 public:
852 UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result)
853 : FixableGadget(Kind::ULCArraySubscript),
854 Node(Result.Nodes.getNodeAs<UnaryOperator>(
855 UPCAddressofArraySubscriptTag)) {
856 assert(Node != nullptr && "Expecting a non-null matching result");
859 static bool classof(const Gadget *G) {
860 return G->getKind() == Kind::UPCAddressofArraySubscript;
863 static Matcher matcher() {
864 return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
865 unaryOperator(hasOperatorName("&"),
866 hasUnaryOperand(arraySubscriptExpr(
867 hasBase(ignoringParenImpCasts(declRefExpr(
868 toSupportedVariable()))))))
869 .bind(UPCAddressofArraySubscriptTag)))));
872 virtual std::optional<FixItList> getFixits(const Strategy &) const override;
874 virtual const Stmt *getBaseStmt() const override { return Node; }
876 virtual DeclUseList getClaimedVarUseSites() const override {
877 const auto *ArraySubst = cast<ArraySubscriptExpr>(Node->getSubExpr());
878 const auto *DRE =
879 cast<DeclRefExpr>(ArraySubst->getBase()->IgnoreImpCasts());
880 return {DRE};
883 } // namespace
885 namespace {
886 // An auxiliary tracking facility for the fixit analysis. It helps connect
887 // declarations to its uses and make sure we've covered all uses with our
888 // analysis before we try to fix the declaration.
889 class DeclUseTracker {
890 using UseSetTy = SmallSet<const DeclRefExpr *, 16>;
891 using DefMapTy = DenseMap<const VarDecl *, const DeclStmt *>;
893 // Allocate on the heap for easier move.
894 std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()};
895 DefMapTy Defs{};
897 public:
898 DeclUseTracker() = default;
899 DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies.
900 DeclUseTracker &operator=(const DeclUseTracker &) = delete;
901 DeclUseTracker(DeclUseTracker &&) = default;
902 DeclUseTracker &operator=(DeclUseTracker &&) = default;
904 // Start tracking a freshly discovered DRE.
905 void discoverUse(const DeclRefExpr *DRE) { Uses->insert(DRE); }
907 // Stop tracking the DRE as it's been fully figured out.
908 void claimUse(const DeclRefExpr *DRE) {
909 assert(Uses->count(DRE) &&
910 "DRE not found or claimed by multiple matchers!");
911 Uses->erase(DRE);
914 // A variable is unclaimed if at least one use is unclaimed.
915 bool hasUnclaimedUses(const VarDecl *VD) const {
916 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
917 return any_of(*Uses, [VD](const DeclRefExpr *DRE) {
918 return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl();
922 UseSetTy getUnclaimedUses(const VarDecl *VD) const {
923 UseSetTy ReturnSet;
924 for (auto use : *Uses) {
925 if (use->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl()) {
926 ReturnSet.insert(use);
929 return ReturnSet;
932 void discoverDecl(const DeclStmt *DS) {
933 for (const Decl *D : DS->decls()) {
934 if (const auto *VD = dyn_cast<VarDecl>(D)) {
935 // FIXME: Assertion temporarily disabled due to a bug in
936 // ASTMatcher internal behavior in presence of GNU
937 // statement-expressions. We need to properly investigate this
938 // because it can screw up our algorithm in other ways.
939 // assert(Defs.count(VD) == 0 && "Definition already discovered!");
940 Defs[VD] = DS;
945 const DeclStmt *lookupDecl(const VarDecl *VD) const {
946 return Defs.lookup(VD);
949 } // namespace
951 namespace {
952 // Strategy is a map from variables to the way we plan to emit fixes for
953 // these variables. It is figured out gradually by trying different fixes
954 // for different variables depending on gadgets in which these variables
955 // participate.
956 class Strategy {
957 public:
958 enum class Kind {
959 Wontfix, // We don't plan to emit a fixit for this variable.
960 Span, // We recommend replacing the variable with std::span.
961 Iterator, // We recommend replacing the variable with std::span::iterator.
962 Array, // We recommend replacing the variable with std::array.
963 Vector // We recommend replacing the variable with std::vector.
966 private:
967 using MapTy = llvm::DenseMap<const VarDecl *, Kind>;
969 MapTy Map;
971 public:
972 Strategy() = default;
973 Strategy(const Strategy &) = delete; // Let's avoid copies.
974 Strategy &operator=(const Strategy &) = delete;
975 Strategy(Strategy &&) = default;
976 Strategy &operator=(Strategy &&) = default;
978 void set(const VarDecl *VD, Kind K) { Map[VD] = K; }
980 Kind lookup(const VarDecl *VD) const {
981 auto I = Map.find(VD);
982 if (I == Map.end())
983 return Kind::Wontfix;
985 return I->second;
988 } // namespace
991 // Representing a pointer type expression of the form `++Ptr` in an Unspecified
992 // Pointer Context (UPC):
993 class UPCPreIncrementGadget : public FixableGadget {
994 private:
995 static constexpr const char *const UPCPreIncrementTag =
996 "PointerPreIncrementUnderUPC";
997 const UnaryOperator *Node; // the `++Ptr` node
999 public:
1000 UPCPreIncrementGadget(const MatchFinder::MatchResult &Result)
1001 : FixableGadget(Kind::UPCPreIncrement),
1002 Node(Result.Nodes.getNodeAs<UnaryOperator>(UPCPreIncrementTag)) {
1003 assert(Node != nullptr && "Expecting a non-null matching result");
1006 static bool classof(const Gadget *G) {
1007 return G->getKind() == Kind::UPCPreIncrement;
1010 static Matcher matcher() {
1011 // Note here we match `++Ptr` for any expression `Ptr` of pointer type.
1012 // Although currently we can only provide fix-its when `Ptr` is a DRE, we
1013 // can have the matcher be general, so long as `getClaimedVarUseSites` does
1014 // things right.
1015 return stmt(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
1016 unaryOperator(isPreInc(),
1017 hasUnaryOperand(declRefExpr(
1018 toSupportedVariable()))
1019 ).bind(UPCPreIncrementTag)))));
1022 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
1024 virtual const Stmt *getBaseStmt() const override { return Node; }
1026 virtual DeclUseList getClaimedVarUseSites() const override {
1027 return {dyn_cast<DeclRefExpr>(Node->getSubExpr())};
1031 // Representing a pointer type expression of the form `Ptr += n` in an
1032 // Unspecified Untyped Context (UUC):
1033 class UUCAddAssignGadget : public FixableGadget {
1034 private:
1035 static constexpr const char *const UUCAddAssignTag =
1036 "PointerAddAssignUnderUUC";
1037 static constexpr const char *const OffsetTag = "Offset";
1039 const BinaryOperator *Node; // the `Ptr += n` node
1040 const Expr *Offset = nullptr;
1042 public:
1043 UUCAddAssignGadget(const MatchFinder::MatchResult &Result)
1044 : FixableGadget(Kind::UUCAddAssign),
1045 Node(Result.Nodes.getNodeAs<BinaryOperator>(UUCAddAssignTag)),
1046 Offset(Result.Nodes.getNodeAs<Expr>(OffsetTag)) {
1047 assert(Node != nullptr && "Expecting a non-null matching result");
1050 static bool classof(const Gadget *G) {
1051 return G->getKind() == Kind::UUCAddAssign;
1054 static Matcher matcher() {
1055 return stmt(isInUnspecifiedUntypedContext(expr(ignoringImpCasts(
1056 binaryOperator(hasOperatorName("+="),
1057 hasLHS(declRefExpr(toSupportedVariable())),
1058 hasRHS(expr().bind(OffsetTag)))
1059 .bind(UUCAddAssignTag)))));
1062 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
1064 virtual const Stmt *getBaseStmt() const override { return Node; }
1066 virtual DeclUseList getClaimedVarUseSites() const override {
1067 return {dyn_cast<DeclRefExpr>(Node->getLHS())};
1071 // Representing a fixable expression of the form `*(ptr + 123)` or `*(123 +
1072 // ptr)`:
1073 class DerefSimplePtrArithFixableGadget : public FixableGadget {
1074 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
1075 static constexpr const char *const DerefOpTag = "DerefOp";
1076 static constexpr const char *const AddOpTag = "AddOp";
1077 static constexpr const char *const OffsetTag = "Offset";
1079 const DeclRefExpr *BaseDeclRefExpr = nullptr;
1080 const UnaryOperator *DerefOp = nullptr;
1081 const BinaryOperator *AddOp = nullptr;
1082 const IntegerLiteral *Offset = nullptr;
1084 public:
1085 DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult &Result)
1086 : FixableGadget(Kind::DerefSimplePtrArithFixable),
1087 BaseDeclRefExpr(
1088 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)),
1089 DerefOp(Result.Nodes.getNodeAs<UnaryOperator>(DerefOpTag)),
1090 AddOp(Result.Nodes.getNodeAs<BinaryOperator>(AddOpTag)),
1091 Offset(Result.Nodes.getNodeAs<IntegerLiteral>(OffsetTag)) {}
1093 static Matcher matcher() {
1094 // clang-format off
1095 auto ThePtr = expr(hasPointerType(),
1096 ignoringImpCasts(declRefExpr(toSupportedVariable()).
1097 bind(BaseDeclRefExprTag)));
1098 auto PlusOverPtrAndInteger = expr(anyOf(
1099 binaryOperator(hasOperatorName("+"), hasLHS(ThePtr),
1100 hasRHS(integerLiteral().bind(OffsetTag)))
1101 .bind(AddOpTag),
1102 binaryOperator(hasOperatorName("+"), hasRHS(ThePtr),
1103 hasLHS(integerLiteral().bind(OffsetTag)))
1104 .bind(AddOpTag)));
1105 return isInUnspecifiedLvalueContext(unaryOperator(
1106 hasOperatorName("*"),
1107 hasUnaryOperand(ignoringParens(PlusOverPtrAndInteger)))
1108 .bind(DerefOpTag));
1109 // clang-format on
1112 virtual std::optional<FixItList> getFixits(const Strategy &s) const final;
1114 // TODO remove this method from FixableGadget interface
1115 virtual const Stmt *getBaseStmt() const final { return nullptr; }
1117 virtual DeclUseList getClaimedVarUseSites() const final {
1118 return {BaseDeclRefExpr};
1122 /// Scan the function and return a list of gadgets found with provided kits.
1123 static std::tuple<FixableGadgetList, WarningGadgetList, DeclUseTracker>
1124 findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler,
1125 bool EmitSuggestions) {
1127 struct GadgetFinderCallback : MatchFinder::MatchCallback {
1128 FixableGadgetList FixableGadgets;
1129 WarningGadgetList WarningGadgets;
1130 DeclUseTracker Tracker;
1132 void run(const MatchFinder::MatchResult &Result) override {
1133 // In debug mode, assert that we've found exactly one gadget.
1134 // This helps us avoid conflicts in .bind() tags.
1135 #if NDEBUG
1136 #define NEXT return
1137 #else
1138 [[maybe_unused]] int numFound = 0;
1139 #define NEXT ++numFound
1140 #endif
1142 if (const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>("any_dre")) {
1143 Tracker.discoverUse(DRE);
1144 NEXT;
1147 if (const auto *DS = Result.Nodes.getNodeAs<DeclStmt>("any_ds")) {
1148 Tracker.discoverDecl(DS);
1149 NEXT;
1152 // Figure out which matcher we've found, and call the appropriate
1153 // subclass constructor.
1154 // FIXME: Can we do this more logarithmically?
1155 #define FIXABLE_GADGET(name) \
1156 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
1157 FixableGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
1158 NEXT; \
1160 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1161 #define WARNING_GADGET(name) \
1162 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
1163 WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
1164 NEXT; \
1166 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1168 assert(numFound >= 1 && "Gadgets not found in match result!");
1169 assert(numFound <= 1 && "Conflicting bind tags in gadgets!");
1173 MatchFinder M;
1174 GadgetFinderCallback CB;
1176 // clang-format off
1177 M.addMatcher(
1178 stmt(
1179 forEachDescendantEvaluatedStmt(stmt(anyOf(
1180 // Add Gadget::matcher() for every gadget in the registry.
1181 #define WARNING_GADGET(x) \
1182 allOf(x ## Gadget::matcher().bind(#x), \
1183 notInSafeBufferOptOut(&Handler)),
1184 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1185 // Avoid a hanging comma.
1186 unless(stmt())
1191 // clang-format on
1193 if (EmitSuggestions) {
1194 // clang-format off
1195 M.addMatcher(
1196 stmt(
1197 forEachDescendantStmt(stmt(eachOf(
1198 #define FIXABLE_GADGET(x) \
1199 x ## Gadget::matcher().bind(#x),
1200 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1201 // In parallel, match all DeclRefExprs so that to find out
1202 // whether there are any uncovered by gadgets.
1203 declRefExpr(anyOf(hasPointerType(), hasArrayType()),
1204 to(anyOf(varDecl(), bindingDecl()))).bind("any_dre"),
1205 // Also match DeclStmts because we'll need them when fixing
1206 // their underlying VarDecls that otherwise don't have
1207 // any backreferences to DeclStmts.
1208 declStmt().bind("any_ds")
1213 // clang-format on
1216 M.match(*D->getBody(), D->getASTContext());
1217 return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets),
1218 std::move(CB.Tracker)};
// Strict ordering of AST nodes by the raw encoding of their begin location.
template <typename NodeTy> struct CompareNode {
  bool operator()(const NodeTy *N1, const NodeTy *N2) const {
    const auto FirstBegin = N1->getBeginLoc().getRawEncoding();
    const auto SecondBegin = N2->getBeginLoc().getRawEncoding();
    return FirstBegin < SecondBegin;
  }
};
1229 struct WarningGadgetSets {
1230 std::map<const VarDecl *, std::set<const WarningGadget *>,
1231 // To keep keys sorted by their locations in the map so that the
1232 // order is deterministic:
1233 CompareNode<VarDecl>>
1234 byVar;
1235 // These Gadgets are not related to pointer variables (e. g. temporaries).
1236 llvm::SmallVector<const WarningGadget *, 16> noVar;
1239 static WarningGadgetSets
1240 groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations) {
1241 WarningGadgetSets result;
1242 // If some gadgets cover more than one
1243 // variable, they'll appear more than once in the map.
1244 for (auto &G : AllUnsafeOperations) {
1245 DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites();
1247 bool AssociatedWithVarDecl = false;
1248 for (const DeclRefExpr *DRE : ClaimedVarUseSites) {
1249 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
1250 result.byVar[VD].insert(G.get());
1251 AssociatedWithVarDecl = true;
1255 if (!AssociatedWithVarDecl) {
1256 result.noVar.push_back(G.get());
1257 continue;
1260 return result;
1263 struct FixableGadgetSets {
1264 std::map<const VarDecl *, std::set<const FixableGadget *>,
1265 // To keep keys sorted by their locations in the map so that the
1266 // order is deterministic:
1267 CompareNode<VarDecl>>
1268 byVar;
1271 static FixableGadgetSets
1272 groupFixablesByVar(FixableGadgetList &&AllFixableOperations) {
1273 FixableGadgetSets FixablesForUnsafeVars;
1274 for (auto &F : AllFixableOperations) {
1275 DeclUseList DREs = F->getClaimedVarUseSites();
1277 for (const DeclRefExpr *DRE : DREs) {
1278 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
1279 FixablesForUnsafeVars.byVar[VD].insert(F.get());
1283 return FixablesForUnsafeVars;
1286 bool clang::internal::anyConflict(const SmallVectorImpl<FixItHint> &FixIts,
1287 const SourceManager &SM) {
1288 // A simple interval overlap detection algorithm. Sorts all ranges by their
1289 // begin location then finds the first overlap in one pass.
1290 std::vector<const FixItHint *> All; // a copy of `FixIts`
1292 for (const FixItHint &H : FixIts)
1293 All.push_back(&H);
1294 std::sort(All.begin(), All.end(),
1295 [&SM](const FixItHint *H1, const FixItHint *H2) {
1296 return SM.isBeforeInTranslationUnit(H1->RemoveRange.getBegin(),
1297 H2->RemoveRange.getBegin());
1300 const FixItHint *CurrHint = nullptr;
1302 for (const FixItHint *Hint : All) {
1303 if (!CurrHint ||
1304 SM.isBeforeInTranslationUnit(CurrHint->RemoveRange.getEnd(),
1305 Hint->RemoveRange.getBegin())) {
1306 // Either to initialize `CurrHint` or `CurrHint` does not
1307 // overlap with `Hint`:
1308 CurrHint = Hint;
1309 } else
1310 // In case `Hint` overlaps the `CurrHint`, we found at least one
1311 // conflict:
1312 return true;
1314 return false;
1317 std::optional<FixItList>
1318 PointerAssignmentGadget::getFixits(const Strategy &S) const {
1319 const auto *LeftVD = cast<VarDecl>(PtrLHS->getDecl());
1320 const auto *RightVD = cast<VarDecl>(PtrRHS->getDecl());
1321 switch (S.lookup(LeftVD)) {
1322 case Strategy::Kind::Span:
1323 if (S.lookup(RightVD) == Strategy::Kind::Span)
1324 return FixItList{};
1325 return std::nullopt;
1326 case Strategy::Kind::Wontfix:
1327 return std::nullopt;
1328 case Strategy::Kind::Iterator:
1329 case Strategy::Kind::Array:
1330 case Strategy::Kind::Vector:
1331 llvm_unreachable("unsupported strategies for FixableGadgets");
1333 return std::nullopt;
1336 std::optional<FixItList>
1337 PointerInitGadget::getFixits(const Strategy &S) const {
1338 const auto *LeftVD = PtrInitLHS;
1339 const auto *RightVD = cast<VarDecl>(PtrInitRHS->getDecl());
1340 switch (S.lookup(LeftVD)) {
1341 case Strategy::Kind::Span:
1342 if (S.lookup(RightVD) == Strategy::Kind::Span)
1343 return FixItList{};
1344 return std::nullopt;
1345 case Strategy::Kind::Wontfix:
1346 return std::nullopt;
1347 case Strategy::Kind::Iterator:
1348 case Strategy::Kind::Array:
1349 case Strategy::Kind::Vector:
1350 llvm_unreachable("unsupported strategies for FixableGadgets");
1352 return std::nullopt;
1355 static bool isNonNegativeIntegerExpr(const Expr *Expr, const VarDecl *VD,
1356 const ASTContext &Ctx) {
1357 if (auto ConstVal = Expr->getIntegerConstantExpr(Ctx)) {
1358 if (ConstVal->isNegative())
1359 return false;
1360 } else if (!Expr->getType()->isUnsignedIntegerType())
1361 return false;
1362 return true;
1365 std::optional<FixItList>
1366 ULCArraySubscriptGadget::getFixits(const Strategy &S) const {
1367 if (const auto *DRE =
1368 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts()))
1369 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
1370 switch (S.lookup(VD)) {
1371 case Strategy::Kind::Span: {
1373 // If the index has a negative constant value, we give up as no valid
1374 // fix-it can be generated:
1375 const ASTContext &Ctx = // FIXME: we need ASTContext to be passed in!
1376 VD->getASTContext();
1377 if (!isNonNegativeIntegerExpr(Node->getIdx(), VD, Ctx))
1378 return std::nullopt;
1379 // no-op is a good fix-it, otherwise
1380 return FixItList{};
1382 case Strategy::Kind::Wontfix:
1383 case Strategy::Kind::Iterator:
1384 case Strategy::Kind::Array:
1385 case Strategy::Kind::Vector:
1386 llvm_unreachable("unsupported strategies for FixableGadgets");
1389 return std::nullopt;
1392 static std::optional<FixItList> // forward declaration
1393 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node);
1395 std::optional<FixItList>
1396 UPCAddressofArraySubscriptGadget::getFixits(const Strategy &S) const {
1397 auto DREs = getClaimedVarUseSites();
1398 const auto *VD = cast<VarDecl>(DREs.front()->getDecl());
1400 switch (S.lookup(VD)) {
1401 case Strategy::Kind::Span:
1402 return fixUPCAddressofArraySubscriptWithSpan(Node);
1403 case Strategy::Kind::Wontfix:
1404 case Strategy::Kind::Iterator:
1405 case Strategy::Kind::Array:
1406 case Strategy::Kind::Vector:
1407 llvm_unreachable("unsupported strategies for FixableGadgets");
1409 return std::nullopt; // something went wrong, no fix-it
1412 // FIXME: this function should be customizable through format
1413 static StringRef getEndOfLine() {
1414 static const char *const EOL = "\n";
1415 return EOL;
1418 // Returns the text indicating that the user needs to provide input there:
1419 std::string getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") {
1420 std::string s = std::string("<# ");
1421 s += HintTextToUser;
1422 s += " #>";
1423 return s;
1426 // Return the text representation of the given `APInt Val`:
1427 static std::string getAPIntText(APInt Val) {
1428 SmallVector<char> Txt;
1429 Val.toString(Txt, 10, true);
1430 // APInt::toString does not add '\0' to the end of the string for us:
1431 Txt.push_back('\0');
1432 return Txt.data();
1435 // Return the source location of the last character of the AST `Node`.
1436 template <typename NodeTy>
1437 static std::optional<SourceLocation>
1438 getEndCharLoc(const NodeTy *Node, const SourceManager &SM,
1439 const LangOptions &LangOpts) {
1440 unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts);
1441 SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1);
1443 if (Loc.isValid())
1444 return Loc;
1446 return std::nullopt;
1449 // Return the source location just past the last character of the AST `Node`.
1450 template <typename NodeTy>
1451 static std::optional<SourceLocation> getPastLoc(const NodeTy *Node,
1452 const SourceManager &SM,
1453 const LangOptions &LangOpts) {
1454 SourceLocation Loc =
1455 Lexer::getLocForEndOfToken(Node->getEndLoc(), 0, SM, LangOpts);
1456 if (Loc.isValid())
1457 return Loc;
1458 return std::nullopt;
1461 // Return text representation of an `Expr`.
1462 static std::optional<StringRef> getExprText(const Expr *E,
1463 const SourceManager &SM,
1464 const LangOptions &LangOpts) {
1465 std::optional<SourceLocation> LastCharLoc = getPastLoc(E, SM, LangOpts);
1467 if (LastCharLoc)
1468 return Lexer::getSourceText(
1469 CharSourceRange::getCharRange(E->getBeginLoc(), *LastCharLoc), SM,
1470 LangOpts);
1472 return std::nullopt;
1475 // Returns the literal text in `SourceRange SR`, if `SR` is a valid range.
1476 static std::optional<StringRef> getRangeText(SourceRange SR,
1477 const SourceManager &SM,
1478 const LangOptions &LangOpts) {
1479 bool Invalid = false;
1480 CharSourceRange CSR = CharSourceRange::getCharRange(SR);
1481 StringRef Text = Lexer::getSourceText(CSR, SM, LangOpts, &Invalid);
1483 if (!Invalid)
1484 return Text;
1485 return std::nullopt;
1488 // Returns the begin location of the identifier of the given variable
1489 // declaration.
1490 static SourceLocation getVarDeclIdentifierLoc(const VarDecl *VD) {
1491 // According to the implementation of `VarDecl`, `VD->getLocation()` actually
1492 // returns the begin location of the identifier of the declaration:
1493 return VD->getLocation();
1496 // Returns the literal text of the identifier of the given variable declaration.
1497 static std::optional<StringRef>
1498 getVarDeclIdentifierText(const VarDecl *VD, const SourceManager &SM,
1499 const LangOptions &LangOpts) {
1500 SourceLocation ParmIdentBeginLoc = getVarDeclIdentifierLoc(VD);
1501 SourceLocation ParmIdentEndLoc =
1502 Lexer::getLocForEndOfToken(ParmIdentBeginLoc, 0, SM, LangOpts);
1504 if (ParmIdentEndLoc.isMacroID() &&
1505 !Lexer::isAtEndOfMacroExpansion(ParmIdentEndLoc, SM, LangOpts))
1506 return std::nullopt;
1507 return getRangeText({ParmIdentBeginLoc, ParmIdentEndLoc}, SM, LangOpts);
1510 // We cannot fix a variable declaration if it has some other specifiers than the
1511 // type specifier. Because the source ranges of those specifiers could overlap
1512 // with the source range that is being replaced using fix-its. Especially when
1513 // we often cannot obtain accurate source ranges of cv-qualified type
1514 // specifiers.
1515 // FIXME: also deal with type attributes
1516 static bool hasUnsupportedSpecifiers(const VarDecl *VD,
1517 const SourceManager &SM) {
1518 // AttrRangeOverlapping: true if at least one attribute of `VD` overlaps the
1519 // source range of `VD`:
1520 bool AttrRangeOverlapping = llvm::any_of(VD->attrs(), [&](Attr *At) -> bool {
1521 return !(SM.isBeforeInTranslationUnit(At->getRange().getEnd(),
1522 VD->getBeginLoc())) &&
1523 !(SM.isBeforeInTranslationUnit(VD->getEndLoc(),
1524 At->getRange().getBegin()));
1526 return VD->isInlineSpecified() || VD->isConstexpr() ||
1527 VD->hasConstantInitialization() || !VD->hasLocalStorage() ||
1528 AttrRangeOverlapping;
1531 // Returns the `SourceRange` of `D`. The reason why this function exists is
1532 // that `D->getSourceRange()` may return a range where the end location is the
1533 // starting location of the last token. The end location of the source range
1534 // returned by this function is the last location of the last token.
1535 static SourceRange getSourceRangeToTokenEnd(const Decl *D,
1536 const SourceManager &SM,
1537 const LangOptions &LangOpts) {
1538 SourceLocation Begin = D->getBeginLoc();
1539 SourceLocation
1540 End = // `D->getEndLoc` should always return the starting location of the
1541 // last token, so we should get the end of the token
1542 Lexer::getLocForEndOfToken(D->getEndLoc(), 0, SM, LangOpts);
1544 return SourceRange(Begin, End);
1547 // Returns the text of the pointee type of `T` from a `VarDecl` of a pointer
1548 // type. The text is obtained through from `TypeLoc`s. Since `TypeLoc` does not
1549 // have source ranges of qualifiers ( The `QualifiedTypeLoc` looks hacky too me
1550 // :( ), `Qualifiers` of the pointee type is returned separately through the
1551 // output parameter `QualifiersToAppend`.
1552 static std::optional<std::string>
1553 getPointeeTypeText(const VarDecl *VD, const SourceManager &SM,
1554 const LangOptions &LangOpts,
1555 std::optional<Qualifiers> *QualifiersToAppend) {
1556 QualType Ty = VD->getType();
1557 QualType PteTy;
1559 assert(Ty->isPointerType() && !Ty->isFunctionPointerType() &&
1560 "Expecting a VarDecl of type of pointer to object type");
1561 PteTy = Ty->getPointeeType();
1563 TypeLoc TyLoc = VD->getTypeSourceInfo()->getTypeLoc().getUnqualifiedLoc();
1564 TypeLoc PteTyLoc;
1566 // We only deal with the cases that we know `TypeLoc::getNextTypeLoc` returns
1567 // the `TypeLoc` of the pointee type:
1568 switch (TyLoc.getTypeLocClass()) {
1569 case TypeLoc::ConstantArray:
1570 case TypeLoc::IncompleteArray:
1571 case TypeLoc::VariableArray:
1572 case TypeLoc::DependentSizedArray:
1573 case TypeLoc::Decayed:
1574 assert(isa<ParmVarDecl>(VD) && "An array type shall not be treated as a "
1575 "pointer type unless it decays.");
1576 PteTyLoc = TyLoc.getNextTypeLoc();
1577 break;
1578 case TypeLoc::Pointer:
1579 PteTyLoc = TyLoc.castAs<PointerTypeLoc>().getPointeeLoc();
1580 break;
1581 default:
1582 return std::nullopt;
1584 if (PteTyLoc.isNull())
1585 // Sometimes we cannot get a useful `TypeLoc` for the pointee type, e.g.,
1586 // when the pointer type is `auto`.
1587 return std::nullopt;
1589 SourceLocation IdentLoc = getVarDeclIdentifierLoc(VD);
1591 if (!(IdentLoc.isValid() && PteTyLoc.getSourceRange().isValid())) {
1592 // We are expecting these locations to be valid. But in some cases, they are
1593 // not all valid. It is a Clang bug to me and we are not responsible for
1594 // fixing it. So we will just give up for now when it happens.
1595 return std::nullopt;
1598 // Note that TypeLoc.getEndLoc() returns the begin location of the last token:
1599 SourceLocation PteEndOfTokenLoc =
1600 Lexer::getLocForEndOfToken(PteTyLoc.getEndLoc(), 0, SM, LangOpts);
1602 if (!PteEndOfTokenLoc.isValid())
1603 // Sometimes we cannot get the end location of the pointee type, e.g., when
1604 // there are macros involved.
1605 return std::nullopt;
1606 if (!SM.isBeforeInTranslationUnit(PteEndOfTokenLoc, IdentLoc)) {
1607 // We only deal with the cases where the source text of the pointee type
1608 // appears on the left-hand side of the variable identifier completely,
1609 // including the following forms:
1610 // `T ident`,
1611 // `T ident[]`, where `T` is any type.
1612 // Examples of excluded cases are `T (*ident)[]` or `T ident[][n]`.
1613 return std::nullopt;
1615 if (PteTy.hasQualifiers()) {
1616 // TypeLoc does not provide source ranges for qualifiers (it says it's
1617 // intentional but seems fishy to me), so we cannot get the full text
1618 // `PteTy` via source ranges.
1619 *QualifiersToAppend = PteTy.getQualifiers();
1621 return getRangeText({PteTyLoc.getBeginLoc(), PteEndOfTokenLoc}, SM, LangOpts)
1622 ->str();
1625 // Returns the text of the name (with qualifiers) of a `FunctionDecl`.
1626 static std::optional<StringRef> getFunNameText(const FunctionDecl *FD,
1627 const SourceManager &SM,
1628 const LangOptions &LangOpts) {
1629 SourceLocation BeginLoc = FD->getQualifier()
1630 ? FD->getQualifierLoc().getBeginLoc()
1631 : FD->getNameInfo().getBeginLoc();
1632 // Note that `FD->getNameInfo().getEndLoc()` returns the begin location of the
1633 // last token:
1634 SourceLocation EndLoc = Lexer::getLocForEndOfToken(
1635 FD->getNameInfo().getEndLoc(), 0, SM, LangOpts);
1636 SourceRange NameRange{BeginLoc, EndLoc};
1638 return getRangeText(NameRange, SM, LangOpts);
1641 // Returns the text representing a `std::span` type where the element type is
1642 // represented by `EltTyText`.
1644 // Note the optional parameter `Qualifiers`: one needs to pass qualifiers
1645 // explicitly if the element type needs to be qualified.
1646 static std::string
1647 getSpanTypeText(StringRef EltTyText,
1648 std::optional<Qualifiers> Quals = std::nullopt) {
1649 const char *const SpanOpen = "std::span<";
1651 if (Quals)
1652 return SpanOpen + EltTyText.str() + ' ' + Quals->getAsString() + '>';
1653 return SpanOpen + EltTyText.str() + '>';
1656 std::optional<FixItList>
1657 DerefSimplePtrArithFixableGadget::getFixits(const Strategy &s) const {
1658 const VarDecl *VD = dyn_cast<VarDecl>(BaseDeclRefExpr->getDecl());
1660 if (VD && s.lookup(VD) == Strategy::Kind::Span) {
1661 ASTContext &Ctx = VD->getASTContext();
1662 // std::span can't represent elements before its begin()
1663 if (auto ConstVal = Offset->getIntegerConstantExpr(Ctx))
1664 if (ConstVal->isNegative())
1665 return std::nullopt;
1667 // note that the expr may (oddly) has multiple layers of parens
1668 // example:
1669 // *((..(pointer + 123)..))
1670 // goal:
1671 // pointer[123]
1672 // Fix-It:
1673 // remove '*('
1674 // replace ' + ' with '['
1675 // replace ')' with ']'
1677 // example:
1678 // *((..(123 + pointer)..))
1679 // goal:
1680 // 123[pointer]
1681 // Fix-It:
1682 // remove '*('
1683 // replace ' + ' with '['
1684 // replace ')' with ']'
1686 const Expr *LHS = AddOp->getLHS(), *RHS = AddOp->getRHS();
1687 const SourceManager &SM = Ctx.getSourceManager();
1688 const LangOptions &LangOpts = Ctx.getLangOpts();
1689 CharSourceRange StarWithTrailWhitespace =
1690 clang::CharSourceRange::getCharRange(DerefOp->getOperatorLoc(),
1691 LHS->getBeginLoc());
1693 std::optional<SourceLocation> LHSLocation = getPastLoc(LHS, SM, LangOpts);
1694 if (!LHSLocation)
1695 return std::nullopt;
1697 CharSourceRange PlusWithSurroundingWhitespace =
1698 clang::CharSourceRange::getCharRange(*LHSLocation, RHS->getBeginLoc());
1700 std::optional<SourceLocation> AddOpLocation =
1701 getPastLoc(AddOp, SM, LangOpts);
1702 std::optional<SourceLocation> DerefOpLocation =
1703 getPastLoc(DerefOp, SM, LangOpts);
1705 if (!AddOpLocation || !DerefOpLocation)
1706 return std::nullopt;
1708 CharSourceRange ClosingParenWithPrecWhitespace =
1709 clang::CharSourceRange::getCharRange(*AddOpLocation, *DerefOpLocation);
1711 return FixItList{
1712 {FixItHint::CreateRemoval(StarWithTrailWhitespace),
1713 FixItHint::CreateReplacement(PlusWithSurroundingWhitespace, "["),
1714 FixItHint::CreateReplacement(ClosingParenWithPrecWhitespace, "]")}};
1716 return std::nullopt; // something wrong or unsupported, give up
1719 std::optional<FixItList>
1720 PointerDereferenceGadget::getFixits(const Strategy &S) const {
1721 const VarDecl *VD = cast<VarDecl>(BaseDeclRefExpr->getDecl());
1722 switch (S.lookup(VD)) {
1723 case Strategy::Kind::Span: {
1724 ASTContext &Ctx = VD->getASTContext();
1725 SourceManager &SM = Ctx.getSourceManager();
1726 // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0]
1727 // Deletes the *operand
1728 CharSourceRange derefRange = clang::CharSourceRange::getCharRange(
1729 Op->getBeginLoc(), Op->getBeginLoc().getLocWithOffset(1));
1730 // Inserts the [0]
1731 if (auto LocPastOperand =
1732 getPastLoc(BaseDeclRefExpr, SM, Ctx.getLangOpts())) {
1733 return FixItList{{FixItHint::CreateRemoval(derefRange),
1734 FixItHint::CreateInsertion(*LocPastOperand, "[0]")}};
1736 break;
1738 case Strategy::Kind::Iterator:
1739 case Strategy::Kind::Array:
1740 case Strategy::Kind::Vector:
1741 llvm_unreachable("Strategy not implemented yet!");
1742 case Strategy::Kind::Wontfix:
1743 llvm_unreachable("Invalid strategy!");
1746 return std::nullopt;
1749 // Generates fix-its replacing an expression of the form UPC(DRE) with
1750 // `DRE.data()`
1751 std::optional<FixItList> UPCStandalonePointerGadget::getFixits(const Strategy &S)
1752 const {
1753 const auto VD = cast<VarDecl>(Node->getDecl());
1754 switch (S.lookup(VD)) {
1755 case Strategy::Kind::Span: {
1756 ASTContext &Ctx = VD->getASTContext();
1757 SourceManager &SM = Ctx.getSourceManager();
1758 // Inserts the .data() after the DRE
1759 std::optional<SourceLocation> EndOfOperand =
1760 getPastLoc(Node, SM, Ctx.getLangOpts());
1762 if (EndOfOperand)
1763 return FixItList{{FixItHint::CreateInsertion(
1764 *EndOfOperand, ".data()")}};
1765 // FIXME: Points inside a macro expansion.
1766 break;
1768 case Strategy::Kind::Wontfix:
1769 case Strategy::Kind::Iterator:
1770 case Strategy::Kind::Array:
1771 case Strategy::Kind::Vector:
1772 llvm_unreachable("unsupported strategies for FixableGadgets");
1775 return std::nullopt;
1778 // Generates fix-its replacing an expression of the form `&DRE[e]` with
1779 // `&DRE.data()[e]`:
1780 static std::optional<FixItList>
1781 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) {
1782 const auto *ArraySub = cast<ArraySubscriptExpr>(Node->getSubExpr());
1783 const auto *DRE = cast<DeclRefExpr>(ArraySub->getBase()->IgnoreImpCasts());
1784 // FIXME: this `getASTContext` call is costly, we should pass the
1785 // ASTContext in:
1786 const ASTContext &Ctx = DRE->getDecl()->getASTContext();
1787 const Expr *Idx = ArraySub->getIdx();
1788 const SourceManager &SM = Ctx.getSourceManager();
1789 const LangOptions &LangOpts = Ctx.getLangOpts();
1790 std::stringstream SS;
1791 bool IdxIsLitZero = false;
1793 if (auto ICE = Idx->getIntegerConstantExpr(Ctx))
1794 if ((*ICE).isZero())
1795 IdxIsLitZero = true;
1796 std::optional<StringRef> DreString = getExprText(DRE, SM, LangOpts);
1797 if (!DreString)
1798 return std::nullopt;
1800 if (IdxIsLitZero) {
1801 // If the index is literal zero, we produce the most concise fix-it:
1802 SS << (*DreString).str() << ".data()";
1803 } else {
1804 std::optional<StringRef> IndexString = getExprText(Idx, SM, LangOpts);
1805 if (!IndexString)
1806 return std::nullopt;
1808 SS << "&" << (*DreString).str() << ".data()"
1809 << "[" << (*IndexString).str() << "]";
1811 return FixItList{
1812 FixItHint::CreateReplacement(Node->getSourceRange(), SS.str())};
1815 std::optional<FixItList>
1816 UUCAddAssignGadget::getFixits(const Strategy &S) const {
1817 DeclUseList DREs = getClaimedVarUseSites();
1819 if (DREs.size() != 1)
1820 return std::nullopt; // In cases of `Ptr += n` where `Ptr` is not a DRE, we
1821 // give up
1822 if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) {
1823 if (S.lookup(VD) == Strategy::Kind::Span) {
1824 FixItList Fixes;
1826 const Stmt *AddAssignNode = getBaseStmt();
1827 StringRef varName = VD->getName();
1828 const ASTContext &Ctx = VD->getASTContext();
1830 if (!isNonNegativeIntegerExpr(Offset, VD, Ctx))
1831 return std::nullopt;
1833 // To transform UUC(p += n) to UUC(p = p.subspan(..)):
1834 bool NotParenExpr =
1835 (Offset->IgnoreParens()->getBeginLoc() == Offset->getBeginLoc());
1836 std::string SS = varName.str() + " = " + varName.str() + ".subspan";
1837 if (NotParenExpr)
1838 SS += "(";
1840 std::optional<SourceLocation> AddAssignLocation = getEndCharLoc(
1841 AddAssignNode, Ctx.getSourceManager(), Ctx.getLangOpts());
1842 if (!AddAssignLocation)
1843 return std::nullopt;
1845 Fixes.push_back(FixItHint::CreateReplacement(
1846 SourceRange(AddAssignNode->getBeginLoc(), Node->getOperatorLoc()),
1847 SS));
1848 if (NotParenExpr)
1849 Fixes.push_back(FixItHint::CreateInsertion(
1850 Offset->getEndLoc().getLocWithOffset(1), ")"));
1851 return Fixes;
1854 return std::nullopt; // Not in the cases that we can handle for now, give up.
1857 std::optional<FixItList> UPCPreIncrementGadget::getFixits(const Strategy &S) const {
1858 DeclUseList DREs = getClaimedVarUseSites();
1860 if (DREs.size() != 1)
1861 return std::nullopt; // In cases of `++Ptr` where `Ptr` is not a DRE, we
1862 // give up
1863 if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) {
1864 if (S.lookup(VD) == Strategy::Kind::Span) {
1865 FixItList Fixes;
1866 std::stringstream SS;
1867 const Stmt *PreIncNode = getBaseStmt();
1868 StringRef varName = VD->getName();
1869 const ASTContext &Ctx = VD->getASTContext();
1871 // To transform UPC(++p) to UPC((p = p.subspan(1)).data()):
1872 SS << "(" << varName.data() << " = " << varName.data()
1873 << ".subspan(1)).data()";
1874 std::optional<SourceLocation> PreIncLocation =
1875 getEndCharLoc(PreIncNode, Ctx.getSourceManager(), Ctx.getLangOpts());
1876 if (!PreIncLocation)
1877 return std::nullopt;
1879 Fixes.push_back(FixItHint::CreateReplacement(
1880 SourceRange(PreIncNode->getBeginLoc(), *PreIncLocation), SS.str()));
1881 return Fixes;
1884 return std::nullopt; // Not in the cases that we can handle for now, give up.
1888 // For a non-null initializer `Init` of `T *` type, this function returns
1889 // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it
1890 // to output stream.
1891 // In many cases, this function cannot figure out the actual extent `S`. It
1892 // then will use a place holder to replace `S` to ask users to fill `S` in. The
1893 // initializer shall be used to initialize a variable of type `std::span<T>`.
1895 // FIXME: Support multi-level pointers
1897 // Parameters:
1898 // `Init` a pointer to the initializer expression
1899 // `Ctx` a reference to the ASTContext
1900 static FixItList
1901 FixVarInitializerWithSpan(const Expr *Init, ASTContext &Ctx,
1902 const StringRef UserFillPlaceHolder) {
1903 const SourceManager &SM = Ctx.getSourceManager();
1904 const LangOptions &LangOpts = Ctx.getLangOpts();
1906 // If `Init` has a constant value that is (or equivalent to) a
1907 // NULL pointer, we use the default constructor to initialize the span
1908 // object, i.e., a `std:span` variable declaration with no initializer.
1909 // So the fix-it is just to remove the initializer.
1910 if (Init->isNullPointerConstant(Ctx,
1911 // FIXME: Why does this function not ask for `const ASTContext
1912 // &`? It should. Maybe worth an NFC patch later.
1913 Expr::NullPointerConstantValueDependence::
1914 NPC_ValueDependentIsNotNull)) {
1915 std::optional<SourceLocation> InitLocation =
1916 getEndCharLoc(Init, SM, LangOpts);
1917 if (!InitLocation)
1918 return {};
1920 SourceRange SR(Init->getBeginLoc(), *InitLocation);
1922 return {FixItHint::CreateRemoval(SR)};
1925 FixItList FixIts{};
1926 std::string ExtentText = UserFillPlaceHolder.data();
1927 StringRef One = "1";
1929 // Insert `{` before `Init`:
1930 FixIts.push_back(FixItHint::CreateInsertion(Init->getBeginLoc(), "{"));
1931 // Try to get the data extent. Break into different cases:
1932 if (auto CxxNew = dyn_cast<CXXNewExpr>(Init->IgnoreImpCasts())) {
1933 // In cases `Init` is `new T[n]` and there is no explicit cast over
1934 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects
1935 // of `T`. So the extent is `n` unless `n` has side effects. Similar but
1936 // simpler for the case where `Init` is `new T`.
1937 if (const Expr *Ext = CxxNew->getArraySize().value_or(nullptr)) {
1938 if (!Ext->HasSideEffects(Ctx)) {
1939 std::optional<StringRef> ExtentString = getExprText(Ext, SM, LangOpts);
1940 if (!ExtentString)
1941 return {};
1942 ExtentText = *ExtentString;
1944 } else if (!CxxNew->isArray())
1945 // Although the initializer is not allocating a buffer, the pointer
1946 // variable could still be used in buffer access operations.
1947 ExtentText = One;
1948 } else if (const auto *CArrTy = Ctx.getAsConstantArrayType(
1949 Init->IgnoreImpCasts()->getType())) {
1950 // In cases `Init` is of an array type after stripping off implicit casts,
1951 // the extent is the array size. Note that if the array size is not a
1952 // constant, we cannot use it as the extent.
1953 ExtentText = getAPIntText(CArrTy->getSize());
1954 } else {
1955 // In cases `Init` is of the form `&Var` after stripping of implicit
1956 // casts, where `&` is the built-in operator, the extent is 1.
1957 if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Init->IgnoreImpCasts()))
1958 if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf &&
1959 isa_and_present<DeclRefExpr>(AddrOfExpr->getSubExpr()))
1960 ExtentText = One;
1961 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`,
1962 // and explicit casting, etc. etc.
1965 SmallString<32> StrBuffer{};
1966 std::optional<SourceLocation> LocPassInit = getPastLoc(Init, SM, LangOpts);
1968 if (!LocPassInit)
1969 return {};
1971 StrBuffer.append(", ");
1972 StrBuffer.append(ExtentText);
1973 StrBuffer.append("}");
1974 FixIts.push_back(FixItHint::CreateInsertion(*LocPassInit, StrBuffer.str()));
1975 return FixIts;
// In builds without NDEBUG, records a debug note on the declaration `D`
// explaining why no fix-it was produced for it; `Msg` is appended to the
// note. Expects a variable named `Handler` to be in scope at the use site.
// With NDEBUG defined, it expands to nothing.
#ifndef NDEBUG
#define DEBUG_NOTE_DECL_FAIL(D, Msg)                                           \
  Handler.addDebugNoteForVar((D), (D)->getBeginLoc(),                          \
                             "failed to produce fixit for declaration '" +     \
                                 (D)->getNameAsString() + "'" + (Msg))
#else
#define DEBUG_NOTE_DECL_FAIL(D, Msg)
#endif
1985 // For the given variable declaration with a pointer-to-T type, returns the text
1986 // `std::span<T>`. If it is unable to generate the text, returns
1987 // `std::nullopt`.
1988 static std::optional<std::string> createSpanTypeForVarDecl(const VarDecl *VD,
1989 const ASTContext &Ctx) {
1990 assert(VD->getType()->isPointerType());
1992 std::optional<Qualifiers> PteTyQualifiers = std::nullopt;
1993 std::optional<std::string> PteTyText = getPointeeTypeText(
1994 VD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers);
1996 if (!PteTyText)
1997 return std::nullopt;
1999 std::string SpanTyText = "std::span<";
2001 SpanTyText.append(*PteTyText);
2002 // Append qualifiers to span element type if any:
2003 if (PteTyQualifiers) {
2004 SpanTyText.append(" ");
2005 SpanTyText.append(PteTyQualifiers->getAsString());
2007 SpanTyText.append(">");
2008 return SpanTyText;
2011 // For a `VarDecl` of the form `T * var (= Init)?`, this
2012 // function generates fix-its that
2013 // 1) replace `T * var` with `std::span<T> var`; and
2014 // 2) change `Init` accordingly to a span constructor, if it exists.
2016 // FIXME: support Multi-level pointers
2018 // Parameters:
2019 // `D` a pointer the variable declaration node
2020 // `Ctx` a reference to the ASTContext
2021 // `UserFillPlaceHolder` the user-input placeholder text
2022 // Returns:
2023 // the non-empty fix-it list, if fix-its are successfuly generated; empty
2024 // list otherwise.
2025 static FixItList fixLocalVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx,
2026 const StringRef UserFillPlaceHolder,
2027 UnsafeBufferUsageHandler &Handler) {
2028 if (hasUnsupportedSpecifiers(D, Ctx.getSourceManager()))
2029 return {};
2031 FixItList FixIts{};
2032 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(D, Ctx);
2034 if (!SpanTyText) {
2035 DEBUG_NOTE_DECL_FAIL(D, " : failed to generate 'std::span' type");
2036 return {};
2039 // Will hold the text for `std::span<T> Ident`:
2040 std::stringstream SS;
2042 SS << *SpanTyText;
2043 // Append qualifiers to the type of `D`, if any:
2044 if (D->getType().hasQualifiers())
2045 SS << " " << D->getType().getQualifiers().getAsString();
2047 // The end of the range of the original source that will be replaced
2048 // by `std::span<T> ident`:
2049 SourceLocation EndLocForReplacement = D->getEndLoc();
2050 std::optional<StringRef> IdentText =
2051 getVarDeclIdentifierText(D, Ctx.getSourceManager(), Ctx.getLangOpts());
2053 if (!IdentText) {
2054 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier");
2055 return {};
2057 // Fix the initializer if it exists:
2058 if (const Expr *Init = D->getInit()) {
2059 FixItList InitFixIts =
2060 FixVarInitializerWithSpan(Init, Ctx, UserFillPlaceHolder);
2061 if (InitFixIts.empty())
2062 return {};
2063 FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts.begin()),
2064 std::make_move_iterator(InitFixIts.end()));
2065 // If the declaration has the form `T *ident = init`, we want to replace
2066 // `T *ident = ` with `std::span<T> ident`:
2067 EndLocForReplacement = Init->getBeginLoc().getLocWithOffset(-1);
2069 SS << " " << IdentText->str();
2070 if (!EndLocForReplacement.isValid()) {
2071 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the end of the declaration");
2072 return {};
2074 FixIts.push_back(FixItHint::CreateReplacement(
2075 SourceRange(D->getBeginLoc(), EndLocForReplacement), SS.str()));
2076 return FixIts;
2079 static bool hasConflictingOverload(const FunctionDecl *FD) {
2080 return !FD->getDeclContext()->lookup(FD->getDeclName()).isSingleResult();
2083 // For a `FunctionDecl`, whose `ParmVarDecl`s are being changed to have new
2084 // types, this function produces fix-its to make the change self-contained. Let
2085 // 'F' be the entity defined by the original `FunctionDecl` and "NewF" be the
2086 // entity defined by the `FunctionDecl` after the change to the parameters.
2087 // Fix-its produced by this function are
2088 // 1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration
2089 // of 'F';
2090 // 2. Create a declaration of "NewF" next to each declaration of `F`;
2091 // 3. Create a definition of "F" (as its' original definition is now belongs
2092 // to "NewF") next to its original definition. The body of the creating
2093 // definition calls to "NewF".
2095 // Example:
2097 // void f(int *p); // original declaration
2098 // void f(int *p) { // original definition
2099 // p[5];
2100 // }
2102 // To change the parameter `p` to be of `std::span<int>` type, we
2103 // also add overloads:
2105 // [[clang::unsafe_buffer_usage]] void f(int *p); // original decl
2106 // void f(std::span<int> p); // added overload decl
2107 // void f(std::span<int> p) { // original def where param is changed
2108 // p[5];
2109 // }
2110 // [[clang::unsafe_buffer_usage]] void f(int *p) { // added def
2111 // return f(std::span(p, <# size #>));
2112 // }
2114 static std::optional<FixItList>
2115 createOverloadsForFixedParams(const Strategy &S, const FunctionDecl *FD,
2116 const ASTContext &Ctx,
2117 UnsafeBufferUsageHandler &Handler) {
2118 // FIXME: need to make this conflict checking better:
2119 if (hasConflictingOverload(FD))
2120 return std::nullopt;
2122 const SourceManager &SM = Ctx.getSourceManager();
2123 const LangOptions &LangOpts = Ctx.getLangOpts();
2124 const unsigned NumParms = FD->getNumParams();
2125 std::vector<std::string> NewTysTexts(NumParms);
2126 std::vector<bool> ParmsMask(NumParms, false);
2127 bool AtLeastOneParmToFix = false;
2129 for (unsigned i = 0; i < NumParms; i++) {
2130 const ParmVarDecl *PVD = FD->getParamDecl(i);
2132 if (S.lookup(PVD) == Strategy::Kind::Wontfix)
2133 continue;
2134 if (S.lookup(PVD) != Strategy::Kind::Span)
2135 // Not supported, not suppose to happen:
2136 return std::nullopt;
2138 std::optional<Qualifiers> PteTyQuals = std::nullopt;
2139 std::optional<std::string> PteTyText =
2140 getPointeeTypeText(PVD, SM, LangOpts, &PteTyQuals);
2142 if (!PteTyText)
2143 // something wrong in obtaining the text of the pointee type, give up
2144 return std::nullopt;
2145 // FIXME: whether we should create std::span type depends on the Strategy.
2146 NewTysTexts[i] = getSpanTypeText(*PteTyText, PteTyQuals);
2147 ParmsMask[i] = true;
2148 AtLeastOneParmToFix = true;
2150 if (!AtLeastOneParmToFix)
2151 // No need to create function overloads:
2152 return {};
2153 // FIXME Respect indentation of the original code.
2155 // A lambda that creates the text representation of a function declaration
2156 // with the new type signatures:
2157 const auto NewOverloadSignatureCreator =
2158 [&SM, &LangOpts, &NewTysTexts,
2159 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> {
2160 std::stringstream SS;
2162 SS << ";";
2163 SS << getEndOfLine().str();
2164 // Append: ret-type func-name "("
2165 if (auto Prefix = getRangeText(
2166 SourceRange(FD->getBeginLoc(), (*FD->param_begin())->getBeginLoc()),
2167 SM, LangOpts))
2168 SS << Prefix->str();
2169 else
2170 return std::nullopt; // give up
2171 // Append: parameter-type-list
2172 const unsigned NumParms = FD->getNumParams();
2174 for (unsigned i = 0; i < NumParms; i++) {
2175 const ParmVarDecl *Parm = FD->getParamDecl(i);
2177 if (Parm->isImplicit())
2178 continue;
2179 if (ParmsMask[i]) {
2180 // This `i`-th parameter will be fixed with `NewTysTexts[i]` being its
2181 // new type:
2182 SS << NewTysTexts[i];
2183 // print parameter name if provided:
2184 if (IdentifierInfo *II = Parm->getIdentifier())
2185 SS << ' ' << II->getName().str();
2186 } else if (auto ParmTypeText = getRangeText(
2187 getSourceRangeToTokenEnd(Parm, SM, LangOpts),
2188 SM, LangOpts)) {
2189 // print the whole `Parm` without modification:
2190 SS << ParmTypeText->str();
2191 } else
2192 return std::nullopt; // something wrong, give up
2193 if (i != NumParms - 1)
2194 SS << ", ";
2196 SS << ")";
2197 return SS.str();
2200 // A lambda that creates the text representation of a function definition with
2201 // the original signature:
2202 const auto OldOverloadDefCreator =
2203 [&Handler, &SM, &LangOpts, &NewTysTexts,
2204 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> {
2205 std::stringstream SS;
2207 SS << getEndOfLine().str();
2208 // Append: attr-name ret-type func-name "(" param-list ")" "{"
2209 if (auto FDPrefix = getRangeText(
2210 SourceRange(FD->getBeginLoc(), FD->getBody()->getBeginLoc()), SM,
2211 LangOpts))
2212 SS << Handler.getUnsafeBufferUsageAttributeTextAt(FD->getBeginLoc(), " ")
2213 << FDPrefix->str() << "{";
2214 else
2215 return std::nullopt;
2216 // Append: "return" func-name "("
2217 if (auto FunQualName = getFunNameText(FD, SM, LangOpts))
2218 SS << "return " << FunQualName->str() << "(";
2219 else
2220 return std::nullopt;
2222 // Append: arg-list
2223 const unsigned NumParms = FD->getNumParams();
2224 for (unsigned i = 0; i < NumParms; i++) {
2225 const ParmVarDecl *Parm = FD->getParamDecl(i);
2227 if (Parm->isImplicit())
2228 continue;
2229 // FIXME: If a parameter has no name, it is unused in the
2230 // definition. So we could just leave it as it is.
2231 if (!Parm->getIdentifier())
2232 // If a parameter of a function definition has no name:
2233 return std::nullopt;
2234 if (ParmsMask[i])
2235 // This is our spanified paramter!
2236 SS << NewTysTexts[i] << "(" << Parm->getIdentifier()->getName().str()
2237 << ", " << getUserFillPlaceHolder("size") << ")";
2238 else
2239 SS << Parm->getIdentifier()->getName().str();
2240 if (i != NumParms - 1)
2241 SS << ", ";
2243 // finish call and the body
2244 SS << ");}" << getEndOfLine().str();
2245 // FIXME: 80-char line formatting?
2246 return SS.str();
2249 FixItList FixIts{};
2250 for (FunctionDecl *FReDecl : FD->redecls()) {
2251 std::optional<SourceLocation> Loc = getPastLoc(FReDecl, SM, LangOpts);
2253 if (!Loc)
2254 return {};
2255 if (FReDecl->isThisDeclarationADefinition()) {
2256 assert(FReDecl == FD && "inconsistent function definition");
2257 // Inserts a definition with the old signature to the end of
2258 // `FReDecl`:
2259 if (auto OldOverloadDef = OldOverloadDefCreator(FReDecl))
2260 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *OldOverloadDef));
2261 else
2262 return {}; // give up
2263 } else {
2264 // Adds the unsafe-buffer attribute (if not already there) to `FReDecl`:
2265 if (!FReDecl->hasAttr<UnsafeBufferUsageAttr>()) {
2266 FixIts.emplace_back(FixItHint::CreateInsertion(
2267 FReDecl->getBeginLoc(), Handler.getUnsafeBufferUsageAttributeTextAt(
2268 FReDecl->getBeginLoc(), " ")));
2270 // Inserts a declaration with the new signature to the end of `FReDecl`:
2271 if (auto NewOverloadDecl = NewOverloadSignatureCreator(FReDecl))
2272 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *NewOverloadDecl));
2273 else
2274 return {};
2277 return FixIts;
2280 // To fix a `ParmVarDecl` to be of `std::span` type.
2281 static FixItList fixParamWithSpan(const ParmVarDecl *PVD, const ASTContext &Ctx,
2282 UnsafeBufferUsageHandler &Handler) {
2283 if (hasUnsupportedSpecifiers(PVD, Ctx.getSourceManager())) {
2284 DEBUG_NOTE_DECL_FAIL(PVD, " : has unsupport specifier(s)");
2285 return {};
2287 if (PVD->hasDefaultArg()) {
2288 // FIXME: generate fix-its for default values:
2289 DEBUG_NOTE_DECL_FAIL(PVD, " : has default arg");
2290 return {};
2293 std::optional<Qualifiers> PteTyQualifiers = std::nullopt;
2294 std::optional<std::string> PteTyText = getPointeeTypeText(
2295 PVD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers);
2297 if (!PteTyText) {
2298 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid pointee type");
2299 return {};
2302 std::optional<StringRef> PVDNameText = PVD->getIdentifier()->getName();
2304 if (!PVDNameText) {
2305 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid identifier name");
2306 return {};
2309 std::stringstream SS;
2310 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(PVD, Ctx);
2312 if (PteTyQualifiers)
2313 // Append qualifiers if they exist:
2314 SS << getSpanTypeText(*PteTyText, PteTyQualifiers);
2315 else
2316 SS << getSpanTypeText(*PteTyText);
2317 // Append qualifiers to the type of the parameter:
2318 if (PVD->getType().hasQualifiers())
2319 SS << ' ' << PVD->getType().getQualifiers().getAsString();
2320 // Append parameter's name:
2321 SS << ' ' << PVDNameText->str();
2322 // Add replacement fix-it:
2323 return {FixItHint::CreateReplacement(PVD->getSourceRange(), SS.str())};
2326 static FixItList fixVariableWithSpan(const VarDecl *VD,
2327 const DeclUseTracker &Tracker,
2328 ASTContext &Ctx,
2329 UnsafeBufferUsageHandler &Handler) {
2330 const DeclStmt *DS = Tracker.lookupDecl(VD);
2331 if (!DS) {
2332 DEBUG_NOTE_DECL_FAIL(VD, " : variables declared this way not implemented yet");
2333 return {};
2335 if (!DS->isSingleDecl()) {
2336 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
2337 DEBUG_NOTE_DECL_FAIL(VD, " : multiple VarDecls");
2338 return {};
2340 // Currently DS is an unused variable but we'll need it when
2341 // non-single decls are implemented, where the pointee type name
2342 // and the '*' are spread around the place.
2343 (void)DS;
2345 // FIXME: handle cases where DS has multiple declarations
2346 return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler);
2349 // TODO: we should be consistent to use `std::nullopt` to represent no-fix due
2350 // to any unexpected problem.
2351 static FixItList
2352 fixVariable(const VarDecl *VD, Strategy::Kind K,
2353 /* The function decl under analysis */ const Decl *D,
2354 const DeclUseTracker &Tracker, ASTContext &Ctx,
2355 UnsafeBufferUsageHandler &Handler) {
2356 if (const auto *PVD = dyn_cast<ParmVarDecl>(VD)) {
2357 auto *FD = dyn_cast<clang::FunctionDecl>(PVD->getDeclContext());
2358 if (!FD || FD != D) {
2359 // `FD != D` means that `PVD` belongs to a function that is not being
2360 // analyzed currently. Thus `FD` may not be complete.
2361 DEBUG_NOTE_DECL_FAIL(VD, " : function not currently analyzed");
2362 return {};
2365 // TODO If function has a try block we can't change params unless we check
2366 // also its catch block for their use.
2367 // FIXME We might support static class methods, some select methods,
2368 // operators and possibly lamdas.
2369 if (FD->isMain() || FD->isConstexpr() ||
2370 FD->getTemplatedKind() != FunctionDecl::TemplatedKind::TK_NonTemplate ||
2371 FD->isVariadic() ||
2372 // also covers call-operator of lamdas
2373 isa<CXXMethodDecl>(FD) ||
2374 // skip when the function body is a try-block
2375 (FD->hasBody() && isa<CXXTryStmt>(FD->getBody())) ||
2376 FD->isOverloadedOperator()) {
2377 DEBUG_NOTE_DECL_FAIL(VD, " : unsupported function decl");
2378 return {}; // TODO test all these cases
2382 switch (K) {
2383 case Strategy::Kind::Span: {
2384 if (VD->getType()->isPointerType()) {
2385 if (const auto *PVD = dyn_cast<ParmVarDecl>(VD))
2386 return fixParamWithSpan(PVD, Ctx, Handler);
2388 if (VD->isLocalVarDecl())
2389 return fixVariableWithSpan(VD, Tracker, Ctx, Handler);
2391 DEBUG_NOTE_DECL_FAIL(VD, " : not a pointer");
2392 return {};
2394 case Strategy::Kind::Iterator:
2395 case Strategy::Kind::Array:
2396 case Strategy::Kind::Vector:
2397 llvm_unreachable("Strategy not implemented yet!");
2398 case Strategy::Kind::Wontfix:
2399 llvm_unreachable("Invalid strategy!");
2401 llvm_unreachable("Unknown strategy!");
2404 // Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the
2405 // `RemoveRange` of 'h' overlaps with a macro use.
2406 static bool overlapWithMacro(const FixItList &FixIts) {
2407 // FIXME: For now we only check if the range (or the first token) is (part of)
2408 // a macro expansion. Ideally, we want to check for all tokens in the range.
2409 return llvm::any_of(FixIts, [](const FixItHint &Hint) {
2410 auto Range = Hint.RemoveRange;
2411 if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
2412 // If the range (or the first token) is (part of) a macro expansion:
2413 return true;
2414 return false;
2418 // Returns true iff `VD` is a parameter of the declaration `D`:
2419 static bool isParameterOf(const VarDecl *VD, const Decl *D) {
2420 return isa<ParmVarDecl>(VD) &&
2421 VD->getDeclContext() == dyn_cast<DeclContext>(D);
2424 // Erases variables in `FixItsForVariable`, if such a variable has an unfixable
2425 // group mate. A variable `v` is unfixable iff `FixItsForVariable` does not
2426 // contain `v`.
2427 static void eraseVarsForUnfixableGroupMates(
2428 std::map<const VarDecl *, FixItList> &FixItsForVariable,
2429 const VariableGroupsManager &VarGrpMgr) {
2430 // Variables will be removed from `FixItsForVariable`:
2431 SmallVector<const VarDecl *, 8> ToErase;
2433 for (const auto &[VD, Ignore] : FixItsForVariable) {
2434 VarGrpRef Grp = VarGrpMgr.getGroupOfVar(VD);
2435 if (llvm::any_of(Grp,
2436 [&FixItsForVariable](const VarDecl *GrpMember) -> bool {
2437 return !FixItsForVariable.count(GrpMember);
2438 })) {
2439 // At least one group member cannot be fixed, so we have to erase the
2440 // whole group:
2441 for (const VarDecl *Member : Grp)
2442 ToErase.push_back(Member);
2445 for (auto *VarToErase : ToErase)
2446 FixItsForVariable.erase(VarToErase);
2449 // Returns the fix-its that create bounds-safe function overloads for the
2450 // function `D`, if `D`'s parameters will be changed to safe-types through
2451 // fix-its in `FixItsForVariable`.
2453 // NOTE: In case `D`'s parameters will be changed but bounds-safe function
2454 // overloads cannot created, the whole group that contains the parameters will
2455 // be erased from `FixItsForVariable`.
2456 static FixItList createFunctionOverloadsForParms(
2457 std::map<const VarDecl *, FixItList> &FixItsForVariable /* mutable */,
2458 const VariableGroupsManager &VarGrpMgr, const FunctionDecl *FD,
2459 const Strategy &S, ASTContext &Ctx, UnsafeBufferUsageHandler &Handler) {
2460 FixItList FixItsSharedByParms{};
2462 std::optional<FixItList> OverloadFixes =
2463 createOverloadsForFixedParams(S, FD, Ctx, Handler);
2465 if (OverloadFixes) {
2466 FixItsSharedByParms.append(*OverloadFixes);
2467 } else {
2468 // Something wrong in generating `OverloadFixes`, need to remove the
2469 // whole group, where parameters are in, from `FixItsForVariable` (Note
2470 // that all parameters should be in the same group):
2471 for (auto *Member : VarGrpMgr.getGroupOfParms())
2472 FixItsForVariable.erase(Member);
2474 return FixItsSharedByParms;
2477 // Constructs self-contained fix-its for each variable in `FixablesForAllVars`.
2478 static std::map<const VarDecl *, FixItList>
2479 getFixIts(FixableGadgetSets &FixablesForAllVars, const Strategy &S,
2480 ASTContext &Ctx,
2481 /* The function decl under analysis */ const Decl *D,
2482 const DeclUseTracker &Tracker, UnsafeBufferUsageHandler &Handler,
2483 const VariableGroupsManager &VarGrpMgr) {
2484 // `FixItsForVariable` will map each variable to a set of fix-its directly
2485 // associated to the variable itself. Fix-its of distinct variables in
2486 // `FixItsForVariable` are disjoint.
2487 std::map<const VarDecl *, FixItList> FixItsForVariable;
2489 // Populate `FixItsForVariable` with fix-its directly associated with each
2490 // variable. Fix-its directly associated to a variable 'v' are the ones
2491 // produced by the `FixableGadget`s whose claimed variable is 'v'.
2492 for (const auto &[VD, Fixables] : FixablesForAllVars.byVar) {
2493 FixItsForVariable[VD] =
2494 fixVariable(VD, S.lookup(VD), D, Tracker, Ctx, Handler);
2495 // If we fail to produce Fix-It for the declaration we have to skip the
2496 // variable entirely.
2497 if (FixItsForVariable[VD].empty()) {
2498 FixItsForVariable.erase(VD);
2499 continue;
2501 for (const auto &F : Fixables) {
2502 std::optional<FixItList> Fixits = F->getFixits(S);
2504 if (Fixits) {
2505 FixItsForVariable[VD].insert(FixItsForVariable[VD].end(),
2506 Fixits->begin(), Fixits->end());
2507 continue;
2509 #ifndef NDEBUG
2510 Handler.addDebugNoteForVar(
2511 VD, F->getBaseStmt()->getBeginLoc(),
2512 ("gadget '" + F->getDebugName() + "' refused to produce a fix")
2513 .str());
2514 #endif
2515 FixItsForVariable.erase(VD);
2516 break;
2520 // `FixItsForVariable` now contains only variables that can be
2521 // fixed. A variable can be fixed if its' declaration and all Fixables
2522 // associated to it can all be fixed.
2524 // To further remove from `FixItsForVariable` variables whose group mates
2525 // cannot be fixed...
2526 eraseVarsForUnfixableGroupMates(FixItsForVariable, VarGrpMgr);
2527 // Now `FixItsForVariable` gets further reduced: a variable is in
2528 // `FixItsForVariable` iff it can be fixed and all its group mates can be
2529 // fixed.
2531 // Fix-its of bounds-safe overloads of `D` are shared by parameters of `D`.
2532 // That is, when fixing multiple parameters in one step, these fix-its will
2533 // be applied only once (instead of being applied per parameter).
2534 FixItList FixItsSharedByParms{};
2536 if (auto *FD = dyn_cast<FunctionDecl>(D))
2537 FixItsSharedByParms = createFunctionOverloadsForParms(
2538 FixItsForVariable, VarGrpMgr, FD, S, Ctx, Handler);
2540 // The map that maps each variable `v` to fix-its for the whole group where
2541 // `v` is in:
2542 std::map<const VarDecl *, FixItList> FinalFixItsForVariable{
2543 FixItsForVariable};
2545 for (auto &[Var, Ignore] : FixItsForVariable) {
2546 bool AnyParm = false;
2547 const auto VarGroupForVD = VarGrpMgr.getGroupOfVar(Var, &AnyParm);
2549 for (const VarDecl *GrpMate : VarGroupForVD) {
2550 if (Var == GrpMate)
2551 continue;
2552 if (FixItsForVariable.count(GrpMate))
2553 FinalFixItsForVariable[Var].append(FixItsForVariable[GrpMate]);
2555 if (AnyParm) {
2556 // This assertion should never fail. Otherwise we have a bug.
2557 assert(!FixItsSharedByParms.empty() &&
2558 "Should not try to fix a parameter that does not belong to a "
2559 "FunctionDecl");
2560 FinalFixItsForVariable[Var].append(FixItsSharedByParms);
2563 // Fix-its that will be applied in one step shall NOT:
2564 // 1. overlap with macros or/and templates; or
2565 // 2. conflict with each other.
2566 // Otherwise, the fix-its will be dropped.
2567 for (auto Iter = FinalFixItsForVariable.begin();
2568 Iter != FinalFixItsForVariable.end();)
2569 if (overlapWithMacro(Iter->second) ||
2570 clang::internal::anyConflict(Iter->second, Ctx.getSourceManager())) {
2571 Iter = FinalFixItsForVariable.erase(Iter);
2572 } else
2573 Iter++;
2574 return FinalFixItsForVariable;
2577 template <typename VarDeclIterTy>
2578 static Strategy
2579 getNaiveStrategy(llvm::iterator_range<VarDeclIterTy> UnsafeVars) {
2580 Strategy S;
2581 for (const VarDecl *VD : UnsafeVars) {
2582 S.set(VD, Strategy::Kind::Span);
2584 return S;
2587 // Manages variable groups:
2588 class VariableGroupsManagerImpl : public VariableGroupsManager {
2589 const std::vector<VarGrpTy> Groups;
2590 const std::map<const VarDecl *, unsigned> &VarGrpMap;
2591 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms;
2593 public:
2594 VariableGroupsManagerImpl(
2595 const std::vector<VarGrpTy> &Groups,
2596 const std::map<const VarDecl *, unsigned> &VarGrpMap,
2597 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms)
2598 : Groups(Groups), VarGrpMap(VarGrpMap),
2599 GrpsUnionForParms(GrpsUnionForParms) {}
2601 VarGrpRef getGroupOfVar(const VarDecl *Var, bool *HasParm) const override {
2602 if (GrpsUnionForParms.contains(Var)) {
2603 if (HasParm)
2604 *HasParm = true;
2605 return GrpsUnionForParms.getArrayRef();
2607 if (HasParm)
2608 *HasParm = false;
2610 auto It = VarGrpMap.find(Var);
2612 if (It == VarGrpMap.end())
2613 return std::nullopt;
2614 return Groups[It->second];
2617 VarGrpRef getGroupOfParms() const override {
2618 return GrpsUnionForParms.getArrayRef();
2622 void clang::checkUnsafeBufferUsage(const Decl *D,
2623 UnsafeBufferUsageHandler &Handler,
2624 bool EmitSuggestions) {
2625 #ifndef NDEBUG
2626 Handler.clearDebugNotes();
2627 #endif
2629 assert(D && D->getBody());
2630 // We do not want to visit a Lambda expression defined inside a method independently.
2631 // Instead, it should be visited along with the outer method.
2632 // FIXME: do we want to do the same thing for `BlockDecl`s?
2633 if (const auto *fd = dyn_cast<CXXMethodDecl>(D)) {
2634 if (fd->getParent()->isLambda() && fd->getParent()->isLocalClass())
2635 return;
2638 // Do not emit fixit suggestions for functions declared in an
2639 // extern "C" block.
2640 if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
2641 for (FunctionDecl *FReDecl : FD->redecls()) {
2642 if (FReDecl->isExternC()) {
2643 EmitSuggestions = false;
2644 break;
2649 WarningGadgetSets UnsafeOps;
2650 FixableGadgetSets FixablesForAllVars;
2652 auto [FixableGadgets, WarningGadgets, Tracker] =
2653 findGadgets(D, Handler, EmitSuggestions);
2655 if (!EmitSuggestions) {
2656 // Our job is very easy without suggestions. Just warn about
2657 // every problematic operation and consider it done. No need to deal
2658 // with fixable gadgets, no need to group operations by variable.
2659 for (const auto &G : WarningGadgets) {
2660 Handler.handleUnsafeOperation(G->getBaseStmt(),
2661 /*IsRelatedToDecl=*/false);
2664 // This return guarantees that most of the machine doesn't run when
2665 // suggestions aren't requested.
2666 assert(FixableGadgets.size() == 0 &&
2667 "Fixable gadgets found but suggestions not requested!");
2668 return;
2671 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
2672 // function under the analysis. No need to fix any Fixables.
2673 if (!WarningGadgets.empty()) {
2674 // Gadgets "claim" variables they're responsible for. Once this loop
2675 // finishes, the tracker will only track DREs that weren't claimed by any
2676 // gadgets, i.e. not understood by the analysis.
2677 for (const auto &G : FixableGadgets) {
2678 for (const auto *DRE : G->getClaimedVarUseSites()) {
2679 Tracker.claimUse(DRE);
2684 // If no `WarningGadget`s ever matched, there are no unsafe operations in the
2685 // function under the analysis. Thus, we return early here as there is
2686 // nothing that needs to be fixed.
2688 // Note this claim is based on the assumption that there is no unsafe
2689 // variable whose declaration is invisible from the analyzed function.
2690 // Otherwise, we need to consider if the uses of those unsafe variables need
2691 // to be fixed.
2692 // So far, we are not fixing any global variables or class members. And,
2693 // lambdas will be analyzed along with the enclosing function. So this early
2694 // return is correct for now.
2695 if (WarningGadgets.empty())
2696 return;
2698 UnsafeOps = groupWarningGadgetsByVar(std::move(WarningGadgets));
2699 FixablesForAllVars = groupFixablesByVar(std::move(FixableGadgets));
2701 std::map<const VarDecl *, FixItList> FixItsForVariableGroup;
2703 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s.
2704 for (auto it = FixablesForAllVars.byVar.cbegin();
2705 it != FixablesForAllVars.byVar.cend();) {
2706 // FIXME: need to deal with global variables later
2707 if ((!it->first->isLocalVarDecl() && !isa<ParmVarDecl>(it->first))) {
2708 #ifndef NDEBUG
2709 Handler.addDebugNoteForVar(
2710 it->first, it->first->getBeginLoc(),
2711 ("failed to produce fixit for '" + it->first->getNameAsString() +
2712 "' : neither local nor a parameter"));
2713 #endif
2714 it = FixablesForAllVars.byVar.erase(it);
2715 } else if (it->first->getType().getCanonicalType()->isReferenceType()) {
2716 #ifndef NDEBUG
2717 Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(),
2718 ("failed to produce fixit for '" +
2719 it->first->getNameAsString() +
2720 "' : has a reference type"));
2721 #endif
2722 it = FixablesForAllVars.byVar.erase(it);
2723 } else if (Tracker.hasUnclaimedUses(it->first)) {
2724 #ifndef NDEBUG
2725 auto AllUnclaimed = Tracker.getUnclaimedUses(it->first);
2726 for (auto UnclaimedDRE : AllUnclaimed) {
2727 std::string UnclaimedUseTrace =
2728 getDREAncestorString(UnclaimedDRE, D->getASTContext());
2730 Handler.addDebugNoteForVar(
2731 it->first, UnclaimedDRE->getBeginLoc(),
2732 ("failed to produce fixit for '" + it->first->getNameAsString() +
2733 "' : has an unclaimed use\nThe unclaimed DRE trace: " +
2734 UnclaimedUseTrace));
2736 #endif
2737 it = FixablesForAllVars.byVar.erase(it);
2738 } else if (it->first->isInitCapture()) {
2739 #ifndef NDEBUG
2740 Handler.addDebugNoteForVar(
2741 it->first, it->first->getBeginLoc(),
2742 ("failed to produce fixit for '" + it->first->getNameAsString() +
2743 "' : init capture"));
2744 #endif
2745 it = FixablesForAllVars.byVar.erase(it);
2746 }else {
2747 ++it;
2751 // Fixpoint iteration for pointer assignments
2752 using DepMapTy = DenseMap<const VarDecl *, llvm::SetVector<const VarDecl *>>;
2753 DepMapTy DependenciesMap{};
2754 DepMapTy PtrAssignmentGraph{};
2756 for (auto it : FixablesForAllVars.byVar) {
2757 for (const FixableGadget *fixable : it.second) {
2758 std::optional<std::pair<const VarDecl *, const VarDecl *>> ImplPair =
2759 fixable->getStrategyImplications();
2760 if (ImplPair) {
2761 std::pair<const VarDecl *, const VarDecl *> Impl = std::move(*ImplPair);
2762 PtrAssignmentGraph[Impl.first].insert(Impl.second);
2768 The following code does a BFS traversal of the `PtrAssignmentGraph`
2769 considering all unsafe vars as starting nodes and constructs an undirected
2770 graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner
2771 eliminates all variables that are unreachable from any unsafe var. In other
2772 words, this removes all dependencies that don't include any unsafe variable
2773 and consequently don't need any fixit generation.
2774 Note: A careful reader would observe that the code traverses
2775 `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and
2776 `Adj` and not between `CurrentVar` and `Adj`. Both approaches would
2777 achieve the same result but the one used here dramatically cuts the
2778 amount of hoops the second part of the algorithm needs to jump, given that
2779 a lot of these connections become "direct". The reader is advised not to
2780 imagine how the graph is transformed because of using `Var` instead of
2781 `CurrentVar`. The reader can continue reading as if `CurrentVar` was used,
2782 and think about why it's equivalent later.
2784 std::set<const VarDecl *> VisitedVarsDirected{};
2785 for (const auto &[Var, ignore] : UnsafeOps.byVar) {
2786 if (VisitedVarsDirected.find(Var) == VisitedVarsDirected.end()) {
2788 std::queue<const VarDecl*> QueueDirected{};
2789 QueueDirected.push(Var);
2790 while(!QueueDirected.empty()) {
2791 const VarDecl* CurrentVar = QueueDirected.front();
2792 QueueDirected.pop();
2793 VisitedVarsDirected.insert(CurrentVar);
2794 auto AdjacentNodes = PtrAssignmentGraph[CurrentVar];
2795 for (const VarDecl *Adj : AdjacentNodes) {
2796 if (VisitedVarsDirected.find(Adj) == VisitedVarsDirected.end()) {
2797 QueueDirected.push(Adj);
2799 DependenciesMap[Var].insert(Adj);
2800 DependenciesMap[Adj].insert(Var);
2806 // `Groups` stores the set of Connected Components in the graph.
2807 std::vector<VarGrpTy> Groups;
2808 // `VarGrpMap` maps variables that need fix to the groups (indexes) that the
2809 // variables belong to. Group indexes refer to the elements in `Groups`.
2810 // `VarGrpMap` is complete in that every variable that needs fix is in it.
2811 std::map<const VarDecl *, unsigned> VarGrpMap;
2812 // The union group over the ones in "Groups" that contain parameters of `D`:
2813 llvm::SetVector<const VarDecl *>
2814 GrpsUnionForParms; // these variables need to be fixed in one step
2816 // Group Connected Components for Unsafe Vars
2817 // (Dependencies based on pointer assignments)
2818 std::set<const VarDecl *> VisitedVars{};
2819 for (const auto &[Var, ignore] : UnsafeOps.byVar) {
2820 if (VisitedVars.find(Var) == VisitedVars.end()) {
2821 VarGrpTy &VarGroup = Groups.emplace_back();
2822 std::queue<const VarDecl*> Queue{};
2824 Queue.push(Var);
2825 while(!Queue.empty()) {
2826 const VarDecl* CurrentVar = Queue.front();
2827 Queue.pop();
2828 VisitedVars.insert(CurrentVar);
2829 VarGroup.push_back(CurrentVar);
2830 auto AdjacentNodes = DependenciesMap[CurrentVar];
2831 for (const VarDecl *Adj : AdjacentNodes) {
2832 if (VisitedVars.find(Adj) == VisitedVars.end()) {
2833 Queue.push(Adj);
2838 bool HasParm = false;
2839 unsigned GrpIdx = Groups.size() - 1;
2841 for (const VarDecl *V : VarGroup) {
2842 VarGrpMap[V] = GrpIdx;
2843 if (!HasParm && isParameterOf(V, D))
2844 HasParm = true;
2846 if (HasParm)
2847 GrpsUnionForParms.insert(VarGroup.begin(), VarGroup.end());
2851 // Remove a `FixableGadget` if the associated variable is not in the graph
2852 // computed above. We do not want to generate fix-its for such variables,
2853 // since they are neither warned nor reachable from a warned one.
2855 // Note a variable is not warned if it is not directly used in any unsafe
2856 // operation. A variable `v` is NOT reachable from an unsafe variable if there
2857 // does not exist another variable `u` such that `u` is warned and fixing `u`
2858 // (transitively) implicates fixing `v`.
2860 // For example,
2861 // ```
2862 // void f(int * p) {
2863 // int * a = p; *p = 0;
2864 // }
2865 // ```
2866 // `*p = 0` is a fixable gadget associated with a variable `p` that is neither
2867 // warned nor reachable from a warned one. If we add `a[5] = 0` to the end of
2868 // the function above, `p` becomes reachable from a warned variable.
2869 for (auto I = FixablesForAllVars.byVar.begin();
2870 I != FixablesForAllVars.byVar.end();) {
2871 // Note `VisitedVars` contains all the variables in the graph:
2872 if (!VisitedVars.count((*I).first)) {
2873 // no such var in graph:
2874 I = FixablesForAllVars.byVar.erase(I);
2875 } else
2876 ++I;
2879 // We assign strategies to variables that are 1) in the graph and 2) can be
2880 // fixed. Other variables have the default "Won't fix" strategy.
2881 Strategy NaiveStrategy = getNaiveStrategy(llvm::make_filter_range(
2882 VisitedVars, [&FixablesForAllVars](const VarDecl *V) {
2883 // If a warned variable has no "Fixable", it is considered unfixable:
2884 return FixablesForAllVars.byVar.count(V);
2885 }));
2886 VariableGroupsManagerImpl VarGrpMgr(Groups, VarGrpMap, GrpsUnionForParms);
2888 if (isa<NamedDecl>(D))
2889 // The only case where `D` is not a `NamedDecl` is when `D` is a
2890 // `BlockDecl`. Let's not fix variables in blocks for now
2891 FixItsForVariableGroup =
2892 getFixIts(FixablesForAllVars, NaiveStrategy, D->getASTContext(), D,
2893 Tracker, Handler, VarGrpMgr);
2895 for (const auto &G : UnsafeOps.noVar) {
2896 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false);
2899 for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) {
2900 auto FixItsIt = FixItsForVariableGroup.find(VD);
2901 Handler.handleUnsafeVariableGroup(VD, VarGrpMgr,
2902 FixItsIt != FixItsForVariableGroup.end()
2903 ? std::move(FixItsIt->second)
2904 : FixItList{},
2906 for (const auto &G : WarningGadgets) {
2907 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true);