[DFAJumpThreading] Remove incoming StartBlock from all phis when unfolding select...
[llvm-project.git] / clang / lib / Analysis / UnsafeBufferUsage.cpp
blobe332a3609290aace02af6ea0e023dd21d18d0cb8
1 //===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
10 #include "clang/AST/Decl.h"
11 #include "clang/AST/Expr.h"
12 #include "clang/AST/RecursiveASTVisitor.h"
13 #include "clang/AST/StmtVisitor.h"
14 #include "clang/ASTMatchers/ASTMatchFinder.h"
15 #include "clang/Lex/Lexer.h"
16 #include "clang/Lex/Preprocessor.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include <memory>
19 #include <optional>
20 #include <sstream>
21 #include <queue>
23 using namespace llvm;
24 using namespace clang;
25 using namespace ast_matchers;
27 #ifndef NDEBUG
28 namespace {
29 class StmtDebugPrinter
30 : public ConstStmtVisitor<StmtDebugPrinter, std::string> {
31 public:
32 std::string VisitStmt(const Stmt *S) { return S->getStmtClassName(); }
34 std::string VisitBinaryOperator(const BinaryOperator *BO) {
35 return "BinaryOperator(" + BO->getOpcodeStr().str() + ")";
38 std::string VisitUnaryOperator(const UnaryOperator *UO) {
39 return "UnaryOperator(" + UO->getOpcodeStr(UO->getOpcode()).str() + ")";
42 std::string VisitImplicitCastExpr(const ImplicitCastExpr *ICE) {
43 return "ImplicitCastExpr(" + std::string(ICE->getCastKindName()) + ")";
47 // Returns a string of ancestor `Stmt`s of the given `DRE` in such a form:
48 // "DRE ==> parent-of-DRE ==> grandparent-of-DRE ==> ...".
49 static std::string getDREAncestorString(const DeclRefExpr *DRE,
50 ASTContext &Ctx) {
51 std::stringstream SS;
52 const Stmt *St = DRE;
53 StmtDebugPrinter StmtPriner;
55 do {
56 SS << StmtPriner.Visit(St);
58 DynTypedNodeList StParents = Ctx.getParents(*St);
60 if (StParents.size() > 1)
61 return "unavailable due to multiple parents";
62 if (StParents.size() == 0)
63 break;
64 St = StParents.begin()->get<Stmt>();
65 if (St)
66 SS << " ==> ";
67 } while (St);
68 return SS.str();
70 } // namespace
71 #endif /* NDEBUG */
73 namespace clang::ast_matchers {
74 // A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
75 // except for those belonging to a different callable of "n".
76 class MatchDescendantVisitor
77 : public RecursiveASTVisitor<MatchDescendantVisitor> {
78 public:
79 typedef RecursiveASTVisitor<MatchDescendantVisitor> VisitorBase;
81 // Creates an AST visitor that matches `Matcher` on all
82 // descendants of a given node "n" except for the ones
83 // belonging to a different callable of "n".
84 MatchDescendantVisitor(const internal::DynTypedMatcher *Matcher,
85 internal::ASTMatchFinder *Finder,
86 internal::BoundNodesTreeBuilder *Builder,
87 internal::ASTMatchFinder::BindKind Bind,
88 const bool ignoreUnevaluatedContext)
89 : Matcher(Matcher), Finder(Finder), Builder(Builder), Bind(Bind),
90 Matches(false), ignoreUnevaluatedContext(ignoreUnevaluatedContext) {}
92 // Returns true if a match is found in a subtree of `DynNode`, which belongs
93 // to the same callable of `DynNode`.
94 bool findMatch(const DynTypedNode &DynNode) {
95 Matches = false;
96 if (const Stmt *StmtNode = DynNode.get<Stmt>()) {
97 TraverseStmt(const_cast<Stmt *>(StmtNode));
98 *Builder = ResultBindings;
99 return Matches;
101 return false;
104 // The following are overriding methods from the base visitor class.
105 // They are public only to allow CRTP to work. They are *not *part
106 // of the public API of this class.
108 // For the matchers so far used in safe buffers, we only need to match
109 // `Stmt`s. To override more as needed.
111 bool TraverseDecl(Decl *Node) {
112 if (!Node)
113 return true;
114 if (!match(*Node))
115 return false;
116 // To skip callables:
117 if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Node))
118 return true;
119 // Traverse descendants
120 return VisitorBase::TraverseDecl(Node);
123 bool TraverseGenericSelectionExpr(GenericSelectionExpr *Node) {
124 // These are unevaluated, except the result expression.
125 if(ignoreUnevaluatedContext)
126 return TraverseStmt(Node->getResultExpr());
127 return VisitorBase::TraverseGenericSelectionExpr(Node);
130 bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node) {
131 // Unevaluated context.
132 if(ignoreUnevaluatedContext)
133 return true;
134 return VisitorBase::TraverseUnaryExprOrTypeTraitExpr(Node);
137 bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node) {
138 // Unevaluated context.
139 if(ignoreUnevaluatedContext)
140 return true;
141 return VisitorBase::TraverseTypeOfExprTypeLoc(Node);
144 bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node) {
145 // Unevaluated context.
146 if(ignoreUnevaluatedContext)
147 return true;
148 return VisitorBase::TraverseDecltypeTypeLoc(Node);
151 bool TraverseCXXNoexceptExpr(CXXNoexceptExpr *Node) {
152 // Unevaluated context.
153 if(ignoreUnevaluatedContext)
154 return true;
155 return VisitorBase::TraverseCXXNoexceptExpr(Node);
158 bool TraverseCXXTypeidExpr(CXXTypeidExpr *Node) {
159 // Unevaluated context.
160 if(ignoreUnevaluatedContext)
161 return true;
162 return VisitorBase::TraverseCXXTypeidExpr(Node);
165 bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue = nullptr) {
166 if (!Node)
167 return true;
168 if (!match(*Node))
169 return false;
170 return VisitorBase::TraverseStmt(Node);
173 bool shouldVisitTemplateInstantiations() const { return true; }
174 bool shouldVisitImplicitCode() const {
175 // TODO: let's ignore implicit code for now
176 return false;
179 private:
180 // Sets 'Matched' to true if 'Matcher' matches 'Node'
182 // Returns 'true' if traversal should continue after this function
183 // returns, i.e. if no match is found or 'Bind' is 'BK_All'.
184 template <typename T> bool match(const T &Node) {
185 internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder);
187 if (Matcher->matches(DynTypedNode::create(Node), Finder,
188 &RecursiveBuilder)) {
189 ResultBindings.addMatch(RecursiveBuilder);
190 Matches = true;
191 if (Bind != internal::ASTMatchFinder::BK_All)
192 return false; // Abort as soon as a match is found.
194 return true;
197 const internal::DynTypedMatcher *const Matcher;
198 internal::ASTMatchFinder *const Finder;
199 internal::BoundNodesTreeBuilder *const Builder;
200 internal::BoundNodesTreeBuilder ResultBindings;
201 const internal::ASTMatchFinder::BindKind Bind;
202 bool Matches;
203 bool ignoreUnevaluatedContext;
206 // Because we're dealing with raw pointers, let's define what we mean by that.
207 static auto hasPointerType() {
208 return hasType(hasCanonicalType(pointerType()));
211 static auto hasArrayType() {
212 return hasType(hasCanonicalType(arrayType()));
215 AST_MATCHER_P(Stmt, forEachDescendantEvaluatedStmt, internal::Matcher<Stmt>, innerMatcher) {
216 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
218 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, true);
219 return Visitor.findMatch(DynTypedNode::create(Node));
222 AST_MATCHER_P(Stmt, forEachDescendantStmt, internal::Matcher<Stmt>, innerMatcher) {
223 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
225 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, false);
226 return Visitor.findMatch(DynTypedNode::create(Node));
229 // Matches a `Stmt` node iff the node is in a safe-buffer opt-out region
230 AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const UnsafeBufferUsageHandler *,
231 Handler) {
232 return !Handler->isSafeBufferOptOut(Node.getBeginLoc());
235 AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) {
236 return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder);
239 // Matches a `UnaryOperator` whose operator is pre-increment:
240 AST_MATCHER(UnaryOperator, isPreInc) {
241 return Node.getOpcode() == UnaryOperator::Opcode::UO_PreInc;
244 // Returns a matcher that matches any expression 'e' such that `innerMatcher`
245 // matches 'e' and 'e' is in an Unspecified Lvalue Context.
246 static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) {
247 // clang-format off
248 return
249 expr(anyOf(
250 implicitCastExpr(
251 hasCastKind(CastKind::CK_LValueToRValue),
252 castSubExpr(innerMatcher)),
253 binaryOperator(
254 hasAnyOperatorName("="),
255 hasLHS(innerMatcher)
258 // clang-format on
262 // Returns a matcher that matches any expression `e` such that `InnerMatcher`
263 // matches `e` and `e` is in an Unspecified Pointer Context (UPC).
264 static internal::Matcher<Stmt>
265 isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) {
266 // A UPC can be
267 // 1. an argument of a function call (except the callee has [[unsafe_...]]
268 // attribute), or
269 // 2. the operand of a pointer-to-(integer or bool) cast operation; or
270 // 3. the operand of a comparator operation; or
271 // 4. the operand of a pointer subtraction operation
272 // (i.e., computing the distance between two pointers); or ...
274 auto CallArgMatcher =
275 callExpr(forEachArgumentWithParam(InnerMatcher,
276 hasPointerType() /* array also decays to pointer type*/),
277 unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage)))));
279 auto CastOperandMatcher =
280 castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral),
281 hasCastKind(CastKind::CK_PointerToBoolean)),
282 castSubExpr(allOf(hasPointerType(), InnerMatcher)));
284 auto CompOperandMatcher =
285 binaryOperator(hasAnyOperatorName("!=", "==", "<", "<=", ">", ">="),
286 eachOf(hasLHS(allOf(hasPointerType(), InnerMatcher)),
287 hasRHS(allOf(hasPointerType(), InnerMatcher))));
289 // A matcher that matches pointer subtractions:
290 auto PtrSubtractionMatcher =
291 binaryOperator(hasOperatorName("-"),
292 // Note that here we need both LHS and RHS to be
293 // pointer. Then the inner matcher can match any of
294 // them:
295 allOf(hasLHS(hasPointerType()),
296 hasRHS(hasPointerType())),
297 eachOf(hasLHS(InnerMatcher),
298 hasRHS(InnerMatcher)));
300 return stmt(anyOf(CallArgMatcher, CastOperandMatcher, CompOperandMatcher,
301 PtrSubtractionMatcher));
302 // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we
303 // don't have to check that.)
306 // Returns a matcher that matches any expression 'e' such that `innerMatcher`
307 // matches 'e' and 'e' is in an unspecified untyped context (i.e the expression
308 // 'e' isn't evaluated to an RValue). For example, consider the following code:
309 // int *p = new int[4];
310 // int *q = new int[4];
311 // if ((p = q)) {}
312 // p = q;
313 // The expression `p = q` in the conditional of the `if` statement
314 // `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;`
315 // in the assignment statement is in an untyped context.
316 static internal::Matcher<Stmt>
317 isInUnspecifiedUntypedContext(internal::Matcher<Stmt> InnerMatcher) {
318 // An unspecified context can be
319 // 1. A compound statement,
320 // 2. The body of an if statement
321 // 3. Body of a loop
322 auto CompStmt = compoundStmt(forEach(InnerMatcher));
323 auto IfStmtThen = ifStmt(hasThen(InnerMatcher));
324 auto IfStmtElse = ifStmt(hasElse(InnerMatcher));
325 // FIXME: Handle loop bodies.
326 return stmt(anyOf(CompStmt, IfStmtThen, IfStmtElse));
328 } // namespace clang::ast_matchers
330 namespace {
331 // Because the analysis revolves around variables and their types, we'll need to
332 // track uses of variables (aka DeclRefExprs).
333 using DeclUseList = SmallVector<const DeclRefExpr *, 1>;
335 // Convenience typedef.
336 using FixItList = SmallVector<FixItHint, 4>;
338 // Defined below.
339 class Strategy;
340 } // namespace
342 namespace {
343 /// Gadget is an individual operation in the code that may be of interest to
344 /// this analysis. Each (non-abstract) subclass corresponds to a specific
345 /// rigid AST structure that constitutes an operation on a pointer-type object.
346 /// Discovery of a gadget in the code corresponds to claiming that we understand
347 /// what this part of code is doing well enough to potentially improve it.
348 /// Gadgets can be warning (immediately deserving a warning) or fixable (not
349 /// always deserving a warning per se, but requires our attention to identify
350 /// it warrants a fixit).
351 class Gadget {
352 public:
353 enum class Kind {
354 #define GADGET(x) x,
355 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
358 /// Common type of ASTMatchers used for discovering gadgets.
359 /// Useful for implementing the static matcher() methods
360 /// that are expected from all non-abstract subclasses.
361 using Matcher = decltype(stmt());
363 Gadget(Kind K) : K(K) {}
365 Kind getKind() const { return K; }
367 #ifndef NDEBUG
368 StringRef getDebugName() const {
369 switch (K) {
370 #define GADGET(x) case Kind::x: return #x;
371 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
373 llvm_unreachable("Unhandled Gadget::Kind enum");
375 #endif
377 virtual bool isWarningGadget() const = 0;
378 virtual const Stmt *getBaseStmt() const = 0;
380 /// Returns the list of pointer-type variables on which this gadget performs
381 /// its operation. Typically, there's only one variable. This isn't a list
382 /// of all DeclRefExprs in the gadget's AST!
383 virtual DeclUseList getClaimedVarUseSites() const = 0;
385 virtual ~Gadget() = default;
387 private:
388 Kind K;
392 /// Warning gadgets correspond to unsafe code patterns that warrants
393 /// an immediate warning.
394 class WarningGadget : public Gadget {
395 public:
396 WarningGadget(Kind K) : Gadget(K) {}
398 static bool classof(const Gadget *G) { return G->isWarningGadget(); }
399 bool isWarningGadget() const final { return true; }
402 /// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be
403 /// properly recognized in order to emit fixes. For example, if a raw pointer-type
404 /// variable is replaced by a safe C++ container, every use of such variable must be
405 /// carefully considered and possibly updated.
406 class FixableGadget : public Gadget {
407 public:
408 FixableGadget(Kind K) : Gadget(K) {}
410 static bool classof(const Gadget *G) { return !G->isWarningGadget(); }
411 bool isWarningGadget() const final { return false; }
413 /// Returns a fixit that would fix the current gadget according to
414 /// the current strategy. Returns std::nullopt if the fix cannot be produced;
415 /// returns an empty list if no fixes are necessary.
416 virtual std::optional<FixItList> getFixits(const Strategy &) const {
417 return std::nullopt;
420 /// Returns a list of two elements where the first element is the LHS of a pointer assignment
421 /// statement and the second element is the RHS. This two-element list represents the fact that
422 /// the LHS buffer gets its bounds information from the RHS buffer. This information will be used
423 /// later to group all those variables whose types must be modified together to prevent type
424 /// mismatches.
425 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
426 getStrategyImplications() const {
427 return std::nullopt;
431 static auto toSupportedVariable() {
432 return to(varDecl());
435 using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>;
436 using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>;
438 /// An increment of a pointer-type value is unsafe as it may run the pointer
439 /// out of bounds.
440 class IncrementGadget : public WarningGadget {
441 static constexpr const char *const OpTag = "op";
442 const UnaryOperator *Op;
444 public:
445 IncrementGadget(const MatchFinder::MatchResult &Result)
446 : WarningGadget(Kind::Increment),
447 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {}
449 static bool classof(const Gadget *G) {
450 return G->getKind() == Kind::Increment;
453 static Matcher matcher() {
454 return stmt(unaryOperator(
455 hasOperatorName("++"),
456 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
457 ).bind(OpTag));
460 const UnaryOperator *getBaseStmt() const override { return Op; }
462 DeclUseList getClaimedVarUseSites() const override {
463 SmallVector<const DeclRefExpr *, 2> Uses;
464 if (const auto *DRE =
465 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) {
466 Uses.push_back(DRE);
469 return std::move(Uses);
473 /// A decrement of a pointer-type value is unsafe as it may run the pointer
474 /// out of bounds.
475 class DecrementGadget : public WarningGadget {
476 static constexpr const char *const OpTag = "op";
477 const UnaryOperator *Op;
479 public:
480 DecrementGadget(const MatchFinder::MatchResult &Result)
481 : WarningGadget(Kind::Decrement),
482 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {}
484 static bool classof(const Gadget *G) {
485 return G->getKind() == Kind::Decrement;
488 static Matcher matcher() {
489 return stmt(unaryOperator(
490 hasOperatorName("--"),
491 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
492 ).bind(OpTag));
495 const UnaryOperator *getBaseStmt() const override { return Op; }
497 DeclUseList getClaimedVarUseSites() const override {
498 if (const auto *DRE =
499 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) {
500 return {DRE};
503 return {};
507 /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
508 /// it doesn't have any bounds checks for the array.
509 class ArraySubscriptGadget : public WarningGadget {
510 static constexpr const char *const ArraySubscrTag = "ArraySubscript";
511 const ArraySubscriptExpr *ASE;
513 public:
514 ArraySubscriptGadget(const MatchFinder::MatchResult &Result)
515 : WarningGadget(Kind::ArraySubscript),
516 ASE(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ArraySubscrTag)) {}
518 static bool classof(const Gadget *G) {
519 return G->getKind() == Kind::ArraySubscript;
522 static Matcher matcher() {
523 // FIXME: What if the index is integer literal 0? Should this be
524 // a safe gadget in this case?
525 // clang-format off
526 return stmt(arraySubscriptExpr(
527 hasBase(ignoringParenImpCasts(
528 anyOf(hasPointerType(), hasArrayType()))),
529 unless(hasIndex(
530 anyOf(integerLiteral(equals(0)), arrayInitIndexExpr())
532 .bind(ArraySubscrTag));
533 // clang-format on
536 const ArraySubscriptExpr *getBaseStmt() const override { return ASE; }
538 DeclUseList getClaimedVarUseSites() const override {
539 if (const auto *DRE =
540 dyn_cast<DeclRefExpr>(ASE->getBase()->IgnoreParenImpCasts())) {
541 return {DRE};
544 return {};
548 /// A pointer arithmetic expression of one of the forms:
549 /// \code
550 /// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n
551 /// \endcode
552 class PointerArithmeticGadget : public WarningGadget {
553 static constexpr const char *const PointerArithmeticTag = "ptrAdd";
554 static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr";
555 const BinaryOperator *PA; // pointer arithmetic expression
556 const Expr *Ptr; // the pointer expression in `PA`
558 public:
559 PointerArithmeticGadget(const MatchFinder::MatchResult &Result)
560 : WarningGadget(Kind::PointerArithmetic),
561 PA(Result.Nodes.getNodeAs<BinaryOperator>(PointerArithmeticTag)),
562 Ptr(Result.Nodes.getNodeAs<Expr>(PointerArithmeticPointerTag)) {}
564 static bool classof(const Gadget *G) {
565 return G->getKind() == Kind::PointerArithmetic;
568 static Matcher matcher() {
569 auto HasIntegerType = anyOf(hasType(isInteger()), hasType(enumType()));
570 auto PtrAtRight =
571 allOf(hasOperatorName("+"),
572 hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)),
573 hasLHS(HasIntegerType));
574 auto PtrAtLeft =
575 allOf(anyOf(hasOperatorName("+"), hasOperatorName("-"),
576 hasOperatorName("+="), hasOperatorName("-=")),
577 hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)),
578 hasRHS(HasIntegerType));
580 return stmt(binaryOperator(anyOf(PtrAtLeft, PtrAtRight))
581 .bind(PointerArithmeticTag));
584 const Stmt *getBaseStmt() const override { return PA; }
586 DeclUseList getClaimedVarUseSites() const override {
587 if (const auto *DRE = dyn_cast<DeclRefExpr>(Ptr->IgnoreParenImpCasts())) {
588 return {DRE};
591 return {};
593 // FIXME: pointer adding zero should be fine
594 // FIXME: this gadge will need a fix-it
597 /// A pointer initialization expression of the form:
598 /// \code
599 /// int *p = q;
600 /// \endcode
601 class PointerInitGadget : public FixableGadget {
602 private:
603 static constexpr const char *const PointerInitLHSTag = "ptrInitLHS";
604 static constexpr const char *const PointerInitRHSTag = "ptrInitRHS";
605 const VarDecl * PtrInitLHS; // the LHS pointer expression in `PI`
606 const DeclRefExpr * PtrInitRHS; // the RHS pointer expression in `PI`
608 public:
609 PointerInitGadget(const MatchFinder::MatchResult &Result)
610 : FixableGadget(Kind::PointerInit),
611 PtrInitLHS(Result.Nodes.getNodeAs<VarDecl>(PointerInitLHSTag)),
612 PtrInitRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerInitRHSTag)) {}
614 static bool classof(const Gadget *G) {
615 return G->getKind() == Kind::PointerInit;
618 static Matcher matcher() {
619 auto PtrInitStmt = declStmt(hasSingleDecl(varDecl(
620 hasInitializer(ignoringImpCasts(declRefExpr(
621 hasPointerType(),
622 toSupportedVariable()).
623 bind(PointerInitRHSTag)))).
624 bind(PointerInitLHSTag)));
626 return stmt(PtrInitStmt);
629 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
631 virtual const Stmt *getBaseStmt() const override {
632 // FIXME: This needs to be the entire DeclStmt, assuming that this method
633 // makes sense at all on a FixableGadget.
634 return PtrInitRHS;
637 virtual DeclUseList getClaimedVarUseSites() const override {
638 return DeclUseList{PtrInitRHS};
641 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
642 getStrategyImplications() const override {
643 return std::make_pair(PtrInitLHS,
644 cast<VarDecl>(PtrInitRHS->getDecl()));
648 /// A pointer assignment expression of the form:
649 /// \code
650 /// p = q;
651 /// \endcode
652 class PointerAssignmentGadget : public FixableGadget {
653 private:
654 static constexpr const char *const PointerAssignLHSTag = "ptrLHS";
655 static constexpr const char *const PointerAssignRHSTag = "ptrRHS";
656 const DeclRefExpr * PtrLHS; // the LHS pointer expression in `PA`
657 const DeclRefExpr * PtrRHS; // the RHS pointer expression in `PA`
659 public:
660 PointerAssignmentGadget(const MatchFinder::MatchResult &Result)
661 : FixableGadget(Kind::PointerAssignment),
662 PtrLHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignLHSTag)),
663 PtrRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignRHSTag)) {}
665 static bool classof(const Gadget *G) {
666 return G->getKind() == Kind::PointerAssignment;
669 static Matcher matcher() {
670 auto PtrAssignExpr = binaryOperator(allOf(hasOperatorName("="),
671 hasRHS(ignoringParenImpCasts(declRefExpr(hasPointerType(),
672 toSupportedVariable()).
673 bind(PointerAssignRHSTag))),
674 hasLHS(declRefExpr(hasPointerType(),
675 toSupportedVariable()).
676 bind(PointerAssignLHSTag))));
678 return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr));
681 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
683 virtual const Stmt *getBaseStmt() const override {
684 // FIXME: This should be the binary operator, assuming that this method
685 // makes sense at all on a FixableGadget.
686 return PtrLHS;
689 virtual DeclUseList getClaimedVarUseSites() const override {
690 return DeclUseList{PtrLHS, PtrRHS};
693 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
694 getStrategyImplications() const override {
695 return std::make_pair(cast<VarDecl>(PtrLHS->getDecl()),
696 cast<VarDecl>(PtrRHS->getDecl()));
700 /// A call of a function or method that performs unchecked buffer operations
701 /// over one of its pointer parameters.
702 class UnsafeBufferUsageAttrGadget : public WarningGadget {
703 constexpr static const char *const OpTag = "call_expr";
704 const CallExpr *Op;
706 public:
707 UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult &Result)
708 : WarningGadget(Kind::UnsafeBufferUsageAttr),
709 Op(Result.Nodes.getNodeAs<CallExpr>(OpTag)) {}
711 static bool classof(const Gadget *G) {
712 return G->getKind() == Kind::UnsafeBufferUsageAttr;
715 static Matcher matcher() {
716 return stmt(callExpr(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))))
717 .bind(OpTag));
719 const Stmt *getBaseStmt() const override { return Op; }
721 DeclUseList getClaimedVarUseSites() const override { return {}; }
724 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
725 // Context (see `isInUnspecifiedLvalueContext`).
726 // Note here `[]` is the built-in subscript operator.
727 class ULCArraySubscriptGadget : public FixableGadget {
728 private:
729 static constexpr const char *const ULCArraySubscriptTag =
730 "ArraySubscriptUnderULC";
731 const ArraySubscriptExpr *Node;
733 public:
734 ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result)
735 : FixableGadget(Kind::ULCArraySubscript),
736 Node(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ULCArraySubscriptTag)) {
737 assert(Node != nullptr && "Expecting a non-null matching result");
740 static bool classof(const Gadget *G) {
741 return G->getKind() == Kind::ULCArraySubscript;
744 static Matcher matcher() {
745 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
746 auto BaseIsArrayOrPtrDRE =
747 hasBase(ignoringParenImpCasts(declRefExpr(ArrayOrPtr,
748 toSupportedVariable())));
749 auto Target =
750 arraySubscriptExpr(BaseIsArrayOrPtrDRE).bind(ULCArraySubscriptTag);
752 return expr(isInUnspecifiedLvalueContext(Target));
755 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
757 virtual const Stmt *getBaseStmt() const override { return Node; }
759 virtual DeclUseList getClaimedVarUseSites() const override {
760 if (const auto *DRE =
761 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) {
762 return {DRE};
764 return {};
768 // Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the
769 // unspecified pointer context (isInUnspecifiedPointerContext). The gadget emits
770 // fixit of the form `UPC(DRE.data())`.
771 class UPCStandalonePointerGadget : public FixableGadget {
772 private:
773 static constexpr const char *const DeclRefExprTag = "StandalonePointer";
774 const DeclRefExpr *Node;
776 public:
777 UPCStandalonePointerGadget(const MatchFinder::MatchResult &Result)
778 : FixableGadget(Kind::UPCStandalonePointer),
779 Node(Result.Nodes.getNodeAs<DeclRefExpr>(DeclRefExprTag)) {
780 assert(Node != nullptr && "Expecting a non-null matching result");
783 static bool classof(const Gadget *G) {
784 return G->getKind() == Kind::UPCStandalonePointer;
787 static Matcher matcher() {
788 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
789 auto target = expr(
790 ignoringParenImpCasts(declRefExpr(allOf(ArrayOrPtr,
791 toSupportedVariable())).bind(DeclRefExprTag)));
792 return stmt(isInUnspecifiedPointerContext(target));
795 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
797 virtual const Stmt *getBaseStmt() const override { return Node; }
799 virtual DeclUseList getClaimedVarUseSites() const override {
800 return {Node};
804 class PointerDereferenceGadget : public FixableGadget {
805 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
806 static constexpr const char *const OperatorTag = "op";
808 const DeclRefExpr *BaseDeclRefExpr = nullptr;
809 const UnaryOperator *Op = nullptr;
811 public:
812 PointerDereferenceGadget(const MatchFinder::MatchResult &Result)
813 : FixableGadget(Kind::PointerDereference),
814 BaseDeclRefExpr(
815 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)),
816 Op(Result.Nodes.getNodeAs<UnaryOperator>(OperatorTag)) {}
818 static bool classof(const Gadget *G) {
819 return G->getKind() == Kind::PointerDereference;
822 static Matcher matcher() {
823 auto Target =
824 unaryOperator(
825 hasOperatorName("*"),
826 has(expr(ignoringParenImpCasts(
827 declRefExpr(toSupportedVariable()).bind(BaseDeclRefExprTag)))))
828 .bind(OperatorTag);
830 return expr(isInUnspecifiedLvalueContext(Target));
833 DeclUseList getClaimedVarUseSites() const override {
834 return {BaseDeclRefExpr};
837 virtual const Stmt *getBaseStmt() const final { return Op; }
839 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
842 // Represents expressions of the form `&DRE[any]` in the Unspecified Pointer
843 // Context (see `isInUnspecifiedPointerContext`).
844 // Note here `[]` is the built-in subscript operator.
845 class UPCAddressofArraySubscriptGadget : public FixableGadget {
846 private:
847 static constexpr const char *const UPCAddressofArraySubscriptTag =
848 "AddressofArraySubscriptUnderUPC";
849 const UnaryOperator *Node; // the `&DRE[any]` node
851 public:
852 UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result)
853 : FixableGadget(Kind::ULCArraySubscript),
854 Node(Result.Nodes.getNodeAs<UnaryOperator>(
855 UPCAddressofArraySubscriptTag)) {
856 assert(Node != nullptr && "Expecting a non-null matching result");
859 static bool classof(const Gadget *G) {
860 return G->getKind() == Kind::UPCAddressofArraySubscript;
863 static Matcher matcher() {
864 return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
865 unaryOperator(hasOperatorName("&"),
866 hasUnaryOperand(arraySubscriptExpr(
867 hasBase(ignoringParenImpCasts(declRefExpr(
868 toSupportedVariable()))))))
869 .bind(UPCAddressofArraySubscriptTag)))));
872 virtual std::optional<FixItList> getFixits(const Strategy &) const override;
874 virtual const Stmt *getBaseStmt() const override { return Node; }
876 virtual DeclUseList getClaimedVarUseSites() const override {
877 const auto *ArraySubst = cast<ArraySubscriptExpr>(Node->getSubExpr());
878 const auto *DRE =
879 cast<DeclRefExpr>(ArraySubst->getBase()->IgnoreImpCasts());
880 return {DRE};
883 } // namespace
885 namespace {
886 // An auxiliary tracking facility for the fixit analysis. It helps connect
887 // declarations to its uses and make sure we've covered all uses with our
888 // analysis before we try to fix the declaration.
889 class DeclUseTracker {
890 using UseSetTy = SmallSet<const DeclRefExpr *, 16>;
891 using DefMapTy = DenseMap<const VarDecl *, const DeclStmt *>;
893 // Allocate on the heap for easier move.
894 std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()};
895 DefMapTy Defs{};
897 public:
898 DeclUseTracker() = default;
899 DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies.
900 DeclUseTracker &operator=(const DeclUseTracker &) = delete;
901 DeclUseTracker(DeclUseTracker &&) = default;
902 DeclUseTracker &operator=(DeclUseTracker &&) = default;
904 // Start tracking a freshly discovered DRE.
905 void discoverUse(const DeclRefExpr *DRE) { Uses->insert(DRE); }
907 // Stop tracking the DRE as it's been fully figured out.
908 void claimUse(const DeclRefExpr *DRE) {
909 assert(Uses->count(DRE) &&
910 "DRE not found or claimed by multiple matchers!");
911 Uses->erase(DRE);
914 // A variable is unclaimed if at least one use is unclaimed.
915 bool hasUnclaimedUses(const VarDecl *VD) const {
916 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
917 return any_of(*Uses, [VD](const DeclRefExpr *DRE) {
918 return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl();
922 UseSetTy getUnclaimedUses(const VarDecl *VD) const {
923 UseSetTy ReturnSet;
924 for (auto use : *Uses) {
925 if (use->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl()) {
926 ReturnSet.insert(use);
929 return ReturnSet;
932 void discoverDecl(const DeclStmt *DS) {
933 for (const Decl *D : DS->decls()) {
934 if (const auto *VD = dyn_cast<VarDecl>(D)) {
935 // FIXME: Assertion temporarily disabled due to a bug in
936 // ASTMatcher internal behavior in presence of GNU
937 // statement-expressions. We need to properly investigate this
938 // because it can screw up our algorithm in other ways.
939 // assert(Defs.count(VD) == 0 && "Definition already discovered!");
940 Defs[VD] = DS;
945 const DeclStmt *lookupDecl(const VarDecl *VD) const {
946 return Defs.lookup(VD);
949 } // namespace
951 namespace {
952 // Strategy is a map from variables to the way we plan to emit fixes for
953 // these variables. It is figured out gradually by trying different fixes
954 // for different variables depending on gadgets in which these variables
955 // participate.
956 class Strategy {
957 public:
958 enum class Kind {
959 Wontfix, // We don't plan to emit a fixit for this variable.
960 Span, // We recommend replacing the variable with std::span.
961 Iterator, // We recommend replacing the variable with std::span::iterator.
962 Array, // We recommend replacing the variable with std::array.
963 Vector // We recommend replacing the variable with std::vector.
966 private:
967 using MapTy = llvm::DenseMap<const VarDecl *, Kind>;
969 MapTy Map;
971 public:
972 Strategy() = default;
973 Strategy(const Strategy &) = delete; // Let's avoid copies.
974 Strategy &operator=(const Strategy &) = delete;
975 Strategy(Strategy &&) = default;
976 Strategy &operator=(Strategy &&) = default;
978 void set(const VarDecl *VD, Kind K) { Map[VD] = K; }
980 Kind lookup(const VarDecl *VD) const {
981 auto I = Map.find(VD);
982 if (I == Map.end())
983 return Kind::Wontfix;
985 return I->second;
988 } // namespace
991 // Representing a pointer type expression of the form `++Ptr` in an Unspecified
992 // Pointer Context (UPC):
993 class UPCPreIncrementGadget : public FixableGadget {
994 private:
995 static constexpr const char *const UPCPreIncrementTag =
996 "PointerPreIncrementUnderUPC";
997 const UnaryOperator *Node; // the `++Ptr` node
999 public:
1000 UPCPreIncrementGadget(const MatchFinder::MatchResult &Result)
1001 : FixableGadget(Kind::UPCPreIncrement),
1002 Node(Result.Nodes.getNodeAs<UnaryOperator>(UPCPreIncrementTag)) {
1003 assert(Node != nullptr && "Expecting a non-null matching result");
1006 static bool classof(const Gadget *G) {
1007 return G->getKind() == Kind::UPCPreIncrement;
1010 static Matcher matcher() {
1011 // Note here we match `++Ptr` for any expression `Ptr` of pointer type.
1012 // Although currently we can only provide fix-its when `Ptr` is a DRE, we
1013 // can have the matcher be general, so long as `getClaimedVarUseSites` does
1014 // things right.
1015 return stmt(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
1016 unaryOperator(isPreInc(),
1017 hasUnaryOperand(declRefExpr(
1018 toSupportedVariable()))
1019 ).bind(UPCPreIncrementTag)))));
1022 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
1024 virtual const Stmt *getBaseStmt() const override { return Node; }
1026 virtual DeclUseList getClaimedVarUseSites() const override {
1027 return {dyn_cast<DeclRefExpr>(Node->getSubExpr())};
1031 // Representing a fixable expression of the form `*(ptr + 123)` or `*(123 +
1032 // ptr)`:
1033 class DerefSimplePtrArithFixableGadget : public FixableGadget {
1034 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
1035 static constexpr const char *const DerefOpTag = "DerefOp";
1036 static constexpr const char *const AddOpTag = "AddOp";
1037 static constexpr const char *const OffsetTag = "Offset";
1039 const DeclRefExpr *BaseDeclRefExpr = nullptr;
1040 const UnaryOperator *DerefOp = nullptr;
1041 const BinaryOperator *AddOp = nullptr;
1042 const IntegerLiteral *Offset = nullptr;
1044 public:
1045 DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult &Result)
1046 : FixableGadget(Kind::DerefSimplePtrArithFixable),
1047 BaseDeclRefExpr(
1048 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)),
1049 DerefOp(Result.Nodes.getNodeAs<UnaryOperator>(DerefOpTag)),
1050 AddOp(Result.Nodes.getNodeAs<BinaryOperator>(AddOpTag)),
1051 Offset(Result.Nodes.getNodeAs<IntegerLiteral>(OffsetTag)) {}
1053 static Matcher matcher() {
1054 // clang-format off
1055 auto ThePtr = expr(hasPointerType(),
1056 ignoringImpCasts(declRefExpr(toSupportedVariable()).
1057 bind(BaseDeclRefExprTag)));
1058 auto PlusOverPtrAndInteger = expr(anyOf(
1059 binaryOperator(hasOperatorName("+"), hasLHS(ThePtr),
1060 hasRHS(integerLiteral().bind(OffsetTag)))
1061 .bind(AddOpTag),
1062 binaryOperator(hasOperatorName("+"), hasRHS(ThePtr),
1063 hasLHS(integerLiteral().bind(OffsetTag)))
1064 .bind(AddOpTag)));
1065 return isInUnspecifiedLvalueContext(unaryOperator(
1066 hasOperatorName("*"),
1067 hasUnaryOperand(ignoringParens(PlusOverPtrAndInteger)))
1068 .bind(DerefOpTag));
1069 // clang-format on
1072 virtual std::optional<FixItList> getFixits(const Strategy &s) const final;
1074 // TODO remove this method from FixableGadget interface
1075 virtual const Stmt *getBaseStmt() const final { return nullptr; }
1077 virtual DeclUseList getClaimedVarUseSites() const final {
1078 return {BaseDeclRefExpr};
1082 /// Scan the function and return a list of gadgets found with provided kits.
1083 static std::tuple<FixableGadgetList, WarningGadgetList, DeclUseTracker>
1084 findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler,
1085 bool EmitSuggestions) {
1087 struct GadgetFinderCallback : MatchFinder::MatchCallback {
1088 FixableGadgetList FixableGadgets;
1089 WarningGadgetList WarningGadgets;
1090 DeclUseTracker Tracker;
1092 void run(const MatchFinder::MatchResult &Result) override {
1093 // In debug mode, assert that we've found exactly one gadget.
1094 // This helps us avoid conflicts in .bind() tags.
1095 #if NDEBUG
1096 #define NEXT return
1097 #else
1098 [[maybe_unused]] int numFound = 0;
1099 #define NEXT ++numFound
1100 #endif
1102 if (const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>("any_dre")) {
1103 Tracker.discoverUse(DRE);
1104 NEXT;
1107 if (const auto *DS = Result.Nodes.getNodeAs<DeclStmt>("any_ds")) {
1108 Tracker.discoverDecl(DS);
1109 NEXT;
1112 // Figure out which matcher we've found, and call the appropriate
1113 // subclass constructor.
1114 // FIXME: Can we do this more logarithmically?
1115 #define FIXABLE_GADGET(name) \
1116 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
1117 FixableGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
1118 NEXT; \
1120 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1121 #define WARNING_GADGET(name) \
1122 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
1123 WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
1124 NEXT; \
1126 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1128 assert(numFound >= 1 && "Gadgets not found in match result!");
1129 assert(numFound <= 1 && "Conflicting bind tags in gadgets!");
1133 MatchFinder M;
1134 GadgetFinderCallback CB;
1136 // clang-format off
1137 M.addMatcher(
1138 stmt(
1139 forEachDescendantEvaluatedStmt(stmt(anyOf(
1140 // Add Gadget::matcher() for every gadget in the registry.
1141 #define WARNING_GADGET(x) \
1142 allOf(x ## Gadget::matcher().bind(#x), \
1143 notInSafeBufferOptOut(&Handler)),
1144 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1145 // Avoid a hanging comma.
1146 unless(stmt())
1151 // clang-format on
1153 if (EmitSuggestions) {
1154 // clang-format off
1155 M.addMatcher(
1156 stmt(
1157 forEachDescendantStmt(stmt(eachOf(
1158 #define FIXABLE_GADGET(x) \
1159 x ## Gadget::matcher().bind(#x),
1160 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1161 // In parallel, match all DeclRefExprs so that to find out
1162 // whether there are any uncovered by gadgets.
1163 declRefExpr(anyOf(hasPointerType(), hasArrayType()),
1164 to(anyOf(varDecl(), bindingDecl()))).bind("any_dre"),
1165 // Also match DeclStmts because we'll need them when fixing
1166 // their underlying VarDecls that otherwise don't have
1167 // any backreferences to DeclStmts.
1168 declStmt().bind("any_ds")
1173 // clang-format on
1176 M.match(*D->getBody(), D->getASTContext());
1177 return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets),
1178 std::move(CB.Tracker)};
1181 // Compares AST nodes by source locations.
1182 template <typename NodeTy> struct CompareNode {
1183 bool operator()(const NodeTy *N1, const NodeTy *N2) const {
1184 return N1->getBeginLoc().getRawEncoding() <
1185 N2->getBeginLoc().getRawEncoding();
1189 struct WarningGadgetSets {
1190 std::map<const VarDecl *, std::set<const WarningGadget *>,
1191 // To keep keys sorted by their locations in the map so that the
1192 // order is deterministic:
1193 CompareNode<VarDecl>>
1194 byVar;
1195 // These Gadgets are not related to pointer variables (e. g. temporaries).
1196 llvm::SmallVector<const WarningGadget *, 16> noVar;
1199 static WarningGadgetSets
1200 groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations) {
1201 WarningGadgetSets result;
1202 // If some gadgets cover more than one
1203 // variable, they'll appear more than once in the map.
1204 for (auto &G : AllUnsafeOperations) {
1205 DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites();
1207 bool AssociatedWithVarDecl = false;
1208 for (const DeclRefExpr *DRE : ClaimedVarUseSites) {
1209 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
1210 result.byVar[VD].insert(G.get());
1211 AssociatedWithVarDecl = true;
1215 if (!AssociatedWithVarDecl) {
1216 result.noVar.push_back(G.get());
1217 continue;
1220 return result;
1223 struct FixableGadgetSets {
1224 std::map<const VarDecl *, std::set<const FixableGadget *>,
1225 // To keep keys sorted by their locations in the map so that the
1226 // order is deterministic:
1227 CompareNode<VarDecl>>
1228 byVar;
1231 static FixableGadgetSets
1232 groupFixablesByVar(FixableGadgetList &&AllFixableOperations) {
1233 FixableGadgetSets FixablesForUnsafeVars;
1234 for (auto &F : AllFixableOperations) {
1235 DeclUseList DREs = F->getClaimedVarUseSites();
1237 for (const DeclRefExpr *DRE : DREs) {
1238 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
1239 FixablesForUnsafeVars.byVar[VD].insert(F.get());
1243 return FixablesForUnsafeVars;
1246 bool clang::internal::anyConflict(const SmallVectorImpl<FixItHint> &FixIts,
1247 const SourceManager &SM) {
1248 // A simple interval overlap detection algorithm. Sorts all ranges by their
1249 // begin location then finds the first overlap in one pass.
1250 std::vector<const FixItHint *> All; // a copy of `FixIts`
1252 for (const FixItHint &H : FixIts)
1253 All.push_back(&H);
1254 std::sort(All.begin(), All.end(),
1255 [&SM](const FixItHint *H1, const FixItHint *H2) {
1256 return SM.isBeforeInTranslationUnit(H1->RemoveRange.getBegin(),
1257 H2->RemoveRange.getBegin());
1260 const FixItHint *CurrHint = nullptr;
1262 for (const FixItHint *Hint : All) {
1263 if (!CurrHint ||
1264 SM.isBeforeInTranslationUnit(CurrHint->RemoveRange.getEnd(),
1265 Hint->RemoveRange.getBegin())) {
1266 // Either to initialize `CurrHint` or `CurrHint` does not
1267 // overlap with `Hint`:
1268 CurrHint = Hint;
1269 } else
1270 // In case `Hint` overlaps the `CurrHint`, we found at least one
1271 // conflict:
1272 return true;
1274 return false;
1277 std::optional<FixItList>
1278 PointerAssignmentGadget::getFixits(const Strategy &S) const {
1279 const auto *LeftVD = cast<VarDecl>(PtrLHS->getDecl());
1280 const auto *RightVD = cast<VarDecl>(PtrRHS->getDecl());
1281 switch (S.lookup(LeftVD)) {
1282 case Strategy::Kind::Span:
1283 if (S.lookup(RightVD) == Strategy::Kind::Span)
1284 return FixItList{};
1285 return std::nullopt;
1286 case Strategy::Kind::Wontfix:
1287 return std::nullopt;
1288 case Strategy::Kind::Iterator:
1289 case Strategy::Kind::Array:
1290 case Strategy::Kind::Vector:
1291 llvm_unreachable("unsupported strategies for FixableGadgets");
1293 return std::nullopt;
1296 std::optional<FixItList>
1297 PointerInitGadget::getFixits(const Strategy &S) const {
1298 const auto *LeftVD = PtrInitLHS;
1299 const auto *RightVD = cast<VarDecl>(PtrInitRHS->getDecl());
1300 switch (S.lookup(LeftVD)) {
1301 case Strategy::Kind::Span:
1302 if (S.lookup(RightVD) == Strategy::Kind::Span)
1303 return FixItList{};
1304 return std::nullopt;
1305 case Strategy::Kind::Wontfix:
1306 return std::nullopt;
1307 case Strategy::Kind::Iterator:
1308 case Strategy::Kind::Array:
1309 case Strategy::Kind::Vector:
1310 llvm_unreachable("unsupported strategies for FixableGadgets");
1312 return std::nullopt;
1315 std::optional<FixItList>
1316 ULCArraySubscriptGadget::getFixits(const Strategy &S) const {
1317 if (const auto *DRE =
1318 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts()))
1319 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
1320 switch (S.lookup(VD)) {
1321 case Strategy::Kind::Span: {
1322 // If the index has a negative constant value, we give up as no valid
1323 // fix-it can be generated:
1324 const ASTContext &Ctx = // FIXME: we need ASTContext to be passed in!
1325 VD->getASTContext();
1326 if (auto ConstVal = Node->getIdx()->getIntegerConstantExpr(Ctx)) {
1327 if (ConstVal->isNegative())
1328 return std::nullopt;
1329 } else if (!Node->getIdx()->getType()->isUnsignedIntegerType())
1330 return std::nullopt;
1331 // no-op is a good fix-it, otherwise
1332 return FixItList{};
1334 case Strategy::Kind::Wontfix:
1335 case Strategy::Kind::Iterator:
1336 case Strategy::Kind::Array:
1337 case Strategy::Kind::Vector:
1338 llvm_unreachable("unsupported strategies for FixableGadgets");
1341 return std::nullopt;
1344 static std::optional<FixItList> // forward declaration
1345 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node);
1347 std::optional<FixItList>
1348 UPCAddressofArraySubscriptGadget::getFixits(const Strategy &S) const {
1349 auto DREs = getClaimedVarUseSites();
1350 const auto *VD = cast<VarDecl>(DREs.front()->getDecl());
1352 switch (S.lookup(VD)) {
1353 case Strategy::Kind::Span:
1354 return fixUPCAddressofArraySubscriptWithSpan(Node);
1355 case Strategy::Kind::Wontfix:
1356 case Strategy::Kind::Iterator:
1357 case Strategy::Kind::Array:
1358 case Strategy::Kind::Vector:
1359 llvm_unreachable("unsupported strategies for FixableGadgets");
1361 return std::nullopt; // something went wrong, no fix-it
1364 // FIXME: this function should be customizable through format
1365 static StringRef getEndOfLine() {
1366 static const char *const EOL = "\n";
1367 return EOL;
1370 // Returns the text indicating that the user needs to provide input there:
1371 std::string getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") {
1372 std::string s = std::string("<# ");
1373 s += HintTextToUser;
1374 s += " #>";
1375 return s;
1378 // Return the text representation of the given `APInt Val`:
1379 static std::string getAPIntText(APInt Val) {
1380 SmallVector<char> Txt;
1381 Val.toString(Txt, 10, true);
1382 // APInt::toString does not add '\0' to the end of the string for us:
1383 Txt.push_back('\0');
1384 return Txt.data();
1387 // Return the source location of the last character of the AST `Node`.
1388 template <typename NodeTy>
1389 static std::optional<SourceLocation>
1390 getEndCharLoc(const NodeTy *Node, const SourceManager &SM,
1391 const LangOptions &LangOpts) {
1392 unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts);
1393 SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1);
1395 if (Loc.isValid())
1396 return Loc;
1398 return std::nullopt;
1401 // Return the source location just past the last character of the AST `Node`.
1402 template <typename NodeTy>
1403 static std::optional<SourceLocation> getPastLoc(const NodeTy *Node,
1404 const SourceManager &SM,
1405 const LangOptions &LangOpts) {
1406 SourceLocation Loc =
1407 Lexer::getLocForEndOfToken(Node->getEndLoc(), 0, SM, LangOpts);
1409 if (Loc.isValid())
1410 return Loc;
1412 return std::nullopt;
1415 // Return text representation of an `Expr`.
1416 static std::optional<StringRef> getExprText(const Expr *E,
1417 const SourceManager &SM,
1418 const LangOptions &LangOpts) {
1419 std::optional<SourceLocation> LastCharLoc = getPastLoc(E, SM, LangOpts);
1421 if (LastCharLoc)
1422 return Lexer::getSourceText(
1423 CharSourceRange::getCharRange(E->getBeginLoc(), *LastCharLoc), SM,
1424 LangOpts);
1426 return std::nullopt;
1429 // Returns the literal text in `SourceRange SR`, if `SR` is a valid range.
1430 static std::optional<StringRef> getRangeText(SourceRange SR,
1431 const SourceManager &SM,
1432 const LangOptions &LangOpts) {
1433 bool Invalid = false;
1434 CharSourceRange CSR = CharSourceRange::getCharRange(SR);
1435 StringRef Text = Lexer::getSourceText(CSR, SM, LangOpts, &Invalid);
1437 if (!Invalid)
1438 return Text;
1439 return std::nullopt;
1442 // Returns the begin location of the identifier of the given variable
1443 // declaration.
1444 static SourceLocation getVarDeclIdentifierLoc(const VarDecl *VD) {
1445 // According to the implementation of `VarDecl`, `VD->getLocation()` actually
1446 // returns the begin location of the identifier of the declaration:
1447 return VD->getLocation();
1450 // Returns the literal text of the identifier of the given variable declaration.
1451 static std::optional<StringRef>
1452 getVarDeclIdentifierText(const VarDecl *VD, const SourceManager &SM,
1453 const LangOptions &LangOpts) {
1454 SourceLocation ParmIdentBeginLoc = getVarDeclIdentifierLoc(VD);
1455 SourceLocation ParmIdentEndLoc =
1456 Lexer::getLocForEndOfToken(ParmIdentBeginLoc, 0, SM, LangOpts);
1458 if (ParmIdentEndLoc.isMacroID() &&
1459 !Lexer::isAtEndOfMacroExpansion(ParmIdentEndLoc, SM, LangOpts))
1460 return std::nullopt;
1461 return getRangeText({ParmIdentBeginLoc, ParmIdentEndLoc}, SM, LangOpts);
1464 // We cannot fix a variable declaration if it has some other specifiers than the
1465 // type specifier. Because the source ranges of those specifiers could overlap
1466 // with the source range that is being replaced using fix-its. Especially when
1467 // we often cannot obtain accurate source ranges of cv-qualified type
1468 // specifiers.
1469 // FIXME: also deal with type attributes
1470 static bool hasUnsupportedSpecifiers(const VarDecl *VD,
1471 const SourceManager &SM) {
1472 // AttrRangeOverlapping: true if at least one attribute of `VD` overlaps the
1473 // source range of `VD`:
1474 bool AttrRangeOverlapping = llvm::any_of(VD->attrs(), [&](Attr *At) -> bool {
1475 return !(SM.isBeforeInTranslationUnit(At->getRange().getEnd(),
1476 VD->getBeginLoc())) &&
1477 !(SM.isBeforeInTranslationUnit(VD->getEndLoc(),
1478 At->getRange().getBegin()));
1480 return VD->isInlineSpecified() || VD->isConstexpr() ||
1481 VD->hasConstantInitialization() || !VD->hasLocalStorage() ||
1482 AttrRangeOverlapping;
1485 // Returns the `SourceRange` of `D`. The reason why this function exists is
1486 // that `D->getSourceRange()` may return a range where the end location is the
1487 // starting location of the last token. The end location of the source range
1488 // returned by this function is the last location of the last token.
1489 static SourceRange getSourceRangeToTokenEnd(const Decl *D,
1490 const SourceManager &SM,
1491 LangOptions LangOpts) {
1492 SourceLocation Begin = D->getBeginLoc();
1493 SourceLocation
1494 End = // `D->getEndLoc` should always return the starting location of the
1495 // last token, so we should get the end of the token
1496 Lexer::getLocForEndOfToken(D->getEndLoc(), 0, SM, LangOpts);
1498 return SourceRange(Begin, End);
1501 // Returns the text of the pointee type of `T` from a `VarDecl` of a pointer
1502 // type. The text is obtained through from `TypeLoc`s. Since `TypeLoc` does not
1503 // have source ranges of qualifiers ( The `QualifiedTypeLoc` looks hacky too me
1504 // :( ), `Qualifiers` of the pointee type is returned separately through the
1505 // output parameter `QualifiersToAppend`.
1506 static std::optional<std::string>
1507 getPointeeTypeText(const VarDecl *VD, const SourceManager &SM,
1508 const LangOptions &LangOpts,
1509 std::optional<Qualifiers> *QualifiersToAppend) {
1510 QualType Ty = VD->getType();
1511 QualType PteTy;
1513 assert(Ty->isPointerType() && !Ty->isFunctionPointerType() &&
1514 "Expecting a VarDecl of type of pointer to object type");
1515 PteTy = Ty->getPointeeType();
1517 TypeLoc TyLoc = VD->getTypeSourceInfo()->getTypeLoc().getUnqualifiedLoc();
1518 TypeLoc PteTyLoc;
1520 // We only deal with the cases that we know `TypeLoc::getNextTypeLoc` returns
1521 // the `TypeLoc` of the pointee type:
1522 switch (TyLoc.getTypeLocClass()) {
1523 case TypeLoc::ConstantArray:
1524 case TypeLoc::IncompleteArray:
1525 case TypeLoc::VariableArray:
1526 case TypeLoc::DependentSizedArray:
1527 case TypeLoc::Decayed:
1528 assert(isa<ParmVarDecl>(VD) && "An array type shall not be treated as a "
1529 "pointer type unless it decays.");
1530 PteTyLoc = TyLoc.getNextTypeLoc();
1531 break;
1532 case TypeLoc::Pointer:
1533 PteTyLoc = TyLoc.castAs<PointerTypeLoc>().getPointeeLoc();
1534 break;
1535 default:
1536 return std::nullopt;
1538 if (PteTyLoc.isNull())
1539 // Sometimes we cannot get a useful `TypeLoc` for the pointee type, e.g.,
1540 // when the pointer type is `auto`.
1541 return std::nullopt;
1543 SourceLocation IdentLoc = getVarDeclIdentifierLoc(VD);
1545 if (!(IdentLoc.isValid() && PteTyLoc.getSourceRange().isValid())) {
1546 // We are expecting these locations to be valid. But in some cases, they are
1547 // not all valid. It is a Clang bug to me and we are not responsible for
1548 // fixing it. So we will just give up for now when it happens.
1549 return std::nullopt;
1552 // Note that TypeLoc.getEndLoc() returns the begin location of the last token:
1553 SourceLocation PteEndOfTokenLoc =
1554 Lexer::getLocForEndOfToken(PteTyLoc.getEndLoc(), 0, SM, LangOpts);
1556 if (!PteEndOfTokenLoc.isValid())
1557 // Sometimes we cannot get the end location of the pointee type, e.g., when
1558 // there are macros involved.
1559 return std::nullopt;
1560 if (!SM.isBeforeInTranslationUnit(PteEndOfTokenLoc, IdentLoc)) {
1561 // We only deal with the cases where the source text of the pointee type
1562 // appears on the left-hand side of the variable identifier completely,
1563 // including the following forms:
1564 // `T ident`,
1565 // `T ident[]`, where `T` is any type.
1566 // Examples of excluded cases are `T (*ident)[]` or `T ident[][n]`.
1567 return std::nullopt;
1569 if (PteTy.hasQualifiers()) {
1570 // TypeLoc does not provide source ranges for qualifiers (it says it's
1571 // intentional but seems fishy to me), so we cannot get the full text
1572 // `PteTy` via source ranges.
1573 *QualifiersToAppend = PteTy.getQualifiers();
1575 return getRangeText({PteTyLoc.getBeginLoc(), PteEndOfTokenLoc}, SM, LangOpts)
1576 ->str();
1579 // Returns the text of the name (with qualifiers) of a `FunctionDecl`.
1580 static std::optional<StringRef> getFunNameText(const FunctionDecl *FD,
1581 const SourceManager &SM,
1582 const LangOptions &LangOpts) {
1583 SourceLocation BeginLoc = FD->getQualifier()
1584 ? FD->getQualifierLoc().getBeginLoc()
1585 : FD->getNameInfo().getBeginLoc();
1586 // Note that `FD->getNameInfo().getEndLoc()` returns the begin location of the
1587 // last token:
1588 SourceLocation EndLoc = Lexer::getLocForEndOfToken(
1589 FD->getNameInfo().getEndLoc(), 0, SM, LangOpts);
1590 SourceRange NameRange{BeginLoc, EndLoc};
1592 return getRangeText(NameRange, SM, LangOpts);
1595 // Returns the text representing a `std::span` type where the element type is
1596 // represented by `EltTyText`.
1598 // Note the optional parameter `Qualifiers`: one needs to pass qualifiers
1599 // explicitly if the element type needs to be qualified.
1600 static std::string
1601 getSpanTypeText(StringRef EltTyText,
1602 std::optional<Qualifiers> Quals = std::nullopt) {
1603 const char *const SpanOpen = "std::span<";
1605 if (Quals)
1606 return SpanOpen + EltTyText.str() + ' ' + Quals->getAsString() + '>';
1607 return SpanOpen + EltTyText.str() + '>';
1610 std::optional<FixItList>
1611 DerefSimplePtrArithFixableGadget::getFixits(const Strategy &s) const {
1612 const VarDecl *VD = dyn_cast<VarDecl>(BaseDeclRefExpr->getDecl());
1614 if (VD && s.lookup(VD) == Strategy::Kind::Span) {
1615 ASTContext &Ctx = VD->getASTContext();
1616 // std::span can't represent elements before its begin()
1617 if (auto ConstVal = Offset->getIntegerConstantExpr(Ctx))
1618 if (ConstVal->isNegative())
1619 return std::nullopt;
1621 // note that the expr may (oddly) has multiple layers of parens
1622 // example:
1623 // *((..(pointer + 123)..))
1624 // goal:
1625 // pointer[123]
1626 // Fix-It:
1627 // remove '*('
1628 // replace ' + ' with '['
1629 // replace ')' with ']'
1631 // example:
1632 // *((..(123 + pointer)..))
1633 // goal:
1634 // 123[pointer]
1635 // Fix-It:
1636 // remove '*('
1637 // replace ' + ' with '['
1638 // replace ')' with ']'
1640 const Expr *LHS = AddOp->getLHS(), *RHS = AddOp->getRHS();
1641 const SourceManager &SM = Ctx.getSourceManager();
1642 const LangOptions &LangOpts = Ctx.getLangOpts();
1643 CharSourceRange StarWithTrailWhitespace =
1644 clang::CharSourceRange::getCharRange(DerefOp->getOperatorLoc(),
1645 LHS->getBeginLoc());
1647 std::optional<SourceLocation> LHSLocation = getPastLoc(LHS, SM, LangOpts);
1648 if (!LHSLocation)
1649 return std::nullopt;
1651 CharSourceRange PlusWithSurroundingWhitespace =
1652 clang::CharSourceRange::getCharRange(*LHSLocation, RHS->getBeginLoc());
1654 std::optional<SourceLocation> AddOpLocation =
1655 getPastLoc(AddOp, SM, LangOpts);
1656 std::optional<SourceLocation> DerefOpLocation =
1657 getPastLoc(DerefOp, SM, LangOpts);
1659 if (!AddOpLocation || !DerefOpLocation)
1660 return std::nullopt;
1662 CharSourceRange ClosingParenWithPrecWhitespace =
1663 clang::CharSourceRange::getCharRange(*AddOpLocation, *DerefOpLocation);
1665 return FixItList{
1666 {FixItHint::CreateRemoval(StarWithTrailWhitespace),
1667 FixItHint::CreateReplacement(PlusWithSurroundingWhitespace, "["),
1668 FixItHint::CreateReplacement(ClosingParenWithPrecWhitespace, "]")}};
1670 return std::nullopt; // something wrong or unsupported, give up
1673 std::optional<FixItList>
1674 PointerDereferenceGadget::getFixits(const Strategy &S) const {
1675 const VarDecl *VD = cast<VarDecl>(BaseDeclRefExpr->getDecl());
1676 switch (S.lookup(VD)) {
1677 case Strategy::Kind::Span: {
1678 ASTContext &Ctx = VD->getASTContext();
1679 SourceManager &SM = Ctx.getSourceManager();
1680 // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0]
1681 // Deletes the *operand
1682 CharSourceRange derefRange = clang::CharSourceRange::getCharRange(
1683 Op->getBeginLoc(), Op->getBeginLoc().getLocWithOffset(1));
1684 // Inserts the [0]
1685 if (auto LocPastOperand =
1686 getPastLoc(BaseDeclRefExpr, SM, Ctx.getLangOpts())) {
1687 return FixItList{{FixItHint::CreateRemoval(derefRange),
1688 FixItHint::CreateInsertion(*LocPastOperand, "[0]")}};
1690 break;
1692 case Strategy::Kind::Iterator:
1693 case Strategy::Kind::Array:
1694 case Strategy::Kind::Vector:
1695 llvm_unreachable("Strategy not implemented yet!");
1696 case Strategy::Kind::Wontfix:
1697 llvm_unreachable("Invalid strategy!");
1700 return std::nullopt;
1703 // Generates fix-its replacing an expression of the form UPC(DRE) with
1704 // `DRE.data()`
1705 std::optional<FixItList> UPCStandalonePointerGadget::getFixits(const Strategy &S)
1706 const {
1707 const auto VD = cast<VarDecl>(Node->getDecl());
1708 switch (S.lookup(VD)) {
1709 case Strategy::Kind::Span: {
1710 ASTContext &Ctx = VD->getASTContext();
1711 SourceManager &SM = Ctx.getSourceManager();
1712 // Inserts the .data() after the DRE
1713 std::optional<SourceLocation> EndOfOperand =
1714 getPastLoc(Node, SM, Ctx.getLangOpts());
1716 if (EndOfOperand)
1717 return FixItList{{FixItHint::CreateInsertion(
1718 *EndOfOperand, ".data()")}};
1719 // FIXME: Points inside a macro expansion.
1720 break;
1722 case Strategy::Kind::Wontfix:
1723 case Strategy::Kind::Iterator:
1724 case Strategy::Kind::Array:
1725 case Strategy::Kind::Vector:
1726 llvm_unreachable("unsupported strategies for FixableGadgets");
1729 return std::nullopt;
1732 // Generates fix-its replacing an expression of the form `&DRE[e]` with
1733 // `&DRE.data()[e]`:
1734 static std::optional<FixItList>
1735 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) {
1736 const auto *ArraySub = cast<ArraySubscriptExpr>(Node->getSubExpr());
1737 const auto *DRE = cast<DeclRefExpr>(ArraySub->getBase()->IgnoreImpCasts());
1738 // FIXME: this `getASTContext` call is costly, we should pass the
1739 // ASTContext in:
1740 const ASTContext &Ctx = DRE->getDecl()->getASTContext();
1741 const Expr *Idx = ArraySub->getIdx();
1742 const SourceManager &SM = Ctx.getSourceManager();
1743 const LangOptions &LangOpts = Ctx.getLangOpts();
1744 std::stringstream SS;
1745 bool IdxIsLitZero = false;
1747 if (auto ICE = Idx->getIntegerConstantExpr(Ctx))
1748 if ((*ICE).isZero())
1749 IdxIsLitZero = true;
1750 std::optional<StringRef> DreString = getExprText(DRE, SM, LangOpts);
1751 if (!DreString)
1752 return std::nullopt;
1754 if (IdxIsLitZero) {
1755 // If the index is literal zero, we produce the most concise fix-it:
1756 SS << (*DreString).str() << ".data()";
1757 } else {
1758 std::optional<StringRef> IndexString = getExprText(Idx, SM, LangOpts);
1759 if (!IndexString)
1760 return std::nullopt;
1762 SS << "&" << (*DreString).str() << ".data()"
1763 << "[" << (*IndexString).str() << "]";
1765 return FixItList{
1766 FixItHint::CreateReplacement(Node->getSourceRange(), SS.str())};
1770 std::optional<FixItList> UPCPreIncrementGadget::getFixits(const Strategy &S) const {
1771 DeclUseList DREs = getClaimedVarUseSites();
1773 if (DREs.size() != 1)
1774 return std::nullopt; // In cases of `++Ptr` where `Ptr` is not a DRE, we
1775 // give up
1776 if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) {
1777 if (S.lookup(VD) == Strategy::Kind::Span) {
1778 FixItList Fixes;
1779 std::stringstream SS;
1780 const Stmt *PreIncNode = getBaseStmt();
1781 StringRef varName = VD->getName();
1782 const ASTContext &Ctx = VD->getASTContext();
1784 // To transform UPC(++p) to UPC((p = p.subspan(1)).data()):
1785 SS << "(" << varName.data() << " = " << varName.data()
1786 << ".subspan(1)).data()";
1787 std::optional<SourceLocation> PreIncLocation =
1788 getEndCharLoc(PreIncNode, Ctx.getSourceManager(), Ctx.getLangOpts());
1789 if (!PreIncLocation)
1790 return std::nullopt;
1792 Fixes.push_back(FixItHint::CreateReplacement(
1793 SourceRange(PreIncNode->getBeginLoc(), *PreIncLocation), SS.str()));
1794 return Fixes;
1797 return std::nullopt; // Not in the cases that we can handle for now, give up.
1801 // For a non-null initializer `Init` of `T *` type, this function returns
1802 // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it
1803 // to output stream.
1804 // In many cases, this function cannot figure out the actual extent `S`. It
1805 // then will use a place holder to replace `S` to ask users to fill `S` in. The
1806 // initializer shall be used to initialize a variable of type `std::span<T>`.
1808 // FIXME: Support multi-level pointers
1810 // Parameters:
1811 // `Init` a pointer to the initializer expression
1812 // `Ctx` a reference to the ASTContext
1813 static FixItList
1814 FixVarInitializerWithSpan(const Expr *Init, ASTContext &Ctx,
1815 const StringRef UserFillPlaceHolder) {
1816 const SourceManager &SM = Ctx.getSourceManager();
1817 const LangOptions &LangOpts = Ctx.getLangOpts();
1819 // If `Init` has a constant value that is (or equivalent to) a
1820 // NULL pointer, we use the default constructor to initialize the span
1821 // object, i.e., a `std:span` variable declaration with no initializer.
1822 // So the fix-it is just to remove the initializer.
1823 if (Init->isNullPointerConstant(Ctx,
1824 // FIXME: Why does this function not ask for `const ASTContext
1825 // &`? It should. Maybe worth an NFC patch later.
1826 Expr::NullPointerConstantValueDependence::
1827 NPC_ValueDependentIsNotNull)) {
1828 std::optional<SourceLocation> InitLocation =
1829 getEndCharLoc(Init, SM, LangOpts);
1830 if (!InitLocation)
1831 return {};
1833 SourceRange SR(Init->getBeginLoc(), *InitLocation);
1835 return {FixItHint::CreateRemoval(SR)};
1838 FixItList FixIts{};
1839 std::string ExtentText = UserFillPlaceHolder.data();
1840 StringRef One = "1";
1842 // Insert `{` before `Init`:
1843 FixIts.push_back(FixItHint::CreateInsertion(Init->getBeginLoc(), "{"));
1844 // Try to get the data extent. Break into different cases:
1845 if (auto CxxNew = dyn_cast<CXXNewExpr>(Init->IgnoreImpCasts())) {
1846 // In cases `Init` is `new T[n]` and there is no explicit cast over
1847 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects
1848 // of `T`. So the extent is `n` unless `n` has side effects. Similar but
1849 // simpler for the case where `Init` is `new T`.
1850 if (const Expr *Ext = CxxNew->getArraySize().value_or(nullptr)) {
1851 if (!Ext->HasSideEffects(Ctx)) {
1852 std::optional<StringRef> ExtentString = getExprText(Ext, SM, LangOpts);
1853 if (!ExtentString)
1854 return {};
1855 ExtentText = *ExtentString;
1857 } else if (!CxxNew->isArray())
1858 // Although the initializer is not allocating a buffer, the pointer
1859 // variable could still be used in buffer access operations.
1860 ExtentText = One;
1861 } else if (const auto *CArrTy = Ctx.getAsConstantArrayType(
1862 Init->IgnoreImpCasts()->getType())) {
1863 // In cases `Init` is of an array type after stripping off implicit casts,
1864 // the extent is the array size. Note that if the array size is not a
1865 // constant, we cannot use it as the extent.
1866 ExtentText = getAPIntText(CArrTy->getSize());
1867 } else {
1868 // In cases `Init` is of the form `&Var` after stripping of implicit
1869 // casts, where `&` is the built-in operator, the extent is 1.
1870 if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Init->IgnoreImpCasts()))
1871 if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf &&
1872 isa_and_present<DeclRefExpr>(AddrOfExpr->getSubExpr()))
1873 ExtentText = One;
1874 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`,
1875 // and explicit casting, etc. etc.
1878 SmallString<32> StrBuffer{};
1879 std::optional<SourceLocation> LocPassInit = getPastLoc(Init, SM, LangOpts);
1881 if (!LocPassInit)
1882 return {};
1884 StrBuffer.append(", ");
1885 StrBuffer.append(ExtentText);
1886 StrBuffer.append("}");
1887 FixIts.push_back(FixItHint::CreateInsertion(*LocPassInit, StrBuffer.str()));
1888 return FixIts;
1891 #ifndef NDEBUG
1892 #define DEBUG_NOTE_DECL_FAIL(D, Msg) \
1893 Handler.addDebugNoteForVar((D), (D)->getBeginLoc(), "failed to produce fixit for declaration '" + (D)->getNameAsString() + "'" + (Msg))
1894 #else
1895 #define DEBUG_NOTE_DECL_FAIL(D, Msg)
1896 #endif
1898 // For the given variable declaration with a pointer-to-T type, returns the text
1899 // `std::span<T>`. If it is unable to generate the text, returns
1900 // `std::nullopt`.
1901 static std::optional<std::string> createSpanTypeForVarDecl(const VarDecl *VD,
1902 const ASTContext &Ctx) {
1903 assert(VD->getType()->isPointerType());
1905 std::optional<Qualifiers> PteTyQualifiers = std::nullopt;
1906 std::optional<std::string> PteTyText = getPointeeTypeText(
1907 VD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers);
1909 if (!PteTyText)
1910 return std::nullopt;
1912 std::string SpanTyText = "std::span<";
1914 SpanTyText.append(*PteTyText);
1915 // Append qualifiers to span element type if any:
1916 if (PteTyQualifiers) {
1917 SpanTyText.append(" ");
1918 SpanTyText.append(PteTyQualifiers->getAsString());
1920 SpanTyText.append(">");
1921 return SpanTyText;
1924 // For a `VarDecl` of the form `T * var (= Init)?`, this
1925 // function generates fix-its that
1926 // 1) replace `T * var` with `std::span<T> var`; and
1927 // 2) change `Init` accordingly to a span constructor, if it exists.
1929 // FIXME: support Multi-level pointers
1931 // Parameters:
1932 // `D` a pointer the variable declaration node
1933 // `Ctx` a reference to the ASTContext
1934 // `UserFillPlaceHolder` the user-input placeholder text
1935 // Returns:
1936 // the non-empty fix-it list, if fix-its are successfuly generated; empty
1937 // list otherwise.
1938 static FixItList fixLocalVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx,
1939 const StringRef UserFillPlaceHolder,
1940 UnsafeBufferUsageHandler &Handler) {
1941 if (hasUnsupportedSpecifiers(D, Ctx.getSourceManager()))
1942 return {};
1944 FixItList FixIts{};
1945 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(D, Ctx);
1947 if (!SpanTyText) {
1948 DEBUG_NOTE_DECL_FAIL(D, " : failed to generate 'std::span' type");
1949 return {};
1952 // Will hold the text for `std::span<T> Ident`:
1953 std::stringstream SS;
1955 SS << *SpanTyText;
1956 // Append qualifiers to the type of `D`, if any:
1957 if (D->getType().hasQualifiers())
1958 SS << " " << D->getType().getQualifiers().getAsString();
1960 // The end of the range of the original source that will be replaced
1961 // by `std::span<T> ident`:
1962 SourceLocation EndLocForReplacement = D->getEndLoc();
1963 std::optional<StringRef> IdentText =
1964 getVarDeclIdentifierText(D, Ctx.getSourceManager(), Ctx.getLangOpts());
1966 if (!IdentText) {
1967 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier");
1968 return {};
1970 // Fix the initializer if it exists:
1971 if (const Expr *Init = D->getInit()) {
1972 FixItList InitFixIts =
1973 FixVarInitializerWithSpan(Init, Ctx, UserFillPlaceHolder);
1974 if (InitFixIts.empty())
1975 return {};
1976 FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts.begin()),
1977 std::make_move_iterator(InitFixIts.end()));
1978 // If the declaration has the form `T *ident = init`, we want to replace
1979 // `T *ident = ` with `std::span<T> ident`:
1980 EndLocForReplacement = Init->getBeginLoc().getLocWithOffset(-1);
1982 SS << " " << IdentText->str();
1983 if (!EndLocForReplacement.isValid()) {
1984 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the end of the declaration");
1985 return {};
1987 FixIts.push_back(FixItHint::CreateReplacement(
1988 SourceRange(D->getBeginLoc(), EndLocForReplacement), SS.str()));
1989 return FixIts;
1992 static bool hasConflictingOverload(const FunctionDecl *FD) {
1993 return !FD->getDeclContext()->lookup(FD->getDeclName()).isSingleResult();
1996 // For a `FunctionDecl`, whose `ParmVarDecl`s are being changed to have new
1997 // types, this function produces fix-its to make the change self-contained. Let
1998 // 'F' be the entity defined by the original `FunctionDecl` and "NewF" be the
1999 // entity defined by the `FunctionDecl` after the change to the parameters.
2000 // Fix-its produced by this function are
2001 // 1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration
2002 // of 'F';
2003 // 2. Create a declaration of "NewF" next to each declaration of `F`;
2004 // 3. Create a definition of "F" (as its' original definition is now belongs
2005 // to "NewF") next to its original definition. The body of the creating
2006 // definition calls to "NewF".
2008 // Example:
2010 // void f(int *p); // original declaration
2011 // void f(int *p) { // original definition
2012 // p[5];
2013 // }
2015 // To change the parameter `p` to be of `std::span<int>` type, we
2016 // also add overloads:
2018 // [[clang::unsafe_buffer_usage]] void f(int *p); // original decl
2019 // void f(std::span<int> p); // added overload decl
2020 // void f(std::span<int> p) { // original def where param is changed
2021 // p[5];
2022 // }
2023 // [[clang::unsafe_buffer_usage]] void f(int *p) { // added def
2024 // return f(std::span(p, <# size #>));
2025 // }
2027 static std::optional<FixItList>
2028 createOverloadsForFixedParams(const Strategy &S, const FunctionDecl *FD,
2029 const ASTContext &Ctx,
2030 UnsafeBufferUsageHandler &Handler) {
2031 // FIXME: need to make this conflict checking better:
2032 if (hasConflictingOverload(FD))
2033 return std::nullopt;
2035 const SourceManager &SM = Ctx.getSourceManager();
2036 const LangOptions &LangOpts = Ctx.getLangOpts();
2037 const unsigned NumParms = FD->getNumParams();
2038 std::vector<std::string> NewTysTexts(NumParms);
2039 std::vector<bool> ParmsMask(NumParms, false);
2040 bool AtLeastOneParmToFix = false;
2042 for (unsigned i = 0; i < NumParms; i++) {
2043 const ParmVarDecl *PVD = FD->getParamDecl(i);
2045 if (S.lookup(PVD) == Strategy::Kind::Wontfix)
2046 continue;
2047 if (S.lookup(PVD) != Strategy::Kind::Span)
2048 // Not supported, not suppose to happen:
2049 return std::nullopt;
2051 std::optional<Qualifiers> PteTyQuals = std::nullopt;
2052 std::optional<std::string> PteTyText =
2053 getPointeeTypeText(PVD, SM, LangOpts, &PteTyQuals);
2055 if (!PteTyText)
2056 // something wrong in obtaining the text of the pointee type, give up
2057 return std::nullopt;
2058 // FIXME: whether we should create std::span type depends on the Strategy.
2059 NewTysTexts[i] = getSpanTypeText(*PteTyText, PteTyQuals);
2060 ParmsMask[i] = true;
2061 AtLeastOneParmToFix = true;
2063 if (!AtLeastOneParmToFix)
2064 // No need to create function overloads:
2065 return {};
2066 // FIXME Respect indentation of the original code.
2068 // A lambda that creates the text representation of a function declaration
2069 // with the new type signatures:
2070 const auto NewOverloadSignatureCreator =
2071 [&SM, &LangOpts, &NewTysTexts,
2072 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> {
2073 std::stringstream SS;
2075 SS << ";";
2076 SS << getEndOfLine().str();
2077 // Append: ret-type func-name "("
2078 if (auto Prefix = getRangeText(
2079 SourceRange(FD->getBeginLoc(), (*FD->param_begin())->getBeginLoc()),
2080 SM, LangOpts))
2081 SS << Prefix->str();
2082 else
2083 return std::nullopt; // give up
2084 // Append: parameter-type-list
2085 const unsigned NumParms = FD->getNumParams();
2087 for (unsigned i = 0; i < NumParms; i++) {
2088 const ParmVarDecl *Parm = FD->getParamDecl(i);
2090 if (Parm->isImplicit())
2091 continue;
2092 if (ParmsMask[i]) {
2093 // This `i`-th parameter will be fixed with `NewTysTexts[i]` being its
2094 // new type:
2095 SS << NewTysTexts[i];
2096 // print parameter name if provided:
2097 if (IdentifierInfo *II = Parm->getIdentifier())
2098 SS << ' ' << II->getName().str();
2099 } else if (auto ParmTypeText = getRangeText(
2100 getSourceRangeToTokenEnd(Parm, SM, LangOpts),
2101 SM, LangOpts)) {
2102 // print the whole `Parm` without modification:
2103 SS << ParmTypeText->str();
2104 } else
2105 return std::nullopt; // something wrong, give up
2106 if (i != NumParms - 1)
2107 SS << ", ";
2109 SS << ")";
2110 return SS.str();
2113 // A lambda that creates the text representation of a function definition with
2114 // the original signature:
2115 const auto OldOverloadDefCreator =
2116 [&Handler, &SM, &LangOpts, &NewTysTexts,
2117 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> {
2118 std::stringstream SS;
2120 SS << getEndOfLine().str();
2121 // Append: attr-name ret-type func-name "(" param-list ")" "{"
2122 if (auto FDPrefix = getRangeText(
2123 SourceRange(FD->getBeginLoc(), FD->getBody()->getBeginLoc()), SM,
2124 LangOpts))
2125 SS << Handler.getUnsafeBufferUsageAttributeTextAt(FD->getBeginLoc(), " ")
2126 << FDPrefix->str() << "{";
2127 else
2128 return std::nullopt;
2129 // Append: "return" func-name "("
2130 if (auto FunQualName = getFunNameText(FD, SM, LangOpts))
2131 SS << "return " << FunQualName->str() << "(";
2132 else
2133 return std::nullopt;
2135 // Append: arg-list
2136 const unsigned NumParms = FD->getNumParams();
2137 for (unsigned i = 0; i < NumParms; i++) {
2138 const ParmVarDecl *Parm = FD->getParamDecl(i);
2140 if (Parm->isImplicit())
2141 continue;
2142 // FIXME: If a parameter has no name, it is unused in the
2143 // definition. So we could just leave it as it is.
2144 if (!Parm->getIdentifier())
2145 // If a parameter of a function definition has no name:
2146 return std::nullopt;
2147 if (ParmsMask[i])
2148 // This is our spanified paramter!
2149 SS << NewTysTexts[i] << "(" << Parm->getIdentifier()->getName().str()
2150 << ", " << getUserFillPlaceHolder("size") << ")";
2151 else
2152 SS << Parm->getIdentifier()->getName().str();
2153 if (i != NumParms - 1)
2154 SS << ", ";
2156 // finish call and the body
2157 SS << ");}" << getEndOfLine().str();
2158 // FIXME: 80-char line formatting?
2159 return SS.str();
2162 FixItList FixIts{};
2163 for (FunctionDecl *FReDecl : FD->redecls()) {
2164 std::optional<SourceLocation> Loc = getPastLoc(FReDecl, SM, LangOpts);
2166 if (!Loc)
2167 return {};
2168 if (FReDecl->isThisDeclarationADefinition()) {
2169 assert(FReDecl == FD && "inconsistent function definition");
2170 // Inserts a definition with the old signature to the end of
2171 // `FReDecl`:
2172 if (auto OldOverloadDef = OldOverloadDefCreator(FReDecl))
2173 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *OldOverloadDef));
2174 else
2175 return {}; // give up
2176 } else {
2177 // Adds the unsafe-buffer attribute (if not already there) to `FReDecl`:
2178 if (!FReDecl->hasAttr<UnsafeBufferUsageAttr>()) {
2179 FixIts.emplace_back(FixItHint::CreateInsertion(
2180 FReDecl->getBeginLoc(), Handler.getUnsafeBufferUsageAttributeTextAt(
2181 FReDecl->getBeginLoc(), " ")));
2183 // Inserts a declaration with the new signature to the end of `FReDecl`:
2184 if (auto NewOverloadDecl = NewOverloadSignatureCreator(FReDecl))
2185 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *NewOverloadDecl));
2186 else
2187 return {};
2190 return FixIts;
2193 // To fix a `ParmVarDecl` to be of `std::span` type.
2194 static FixItList fixParamWithSpan(const ParmVarDecl *PVD, const ASTContext &Ctx,
2195 UnsafeBufferUsageHandler &Handler) {
2196 if (hasUnsupportedSpecifiers(PVD, Ctx.getSourceManager())) {
2197 DEBUG_NOTE_DECL_FAIL(PVD, " : has unsupport specifier(s)");
2198 return {};
2200 if (PVD->hasDefaultArg()) {
2201 // FIXME: generate fix-its for default values:
2202 DEBUG_NOTE_DECL_FAIL(PVD, " : has default arg");
2203 return {};
2206 std::optional<Qualifiers> PteTyQualifiers = std::nullopt;
2207 std::optional<std::string> PteTyText = getPointeeTypeText(
2208 PVD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers);
2210 if (!PteTyText) {
2211 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid pointee type");
2212 return {};
2215 std::optional<StringRef> PVDNameText = PVD->getIdentifier()->getName();
2217 if (!PVDNameText) {
2218 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid identifier name");
2219 return {};
2222 std::stringstream SS;
2223 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(PVD, Ctx);
2225 if (PteTyQualifiers)
2226 // Append qualifiers if they exist:
2227 SS << getSpanTypeText(*PteTyText, PteTyQualifiers);
2228 else
2229 SS << getSpanTypeText(*PteTyText);
2230 // Append qualifiers to the type of the parameter:
2231 if (PVD->getType().hasQualifiers())
2232 SS << ' ' << PVD->getType().getQualifiers().getAsString();
2233 // Append parameter's name:
2234 SS << ' ' << PVDNameText->str();
2235 // Add replacement fix-it:
2236 return {FixItHint::CreateReplacement(PVD->getSourceRange(), SS.str())};
2239 static FixItList fixVariableWithSpan(const VarDecl *VD,
2240 const DeclUseTracker &Tracker,
2241 ASTContext &Ctx,
2242 UnsafeBufferUsageHandler &Handler) {
2243 const DeclStmt *DS = Tracker.lookupDecl(VD);
2244 if (!DS) {
2245 DEBUG_NOTE_DECL_FAIL(VD, " : variables declared this way not implemented yet");
2246 return {};
2248 if (!DS->isSingleDecl()) {
2249 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
2250 DEBUG_NOTE_DECL_FAIL(VD, " : multiple VarDecls");
2251 return {};
2253 // Currently DS is an unused variable but we'll need it when
2254 // non-single decls are implemented, where the pointee type name
2255 // and the '*' are spread around the place.
2256 (void)DS;
2258 // FIXME: handle cases where DS has multiple declarations
2259 return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler);
2262 // TODO: we should be consistent to use `std::nullopt` to represent no-fix due
2263 // to any unexpected problem.
2264 static FixItList
2265 fixVariable(const VarDecl *VD, Strategy::Kind K,
2266 /* The function decl under analysis */ const Decl *D,
2267 const DeclUseTracker &Tracker, ASTContext &Ctx,
2268 UnsafeBufferUsageHandler &Handler) {
2269 if (const auto *PVD = dyn_cast<ParmVarDecl>(VD)) {
2270 auto *FD = dyn_cast<clang::FunctionDecl>(PVD->getDeclContext());
2271 if (!FD || FD != D) {
2272 // `FD != D` means that `PVD` belongs to a function that is not being
2273 // analyzed currently. Thus `FD` may not be complete.
2274 DEBUG_NOTE_DECL_FAIL(VD, " : function not currently analyzed");
2275 return {};
2278 // TODO If function has a try block we can't change params unless we check
2279 // also its catch block for their use.
2280 // FIXME We might support static class methods, some select methods,
2281 // operators and possibly lamdas.
2282 if (FD->isMain() || FD->isConstexpr() ||
2283 FD->getTemplatedKind() != FunctionDecl::TemplatedKind::TK_NonTemplate ||
2284 FD->isVariadic() ||
2285 // also covers call-operator of lamdas
2286 isa<CXXMethodDecl>(FD) ||
2287 // skip when the function body is a try-block
2288 (FD->hasBody() && isa<CXXTryStmt>(FD->getBody())) ||
2289 FD->isOverloadedOperator()) {
2290 DEBUG_NOTE_DECL_FAIL(VD, " : unsupported function decl");
2291 return {}; // TODO test all these cases
2295 switch (K) {
2296 case Strategy::Kind::Span: {
2297 if (VD->getType()->isPointerType()) {
2298 if (const auto *PVD = dyn_cast<ParmVarDecl>(VD))
2299 return fixParamWithSpan(PVD, Ctx, Handler);
2301 if (VD->isLocalVarDecl())
2302 return fixVariableWithSpan(VD, Tracker, Ctx, Handler);
2304 DEBUG_NOTE_DECL_FAIL(VD, " : not a pointer");
2305 return {};
2307 case Strategy::Kind::Iterator:
2308 case Strategy::Kind::Array:
2309 case Strategy::Kind::Vector:
2310 llvm_unreachable("Strategy not implemented yet!");
2311 case Strategy::Kind::Wontfix:
2312 llvm_unreachable("Invalid strategy!");
2314 llvm_unreachable("Unknown strategy!");
2317 // Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the
2318 // `RemoveRange` of 'h' overlaps with a macro use.
2319 static bool overlapWithMacro(const FixItList &FixIts) {
2320 // FIXME: For now we only check if the range (or the first token) is (part of)
2321 // a macro expansion. Ideally, we want to check for all tokens in the range.
2322 return llvm::any_of(FixIts, [](const FixItHint &Hint) {
2323 auto Range = Hint.RemoveRange;
2324 if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
2325 // If the range (or the first token) is (part of) a macro expansion:
2326 return true;
2327 return false;
2331 // Returns true iff `VD` is a parameter of the declaration `D`:
2332 static bool isParameterOf(const VarDecl *VD, const Decl *D) {
2333 return isa<ParmVarDecl>(VD) &&
2334 VD->getDeclContext() == dyn_cast<DeclContext>(D);
2337 // Erases variables in `FixItsForVariable`, if such a variable has an unfixable
2338 // group mate. A variable `v` is unfixable iff `FixItsForVariable` does not
2339 // contain `v`.
2340 static void eraseVarsForUnfixableGroupMates(
2341 std::map<const VarDecl *, FixItList> &FixItsForVariable,
2342 const VariableGroupsManager &VarGrpMgr) {
2343 // Variables will be removed from `FixItsForVariable`:
2344 SmallVector<const VarDecl *, 8> ToErase;
2346 for (const auto &[VD, Ignore] : FixItsForVariable) {
2347 VarGrpRef Grp = VarGrpMgr.getGroupOfVar(VD);
2348 if (llvm::any_of(Grp,
2349 [&FixItsForVariable](const VarDecl *GrpMember) -> bool {
2350 return !FixItsForVariable.count(GrpMember);
2351 })) {
2352 // At least one group member cannot be fixed, so we have to erase the
2353 // whole group:
2354 for (const VarDecl *Member : Grp)
2355 ToErase.push_back(Member);
2358 for (auto *VarToErase : ToErase)
2359 FixItsForVariable.erase(VarToErase);
2362 // Returns the fix-its that create bounds-safe function overloads for the
2363 // function `D`, if `D`'s parameters will be changed to safe-types through
2364 // fix-its in `FixItsForVariable`.
2366 // NOTE: In case `D`'s parameters will be changed but bounds-safe function
2367 // overloads cannot created, the whole group that contains the parameters will
2368 // be erased from `FixItsForVariable`.
2369 static FixItList createFunctionOverloadsForParms(
2370 std::map<const VarDecl *, FixItList> &FixItsForVariable /* mutable */,
2371 const VariableGroupsManager &VarGrpMgr, const FunctionDecl *FD,
2372 const Strategy &S, ASTContext &Ctx, UnsafeBufferUsageHandler &Handler) {
2373 FixItList FixItsSharedByParms{};
2375 std::optional<FixItList> OverloadFixes =
2376 createOverloadsForFixedParams(S, FD, Ctx, Handler);
2378 if (OverloadFixes) {
2379 FixItsSharedByParms.append(*OverloadFixes);
2380 } else {
2381 // Something wrong in generating `OverloadFixes`, need to remove the
2382 // whole group, where parameters are in, from `FixItsForVariable` (Note
2383 // that all parameters should be in the same group):
2384 for (auto *Member : VarGrpMgr.getGroupOfParms())
2385 FixItsForVariable.erase(Member);
2387 return FixItsSharedByParms;
2390 // Constructs self-contained fix-its for each variable in `FixablesForAllVars`.
2391 static std::map<const VarDecl *, FixItList>
2392 getFixIts(FixableGadgetSets &FixablesForAllVars, const Strategy &S,
2393 ASTContext &Ctx,
2394 /* The function decl under analysis */ const Decl *D,
2395 const DeclUseTracker &Tracker, UnsafeBufferUsageHandler &Handler,
2396 const VariableGroupsManager &VarGrpMgr) {
2397 // `FixItsForVariable` will map each variable to a set of fix-its directly
2398 // associated to the variable itself. Fix-its of distinct variables in
2399 // `FixItsForVariable` are disjoint.
2400 std::map<const VarDecl *, FixItList> FixItsForVariable;
2402 // Populate `FixItsForVariable` with fix-its directly associated with each
2403 // variable. Fix-its directly associated to a variable 'v' are the ones
2404 // produced by the `FixableGadget`s whose claimed variable is 'v'.
2405 for (const auto &[VD, Fixables] : FixablesForAllVars.byVar) {
2406 FixItsForVariable[VD] =
2407 fixVariable(VD, S.lookup(VD), D, Tracker, Ctx, Handler);
2408 // If we fail to produce Fix-It for the declaration we have to skip the
2409 // variable entirely.
2410 if (FixItsForVariable[VD].empty()) {
2411 FixItsForVariable.erase(VD);
2412 continue;
2414 for (const auto &F : Fixables) {
2415 std::optional<FixItList> Fixits = F->getFixits(S);
2417 if (Fixits) {
2418 FixItsForVariable[VD].insert(FixItsForVariable[VD].end(),
2419 Fixits->begin(), Fixits->end());
2420 continue;
2422 #ifndef NDEBUG
2423 Handler.addDebugNoteForVar(
2424 VD, F->getBaseStmt()->getBeginLoc(),
2425 ("gadget '" + F->getDebugName() + "' refused to produce a fix")
2426 .str());
2427 #endif
2428 FixItsForVariable.erase(VD);
2429 break;
2433 // `FixItsForVariable` now contains only variables that can be
2434 // fixed. A variable can be fixed if its' declaration and all Fixables
2435 // associated to it can all be fixed.
2437 // To further remove from `FixItsForVariable` variables whose group mates
2438 // cannot be fixed...
2439 eraseVarsForUnfixableGroupMates(FixItsForVariable, VarGrpMgr);
2440 // Now `FixItsForVariable` gets further reduced: a variable is in
2441 // `FixItsForVariable` iff it can be fixed and all its group mates can be
2442 // fixed.
2444 // Fix-its of bounds-safe overloads of `D` are shared by parameters of `D`.
2445 // That is, when fixing multiple parameters in one step, these fix-its will
2446 // be applied only once (instead of being applied per parameter).
2447 FixItList FixItsSharedByParms{};
2449 if (auto *FD = dyn_cast<FunctionDecl>(D))
2450 FixItsSharedByParms = createFunctionOverloadsForParms(
2451 FixItsForVariable, VarGrpMgr, FD, S, Ctx, Handler);
2453 // The map that maps each variable `v` to fix-its for the whole group where
2454 // `v` is in:
2455 std::map<const VarDecl *, FixItList> FinalFixItsForVariable{
2456 FixItsForVariable};
2458 for (auto &[Var, Ignore] : FixItsForVariable) {
2459 bool AnyParm = false;
2460 const auto VarGroupForVD = VarGrpMgr.getGroupOfVar(Var, &AnyParm);
2462 for (const VarDecl *GrpMate : VarGroupForVD) {
2463 if (Var == GrpMate)
2464 continue;
2465 if (FixItsForVariable.count(GrpMate))
2466 FinalFixItsForVariable[Var].append(FixItsForVariable[GrpMate]);
2468 if (AnyParm) {
2469 // This assertion should never fail. Otherwise we have a bug.
2470 assert(!FixItsSharedByParms.empty() &&
2471 "Should not try to fix a parameter that does not belong to a "
2472 "FunctionDecl");
2473 FinalFixItsForVariable[Var].append(FixItsSharedByParms);
2476 // Fix-its that will be applied in one step shall NOT:
2477 // 1. overlap with macros or/and templates; or
2478 // 2. conflict with each other.
2479 // Otherwise, the fix-its will be dropped.
2480 for (auto Iter = FinalFixItsForVariable.begin();
2481 Iter != FinalFixItsForVariable.end();)
2482 if (overlapWithMacro(Iter->second) ||
2483 clang::internal::anyConflict(Iter->second, Ctx.getSourceManager())) {
2484 Iter = FinalFixItsForVariable.erase(Iter);
2485 } else
2486 Iter++;
2487 return FinalFixItsForVariable;
2490 template <typename VarDeclIterTy>
2491 static Strategy
2492 getNaiveStrategy(llvm::iterator_range<VarDeclIterTy> UnsafeVars) {
2493 Strategy S;
2494 for (const VarDecl *VD : UnsafeVars) {
2495 S.set(VD, Strategy::Kind::Span);
2497 return S;
2500 // Manages variable groups:
2501 class VariableGroupsManagerImpl : public VariableGroupsManager {
2502 const std::vector<VarGrpTy> Groups;
2503 const std::map<const VarDecl *, unsigned> &VarGrpMap;
2504 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms;
2506 public:
2507 VariableGroupsManagerImpl(
2508 const std::vector<VarGrpTy> &Groups,
2509 const std::map<const VarDecl *, unsigned> &VarGrpMap,
2510 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms)
2511 : Groups(Groups), VarGrpMap(VarGrpMap),
2512 GrpsUnionForParms(GrpsUnionForParms) {}
2514 VarGrpRef getGroupOfVar(const VarDecl *Var, bool *HasParm) const override {
2515 if (GrpsUnionForParms.contains(Var)) {
2516 if (HasParm)
2517 *HasParm = true;
2518 return GrpsUnionForParms.getArrayRef();
2520 if (HasParm)
2521 *HasParm = false;
2523 auto It = VarGrpMap.find(Var);
2525 if (It == VarGrpMap.end())
2526 return std::nullopt;
2527 return Groups[It->second];
2530 VarGrpRef getGroupOfParms() const override {
2531 return GrpsUnionForParms.getArrayRef();
2535 void clang::checkUnsafeBufferUsage(const Decl *D,
2536 UnsafeBufferUsageHandler &Handler,
2537 bool EmitSuggestions) {
2538 #ifndef NDEBUG
2539 Handler.clearDebugNotes();
2540 #endif
2542 assert(D && D->getBody());
2543 // We do not want to visit a Lambda expression defined inside a method independently.
2544 // Instead, it should be visited along with the outer method.
2545 // FIXME: do we want to do the same thing for `BlockDecl`s?
2546 if (const auto *fd = dyn_cast<CXXMethodDecl>(D)) {
2547 if (fd->getParent()->isLambda() && fd->getParent()->isLocalClass())
2548 return;
2551 // Do not emit fixit suggestions for functions declared in an
2552 // extern "C" block.
2553 if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
2554 for (FunctionDecl *FReDecl : FD->redecls()) {
2555 if (FReDecl->isExternC()) {
2556 EmitSuggestions = false;
2557 break;
2562 WarningGadgetSets UnsafeOps;
2563 FixableGadgetSets FixablesForAllVars;
2565 auto [FixableGadgets, WarningGadgets, Tracker] =
2566 findGadgets(D, Handler, EmitSuggestions);
2568 if (!EmitSuggestions) {
2569 // Our job is very easy without suggestions. Just warn about
2570 // every problematic operation and consider it done. No need to deal
2571 // with fixable gadgets, no need to group operations by variable.
2572 for (const auto &G : WarningGadgets) {
2573 Handler.handleUnsafeOperation(G->getBaseStmt(),
2574 /*IsRelatedToDecl=*/false);
2577 // This return guarantees that most of the machine doesn't run when
2578 // suggestions aren't requested.
2579 assert(FixableGadgets.size() == 0 &&
2580 "Fixable gadgets found but suggestions not requested!");
2581 return;
2584 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
2585 // function under the analysis. No need to fix any Fixables.
2586 if (!WarningGadgets.empty()) {
2587 // Gadgets "claim" variables they're responsible for. Once this loop
2588 // finishes, the tracker will only track DREs that weren't claimed by any
2589 // gadgets, i.e. not understood by the analysis.
2590 for (const auto &G : FixableGadgets) {
2591 for (const auto *DRE : G->getClaimedVarUseSites()) {
2592 Tracker.claimUse(DRE);
2597 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
2598 // function under the analysis. Thus, it early returns here as there is
2599 // nothing needs to be fixed.
2601 // Note this claim is based on the assumption that there is no unsafe
2602 // variable whose declaration is invisible from the analyzing function.
2603 // Otherwise, we need to consider if the uses of those unsafe varuables needs
2604 // fix.
2605 // So far, we are not fixing any global variables or class members. And,
2606 // lambdas will be analyzed along with the enclosing function. So this early
2607 // return is correct for now.
2608 if (WarningGadgets.empty())
2609 return;
2611 UnsafeOps = groupWarningGadgetsByVar(std::move(WarningGadgets));
2612 FixablesForAllVars = groupFixablesByVar(std::move(FixableGadgets));
2614 std::map<const VarDecl *, FixItList> FixItsForVariableGroup;
2616 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s.
2617 for (auto it = FixablesForAllVars.byVar.cbegin();
2618 it != FixablesForAllVars.byVar.cend();) {
2619 // FIXME: need to deal with global variables later
2620 if ((!it->first->isLocalVarDecl() && !isa<ParmVarDecl>(it->first))) {
2621 #ifndef NDEBUG
2622 Handler.addDebugNoteForVar(
2623 it->first, it->first->getBeginLoc(),
2624 ("failed to produce fixit for '" + it->first->getNameAsString() +
2625 "' : neither local nor a parameter"));
2626 #endif
2627 it = FixablesForAllVars.byVar.erase(it);
2628 } else if (it->first->getType().getCanonicalType()->isReferenceType()) {
2629 #ifndef NDEBUG
2630 Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(),
2631 ("failed to produce fixit for '" +
2632 it->first->getNameAsString() +
2633 "' : has a reference type"));
2634 #endif
2635 it = FixablesForAllVars.byVar.erase(it);
2636 } else if (Tracker.hasUnclaimedUses(it->first)) {
2637 #ifndef NDEBUG
2638 auto AllUnclaimed = Tracker.getUnclaimedUses(it->first);
2639 for (auto UnclaimedDRE : AllUnclaimed) {
2640 std::string UnclaimedUseTrace =
2641 getDREAncestorString(UnclaimedDRE, D->getASTContext());
2643 Handler.addDebugNoteForVar(
2644 it->first, UnclaimedDRE->getBeginLoc(),
2645 ("failed to produce fixit for '" + it->first->getNameAsString() +
2646 "' : has an unclaimed use\nThe unclaimed DRE trace: " +
2647 UnclaimedUseTrace));
2649 #endif
2650 it = FixablesForAllVars.byVar.erase(it);
2651 } else if (it->first->isInitCapture()) {
2652 #ifndef NDEBUG
2653 Handler.addDebugNoteForVar(
2654 it->first, it->first->getBeginLoc(),
2655 ("failed to produce fixit for '" + it->first->getNameAsString() +
2656 "' : init capture"));
2657 #endif
2658 it = FixablesForAllVars.byVar.erase(it);
2659 }else {
2660 ++it;
2664 // Fixpoint iteration for pointer assignments
2665 using DepMapTy = DenseMap<const VarDecl *, llvm::SetVector<const VarDecl *>>;
2666 DepMapTy DependenciesMap{};
2667 DepMapTy PtrAssignmentGraph{};
2669 for (auto it : FixablesForAllVars.byVar) {
2670 for (const FixableGadget *fixable : it.second) {
2671 std::optional<std::pair<const VarDecl *, const VarDecl *>> ImplPair =
2672 fixable->getStrategyImplications();
2673 if (ImplPair) {
2674 std::pair<const VarDecl *, const VarDecl *> Impl = std::move(*ImplPair);
2675 PtrAssignmentGraph[Impl.first].insert(Impl.second);
2681 The following code does a BFS traversal of the `PtrAssignmentGraph`
2682 considering all unsafe vars as starting nodes and constructs an undirected
2683 graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner
2684 elimiates all variables that are unreachable from any unsafe var. In other
2685 words, this removes all dependencies that don't include any unsafe variable
2686 and consequently don't need any fixit generation.
2687 Note: A careful reader would observe that the code traverses
2688 `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and
2689 `Adj` and not between `CurrentVar` and `Adj`. Both approaches would
2690 achieve the same result but the one used here dramatically cuts the
2691 amount of hoops the second part of the algorithm needs to jump, given that
2692 a lot of these connections become "direct". The reader is advised not to
2693 imagine how the graph is transformed because of using `Var` instead of
2694 `CurrentVar`. The reader can continue reading as if `CurrentVar` was used,
2695 and think about why it's equivalent later.
2697 std::set<const VarDecl *> VisitedVarsDirected{};
2698 for (const auto &[Var, ignore] : UnsafeOps.byVar) {
2699 if (VisitedVarsDirected.find(Var) == VisitedVarsDirected.end()) {
2701 std::queue<const VarDecl*> QueueDirected{};
2702 QueueDirected.push(Var);
2703 while(!QueueDirected.empty()) {
2704 const VarDecl* CurrentVar = QueueDirected.front();
2705 QueueDirected.pop();
2706 VisitedVarsDirected.insert(CurrentVar);
2707 auto AdjacentNodes = PtrAssignmentGraph[CurrentVar];
2708 for (const VarDecl *Adj : AdjacentNodes) {
2709 if (VisitedVarsDirected.find(Adj) == VisitedVarsDirected.end()) {
2710 QueueDirected.push(Adj);
2712 DependenciesMap[Var].insert(Adj);
2713 DependenciesMap[Adj].insert(Var);
2719 // `Groups` stores the set of Connected Components in the graph.
2720 std::vector<VarGrpTy> Groups;
2721 // `VarGrpMap` maps variables that need fix to the groups (indexes) that the
2722 // variables belong to. Group indexes refer to the elements in `Groups`.
2723 // `VarGrpMap` is complete in that every variable that needs fix is in it.
2724 std::map<const VarDecl *, unsigned> VarGrpMap;
2725 // The union group over the ones in "Groups" that contain parameters of `D`:
2726 llvm::SetVector<const VarDecl *>
2727 GrpsUnionForParms; // these variables need to be fixed in one step
2729 // Group Connected Components for Unsafe Vars
2730 // (Dependencies based on pointer assignments)
2731 std::set<const VarDecl *> VisitedVars{};
2732 for (const auto &[Var, ignore] : UnsafeOps.byVar) {
2733 if (VisitedVars.find(Var) == VisitedVars.end()) {
2734 VarGrpTy &VarGroup = Groups.emplace_back();
2735 std::queue<const VarDecl*> Queue{};
2737 Queue.push(Var);
2738 while(!Queue.empty()) {
2739 const VarDecl* CurrentVar = Queue.front();
2740 Queue.pop();
2741 VisitedVars.insert(CurrentVar);
2742 VarGroup.push_back(CurrentVar);
2743 auto AdjacentNodes = DependenciesMap[CurrentVar];
2744 for (const VarDecl *Adj : AdjacentNodes) {
2745 if (VisitedVars.find(Adj) == VisitedVars.end()) {
2746 Queue.push(Adj);
2751 bool HasParm = false;
2752 unsigned GrpIdx = Groups.size() - 1;
2754 for (const VarDecl *V : VarGroup) {
2755 VarGrpMap[V] = GrpIdx;
2756 if (!HasParm && isParameterOf(V, D))
2757 HasParm = true;
2759 if (HasParm)
2760 GrpsUnionForParms.insert(VarGroup.begin(), VarGroup.end());
2764 // Remove a `FixableGadget` if the associated variable is not in the graph
2765 // computed above. We do not want to generate fix-its for such variables,
2766 // since they are neither warned nor reachable from a warned one.
2768 // Note a variable is not warned if it is not directly used in any unsafe
2769 // operation. A variable `v` is NOT reachable from an unsafe variable, if it
2770 // does not exist another variable `u` such that `u` is warned and fixing `u`
2771 // (transitively) implicates fixing `v`.
2773 // For example,
2774 // ```
2775 // void f(int * p) {
2776 // int * a = p; *p = 0;
2777 // }
2778 // ```
2779 // `*p = 0` is a fixable gadget associated with a variable `p` that is neither
2780 // warned nor reachable from a warned one. If we add `a[5] = 0` to the end of
2781 // the function above, `p` becomes reachable from a warned variable.
2782 for (auto I = FixablesForAllVars.byVar.begin();
2783 I != FixablesForAllVars.byVar.end();) {
2784 // Note `VisitedVars` contain all the variables in the graph:
2785 if (!VisitedVars.count((*I).first)) {
2786 // no such var in graph:
2787 I = FixablesForAllVars.byVar.erase(I);
2788 } else
2789 ++I;
2792 // We assign strategies to variables that are 1) in the graph and 2) can be
2793 // fixed. Other variables have the default "Won't fix" strategy.
2794 Strategy NaiveStrategy = getNaiveStrategy(llvm::make_filter_range(
2795 VisitedVars, [&FixablesForAllVars](const VarDecl *V) {
2796 // If a warned variable has no "Fixable", it is considered unfixable:
2797 return FixablesForAllVars.byVar.count(V);
2798 }));
2799 VariableGroupsManagerImpl VarGrpMgr(Groups, VarGrpMap, GrpsUnionForParms);
2801 if (isa<NamedDecl>(D))
2802 // The only case where `D` is not a `NamedDecl` is when `D` is a
2803 // `BlockDecl`. Let's not fix variables in blocks for now
2804 FixItsForVariableGroup =
2805 getFixIts(FixablesForAllVars, NaiveStrategy, D->getASTContext(), D,
2806 Tracker, Handler, VarGrpMgr);
2808 for (const auto &G : UnsafeOps.noVar) {
2809 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false);
2812 for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) {
2813 auto FixItsIt = FixItsForVariableGroup.find(VD);
2814 Handler.handleUnsafeVariableGroup(VD, VarGrpMgr,
2815 FixItsIt != FixItsForVariableGroup.end()
2816 ? std::move(FixItsIt->second)
2817 : FixItList{},
2819 for (const auto &G : WarningGadgets) {
2820 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true);