[clang] Handle __declspec() attributes in using
[llvm-project.git] / clang / lib / Analysis / UnsafeBufferUsage.cpp
blob61c2c4e4b52ad52692e16674c96c180aa55af465
1 //===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
10 #include "clang/AST/RecursiveASTVisitor.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Lex/Preprocessor.h"
13 #include "clang/Lex/Lexer.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include <memory>
16 #include <optional>
18 using namespace llvm;
19 using namespace clang;
20 using namespace ast_matchers;
22 namespace clang::ast_matchers {
23 // A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
24 // except for those belonging to a different callable of "n".
25 class MatchDescendantVisitor
26 : public RecursiveASTVisitor<MatchDescendantVisitor> {
27 public:
28 typedef RecursiveASTVisitor<MatchDescendantVisitor> VisitorBase;
30 // Creates an AST visitor that matches `Matcher` on all
31 // descendants of a given node "n" except for the ones
32 // belonging to a different callable of "n".
33 MatchDescendantVisitor(const internal::DynTypedMatcher *Matcher,
34 internal::ASTMatchFinder *Finder,
35 internal::BoundNodesTreeBuilder *Builder,
36 internal::ASTMatchFinder::BindKind Bind)
37 : Matcher(Matcher), Finder(Finder), Builder(Builder), Bind(Bind),
38 Matches(false) {}
40 // Returns true if a match is found in a subtree of `DynNode`, which belongs
41 // to the same callable of `DynNode`.
42 bool findMatch(const DynTypedNode &DynNode) {
43 Matches = false;
44 if (const Stmt *StmtNode = DynNode.get<Stmt>()) {
45 TraverseStmt(const_cast<Stmt *>(StmtNode));
46 *Builder = ResultBindings;
47 return Matches;
49 return false;
52 // The following are overriding methods from the base visitor class.
53 // They are public only to allow CRTP to work. They are *not *part
54 // of the public API of this class.
56 // For the matchers so far used in safe buffers, we only need to match
57 // `Stmt`s. To override more as needed.
59 bool TraverseDecl(Decl *Node) {
60 if (!Node)
61 return true;
62 if (!match(*Node))
63 return false;
64 // To skip callables:
65 if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Node))
66 return true;
67 // Traverse descendants
68 return VisitorBase::TraverseDecl(Node);
71 bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue = nullptr) {
72 if (!Node)
73 return true;
74 if (!match(*Node))
75 return false;
76 // To skip callables:
77 if (isa<LambdaExpr>(Node))
78 return true;
79 return VisitorBase::TraverseStmt(Node);
82 bool shouldVisitTemplateInstantiations() const { return true; }
83 bool shouldVisitImplicitCode() const {
84 // TODO: let's ignore implicit code for now
85 return false;
88 private:
89 // Sets 'Matched' to true if 'Matcher' matches 'Node'
91 // Returns 'true' if traversal should continue after this function
92 // returns, i.e. if no match is found or 'Bind' is 'BK_All'.
93 template <typename T> bool match(const T &Node) {
94 internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder);
96 if (Matcher->matches(DynTypedNode::create(Node), Finder,
97 &RecursiveBuilder)) {
98 ResultBindings.addMatch(RecursiveBuilder);
99 Matches = true;
100 if (Bind != internal::ASTMatchFinder::BK_All)
101 return false; // Abort as soon as a match is found.
103 return true;
106 const internal::DynTypedMatcher *const Matcher;
107 internal::ASTMatchFinder *const Finder;
108 internal::BoundNodesTreeBuilder *const Builder;
109 internal::BoundNodesTreeBuilder ResultBindings;
110 const internal::ASTMatchFinder::BindKind Bind;
111 bool Matches;
114 AST_MATCHER_P(Stmt, forEveryDescendant, internal::Matcher<Stmt>, innerMatcher) {
115 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
117 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All);
118 return Visitor.findMatch(DynTypedNode::create(Node));
121 // Matches a `Stmt` node iff the node is in a safe-buffer opt-out region
122 AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const UnsafeBufferUsageHandler *, Handler) {
123 return !Handler->isSafeBufferOptOut(Node.getBeginLoc());
126 AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) {
127 return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder);
130 // Returns a matcher that matches any expression 'e' such that `innerMatcher`
131 // matches 'e' and 'e' is in an Unspecified Lvalue Context.
132 static internal::Matcher<Stmt>
133 isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) {
134 return implicitCastExpr(hasCastKind(CastKind::CK_LValueToRValue),
135 castSubExpr(innerMatcher));
136 // FIXME: add assignmentTo context...
138 } // namespace clang::ast_matchers
140 namespace {
141 // Because the analysis revolves around variables and their types, we'll need to
142 // track uses of variables (aka DeclRefExprs).
143 using DeclUseList = SmallVector<const DeclRefExpr *, 1>;
145 // Convenience typedef.
146 using FixItList = SmallVector<FixItHint, 4>;
148 // Defined below.
149 class Strategy;
150 } // namespace
152 // Because we're dealing with raw pointers, let's define what we mean by that.
153 static auto hasPointerType() {
154 return hasType(hasCanonicalType(pointerType()));
157 static auto hasArrayType() {
158 return hasType(hasCanonicalType(arrayType()));
161 namespace {
162 /// Gadget is an individual operation in the code that may be of interest to
163 /// this analysis. Each (non-abstract) subclass corresponds to a specific
164 /// rigid AST structure that constitutes an operation on a pointer-type object.
165 /// Discovery of a gadget in the code corresponds to claiming that we understand
166 /// what this part of code is doing well enough to potentially improve it.
167 /// Gadgets can be warning (immediately deserving a warning) or fixable (not
168 /// always deserving a warning per se, but requires our attention to identify
169 /// it warrants a fixit).
170 class Gadget {
171 public:
172 enum class Kind {
173 #define GADGET(x) x,
174 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
177 /// Common type of ASTMatchers used for discovering gadgets.
178 /// Useful for implementing the static matcher() methods
179 /// that are expected from all non-abstract subclasses.
180 using Matcher = decltype(stmt());
182 Gadget(Kind K) : K(K) {}
184 Kind getKind() const { return K; }
186 virtual bool isWarningGadget() const = 0;
187 virtual const Stmt *getBaseStmt() const = 0;
189 /// Returns the list of pointer-type variables on which this gadget performs
190 /// its operation. Typically, there's only one variable. This isn't a list
191 /// of all DeclRefExprs in the gadget's AST!
192 virtual DeclUseList getClaimedVarUseSites() const = 0;
194 virtual ~Gadget() = default;
196 private:
197 Kind K;
201 /// Warning gadgets correspond to unsafe code patterns that warrants
202 /// an immediate warning.
203 class WarningGadget : public Gadget {
204 public:
205 WarningGadget(Kind K) : Gadget(K) {}
207 static bool classof(const Gadget *G) { return G->isWarningGadget(); }
208 bool isWarningGadget() const final { return true; }
211 /// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be
212 /// properly recognized in order to emit fixes. For example, if a raw pointer-type
213 /// variable is replaced by a safe C++ container, every use of such variable must be
214 /// carefully considered and possibly updated.
215 class FixableGadget : public Gadget {
216 public:
217 FixableGadget(Kind K) : Gadget(K) {}
219 static bool classof(const Gadget *G) { return !G->isWarningGadget(); }
220 bool isWarningGadget() const final { return false; }
222 /// Returns a fixit that would fix the current gadget according to
223 /// the current strategy. Returns None if the fix cannot be produced;
224 /// returns an empty list if no fixes are necessary.
225 virtual std::optional<FixItList> getFixits(const Strategy &) const {
226 return std::nullopt;
230 using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>;
231 using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>;
233 /// An increment of a pointer-type value is unsafe as it may run the pointer
234 /// out of bounds.
235 class IncrementGadget : public WarningGadget {
236 static constexpr const char *const OpTag = "op";
237 const UnaryOperator *Op;
239 public:
240 IncrementGadget(const MatchFinder::MatchResult &Result)
241 : WarningGadget(Kind::Increment),
242 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {}
244 static bool classof(const Gadget *G) {
245 return G->getKind() == Kind::Increment;
248 static Matcher matcher() {
249 return stmt(unaryOperator(
250 hasOperatorName("++"),
251 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
252 ).bind(OpTag));
255 const UnaryOperator *getBaseStmt() const override { return Op; }
257 DeclUseList getClaimedVarUseSites() const override {
258 SmallVector<const DeclRefExpr *, 2> Uses;
259 if (const auto *DRE =
260 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) {
261 Uses.push_back(DRE);
264 return std::move(Uses);
268 /// A decrement of a pointer-type value is unsafe as it may run the pointer
269 /// out of bounds.
270 class DecrementGadget : public WarningGadget {
271 static constexpr const char *const OpTag = "op";
272 const UnaryOperator *Op;
274 public:
275 DecrementGadget(const MatchFinder::MatchResult &Result)
276 : WarningGadget(Kind::Decrement),
277 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {}
279 static bool classof(const Gadget *G) {
280 return G->getKind() == Kind::Decrement;
283 static Matcher matcher() {
284 return stmt(unaryOperator(
285 hasOperatorName("--"),
286 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
287 ).bind(OpTag));
290 const UnaryOperator *getBaseStmt() const override { return Op; }
292 DeclUseList getClaimedVarUseSites() const override {
293 if (const auto *DRE =
294 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) {
295 return {DRE};
298 return {};
302 /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
303 /// it doesn't have any bounds checks for the array.
304 class ArraySubscriptGadget : public WarningGadget {
305 static constexpr const char *const ArraySubscrTag = "ArraySubscript";
306 const ArraySubscriptExpr *ASE;
308 public:
309 ArraySubscriptGadget(const MatchFinder::MatchResult &Result)
310 : WarningGadget(Kind::ArraySubscript),
311 ASE(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ArraySubscrTag)) {}
313 static bool classof(const Gadget *G) {
314 return G->getKind() == Kind::ArraySubscript;
317 static Matcher matcher() {
318 // FIXME: What if the index is integer literal 0? Should this be
319 // a safe gadget in this case?
320 // clang-format off
321 return stmt(arraySubscriptExpr(
322 hasBase(ignoringParenImpCasts(
323 anyOf(hasPointerType(), hasArrayType()))),
324 unless(hasIndex(integerLiteral(equals(0)))))
325 .bind(ArraySubscrTag));
326 // clang-format on
329 const ArraySubscriptExpr *getBaseStmt() const override { return ASE; }
331 DeclUseList getClaimedVarUseSites() const override {
332 if (const auto *DRE =
333 dyn_cast<DeclRefExpr>(ASE->getBase()->IgnoreParenImpCasts())) {
334 return {DRE};
337 return {};
341 /// A pointer arithmetic expression of one of the forms:
342 /// \code
343 /// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n
344 /// \endcode
345 class PointerArithmeticGadget : public WarningGadget {
346 static constexpr const char *const PointerArithmeticTag = "ptrAdd";
347 static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr";
348 const BinaryOperator *PA; // pointer arithmetic expression
349 const Expr * Ptr; // the pointer expression in `PA`
351 public:
352 PointerArithmeticGadget(const MatchFinder::MatchResult &Result)
353 : WarningGadget(Kind::PointerArithmetic),
354 PA(Result.Nodes.getNodeAs<BinaryOperator>(PointerArithmeticTag)),
355 Ptr(Result.Nodes.getNodeAs<Expr>(PointerArithmeticPointerTag)) {}
357 static bool classof(const Gadget *G) {
358 return G->getKind() == Kind::PointerArithmetic;
361 static Matcher matcher() {
362 auto HasIntegerType = anyOf(
363 hasType(isInteger()), hasType(enumType()));
364 auto PtrAtRight = allOf(hasOperatorName("+"),
365 hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)),
366 hasLHS(HasIntegerType));
367 auto PtrAtLeft = allOf(
368 anyOf(hasOperatorName("+"), hasOperatorName("-"),
369 hasOperatorName("+="), hasOperatorName("-=")),
370 hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)),
371 hasRHS(HasIntegerType));
373 return stmt(binaryOperator(anyOf(PtrAtLeft, PtrAtRight)).bind(PointerArithmeticTag));
376 const Stmt *getBaseStmt() const override { return PA; }
378 DeclUseList getClaimedVarUseSites() const override {
379 if (const auto *DRE =
380 dyn_cast<DeclRefExpr>(Ptr->IgnoreParenImpCasts())) {
381 return {DRE};
384 return {};
386 // FIXME: pointer adding zero should be fine
387 //FIXME: this gadge will need a fix-it
391 /// A call of a function or method that performs unchecked buffer operations
392 /// over one of its pointer parameters.
393 class UnsafeBufferUsageAttrGadget : public WarningGadget {
394 constexpr static const char *const OpTag = "call_expr";
395 const CallExpr *Op;
397 public:
398 UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult &Result)
399 : WarningGadget(Kind::UnsafeBufferUsageAttr),
400 Op(Result.Nodes.getNodeAs<CallExpr>(OpTag)) {}
402 static bool classof(const Gadget *G) {
403 return G->getKind() == Kind::UnsafeBufferUsageAttr;
406 static Matcher matcher() {
407 return stmt(callExpr(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))))
408 .bind(OpTag));
410 const Stmt *getBaseStmt() const override { return Op; }
412 DeclUseList getClaimedVarUseSites() const override {
413 return {};
418 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
419 // Context (see `isInUnspecifiedLvalueContext`).
420 // Note here `[]` is the built-in subscript operator.
421 class ULCArraySubscriptGadget : public FixableGadget {
422 private:
423 static constexpr const char *const ULCArraySubscriptTag = "ArraySubscriptUnderULC";
424 const ArraySubscriptExpr *Node;
426 public:
427 ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result)
428 : FixableGadget(Kind::ULCArraySubscript),
429 Node(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ULCArraySubscriptTag)) {
430 assert(Node != nullptr && "Expecting a non-null matching result");
433 static bool classof(const Gadget *G) {
434 return G->getKind() == Kind::ULCArraySubscript;
437 static Matcher matcher() {
438 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
439 auto BaseIsArrayOrPtrDRE =
440 hasBase(ignoringParenImpCasts(declRefExpr(ArrayOrPtr)));
441 auto Target =
442 arraySubscriptExpr(BaseIsArrayOrPtrDRE).bind(ULCArraySubscriptTag);
444 return expr(isInUnspecifiedLvalueContext(Target));
447 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
449 virtual const Stmt *getBaseStmt() const override { return Node; }
451 virtual DeclUseList getClaimedVarUseSites() const override {
452 if (const auto *DRE = dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) {
453 return {DRE};
455 return {};
458 } // namespace
460 namespace {
461 // An auxiliary tracking facility for the fixit analysis. It helps connect
462 // declarations to its and make sure we've covered all uses with our analysis
463 // before we try to fix the declaration.
464 class DeclUseTracker {
465 using UseSetTy = SmallSet<const DeclRefExpr *, 16>;
466 using DefMapTy = DenseMap<const VarDecl *, const DeclStmt *>;
468 // Allocate on the heap for easier move.
469 std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()};
470 DefMapTy Defs{};
472 public:
473 DeclUseTracker() = default;
474 DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies.
475 DeclUseTracker(DeclUseTracker &&) = default;
476 DeclUseTracker &operator=(DeclUseTracker &&) = default;
478 // Start tracking a freshly discovered DRE.
479 void discoverUse(const DeclRefExpr *DRE) { Uses->insert(DRE); }
481 // Stop tracking the DRE as it's been fully figured out.
482 void claimUse(const DeclRefExpr *DRE) {
483 assert(Uses->count(DRE) &&
484 "DRE not found or claimed by multiple matchers!");
485 Uses->erase(DRE);
488 // A variable is unclaimed if at least one use is unclaimed.
489 bool hasUnclaimedUses(const VarDecl *VD) const {
490 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
491 return any_of(*Uses, [VD](const DeclRefExpr *DRE) {
492 return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl();
496 void discoverDecl(const DeclStmt *DS) {
497 for (const Decl *D : DS->decls()) {
498 if (const auto *VD = dyn_cast<VarDecl>(D)) {
499 // FIXME: Assertion temporarily disabled due to a bug in
500 // ASTMatcher internal behavior in presence of GNU
501 // statement-expressions. We need to properly investigate this
502 // because it can screw up our algorithm in other ways.
503 // assert(Defs.count(VD) == 0 && "Definition already discovered!");
504 Defs[VD] = DS;
509 const DeclStmt *lookupDecl(const VarDecl *VD) const {
510 auto It = Defs.find(VD);
511 assert(It != Defs.end() && "Definition never discovered!");
512 return It->second;
515 } // namespace
517 namespace {
518 // Strategy is a map from variables to the way we plan to emit fixes for
519 // these variables. It is figured out gradually by trying different fixes
520 // for different variables depending on gadgets in which these variables
521 // participate.
522 class Strategy {
523 public:
524 enum class Kind {
525 Wontfix, // We don't plan to emit a fixit for this variable.
526 Span, // We recommend replacing the variable with std::span.
527 Iterator, // We recommend replacing the variable with std::span::iterator.
528 Array, // We recommend replacing the variable with std::array.
529 Vector // We recommend replacing the variable with std::vector.
532 private:
533 using MapTy = llvm::DenseMap<const VarDecl *, Kind>;
535 MapTy Map;
537 public:
538 Strategy() = default;
539 Strategy(const Strategy &) = delete; // Let's avoid copies.
540 Strategy(Strategy &&) = default;
542 void set(const VarDecl *VD, Kind K) {
543 Map[VD] = K;
546 Kind lookup(const VarDecl *VD) const {
547 auto I = Map.find(VD);
548 if (I == Map.end())
549 return Kind::Wontfix;
551 return I->second;
554 } // namespace
556 /// Scan the function and return a list of gadgets found with provided kits.
557 static std::tuple<FixableGadgetList, WarningGadgetList, DeclUseTracker>
558 findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler) {
560 struct GadgetFinderCallback : MatchFinder::MatchCallback {
561 FixableGadgetList FixableGadgets;
562 WarningGadgetList WarningGadgets;
563 DeclUseTracker Tracker;
565 void run(const MatchFinder::MatchResult &Result) override {
566 // In debug mode, assert that we've found exactly one gadget.
567 // This helps us avoid conflicts in .bind() tags.
568 #if NDEBUG
569 #define NEXT return
570 #else
571 [[maybe_unused]] int numFound = 0;
572 #define NEXT ++numFound
573 #endif
575 if (const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>("any_dre")) {
576 Tracker.discoverUse(DRE);
577 NEXT;
580 if (const auto *DS = Result.Nodes.getNodeAs<DeclStmt>("any_ds")) {
581 Tracker.discoverDecl(DS);
582 NEXT;
585 // Figure out which matcher we've found, and call the appropriate
586 // subclass constructor.
587 // FIXME: Can we do this more logarithmically?
588 #define FIXABLE_GADGET(name) \
589 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
590 FixableGadgets.push_back(std::make_unique<name ## Gadget>(Result)); \
591 NEXT; \
593 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
594 #define WARNING_GADGET(name) \
595 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
596 WarningGadgets.push_back(std::make_unique<name ## Gadget>(Result)); \
597 NEXT; \
599 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
601 assert(numFound >= 1 && "Gadgets not found in match result!");
602 assert(numFound <= 1 && "Conflicting bind tags in gadgets!");
606 MatchFinder M;
607 GadgetFinderCallback CB;
609 // clang-format off
610 M.addMatcher(
611 stmt(forEveryDescendant(
612 eachOf(
613 // A `FixableGadget` matcher and a `WarningGadget` matcher should not disable
614 // each other (they could if they were put in the same `anyOf` group).
615 // We also should make sure no two `FixableGadget` (resp. `WarningGadget`) matchers
616 // match for the same node, so that we can group them
617 // in one `anyOf` group (for better performance via short-circuiting).
618 stmt(anyOf(
619 #define FIXABLE_GADGET(x) \
620 x ## Gadget::matcher().bind(#x),
621 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
622 // Also match DeclStmts because we'll need them when fixing
623 // their underlying VarDecls that otherwise don't have
624 // any backreferences to DeclStmts.
625 declStmt().bind("any_ds")
627 stmt(anyOf(
628 // Add Gadget::matcher() for every gadget in the registry.
629 #define WARNING_GADGET(x) \
630 allOf(x ## Gadget::matcher().bind(#x), notInSafeBufferOptOut(&Handler)),
631 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
632 // In parallel, match all DeclRefExprs so that to find out
633 // whether there are any uncovered by gadgets.
634 declRefExpr(anyOf(hasPointerType(), hasArrayType()), to(varDecl())).bind("any_dre")
639 // clang-format on
641 M.match(*D->getBody(), D->getASTContext());
643 // Gadgets "claim" variables they're responsible for. Once this loop finishes,
644 // the tracker will only track DREs that weren't claimed by any gadgets,
645 // i.e. not understood by the analysis.
646 for (const auto &G : CB.FixableGadgets) {
647 for (const auto *DRE : G->getClaimedVarUseSites()) {
648 CB.Tracker.claimUse(DRE);
652 return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets), std::move(CB.Tracker)};
655 struct WarningGadgetSets {
656 std::map<const VarDecl *, std::set<std::unique_ptr<WarningGadget>>> byVar;
657 // These Gadgets are not related to pointer variables (e. g. temporaries).
658 llvm::SmallVector<std::unique_ptr<WarningGadget>, 16> noVar;
661 static WarningGadgetSets
662 groupWarningGadgetsByVar(WarningGadgetList &&AllUnsafeOperations) {
663 WarningGadgetSets result;
664 // If some gadgets cover more than one
665 // variable, they'll appear more than once in the map.
666 for (auto &G : AllUnsafeOperations) {
667 DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites();
669 bool AssociatedWithVarDecl = false;
670 for (const DeclRefExpr *DRE : ClaimedVarUseSites) {
671 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
672 result.byVar[VD].emplace(std::move(G));
673 AssociatedWithVarDecl = true;
677 if (!AssociatedWithVarDecl) {
678 result.noVar.emplace_back(std::move(G));
679 continue;
682 return result;
685 struct FixableGadgetSets {
686 std::map<const VarDecl *, std::set<std::unique_ptr<FixableGadget>>> byVar;
689 static FixableGadgetSets
690 groupFixablesByVar(FixableGadgetList &&AllFixableOperations) {
691 FixableGadgetSets FixablesForUnsafeVars;
692 for (auto &F : AllFixableOperations) {
693 DeclUseList DREs = F->getClaimedVarUseSites();
695 for (const DeclRefExpr *DRE : DREs) {
696 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
697 FixablesForUnsafeVars.byVar[VD].emplace(std::move(F));
701 return FixablesForUnsafeVars;
704 bool clang::internal::anyConflict(
705 const SmallVectorImpl<FixItHint> &FixIts, const SourceManager &SM) {
706 // A simple interval overlap detection algorithm. Sorts all ranges by their
707 // begin location then finds the first overlap in one pass.
708 std::vector<const FixItHint *> All; // a copy of `FixIts`
710 for (const FixItHint &H : FixIts)
711 All.push_back(&H);
712 std::sort(All.begin(), All.end(),
713 [&SM](const FixItHint *H1, const FixItHint *H2) {
714 return SM.isBeforeInTranslationUnit(H1->RemoveRange.getBegin(),
715 H2->RemoveRange.getBegin());
718 const FixItHint *CurrHint = nullptr;
720 for (const FixItHint *Hint : All) {
721 if (!CurrHint ||
722 SM.isBeforeInTranslationUnit(CurrHint->RemoveRange.getEnd(),
723 Hint->RemoveRange.getBegin())) {
724 // Either to initialize `CurrHint` or `CurrHint` does not
725 // overlap with `Hint`:
726 CurrHint = Hint;
727 } else
728 // In case `Hint` overlaps the `CurrHint`, we found at least one
729 // conflict:
730 return true;
732 return false;
735 std::optional<FixItList>
736 ULCArraySubscriptGadget::getFixits(const Strategy &S) const {
737 if (const auto *DRE = dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts()))
738 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
739 switch (S.lookup(VD)) {
740 case Strategy::Kind::Span: {
741 // If the index has a negative constant value, we give up as no valid
742 // fix-it can be generated:
743 const ASTContext &Ctx = // FIXME: we need ASTContext to be passed in!
744 VD->getASTContext();
745 if (auto ConstVal = Node->getIdx()->getIntegerConstantExpr(Ctx)) {
746 if (ConstVal->isNegative())
747 return std::nullopt;
748 } else if (!Node->getIdx()->getType()->isUnsignedIntegerType())
749 return std::nullopt;
750 // no-op is a good fix-it, otherwise
751 return FixItList{};
753 case Strategy::Kind::Wontfix:
754 case Strategy::Kind::Iterator:
755 case Strategy::Kind::Array:
756 case Strategy::Kind::Vector:
757 llvm_unreachable("unsupported strategies for FixableGadgets");
760 return std::nullopt;
763 // Return the text representation of the given `APInt Val`:
764 static std::string getAPIntText(APInt Val) {
765 SmallVector<char> Txt;
766 Val.toString(Txt, 10, true);
767 // APInt::toString does not add '\0' to the end of the string for us:
768 Txt.push_back('\0');
769 return Txt.data();
772 // Return the source location of the last character of the AST `Node`.
773 template <typename NodeTy>
774 static SourceLocation getEndCharLoc(const NodeTy *Node, const SourceManager &SM,
775 const LangOptions &LangOpts) {
776 return Lexer::getLocForEndOfToken(Node->getEndLoc(), 1, SM, LangOpts);
779 // Return the source location just past the last character of the AST `Node`.
780 template <typename NodeTy>
781 static SourceLocation getPastLoc(const NodeTy *Node, const SourceManager &SM,
782 const LangOptions &LangOpts) {
783 return Lexer::getLocForEndOfToken(Node->getEndLoc(), 0, SM, LangOpts);
786 // Return text representation of an `Expr`.
787 static StringRef getExprText(const Expr *E, const SourceManager &SM,
788 const LangOptions &LangOpts) {
789 SourceLocation LastCharLoc = getPastLoc(E, SM, LangOpts);
791 return Lexer::getSourceText(
792 CharSourceRange::getCharRange(E->getBeginLoc(), LastCharLoc), SM,
793 LangOpts);
796 // For a non-null initializer `Init` of `T *` type, this function returns
797 // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it
798 // to output stream.
799 // In many cases, this function cannot figure out the actual extent `S`. It
800 // then will use a place holder to replace `S` to ask users to fill `S` in. The
801 // initializer shall be used to initialize a variable of type `std::span<T>`.
803 // FIXME: Support multi-level pointers
805 // Parameters:
806 // `Init` a pointer to the initializer expression
807 // `Ctx` a reference to the ASTContext
808 static FixItList
809 populateInitializerFixItWithSpan(const Expr *Init, const ASTContext &Ctx,
810 const StringRef UserFillPlaceHolder) {
811 FixItList FixIts{};
812 const SourceManager &SM = Ctx.getSourceManager();
813 const LangOptions &LangOpts = Ctx.getLangOpts();
814 std::string ExtentText = UserFillPlaceHolder.data();
815 StringRef One = "1";
817 // Insert `{` before `Init`:
818 FixIts.push_back(FixItHint::CreateInsertion(Init->getBeginLoc(), "{"));
819 // Try to get the data extent. Break into different cases:
820 if (auto CxxNew = dyn_cast<CXXNewExpr>(Init->IgnoreImpCasts())) {
821 // In cases `Init` is `new T[n]` and there is no explicit cast over
822 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects
823 // of `T`. So the extent is `n` unless `n` has side effects. Similar but
824 // simpler for the case where `Init` is `new T`.
825 if (const Expr *Ext = CxxNew->getArraySize().value_or(nullptr)) {
826 if (!Ext->HasSideEffects(Ctx))
827 ExtentText = getExprText(Ext, SM, LangOpts);
828 } else if (!CxxNew->isArray())
829 // Although the initializer is not allocating a buffer, the pointer
830 // variable could still be used in buffer access operations.
831 ExtentText = One;
832 } else if (const auto *CArrTy = Ctx.getAsConstantArrayType(
833 Init->IgnoreImpCasts()->getType())) {
834 // In cases `Init` is of an array type after stripping off implicit casts,
835 // the extent is the array size. Note that if the array size is not a
836 // constant, we cannot use it as the extent.
837 ExtentText = getAPIntText(CArrTy->getSize());
838 } else {
839 // In cases `Init` is of the form `&Var` after stripping of implicit
840 // casts, where `&` is the built-in operator, the extent is 1.
841 if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Init->IgnoreImpCasts()))
842 if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf &&
843 isa_and_present<DeclRefExpr>(AddrOfExpr->getSubExpr()))
844 ExtentText = One;
845 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`, and explicit casting, etc.
846 // etc.
849 SmallString<32> StrBuffer{};
850 SourceLocation LocPassInit = getPastLoc(Init, SM, LangOpts);
852 StrBuffer.append(", ");
853 StrBuffer.append(ExtentText);
854 StrBuffer.append("}");
855 FixIts.push_back(FixItHint::CreateInsertion(LocPassInit, StrBuffer.str()));
856 return FixIts;
859 // For a `VarDecl` of the form `T * var (= Init)?`, this
860 // function generates a fix-it for the declaration, which re-declares `var` to
861 // be of `span<T>` type and transforms the initializer, if present, to a span
862 // constructor---`span<T> var {Init, Extent}`, where `Extent` may need the user
863 // to fill in.
865 // FIXME: support Multi-level pointers
867 // Parameters:
868 // `D` a pointer the variable declaration node
869 // `Ctx` a reference to the ASTContext
870 // Returns:
871 // the generated fix-it
872 static FixItList fixVarDeclWithSpan(const VarDecl *D, const ASTContext &Ctx,
873 const StringRef UserFillPlaceHolder) {
874 const QualType &SpanEltT = D->getType()->getPointeeType();
875 assert(!SpanEltT.isNull() && "Trying to fix a non-pointer type variable!");
877 FixItList FixIts{};
878 SourceLocation
879 ReplacementLastLoc; // the inclusive end location of the replacement
880 const SourceManager &SM = Ctx.getSourceManager();
882 if (const Expr *Init = D->getInit()) {
883 FixItList InitFixIts =
884 populateInitializerFixItWithSpan(Init, Ctx, UserFillPlaceHolder);
886 if (InitFixIts.empty())
887 return {}; // Something wrong with fixing initializer, give up
888 // The loc right before the initializer:
889 ReplacementLastLoc = Init->getBeginLoc().getLocWithOffset(-1);
890 FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts.begin()),
891 std::make_move_iterator(InitFixIts.end()));
892 } else
893 ReplacementLastLoc = getEndCharLoc(D, SM, Ctx.getLangOpts());
895 // Producing fix-it for variable declaration---make `D` to be of span type:
896 SmallString<32> Replacement;
897 raw_svector_ostream OS(Replacement);
899 OS << "std::span<" << SpanEltT.getAsString() << "> " << D->getName();
900 FixIts.push_back(FixItHint::CreateReplacement(
901 SourceRange{D->getBeginLoc(), ReplacementLastLoc}, OS.str()));
902 return FixIts;
905 static FixItList fixVariableWithSpan(const VarDecl *VD,
906 const DeclUseTracker &Tracker,
907 const ASTContext &Ctx,
908 UnsafeBufferUsageHandler &Handler) {
909 const DeclStmt *DS = Tracker.lookupDecl(VD);
910 assert(DS && "Fixing non-local variables not implemented yet!");
911 if (!DS->isSingleDecl()) {
912 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
913 return{};
915 // Currently DS is an unused variable but we'll need it when
916 // non-single decls are implemented, where the pointee type name
917 // and the '*' are spread around the place.
918 (void)DS;
920 // FIXME: handle cases where DS has multiple declarations
921 return fixVarDeclWithSpan(VD, Ctx, Handler.getUserFillPlaceHolder());
924 static FixItList fixVariable(const VarDecl *VD, Strategy::Kind K,
925 const DeclUseTracker &Tracker,
926 const ASTContext &Ctx,
927 UnsafeBufferUsageHandler &Handler) {
928 switch (K) {
929 case Strategy::Kind::Span: {
930 if (VD->getType()->isPointerType() && VD->isLocalVarDecl())
931 return fixVariableWithSpan(VD, Tracker, Ctx, Handler);
932 return {};
934 case Strategy::Kind::Iterator:
935 case Strategy::Kind::Array:
936 case Strategy::Kind::Vector:
937 llvm_unreachable("Strategy not implemented yet!");
938 case Strategy::Kind::Wontfix:
939 llvm_unreachable("Invalid strategy!");
941 llvm_unreachable("Unknown strategy!");
944 // Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the
945 // `RemoveRange` of 'h' overlaps with a macro use.
946 static bool overlapWithMacro(const FixItList &FixIts) {
947 // FIXME: For now we only check if the range (or the first token) is (part of)
948 // a macro expansion. Ideally, we want to check for all tokens in the range.
949 return llvm::any_of(FixIts, [](const FixItHint &Hint) {
950 auto BeginLoc = Hint.RemoveRange.getBegin();
951 if (BeginLoc.isMacroID())
952 // If the range (or the first token) is (part of) a macro expansion:
953 return true;
954 return false;
958 static std::map<const VarDecl *, FixItList>
959 getFixIts(FixableGadgetSets &FixablesForUnsafeVars, const Strategy &S,
960 const DeclUseTracker &Tracker, const ASTContext &Ctx,
961 UnsafeBufferUsageHandler &Handler) {
962 std::map<const VarDecl *, FixItList> FixItsForVariable;
963 for (const auto &[VD, Fixables] : FixablesForUnsafeVars.byVar) {
964 FixItsForVariable[VD] = fixVariable(VD, S.lookup(VD), Tracker, Ctx, Handler);
965 // If we fail to produce Fix-It for the declaration we have to skip the
966 // variable entirely.
967 if (FixItsForVariable[VD].empty()) {
968 FixItsForVariable.erase(VD);
969 continue;
971 bool ImpossibleToFix = false;
972 llvm::SmallVector<FixItHint, 16> FixItsForVD;
973 for (const auto &F : Fixables) {
974 std::optional<FixItList> Fixits = F->getFixits(S);
975 if (!Fixits) {
976 ImpossibleToFix = true;
977 break;
978 } else {
979 const FixItList CorrectFixes = Fixits.value();
980 FixItsForVD.insert(FixItsForVD.end(), CorrectFixes.begin(),
981 CorrectFixes.end());
984 if (ImpossibleToFix)
985 FixItsForVariable.erase(VD);
986 else
987 FixItsForVariable[VD].insert(FixItsForVariable[VD].end(),
988 FixItsForVD.begin(), FixItsForVD.end());
989 // We conservatively discard fix-its of a variable if
990 // a fix-it overlaps with macros; or
991 // a fix-it conflicts with another one
992 if (overlapWithMacro(FixItsForVariable[VD]) ||
993 clang::internal::anyConflict(FixItsForVariable[VD],
994 Ctx.getSourceManager())) {
995 FixItsForVariable.erase(VD);
998 return FixItsForVariable;
1001 static Strategy
1002 getNaiveStrategy(const llvm::SmallVectorImpl<const VarDecl *> &UnsafeVars) {
1003 Strategy S;
1004 for (const VarDecl *VD : UnsafeVars) {
1005 S.set(VD, Strategy::Kind::Span);
1007 return S;
1010 void clang::checkUnsafeBufferUsage(const Decl *D,
1011 UnsafeBufferUsageHandler &Handler,
1012 bool EmitFixits) {
1013 assert(D && D->getBody());
1015 WarningGadgetSets UnsafeOps;
1016 FixableGadgetSets FixablesForUnsafeVars;
1017 DeclUseTracker Tracker;
1020 // FIXME: We could skip even matching Fixables' matchers if EmitFixits ==
1021 // false.
1022 auto [FixableGadgets, WarningGadgets, TrackerRes] = findGadgets(D, Handler);
1023 UnsafeOps = groupWarningGadgetsByVar(std::move(WarningGadgets));
1024 FixablesForUnsafeVars = groupFixablesByVar(std::move(FixableGadgets));
1025 Tracker = std::move(TrackerRes);
1028 std::map<const VarDecl *, FixItList> FixItsForVariable;
1030 if (EmitFixits) {
1031 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s.
1032 for (auto it = FixablesForUnsafeVars.byVar.cbegin();
1033 it != FixablesForUnsafeVars.byVar.cend();) {
1034 // FIXME: Support ParmVarDecl as well.
1035 if (!it->first->isLocalVarDecl() || Tracker.hasUnclaimedUses(it->first)) {
1036 it = FixablesForUnsafeVars.byVar.erase(it);
1037 } else {
1038 ++it;
1042 llvm::SmallVector<const VarDecl *, 16> UnsafeVars;
1043 for (const auto &[VD, ignore] : FixablesForUnsafeVars.byVar)
1044 UnsafeVars.push_back(VD);
1046 Strategy NaiveStrategy = getNaiveStrategy(UnsafeVars);
1047 FixItsForVariable = getFixIts(FixablesForUnsafeVars, NaiveStrategy, Tracker,
1048 D->getASTContext(), Handler);
1050 // FIXME Detect overlapping FixIts.
1053 for (const auto &G : UnsafeOps.noVar) {
1054 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false);
1057 for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) {
1058 auto FixItsIt =
1059 EmitFixits ? FixItsForVariable.find(VD) : FixItsForVariable.end();
1060 Handler.handleFixableVariable(VD, FixItsIt != FixItsForVariable.end()
1061 ? std::move(FixItsIt->second)
1062 : FixItList{});
1063 for (const auto &G : WarningGadgets) {
1064 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true);