clang-tools-extra/clang-tidy/misc/NoRecursionCheck.cpp

   1 //===--- NoRecursionCheck.cpp - clang-tidy --------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "NoRecursionCheck.h"
  10 #include "clang/AST/ASTContext.h"
  11 #include "clang/ASTMatchers/ASTMatchFinder.h"
  12 #include "clang/Analysis/CallGraph.h"
  13 #include "llvm/ADT/DenseMapInfo.h"
  14 #include "llvm/ADT/SCCIterator.h"
  15
  16 using namespace clang::ast_matchers;
  17
  18 namespace clang::tidy::misc {
  19
  20 namespace {
  21
  22 /// Much like SmallSet, with two differences:
  23 /// 1. It can *only* be constructed from an ArrayRef<>. If the element count
  24 ///    is small, there is no copy and said storage *must* outlive us.
  25 /// 2. it is immutable, the way it was constructed it will stay.
  26 template <typename T, unsigned SmallSize> class ImmutableSmallSet {
  27   ArrayRef<T> Vector;
  28   llvm::DenseSet<T> Set;
  29
  30   static_assert(SmallSize <= 32, "N should be small");
  31
  32   bool isSmall() const { return Set.empty(); }
  33
  34 public:
  35   using size_type = size_t;
  36
  37   ImmutableSmallSet() = delete;
  38   ImmutableSmallSet(const ImmutableSmallSet &) = delete;
  39   ImmutableSmallSet(ImmutableSmallSet &&) = delete;
  40   T &operator=(const ImmutableSmallSet &) = delete;
  41   T &operator=(ImmutableSmallSet &&) = delete;
  42
  43   // WARNING: Storage *must* outlive us if we decide that the size is small.
  44   ImmutableSmallSet(ArrayRef<T> Storage) {
  45     // Is size small-enough to just keep using the existing storage?
  46     if (Storage.size() <= SmallSize) {
  47       Vector = Storage;
  48       return;
  49     }
  50
  51     // We've decided that it isn't performant to keep using vector.
  52     // Let's migrate the data into Set.
  53     Set.reserve(Storage.size());
  54     Set.insert(Storage.begin(), Storage.end());
  55   }
  56
  57   /// count - Return 1 if the element is in the set, 0 otherwise.
  58   size_type count(const T &V) const {
  59     if (isSmall()) {
  60       // Since the collection is small, just do a linear search.
  61       return llvm::is_contained(Vector, V) ? 1 : 0;
  62     }
  63
  64     return Set.count(V);
  65   }
  66 };
  67
  68 /// Much like SmallSetVector, but with one difference:
  69 /// when the size is \p SmallSize or less, when checking whether an element is
  70 /// already in the set or not, we perform linear search over the vector,
  71 /// but if the size is larger than \p SmallSize, we look in set.
  72 /// FIXME: upstream this into SetVector/SmallSetVector itself.
  73 template <typename T, unsigned SmallSize> class SmartSmallSetVector {
  74 public:
  75   using size_type = size_t;
  76
  77 private:
  78   SmallVector<T, SmallSize> Vector;
  79   llvm::DenseSet<T> Set;
  80
  81   static_assert(SmallSize <= 32, "N should be small");
  82
  83   // Are we still using Vector for uniqness tracking?
  84   bool isSmall() const { return Set.empty(); }
  85
  86   // Will one more entry cause Vector to switch away from small-size storage?
  87   bool entiretyOfVectorSmallSizeIsOccupied() const {
  88     assert(isSmall() && Vector.size() <= SmallSize &&
  89            "Shouldn't ask if we have already [should have] migrated into Set.");
  90     return Vector.size() == SmallSize;
  91   }
  92
  93   void populateSet() {
  94     assert(Set.empty() && "Should not have already utilized the Set.");
  95     // Magical growth factor prediction - to how many elements do we expect to
  96     // sanely grow after switching away from small-size storage?
  97     const size_t NewMaxElts = 4 * Vector.size();
  98     Vector.reserve(NewMaxElts);
  99     Set.reserve(NewMaxElts);
 100     Set.insert(Vector.begin(), Vector.end());
 101   }
 102
 103   /// count - Return 1 if the element is in the set, 0 otherwise.
 104   size_type count(const T &V) const {
 105     if (isSmall()) {
 106       // Since the collection is small, just do a linear search.
 107       return llvm::is_contained(Vector, V) ? 1 : 0;
 108     }
 109     // Look-up in the Set.
 110     return Set.count(V);
 111   }
 112
 113   bool setInsert(const T &V) {
 114     if (count(V) != 0)
 115       return false; // Already exists.
 116     // Does not exist, Can/need to record it.
 117     if (isSmall()) { // Are we still using Vector for uniqness tracking?
 118       // Will one more entry fit within small-sized Vector?
 119       if (!entiretyOfVectorSmallSizeIsOccupied())
 120         return true; // We'll insert into vector right afterwards anyway.
 121       // Time to switch to Set.
 122       populateSet();
 123     }
 124     // Set time!
 125     // Note that this must be after `populateSet()` might have been called.
 126     bool SetInsertionSucceeded = Set.insert(V).second;
 127     (void)SetInsertionSucceeded;
 128     assert(SetInsertionSucceeded && "We did check that no such value existed");
 129     return true;
 130   }
 131
 132 public:
 133   /// Insert a new element into the SmartSmallSetVector.
 134   /// \returns true if the element was inserted into the SmartSmallSetVector.
 135   bool insert(const T &X) {
 136     bool Result = setInsert(X);
 137     if (Result)
 138       Vector.push_back(X);
 139     return Result;
 140   }
 141
 142   /// Clear the SmartSmallSetVector and return the underlying vector.
 143   decltype(Vector) takeVector() {
 144     Set.clear();
 145     return std::move(Vector);
 146   }
 147 };
 148
 149 constexpr unsigned SmallCallStackSize = 16;
 150 constexpr unsigned SmallSCCSize = 32;
 151
 152 using CallStackTy =
 153     llvm::SmallVector<CallGraphNode::CallRecord, SmallCallStackSize>;
 154
 155 // In given SCC, find *some* call stack that will be cyclic.
 156 // This will only find *one* such stack, it might not be the smallest one,
 157 // and there may be other loops.
 158 CallStackTy pathfindSomeCycle(ArrayRef<CallGraphNode *> SCC) {
 159   // We'll need to be able to performantly look up whether some CallGraphNode
 160   // is in SCC or not, so cache all the SCC elements in a set.
 161   const ImmutableSmallSet<CallGraphNode *, SmallSCCSize> SCCElts(SCC);
 162
 163   // Is node N part if the current SCC?
 164   auto NodeIsPartOfSCC = [&SCCElts](CallGraphNode *N) {
 165     return SCCElts.count(N) != 0;
 166   };
 167
 168   // Track the call stack that will cause a cycle.
 169   SmartSmallSetVector<CallGraphNode::CallRecord, SmallCallStackSize>
 170       CallStackSet;
 171
 172   // Arbitrarily take the first element of SCC as entry point.
 173   CallGraphNode::CallRecord EntryNode(SCC.front(), /*CallExpr=*/nullptr);
 174   // Continue recursing into subsequent callees that are part of this SCC,
 175   // and are thus known to be part of the call graph loop, until loop forms.
 176   CallGraphNode::CallRecord *Node = &EntryNode;
 177   while (true) {
 178     // Did we see this node before?
 179     if (!CallStackSet.insert(*Node))
 180       break; // Cycle completed! Note that didn't insert the node into stack!
 181     // Else, perform depth-first traversal: out of all callees, pick first one
 182     // that is part of this SCC. This is not guaranteed to yield shortest cycle.
 183     Node = llvm::find_if(Node->Callee->callees(), NodeIsPartOfSCC);
 184   }
 185
 186   // Note that we failed to insert the last node, that completes the cycle.
 187   // But we really want to have it. So insert it manually into stack only.
 188   CallStackTy CallStack = CallStackSet.takeVector();
 189   CallStack.emplace_back(*Node);
 190
 191   return CallStack;
 192 }
 193
 194 } // namespace
 195
 196 void NoRecursionCheck::registerMatchers(MatchFinder *Finder) {
 197   Finder->addMatcher(translationUnitDecl().bind("TUDecl"), this);
 198 }
 199
 200 void NoRecursionCheck::handleSCC(ArrayRef<CallGraphNode *> SCC) {
 201   assert(!SCC.empty() && "Empty SCC does not make sense.");
 202
 203   // First of all, call out every strongly connected function.
 204   for (CallGraphNode *N : SCC) {
 205     FunctionDecl *D = N->getDefinition();
 206     diag(D->getLocation(), "function %0 is within a recursive call chain") << D;
 207   }
 208
 209   // Now, SCC only tells us about strongly connected function declarations in
 210   // the call graph. It doesn't *really* tell us about the cycles they form.
 211   // And there may be more than one cycle in SCC.
 212   // So let's form a call stack that eventually exposes *some* cycle.
 213   const CallStackTy EventuallyCyclicCallStack = pathfindSomeCycle(SCC);
 214   assert(!EventuallyCyclicCallStack.empty() && "We should've found the cycle");
 215
 216   // While last node of the call stack does cause a loop, due to the way we
 217   // pathfind the cycle, the loop does not necessarily begin at the first node
 218   // of the call stack, so drop front nodes of the call stack until it does.
 219   const auto CyclicCallStack =
 220       ArrayRef<CallGraphNode::CallRecord>(EventuallyCyclicCallStack)
 221           .drop_until([LastNode = EventuallyCyclicCallStack.back()](
 222                           CallGraphNode::CallRecord FrontNode) {
 223             return FrontNode == LastNode;
 224           });
 225   assert(CyclicCallStack.size() >= 2 && "Cycle requires at least 2 frames");
 226
 227   // Which function we decided to be the entry point that lead to the recursion?
 228   FunctionDecl *CycleEntryFn = CyclicCallStack.front().Callee->getDefinition();
 229   // And now, for ease of understanding, let's print the call sequence that
 230   // forms the cycle in question.
 231   diag(CycleEntryFn->getLocation(),
 232        "example recursive call chain, starting from function %0",
 233        DiagnosticIDs::Note)
 234       << CycleEntryFn;
 235   for (int CurFrame = 1, NumFrames = CyclicCallStack.size();
 236        CurFrame != NumFrames; ++CurFrame) {
 237     CallGraphNode::CallRecord PrevNode = CyclicCallStack[CurFrame - 1];
 238     CallGraphNode::CallRecord CurrNode = CyclicCallStack[CurFrame];
 239
 240     Decl *PrevDecl = PrevNode.Callee->getDecl();
 241     Decl *CurrDecl = CurrNode.Callee->getDecl();
 242
 243     diag(CurrNode.CallExpr->getBeginLoc(),
 244          "Frame #%0: function %1 calls function %2 here:", DiagnosticIDs::Note)
 245         << CurFrame << cast<NamedDecl>(PrevDecl) << cast<NamedDecl>(CurrDecl);
 246   }
 247
 248   diag(CyclicCallStack.back().CallExpr->getBeginLoc(),
 249        "... which was the starting point of the recursive call chain; there "
 250        "may be other cycles",
 251        DiagnosticIDs::Note);
 252 }
 253
 254 void NoRecursionCheck::check(const MatchFinder::MatchResult &Result) {
 255   // Build call graph for the entire translation unit.
 256   const auto *TU = Result.Nodes.getNodeAs<TranslationUnitDecl>("TUDecl");
 257   CallGraph CG;
 258   CG.addToCallGraph(const_cast<TranslationUnitDecl *>(TU));
 259
 260   // Look for cycles in call graph,
 261   // by looking for Strongly Connected Components (SCC's)
 262   for (llvm::scc_iterator<CallGraph *> SCCI = llvm::scc_begin(&CG),
 263                                        SCCE = llvm::scc_end(&CG);
 264        SCCI != SCCE; ++SCCI) {
 265     if (!SCCI.hasCycle()) // We only care about cycles, not standalone nodes.
 266       continue;
 267     handleSCC(*SCCI);
 268   }
 269 }
 270
 271 } // namespace clang::tidy::misc