bolt/lib/Passes/BinaryFunctionCallGraph.cpp

   1 //===- bolt/Passes/BinaryFunctionCallGraph.cpp ----------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements the BinaryFunctionCallGraph class.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "bolt/Passes/BinaryFunctionCallGraph.h"
  14 #include "bolt/Core/BinaryContext.h"
  15 #include "bolt/Core/BinaryFunction.h"
  16 #include "llvm/Support/CommandLine.h"
  17 #include "llvm/Support/Timer.h"
  18 #include <stack>
  19
  20 #define DEBUG_TYPE "callgraph"
  21
  22 using namespace llvm;
  23
  24 namespace opts {
  25
  26 extern cl::opt<bool> TimeOpts;
  27 extern cl::opt<unsigned> Verbosity;
  28 extern cl::OptionCategory BoltCategory;
  29
  30 static cl::opt<std::string>
  31     DumpCGDot("dump-cg", cl::desc("dump callgraph to the given file"),
  32               cl::cat(BoltCategory));
  33
  34 } // namespace opts
  35
  36 namespace llvm {
  37 namespace bolt {
  38
  39 CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF,
  40                                                    uint32_t Size,
  41                                                    uint64_t Samples) {
  42   NodeId Id = CallGraph::addNode(Size, Samples);
  43   assert(size_t(Id) == Funcs.size());
  44   Funcs.push_back(BF);
  45   FuncToNodeId[BF] = Id;
  46   assert(Funcs[Id] == BF);
  47   return Id;
  48 }
  49
  50 std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
  51   NamedRegionTimer T1("buildcgorder", "Build cg traversal order",
  52                       "CG breakdown", "CG breakdown", opts::TimeOpts);
  53   std::deque<BinaryFunction *> TopologicalOrder;
  54   enum NodeStatus { NEW, VISITING, VISITED };
  55   std::vector<NodeStatus> NodeStatus(Funcs.size());
  56   std::stack<NodeId> Worklist;
  57
  58   for (BinaryFunction *Func : Funcs) {
  59     const NodeId Id = FuncToNodeId.at(Func);
  60     Worklist.push(Id);
  61     NodeStatus[Id] = NEW;
  62   }
  63
  64   while (!Worklist.empty()) {
  65     const NodeId FuncId = Worklist.top();
  66     Worklist.pop();
  67
  68     if (NodeStatus[FuncId] == VISITED)
  69       continue;
  70
  71     if (NodeStatus[FuncId] == VISITING) {
  72       TopologicalOrder.push_back(Funcs[FuncId]);
  73       NodeStatus[FuncId] = VISITED;
  74       continue;
  75     }
  76
  77     assert(NodeStatus[FuncId] == NEW);
  78     NodeStatus[FuncId] = VISITING;
  79     Worklist.push(FuncId);
  80     for (const NodeId Callee : successors(FuncId)) {
  81       if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED)
  82         continue;
  83       Worklist.push(Callee);
  84     }
  85   }
  86
  87   return TopologicalOrder;
  88 }
  89
  90 BinaryFunctionCallGraph
  91 buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData,
  92                bool IncludeSplitCalls, bool UseFunctionHotSize,
  93                bool UseSplitHotSize, bool UseEdgeCounts,
  94                bool IgnoreRecursiveCalls) {
  95   NamedRegionTimer T1("buildcg", "Callgraph construction", "CG breakdown",
  96                       "CG breakdown", opts::TimeOpts);
  97   BinaryFunctionCallGraph Cg;
  98   static constexpr uint64_t COUNT_NO_PROFILE =
  99       BinaryBasicBlock::COUNT_NO_PROFILE;
 100
 101   // Compute function size
 102   auto functionSize = [&](const BinaryFunction *Function) {
 103     return UseFunctionHotSize && Function->isSplit()
 104                ? Function->estimateHotSize(UseSplitHotSize)
 105                : Function->estimateSize();
 106   };
 107
 108   // Add call graph nodes.
 109   auto lookupNode = [&](BinaryFunction *Function) {
 110     const CallGraph::NodeId Id = Cg.maybeGetNodeId(Function);
 111     if (Id == CallGraph::InvalidId) {
 112       // It's ok to use the hot size here when the function is split.  This is
 113       // because emitFunctions will emit the hot part first in the order that is
 114       // computed by ReorderFunctions.  The cold part will be emitted with the
 115       // rest of the cold functions and code.
 116       const size_t Size = functionSize(Function);
 117       // NOTE: for functions without a profile, we set the number of samples
 118       // to zero.  This will keep these functions from appearing in the hot
 119       // section.  This is a little weird because we wouldn't be trying to
 120       // create a node for a function unless it was the target of a call from
 121       // a hot block.  The alternative would be to set the count to one or
 122       // accumulate the number of calls from the callsite into the function
 123       // samples.  Results from perfomance testing seem to favor the zero
 124       // count though, so I'm leaving it this way for now.
 125       return Cg.addNode(Function, Size, Function->getKnownExecutionCount());
 126     }
 127     return Id;
 128   };
 129
 130   // Add call graph edges.
 131   uint64_t NotProcessed = 0;
 132   uint64_t TotalCallsites = 0;
 133   uint64_t NoProfileCallsites = 0;
 134   uint64_t NumFallbacks = 0;
 135   uint64_t RecursiveCallsites = 0;
 136   for (auto &It : BC.getBinaryFunctions()) {
 137     BinaryFunction *Function = &It.second;
 138
 139     if (Filter(*Function))
 140       continue;
 141
 142     const CallGraph::NodeId SrcId = lookupNode(Function);
 143     // Offset of the current basic block from the beginning of the function
 144     uint64_t Offset = 0;
 145
 146     auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
 147       BinaryFunction *DstFunc =
 148           DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr;
 149       if (!DstFunc) {
 150         LLVM_DEBUG(if (opts::Verbosity > 1) dbgs()
 151                    << "BOLT-DEBUG: buildCallGraph: no function for symbol\n");
 152         return false;
 153       }
 154
 155       if (DstFunc == Function) {
 156         LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in "
 157                           << *DstFunc << "\n");
 158         ++RecursiveCallsites;
 159         if (IgnoreRecursiveCalls)
 160           return false;
 161       }
 162       if (Filter(*DstFunc)) {
 163         LLVM_DEBUG(if (opts::Verbosity > 1) dbgs()
 164                    << "BOLT-DEBUG: buildCallGraph: filtered " << *DstFunc
 165                    << '\n');
 166         return false;
 167       }
 168
 169       const CallGraph::NodeId DstId = lookupNode(DstFunc);
 170       const bool IsValidCount = Count != COUNT_NO_PROFILE;
 171       const uint64_t AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
 172       if (!IsValidCount)
 173         ++NoProfileCallsites;
 174       Cg.incArcWeight(SrcId, DstId, AdjCount, Offset);
 175       LLVM_DEBUG(if (opts::Verbosity > 1) {
 176         dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function << " -> "
 177                << *DstFunc << " @ " << Offset << "\n";
 178       });
 179       return true;
 180     };
 181
 182     // Pairs of (symbol, count) for each target at this callsite.
 183     using TargetDesc = std::pair<const MCSymbol *, uint64_t>;
 184     using CallInfoTy = std::vector<TargetDesc>;
 185
 186     // Get pairs of (symbol, count) for each target at this callsite.
 187     // If the call is to an unknown function the symbol will be nullptr.
 188     // If there is no profiling data the count will be COUNT_NO_PROFILE.
 189     auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) {
 190       CallInfoTy Counts;
 191       const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);
 192
 193       // If this is an indirect call use perf data directly.
 194       if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) {
 195         const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(
 196             Inst, "CallProfile");
 197         for (const IndirectCallProfile &CSI : ICSP)
 198           if (CSI.Symbol)
 199             Counts.emplace_back(CSI.Symbol, CSI.Count);
 200       } else {
 201         const uint64_t Count = BB->getExecutionCount();
 202         Counts.emplace_back(DstSym, Count);
 203       }
 204
 205       return Counts;
 206     };
 207
 208     // If the function has an invalid profile, try to use the perf data
 209     // directly (if requested).  If there is no perf data for this function,
 210     // fall back to the CFG walker which attempts to handle missing data.
 211     if (!Function->hasValidProfile() && CgFromPerfData &&
 212         !Function->getAllCallSites().empty()) {
 213       LLVM_DEBUG(
 214           dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
 215                  << " for " << *Function << "\n");
 216       ++NumFallbacks;
 217       const size_t Size = functionSize(Function);
 218       for (const IndirectCallProfile &CSI : Function->getAllCallSites()) {
 219         ++TotalCallsites;
 220
 221         if (!CSI.Symbol)
 222           continue;
 223
 224         // The computed offset may exceed the hot part of the function; hence,
 225         // bound it by the size.
 226         Offset = CSI.Offset;
 227         if (Offset > Size)
 228           Offset = Size;
 229
 230         if (!recordCall(CSI.Symbol, CSI.Count))
 231           ++NotProcessed;
 232       }
 233     } else {
 234       for (BinaryBasicBlock *BB : Function->getLayout().blocks()) {
 235         // Don't count calls from split blocks unless requested.
 236         if (BB->isSplit() && !IncludeSplitCalls)
 237           continue;
 238
 239         // Determine whether the block is included in Function's (hot) size
 240         // See BinaryFunction::estimateHotSize
 241         bool BBIncludedInFunctionSize = false;
 242         if (UseFunctionHotSize && Function->isSplit()) {
 243           if (UseSplitHotSize)
 244             BBIncludedInFunctionSize = !BB->isSplit();
 245           else
 246             BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
 247         } else {
 248           BBIncludedInFunctionSize = true;
 249         }
 250
 251         for (MCInst &Inst : *BB) {
 252           // Find call instructions and extract target symbols from each one.
 253           if (BC.MIB->isCall(Inst)) {
 254             const CallInfoTy CallInfo = getCallInfo(BB, Inst);
 255
 256             if (!CallInfo.empty()) {
 257               for (const TargetDesc &CI : CallInfo) {
 258                 ++TotalCallsites;
 259                 if (!recordCall(CI.first, CI.second))
 260                   ++NotProcessed;
 261               }
 262             } else {
 263               ++TotalCallsites;
 264               ++NotProcessed;
 265             }
 266           }
 267           // Increase Offset if needed
 268           if (BBIncludedInFunctionSize)
 269             Offset += BC.computeCodeSize(&Inst, &Inst + 1);
 270         }
 271       }
 272     }
 273   }
 274
 275 #ifndef NDEBUG
 276   bool PrintInfo = DebugFlag && isCurrentDebugType("callgraph");
 277 #else
 278   bool PrintInfo = false;
 279 #endif
 280   if (PrintInfo || opts::Verbosity > 0)
 281     outs() << format("BOLT-INFO: buildCallGraph: %u nodes, %u callsites "
 282                      "(%u recursive), density = %.6lf, %u callsites not "
 283                      "processed, %u callsites with invalid profile, "
 284                      "used perf data for %u stale functions.\n",
 285                      Cg.numNodes(), TotalCallsites, RecursiveCallsites,
 286                      Cg.density(), NotProcessed, NoProfileCallsites,
 287                      NumFallbacks);
 288
 289   if (opts::DumpCGDot.getNumOccurrences()) {
 290     Cg.printDot(opts::DumpCGDot, [&](CallGraph::NodeId Id) {
 291       return Cg.nodeIdToFunc(Id)->getPrintName();
 292     });
 293   }
 294
 295   return Cg;
 296 }
 297
 298 } // namespace bolt
 299 } // namespace llvm