bolt/lib/Core/ParallelUtilities.cpp

   1 //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // Implementation of the class that manages parallel work on BinaryFunctions.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "bolt/Core/ParallelUtilities.h"
  14 #include "bolt/Core/BinaryContext.h"
  15 #include "bolt/Core/BinaryFunction.h"
  16 #include "llvm/Support/RWMutex.h"
  17 #include "llvm/Support/ThreadPool.h"
  18 #include "llvm/Support/Timer.h"
  19 #include <mutex>
  20
  21 #define DEBUG_TYPE "par-utils"
  22
  23 namespace opts {
  24 extern cl::OptionCategory BoltCategory;
  25
  26 cl::opt<unsigned>
  27 ThreadCount("thread-count",
  28   cl::desc("number of threads"),
  29   cl::init(hardware_concurrency().compute_thread_count()),
  30   cl::cat(BoltCategory));
  31
  32 cl::opt<bool>
  33 NoThreads("no-threads",
  34   cl::desc("disable multithreading"),
  35   cl::init(false),
  36   cl::cat(BoltCategory));
  37
  38 cl::opt<unsigned>
  39 TaskCount("tasks-per-thread",
  40   cl::desc("number of tasks to be created per thread"),
  41   cl::init(20),
  42   cl::cat(BoltCategory));
  43
  44 } // namespace opts
  45
  46 namespace llvm {
  47 namespace bolt {
  48 namespace ParallelUtilities {
  49
  50 namespace {
  51 /// A single thread pool that is used to run parallel tasks
  52 std::unique_ptr<ThreadPoolInterface> ThreadPoolPtr;
  53
  54 unsigned computeCostFor(const BinaryFunction &BF,
  55                         const PredicateTy &SkipPredicate,
  56                         const SchedulingPolicy &SchedPolicy) {
  57   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
  58     return 1;
  59
  60   if (SkipPredicate && SkipPredicate(BF))
  61     return 0;
  62
  63   switch (SchedPolicy) {
  64   case SchedulingPolicy::SP_CONSTANT:
  65     return 1;
  66   case SchedulingPolicy::SP_INST_LINEAR:
  67     return BF.getSize();
  68   case SchedulingPolicy::SP_INST_QUADRATIC:
  69     return BF.getSize() * BF.getSize();
  70   case SchedulingPolicy::SP_BB_LINEAR:
  71     return BF.size();
  72   case SchedulingPolicy::SP_BB_QUADRATIC:
  73     return BF.size() * BF.size();
  74   default:
  75     llvm_unreachable("unsupported scheduling policy");
  76   }
  77 }
  78
  79 inline unsigned estimateTotalCost(const BinaryContext &BC,
  80                                   const PredicateTy &SkipPredicate,
  81                                   SchedulingPolicy &SchedPolicy) {
  82   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
  83     return BC.getBinaryFunctions().size();
  84
  85   unsigned TotalCost = 0;
  86   for (auto &BFI : BC.getBinaryFunctions()) {
  87     const BinaryFunction &BF = BFI.second;
  88     TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
  89   }
  90
  91   // Switch to trivial scheduling if total estimated work is zero
  92   if (TotalCost == 0) {
  93     BC.outs()
  94         << "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
  95            "switch to  trivial scheduling.\n";
  96
  97     SchedPolicy = SP_TRIVIAL;
  98     TotalCost = BC.getBinaryFunctions().size();
  99   }
 100   return TotalCost;
 101 }
 102
 103 } // namespace
 104
 105 ThreadPoolInterface &getThreadPool(const unsigned ThreadsCount) {
 106   if (ThreadPoolPtr.get())
 107     return *ThreadPoolPtr;
 108
 109   if (ThreadsCount > 1)
 110     ThreadPoolPtr = std::make_unique<DefaultThreadPool>(
 111         llvm::hardware_concurrency(ThreadsCount));
 112   else
 113     ThreadPoolPtr = std::make_unique<SingleThreadExecutor>();
 114   return *ThreadPoolPtr;
 115 }
 116
 117 void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
 118                        WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
 119                        std::string LogName, bool ForceSequential,
 120                        unsigned TasksPerThread) {
 121   if (BC.getBinaryFunctions().size() == 0)
 122     return;
 123
 124   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
 125                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
 126     Timer T(LogName, LogName);
 127     LLVM_DEBUG(T.startTimer());
 128
 129     for (auto It = BlockBegin; It != BlockEnd; ++It) {
 130       BinaryFunction &BF = It->second;
 131       if (SkipPredicate && SkipPredicate(BF))
 132         continue;
 133
 134       WorkFunction(BF);
 135     }
 136     LLVM_DEBUG(T.stopTimer());
 137   };
 138
 139   if (opts::NoThreads || ForceSequential) {
 140     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
 141     return;
 142   }
 143
 144   // Estimate the overall runtime cost using the scheduling policy
 145   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
 146   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
 147   const unsigned BlockCost =
 148       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
 149
 150   // Divide work into blocks of equal cost
 151   ThreadPoolInterface &Pool = getThreadPool();
 152   auto BlockBegin = BC.getBinaryFunctions().begin();
 153   unsigned CurrentCost = 0;
 154
 155   for (auto It = BC.getBinaryFunctions().begin();
 156        It != BC.getBinaryFunctions().end(); ++It) {
 157     BinaryFunction &BF = It->second;
 158     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
 159
 160     if (CurrentCost >= BlockCost) {
 161       Pool.async(runBlock, BlockBegin, std::next(It));
 162       BlockBegin = std::next(It);
 163       CurrentCost = 0;
 164     }
 165   }
 166   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end());
 167   Pool.wait();
 168 }
 169
 170 void runOnEachFunctionWithUniqueAllocId(
 171     BinaryContext &BC, SchedulingPolicy SchedPolicy,
 172     WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
 173     std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
 174   if (BC.getBinaryFunctions().size() == 0)
 175     return;
 176
 177   llvm::sys::RWMutex MainLock;
 178   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
 179                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
 180                       MCPlusBuilder::AllocatorIdTy AllocId) {
 181     Timer T(LogName, LogName);
 182     LLVM_DEBUG(T.startTimer());
 183     std::shared_lock<llvm::sys::RWMutex> Lock(MainLock);
 184     for (auto It = BlockBegin; It != BlockEnd; ++It) {
 185       BinaryFunction &BF = It->second;
 186       if (SkipPredicate && SkipPredicate(BF))
 187         continue;
 188
 189       WorkFunction(BF, AllocId);
 190     }
 191     LLVM_DEBUG(T.stopTimer());
 192   };
 193
 194   unsigned AllocId = 1;
 195   auto EnsureAllocatorExists = [&BC](unsigned AllocId) {
 196     if (!BC.MIB->checkAllocatorExists(AllocId)) {
 197       MCPlusBuilder::AllocatorIdTy Id =
 198           BC.MIB->initializeNewAnnotationAllocator();
 199       (void)Id;
 200       assert(AllocId == Id && "unexpected allocator id created");
 201     }
 202   };
 203
 204   if (opts::NoThreads || ForceSequential) {
 205     EnsureAllocatorExists(AllocId);
 206     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(),
 207              AllocId);
 208     return;
 209   }
 210   // This lock is used to postpone task execution
 211   std::unique_lock<llvm::sys::RWMutex> Lock(MainLock);
 212
 213   // Estimate the overall runtime cost using the scheduling policy
 214   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
 215   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
 216   const unsigned BlockCost =
 217       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
 218
 219   // Divide work into blocks of equal cost
 220   ThreadPoolInterface &Pool = getThreadPool();
 221   auto BlockBegin = BC.getBinaryFunctions().begin();
 222   unsigned CurrentCost = 0;
 223   for (auto It = BC.getBinaryFunctions().begin();
 224        It != BC.getBinaryFunctions().end(); ++It) {
 225     BinaryFunction &BF = It->second;
 226     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
 227
 228     if (CurrentCost >= BlockCost) {
 229       EnsureAllocatorExists(AllocId);
 230       Pool.async(runBlock, BlockBegin, std::next(It), AllocId);
 231       AllocId++;
 232       BlockBegin = std::next(It);
 233       CurrentCost = 0;
 234     }
 235   }
 236
 237   EnsureAllocatorExists(AllocId);
 238
 239   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId);
 240   Lock.unlock();
 241   Pool.wait();
 242 }
 243
 244 } // namespace ParallelUtilities
 245 } // namespace bolt
 246 } // namespace llvm