bolt/lib/Core/ParallelUtilities.cpp

   1 //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // Implementation of the class that manages parallel work on BinaryFunctions.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "bolt/Core/ParallelUtilities.h"
  14 #include "bolt/Core/BinaryContext.h"
  15 #include "bolt/Core/BinaryFunction.h"
  16 #include "llvm/Support/RWMutex.h"
  17 #include "llvm/Support/ThreadPool.h"
  18 #include "llvm/Support/Timer.h"
  19 #include <mutex>
  20
  21 #define DEBUG_TYPE "par-utils"
  22
  23 namespace opts {
  24 extern cl::OptionCategory BoltCategory;
  25
  26 cl::opt<unsigned>
  27 ThreadCount("thread-count",
  28   cl::desc("number of threads"),
  29   cl::init(hardware_concurrency().compute_thread_count()),
  30   cl::cat(BoltCategory));
  31
  32 cl::opt<bool>
  33 NoThreads("no-threads",
  34   cl::desc("disable multithreading"),
  35   cl::init(false),
  36   cl::cat(BoltCategory));
  37
  38 cl::opt<unsigned>
  39 TaskCount("tasks-per-thread",
  40   cl::desc("number of tasks to be created per thread"),
  41   cl::init(20),
  42   cl::cat(BoltCategory));
  43
  44 } // namespace opts
  45
  46 namespace llvm {
  47 namespace bolt {
  48 namespace ParallelUtilities {
  49
  50 namespace {
  51 /// A single thread pool that is used to run parallel tasks
  52 std::unique_ptr<DefaultThreadPool> ThreadPoolPtr;
  53
  54 unsigned computeCostFor(const BinaryFunction &BF,
  55                         const PredicateTy &SkipPredicate,
  56                         const SchedulingPolicy &SchedPolicy) {
  57   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
  58     return 1;
  59
  60   if (SkipPredicate && SkipPredicate(BF))
  61     return 0;
  62
  63   switch (SchedPolicy) {
  64   case SchedulingPolicy::SP_CONSTANT:
  65     return 1;
  66   case SchedulingPolicy::SP_INST_LINEAR:
  67     return BF.getSize();
  68   case SchedulingPolicy::SP_INST_QUADRATIC:
  69     return BF.getSize() * BF.getSize();
  70   case SchedulingPolicy::SP_BB_LINEAR:
  71     return BF.size();
  72   case SchedulingPolicy::SP_BB_QUADRATIC:
  73     return BF.size() * BF.size();
  74   default:
  75     llvm_unreachable("unsupported scheduling policy");
  76   }
  77 }
  78
  79 inline unsigned estimateTotalCost(const BinaryContext &BC,
  80                                   const PredicateTy &SkipPredicate,
  81                                   SchedulingPolicy &SchedPolicy) {
  82   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
  83     return BC.getBinaryFunctions().size();
  84
  85   unsigned TotalCost = 0;
  86   for (auto &BFI : BC.getBinaryFunctions()) {
  87     const BinaryFunction &BF = BFI.second;
  88     TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
  89   }
  90
  91   // Switch to trivial scheduling if total estimated work is zero
  92   if (TotalCost == 0) {
  93     BC.outs()
  94         << "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
  95            "switch to  trivial scheduling.\n";
  96
  97     SchedPolicy = SP_TRIVIAL;
  98     TotalCost = BC.getBinaryFunctions().size();
  99   }
 100   return TotalCost;
 101 }
 102
 103 } // namespace
 104
 105 ThreadPoolInterface &getThreadPool() {
 106   if (ThreadPoolPtr.get())
 107     return *ThreadPoolPtr;
 108
 109   ThreadPoolPtr = std::make_unique<DefaultThreadPool>(
 110       llvm::hardware_concurrency(opts::ThreadCount));
 111   return *ThreadPoolPtr;
 112 }
 113
 114 void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
 115                        WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
 116                        std::string LogName, bool ForceSequential,
 117                        unsigned TasksPerThread) {
 118   if (BC.getBinaryFunctions().size() == 0)
 119     return;
 120
 121   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
 122                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
 123     Timer T(LogName, LogName);
 124     LLVM_DEBUG(T.startTimer());
 125
 126     for (auto It = BlockBegin; It != BlockEnd; ++It) {
 127       BinaryFunction &BF = It->second;
 128       if (SkipPredicate && SkipPredicate(BF))
 129         continue;
 130
 131       WorkFunction(BF);
 132     }
 133     LLVM_DEBUG(T.stopTimer());
 134   };
 135
 136   if (opts::NoThreads || ForceSequential) {
 137     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
 138     return;
 139   }
 140
 141   // Estimate the overall runtime cost using the scheduling policy
 142   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
 143   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
 144   const unsigned BlockCost =
 145       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
 146
 147   // Divide work into blocks of equal cost
 148   ThreadPoolInterface &Pool = getThreadPool();
 149   auto BlockBegin = BC.getBinaryFunctions().begin();
 150   unsigned CurrentCost = 0;
 151
 152   for (auto It = BC.getBinaryFunctions().begin();
 153        It != BC.getBinaryFunctions().end(); ++It) {
 154     BinaryFunction &BF = It->second;
 155     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
 156
 157     if (CurrentCost >= BlockCost) {
 158       Pool.async(runBlock, BlockBegin, std::next(It));
 159       BlockBegin = std::next(It);
 160       CurrentCost = 0;
 161     }
 162   }
 163   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end());
 164   Pool.wait();
 165 }
 166
 167 void runOnEachFunctionWithUniqueAllocId(
 168     BinaryContext &BC, SchedulingPolicy SchedPolicy,
 169     WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
 170     std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
 171   if (BC.getBinaryFunctions().size() == 0)
 172     return;
 173
 174   llvm::sys::RWMutex MainLock;
 175   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
 176                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
 177                       MCPlusBuilder::AllocatorIdTy AllocId) {
 178     Timer T(LogName, LogName);
 179     LLVM_DEBUG(T.startTimer());
 180     std::shared_lock<llvm::sys::RWMutex> Lock(MainLock);
 181     for (auto It = BlockBegin; It != BlockEnd; ++It) {
 182       BinaryFunction &BF = It->second;
 183       if (SkipPredicate && SkipPredicate(BF))
 184         continue;
 185
 186       WorkFunction(BF, AllocId);
 187     }
 188     LLVM_DEBUG(T.stopTimer());
 189   };
 190
 191   unsigned AllocId = 1;
 192   auto EnsureAllocatorExists = [&BC](unsigned AllocId) {
 193     if (!BC.MIB->checkAllocatorExists(AllocId)) {
 194       MCPlusBuilder::AllocatorIdTy Id =
 195           BC.MIB->initializeNewAnnotationAllocator();
 196       (void)Id;
 197       assert(AllocId == Id && "unexpected allocator id created");
 198     }
 199   };
 200
 201   if (opts::NoThreads || ForceSequential) {
 202     EnsureAllocatorExists(AllocId);
 203     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(),
 204              AllocId);
 205     return;
 206   }
 207   // This lock is used to postpone task execution
 208   std::unique_lock<llvm::sys::RWMutex> Lock(MainLock);
 209
 210   // Estimate the overall runtime cost using the scheduling policy
 211   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
 212   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
 213   const unsigned BlockCost =
 214       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
 215
 216   // Divide work into blocks of equal cost
 217   ThreadPoolInterface &Pool = getThreadPool();
 218   auto BlockBegin = BC.getBinaryFunctions().begin();
 219   unsigned CurrentCost = 0;
 220   for (auto It = BC.getBinaryFunctions().begin();
 221        It != BC.getBinaryFunctions().end(); ++It) {
 222     BinaryFunction &BF = It->second;
 223     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
 224
 225     if (CurrentCost >= BlockCost) {
 226       EnsureAllocatorExists(AllocId);
 227       Pool.async(runBlock, BlockBegin, std::next(It), AllocId);
 228       AllocId++;
 229       BlockBegin = std::next(It);
 230       CurrentCost = 0;
 231     }
 232   }
 233
 234   EnsureAllocatorExists(AllocId);
 235
 236   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId);
 237   Lock.unlock();
 238   Pool.wait();
 239 }
 240
 241 } // namespace ParallelUtilities
 242 } // namespace bolt
 243 } // namespace llvm