Bump version to 19.1.0 (final)
[llvm-project.git] / bolt / lib / Core / ParallelUtilities.cpp
bloba24c37c06f1ac1ce4d0b49e697b8e876864cd81b
1 //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the class that manages parallel work on BinaryFunctions.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/ParallelUtilities.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/Support/RWMutex.h"
17 #include "llvm/Support/ThreadPool.h"
18 #include "llvm/Support/Timer.h"
19 #include <mutex>
21 #define DEBUG_TYPE "par-utils"
23 namespace opts {
24 extern cl::OptionCategory BoltCategory;
26 cl::opt<unsigned>
27 ThreadCount("thread-count",
28 cl::desc("number of threads"),
29 cl::init(hardware_concurrency().compute_thread_count()),
30 cl::cat(BoltCategory));
32 cl::opt<bool>
33 NoThreads("no-threads",
34 cl::desc("disable multithreading"),
35 cl::init(false),
36 cl::cat(BoltCategory));
38 cl::opt<unsigned>
39 TaskCount("tasks-per-thread",
40 cl::desc("number of tasks to be created per thread"),
41 cl::init(20),
42 cl::cat(BoltCategory));
44 } // namespace opts
46 namespace llvm {
47 namespace bolt {
48 namespace ParallelUtilities {
50 namespace {
51 /// A single thread pool that is used to run parallel tasks
52 std::unique_ptr<DefaultThreadPool> ThreadPoolPtr;
54 unsigned computeCostFor(const BinaryFunction &BF,
55 const PredicateTy &SkipPredicate,
56 const SchedulingPolicy &SchedPolicy) {
57 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
58 return 1;
60 if (SkipPredicate && SkipPredicate(BF))
61 return 0;
63 switch (SchedPolicy) {
64 case SchedulingPolicy::SP_CONSTANT:
65 return 1;
66 case SchedulingPolicy::SP_INST_LINEAR:
67 return BF.getSize();
68 case SchedulingPolicy::SP_INST_QUADRATIC:
69 return BF.getSize() * BF.getSize();
70 case SchedulingPolicy::SP_BB_LINEAR:
71 return BF.size();
72 case SchedulingPolicy::SP_BB_QUADRATIC:
73 return BF.size() * BF.size();
74 default:
75 llvm_unreachable("unsupported scheduling policy");
79 inline unsigned estimateTotalCost(const BinaryContext &BC,
80 const PredicateTy &SkipPredicate,
81 SchedulingPolicy &SchedPolicy) {
82 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
83 return BC.getBinaryFunctions().size();
85 unsigned TotalCost = 0;
86 for (auto &BFI : BC.getBinaryFunctions()) {
87 const BinaryFunction &BF = BFI.second;
88 TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
91 // Switch to trivial scheduling if total estimated work is zero
92 if (TotalCost == 0) {
93 BC.outs()
94 << "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
95 "switch to trivial scheduling.\n";
97 SchedPolicy = SP_TRIVIAL;
98 TotalCost = BC.getBinaryFunctions().size();
100 return TotalCost;
103 } // namespace
105 ThreadPoolInterface &getThreadPool() {
106 if (ThreadPoolPtr.get())
107 return *ThreadPoolPtr;
109 ThreadPoolPtr = std::make_unique<DefaultThreadPool>(
110 llvm::hardware_concurrency(opts::ThreadCount));
111 return *ThreadPoolPtr;
114 void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
115 WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
116 std::string LogName, bool ForceSequential,
117 unsigned TasksPerThread) {
118 if (BC.getBinaryFunctions().size() == 0)
119 return;
121 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
122 std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
123 Timer T(LogName, LogName);
124 LLVM_DEBUG(T.startTimer());
126 for (auto It = BlockBegin; It != BlockEnd; ++It) {
127 BinaryFunction &BF = It->second;
128 if (SkipPredicate && SkipPredicate(BF))
129 continue;
131 WorkFunction(BF);
133 LLVM_DEBUG(T.stopTimer());
136 if (opts::NoThreads || ForceSequential) {
137 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
138 return;
141 // Estimate the overall runtime cost using the scheduling policy
142 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
143 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
144 const unsigned BlockCost =
145 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
147 // Divide work into blocks of equal cost
148 ThreadPoolInterface &Pool = getThreadPool();
149 auto BlockBegin = BC.getBinaryFunctions().begin();
150 unsigned CurrentCost = 0;
152 for (auto It = BC.getBinaryFunctions().begin();
153 It != BC.getBinaryFunctions().end(); ++It) {
154 BinaryFunction &BF = It->second;
155 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
157 if (CurrentCost >= BlockCost) {
158 Pool.async(runBlock, BlockBegin, std::next(It));
159 BlockBegin = std::next(It);
160 CurrentCost = 0;
163 Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end());
164 Pool.wait();
167 void runOnEachFunctionWithUniqueAllocId(
168 BinaryContext &BC, SchedulingPolicy SchedPolicy,
169 WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
170 std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
171 if (BC.getBinaryFunctions().size() == 0)
172 return;
174 llvm::sys::RWMutex MainLock;
175 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
176 std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
177 MCPlusBuilder::AllocatorIdTy AllocId) {
178 Timer T(LogName, LogName);
179 LLVM_DEBUG(T.startTimer());
180 std::shared_lock<llvm::sys::RWMutex> Lock(MainLock);
181 for (auto It = BlockBegin; It != BlockEnd; ++It) {
182 BinaryFunction &BF = It->second;
183 if (SkipPredicate && SkipPredicate(BF))
184 continue;
186 WorkFunction(BF, AllocId);
188 LLVM_DEBUG(T.stopTimer());
191 unsigned AllocId = 1;
192 auto EnsureAllocatorExists = [&BC](unsigned AllocId) {
193 if (!BC.MIB->checkAllocatorExists(AllocId)) {
194 MCPlusBuilder::AllocatorIdTy Id =
195 BC.MIB->initializeNewAnnotationAllocator();
196 (void)Id;
197 assert(AllocId == Id && "unexpected allocator id created");
201 if (opts::NoThreads || ForceSequential) {
202 EnsureAllocatorExists(AllocId);
203 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(),
204 AllocId);
205 return;
207 // This lock is used to postpone task execution
208 std::unique_lock<llvm::sys::RWMutex> Lock(MainLock);
210 // Estimate the overall runtime cost using the scheduling policy
211 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
212 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
213 const unsigned BlockCost =
214 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
216 // Divide work into blocks of equal cost
217 ThreadPoolInterface &Pool = getThreadPool();
218 auto BlockBegin = BC.getBinaryFunctions().begin();
219 unsigned CurrentCost = 0;
220 for (auto It = BC.getBinaryFunctions().begin();
221 It != BC.getBinaryFunctions().end(); ++It) {
222 BinaryFunction &BF = It->second;
223 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
225 if (CurrentCost >= BlockCost) {
226 EnsureAllocatorExists(AllocId);
227 Pool.async(runBlock, BlockBegin, std::next(It), AllocId);
228 AllocId++;
229 BlockBegin = std::next(It);
230 CurrentCost = 0;
234 EnsureAllocatorExists(AllocId);
236 Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId);
237 Lock.unlock();
238 Pool.wait();
241 } // namespace ParallelUtilities
242 } // namespace bolt
243 } // namespace llvm