[nfc][Driver] Remove {{(.exe)?}} from sanitizer test (#121160)
[llvm-project.git] / bolt / lib / Core / ParallelUtilities.cpp
blob3a8a7dc0aee7bd7d54a0852d697c0ae26499c35c
1 //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the class that manages parallel work on BinaryFunctions.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/ParallelUtilities.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/Support/RWMutex.h"
17 #include "llvm/Support/ThreadPool.h"
18 #include "llvm/Support/Timer.h"
19 #include <mutex>
21 #define DEBUG_TYPE "par-utils"
23 namespace opts {
24 extern cl::OptionCategory BoltCategory;
26 cl::opt<unsigned>
27 ThreadCount("thread-count",
28 cl::desc("number of threads"),
29 cl::init(hardware_concurrency().compute_thread_count()),
30 cl::cat(BoltCategory));
32 cl::opt<bool>
33 NoThreads("no-threads",
34 cl::desc("disable multithreading"),
35 cl::init(false),
36 cl::cat(BoltCategory));
38 cl::opt<unsigned>
39 TaskCount("tasks-per-thread",
40 cl::desc("number of tasks to be created per thread"),
41 cl::init(20),
42 cl::cat(BoltCategory));
44 } // namespace opts
46 namespace llvm {
47 namespace bolt {
48 namespace ParallelUtilities {
50 namespace {
51 /// A single thread pool that is used to run parallel tasks
52 std::unique_ptr<ThreadPoolInterface> ThreadPoolPtr;
54 unsigned computeCostFor(const BinaryFunction &BF,
55 const PredicateTy &SkipPredicate,
56 const SchedulingPolicy &SchedPolicy) {
57 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
58 return 1;
60 if (SkipPredicate && SkipPredicate(BF))
61 return 0;
63 switch (SchedPolicy) {
64 case SchedulingPolicy::SP_CONSTANT:
65 return 1;
66 case SchedulingPolicy::SP_INST_LINEAR:
67 return BF.getSize();
68 case SchedulingPolicy::SP_INST_QUADRATIC:
69 return BF.getSize() * BF.getSize();
70 case SchedulingPolicy::SP_BB_LINEAR:
71 return BF.size();
72 case SchedulingPolicy::SP_BB_QUADRATIC:
73 return BF.size() * BF.size();
74 default:
75 llvm_unreachable("unsupported scheduling policy");
79 inline unsigned estimateTotalCost(const BinaryContext &BC,
80 const PredicateTy &SkipPredicate,
81 SchedulingPolicy &SchedPolicy) {
82 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
83 return BC.getBinaryFunctions().size();
85 unsigned TotalCost = 0;
86 for (auto &BFI : BC.getBinaryFunctions()) {
87 const BinaryFunction &BF = BFI.second;
88 TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
91 // Switch to trivial scheduling if total estimated work is zero
92 if (TotalCost == 0) {
93 BC.outs()
94 << "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
95 "switch to trivial scheduling.\n";
97 SchedPolicy = SP_TRIVIAL;
98 TotalCost = BC.getBinaryFunctions().size();
100 return TotalCost;
103 } // namespace
105 ThreadPoolInterface &getThreadPool(const unsigned ThreadsCount) {
106 if (ThreadPoolPtr.get())
107 return *ThreadPoolPtr;
109 if (ThreadsCount > 1)
110 ThreadPoolPtr = std::make_unique<DefaultThreadPool>(
111 llvm::hardware_concurrency(ThreadsCount));
112 else
113 ThreadPoolPtr = std::make_unique<SingleThreadExecutor>();
114 return *ThreadPoolPtr;
117 void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
118 WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
119 std::string LogName, bool ForceSequential,
120 unsigned TasksPerThread) {
121 if (BC.getBinaryFunctions().size() == 0)
122 return;
124 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
125 std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
126 Timer T(LogName, LogName);
127 LLVM_DEBUG(T.startTimer());
129 for (auto It = BlockBegin; It != BlockEnd; ++It) {
130 BinaryFunction &BF = It->second;
131 if (SkipPredicate && SkipPredicate(BF))
132 continue;
134 WorkFunction(BF);
136 LLVM_DEBUG(T.stopTimer());
139 if (opts::NoThreads || ForceSequential) {
140 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
141 return;
144 // Estimate the overall runtime cost using the scheduling policy
145 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
146 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
147 const unsigned BlockCost =
148 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
150 // Divide work into blocks of equal cost
151 ThreadPoolInterface &Pool = getThreadPool();
152 auto BlockBegin = BC.getBinaryFunctions().begin();
153 unsigned CurrentCost = 0;
155 for (auto It = BC.getBinaryFunctions().begin();
156 It != BC.getBinaryFunctions().end(); ++It) {
157 BinaryFunction &BF = It->second;
158 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
160 if (CurrentCost >= BlockCost) {
161 Pool.async(runBlock, BlockBegin, std::next(It));
162 BlockBegin = std::next(It);
163 CurrentCost = 0;
166 Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end());
167 Pool.wait();
170 void runOnEachFunctionWithUniqueAllocId(
171 BinaryContext &BC, SchedulingPolicy SchedPolicy,
172 WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
173 std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
174 if (BC.getBinaryFunctions().size() == 0)
175 return;
177 llvm::sys::RWMutex MainLock;
178 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
179 std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
180 MCPlusBuilder::AllocatorIdTy AllocId) {
181 Timer T(LogName, LogName);
182 LLVM_DEBUG(T.startTimer());
183 std::shared_lock<llvm::sys::RWMutex> Lock(MainLock);
184 for (auto It = BlockBegin; It != BlockEnd; ++It) {
185 BinaryFunction &BF = It->second;
186 if (SkipPredicate && SkipPredicate(BF))
187 continue;
189 WorkFunction(BF, AllocId);
191 LLVM_DEBUG(T.stopTimer());
194 unsigned AllocId = 1;
195 auto EnsureAllocatorExists = [&BC](unsigned AllocId) {
196 if (!BC.MIB->checkAllocatorExists(AllocId)) {
197 MCPlusBuilder::AllocatorIdTy Id =
198 BC.MIB->initializeNewAnnotationAllocator();
199 (void)Id;
200 assert(AllocId == Id && "unexpected allocator id created");
204 if (opts::NoThreads || ForceSequential) {
205 EnsureAllocatorExists(AllocId);
206 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(),
207 AllocId);
208 return;
210 // This lock is used to postpone task execution
211 std::unique_lock<llvm::sys::RWMutex> Lock(MainLock);
213 // Estimate the overall runtime cost using the scheduling policy
214 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
215 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
216 const unsigned BlockCost =
217 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
219 // Divide work into blocks of equal cost
220 ThreadPoolInterface &Pool = getThreadPool();
221 auto BlockBegin = BC.getBinaryFunctions().begin();
222 unsigned CurrentCost = 0;
223 for (auto It = BC.getBinaryFunctions().begin();
224 It != BC.getBinaryFunctions().end(); ++It) {
225 BinaryFunction &BF = It->second;
226 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
228 if (CurrentCost >= BlockCost) {
229 EnsureAllocatorExists(AllocId);
230 Pool.async(runBlock, BlockBegin, std::next(It), AllocId);
231 AllocId++;
232 BlockBegin = std::next(It);
233 CurrentCost = 0;
237 EnsureAllocatorExists(AllocId);
239 Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId);
240 Lock.unlock();
241 Pool.wait();
244 } // namespace ParallelUtilities
245 } // namespace bolt
246 } // namespace llvm