Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / lib / Core / ParallelUtilities.cpp
blobfb2b6dc7cd63b408b4c7e7e59238b293dea7eda9
1 //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the class that manages parallel work on BinaryFunctions.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/ParallelUtilities.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/Support/RWMutex.h"
17 #include "llvm/Support/ThreadPool.h"
18 #include "llvm/Support/Timer.h"
19 #include <mutex>
21 #define DEBUG_TYPE "par-utils"
23 namespace opts {
24 extern cl::OptionCategory BoltCategory;
26 cl::opt<unsigned>
27 ThreadCount("thread-count",
28 cl::desc("number of threads"),
29 cl::init(hardware_concurrency().compute_thread_count()),
30 cl::cat(BoltCategory));
32 cl::opt<bool>
33 NoThreads("no-threads",
34 cl::desc("disable multithreading"),
35 cl::init(false),
36 cl::cat(BoltCategory));
38 cl::opt<unsigned>
39 TaskCount("tasks-per-thread",
40 cl::desc("number of tasks to be created per thread"),
41 cl::init(20),
42 cl::cat(BoltCategory));
44 } // namespace opts
46 namespace llvm {
47 namespace bolt {
48 namespace ParallelUtilities {
50 namespace {
51 /// A single thread pool that is used to run parallel tasks
52 std::unique_ptr<ThreadPool> ThreadPoolPtr;
54 unsigned computeCostFor(const BinaryFunction &BF,
55 const PredicateTy &SkipPredicate,
56 const SchedulingPolicy &SchedPolicy) {
57 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
58 return 1;
60 if (SkipPredicate && SkipPredicate(BF))
61 return 0;
63 switch (SchedPolicy) {
64 case SchedulingPolicy::SP_CONSTANT:
65 return 1;
66 case SchedulingPolicy::SP_INST_LINEAR:
67 return BF.getSize();
68 case SchedulingPolicy::SP_INST_QUADRATIC:
69 return BF.getSize() * BF.getSize();
70 case SchedulingPolicy::SP_BB_LINEAR:
71 return BF.size();
72 case SchedulingPolicy::SP_BB_QUADRATIC:
73 return BF.size() * BF.size();
74 default:
75 llvm_unreachable("unsupported scheduling policy");
79 inline unsigned estimateTotalCost(const BinaryContext &BC,
80 const PredicateTy &SkipPredicate,
81 SchedulingPolicy &SchedPolicy) {
82 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
83 return BC.getBinaryFunctions().size();
85 unsigned TotalCost = 0;
86 for (auto &BFI : BC.getBinaryFunctions()) {
87 const BinaryFunction &BF = BFI.second;
88 TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
91 // Switch to trivial scheduling if total estimated work is zero
92 if (TotalCost == 0) {
93 outs() << "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
94 "switch to trivial scheduling.\n";
96 SchedPolicy = SP_TRIVIAL;
97 TotalCost = BC.getBinaryFunctions().size();
99 return TotalCost;
102 } // namespace
104 ThreadPool &getThreadPool() {
105 if (ThreadPoolPtr.get())
106 return *ThreadPoolPtr;
108 ThreadPoolPtr = std::make_unique<ThreadPool>(
109 llvm::hardware_concurrency(opts::ThreadCount));
110 return *ThreadPoolPtr;
113 void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
114 WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
115 std::string LogName, bool ForceSequential,
116 unsigned TasksPerThread) {
117 if (BC.getBinaryFunctions().size() == 0)
118 return;
120 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
121 std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
122 Timer T(LogName, LogName);
123 LLVM_DEBUG(T.startTimer());
125 for (auto It = BlockBegin; It != BlockEnd; ++It) {
126 BinaryFunction &BF = It->second;
127 if (SkipPredicate && SkipPredicate(BF))
128 continue;
130 WorkFunction(BF);
132 LLVM_DEBUG(T.stopTimer());
135 if (opts::NoThreads || ForceSequential) {
136 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
137 return;
140 // Estimate the overall runtime cost using the scheduling policy
141 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
142 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
143 const unsigned BlockCost =
144 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
146 // Divide work into blocks of equal cost
147 ThreadPool &Pool = getThreadPool();
148 auto BlockBegin = BC.getBinaryFunctions().begin();
149 unsigned CurrentCost = 0;
151 for (auto It = BC.getBinaryFunctions().begin();
152 It != BC.getBinaryFunctions().end(); ++It) {
153 BinaryFunction &BF = It->second;
154 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
156 if (CurrentCost >= BlockCost) {
157 Pool.async(runBlock, BlockBegin, std::next(It));
158 BlockBegin = std::next(It);
159 CurrentCost = 0;
162 Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end());
163 Pool.wait();
166 void runOnEachFunctionWithUniqueAllocId(
167 BinaryContext &BC, SchedulingPolicy SchedPolicy,
168 WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
169 std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
170 if (BC.getBinaryFunctions().size() == 0)
171 return;
173 llvm::sys::RWMutex MainLock;
174 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
175 std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
176 MCPlusBuilder::AllocatorIdTy AllocId) {
177 Timer T(LogName, LogName);
178 LLVM_DEBUG(T.startTimer());
179 std::shared_lock<llvm::sys::RWMutex> Lock(MainLock);
180 for (auto It = BlockBegin; It != BlockEnd; ++It) {
181 BinaryFunction &BF = It->second;
182 if (SkipPredicate && SkipPredicate(BF))
183 continue;
185 WorkFunction(BF, AllocId);
187 LLVM_DEBUG(T.stopTimer());
190 if (opts::NoThreads || ForceSequential) {
191 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), 0);
192 return;
194 // This lock is used to postpone task execution
195 std::unique_lock<llvm::sys::RWMutex> Lock(MainLock);
197 // Estimate the overall runtime cost using the scheduling policy
198 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
199 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
200 const unsigned BlockCost =
201 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
203 // Divide work into blocks of equal cost
204 ThreadPool &Pool = getThreadPool();
205 auto BlockBegin = BC.getBinaryFunctions().begin();
206 unsigned CurrentCost = 0;
207 unsigned AllocId = 1;
208 for (auto It = BC.getBinaryFunctions().begin();
209 It != BC.getBinaryFunctions().end(); ++It) {
210 BinaryFunction &BF = It->second;
211 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
213 if (CurrentCost >= BlockCost) {
214 if (!BC.MIB->checkAllocatorExists(AllocId)) {
215 MCPlusBuilder::AllocatorIdTy Id =
216 BC.MIB->initializeNewAnnotationAllocator();
217 (void)Id;
218 assert(AllocId == Id && "unexpected allocator id created");
220 Pool.async(runBlock, BlockBegin, std::next(It), AllocId);
221 AllocId++;
222 BlockBegin = std::next(It);
223 CurrentCost = 0;
227 if (!BC.MIB->checkAllocatorExists(AllocId)) {
228 MCPlusBuilder::AllocatorIdTy Id =
229 BC.MIB->initializeNewAnnotationAllocator();
230 (void)Id;
231 assert(AllocId == Id && "unexpected allocator id created");
234 Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId);
235 Lock.unlock();
236 Pool.wait();
239 } // namespace ParallelUtilities
240 } // namespace bolt
241 } // namespace llvm