1 //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Implementation of the class that manages parallel work on BinaryFunctions.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/ParallelUtilities.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/Support/RWMutex.h"
17 #include "llvm/Support/ThreadPool.h"
18 #include "llvm/Support/Timer.h"
21 #define DEBUG_TYPE "par-utils"
24 extern cl::OptionCategory BoltCategory
;
27 ThreadCount("thread-count",
28 cl::desc("number of threads"),
29 cl::init(hardware_concurrency().compute_thread_count()),
30 cl::cat(BoltCategory
));
33 NoThreads("no-threads",
34 cl::desc("disable multithreading"),
36 cl::cat(BoltCategory
));
39 TaskCount("tasks-per-thread",
40 cl::desc("number of tasks to be created per thread"),
42 cl::cat(BoltCategory
));
48 namespace ParallelUtilities
{
51 /// A single thread pool that is used to run parallel tasks
52 std::unique_ptr
<DefaultThreadPool
> ThreadPoolPtr
;
54 unsigned computeCostFor(const BinaryFunction
&BF
,
55 const PredicateTy
&SkipPredicate
,
56 const SchedulingPolicy
&SchedPolicy
) {
57 if (SchedPolicy
== SchedulingPolicy::SP_TRIVIAL
)
60 if (SkipPredicate
&& SkipPredicate(BF
))
63 switch (SchedPolicy
) {
64 case SchedulingPolicy::SP_CONSTANT
:
66 case SchedulingPolicy::SP_INST_LINEAR
:
68 case SchedulingPolicy::SP_INST_QUADRATIC
:
69 return BF
.getSize() * BF
.getSize();
70 case SchedulingPolicy::SP_BB_LINEAR
:
72 case SchedulingPolicy::SP_BB_QUADRATIC
:
73 return BF
.size() * BF
.size();
75 llvm_unreachable("unsupported scheduling policy");
79 inline unsigned estimateTotalCost(const BinaryContext
&BC
,
80 const PredicateTy
&SkipPredicate
,
81 SchedulingPolicy
&SchedPolicy
) {
82 if (SchedPolicy
== SchedulingPolicy::SP_TRIVIAL
)
83 return BC
.getBinaryFunctions().size();
85 unsigned TotalCost
= 0;
86 for (auto &BFI
: BC
.getBinaryFunctions()) {
87 const BinaryFunction
&BF
= BFI
.second
;
88 TotalCost
+= computeCostFor(BF
, SkipPredicate
, SchedPolicy
);
91 // Switch to trivial scheduling if total estimated work is zero
94 << "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
95 "switch to trivial scheduling.\n";
97 SchedPolicy
= SP_TRIVIAL
;
98 TotalCost
= BC
.getBinaryFunctions().size();
105 ThreadPoolInterface
&getThreadPool() {
106 if (ThreadPoolPtr
.get())
107 return *ThreadPoolPtr
;
109 ThreadPoolPtr
= std::make_unique
<DefaultThreadPool
>(
110 llvm::hardware_concurrency(opts::ThreadCount
));
111 return *ThreadPoolPtr
;
114 void runOnEachFunction(BinaryContext
&BC
, SchedulingPolicy SchedPolicy
,
115 WorkFuncTy WorkFunction
, PredicateTy SkipPredicate
,
116 std::string LogName
, bool ForceSequential
,
117 unsigned TasksPerThread
) {
118 if (BC
.getBinaryFunctions().size() == 0)
121 auto runBlock
= [&](std::map
<uint64_t, BinaryFunction
>::iterator BlockBegin
,
122 std::map
<uint64_t, BinaryFunction
>::iterator BlockEnd
) {
123 Timer
T(LogName
, LogName
);
124 LLVM_DEBUG(T
.startTimer());
126 for (auto It
= BlockBegin
; It
!= BlockEnd
; ++It
) {
127 BinaryFunction
&BF
= It
->second
;
128 if (SkipPredicate
&& SkipPredicate(BF
))
133 LLVM_DEBUG(T
.stopTimer());
136 if (opts::NoThreads
|| ForceSequential
) {
137 runBlock(BC
.getBinaryFunctions().begin(), BC
.getBinaryFunctions().end());
141 // Estimate the overall runtime cost using the scheduling policy
142 const unsigned TotalCost
= estimateTotalCost(BC
, SkipPredicate
, SchedPolicy
);
143 const unsigned BlocksCount
= TasksPerThread
* opts::ThreadCount
;
144 const unsigned BlockCost
=
145 TotalCost
> BlocksCount
? TotalCost
/ BlocksCount
: 1;
147 // Divide work into blocks of equal cost
148 ThreadPoolInterface
&Pool
= getThreadPool();
149 auto BlockBegin
= BC
.getBinaryFunctions().begin();
150 unsigned CurrentCost
= 0;
152 for (auto It
= BC
.getBinaryFunctions().begin();
153 It
!= BC
.getBinaryFunctions().end(); ++It
) {
154 BinaryFunction
&BF
= It
->second
;
155 CurrentCost
+= computeCostFor(BF
, SkipPredicate
, SchedPolicy
);
157 if (CurrentCost
>= BlockCost
) {
158 Pool
.async(runBlock
, BlockBegin
, std::next(It
));
159 BlockBegin
= std::next(It
);
163 Pool
.async(runBlock
, BlockBegin
, BC
.getBinaryFunctions().end());
167 void runOnEachFunctionWithUniqueAllocId(
168 BinaryContext
&BC
, SchedulingPolicy SchedPolicy
,
169 WorkFuncWithAllocTy WorkFunction
, PredicateTy SkipPredicate
,
170 std::string LogName
, bool ForceSequential
, unsigned TasksPerThread
) {
171 if (BC
.getBinaryFunctions().size() == 0)
174 llvm::sys::RWMutex MainLock
;
175 auto runBlock
= [&](std::map
<uint64_t, BinaryFunction
>::iterator BlockBegin
,
176 std::map
<uint64_t, BinaryFunction
>::iterator BlockEnd
,
177 MCPlusBuilder::AllocatorIdTy AllocId
) {
178 Timer
T(LogName
, LogName
);
179 LLVM_DEBUG(T
.startTimer());
180 std::shared_lock
<llvm::sys::RWMutex
> Lock(MainLock
);
181 for (auto It
= BlockBegin
; It
!= BlockEnd
; ++It
) {
182 BinaryFunction
&BF
= It
->second
;
183 if (SkipPredicate
&& SkipPredicate(BF
))
186 WorkFunction(BF
, AllocId
);
188 LLVM_DEBUG(T
.stopTimer());
191 unsigned AllocId
= 1;
192 auto EnsureAllocatorExists
= [&BC
](unsigned AllocId
) {
193 if (!BC
.MIB
->checkAllocatorExists(AllocId
)) {
194 MCPlusBuilder::AllocatorIdTy Id
=
195 BC
.MIB
->initializeNewAnnotationAllocator();
197 assert(AllocId
== Id
&& "unexpected allocator id created");
201 if (opts::NoThreads
|| ForceSequential
) {
202 EnsureAllocatorExists(AllocId
);
203 runBlock(BC
.getBinaryFunctions().begin(), BC
.getBinaryFunctions().end(),
207 // This lock is used to postpone task execution
208 std::unique_lock
<llvm::sys::RWMutex
> Lock(MainLock
);
210 // Estimate the overall runtime cost using the scheduling policy
211 const unsigned TotalCost
= estimateTotalCost(BC
, SkipPredicate
, SchedPolicy
);
212 const unsigned BlocksCount
= TasksPerThread
* opts::ThreadCount
;
213 const unsigned BlockCost
=
214 TotalCost
> BlocksCount
? TotalCost
/ BlocksCount
: 1;
216 // Divide work into blocks of equal cost
217 ThreadPoolInterface
&Pool
= getThreadPool();
218 auto BlockBegin
= BC
.getBinaryFunctions().begin();
219 unsigned CurrentCost
= 0;
220 for (auto It
= BC
.getBinaryFunctions().begin();
221 It
!= BC
.getBinaryFunctions().end(); ++It
) {
222 BinaryFunction
&BF
= It
->second
;
223 CurrentCost
+= computeCostFor(BF
, SkipPredicate
, SchedPolicy
);
225 if (CurrentCost
>= BlockCost
) {
226 EnsureAllocatorExists(AllocId
);
227 Pool
.async(runBlock
, BlockBegin
, std::next(It
), AllocId
);
229 BlockBegin
= std::next(It
);
234 EnsureAllocatorExists(AllocId
);
236 Pool
.async(runBlock
, BlockBegin
, BC
.getBinaryFunctions().end(), AllocId
);
241 } // namespace ParallelUtilities