//===- bolt/Passes/SplitFunctions.cpp - Pass for splitting function code --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SplitFunctions pass.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/SplitFunctions.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/FunctionLayout.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormatVariadic.h"
#include <algorithm>
#include <iterator>
#include <memory>
#include <numeric>
#include <optional>
#include <random>
#include <vector>

#define DEBUG_TYPE "bolt-opts"

using namespace llvm;
using namespace bolt;

namespace {
// Accepts the legacy numeric values of -split-functions and maps them onto the
// boolean option, warning that the numeric form is deprecated.
class DeprecatedSplitFunctionOptionParser : public cl::parser<bool> {
public:
  explicit DeprecatedSplitFunctionOptionParser(cl::Option &O)
      : cl::parser<bool>(O) {}

  bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) {
    if (Arg == "2" || Arg == "3") {
      // Historic non-boolean values are treated as "enabled".
      Value = true;
      errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" "
                        "for option -{1} is deprecated\n",
                        Arg, ArgName);
      return false;
    }
    return cl::parser<bool>::parse(O, ArgName, Arg, Value);
  }
};
} // namespace
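
// For illustration: passing "-split-functions=2" or "=3" (forms accepted by
// older BOLT releases) is now equivalent to plain "-split-functions" and
// additionally prints the deprecation warning above.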

namespace opts {

extern cl::OptionCategory BoltOptCategory;

extern cl::opt<bool> SplitEH;
extern cl::opt<unsigned> ExecutionCountThreshold;
extern cl::opt<uint32_t> RandomSeed;

static cl::opt<bool> AggressiveSplitting(
    "split-all-cold", cl::desc("outline as many cold basic blocks as possible"),
    cl::cat(BoltOptCategory));

static cl::opt<unsigned> SplitAlignThreshold(
    "split-align-threshold",
    cl::desc("when deciding to split a function, apply this alignment "
             "while doing the size comparison (see -split-threshold). "
             "Default value: 2."),
    cl::init(2), cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool, false, DeprecatedSplitFunctionOptionParser>
    SplitFunctions("split-functions",
                   cl::desc("split functions into fragments"),
                   cl::cat(BoltOptCategory));

static cl::opt<unsigned> SplitThreshold(
    "split-threshold",
    cl::desc("split function only if its main size is reduced by more than "
             "given amount of bytes. Default value: 0, i.e. split iff the "
             "size is reduced. Note that on some architectures the size can "
             "increase after splitting."),
    cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
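
// How the two thresholds interact (see splitFunction() below): a split is kept
// only when alignTo(OriginalHotSize, SplitAlignThreshold) exceeds
// alignTo(HotSize, SplitAlignThreshold) + SplitThreshold. For example, with
// -split-align-threshold=64 and -split-threshold=128, reducing the aligned hot
// size from 1024 to 960 bytes saves only 64 bytes, so the split is reverted.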

static cl::opt<SplitFunctionsStrategy> SplitStrategy(
    "split-strategy", cl::init(SplitFunctionsStrategy::Profile2),
    cl::values(clEnumValN(SplitFunctionsStrategy::Profile2, "profile2",
                          "split each function into a hot and cold fragment "
                          "using profiling information")),
    cl::values(clEnumValN(
        SplitFunctionsStrategy::Random2, "random2",
        "split each function into a hot and cold fragment at a randomly chosen "
        "split point (ignoring any available profiling information)")),
    cl::values(clEnumValN(
        SplitFunctionsStrategy::RandomN, "randomN",
        "split each function into N fragments at randomly chosen split "
        "points (ignoring any available profiling information)")),
    cl::values(clEnumValN(
        SplitFunctionsStrategy::All, "all",
        "split all basic blocks of each function into fragments such that each "
        "fragment contains exactly a single basic block")),
    cl::desc("strategy used to partition blocks into fragments"),
    cl::cat(BoltOptCategory));
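
// Illustrative command lines (the binary and profile names are examples only):
//   llvm-bolt app -o app.bolt -data=perf.fdata --split-functions
//   llvm-bolt app -o app.bolt --split-functions --split-strategy=randomN
// Only profile2, the default, consults profile data; the other strategies
// ignore any available profile, as their descriptions above state.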
} // namespace opts

namespace {
bool hasFullProfile(const BinaryFunction &BF) {
  return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) {
    return BB.getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE;
  });
}

bool allBlocksCold(const BinaryFunction &BF) {
  return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) {
    return BB.getExecutionCount() == 0;
  });
}

struct SplitProfile2 final : public SplitStrategy {
  bool canSplit(const BinaryFunction &BF) override {
    return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
  }

  bool keepEmpty() override { return false; }

  void fragment(const BlockIt Start, const BlockIt End) override {
    for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) {
      if (BB->getExecutionCount() == 0)
        BB->setFragmentNum(FragmentNum::cold());
    }
  }
};
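
// For illustration: given a layout with execution counts {100, 0, 20, 0},
// profile2 marks the two zero-count blocks as cold and leaves the others in
// the main fragment; splitFunction() later pulls any non-outlineable blocks
// back into the main fragment.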

struct SplitRandom2 final : public SplitStrategy {
  std::minstd_rand0 Gen;

  SplitRandom2() : Gen(opts::RandomSeed.getValue()) {}

  bool canSplit(const BinaryFunction &BF) override { return true; }

  bool keepEmpty() override { return false; }

  void fragment(const BlockIt Start, const BlockIt End) override {
    using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
    const DiffT NumBlocks = End - Start;
    assert(NumBlocks > 0 && "Cannot fragment empty function");

    // We want to split at least one block.
    const auto LastSplitPoint = std::max<DiffT>(NumBlocks - 1, 1);
    std::uniform_int_distribution<DiffT> Dist(1, LastSplitPoint);
    const DiffT SplitPoint = Dist(Gen);
    for (BinaryBasicBlock *BB : llvm::make_range(Start + SplitPoint, End))
      BB->setFragmentNum(FragmentNum::cold());

    LLVM_DEBUG(dbgs() << formatv("BOLT-DEBUG: randomly chose last {0} (out of "
                                 "{1} possible) blocks to split\n",
                                 NumBlocks - SplitPoint, End - Start));
  }
};
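
// For illustration: with NumBlocks == 5 the split point is drawn uniformly
// from [1, 4], so one to four trailing blocks become cold. For a single-block
// function, SplitPoint is forced to 1 and the range Start + 1 .. End is empty,
// so nothing is outlined.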

struct SplitRandomN final : public SplitStrategy {
  std::minstd_rand0 Gen;

  SplitRandomN() : Gen(opts::RandomSeed.getValue()) {}

  bool canSplit(const BinaryFunction &BF) override { return true; }

  bool keepEmpty() override { return false; }

  void fragment(const BlockIt Start, const BlockIt End) override {
    using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
    const DiffT NumBlocks = End - Start;
    assert(NumBlocks > 0 && "Cannot fragment empty function");

    // With n blocks, there are n-1 places to split them.
    const DiffT MaximumSplits = NumBlocks - 1;
    // We want to generate at least two fragments if possible, but if there is
    // only one block, no splits are possible.
    const auto MinimumSplits = std::min<DiffT>(MaximumSplits, 1);
    std::uniform_int_distribution<DiffT> Dist(MinimumSplits, MaximumSplits);
    // Choose how many splits to perform.
    const DiffT NumSplits = Dist(Gen);

    // Draw split points from a lottery.
    SmallVector<unsigned, 0> Lottery(MaximumSplits);
    // Start lottery at 1, because there is no meaningful split point before
    // the first block.
    std::iota(Lottery.begin(), Lottery.end(), 1u);
    std::shuffle(Lottery.begin(), Lottery.end(), Gen);
    Lottery.resize(NumSplits);
    // The index-based walk below expects the chosen split points in
    // increasing order.
    llvm::sort(Lottery);

    // Add a one-past-the-end entry to the lottery.
    Lottery.push_back(NumBlocks);

    unsigned LotteryIndex = 0;
    unsigned BBPos = 0;
    for (BinaryBasicBlock *const BB : make_range(Start, End)) {
      // Check whether to start a new fragment.
      if (BBPos >= Lottery[LotteryIndex])
        ++LotteryIndex;

      // Because LotteryIndex is 0 based and cold fragments are 1 based, we can
      // use the index to assign fragments.
      BB->setFragmentNum(FragmentNum(LotteryIndex));

      ++BBPos;
    }
  }
};
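
// For illustration: with NumBlocks == 6, MaximumSplits == 5 and NumSplits is
// drawn from [1, 5]. If the sorted lottery ends up as {2, 4}, the sentinel 6
// is appended and the blocks receive fragment numbers {0, 0, 1, 1, 2, 2}:
// positions 0-1 stay in the main fragment, 2-3 form the first cold fragment,
// and 4-5 the second.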

struct SplitAll final : public SplitStrategy {
  bool canSplit(const BinaryFunction &BF) override { return true; }

  bool keepEmpty() override {
    // Keeping empty fragments allows us to test that empty fragments are
    // handled correctly by the rest of the pipeline.
    return true;
  }

  void fragment(const BlockIt Start, const BlockIt End) override {
    unsigned Fragment = 0;
    for (BinaryBasicBlock *const BB : llvm::make_range(Start, End))
      BB->setFragmentNum(FragmentNum(Fragment++));
  }
};
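
// Note: because SplitAll keeps empty fragments and splitFunction() later moves
// every non-outlineable block back to the main fragment, some fragments may
// legitimately end up with no blocks; this strategy is primarily useful for
// exercising multi-fragment emission rather than for improving layout.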
} // namespace

namespace llvm {
namespace bolt {

bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const {
  // Apply the execution count threshold.
  if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold)
    return false;

  return BinaryFunctionPass::shouldOptimize(BF);
}

void SplitFunctions::runOnFunctions(BinaryContext &BC) {
  if (!opts::SplitFunctions)
    return;

  std::unique_ptr<SplitStrategy> Strategy;
  bool ForceSequential = false;

  switch (opts::SplitStrategy) {
  case SplitFunctionsStrategy::Profile2:
    Strategy = std::make_unique<SplitProfile2>();
    break;
  case SplitFunctionsStrategy::Random2:
    Strategy = std::make_unique<SplitRandom2>();
    // If we split functions randomly, we need to ensure that across runs with
    // the same input, we generate random numbers for each function in the same
    // order.
    ForceSequential = true;
    break;
  case SplitFunctionsStrategy::RandomN:
    Strategy = std::make_unique<SplitRandomN>();
    ForceSequential = true;
    break;
  case SplitFunctionsStrategy::All:
    Strategy = std::make_unique<SplitAll>();
    break;
  }

  ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
    return !shouldOptimize(BF);
  };

  ParallelUtilities::runOnEachFunction(
      BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR,
      [&](BinaryFunction &BF) { splitFunction(BF, *Strategy); }, SkipFunc,
      "SplitFunctions", ForceSequential);

  if (SplitBytesHot + SplitBytesCold > 0)
    outs() << "BOLT-INFO: splitting separates " << SplitBytesHot
           << " hot bytes from " << SplitBytesCold << " cold bytes "
           << format("(%.2lf%% of split functions is hot).\n",
                     100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold));
}
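
// Example of the summary printed above (byte counts are illustrative only):
//   BOLT-INFO: splitting separates 1048576 hot bytes from 2097152 cold bytes
//   (33.33% of split functions is hot).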

void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
  if (BF.empty())
    return;

  if (!S.canSplit(BF))
    return;

  FunctionLayout &Layout = BF.getLayout();
  BinaryFunction::BasicBlockOrderType PreSplitLayout(Layout.block_begin(),
                                                     Layout.block_end());

  BinaryContext &BC = BF.getBinaryContext();
  size_t OriginalHotSize;
  size_t HotSize;
  size_t ColdSize;
  if (BC.isX86()) {
    std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF);
    LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
                      << " pre-split is <0x"
                      << Twine::utohexstr(OriginalHotSize) << ", 0x"
                      << Twine::utohexstr(ColdSize) << ">\n");
  }

  BinaryFunction::BasicBlockOrderType NewLayout(Layout.block_begin(),
                                                Layout.block_end());
  // Never outline the first basic block.
  NewLayout.front()->setCanOutline(false);
  for (BinaryBasicBlock *const BB : NewLayout) {
    if (!BB->canOutline())
      continue;

    // Do not split extra entry points in aarch64. They can be referred to by
    // ADR instructions, and when this happens these blocks cannot be placed
    // far away due to the limited range of ADR.
    if (BC.isAArch64() && BB->isEntryPoint()) {
      BB->setCanOutline(false);
      continue;
    }

    if (BF.hasEHRanges() && !opts::SplitEH) {
      // We cannot move landing pads (or rather entry points for landing pads).
      if (BB->isLandingPad()) {
        BB->setCanOutline(false);
        continue;
      }
      // We cannot move a block that can throw since the exception-handling
      // runtime cannot deal with split functions. However, if we can guarantee
      // that the block never throws, it is safe to move the block to
      // decrease the size of the function.
      for (MCInst &Instr : *BB) {
        if (BC.MIB->isInvoke(Instr)) {
          BB->setCanOutline(false);
          break;
        }
      }
    }
  }

  BF.getLayout().updateLayoutIndices();
  S.fragment(NewLayout.begin(), NewLayout.end());

  // Make sure all non-outlineable blocks are in the main fragment.
  for (BinaryBasicBlock *const BB : NewLayout) {
    if (!BB->canOutline())
      BB->setFragmentNum(FragmentNum::main());
  }

  if (opts::AggressiveSplitting) {
    // All blocks with 0 count that we can move go to the end of the function.
    // Even if they were natural to cluster formation and were seen in-between
    // hot basic blocks.
    llvm::stable_sort(NewLayout, [&](const BinaryBasicBlock *const A,
                                     const BinaryBasicBlock *const B) {
      return A->getFragmentNum() < B->getFragmentNum();
    });
  } else if (BF.hasEHRanges() && !opts::SplitEH) {
    // Typically functions with exception handling have landing pads at the
    // end. We cannot move the beginning of landing pads, but we can move
    // 0-count blocks comprising landing pads to the end and thus facilitate
    // splitting.
    auto FirstLP = NewLayout.begin();
    while ((*FirstLP)->isLandingPad())
      ++FirstLP;

    std::stable_sort(FirstLP, NewLayout.end(),
                     [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
                       return A->getFragmentNum() < B->getFragmentNum();
                     });
  }

  // Make sure that fragments are increasing.
  FragmentNum CurrentFragment = NewLayout.back()->getFragmentNum();
  for (BinaryBasicBlock *const BB : reverse(NewLayout)) {
    if (BB->getFragmentNum() > CurrentFragment)
      BB->setFragmentNum(CurrentFragment);
    CurrentFragment = BB->getFragmentNum();
  }

  if (!S.keepEmpty()) {
    // Renumber fragments contiguously so that no empty fragments are emitted.
    FragmentNum CurrentFragment = FragmentNum::main();
    FragmentNum NewFragment = FragmentNum::main();
    for (BinaryBasicBlock *const BB : NewLayout) {
      if (BB->getFragmentNum() > CurrentFragment) {
        CurrentFragment = BB->getFragmentNum();
        NewFragment = FragmentNum(NewFragment.get() + 1);
      }
      BB->setFragmentNum(NewFragment);
    }
  }

  BF.getLayout().update(NewLayout);

  // For shared objects, invoke instructions and corresponding landing pads
  // have to be placed in the same fragment. When we split them, create
  // trampoline landing pads that will redirect the execution to real LPs.
  TrampolineSetType Trampolines;
  if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit())
    Trampolines = createEHTrampolines(BF);

  // Check the new size to see if it's worth splitting the function.
  if (BC.isX86() && BF.isSplit()) {
    std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF);
    LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
                      << " post-split is <0x" << Twine::utohexstr(HotSize)
                      << ", 0x" << Twine::utohexstr(ColdSize) << ">\n");
    if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
        alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) {
      if (opts::Verbosity >= 2) {
        outs() << "BOLT-INFO: Reversing splitting of function "
               << formatv("{0}:\n  {1:x}, {2:x} -> {3:x}\n", BF, HotSize,
                          ColdSize, OriginalHotSize);
      }

      // Reverse the action of createEHTrampolines(). The trampolines will be
      // placed immediately before the matching destination, resulting in no
      // extra code.
      if (PreSplitLayout.size() != BF.size())
        PreSplitLayout = mergeEHTrampolines(BF, PreSplitLayout, Trampolines);

      for (BinaryBasicBlock &BB : BF)
        BB.setFragmentNum(FragmentNum::main());
      BF.getLayout().update(PreSplitLayout);
    } else {
      SplitBytesHot += HotSize;
      SplitBytesCold += ColdSize;
    }
  }
}

SplitFunctions::TrampolineSetType
SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {
  const auto &MIB = BF.getBinaryContext().MIB;

  // Map real landing pads to the corresponding trampolines.
  TrampolineSetType LPTrampolines;

  // Iterate over a copy of the basic blocks since we are adding new blocks to
  // the function, which will invalidate its iterators.
  std::vector<BinaryBasicBlock *> Blocks(BF.pbegin(), BF.pend());
  for (BinaryBasicBlock *BB : Blocks) {
    for (MCInst &Instr : *BB) {
      const std::optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instr);
      if (!EHInfo || !EHInfo->first)
        continue;

      const MCSymbol *LPLabel = EHInfo->first;
      BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel);
      if (BB->getFragmentNum() == LPBlock->getFragmentNum())
        continue;

      const MCSymbol *TrampolineLabel = nullptr;
      const TrampolineKey Key(BB->getFragmentNum(), LPLabel);
      auto Iter = LPTrampolines.find(Key);
      if (Iter != LPTrampolines.end()) {
        TrampolineLabel = Iter->second;
      } else {
        // Create a trampoline basic block in the same fragment as the thrower.
        // Note: there's no need to insert the jump instruction, it will be
        // added by fixBranches().
        BinaryBasicBlock *TrampolineBB = BF.addBasicBlock();
        TrampolineBB->setFragmentNum(BB->getFragmentNum());
        TrampolineBB->setExecutionCount(LPBlock->getExecutionCount());
        TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount());
        TrampolineBB->setCFIState(LPBlock->getCFIState());
        TrampolineLabel = TrampolineBB->getLabel();
        LPTrampolines.insert(std::make_pair(Key, TrampolineLabel));
      }

      // Substitute the landing pad with the trampoline.
      MIB->updateEHInfo(Instr,
                        MCPlus::MCLandingPad(TrampolineLabel, EHInfo->second));
    }
  }

  if (LPTrampolines.empty())
    return LPTrampolines;

  // All trampoline blocks were added to the end of the function. Place them at
  // the end of the corresponding fragments.
  BinaryFunction::BasicBlockOrderType NewLayout(BF.getLayout().block_begin(),
                                                BF.getLayout().block_end());
  stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
    return A->getFragmentNum() < B->getFragmentNum();
  });
  BF.getLayout().update(NewLayout);

  // Conservatively introduce branch instructions.
  BF.fixBranches();

  // Update exception-handling CFG for the function.
  BF.recomputeLandingPads();

  return LPTrampolines;
}
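
// For illustration: if a block in the main fragment invokes a callee whose
// landing pad was moved to a cold fragment, the code above creates a
// trampoline block in the main fragment whose sole successor is the real
// landing pad and rewrites the invoke's EH info to target the trampoline, so
// the thrower and its (trampoline) landing pad stay in the same fragment.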

SplitFunctions::BasicBlockOrderType SplitFunctions::mergeEHTrampolines(
    BinaryFunction &BF, SplitFunctions::BasicBlockOrderType &Layout,
    const SplitFunctions::TrampolineSetType &Trampolines) const {
  // Group trampolines by the label of the real landing pad they forward to.
  DenseMap<const MCSymbol *, SmallVector<const MCSymbol *, 0>>
      IncomingTrampolines;
  for (const auto &Entry : Trampolines) {
    IncomingTrampolines[Entry.getFirst().Target].emplace_back(
        Entry.getSecond());
  }

  // Insert trampolines immediately before their matching landing pads.
  BasicBlockOrderType MergedLayout;
  for (BinaryBasicBlock *BB : Layout) {
    auto Iter = IncomingTrampolines.find(BB->getLabel());
    if (Iter != IncomingTrampolines.end()) {
      for (const MCSymbol *const Trampoline : Iter->getSecond()) {
        BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(Trampoline);
        assert(LPBlock && "Could not find matching landing pad block.");
        MergedLayout.push_back(LPBlock);
      }
    }
    MergedLayout.push_back(BB);
  }

  return MergedLayout;
}

} // namespace bolt
} // namespace llvm