Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / lib / Passes / SplitFunctions.cpp
blob34973cecdf49161658bb46409ec1e645ccf52db2
1 //===- bolt/Passes/SplitFunctions.cpp - Pass for splitting function code --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SplitFunctions pass.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Passes/SplitFunctions.h"
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Core/FunctionLayout.h"
17 #include "bolt/Core/ParallelUtilities.h"
18 #include "bolt/Utils/CommandLineOpts.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/Sequence.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/iterator_range.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/FormatVariadic.h"
25 #include <algorithm>
26 #include <iterator>
27 #include <memory>
28 #include <numeric>
29 #include <random>
30 #include <vector>
32 #define DEBUG_TYPE "bolt-opts"
34 using namespace llvm;
35 using namespace bolt;
37 namespace {
38 class DeprecatedSplitFunctionOptionParser : public cl::parser<bool> {
39 public:
40 explicit DeprecatedSplitFunctionOptionParser(cl::Option &O)
41 : cl::parser<bool>(O) {}
43 bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) {
44 if (Arg == "2" || Arg == "3") {
45 Value = true;
46 errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" "
47 "for option -{1} is deprecated\n",
48 Arg, ArgName);
49 return false;
51 return cl::parser<bool>::parse(O, ArgName, Arg, Value);
54 } // namespace
56 namespace opts {
58 extern cl::OptionCategory BoltOptCategory;
60 extern cl::opt<bool> SplitEH;
61 extern cl::opt<unsigned> ExecutionCountThreshold;
62 extern cl::opt<uint32_t> RandomSeed;
64 static cl::opt<bool> AggressiveSplitting(
65 "split-all-cold", cl::desc("outline as many cold basic blocks as possible"),
66 cl::cat(BoltOptCategory));
68 static cl::opt<unsigned> SplitAlignThreshold(
69 "split-align-threshold",
70 cl::desc("when deciding to split a function, apply this alignment "
71 "while doing the size comparison (see -split-threshold). "
72 "Default value: 2."),
73 cl::init(2),
75 cl::Hidden, cl::cat(BoltOptCategory));
77 static cl::opt<bool, false, DeprecatedSplitFunctionOptionParser>
78 SplitFunctions("split-functions",
79 cl::desc("split functions into fragments"),
80 cl::cat(BoltOptCategory));
82 static cl::opt<unsigned> SplitThreshold(
83 "split-threshold",
84 cl::desc("split function only if its main size is reduced by more than "
85 "given amount of bytes. Default value: 0, i.e. split iff the "
86 "size is reduced. Note that on some architectures the size can "
87 "increase after splitting."),
88 cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
90 static cl::opt<SplitFunctionsStrategy> SplitStrategy(
91 "split-strategy", cl::init(SplitFunctionsStrategy::Profile2),
92 cl::values(clEnumValN(SplitFunctionsStrategy::Profile2, "profile2",
93 "split each function into a hot and cold fragment "
94 "using profiling information")),
95 cl::values(clEnumValN(
96 SplitFunctionsStrategy::Random2, "random2",
97 "split each function into a hot and cold fragment at a randomly chosen "
98 "split point (ignoring any available profiling information)")),
99 cl::values(clEnumValN(
100 SplitFunctionsStrategy::RandomN, "randomN",
101 "split each function into N fragments at a randomly chosen split "
102 "points (ignoring any available profiling information)")),
103 cl::values(clEnumValN(
104 SplitFunctionsStrategy::All, "all",
105 "split all basic blocks of each function into fragments such that each "
106 "fragment contains exactly a single basic block")),
107 cl::desc("strategy used to partition blocks into fragments"),
108 cl::cat(BoltOptCategory));
109 } // namespace opts
111 namespace {
112 bool hasFullProfile(const BinaryFunction &BF) {
113 return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) {
114 return BB.getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE;
118 bool allBlocksCold(const BinaryFunction &BF) {
119 return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) {
120 return BB.getExecutionCount() == 0;
124 struct SplitProfile2 final : public SplitStrategy {
125 bool canSplit(const BinaryFunction &BF) override {
126 return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
129 bool keepEmpty() override { return false; }
131 void fragment(const BlockIt Start, const BlockIt End) override {
132 for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) {
133 if (BB->getExecutionCount() == 0)
134 BB->setFragmentNum(FragmentNum::cold());
139 struct SplitRandom2 final : public SplitStrategy {
140 std::minstd_rand0 Gen;
142 SplitRandom2() : Gen(opts::RandomSeed.getValue()) {}
144 bool canSplit(const BinaryFunction &BF) override { return true; }
146 bool keepEmpty() override { return false; }
148 void fragment(const BlockIt Start, const BlockIt End) override {
149 using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
150 const DiffT NumBlocks = End - Start;
151 assert(NumBlocks > 0 && "Cannot fragment empty function");
153 // We want to split at least one block
154 const auto LastSplitPoint = std::max<DiffT>(NumBlocks - 1, 1);
155 std::uniform_int_distribution<DiffT> Dist(1, LastSplitPoint);
156 const DiffT SplitPoint = Dist(Gen);
157 for (BinaryBasicBlock *BB : llvm::make_range(Start + SplitPoint, End))
158 BB->setFragmentNum(FragmentNum::cold());
160 LLVM_DEBUG(dbgs() << formatv("BOLT-DEBUG: randomly chose last {0} (out of "
161 "{1} possible) blocks to split\n",
162 NumBlocks - SplitPoint, End - Start));
166 struct SplitRandomN final : public SplitStrategy {
167 std::minstd_rand0 Gen;
169 SplitRandomN() : Gen(opts::RandomSeed.getValue()) {}
171 bool canSplit(const BinaryFunction &BF) override { return true; }
173 bool keepEmpty() override { return false; }
175 void fragment(const BlockIt Start, const BlockIt End) override {
176 using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
177 const DiffT NumBlocks = End - Start;
178 assert(NumBlocks > 0 && "Cannot fragment empty function");
180 // With n blocks, there are n-1 places to split them.
181 const DiffT MaximumSplits = NumBlocks - 1;
182 // We want to generate at least two fragment if possible, but if there is
183 // only one block, no splits are possible.
184 const auto MinimumSplits = std::min<DiffT>(MaximumSplits, 1);
185 std::uniform_int_distribution<DiffT> Dist(MinimumSplits, MaximumSplits);
186 // Choose how many splits to perform
187 const DiffT NumSplits = Dist(Gen);
189 // Draw split points from a lottery
190 SmallVector<unsigned, 0> Lottery(MaximumSplits);
191 // Start lottery at 1, because there is no meaningful splitpoint before the
192 // first block.
193 std::iota(Lottery.begin(), Lottery.end(), 1u);
194 std::shuffle(Lottery.begin(), Lottery.end(), Gen);
195 Lottery.resize(NumSplits);
196 llvm::sort(Lottery);
198 // Add one past the end entry to lottery
199 Lottery.push_back(NumBlocks);
201 unsigned LotteryIndex = 0;
202 unsigned BBPos = 0;
203 for (BinaryBasicBlock *const BB : make_range(Start, End)) {
204 // Check whether to start new fragment
205 if (BBPos >= Lottery[LotteryIndex])
206 ++LotteryIndex;
208 // Because LotteryIndex is 0 based and cold fragments are 1 based, we can
209 // use the index to assign fragments.
210 BB->setFragmentNum(FragmentNum(LotteryIndex));
212 ++BBPos;
217 struct SplitAll final : public SplitStrategy {
218 bool canSplit(const BinaryFunction &BF) override { return true; }
220 bool keepEmpty() override {
221 // Keeping empty fragments allows us to test, that empty fragments do not
222 // generate symbols.
223 return true;
226 void fragment(const BlockIt Start, const BlockIt End) override {
227 unsigned Fragment = 0;
228 for (BinaryBasicBlock *const BB : llvm::make_range(Start, End))
229 BB->setFragmentNum(FragmentNum(Fragment++));
232 } // namespace
234 namespace llvm {
235 namespace bolt {
237 bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const {
238 // Apply execution count threshold
239 if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold)
240 return false;
242 return BinaryFunctionPass::shouldOptimize(BF);
245 void SplitFunctions::runOnFunctions(BinaryContext &BC) {
246 if (!opts::SplitFunctions)
247 return;
249 std::unique_ptr<SplitStrategy> Strategy;
250 bool ForceSequential = false;
252 switch (opts::SplitStrategy) {
253 case SplitFunctionsStrategy::Profile2:
254 Strategy = std::make_unique<SplitProfile2>();
255 break;
256 case SplitFunctionsStrategy::Random2:
257 Strategy = std::make_unique<SplitRandom2>();
258 // If we split functions randomly, we need to ensure that across runs with
259 // the same input, we generate random numbers for each function in the same
260 // order.
261 ForceSequential = true;
262 break;
263 case SplitFunctionsStrategy::RandomN:
264 Strategy = std::make_unique<SplitRandomN>();
265 ForceSequential = true;
266 break;
267 case SplitFunctionsStrategy::All:
268 Strategy = std::make_unique<SplitAll>();
269 break;
272 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
273 return !shouldOptimize(BF);
276 ParallelUtilities::runOnEachFunction(
277 BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR,
278 [&](BinaryFunction &BF) { splitFunction(BF, *Strategy); }, SkipFunc,
279 "SplitFunctions", ForceSequential);
281 if (SplitBytesHot + SplitBytesCold > 0)
282 outs() << "BOLT-INFO: splitting separates " << SplitBytesHot
283 << " hot bytes from " << SplitBytesCold << " cold bytes "
284 << format("(%.2lf%% of split functions is hot).\n",
285 100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold));
288 void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
289 if (BF.empty())
290 return;
292 if (!S.canSplit(BF))
293 return;
295 FunctionLayout &Layout = BF.getLayout();
296 BinaryFunction::BasicBlockOrderType PreSplitLayout(Layout.block_begin(),
297 Layout.block_end());
299 BinaryContext &BC = BF.getBinaryContext();
300 size_t OriginalHotSize;
301 size_t HotSize;
302 size_t ColdSize;
303 if (BC.isX86()) {
304 std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF);
305 LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
306 << " pre-split is <0x"
307 << Twine::utohexstr(OriginalHotSize) << ", 0x"
308 << Twine::utohexstr(ColdSize) << ">\n");
311 BinaryFunction::BasicBlockOrderType NewLayout(Layout.block_begin(),
312 Layout.block_end());
313 // Never outline the first basic block.
314 NewLayout.front()->setCanOutline(false);
315 for (BinaryBasicBlock *const BB : NewLayout) {
316 if (!BB->canOutline())
317 continue;
319 // Do not split extra entry points in aarch64. They can be referred by
320 // using ADRs and when this happens, these blocks cannot be placed far
321 // away due to the limited range in ADR instruction.
322 if (BC.isAArch64() && BB->isEntryPoint()) {
323 BB->setCanOutline(false);
324 continue;
327 if (BF.hasEHRanges() && !opts::SplitEH) {
328 // We cannot move landing pads (or rather entry points for landing pads).
329 if (BB->isLandingPad()) {
330 BB->setCanOutline(false);
331 continue;
333 // We cannot move a block that can throw since exception-handling
334 // runtime cannot deal with split functions. However, if we can guarantee
335 // that the block never throws, it is safe to move the block to
336 // decrease the size of the function.
337 for (MCInst &Instr : *BB) {
338 if (BC.MIB->isInvoke(Instr)) {
339 BB->setCanOutline(false);
340 break;
346 BF.getLayout().updateLayoutIndices();
347 S.fragment(NewLayout.begin(), NewLayout.end());
349 // Make sure all non-outlineable blocks are in the main-fragment.
350 for (BinaryBasicBlock *const BB : NewLayout) {
351 if (!BB->canOutline())
352 BB->setFragmentNum(FragmentNum::main());
355 if (opts::AggressiveSplitting) {
356 // All blocks with 0 count that we can move go to the end of the function.
357 // Even if they were natural to cluster formation and were seen in-between
358 // hot basic blocks.
359 llvm::stable_sort(NewLayout, [&](const BinaryBasicBlock *const A,
360 const BinaryBasicBlock *const B) {
361 return A->getFragmentNum() < B->getFragmentNum();
363 } else if (BF.hasEHRanges() && !opts::SplitEH) {
364 // Typically functions with exception handling have landing pads at the end.
365 // We cannot move beginning of landing pads, but we can move 0-count blocks
366 // comprising landing pads to the end and thus facilitate splitting.
367 auto FirstLP = NewLayout.begin();
368 while ((*FirstLP)->isLandingPad())
369 ++FirstLP;
371 std::stable_sort(FirstLP, NewLayout.end(),
372 [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
373 return A->getFragmentNum() < B->getFragmentNum();
377 // Make sure that fragments are increasing.
378 FragmentNum CurrentFragment = NewLayout.back()->getFragmentNum();
379 for (BinaryBasicBlock *const BB : reverse(NewLayout)) {
380 if (BB->getFragmentNum() > CurrentFragment)
381 BB->setFragmentNum(CurrentFragment);
382 CurrentFragment = BB->getFragmentNum();
385 if (!S.keepEmpty()) {
386 FragmentNum CurrentFragment = FragmentNum::main();
387 FragmentNum NewFragment = FragmentNum::main();
388 for (BinaryBasicBlock *const BB : NewLayout) {
389 if (BB->getFragmentNum() > CurrentFragment) {
390 CurrentFragment = BB->getFragmentNum();
391 NewFragment = FragmentNum(NewFragment.get() + 1);
393 BB->setFragmentNum(NewFragment);
397 BF.getLayout().update(NewLayout);
399 // For shared objects, invoke instructions and corresponding landing pads
400 // have to be placed in the same fragment. When we split them, create
401 // trampoline landing pads that will redirect the execution to real LPs.
402 TrampolineSetType Trampolines;
403 if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit())
404 Trampolines = createEHTrampolines(BF);
406 // Check the new size to see if it's worth splitting the function.
407 if (BC.isX86() && BF.isSplit()) {
408 std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF);
409 LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
410 << " post-split is <0x" << Twine::utohexstr(HotSize)
411 << ", 0x" << Twine::utohexstr(ColdSize) << ">\n");
412 if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
413 alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) {
414 if (opts::Verbosity >= 2) {
415 outs() << "BOLT-INFO: Reversing splitting of function "
416 << formatv("{0}:\n {1:x}, {2:x} -> {3:x}\n", BF, HotSize,
417 ColdSize, OriginalHotSize);
420 // Reverse the action of createEHTrampolines(). The trampolines will be
421 // placed immediately before the matching destination resulting in no
422 // extra code.
423 if (PreSplitLayout.size() != BF.size())
424 PreSplitLayout = mergeEHTrampolines(BF, PreSplitLayout, Trampolines);
426 for (BinaryBasicBlock &BB : BF)
427 BB.setFragmentNum(FragmentNum::main());
428 BF.getLayout().update(PreSplitLayout);
429 } else {
430 SplitBytesHot += HotSize;
431 SplitBytesCold += ColdSize;
436 SplitFunctions::TrampolineSetType
437 SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {
438 const auto &MIB = BF.getBinaryContext().MIB;
440 // Map real landing pads to the corresponding trampolines.
441 TrampolineSetType LPTrampolines;
443 // Iterate over the copy of basic blocks since we are adding new blocks to the
444 // function which will invalidate its iterators.
445 std::vector<BinaryBasicBlock *> Blocks(BF.pbegin(), BF.pend());
446 for (BinaryBasicBlock *BB : Blocks) {
447 for (MCInst &Instr : *BB) {
448 const std::optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instr);
449 if (!EHInfo || !EHInfo->first)
450 continue;
452 const MCSymbol *LPLabel = EHInfo->first;
453 BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel);
454 if (BB->getFragmentNum() == LPBlock->getFragmentNum())
455 continue;
457 const MCSymbol *TrampolineLabel = nullptr;
458 const TrampolineKey Key(BB->getFragmentNum(), LPLabel);
459 auto Iter = LPTrampolines.find(Key);
460 if (Iter != LPTrampolines.end()) {
461 TrampolineLabel = Iter->second;
462 } else {
463 // Create a trampoline basic block in the same fragment as the thrower.
464 // Note: there's no need to insert the jump instruction, it will be
465 // added by fixBranches().
466 BinaryBasicBlock *TrampolineBB = BF.addBasicBlock();
467 TrampolineBB->setFragmentNum(BB->getFragmentNum());
468 TrampolineBB->setExecutionCount(LPBlock->getExecutionCount());
469 TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount());
470 TrampolineBB->setCFIState(LPBlock->getCFIState());
471 TrampolineLabel = TrampolineBB->getLabel();
472 LPTrampolines.insert(std::make_pair(Key, TrampolineLabel));
475 // Substitute the landing pad with the trampoline.
476 MIB->updateEHInfo(Instr,
477 MCPlus::MCLandingPad(TrampolineLabel, EHInfo->second));
481 if (LPTrampolines.empty())
482 return LPTrampolines;
484 // All trampoline blocks were added to the end of the function. Place them at
485 // the end of corresponding fragments.
486 BinaryFunction::BasicBlockOrderType NewLayout(BF.getLayout().block_begin(),
487 BF.getLayout().block_end());
488 stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
489 return A->getFragmentNum() < B->getFragmentNum();
491 BF.getLayout().update(NewLayout);
493 // Conservatively introduce branch instructions.
494 BF.fixBranches();
496 // Update exception-handling CFG for the function.
497 BF.recomputeLandingPads();
499 return LPTrampolines;
502 SplitFunctions::BasicBlockOrderType SplitFunctions::mergeEHTrampolines(
503 BinaryFunction &BF, SplitFunctions::BasicBlockOrderType &Layout,
504 const SplitFunctions::TrampolineSetType &Trampolines) const {
505 DenseMap<const MCSymbol *, SmallVector<const MCSymbol *, 0>>
506 IncomingTrampolines;
507 for (const auto &Entry : Trampolines) {
508 IncomingTrampolines[Entry.getFirst().Target].emplace_back(
509 Entry.getSecond());
512 BasicBlockOrderType MergedLayout;
513 for (BinaryBasicBlock *BB : Layout) {
514 auto Iter = IncomingTrampolines.find(BB->getLabel());
515 if (Iter != IncomingTrampolines.end()) {
516 for (const MCSymbol *const Trampoline : Iter->getSecond()) {
517 BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(Trampoline);
518 assert(LPBlock && "Could not find matching landing pad block.");
519 MergedLayout.push_back(LPBlock);
522 MergedLayout.push_back(BB);
525 return MergedLayout;
528 } // namespace bolt
529 } // namespace llvm