Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / lib / Passes / Aligner.cpp
blob7c387525434bd39bc847ebe15349784435a6ea4f
1 //===- bolt/Passes/Aligner.cpp - Pass for optimal code alignment ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the AlignerPass class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Passes/Aligner.h"
14 #include "bolt/Core/ParallelUtilities.h"
16 #define DEBUG_TYPE "bolt-aligner"
18 using namespace llvm;
20 namespace opts {
22 extern cl::OptionCategory BoltOptCategory;
24 extern cl::opt<bool> AlignBlocks;
25 extern cl::opt<bool> PreserveBlocksAlignment;
26 extern cl::opt<unsigned> AlignFunctions;
28 cl::opt<unsigned>
29 AlignBlocksMinSize("align-blocks-min-size",
30 cl::desc("minimal size of the basic block that should be aligned"),
31 cl::init(0),
32 cl::ZeroOrMore,
33 cl::Hidden,
34 cl::cat(BoltOptCategory));
36 cl::opt<unsigned> AlignBlocksThreshold(
37 "align-blocks-threshold",
38 cl::desc(
39 "align only blocks with frequency larger than containing function "
40 "execution frequency specified in percent. E.g. 1000 means aligning "
41 "blocks that are 10 times more frequently executed than the "
42 "containing function."),
43 cl::init(800), cl::Hidden, cl::cat(BoltOptCategory));
45 cl::opt<unsigned> AlignFunctionsMaxBytes(
46 "align-functions-max-bytes",
47 cl::desc("maximum number of bytes to use to align functions"), cl::init(32),
48 cl::cat(BoltOptCategory));
50 cl::opt<unsigned>
51 BlockAlignment("block-alignment",
52 cl::desc("boundary to use for alignment of basic blocks"),
53 cl::init(16),
54 cl::ZeroOrMore,
55 cl::cat(BoltOptCategory));
57 cl::opt<bool>
58 UseCompactAligner("use-compact-aligner",
59 cl::desc("Use compact approach for aligning functions"),
60 cl::init(true), cl::cat(BoltOptCategory));
62 } // end namespace opts
64 namespace llvm {
65 namespace bolt {
67 // Align function to the specified byte-boundary (typically, 64) offsetting
68 // the fuction by not more than the corresponding value
69 static void alignMaxBytes(BinaryFunction &Function) {
70 Function.setAlignment(opts::AlignFunctions);
71 Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes);
72 Function.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes);
75 // Align function to the specified byte-boundary (typically, 64) offsetting
76 // the fuction by not more than the minimum over
77 // -- the size of the function
78 // -- the specified number of bytes
79 static void alignCompact(BinaryFunction &Function,
80 const MCCodeEmitter *Emitter) {
81 const BinaryContext &BC = Function.getBinaryContext();
82 size_t HotSize = 0;
83 size_t ColdSize = 0;
85 for (const BinaryBasicBlock &BB : Function)
86 if (BB.isSplit())
87 ColdSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter);
88 else
89 HotSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter);
91 Function.setAlignment(opts::AlignFunctions);
92 if (HotSize > 0)
93 Function.setMaxAlignmentBytes(
94 std::min(size_t(opts::AlignFunctionsMaxBytes), HotSize));
96 // using the same option, max-align-bytes, both for cold and hot parts of the
97 // functions, as aligning cold functions typically does not affect performance
98 if (ColdSize > 0)
99 Function.setMaxColdAlignmentBytes(
100 std::min(size_t(opts::AlignFunctionsMaxBytes), ColdSize));
103 void AlignerPass::alignBlocks(BinaryFunction &Function,
104 const MCCodeEmitter *Emitter) {
105 if (!Function.hasValidProfile() || !Function.isSimple())
106 return;
108 const BinaryContext &BC = Function.getBinaryContext();
110 const uint64_t FuncCount =
111 std::max<uint64_t>(1, Function.getKnownExecutionCount());
112 BinaryBasicBlock *PrevBB = nullptr;
113 for (BinaryBasicBlock *BB : Function.getLayout().blocks()) {
114 uint64_t Count = BB->getKnownExecutionCount();
116 if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) {
117 PrevBB = BB;
118 continue;
121 uint64_t FTCount = 0;
122 if (PrevBB && PrevBB->getFallthrough() == BB)
123 FTCount = PrevBB->getBranchInfo(*BB).Count;
125 PrevBB = BB;
127 if (Count < FTCount * 2)
128 continue;
130 const uint64_t BlockSize =
131 BC.computeCodeSize(BB->begin(), BB->end(), Emitter);
132 const uint64_t BytesToUse =
133 std::min<uint64_t>(opts::BlockAlignment - 1, BlockSize);
135 if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize)
136 continue;
138 BB->setAlignment(opts::BlockAlignment);
139 BB->setAlignmentMaxBytes(BytesToUse);
141 // Update stats.
142 LLVM_DEBUG(
143 std::unique_lock<llvm::sys::RWMutex> Lock(AlignHistogramMtx);
144 AlignHistogram[BytesToUse]++;
145 AlignedBlocksCount += BB->getKnownExecutionCount();
150 void AlignerPass::runOnFunctions(BinaryContext &BC) {
151 if (!BC.HasRelocations)
152 return;
154 AlignHistogram.resize(opts::BlockAlignment);
156 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
157 // Create a separate MCCodeEmitter to allow lock free execution
158 BinaryContext::IndependentCodeEmitter Emitter =
159 BC.createIndependentMCCodeEmitter();
161 if (opts::UseCompactAligner)
162 alignCompact(BF, Emitter.MCE.get());
163 else
164 alignMaxBytes(BF);
166 if (opts::AlignBlocks && !opts::PreserveBlocksAlignment)
167 alignBlocks(BF, Emitter.MCE.get());
170 ParallelUtilities::runOnEachFunction(
171 BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun,
172 ParallelUtilities::PredicateTy(nullptr), "AlignerPass");
174 LLVM_DEBUG(
175 dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n";
176 for (unsigned I = 1; I < AlignHistogram.size(); ++I)
177 dbgs() << " " << I << " : " << AlignHistogram[I] << '\n';
179 dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: "
180 << AlignedBlocksCount << '\n';
184 } // end namespace bolt
185 } // end namespace llvm