1 //===- bolt/Passes/Aligner.cpp - Pass for optimal code alignment ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the AlignerPass class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Passes/Aligner.h"
14 #include "bolt/Core/ParallelUtilities.h"
16 #define DEBUG_TYPE "bolt-aligner"
// Options declared `extern` here and read by this file:
//  - BoltOptCategory: category attached to every option defined below.
//  - AlignBlocks / PreserveBlocksAlignment: runOnFunctions() calls
//    alignBlocks() only when the former is set and the latter is not.
//  - AlignFunctions: the alignment value handed to
//    BinaryFunction::setAlignment() by alignMaxBytes() and alignCompact().
22 extern cl::OptionCategory BoltOptCategory
;
24 extern cl::opt
<bool> AlignBlocks
;
25 extern cl::opt
<bool> PreserveBlocksAlignment
;
26 extern cl::opt
<unsigned> AlignFunctions
;
// Option "align-blocks-min-size", registered in BoltOptCategory.
// alignBlocks() compares it against a block's encoded size; that comparison
// is short-circuited when the option value is zero.
29 AlignBlocksMinSize("align-blocks-min-size",
30 cl::desc("minimal size of the basic block that should be aligned"),
34 cl::cat(BoltOptCategory
));
// Option "align-blocks-threshold": hidden, defaults to 800, registered in
// BoltOptCategory. The value is a percentage of the containing function's
// execution count: alignBlocks() evaluates
// FuncCount * AlignBlocksThreshold / 100 and compares each block's count
// against the result.
36 cl::opt
<unsigned> AlignBlocksThreshold(
37 "align-blocks-threshold",
39 "align only blocks with frequency larger than containing function "
40 "execution frequency specified in percent. E.g. 1000 means aligning "
41 "blocks that are 10 times more frequently executed than the "
42 "containing function."),
43 cl::init(800), cl::Hidden
, cl::cat(BoltOptCategory
));
// Option "align-functions-max-bytes": defaults to 32, registered in
// BoltOptCategory. alignMaxBytes() passes it unchanged to
// setMaxAlignmentBytes()/setMaxColdAlignmentBytes(); alignCompact() passes
// the smaller of it and the accumulated HotSize/ColdSize.
45 cl::opt
<unsigned> AlignFunctionsMaxBytes(
46 "align-functions-max-bytes",
47 cl::desc("maximum number of bytes to use to align functions"), cl::init(32),
48 cl::cat(BoltOptCategory
));
// Option "block-alignment", registered in BoltOptCategory. Within this file
// it is used three ways: as the value given to BinaryBasicBlock::setAlignment(),
// as the bound (BlockAlignment - 1) on the padding bytes allowed per block,
// and as the number of entries AlignHistogram is resized to.
51 BlockAlignment("block-alignment",
52 cl::desc("boundary to use for alignment of basic blocks"),
55 cl::cat(BoltOptCategory
));
// Option "use-compact-aligner": defaults to true, registered in
// BoltOptCategory. When set, runOnFunctions() aligns each function with
// alignCompact().
58 UseCompactAligner("use-compact-aligner",
59 cl::desc("Use compact approach for aligning functions"),
60 cl::init(true), cl::cat(BoltOptCategory
));
62 } // end namespace opts
67 // Align the function to the specified byte-boundary (typically, 64), offsetting
68 // the function by not more than opts::AlignFunctionsMaxBytes bytes.
//
// \param Function  function whose alignment settings are updated in place.
//
// The alignment itself comes from opts::AlignFunctions; the same
// opts::AlignFunctionsMaxBytes limit is stored through both
// setMaxAlignmentBytes() and setMaxColdAlignmentBytes().
69 static void alignMaxBytes(BinaryFunction
&Function
) {
70 Function
.setAlignment(opts::AlignFunctions
);
71 Function
.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes
);
72 Function
.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes
);
75 // Align the function to the specified byte-boundary (typically, 64), offsetting
76 // the function by not more than the minimum over
77 // -- the size of the function
78 // -- the specified number of bytes
//
// \param Function  function whose alignment settings are updated in place.
// \param Emitter   code emitter forwarded to BinaryContext::computeCodeSize()
//                  to measure each basic block.
//
// Sizes are accumulated separately into HotSize and ColdSize and each is
// used to cap the corresponding max-alignment-bytes setting.
79 static void alignCompact(BinaryFunction
&Function
,
80 const MCCodeEmitter
*Emitter
) {
81 const BinaryContext
&BC
= Function
.getBinaryContext();
// Walk every basic block of the function and add its encoded size to one of
// the two running totals.
85 for (const BinaryBasicBlock
&BB
: Function
)
87 ColdSize
+= BC
.computeCodeSize(BB
.begin(), BB
.end(), Emitter
);
89 HotSize
+= BC
.computeCodeSize(BB
.begin(), BB
.end(), Emitter
);
91 Function
.setAlignment(opts::AlignFunctions
);
// Padding limit: the smaller of the option value and HotSize.
93 Function
.setMaxAlignmentBytes(
94 std::min(size_t(opts::AlignFunctionsMaxBytes
), HotSize
));
96 // using the same option, align-functions-max-bytes, both for cold and hot parts of the
97 // functions, as aligning cold functions typically does not affect performance
99 Function
.setMaxColdAlignmentBytes(
100 std::min(size_t(opts::AlignFunctionsMaxBytes
), ColdSize
));
// Choose basic blocks of Function to align and record how many padding bytes
// each may use.
//
// \param Function  function whose layout blocks are examined; selected blocks
//                  get setAlignment()/setAlignmentMaxBytes() applied.
// \param Emitter   code emitter forwarded to BinaryContext::computeCodeSize()
//                  to measure a block.
//
// Also updates the AlignHistogram and AlignedBlocksCount statistics while
// holding AlignHistogramMtx.
103 void AlignerPass::alignBlocks(BinaryFunction
&Function
,
104 const MCCodeEmitter
*Emitter
) {
// Guard on functions lacking a valid profile or not marked simple.
105 if (!Function
.hasValidProfile() || !Function
.isSimple())
108 const BinaryContext
&BC
= Function
.getBinaryContext();
// Function execution count, clamped to at least 1.
110 const uint64_t FuncCount
=
111 std::max
<uint64_t>(1, Function
.getKnownExecutionCount());
112 BinaryBasicBlock
*PrevBB
= nullptr;
113 for (BinaryBasicBlock
*BB
: Function
.getLayout().blocks()) {
114 uint64_t Count
= BB
->getKnownExecutionCount();
// Hotness test: opts::AlignBlocksThreshold is a percentage of FuncCount.
116 if (Count
<= FuncCount
* opts::AlignBlocksThreshold
/ 100) {
// FTCount is the count of the PrevBB -> BB branch, taken only when BB is
// PrevBB's fall-through successor; otherwise it stays 0.
121 uint64_t FTCount
= 0;
122 if (PrevBB
&& PrevBB
->getFallthrough() == BB
)
123 FTCount
= PrevBB
->getBranchInfo(*BB
).Count
;
// Compare the block's count with twice the fall-through count.
127 if (Count
< FTCount
* 2)
130 const uint64_t BlockSize
=
131 BC
.computeCodeSize(BB
->begin(), BB
->end(), Emitter
);
// Padding budget: at most BlockAlignment - 1 bytes and never more than the
// block's own encoded size.
132 const uint64_t BytesToUse
=
133 std::min
<uint64_t>(opts::BlockAlignment
- 1, BlockSize
);
// Minimum-size test; inactive when opts::AlignBlocksMinSize is zero.
135 if (opts::AlignBlocksMinSize
&& BlockSize
< opts::AlignBlocksMinSize
)
138 BB
->setAlignment(opts::BlockAlignment
);
139 BB
->setAlignmentMaxBytes(BytesToUse
);
// Statistics are updated under an exclusive lock on AlignHistogramMtx.
// BytesToUse is at most BlockAlignment - 1, which stays below the
// BlockAlignment entries that runOnFunctions() resizes AlignHistogram to.
143 std::unique_lock
<llvm::sys::RWMutex
> Lock(AlignHistogramMtx
);
144 AlignHistogram
[BytesToUse
]++;
145 AlignedBlocksCount
+= BB
->getKnownExecutionCount();
// Pass entry point: apply function-level and (optionally) block-level
// alignment settings to the functions of BC.
//
// \param BC  binary context whose functions are processed.
// \returns   Error::success() on every visible path, including the early exit
//            taken when BC.HasRelocations is false.
150 Error
AlignerPass::runOnFunctions(BinaryContext
&BC
) {
151 if (!BC
.HasRelocations
)
152 return Error::success();
// One histogram slot per possible BytesToUse value computed in alignBlocks().
154 AlignHistogram
.resize(opts::BlockAlignment
);
// Per-function work item handed to ParallelUtilities::runOnEachFunction().
156 ParallelUtilities::WorkFuncTy WorkFun
= [&](BinaryFunction
&BF
) {
157 // Create a separate MCCodeEmitter to allow lock free execution
158 BinaryContext::IndependentCodeEmitter Emitter
=
159 BC
.createIndependentMCCodeEmitter();
161 if (opts::UseCompactAligner
)
162 alignCompact(BF
, Emitter
.MCE
.get());
// Block alignment runs only when requested and when the existing block
// alignment is not being preserved.
166 if (opts::AlignBlocks
&& !opts::PreserveBlocksAlignment
)
167 alignBlocks(BF
, Emitter
.MCE
.get());
// Run WorkFun with the SP_TRIVIAL scheduling policy, a null predicate, and
// the label "AlignerPass".
170 ParallelUtilities::runOnEachFunction(
171 BC
, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL
, WorkFun
,
172 ParallelUtilities::PredicateTy(nullptr), "AlignerPass");
// Report the collected statistics on dbgs(); histogram printing starts at
// index 1, so slot 0 is not shown.
175 dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n";
176 for (unsigned I
= 1; I
< AlignHistogram
.size(); ++I
)
177 dbgs() << " " << I
<< " : " << AlignHistogram
[I
] << '\n';
179 dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: "
180 << AlignedBlocksCount
<< '\n';
182 return Error::success();
185 } // end namespace bolt
186 } // end namespace llvm