1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "bolt/Rewrite/BinaryPassManager.h"
10 #include "bolt/Passes/ADRRelaxationPass.h"
11 #include "bolt/Passes/Aligner.h"
12 #include "bolt/Passes/AllocCombiner.h"
13 #include "bolt/Passes/AsmDump.h"
14 #include "bolt/Passes/CMOVConversion.h"
15 #include "bolt/Passes/FixRISCVCallsPass.h"
16 #include "bolt/Passes/FixRelaxationPass.h"
17 #include "bolt/Passes/FrameOptimizer.h"
18 #include "bolt/Passes/Hugify.h"
19 #include "bolt/Passes/IdenticalCodeFolding.h"
20 #include "bolt/Passes/IndirectCallPromotion.h"
21 #include "bolt/Passes/Inliner.h"
22 #include "bolt/Passes/Instrumentation.h"
23 #include "bolt/Passes/JTFootprintReduction.h"
24 #include "bolt/Passes/LongJmp.h"
25 #include "bolt/Passes/LoopInversionPass.h"
26 #include "bolt/Passes/PLTCall.h"
27 #include "bolt/Passes/PatchEntries.h"
28 #include "bolt/Passes/RegReAssign.h"
29 #include "bolt/Passes/ReorderData.h"
30 #include "bolt/Passes/ReorderFunctions.h"
31 #include "bolt/Passes/RetpolineInsertion.h"
32 #include "bolt/Passes/SplitFunctions.h"
33 #include "bolt/Passes/StokeInfo.h"
34 #include "bolt/Passes/TailDuplication.h"
35 #include "bolt/Passes/ThreeWayBranch.h"
36 #include "bolt/Passes/ValidateInternalCalls.h"
37 #include "bolt/Passes/ValidateMemRefs.h"
38 #include "bolt/Passes/VeneerElimination.h"
39 #include "bolt/Utils/CommandLineOpts.h"
40 #include "llvm/Support/FormatVariadic.h"
41 #include "llvm/Support/Timer.h"
42 #include "llvm/Support/raw_ostream.h"
// Command-line options that are only declared here (extern); their
// definitions are not part of this listing.
// In the visible code, PrintAll and DumpDotAll are read in runPasses() to
// decide whether functions get printed/dumped after a pass, while
// PrintDynoStats and AsmDump are read in runAllPasses() as the enabling
// condition of the DynoStatsPrintPass / AsmDumpPass registrations.
// NOTE(review): PLT is not referenced anywhere in the visible part of this
// file -- confirm it is still needed.
50 extern cl::opt
<bool> PrintAll
;
51 extern cl::opt
<bool> PrintDynoStats
;
52 extern cl::opt
<bool> DumpDotAll
;
53 extern cl::opt
<std::string
> AsmDump
;
54 extern cl::opt
<bolt::PLTCall::OptType
> PLT
;
// General pass-manager options.
// NOTE(review): this listing has dropped some source lines (the embedded line
// numbers skip), so several definitions below appear without the line that
// carries their type / storage class; those gaps are marked individually.

// "dyno-stats-all": hidden option; forwarded to callWithDynoStats() in
// runPasses() and also enables the final DynoStatsPrintPass in runAllPasses().
// NOTE(review): the declarator line (embedded line 56) is not visible.
57 DynoStatsAll("dyno-stats-all",
58 cl::desc("print dyno stats after each stage"),
59 cl::ZeroOrMore
, cl::Hidden
, cl::cat(BoltCategory
));
// "eliminate-unreachable": initialised to true; used as the enabling condition
// when EliminateUnreachableBlocks is registered in runAllPasses().
// NOTE(review): the declarator line (embedded line 61) is not visible.
62 EliminateUnreachable("eliminate-unreachable",
63 cl::desc("eliminate unreachable code"), cl::init(true),
64 cl::cat(BoltOptCategory
));
// "icf": unlike its neighbours this definition is not marked static in the
// visible text.
66 cl::opt
<bool> ICF("icf", cl::desc("fold functions with identical code"),
67 cl::cat(BoltOptCategory
));
// "jt-footprint-reduction": enabling condition for the JTFootprintReduction
// pass registration in runAllPasses().
69 static cl::opt
<bool> JTFootprintReductionFlag(
70 "jt-footprint-reduction",
71 cl::desc("make jump tables size smaller at the cost of using more "
72 "instructions at jump sites"),
73 cl::cat(BoltOptCategory
));
// NOTE(review): the head of this definition (embedded lines 75-76: type, name
// and flag string) is missing from the listing. Judging by the description it
// is the option that keeps no-op instructions -- confirm against the full file.
77 cl::desc("keep no-op instructions. By default they are removed."),
78 cl::Hidden
, cl::cat(BoltOptCategory
));
// "never-print": passed to the constructor of many passes in runAllPasses()
// (e.g. PrintProgramStats, ValidateInternalCalls) in the position where other
// passes receive their print-* option.
80 cl::opt
<bool> NeverPrint("never-print", cl::desc("never print"),
81 cl::ReallyHidden
, cl::cat(BoltOptCategory
));
// Per-pass "print-*" switches. In the visible part of runAllPasses() most of
// them are handed to the constructor of the matching pass (for example
// PrintICF to IdenticalCodeFolding, PrintSplit to SplitFunctions, PrintFOP to
// both FrameOptimizerPass and AllocCombinerPass). PrintProfileStats is the
// exception: it is read directly as the condition for registering the
// PrintProfileStats pass and is the only option in this group without
// cl::Hidden.
// NOTE(review): many definitions below appear without the line carrying their
// type / storage class because the listing dropped it (the embedded line
// numbers skip) -- confirm against the full file.
84 PrintAfterBranchFixup("print-after-branch-fixup",
85 cl::desc("print function after fixing local branches"),
86 cl::Hidden
, cl::cat(BoltOptCategory
));
89 PrintAfterLowering("print-after-lowering",
90 cl::desc("print function after instruction lowering"),
91 cl::Hidden
, cl::cat(BoltOptCategory
));
94 PrintFinalized("print-finalized",
95 cl::desc("print function after CFG is finalized"),
96 cl::Hidden
, cl::cat(BoltOptCategory
));
// NOTE(review): the head of this definition (embedded lines 98-99) is missing.
// It is presumably PrintFOP, which runAllPasses() passes to FrameOptimizerPass
// and AllocCombinerPass -- confirm the name and flag string.
100 cl::desc("print functions after frame optimizer pass"), cl::Hidden
,
101 cl::cat(BoltOptCategory
));
104 PrintICF("print-icf", cl::desc("print functions after ICF optimization"),
105 cl::Hidden
, cl::cat(BoltOptCategory
));
108 PrintICP("print-icp",
109 cl::desc("print functions after indirect call promotion"),
110 cl::Hidden
, cl::cat(BoltOptCategory
));
113 PrintInline("print-inline",
114 cl::desc("print functions after inlining optimization"),
115 cl::Hidden
, cl::cat(BoltOptCategory
));
117 static cl::opt
<bool> PrintJTFootprintReduction(
118 "print-after-jt-footprint-reduction",
119 cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden
,
120 cl::cat(BoltOptCategory
));
123 PrintLongJmp("print-longjmp",
124 cl::desc("print functions after longjmp pass"), cl::Hidden
,
125 cl::cat(BoltOptCategory
));
// NOTE(review): registered under BoltCategory, whereas most print-* options
// here use BoltOptCategory -- confirm this is intentional.
128 PrintNormalized("print-normalized",
129 cl::desc("print functions after CFG is normalized"),
130 cl::Hidden
, cl::cat(BoltCategory
));
// NOTE(review): not referenced in the visible part of runAllPasses().
132 static cl::opt
<bool> PrintOptimizeBodyless(
133 "print-optimize-bodyless",
134 cl::desc("print functions after bodyless optimization"), cl::Hidden
,
135 cl::cat(BoltOptCategory
));
138 PrintPeepholes("print-peepholes",
139 cl::desc("print functions after peephole optimization"),
140 cl::Hidden
, cl::cat(BoltOptCategory
));
143 PrintPLT("print-plt", cl::desc("print functions after PLT optimization"),
144 cl::Hidden
, cl::cat(BoltOptCategory
));
// Gates registration of the PrintProfileStats pass; not hidden.
147 PrintProfileStats("print-profile-stats",
148 cl::desc("print profile quality/bias analysis"),
149 cl::cat(BoltCategory
));
152 PrintRegReAssign("print-regreassign",
153 cl::desc("print functions after regreassign pass"),
154 cl::Hidden
, cl::cat(BoltOptCategory
));
157 PrintReordered("print-reordered",
158 cl::desc("print functions after layout optimization"),
159 cl::Hidden
, cl::cat(BoltOptCategory
));
162 PrintReorderedFunctions("print-reordered-functions",
163 cl::desc("print functions after clustering"),
164 cl::Hidden
, cl::cat(BoltOptCategory
));
// NOTE(review): registered under BoltCategory, unlike most print-* options.
166 static cl::opt
<bool> PrintRetpolineInsertion(
167 "print-retpoline-insertion",
168 cl::desc("print functions after retpoline insertion pass"), cl::Hidden
,
169 cl::cat(BoltCategory
));
// NOTE(review): the flag-name string (embedded line 172) is missing from the
// listing; only the description is visible.
171 static cl::opt
<bool> PrintSCTC(
173 cl::desc("print functions after conditional tail call simplification"),
174 cl::Hidden
, cl::cat(BoltOptCategory
));
176 static cl::opt
<bool> PrintSimplifyROLoads(
177 "print-simplify-rodata-loads",
178 cl::desc("print functions after simplification of RO data loads"),
179 cl::Hidden
, cl::cat(BoltOptCategory
));
182 PrintSplit("print-split", cl::desc("print functions after code splitting"),
183 cl::Hidden
, cl::cat(BoltOptCategory
));
186 PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
187 cl::Hidden
, cl::cat(BoltOptCategory
));
190 PrintFixRelaxations("print-fix-relaxations",
191 cl::desc("print functions after fix relaxations pass"),
192 cl::Hidden
, cl::cat(BoltOptCategory
));
195 PrintFixRISCVCalls("print-fix-riscv-calls",
196 cl::desc("print functions after fix RISCV calls pass"),
197 cl::Hidden
, cl::cat(BoltOptCategory
));
199 static cl::opt
<bool> PrintVeneerElimination(
200 "print-veneer-elimination",
201 cl::desc("print functions after veneer elimination pass"), cl::Hidden
,
202 cl::cat(BoltOptCategory
));
205 PrintUCE("print-uce",
206 cl::desc("print functions after unreachable code elimination"),
207 cl::Hidden
, cl::cat(BoltOptCategory
));
// Switches that enable individual optimizations / checks. Where visible,
// runAllPasses() passes them as the second argument of registerPass()
// (SimplifyConditionalTailCalls, SimplifyRODataLoads, Stoke,
// ThreeWayBranchFlag, CMOVConversionFlag); VerifyCFG is read in runPasses().
// Only SimplifyConditionalTailCalls and StripRepRet carry cl::init(true) in
// the visible text.
// NOTE(review): the listing dropped several lines in this group (the embedded
// line numbers skip); each gap is marked below.

// NOTE(review): flag-name string and the opening of cl::desc (embedded lines
// 210-211) are missing from the listing.
209 static cl::opt
<bool> RegReAssign(
212 "reassign registers so as to avoid using REX prefixes in hot code"),
213 cl::cat(BoltOptCategory
));
215 static cl::opt
<bool> SimplifyConditionalTailCalls(
216 "simplify-conditional-tail-calls",
217 cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
218 cl::init(true), cl::cat(BoltOptCategory
));
220 static cl::opt
<bool> SimplifyRODataLoads(
221 "simplify-rodata-loads",
222 cl::desc("simplify loads from read-only sections by replacing the memory "
223 "operand with the constant found in the corresponding section"),
224 cl::cat(BoltOptCategory
));
// String-list option; runAllPasses() hands the list to the SpecializeMemcpy1
// pass and enables that pass only when the list is non-empty.
// NOTE(review): the continuation of the description string (embedded line 229)
// is missing, so the cl::desc text shown here is incomplete.
226 static cl::list
<std::string
>
227 SpecializeMemcpy1("memcpy1-spec",
228 cl::desc("list of functions with call sites for which to specialize memcpy() "
230 cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
231 cl::ZeroOrMore
, cl::cat(BoltOptCategory
));
233 static cl::opt
<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
234 cl::cat(BoltOptCategory
));
// NOTE(review): flag-name string (embedded line 237) is missing.
236 static cl::opt
<bool> StringOps(
238 cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
239 cl::cat(BoltOptCategory
));
// NOTE(review): flag-name string (embedded line 242) is missing.
241 static cl::opt
<bool> StripRepRet(
243 cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
244 cl::init(true), cl::cat(BoltOptCategory
));
// When set, runPasses() checks validateCFG() on the functions after each pass.
246 static cl::opt
<bool> VerifyCFG("verify-cfg",
247 cl::desc("verify the CFG after every pass"),
248 cl::Hidden
, cl::cat(BoltOptCategory
));
// NOTE(review): embedded line 252 is missing (possibly another modifier).
250 static cl::opt
<bool> ThreeWayBranchFlag("three-way-branch",
251 cl::desc("reorder three way branches"),
253 cl::cat(BoltOptCategory
));
// NOTE(review): embedded line 257 is missing (possibly another modifier).
255 static cl::opt
<bool> CMOVConversionFlag("cmov-conversion",
256 cl::desc("fold jcc+mov into cmov"),
258 cl::cat(BoltOptCategory
));
265 using namespace opts
;
// Name and description of the timer group used by the pass manager; both are
// passed to the NamedRegionTimer that runPasses() creates for every pass.
267 const char BinaryFunctionPassManager::TimerGroupName
[] = "passman";
268 const char BinaryFunctionPassManager::TimerGroupDesc
[] =
269 "Binary Function Pass Manager";
/// Walk the entries of Passes in index order and call runOnFunctions(BC) on
/// each pass, with optional progress logging, timing, CFG verification and
/// per-function printing / graph dumping around the call.
///
/// NOTE(review): this listing has dropped several source lines (the embedded
/// line numbers skip: 276 -> 279, 292 -> 294, 294 -> 296, 297 -> 299,
/// 300 -> 304, 307 -> 310, 315 -> 318, and the closing braces after 321).
/// Statements that fall into those gaps are not visible here; each gap is
/// marked with a hedged note below and must be checked against the full file.
271 void BinaryFunctionPassManager::runPasses() {
272 auto &BFs
= BC
.getBinaryFunctions();
// Index-based loop: the index is also used to build PassIdName below.
273 for (size_t PassIdx
= 0; PassIdx
< Passes
.size(); PassIdx
++) {
// Each entry pairs a boolean flag with the owning pointer to the pass.
274 const std::pair
<const bool, std::unique_ptr
<BinaryFunctionPass
>>
275 &OptPassPair
= Passes
[PassIdx
];
// NOTE(review): the statement guarded by this `if` (embedded lines 277-278) is
// missing -- presumably `continue;`, i.e. entries whose flag is false are
// skipped; confirm.
276 if (!OptPassPair
.first
)
279 const std::unique_ptr
<BinaryFunctionPass
> &Pass
= OptPassPair
.second
;
// Identifier formed from the pass index and the pass name; only consumed by
// dumpGraphForPass() at the end of the loop body.
280 std::string PassIdName
=
281 formatv("{0:2}_{1}", PassIdx
, Pass
->getName()).str();
283 if (opts::Verbosity
> 0)
284 outs() << "BOLT-INFO: Starting pass: " << Pass
->getName() << "\n";
// Timer object named after the pass, placed in the TimerGroupName /
// TimerGroupDesc group and controlled by TimeOpts; as a local of the loop
// body it lives until the end of the current iteration.
286 NamedRegionTimer
T(Pass
->getName(), Pass
->getName(), TimerGroupName
,
287 TimerGroupDesc
, TimeOpts
);
// The pass itself runs inside the lambda; the function map, the pass name,
// opts::DynoStatsAll and BC.isAArch64() are forwarded to callWithDynoStats().
289 callWithDynoStats([this, &Pass
] { Pass
->runOnFunctions(BC
); }, BFs
,
290 Pass
->getName(), opts::DynoStatsAll
, BC
.isAArch64());
// With opts::VerifyCFG set, validateCFG() results of the functions in BFs are
// and-ed together starting from `true`.
// NOTE(review): the call head (embedded line 293) and the lambda introducer
// (295) are missing; the (begin, end, init, binary-op) argument shape suggests
// a negated std::accumulate -- confirm.
292 if (opts::VerifyCFG
&&
294 BFs
.begin(), BFs
.end(), true,
296 const std::pair
<const uint64_t, BinaryFunction
> &It
) {
297 return Valid
&& It
.second
.validateCFG();
// Reports which pass left an invalid CFG.
// NOTE(review): embedded lines 301-303 are missing, so what happens after the
// message (e.g. terminating the process) is not visible here.
299 errs() << "BOLT-ERROR: Invalid CFG detected after pass "
300 << Pass
->getName() << "\n";
304 if (opts::Verbosity
> 0)
305 outs() << "BOLT-INFO: Finished pass: " << Pass
->getName() << "\n";
// The printing section below is relevant only if opts::PrintAll or
// opts::DumpDotAll is set, or the pass reports printPass().
// NOTE(review): the guarded statement (embedded line 308) is missing --
// presumably `continue;`; confirm.
307 if (!opts::PrintAll
&& !opts::DumpDotAll
&& !Pass
->printPass())
// Label handed to BinaryFunction::print(): "after <pass name>".
310 const std::string Message
= std::string("after ") + Pass
->getName();
312 for (auto &It
: BFs
) {
313 BinaryFunction
&Function
= It
.second
;
// NOTE(review): the guarded statement (embedded line 316) is missing --
// presumably `continue;` for functions the pass does not want printed.
315 if (!Pass
->shouldPrint(Function
))
318 Function
.print(outs(), Message
);
// Additionally dump the function's graph, tagged with PassIdName.
320 if (opts::DumpDotAll
)
321 Function
.dumpGraphForPass(PassIdName
);
// NOTE(review): the closing braces of the inner loop, the outer loop and the
// function (embedded lines 322-324) are not visible in this listing.
326 void BinaryFunctionPassManager::runAllPasses(BinaryContext
&BC
) {
327 BinaryFunctionPassManager
Manager(BC
);
329 const DynoStats InitialDynoStats
=
330 getDynoStats(BC
.getBinaryFunctions(), BC
.isAArch64());
332 Manager
.registerPass(std::make_unique
<AsmDumpPass
>(),
333 opts::AsmDump
.getNumOccurrences());
335 if (BC
.isAArch64()) {
336 Manager
.registerPass(std::make_unique
<FixRelaxations
>(PrintFixRelaxations
));
338 Manager
.registerPass(
339 std::make_unique
<VeneerElimination
>(PrintVeneerElimination
));
343 Manager
.registerPass(
344 std::make_unique
<FixRISCVCallsPass
>(PrintFixRISCVCalls
));
347 // Here we manage dependencies/order manually, since passes are run in the
348 // order they're registered.
350 // Run this pass first to use stats for the original functions.
351 Manager
.registerPass(std::make_unique
<PrintProgramStats
>(NeverPrint
));
353 if (opts::PrintProfileStats
)
354 Manager
.registerPass(std::make_unique
<PrintProfileStats
>(NeverPrint
));
356 Manager
.registerPass(std::make_unique
<ValidateInternalCalls
>(NeverPrint
));
358 Manager
.registerPass(std::make_unique
<ValidateMemRefs
>(NeverPrint
));
360 if (opts::Instrument
)
361 Manager
.registerPass(std::make_unique
<Instrumentation
>(NeverPrint
));
362 else if (opts::Hugify
)
363 Manager
.registerPass(std::make_unique
<HugePage
>(NeverPrint
));
365 Manager
.registerPass(std::make_unique
<ShortenInstructions
>(NeverPrint
));
367 Manager
.registerPass(std::make_unique
<RemoveNops
>(NeverPrint
),
370 Manager
.registerPass(std::make_unique
<NormalizeCFG
>(PrintNormalized
));
372 Manager
.registerPass(std::make_unique
<StripRepRet
>(NeverPrint
),
375 Manager
.registerPass(std::make_unique
<IdenticalCodeFolding
>(PrintICF
),
378 Manager
.registerPass(
379 std::make_unique
<SpecializeMemcpy1
>(NeverPrint
, opts::SpecializeMemcpy1
),
380 !opts::SpecializeMemcpy1
.empty());
382 Manager
.registerPass(std::make_unique
<InlineMemcpy
>(NeverPrint
),
385 Manager
.registerPass(std::make_unique
<IndirectCallPromotion
>(PrintICP
));
387 Manager
.registerPass(
388 std::make_unique
<JTFootprintReduction
>(PrintJTFootprintReduction
),
389 opts::JTFootprintReductionFlag
);
391 Manager
.registerPass(
392 std::make_unique
<SimplifyRODataLoads
>(PrintSimplifyROLoads
),
393 opts::SimplifyRODataLoads
);
395 Manager
.registerPass(std::make_unique
<RegReAssign
>(PrintRegReAssign
),
398 Manager
.registerPass(std::make_unique
<Inliner
>(PrintInline
));
400 Manager
.registerPass(std::make_unique
<IdenticalCodeFolding
>(PrintICF
),
403 Manager
.registerPass(std::make_unique
<PLTCall
>(PrintPLT
));
405 Manager
.registerPass(std::make_unique
<ThreeWayBranch
>(),
406 opts::ThreeWayBranchFlag
);
408 Manager
.registerPass(std::make_unique
<ReorderBasicBlocks
>(PrintReordered
));
410 Manager
.registerPass(std::make_unique
<EliminateUnreachableBlocks
>(PrintUCE
),
411 opts::EliminateUnreachable
);
413 Manager
.registerPass(std::make_unique
<SplitFunctions
>(PrintSplit
));
415 Manager
.registerPass(std::make_unique
<LoopInversionPass
>());
417 Manager
.registerPass(std::make_unique
<TailDuplication
>());
419 Manager
.registerPass(std::make_unique
<CMOVConversion
>(),
420 opts::CMOVConversionFlag
);
422 // This pass syncs local branches with CFG. If any of the following
423 // passes breaks the sync - they either need to re-run the pass or
424 // fix branches consistency internally.
425 Manager
.registerPass(std::make_unique
<FixupBranches
>(PrintAfterBranchFixup
));
427 // This pass should come close to last since it uses the estimated hot
428 // size of a function to determine the order. It should definitely
429 // also happen after any changes to the call graph are made, e.g. inlining.
430 Manager
.registerPass(
431 std::make_unique
<ReorderFunctions
>(PrintReorderedFunctions
));
433 // This is the second run of the SplitFunctions pass required by certain
434 // splitting strategies (e.g. cdsplit). Running the SplitFunctions pass again
435 // after ReorderFunctions allows the finalized function order to be utilized
436 // to make more sophisticated splitting decisions, like hot-warm-cold splitting.
438 Manager
.registerPass(std::make_unique
<SplitFunctions
>(PrintSplit
));
440 // Print final dyno stats right while CFG and instruction analysis are intact.
441 Manager
.registerPass(
442 std::make_unique
<DynoStatsPrintPass
>(
443 InitialDynoStats
, "after all optimizations before SCTC and FOP"),
444 opts::PrintDynoStats
|| opts::DynoStatsAll
);
446 // Add the StokeInfo pass, which extracts functions for stoke optimization and
447 // gets the liveness information for them
448 Manager
.registerPass(std::make_unique
<StokeInfo
>(PrintStoke
), opts::Stoke
);
450 // This pass introduces conditional jumps into external functions.
451 // Between extending CFG to support this and isolating this pass we chose
452 // the latter. Thus this pass will do double jump removal and unreachable
453 // code elimination if necessary and won't rely on peepholes/UCE for these optimizations.
455 // More generally this pass should be the last optimization pass that
456 // modifies branches/control flow. This pass is run after function
457 // reordering so that it can tell whether calls are forward/backward accurately.
459 Manager
.registerPass(
460 std::make_unique
<SimplifyConditionalTailCalls
>(PrintSCTC
),
461 opts::SimplifyConditionalTailCalls
);
463 Manager
.registerPass(std::make_unique
<Peepholes
>(PrintPeepholes
));
465 Manager
.registerPass(std::make_unique
<AlignerPass
>());
467 // Perform reordering on data contained in one or more sections using
468 // memory profiling data.
469 Manager
.registerPass(std::make_unique
<ReorderData
>());
471 if (BC
.isAArch64()) {
472 Manager
.registerPass(std::make_unique
<ADRRelaxationPass
>());
474 // Tighten branches according to offset differences between branch and
475 // targets. No extra instructions after this pass, otherwise we may have
476 // relocations out of range and crash during linking.
477 Manager
.registerPass(std::make_unique
<LongJmpPass
>(PrintLongJmp
));
480 // This pass should always run last.*
481 Manager
.registerPass(std::make_unique
<FinalizeFunctions
>(PrintFinalized
));
483 // FrameOptimizer has an implicit dependency on FinalizeFunctions.
484 // FrameOptimizer moves values around and needs to update CFIs. To do this, it
485 // must read CFI, interpret it and rewrite it, so CFIs need to be correctly
486 // placed according to the final layout.
487 Manager
.registerPass(std::make_unique
<FrameOptimizerPass
>(PrintFOP
));
489 Manager
.registerPass(std::make_unique
<AllocCombinerPass
>(PrintFOP
));
491 Manager
.registerPass(
492 std::make_unique
<RetpolineInsertion
>(PrintRetpolineInsertion
));
494 // Assign each function an output section.
495 Manager
.registerPass(std::make_unique
<AssignSections
>());
497 // Patch original function entries
498 if (BC
.HasRelocations
)
499 Manager
.registerPass(std::make_unique
<PatchEntries
>());
501 // This pass turns tail calls into jumps which makes them invisible to
502 // function reordering. It's unsafe to use any CFG or instruction analysis after this point.
504 Manager
.registerPass(
505 std::make_unique
<InstructionLowering
>(PrintAfterLowering
));
507 // In non-relocation mode, mark functions that do not fit into their original
508 // space as non-simple if we have to (e.g. for correct debug info update).
509 // NOTE: this pass depends on finalized code.
510 if (!BC
.HasRelocations
)
511 Manager
.registerPass(std::make_unique
<CheckLargeFunctions
>(NeverPrint
));
513 Manager
.registerPass(std::make_unique
<LowerAnnotations
>(NeverPrint
));
515 // Check for dirty state of MCSymbols caused by running calculateEmittedSize
516 // in parallel and restore them
517 Manager
.registerPass(std::make_unique
<CleanMCState
>(NeverPrint
));