1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "bolt/Rewrite/BinaryPassManager.h"
10 #include "bolt/Passes/ADRRelaxationPass.h"
11 #include "bolt/Passes/Aligner.h"
12 #include "bolt/Passes/AllocCombiner.h"
13 #include "bolt/Passes/AsmDump.h"
14 #include "bolt/Passes/CMOVConversion.h"
15 #include "bolt/Passes/FixRISCVCallsPass.h"
16 #include "bolt/Passes/FixRelaxationPass.h"
17 #include "bolt/Passes/FrameOptimizer.h"
18 #include "bolt/Passes/Hugify.h"
19 #include "bolt/Passes/IdenticalCodeFolding.h"
20 #include "bolt/Passes/IndirectCallPromotion.h"
21 #include "bolt/Passes/Inliner.h"
22 #include "bolt/Passes/Instrumentation.h"
23 #include "bolt/Passes/JTFootprintReduction.h"
24 #include "bolt/Passes/LongJmp.h"
25 #include "bolt/Passes/LoopInversionPass.h"
26 #include "bolt/Passes/PLTCall.h"
27 #include "bolt/Passes/PatchEntries.h"
28 #include "bolt/Passes/RegReAssign.h"
29 #include "bolt/Passes/ReorderData.h"
30 #include "bolt/Passes/ReorderFunctions.h"
31 #include "bolt/Passes/RetpolineInsertion.h"
32 #include "bolt/Passes/SplitFunctions.h"
33 #include "bolt/Passes/StokeInfo.h"
34 #include "bolt/Passes/TailDuplication.h"
35 #include "bolt/Passes/ThreeWayBranch.h"
36 #include "bolt/Passes/ValidateInternalCalls.h"
37 #include "bolt/Passes/ValidateMemRefs.h"
38 #include "bolt/Passes/VeneerElimination.h"
39 #include "bolt/Utils/CommandLineOpts.h"
40 #include "llvm/Support/FormatVariadic.h"
41 #include "llvm/Support/Timer.h"
42 #include "llvm/Support/raw_ostream.h"
// Command-line options that are only declared here (`extern`); their
// definitions live in another translation unit. Within this file, PrintAll and
// DumpDotAll are read by runPasses(), while PrintDynoStats and AsmDump are
// read by runAllPasses(). PLT is not referenced in the visible part of the
// file.
50 extern cl::opt
<bool> PrintAll
;
51 extern cl::opt
<bool> PrintDynoStats
;
52 extern cl::opt
<bool> DumpDotAll
;
53 extern cl::opt
<std::string
> AsmDump
;
54 extern cl::opt
<bolt::PLTCall::OptType
> PLT
;
57 DynoStatsAll("dyno-stats-all",
58 cl::desc("print dyno stats after each stage"),
59 cl::ZeroOrMore
, cl::Hidden
, cl::cat(BoltCategory
));
62 EliminateUnreachable("eliminate-unreachable",
63 cl::desc("eliminate unreachable code"), cl::init(true),
64 cl::cat(BoltOptCategory
));
// Boolean option "icf" (identical code folding). Unlike most options defined
// in this file, it is declared without `static`.
66 cl::opt
<bool> ICF("icf", cl::desc("fold functions with identical code"),
67 cl::cat(BoltOptCategory
));
// Boolean option "jt-footprint-reduction". runAllPasses() passes it as the
// second argument to registerPass() when registering JTFootprintReduction --
// presumably the flag that enables the pass; confirm against registerPass().
69 static cl::opt
<bool> JTFootprintReductionFlag(
70 "jt-footprint-reduction",
71 cl::desc("make jump tables size smaller at the cost of using more "
72 "instructions at jump sites"),
73 cl::cat(BoltOptCategory
));
// Really-hidden boolean option "never-print". runAllPasses() hands it to the
// constructors of passes that have no dedicated print-* option (for example
// PrintProgramStats, ValidateInternalCalls, RemoveNops), in the position where
// other passes receive their print-* option. Declared without `static`.
75 cl::opt
<bool> NeverPrint("never-print", cl::desc("never print"),
76 cl::ReallyHidden
, cl::cat(BoltOptCategory
));
79 PrintAfterBranchFixup("print-after-branch-fixup",
80 cl::desc("print function after fixing local branches"),
81 cl::Hidden
, cl::cat(BoltOptCategory
));
84 PrintAfterLowering("print-after-lowering",
85 cl::desc("print function after instruction lowering"),
86 cl::Hidden
, cl::cat(BoltOptCategory
));
89 PrintFinalized("print-finalized",
90 cl::desc("print function after CFG is finalized"),
91 cl::Hidden
, cl::cat(BoltOptCategory
));
95 cl::desc("print functions after frame optimizer pass"), cl::Hidden
,
96 cl::cat(BoltOptCategory
));
99 PrintICF("print-icf", cl::desc("print functions after ICF optimization"),
100 cl::Hidden
, cl::cat(BoltOptCategory
));
103 PrintICP("print-icp",
104 cl::desc("print functions after indirect call promotion"),
105 cl::Hidden
, cl::cat(BoltOptCategory
));
108 PrintInline("print-inline",
109 cl::desc("print functions after inlining optimization"),
110 cl::Hidden
, cl::cat(BoltOptCategory
));
// Hidden boolean option "print-after-jt-footprint-reduction". runAllPasses()
// passes it to the JTFootprintReduction pass constructor.
112 static cl::opt
<bool> PrintJTFootprintReduction(
113 "print-after-jt-footprint-reduction",
114 cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden
,
115 cl::cat(BoltOptCategory
));
118 PrintLongJmp("print-longjmp",
119 cl::desc("print functions after longjmp pass"), cl::Hidden
,
120 cl::cat(BoltOptCategory
));
123 PrintNormalized("print-normalized",
124 cl::desc("print functions after CFG is normalized"),
125 cl::Hidden
, cl::cat(BoltCategory
));
// Hidden boolean option "print-optimize-bodyless".
// NOTE(review): no use of this option is visible in this part of the file --
// confirm whether it is still consumed anywhere.
127 static cl::opt
<bool> PrintOptimizeBodyless(
128 "print-optimize-bodyless",
129 cl::desc("print functions after bodyless optimization"), cl::Hidden
,
130 cl::cat(BoltOptCategory
));
133 PrintPeepholes("print-peepholes",
134 cl::desc("print functions after peephole optimization"),
135 cl::Hidden
, cl::cat(BoltOptCategory
));
138 PrintPLT("print-plt", cl::desc("print functions after PLT optimization"),
139 cl::Hidden
, cl::cat(BoltOptCategory
));
142 PrintProfileStats("print-profile-stats",
143 cl::desc("print profile quality/bias analysis"),
144 cl::cat(BoltCategory
));
147 PrintRegReAssign("print-regreassign",
148 cl::desc("print functions after regreassign pass"),
149 cl::Hidden
, cl::cat(BoltOptCategory
));
152 PrintReordered("print-reordered",
153 cl::desc("print functions after layout optimization"),
154 cl::Hidden
, cl::cat(BoltOptCategory
));
157 PrintReorderedFunctions("print-reordered-functions",
158 cl::desc("print functions after clustering"),
159 cl::Hidden
, cl::cat(BoltOptCategory
));
// Hidden boolean option "print-retpoline-insertion". runAllPasses() passes it
// to the RetpolineInsertion pass constructor. Registered under BoltCategory
// rather than BoltOptCategory.
161 static cl::opt
<bool> PrintRetpolineInsertion(
162 "print-retpoline-insertion",
163 cl::desc("print functions after retpoline insertion pass"), cl::Hidden
,
164 cl::cat(BoltCategory
));
166 static cl::opt
<bool> PrintSCTC(
168 cl::desc("print functions after conditional tail call simplification"),
169 cl::Hidden
, cl::cat(BoltOptCategory
));
// Hidden boolean option "print-simplify-rodata-loads". runAllPasses() passes
// it to the SimplifyRODataLoads pass constructor.
171 static cl::opt
<bool> PrintSimplifyROLoads(
172 "print-simplify-rodata-loads",
173 cl::desc("print functions after simplification of RO data loads"),
174 cl::Hidden
, cl::cat(BoltOptCategory
));
177 PrintSplit("print-split", cl::desc("print functions after code splitting"),
178 cl::Hidden
, cl::cat(BoltOptCategory
));
181 PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
182 cl::Hidden
, cl::cat(BoltOptCategory
));
185 PrintFixRelaxations("print-fix-relaxations",
186 cl::desc("print functions after fix relaxations pass"),
187 cl::Hidden
, cl::cat(BoltOptCategory
));
190 PrintFixRISCVCalls("print-fix-riscv-calls",
191 cl::desc("print functions after fix RISCV calls pass"),
192 cl::Hidden
, cl::cat(BoltOptCategory
));
// Hidden boolean option "print-veneer-elimination". runAllPasses() passes it
// to the VeneerElimination pass constructor.
194 static cl::opt
<bool> PrintVeneerElimination(
195 "print-veneer-elimination",
196 cl::desc("print functions after veneer elimination pass"), cl::Hidden
,
197 cl::cat(BoltOptCategory
));
200 PrintUCE("print-uce",
201 cl::desc("print functions after unreachable code elimination"),
202 cl::Hidden
, cl::cat(BoltOptCategory
));
204 static cl::opt
<bool> RegReAssign(
207 "reassign registers so as to avoid using REX prefixes in hot code"),
208 cl::cat(BoltOptCategory
));
// Boolean option "simplify-conditional-tail-calls", initialized to true.
// runAllPasses() passes it as the second argument to registerPass() for the
// SimplifyConditionalTailCalls pass -- presumably the enable flag; confirm
// against registerPass().
210 static cl::opt
<bool> SimplifyConditionalTailCalls(
211 "simplify-conditional-tail-calls",
212 cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
213 cl::init(true), cl::cat(BoltOptCategory
));
// Boolean option "simplify-rodata-loads". runAllPasses() passes it as the
// second argument to registerPass() for the SimplifyRODataLoads pass --
// presumably the enable flag; confirm against registerPass().
215 static cl::opt
<bool> SimplifyRODataLoads(
216 "simplify-rodata-loads",
217 cl::desc("simplify loads from read-only sections by replacing the memory "
218 "operand with the constant found in the corresponding section"),
219 cl::cat(BoltOptCategory
));
221 static cl::list
<std::string
>
222 SpecializeMemcpy1("memcpy1-spec",
223 cl::desc("list of functions with call sites for which to specialize memcpy() "
225 cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
226 cl::ZeroOrMore
, cl::cat(BoltOptCategory
));
// Boolean option "stoke". runAllPasses() passes it as the second argument to
// registerPass() for the StokeInfo pass -- presumably the enable flag; confirm
// against registerPass().
228 static cl::opt
<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
229 cl::cat(BoltOptCategory
));
231 static cl::opt
<bool> StringOps(
233 cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
234 cl::cat(BoltOptCategory
));
236 static cl::opt
<bool> StripRepRet(
238 cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
239 cl::init(true), cl::cat(BoltOptCategory
));
// Hidden boolean option "verify-cfg". runPasses() reads it after each pass has
// run to decide whether to call validateCFG() on the binary functions.
241 static cl::opt
<bool> VerifyCFG("verify-cfg",
242 cl::desc("verify the CFG after every pass"),
243 cl::Hidden
, cl::cat(BoltOptCategory
));
245 static cl::opt
<bool> ThreeWayBranchFlag("three-way-branch",
246 cl::desc("reorder three way branches"),
248 cl::cat(BoltOptCategory
));
250 static cl::opt
<bool> CMOVConversionFlag("cmov-conversion",
251 cl::desc("fold jcc+mov into cmov"),
253 cl::cat(BoltOptCategory
));
260 using namespace opts
;
// Timer group name and description. runPasses() passes both to the
// NamedRegionTimer it creates for every pass.
262 const char BinaryFunctionPassManager::TimerGroupName
[] = "passman";
263 const char BinaryFunctionPassManager::TimerGroupDesc
[] =
264 "Binary Function Pass Manager";
// Walks the Passes vector in index order and, for each (bool, pass) entry:
//   - tests the bool half of the pair,
//   - logs "Starting pass"/"Finished pass" when opts::Verbosity > 0,
//   - times the pass with a NamedRegionTimer in the pass manager's timer group,
//   - invokes Pass->runOnFunctions(BC) through callWithDynoStats,
//   - when opts::VerifyCFG is set, combines validateCFG() over all functions
//     and reports "Invalid CFG detected" on stderr,
//   - prints and/or dumps functions, subject to opts::PrintAll,
//     opts::DumpDotAll, Pass->printPass() and Pass->shouldPrint().
// NOTE(review): several lines of this function are absent from this listing
// (the statements guarded by the bare `if`s, part of the CFG verification
// call, and the closing braces). Confirm the skip/early-exit behavior against
// the upstream file before relying on the description above.
266 void BinaryFunctionPassManager::runPasses() {
267 auto &BFs
= BC
.getBinaryFunctions();
268 for (size_t PassIdx
= 0; PassIdx
< Passes
.size(); PassIdx
++) {
269 const std::pair
<const bool, std::unique_ptr
<BinaryFunctionPass
>>
270 &OptPassPair
= Passes
[PassIdx
];
// The guarded statement is not visible here -- presumably it skips a pass
// whose flag is false; TODO confirm.
271 if (!OptPassPair
.first
)
274 const std::unique_ptr
<BinaryFunctionPass
> &Pass
= OptPassPair
.second
;
// Identifier built from the pass index and the pass name; it is handed to
// dumpGraphForPass() at the end of the loop body.
275 std::string PassIdName
=
276 formatv("{0:2}_{1}", PassIdx
, Pass
->getName()).str();
278 if (opts::Verbosity
> 0)
279 outs() << "BOLT-INFO: Starting pass: " << Pass
->getName() << "\n";
// The timer uses the pass name as both its name and its description.
281 NamedRegionTimer
T(Pass
->getName(), Pass
->getName(), TimerGroupName
,
282 TimerGroupDesc
, TimeOpts
);
// Run the pass over all functions through the dyno-stats wrapper.
284 callWithDynoStats([this, &Pass
] { Pass
->runOnFunctions(BC
); }, BFs
,
285 Pass
->getName(), opts::DynoStatsAll
, BC
.isAArch64());
// Optional CFG verification: starts from `true` and ANDs in validateCFG() for
// each function. What happens after the error message is not visible here.
287 if (opts::VerifyCFG
&&
289 BFs
.begin(), BFs
.end(), true,
291 const std::pair
<const uint64_t, BinaryFunction
> &It
) {
292 return Valid
&& It
.second
.validateCFG();
294 errs() << "BOLT-ERROR: Invalid CFG detected after pass "
295 << Pass
->getName() << "\n";
299 if (opts::Verbosity
> 0)
300 outs() << "BOLT-INFO: Finished pass: " << Pass
->getName() << "\n";
// True when no printing/dumping was requested for this pass; the guarded
// statement is not visible -- presumably it moves on to the next pass.
302 if (!opts::PrintAll
&& !opts::DumpDotAll
&& !Pass
->printPass())
305 const std::string Message
= std::string("after ") + Pass
->getName();
307 for (auto &It
: BFs
) {
308 BinaryFunction
&Function
= It
.second
;
// Per-function filter; the guarded statement is not visible -- presumably it
// skips functions the pass does not want printed.
310 if (!Pass
->shouldPrint(Function
))
313 Function
.print(outs(), Message
);
315 if (opts::DumpDotAll
)
316 Function
.dumpGraphForPass(PassIdName
);
321 void BinaryFunctionPassManager::runAllPasses(BinaryContext
&BC
) {
322 BinaryFunctionPassManager
Manager(BC
);
324 const DynoStats InitialDynoStats
=
325 getDynoStats(BC
.getBinaryFunctions(), BC
.isAArch64());
327 Manager
.registerPass(std::make_unique
<AsmDumpPass
>(),
328 opts::AsmDump
.getNumOccurrences());
330 if (BC
.isAArch64()) {
331 Manager
.registerPass(std::make_unique
<FixRelaxations
>(PrintFixRelaxations
));
333 Manager
.registerPass(
334 std::make_unique
<VeneerElimination
>(PrintVeneerElimination
));
338 Manager
.registerPass(
339 std::make_unique
<FixRISCVCallsPass
>(PrintFixRISCVCalls
));
342 // Here we manage dependencies/order manually, since passes are run in the
343 // order they're registered.
345 // Run this pass first to use stats for the original functions.
346 Manager
.registerPass(std::make_unique
<PrintProgramStats
>(NeverPrint
));
348 if (opts::PrintProfileStats
)
349 Manager
.registerPass(std::make_unique
<PrintProfileStats
>(NeverPrint
));
351 Manager
.registerPass(std::make_unique
<ValidateInternalCalls
>(NeverPrint
));
353 Manager
.registerPass(std::make_unique
<ValidateMemRefs
>(NeverPrint
));
355 if (opts::Instrument
)
356 Manager
.registerPass(std::make_unique
<Instrumentation
>(NeverPrint
));
357 else if (opts::Hugify
)
358 Manager
.registerPass(std::make_unique
<HugePage
>(NeverPrint
));
360 Manager
.registerPass(std::make_unique
<ShortenInstructions
>(NeverPrint
));
362 Manager
.registerPass(std::make_unique
<RemoveNops
>(NeverPrint
));
364 Manager
.registerPass(std::make_unique
<NormalizeCFG
>(PrintNormalized
));
366 Manager
.registerPass(std::make_unique
<StripRepRet
>(NeverPrint
),
369 Manager
.registerPass(std::make_unique
<IdenticalCodeFolding
>(PrintICF
),
372 Manager
.registerPass(
373 std::make_unique
<SpecializeMemcpy1
>(NeverPrint
, opts::SpecializeMemcpy1
),
374 !opts::SpecializeMemcpy1
.empty());
376 Manager
.registerPass(std::make_unique
<InlineMemcpy
>(NeverPrint
),
379 Manager
.registerPass(std::make_unique
<IndirectCallPromotion
>(PrintICP
));
381 Manager
.registerPass(
382 std::make_unique
<JTFootprintReduction
>(PrintJTFootprintReduction
),
383 opts::JTFootprintReductionFlag
);
385 Manager
.registerPass(
386 std::make_unique
<SimplifyRODataLoads
>(PrintSimplifyROLoads
),
387 opts::SimplifyRODataLoads
);
389 Manager
.registerPass(std::make_unique
<RegReAssign
>(PrintRegReAssign
),
392 Manager
.registerPass(std::make_unique
<Inliner
>(PrintInline
));
394 Manager
.registerPass(std::make_unique
<IdenticalCodeFolding
>(PrintICF
),
397 Manager
.registerPass(std::make_unique
<PLTCall
>(PrintPLT
));
399 Manager
.registerPass(std::make_unique
<ThreeWayBranch
>(),
400 opts::ThreeWayBranchFlag
);
402 Manager
.registerPass(std::make_unique
<ReorderBasicBlocks
>(PrintReordered
));
404 Manager
.registerPass(std::make_unique
<EliminateUnreachableBlocks
>(PrintUCE
),
405 opts::EliminateUnreachable
);
407 Manager
.registerPass(std::make_unique
<SplitFunctions
>(PrintSplit
));
409 Manager
.registerPass(std::make_unique
<LoopInversionPass
>());
411 Manager
.registerPass(std::make_unique
<TailDuplication
>());
413 Manager
.registerPass(std::make_unique
<CMOVConversion
>(),
414 opts::CMOVConversionFlag
);
416 // This pass syncs local branches with CFG. If any of the following
417 // passes breaks the sync - they either need to re-run the pass or
418 // fix branches consistency internally.
419 Manager
.registerPass(std::make_unique
<FixupBranches
>(PrintAfterBranchFixup
));
421 // This pass should come close to last since it uses the estimated hot
422 // size of a function to determine the order. It should definitely
423 // also happen after any changes to the call graph are made, e.g. inlining.
424 Manager
.registerPass(
425 std::make_unique
<ReorderFunctions
>(PrintReorderedFunctions
));
427 // Print final dyno stats right while CFG and instruction analysis are intact.
428 Manager
.registerPass(
429 std::make_unique
<DynoStatsPrintPass
>(
430 InitialDynoStats
, "after all optimizations before SCTC and FOP"),
431 opts::PrintDynoStats
|| opts::DynoStatsAll
);
433 // Add the StokeInfo pass, which extracts functions for stoke optimization and
434 // gets the liveness information for them.
435 Manager
.registerPass(std::make_unique
<StokeInfo
>(PrintStoke
), opts::Stoke
);
437 // This pass introduces conditional jumps into external functions.
438 // Between extending CFG to support this and isolating this pass we chose
439 // the latter. Thus this pass will do double jump removal and unreachable
440 // code elimination if necessary and won't rely on peepholes/UCE for these
441 // optimizations.
442 // More generally this pass should be the last optimization pass that
443 // modifies branches/control flow. This pass is run after function
444 // reordering so that it can tell whether calls are forward/backward
445 // accurately.
446 Manager
.registerPass(
447 std::make_unique
<SimplifyConditionalTailCalls
>(PrintSCTC
),
448 opts::SimplifyConditionalTailCalls
);
450 Manager
.registerPass(std::make_unique
<Peepholes
>(PrintPeepholes
));
452 Manager
.registerPass(std::make_unique
<AlignerPass
>());
454 // Perform reordering on data contained in one or more sections using
455 // memory profiling data.
456 Manager
.registerPass(std::make_unique
<ReorderData
>());
458 if (BC
.isAArch64()) {
459 Manager
.registerPass(std::make_unique
<ADRRelaxationPass
>());
461 // Tighten branches according to offset differences between branch and
462 // targets. No extra instructions after this pass, otherwise we may have
463 // relocations out of range and crash during linking.
464 Manager
.registerPass(std::make_unique
<LongJmpPass
>(PrintLongJmp
));
467 // This pass should always run last.*
468 Manager
.registerPass(std::make_unique
<FinalizeFunctions
>(PrintFinalized
));
470 // FrameOptimizer has an implicit dependency on FinalizeFunctions.
471 // FrameOptimizer moves values around and needs to update CFIs. To do this, it
472 // must read CFI, interpret it and rewrite it, so CFIs need to be correctly
473 // placed according to the final layout.
474 Manager
.registerPass(std::make_unique
<FrameOptimizerPass
>(PrintFOP
));
476 Manager
.registerPass(std::make_unique
<AllocCombinerPass
>(PrintFOP
));
478 Manager
.registerPass(
479 std::make_unique
<RetpolineInsertion
>(PrintRetpolineInsertion
));
481 // Assign each function an output section.
482 Manager
.registerPass(std::make_unique
<AssignSections
>());
484 // Patch original function entries
485 if (BC
.HasRelocations
)
486 Manager
.registerPass(std::make_unique
<PatchEntries
>());
488 // This pass turns tail calls into jumps which makes them invisible to
489 // function reordering. It's unsafe to use any CFG or instruction analysis
490 // after this point.
491 Manager
.registerPass(
492 std::make_unique
<InstructionLowering
>(PrintAfterLowering
));
494 // In non-relocation mode, mark functions that do not fit into their original
495 // space as non-simple if we have to (e.g. for correct debug info update).
496 // NOTE: this pass depends on finalized code.
497 if (!BC
.HasRelocations
)
498 Manager
.registerPass(std::make_unique
<CheckLargeFunctions
>(NeverPrint
));
500 Manager
.registerPass(std::make_unique
<LowerAnnotations
>(NeverPrint
));
502 // Check for dirty state of MCSymbols caused by running calculateEmittedSize
503 // in parallel and restore them
504 Manager
.registerPass(std::make_unique
<CleanMCState
>(NeverPrint
));