1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "bolt/Rewrite/BinaryPassManager.h"
10 #include "bolt/Passes/ADRRelaxationPass.h"
11 #include "bolt/Passes/Aligner.h"
12 #include "bolt/Passes/AllocCombiner.h"
13 #include "bolt/Passes/AsmDump.h"
14 #include "bolt/Passes/CMOVConversion.h"
15 #include "bolt/Passes/ContinuityStats.h"
16 #include "bolt/Passes/FixRISCVCallsPass.h"
17 #include "bolt/Passes/FixRelaxationPass.h"
18 #include "bolt/Passes/FrameOptimizer.h"
19 #include "bolt/Passes/Hugify.h"
20 #include "bolt/Passes/IdenticalCodeFolding.h"
21 #include "bolt/Passes/IndirectCallPromotion.h"
22 #include "bolt/Passes/Inliner.h"
23 #include "bolt/Passes/Instrumentation.h"
24 #include "bolt/Passes/JTFootprintReduction.h"
25 #include "bolt/Passes/LongJmp.h"
26 #include "bolt/Passes/LoopInversionPass.h"
27 #include "bolt/Passes/MCF.h"
28 #include "bolt/Passes/PLTCall.h"
29 #include "bolt/Passes/PatchEntries.h"
30 #include "bolt/Passes/RegReAssign.h"
31 #include "bolt/Passes/ReorderData.h"
32 #include "bolt/Passes/ReorderFunctions.h"
33 #include "bolt/Passes/RetpolineInsertion.h"
34 #include "bolt/Passes/SplitFunctions.h"
35 #include "bolt/Passes/StokeInfo.h"
36 #include "bolt/Passes/TailDuplication.h"
37 #include "bolt/Passes/ThreeWayBranch.h"
38 #include "bolt/Passes/ValidateInternalCalls.h"
39 #include "bolt/Passes/ValidateMemRefs.h"
40 #include "bolt/Passes/VeneerElimination.h"
41 #include "bolt/Utils/CommandLineOpts.h"
42 #include "llvm/Support/FormatVariadic.h"
43 #include "llvm/Support/Timer.h"
44 #include "llvm/Support/raw_ostream.h"
// Command-line options that are only declared here (`extern`); their
// definitions live outside this view. The ones visibly read in this file are
// PrintAll and DumpDotAll (decide whether functions are printed/dumped after a
// pass), PrintDynoStats (gates the final DynoStatsPrintPass) and AsmDump
// (its occurrence count gates AsmDumpPass).
52 extern cl::opt
<bool> PrintAll
;
53 extern cl::opt
<bool> PrintDynoStats
;
54 extern cl::opt
<bool> DumpDotAll
;
55 extern cl::opt
<std::string
> AsmDump
;
56 extern cl::opt
<bolt::PLTCall::OptType
> PLT
;
// NOTE(review): the declarator of the option below is not visible in this
// view; presumably it is `ICF`, which runAllPasses() compares against
// IdenticalCodeFolding::ICFLevel::None -- confirm.
57 extern cl::opt
<bolt::IdenticalCodeFolding::ICFLevel
, false,
58 llvm::bolt::DeprecatedICFNumericOptionParser
>
// Command-line options defined in this file. The `cl::desc` string of each
// option is its help text; options carrying `cl::init(true)` default to on.
// NOTE(review): several entries below appear without their declarator and/or
// option-name line in this view, so their exact type cannot be confirmed here.
62 DynoStatsAll("dyno-stats-all",
63 cl::desc("print dyno stats after each stage"),
64 cl::ZeroOrMore
, cl::Hidden
, cl::cat(BoltCategory
));
67 EliminateUnreachable("eliminate-unreachable",
68 cl::desc("eliminate unreachable code"), cl::init(true),
69 cl::cat(BoltOptCategory
));
71 static cl::opt
<bool> JTFootprintReductionFlag(
72 "jt-footprint-reduction",
73 cl::desc("make jump tables size smaller at the cost of using more "
74 "instructions at jump sites"),
75 cl::cat(BoltOptCategory
));
79 cl::desc("keep no-op instructions. By default they are removed."),
80 cl::Hidden
, cl::cat(BoltOptCategory
));
// NeverPrint is the flag runAllPasses() hands to the constructors of passes
// that have no dedicated print-* option of their own.
82 cl::opt
<bool> NeverPrint("never-print", cl::desc("never print"),
83 cl::ReallyHidden
, cl::cat(BoltOptCategory
));
// print-* options. Most of them are passed to the constructor of the
// corresponding pass in runAllPasses(); PrintProfileStats instead gates the
// registration of the PrintProfileStats pass.
86 PrintAfterBranchFixup("print-after-branch-fixup",
87 cl::desc("print function after fixing local branches"),
88 cl::Hidden
, cl::cat(BoltOptCategory
));
91 PrintAfterLowering("print-after-lowering",
92 cl::desc("print function after instruction lowering"),
93 cl::Hidden
, cl::cat(BoltOptCategory
));
95 static cl::opt
<bool> PrintEstimateEdgeCounts(
96 "print-estimate-edge-counts",
97 cl::desc("print function after edge counts are set for no-LBR profile"),
98 cl::Hidden
, cl::cat(BoltOptCategory
));
101 PrintFinalized("print-finalized",
102 cl::desc("print function after CFG is finalized"),
103 cl::Hidden
, cl::cat(BoltOptCategory
));
106 PrintFOP("print-fop",
107 cl::desc("print functions after frame optimizer pass"), cl::Hidden
,
108 cl::cat(BoltOptCategory
));
111 PrintICF("print-icf", cl::desc("print functions after ICF optimization"),
112 cl::Hidden
, cl::cat(BoltOptCategory
));
115 PrintICP("print-icp",
116 cl::desc("print functions after indirect call promotion"),
117 cl::Hidden
, cl::cat(BoltOptCategory
));
120 PrintInline("print-inline",
121 cl::desc("print functions after inlining optimization"),
122 cl::Hidden
, cl::cat(BoltOptCategory
));
124 static cl::opt
<bool> PrintJTFootprintReduction(
125 "print-after-jt-footprint-reduction",
126 cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden
,
127 cl::cat(BoltOptCategory
));
130 PrintAdrRelaxation("print-adr-relaxation",
131 cl::desc("print functions after ADR Relaxation pass"),
132 cl::Hidden
, cl::cat(BoltOptCategory
));
135 PrintLongJmp("print-longjmp",
136 cl::desc("print functions after longjmp pass"), cl::Hidden
,
137 cl::cat(BoltOptCategory
));
140 PrintNormalized("print-normalized",
141 cl::desc("print functions after CFG is normalized"),
142 cl::Hidden
, cl::cat(BoltCategory
));
144 static cl::opt
<bool> PrintOptimizeBodyless(
145 "print-optimize-bodyless",
146 cl::desc("print functions after bodyless optimization"), cl::Hidden
,
147 cl::cat(BoltOptCategory
));
150 PrintPeepholes("print-peepholes",
151 cl::desc("print functions after peephole optimization"),
152 cl::Hidden
, cl::cat(BoltOptCategory
));
155 PrintPLT("print-plt", cl::desc("print functions after PLT optimization"),
156 cl::Hidden
, cl::cat(BoltOptCategory
));
159 PrintProfileStats("print-profile-stats",
160 cl::desc("print profile quality/bias analysis"),
161 cl::cat(BoltCategory
));
164 PrintRegReAssign("print-regreassign",
165 cl::desc("print functions after regreassign pass"),
166 cl::Hidden
, cl::cat(BoltOptCategory
));
169 PrintReordered("print-reordered",
170 cl::desc("print functions after layout optimization"),
171 cl::Hidden
, cl::cat(BoltOptCategory
));
174 PrintReorderedFunctions("print-reordered-functions",
175 cl::desc("print functions after clustering"),
176 cl::Hidden
, cl::cat(BoltOptCategory
));
178 static cl::opt
<bool> PrintRetpolineInsertion(
179 "print-retpoline-insertion",
180 cl::desc("print functions after retpoline insertion pass"), cl::Hidden
,
181 cl::cat(BoltCategory
));
183 static cl::opt
<bool> PrintSCTC(
185 cl::desc("print functions after conditional tail call simplification"),
186 cl::Hidden
, cl::cat(BoltOptCategory
));
188 static cl::opt
<bool> PrintSimplifyROLoads(
189 "print-simplify-rodata-loads",
190 cl::desc("print functions after simplification of RO data loads"),
191 cl::Hidden
, cl::cat(BoltOptCategory
));
194 PrintSplit("print-split", cl::desc("print functions after code splitting"),
195 cl::Hidden
, cl::cat(BoltOptCategory
));
198 PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
199 cl::Hidden
, cl::cat(BoltOptCategory
));
202 PrintFixRelaxations("print-fix-relaxations",
203 cl::desc("print functions after fix relaxations pass"),
204 cl::Hidden
, cl::cat(BoltOptCategory
));
207 PrintFixRISCVCalls("print-fix-riscv-calls",
208 cl::desc("print functions after fix RISCV calls pass"),
209 cl::Hidden
, cl::cat(BoltOptCategory
));
211 static cl::opt
<bool> PrintVeneerElimination(
212 "print-veneer-elimination",
213 cl::desc("print functions after veneer elimination pass"), cl::Hidden
,
214 cl::cat(BoltOptCategory
));
217 PrintUCE("print-uce",
218 cl::desc("print functions after unreachable code elimination"),
219 cl::Hidden
, cl::cat(BoltOptCategory
));
// Switches for individual passes. Those visibly used in runAllPasses() appear
// there as the second argument of registerPass.
221 static cl::opt
<bool> RegReAssign(
224 "reassign registers so as to avoid using REX prefixes in hot code"),
225 cl::cat(BoltOptCategory
));
227 static cl::opt
<bool> SimplifyConditionalTailCalls(
228 "simplify-conditional-tail-calls",
229 cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
230 cl::init(true), cl::cat(BoltOptCategory
));
232 static cl::opt
<bool> SimplifyRODataLoads(
233 "simplify-rodata-loads",
234 cl::desc("simplify loads from read-only sections by replacing the memory "
235 "operand with the constant found in the corresponding section"),
236 cl::cat(BoltOptCategory
));
238 static cl::list
<std::string
>
239 SpecializeMemcpy1("memcpy1-spec",
240 cl::desc("list of functions with call sites for which to specialize memcpy() "
242 cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
243 cl::ZeroOrMore
, cl::cat(BoltOptCategory
));
245 static cl::opt
<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
246 cl::cat(BoltOptCategory
));
248 static cl::opt
<bool> StringOps(
250 cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
251 cl::cat(BoltOptCategory
));
253 static cl::opt
<bool> StripRepRet(
255 cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
256 cl::init(true), cl::cat(BoltOptCategory
));
// When set, runPasses() validates every function's CFG after each pass.
258 static cl::opt
<bool> VerifyCFG("verify-cfg",
259 cl::desc("verify the CFG after every pass"),
260 cl::Hidden
, cl::cat(BoltOptCategory
));
262 static cl::opt
<bool> ThreeWayBranchFlag("three-way-branch",
263 cl::desc("reorder three way branches"),
265 cl::cat(BoltOptCategory
));
267 static cl::opt
<bool> CMOVConversionFlag("cmov-conversion",
268 cl::desc("fold jcc+mov into cmov"),
270 cl::cat(BoltOptCategory
));
272 static cl::opt
<bool> ShortenInstructions("shorten-instructions",
273 cl::desc("shorten instructions"),
275 cl::cat(BoltOptCategory
));
// Bring the `opts` namespace into scope for the code that follows.
281 using namespace opts
;
// Name and description of the timer group that runPasses() passes to the
// NamedRegionTimer it creates for each pass.
283 const char BinaryFunctionPassManager::TimerGroupName
[] = "passman";
284 const char BinaryFunctionPassManager::TimerGroupDesc
[] =
285 "Binary Function Pass Manager";
/// Run the registered passes, in registration order, over the functions of BC.
///
/// For every entry of `Passes` this optionally logs start/finish messages
/// (opts::Verbosity > 0), creates a NamedRegionTimer named after the pass,
/// calls Pass->runOnFunctions(BC) and folds the result into an Error with
/// joinErrors, optionally validates the CFG of every function
/// (opts::VerifyCFG), and finally prints and/or dumps functions depending on
/// opts::PrintAll, opts::DumpDotAll and the pass's printPass()/shouldPrint().
///
/// \returns the error collected from a pass, a fatal BOLT error naming the
/// pass when CFG validation fails, and Error::success() otherwise.
///
/// NOTE(review): some statements (the bodies of the guard `if`s, the head of
/// the call that runs the pass, closing braces) are not visible in this view;
/// the comments about them below are hedged accordingly.
287 Error
BinaryFunctionPassManager::runPasses() {
288 auto &BFs
= BC
.getBinaryFunctions();
289 for (size_t PassIdx
= 0; PassIdx
< Passes
.size(); PassIdx
++) {
// Each entry of Passes pairs a bool flag (.first) with the owned pass (.second).
290 const std::pair
<const bool, std::unique_ptr
<BinaryFunctionPass
>>
291 &OptPassPair
= Passes
[PassIdx
];
// Flag check; presumably entries whose flag is false are skipped (the guarded
// statement is not visible here -- confirm).
292 if (!OptPassPair
.first
)
295 const std::unique_ptr
<BinaryFunctionPass
> &Pass
= OptPassPair
.second
;
// Identifier built from the pass index and the pass name; it is handed to
// dumpGraphForPass() further down.
296 std::string PassIdName
=
297 formatv("{0:2}_{1}", PassIdx
, Pass
->getName()).str();
299 if (opts::Verbosity
> 0)
300 BC
.outs() << "BOLT-INFO: Starting pass: " << Pass
->getName() << "\n";
// Timer object for this pass: the pass name is used for both of the first two
// arguments, and the timer is filed under TimerGroupName/TimerGroupDesc.
302 NamedRegionTimer
T(Pass
->getName(), Pass
->getName(), TimerGroupName
,
303 TimerGroupDesc
, TimeOpts
);
// E collects whatever runOnFunctions() reports.
305 Error E
= Error::success();
309 E
= joinErrors(std::move(E
), Pass
->runOnFunctions(BC
));
// Trailing arguments of the enclosing call whose head is not visible here;
// opts::DynoStatsAll and BC.isAArch64() are forwarded to it.
311 BFs
, Pass
->getName(), opts::DynoStatsAll
, BC
.isAArch64());
// Hands the collected error to the caller (presumably only when E is set --
// the guarding condition is not visible here).
313 return Error(std::move(E
));
// With opts::VerifyCFG set, validateCFG() is folded over all functions
// starting from `true`; `Valid &&` stops calling it once one function fails.
315 if (opts::VerifyCFG
&&
317 BFs
.begin(), BFs
.end(), true,
319 const std::pair
<const uint64_t, BinaryFunction
> &It
) {
320 return Valid
&& It
.second
.validateCFG();
322 return createFatalBOLTError(
323 Twine("BOLT-ERROR: Invalid CFG detected after pass ") +
324 Twine(Pass
->getName()) + Twine("\n"));
327 if (opts::Verbosity
> 0)
328 BC
.outs() << "BOLT-INFO: Finished pass: " << Pass
->getName() << "\n";
// Nothing to print or dump unless PrintAll, DumpDotAll or the pass itself asks
// for it (presumably moves on to the next pass -- guarded statement not
// visible here).
330 if (!opts::PrintAll
&& !opts::DumpDotAll
&& !Pass
->printPass())
// Label attached to each printed function.
333 const std::string Message
= std::string("after ") + Pass
->getName();
335 for (auto &It
: BFs
) {
336 BinaryFunction
&Function
= It
.second
;
// The pass decides per function whether it should be printed.
338 if (!Pass
->shouldPrint(Function
))
341 Function
.print(BC
.outs(), Message
);
343 if (opts::DumpDotAll
)
344 Function
.dumpGraphForPass(PassIdName
);
347 return Error::success();
/// Build the BOLT pass pipeline for \p BC and run it.
///
/// A local BinaryFunctionPassManager is created, passes are registered one by
/// one in the order they should execute, and the function returns whatever
/// Manager.runPasses() returns. registerPass is called either with the pass
/// alone or with the pass plus a second argument taken from a command-line
/// option (presumably the flag deciding whether the pass runs -- confirm
/// against registerPass).
///
/// NOTE(review): in this view several registerPass calls end at a trailing
/// comma and some `if`/closing-brace lines are absent, so the conditions
/// attached to those passes cannot be determined from here.
350 Error
BinaryFunctionPassManager::runAllPasses(BinaryContext
&BC
) {
351 BinaryFunctionPassManager
Manager(BC
);
353 Manager
.registerPass(
354 std::make_unique
<EstimateEdgeCounts
>(PrintEstimateEdgeCounts
));
356 Manager
.registerPass(std::make_unique
<DynoStatsSetPass
>());
// AsmDumpPass is gated on how many times the AsmDump option occurred.
358 Manager
.registerPass(std::make_unique
<AsmDumpPass
>(),
359 opts::AsmDump
.getNumOccurrences());
// AArch64-specific passes registered early in the pipeline.
361 if (BC
.isAArch64()) {
362 Manager
.registerPass(std::make_unique
<FixRelaxations
>(PrintFixRelaxations
));
364 Manager
.registerPass(
365 std::make_unique
<VeneerElimination
>(PrintVeneerElimination
));
369 Manager
.registerPass(
370 std::make_unique
<FixRISCVCallsPass
>(PrintFixRISCVCalls
));
373 // Here we manage dependencies/order manually, since passes are run in the
374 // order they're registered.
376 // Run this pass first to use stats for the original functions.
377 Manager
.registerPass(std::make_unique
<PrintProgramStats
>());
379 if (opts::PrintProfileStats
)
380 Manager
.registerPass(std::make_unique
<PrintProfileStats
>(NeverPrint
));
382 Manager
.registerPass(std::make_unique
<PrintContinuityStats
>(NeverPrint
));
384 Manager
.registerPass(std::make_unique
<ValidateInternalCalls
>(NeverPrint
));
386 Manager
.registerPass(std::make_unique
<ValidateMemRefs
>(NeverPrint
));
// Instrumentation takes precedence: the HugePage pass is only registered when
// opts::Instrument is not set and opts::Hugify is.
388 if (opts::Instrument
)
389 Manager
.registerPass(std::make_unique
<Instrumentation
>(NeverPrint
));
390 else if (opts::Hugify
)
391 Manager
.registerPass(std::make_unique
<HugePage
>(NeverPrint
));
393 Manager
.registerPass(std::make_unique
<ShortenInstructions
>(NeverPrint
),
394 opts::ShortenInstructions
);
396 Manager
.registerPass(std::make_unique
<RemoveNops
>(NeverPrint
),
399 Manager
.registerPass(std::make_unique
<NormalizeCFG
>(PrintNormalized
));
402 Manager
.registerPass(std::make_unique
<StripRepRet
>(NeverPrint
),
// First registration of IdenticalCodeFolding; it is skipped when opts::ICF is
// ICFLevel::None.
405 Manager
.registerPass(std::make_unique
<IdenticalCodeFolding
>(PrintICF
),
406 opts::ICF
!= IdenticalCodeFolding::ICFLevel::None
);
// SpecializeMemcpy1 is only enabled when the option list is non-empty.
408 Manager
.registerPass(
409 std::make_unique
<SpecializeMemcpy1
>(NeverPrint
, opts::SpecializeMemcpy1
),
410 !opts::SpecializeMemcpy1
.empty());
412 Manager
.registerPass(std::make_unique
<InlineMemcpy
>(NeverPrint
),
415 Manager
.registerPass(std::make_unique
<IndirectCallPromotion
>(PrintICP
));
417 Manager
.registerPass(
418 std::make_unique
<JTFootprintReduction
>(PrintJTFootprintReduction
),
419 opts::JTFootprintReductionFlag
);
421 Manager
.registerPass(
422 std::make_unique
<SimplifyRODataLoads
>(PrintSimplifyROLoads
),
423 opts::SimplifyRODataLoads
);
425 Manager
.registerPass(std::make_unique
<RegReAssign
>(PrintRegReAssign
),
428 Manager
.registerPass(std::make_unique
<Inliner
>(PrintInline
));
// IdenticalCodeFolding is registered a second time here, after Inliner, under
// the same opts::ICF condition.
430 Manager
.registerPass(std::make_unique
<IdenticalCodeFolding
>(PrintICF
),
431 opts::ICF
!= IdenticalCodeFolding::ICFLevel::None
);
433 Manager
.registerPass(std::make_unique
<PLTCall
>(PrintPLT
));
435 Manager
.registerPass(std::make_unique
<ThreeWayBranch
>(),
436 opts::ThreeWayBranchFlag
);
438 Manager
.registerPass(std::make_unique
<ReorderBasicBlocks
>(PrintReordered
));
440 Manager
.registerPass(std::make_unique
<EliminateUnreachableBlocks
>(PrintUCE
),
441 opts::EliminateUnreachable
);
443 Manager
.registerPass(std::make_unique
<SplitFunctions
>(PrintSplit
));
445 Manager
.registerPass(std::make_unique
<LoopInversionPass
>());
447 Manager
.registerPass(std::make_unique
<TailDuplication
>());
449 Manager
.registerPass(std::make_unique
<CMOVConversion
>(),
450 opts::CMOVConversionFlag
);
452 // This pass syncs local branches with CFG. If any of the following
453 // passes breaks the sync - they either need to re-run the pass or
454 // fix branches consistency internally.
455 Manager
.registerPass(std::make_unique
<FixupBranches
>(PrintAfterBranchFixup
));
457 // This pass should come close to last since it uses the estimated hot
458 // size of a function to determine the order. It should definitely
459 // also happen after any changes to the call graph are made, e.g. inlining.
460 Manager
.registerPass(
461 std::make_unique
<ReorderFunctions
>(PrintReorderedFunctions
));
463 // This is the second run of the SplitFunctions pass required by certain
464 // splitting strategies (e.g. cdsplit). Running the SplitFunctions pass again
465 // after ReorderFunctions allows the finalized function order to be utilized
466 // to make more sophisticated splitting decisions, like hot-warm-cold splitting.
468 Manager
.registerPass(std::make_unique
<SplitFunctions
>(PrintSplit
));
470 // Print final dyno stats while CFG and instruction analysis are still intact.
471 Manager
.registerPass(std::make_unique
<DynoStatsPrintPass
>(
472 "after all optimizations before SCTC and FOP"),
473 opts::PrintDynoStats
|| opts::DynoStatsAll
);
475 // Add the StokeInfo pass, which extracts functions for stoke optimization and
476 // gets the liveness information for them.
477 Manager
.registerPass(std::make_unique
<StokeInfo
>(PrintStoke
), opts::Stoke
);
479 // This pass introduces conditional jumps into external functions.
480 // Between extending CFG to support this and isolating this pass we chose
481 // the latter. Thus this pass will do double jump removal and unreachable
482 // code elimination if necessary and won't rely on peepholes/UCE for these.
484 // More generally this pass should be the last optimization pass that
485 // modifies branches/control flow. This pass is run after function
486 // reordering so that it can tell whether calls are forward/backward.
488 Manager
.registerPass(
489 std::make_unique
<SimplifyConditionalTailCalls
>(PrintSCTC
),
490 opts::SimplifyConditionalTailCalls
);
492 Manager
.registerPass(std::make_unique
<Peepholes
>(PrintPeepholes
));
494 Manager
.registerPass(std::make_unique
<AlignerPass
>());
496 // Perform reordering on data contained in one or more sections using
497 // memory profiling data.
498 Manager
.registerPass(std::make_unique
<ReorderData
>());
500 if (BC
.isAArch64()) {
501 Manager
.registerPass(
502 std::make_unique
<ADRRelaxationPass
>(PrintAdrRelaxation
));
504 // Tighten branches according to offset differences between branch and
505 // targets. No extra instructions after this pass, otherwise we may have
506 // relocations out of range and crash during linking.
507 Manager
.registerPass(std::make_unique
<LongJmpPass
>(PrintLongJmp
));
510 // This pass should always run last.*
511 Manager
.registerPass(std::make_unique
<FinalizeFunctions
>(PrintFinalized
));
513 // FrameOptimizer has an implicit dependency on FinalizeFunctions.
514 // FrameOptimizer moves values around and needs to update CFIs. To do this, it
515 // must read CFI, interpret it and rewrite it, so CFIs need to be correctly
516 // placed according to the final layout.
517 Manager
.registerPass(std::make_unique
<FrameOptimizerPass
>(PrintFOP
));
// AllocCombinerPass is constructed with the same PrintFOP flag as
// FrameOptimizerPass.
519 Manager
.registerPass(std::make_unique
<AllocCombinerPass
>(PrintFOP
));
521 Manager
.registerPass(
522 std::make_unique
<RetpolineInsertion
>(PrintRetpolineInsertion
));
524 // Assign each function an output section.
525 Manager
.registerPass(std::make_unique
<AssignSections
>());
527 // Patch original function entries
528 if (BC
.HasRelocations
)
529 Manager
.registerPass(std::make_unique
<PatchEntries
>());
531 // This pass turns tail calls into jumps which makes them invisible to
532 // function reordering. It's unsafe to use any CFG or instruction analysis after this point.
534 Manager
.registerPass(
535 std::make_unique
<InstructionLowering
>(PrintAfterLowering
));
537 // In non-relocation mode, mark functions that do not fit into their original
538 // space as non-simple if we have to (e.g. for correct debug info update).
539 // NOTE: this pass depends on finalized code.
540 if (!BC
.HasRelocations
)
541 Manager
.registerPass(std::make_unique
<CheckLargeFunctions
>(NeverPrint
));
543 Manager
.registerPass(std::make_unique
<LowerAnnotations
>(NeverPrint
));
545 // Check for dirty state of MCSymbols caused by running calculateEmittedSize
546 // in parallel and restore them
547 Manager
.registerPass(std::make_unique
<CleanMCState
>(NeverPrint
));
// Execute everything registered above and propagate the resulting Error.
549 return Manager
.runPasses();