[libc] Switch to using the generic `<gpuintrin.h>` implementations (#121810)
[llvm-project.git] / bolt / lib / Rewrite / BinaryPassManager.cpp
blob2d851c751ae10506e3ffb6d86498b541d84a6dc7
1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "bolt/Rewrite/BinaryPassManager.h"
10 #include "bolt/Passes/ADRRelaxationPass.h"
11 #include "bolt/Passes/Aligner.h"
12 #include "bolt/Passes/AllocCombiner.h"
13 #include "bolt/Passes/AsmDump.h"
14 #include "bolt/Passes/CMOVConversion.h"
15 #include "bolt/Passes/ContinuityStats.h"
16 #include "bolt/Passes/FixRISCVCallsPass.h"
17 #include "bolt/Passes/FixRelaxationPass.h"
18 #include "bolt/Passes/FrameOptimizer.h"
19 #include "bolt/Passes/Hugify.h"
20 #include "bolt/Passes/IdenticalCodeFolding.h"
21 #include "bolt/Passes/IndirectCallPromotion.h"
22 #include "bolt/Passes/Inliner.h"
23 #include "bolt/Passes/Instrumentation.h"
24 #include "bolt/Passes/JTFootprintReduction.h"
25 #include "bolt/Passes/LongJmp.h"
26 #include "bolt/Passes/LoopInversionPass.h"
27 #include "bolt/Passes/MCF.h"
28 #include "bolt/Passes/PLTCall.h"
29 #include "bolt/Passes/PatchEntries.h"
30 #include "bolt/Passes/RegReAssign.h"
31 #include "bolt/Passes/ReorderData.h"
32 #include "bolt/Passes/ReorderFunctions.h"
33 #include "bolt/Passes/RetpolineInsertion.h"
34 #include "bolt/Passes/SplitFunctions.h"
35 #include "bolt/Passes/StokeInfo.h"
36 #include "bolt/Passes/TailDuplication.h"
37 #include "bolt/Passes/ThreeWayBranch.h"
38 #include "bolt/Passes/ValidateInternalCalls.h"
39 #include "bolt/Passes/ValidateMemRefs.h"
40 #include "bolt/Passes/VeneerElimination.h"
41 #include "bolt/Utils/CommandLineOpts.h"
42 #include "llvm/Support/FormatVariadic.h"
43 #include "llvm/Support/Timer.h"
44 #include "llvm/Support/raw_ostream.h"
45 #include <memory>
46 #include <numeric>
48 using namespace llvm;
50 namespace opts {
52 extern cl::opt<bool> PrintAll;
53 extern cl::opt<bool> PrintDynoStats;
54 extern cl::opt<bool> DumpDotAll;
55 extern cl::opt<std::string> AsmDump;
56 extern cl::opt<bolt::PLTCall::OptType> PLT;
57 extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false,
58 llvm::bolt::DeprecatedICFNumericOptionParser>
59 ICF;
61 static cl::opt<bool>
62 DynoStatsAll("dyno-stats-all",
63 cl::desc("print dyno stats after each stage"),
64 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
66 static cl::opt<bool>
67 EliminateUnreachable("eliminate-unreachable",
68 cl::desc("eliminate unreachable code"), cl::init(true),
69 cl::cat(BoltOptCategory));
71 static cl::opt<bool> JTFootprintReductionFlag(
72 "jt-footprint-reduction",
73 cl::desc("make jump tables size smaller at the cost of using more "
74 "instructions at jump sites"),
75 cl::cat(BoltOptCategory));
77 cl::opt<bool>
78 KeepNops("keep-nops",
79 cl::desc("keep no-op instructions. By default they are removed."),
80 cl::Hidden, cl::cat(BoltOptCategory));
82 cl::opt<bool> NeverPrint("never-print", cl::desc("never print"),
83 cl::ReallyHidden, cl::cat(BoltOptCategory));
85 cl::opt<bool>
86 PrintAfterBranchFixup("print-after-branch-fixup",
87 cl::desc("print function after fixing local branches"),
88 cl::Hidden, cl::cat(BoltOptCategory));
90 static cl::opt<bool>
91 PrintAfterLowering("print-after-lowering",
92 cl::desc("print function after instruction lowering"),
93 cl::Hidden, cl::cat(BoltOptCategory));
95 static cl::opt<bool> PrintEstimateEdgeCounts(
96 "print-estimate-edge-counts",
97 cl::desc("print function after edge counts are set for no-LBR profile"),
98 cl::Hidden, cl::cat(BoltOptCategory));
100 cl::opt<bool>
101 PrintFinalized("print-finalized",
102 cl::desc("print function after CFG is finalized"),
103 cl::Hidden, cl::cat(BoltOptCategory));
105 static cl::opt<bool>
106 PrintFOP("print-fop",
107 cl::desc("print functions after frame optimizer pass"), cl::Hidden,
108 cl::cat(BoltOptCategory));
110 static cl::opt<bool>
111 PrintICF("print-icf", cl::desc("print functions after ICF optimization"),
112 cl::Hidden, cl::cat(BoltOptCategory));
114 static cl::opt<bool>
115 PrintICP("print-icp",
116 cl::desc("print functions after indirect call promotion"),
117 cl::Hidden, cl::cat(BoltOptCategory));
119 static cl::opt<bool>
120 PrintInline("print-inline",
121 cl::desc("print functions after inlining optimization"),
122 cl::Hidden, cl::cat(BoltOptCategory));
124 static cl::opt<bool> PrintJTFootprintReduction(
125 "print-after-jt-footprint-reduction",
126 cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden,
127 cl::cat(BoltOptCategory));
129 static cl::opt<bool>
130 PrintAdrRelaxation("print-adr-relaxation",
131 cl::desc("print functions after ADR Relaxation pass"),
132 cl::Hidden, cl::cat(BoltOptCategory));
134 static cl::opt<bool>
135 PrintLongJmp("print-longjmp",
136 cl::desc("print functions after longjmp pass"), cl::Hidden,
137 cl::cat(BoltOptCategory));
139 cl::opt<bool>
140 PrintNormalized("print-normalized",
141 cl::desc("print functions after CFG is normalized"),
142 cl::Hidden, cl::cat(BoltCategory));
144 static cl::opt<bool> PrintOptimizeBodyless(
145 "print-optimize-bodyless",
146 cl::desc("print functions after bodyless optimization"), cl::Hidden,
147 cl::cat(BoltOptCategory));
149 static cl::opt<bool>
150 PrintPeepholes("print-peepholes",
151 cl::desc("print functions after peephole optimization"),
152 cl::Hidden, cl::cat(BoltOptCategory));
154 static cl::opt<bool>
155 PrintPLT("print-plt", cl::desc("print functions after PLT optimization"),
156 cl::Hidden, cl::cat(BoltOptCategory));
158 static cl::opt<bool>
159 PrintProfileStats("print-profile-stats",
160 cl::desc("print profile quality/bias analysis"),
161 cl::cat(BoltCategory));
163 static cl::opt<bool>
164 PrintRegReAssign("print-regreassign",
165 cl::desc("print functions after regreassign pass"),
166 cl::Hidden, cl::cat(BoltOptCategory));
168 cl::opt<bool>
169 PrintReordered("print-reordered",
170 cl::desc("print functions after layout optimization"),
171 cl::Hidden, cl::cat(BoltOptCategory));
173 static cl::opt<bool>
174 PrintReorderedFunctions("print-reordered-functions",
175 cl::desc("print functions after clustering"),
176 cl::Hidden, cl::cat(BoltOptCategory));
178 static cl::opt<bool> PrintRetpolineInsertion(
179 "print-retpoline-insertion",
180 cl::desc("print functions after retpoline insertion pass"), cl::Hidden,
181 cl::cat(BoltCategory));
183 static cl::opt<bool> PrintSCTC(
184 "print-sctc",
185 cl::desc("print functions after conditional tail call simplification"),
186 cl::Hidden, cl::cat(BoltOptCategory));
188 static cl::opt<bool> PrintSimplifyROLoads(
189 "print-simplify-rodata-loads",
190 cl::desc("print functions after simplification of RO data loads"),
191 cl::Hidden, cl::cat(BoltOptCategory));
193 static cl::opt<bool>
194 PrintSplit("print-split", cl::desc("print functions after code splitting"),
195 cl::Hidden, cl::cat(BoltOptCategory));
197 static cl::opt<bool>
198 PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
199 cl::Hidden, cl::cat(BoltOptCategory));
201 static cl::opt<bool>
202 PrintFixRelaxations("print-fix-relaxations",
203 cl::desc("print functions after fix relaxations pass"),
204 cl::Hidden, cl::cat(BoltOptCategory));
206 static cl::opt<bool>
207 PrintFixRISCVCalls("print-fix-riscv-calls",
208 cl::desc("print functions after fix RISCV calls pass"),
209 cl::Hidden, cl::cat(BoltOptCategory));
211 static cl::opt<bool> PrintVeneerElimination(
212 "print-veneer-elimination",
213 cl::desc("print functions after veneer elimination pass"), cl::Hidden,
214 cl::cat(BoltOptCategory));
216 static cl::opt<bool>
217 PrintUCE("print-uce",
218 cl::desc("print functions after unreachable code elimination"),
219 cl::Hidden, cl::cat(BoltOptCategory));
221 static cl::opt<bool> RegReAssign(
222 "reg-reassign",
223 cl::desc(
224 "reassign registers so as to avoid using REX prefixes in hot code"),
225 cl::cat(BoltOptCategory));
227 static cl::opt<bool> SimplifyConditionalTailCalls(
228 "simplify-conditional-tail-calls",
229 cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
230 cl::init(true), cl::cat(BoltOptCategory));
232 static cl::opt<bool> SimplifyRODataLoads(
233 "simplify-rodata-loads",
234 cl::desc("simplify loads from read-only sections by replacing the memory "
235 "operand with the constant found in the corresponding section"),
236 cl::cat(BoltOptCategory));
238 static cl::list<std::string>
239 SpecializeMemcpy1("memcpy1-spec",
240 cl::desc("list of functions with call sites for which to specialize memcpy() "
241 "for size 1"),
242 cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
243 cl::ZeroOrMore, cl::cat(BoltOptCategory));
245 static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
246 cl::cat(BoltOptCategory));
248 static cl::opt<bool> StringOps(
249 "inline-memcpy",
250 cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
251 cl::cat(BoltOptCategory));
253 static cl::opt<bool> StripRepRet(
254 "strip-rep-ret",
255 cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
256 cl::init(true), cl::cat(BoltOptCategory));
258 static cl::opt<bool> VerifyCFG("verify-cfg",
259 cl::desc("verify the CFG after every pass"),
260 cl::Hidden, cl::cat(BoltOptCategory));
262 static cl::opt<bool> ThreeWayBranchFlag("three-way-branch",
263 cl::desc("reorder three way branches"),
264 cl::ReallyHidden,
265 cl::cat(BoltOptCategory));
267 static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
268 cl::desc("fold jcc+mov into cmov"),
269 cl::ReallyHidden,
270 cl::cat(BoltOptCategory));
272 static cl::opt<bool> ShortenInstructions("shorten-instructions",
273 cl::desc("shorten instructions"),
274 cl::init(true),
275 cl::cat(BoltOptCategory));
276 } // namespace opts
278 namespace llvm {
279 namespace bolt {
281 using namespace opts;
283 const char BinaryFunctionPassManager::TimerGroupName[] = "passman";
284 const char BinaryFunctionPassManager::TimerGroupDesc[] =
285 "Binary Function Pass Manager";
287 Error BinaryFunctionPassManager::runPasses() {
288 auto &BFs = BC.getBinaryFunctions();
289 for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) {
290 const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>>
291 &OptPassPair = Passes[PassIdx];
292 if (!OptPassPair.first)
293 continue;
295 const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second;
296 std::string PassIdName =
297 formatv("{0:2}_{1}", PassIdx, Pass->getName()).str();
299 if (opts::Verbosity > 0)
300 BC.outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n";
302 NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName,
303 TimerGroupDesc, TimeOpts);
305 Error E = Error::success();
306 callWithDynoStats(
307 BC.outs(),
308 [this, &E, &Pass] {
309 E = joinErrors(std::move(E), Pass->runOnFunctions(BC));
311 BFs, Pass->getName(), opts::DynoStatsAll, BC.isAArch64());
312 if (E)
313 return Error(std::move(E));
315 if (opts::VerifyCFG &&
316 !std::accumulate(
317 BFs.begin(), BFs.end(), true,
318 [](const bool Valid,
319 const std::pair<const uint64_t, BinaryFunction> &It) {
320 return Valid && It.second.validateCFG();
321 })) {
322 return createFatalBOLTError(
323 Twine("BOLT-ERROR: Invalid CFG detected after pass ") +
324 Twine(Pass->getName()) + Twine("\n"));
327 if (opts::Verbosity > 0)
328 BC.outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n";
330 if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass())
331 continue;
333 const std::string Message = std::string("after ") + Pass->getName();
335 for (auto &It : BFs) {
336 BinaryFunction &Function = It.second;
338 if (!Pass->shouldPrint(Function))
339 continue;
341 Function.print(BC.outs(), Message);
343 if (opts::DumpDotAll)
344 Function.dumpGraphForPass(PassIdName);
347 return Error::success();
350 Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
351 BinaryFunctionPassManager Manager(BC);
353 Manager.registerPass(
354 std::make_unique<EstimateEdgeCounts>(PrintEstimateEdgeCounts));
356 Manager.registerPass(std::make_unique<DynoStatsSetPass>());
358 Manager.registerPass(std::make_unique<AsmDumpPass>(),
359 opts::AsmDump.getNumOccurrences());
361 if (BC.isAArch64()) {
362 Manager.registerPass(std::make_unique<FixRelaxations>(PrintFixRelaxations));
364 Manager.registerPass(
365 std::make_unique<VeneerElimination>(PrintVeneerElimination));
368 if (BC.isRISCV()) {
369 Manager.registerPass(
370 std::make_unique<FixRISCVCallsPass>(PrintFixRISCVCalls));
373 // Here we manage dependencies/order manually, since passes are run in the
374 // order they're registered.
376 // Run this pass first to use stats for the original functions.
377 Manager.registerPass(std::make_unique<PrintProgramStats>());
379 if (opts::PrintProfileStats)
380 Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));
382 Manager.registerPass(std::make_unique<PrintContinuityStats>(NeverPrint));
384 Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
386 Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint));
388 if (opts::Instrument)
389 Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint));
390 else if (opts::Hugify)
391 Manager.registerPass(std::make_unique<HugePage>(NeverPrint));
393 Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint),
394 opts::ShortenInstructions);
396 Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint),
397 !opts::KeepNops);
399 Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized));
401 if (BC.isX86())
402 Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint),
403 opts::StripRepRet);
405 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
406 opts::ICF != IdenticalCodeFolding::ICFLevel::None);
408 Manager.registerPass(
409 std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1),
410 !opts::SpecializeMemcpy1.empty());
412 Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint),
413 opts::StringOps);
415 Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP));
417 Manager.registerPass(
418 std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction),
419 opts::JTFootprintReductionFlag);
421 Manager.registerPass(
422 std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
423 opts::SimplifyRODataLoads);
425 Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign),
426 opts::RegReAssign);
428 Manager.registerPass(std::make_unique<Inliner>(PrintInline));
430 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
431 opts::ICF != IdenticalCodeFolding::ICFLevel::None);
433 Manager.registerPass(std::make_unique<PLTCall>(PrintPLT));
435 Manager.registerPass(std::make_unique<ThreeWayBranch>(),
436 opts::ThreeWayBranchFlag);
438 Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered));
440 Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE),
441 opts::EliminateUnreachable);
443 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
445 Manager.registerPass(std::make_unique<LoopInversionPass>());
447 Manager.registerPass(std::make_unique<TailDuplication>());
449 Manager.registerPass(std::make_unique<CMOVConversion>(),
450 opts::CMOVConversionFlag);
452 // This pass syncs local branches with CFG. If any of the following
453 // passes breaks the sync - they either need to re-run the pass or
454 // fix branches consistency internally.
455 Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup));
457 // This pass should come close to last since it uses the estimated hot
458 // size of a function to determine the order. It should definitely
459 // also happen after any changes to the call graph are made, e.g. inlining.
460 Manager.registerPass(
461 std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
463 // This is the second run of the SplitFunctions pass required by certain
464 // splitting strategies (e.g. cdsplit). Running the SplitFunctions pass again
465 // after ReorderFunctions allows the finalized function order to be utilized
466 // to make more sophisticated splitting decisions, like hot-warm-cold
467 // splitting.
468 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
470 // Print final dyno stats right while CFG and instruction analysis are intact.
471 Manager.registerPass(std::make_unique<DynoStatsPrintPass>(
472 "after all optimizations before SCTC and FOP"),
473 opts::PrintDynoStats || opts::DynoStatsAll);
475 // Add the StokeInfo pass, which extract functions for stoke optimization and
476 // get the liveness information for them
477 Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke);
479 // This pass introduces conditional jumps into external functions.
480 // Between extending CFG to support this and isolating this pass we chose
481 // the latter. Thus this pass will do double jump removal and unreachable
482 // code elimination if necessary and won't rely on peepholes/UCE for these
483 // optimizations.
484 // More generally this pass should be the last optimization pass that
485 // modifies branches/control flow. This pass is run after function
486 // reordering so that it can tell whether calls are forward/backward
487 // accurately.
488 Manager.registerPass(
489 std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC),
490 opts::SimplifyConditionalTailCalls);
492 Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes));
494 Manager.registerPass(std::make_unique<AlignerPass>());
496 // Perform reordering on data contained in one or more sections using
497 // memory profiling data.
498 Manager.registerPass(std::make_unique<ReorderData>());
500 if (BC.isAArch64()) {
501 Manager.registerPass(
502 std::make_unique<ADRRelaxationPass>(PrintAdrRelaxation));
504 // Tighten branches according to offset differences between branch and
505 // targets. No extra instructions after this pass, otherwise we may have
506 // relocations out of range and crash during linking.
507 Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp));
510 // This pass should always run last.*
511 Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized));
513 // FrameOptimizer has an implicit dependency on FinalizeFunctions.
514 // FrameOptimizer move values around and needs to update CFIs. To do this, it
515 // must read CFI, interpret it and rewrite it, so CFIs need to be correctly
516 // placed according to the final layout.
517 Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP));
519 Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP));
521 Manager.registerPass(
522 std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion));
524 // Assign each function an output section.
525 Manager.registerPass(std::make_unique<AssignSections>());
527 // Patch original function entries
528 if (BC.HasRelocations)
529 Manager.registerPass(std::make_unique<PatchEntries>());
531 // This pass turns tail calls into jumps which makes them invisible to
532 // function reordering. It's unsafe to use any CFG or instruction analysis
533 // after this point.
534 Manager.registerPass(
535 std::make_unique<InstructionLowering>(PrintAfterLowering));
537 // In non-relocation mode, mark functions that do not fit into their original
538 // space as non-simple if we have to (e.g. for correct debug info update).
539 // NOTE: this pass depends on finalized code.
540 if (!BC.HasRelocations)
541 Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint));
543 Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint));
545 // Check for dirty state of MCSymbols caused by running calculateEmittedSize
546 // in parallel and restore them
547 Manager.registerPass(std::make_unique<CleanMCState>(NeverPrint));
549 return Manager.runPasses();
552 } // namespace bolt
553 } // namespace llvm