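// NOTE: The following three lines are residue from the git web view this file
// was captured from; they are not part of the source.
//   Page title (apparently an unrelated commit subject):
//     [RISCV] Fix mgather -> riscv.masked.strided.load combine not extending indices (...
//   Path: [llvm-project.git] / bolt / lib / Rewrite / BinaryPassManager.cpp
//   Blob: 9946608c96d8ee99b2ca7c977ca7999c3373d98d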
1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "bolt/Rewrite/BinaryPassManager.h"
10 #include "bolt/Passes/ADRRelaxationPass.h"
11 #include "bolt/Passes/Aligner.h"
12 #include "bolt/Passes/AllocCombiner.h"
13 #include "bolt/Passes/AsmDump.h"
14 #include "bolt/Passes/CMOVConversion.h"
15 #include "bolt/Passes/FixRISCVCallsPass.h"
16 #include "bolt/Passes/FixRelaxationPass.h"
17 #include "bolt/Passes/FrameOptimizer.h"
18 #include "bolt/Passes/Hugify.h"
19 #include "bolt/Passes/IdenticalCodeFolding.h"
20 #include "bolt/Passes/IndirectCallPromotion.h"
21 #include "bolt/Passes/Inliner.h"
22 #include "bolt/Passes/Instrumentation.h"
23 #include "bolt/Passes/JTFootprintReduction.h"
24 #include "bolt/Passes/LongJmp.h"
25 #include "bolt/Passes/LoopInversionPass.h"
26 #include "bolt/Passes/PLTCall.h"
27 #include "bolt/Passes/PatchEntries.h"
28 #include "bolt/Passes/RegReAssign.h"
29 #include "bolt/Passes/ReorderData.h"
30 #include "bolt/Passes/ReorderFunctions.h"
31 #include "bolt/Passes/RetpolineInsertion.h"
32 #include "bolt/Passes/SplitFunctions.h"
33 #include "bolt/Passes/StokeInfo.h"
34 #include "bolt/Passes/TailDuplication.h"
35 #include "bolt/Passes/ThreeWayBranch.h"
36 #include "bolt/Passes/ValidateInternalCalls.h"
37 #include "bolt/Passes/ValidateMemRefs.h"
38 #include "bolt/Passes/VeneerElimination.h"
39 #include "bolt/Utils/CommandLineOpts.h"
40 #include "llvm/Support/FormatVariadic.h"
41 #include "llvm/Support/Timer.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <memory>
44 #include <numeric>
46 using namespace llvm;
48 namespace opts {
50 extern cl::opt<bool> PrintAll;
51 extern cl::opt<bool> PrintDynoStats;
52 extern cl::opt<bool> DumpDotAll;
53 extern cl::opt<std::string> AsmDump;
54 extern cl::opt<bolt::PLTCall::OptType> PLT;
56 static cl::opt<bool>
57 DynoStatsAll("dyno-stats-all",
58 cl::desc("print dyno stats after each stage"),
59 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
61 static cl::opt<bool>
62 EliminateUnreachable("eliminate-unreachable",
63 cl::desc("eliminate unreachable code"), cl::init(true),
64 cl::cat(BoltOptCategory));
66 cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"),
67 cl::cat(BoltOptCategory));
69 static cl::opt<bool> JTFootprintReductionFlag(
70 "jt-footprint-reduction",
71 cl::desc("make jump tables size smaller at the cost of using more "
72 "instructions at jump sites"),
73 cl::cat(BoltOptCategory));
75 static cl::opt<bool>
76 KeepNops("keep-nops",
77 cl::desc("keep no-op instructions. By default they are removed."),
78 cl::Hidden, cl::cat(BoltOptCategory));
80 cl::opt<bool> NeverPrint("never-print", cl::desc("never print"),
81 cl::ReallyHidden, cl::cat(BoltOptCategory));
83 cl::opt<bool>
84 PrintAfterBranchFixup("print-after-branch-fixup",
85 cl::desc("print function after fixing local branches"),
86 cl::Hidden, cl::cat(BoltOptCategory));
88 static cl::opt<bool>
89 PrintAfterLowering("print-after-lowering",
90 cl::desc("print function after instruction lowering"),
91 cl::Hidden, cl::cat(BoltOptCategory));
93 cl::opt<bool>
94 PrintFinalized("print-finalized",
95 cl::desc("print function after CFG is finalized"),
96 cl::Hidden, cl::cat(BoltOptCategory));
98 static cl::opt<bool>
99 PrintFOP("print-fop",
100 cl::desc("print functions after frame optimizer pass"), cl::Hidden,
101 cl::cat(BoltOptCategory));
103 static cl::opt<bool>
104 PrintICF("print-icf", cl::desc("print functions after ICF optimization"),
105 cl::Hidden, cl::cat(BoltOptCategory));
107 static cl::opt<bool>
108 PrintICP("print-icp",
109 cl::desc("print functions after indirect call promotion"),
110 cl::Hidden, cl::cat(BoltOptCategory));
112 static cl::opt<bool>
113 PrintInline("print-inline",
114 cl::desc("print functions after inlining optimization"),
115 cl::Hidden, cl::cat(BoltOptCategory));
117 static cl::opt<bool> PrintJTFootprintReduction(
118 "print-after-jt-footprint-reduction",
119 cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden,
120 cl::cat(BoltOptCategory));
122 static cl::opt<bool>
123 PrintLongJmp("print-longjmp",
124 cl::desc("print functions after longjmp pass"), cl::Hidden,
125 cl::cat(BoltOptCategory));
127 cl::opt<bool>
128 PrintNormalized("print-normalized",
129 cl::desc("print functions after CFG is normalized"),
130 cl::Hidden, cl::cat(BoltCategory));
132 static cl::opt<bool> PrintOptimizeBodyless(
133 "print-optimize-bodyless",
134 cl::desc("print functions after bodyless optimization"), cl::Hidden,
135 cl::cat(BoltOptCategory));
137 static cl::opt<bool>
138 PrintPeepholes("print-peepholes",
139 cl::desc("print functions after peephole optimization"),
140 cl::Hidden, cl::cat(BoltOptCategory));
142 static cl::opt<bool>
143 PrintPLT("print-plt", cl::desc("print functions after PLT optimization"),
144 cl::Hidden, cl::cat(BoltOptCategory));
146 static cl::opt<bool>
147 PrintProfileStats("print-profile-stats",
148 cl::desc("print profile quality/bias analysis"),
149 cl::cat(BoltCategory));
151 static cl::opt<bool>
152 PrintRegReAssign("print-regreassign",
153 cl::desc("print functions after regreassign pass"),
154 cl::Hidden, cl::cat(BoltOptCategory));
156 cl::opt<bool>
157 PrintReordered("print-reordered",
158 cl::desc("print functions after layout optimization"),
159 cl::Hidden, cl::cat(BoltOptCategory));
161 static cl::opt<bool>
162 PrintReorderedFunctions("print-reordered-functions",
163 cl::desc("print functions after clustering"),
164 cl::Hidden, cl::cat(BoltOptCategory));
166 static cl::opt<bool> PrintRetpolineInsertion(
167 "print-retpoline-insertion",
168 cl::desc("print functions after retpoline insertion pass"), cl::Hidden,
169 cl::cat(BoltCategory));
171 static cl::opt<bool> PrintSCTC(
172 "print-sctc",
173 cl::desc("print functions after conditional tail call simplification"),
174 cl::Hidden, cl::cat(BoltOptCategory));
176 static cl::opt<bool> PrintSimplifyROLoads(
177 "print-simplify-rodata-loads",
178 cl::desc("print functions after simplification of RO data loads"),
179 cl::Hidden, cl::cat(BoltOptCategory));
181 static cl::opt<bool>
182 PrintSplit("print-split", cl::desc("print functions after code splitting"),
183 cl::Hidden, cl::cat(BoltOptCategory));
185 static cl::opt<bool>
186 PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
187 cl::Hidden, cl::cat(BoltOptCategory));
189 static cl::opt<bool>
190 PrintFixRelaxations("print-fix-relaxations",
191 cl::desc("print functions after fix relaxations pass"),
192 cl::Hidden, cl::cat(BoltOptCategory));
194 static cl::opt<bool>
195 PrintFixRISCVCalls("print-fix-riscv-calls",
196 cl::desc("print functions after fix RISCV calls pass"),
197 cl::Hidden, cl::cat(BoltOptCategory));
199 static cl::opt<bool> PrintVeneerElimination(
200 "print-veneer-elimination",
201 cl::desc("print functions after veneer elimination pass"), cl::Hidden,
202 cl::cat(BoltOptCategory));
204 static cl::opt<bool>
205 PrintUCE("print-uce",
206 cl::desc("print functions after unreachable code elimination"),
207 cl::Hidden, cl::cat(BoltOptCategory));
209 static cl::opt<bool> RegReAssign(
210 "reg-reassign",
211 cl::desc(
212 "reassign registers so as to avoid using REX prefixes in hot code"),
213 cl::cat(BoltOptCategory));
215 static cl::opt<bool> SimplifyConditionalTailCalls(
216 "simplify-conditional-tail-calls",
217 cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
218 cl::init(true), cl::cat(BoltOptCategory));
220 static cl::opt<bool> SimplifyRODataLoads(
221 "simplify-rodata-loads",
222 cl::desc("simplify loads from read-only sections by replacing the memory "
223 "operand with the constant found in the corresponding section"),
224 cl::cat(BoltOptCategory));
226 static cl::list<std::string>
227 SpecializeMemcpy1("memcpy1-spec",
228 cl::desc("list of functions with call sites for which to specialize memcpy() "
229 "for size 1"),
230 cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
231 cl::ZeroOrMore, cl::cat(BoltOptCategory));
233 static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
234 cl::cat(BoltOptCategory));
236 static cl::opt<bool> StringOps(
237 "inline-memcpy",
238 cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
239 cl::cat(BoltOptCategory));
241 static cl::opt<bool> StripRepRet(
242 "strip-rep-ret",
243 cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
244 cl::init(true), cl::cat(BoltOptCategory));
246 static cl::opt<bool> VerifyCFG("verify-cfg",
247 cl::desc("verify the CFG after every pass"),
248 cl::Hidden, cl::cat(BoltOptCategory));
250 static cl::opt<bool> ThreeWayBranchFlag("three-way-branch",
251 cl::desc("reorder three way branches"),
252 cl::ReallyHidden,
253 cl::cat(BoltOptCategory));
255 static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
256 cl::desc("fold jcc+mov into cmov"),
257 cl::ReallyHidden,
258 cl::cat(BoltOptCategory));
260 } // namespace opts
262 namespace llvm {
263 namespace bolt {
265 using namespace opts;
267 const char BinaryFunctionPassManager::TimerGroupName[] = "passman";
268 const char BinaryFunctionPassManager::TimerGroupDesc[] =
269 "Binary Function Pass Manager";
271 void BinaryFunctionPassManager::runPasses() {
272 auto &BFs = BC.getBinaryFunctions();
273 for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) {
274 const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>>
275 &OptPassPair = Passes[PassIdx];
276 if (!OptPassPair.first)
277 continue;
279 const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second;
280 std::string PassIdName =
281 formatv("{0:2}_{1}", PassIdx, Pass->getName()).str();
283 if (opts::Verbosity > 0)
284 outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n";
286 NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName,
287 TimerGroupDesc, TimeOpts);
289 callWithDynoStats([this, &Pass] { Pass->runOnFunctions(BC); }, BFs,
290 Pass->getName(), opts::DynoStatsAll, BC.isAArch64());
292 if (opts::VerifyCFG &&
293 !std::accumulate(
294 BFs.begin(), BFs.end(), true,
295 [](const bool Valid,
296 const std::pair<const uint64_t, BinaryFunction> &It) {
297 return Valid && It.second.validateCFG();
298 })) {
299 errs() << "BOLT-ERROR: Invalid CFG detected after pass "
300 << Pass->getName() << "\n";
301 exit(1);
304 if (opts::Verbosity > 0)
305 outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n";
307 if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass())
308 continue;
310 const std::string Message = std::string("after ") + Pass->getName();
312 for (auto &It : BFs) {
313 BinaryFunction &Function = It.second;
315 if (!Pass->shouldPrint(Function))
316 continue;
318 Function.print(outs(), Message);
320 if (opts::DumpDotAll)
321 Function.dumpGraphForPass(PassIdName);
326 void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
327 BinaryFunctionPassManager Manager(BC);
329 const DynoStats InitialDynoStats =
330 getDynoStats(BC.getBinaryFunctions(), BC.isAArch64());
332 Manager.registerPass(std::make_unique<AsmDumpPass>(),
333 opts::AsmDump.getNumOccurrences());
335 if (BC.isAArch64()) {
336 Manager.registerPass(std::make_unique<FixRelaxations>(PrintFixRelaxations));
338 Manager.registerPass(
339 std::make_unique<VeneerElimination>(PrintVeneerElimination));
342 if (BC.isRISCV()) {
343 Manager.registerPass(
344 std::make_unique<FixRISCVCallsPass>(PrintFixRISCVCalls));
347 // Here we manage dependencies/order manually, since passes are run in the
348 // order they're registered.
350 // Run this pass first to use stats for the original functions.
351 Manager.registerPass(std::make_unique<PrintProgramStats>(NeverPrint));
353 if (opts::PrintProfileStats)
354 Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));
356 Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
358 Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint));
360 if (opts::Instrument)
361 Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint));
362 else if (opts::Hugify)
363 Manager.registerPass(std::make_unique<HugePage>(NeverPrint));
365 Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint));
367 Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint),
368 !opts::KeepNops);
370 Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized));
372 Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint),
373 opts::StripRepRet);
375 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
376 opts::ICF);
378 Manager.registerPass(
379 std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1),
380 !opts::SpecializeMemcpy1.empty());
382 Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint),
383 opts::StringOps);
385 Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP));
387 Manager.registerPass(
388 std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction),
389 opts::JTFootprintReductionFlag);
391 Manager.registerPass(
392 std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
393 opts::SimplifyRODataLoads);
395 Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign),
396 opts::RegReAssign);
398 Manager.registerPass(std::make_unique<Inliner>(PrintInline));
400 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
401 opts::ICF);
403 Manager.registerPass(std::make_unique<PLTCall>(PrintPLT));
405 Manager.registerPass(std::make_unique<ThreeWayBranch>(),
406 opts::ThreeWayBranchFlag);
408 Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered));
410 Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE),
411 opts::EliminateUnreachable);
413 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
415 Manager.registerPass(std::make_unique<LoopInversionPass>());
417 Manager.registerPass(std::make_unique<TailDuplication>());
419 Manager.registerPass(std::make_unique<CMOVConversion>(),
420 opts::CMOVConversionFlag);
422 // This pass syncs local branches with CFG. If any of the following
423 // passes breaks the sync - they either need to re-run the pass or
424 // fix branches consistency internally.
425 Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup));
427 // This pass should come close to last since it uses the estimated hot
428 // size of a function to determine the order. It should definitely
429 // also happen after any changes to the call graph are made, e.g. inlining.
430 Manager.registerPass(
431 std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
433 // This is the second run of the SplitFunctions pass required by certain
434 // splitting strategies (e.g. cdsplit). Running the SplitFunctions pass again
435 // after ReorderFunctions allows the finalized function order to be utilized
436 // to make more sophisticated splitting decisions, like hot-warm-cold
437 // splitting.
438 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
440 // Print final dyno stats right while CFG and instruction analysis are intact.
441 Manager.registerPass(
442 std::make_unique<DynoStatsPrintPass>(
443 InitialDynoStats, "after all optimizations before SCTC and FOP"),
444 opts::PrintDynoStats || opts::DynoStatsAll);
446 // Add the StokeInfo pass, which extract functions for stoke optimization and
447 // get the liveness information for them
448 Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke);
450 // This pass introduces conditional jumps into external functions.
451 // Between extending CFG to support this and isolating this pass we chose
452 // the latter. Thus this pass will do double jump removal and unreachable
453 // code elimination if necessary and won't rely on peepholes/UCE for these
454 // optimizations.
455 // More generally this pass should be the last optimization pass that
456 // modifies branches/control flow. This pass is run after function
457 // reordering so that it can tell whether calls are forward/backward
458 // accurately.
459 Manager.registerPass(
460 std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC),
461 opts::SimplifyConditionalTailCalls);
463 Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes));
465 Manager.registerPass(std::make_unique<AlignerPass>());
467 // Perform reordering on data contained in one or more sections using
468 // memory profiling data.
469 Manager.registerPass(std::make_unique<ReorderData>());
471 if (BC.isAArch64()) {
472 Manager.registerPass(std::make_unique<ADRRelaxationPass>());
474 // Tighten branches according to offset differences between branch and
475 // targets. No extra instructions after this pass, otherwise we may have
476 // relocations out of range and crash during linking.
477 Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp));
480 // This pass should always run last.*
481 Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized));
483 // FrameOptimizer has an implicit dependency on FinalizeFunctions.
484 // FrameOptimizer move values around and needs to update CFIs. To do this, it
485 // must read CFI, interpret it and rewrite it, so CFIs need to be correctly
486 // placed according to the final layout.
487 Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP));
489 Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP));
491 Manager.registerPass(
492 std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion));
494 // Assign each function an output section.
495 Manager.registerPass(std::make_unique<AssignSections>());
497 // Patch original function entries
498 if (BC.HasRelocations)
499 Manager.registerPass(std::make_unique<PatchEntries>());
501 // This pass turns tail calls into jumps which makes them invisible to
502 // function reordering. It's unsafe to use any CFG or instruction analysis
503 // after this point.
504 Manager.registerPass(
505 std::make_unique<InstructionLowering>(PrintAfterLowering));
507 // In non-relocation mode, mark functions that do not fit into their original
508 // space as non-simple if we have to (e.g. for correct debug info update).
509 // NOTE: this pass depends on finalized code.
510 if (!BC.HasRelocations)
511 Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint));
513 Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint));
515 // Check for dirty state of MCSymbols caused by running calculateEmittedSize
516 // in parallel and restore them
517 Manager.registerPass(std::make_unique<CleanMCState>(NeverPrint));
519 Manager.runPasses();
522 } // namespace bolt
523 } // namespace llvm