1 //===- Construction of pass pipelines -------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file provides the implementation of the PassBuilder based on our
11 /// static pass registry as well as related functionality. It also provides
12 /// helpers to aid in analyzing, debugging, and testing passes and pass managers.
15 //===----------------------------------------------------------------------===//
17 #include "llvm/Analysis/AliasAnalysis.h"
18 #include "llvm/Analysis/BasicAliasAnalysis.h"
19 #include "llvm/Analysis/CGSCCPassManager.h"
20 #include "llvm/Analysis/GlobalsModRef.h"
21 #include "llvm/Analysis/InlineAdvisor.h"
22 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
23 #include "llvm/Analysis/ProfileSummaryInfo.h"
24 #include "llvm/Analysis/ScopedNoAliasAA.h"
25 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/Passes/OptimizationLevel.h"
28 #include "llvm/Passes/PassBuilder.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/PGOOptions.h"
32 #include "llvm/Target/TargetMachine.h"
33 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
34 #include "llvm/Transforms/Coroutines/CoroCleanup.h"
35 #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
36 #include "llvm/Transforms/Coroutines/CoroEarly.h"
37 #include "llvm/Transforms/Coroutines/CoroElide.h"
38 #include "llvm/Transforms/Coroutines/CoroSplit.h"
39 #include "llvm/Transforms/IPO/AlwaysInliner.h"
40 #include "llvm/Transforms/IPO/Annotation2Metadata.h"
41 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
42 #include "llvm/Transforms/IPO/Attributor.h"
43 #include "llvm/Transforms/IPO/CalledValuePropagation.h"
44 #include "llvm/Transforms/IPO/ConstantMerge.h"
45 #include "llvm/Transforms/IPO/CrossDSOCFI.h"
46 #include "llvm/Transforms/IPO/DeadArgumentElimination.h"
47 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
48 #include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
49 #include "llvm/Transforms/IPO/FunctionAttrs.h"
50 #include "llvm/Transforms/IPO/GlobalDCE.h"
51 #include "llvm/Transforms/IPO/GlobalOpt.h"
52 #include "llvm/Transforms/IPO/GlobalSplit.h"
53 #include "llvm/Transforms/IPO/HotColdSplitting.h"
54 #include "llvm/Transforms/IPO/IROutliner.h"
55 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
56 #include "llvm/Transforms/IPO/Inliner.h"
57 #include "llvm/Transforms/IPO/LowerTypeTests.h"
58 #include "llvm/Transforms/IPO/MergeFunctions.h"
59 #include "llvm/Transforms/IPO/ModuleInliner.h"
60 #include "llvm/Transforms/IPO/OpenMPOpt.h"
61 #include "llvm/Transforms/IPO/PartialInlining.h"
62 #include "llvm/Transforms/IPO/SCCP.h"
63 #include "llvm/Transforms/IPO/SampleProfile.h"
64 #include "llvm/Transforms/IPO/SampleProfileProbe.h"
65 #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
66 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
67 #include "llvm/Transforms/InstCombine/InstCombine.h"
68 #include "llvm/Transforms/Instrumentation/CGProfile.h"
69 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
70 #include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
71 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
72 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
73 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
74 #include "llvm/Transforms/Scalar/ADCE.h"
75 #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
76 #include "llvm/Transforms/Scalar/AnnotationRemarks.h"
77 #include "llvm/Transforms/Scalar/BDCE.h"
78 #include "llvm/Transforms/Scalar/CallSiteSplitting.h"
79 #include "llvm/Transforms/Scalar/ConstraintElimination.h"
80 #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
81 #include "llvm/Transforms/Scalar/DFAJumpThreading.h"
82 #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
83 #include "llvm/Transforms/Scalar/DivRemPairs.h"
84 #include "llvm/Transforms/Scalar/EarlyCSE.h"
85 #include "llvm/Transforms/Scalar/Float2Int.h"
86 #include "llvm/Transforms/Scalar/GVN.h"
87 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
88 #include "llvm/Transforms/Scalar/InstSimplifyPass.h"
89 #include "llvm/Transforms/Scalar/JumpThreading.h"
90 #include "llvm/Transforms/Scalar/LICM.h"
91 #include "llvm/Transforms/Scalar/LoopDeletion.h"
92 #include "llvm/Transforms/Scalar/LoopDistribute.h"
93 #include "llvm/Transforms/Scalar/LoopFlatten.h"
94 #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
95 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
96 #include "llvm/Transforms/Scalar/LoopInterchange.h"
97 #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
98 #include "llvm/Transforms/Scalar/LoopPassManager.h"
99 #include "llvm/Transforms/Scalar/LoopRotation.h"
100 #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
101 #include "llvm/Transforms/Scalar/LoopSink.h"
102 #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
103 #include "llvm/Transforms/Scalar/LoopUnrollPass.h"
104 #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
105 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
106 #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
107 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
108 #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
109 #include "llvm/Transforms/Scalar/NewGVN.h"
110 #include "llvm/Transforms/Scalar/Reassociate.h"
111 #include "llvm/Transforms/Scalar/SCCP.h"
112 #include "llvm/Transforms/Scalar/SROA.h"
113 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
114 #include "llvm/Transforms/Scalar/SimplifyCFG.h"
115 #include "llvm/Transforms/Scalar/SpeculativeExecution.h"
116 #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
117 #include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
118 #include "llvm/Transforms/Utils/AddDiscriminators.h"
119 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
120 #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
121 #include "llvm/Transforms/Utils/InjectTLIMappings.h"
122 #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
123 #include "llvm/Transforms/Utils/Mem2Reg.h"
124 #include "llvm/Transforms/Utils/NameAnonGlobals.h"
125 #include "llvm/Transforms/Utils/RelLookupTableConverter.h"
126 #include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
127 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
128 #include "llvm/Transforms/Vectorize/SLPVectorizer.h"
129 #include "llvm/Transforms/Vectorize/VectorCombine.h"
131 using namespace llvm
;
// File-local command-line options consulted while the pipelines are built.
// NOTE(review): the embedded line numbers of this listing skip in places, so
// a few of the declarations below are visibly incomplete; each such spot is
// flagged.

/// Selects the inline advisor mode via -enable-ml-inliner. Accepted values
/// are "default", "development" and "release"; the initial value is
/// InliningAdvisorMode::Default.
133 static cl::opt
<InliningAdvisorMode
> UseInlineAdvisor(
134 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default
), cl::Hidden
,
135 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
136 cl::values(clEnumValN(InliningAdvisorMode::Default
, "default",
137 "Heuristics-based inliner version."),
138 clEnumValN(InliningAdvisorMode::Development
, "development",
139 "Use development mode (runtime-loadable model)."),
140 clEnumValN(InliningAdvisorMode::Release
, "release",
141 "Use release mode (AOT-compiled model).")));
/// Hidden flag -enable-npm-synthetic-counts.
/// NOTE(review): the remainder of this declaration (rest of the description
/// string, any cl::init, and the closing parenthesis) is missing from this
/// listing.
143 static cl::opt
<bool> EnableSyntheticCounts(
144 "enable-npm-synthetic-counts", cl::Hidden
,
145 cl::desc("Run synthetic function entry count generation "
148 /// Flag to enable inline deferral during PGO.
// Registered as -enable-npm-pgo-inline-deferral, initial value true.
// NOTE(review): the line carrying this option's type (presumably
// `static cl::opt<bool>`) is not present in this listing - confirm.
150 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
152 cl::desc("Enable inline deferral during PGO"));
/// Hidden flag -enable-mem-prof; no cl::init is given here.
154 static cl::opt
<bool> EnableMemProfiler("enable-mem-prof", cl::Hidden
,
155 cl::desc("Enable memory profiler"));
/// Hidden flag -enable-module-inliner, initial value false.
157 static cl::opt
<bool> EnableModuleInliner("enable-module-inliner",
158 cl::init(false), cl::Hidden
,
159 cl::desc("Enable module inliner"));
/// Hidden flag -mandatory-inlining-first, initial value true. Passed to
/// ModuleInlinerWrapperPass when the CGSCC inliner pipeline is built.
/// NOTE(review): the end of the description string is missing from this
/// listing.
161 static cl::opt
<bool> PerformMandatoryInliningsFirst(
162 "mandatory-inlining-first", cl::init(true), cl::Hidden
,
163 cl::desc("Perform mandatory inlinings module-wide, before performing "
/// Hidden flag -enable-npm-O3-nontrivial-unswitch, initial value true. Only
/// has an effect together with Level == O3 (see its use with
/// SimpleLoopUnswitchPass).
166 static cl::opt
<bool> EnableO3NonTrivialUnswitching(
167 "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden
,
168 cl::desc("Enable non-trivial loop unswitching for -O3"));
/// Hidden flag -eagerly-invalidate-analyses, initial value true. Seeds
/// PipelineTuningOptions::EagerlyInvalidateAnalyses.
170 static cl::opt
<bool> EnableEagerlyInvalidateAnalyses(
171 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden
,
172 cl::desc("Eagerly invalidate more analyses in default pipelines"));
/// Hidden flag -enable-no-rerun-simplification-pipeline, initial value true.
/// NOTE(review): the line opening the description (presumably `cl::desc(`)
/// is missing from this listing.
174 static cl::opt
<bool> EnableNoRerunSimplificationPipeline(
175 "enable-no-rerun-simplification-pipeline", cl::init(true), cl::Hidden
,
177 "Prevent running the simplification pipeline on a function more "
178 "than once in the case that SCC mutations cause a function to be "
179 "visited multiple times as long as the function has not been changed"));
/// Hidden flag -enable-merge-functions, initial value false. Seeds
/// PipelineTuningOptions::MergeFunctions.
181 static cl::opt
<bool> EnableMergeFunctions(
182 "enable-merge-functions", cl::init(false), cl::Hidden
,
183 cl::desc("Enable function merging as part of the optimization pipeline"));
/// Hidden flag -enable-post-pgo-loop-rotation, initial value true. Checked
/// in addPGOInstrPasses before a LoopRotatePass is scheduled after
/// PGOInstrumentationGen.
185 static cl::opt
<bool> EnablePostPGOLoopRotation(
186 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden
,
187 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
// Default constructor: establishes the initial tuning knobs.
//  - Set to true:  LoopInterleaving, LoopVectorization, LoopUnrolling,
//                  CallGraphProfile.
//  - Set to false: SLPVectorization.
//  - Copied from option variables: ForgetAllSCEVInLoopUnroll, LicmMssaOptCap,
//    LicmMssaNoAccForPromotionCap, MergeFunctions, EagerlyInvalidateAnalyses.
// ForgetSCEVInLoopUnroll, SetLicmMssaOptCap and
// SetLicmMssaNoAccForPromotionCap are not declared in the visible part of
// this file; presumably they are command-line options defined elsewhere -
// confirm.
// NOTE(review): the closing brace of this constructor is missing from this
// listing.
189 PipelineTuningOptions::PipelineTuningOptions() {
190 LoopInterleaving
= true;
191 LoopVectorization
= true;
192 SLPVectorization
= false;
193 LoopUnrolling
= true;
194 ForgetAllSCEVInLoopUnroll
= ForgetSCEVInLoopUnroll
;
195 LicmMssaOptCap
= SetLicmMssaOptCap
;
196 LicmMssaNoAccForPromotionCap
= SetLicmMssaNoAccForPromotionCap
;
197 CallGraphProfile
= true;
// The two fields below mirror the -enable-merge-functions and
// -eagerly-invalidate-analyses options defined in this file.
198 MergeFunctions
= EnableMergeFunctions
;
199 EagerlyInvalidateAnalyses
= EnableEagerlyInvalidateAnalyses
;
// Command-line options that are only declared here (`extern`); their
// definitions, flag names and defaults are not visible in this file.
// NOTE(review): listing lines just before and after this group are missing,
// so any enclosing namespace is not visible - confirm against the full file.
204 extern cl::opt
<unsigned> MaxDevirtIterations
;
205 extern cl::opt
<bool> EnableConstraintElimination
;
206 extern cl::opt
<bool> EnableFunctionSpecialization
;
207 extern cl::opt
<bool> EnableGVNHoist
;
208 extern cl::opt
<bool> EnableGVNSink
;
209 extern cl::opt
<bool> EnableHotColdSplit
;
210 extern cl::opt
<bool> EnableIROutliner
;
211 extern cl::opt
<bool> EnableOrderFileInstrumentation
;
212 extern cl::opt
<bool> EnableCHR
;
213 extern cl::opt
<bool> EnableLoopInterchange
;
214 extern cl::opt
<bool> EnableUnrollAndJam
;
215 extern cl::opt
<bool> EnableLoopFlatten
;
216 extern cl::opt
<bool> EnableDFAJumpThreading
;
217 extern cl::opt
<bool> RunNewGVN
;
218 extern cl::opt
<bool> RunPartialInlining
;
219 extern cl::opt
<bool> ExtraVectorizerPasses
;
221 extern cl::opt
<bool> FlattenedProfileUsed
;
// Tested as a bit mask against AttributorRunOption::CGSCC in
// buildInlinerPipeline.
223 extern cl::opt
<AttributorRunOption
> AttributorRun
;
224 extern cl::opt
<bool> EnableKnowledgeRetention
;
226 extern cl::opt
<bool> EnableMatrix
;
// The next two are read by addPGOInstrPasses to decide whether to run the
// pre-instrumentation inliner and with which default threshold.
228 extern cl::opt
<bool> DisablePreInliner
;
229 extern cl::opt
<int> PreInlineThreshold
;
// Walks the registered PeepholeEPCallbacks for the given function pass
// manager and optimization level.
//   FPM   - function pass manager being populated.
//   Level - optimization level of the pipeline under construction.
// NOTE(review): the loop body and the closing brace are missing from this
// listing. The other extension-point loop visible in this file calls
// `C(<pass manager>, Level)`, so presumably each callback is invoked as
// `C(FPM, Level)` - confirm.
232 void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager
&FPM
,
233 OptimizationLevel Level
) {
234 for (auto &C
: PeepholeEPCallbacks
)
238 // Helper to add AnnotationRemarksPass.
// The pass is wrapped in a module-to-function adaptor and appended to MPM.
//   MPM - module pass manager that receives the adapted pass.
// NOTE(review): the closing brace is missing from this listing.
239 static void addAnnotationRemarksPass(ModulePassManager
&MPM
) {
240 MPM
.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
243 // Helper to check if the current compilation phase is preparing for LTO
// Returns true exactly when Phase is ThinOrFullLTOPhase::ThinLTOPreLink or
// ThinOrFullLTOPhase::FullLTOPreLink; every other phase yields false.
// NOTE(review): the closing brace is missing from this listing.
244 static bool isLTOPreLink(ThinOrFullLTOPhase Phase
) {
245 return Phase
== ThinOrFullLTOPhase::ThinLTOPreLink
||
246 Phase
== ThinOrFullLTOPhase::FullLTOPreLink
;
// Builds the function simplification pipeline for speedup level 1 (the
// general builder in this file delegates here when
// Level.getSpeedupLevel() == 1). Passes are appended to a local
// FunctionPassManager (FPM); loop passes are collected in two loop pass
// managers (LPM1, LPM2) that are nested into FPM through function-to-loop
// adaptors.
//   Level - optimization level; forwarded to the extension-point callbacks
//           and used for LoopFullUnrollPass's speedup level.
//   Phase - LTO phase; consulted for loop rotation (isLTOPreLink) and for
//           the ThinLTO pre-link + sample-PGO unrolling exception.
// NOTE(review): the embedded line numbers of this listing skip in places.
// The return type, several call openers (presumably `FPM.addPass(`), the
// bodies of the extension-point callback loops, some comment continuation
// lines and the final return/closing brace are not visible here. Such spots
// are flagged below on first occurrence.
249 // TODO: Investigate the cost/benefit of tail call elimination on debugging.
251 PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level
,
252 ThinOrFullLTOPhase Phase
) {
254 FunctionPassManager FPM
;
256 // Form SSA out of local memory accesses after breaking apart aggregates into
258 FPM
.addPass(SROAPass());
260 // Catch trivial redundancies
261 FPM
.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
263 // Hoisting of scalars and load expressions.
// NOTE(review): the opener of the following call is missing from this
// listing (presumably `FPM.addPass(`); the same applies to every other bare
// `SimplifyCFGPass(...)` / `RequireAnalysisPass<...>` line in this function.
265 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
266 FPM
.addPass(InstCombinePass());
268 FPM
.addPass(LibCallsShrinkWrapPass());
270 invokePeepholeEPCallbacks(FPM
, Level
);
273 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
275 // Form canonically associated expression trees, and simplify the trees using
276 // basic mathematical properties. For example, this will form (nearly)
277 // minimal multiplication trees.
278 FPM
.addPass(ReassociatePass());
280 // Add the primary loop simplification pipeline.
281 // FIXME: Currently this is split into two loop pass pipelines because we run
282 // some function passes in between them. These can and should be removed
283 // and/or replaced by scheduling the loop pass equivalents in the correct
284 // positions. But those equivalent passes aren't powerful enough yet.
285 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
286 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
287 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
288 // `LoopInstSimplify`.
289 LoopPassManager LPM1
, LPM2
;
291 // Simplify the loop body. We do this initially to clean up after other loop
292 // passes run, either when iterating on a loop or on inner loops with
293 // implications on the outer loop.
294 LPM1
.addPass(LoopInstSimplifyPass());
295 LPM1
.addPass(LoopSimplifyCFGPass());
297 // Try to remove as much code from the loop header as possible,
298 // to reduce amount of IR that will have to be duplicated. However,
299 // do not perform speculative hoisting the first time as LICM
300 // will destroy metadata that may not need to be destroyed if run
301 // after loop rotation.
302 // TODO: Investigate promotion cap for O1.
303 LPM1
.addPass(LICMPass(PTO
.LicmMssaOptCap
, PTO
.LicmMssaNoAccForPromotionCap
,
304 /*AllowSpeculation=*/false));
// NOTE(review): the inline label below reads "Disable header duplication"
// but the value passed is `true`. The non-O1 pipeline in this file passes
// `Level != OptimizationLevel::Oz` in the same position under a comment
// saying duplication is disabled at -Oz, which suggests `true` *enables*
// header duplication. The label looks misleading - confirm against the
// LoopRotatePass constructor before relying on it.
306 LPM1
.addPass(LoopRotatePass(/* Disable header duplication */ true,
307 isLTOPreLink(Phase
)));
308 // TODO: Investigate promotion cap for O1.
309 LPM1
.addPass(LICMPass(PTO
.LicmMssaOptCap
, PTO
.LicmMssaNoAccForPromotionCap
,
310 /*AllowSpeculation=*/true));
311 LPM1
.addPass(SimpleLoopUnswitchPass());
312 if (EnableLoopFlatten
)
313 LPM1
.addPass(LoopFlattenPass());
315 LPM2
.addPass(LoopIdiomRecognizePass());
316 LPM2
.addPass(IndVarSimplifyPass());
// NOTE(review): the body of this callback loop is missing from this listing
// (presumably `C(LPM2, Level);`) - confirm.
318 for (auto &C
: LateLoopOptimizationsEPCallbacks
)
321 LPM2
.addPass(LoopDeletionPass());
323 if (EnableLoopInterchange
)
324 LPM2
.addPass(LoopInterchangePass());
326 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
327 // because it changes IR to make profile annotation in back compile
328 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
329 // attributes so we need to make sure and allow the full unroll pass to pay
// In other words: full unrolling is scheduled unless this is the ThinLTO
// pre-link phase AND PGOOpt is set with Action == SampleUse.
331 if (Phase
!= ThinOrFullLTOPhase::ThinLTOPreLink
|| !PGOOpt
||
332 PGOOpt
->Action
!= PGOOptions::SampleUse
)
333 LPM2
.addPass(LoopFullUnrollPass(Level
.getSpeedupLevel(),
334 /* OnlyWhenForced= */ !PTO
.LoopUnrolling
,
335 PTO
.ForgetAllSCEVInLoopUnroll
));
// NOTE(review): callback loop body missing from this listing, as above.
337 for (auto &C
: LoopOptimizerEndEPCallbacks
)
340 // We provide the opt remark emitter pass for LICM to use. We only need to do
341 // this once as it is immutable.
343 RequireAnalysisPass
<OptimizationRemarkEmitterAnalysis
, Function
>());
// LPM1 is moved into its adaptor with MemorySSA and BlockFrequencyInfo
// enabled; LPM2 (further down) is adapted with both disabled.
344 FPM
.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1
),
345 /*UseMemorySSA=*/true,
346 /*UseBlockFrequencyInfo=*/true));
348 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
349 FPM
.addPass(InstCombinePass());
350 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
351 // *All* loop passes must preserve it, in order to be able to use it.
352 FPM
.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2
),
353 /*UseMemorySSA=*/false,
354 /*UseBlockFrequencyInfo=*/false));
356 // Delete small array after loop unroll.
357 FPM
.addPass(SROAPass());
359 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
360 FPM
.addPass(MemCpyOptPass());
362 // Sparse conditional constant propagation.
363 // FIXME: It isn't clear why we do this *after* loop passes rather than
365 FPM
.addPass(SCCPPass());
367 // Delete dead bit computations (instcombine runs after to fold away the dead
368 // computations, and then ADCE will run later to exploit any new DCE
369 // opportunities that creates).
370 FPM
.addPass(BDCEPass());
372 // Run instcombine after redundancy and dead bit elimination to exploit
373 // opportunities opened up by them.
374 FPM
.addPass(InstCombinePass());
375 invokePeepholeEPCallbacks(FPM
, Level
);
377 FPM
.addPass(CoroElidePass());
// NOTE(review): callback loop body missing from this listing, as above.
379 for (auto &C
: ScalarOptimizerLateEPCallbacks
)
382 // Finally, do an expensive DCE pass to catch all the dead code exposed by
383 // the simplifications and basic cleanup after all the simplifications.
384 // TODO: Investigate if this is too expensive.
385 FPM
.addPass(ADCEPass());
387 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
388 FPM
.addPass(InstCombinePass());
389 invokePeepholeEPCallbacks(FPM
, Level
);
// NOTE(review): the return statement (presumably returning FPM) and the
// closing brace are missing from this listing.
// Builds the core function simplification pipeline for any level other than
// O0 (asserted). Speedup level 1 is delegated to
// buildO1FunctionSimplificationPipeline; for the remaining levels the passes
// are appended to a local FunctionPassManager (FPM), with loop passes
// gathered in LPM1/LPM2 and nested via function-to-loop adaptors.
//   Level - optimization level; gates AggressiveInstCombine, non-trivial
//           unswitching and CHR (O3), header duplication in loop rotation
//           (not Oz), and LibCallsShrinkWrap / PGOMemOPSizeOpt (not when
//           optimizing for size).
//   Phase - LTO phase; consulted for loop rotation and for the ThinLTO
//           pre-link + sample-PGO unrolling exception.
// NOTE(review): the embedded line numbers of this listing skip in places.
// The return type, several call openers (presumably `FPM.addPass(` or
// `LPM1.addPass(`), some guarding `if` lines, the bodies of the
// extension-point callback loops, some comment continuation lines and the
// final return/closing brace are not visible here. Such spots are flagged
// below.
395 PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level
,
396 ThinOrFullLTOPhase Phase
) {
397 assert(Level
!= OptimizationLevel::O0
&& "Must request optimizations!");
399 // The O1 pipeline has a separate pipeline creation function to simplify
400 // construction readability.
401 if (Level
.getSpeedupLevel() == 1)
402 return buildO1FunctionSimplificationPipeline(Level
, Phase
);
404 FunctionPassManager FPM
;
406 // Form SSA out of local memory accesses after breaking apart aggregates into
408 FPM
.addPass(SROAPass());
410 // Catch trivial redundancies
411 FPM
.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
412 if (EnableKnowledgeRetention
)
413 FPM
.addPass(AssumeSimplifyPass());
415 // Hoisting of scalars and load expressions.
// NOTE(review): a listing line is missing between the comment above and the
// addPass below. Given the EnableGVNHoist option declared in this file, the
// pass is presumably guarded by it - confirm. The same applies to
// GVNSinkPass and EnableGVNSink just after.
417 FPM
.addPass(GVNHoistPass());
419 // Global value numbering based sinking.
421 FPM
.addPass(GVNSinkPass());
// NOTE(review): the opener of the following call is missing from this
// listing (presumably `FPM.addPass(`); the same applies to every other bare
// `SimplifyCFGPass(...)` / `RequireAnalysisPass<...>` line in this function.
423 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
426 if (EnableConstraintElimination
)
427 FPM
.addPass(ConstraintEliminationPass());
429 // Speculative execution if the target has divergent branches; otherwise nop.
430 FPM
.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
432 // Optimize based on known information about branches, and cleanup afterward.
433 FPM
.addPass(JumpThreadingPass());
434 FPM
.addPass(CorrelatedValuePropagationPass());
437 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
438 FPM
.addPass(InstCombinePass());
439 if (Level
== OptimizationLevel::O3
)
440 FPM
.addPass(AggressiveInstCombinePass());
442 if (!Level
.isOptimizingForSize())
443 FPM
.addPass(LibCallsShrinkWrapPass());
445 invokePeepholeEPCallbacks(FPM
, Level
);
447 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
448 // using the size value profile. Don't perform this when optimizing for size.
449 if (PGOOpt
&& PGOOpt
->Action
== PGOOptions::IRUse
&&
450 !Level
.isOptimizingForSize())
451 FPM
.addPass(PGOMemOPSizeOpt());
453 FPM
.addPass(TailCallElimPass());
455 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
457 // Form canonically associated expression trees, and simplify the trees using
458 // basic mathematical properties. For example, this will form (nearly)
459 // minimal multiplication trees.
460 FPM
.addPass(ReassociatePass());
462 // Add the primary loop simplification pipeline.
463 // FIXME: Currently this is split into two loop pass pipelines because we run
464 // some function passes in between them. These can and should be removed
465 // and/or replaced by scheduling the loop pass equivalents in the correct
466 // positions. But those equivalent passes aren't powerful enough yet.
467 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
468 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
469 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
470 // `LoopInstSimplify`.
471 LoopPassManager LPM1
, LPM2
;
473 // Simplify the loop body. We do this initially to clean up after other loop
474 // passes run, either when iterating on a loop or on inner loops with
475 // implications on the outer loop.
476 LPM1
.addPass(LoopInstSimplifyPass());
477 LPM1
.addPass(LoopSimplifyCFGPass());
479 // Try to remove as much code from the loop header as possible,
480 // to reduce amount of IR that will have to be duplicated. However,
481 // do not perform speculative hoisting the first time as LICM
482 // will destroy metadata that may not need to be destroyed if run
483 // after loop rotation.
484 // TODO: Investigate promotion cap for O1.
485 LPM1
.addPass(LICMPass(PTO
.LicmMssaOptCap
, PTO
.LicmMssaNoAccForPromotionCap
,
486 /*AllowSpeculation=*/false));
488 // Disable header duplication in loop rotation at -Oz.
// NOTE(review): the opener of this call is missing from this listing
// (presumably `LPM1.addPass(`) - confirm. The first argument is false only
// when Level is Oz.
490 LoopRotatePass(Level
!= OptimizationLevel::Oz
, isLTOPreLink(Phase
)));
491 // TODO: Investigate promotion cap for O1.
492 LPM1
.addPass(LICMPass(PTO
.LicmMssaOptCap
, PTO
.LicmMssaNoAccForPromotionCap
,
493 /*AllowSpeculation=*/true));
// Non-trivial unswitching is requested only at O3 and only while
// -enable-npm-O3-nontrivial-unswitch is set.
// NOTE(review): the opener of this call is missing from this listing
// (presumably `LPM1.addPass(`) - confirm.
495 SimpleLoopUnswitchPass(/* NonTrivial */ Level
== OptimizationLevel::O3
&&
496 EnableO3NonTrivialUnswitching
));
497 if (EnableLoopFlatten
)
498 LPM1
.addPass(LoopFlattenPass());
500 LPM2
.addPass(LoopIdiomRecognizePass());
501 LPM2
.addPass(IndVarSimplifyPass());
// NOTE(review): the body of this callback loop is missing from this listing
// (presumably `C(LPM2, Level);`) - confirm.
503 for (auto &C
: LateLoopOptimizationsEPCallbacks
)
506 LPM2
.addPass(LoopDeletionPass());
508 if (EnableLoopInterchange
)
509 LPM2
.addPass(LoopInterchangePass());
511 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
512 // because it changes IR to make profile annotation in back compile
513 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
514 // attributes so we need to make sure and allow the full unroll pass to pay
// In other words: full unrolling is scheduled unless this is the ThinLTO
// pre-link phase AND PGOOpt is set with Action == SampleUse.
516 if (Phase
!= ThinOrFullLTOPhase::ThinLTOPreLink
|| !PGOOpt
||
517 PGOOpt
->Action
!= PGOOptions::SampleUse
)
518 LPM2
.addPass(LoopFullUnrollPass(Level
.getSpeedupLevel(),
519 /* OnlyWhenForced= */ !PTO
.LoopUnrolling
,
520 PTO
.ForgetAllSCEVInLoopUnroll
));
// NOTE(review): callback loop body missing from this listing, as above.
522 for (auto &C
: LoopOptimizerEndEPCallbacks
)
525 // We provide the opt remark emitter pass for LICM to use. We only need to do
526 // this once as it is immutable.
528 RequireAnalysisPass
<OptimizationRemarkEmitterAnalysis
, Function
>());
529 FPM
.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1
),
530 /*UseMemorySSA=*/true,
531 /*UseBlockFrequencyInfo=*/true));
533 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
534 FPM
.addPass(InstCombinePass());
535 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
536 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
537 // *All* loop passes must preserve it, in order to be able to use it.
538 FPM
.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2
),
539 /*UseMemorySSA=*/false,
540 /*UseBlockFrequencyInfo=*/false));
542 // Delete small array after loop unroll.
543 FPM
.addPass(SROAPass());
545 // The matrix extension can introduce large vector operations early, which can
546 // benefit from running vector-combine early on.
// NOTE(review): a listing line is missing before the addPass below. Given
// the comment above and the EnableMatrix option declared in this file, the
// pass is presumably guarded by it - confirm.
548 FPM
.addPass(VectorCombinePass(/*ScalarizationOnly=*/true));
550 // Eliminate redundancies.
551 FPM
.addPass(MergedLoadStoreMotionPass());
// NOTE(review): listing lines are missing around the next two addPass calls.
// As shown, both NewGVNPass and GVNPass would be added; presumably they are
// the two arms of an if/else on RunNewGVN - confirm.
553 FPM
.addPass(NewGVNPass());
555 FPM
.addPass(GVNPass());
557 // Sparse conditional constant propagation.
558 // FIXME: It isn't clear why we do this *after* loop passes rather than
560 FPM
.addPass(SCCPPass());
562 // Delete dead bit computations (instcombine runs after to fold away the dead
563 // computations, and then ADCE will run later to exploit any new DCE
564 // opportunities that creates).
565 FPM
.addPass(BDCEPass());
567 // Run instcombine after redundancy and dead bit elimination to exploit
568 // opportunities opened up by them.
569 FPM
.addPass(InstCombinePass());
570 invokePeepholeEPCallbacks(FPM
, Level
);
572 // Re-consider control flow based optimizations after redundancy elimination,
// DFA jump threading is added only when its option is set and the size
// level is 0.
574 if (EnableDFAJumpThreading
&& Level
.getSizeLevel() == 0)
575 FPM
.addPass(DFAJumpThreadingPass());
577 FPM
.addPass(JumpThreadingPass());
578 FPM
.addPass(CorrelatedValuePropagationPass());
580 // Finally, do an expensive DCE pass to catch all the dead code exposed by
581 // the simplifications and basic cleanup after all the simplifications.
582 // TODO: Investigate if this is too expensive.
583 FPM
.addPass(ADCEPass());
585 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
586 FPM
.addPass(MemCpyOptPass());
588 FPM
.addPass(DSEPass());
// A standalone LICM run (speculation allowed) wrapped in its own loop
// adaptor with MemorySSA and BlockFrequencyInfo enabled.
589 FPM
.addPass(createFunctionToLoopPassAdaptor(
590 LICMPass(PTO
.LicmMssaOptCap
, PTO
.LicmMssaNoAccForPromotionCap
,
591 /*AllowSpeculation=*/true),
592 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
594 FPM
.addPass(CoroElidePass());
// NOTE(review): callback loop body missing from this listing, as above.
596 for (auto &C
: ScalarOptimizerLateEPCallbacks
)
// Unlike the earlier SimplifyCFG runs in this function, this one also turns
// on hoisting and sinking of common instructions.
599 FPM
.addPass(SimplifyCFGPass(SimplifyCFGOptions()
600 .convertSwitchRangeToICmp(true)
601 .hoistCommonInsts(true)
602 .sinkCommonInsts(true)));
603 FPM
.addPass(InstCombinePass());
604 invokePeepholeEPCallbacks(FPM
, Level
);
// Control height reduction: requires the EnableCHR option, O3, and PGO in
// either IRUse or SampleUse mode.
606 if (EnableCHR
&& Level
== OptimizationLevel::O3
&& PGOOpt
&&
607 (PGOOpt
->Action
== PGOOptions::IRUse
||
608 PGOOpt
->Action
== PGOOptions::SampleUse
))
609 FPM
.addPass(ControlHeightReductionPass());
// NOTE(review): the return statement (presumably returning FPM) and the
// closing brace are missing from this listing.
// Appends the two module passes this function treats as required for LTO
// pre-link, in this order: CanonicalizeAliasesPass, then NameAnonGlobalPass.
//   MPM - module pass manager that receives the passes.
// NOTE(review): the closing brace is missing from this listing.
614 void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager
&MPM
) {
615 MPM
.addPass(CanonicalizeAliasesPass());
616 MPM
.addPass(NameAnonGlobalPass());
// Adds the PGO instrumentation or profile-use passes for optimized builds
// (Level must not be O0; asserted).
//   MPM                  - module pass manager being populated.
//   Level                - optimization level; affects the pre-inliner hint
//                          threshold and loop-rotation header duplication.
//   RunProfileGen        - false: add the profile-use pass; true: add the
//                          instrumentation passes (see the NOTE at the
//                          profile-use branch below).
//   IsCS                 - forwarded to the PGO passes and used for
//                          Options.UseBFIInPromotion; when false (and the
//                          pre-inliner is not disabled) a pre-instrumentation
//                          inliner plus cleanup is scheduled first.
//   ProfileFile          - profile to read (use) or output path (gen, when
//                          non-empty).
//   ProfileRemappingFile - forwarded to PGOInstrumentationUse.
//   LTOPhase             - recorded in the pre-inliner's InlineContext.
// NOTE(review): the embedded line numbers of this listing skip in places;
// some declarations and closing braces are not visible. Such spots are
// flagged below.
619 void PassBuilder::addPGOInstrPasses(ModulePassManager
&MPM
,
620 OptimizationLevel Level
, bool RunProfileGen
,
621 bool IsCS
, std::string ProfileFile
,
622 std::string ProfileRemappingFile
,
623 ThinOrFullLTOPhase LTOPhase
) {
624 assert(Level
!= OptimizationLevel::O0
&& "Not expecting O0 here!");
625 if (!IsCS
&& !DisablePreInliner
) {
// NOTE(review): the declaration of `IP` is missing from this listing
// (presumably an InlineParams local) - confirm.
628 IP
.DefaultThreshold
= PreInlineThreshold
;
630 // FIXME: The hint threshold has the same value used by the regular inliner
631 // when not optimizing for size. This should probably be lowered after
632 // performance testing.
633 // FIXME: this comment is cargo culted from the old pass manager, revisit.
634 IP
.HintThreshold
= Level
.isOptimizingForSize() ? PreInlineThreshold
: 325;
635 ModuleInlinerWrapperPass
MIWP(
636 IP
, /* MandatoryFirst */ true,
637 InlineContext
{LTOPhase
, InlinePass::EarlyInliner
});
638 CGSCCPassManager
&CGPipeline
= MIWP
.getPM();
// Small function cleanup pipeline that runs inside the pre-inliner's CGSCC
// pipeline.
640 FunctionPassManager FPM
;
641 FPM
.addPass(SROAPass());
642 FPM
.addPass(EarlyCSEPass()); // Catch trivial redundancies.
643 FPM
.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
644 true))); // Merge & remove basic blocks.
645 FPM
.addPass(InstCombinePass()); // Combine silly sequences.
646 invokePeepholeEPCallbacks(FPM
, Level
);
648 CGPipeline
.addPass(createCGSCCToFunctionPassAdaptor(
649 std::move(FPM
), PTO
.EagerlyInvalidateAnalyses
));
651 MPM
.addPass(std::move(MIWP
));
653 // Delete anything that is now dead to make sure that we don't instrument
654 // dead code. Instrumentation can end up keeping dead code around and
655 // dramatically increase code size.
656 MPM
.addPass(GlobalDCEPass());
// NOTE(review): the closing brace of the pre-inliner `if` is missing from
// this listing.
659 if (!RunProfileGen
) {
660 assert(!ProfileFile
.empty() && "Profile use expecting a profile file!");
661 MPM
.addPass(PGOInstrumentationUse(ProfileFile
, ProfileRemappingFile
, IsCS
));
662 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
663 // RequireAnalysisPass for PSI before subsequent non-module passes.
664 MPM
.addPass(RequireAnalysisPass
<ProfileSummaryAnalysis
, Module
>());
// NOTE(review): the end of the profile-use branch is missing from this
// listing. Presumably it returns here, so that the instrumentation passes
// below are added only when RunProfileGen is true - confirm.
668 // Perform PGO instrumentation.
669 MPM
.addPass(PGOInstrumentationGen(IsCS
));
671 if (EnablePostPGOLoopRotation
) {
672 // Disable header duplication in loop rotation at -Oz.
673 MPM
.addPass(createModuleToFunctionPassAdaptor(
674 createFunctionToLoopPassAdaptor(
675 LoopRotatePass(Level
!= OptimizationLevel::Oz
),
676 /*UseMemorySSA=*/false,
677 /*UseBlockFrequencyInfo=*/false),
678 PTO
.EagerlyInvalidateAnalyses
));
// NOTE(review): the closing brace of the loop-rotation `if` is missing from
// this listing.
681 // Add the profile lowering pass.
682 InstrProfOptions Options
;
683 if (!ProfileFile
.empty())
684 Options
.InstrProfileOutput
= ProfileFile
;
685 // Do counter promotion at Level greater than O0.
686 Options
.DoCounterPromotion
= true;
687 Options
.UseBFIInPromotion
= IsCS
;
688 MPM
.addPass(InstrProfiling(Options
, IsCS
));
// NOTE(review): the closing brace of the function is missing from this
// listing.
// O0 counterpart of addPGOInstrPasses: adds either the profile-use pass or
// the instrumentation + lowering passes. Unlike addPGOInstrPasses it adds no
// pre-instrumentation inliner and no loop rotation, and it sets
// DoCounterPromotion to false.
//   MPM                  - module pass manager being populated.
//   RunProfileGen        - false: add the profile-use pass; true: add the
//                          instrumentation passes (see the NOTE at the
//                          profile-use branch below).
//   IsCS                 - forwarded to the PGO passes and used for
//                          Options.UseBFIInPromotion.
//   ProfileFile          - profile to read (use) or output path (gen, when
//                          non-empty).
//   ProfileRemappingFile - forwarded to PGOInstrumentationUse.
691 void PassBuilder::addPGOInstrPassesForO0(ModulePassManager
&MPM
,
692 bool RunProfileGen
, bool IsCS
,
693 std::string ProfileFile
,
694 std::string ProfileRemappingFile
) {
695 if (!RunProfileGen
) {
696 assert(!ProfileFile
.empty() && "Profile use expecting a profile file!");
697 MPM
.addPass(PGOInstrumentationUse(ProfileFile
, ProfileRemappingFile
, IsCS
));
698 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
699 // RequireAnalysisPass for PSI before subsequent non-module passes.
700 MPM
.addPass(RequireAnalysisPass
<ProfileSummaryAnalysis
, Module
>());
// NOTE(review): the end of the profile-use branch is missing from this
// listing. Presumably it returns here, so that the instrumentation passes
// below are added only when RunProfileGen is true - confirm.
704 // Perform PGO instrumentation.
705 MPM
.addPass(PGOInstrumentationGen(IsCS
));
706 // Add the profile lowering pass.
707 InstrProfOptions Options
;
708 if (!ProfileFile
.empty())
709 Options
.InstrProfileOutput
= ProfileFile
;
710 // Do not do counter promotion at O0.
711 Options
.DoCounterPromotion
= false;
712 Options
.UseBFIInPromotion
= IsCS
;
713 MPM
.addPass(InstrProfiling(Options
, IsCS
));
// NOTE(review): the closing brace of the function is missing from this
// listing.
// Maps an OptimizationLevel to inliner parameters by forwarding its speedup
// level and size level to getInlineParams.
//   Level - optimization level to translate.
// Returns the InlineParams produced by getInlineParams.
// NOTE(review): the closing brace is missing from this listing.
716 static InlineParams
getInlineParamsFromOptLevel(OptimizationLevel Level
) {
717 return getInlineParams(Level
.getSpeedupLevel(), Level
.getSizeLevel());
// Builds the CGSCC-based inliner pipeline as a ModuleInlinerWrapperPass
// (MIWP). It configures the inline parameters, registers module-level
// analysis requirements on the wrapper, and fills the wrapper's CGSCC
// pipeline with attribute deduction, optional argument promotion / OpenMP
// optimization, extension-point callbacks, the nested function
// simplification pipeline, and coroutine splitting.
//   Level - optimization level; selects inline parameters and gates
//           ArgumentPromotionPass (O3) and OpenMPOptCGSCCPass (O2/O3).
//   Phase - LTO phase; recorded in the InlineContext and used for the
//           ThinLTO pre-link + sample-PGO hot-callsite exception.
// NOTE(review): the embedded line numbers of this listing skip in places;
// some guards, call openers, comment continuation lines and the final
// return (presumably MIWP) / closing brace are not visible. Such spots are
// flagged below.
720 ModuleInlinerWrapperPass
721 PassBuilder::buildInlinerPipeline(OptimizationLevel Level
,
722 ThinOrFullLTOPhase Phase
) {
723 InlineParams IP
= getInlineParamsFromOptLevel(Level
);
724 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
725 // disable hot callsite inline (as much as possible [1]) because it makes
726 // profile annotation in the backend inaccurate.
728 // [1] Note the cost of a function could be below zero due to erased
729 // prologue / epilogue.
730 if (Phase
== ThinOrFullLTOPhase::ThinLTOPreLink
&& PGOOpt
&&
731 PGOOpt
->Action
== PGOOptions::SampleUse
)
732 IP
.HotCallSiteThreshold
= 0;
// NOTE(review): a listing line is missing before this assignment. Given the
// option's description ("Enable inline deferral during PGO"), it is
// presumably guarded by a check that PGO is active - confirm.
735 IP
.EnableDeferral
= EnablePGOInlineDeferral
;
737 ModuleInlinerWrapperPass
MIWP(
738 IP
, PerformMandatoryInliningsFirst
,
739 InlineContext
{Phase
, InlinePass::CGSCCInliner
},
740 UseInlineAdvisor
, MaxDevirtIterations
);
742 // Require the GlobalsAA analysis for the module so we can query it within
743 // the CGSCC pipeline.
744 MIWP
.addModulePass(RequireAnalysisPass
<GlobalsAA
, Module
>());
745 // Invalidate AAManager so it can be recreated and pick up the newly available
// NOTE(review): the opener of the following call is missing from this
// listing (presumably `MIWP.addModulePass(`) - confirm.
748 createModuleToFunctionPassAdaptor(InvalidateAnalysisPass
<AAManager
>()));
750 // Require the ProfileSummaryAnalysis for the module so we can query it within
752 MIWP
.addModulePass(RequireAnalysisPass
<ProfileSummaryAnalysis
, Module
>());
754 // Now begin the main postorder CGSCC pipeline.
755 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
756 // manager and trying to emulate its precise behavior. Much of this doesn't
757 // make a lot of sense and we should revisit the core CGSCC structure.
758 CGSCCPassManager
&MainCGPipeline
= MIWP
.getPM();
760 // Note: historically, the PruneEH pass was run first to deduce nounwind and
761 // generally clean up exception handling overhead. It isn't clear this is
762 // valuable as the inliner doesn't currently care whether it is inlining an
// AttributorRun is tested as a bit mask here.
765 if (AttributorRun
& AttributorRunOption::CGSCC
)
766 MainCGPipeline
.addPass(AttributorCGSCCPass());
768 // Now deduce any function attributes based in the current code.
769 MainCGPipeline
.addPass(PostOrderFunctionAttrsPass());
771 // When at O3 add argument promotion to the pass pipeline.
772 // FIXME: It isn't at all clear why this should be limited to O3.
773 if (Level
== OptimizationLevel::O3
)
774 MainCGPipeline
.addPass(ArgumentPromotionPass());
776 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
777 // there are no OpenMP runtime calls present in the module.
778 if (Level
== OptimizationLevel::O2
|| Level
== OptimizationLevel::O3
)
779 MainCGPipeline
.addPass(OpenMPOptCGSCCPass());
781 for (auto &C
: CGSCCOptimizerLateEPCallbacks
)
782 C(MainCGPipeline
, Level
);
784 // Lastly, add the core function simplification pipeline nested inside the
// The adaptor also receives the eager-invalidation setting and the
// no-rerun option as its second and third arguments.
786 MainCGPipeline
.addPass(createCGSCCToFunctionPassAdaptor(
787 buildFunctionSimplificationPipeline(Level
, Phase
),
788 PTO
.EagerlyInvalidateAnalyses
, EnableNoRerunSimplificationPipeline
));
790 MainCGPipeline
.addPass(CoroSplitPass(Level
!= OptimizationLevel::O0
));
// When the no-rerun option is on, ShouldNotRunFunctionPassesAnalysis is
// invalidated for every function by a late module pass on the wrapper.
792 if (EnableNoRerunSimplificationPipeline
)
793 MIWP
.addLateModulePass(createModuleToFunctionPassAdaptor(
794 InvalidateAnalysisPass
<ShouldNotRunFunctionPassesAnalysis
>()));
// NOTE(review): the return statement (presumably returning MIWP) and the
// closing brace are missing from this listing.
/// Assembles the module-inliner pipeline in a fresh ModulePassManager: a
/// ModuleInlinerPass configured from the inline parameters for \p Level,
/// followed by the function simplification pipeline and CoroSplitPass.
///
/// \param Level optimization level used to derive the inline parameters and
///              the nested function simplification pipeline.
/// \param Phase LTO phase; forwarded to the inliner and the simplification
///              pipeline, and used to adjust the hot call site threshold.
///
/// NOTE(review): the return statement is not visible in this listing;
/// presumably MPM is returned -- confirm.
800 PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level
,
801 ThinOrFullLTOPhase Phase
) {
802 ModulePassManager MPM
;
804 InlineParams IP
= getInlineParamsFromOptLevel(Level
);
805 // For PreLinkThinLTO + SamplePGO, set the hot call site threshold to 0 to
806 // disable hot callsite inline (as much as possible [1]) because it makes
807 // profile annotation in the backend inaccurate.
809 // [1] Note the cost of a function could be below zero due to erased
810 // prologue / epilogue.
811 if (Phase
== ThinOrFullLTOPhase::ThinLTOPreLink
&& PGOOpt
&&
812 PGOOpt
->Action
== PGOOptions::SampleUse
)
813 IP
.HotCallSiteThreshold
= 0;
816 IP
.EnableDeferral
= EnablePGOInlineDeferral
;
818 // The inline deferral logic is used to avoid losing some
819 // inlining chance in future. It is helpful in SCC inliner, in which
820 // inlining is processed in bottom-up order.
821 // While in module inliner, the inlining order is a priority-based order
822 // by default. The inline deferral is unnecessary there. So we disable the
823 // inline deferral logic in module inliner.
// NOTE(review): this unconditional store overrides whatever value was
// written to IP.EnableDeferral just above.
824 IP
.EnableDeferral
= false;
// Run the module inliner itself.
826 MPM
.addPass(ModuleInlinerPass(IP
, UseInlineAdvisor
, Phase
));
// Simplify every function once inlining has been performed.
828 MPM
.addPass(createModuleToFunctionPassAdaptor(
829 buildFunctionSimplificationPipeline(Level
, Phase
),
830 PTO
.EagerlyInvalidateAnalyses
));
// Split coroutines; the boolean passed to CoroSplitPass is true for every
// level other than O0.
832 MPM
.addPass(createModuleToPostOrderCGSCCPassAdaptor(
833 CoroSplitPass(Level
!= OptimizationLevel::O0
)));
/// Assembles the core module simplification pipeline for \p Level and
/// \p Phase in a fresh ModulePassManager. In the order visible below it adds:
/// optional pseudo-probe insertion, attribute inference and CoroEarly, an
/// early per-function cleanup pipeline, sample-profile loading, OpenMP and
/// Attributor passes, IPSCCP / GlobalOpt style global cleanup, instrumentation
/// PGO passes, the inliner pipeline(s), CoroCleanup and the memory profiler.
///
/// \param Level optimization level; gates several O3-only / non-O0 passes.
/// \param Phase LTO phase; several passes are skipped or added depending on
///              whether this is a ThinLTO pre-link or post-link compile.
///
/// NOTE(review): the return statement is not visible in this listing;
/// presumably MPM is returned -- confirm.
839 PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level
,
840 ThinOrFullLTOPhase Phase
) {
841 ModulePassManager MPM
;
843 // Place pseudo probe instrumentation as the first pass of the pipeline to
844 // minimize the impact of optimization changes.
845 if (PGOOpt
&& PGOOpt
->PseudoProbeForProfiling
&&
846 Phase
!= ThinOrFullLTOPhase::ThinLTOPostLink
)
847 MPM
.addPass(SampleProfileProbePass(TM
));
// True when PGO options are present and request sample-profile use.
849 bool HasSampleProfile
= PGOOpt
&& (PGOOpt
->Action
== PGOOptions::SampleUse
);
851 // In ThinLTO mode, when flattened profile is used, all the available
852 // profile information will be annotated in PreLink phase so there is
853 // no need to load the profile again in PostLink.
854 bool LoadSampleProfile
=
856 !(FlattenedProfileUsed
&& Phase
== ThinOrFullLTOPhase::ThinLTOPostLink
);
858 // During the ThinLTO backend phase we perform early indirect call promotion
859 // here, before globalopt. Otherwise imported available_externally functions
860 // look unreferenced and are removed. If we are going to load the sample
861 // profile then defer until later.
862 // TODO: See if we can move later and consolidate with the location where
863 // we perform ICP when we are loading a sample profile.
864 // TODO: We pass HasSampleProfile (whether there was a sample profile file
865 // passed to the compile) to the SamplePGO flag of ICP. This is used to
866 // determine whether the new direct calls are annotated with prof metadata.
867 // Ideally this should be determined from whether the IR is annotated with
868 // sample profile, and not whether a sample profile was provided on the
869 // command line. E.g. for flattened profiles where we will not be reloading
870 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
871 // provide the sample profile file.
872 if (Phase
== ThinOrFullLTOPhase::ThinLTOPostLink
&& !LoadSampleProfile
)
873 MPM
.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile
));
875 // Do basic inference of function attributes from known properties of system
876 // libraries and other oracles.
877 MPM
.addPass(InferFunctionAttrsPass());
878 MPM
.addPass(CoroEarlyPass());
880 // Create an early function pass manager to cleanup the output of the
882 FunctionPassManager EarlyFPM
;
883 // Lower llvm.expect to metadata before attempting transforms.
884 // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
885 EarlyFPM
.addPass(LowerExpectIntrinsicPass());
886 EarlyFPM
.addPass(SimplifyCFGPass());
887 EarlyFPM
.addPass(SROAPass());
888 EarlyFPM
.addPass(EarlyCSEPass());
// Call site splitting is only added at O3.
889 if (Level
== OptimizationLevel::O3
)
890 EarlyFPM
.addPass(CallSiteSplittingPass());
892 // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
893 // to convert bitcast to direct calls so that they can be inlined during the
894 // profile annotation preparation step.
895 // More details about SamplePGO design can be found in:
896 // https://research.google.com/pubs/pub45290.html
897 // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
898 if (LoadSampleProfile
)
899 EarlyFPM
.addPass(InstCombinePass());
900 MPM
.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM
),
901 PTO
.EagerlyInvalidateAnalyses
));
903 if (LoadSampleProfile
) {
904 // Annotate sample profile right after early FPM to ensure freshness of
906 MPM
.addPass(SampleProfileLoaderPass(PGOOpt
->ProfileFile
,
907 PGOOpt
->ProfileRemappingFile
, Phase
));
908 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
909 // RequireAnalysisPass for PSI before subsequent non-module passes.
910 MPM
.addPass(RequireAnalysisPass
<ProfileSummaryAnalysis
, Module
>());
911 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
912 // for the profile annotation to be accurate in the LTO backend.
913 if (Phase
!= ThinOrFullLTOPhase::ThinLTOPreLink
&&
914 Phase
!= ThinOrFullLTOPhase::FullLTOPreLink
)
915 // We perform early indirect call promotion here, before globalopt.
916 // This is important for the ThinLTO backend phase because otherwise
917 // imported available_externally functions look unreferenced and are removed.
920 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
923 // Try to perform OpenMP specific optimizations on the module. This is a
924 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
925 if (Level
!= OptimizationLevel::O0
)
926 MPM
.addPass(OpenMPOptPass());
// Module-level Attributor, only when the MODULE bit of AttributorRun is set.
928 if (AttributorRun
& AttributorRunOption::MODULE
)
929 MPM
.addPass(AttributorPass());
931 // Lower type metadata and the type.test intrinsic in the ThinLTO
932 // post link pipeline after ICP. This is to enable usage of the type
933 // tests in ICP sequences.
934 if (Phase
== ThinOrFullLTOPhase::ThinLTOPostLink
)
935 MPM
.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
937 for (auto &C
: PipelineEarlySimplificationEPCallbacks
)
940 // Specialize functions with IPSCCP.
941 if (EnableFunctionSpecialization
&& Level
== OptimizationLevel::O3
)
942 MPM
.addPass(FunctionSpecializationPass());
944 // Interprocedural constant propagation now that basic cleanup has occurred
945 // and prior to optimizing globals.
946 // FIXME: This position in the pipeline hasn't been carefully considered in
947 // years, it should be re-analyzed.
948 MPM
.addPass(IPSCCPPass());
950 // Attach metadata to indirect call sites indicating the set of functions
951 // they may target at run-time. This should follow IPSCCP.
952 MPM
.addPass(CalledValuePropagationPass());
954 // Optimize globals to try and fold them into constants.
955 MPM
.addPass(GlobalOptPass());
957 // Promote any localized globals to SSA registers.
958 // FIXME: Should this instead be a run of SROA?
959 // FIXME: We should probably run instcombine and simplifycfg afterward to
960 // delete control flows that are dead once globals have been folded to constants.
962 MPM
.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
964 // Remove any dead arguments exposed by cleanups and constant folding
966 MPM
.addPass(DeadArgumentEliminationPass());
968 // Create a small function pass pipeline to cleanup after all the global optimizations.
970 FunctionPassManager GlobalCleanupPM
;
971 GlobalCleanupPM
.addPass(InstCombinePass());
972 invokePeepholeEPCallbacks(GlobalCleanupPM
, Level
);
974 GlobalCleanupPM
.addPass(
975 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
976 MPM
.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM
),
977 PTO
.EagerlyInvalidateAnalyses
));
979 // Add all the requested passes for instrumentation PGO, if requested.
980 if (PGOOpt
&& Phase
!= ThinOrFullLTOPhase::ThinLTOPostLink
&&
981 (PGOOpt
->Action
== PGOOptions::IRInstr
||
982 PGOOpt
->Action
== PGOOptions::IRUse
)) {
983 addPGOInstrPasses(MPM
, Level
,
984 /* RunProfileGen */ PGOOpt
->Action
== PGOOptions::IRInstr
,
985 /* IsCS */ false, PGOOpt
->ProfileFile
,
986 PGOOpt
->ProfileRemappingFile
, Phase
);
987 MPM
.addPass(PGOIndirectCallPromotion(false, false));
// Context-sensitive IR instrumentation: create the profile-generation
// variable from CSProfileGenFile (skipped in the ThinLTO post-link phase).
989 if (PGOOpt
&& Phase
!= ThinOrFullLTOPhase::ThinLTOPostLink
&&
990 PGOOpt
->CSAction
== PGOOptions::CSIRInstr
)
991 MPM
.addPass(PGOInstrumentationGenCreateVar(PGOOpt
->CSProfileGenFile
));
993 // Synthesize function entry counts for non-PGO compilation.
994 if (EnableSyntheticCounts
&& !PGOOpt
)
995 MPM
.addPass(SyntheticCountsPropagation());
997 if (EnableModuleInliner
)
998 MPM
.addPass(buildModuleInlinerPipeline(Level
, Phase
));
1000 MPM
.addPass(buildInlinerPipeline(Level
, Phase
));
1002 MPM
.addPass(CoroCleanupPass());
// Memory profiler instrumentation, skipped in the ThinLTO pre-link phase.
1004 if (EnableMemProfiler
&& Phase
!= ThinOrFullLTOPhase::ThinLTOPreLink
) {
1005 MPM
.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
1006 MPM
.addPass(ModuleMemProfilerPass());
/// Appends the vectorization-related function passes to \p FPM: the loop
/// vectorizer, loop unrolling (optionally preceded by unroll-and-jam), load
/// elimination, optional extra cleanup passes, the SLP vectorizer,
/// VectorCombine, and trailing InstCombine / LICM / alignment cleanup.
///
/// \param Level     optimization level; its speedup level is passed to the
///                  unroll passes and gates the extra vectorizer passes.
/// \param FPM       function pass manager that receives the passes.
/// \param IsFullLTO not referenced in the lines visible in this listing --
///                  NOTE(review): confirm its use against the full source.
1012 /// TODO: Should LTO cause any differences to this set of passes?
1013 void PassBuilder::addVectorPasses(OptimizationLevel Level
,
1014 FunctionPassManager
&FPM
, bool IsFullLTO
) {
// Both LoopVectorizeOptions arguments are the negation of the
// corresponding PTO setting.
1015 FPM
.addPass(LoopVectorizePass(
1016 LoopVectorizeOptions(!PTO
.LoopInterleaving
, !PTO
.LoopVectorization
)));
1019 // The vectorizer may have significantly shortened a loop body; unroll
1020 // again. Unroll small loops to hide loop backedge latency and saturate any
1021 // parallel execution resources of an out-of-order processor. We also then
1022 // need to clean up redundancies and loop invariant code.
1023 // FIXME: It would be really good to use a loop-integrated instruction
1024 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1025 // across the loop nests.
1026 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1027 if (EnableUnrollAndJam
&& PTO
.LoopUnrolling
)
1028 FPM
.addPass(createFunctionToLoopPassAdaptor(
1029 LoopUnrollAndJamPass(Level
.getSpeedupLevel())));
1030 FPM
.addPass(LoopUnrollPass(LoopUnrollOptions(
1031 Level
.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO
.LoopUnrolling
,
1032 PTO
.ForgetAllSCEVInLoopUnroll
)));
1033 FPM
.addPass(WarnMissedTransformationsPass());
1037 // Eliminate loads by forwarding stores from the previous iteration to loads
1038 // of the current iteration.
1039 FPM
.addPass(LoopLoadEliminationPass());
1041 // Cleanup after the loop optimization passes.
1042 FPM
.addPass(InstCombinePass());
1044 if (Level
.getSpeedupLevel() > 1 && ExtraVectorizerPasses
) {
1045 ExtraVectorPassManager ExtraPasses
;
1046 // At higher optimization levels, try to clean up any runtime overlap and
1047 // alignment checks inserted by the vectorizer. We want to track correlated
1048 // runtime checks for two inner loops in the same outer loop, fold any
1049 // common computations, hoist loop-invariant aspects out of any outer loop,
1050 // and unswitch the runtime checks if possible. Once hoisted, we may have
1051 // dead (or speculatable) control flows or more combining opportunities.
1052 ExtraPasses
.addPass(EarlyCSEPass());
1053 ExtraPasses
.addPass(CorrelatedValuePropagationPass());
1054 ExtraPasses
.addPass(InstCombinePass());
1055 LoopPassManager LPM
;
1056 LPM
.addPass(LICMPass(PTO
.LicmMssaOptCap
, PTO
.LicmMssaNoAccForPromotionCap
,
1057 /*AllowSpeculation=*/true));
1058 LPM
.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level
==
1059 OptimizationLevel::O3
));
1060 ExtraPasses
.addPass(
1061 RequireAnalysisPass
<OptimizationRemarkEmitterAnalysis
, Function
>());
1062 ExtraPasses
.addPass(
1063 createFunctionToLoopPassAdaptor(std::move(LPM
), /*UseMemorySSA=*/true,
1064 /*UseBlockFrequencyInfo=*/true));
1065 ExtraPasses
.addPass(
1066 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1067 ExtraPasses
.addPass(InstCombinePass());
1068 FPM
.addPass(std::move(ExtraPasses
));
1071 // Now that we've formed fast to execute loop structures, we do further
1072 // optimizations. These are run afterward as they might block doing complex
1073 // analyses and transforms such as what are needed for loop vectorization.
1075 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1076 // GVN, loop transforms, and others have already run, so it's now better to
1077 // convert to more optimized IR using more aggressive simplify CFG options.
1078 // The extra sinking transform can create larger basic blocks, so do this
1079 // before SLP vectorization.
1080 FPM
.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1081 .forwardSwitchCondToPhi(true)
1082 .convertSwitchRangeToICmp(true)
1083 .convertSwitchToLookupTable(true)
1084 .needCanonicalLoops(false)
1085 .hoistCommonInsts(true)
1086 .sinkCommonInsts(true)));
1089 FPM
.addPass(SCCPPass());
1090 FPM
.addPass(InstCombinePass());
1091 FPM
.addPass(BDCEPass());
1094 // Optimize parallel scalar instruction chains into SIMD instructions.
1095 if (PTO
.SLPVectorization
) {
1096 FPM
.addPass(SLPVectorizerPass());
1097 if (Level
.getSpeedupLevel() > 1 && ExtraVectorizerPasses
) {
1098 FPM
.addPass(EarlyCSEPass());
1101 // Enhance/cleanup vector code.
1102 FPM
.addPass(VectorCombinePass());
1105 FPM
.addPass(InstCombinePass());
1106 // Unroll small loops to hide loop backedge latency and saturate any
1107 // parallel execution resources of an out-of-order processor. We also then
1108 // need to clean up redundancies and loop invariant code.
1109 // FIXME: It would be really good to use a loop-integrated instruction
1110 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1111 // across the loop nests.
1112 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1113 if (EnableUnrollAndJam
&& PTO
.LoopUnrolling
) {
1114 FPM
.addPass(createFunctionToLoopPassAdaptor(
1115 LoopUnrollAndJamPass(Level
.getSpeedupLevel())));
1117 FPM
.addPass(LoopUnrollPass(LoopUnrollOptions(
1118 Level
.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO
.LoopUnrolling
,
1119 PTO
.ForgetAllSCEVInLoopUnroll
)));
1120 FPM
.addPass(WarnMissedTransformationsPass());
1121 FPM
.addPass(InstCombinePass());
1123 RequireAnalysisPass
<OptimizationRemarkEmitterAnalysis
, Function
>());
1124 FPM
.addPass(createFunctionToLoopPassAdaptor(
1125 LICMPass(PTO
.LicmMssaOptCap
, PTO
.LicmMssaNoAccForPromotionCap
,
1126 /*AllowSpeculation=*/true),
1127 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
1130 // Now that we've vectorized and unrolled loops, we may have more refined
1131 // alignment information, try to re-derive it here.
1132 FPM
.addPass(AlignmentFromAssumptionsPass());
1135 FPM
.addPass(InstCombinePass());
/// Assembles the module optimization pipeline for \p Level and \p LTOPhase in
/// a fresh ModulePassManager. In the order visible below it adds: global
/// optimization / DCE, optional partial inlining, attribute inference,
/// context-sensitive PGO passes, GlobalsAA recomputation, a per-function
/// optimization pipeline (loop rotation, distribution, the vector passes,
/// late cleanup), and final module-level passes such as hot/cold splitting,
/// IR outlining, function merging, GlobalDCE and ConstantMerge.
///
/// \param Level    optimization level forwarded to the nested pipelines.
/// \param LTOPhase LTO phase; LTOPreLink below is true for either pre-link
///                 phase and disables several late passes.
///
/// NOTE(review): the return statement is not visible in this listing;
/// presumably MPM is returned -- confirm.
1139 PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level
,
1140 ThinOrFullLTOPhase LTOPhase
) {
1141 const bool LTOPreLink
= (LTOPhase
== ThinOrFullLTOPhase::ThinLTOPreLink
||
1142 LTOPhase
== ThinOrFullLTOPhase::FullLTOPreLink
);
1143 ModulePassManager MPM
;
1145 // Optimize globals now that the module is fully simplified.
1146 MPM
.addPass(GlobalOptPass());
1147 MPM
.addPass(GlobalDCEPass());
1149 // Run partial inlining pass to partially inline functions that have
1151 if (RunPartialInlining
)
1152 MPM
.addPass(PartialInlinerPass());
1154 // Remove avail extern fns and globals definitions since we aren't compiling
1155 // an object file for later LTO. For LTO we want to preserve these so they
1156 // are eligible for inlining at link-time. Note if they are unreferenced they
1157 // will be removed by GlobalDCE later, so this only impacts referenced
1158 // available externally globals. Eventually they will be suppressed during
1159 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1160 // may make globals referenced by available external functions dead and saves
1161 // running remaining passes on the eliminated functions. These should be
1162 // preserved during prelinking for link-time inlining decisions.
1164 MPM
.addPass(EliminateAvailableExternallyPass());
1166 if (EnableOrderFileInstrumentation
)
1167 MPM
.addPass(InstrOrderFilePass());
1169 // Do RPO function attribute inference across the module to forward-propagate
1170 // attributes where applicable.
1171 // FIXME: Is this really an optimization rather than a canonicalization?
1172 MPM
.addPass(ReversePostOrderFunctionAttrsPass());
1174 // Do a post inline PGO instrumentation and use pass. This is a context
1175 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1176 // cross-module inline has not been done yet. The context sensitive
1177 // instrumentation is after all the inlines are done.
1178 if (!LTOPreLink
&& PGOOpt
) {
1179 if (PGOOpt
->CSAction
== PGOOptions::CSIRInstr
)
1180 addPGOInstrPasses(MPM
, Level
, /* RunProfileGen */ true,
1181 /* IsCS */ true, PGOOpt
->CSProfileGenFile
,
1182 PGOOpt
->ProfileRemappingFile
, LTOPhase
);
1183 else if (PGOOpt
->CSAction
== PGOOptions::CSIRUse
)
1184 addPGOInstrPasses(MPM
, Level
, /* RunProfileGen */ false,
1185 /* IsCS */ true, PGOOpt
->ProfileFile
,
1186 PGOOpt
->ProfileRemappingFile
, LTOPhase
);
1189 // Re-compute GlobalsAA here prior to function passes. This is particularly
1190 // useful as the above will have inlined, DCE'ed, and function-attr
1191 // propagated everything. We should at this point have a reasonably minimal
1192 // and richly annotated call graph. By computing aliasing and mod/ref
1193 // information for all local globals here, the late loop passes and notably
1194 // the vectorizer will be able to use them to help recognize vectorizable
1195 // memory operations.
1196 MPM
.addPass(RecomputeGlobalsAAPass());
1198 for (auto &C
: OptimizerEarlyEPCallbacks
)
1201 FunctionPassManager OptimizePM
;
1202 OptimizePM
.addPass(Float2IntPass());
1203 OptimizePM
.addPass(LowerConstantIntrinsicsPass());
1206 OptimizePM
.addPass(LowerMatrixIntrinsicsPass());
1207 OptimizePM
.addPass(EarlyCSEPass());
1210 // FIXME: We need to run some loop optimizations to re-rotate loops after
1211 // simplifycfg and others undo their rotation.
1213 // Optimize the loop execution. These passes operate on entire loop nests
1214 // rather than on each loop in an inside-out manner, and so they are actually
1217 for (auto &C
: VectorizerStartEPCallbacks
)
1218 C(OptimizePM
, Level
);
1220 LoopPassManager LPM
;
1221 // First rotate loops that may have been un-rotated by prior passes.
1222 // Disable header duplication at -Oz.
1223 LPM
.addPass(LoopRotatePass(Level
!= OptimizationLevel::Oz
, LTOPreLink
));
1224 // Some loops may have become dead by now. Try to delete them.
1225 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1226 // this may need to be revisited once we run GVN before loop deletion
1227 // in the simplification pipeline.
1228 LPM
.addPass(LoopDeletionPass());
1229 OptimizePM
.addPass(createFunctionToLoopPassAdaptor(
1230 std::move(LPM
), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1232 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1233 // into separate loop that would otherwise inhibit vectorization. This is
1234 // currently only performed for loops marked with the metadata
1235 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1236 OptimizePM
.addPass(LoopDistributePass());
1238 // Populates the VFABI attribute with the scalar-to-vector mappings
1239 // from the TargetLibraryInfo.
1240 OptimizePM
.addPass(InjectTLIMappings());
1242 addVectorPasses(Level
, OptimizePM
, /* IsFullLTO */ false);
1244 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1245 // canonicalization pass that enables other optimizations. As a result,
1246 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1247 // result too early.
1248 OptimizePM
.addPass(LoopSinkPass());
1250 // And finally clean up LCSSA form before generating code.
1251 OptimizePM
.addPass(InstSimplifyPass());
1253 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1254 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1255 // flattening of blocks.
1256 OptimizePM
.addPass(DivRemPairsPass());
1258 // Try to annotate calls that were created during optimization.
1259 OptimizePM
.addPass(TailCallElimPass());
1261 // LoopSink (and other loop passes since the last simplifyCFG) might have
1262 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1264 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1266 // Add the core optimizing pipeline.
1267 MPM
.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM
),
1268 PTO
.EagerlyInvalidateAnalyses
));
1270 for (auto &C
: OptimizerLastEPCallbacks
)
1273 // Split out cold code. Splitting is done late to avoid hiding context from
1274 // other optimizations and inadvertently regressing performance. The tradeoff
1275 // is that this has a higher code size cost than splitting early.
1276 if (EnableHotColdSplit
&& !LTOPreLink
)
1277 MPM
.addPass(HotColdSplittingPass());
1279 // Search the code for similar regions of code. If enough similar regions can
1280 // be found where extracting the regions into their own function will decrease
1281 // the size of the program, we extract the regions, and deduplicate the
1282 // structurally similar regions.
1283 if (EnableIROutliner
)
1284 MPM
.addPass(IROutlinerPass());
1286 // Merge functions if requested.
1287 if (PTO
.MergeFunctions
)
1288 MPM
.addPass(MergeFunctionsPass());
1290 // Now we need to do some global optimization transforms.
1291 // FIXME: It would seem like these should come first in the optimization
1292 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1294 MPM
.addPass(GlobalDCEPass());
1295 MPM
.addPass(ConstantMergePass());
// Call graph profile pass; skipped when pre-linking for LTO.
1297 if (PTO
.CallGraphProfile
&& !LTOPreLink
)
1298 MPM
.addPass(CGProfilePass());
1300 // TODO: Relative lookup table converter pass caused an issue when full lto is
1301 // enabled. See https://reviews.llvm.org/D94355 for more details.
1302 // Until the issue is fixed, disable this pass during pre-linking phase.
1304 MPM
.addPass(RelLookupTableConverterPass());
/// Assembles the default per-module pipeline in a fresh ModulePassManager:
/// annotation/attribute setup passes, the module simplification pipeline, the
/// module optimization pipeline, an optional pseudo-probe update, and the
/// annotation-remarks pass. Asserts that \p Level is not O0.
///
/// NOTE(review): LTOPreLink is read below but its declaration is not visible
/// in this listing (presumably a parameter on a line missing here); the
/// return statement is likewise not visible -- confirm both.
1310 PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level
,
1312 assert(Level
!= OptimizationLevel::O0
&&
1313 "Must request optimizations for the default pipeline!");
1315 ModulePassManager MPM
;
1317 // Convert @llvm.global.annotations to !annotation metadata.
1318 MPM
.addPass(Annotation2MetadataPass());
1320 // Force any function attributes we want the rest of the pipeline to observe.
1321 MPM
.addPass(ForceFunctionAttrsPass());
1323 // Apply module pipeline start EP callback.
1324 for (auto &C
: PipelineStartEPCallbacks
)
// Add discriminators when the PGO options request debug info for profiling.
1327 if (PGOOpt
&& PGOOpt
->DebugInfoForProfiling
)
1328 MPM
.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
// Map the LTOPreLink flag onto the phase passed to the nested pipelines.
1330 const ThinOrFullLTOPhase LTOPhase
= LTOPreLink
1331 ? ThinOrFullLTOPhase::FullLTOPreLink
1332 : ThinOrFullLTOPhase::None
;
1333 // Add the core simplification pipeline.
1334 MPM
.addPass(buildModuleSimplificationPipeline(Level
, LTOPhase
));
1336 // Now add the optimization pipeline.
1337 MPM
.addPass(buildModuleOptimizationPipeline(Level
, LTOPhase
));
// Update pseudo probes when probe-based sample profile use is requested.
1339 if (PGOOpt
&& PGOOpt
->PseudoProbeForProfiling
&&
1340 PGOOpt
->Action
== PGOOptions::SampleUse
)
1341 MPM
.addPass(PseudoProbeUpdatePass());
1343 // Emit annotation remarks.
1344 addAnnotationRemarksPass(MPM
);
1347 addRequiredLTOPreLinkPasses(MPM
);
/// Assembles the ThinLTO pre-link pipeline in a fresh ModulePassManager:
/// annotation/attribute setup passes, the module simplification pipeline run
/// in the ThinLTOPreLink phase, optional partial inlining, GlobalOpt, an
/// optional pseudo-probe update, annotation remarks and the required LTO
/// pre-link passes. Asserts that \p Level is not O0.
///
/// NOTE(review): the return statement is not visible in this listing;
/// presumably MPM is returned -- confirm.
1353 PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level
) {
1354 assert(Level
!= OptimizationLevel::O0
&&
1355 "Must request optimizations for the default pipeline!");
1357 ModulePassManager MPM
;
1359 // Convert @llvm.global.annotations to !annotation metadata.
1360 MPM
.addPass(Annotation2MetadataPass());
1362 // Force any function attributes we want the rest of the pipeline to observe.
1363 MPM
.addPass(ForceFunctionAttrsPass());
// Add discriminators when the PGO options request debug info for profiling.
1365 if (PGOOpt
&& PGOOpt
->DebugInfoForProfiling
)
1366 MPM
.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
1368 // Apply module pipeline start EP callback.
1369 for (auto &C
: PipelineStartEPCallbacks
)
1372 // If we are planning to perform ThinLTO later, we don't bloat the code with
1373 // unrolling/vectorization/... now. Just simplify the module as much as we can.
1375 MPM
.addPass(buildModuleSimplificationPipeline(
1376 Level
, ThinOrFullLTOPhase::ThinLTOPreLink
));
1378 // Run partial inlining pass to partially inline functions that have
1380 // FIXME: It isn't clear whether this is really the right place to run this
1381 // in ThinLTO. Because there is another canonicalization and simplification
1382 // phase that will run after the thin link, running this here ends up with
1383 // less information than will be available later and it may grow functions in
1384 // ways that aren't beneficial.
1385 if (RunPartialInlining
)
1386 MPM
.addPass(PartialInlinerPass());
1388 // Reduce the size of the IR as much as possible.
1389 MPM
.addPass(GlobalOptPass());
// Update pseudo probes when probe-based sample profile use is requested.
1391 if (PGOOpt
&& PGOOpt
->PseudoProbeForProfiling
&&
1392 PGOOpt
->Action
== PGOOptions::SampleUse
)
1393 MPM
.addPass(PseudoProbeUpdatePass());
1395 // Handle OptimizerLastEPCallbacks added by clang on PreLink. Actual
1396 // optimization is going to be done in PostLink stage, but clang can't
1397 // add callbacks there in case of in-process ThinLTO called by linker.
1398 for (auto &C
: OptimizerLastEPCallbacks
)
1401 // Emit annotation remarks.
1402 addAnnotationRemarksPass(MPM
);
1404 addRequiredLTOPreLinkPasses(MPM
);
/// Assembles the ThinLTO post-link (backend) pipeline in a fresh
/// ModulePassManager: when \p ImportSummary is non-null, whole-program
/// devirtualization and type-test lowering driven by that summary; a reduced
/// set of passes at O0; then the module simplification and optimization
/// pipelines run in the ThinLTOPostLink phase, and annotation remarks.
///
/// \param Level         optimization level for the nested pipelines.
/// \param ImportSummary module summary index handed to WholeProgramDevirtPass
///                      and LowerTypeTestsPass; may be null, in which case
///                      those passes are not added.
///
/// NOTE(review): the return statements are not visible in this listing;
/// presumably MPM is returned -- confirm.
1409 ModulePassManager
PassBuilder::buildThinLTODefaultPipeline(
1410 OptimizationLevel Level
, const ModuleSummaryIndex
*ImportSummary
) {
1411 ModulePassManager MPM
;
1413 // Convert @llvm.global.annotations to !annotation metadata.
1414 MPM
.addPass(Annotation2MetadataPass());
1416 if (ImportSummary
) {
1417 // These passes import type identifier resolutions for whole-program
1418 // devirtualization and CFI. They must run early because other passes may
1419 // disturb the specific instruction patterns that these passes look for,
1420 // creating dependencies on resolutions that may not appear in the summary.
1422 // For example, GVN may transform the pattern assume(type.test) appearing in
1423 // two basic blocks into assume(phi(type.test, type.test)), which would
1424 // transform a dependency on a WPD resolution into a dependency on a type
1425 // identifier resolution for CFI.
1427 // Also, WPD has access to more precise information than ICP and can
1428 // devirtualize more effectively, so it should operate on the IR first.
1430 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1431 // metadata and intrinsics.
1432 MPM
.addPass(WholeProgramDevirtPass(nullptr, ImportSummary
));
1433 MPM
.addPass(LowerTypeTestsPass(nullptr, ImportSummary
));
1436 if (Level
== OptimizationLevel::O0
) {
1437 // Run a second time to clean up any type tests left behind by WPD for use in ICP.
1439 MPM
.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1440 // Drop available_externally and unreferenced globals. This is necessary
1441 // with ThinLTO in order to avoid leaving undefined references to dead
1442 // globals in the object file.
1443 MPM
.addPass(EliminateAvailableExternallyPass());
1444 MPM
.addPass(GlobalDCEPass());
1448 // Force any function attributes we want the rest of the pipeline to observe.
1449 MPM
.addPass(ForceFunctionAttrsPass());
1451 // Add the core simplification pipeline.
1452 MPM
.addPass(buildModuleSimplificationPipeline(
1453 Level
, ThinOrFullLTOPhase::ThinLTOPostLink
));
1455 // Now add the optimization pipeline.
1456 MPM
.addPass(buildModuleOptimizationPipeline(
1457 Level
, ThinOrFullLTOPhase::ThinLTOPostLink
));
1459 // Emit annotation remarks.
1460 addAnnotationRemarksPass(MPM
);
/// Builds the full-LTO pre-link pipeline by delegating to
/// buildPerModuleDefaultPipeline with the LTOPreLink argument set to true.
/// Asserts that \p Level is not O0.
///
/// \param Level optimization level forwarded to the per-module pipeline.
/// \returns whatever buildPerModuleDefaultPipeline returns for \p Level.
1466 PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level
) {
1467 assert(Level
!= OptimizationLevel::O0
&&
1468 "Must request optimizations for the default pipeline!");
1469 // FIXME: We should use a customized pre-link pipeline!
1470 return buildPerModuleDefaultPipeline(Level
,
1471 /* LTOPreLink */ true);
1475 PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level
,
1476 ModuleSummaryIndex
*ExportSummary
) {
1477 ModulePassManager MPM
;
1479 // Convert @llvm.global.annotations to !annotation metadata.
1480 MPM
.addPass(Annotation2MetadataPass());
1482 for (auto &C
: FullLinkTimeOptimizationEarlyEPCallbacks
)
1485 // Create a function that performs CFI checks for cross-DSO calls with targets
1486 // in the current module.
1487 MPM
.addPass(CrossDSOCFIPass());
1489 if (Level
== OptimizationLevel::O0
) {
1490 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1491 // metadata and intrinsics.
1492 MPM
.addPass(WholeProgramDevirtPass(ExportSummary
, nullptr));
1493 MPM
.addPass(LowerTypeTestsPass(ExportSummary
, nullptr));
1494 // Run a second time to clean up any type tests left behind by WPD for use
1496 MPM
.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1498 for (auto &C
: FullLinkTimeOptimizationLastEPCallbacks
)
1501 // Emit annotation remarks.
1502 addAnnotationRemarksPass(MPM
);
1507 if (PGOOpt
&& PGOOpt
->Action
== PGOOptions::SampleUse
) {
1508 // Load sample profile before running the LTO optimization pipeline.
1509 MPM
.addPass(SampleProfileLoaderPass(PGOOpt
->ProfileFile
,
1510 PGOOpt
->ProfileRemappingFile
,
1511 ThinOrFullLTOPhase::FullLTOPostLink
));
1512 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1513 // RequireAnalysisPass for PSI before subsequent non-module passes.
1514 MPM
.addPass(RequireAnalysisPass
<ProfileSummaryAnalysis
, Module
>());
1517 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1518 MPM
.addPass(OpenMPOptPass());
1520 // Remove unused virtual tables to improve the quality of code generated by
1521 // whole-program devirtualization and bitset lowering.
1522 MPM
.addPass(GlobalDCEPass());
1524 // Force any function attributes we want the rest of the pipeline to observe.
1525 MPM
.addPass(ForceFunctionAttrsPass());
1527 // Do basic inference of function attributes from known properties of system
1528 // libraries and other oracles.
1529 MPM
.addPass(InferFunctionAttrsPass());
1531 if (Level
.getSpeedupLevel() > 1) {
1532 MPM
.addPass(createModuleToFunctionPassAdaptor(
1533 CallSiteSplittingPass(), PTO
.EagerlyInvalidateAnalyses
));
1535 // Indirect call promotion. This should promote all the targets that are
1536 // left by the earlier promotion pass that promotes intra-module targets.
1537 // This two-step promotion is to save the compile time. For LTO, it should
1538 // produce the same result as if we only do promotion here.
1539 MPM
.addPass(PGOIndirectCallPromotion(
1540 true /* InLTO */, PGOOpt
&& PGOOpt
->Action
== PGOOptions::SampleUse
));
1542 if (EnableFunctionSpecialization
&& Level
== OptimizationLevel::O3
)
1543 MPM
.addPass(FunctionSpecializationPass());
1544 // Propagate constants at call sites into the functions they call. This
1545 // opens opportunities for globalopt (and inlining) by substituting function
1546 // pointers passed as arguments to direct uses of functions.
1547 MPM
.addPass(IPSCCPPass());
1549 // Attach metadata to indirect call sites indicating the set of functions
1550 // they may target at run-time. This should follow IPSCCP.
1551 MPM
.addPass(CalledValuePropagationPass());
1554 // Now deduce any function attributes based on the current code.
1556 createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
1558 // Do RPO function attribute inference across the module to forward-propagate
1559 // attributes where applicable.
1560 // FIXME: Is this really an optimization rather than a canonicalization?
1561 MPM
.addPass(ReversePostOrderFunctionAttrsPass());
1563 // Use in-range annotations on GEP indices to split globals where beneficial.
1564 MPM
.addPass(GlobalSplitPass());
1566 // Run whole program optimization of virtual call when the list of callees
1568 MPM
.addPass(WholeProgramDevirtPass(ExportSummary
, nullptr));
1570 // Stop here at -O1.
1571 if (Level
== OptimizationLevel::O1
) {
1572 // The LowerTypeTestsPass needs to run to lower type metadata and the
1573 // type.test intrinsics. The pass does nothing if CFI is disabled.
1574 MPM
.addPass(LowerTypeTestsPass(ExportSummary
, nullptr));
1575 // Run a second time to clean up any type tests left behind by WPD for use
1576 // in ICP (which is performed earlier than this in the regular LTO pipeline).
1578 MPM
.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1580 for (auto &C
: FullLinkTimeOptimizationLastEPCallbacks
)
1583 // Emit annotation remarks.
1584 addAnnotationRemarksPass(MPM
);
1589 // Optimize globals to try and fold them into constants.
1590 MPM
.addPass(GlobalOptPass());
1592 // Promote any localized globals to SSA registers.
1593 MPM
.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
1595 // Linking modules together can lead to duplicate global constants; only
1596 // keep one copy of each constant.
1597 MPM
.addPass(ConstantMergePass());
1599 // Remove unused arguments from functions.
1600 MPM
.addPass(DeadArgumentEliminationPass());
1602 // Reduce the code after globalopt and ipsccp. Both can open up significant
1603 // simplification opportunities, and both can propagate functions through
1604 // function pointers. When this happens, we often have to resolve varargs
1605 // calls, etc, so let instcombine do this.
1606 FunctionPassManager PeepholeFPM
;
1607 PeepholeFPM
.addPass(InstCombinePass());
1608 if (Level
== OptimizationLevel::O3
)
1609 PeepholeFPM
.addPass(AggressiveInstCombinePass());
1610 invokePeepholeEPCallbacks(PeepholeFPM
, Level
);
1612 MPM
.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM
),
1613 PTO
.EagerlyInvalidateAnalyses
));
1615 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1616 // generally clean up exception handling overhead. It isn't clear this is
1617 // valuable as the inliner doesn't currently care whether it is inlining an
1618 // invoke or a call.
1619 // Run the inliner now.
1620 MPM
.addPass(ModuleInlinerWrapperPass(
1621 getInlineParamsFromOptLevel(Level
),
1622 /* MandatoryFirst */ true,
1623 InlineContext
{ThinOrFullLTOPhase::FullLTOPostLink
,
1624 InlinePass::CGSCCInliner
}));
1626 // Optimize globals again after we ran the inliner.
1627 MPM
.addPass(GlobalOptPass());
1629 // Garbage collect dead functions.
1630 MPM
.addPass(GlobalDCEPass());
1632 // If we didn't decide to inline a function, check to see if we can
1633 // transform it to pass arguments by value instead of by reference.
1634 MPM
.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
1636 FunctionPassManager FPM
;
1637 // The IPO Passes may leave cruft around. Clean up after them.
1638 FPM
.addPass(InstCombinePass());
1639 invokePeepholeEPCallbacks(FPM
, Level
);
1641 FPM
.addPass(JumpThreadingPass());
1643 // Do a post inline PGO instrumentation and use pass. This is a context
1644 // sensitive PGO pass.
1646 if (PGOOpt
->CSAction
== PGOOptions::CSIRInstr
)
1647 addPGOInstrPasses(MPM
, Level
, /* RunProfileGen */ true,
1648 /* IsCS */ true, PGOOpt
->CSProfileGenFile
,
1649 PGOOpt
->ProfileRemappingFile
,
1650 ThinOrFullLTOPhase::FullLTOPostLink
);
1651 else if (PGOOpt
->CSAction
== PGOOptions::CSIRUse
)
1652 addPGOInstrPasses(MPM
, Level
, /* RunProfileGen */ false,
1653 /* IsCS */ true, PGOOpt
->ProfileFile
,
1654 PGOOpt
->ProfileRemappingFile
,
1655 ThinOrFullLTOPhase::FullLTOPostLink
);
1659 FPM
.addPass(SROAPass());
1661 // LTO provides additional opportunities for tailcall elimination due to
1662 // link-time inlining, and visibility of nocapture attribute.
1663 FPM
.addPass(TailCallElimPass());
1665 // Run a few AA driver optimizations here and now to cleanup the code.
1666 MPM
.addPass(createModuleToFunctionPassAdaptor(std::move(FPM
),
1667 PTO
.EagerlyInvalidateAnalyses
));
1670 createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
1672 // Require the GlobalsAA analysis for the module so we can query it within
1674 MPM
.addPass(RequireAnalysisPass
<GlobalsAA
, Module
>());
1675 // Invalidate AAManager so it can be recreated and pick up the newly available
1678 createModuleToFunctionPassAdaptor(InvalidateAnalysisPass
<AAManager
>()));
1680 FunctionPassManager MainFPM
;
1681 MainFPM
.addPass(createFunctionToLoopPassAdaptor(
1682 LICMPass(PTO
.LicmMssaOptCap
, PTO
.LicmMssaNoAccForPromotionCap
,
1683 /*AllowSpeculation=*/true),
1684 /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
1687 MainFPM
.addPass(NewGVNPass());
1689 MainFPM
.addPass(GVNPass());
1691 // Remove dead memcpy()'s.
1692 MainFPM
.addPass(MemCpyOptPass());
1694 // Nuke dead stores.
1695 MainFPM
.addPass(DSEPass());
1696 MainFPM
.addPass(MergedLoadStoreMotionPass());
1699 if (EnableConstraintElimination
)
1700 MainFPM
.addPass(ConstraintEliminationPass());
1702 LoopPassManager LPM
;
1703 if (EnableLoopFlatten
&& Level
.getSpeedupLevel() > 1)
1704 LPM
.addPass(LoopFlattenPass());
1705 LPM
.addPass(IndVarSimplifyPass());
1706 LPM
.addPass(LoopDeletionPass());
1707 // FIXME: Add loop interchange.
1709 // Unroll small loops and perform peeling.
1710 LPM
.addPass(LoopFullUnrollPass(Level
.getSpeedupLevel(),
1711 /* OnlyWhenForced= */ !PTO
.LoopUnrolling
,
1712 PTO
.ForgetAllSCEVInLoopUnroll
));
1713 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
1714 // *All* loop passes must preserve it, in order to be able to use it.
1715 MainFPM
.addPass(createFunctionToLoopPassAdaptor(
1716 std::move(LPM
), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
1718 MainFPM
.addPass(LoopDistributePass());
1720 addVectorPasses(Level
, MainFPM
, /* IsFullLTO */ true);
1722 // Run the OpenMPOpt CGSCC pass again late.
1724 createModuleToPostOrderCGSCCPassAdaptor(OpenMPOptCGSCCPass()));
1726 invokePeepholeEPCallbacks(MainFPM
, Level
);
1727 MainFPM
.addPass(JumpThreadingPass());
1728 MPM
.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM
),
1729 PTO
.EagerlyInvalidateAnalyses
));
1731 // Lower type metadata and the type.test intrinsic. This pass supports
1732 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
1733 // to be run at link time if CFI is enabled. This pass does nothing if
// CFI is disabled.
1735 MPM
.addPass(LowerTypeTestsPass(ExportSummary
, nullptr));
1736 // Run a second time to clean up any type tests left behind by WPD for use
1737 // in ICP (which is performed earlier than this in the regular LTO pipeline).
1738 MPM
.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1740 // Enable splitting late in the FullLTO post-link pipeline.
1741 if (EnableHotColdSplit
)
1742 MPM
.addPass(HotColdSplittingPass());
1744 // Add late LTO optimization passes.
1745 // Delete basic blocks, which optimization passes may have killed.
1746 MPM
.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass(
1747 SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
1750 // Drop bodies of available externally objects to improve GlobalDCE.
1751 MPM
.addPass(EliminateAvailableExternallyPass());
1753 // Now that we have optimized the program, discard unreachable functions.
1754 MPM
.addPass(GlobalDCEPass());
1756 if (PTO
.MergeFunctions
)
1757 MPM
.addPass(MergeFunctionsPass());
1759 if (PTO
.CallGraphProfile
)
1760 MPM
.addPass(CGProfilePass());
1762 for (auto &C
: FullLinkTimeOptimizationLastEPCallbacks
)
1765 // Emit annotation remarks.
1766 addAnnotationRemarksPass(MPM
);
/// Builds the module pass pipeline used at optimization level O0; the assert
/// below rejects any other Level. Passes are accumulated in a local
/// ModulePassManager (MPM).
/// NOTE(review): the embedded line numbers in this listing skip values (e.g.
/// 1772, 1788, 1793), so the rest of the parameter list, the bodies of the
/// callback loops and the return statement are not visible here -- confirm
/// against the full file before relying on this description.
1771 ModulePassManager
PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level
,
1773 assert(Level
== OptimizationLevel::O0
&&
1774 "buildO0DefaultPipeline should only be used with O0");
1776 ModulePassManager MPM
;
1778 // Perform pseudo probe instrumentation in O0 mode. This is for the
1779 // consistency between different build modes. For example, a LTO build can be
1780 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
1781 // the postlink will require pseudo probe instrumentation in the prelink.
1782 if (PGOOpt
&& PGOOpt
->PseudoProbeForProfiling
)
1783 MPM
.addPass(SampleProfileProbePass(TM
));
// IR-level PGO at O0: RunProfileGen is true only when Action is IRInstr (so
// IRUse passes false), and IsCS is always false on this path.
1785 if (PGOOpt
&& (PGOOpt
->Action
== PGOOptions::IRInstr
||
1786 PGOOpt
->Action
== PGOOptions::IRUse
))
1787 addPGOInstrPassesForO0(
1789 /* RunProfileGen */ (PGOOpt
->Action
== PGOOptions::IRInstr
),
1790 /* IsCS */ false, PGOOpt
->ProfileFile
, PGOOpt
->ProfileRemappingFile
);
1792 for (auto &C
: PipelineStartEPCallbacks
)
// Discriminators are only added when profiling debug info was requested.
1795 if (PGOOpt
&& PGOOpt
->DebugInfoForProfiling
)
1796 MPM
.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
1798 for (auto &C
: PipelineEarlySimplificationEPCallbacks
)
1801 // Build a minimal pipeline based on the semantics required by LLVM,
1802 // which is just that always inlining occurs. Further, disable generating
1803 // lifetime intrinsics to avoid enabling further optimizations during
1805 MPM
.addPass(AlwaysInlinerPass(
1806 /*InsertLifetimeIntrinsics=*/false));
1808 if (PTO
.MergeFunctions
)
1809 MPM
.addPass(MergeFunctionsPass());
1813 createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
// For each non-module extension point below, a pass manager of the matching
// kind is created only when callbacks are registered, and is then wrapped in
// the appropriate adaptor(s) before being added to MPM. The callback
// invocations themselves are not visible in this listing.
1815 if (!CGSCCOptimizerLateEPCallbacks
.empty()) {
1816 CGSCCPassManager CGPM
;
1817 for (auto &C
: CGSCCOptimizerLateEPCallbacks
)
1819 if (!CGPM
.isEmpty())
1820 MPM
.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM
)));
1822 if (!LateLoopOptimizationsEPCallbacks
.empty()) {
1823 LoopPassManager LPM
;
1824 for (auto &C
: LateLoopOptimizationsEPCallbacks
)
// A loop pass manager is nested in two adaptors: loop-to-function, then
// function-to-module.
1826 if (!LPM
.isEmpty()) {
1827 MPM
.addPass(createModuleToFunctionPassAdaptor(
1828 createFunctionToLoopPassAdaptor(std::move(LPM
))));
1831 if (!LoopOptimizerEndEPCallbacks
.empty()) {
1832 LoopPassManager LPM
;
1833 for (auto &C
: LoopOptimizerEndEPCallbacks
)
1835 if (!LPM
.isEmpty()) {
1836 MPM
.addPass(createModuleToFunctionPassAdaptor(
1837 createFunctionToLoopPassAdaptor(std::move(LPM
))));
1840 if (!ScalarOptimizerLateEPCallbacks
.empty()) {
1841 FunctionPassManager FPM
;
1842 for (auto &C
: ScalarOptimizerLateEPCallbacks
)
1845 MPM
.addPass(createModuleToFunctionPassAdaptor(std::move(FPM
)));
1848 for (auto &C
: OptimizerEarlyEPCallbacks
)
1851 if (!VectorizerStartEPCallbacks
.empty()) {
1852 FunctionPassManager FPM
;
1853 for (auto &C
: VectorizerStartEPCallbacks
)
1856 MPM
.addPass(createModuleToFunctionPassAdaptor(std::move(FPM
)));
// Coroutine sub-pipeline: CoroEarly, then CoroSplit through a CGSCC adaptor,
// then CoroCleanup and GlobalDCE, all handed to CoroConditionalWrapper.
// NOTE(review): presumably the wrapper runs these only when the module
// contains coroutines -- confirm against CoroConditionalWrapper.
1859 ModulePassManager CoroPM
;
1860 CoroPM
.addPass(CoroEarlyPass());
1861 CGSCCPassManager CGPM
;
1862 CGPM
.addPass(CoroSplitPass());
1863 CoroPM
.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM
)));
1864 CoroPM
.addPass(CoroCleanupPass());
1865 CoroPM
.addPass(GlobalDCEPass());
1866 MPM
.addPass(CoroConditionalWrapper(std::move(CoroPM
)));
1868 for (auto &C
: OptimizerLastEPCallbacks
)
// NOTE(review): the condition guarding this call (original line 1871) is not
// visible in this listing.
1872 addRequiredLTOPreLinkPasses(MPM
);
// Last pass added in the visible part of this function.
1874 MPM
.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
1879 AAManager
PassBuilder::buildDefaultAAPipeline() {
1882 // The order in which these are registered determines their priority when
1885 // First we register the basic alias analysis that provides the majority of
1886 // per-function local AA logic. This is a stateless, on-demand local set of
1888 AA
.registerFunctionAnalysis
<BasicAA
>();
1890 // Next we query fast, specialized alias analyses that wrap IR-embedded
1891 // information about aliasing.
1892 AA
.registerFunctionAnalysis
<ScopedNoAliasAA
>();
1893 AA
.registerFunctionAnalysis
<TypeBasedAA
>();
1895 // Add support for querying global aliasing information when available.
1896 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
1897 // analysis, all that the `AAManager` can do is query for any *cached*
1898 // results from `GlobalsAA` through a readonly proxy.
1899 AA
.registerModuleAnalysis
<GlobalsAA
>();
1901 // Add target-specific alias analyses.
1903 TM
->registerDefaultAliasAnalyses(AA
);