1 //===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// The goal of hot/cold splitting is to improve the memory locality of code.
11 /// The splitting pass does this by identifying cold blocks and moving them into
12 /// separate functions.
14 /// When the splitting pass finds a cold block (referred to as "the sink"), it
15 /// grows a maximal cold region around that block. The maximal region contains
16 /// all blocks (post-)dominated by the sink [*]. In theory, these blocks are as
17 /// cold as the sink. Once a region is found, it's split out of the original
18 /// function provided it's profitable to do so.
20 /// [*] In practice, there is some added complexity because some blocks are not
23 /// TODO: Use the PM to get domtrees, and preserve BFI/BPI.
24 /// TODO: Reorder outlined functions.
26 //===----------------------------------------------------------------------===//
28 #include "llvm/ADT/PostOrderIterator.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/Analysis/AliasAnalysis.h"
32 #include "llvm/Analysis/BlockFrequencyInfo.h"
33 #include "llvm/Analysis/BranchProbabilityInfo.h"
34 #include "llvm/Analysis/CFG.h"
35 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
36 #include "llvm/Analysis/PostDominators.h"
37 #include "llvm/Analysis/ProfileSummaryInfo.h"
38 #include "llvm/Analysis/TargetTransformInfo.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/CFG.h"
41 #include "llvm/IR/CallSite.h"
42 #include "llvm/IR/DataLayout.h"
43 #include "llvm/IR/DiagnosticInfo.h"
44 #include "llvm/IR/Dominators.h"
45 #include "llvm/IR/Function.h"
46 #include "llvm/IR/Instruction.h"
47 #include "llvm/IR/Instructions.h"
48 #include "llvm/IR/IntrinsicInst.h"
49 #include "llvm/IR/Metadata.h"
50 #include "llvm/IR/Module.h"
51 #include "llvm/IR/PassManager.h"
52 #include "llvm/IR/Type.h"
53 #include "llvm/IR/Use.h"
54 #include "llvm/IR/User.h"
55 #include "llvm/IR/Value.h"
56 #include "llvm/Pass.h"
57 #include "llvm/Support/BlockFrequency.h"
58 #include "llvm/Support/BranchProbability.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/raw_ostream.h"
61 #include "llvm/Transforms/IPO.h"
62 #include "llvm/Transforms/IPO/HotColdSplitting.h"
63 #include "llvm/Transforms/Scalar.h"
64 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
65 #include "llvm/Transforms/Utils/Cloning.h"
66 #include "llvm/Transforms/Utils/CodeExtractor.h"
67 #include "llvm/Transforms/Utils/Local.h"
68 #include "llvm/Transforms/Utils/ValueMapper.h"
72 #define DEBUG_TYPE "hotcoldsplit"
74 STATISTIC(NumColdRegionsFound
, "Number of cold regions found.");
75 STATISTIC(NumColdRegionsOutlined
, "Number of cold regions outlined.");
79 static cl::opt
<bool> EnableStaticAnalyis("hot-cold-static-analysis",
80 cl::init(true), cl::Hidden
);
83 SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden
,
84 cl::desc("Base penalty for splitting cold code (as a "
85 "multiple of TCC_Basic)"));
89 /// A sequence of basic blocks.
91 /// A 0-sized SmallVector is slightly cheaper to move than a std::vector.
92 using BlockSequence
= SmallVector
<BasicBlock
*, 0>;
94 // Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
95 // this function unless you modify the MBB version as well.
97 /// A no successor, non-return block probably ends in unreachable and is cold.
98 /// Also consider a block that ends in an indirect branch to be a return block,
99 /// since many targets use plain indirect branches to return.
100 bool blockEndsInUnreachable(const BasicBlock
&BB
) {
101 if (!succ_empty(&BB
))
105 const Instruction
*I
= BB
.getTerminator();
106 return !(isa
<ReturnInst
>(I
) || isa
<IndirectBrInst
>(I
));
109 bool unlikelyExecuted(BasicBlock
&BB
) {
110 // Exception handling blocks are unlikely executed.
111 if (BB
.isEHPad() || isa
<ResumeInst
>(BB
.getTerminator()))
114 // The block is cold if it calls/invokes a cold function.
115 for (Instruction
&I
: BB
)
116 if (auto CS
= CallSite(&I
))
117 if (CS
.hasFnAttr(Attribute::Cold
))
120 // The block is cold if it has an unreachable terminator, unless it's
121 // preceded by a call to a (possibly warm) noreturn call (e.g. longjmp).
122 if (blockEndsInUnreachable(BB
)) {
124 dyn_cast_or_null
<CallInst
>(BB
.getTerminator()->getPrevNode()))
125 if (CI
->hasFnAttr(Attribute::NoReturn
))
133 /// Check whether it's safe to outline \p BB.
134 static bool mayExtractBlock(const BasicBlock
&BB
) {
135 // EH pads are unsafe to outline because doing so breaks EH type tables. It
136 // follows that invoke instructions cannot be extracted, because CodeExtractor
137 // requires unwind destinations to be within the extraction region.
139 // Resumes that are not reachable from a cleanup landing pad are considered to
140 // be unreachable. It’s not safe to split them out either.
141 auto Term
= BB
.getTerminator();
142 return !BB
.hasAddressTaken() && !BB
.isEHPad() && !isa
<InvokeInst
>(Term
) &&
143 !isa
<ResumeInst
>(Term
);
146 /// Mark \p F cold. Based on this assumption, also optimize it for minimum size.
147 /// If \p UpdateEntryCount is true (set when this is a new split function and
148 /// module has profile data), set entry count to 0 to ensure treated as cold.
149 /// Return true if the function is changed.
150 static bool markFunctionCold(Function
&F
, bool UpdateEntryCount
= false) {
151 assert(!F
.hasFnAttribute(Attribute::OptimizeNone
) && "Can't mark this cold");
152 bool Changed
= false;
153 if (!F
.hasFnAttribute(Attribute::Cold
)) {
154 F
.addFnAttr(Attribute::Cold
);
157 if (!F
.hasFnAttribute(Attribute::MinSize
)) {
158 F
.addFnAttr(Attribute::MinSize
);
161 if (UpdateEntryCount
) {
162 // Set the entry count to 0 to ensure it is placed in the unlikely text
163 // section when function sections are enabled.
171 class HotColdSplitting
{
173 HotColdSplitting(ProfileSummaryInfo
*ProfSI
,
174 function_ref
<BlockFrequencyInfo
*(Function
&)> GBFI
,
175 function_ref
<TargetTransformInfo
&(Function
&)> GTTI
,
176 std::function
<OptimizationRemarkEmitter
&(Function
&)> *GORE
,
177 function_ref
<AssumptionCache
*(Function
&)> LAC
)
178 : PSI(ProfSI
), GetBFI(GBFI
), GetTTI(GTTI
), GetORE(GORE
), LookupAC(LAC
) {}
182 bool isFunctionCold(const Function
&F
) const;
183 bool shouldOutlineFrom(const Function
&F
) const;
184 bool outlineColdRegions(Function
&F
, bool HasProfileSummary
);
185 Function
*extractColdRegion(const BlockSequence
&Region
, DominatorTree
&DT
,
186 BlockFrequencyInfo
*BFI
, TargetTransformInfo
&TTI
,
187 OptimizationRemarkEmitter
&ORE
,
188 AssumptionCache
*AC
, unsigned Count
);
189 ProfileSummaryInfo
*PSI
;
190 function_ref
<BlockFrequencyInfo
*(Function
&)> GetBFI
;
191 function_ref
<TargetTransformInfo
&(Function
&)> GetTTI
;
192 std::function
<OptimizationRemarkEmitter
&(Function
&)> *GetORE
;
193 function_ref
<AssumptionCache
*(Function
&)> LookupAC
;
196 class HotColdSplittingLegacyPass
: public ModulePass
{
199 HotColdSplittingLegacyPass() : ModulePass(ID
) {
200 initializeHotColdSplittingLegacyPassPass(*PassRegistry::getPassRegistry());
203 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
204 AU
.addRequired
<BlockFrequencyInfoWrapperPass
>();
205 AU
.addRequired
<ProfileSummaryInfoWrapperPass
>();
206 AU
.addRequired
<TargetTransformInfoWrapperPass
>();
207 AU
.addUsedIfAvailable
<AssumptionCacheTracker
>();
210 bool runOnModule(Module
&M
) override
;
213 } // end anonymous namespace
215 /// Check whether \p F is inherently cold.
216 bool HotColdSplitting::isFunctionCold(const Function
&F
) const {
217 if (F
.hasFnAttribute(Attribute::Cold
))
220 if (F
.getCallingConv() == CallingConv::Cold
)
223 if (PSI
->isFunctionEntryCold(&F
))
229 // Returns false if the function should not be considered for hot-cold split
231 bool HotColdSplitting::shouldOutlineFrom(const Function
&F
) const {
232 if (F
.hasFnAttribute(Attribute::AlwaysInline
))
235 if (F
.hasFnAttribute(Attribute::NoInline
))
241 /// Get the benefit score of outlining \p Region.
242 static int getOutliningBenefit(ArrayRef
<BasicBlock
*> Region
,
243 TargetTransformInfo
&TTI
) {
244 // Sum up the code size costs of non-terminator instructions. Tight coupling
245 // with \ref getOutliningPenalty is needed to model the costs of terminators.
247 for (BasicBlock
*BB
: Region
)
248 for (Instruction
&I
: BB
->instructionsWithoutDebug())
249 if (&I
!= BB
->getTerminator())
251 TTI
.getInstructionCost(&I
, TargetTransformInfo::TCK_CodeSize
);
256 /// Get the penalty score for outlining \p Region.
257 static int getOutliningPenalty(ArrayRef
<BasicBlock
*> Region
,
258 unsigned NumInputs
, unsigned NumOutputs
) {
259 int Penalty
= SplittingThreshold
;
260 LLVM_DEBUG(dbgs() << "Applying penalty for splitting: " << Penalty
<< "\n");
262 // If the splitting threshold is set at or below zero, skip the usual
263 // profitability check.
264 if (SplittingThreshold
<= 0)
267 // The typical code size cost for materializing an argument for the outlined
269 LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumInputs
<< " inputs\n");
270 const int CostForArgMaterialization
= TargetTransformInfo::TCC_Basic
;
271 Penalty
+= CostForArgMaterialization
* NumInputs
;
273 // The typical code size cost for an output alloca, its associated store, and
274 // its associated reload.
275 LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputs
<< " outputs\n");
276 const int CostForRegionOutput
= 3 * TargetTransformInfo::TCC_Basic
;
277 Penalty
+= CostForRegionOutput
* NumOutputs
;
279 // Find the number of distinct exit blocks for the region. Use a conservative
280 // check to determine whether control returns from the region.
281 bool NoBlocksReturn
= true;
282 SmallPtrSet
<BasicBlock
*, 2> SuccsOutsideRegion
;
283 for (BasicBlock
*BB
: Region
) {
284 // If a block has no successors, only assume it does not return if it's
286 if (succ_empty(BB
)) {
287 NoBlocksReturn
&= isa
<UnreachableInst
>(BB
->getTerminator());
291 for (BasicBlock
*SuccBB
: successors(BB
)) {
292 if (find(Region
, SuccBB
) == Region
.end()) {
293 NoBlocksReturn
= false;
294 SuccsOutsideRegion
.insert(SuccBB
);
299 // Apply a `noreturn` bonus.
300 if (NoBlocksReturn
) {
301 LLVM_DEBUG(dbgs() << "Applying bonus for: " << Region
.size()
302 << " non-returning terminators\n");
303 Penalty
-= Region
.size();
306 // Apply a penalty for having more than one successor outside of the region.
307 // This penalty accounts for the switch needed in the caller.
308 if (!SuccsOutsideRegion
.empty()) {
309 LLVM_DEBUG(dbgs() << "Applying penalty for: " << SuccsOutsideRegion
.size()
310 << " non-region successors\n");
311 Penalty
+= (SuccsOutsideRegion
.size() - 1) * TargetTransformInfo::TCC_Basic
;
317 Function
*HotColdSplitting::extractColdRegion(const BlockSequence
&Region
,
319 BlockFrequencyInfo
*BFI
,
320 TargetTransformInfo
&TTI
,
321 OptimizationRemarkEmitter
&ORE
,
324 assert(!Region
.empty());
326 // TODO: Pass BFI and BPI to update profile information.
327 CodeExtractor
CE(Region
, &DT
, /* AggregateArgs */ false, /* BFI */ nullptr,
328 /* BPI */ nullptr, AC
, /* AllowVarArgs */ false,
329 /* AllowAlloca */ false,
330 /* Suffix */ "cold." + std::to_string(Count
));
332 // Perform a simple cost/benefit analysis to decide whether or not to permit
334 SetVector
<Value
*> Inputs
, Outputs
, Sinks
;
335 CE
.findInputsOutputs(Inputs
, Outputs
, Sinks
);
336 int OutliningBenefit
= getOutliningBenefit(Region
, TTI
);
337 int OutliningPenalty
=
338 getOutliningPenalty(Region
, Inputs
.size(), Outputs
.size());
339 LLVM_DEBUG(dbgs() << "Split profitability: benefit = " << OutliningBenefit
340 << ", penalty = " << OutliningPenalty
<< "\n");
341 if (OutliningBenefit
<= OutliningPenalty
)
344 Function
*OrigF
= Region
[0]->getParent();
345 if (Function
*OutF
= CE
.extractCodeRegion()) {
346 User
*U
= *OutF
->user_begin();
347 CallInst
*CI
= cast
<CallInst
>(U
);
349 NumColdRegionsOutlined
++;
350 if (TTI
.useColdCCForColdCall(*OutF
)) {
351 OutF
->setCallingConv(CallingConv::Cold
);
352 CS
.setCallingConv(CallingConv::Cold
);
356 markFunctionCold(*OutF
, BFI
!= nullptr);
358 LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF
);
360 return OptimizationRemark(DEBUG_TYPE
, "HotColdSplit",
361 &*Region
[0]->begin())
362 << ore::NV("Original", OrigF
) << " split cold code into "
363 << ore::NV("Split", OutF
);
369 return OptimizationRemarkMissed(DEBUG_TYPE
, "ExtractFailed",
370 &*Region
[0]->begin())
371 << "Failed to extract region at block "
372 << ore::NV("Block", Region
.front());
377 /// A pair of (basic block, score).
378 using BlockTy
= std::pair
<BasicBlock
*, unsigned>;
381 /// A maximal outlining region. This contains all blocks post-dominated by a
382 /// sink block, the sink block itself, and all blocks dominated by the sink.
383 /// If sink-predecessors and sink-successors cannot be extracted in one region,
384 /// the static constructor returns a list of suitable extraction regions.
385 class OutliningRegion
{
386 /// A list of (block, score) pairs. A block's score is non-zero iff it's a
387 /// viable sub-region entry point. Blocks with higher scores are better entry
388 /// points (i.e. they are more distant ancestors of the sink block).
389 SmallVector
<BlockTy
, 0> Blocks
= {};
391 /// The suggested entry point into the region. If the region has multiple
392 /// entry points, all blocks within the region may not be reachable from this
394 BasicBlock
*SuggestedEntryPoint
= nullptr;
396 /// Whether the entire function is cold.
397 bool EntireFunctionCold
= false;
399 /// If \p BB is a viable entry point, return \p Score. Return 0 otherwise.
400 static unsigned getEntryPointScore(BasicBlock
&BB
, unsigned Score
) {
401 return mayExtractBlock(BB
) ? Score
: 0;
404 /// These scores should be lower than the score for predecessor blocks,
405 /// because regions starting at predecessor blocks are typically larger.
406 static constexpr unsigned ScoreForSuccBlock
= 1;
407 static constexpr unsigned ScoreForSinkBlock
= 1;
409 OutliningRegion(const OutliningRegion
&) = delete;
410 OutliningRegion
&operator=(const OutliningRegion
&) = delete;
413 OutliningRegion() = default;
414 OutliningRegion(OutliningRegion
&&) = default;
415 OutliningRegion
&operator=(OutliningRegion
&&) = default;
417 static std::vector
<OutliningRegion
> create(BasicBlock
&SinkBB
,
418 const DominatorTree
&DT
,
419 const PostDominatorTree
&PDT
) {
420 std::vector
<OutliningRegion
> Regions
;
421 SmallPtrSet
<BasicBlock
*, 4> RegionBlocks
;
423 Regions
.emplace_back();
424 OutliningRegion
*ColdRegion
= &Regions
.back();
426 auto addBlockToRegion
= [&](BasicBlock
*BB
, unsigned Score
) {
427 RegionBlocks
.insert(BB
);
428 ColdRegion
->Blocks
.emplace_back(BB
, Score
);
431 // The ancestor farthest-away from SinkBB, and also post-dominated by it.
432 unsigned SinkScore
= getEntryPointScore(SinkBB
, ScoreForSinkBlock
);
433 ColdRegion
->SuggestedEntryPoint
= (SinkScore
> 0) ? &SinkBB
: nullptr;
434 unsigned BestScore
= SinkScore
;
436 // Visit SinkBB's ancestors using inverse DFS.
437 auto PredIt
= ++idf_begin(&SinkBB
);
438 auto PredEnd
= idf_end(&SinkBB
);
439 while (PredIt
!= PredEnd
) {
440 BasicBlock
&PredBB
= **PredIt
;
441 bool SinkPostDom
= PDT
.dominates(&SinkBB
, &PredBB
);
443 // If the predecessor is cold and has no predecessors, the entire
444 // function must be cold.
445 if (SinkPostDom
&& pred_empty(&PredBB
)) {
446 ColdRegion
->EntireFunctionCold
= true;
450 // If SinkBB does not post-dominate a predecessor, do not mark the
451 // predecessor (or any of its predecessors) cold.
452 if (!SinkPostDom
|| !mayExtractBlock(PredBB
)) {
453 PredIt
.skipChildren();
457 // Keep track of the post-dominated ancestor farthest away from the sink.
458 // The path length is always >= 2, ensuring that predecessor blocks are
459 // considered as entry points before the sink block.
460 unsigned PredScore
= getEntryPointScore(PredBB
, PredIt
.getPathLength());
461 if (PredScore
> BestScore
) {
462 ColdRegion
->SuggestedEntryPoint
= &PredBB
;
463 BestScore
= PredScore
;
466 addBlockToRegion(&PredBB
, PredScore
);
470 // If the sink can be added to the cold region, do so. It's considered as
471 // an entry point before any sink-successor blocks.
473 // Otherwise, split cold sink-successor blocks using a separate region.
474 // This satisfies the requirement that all extraction blocks other than the
475 // first have predecessors within the extraction region.
476 if (mayExtractBlock(SinkBB
)) {
477 addBlockToRegion(&SinkBB
, SinkScore
);
479 Regions
.emplace_back();
480 ColdRegion
= &Regions
.back();
484 // Find all successors of SinkBB dominated by SinkBB using DFS.
485 auto SuccIt
= ++df_begin(&SinkBB
);
486 auto SuccEnd
= df_end(&SinkBB
);
487 while (SuccIt
!= SuccEnd
) {
488 BasicBlock
&SuccBB
= **SuccIt
;
489 bool SinkDom
= DT
.dominates(&SinkBB
, &SuccBB
);
491 // Don't allow the backwards & forwards DFSes to mark the same block.
492 bool DuplicateBlock
= RegionBlocks
.count(&SuccBB
);
494 // If SinkBB does not dominate a successor, do not mark the successor (or
495 // any of its successors) cold.
496 if (DuplicateBlock
|| !SinkDom
|| !mayExtractBlock(SuccBB
)) {
497 SuccIt
.skipChildren();
501 unsigned SuccScore
= getEntryPointScore(SuccBB
, ScoreForSuccBlock
);
502 if (SuccScore
> BestScore
) {
503 ColdRegion
->SuggestedEntryPoint
= &SuccBB
;
504 BestScore
= SuccScore
;
507 addBlockToRegion(&SuccBB
, SuccScore
);
514 /// Whether this region has nothing to extract.
515 bool empty() const { return !SuggestedEntryPoint
; }
517 /// The blocks in this region.
518 ArrayRef
<std::pair
<BasicBlock
*, unsigned>> blocks() const { return Blocks
; }
520 /// Whether the entire function containing this region is cold.
521 bool isEntireFunctionCold() const { return EntireFunctionCold
; }
523 /// Remove a sub-region from this region and return it as a block sequence.
524 BlockSequence
takeSingleEntrySubRegion(DominatorTree
&DT
) {
525 assert(!empty() && !isEntireFunctionCold() && "Nothing to extract");
527 // Remove blocks dominated by the suggested entry point from this region.
528 // During the removal, identify the next best entry point into the region.
529 // Ensure that the first extracted block is the suggested entry point.
530 BlockSequence SubRegion
= {SuggestedEntryPoint
};
531 BasicBlock
*NextEntryPoint
= nullptr;
532 unsigned NextScore
= 0;
533 auto RegionEndIt
= Blocks
.end();
534 auto RegionStartIt
= remove_if(Blocks
, [&](const BlockTy
&Block
) {
535 BasicBlock
*BB
= Block
.first
;
536 unsigned Score
= Block
.second
;
538 BB
== SuggestedEntryPoint
|| DT
.dominates(SuggestedEntryPoint
, BB
);
539 if (!InSubRegion
&& Score
> NextScore
) {
543 if (InSubRegion
&& BB
!= SuggestedEntryPoint
)
544 SubRegion
.push_back(BB
);
547 Blocks
.erase(RegionStartIt
, RegionEndIt
);
549 // Update the suggested entry point.
550 SuggestedEntryPoint
= NextEntryPoint
;
557 bool HotColdSplitting::outlineColdRegions(Function
&F
, bool HasProfileSummary
) {
558 bool Changed
= false;
560 // The set of cold blocks.
561 SmallPtrSet
<BasicBlock
*, 4> ColdBlocks
;
563 // The worklist of non-intersecting regions left to outline.
564 SmallVector
<OutliningRegion
, 2> OutliningWorklist
;
566 // Set up an RPO traversal. Experimentally, this performs better (outlines
567 // more) than a PO traversal, because we prevent region overlap by keeping
568 // the first region to contain a block.
569 ReversePostOrderTraversal
<Function
*> RPOT(&F
);
571 // Calculate domtrees lazily. This reduces compile-time significantly.
572 std::unique_ptr
<DominatorTree
> DT
;
573 std::unique_ptr
<PostDominatorTree
> PDT
;
575 // Calculate BFI lazily (it's only used to query ProfileSummaryInfo). This
576 // reduces compile-time significantly. TODO: When we *do* use BFI, we should
577 // be able to salvage its domtrees instead of recomputing them.
578 BlockFrequencyInfo
*BFI
= nullptr;
579 if (HasProfileSummary
)
582 TargetTransformInfo
&TTI
= GetTTI(F
);
583 OptimizationRemarkEmitter
&ORE
= (*GetORE
)(F
);
584 AssumptionCache
*AC
= LookupAC(F
);
586 // Find all cold regions.
587 for (BasicBlock
*BB
: RPOT
) {
588 // This block is already part of some outlining region.
589 if (ColdBlocks
.count(BB
))
592 bool Cold
= (BFI
&& PSI
->isColdBlock(BB
, BFI
)) ||
593 (EnableStaticAnalyis
&& unlikelyExecuted(*BB
));
598 dbgs() << "Found a cold block:\n";
603 DT
= make_unique
<DominatorTree
>(F
);
605 PDT
= make_unique
<PostDominatorTree
>(F
);
607 auto Regions
= OutliningRegion::create(*BB
, *DT
, *PDT
);
608 for (OutliningRegion
&Region
: Regions
) {
612 if (Region
.isEntireFunctionCold()) {
613 LLVM_DEBUG(dbgs() << "Entire function is cold\n");
614 return markFunctionCold(F
);
617 // If this outlining region intersects with another, drop the new region.
619 // TODO: It's theoretically possible to outline more by only keeping the
620 // largest region which contains a block, but the extra bookkeeping to do
621 // this is tricky/expensive.
622 bool RegionsOverlap
= any_of(Region
.blocks(), [&](const BlockTy
&Block
) {
623 return !ColdBlocks
.insert(Block
.first
).second
;
628 OutliningWorklist
.emplace_back(std::move(Region
));
629 ++NumColdRegionsFound
;
633 // Outline single-entry cold regions, splitting up larger regions as needed.
634 unsigned OutlinedFunctionID
= 1;
635 while (!OutliningWorklist
.empty()) {
636 OutliningRegion Region
= OutliningWorklist
.pop_back_val();
637 assert(!Region
.empty() && "Empty outlining region in worklist");
639 BlockSequence SubRegion
= Region
.takeSingleEntrySubRegion(*DT
);
641 dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
642 for (BasicBlock
*BB
: SubRegion
)
646 Function
*Outlined
= extractColdRegion(SubRegion
, *DT
, BFI
, TTI
, ORE
, AC
,
649 ++OutlinedFunctionID
;
652 } while (!Region
.empty());
658 bool HotColdSplitting::run(Module
&M
) {
659 bool Changed
= false;
660 bool HasProfileSummary
= M
.getProfileSummary();
661 for (auto It
= M
.begin(), End
= M
.end(); It
!= End
; ++It
) {
664 // Do not touch declarations.
665 if (F
.isDeclaration())
668 // Do not modify `optnone` functions.
669 if (F
.hasFnAttribute(Attribute::OptimizeNone
))
672 // Detect inherently cold functions and mark them as such.
673 if (isFunctionCold(F
)) {
674 Changed
|= markFunctionCold(F
);
678 if (!shouldOutlineFrom(F
)) {
679 LLVM_DEBUG(llvm::dbgs() << "Skipping " << F
.getName() << "\n");
683 LLVM_DEBUG(llvm::dbgs() << "Outlining in " << F
.getName() << "\n");
684 Changed
|= outlineColdRegions(F
, HasProfileSummary
);
689 bool HotColdSplittingLegacyPass::runOnModule(Module
&M
) {
692 ProfileSummaryInfo
*PSI
=
693 &getAnalysis
<ProfileSummaryInfoWrapperPass
>().getPSI();
694 auto GTTI
= [this](Function
&F
) -> TargetTransformInfo
& {
695 return this->getAnalysis
<TargetTransformInfoWrapperPass
>().getTTI(F
);
697 auto GBFI
= [this](Function
&F
) {
698 return &this->getAnalysis
<BlockFrequencyInfoWrapperPass
>(F
).getBFI();
700 std::unique_ptr
<OptimizationRemarkEmitter
> ORE
;
701 std::function
<OptimizationRemarkEmitter
&(Function
&)> GetORE
=
702 [&ORE
](Function
&F
) -> OptimizationRemarkEmitter
& {
703 ORE
.reset(new OptimizationRemarkEmitter(&F
));
706 auto LookupAC
= [this](Function
&F
) -> AssumptionCache
* {
707 if (auto *ACT
= getAnalysisIfAvailable
<AssumptionCacheTracker
>())
708 return ACT
->lookupAssumptionCache(F
);
712 return HotColdSplitting(PSI
, GBFI
, GTTI
, &GetORE
, LookupAC
).run(M
);
716 HotColdSplittingPass::run(Module
&M
, ModuleAnalysisManager
&AM
) {
717 auto &FAM
= AM
.getResult
<FunctionAnalysisManagerModuleProxy
>(M
).getManager();
719 auto LookupAC
= [&FAM
](Function
&F
) -> AssumptionCache
* {
720 return FAM
.getCachedResult
<AssumptionAnalysis
>(F
);
723 auto GBFI
= [&FAM
](Function
&F
) {
724 return &FAM
.getResult
<BlockFrequencyAnalysis
>(F
);
727 std::function
<TargetTransformInfo
&(Function
&)> GTTI
=
728 [&FAM
](Function
&F
) -> TargetTransformInfo
& {
729 return FAM
.getResult
<TargetIRAnalysis
>(F
);
732 std::unique_ptr
<OptimizationRemarkEmitter
> ORE
;
733 std::function
<OptimizationRemarkEmitter
&(Function
&)> GetORE
=
734 [&ORE
](Function
&F
) -> OptimizationRemarkEmitter
& {
735 ORE
.reset(new OptimizationRemarkEmitter(&F
));
739 ProfileSummaryInfo
*PSI
= &AM
.getResult
<ProfileSummaryAnalysis
>(M
);
741 if (HotColdSplitting(PSI
, GBFI
, GTTI
, &GetORE
, LookupAC
).run(M
))
742 return PreservedAnalyses::none();
743 return PreservedAnalyses::all();
746 char HotColdSplittingLegacyPass::ID
= 0;
747 INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass
, "hotcoldsplit",
748 "Hot Cold Splitting", false, false)
749 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass
)
750 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass
)
751 INITIALIZE_PASS_END(HotColdSplittingLegacyPass
, "hotcoldsplit",
752 "Hot Cold Splitting", false, false)
754 ModulePass
*llvm::createHotColdSplittingPass() {
755 return new HotColdSplittingLegacyPass();