1 //===- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ----===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This pass looks for safe point where the prologue and epilogue can be
12 // The safe point for the prologue (resp. epilogue) is called Save
14 // A point is safe for prologue (resp. epilogue) if and only if
15 // it 1) dominates (resp. post-dominates) all the frame related operations and
16 // between 2) two executions of the Save (resp. Restore) point there is an
17 // execution of the Restore (resp. Save) point.
19 // For instance, the following points are safe:
20 // for (int i = 0; i < 10; ++i) {
25 // Indeed, the execution looks like Save -> Restore -> Save -> Restore ...
26 // And the following points are not:
27 // for (int i = 0; i < 10; ++i) {
31 // for (int i = 0; i < 10; ++i) {
35 // Indeed, the execution looks like Save -> Save -> ... -> Restore -> Restore.
37 // This pass also ensures that the safe points are 3) cheaper than the regular
38 // entry and exits blocks.
40 // Property #1 is ensured via the use of MachineDominatorTree and
41 // MachinePostDominatorTree.
42 // Property #2 is ensured via property #1 and MachineLoopInfo, i.e., both
43 // points must be in the same loop.
44 // Property #3 is ensured via the MachineBlockFrequencyInfo.
46 // If this pass found points matching all these properties, then
47 // MachineFrameInfo is updated with this information.
49 //===----------------------------------------------------------------------===//
51 #include "llvm/ADT/BitVector.h"
52 #include "llvm/ADT/PostOrderIterator.h"
53 #include "llvm/ADT/SetVector.h"
54 #include "llvm/ADT/SmallVector.h"
55 #include "llvm/ADT/Statistic.h"
56 #include "llvm/Analysis/CFG.h"
57 #include "llvm/CodeGen/MachineBasicBlock.h"
58 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
59 #include "llvm/CodeGen/MachineDominators.h"
60 #include "llvm/CodeGen/MachineFrameInfo.h"
61 #include "llvm/CodeGen/MachineFunction.h"
62 #include "llvm/CodeGen/MachineFunctionPass.h"
63 #include "llvm/CodeGen/MachineInstr.h"
64 #include "llvm/CodeGen/MachineLoopInfo.h"
65 #include "llvm/CodeGen/MachineOperand.h"
66 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
67 #include "llvm/CodeGen/MachinePostDominators.h"
68 #include "llvm/CodeGen/RegisterClassInfo.h"
69 #include "llvm/CodeGen/RegisterScavenging.h"
70 #include "llvm/CodeGen/TargetFrameLowering.h"
71 #include "llvm/CodeGen/TargetInstrInfo.h"
72 #include "llvm/CodeGen/TargetLowering.h"
73 #include "llvm/CodeGen/TargetRegisterInfo.h"
74 #include "llvm/CodeGen/TargetSubtargetInfo.h"
75 #include "llvm/IR/Attributes.h"
76 #include "llvm/IR/Function.h"
77 #include "llvm/MC/MCAsmInfo.h"
78 #include "llvm/Pass.h"
79 #include "llvm/Support/CommandLine.h"
80 #include "llvm/Support/Debug.h"
81 #include "llvm/Support/ErrorHandling.h"
82 #include "llvm/Support/raw_ostream.h"
83 #include "llvm/Target/TargetMachine.h"
90 #define DEBUG_TYPE "shrink-wrap"
92 STATISTIC(NumFunc
, "Number of functions");
93 STATISTIC(NumCandidates
, "Number of shrink-wrapping candidates");
94 STATISTIC(NumCandidatesDropped
,
95 "Number of shrink-wrapping candidates dropped because of frequency");
97 static cl::opt
<cl::boolOrDefault
>
98 EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden
,
99 cl::desc("enable the shrink-wrapping pass"));
103 /// Class to determine where the safe point to insert the
104 /// prologue and epilogue are.
105 /// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the
106 /// shrink-wrapping term for prologue/epilogue placement, this pass
107 /// does not rely on expensive data-flow analysis. Instead we use the
108 /// dominance properties and loop information to decide which point
109 /// are safe for such insertion.
110 class ShrinkWrap
: public MachineFunctionPass
{
111 /// Hold callee-saved information.
112 RegisterClassInfo RCI
;
113 MachineDominatorTree
*MDT
;
114 MachinePostDominatorTree
*MPDT
;
116 /// Current safe point found for the prologue.
117 /// The prologue will be inserted before the first instruction
118 /// in this basic block.
119 MachineBasicBlock
*Save
;
121 /// Current safe point found for the epilogue.
122 /// The epilogue will be inserted before the first terminator instruction
123 /// in this basic block.
124 MachineBasicBlock
*Restore
;
126 /// Hold the information of the basic block frequency.
127 /// Use to check the profitability of the new points.
128 MachineBlockFrequencyInfo
*MBFI
;
130 /// Hold the loop information. Used to determine if Save and Restore
131 /// are in the same loop.
132 MachineLoopInfo
*MLI
;
135 MachineOptimizationRemarkEmitter
*ORE
= nullptr;
137 /// Frequency of the Entry block.
140 /// Current opcode for frame setup.
141 unsigned FrameSetupOpcode
;
143 /// Current opcode for frame destroy.
144 unsigned FrameDestroyOpcode
;
146 /// Stack pointer register, used by llvm.{savestack,restorestack}
150 const MachineBasicBlock
*Entry
;
152 using SetOfRegs
= SmallSetVector
<unsigned, 16>;
154 /// Registers that need to be saved for the current function.
155 mutable SetOfRegs CurrentCSRs
;
157 /// Current MachineFunction.
158 MachineFunction
*MachineFunc
;
160 /// Check if \p MI uses or defines a callee-saved register or
161 /// a frame index. If this is the case, this means \p MI must happen
162 /// after Save and before Restore.
163 bool useOrDefCSROrFI(const MachineInstr
&MI
, RegScavenger
*RS
) const;
165 const SetOfRegs
&getCurrentCSRs(RegScavenger
*RS
) const {
166 if (CurrentCSRs
.empty()) {
168 const TargetFrameLowering
*TFI
=
169 MachineFunc
->getSubtarget().getFrameLowering();
171 TFI
->determineCalleeSaves(*MachineFunc
, SavedRegs
, RS
);
173 for (int Reg
= SavedRegs
.find_first(); Reg
!= -1;
174 Reg
= SavedRegs
.find_next(Reg
))
175 CurrentCSRs
.insert((unsigned)Reg
);
180 /// Update the Save and Restore points such that \p MBB is in
181 /// the region that is dominated by Save and post-dominated by Restore
182 /// and Save and Restore still match the safe point definition.
183 /// Such point may not exist and Save and/or Restore may be null after
185 void updateSaveRestorePoints(MachineBasicBlock
&MBB
, RegScavenger
*RS
);
187 /// Initialize the pass for \p MF.
188 void init(MachineFunction
&MF
) {
189 RCI
.runOnMachineFunction(MF
);
190 MDT
= &getAnalysis
<MachineDominatorTree
>();
191 MPDT
= &getAnalysis
<MachinePostDominatorTree
>();
194 MBFI
= &getAnalysis
<MachineBlockFrequencyInfo
>();
195 MLI
= &getAnalysis
<MachineLoopInfo
>();
196 ORE
= &getAnalysis
<MachineOptimizationRemarkEmitterPass
>().getORE();
197 EntryFreq
= MBFI
->getEntryFreq();
198 const TargetSubtargetInfo
&Subtarget
= MF
.getSubtarget();
199 const TargetInstrInfo
&TII
= *Subtarget
.getInstrInfo();
200 FrameSetupOpcode
= TII
.getCallFrameSetupOpcode();
201 FrameDestroyOpcode
= TII
.getCallFrameDestroyOpcode();
202 SP
= Subtarget
.getTargetLowering()->getStackPointerRegisterToSaveRestore();
210 /// Check whether or not Save and Restore points are still interesting for
212 bool ArePointsInteresting() const { return Save
!= Entry
&& Save
&& Restore
; }
214 /// Check if shrink wrapping is enabled for this target and function.
215 static bool isShrinkWrapEnabled(const MachineFunction
&MF
);
220 ShrinkWrap() : MachineFunctionPass(ID
) {
221 initializeShrinkWrapPass(*PassRegistry::getPassRegistry());
224 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
225 AU
.setPreservesAll();
226 AU
.addRequired
<MachineBlockFrequencyInfo
>();
227 AU
.addRequired
<MachineDominatorTree
>();
228 AU
.addRequired
<MachinePostDominatorTree
>();
229 AU
.addRequired
<MachineLoopInfo
>();
230 AU
.addRequired
<MachineOptimizationRemarkEmitterPass
>();
231 MachineFunctionPass::getAnalysisUsage(AU
);
234 MachineFunctionProperties
getRequiredProperties() const override
{
235 return MachineFunctionProperties().set(
236 MachineFunctionProperties::Property::NoVRegs
);
239 StringRef
getPassName() const override
{ return "Shrink Wrapping analysis"; }
241 /// Perform the shrink-wrapping analysis and update
242 /// the MachineFrameInfo attached to \p MF with the results.
243 bool runOnMachineFunction(MachineFunction
&MF
) override
;
246 } // end anonymous namespace
248 char ShrinkWrap::ID
= 0;
250 char &llvm::ShrinkWrapID
= ShrinkWrap::ID
;
252 INITIALIZE_PASS_BEGIN(ShrinkWrap
, DEBUG_TYPE
, "Shrink Wrap Pass", false, false)
253 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo
)
254 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree
)
255 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree
)
256 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo
)
257 INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass
)
258 INITIALIZE_PASS_END(ShrinkWrap
, DEBUG_TYPE
, "Shrink Wrap Pass", false, false)
260 bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr
&MI
,
261 RegScavenger
*RS
) const {
262 if (MI
.getOpcode() == FrameSetupOpcode
||
263 MI
.getOpcode() == FrameDestroyOpcode
) {
264 LLVM_DEBUG(dbgs() << "Frame instruction: " << MI
<< '\n');
267 for (const MachineOperand
&MO
: MI
.operands()) {
268 bool UseOrDefCSR
= false;
270 // Ignore instructions like DBG_VALUE which don't read/def the register.
271 if (!MO
.isDef() && !MO
.readsReg())
273 unsigned PhysReg
= MO
.getReg();
276 assert(TargetRegisterInfo::isPhysicalRegister(PhysReg
) &&
277 "Unallocated register?!");
278 // The stack pointer is not normally described as a callee-saved register
279 // in calling convention definitions, so we need to watch for it
280 // separately. An SP mentioned by a call instruction, we can ignore,
281 // though, as it's harmless and we do not want to effectively disable tail
282 // calls by forcing the restore point to post-dominate them.
283 UseOrDefCSR
= (!MI
.isCall() && PhysReg
== SP
) ||
284 RCI
.getLastCalleeSavedAlias(PhysReg
);
285 } else if (MO
.isRegMask()) {
286 // Check if this regmask clobbers any of the CSRs.
287 for (unsigned Reg
: getCurrentCSRs(RS
)) {
288 if (MO
.clobbersPhysReg(Reg
)) {
294 // Skip FrameIndex operands in DBG_VALUE instructions.
295 if (UseOrDefCSR
|| (MO
.isFI() && !MI
.isDebugValue())) {
296 LLVM_DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR
<< ") or FI("
297 << MO
.isFI() << "): " << MI
<< '\n');
304 /// Helper function to find the immediate (post) dominator.
305 template <typename ListOfBBs
, typename DominanceAnalysis
>
306 static MachineBasicBlock
*FindIDom(MachineBasicBlock
&Block
, ListOfBBs BBs
,
307 DominanceAnalysis
&Dom
) {
308 MachineBasicBlock
*IDom
= &Block
;
309 for (MachineBasicBlock
*BB
: BBs
) {
310 IDom
= Dom
.findNearestCommonDominator(IDom
, BB
);
319 void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock
&MBB
,
321 // Get rid of the easy cases first.
325 Save
= MDT
->findNearestCommonDominator(Save
, &MBB
);
328 LLVM_DEBUG(dbgs() << "Found a block that is not reachable from Entry\n");
334 else if (MPDT
->getNode(&MBB
)) // If the block is not in the post dom tree, it
335 // means the block never returns. If that's the
336 // case, we don't want to call
337 // `findNearestCommonDominator`, which will
339 Restore
= MPDT
->findNearestCommonDominator(Restore
, &MBB
);
341 Restore
= nullptr; // Abort, we can't find a restore point in this case.
343 // Make sure we would be able to insert the restore code before the
345 if (Restore
== &MBB
) {
346 for (const MachineInstr
&Terminator
: MBB
.terminators()) {
347 if (!useOrDefCSROrFI(Terminator
, RS
))
349 // One of the terminator needs to happen before the restore point.
350 if (MBB
.succ_empty()) {
351 Restore
= nullptr; // Abort, we can't find a restore point in this case.
354 // Look for a restore point that post-dominates all the successors.
355 // The immediate post-dominator is what we are looking for.
356 Restore
= FindIDom
<>(*Restore
, Restore
->successors(), *MPDT
);
363 dbgs() << "Restore point needs to be spanned on several blocks\n");
367 // Make sure Save and Restore are suitable for shrink-wrapping:
368 // 1. all path from Save needs to lead to Restore before exiting.
369 // 2. all path to Restore needs to go through Save from Entry.
370 // We achieve that by making sure that:
371 // A. Save dominates Restore.
372 // B. Restore post-dominates Save.
373 // C. Save and Restore are in the same loop.
374 bool SaveDominatesRestore
= false;
375 bool RestorePostDominatesSave
= false;
376 while (Save
&& Restore
&&
377 (!(SaveDominatesRestore
= MDT
->dominates(Save
, Restore
)) ||
378 !(RestorePostDominatesSave
= MPDT
->dominates(Restore
, Save
)) ||
379 // Post-dominance is not enough in loops to ensure that all uses/defs
380 // are after the prologue and before the epilogue at runtime.
389 // All the uses/defs of CSRs are dominated by Save and post-dominated
390 // by Restore. However, the CSRs uses are still reachable after
391 // Restore and before Save are executed.
393 // For now, just push the restore/save points outside of loops.
394 // FIXME: Refine the criteria to still find interesting cases
396 MLI
->getLoopFor(Save
) || MLI
->getLoopFor(Restore
))) {
398 if (!SaveDominatesRestore
) {
399 Save
= MDT
->findNearestCommonDominator(Save
, Restore
);
403 if (!RestorePostDominatesSave
)
404 Restore
= MPDT
->findNearestCommonDominator(Restore
, Save
);
407 if (Save
&& Restore
&&
408 (MLI
->getLoopFor(Save
) || MLI
->getLoopFor(Restore
))) {
409 if (MLI
->getLoopDepth(Save
) > MLI
->getLoopDepth(Restore
)) {
410 // Push Save outside of this loop if immediate dominator is different
411 // from save block. If immediate dominator is not different, bail out.
412 Save
= FindIDom
<>(*Save
, Save
->predecessors(), *MDT
);
416 // If the loop does not exit, there is no point in looking
417 // for a post-dominator outside the loop.
418 SmallVector
<MachineBasicBlock
*, 4> ExitBlocks
;
419 MLI
->getLoopFor(Restore
)->getExitingBlocks(ExitBlocks
);
420 // Push Restore outside of this loop.
421 // Look for the immediate post-dominator of the loop exits.
422 MachineBasicBlock
*IPdom
= Restore
;
423 for (MachineBasicBlock
*LoopExitBB
: ExitBlocks
) {
424 IPdom
= FindIDom
<>(*IPdom
, LoopExitBB
->successors(), *MPDT
);
428 // If the immediate post-dominator is not in a less nested loop,
429 // then we are stuck in a program with an infinite loop.
430 // In that case, we will not find a safe point, hence, bail out.
431 if (IPdom
&& MLI
->getLoopDepth(IPdom
) < MLI
->getLoopDepth(Restore
))
442 static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter
*ORE
,
443 StringRef RemarkName
, StringRef RemarkMessage
,
444 const DiagnosticLocation
&Loc
,
445 const MachineBasicBlock
*MBB
) {
447 return MachineOptimizationRemarkMissed(DEBUG_TYPE
, RemarkName
, Loc
, MBB
)
451 LLVM_DEBUG(dbgs() << RemarkMessage
<< '\n');
455 bool ShrinkWrap::runOnMachineFunction(MachineFunction
&MF
) {
456 if (skipFunction(MF
.getFunction()) || MF
.empty() || !isShrinkWrapEnabled(MF
))
459 LLVM_DEBUG(dbgs() << "**** Analysing " << MF
.getName() << '\n');
463 ReversePostOrderTraversal
<MachineBasicBlock
*> RPOT(&*MF
.begin());
464 if (containsIrreducibleCFG
<MachineBasicBlock
*>(RPOT
, *MLI
)) {
465 // If MF is irreducible, a block may be in a loop without
466 // MachineLoopInfo reporting it. I.e., we may use the
467 // post-dominance property in loops, which lead to incorrect
468 // results. Moreover, we may miss that the prologue and
469 // epilogue are not in the same loop, leading to unbalanced
470 // construction/deconstruction of the stack frame.
471 return giveUpWithRemarks(ORE
, "UnsupportedIrreducibleCFG",
472 "Irreducible CFGs are not supported yet.",
473 MF
.getFunction().getSubprogram(), &MF
.front());
476 const TargetRegisterInfo
*TRI
= MF
.getSubtarget().getRegisterInfo();
477 std::unique_ptr
<RegScavenger
> RS(
478 TRI
->requiresRegisterScavenging(MF
) ? new RegScavenger() : nullptr);
480 for (MachineBasicBlock
&MBB
: MF
) {
481 LLVM_DEBUG(dbgs() << "Look into: " << MBB
.getNumber() << ' '
482 << MBB
.getName() << '\n');
484 if (MBB
.isEHFuncletEntry())
485 return giveUpWithRemarks(ORE
, "UnsupportedEHFunclets",
486 "EH Funclets are not supported yet.",
487 MBB
.front().getDebugLoc(), &MBB
);
490 // Push the prologue and epilogue outside of
491 // the region that may throw by making sure
492 // that all the landing pads are at least at the
493 // boundary of the save and restore points.
494 // The problem with exceptions is that the throw
495 // is not properly modeled and in particular, a
496 // basic block can jump out from the middle.
497 updateSaveRestorePoints(MBB
, RS
.get());
498 if (!ArePointsInteresting()) {
499 LLVM_DEBUG(dbgs() << "EHPad prevents shrink-wrapping\n");
505 for (const MachineInstr
&MI
: MBB
) {
506 if (!useOrDefCSROrFI(MI
, RS
.get()))
508 // Save (resp. restore) point must dominate (resp. post dominate)
509 // MI. Look for the proper basic block for those.
510 updateSaveRestorePoints(MBB
, RS
.get());
511 // If we are at a point where we cannot improve the placement of
512 // save/restore instructions, just give up.
513 if (!ArePointsInteresting()) {
514 LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
517 // No need to look for other instructions, this basic block
518 // will already be part of the handled region.
522 if (!ArePointsInteresting()) {
523 // If the points are not interesting at this point, then they must be null
524 // because it means we did not encounter any frame/CSR related code.
525 // Otherwise, we would have returned from the previous loop.
526 assert(!Save
&& !Restore
&& "We miss a shrink-wrap opportunity?!");
527 LLVM_DEBUG(dbgs() << "Nothing to shrink-wrap\n");
531 LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
534 const TargetFrameLowering
*TFI
= MF
.getSubtarget().getFrameLowering();
536 LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
537 << Save
->getNumber() << ' ' << Save
->getName() << ' '
538 << MBFI
->getBlockFreq(Save
).getFrequency()
539 << "\nRestore: " << Restore
->getNumber() << ' '
540 << Restore
->getName() << ' '
541 << MBFI
->getBlockFreq(Restore
).getFrequency() << '\n');
543 bool IsSaveCheap
, TargetCanUseSaveAsPrologue
= false;
544 if (((IsSaveCheap
= EntryFreq
>= MBFI
->getBlockFreq(Save
).getFrequency()) &&
545 EntryFreq
>= MBFI
->getBlockFreq(Restore
).getFrequency()) &&
546 ((TargetCanUseSaveAsPrologue
= TFI
->canUseAsPrologue(*Save
)) &&
547 TFI
->canUseAsEpilogue(*Restore
)))
550 dbgs() << "New points are too expensive or invalid for the target\n");
551 MachineBasicBlock
*NewBB
;
552 if (!IsSaveCheap
|| !TargetCanUseSaveAsPrologue
) {
553 Save
= FindIDom
<>(*Save
, Save
->predecessors(), *MDT
);
558 // Restore is expensive.
559 Restore
= FindIDom
<>(*Restore
, Restore
->successors(), *MPDT
);
564 updateSaveRestorePoints(*NewBB
, RS
.get());
565 } while (Save
&& Restore
);
567 if (!ArePointsInteresting()) {
568 ++NumCandidatesDropped
;
572 LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
573 << Save
->getNumber() << ' ' << Save
->getName()
574 << "\nRestore: " << Restore
->getNumber() << ' '
575 << Restore
->getName() << '\n');
577 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
578 MFI
.setSavePoint(Save
);
579 MFI
.setRestorePoint(Restore
);
584 bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction
&MF
) {
585 const TargetFrameLowering
*TFI
= MF
.getSubtarget().getFrameLowering();
587 switch (EnableShrinkWrapOpt
) {
589 return TFI
->enableShrinkWrapping(MF
) &&
590 // Windows with CFI has some limitations that make it impossible
591 // to use shrink-wrapping.
592 !MF
.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
593 // Sanitizers look at the value of the stack at the location
594 // of the crash. Since a crash can happen anywhere, the
595 // frame must be lowered before anything else happen for the
596 // sanitizers to be able to get a correct stack frame.
597 !(MF
.getFunction().hasFnAttribute(Attribute::SanitizeAddress
) ||
598 MF
.getFunction().hasFnAttribute(Attribute::SanitizeThread
) ||
599 MF
.getFunction().hasFnAttribute(Attribute::SanitizeMemory
) ||
600 MF
.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress
));
601 // If EnableShrinkWrap is set, it takes precedence on whatever the
602 // target sets. The rational is that we assume we want to test
603 // something related to shrink-wrapping.
609 llvm_unreachable("Invalid shrink-wrapping state");