//===- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass looks for safe points where the prologue and epilogue can be
// inserted.
// The safe point for the prologue (resp. epilogue) is called Save
// (resp. Restore).
// A point is safe for prologue (resp. epilogue) if and only if
// it 1) dominates (resp. post-dominates) all the frame related operations and
// between 2) two executions of the Save (resp. Restore) point there is an
// execution of the Restore (resp. Save) point.
//
// For instance, the following points are safe:
// for (int i = 0; i < 10; ++i) {
//   Save
//   ...
//   Restore
// }
// Indeed, the execution looks like Save -> Restore -> Save -> Restore ...
// And the following points are not:
// for (int i = 0; i < 10; ++i) {
//   Save
//   ...
// }
// for (int i = 0; i < 10; ++i) {
//   ...
//   Restore
// }
// Indeed, the execution looks like Save -> Save -> ... -> Restore -> Restore.
//
// This pass also ensures that the safe points are 3) cheaper than the regular
// entry and exit blocks.
//
// Property #1 is ensured via the use of MachineDominatorTree and
// MachinePostDominatorTree.
// Property #2 is ensured via property #1 and MachineLoopInfo, i.e., both
// points must be in the same loop.
// Property #3 is ensured via the MachineBlockFrequencyInfo.
//
// If this pass found points matching all these properties, then
// MachineFrameInfo is updated with this information.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <memory>
89 #define DEBUG_TYPE "shrink-wrap"
91 STATISTIC(NumFunc
, "Number of functions");
92 STATISTIC(NumCandidates
, "Number of shrink-wrapping candidates");
93 STATISTIC(NumCandidatesDropped
,
94 "Number of shrink-wrapping candidates dropped because of frequency");
96 static cl::opt
<cl::boolOrDefault
>
97 EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden
,
98 cl::desc("enable the shrink-wrapping pass"));
102 /// Class to determine where the safe point to insert the
103 /// prologue and epilogue are.
104 /// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the
105 /// shrink-wrapping term for prologue/epilogue placement, this pass
106 /// does not rely on expensive data-flow analysis. Instead we use the
107 /// dominance properties and loop information to decide which point
108 /// are safe for such insertion.
109 class ShrinkWrap
: public MachineFunctionPass
{
110 /// Hold callee-saved information.
111 RegisterClassInfo RCI
;
112 MachineDominatorTree
*MDT
;
113 MachinePostDominatorTree
*MPDT
;
115 /// Current safe point found for the prologue.
116 /// The prologue will be inserted before the first instruction
117 /// in this basic block.
118 MachineBasicBlock
*Save
;
120 /// Current safe point found for the epilogue.
121 /// The epilogue will be inserted before the first terminator instruction
122 /// in this basic block.
123 MachineBasicBlock
*Restore
;
125 /// Hold the information of the basic block frequency.
126 /// Use to check the profitability of the new points.
127 MachineBlockFrequencyInfo
*MBFI
;
129 /// Hold the loop information. Used to determine if Save and Restore
130 /// are in the same loop.
131 MachineLoopInfo
*MLI
;
134 MachineOptimizationRemarkEmitter
*ORE
= nullptr;
136 /// Frequency of the Entry block.
139 /// Current opcode for frame setup.
140 unsigned FrameSetupOpcode
;
142 /// Current opcode for frame destroy.
143 unsigned FrameDestroyOpcode
;
145 /// Stack pointer register, used by llvm.{savestack,restorestack}
149 const MachineBasicBlock
*Entry
;
151 using SetOfRegs
= SmallSetVector
<unsigned, 16>;
153 /// Registers that need to be saved for the current function.
154 mutable SetOfRegs CurrentCSRs
;
156 /// Current MachineFunction.
157 MachineFunction
*MachineFunc
;
159 /// Check if \p MI uses or defines a callee-saved register or
160 /// a frame index. If this is the case, this means \p MI must happen
161 /// after Save and before Restore.
162 bool useOrDefCSROrFI(const MachineInstr
&MI
, RegScavenger
*RS
) const;
164 const SetOfRegs
&getCurrentCSRs(RegScavenger
*RS
) const {
165 if (CurrentCSRs
.empty()) {
167 const TargetFrameLowering
*TFI
=
168 MachineFunc
->getSubtarget().getFrameLowering();
170 TFI
->determineCalleeSaves(*MachineFunc
, SavedRegs
, RS
);
172 for (int Reg
= SavedRegs
.find_first(); Reg
!= -1;
173 Reg
= SavedRegs
.find_next(Reg
))
174 CurrentCSRs
.insert((unsigned)Reg
);
179 /// Update the Save and Restore points such that \p MBB is in
180 /// the region that is dominated by Save and post-dominated by Restore
181 /// and Save and Restore still match the safe point definition.
182 /// Such point may not exist and Save and/or Restore may be null after
184 void updateSaveRestorePoints(MachineBasicBlock
&MBB
, RegScavenger
*RS
);
186 /// Initialize the pass for \p MF.
187 void init(MachineFunction
&MF
) {
188 RCI
.runOnMachineFunction(MF
);
189 MDT
= &getAnalysis
<MachineDominatorTree
>();
190 MPDT
= &getAnalysis
<MachinePostDominatorTree
>();
193 MBFI
= &getAnalysis
<MachineBlockFrequencyInfo
>();
194 MLI
= &getAnalysis
<MachineLoopInfo
>();
195 ORE
= &getAnalysis
<MachineOptimizationRemarkEmitterPass
>().getORE();
196 EntryFreq
= MBFI
->getEntryFreq();
197 const TargetSubtargetInfo
&Subtarget
= MF
.getSubtarget();
198 const TargetInstrInfo
&TII
= *Subtarget
.getInstrInfo();
199 FrameSetupOpcode
= TII
.getCallFrameSetupOpcode();
200 FrameDestroyOpcode
= TII
.getCallFrameDestroyOpcode();
201 SP
= Subtarget
.getTargetLowering()->getStackPointerRegisterToSaveRestore();
209 /// Check whether or not Save and Restore points are still interesting for
211 bool ArePointsInteresting() const { return Save
!= Entry
&& Save
&& Restore
; }
213 /// Check if shrink wrapping is enabled for this target and function.
214 static bool isShrinkWrapEnabled(const MachineFunction
&MF
);
219 ShrinkWrap() : MachineFunctionPass(ID
) {
220 initializeShrinkWrapPass(*PassRegistry::getPassRegistry());
223 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
224 AU
.setPreservesAll();
225 AU
.addRequired
<MachineBlockFrequencyInfo
>();
226 AU
.addRequired
<MachineDominatorTree
>();
227 AU
.addRequired
<MachinePostDominatorTree
>();
228 AU
.addRequired
<MachineLoopInfo
>();
229 AU
.addRequired
<MachineOptimizationRemarkEmitterPass
>();
230 MachineFunctionPass::getAnalysisUsage(AU
);
233 MachineFunctionProperties
getRequiredProperties() const override
{
234 return MachineFunctionProperties().set(
235 MachineFunctionProperties::Property::NoVRegs
);
238 StringRef
getPassName() const override
{ return "Shrink Wrapping analysis"; }
240 /// Perform the shrink-wrapping analysis and update
241 /// the MachineFrameInfo attached to \p MF with the results.
242 bool runOnMachineFunction(MachineFunction
&MF
) override
;
245 } // end anonymous namespace
247 char ShrinkWrap::ID
= 0;
249 char &llvm::ShrinkWrapID
= ShrinkWrap::ID
;
251 INITIALIZE_PASS_BEGIN(ShrinkWrap
, DEBUG_TYPE
, "Shrink Wrap Pass", false, false)
252 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo
)
253 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree
)
254 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree
)
255 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo
)
256 INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass
)
257 INITIALIZE_PASS_END(ShrinkWrap
, DEBUG_TYPE
, "Shrink Wrap Pass", false, false)
259 bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr
&MI
,
260 RegScavenger
*RS
) const {
261 // This prevents premature stack popping when occurs a indirect stack
262 // access. It is overly aggressive for the moment.
263 // TODO: - Obvious non-stack loads and store, such as global values,
264 // are known to not access the stack.
265 // - Further, data dependency and alias analysis can validate
266 // that load and stores never derive from the stack pointer.
267 if (MI
.mayLoadOrStore())
270 if (MI
.getOpcode() == FrameSetupOpcode
||
271 MI
.getOpcode() == FrameDestroyOpcode
) {
272 LLVM_DEBUG(dbgs() << "Frame instruction: " << MI
<< '\n');
275 for (const MachineOperand
&MO
: MI
.operands()) {
276 bool UseOrDefCSR
= false;
278 // Ignore instructions like DBG_VALUE which don't read/def the register.
279 if (!MO
.isDef() && !MO
.readsReg())
281 Register PhysReg
= MO
.getReg();
284 assert(Register::isPhysicalRegister(PhysReg
) && "Unallocated register?!");
285 // The stack pointer is not normally described as a callee-saved register
286 // in calling convention definitions, so we need to watch for it
287 // separately. An SP mentioned by a call instruction, we can ignore,
288 // though, as it's harmless and we do not want to effectively disable tail
289 // calls by forcing the restore point to post-dominate them.
290 UseOrDefCSR
= (!MI
.isCall() && PhysReg
== SP
) ||
291 RCI
.getLastCalleeSavedAlias(PhysReg
);
292 } else if (MO
.isRegMask()) {
293 // Check if this regmask clobbers any of the CSRs.
294 for (unsigned Reg
: getCurrentCSRs(RS
)) {
295 if (MO
.clobbersPhysReg(Reg
)) {
301 // Skip FrameIndex operands in DBG_VALUE instructions.
302 if (UseOrDefCSR
|| (MO
.isFI() && !MI
.isDebugValue())) {
303 LLVM_DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR
<< ") or FI("
304 << MO
.isFI() << "): " << MI
<< '\n');
311 /// Helper function to find the immediate (post) dominator.
312 template <typename ListOfBBs
, typename DominanceAnalysis
>
313 static MachineBasicBlock
*FindIDom(MachineBasicBlock
&Block
, ListOfBBs BBs
,
314 DominanceAnalysis
&Dom
) {
315 MachineBasicBlock
*IDom
= &Block
;
316 for (MachineBasicBlock
*BB
: BBs
) {
317 IDom
= Dom
.findNearestCommonDominator(IDom
, BB
);
326 void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock
&MBB
,
328 // Get rid of the easy cases first.
332 Save
= MDT
->findNearestCommonDominator(Save
, &MBB
);
335 LLVM_DEBUG(dbgs() << "Found a block that is not reachable from Entry\n");
341 else if (MPDT
->getNode(&MBB
)) // If the block is not in the post dom tree, it
342 // means the block never returns. If that's the
343 // case, we don't want to call
344 // `findNearestCommonDominator`, which will
346 Restore
= MPDT
->findNearestCommonDominator(Restore
, &MBB
);
348 Restore
= nullptr; // Abort, we can't find a restore point in this case.
350 // Make sure we would be able to insert the restore code before the
352 if (Restore
== &MBB
) {
353 for (const MachineInstr
&Terminator
: MBB
.terminators()) {
354 if (!useOrDefCSROrFI(Terminator
, RS
))
356 // One of the terminator needs to happen before the restore point.
357 if (MBB
.succ_empty()) {
358 Restore
= nullptr; // Abort, we can't find a restore point in this case.
361 // Look for a restore point that post-dominates all the successors.
362 // The immediate post-dominator is what we are looking for.
363 Restore
= FindIDom
<>(*Restore
, Restore
->successors(), *MPDT
);
370 dbgs() << "Restore point needs to be spanned on several blocks\n");
374 // Make sure Save and Restore are suitable for shrink-wrapping:
375 // 1. all path from Save needs to lead to Restore before exiting.
376 // 2. all path to Restore needs to go through Save from Entry.
377 // We achieve that by making sure that:
378 // A. Save dominates Restore.
379 // B. Restore post-dominates Save.
380 // C. Save and Restore are in the same loop.
381 bool SaveDominatesRestore
= false;
382 bool RestorePostDominatesSave
= false;
383 while (Save
&& Restore
&&
384 (!(SaveDominatesRestore
= MDT
->dominates(Save
, Restore
)) ||
385 !(RestorePostDominatesSave
= MPDT
->dominates(Restore
, Save
)) ||
386 // Post-dominance is not enough in loops to ensure that all uses/defs
387 // are after the prologue and before the epilogue at runtime.
396 // All the uses/defs of CSRs are dominated by Save and post-dominated
397 // by Restore. However, the CSRs uses are still reachable after
398 // Restore and before Save are executed.
400 // For now, just push the restore/save points outside of loops.
401 // FIXME: Refine the criteria to still find interesting cases
403 MLI
->getLoopFor(Save
) || MLI
->getLoopFor(Restore
))) {
405 if (!SaveDominatesRestore
) {
406 Save
= MDT
->findNearestCommonDominator(Save
, Restore
);
410 if (!RestorePostDominatesSave
)
411 Restore
= MPDT
->findNearestCommonDominator(Restore
, Save
);
414 if (Save
&& Restore
&&
415 (MLI
->getLoopFor(Save
) || MLI
->getLoopFor(Restore
))) {
416 if (MLI
->getLoopDepth(Save
) > MLI
->getLoopDepth(Restore
)) {
417 // Push Save outside of this loop if immediate dominator is different
418 // from save block. If immediate dominator is not different, bail out.
419 Save
= FindIDom
<>(*Save
, Save
->predecessors(), *MDT
);
423 // If the loop does not exit, there is no point in looking
424 // for a post-dominator outside the loop.
425 SmallVector
<MachineBasicBlock
*, 4> ExitBlocks
;
426 MLI
->getLoopFor(Restore
)->getExitingBlocks(ExitBlocks
);
427 // Push Restore outside of this loop.
428 // Look for the immediate post-dominator of the loop exits.
429 MachineBasicBlock
*IPdom
= Restore
;
430 for (MachineBasicBlock
*LoopExitBB
: ExitBlocks
) {
431 IPdom
= FindIDom
<>(*IPdom
, LoopExitBB
->successors(), *MPDT
);
435 // If the immediate post-dominator is not in a less nested loop,
436 // then we are stuck in a program with an infinite loop.
437 // In that case, we will not find a safe point, hence, bail out.
438 if (IPdom
&& MLI
->getLoopDepth(IPdom
) < MLI
->getLoopDepth(Restore
))
449 static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter
*ORE
,
450 StringRef RemarkName
, StringRef RemarkMessage
,
451 const DiagnosticLocation
&Loc
,
452 const MachineBasicBlock
*MBB
) {
454 return MachineOptimizationRemarkMissed(DEBUG_TYPE
, RemarkName
, Loc
, MBB
)
458 LLVM_DEBUG(dbgs() << RemarkMessage
<< '\n');
462 bool ShrinkWrap::runOnMachineFunction(MachineFunction
&MF
) {
463 if (skipFunction(MF
.getFunction()) || MF
.empty() || !isShrinkWrapEnabled(MF
))
466 LLVM_DEBUG(dbgs() << "**** Analysing " << MF
.getName() << '\n');
470 ReversePostOrderTraversal
<MachineBasicBlock
*> RPOT(&*MF
.begin());
471 if (containsIrreducibleCFG
<MachineBasicBlock
*>(RPOT
, *MLI
)) {
472 // If MF is irreducible, a block may be in a loop without
473 // MachineLoopInfo reporting it. I.e., we may use the
474 // post-dominance property in loops, which lead to incorrect
475 // results. Moreover, we may miss that the prologue and
476 // epilogue are not in the same loop, leading to unbalanced
477 // construction/deconstruction of the stack frame.
478 return giveUpWithRemarks(ORE
, "UnsupportedIrreducibleCFG",
479 "Irreducible CFGs are not supported yet.",
480 MF
.getFunction().getSubprogram(), &MF
.front());
483 const TargetRegisterInfo
*TRI
= MF
.getSubtarget().getRegisterInfo();
484 std::unique_ptr
<RegScavenger
> RS(
485 TRI
->requiresRegisterScavenging(MF
) ? new RegScavenger() : nullptr);
487 for (MachineBasicBlock
&MBB
: MF
) {
488 LLVM_DEBUG(dbgs() << "Look into: " << MBB
.getNumber() << ' '
489 << MBB
.getName() << '\n');
491 if (MBB
.isEHFuncletEntry())
492 return giveUpWithRemarks(ORE
, "UnsupportedEHFunclets",
493 "EH Funclets are not supported yet.",
494 MBB
.front().getDebugLoc(), &MBB
);
497 // Push the prologue and epilogue outside of
498 // the region that may throw by making sure
499 // that all the landing pads are at least at the
500 // boundary of the save and restore points.
501 // The problem with exceptions is that the throw
502 // is not properly modeled and in particular, a
503 // basic block can jump out from the middle.
504 updateSaveRestorePoints(MBB
, RS
.get());
505 if (!ArePointsInteresting()) {
506 LLVM_DEBUG(dbgs() << "EHPad prevents shrink-wrapping\n");
512 for (const MachineInstr
&MI
: MBB
) {
513 if (!useOrDefCSROrFI(MI
, RS
.get()))
515 // Save (resp. restore) point must dominate (resp. post dominate)
516 // MI. Look for the proper basic block for those.
517 updateSaveRestorePoints(MBB
, RS
.get());
518 // If we are at a point where we cannot improve the placement of
519 // save/restore instructions, just give up.
520 if (!ArePointsInteresting()) {
521 LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
524 // No need to look for other instructions, this basic block
525 // will already be part of the handled region.
529 if (!ArePointsInteresting()) {
530 // If the points are not interesting at this point, then they must be null
531 // because it means we did not encounter any frame/CSR related code.
532 // Otherwise, we would have returned from the previous loop.
533 assert(!Save
&& !Restore
&& "We miss a shrink-wrap opportunity?!");
534 LLVM_DEBUG(dbgs() << "Nothing to shrink-wrap\n");
538 LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
541 const TargetFrameLowering
*TFI
= MF
.getSubtarget().getFrameLowering();
543 LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
544 << Save
->getNumber() << ' ' << Save
->getName() << ' '
545 << MBFI
->getBlockFreq(Save
).getFrequency()
546 << "\nRestore: " << Restore
->getNumber() << ' '
547 << Restore
->getName() << ' '
548 << MBFI
->getBlockFreq(Restore
).getFrequency() << '\n');
550 bool IsSaveCheap
, TargetCanUseSaveAsPrologue
= false;
551 if (((IsSaveCheap
= EntryFreq
>= MBFI
->getBlockFreq(Save
).getFrequency()) &&
552 EntryFreq
>= MBFI
->getBlockFreq(Restore
).getFrequency()) &&
553 ((TargetCanUseSaveAsPrologue
= TFI
->canUseAsPrologue(*Save
)) &&
554 TFI
->canUseAsEpilogue(*Restore
)))
557 dbgs() << "New points are too expensive or invalid for the target\n");
558 MachineBasicBlock
*NewBB
;
559 if (!IsSaveCheap
|| !TargetCanUseSaveAsPrologue
) {
560 Save
= FindIDom
<>(*Save
, Save
->predecessors(), *MDT
);
565 // Restore is expensive.
566 Restore
= FindIDom
<>(*Restore
, Restore
->successors(), *MPDT
);
571 updateSaveRestorePoints(*NewBB
, RS
.get());
572 } while (Save
&& Restore
);
574 if (!ArePointsInteresting()) {
575 ++NumCandidatesDropped
;
579 LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
580 << Save
->getNumber() << ' ' << Save
->getName()
581 << "\nRestore: " << Restore
->getNumber() << ' '
582 << Restore
->getName() << '\n');
584 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
585 MFI
.setSavePoint(Save
);
586 MFI
.setRestorePoint(Restore
);
591 bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction
&MF
) {
592 const TargetFrameLowering
*TFI
= MF
.getSubtarget().getFrameLowering();
594 switch (EnableShrinkWrapOpt
) {
596 return TFI
->enableShrinkWrapping(MF
) &&
597 // Windows with CFI has some limitations that make it impossible
598 // to use shrink-wrapping.
599 !MF
.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
600 // Sanitizers look at the value of the stack at the location
601 // of the crash. Since a crash can happen anywhere, the
602 // frame must be lowered before anything else happen for the
603 // sanitizers to be able to get a correct stack frame.
604 !(MF
.getFunction().hasFnAttribute(Attribute::SanitizeAddress
) ||
605 MF
.getFunction().hasFnAttribute(Attribute::SanitizeThread
) ||
606 MF
.getFunction().hasFnAttribute(Attribute::SanitizeMemory
) ||
607 MF
.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress
));
608 // If EnableShrinkWrap is set, it takes precedence on whatever the
609 // target sets. The rational is that we assume we want to test
610 // something related to shrink-wrapping.
616 llvm_unreachable("Invalid shrink-wrapping state");