1 //===- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass looks for safe points where the prologue and epilogue can be inserted.
11 // The safe point for the prologue (resp. epilogue) is called Save (resp. Restore).
13 // A point is safe for prologue (resp. epilogue) if and only if
14 // it 1) dominates (resp. post-dominates) all the frame related operations and
15 // between 2) two executions of the Save (resp. Restore) point there is an
16 // execution of the Restore (resp. Save) point.
18 // For instance, the following points are safe:
19 // for (int i = 0; i < 10; ++i) {
24 // Indeed, the execution looks like Save -> Restore -> Save -> Restore ...
25 // And the following points are not:
26 // for (int i = 0; i < 10; ++i) {
30 // for (int i = 0; i < 10; ++i) {
34 // Indeed, the execution looks like Save -> Save -> ... -> Restore -> Restore.
36 // This pass also ensures that the safe points are 3) cheaper than the regular
37 // entry and exits blocks.
39 // Property #1 is ensured via the use of MachineDominatorTree and
40 // MachinePostDominatorTree.
41 // Property #2 is ensured via property #1 and MachineLoopInfo, i.e., both
42 // points must be in the same loop.
43 // Property #3 is ensured via the MachineBlockFrequencyInfo.
45 // If this pass found points matching all these properties, then
46 // MachineFrameInfo is updated with this information.
48 //===----------------------------------------------------------------------===//
50 #include "llvm/ADT/BitVector.h"
51 #include "llvm/ADT/PostOrderIterator.h"
52 #include "llvm/ADT/SetVector.h"
53 #include "llvm/ADT/SmallVector.h"
54 #include "llvm/ADT/Statistic.h"
55 #include "llvm/Analysis/CFG.h"
56 #include "llvm/CodeGen/MachineBasicBlock.h"
57 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
58 #include "llvm/CodeGen/MachineDominators.h"
59 #include "llvm/CodeGen/MachineFrameInfo.h"
60 #include "llvm/CodeGen/MachineFunction.h"
61 #include "llvm/CodeGen/MachineFunctionPass.h"
62 #include "llvm/CodeGen/MachineInstr.h"
63 #include "llvm/CodeGen/MachineLoopInfo.h"
64 #include "llvm/CodeGen/MachineOperand.h"
65 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
66 #include "llvm/CodeGen/MachinePostDominators.h"
67 #include "llvm/CodeGen/RegisterClassInfo.h"
68 #include "llvm/CodeGen/RegisterScavenging.h"
69 #include "llvm/CodeGen/TargetFrameLowering.h"
70 #include "llvm/CodeGen/TargetInstrInfo.h"
71 #include "llvm/CodeGen/TargetLowering.h"
72 #include "llvm/CodeGen/TargetRegisterInfo.h"
73 #include "llvm/CodeGen/TargetSubtargetInfo.h"
74 #include "llvm/IR/Attributes.h"
75 #include "llvm/IR/Function.h"
76 #include "llvm/MC/MCAsmInfo.h"
77 #include "llvm/Pass.h"
78 #include "llvm/Support/CommandLine.h"
79 #include "llvm/Support/Debug.h"
80 #include "llvm/Support/ErrorHandling.h"
81 #include "llvm/Support/raw_ostream.h"
82 #include "llvm/Target/TargetMachine.h"
// Tag for this pass. In this file it is passed to INITIALIZE_PASS_BEGIN/END
// and used as the pass name of the missed-optimization remark built in
// giveUpWithRemarks().
89 #define DEBUG_TYPE "shrink-wrap"
// Statistics counters. Only the increment of NumCandidatesDropped (in
// runOnMachineFunction) is visible in this extract.
// NOTE(review): the increments of NumFunc and NumCandidates are not visible
// here — confirm where they happen.
91 STATISTIC(NumFunc
, "Number of functions");
92 STATISTIC(NumCandidates
, "Number of shrink-wrapping candidates");
93 STATISTIC(NumCandidatesDropped
,
94 "Number of shrink-wrapping candidates dropped because of frequency");
// Hidden command-line option "enable-shrink-wrap". Its value type is
// cl::boolOrDefault; isShrinkWrapEnabled() switches on this value to decide
// whether the pass runs.
96 static cl::opt
<cl::boolOrDefault
>
97 EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden
,
98 cl::desc("enable the shrink-wrapping pass"));
102 /// Class to determine where the safe points to insert the
103 /// prologue and epilogue are.
104 /// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the
105 /// shrink-wrapping term for prologue/epilogue placement, this pass
106 /// does not rely on expensive data-flow analysis. Instead we use the
107 /// dominance properties and loop information to decide which points
108 /// are safe for such insertion.
// NOTE(review): this extract drops lines (the embedded source line numbers
// skip); the closing of the class and several member declarations are not
// visible below.
109 class ShrinkWrap
: public MachineFunctionPass
{
110 /// Hold callee-saved information.
111 RegisterClassInfo RCI
;
// Dominator and post-dominator trees; both are fetched in init().
112 MachineDominatorTree
*MDT
;
113 MachinePostDominatorTree
*MPDT
;
115 /// Current safe point found for the prologue.
116 /// The prologue will be inserted before the first instruction
117 /// in this basic block.
118 MachineBasicBlock
*Save
;
120 /// Current safe point found for the epilogue.
121 /// The epilogue will be inserted before the first terminator instruction
122 /// in this basic block.
123 MachineBasicBlock
*Restore
;
125 /// Hold the information of the basic block frequency.
126 /// Used to check the profitability of the new points.
127 MachineBlockFrequencyInfo
*MBFI
;
129 /// Hold the loop information. Used to determine if Save and Restore
130 /// are in the same loop.
131 MachineLoopInfo
*MLI
;
// Remark emitter; fetched in init() and passed to giveUpWithRemarks().
134 MachineOptimizationRemarkEmitter
*ORE
= nullptr;
136 /// Frequency of the Entry block.
// NOTE(review): the EntryFreq member that init() assigns from
// MBFI->getEntryFreq() is not declared in the visible lines — confirm.
139 /// Current opcode for frame setup.
140 unsigned FrameSetupOpcode
;
142 /// Current opcode for frame destroy.
143 unsigned FrameDestroyOpcode
;
145 /// Stack pointer register, used by llvm.{savestack,restorestack}
// NOTE(review): the SP member that init() assigns and useOrDefCSROrFI()
// compares against is not declared in the visible lines — confirm.
// Entry is compared against Save in ArePointsInteresting().
149 const MachineBasicBlock
*Entry
;
// Set of register numbers, kept as a SmallSetVector of unsigned.
151 using SetOfRegs
= SmallSetVector
<unsigned, 16>;
153 /// Registers that need to be saved for the current function.
// Declared mutable so the const getCurrentCSRs() can fill it on demand.
154 mutable SetOfRegs CurrentCSRs
;
156 /// Current MachineFunction.
157 MachineFunction
*MachineFunc
;
159 /// Check if \p MI uses or defines a callee-saved register or
160 /// a frame index. If this is the case, this means \p MI must happen
161 /// after Save and before Restore.
162 bool useOrDefCSROrFI(const MachineInstr
&MI
, RegScavenger
*RS
) const;
// Return the set of callee-saved registers for MachineFunc. The set is
// computed only when CurrentCSRs is empty: determineCalleeSaves() is asked
// to fill SavedRegs and every set bit is inserted into CurrentCSRs.
// \p RS is forwarded to determineCalleeSaves().
// NOTE(review): the declaration of SavedRegs and the return statement are
// not visible in this extract.
164 const SetOfRegs
&getCurrentCSRs(RegScavenger
*RS
) const {
165 if (CurrentCSRs
.empty()) {
167 const TargetFrameLowering
*TFI
=
168 MachineFunc
->getSubtarget().getFrameLowering();
170 TFI
->determineCalleeSaves(*MachineFunc
, SavedRegs
, RS
);
// find_first()/find_next() yield -1 once no further bit is set.
172 for (int Reg
= SavedRegs
.find_first(); Reg
!= -1;
173 Reg
= SavedRegs
.find_next(Reg
))
174 CurrentCSRs
.insert((unsigned)Reg
);
179 /// Update the Save and Restore points such that \p MBB is in
180 /// the region that is dominated by Save and post-dominated by Restore
181 /// and Save and Restore still match the safe point definition.
182 /// Such point may not exist and Save and/or Restore may be null after
/// this call.
184 void updateSaveRestorePoints(MachineBasicBlock
&MBB
, RegScavenger
*RS
);
186 /// Initialize the pass for \p MF.
// Fetches the required analyses and caches the entry frequency, the call
// frame setup/destroy opcodes and the stack pointer register of the target.
// NOTE(review): the tail of this function (after the SP assignment) is not
// visible in this extract.
187 void init(MachineFunction
&MF
) {
188 RCI
.runOnMachineFunction(MF
);
189 MDT
= &getAnalysis
<MachineDominatorTree
>();
190 MPDT
= &getAnalysis
<MachinePostDominatorTree
>();
193 MBFI
= &getAnalysis
<MachineBlockFrequencyInfo
>();
194 MLI
= &getAnalysis
<MachineLoopInfo
>();
195 ORE
= &getAnalysis
<MachineOptimizationRemarkEmitterPass
>().getORE();
196 EntryFreq
= MBFI
->getEntryFreq();
197 const TargetSubtargetInfo
&Subtarget
= MF
.getSubtarget();
198 const TargetInstrInfo
&TII
= *Subtarget
.getInstrInfo();
199 FrameSetupOpcode
= TII
.getCallFrameSetupOpcode();
200 FrameDestroyOpcode
= TII
.getCallFrameDestroyOpcode();
201 SP
= Subtarget
.getTargetLowering()->getStackPointerRegisterToSaveRestore();
209 /// Check whether or not Save and Restore points are still interesting for
/// shrink-wrapping.
// True only when Save and Restore are both non-null and Save is not Entry.
211 bool ArePointsInteresting() const { return Save
!= Entry
&& Save
&& Restore
; }
213 /// Check if shrink wrapping is enabled for this target and function.
214 static bool isShrinkWrapEnabled(const MachineFunction
&MF
);
// Construct the pass and register it with the global PassRegistry.
// NOTE(review): the declaration of the static ID member is not visible in
// this extract; it is defined after the class.
219 ShrinkWrap() : MachineFunctionPass(ID
) {
220 initializeShrinkWrapPass(*PassRegistry::getPassRegistry());
// Declare that this pass preserves all analyses and requires block
// frequency, dominator, post-dominator, loop and remark-emitter analyses.
223 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
224 AU
.setPreservesAll();
225 AU
.addRequired
<MachineBlockFrequencyInfo
>();
226 AU
.addRequired
<MachineDominatorTree
>();
227 AU
.addRequired
<MachinePostDominatorTree
>();
228 AU
.addRequired
<MachineLoopInfo
>();
229 AU
.addRequired
<MachineOptimizationRemarkEmitterPass
>();
230 MachineFunctionPass::getAnalysisUsage(AU
);
// The pass requires the NoVRegs machine-function property.
233 MachineFunctionProperties
getRequiredProperties() const override
{
234 return MachineFunctionProperties().set(
235 MachineFunctionProperties::Property::NoVRegs
);
238 StringRef
getPassName() const override
{ return "Shrink Wrapping analysis"; }
240 /// Perform the shrink-wrapping analysis and update
241 /// the MachineFrameInfo attached to \p MF with the results.
242 bool runOnMachineFunction(MachineFunction
&MF
) override
;
245 } // end anonymous namespace
// Definition of the pass identifier; the constructor passes ID to
// MachineFunctionPass.
247 char ShrinkWrap::ID
= 0;
// Expose the identifier as llvm::ShrinkWrapID (a reference to ShrinkWrap::ID).
249 char &llvm::ShrinkWrapID
= ShrinkWrap::ID
;
// Pass registration. The dependency list names the same five analyses that
// getAnalysisUsage() marks as required.
251 INITIALIZE_PASS_BEGIN(ShrinkWrap
, DEBUG_TYPE
, "Shrink Wrap Pass", false, false)
252 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo
)
253 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree
)
254 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree
)
255 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo
)
256 INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass
)
257 INITIALIZE_PASS_END(ShrinkWrap
, DEBUG_TYPE
, "Shrink Wrap Pass", false, false)
// Inspect \p MI for anything tied to the stack frame: a call-frame
// setup/destroy opcode, a register operand that is the stack pointer (on a
// non-call instruction) or aliases a callee-saved register, a register mask
// that clobbers one of the current CSRs, or a frame-index operand outside a
// DBG_VALUE. \p RS is only forwarded to getCurrentCSRs().
// NOTE(review): the return statements and several closing braces are not
// visible in this extract; per the declaration's doc comment, a match means
// MI must execute after Save and before Restore.
259 bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr
&MI
,
260 RegScavenger
*RS
) const {
// Call-frame setup/destroy pseudo instructions are reported directly.
261 if (MI
.getOpcode() == FrameSetupOpcode
||
262 MI
.getOpcode() == FrameDestroyOpcode
) {
263 LLVM_DEBUG(dbgs() << "Frame instruction: " << MI
<< '\n');
266 for (const MachineOperand
&MO
: MI
.operands()) {
267 bool UseOrDefCSR
= false;
// NOTE(review): the check that MO is a register operand (the `if` matching
// the `else if (MO.isRegMask())` below) is not visible in this extract.
269 // Ignore instructions like DBG_VALUE which don't read/def the register.
270 if (!MO
.isDef() && !MO
.readsReg())
272 unsigned PhysReg
= MO
.getReg();
275 assert(TargetRegisterInfo::isPhysicalRegister(PhysReg
) &&
276 "Unallocated register?!");
277 // The stack pointer is not normally described as a callee-saved register
278 // in calling convention definitions, so we need to watch for it
279 // separately. An SP mentioned by a call instruction, we can ignore,
280 // though, as it's harmless and we do not want to effectively disable tail
281 // calls by forcing the restore point to post-dominate them.
282 UseOrDefCSR
= (!MI
.isCall() && PhysReg
== SP
) ||
283 RCI
.getLastCalleeSavedAlias(PhysReg
);
284 } else if (MO
.isRegMask()) {
285 // Check if this regmask clobbers any of the CSRs.
286 for (unsigned Reg
: getCurrentCSRs(RS
)) {
287 if (MO
.clobbersPhysReg(Reg
)) {
293 // Skip FrameIndex operands in DBG_VALUE instructions.
294 if (UseOrDefCSR
|| (MO
.isFI() && !MI
.isDebugValue())) {
295 LLVM_DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR
<< ") or FI("
296 << MO
.isFI() << "): " << MI
<< '\n');
303 /// Helper function to find the immediate (post) dominator.
// Starts from \p Block and folds Dom.findNearestCommonDominator() over every
// block in \p BBs. \p Dom is either the dominator tree (callers pass *MDT with
// predecessors) or the post-dominator tree (callers pass *MPDT with
// successors).
// NOTE(review): the rest of the loop body and the return statement are not
// visible in this extract.
304 template <typename ListOfBBs
, typename DominanceAnalysis
>
305 static MachineBasicBlock
*FindIDom(MachineBasicBlock
&Block
, ListOfBBs BBs
,
306 DominanceAnalysis
&Dom
) {
307 MachineBasicBlock
*IDom
= &Block
;
308 for (MachineBasicBlock
*BB
: BBs
) {
309 IDom
= Dom
.findNearestCommonDominator(IDom
, BB
);
// Grow the [Save, Restore] region so that it covers \p MBB: Save is moved to
// a common dominator, Restore to a common post-dominator, and both are then
// adjusted until Save dominates Restore, Restore post-dominates Save, and
// neither sits inside a loop. Restore (and possibly Save) may end up null
// when no suitable point exists.
// NOTE(review): many lines of this definition (the second parameter, several
// conditions, `else` branches, `break`s and closing braces) are not visible
// in this extract.
318 void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock
&MBB
,
320 // Get rid of the easy cases first.
324 Save
= MDT
->findNearestCommonDominator(Save
, &MBB
);
327 LLVM_DEBUG(dbgs() << "Found a block that is not reachable from Entry\n");
333 else if (MPDT
->getNode(&MBB
)) // If the block is not in the post dom tree, it
334 // means the block never returns. If that's the
335 // case, we don't want to call
336 // `findNearestCommonDominator`, which will
338 Restore
= MPDT
->findNearestCommonDominator(Restore
, &MBB
);
340 Restore
= nullptr; // Abort, we can't find a restore point in this case.
342 // Make sure we would be able to insert the restore code before the
344 if (Restore
== &MBB
) {
345 for (const MachineInstr
&Terminator
: MBB
.terminators()) {
346 if (!useOrDefCSROrFI(Terminator
, RS
))
348 // One of the terminators needs to happen before the restore point.
349 if (MBB
.succ_empty()) {
350 Restore
= nullptr; // Abort, we can't find a restore point in this case.
353 // Look for a restore point that post-dominates all the successors.
354 // The immediate post-dominator is what we are looking for.
355 Restore
= FindIDom
<>(*Restore
, Restore
->successors(), *MPDT
);
362 dbgs() << "Restore point needs to be spanned on several blocks\n");
366 // Make sure Save and Restore are suitable for shrink-wrapping:
367 // 1. all paths from Save need to lead to Restore before exiting.
368 // 2. all paths to Restore need to go through Save from Entry.
369 // We achieve that by making sure that:
370 // A. Save dominates Restore.
371 // B. Restore post-dominates Save.
372 // C. Save and Restore are in the same loop.
373 bool SaveDominatesRestore
= false;
374 bool RestorePostDominatesSave
= false;
// The two flags are assigned inside the loop condition so the body can tell
// which of properties A and B failed.
375 while (Save
&& Restore
&&
376 (!(SaveDominatesRestore
= MDT
->dominates(Save
, Restore
)) ||
377 !(RestorePostDominatesSave
= MPDT
->dominates(Restore
, Save
)) ||
378 // Post-dominance is not enough in loops to ensure that all uses/defs
379 // are after the prologue and before the epilogue at runtime.
388 // All the uses/defs of CSRs are dominated by Save and post-dominated
389 // by Restore. However, the CSRs uses are still reachable after
390 // Restore and before Save are executed.
392 // For now, just push the restore/save points outside of loops.
393 // FIXME: Refine the criteria to still find interesting cases
395 MLI
->getLoopFor(Save
) || MLI
->getLoopFor(Restore
))) {
397 if (!SaveDominatesRestore
) {
398 Save
= MDT
->findNearestCommonDominator(Save
, Restore
);
402 if (!RestorePostDominatesSave
)
403 Restore
= MPDT
->findNearestCommonDominator(Restore
, Save
);
// Loop handling: the more deeply nested of the two points is pushed outward.
406 if (Save
&& Restore
&&
407 (MLI
->getLoopFor(Save
) || MLI
->getLoopFor(Restore
))) {
408 if (MLI
->getLoopDepth(Save
) > MLI
->getLoopDepth(Restore
)) {
409 // Push Save outside of this loop if immediate dominator is different
410 // from save block. If immediate dominator is not different, bail out.
411 Save
= FindIDom
<>(*Save
, Save
->predecessors(), *MDT
);
415 // If the loop does not exit, there is no point in looking
416 // for a post-dominator outside the loop.
417 SmallVector
<MachineBasicBlock
*, 4> ExitBlocks
;
418 MLI
->getLoopFor(Restore
)->getExitingBlocks(ExitBlocks
);
419 // Push Restore outside of this loop.
420 // Look for the immediate post-dominator of the loop exits.
421 MachineBasicBlock
*IPdom
= Restore
;
422 for (MachineBasicBlock
*LoopExitBB
: ExitBlocks
) {
423 IPdom
= FindIDom
<>(*IPdom
, LoopExitBB
->successors(), *MPDT
);
427 // If the immediate post-dominator is not in a less nested loop,
428 // then we are stuck in a program with an infinite loop.
429 // In that case, we will not find a safe point, hence, bail out.
430 if (IPdom
&& MLI
->getLoopDepth(IPdom
) < MLI
->getLoopDepth(Restore
))
// Report that shrink-wrapping was abandoned: a
// MachineOptimizationRemarkMissed is built from DEBUG_TYPE, \p RemarkName,
// \p Loc and \p MBB, and \p RemarkMessage is printed to the debug stream.
// runOnMachineFunction() returns this function's result directly.
// NOTE(review): the call on \p ORE that wraps the remark and the final return
// statement are not visible in this extract — confirm the returned value.
441 static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter
*ORE
,
442 StringRef RemarkName
, StringRef RemarkMessage
,
443 const DiagnosticLocation
&Loc
,
444 const MachineBasicBlock
*MBB
) {
446 return MachineOptimizationRemarkMissed(DEBUG_TYPE
, RemarkName
, Loc
, MBB
)
450 LLVM_DEBUG(dbgs() << RemarkMessage
<< '\n');
// Entry point of the pass. For each basic block of \p MF it widens the
// Save/Restore region around EH-related blocks and around instructions that
// use or define a CSR or frame index, then checks the candidates against the
// entry frequency and the target's prologue/epilogue constraints, and finally
// records the points in the MachineFrameInfo.
// NOTE(review): many lines of this definition (early returns, the call to
// init(), loop openers/closers and `break`s) are not visible in this extract.
454 bool ShrinkWrap::runOnMachineFunction(MachineFunction
&MF
) {
// Nothing to do for skipped or empty functions, or when the pass is disabled.
455 if (skipFunction(MF
.getFunction()) || MF
.empty() || !isShrinkWrapEnabled(MF
))
458 LLVM_DEBUG(dbgs() << "**** Analysing " << MF
.getName() << '\n');
462 ReversePostOrderTraversal
<MachineBasicBlock
*> RPOT(&*MF
.begin());
463 if (containsIrreducibleCFG
<MachineBasicBlock
*>(RPOT
, *MLI
)) {
464 // If MF is irreducible, a block may be in a loop without
465 // MachineLoopInfo reporting it. I.e., we may use the
466 // post-dominance property in loops, which lead to incorrect
467 // results. Moreover, we may miss that the prologue and
468 // epilogue are not in the same loop, leading to unbalanced
469 // construction/deconstruction of the stack frame.
470 return giveUpWithRemarks(ORE
, "UnsupportedIrreducibleCFG",
471 "Irreducible CFGs are not supported yet.",
472 MF
.getFunction().getSubprogram(), &MF
.front());
475 const TargetRegisterInfo
*TRI
= MF
.getSubtarget().getRegisterInfo();
// A RegScavenger is only allocated when the target requires register
// scavenging for MF; otherwise RS holds nullptr.
476 std::unique_ptr
<RegScavenger
> RS(
477 TRI
->requiresRegisterScavenging(MF
) ? new RegScavenger() : nullptr);
479 for (MachineBasicBlock
&MBB
: MF
) {
480 LLVM_DEBUG(dbgs() << "Look into: " << MBB
.getNumber() << ' '
481 << MBB
.getName() << '\n');
483 if (MBB
.isEHFuncletEntry())
484 return giveUpWithRemarks(ORE
, "UnsupportedEHFunclets",
485 "EH Funclets are not supported yet.",
486 MBB
.front().getDebugLoc(), &MBB
);
// NOTE(review): the condition guarding the following EH-pad handling is not
// visible in this extract.
489 // Push the prologue and epilogue outside of
490 // the region that may throw by making sure
491 // that all the landing pads are at least at the
492 // boundary of the save and restore points.
493 // The problem with exceptions is that the throw
494 // is not properly modeled and in particular, a
495 // basic block can jump out from the middle.
496 updateSaveRestorePoints(MBB
, RS
.get());
497 if (!ArePointsInteresting()) {
498 LLVM_DEBUG(dbgs() << "EHPad prevents shrink-wrapping\n");
504 for (const MachineInstr
&MI
: MBB
) {
505 if (!useOrDefCSROrFI(MI
, RS
.get()))
507 // Save (resp. restore) point must dominate (resp. post dominate)
508 // MI. Look for the proper basic block for those.
509 updateSaveRestorePoints(MBB
, RS
.get());
510 // If we are at a point where we cannot improve the placement of
511 // save/restore instructions, just give up.
512 if (!ArePointsInteresting()) {
513 LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
516 // No need to look for other instructions, this basic block
517 // will already be part of the handled region.
521 if (!ArePointsInteresting()) {
522 // If the points are not interesting at this point, then they must be null
523 // because it means we did not encounter any frame/CSR related code.
524 // Otherwise, we would have returned from the previous loop.
525 assert(!Save
&& !Restore
&& "We miss a shrink-wrap opportunity?!");
526 LLVM_DEBUG(dbgs() << "Nothing to shrink-wrap\n");
530 LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
533 const TargetFrameLowering
*TFI
= MF
.getSubtarget().getFrameLowering();
// Profitability/legality loop: the candidates must be no more frequent than
// the entry block and must be usable as prologue/epilogue by the target.
535 LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
536 << Save
->getNumber() << ' ' << Save
->getName() << ' '
537 << MBFI
->getBlockFreq(Save
).getFrequency()
538 << "\nRestore: " << Restore
->getNumber() << ' '
539 << Restore
->getName() << ' '
540 << MBFI
->getBlockFreq(Restore
).getFrequency() << '\n');
// IsSaveCheap and TargetCanUseSaveAsPrologue are assigned inside the
// condition so the code below can tell which point to move.
542 bool IsSaveCheap
, TargetCanUseSaveAsPrologue
= false;
543 if (((IsSaveCheap
= EntryFreq
>= MBFI
->getBlockFreq(Save
).getFrequency()) &&
544 EntryFreq
>= MBFI
->getBlockFreq(Restore
).getFrequency()) &&
545 ((TargetCanUseSaveAsPrologue
= TFI
->canUseAsPrologue(*Save
)) &&
546 TFI
->canUseAsEpilogue(*Restore
)))
549 dbgs() << "New points are too expensive or invalid for the target\n");
// NOTE(review): the assignments to NewBB are not visible in this extract.
550 MachineBasicBlock
*NewBB
;
551 if (!IsSaveCheap
|| !TargetCanUseSaveAsPrologue
) {
552 Save
= FindIDom
<>(*Save
, Save
->predecessors(), *MDT
);
557 // Restore is expensive.
558 Restore
= FindIDom
<>(*Restore
, Restore
->successors(), *MPDT
);
563 updateSaveRestorePoints(*NewBB
, RS
.get());
564 } while (Save
&& Restore
);
566 if (!ArePointsInteresting()) {
567 ++NumCandidatesDropped
;
571 LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
572 << Save
->getNumber() << ' ' << Save
->getName()
573 << "\nRestore: " << Restore
->getNumber() << ' '
574 << Restore
->getName() << '\n');
// Publish the chosen points on the function's frame info.
576 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
577 MFI
.setSavePoint(Save
);
578 MFI
.setRestorePoint(Restore
);
// Decide whether shrink-wrapping runs for \p MF by switching on the
// EnableShrinkWrapOpt command-line value. In the visible case the target must
// enable shrink-wrapping, must not use Windows CFI, and the function must not
// carry any of the address/thread/memory/hwaddress sanitizer attributes.
// NOTE(review): the `case` labels and the returns of the explicit on/off
// cases are not visible in this extract.
583 bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction
&MF
) {
584 const TargetFrameLowering
*TFI
= MF
.getSubtarget().getFrameLowering();
586 switch (EnableShrinkWrapOpt
) {
588 return TFI
->enableShrinkWrapping(MF
) &&
589 // Windows with CFI has some limitations that make it impossible
590 // to use shrink-wrapping.
591 !MF
.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
592 // Sanitizers look at the value of the stack at the location
593 // of the crash. Since a crash can happen anywhere, the
594 // frame must be lowered before anything else happens for the
595 // sanitizers to be able to get a correct stack frame.
596 !(MF
.getFunction().hasFnAttribute(Attribute::SanitizeAddress
) ||
597 MF
.getFunction().hasFnAttribute(Attribute::SanitizeThread
) ||
598 MF
.getFunction().hasFnAttribute(Attribute::SanitizeMemory
) ||
599 MF
.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress
));
600 // If EnableShrinkWrap is set, it takes precedence over whatever the
601 // target sets. The rationale is that we assume we want to test
602 // something related to shrink-wrapping.
608 llvm_unreachable("Invalid shrink-wrapping state");