1 //===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a shrink wrapping variant of prolog/epilog insertion:
11 // - Spills and restores of callee-saved registers (CSRs) are placed in the
12 // machine CFG to tightly surround their uses so that execution paths that
13 // do not use CSRs do not pay the spill/restore penalty.
15 // - Avoiding placment of spills/restores in loops: if a CSR is used inside a
16 // loop the spills are placed in the loop preheader, and restores are
17 // placed in the loop exit nodes (the successors of loop _exiting_ nodes).
19 // - Covering paths without CSR uses:
20 // If a region in a CFG uses CSRs and has multiple entry and/or exit points,
21 // the use info for the CSRs inside the region is propagated outward in the
22 // CFG to ensure validity of the spill/restore placements. This decreases
23 // the effectiveness of shrink wrapping but does not require edge splitting
24 // in the machine CFG.
26 // This shrink wrapping implementation uses an iterative analysis to determine
27 // which basic blocks require spills and restores for CSRs.
29 // This pass uses MachineDominators and MachineLoopInfo. Loop information
30 // is used to prevent placement of callee-saved register spills/restores
31 // in the bodies of loops.
33 //===----------------------------------------------------------------------===//
35 #define DEBUG_TYPE "shrink-wrap"
37 #include "PrologEpilogInserter.h"
38 #include "llvm/CodeGen/MachineDominators.h"
39 #include "llvm/CodeGen/MachineLoopInfo.h"
40 #include "llvm/CodeGen/MachineInstr.h"
41 #include "llvm/CodeGen/MachineFrameInfo.h"
42 #include "llvm/CodeGen/MachineRegisterInfo.h"
43 #include "llvm/Target/TargetMachine.h"
44 #include "llvm/Target/TargetRegisterInfo.h"
45 #include "llvm/ADT/SparseBitVector.h"
46 #include "llvm/ADT/DenseMap.h"
47 #include "llvm/ADT/PostOrderIterator.h"
48 #include "llvm/ADT/Statistic.h"
49 #include "llvm/Support/CommandLine.h"
50 #include "llvm/Support/Compiler.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/ADT/STLExtras.h"
53 #include "llvm/ADT/Statistic.h"
58 STATISTIC(numSRReduced
, "Number of CSR spills+restores reduced.");
62 ShrinkWrapping("shrink-wrap",
63 cl::desc("Shrink wrap callee-saved register spills/restores"));
65 // Shrink wrap only the specified function, a debugging aid.
66 static cl::opt
<std::string
>
67 ShrinkWrapFunc("shrink-wrap-func", cl::Hidden
,
68 cl::desc("Shrink wrap the specified function"),
69 cl::value_desc("funcname"),
72 // Debugging level for shrink wrapping.
73 enum ShrinkWrapDebugLevel
{
74 None
, BasicInfo
, Iterations
, Details
77 static cl::opt
<enum ShrinkWrapDebugLevel
>
78 ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden
,
79 cl::desc("Print shrink wrapping debugging information"),
81 clEnumVal(None
, "disable debug output"),
82 clEnumVal(BasicInfo
, "print basic DF sets"),
83 clEnumVal(Iterations
, "print SR sets for each iteration"),
84 clEnumVal(Details
, "print all DF sets"),
88 void PEI::getAnalysisUsage(AnalysisUsage
&AU
) const {
90 if (ShrinkWrapping
|| ShrinkWrapFunc
!= "") {
91 AU
.addRequired
<MachineLoopInfo
>();
92 AU
.addRequired
<MachineDominatorTree
>();
94 AU
.addPreserved
<MachineLoopInfo
>();
95 AU
.addPreserved
<MachineDominatorTree
>();
96 MachineFunctionPass::getAnalysisUsage(AU
);
99 //===----------------------------------------------------------------------===//
100 // ShrinkWrapping implementation
101 //===----------------------------------------------------------------------===//
103 // Convienences for dealing with machine loops.
104 MachineBasicBlock
* PEI::getTopLevelLoopPreheader(MachineLoop
* LP
) {
105 assert(LP
&& "Machine loop is NULL.");
106 MachineBasicBlock
* PHDR
= LP
->getLoopPreheader();
107 MachineLoop
* PLP
= LP
->getParentLoop();
109 PHDR
= PLP
->getLoopPreheader();
110 PLP
= PLP
->getParentLoop();
115 MachineLoop
* PEI::getTopLevelLoopParent(MachineLoop
*LP
) {
118 MachineLoop
* PLP
= LP
->getParentLoop();
121 PLP
= PLP
->getParentLoop();
126 bool PEI::isReturnBlock(MachineBasicBlock
* MBB
) {
127 return (MBB
&& !MBB
->empty() && MBB
->back().getDesc().isReturn());
130 // Initialize shrink wrapping DFA sets, called before iterations.
131 void PEI::clearAnticAvailSets() {
138 // Clear all sets constructed by shrink wrapping.
139 void PEI::clearAllSets() {
140 ReturnBlocks
.clear();
141 clearAnticAvailSets();
149 // Initialize all shrink wrapping data.
150 void PEI::initShrinkWrappingInfo() {
154 HasFastExitPath
= false;
156 ShrinkWrapThisFunction
= ShrinkWrapping
;
157 // DEBUG: enable or disable shrink wrapping for the current function
158 // via --shrink-wrap-func=<funcname>.
160 if (ShrinkWrapFunc
!= "") {
161 std::string MFName
= MF
->getFunction()->getNameStr();
162 ShrinkWrapThisFunction
= (MFName
== ShrinkWrapFunc
);
168 /// placeCSRSpillsAndRestores - determine which MBBs of the function
169 /// need save, restore code for callee-saved registers by doing a DF analysis
170 /// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
171 /// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
172 /// is used to ensure that CSR save/restore code is not placed inside loops.
173 /// This function computes the maps of MBBs -> CSRs to spill and restore
174 /// in CSRSave, CSRRestore.
176 /// If shrink wrapping is not being performed, place all spills in
177 /// the entry block, all restores in return blocks. In this case,
178 /// CSRSave has a single mapping, CSRRestore has mappings for each
181 void PEI::placeCSRSpillsAndRestores(MachineFunction
&Fn
) {
185 initShrinkWrappingInfo();
187 DEBUG(if (ShrinkWrapThisFunction
) {
188 errs() << "Place CSR spills/restores for "
189 << MF
->getFunction()->getName() << "\n";
192 if (calculateSets(Fn
))
193 placeSpillsAndRestores(Fn
);
196 /// calcAnticInOut - calculate the anticipated in/out reg sets
197 /// for the given MBB by looking forward in the MCFG at MBB's
200 bool PEI::calcAnticInOut(MachineBasicBlock
* MBB
) {
201 bool changed
= false;
203 // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
204 SmallVector
<MachineBasicBlock
*, 4> successors
;
205 for (MachineBasicBlock::succ_iterator SI
= MBB
->succ_begin(),
206 SE
= MBB
->succ_end(); SI
!= SE
; ++SI
) {
207 MachineBasicBlock
* SUCC
= *SI
;
209 successors
.push_back(SUCC
);
212 unsigned i
= 0, e
= successors
.size();
214 CSRegSet prevAnticOut
= AnticOut
[MBB
];
215 MachineBasicBlock
* SUCC
= successors
[i
];
217 AnticOut
[MBB
] = AnticIn
[SUCC
];
218 for (++i
; i
!= e
; ++i
) {
219 SUCC
= successors
[i
];
220 AnticOut
[MBB
] &= AnticIn
[SUCC
];
222 if (prevAnticOut
!= AnticOut
[MBB
])
226 // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
227 CSRegSet prevAnticIn
= AnticIn
[MBB
];
228 AnticIn
[MBB
] = CSRUsed
[MBB
] | AnticOut
[MBB
];
229 if (prevAnticIn
|= AnticIn
[MBB
])
234 /// calcAvailInOut - calculate the available in/out reg sets
235 /// for the given MBB by looking backward in the MCFG at MBB's
238 bool PEI::calcAvailInOut(MachineBasicBlock
* MBB
) {
239 bool changed
= false;
241 // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
242 SmallVector
<MachineBasicBlock
*, 4> predecessors
;
243 for (MachineBasicBlock::pred_iterator PI
= MBB
->pred_begin(),
244 PE
= MBB
->pred_end(); PI
!= PE
; ++PI
) {
245 MachineBasicBlock
* PRED
= *PI
;
247 predecessors
.push_back(PRED
);
250 unsigned i
= 0, e
= predecessors
.size();
252 CSRegSet prevAvailIn
= AvailIn
[MBB
];
253 MachineBasicBlock
* PRED
= predecessors
[i
];
255 AvailIn
[MBB
] = AvailOut
[PRED
];
256 for (++i
; i
!= e
; ++i
) {
257 PRED
= predecessors
[i
];
258 AvailIn
[MBB
] &= AvailOut
[PRED
];
260 if (prevAvailIn
!= AvailIn
[MBB
])
264 // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
265 CSRegSet prevAvailOut
= AvailOut
[MBB
];
266 AvailOut
[MBB
] = CSRUsed
[MBB
] | AvailIn
[MBB
];
267 if (prevAvailOut
|= AvailOut
[MBB
])
272 /// calculateAnticAvail - build the sets anticipated and available
273 /// registers in the MCFG of the current function iteratively,
274 /// doing a combined forward and backward analysis.
276 void PEI::calculateAnticAvail(MachineFunction
&Fn
) {
277 // Initialize data flow sets.
278 clearAnticAvailSets();
280 // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
282 unsigned iterations
= 0;
286 for (MachineFunction::iterator MBBI
= Fn
.begin(), MBBE
= Fn
.end();
287 MBBI
!= MBBE
; ++MBBI
) {
288 MachineBasicBlock
* MBB
= MBBI
;
290 // Calculate anticipated in, out regs at MBB from
291 // anticipated at successors of MBB.
292 changed
|= calcAnticInOut(MBB
);
294 // Calculate available in, out regs at MBB from
295 // available at predecessors of MBB.
296 changed
|= calcAvailInOut(MBB
);
300 DEBUG(if (ShrinkWrapDebugging
>= Details
) {
301 DOUT
<< "-----------------------------------------------------------\n";
302 DOUT
<< " Antic/Avail Sets:\n";
303 DOUT
<< "-----------------------------------------------------------\n";
304 DOUT
<< "iterations = " << iterations
<< "\n";
305 DOUT
<< "-----------------------------------------------------------\n";
306 DOUT
<< "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n";
307 DOUT
<< "-----------------------------------------------------------\n";
308 for (MachineFunction::iterator MBBI
= Fn
.begin(), MBBE
= Fn
.end();
309 MBBI
!= MBBE
; ++MBBI
) {
310 MachineBasicBlock
* MBB
= MBBI
;
313 DOUT
<< "-----------------------------------------------------------\n";
317 /// propagateUsesAroundLoop - copy used register info from MBB to all blocks
318 /// of the loop given by LP and its parent loops. This prevents spills/restores
319 /// from being placed in the bodies of loops.
321 void PEI::propagateUsesAroundLoop(MachineBasicBlock
* MBB
, MachineLoop
* LP
) {
325 std::vector
<MachineBasicBlock
*> loopBlocks
= LP
->getBlocks();
326 for (unsigned i
= 0, e
= loopBlocks
.size(); i
!= e
; ++i
) {
327 MachineBasicBlock
* LBB
= loopBlocks
[i
];
330 if (CSRUsed
[LBB
].contains(CSRUsed
[MBB
]))
332 CSRUsed
[LBB
] |= CSRUsed
[MBB
];
336 /// calculateSets - collect the CSRs used in this function, compute
337 /// the DF sets that describe the initial minimal regions in the
338 /// Machine CFG around which CSR spills and restores must be placed.
340 /// Additionally, this function decides if shrink wrapping should
341 /// be disabled for the current function, checking the following:
342 /// 1. the current function has more than 500 MBBs: heuristic limit
343 /// on function size to reduce compile time impact of the current
344 /// iterative algorithm.
345 /// 2. all CSRs are used in the entry block.
346 /// 3. all CSRs are used in all immediate successors of the entry block.
347 /// 4. all CSRs are used in a subset of blocks, each of which dominates
348 /// all return blocks. These blocks, taken as a subgraph of the MCFG,
349 /// are equivalent to the entry block since all execution paths pass
352 bool PEI::calculateSets(MachineFunction
&Fn
) {
353 // Sets used to compute spill, restore placement sets.
354 const std::vector
<CalleeSavedInfo
> CSI
=
355 Fn
.getFrameInfo()->getCalleeSavedInfo();
357 // If no CSRs used, we are done.
359 DEBUG(if (ShrinkWrapThisFunction
)
360 errs() << "DISABLED: " << Fn
.getFunction()->getName()
361 << ": uses no callee-saved registers\n");
365 // Save refs to entry and return blocks.
366 EntryBlock
= Fn
.begin();
367 for (MachineFunction::iterator MBB
= Fn
.begin(), E
= Fn
.end();
369 if (isReturnBlock(MBB
))
370 ReturnBlocks
.push_back(MBB
);
372 // Determine if this function has fast exit paths.
373 DEBUG(if (ShrinkWrapThisFunction
)
376 // Limit shrink wrapping via the current iterative bit vector
377 // implementation to functions with <= 500 MBBs.
378 if (Fn
.size() > 500) {
379 DEBUG(if (ShrinkWrapThisFunction
)
380 errs() << "DISABLED: " << Fn
.getFunction()->getName()
381 << ": too large (" << Fn
.size() << " MBBs)\n");
382 ShrinkWrapThisFunction
= false;
385 // Return now if not shrink wrapping.
386 if (! ShrinkWrapThisFunction
)
389 // Collect set of used CSRs.
390 for (unsigned inx
= 0, e
= CSI
.size(); inx
!= e
; ++inx
) {
394 // Walk instructions in all MBBs, create CSRUsed[] sets, choose
395 // whether or not to shrink wrap this function.
396 MachineLoopInfo
&LI
= getAnalysis
<MachineLoopInfo
>();
397 MachineDominatorTree
&DT
= getAnalysis
<MachineDominatorTree
>();
398 const TargetRegisterInfo
*TRI
= Fn
.getTarget().getRegisterInfo();
400 bool allCSRUsesInEntryBlock
= true;
401 for (MachineFunction::iterator MBBI
= Fn
.begin(), MBBE
= Fn
.end();
402 MBBI
!= MBBE
; ++MBBI
) {
403 MachineBasicBlock
* MBB
= MBBI
;
404 for (MachineBasicBlock::iterator I
= MBB
->begin(); I
!= MBB
->end(); ++I
) {
405 for (unsigned inx
= 0, e
= CSI
.size(); inx
!= e
; ++inx
) {
406 unsigned Reg
= CSI
[inx
].getReg();
407 // If instruction I reads or modifies Reg, add it to UsedCSRegs,
408 // CSRUsed map for the current block.
409 for (unsigned opInx
= 0, opEnd
= I
->getNumOperands();
410 opInx
!= opEnd
; ++opInx
) {
411 const MachineOperand
&MO
= I
->getOperand(opInx
);
412 if (! (MO
.isReg() && (MO
.isUse() || MO
.isDef())))
414 unsigned MOReg
= MO
.getReg();
418 (TargetRegisterInfo::isPhysicalRegister(MOReg
) &&
419 TargetRegisterInfo::isPhysicalRegister(Reg
) &&
420 TRI
->isSubRegister(Reg
, MOReg
))) {
421 // CSR Reg is defined/used in block MBB.
422 CSRUsed
[MBB
].set(inx
);
423 // Check for uses in EntryBlock.
424 if (MBB
!= EntryBlock
)
425 allCSRUsesInEntryBlock
= false;
431 if (CSRUsed
[MBB
].empty())
434 // Propagate CSRUsed[MBB] in loops
435 if (MachineLoop
* LP
= LI
.getLoopFor(MBB
)) {
436 // Add top level loop to work list.
437 MachineBasicBlock
* HDR
= getTopLevelLoopPreheader(LP
);
438 MachineLoop
* PLP
= getTopLevelLoopParent(LP
);
441 HDR
= PLP
->getHeader();
442 assert(HDR
->pred_size() > 0 && "Loop header has no predecessors?");
443 MachineBasicBlock::pred_iterator PI
= HDR
->pred_begin();
448 // Push uses from inside loop to its parent loops,
449 // or to all other MBBs in its loop.
450 if (LP
->getLoopDepth() > 1) {
451 for (MachineLoop
* PLP
= LP
->getParentLoop(); PLP
;
452 PLP
= PLP
->getParentLoop()) {
453 propagateUsesAroundLoop(MBB
, PLP
);
456 propagateUsesAroundLoop(MBB
, LP
);
461 if (allCSRUsesInEntryBlock
) {
462 DEBUG(errs() << "DISABLED: " << Fn
.getFunction()->getName()
463 << ": all CSRs used in EntryBlock\n");
464 ShrinkWrapThisFunction
= false;
466 bool allCSRsUsedInEntryFanout
= true;
467 for (MachineBasicBlock::succ_iterator SI
= EntryBlock
->succ_begin(),
468 SE
= EntryBlock
->succ_end(); SI
!= SE
; ++SI
) {
469 MachineBasicBlock
* SUCC
= *SI
;
470 if (CSRUsed
[SUCC
] != UsedCSRegs
)
471 allCSRsUsedInEntryFanout
= false;
473 if (allCSRsUsedInEntryFanout
) {
474 DEBUG(errs() << "DISABLED: " << Fn
.getFunction()->getName()
475 << ": all CSRs used in imm successors of EntryBlock\n");
476 ShrinkWrapThisFunction
= false;
480 if (ShrinkWrapThisFunction
) {
481 // Check if MBB uses CSRs and dominates all exit nodes.
482 // Such nodes are equiv. to the entry node w.r.t.
483 // CSR uses: every path through the function must
484 // pass through this node. If each CSR is used at least
485 // once by these nodes, shrink wrapping is disabled.
486 CSRegSet CSRUsedInChokePoints
;
487 for (MachineFunction::iterator MBBI
= Fn
.begin(), MBBE
= Fn
.end();
488 MBBI
!= MBBE
; ++MBBI
) {
489 MachineBasicBlock
* MBB
= MBBI
;
490 if (MBB
== EntryBlock
|| CSRUsed
[MBB
].empty() || MBB
->succ_size() < 1)
492 bool dominatesExitNodes
= true;
493 for (unsigned ri
= 0, re
= ReturnBlocks
.size(); ri
!= re
; ++ri
)
494 if (! DT
.dominates(MBB
, ReturnBlocks
[ri
])) {
495 dominatesExitNodes
= false;
498 if (dominatesExitNodes
) {
499 CSRUsedInChokePoints
|= CSRUsed
[MBB
];
500 if (CSRUsedInChokePoints
== UsedCSRegs
) {
501 DEBUG(errs() << "DISABLED: " << Fn
.getFunction()->getName()
502 << ": all CSRs used in choke point(s) at "
503 << getBasicBlockName(MBB
) << "\n");
504 ShrinkWrapThisFunction
= false;
511 // Return now if we have decided not to apply shrink wrapping
512 // to the current function.
513 if (! ShrinkWrapThisFunction
)
517 errs() << "ENABLED: " << Fn
.getFunction()->getName();
519 errs() << " (fast exit path)";
521 if (ShrinkWrapDebugging
>= BasicInfo
) {
522 errs() << "------------------------------"
523 << "-----------------------------\n";
524 errs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs
) << "\n";
525 if (ShrinkWrapDebugging
>= Details
) {
526 errs() << "------------------------------"
527 << "-----------------------------\n";
533 // Build initial DF sets to determine minimal regions in the
534 // Machine CFG around which CSRs must be spilled and restored.
535 calculateAnticAvail(Fn
);
540 /// addUsesForMEMERegion - add uses of CSRs spilled or restored in
541 /// multi-entry, multi-exit (MEME) regions so spill and restore
542 /// placement will not break code that enters or leaves a
543 /// shrink-wrapped region by inducing spills with no matching
544 /// restores or restores with no matching spills. A MEME region
545 /// is a subgraph of the MCFG with multiple entry edges, multiple
546 /// exit edges, or both. This code propagates use information
547 /// through the MCFG until all paths requiring spills and restores
548 /// _outside_ the computed minimal placement regions have been covered.
550 bool PEI::addUsesForMEMERegion(MachineBasicBlock
* MBB
,
551 SmallVector
<MachineBasicBlock
*, 4>& blks
) {
552 if (MBB
->succ_size() < 2 && MBB
->pred_size() < 2) {
553 bool processThisBlock
= false;
554 for (MachineBasicBlock::succ_iterator SI
= MBB
->succ_begin(),
555 SE
= MBB
->succ_end(); SI
!= SE
; ++SI
) {
556 MachineBasicBlock
* SUCC
= *SI
;
557 if (SUCC
->pred_size() > 1) {
558 processThisBlock
= true;
562 if (!CSRRestore
[MBB
].empty() && MBB
->succ_size() > 0) {
563 for (MachineBasicBlock::pred_iterator PI
= MBB
->pred_begin(),
564 PE
= MBB
->pred_end(); PI
!= PE
; ++PI
) {
565 MachineBasicBlock
* PRED
= *PI
;
566 if (PRED
->succ_size() > 1) {
567 processThisBlock
= true;
572 if (! processThisBlock
)
577 if (!CSRSave
[MBB
].empty())
579 else if (!CSRRestore
[MBB
].empty())
580 prop
= CSRRestore
[MBB
];
586 // Propagate selected bits to successors, predecessors of MBB.
587 bool addedUses
= false;
588 for (MachineBasicBlock::succ_iterator SI
= MBB
->succ_begin(),
589 SE
= MBB
->succ_end(); SI
!= SE
; ++SI
) {
590 MachineBasicBlock
* SUCC
= *SI
;
594 if (! CSRUsed
[SUCC
].contains(prop
)) {
595 CSRUsed
[SUCC
] |= prop
;
597 blks
.push_back(SUCC
);
598 DEBUG(if (ShrinkWrapDebugging
>= Iterations
)
599 errs() << getBasicBlockName(MBB
)
600 << "(" << stringifyCSRegSet(prop
) << ")->"
601 << "successor " << getBasicBlockName(SUCC
) << "\n");
604 for (MachineBasicBlock::pred_iterator PI
= MBB
->pred_begin(),
605 PE
= MBB
->pred_end(); PI
!= PE
; ++PI
) {
606 MachineBasicBlock
* PRED
= *PI
;
610 if (! CSRUsed
[PRED
].contains(prop
)) {
611 CSRUsed
[PRED
] |= prop
;
613 blks
.push_back(PRED
);
614 DEBUG(if (ShrinkWrapDebugging
>= Iterations
)
615 errs() << getBasicBlockName(MBB
)
616 << "(" << stringifyCSRegSet(prop
) << ")->"
617 << "predecessor " << getBasicBlockName(PRED
) << "\n");
623 /// addUsesForTopLevelLoops - add uses for CSRs used inside top
624 /// level loops to the exit blocks of those loops.
626 bool PEI::addUsesForTopLevelLoops(SmallVector
<MachineBasicBlock
*, 4>& blks
) {
627 bool addedUses
= false;
629 // Place restores for top level loops where needed.
630 for (DenseMap
<MachineBasicBlock
*, MachineLoop
*>::iterator
631 I
= TLLoops
.begin(), E
= TLLoops
.end(); I
!= E
; ++I
) {
632 MachineBasicBlock
* MBB
= I
->first
;
633 MachineLoop
* LP
= I
->second
;
634 MachineBasicBlock
* HDR
= LP
->getHeader();
635 SmallVector
<MachineBasicBlock
*, 4> exitBlocks
;
638 loopSpills
= CSRSave
[MBB
];
639 if (CSRSave
[MBB
].empty()) {
640 loopSpills
= CSRUsed
[HDR
];
641 assert(!loopSpills
.empty() && "No CSRs used in loop?");
642 } else if (CSRRestore
[MBB
].contains(CSRSave
[MBB
]))
645 LP
->getExitBlocks(exitBlocks
);
646 assert(exitBlocks
.size() > 0 && "Loop has no top level exit blocks?");
647 for (unsigned i
= 0, e
= exitBlocks
.size(); i
!= e
; ++i
) {
648 MachineBasicBlock
* EXB
= exitBlocks
[i
];
649 if (! CSRUsed
[EXB
].contains(loopSpills
)) {
650 CSRUsed
[EXB
] |= loopSpills
;
652 DEBUG(if (ShrinkWrapDebugging
>= Iterations
)
653 errs() << "LOOP " << getBasicBlockName(MBB
)
654 << "(" << stringifyCSRegSet(loopSpills
) << ")->"
655 << getBasicBlockName(EXB
) << "\n");
656 if (EXB
->succ_size() > 1 || EXB
->pred_size() > 1)
664 /// calcSpillPlacements - determine which CSRs should be spilled
665 /// in MBB using AnticIn sets of MBB's predecessors, keeping track
666 /// of changes to spilled reg sets. Add MBB to the set of blocks
667 /// that need to be processed for propagating use info to cover
668 /// multi-entry/exit regions.
670 bool PEI::calcSpillPlacements(MachineBasicBlock
* MBB
,
671 SmallVector
<MachineBasicBlock
*, 4> &blks
,
672 CSRegBlockMap
&prevSpills
) {
673 bool placedSpills
= false;
674 // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
675 CSRegSet anticInPreds
;
676 SmallVector
<MachineBasicBlock
*, 4> predecessors
;
677 for (MachineBasicBlock::pred_iterator PI
= MBB
->pred_begin(),
678 PE
= MBB
->pred_end(); PI
!= PE
; ++PI
) {
679 MachineBasicBlock
* PRED
= *PI
;
681 predecessors
.push_back(PRED
);
683 unsigned i
= 0, e
= predecessors
.size();
685 MachineBasicBlock
* PRED
= predecessors
[i
];
686 anticInPreds
= UsedCSRegs
- AnticIn
[PRED
];
687 for (++i
; i
!= e
; ++i
) {
688 PRED
= predecessors
[i
];
689 anticInPreds
&= (UsedCSRegs
- AnticIn
[PRED
]);
692 // Handle uses in entry blocks (which have no predecessors).
693 // This is necessary because the DFA formulation assumes the
694 // entry and (multiple) exit nodes cannot have CSR uses, which
695 // is not the case in the real world.
696 anticInPreds
= UsedCSRegs
;
698 // Compute spills required at MBB:
699 CSRSave
[MBB
] |= (AnticIn
[MBB
] - AvailIn
[MBB
]) & anticInPreds
;
701 if (! CSRSave
[MBB
].empty()) {
702 if (MBB
== EntryBlock
) {
703 for (unsigned ri
= 0, re
= ReturnBlocks
.size(); ri
!= re
; ++ri
)
704 CSRRestore
[ReturnBlocks
[ri
]] |= CSRSave
[MBB
];
706 // Reset all regs spilled in MBB that are also spilled in EntryBlock.
707 if (CSRSave
[EntryBlock
].intersects(CSRSave
[MBB
])) {
708 CSRSave
[MBB
] = CSRSave
[MBB
] - CSRSave
[EntryBlock
];
712 placedSpills
= (CSRSave
[MBB
] != prevSpills
[MBB
]);
713 prevSpills
[MBB
] = CSRSave
[MBB
];
714 // Remember this block for adding restores to successor
715 // blocks for multi-entry region.
719 DEBUG(if (! CSRSave
[MBB
].empty() && ShrinkWrapDebugging
>= Iterations
)
720 errs() << "SAVE[" << getBasicBlockName(MBB
) << "] = "
721 << stringifyCSRegSet(CSRSave
[MBB
]) << "\n");
726 /// calcRestorePlacements - determine which CSRs should be restored
727 /// in MBB using AvailOut sets of MBB's succcessors, keeping track
728 /// of changes to restored reg sets. Add MBB to the set of blocks
729 /// that need to be processed for propagating use info to cover
730 /// multi-entry/exit regions.
732 bool PEI::calcRestorePlacements(MachineBasicBlock
* MBB
,
733 SmallVector
<MachineBasicBlock
*, 4> &blks
,
734 CSRegBlockMap
&prevRestores
) {
735 bool placedRestores
= false;
736 // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
737 CSRegSet availOutSucc
;
738 SmallVector
<MachineBasicBlock
*, 4> successors
;
739 for (MachineBasicBlock::succ_iterator SI
= MBB
->succ_begin(),
740 SE
= MBB
->succ_end(); SI
!= SE
; ++SI
) {
741 MachineBasicBlock
* SUCC
= *SI
;
743 successors
.push_back(SUCC
);
745 unsigned i
= 0, e
= successors
.size();
747 MachineBasicBlock
* SUCC
= successors
[i
];
748 availOutSucc
= UsedCSRegs
- AvailOut
[SUCC
];
749 for (++i
; i
!= e
; ++i
) {
750 SUCC
= successors
[i
];
751 availOutSucc
&= (UsedCSRegs
- AvailOut
[SUCC
]);
754 if (! CSRUsed
[MBB
].empty() || ! AvailOut
[MBB
].empty()) {
755 // Handle uses in return blocks (which have no successors).
756 // This is necessary because the DFA formulation assumes the
757 // entry and (multiple) exit nodes cannot have CSR uses, which
758 // is not the case in the real world.
759 availOutSucc
= UsedCSRegs
;
762 // Compute restores required at MBB:
763 CSRRestore
[MBB
] |= (AvailOut
[MBB
] - AnticOut
[MBB
]) & availOutSucc
;
765 // Postprocess restore placements at MBB.
766 // Remove the CSRs that are restored in the return blocks.
767 // Lest this be confusing, note that:
768 // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
769 if (MBB
->succ_size() && ! CSRRestore
[MBB
].empty()) {
770 if (! CSRSave
[EntryBlock
].empty())
771 CSRRestore
[MBB
] = CSRRestore
[MBB
] - CSRSave
[EntryBlock
];
773 placedRestores
= (CSRRestore
[MBB
] != prevRestores
[MBB
]);
774 prevRestores
[MBB
] = CSRRestore
[MBB
];
775 // Remember this block for adding saves to predecessor
776 // blocks for multi-entry region.
780 DEBUG(if (! CSRRestore
[MBB
].empty() && ShrinkWrapDebugging
>= Iterations
)
781 errs() << "RESTORE[" << getBasicBlockName(MBB
) << "] = "
782 << stringifyCSRegSet(CSRRestore
[MBB
]) << "\n");
784 return placedRestores
;
787 /// placeSpillsAndRestores - place spills and restores of CSRs
788 /// used in MBBs in minimal regions that contain the uses.
790 void PEI::placeSpillsAndRestores(MachineFunction
&Fn
) {
791 CSRegBlockMap prevCSRSave
;
792 CSRegBlockMap prevCSRRestore
;
793 SmallVector
<MachineBasicBlock
*, 4> cvBlocks
, ncvBlocks
;
795 unsigned iterations
= 0;
797 // Iterate computation of spill and restore placements in the MCFG until:
798 // 1. CSR use info has been fully propagated around the MCFG, and
799 // 2. computation of CSRSave[], CSRRestore[] reach fixed points.
804 DEBUG(if (ShrinkWrapDebugging
>= Iterations
)
805 errs() << "iter " << iterations
806 << " --------------------------------------------------\n");
808 // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
809 // which determines the placements of spills and restores.
810 // Keep track of changes to spills, restores in each iteration to
811 // minimize the total iterations.
812 bool SRChanged
= false;
813 for (MachineFunction::iterator MBBI
= Fn
.begin(), MBBE
= Fn
.end();
814 MBBI
!= MBBE
; ++MBBI
) {
815 MachineBasicBlock
* MBB
= MBBI
;
817 // Place spills for CSRs in MBB.
818 SRChanged
|= calcSpillPlacements(MBB
, cvBlocks
, prevCSRSave
);
820 // Place restores for CSRs in MBB.
821 SRChanged
|= calcRestorePlacements(MBB
, cvBlocks
, prevCSRRestore
);
824 // Add uses of CSRs used inside loops where needed.
825 changed
|= addUsesForTopLevelLoops(cvBlocks
);
827 // Add uses for CSRs spilled or restored at branch, join points.
828 if (changed
|| SRChanged
) {
829 while (! cvBlocks
.empty()) {
830 MachineBasicBlock
* MBB
= cvBlocks
.pop_back_val();
831 changed
|= addUsesForMEMERegion(MBB
, ncvBlocks
);
833 if (! ncvBlocks
.empty()) {
834 cvBlocks
= ncvBlocks
;
840 calculateAnticAvail(Fn
);
846 // Check for effectiveness:
847 // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
848 // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
849 // Gives a measure of how many CSR spills have been moved from EntryBlock
850 // to minimal regions enclosing their uses.
851 CSRegSet notSpilledInEntryBlock
= (UsedCSRegs
- CSRSave
[EntryBlock
]);
852 unsigned numSRReducedThisFunc
= notSpilledInEntryBlock
.count();
853 numSRReduced
+= numSRReducedThisFunc
;
854 DEBUG(if (ShrinkWrapDebugging
>= BasicInfo
) {
855 errs() << "-----------------------------------------------------------\n";
856 errs() << "total iterations = " << iterations
<< " ( "
857 << Fn
.getFunction()->getName()
858 << " " << numSRReducedThisFunc
861 errs() << "-----------------------------------------------------------\n";
863 errs() << "-----------------------------------------------------------\n";
864 if (numSRReducedThisFunc
)
865 verifySpillRestorePlacement();
869 // Debugging methods.
871 /// findFastExitPath - debugging method used to detect functions
872 /// with at least one path from the entry block to a return block
873 /// directly or which has a very small number of edges.
875 void PEI::findFastExitPath() {
878 // Fina a path from EntryBlock to any return block that does not branch:
886 for (MachineBasicBlock::succ_iterator SI
= EntryBlock
->succ_begin(),
887 SE
= EntryBlock
->succ_end(); SI
!= SE
; ++SI
) {
888 MachineBasicBlock
* SUCC
= *SI
;
890 // Assume positive, disprove existence of fast path.
891 HasFastExitPath
= true;
893 // Check the immediate successors.
894 if (isReturnBlock(SUCC
)) {
895 if (ShrinkWrapDebugging
>= BasicInfo
)
896 errs() << "Fast exit path: " << getBasicBlockName(EntryBlock
)
897 << "->" << getBasicBlockName(SUCC
) << "\n";
900 // Traverse df from SUCC, look for a branch block.
901 std::string exitPath
= getBasicBlockName(SUCC
);
902 for (df_iterator
<MachineBasicBlock
*> BI
= df_begin(SUCC
),
903 BE
= df_end(SUCC
); BI
!= BE
; ++BI
) {
904 MachineBasicBlock
* SBB
= *BI
;
905 // Reject paths with branch nodes.
906 if (SBB
->succ_size() > 1) {
907 HasFastExitPath
= false;
910 exitPath
+= "->" + getBasicBlockName(SBB
);
912 if (HasFastExitPath
) {
913 if (ShrinkWrapDebugging
>= BasicInfo
)
914 errs() << "Fast exit path: " << getBasicBlockName(EntryBlock
)
915 << "->" << exitPath
<< "\n";
921 /// verifySpillRestorePlacement - check the current spill/restore
922 /// sets for safety. Attempt to find spills without restores or
923 /// restores without spills.
924 /// Spills: walk df from each MBB in spill set ensuring that
925 /// all CSRs spilled at MMBB are restored on all paths
926 /// from MBB to all exit blocks.
927 /// Restores: walk idf from each MBB in restore set ensuring that
928 /// all CSRs restored at MBB are spilled on all paths
931 void PEI::verifySpillRestorePlacement() {
932 unsigned numReturnBlocks
= 0;
933 for (MachineFunction::iterator MBBI
= MF
->begin(), MBBE
= MF
->end();
934 MBBI
!= MBBE
; ++MBBI
) {
935 MachineBasicBlock
* MBB
= MBBI
;
936 if (isReturnBlock(MBB
) || MBB
->succ_size() == 0)
939 for (CSRegBlockMap::iterator BI
= CSRSave
.begin(),
940 BE
= CSRSave
.end(); BI
!= BE
; ++BI
) {
941 MachineBasicBlock
* MBB
= BI
->first
;
942 CSRegSet spilled
= BI
->second
;
948 DOUT
<< "SAVE[" << getBasicBlockName(MBB
) << "] = "
949 << stringifyCSRegSet(spilled
)
950 << " RESTORE[" << getBasicBlockName(MBB
) << "] = "
951 << stringifyCSRegSet(CSRRestore
[MBB
]) << "\n";
953 if (CSRRestore
[MBB
].intersects(spilled
)) {
954 restored
|= (CSRRestore
[MBB
] & spilled
);
957 // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
958 // we must find restores for all spills w/no intervening spills on all
959 // paths from MBB to all return blocks.
960 for (df_iterator
<MachineBasicBlock
*> BI
= df_begin(MBB
),
961 BE
= df_end(MBB
); BI
!= BE
; ++BI
) {
962 MachineBasicBlock
* SBB
= *BI
;
965 // Stop when we encounter spills of any CSRs spilled at MBB that
966 // have not yet been seen to be restored.
967 if (CSRSave
[SBB
].intersects(spilled
) &&
968 !restored
.contains(CSRSave
[SBB
] & spilled
))
970 // Collect the CSRs spilled at MBB that are restored
971 // at this DF successor of MBB.
972 if (CSRRestore
[SBB
].intersects(spilled
))
973 restored
|= (CSRRestore
[SBB
] & spilled
);
974 // If we are at a retun block, check that the restores
975 // we have seen so far exhaust the spills at MBB, then
976 // reset the restores.
977 if (isReturnBlock(SBB
) || SBB
->succ_size() == 0) {
978 if (restored
!= spilled
) {
979 CSRegSet notRestored
= (spilled
- restored
);
980 DEBUG(errs() << MF
->getFunction()->getName() << ": "
981 << stringifyCSRegSet(notRestored
)
982 << " spilled at " << getBasicBlockName(MBB
)
983 << " are never restored on path to return "
984 << getBasicBlockName(SBB
) << "\n");
991 // Check restore placements.
992 for (CSRegBlockMap::iterator BI
= CSRRestore
.begin(),
993 BE
= CSRRestore
.end(); BI
!= BE
; ++BI
) {
994 MachineBasicBlock
* MBB
= BI
->first
;
995 CSRegSet restored
= BI
->second
;
998 if (restored
.empty())
1001 DOUT
<< "SAVE[" << getBasicBlockName(MBB
) << "] = "
1002 << stringifyCSRegSet(CSRSave
[MBB
])
1003 << " RESTORE[" << getBasicBlockName(MBB
) << "] = "
1004 << stringifyCSRegSet(restored
) << "\n";
1006 if (CSRSave
[MBB
].intersects(restored
)) {
1007 spilled
|= (CSRSave
[MBB
] & restored
);
1009 // Walk inverse depth first from MBB to find spills of all
1010 // CSRs restored at MBB:
1011 for (idf_iterator
<MachineBasicBlock
*> BI
= idf_begin(MBB
),
1012 BE
= idf_end(MBB
); BI
!= BE
; ++BI
) {
1013 MachineBasicBlock
* PBB
= *BI
;
1016 // Stop when we encounter restores of any CSRs restored at MBB that
1017 // have not yet been seen to be spilled.
1018 if (CSRRestore
[PBB
].intersects(restored
) &&
1019 !spilled
.contains(CSRRestore
[PBB
] & restored
))
1021 // Collect the CSRs restored at MBB that are spilled
1022 // at this DF predecessor of MBB.
1023 if (CSRSave
[PBB
].intersects(restored
))
1024 spilled
|= (CSRSave
[PBB
] & restored
);
1026 if (spilled
!= restored
) {
1027 CSRegSet notSpilled
= (restored
- spilled
);
1028 DEBUG(errs() << MF
->getFunction()->getName() << ": "
1029 << stringifyCSRegSet(notSpilled
)
1030 << " restored at " << getBasicBlockName(MBB
)
1031 << " are never spilled\n");
1036 // Debugging print methods.
1037 std::string
PEI::getBasicBlockName(const MachineBasicBlock
* MBB
) {
1041 if (MBB
->getBasicBlock())
1042 return MBB
->getBasicBlock()->getNameStr();
1044 std::ostringstream name
;
1045 name
<< "_MBB_" << MBB
->getNumber();
1049 std::string
PEI::stringifyCSRegSet(const CSRegSet
& s
) {
1050 const TargetRegisterInfo
* TRI
= MF
->getTarget().getRegisterInfo();
1051 const std::vector
<CalleeSavedInfo
> CSI
=
1052 MF
->getFrameInfo()->getCalleeSavedInfo();
1054 std::ostringstream srep
;
1055 if (CSI
.size() == 0) {
1060 CSRegSet::iterator I
= s
.begin(), E
= s
.end();
1062 unsigned reg
= CSI
[*I
].getReg();
1063 srep
<< TRI
->getName(reg
);
1064 for (++I
; I
!= E
; ++I
) {
1065 reg
= CSI
[*I
].getReg();
1067 srep
<< TRI
->getName(reg
);
1074 void PEI::dumpSet(const CSRegSet
& s
) {
1075 DOUT
<< stringifyCSRegSet(s
) << "\n";
1078 void PEI::dumpUsed(MachineBasicBlock
* MBB
) {
1080 DOUT
<< "CSRUsed[" << getBasicBlockName(MBB
) << "] = "
1081 << stringifyCSRegSet(CSRUsed
[MBB
]) << "\n";
1085 void PEI::dumpAllUsed() {
1086 for (MachineFunction::iterator MBBI
= MF
->begin(), MBBE
= MF
->end();
1087 MBBI
!= MBBE
; ++MBBI
) {
1088 MachineBasicBlock
* MBB
= MBBI
;
1093 void PEI::dumpSets(MachineBasicBlock
* MBB
) {
1095 DOUT
<< getBasicBlockName(MBB
) << " | "
1096 << stringifyCSRegSet(CSRUsed
[MBB
]) << " | "
1097 << stringifyCSRegSet(AnticIn
[MBB
]) << " | "
1098 << stringifyCSRegSet(AnticOut
[MBB
]) << " | "
1099 << stringifyCSRegSet(AvailIn
[MBB
]) << " | "
1100 << stringifyCSRegSet(AvailOut
[MBB
]) << "\n";
1104 void PEI::dumpSets1(MachineBasicBlock
* MBB
) {
1106 DOUT
<< getBasicBlockName(MBB
) << " | "
1107 << stringifyCSRegSet(CSRUsed
[MBB
]) << " | "
1108 << stringifyCSRegSet(AnticIn
[MBB
]) << " | "
1109 << stringifyCSRegSet(AnticOut
[MBB
]) << " | "
1110 << stringifyCSRegSet(AvailIn
[MBB
]) << " | "
1111 << stringifyCSRegSet(AvailOut
[MBB
]) << " | "
1112 << stringifyCSRegSet(CSRSave
[MBB
]) << " | "
1113 << stringifyCSRegSet(CSRRestore
[MBB
]) << "\n";
1117 void PEI::dumpAllSets() {
1118 for (MachineFunction::iterator MBBI
= MF
->begin(), MBBE
= MF
->end();
1119 MBBI
!= MBBE
; ++MBBI
) {
1120 MachineBasicBlock
* MBB
= MBBI
;
1125 void PEI::dumpSRSets() {
1126 for (MachineFunction::iterator MBB
= MF
->begin(), E
= MF
->end();
1128 if (! CSRSave
[MBB
].empty()) {
1129 DOUT
<< "SAVE[" << getBasicBlockName(MBB
) << "] = "
1130 << stringifyCSRegSet(CSRSave
[MBB
]);
1131 if (CSRRestore
[MBB
].empty())
1134 if (! CSRRestore
[MBB
].empty()) {
1135 if (! CSRSave
[MBB
].empty())
1137 DOUT
<< "RESTORE[" << getBasicBlockName(MBB
) << "] = "
1138 << stringifyCSRegSet(CSRRestore
[MBB
]) << "\n";