1 //===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This pass performs loop invariant code motion on machine instructions. We
11 // attempt to remove as much code from the body of a loop as possible.
13 // This pass does not attempt to throttle itself to limit register pressure.
14 // The register allocation phases are expected to perform rematerialization
15 // to recover when register pressure is high.
17 // This pass is not intended to be a replacement or a complete alternative
18 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple
19 // constructs that are not exposed before lowering and instruction selection.
21 //===----------------------------------------------------------------------===//
23 #define DEBUG_TYPE "machine-licm"
24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/CodeGen/MachineDominators.h"
26 #include "llvm/CodeGen/MachineFrameInfo.h"
27 #include "llvm/CodeGen/MachineLoopInfo.h"
28 #include "llvm/CodeGen/MachineMemOperand.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/PseudoSourceValue.h"
31 #include "llvm/Target/TargetLowering.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Target/TargetInstrInfo.h"
34 #include "llvm/Target/TargetInstrItineraries.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include "llvm/Analysis/AliasAnalysis.h"
37 #include "llvm/ADT/DenseMap.h"
38 #include "llvm/ADT/SmallSet.h"
39 #include "llvm/ADT/Statistic.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/raw_ostream.h"
// Pass statistics counters.
// NOTE(review): only two of the five STATISTIC( openers are present in this
// listing; the other three description strings below have lost the line that
// names their counter. Confirm against the complete file.
45 "Number of machine instructions hoisted out of loops");
47 "Number of instructions hoisted in low reg pressure situation");
48 STATISTIC(NumHighLatency
,
49 "Number of high latency instructions hoisted");
51 "Number of hoisted machine instructions CSEed");
52 STATISTIC(NumPostRAHoisted
,
53 "Number of machine instructions hoisted out of loops post regalloc");
/// MachineLICM - Machine-level loop invariant code motion pass. The
/// constructors below initialize a PreRegAlloc flag, and runOnMachineFunction
/// reports either a "Pre-regalloc" or a "Post-regalloc" run.
// NOTE(review): several lines of this class (e.g. access specifiers, the
// PreRegAlloc member, some closing braces) are absent from this listing.
56 class MachineLICM
: public MachineFunctionPass
{
// Target hooks and per-function info; all are (re)assigned at the start of
// runOnMachineFunction.
59 const TargetMachine
*TM
;
60 const TargetInstrInfo
*TII
;
61 const TargetLowering
*TLI
;
62 const TargetRegisterInfo
*TRI
;
63 const MachineFrameInfo
*MFI
;
64 MachineRegisterInfo
*MRI
;
65 const InstrItineraryData
*InstrItins
;
67 // Various analyses that we use...
68 AliasAnalysis
*AA
; // Alias analysis info.
69 MachineLoopInfo
*MLI
; // Current MachineLoopInfo
70 MachineDominatorTree
*DT
; // Machine dominator tree for the cur loop
72 // State that is updated as we process loops
73 bool Changed
; // True if a loop is changed.
74 bool FirstInLoop
; // True if it's the first LICM in the loop.
75 MachineLoop
*CurLoop
; // The current loop we are working on.
76 MachineBasicBlock
*CurPreheader
; // The preheader for CurLoop.
// Filled from TRI->getAllocatableSet(MF); tested in IsLoopInvariantInst.
78 BitVector AllocatableSet
;
80 // Track 'estimated' register pressure.
81 SmallSet
<unsigned, 32> RegSeen
;
// Indexed by register class ID (see the RCId uses in the pressure helpers).
82 SmallVector
<unsigned, 8> RegPressure
;
84 // Register pressure "limit" per register class. If the pressure
85 // is higher than the limit, then it's considered high.
86 SmallVector
<unsigned, 8> RegLimit
;
88 // Register pressure on path leading from loop preheader to current BB.
89 SmallVector
<SmallVector
<unsigned, 8>, 16> BackTrace
;
91 // For each opcode, keep a list of potential CSE instructions.
92 DenseMap
<unsigned, std::vector
<const MachineInstr
*> > CSEMap
;
95 static char ID
; // Pass identification, replacement for typeid
// Default construction sets PreRegAlloc to true.
// NOTE(review): the line that opens this constructor is missing here.
97 MachineFunctionPass(ID
), PreRegAlloc(true) {
98 initializeMachineLICMPass(*PassRegistry::getPassRegistry());
// Explicit constructor: PreRA is stored in PreRegAlloc.
101 explicit MachineLICM(bool PreRA
) :
102 MachineFunctionPass(ID
), PreRegAlloc(PreRA
) {
103 initializeMachineLICMPass(*PassRegistry::getPassRegistry());
106 virtual bool runOnMachineFunction(MachineFunction
&MF
);
108 const char *getPassName() const { return "Machine Instruction LICM"; }
// Requires MachineLoopInfo, MachineDominatorTree and AliasAnalysis; declares
// MachineLoopInfo and MachineDominatorTree as preserved.
110 virtual void getAnalysisUsage(AnalysisUsage
&AU
) const {
111 AU
.addRequired
<MachineLoopInfo
>();
112 AU
.addRequired
<MachineDominatorTree
>();
113 AU
.addRequired
<AliasAnalysis
>();
114 AU
.addPreserved
<MachineLoopInfo
>();
115 AU
.addPreserved
<MachineDominatorTree
>();
116 MachineFunctionPass::getAnalysisUsage(AU
);
// NOTE(review): only the CSEMap iteration header of releaseMemory is visible;
// the statements it runs are not shown in this listing.
119 virtual void releaseMemory() {
124 for (DenseMap
<unsigned,std::vector
<const MachineInstr
*> >::iterator
125 CI
= CSEMap
.begin(), CE
= CSEMap
.end(); CI
!= CE
; ++CI
)
131 /// CandidateInfo - Keep track of information about hoisting candidates.
// The constructor initializes members MI, Def and FI; their declarations are
// not visible in this listing.
132 struct CandidateInfo
{
136 CandidateInfo(MachineInstr
*mi
, unsigned def
, int fi
)
137 : MI(mi
), Def(def
), FI(fi
) {}
140 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
141 /// invariants out to the preheader.
142 void HoistRegionPostRA();
144 /// HoistPostRA - When an instruction is found to only use loop invariant
145 /// operands and is safe to hoist, this function is called to do the
/// dirty work.
147 void HoistPostRA(MachineInstr
*MI
, unsigned Def
);
149 /// ProcessMI - Examine the instruction for potential LICM candidate. Also
150 /// gather register def and frame object update information.
151 void ProcessMI(MachineInstr
*MI
, unsigned *PhysRegDefs
,
152 SmallSet
<int, 32> &StoredFIs
,
153 SmallVector
<CandidateInfo
, 32> &Candidates
);
155 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
/// current loop, and make sure it is not killed by any instructions in the
/// loop.
157 void AddToLiveIns(unsigned Reg
);
159 /// IsLICMCandidate - Returns true if the instruction may be a suitable
160 /// candidate for LICM. e.g. If the instruction is a call, then it's
161 /// obviously not safe to hoist it.
162 bool IsLICMCandidate(MachineInstr
&I
);
164 /// IsLoopInvariantInst - Returns true if the instruction is loop
165 /// invariant. I.e., all virtual register operands are defined outside of
166 /// the loop, physical registers aren't accessed (explicitly or implicitly),
167 /// and the instruction is hoistable.
169 bool IsLoopInvariantInst(MachineInstr
&I
);
171 /// HasAnyPHIUse - Return true if the specified register is used by any
/// PHI node.
173 bool HasAnyPHIUse(unsigned Reg
) const;
175 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
176 /// and a use in the current loop, return true if the target considered
/// it high.
// NOTE(review): the tail of this declaration is missing from the listing; the
// out-of-class definition takes (MachineInstr &MI, unsigned DefIdx,
// unsigned Reg) const.
178 bool HasHighOperandLatency(MachineInstr
&MI
, unsigned DefIdx
,
181 bool IsCheapInstruction(MachineInstr
&MI
) const;
183 /// CanCauseHighRegPressure - Visit BBs from header to current BB,
184 /// check if hoisting an instruction of the given cost matrix can cause high
185 /// register pressure.
186 bool CanCauseHighRegPressure(DenseMap
<unsigned, int> &Cost
);
188 /// UpdateBackTraceRegPressure - Traverse the back trace from header to
189 /// the current block and update their register pressures to reflect the
190 /// effect of hoisting MI from the current block to the preheader.
191 void UpdateBackTraceRegPressure(const MachineInstr
*MI
);
193 /// IsProfitableToHoist - Return true if it is potentially profitable to
194 /// hoist the given loop invariant.
195 bool IsProfitableToHoist(MachineInstr
&MI
);
197 /// HoistRegion - Walk the specified region of the CFG (defined by all
198 /// blocks dominated by the specified block, and that are in the current
199 /// loop) in depth first order w.r.t the DominatorTree. This allows us to
200 /// visit definitions before uses, allowing us to hoist a loop body in one
201 /// pass without iteration.
203 void HoistRegion(MachineDomTreeNode
*N
, bool IsHeader
= false);
205 /// InitRegPressure - Find all virtual register references that are liveout
206 /// of the preheader to initialize the starting "register pressure". Note
207 /// this does not count live through (livein but not used) registers.
208 void InitRegPressure(MachineBasicBlock
*BB
);
210 /// UpdateRegPressure - Update estimate of register pressure after the
211 /// specified instruction.
212 void UpdateRegPressure(const MachineInstr
*MI
);
214 /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
215 /// the load itself could be hoisted. Return the unfolded and hoistable
216 /// load, or null if the load couldn't be unfolded or if it wouldn't
/// be hoistable.
218 MachineInstr
*ExtractHoistableLoad(MachineInstr
*MI
);
220 /// LookForDuplicate - Find an instruction among PrevMIs that is a
221 /// duplicate of MI. Return this instruction if it's found.
222 const MachineInstr
*LookForDuplicate(const MachineInstr
*MI
,
223 std::vector
<const MachineInstr
*> &PrevMIs
);
225 /// EliminateCSE - Given a LICM'ed instruction, look for an instruction in
226 /// the preheader that computes the same value. If one is found, replace all
227 /// uses with the definition of the existing instruction rather than hoisting
228 /// the instruction to the preheader.
229 bool EliminateCSE(MachineInstr
*MI
,
230 DenseMap
<unsigned, std::vector
<const MachineInstr
*> >::iterator
&CI
);
232 /// Hoist - When an instruction is found to only use loop invariant operands
233 /// and is safe to hoist, this function is called to do the dirty work.
234 /// It returns true if the instruction is hoisted.
235 bool Hoist(MachineInstr
*MI
, MachineBasicBlock
*Preheader
);
237 /// InitCSEMap - Initialize the CSE map with instructions that are in the
238 /// current loop preheader that may become duplicates of instructions that
239 /// are hoisted out of the loop.
240 void InitCSEMap(MachineBasicBlock
*BB
);
242 /// getCurPreheader - Get the preheader for the current loop, splitting
243 /// a critical edge if needed.
244 MachineBasicBlock
*getCurPreheader();
246 } // end anonymous namespace
// Storage for the pass identifier, followed by pass registration under the
// name string "machinelicm". The dependencies declared here are the same three
// analyses that getAnalysisUsage marks as required.
248 char MachineLICM::ID
= 0;
249 INITIALIZE_PASS_BEGIN(MachineLICM
, "machinelicm",
250 "Machine Loop Invariant Code Motion", false, false)
251 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo
)
252 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree
)
253 INITIALIZE_AG_DEPENDENCY(AliasAnalysis
)
254 INITIALIZE_PASS_END(MachineLICM
, "machinelicm",
255 "Machine Loop Invariant Code Motion", false, false)
/// createMachineLICMPass - Allocate a new MachineLICM pass object with `new`
/// and return it. PreRegAlloc is forwarded to the explicit constructor, which
/// stores it in the pass's PreRegAlloc member.
257 FunctionPass
*llvm::createMachineLICMPass(bool PreRegAlloc
) {
258 return new MachineLICM(PreRegAlloc
);
261 /// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
262 /// loop that has a unique predecessor.
// NOTE(review): the statements guarded by the two `if`s and the final return
// are missing from this listing; the surrounding comments imply false, false
// and true respectively -- confirm against the complete file.
263 static bool LoopIsOuterMostWithPredecessor(MachineLoop
*CurLoop
) {
264 // Check whether this loop even has a unique predecessor.
265 if (!CurLoop
->getLoopPredecessor())
267 // Ok, now check to see if any of its outer loops do.
// Follows getParentLoop() links until a null parent is reached.
268 for (MachineLoop
*L
= CurLoop
->getParentLoop(); L
; L
= L
->getParentLoop())
269 if (L
->getLoopPredecessor())
271 // None of them did, so this is the outermost with a unique predecessor.
/// runOnMachineFunction - Pass entry point. Caches target hooks and analyses
/// for MF, sets up the register pressure tables, then processes loops taken
/// from a worklist seeded with MLI->begin()/end().
// NOTE(review): the conditions selecting between the two banner messages, the
// post-regalloc path and the return statement are not visible in this listing.
275 bool MachineLICM::runOnMachineFunction(MachineFunction
&MF
) {
277 DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
279 DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
280 DEBUG(dbgs() << MF
.getFunction()->getName() << " ********\n");
// Reset per-run state and cache the target/function objects used below.
282 Changed
= FirstInLoop
= false;
283 TM
= &MF
.getTarget();
284 TII
= TM
->getInstrInfo();
285 TLI
= TM
->getTargetLowering();
286 TRI
= TM
->getRegisterInfo();
287 MFI
= MF
.getFrameInfo();
288 MRI
= &MF
.getRegInfo();
289 InstrItins
= TM
->getInstrItineraryData();
290 AllocatableSet
= TRI
->getAllocatableSet(MF
);
293 // Estimate register pressure during pre-regalloc pass.
// Both tables get one slot per register class; RegLimit is filled from the
// target's per-class pressure limit.
294 unsigned NumRC
= TRI
->getNumRegClasses();
295 RegPressure
.resize(NumRC
);
296 std::fill(RegPressure
.begin(), RegPressure
.end(), 0);
297 RegLimit
.resize(NumRC
);
298 for (TargetRegisterInfo::regclass_iterator I
= TRI
->regclass_begin(),
299 E
= TRI
->regclass_end(); I
!= E
; ++I
)
300 RegLimit
[(*I
)->getID()] = TRI
->getRegPressureLimit(*I
, MF
);
303 // Get our Loop information...
304 MLI
= &getAnalysis
<MachineLoopInfo
>();
305 DT
= &getAnalysis
<MachineDominatorTree
>();
306 AA
= &getAnalysis
<AliasAnalysis
>();
// Loops are popped from the back of the worklist, one at a time.
308 SmallVector
<MachineLoop
*, 8> Worklist(MLI
->begin(), MLI
->end());
309 while (!Worklist
.empty()) {
310 CurLoop
= Worklist
.pop_back_val();
313 // If this is done before regalloc, only visit outer-most preheader-sporting
// loops.
// A loop that fails the test has its sub-loops queued instead.
315 if (PreRegAlloc
&& !LoopIsOuterMostWithPredecessor(CurLoop
)) {
316 Worklist
.append(CurLoop
->begin(), CurLoop
->end());
323 // CSEMap is initialized for loop header when the first instruction is
// hoisted.
// Hoisting starts at the dominator tree node of the loop header.
325 MachineDomTreeNode
*N
= DT
->getNode(CurLoop
->getHeader());
327 HoistRegion(N
, true);
335 /// InstructionStoresToFI - Return true if instruction stores to the
/// specified frame index.
// Scans MI's memory operands; an operand is skipped unless it is a store and
// has a source value. A match is a FixedStackPseudoSourceValue whose frame
// index equals FI.
// NOTE(review): the `continue` and `return` statements are missing from this
// listing.
337 static bool InstructionStoresToFI(const MachineInstr
*MI
, int FI
) {
338 for (MachineInstr::mmo_iterator o
= MI
->memoperands_begin(),
339 oe
= MI
->memoperands_end(); o
!= oe
; ++o
) {
340 if (!(*o
)->isStore() || !(*o
)->getValue())
342 if (const FixedStackPseudoSourceValue
*Value
=
343 dyn_cast
<const FixedStackPseudoSourceValue
>((*o
)->getValue())) {
344 if (Value
->getFrameIndex() == FI
)
351 /// ProcessMI - Examine the instruction for potential LICM candidate. Also
352 /// gather register def and frame object update information.
// PhysRegDefs is a per-physical-register def counter indexed by register
// number; StoredFIs collects spill-slot frame indices that are stored to;
// qualifying instructions are appended to Candidates.
// NOTE(review): many control-flow lines of this function (operand kind tests,
// `continue`s, assignments to RuledOut/Def, closing braces) are absent from
// this listing, so the nesting of the statements below cannot be confirmed.
353 void MachineLICM::ProcessMI(MachineInstr
*MI
,
354 unsigned *PhysRegDefs
,
355 SmallSet
<int, 32> &StoredFIs
,
356 SmallVector
<CandidateInfo
, 32> &Candidates
) {
357 bool RuledOut
= false;
358 bool HasNonInvariantUse
= false;
360 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
!= e
; ++i
) {
361 const MachineOperand
&MO
= MI
->getOperand(i
);
363 // Remember if the instruction stores to the frame index.
// Only spill-slot indices that MI actually stores to, and that are not
// already recorded, are inserted.
364 int FI
= MO
.getIndex();
365 if (!StoredFIs
.count(FI
) &&
366 MFI
->isSpillSlotObjectIndex(FI
) &&
367 InstructionStoresToFI(MI
, FI
))
368 StoredFIs
.insert(FI
);
369 HasNonInvariantUse
= true;
375 unsigned Reg
= MO
.getReg();
378 assert(TargetRegisterInfo::isPhysicalRegister(Reg
) &&
379 "Not expecting virtual register!");
382 if (Reg
&& PhysRegDefs
[Reg
])
383 // If it's using a non-loop-invariant register, then it's obviously not
// safe to hoist.
385 HasNonInvariantUse
= true;
389 if (MO
.isImplicit()) {
391 for (const unsigned *AS
= TRI
->getAliasSet(Reg
); *AS
; ++AS
)
394 // Non-dead implicit def? This cannot be hoisted.
396 // No need to check if a dead implicit def is also defined by
397 // another instruction.
401 // FIXME: For now, avoid instructions with multiple defs, unless
402 // it's a dead implicit def.
408 // If we have already seen another instruction that defines the same
409 // register, then this is not safe.
410 if (++PhysRegDefs
[Reg
] > 1)
411 // MI defined register is seen defined by another instruction in
412 // the loop, it cannot be a LICM candidate.
// The alias set is walked until its zero terminator; each alias gets the same
// def-count bump and check.
414 for (const unsigned *AS
= TRI
->getAliasSet(Reg
); *AS
; ++AS
)
415 if (++PhysRegDefs
[*AS
] > 1)
419 // Only consider reloads for now and remats which do not have register
420 // operands. FIXME: Consider unfold load folding instructions.
// NOTE(review): the declarations of Def and of the FI used below are on lines
// not shown in this listing.
421 if (Def
&& !RuledOut
) {
423 if ((!HasNonInvariantUse
&& IsLICMCandidate(*MI
)) ||
424 (TII
->isLoadFromStackSlot(MI
, FI
) && MFI
->isSpillSlotObjectIndex(FI
)))
425 Candidates
.push_back(CandidateInfo(MI
, Def
, FI
));
429 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
430 /// invariants out to the preheader.
// Works in two phases: first every instruction of every block in CurLoop is
// fed to ProcessMI to count physical register defs and collect candidates,
// then each candidate is re-checked and passed to HoistPostRA.
// NOTE(review): several statements (live-in bookkeeping, `continue`s, the
// `Safe` handling around the operand check) are missing from this listing.
431 void MachineLICM::HoistRegionPostRA() {
432 unsigned NumRegs
= TRI
->getNumRegs();
// Manually allocated def-count table, zero-filled here and released by the
// delete[] at the end of the function.
433 unsigned *PhysRegDefs
= new unsigned[NumRegs
];
434 std::fill(PhysRegDefs
, PhysRegDefs
+ NumRegs
, 0);
436 SmallVector
<CandidateInfo
, 32> Candidates
;
437 SmallSet
<int, 32> StoredFIs
;
439 // Walk the entire region, count number of defs for each register, and
440 // collect potential LICM candidates.
// NOTE(review): Blocks is declared by value, so the block list is copied
// here; a const reference would likely suffice -- confirm what getBlocks()
// returns.
441 const std::vector
<MachineBasicBlock
*> Blocks
= CurLoop
->getBlocks();
442 for (unsigned i
= 0, e
= Blocks
.size(); i
!= e
; ++i
) {
443 MachineBasicBlock
*BB
= Blocks
[i
];
444 // Conservatively treat live-in's as an external def.
445 // FIXME: That means a reload that is reused in successor block(s) will not
// be LICM'ed.
447 for (MachineBasicBlock::livein_iterator I
= BB
->livein_begin(),
448 E
= BB
->livein_end(); I
!= E
; ++I
) {
451 for (const unsigned *AS
= TRI
->getAliasSet(Reg
); *AS
; ++AS
)
455 for (MachineBasicBlock::iterator
456 MII
= BB
->begin(), E
= BB
->end(); MII
!= E
; ++MII
) {
457 MachineInstr
*MI
= &*MII
;
458 ProcessMI(MI
, PhysRegDefs
, StoredFIs
, Candidates
);
462 // Now evaluate whether the potential candidates qualify.
463 // 1. Check if the candidate defined register is defined by another
464 // instruction in the loop.
465 // 2. If the candidate is a load from stack slot (always true for now),
466 // check if the slot is stored anywhere in the loop.
467 for (unsigned i
= 0, e
= Candidates
.size(); i
!= e
; ++i
) {
// INT_MIN in the FI field is treated as "no frame index" by this test.
468 if (Candidates
[i
].FI
!= INT_MIN
&&
469 StoredFIs
.count(Candidates
[i
].FI
))
// A count of exactly 1 means the candidate is the only def seen for Def.
472 if (PhysRegDefs
[Candidates
[i
].Def
] == 1) {
474 MachineInstr
*MI
= Candidates
[i
].MI
;
475 for (unsigned j
= 0, ee
= MI
->getNumOperands(); j
!= ee
; ++j
) {
476 const MachineOperand
&MO
= MI
->getOperand(j
);
// Only register uses with a non-zero register are inspected.
477 if (!MO
.isReg() || MO
.isDef() || !MO
.getReg())
479 if (PhysRegDefs
[MO
.getReg()]) {
480 // If it's using a non-loop-invariant register, then it's obviously
481 // not safe to hoist.
487 HoistPostRA(MI
, Candidates
[i
].Def
);
491 delete[] PhysRegDefs
;
494 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
495 /// loop, and make sure it is not killed by any instructions in the loop.
// NOTE(review): the statements controlled by the two `if`s below (adding the
// live-in, clearing the kill flag) are missing from this listing.
496 void MachineLICM::AddToLiveIns(unsigned Reg
) {
// NOTE(review): Blocks is declared by value, so the block list is copied
// here; a const reference would likely suffice -- confirm what getBlocks()
// returns.
497 const std::vector
<MachineBasicBlock
*> Blocks
= CurLoop
->getBlocks();
498 for (unsigned i
= 0, e
= Blocks
.size(); i
!= e
; ++i
) {
499 MachineBasicBlock
*BB
= Blocks
[i
];
500 if (!BB
->isLiveIn(Reg
))
502 for (MachineBasicBlock::iterator
503 MII
= BB
->begin(), E
= BB
->end(); MII
!= E
; ++MII
) {
504 MachineInstr
*MI
= &*MII
;
// This operand loop redeclares i and e, shadowing the block loop's variables.
505 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
!= e
; ++i
) {
506 MachineOperand
&MO
= MI
->getOperand(i
);
// Skip everything except register uses with a non-zero register.
507 if (!MO
.isReg() || !MO
.getReg() || MO
.isDef()) continue;
// Matches a use of Reg itself or one for which
// TRI->isSuperRegister(Reg, MO.getReg()) holds.
508 if (MO
.getReg() == Reg
|| TRI
->isSuperRegister(Reg
, MO
.getReg()))
515 /// HoistPostRA - When an instruction is found to only use loop invariant
516 /// operands and is safe to hoist, this function is called to do the
/// dirty work.
// NOTE(review): the DEBUG( wrapper around the dbgs() output and the
// statements after the final comment are missing from this listing.
518 void MachineLICM::HoistPostRA(MachineInstr
*MI
, unsigned Def
) {
// Nothing is hoisted when no preheader is available.
519 MachineBasicBlock
*Preheader
= getCurPreheader();
520 if (!Preheader
) return;
522 // Now move the instructions to the predecessor, inserting it before any
523 // terminator instructions.
525 dbgs() << "Hoisting " << *MI
;
// Block names are printed only when the machine block has an IR basic block.
526 if (Preheader
->getBasicBlock())
527 dbgs() << " to MachineBasicBlock "
528 << Preheader
->getName();
529 if (MI
->getParent()->getBasicBlock())
530 dbgs() << " from MachineBasicBlock "
531 << MI
->getParent()->getName();
535 // Splice the instruction to the preheader.
// MI is moved in front of the preheader's first terminator.
536 MachineBasicBlock
*MBB
= MI
->getParent();
537 Preheader
->splice(Preheader
->getFirstTerminator(), MBB
, MI
);
539 // Add register to livein list to all the BBs in the current loop since a
540 // loop invariant must be kept live throughout the whole loop. This is
541 // important to ensure later passes do not scavenge the def register.
548 /// HoistRegion - Walk the specified region of the CFG (defined by all blocks
549 /// dominated by the specified block, and that are in the current loop) in depth
550 /// first order w.r.t the DominatorTree. This allows us to visit definitions
551 /// before uses, allowing us to hoist a loop body in one pass without iteration.
// NOTE(review): the preheader null check, the IsHeader test guarding
// InitRegPressure, and the loop's iterator advance are missing from this
// listing.
553 void MachineLICM::HoistRegion(MachineDomTreeNode
*N
, bool IsHeader
) {
554 assert(N
!= 0 && "Null dominator tree node?");
555 MachineBasicBlock
*BB
= N
->getBlock();
557 // If this subregion is not in the top level loop at all, exit.
558 if (!CurLoop
->contains(BB
)) return;
560 MachineBasicBlock
*Preheader
= getCurPreheader();
565 // Compute registers which are livein into the loop headers.
568 InitRegPressure(Preheader
);
571 // Remember livein register pressure.
// The snapshot pushed here is popped again at the end of this function.
572 BackTrace
.push_back(RegPressure
);
574 for (MachineBasicBlock::iterator
575 MII
= BB
->begin(), E
= BB
->end(); MII
!= E
; ) {
// The successor iterator is captured before Hoist is attempted; presumably
// because a successful hoist moves MI out of BB -- confirm.
576 MachineBasicBlock::iterator NextMII
= MII
; ++NextMII
;
577 MachineInstr
*MI
= &*MII
;
// Instructions that stay in the block contribute to the pressure estimate.
578 if (!Hoist(MI
, Preheader
))
579 UpdateRegPressure(MI
);
583 // Don't hoist things out of a large switch statement. This often causes
584 // code to be hoisted that wasn't going to be executed, and increases
585 // register pressure in a situation where it's likely to matter.
586 if (BB
->succ_size() < 25) {
// Recurse into dominator tree children; IsHeader takes its default (false).
587 const std::vector
<MachineDomTreeNode
*> &Children
= N
->getChildren();
588 for (unsigned I
= 0, E
= Children
.size(); I
!= E
; ++I
)
589 HoistRegion(Children
[I
]);
592 BackTrace
.pop_back();
/// isOperandKill - Return true if MO carries the kill flag, or if
/// MRI->hasOneNonDBGUse() holds for MO's register.
595 static bool isOperandKill(const MachineOperand
&MO
, MachineRegisterInfo
*MRI
) {
596 return MO
.isKill() || MRI
->hasOneNonDBGUse(MO
.getReg());
599 /// InitRegPressure - Find all virtual register references that are liveout of
600 /// the preheader to initialize the starting "register pressure". Note this
601 /// does not count live through (livein but not used) registers.
// NOTE(review): the loop increment, `continue` statements and the def/use
// branch structure are missing from this listing.
602 void MachineLICM::InitRegPressure(MachineBasicBlock
*BB
) {
// Every call, including the recursive one below, starts from zeroed pressure.
603 std::fill(RegPressure
.begin(), RegPressure
.end(), 0);
605 // If the preheader has only a single predecessor and it ends with a
606 // fallthrough or an unconditional branch, then scan its predecessor for live
607 // defs as well. This happens whenever the preheader is created by splitting
608 // the critical edge from the loop predecessor to the loop header.
609 if (BB
->pred_size() == 1) {
610 MachineBasicBlock
*TBB
= 0, *FBB
= 0;
611 SmallVector
<MachineOperand
, 4> Cond
;
// The predecessor is scanned first, before this block's own instructions.
612 if (!TII
->AnalyzeBranch(*BB
, TBB
, FBB
, Cond
, false) && Cond
.empty())
613 InitRegPressure(*BB
->pred_begin());
616 for (MachineBasicBlock::iterator MII
= BB
->begin(), E
= BB
->end();
618 MachineInstr
*MI
= &*MII
;
// Only the operands counted by the instruction descriptor are visited.
619 for (unsigned i
= 0, e
= MI
->getDesc().getNumOperands(); i
!= e
; ++i
) {
620 const MachineOperand
&MO
= MI
->getOperand(i
);
621 if (!MO
.isReg() || MO
.isImplicit())
623 unsigned Reg
= MO
.getReg();
624 if (!TargetRegisterInfo::isVirtualRegister(Reg
))
// isNew records whether this is the first time Reg was inserted in RegSeen.
627 bool isNew
= RegSeen
.insert(Reg
);
// Pressure is charged to the representative register class of the first
// value type of Reg's class.
628 const TargetRegisterClass
*RC
= MRI
->getRegClass(Reg
);
629 EVT VT
= *RC
->vt_begin();
630 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
632 RegPressure
[RCId
] += TLI
->getRepRegClassCostFor(VT
);
634 bool isKill
= isOperandKill(MO
, MRI
);
635 if (isNew
&& !isKill
)
636 // Haven't seen this, it must be a livein.
637 RegPressure
[RCId
] += TLI
->getRepRegClassCostFor(VT
);
638 else if (!isNew
&& isKill
)
639 RegPressure
[RCId
] -= TLI
->getRepRegClassCostFor(VT
);
645 /// UpdateRegPressure - Update estimate of register pressure after the
646 /// specified instruction.
// Kills of already-seen virtual registers lower the pressure during the
// operand scan; registers collected in Defs raise it afterwards.
// NOTE(review): the early return, `continue`s, the branch that fills Defs and
// an `else` are missing from this listing.
647 void MachineLICM::UpdateRegPressure(const MachineInstr
*MI
) {
648 if (MI
->isImplicitDef())
651 SmallVector
<unsigned, 4> Defs
;
652 for (unsigned i
= 0, e
= MI
->getDesc().getNumOperands(); i
!= e
; ++i
) {
653 const MachineOperand
&MO
= MI
->getOperand(i
);
654 if (!MO
.isReg() || MO
.isImplicit())
656 unsigned Reg
= MO
.getReg();
657 if (!TargetRegisterInfo::isVirtualRegister(Reg
))
660 bool isNew
= RegSeen
.insert(Reg
);
663 else if (!isNew
&& isOperandKill(MO
, MRI
)) {
664 const TargetRegisterClass
*RC
= MRI
->getRegClass(Reg
);
665 EVT VT
= *RC
->vt_begin();
666 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
667 unsigned RCCost
= TLI
->getRepRegClassCostFor(VT
);
// RegPressure holds unsigned values; when the cost exceeds the current
// pressure the entry is set to 0. Presumably the subtraction below sits in an
// `else` not shown here, which would avoid unsigned wrap-around -- confirm.
669 if (RCCost
> RegPressure
[RCId
])
670 RegPressure
[RCId
] = 0;
672 RegPressure
[RCId
] -= RCCost
;
// Account for the registers collected in Defs.
676 while (!Defs
.empty()) {
677 unsigned Reg
= Defs
.pop_back_val();
678 const TargetRegisterClass
*RC
= MRI
->getRegClass(Reg
);
679 EVT VT
= *RC
->vt_begin();
680 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
681 unsigned RCCost
= TLI
->getRepRegClassCostFor(VT
);
682 RegPressure
[RCId
] += RCCost
;
686 /// IsLICMCandidate - Returns true if the instruction may be a suitable
687 /// candidate for LICM. e.g. If the instruction is a call, then it's obviously
688 /// not safe to hoist it.
// NOTE(review): the return statements are missing from this listing.
689 bool MachineLICM::IsLICMCandidate(MachineInstr
&I
) {
690 // Check if it's safe to move the instruction.
// DontMoveAcrossStore is passed to isSafeToMove with an initial value of true.
691 bool DontMoveAcrossStore
= true;
692 if (!I
.isSafeToMove(TII
, AA
, DontMoveAcrossStore
))
698 /// IsLoopInvariantInst - Returns true if the instruction is loop
699 /// invariant. I.e., all virtual register operands are defined outside of the
700 /// loop, physical registers aren't accessed explicitly, and there are no side
701 /// effects that aren't captured by the operands or other flags.
// NOTE(review): every `return`/`continue`, the operand-kind test and the
// use/def split for physical registers are missing from this listing.
703 bool MachineLICM::IsLoopInvariantInst(MachineInstr
&I
) {
704 if (!IsLICMCandidate(I
))
707 // The instruction is loop invariant if all of its operands are.
708 for (unsigned i
= 0, e
= I
.getNumOperands(); i
!= e
; ++i
) {
709 const MachineOperand
&MO
= I
.getOperand(i
);
714 unsigned Reg
= MO
.getReg();
715 if (Reg
== 0) continue;
717 // Don't hoist an instruction that uses or defines a physical register.
718 if (TargetRegisterInfo::isPhysicalRegister(Reg
)) {
720 // If the physreg has no defs anywhere, it's just an ambient register
721 // and we can freely move its uses. Alternatively, if it's allocatable,
722 // it could get allocated to something with a def during allocation.
723 if (!MRI
->def_empty(Reg
))
725 if (AllocatableSet
.test(Reg
))
727 // Check for a def among the register's aliases too.
// The alias list is walked until its zero terminator.
728 for (const unsigned *Alias
= TRI
->getAliasSet(Reg
); *Alias
; ++Alias
) {
729 unsigned AliasReg
= *Alias
;
730 if (!MRI
->def_empty(AliasReg
))
732 if (AllocatableSet
.test(AliasReg
))
735 // Otherwise it's safe to move.
737 } else if (!MO
.isDead()) {
738 // A def that isn't dead. We can't move it.
740 } else if (CurLoop
->getHeader()->isLiveIn(Reg
)) {
741 // If the reg is live into the loop, we can't hoist an instruction
742 // which would clobber it.
// From here on Reg is handled as a virtual register.
750 assert(MRI
->getVRegDef(Reg
) &&
751 "Machine instr not mapped for this vreg?!");
753 // If the loop contains the definition of an operand, then the instruction
754 // isn't loop invariant.
755 if (CurLoop
->contains(MRI
->getVRegDef(Reg
)))
759 // If we got this far, the instruction is loop invariant!
764 /// HasAnyPHIUse - Return true if the specified register is used by any
/// PHI node.
// NOTE(review): the PHI test itself, the tail of the copy check and the
// return statements are missing from this listing.
766 bool MachineLICM::HasAnyPHIUse(unsigned Reg
) const {
767 for (MachineRegisterInfo::use_iterator UI
= MRI
->use_begin(Reg
),
768 UE
= MRI
->use_end(); UI
!= UE
; ++UI
) {
769 MachineInstr
*UseMI
= &*UI
;
772 // Look past copies as well.
// For a copy, the register in operand 0 is examined when it is virtual.
773 if (UseMI
->isCopy()) {
774 unsigned Def
= UseMI
->getOperand(0).getReg();
775 if (TargetRegisterInfo::isVirtualRegister(Def
) &&
783 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
784 /// and a use in the current loop, return true if the target considered
/// it high.
// NOTE(review): the return/continue/break statements and the comparison of
// MOReg against Reg are missing from this listing.
786 bool MachineLICM::HasHighOperandLatency(MachineInstr
&MI
,
787 unsigned DefIdx
, unsigned Reg
) const {
// Bail-out test: no itinerary data available, or Reg has no non-debug uses.
788 if (!InstrItins
|| InstrItins
->isEmpty() || MRI
->use_nodbg_empty(Reg
))
791 for (MachineRegisterInfo::use_nodbg_iterator I
= MRI
->use_nodbg_begin(Reg
),
792 E
= MRI
->use_nodbg_end(); I
!= E
; ++I
) {
793 MachineInstr
*UseMI
= &*I
;
// Copy-like users and users outside CurLoop are tested for separately here.
794 if (UseMI
->isCopyLike())
796 if (!CurLoop
->contains(UseMI
->getParent()))
798 for (unsigned i
= 0, e
= UseMI
->getNumOperands(); i
!= e
; ++i
) {
799 const MachineOperand
&MO
= UseMI
->getOperand(i
);
800 if (!MO
.isReg() || !MO
.isUse())
802 unsigned MOReg
= MO
.getReg();
// The target hook decides, given the def operand index of MI and the use
// operand index i of UseMI.
806 if (TII
->hasHighOperandLatency(InstrItins
, MRI
, &MI
, DefIdx
, UseMI
, i
))
810 // Only look at the first in loop use.
817 /// IsCheapInstruction - Return true if the instruction is marked "cheap" or
818 /// the operand latency between its def and a use is one or less.
// NOTE(review): the return statements, `continue`s and the updates of
// NumDefs/isCheap are missing from this listing.
819 bool MachineLICM::IsCheapInstruction(MachineInstr
&MI
) const {
// First test: the descriptor's as-cheap-as-a-move flag, or a copy-like MI.
820 if (MI
.getDesc().isAsCheapAsAMove() || MI
.isCopyLike())
// Second test: itinerary data is absent or empty.
822 if (!InstrItins
|| InstrItins
->isEmpty())
825 bool isCheap
= false;
826 unsigned NumDefs
= MI
.getDesc().getNumDefs();
// The loop also stops once NumDefs reaches zero.
827 for (unsigned i
= 0, e
= MI
.getNumOperands(); NumDefs
&& i
!= e
; ++i
) {
828 MachineOperand
&DefMO
= MI
.getOperand(i
);
829 if (!DefMO
.isReg() || !DefMO
.isDef())
832 unsigned Reg
= DefMO
.getReg();
833 if (TargetRegisterInfo::isPhysicalRegister(Reg
))
836 if (!TII
->hasLowDefLatency(InstrItins
, &MI
, i
))
844 /// CanCauseHighRegPressure - Visit BBs from header to current BB, check
845 /// if hoisting an instruction of the given cost matrix can cause high
846 /// register pressure.
// Cost maps a register class ID to the pressure change for that class.
// NOTE(review): the outer loop's condition/increment, any filtering of
// entries and the return statements are missing from this listing.
847 bool MachineLICM::CanCauseHighRegPressure(DenseMap
<unsigned, int> &Cost
) {
848 for (DenseMap
<unsigned, int>::iterator CI
= Cost
.begin(), CE
= Cost
.end();
853 unsigned RCId
= CI
->first
;
// The back trace is indexed from its last entry down to its first, i.e. the
// most recently pushed snapshot is checked first.
854 for (unsigned i
= BackTrace
.size(); i
!= 0; --i
) {
855 SmallVector
<unsigned, 8> &RP
= BackTrace
[i
-1];
// NOTE(review): RP holds unsigned values while CI->second is int, so the sum
// is evaluated as unsigned; confirm that non-positive costs are filtered out
// on a line not shown here.
856 if (RP
[RCId
] + CI
->second
>= RegLimit
[RCId
])
864 /// UpdateBackTraceRegPressure - Traverse the back trace from header to the
865 /// current block and update their register pressures to reflect the effect
866 /// of hoisting MI from the current block to the preheader.
// NOTE(review): the early return, `continue`s, the def/use branch header,
// `else` lines and the inner loop's condition/increment are missing from this
// listing.
867 void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr
*MI
) {
868 if (MI
->isImplicitDef())
871 // First compute the 'cost' of the instruction, i.e. its contribution
872 // to register pressure.
873 DenseMap
<unsigned, int> Cost
;
874 for (unsigned i
= 0, e
= MI
->getDesc().getNumOperands(); i
!= e
; ++i
) {
875 const MachineOperand
&MO
= MI
->getOperand(i
);
876 if (!MO
.isReg() || MO
.isImplicit())
878 unsigned Reg
= MO
.getReg();
879 if (!TargetRegisterInfo::isVirtualRegister(Reg
))
882 const TargetRegisterClass
*RC
= MRI
->getRegClass(Reg
);
883 EVT VT
= *RC
->vt_begin();
884 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
885 unsigned RCCost
= TLI
->getRepRegClassCostFor(VT
);
// First branch: add RCCost to this class's entry, creating it if absent.
887 DenseMap
<unsigned, int>::iterator CI
= Cost
.find(RCId
);
888 if (CI
!= Cost
.end())
889 CI
->second
+= RCCost
;
891 Cost
.insert(std::make_pair(RCId
, RCCost
));
// Kill branch: subtract RCCost from this class's entry.
892 } else if (isOperandKill(MO
, MRI
)) {
893 DenseMap
<unsigned, int>::iterator CI
= Cost
.find(RCId
);
894 if (CI
!= Cost
.end())
895 CI
->second
-= RCCost
;
// NOTE(review): RCCost is unsigned, so -RCCost is an unsigned negation that
// is only turned into a negative int when stored in the map.
897 Cost
.insert(std::make_pair(RCId
, -RCCost
));
901 // Update register pressure of blocks from loop header to current block.
902 for (unsigned i
= 0, e
= BackTrace
.size(); i
!= e
; ++i
) {
903 SmallVector
<unsigned, 8> &RP
= BackTrace
[i
];
904 for (DenseMap
<unsigned, int>::iterator CI
= Cost
.begin(), CE
= Cost
.end();
906 unsigned RCId
= CI
->first
;
907 RP
[RCId
] += CI
->second
;
912 /// IsProfitableToHoist - Return true if it is potentially profitable to hoist
913 /// the given loop invariant.
// NOTE(review): all return statements, `continue`s, statistic updates, the
// def/use branch header and `else` lines are missing from this listing.
914 bool MachineLICM::IsProfitableToHoist(MachineInstr
&MI
) {
915 if (MI
.isImplicitDef())
918 // If the instruction is cheap, only hoist if it is re-materializable. LICM
919 // will increase register pressure. It's probably not worth it if the
920 // instruction is cheap.
921 // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
922 // these tends to help performance in low register pressure situation. The
923 // trade off is it may cause spill in high pressure situation. It will end up
924 // adding a store in the loop preheader. But the reload is no more expensive.
925 // The side benefit is these loads are frequently CSE'ed.
926 if (IsCheapInstruction(MI
)) {
927 if (!TII
->isTriviallyReMaterializable(&MI
, AA
))
930 // Estimate register pressure to determine whether to LICM the instruction.
931 // In low register pressure situation, we can be more aggressive about
932 // hoisting. Also, favors hoisting long latency instructions even in
933 // moderately high pressure situation.
934 // FIXME: If there are long latency loop-invariant instructions inside the
935 // loop at this point, why didn't the optimizer's LICM hoist them?
// Cost maps a register class ID to the net pressure change of hoisting MI.
936 DenseMap
<unsigned, int> Cost
;
937 for (unsigned i
= 0, e
= MI
.getDesc().getNumOperands(); i
!= e
; ++i
) {
938 const MachineOperand
&MO
= MI
.getOperand(i
);
939 if (!MO
.isReg() || MO
.isImplicit())
941 unsigned Reg
= MO
.getReg();
942 if (!TargetRegisterInfo::isVirtualRegister(Reg
))
945 if (HasHighOperandLatency(MI
, i
, Reg
)) {
950 const TargetRegisterClass
*RC
= MRI
->getRegClass(Reg
);
951 EVT VT
= *RC
->vt_begin();
952 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
953 unsigned RCCost
= TLI
->getRepRegClassCostFor(VT
);
954 DenseMap
<unsigned, int>::iterator CI
= Cost
.find(RCId
);
955 if (CI
!= Cost
.end())
956 CI
->second
+= RCCost
;
958 Cost
.insert(std::make_pair(RCId
, RCCost
));
959 } else if (isOperandKill(MO
, MRI
)) {
960 // If a virtual register use is a kill, hoisting it out of the loop
961 // may actually reduce register pressure or be register pressure
// neutral.
963 const TargetRegisterClass
*RC
= MRI
->getRegClass(Reg
);
964 EVT VT
= *RC
->vt_begin();
965 unsigned RCId
= TLI
->getRepRegClassFor(VT
)->getID();
966 unsigned RCCost
= TLI
->getRepRegClassCostFor(VT
);
967 DenseMap
<unsigned, int>::iterator CI
= Cost
.find(RCId
);
968 if (CI
!= Cost
.end())
969 CI
->second
-= RCCost
;
// NOTE(review): RCCost is unsigned, so -RCCost is an unsigned negation that
// is only turned into a negative int when stored in the map.
971 Cost
.insert(std::make_pair(RCId
, -RCCost
));
975 // Visit BBs from header to current BB, if hoisting this doesn't cause
976 // high register pressure, then it's safe to proceed.
977 if (!CanCauseHighRegPressure(Cost
)) {
982 // High register pressure situation, only hoist if the instruction is going to
// be rematerialized.
984 if (!TII
->isTriviallyReMaterializable(&MI
, AA
) &&
985 !MI
.isInvariantLoad(AA
))
989 // If result(s) of this instruction is used by PHIs outside of the loop, then
990 // don't hoist the instruction because it will introduce an extra copy.
991 for (unsigned i
= 0, e
= MI
.getNumOperands(); i
!= e
; ++i
) {
992 const MachineOperand
&MO
= MI
.getOperand(i
);
993 if (!MO
.isReg() || !MO
.isDef())
995 if (HasAnyPHIUse(MO
.getReg()))
1002 MachineInstr
*MachineLICM::ExtractHoistableLoad(MachineInstr
*MI
) {
1003 // Don't unfold simple loads.
1004 if (MI
->getDesc().canFoldAsLoad())
1007 // If not, we may be able to unfold a load and hoist that.
1008 // First test whether the instruction is loading from an amenable
1010 if (!MI
->isInvariantLoad(AA
))
1013 // Next determine the register class for a temporary register.
1014 unsigned LoadRegIndex
;
1016 TII
->getOpcodeAfterMemoryUnfold(MI
->getOpcode(),
1017 /*UnfoldLoad=*/true,
1018 /*UnfoldStore=*/false,
1020 if (NewOpc
== 0) return 0;
1021 const TargetInstrDesc
&TID
= TII
->get(NewOpc
);
1022 if (TID
.getNumDefs() != 1) return 0;
1023 const TargetRegisterClass
*RC
= TID
.OpInfo
[LoadRegIndex
].getRegClass(TRI
);
1024 // Ok, we're unfolding. Create a temporary register and do the unfold.
1025 unsigned Reg
= MRI
->createVirtualRegister(RC
);
1027 MachineFunction
&MF
= *MI
->getParent()->getParent();
1028 SmallVector
<MachineInstr
*, 2> NewMIs
;
1030 TII
->unfoldMemoryOperand(MF
, MI
, Reg
,
1031 /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
1035 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1037 assert(NewMIs
.size() == 2 &&
1038 "Unfolded a load into multiple instructions!");
1039 MachineBasicBlock
*MBB
= MI
->getParent();
1040 MBB
->insert(MI
, NewMIs
[0]);
1041 MBB
->insert(MI
, NewMIs
[1]);
1042 // If unfolding produced a load that wasn't loop-invariant or profitable to
1043 // hoist, discard the new instructions and bail.
1044 if (!IsLoopInvariantInst(*NewMIs
[0]) || !IsProfitableToHoist(*NewMIs
[0])) {
1045 NewMIs
[0]->eraseFromParent();
1046 NewMIs
[1]->eraseFromParent();
1050 // Update register pressure for the unfolded instruction.
1051 UpdateRegPressure(NewMIs
[1]);
1053 // Otherwise we successfully unfolded a load that we can hoist.
1054 MI
->eraseFromParent();
1058 void MachineLICM::InitCSEMap(MachineBasicBlock
*BB
) {
1059 for (MachineBasicBlock::iterator I
= BB
->begin(),E
= BB
->end(); I
!= E
; ++I
) {
1060 const MachineInstr
*MI
= &*I
;
1061 unsigned Opcode
= MI
->getOpcode();
1062 DenseMap
<unsigned, std::vector
<const MachineInstr
*> >::iterator
1063 CI
= CSEMap
.find(Opcode
);
1064 if (CI
!= CSEMap
.end())
1065 CI
->second
.push_back(MI
);
1067 std::vector
<const MachineInstr
*> CSEMIs
;
1068 CSEMIs
.push_back(MI
);
1069 CSEMap
.insert(std::make_pair(Opcode
, CSEMIs
));
1075 MachineLICM::LookForDuplicate(const MachineInstr
*MI
,
1076 std::vector
<const MachineInstr
*> &PrevMIs
) {
1077 for (unsigned i
= 0, e
= PrevMIs
.size(); i
!= e
; ++i
) {
1078 const MachineInstr
*PrevMI
= PrevMIs
[i
];
1079 if (TII
->produceSameValue(MI
, PrevMI
, (PreRegAlloc
? MRI
: 0)))
1085 bool MachineLICM::EliminateCSE(MachineInstr
*MI
,
1086 DenseMap
<unsigned, std::vector
<const MachineInstr
*> >::iterator
&CI
) {
1087 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1088 // the undef property onto uses.
1089 if (CI
== CSEMap
.end() || MI
->isImplicitDef())
1092 if (const MachineInstr
*Dup
= LookForDuplicate(MI
, CI
->second
)) {
1093 DEBUG(dbgs() << "CSEing " << *MI
<< " with " << *Dup
);
1095 // Replace virtual registers defined by MI by their counterparts defined
1097 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
!= e
; ++i
) {
1098 const MachineOperand
&MO
= MI
->getOperand(i
);
1100 // Physical registers may not differ here.
1101 assert((!MO
.isReg() || MO
.getReg() == 0 ||
1102 !TargetRegisterInfo::isPhysicalRegister(MO
.getReg()) ||
1103 MO
.getReg() == Dup
->getOperand(i
).getReg()) &&
1104 "Instructions with different phys regs are not identical!");
1106 if (MO
.isReg() && MO
.isDef() &&
1107 !TargetRegisterInfo::isPhysicalRegister(MO
.getReg())) {
1108 MRI
->replaceRegWith(MO
.getReg(), Dup
->getOperand(i
).getReg());
1109 MRI
->clearKillFlags(Dup
->getOperand(i
).getReg());
1112 MI
->eraseFromParent();
1119 /// Hoist - When an instruction is found to use only loop invariant operands
1120 /// that are safe to hoist, this instruction is called to do the dirty work.
1122 bool MachineLICM::Hoist(MachineInstr
*MI
, MachineBasicBlock
*Preheader
) {
1123 // First check whether we should hoist this instruction.
1124 if (!IsLoopInvariantInst(*MI
) || !IsProfitableToHoist(*MI
)) {
1125 // If not, try unfolding a hoistable load.
1126 MI
= ExtractHoistableLoad(MI
);
1127 if (!MI
) return false;
1130 // Now move the instructions to the predecessor, inserting it before any
1131 // terminator instructions.
1133 dbgs() << "Hoisting " << *MI
;
1134 if (Preheader
->getBasicBlock())
1135 dbgs() << " to MachineBasicBlock "
1136 << Preheader
->getName();
1137 if (MI
->getParent()->getBasicBlock())
1138 dbgs() << " from MachineBasicBlock "
1139 << MI
->getParent()->getName();
1143 // If this is the first instruction being hoisted to the preheader,
1144 // initialize the CSE map with potential common expressions.
1146 InitCSEMap(Preheader
);
1147 FirstInLoop
= false;
1150 // Look for opportunity to CSE the hoisted instruction.
1151 unsigned Opcode
= MI
->getOpcode();
1152 DenseMap
<unsigned, std::vector
<const MachineInstr
*> >::iterator
1153 CI
= CSEMap
.find(Opcode
);
1154 if (!EliminateCSE(MI
, CI
)) {
1155 // Otherwise, splice the instruction to the preheader.
1156 Preheader
->splice(Preheader
->getFirstTerminator(),MI
->getParent(),MI
);
1158 // Update register pressure for BBs from header to this block.
1159 UpdateBackTraceRegPressure(MI
);
1161 // Clear the kill flags of any register this instruction defines,
1162 // since they may need to be live throughout the entire loop
1163 // rather than just live for part of it.
1164 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
!= e
; ++i
) {
1165 MachineOperand
&MO
= MI
->getOperand(i
);
1166 if (MO
.isReg() && MO
.isDef() && !MO
.isDead())
1167 MRI
->clearKillFlags(MO
.getReg());
1170 // Add to the CSE map.
1171 if (CI
!= CSEMap
.end())
1172 CI
->second
.push_back(MI
);
1174 std::vector
<const MachineInstr
*> CSEMIs
;
1175 CSEMIs
.push_back(MI
);
1176 CSEMap
.insert(std::make_pair(Opcode
, CSEMIs
));
1186 MachineBasicBlock
*MachineLICM::getCurPreheader() {
1187 // Determine the block to which to hoist instructions. If we can't find a
1188 // suitable loop predecessor, we can't do any hoisting.
1190 // If we've tried to get a preheader and failed, don't try again.
1191 if (CurPreheader
== reinterpret_cast<MachineBasicBlock
*>(-1))
1194 if (!CurPreheader
) {
1195 CurPreheader
= CurLoop
->getLoopPreheader();
1196 if (!CurPreheader
) {
1197 MachineBasicBlock
*Pred
= CurLoop
->getLoopPredecessor();
1199 CurPreheader
= reinterpret_cast<MachineBasicBlock
*>(-1);
1203 CurPreheader
= Pred
->SplitCriticalEdge(CurLoop
->getHeader(), this);
1204 if (!CurPreheader
) {
1205 CurPreheader
= reinterpret_cast<MachineBasicBlock
*>(-1);
1210 return CurPreheader
;