//===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a top-down list scheduler, using standard algorithms.
// The basic approach uses a priority queue of available nodes to schedule.
// One at a time, nodes are taken from the priority queue (thus in priority
// order), checked for legality to schedule, and emitted if legal.
//
// Nodes may not be legal to schedule either due to structural hazards (e.g.
// pipeline or resource constraints) or because an input to the instruction has
// not completed execution.
//
//===----------------------------------------------------------------------===//
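//
// A rough pseudocode sketch of the loop implemented below (illustrative only;
// the names here are placeholders, not the identifiers used in the code):
//
//   while (Available or Pending is non-empty) {
//     move Pending nodes whose operands have completed into Available;
//     pick the highest-priority Available node with no structural hazard;
//     if (one was found)  emit it and release its successors;
//     else                stall or emit a noop, as the target requires;
//     advance the cycle;
//   }
//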
#define DEBUG_TYPE "post-RA-sched"
#include "ExactHazardRecognizer.h"
#include "SimpleHazardRecognizer.h"
#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");
static cl::opt<bool>
EnableAntiDepBreaking("break-anti-dependencies",
                      cl::desc("Break post-RA scheduling anti-dependencies"),
                      cl::init(true), cl::Hidden);
static cl::opt<bool>
EnablePostRAHazardAvoidance("avoid-hazards",
                      cl::desc("Enable exact hazard avoidance"),
                      cl::init(true), cl::Hidden);

// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
static cl::opt<int>
DebugDiv("postra-sched-debugdiv",
                      cl::desc("Debug control MBBs that are scheduled"),
                      cl::init(0), cl::Hidden);
static cl::opt<int>
DebugMod("postra-sched-debugmod",
                      cl::desc("Debug control MBBs that are scheduled"),
                      cl::init(0), cl::Hidden);
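// Usage note (illustrative, assuming this pass runs as part of a normal llc
// invocation): passing -postra-sched-debugdiv=8 -postra-sched-debugmod=3
// schedules only blocks whose running count satisfies bbcnt % 8 == 3 (see the
// check in runOnMachineFunction below), which is handy for bisecting a
// post-RA-scheduling miscompile down to a single block.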
class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
public:
  static char ID;
  PostRAScheduler() : MachineFunctionPass(&ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
    AU.addRequired<MachineLoopInfo>();
    AU.addPreserved<MachineLoopInfo>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  const char *getPassName() const {
    return "Post RA top-down list latency scheduler";
  }

  bool runOnMachineFunction(MachineFunction &Fn);
};
char PostRAScheduler::ID = 0;
class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
  /// AvailableQueue - The priority queue to use for the available SUnits.
  LatencyPriorityQueue AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation). Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit*> PendingQueue;

  /// Topo - A topological ordering for SUnits.
  ScheduleDAGTopologicalSort Topo;

  /// AllocatableSet - The set of allocatable registers.
  /// We'll be ignoring anti-dependencies on non-allocatable registers,
  /// because they may not be safe to break.
  const BitVector AllocatableSet;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// Classes - For live regs that are only used in one register class in a
  /// live range, the register class. If the register is not live, the
  /// corresponding value is null. If the register is live but used in
  /// multiple register classes, the corresponding value is -1 cast to a
  /// pointer.
  const TargetRegisterClass *
    Classes[TargetRegisterInfo::FirstVirtualRegister];

  /// RegRefs - Map registers to all their references within a live range.
  std::multimap<unsigned, MachineOperand *> RegRefs;

  /// KillIndices - The index of the most recent kill (proceeding bottom-up),
  /// or ~0u if the register is not live.
  unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];

  /// DefIndices - The index of the most recent complete def (proceeding
  /// bottom up), or ~0u if the register is live.
  unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
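
  // Illustrative note on the invariant maintained below: scanning a block
  // bottom-up, exactly one of KillIndices[Reg] and DefIndices[Reg] is ~0u.
  // For example, given
  //     R = ...       // instruction index i
  //     ... = R       // instruction index i+1 (last use of R)
  // after scanning the use, KillIndices[R] == i+1 and DefIndices[R] == ~0u
  // (R is live above this point); after scanning the def, DefIndices[R] == i
  // and KillIndices[R] == ~0u (R is dead above this point). The asserts in
  // ScanInstruction check exactly this.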
public:
  SchedulePostRATDList(MachineFunction &MF,
                       const MachineLoopInfo &MLI,
                       const MachineDominatorTree &MDT,
                       ScheduleHazardRecognizer *HR)
    : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
      AllocatableSet(TRI->getAllocatableSet(MF)),
      HazardRec(HR) {}

  ~SchedulePostRATDList() {
    delete HazardRec;
  }
  /// StartBlock - Initialize register live-range state for scheduling in
  /// this block.
  void StartBlock(MachineBasicBlock *BB);

  /// Schedule - Schedule the instruction range using list scheduling.
  void Schedule();

  /// FixupKills - Fix register kill flags that have been made
  /// invalid due to scheduling.
  void FixupKills(MachineBasicBlock *MBB);

  /// Observe - Update liveness information to account for the current
  /// instruction, which will not be scheduled.
  void Observe(MachineInstr *MI, unsigned Count);

  /// FinishBlock - Clean up register live-range state.
  void FinishBlock();

private:
  void PrescanInstruction(MachineInstr *MI);
  void ScanInstruction(MachineInstr *MI, unsigned Count);
  void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
  void ListScheduleTopDown();
  bool BreakAntiDependencies();
  unsigned findSuitableFreeRegister(unsigned AntiDepReg,
                                    unsigned LastNewReg,
                                    const TargetRegisterClass *);
  void StartBlockForKills(MachineBasicBlock *BB);
};
/// isSchedulingBoundary - Test if the given instruction should be
/// considered a scheduling boundary. This primarily includes labels
/// and terminators.
static bool isSchedulingBoundary(const MachineInstr *MI,
                                 const MachineFunction &MF) {
  // Terminators and labels can't be scheduled around.
  if (MI->getDesc().isTerminator() || MI->isLabel())
    return true;

  // Don't attempt to schedule around any instruction that modifies
  // a stack-oriented pointer, as it's unlikely to be profitable. This
  // saves compile time, because it doesn't require every single
  // stack slot reference to depend on the instruction that does the
  // modification.
  const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
  if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore()))
    return true;

  return false;
}
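
// For example (an assumption about a typical target, not taken from this
// file): on x86 a RET or a label is a boundary via the first check, and a
// PUSH, which modifies the stack pointer returned by
// getStackPointerRegisterToSaveRestore, is a boundary via the second check.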
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  DEBUG(errs() << "PostRAScheduler\n");

  const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData();
  ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
    (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) :
    (ScheduleHazardRecognizer *)new SimpleHazardRecognizer();

  SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR);

  // Loop over all of the basic blocks
  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
       MBB != MBBe; ++MBB) {
    // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
    if (DebugDiv > 0) {
      static int bbcnt = 0;
      if (bbcnt++ % DebugDiv != DebugMod)
        continue;
      errs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
        ":MBB ID#" << MBB->getNumber() << " ***\n";
    }

    // Initialize register live-range state for scheduling in this block.
    Scheduler.StartBlock(MBB);

    // Schedule each sequence of instructions not interrupted by a label
    // or anything else that effectively needs to shut down scheduling.
    MachineBasicBlock::iterator Current = MBB->end();
    unsigned Count = MBB->size(), CurrentCount = Count;
    for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
      MachineInstr *MI = prior(I);
      if (isSchedulingBoundary(MI, Fn)) {
        Scheduler.Run(MBB, I, Current, CurrentCount);
        Scheduler.EmitSchedule();
        Current = MI;
        CurrentCount = Count - 1;
        Scheduler.Observe(MI, CurrentCount);
      }
      I = MI;
      --Count;
    }
    assert(Count == 0 && "Instruction count mismatch!");
    assert((MBB->begin() == Current || CurrentCount != 0) &&
           "Instruction count mismatch!");
    Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
    Scheduler.EmitSchedule();

    // Clean up register live-range state.
    Scheduler.FinishBlock();

    // Update register kills.
    Scheduler.FixupKills(MBB);
  }

  return true;
}
/// StartBlock - Initialize register live-range state for scheduling in
/// this block.
void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
  // Call the superclass.
  ScheduleDAGInstrs::StartBlock(BB);

  // Reset the hazard recognizer.
  HazardRec->Reset();

  // Clear out the register class data.
  std::fill(Classes, array_endof(Classes),
            static_cast<const TargetRegisterClass *>(0));

  // Initialize the indices to indicate that no registers are live.
  std::fill(KillIndices, array_endof(KillIndices), ~0u);
  std::fill(DefIndices, array_endof(DefIndices), BB->size());
  // Determine the live-out physregs for this block.
  if (!BB->empty() && BB->back().getDesc().isReturn())
    // In a return block, examine the function live-out regs.
    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
         E = MRI.liveout_end(); I != E; ++I) {
      unsigned Reg = *I;
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[Reg] = BB->size();
      DefIndices[Reg] = ~0u;
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[AliasReg] = BB->size();
        DefIndices[AliasReg] = ~0u;
      }
    }
  else
    // In a non-return block, examine the live-in regs of all successors.
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
           E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[Reg] = BB->size();
        DefIndices[Reg] = ~0u;
        // Repeat, for all aliases.
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          unsigned AliasReg = *Alias;
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          KillIndices[AliasReg] = BB->size();
          DefIndices[AliasReg] = ~0u;
        }
      }
  // Consider callee-saved registers as live-out, since we're running after
  // prologue/epilogue insertion so there's no way to add additional
  // saved registers.
  //
  // TODO: there is a new method
  // MachineFrameInfo::getPristineRegs(MBB). It gives you a list of
  // CSRs that have not been saved when entering the MBB. The
  // remaining CSRs have been saved and can be treated like call
  // clobbered registers.
  for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
    unsigned Reg = *I;
    Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
    KillIndices[Reg] = BB->size();
    DefIndices[Reg] = ~0u;
    // Repeat, for all aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      unsigned AliasReg = *Alias;
      Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[AliasReg] = BB->size();
      DefIndices[AliasReg] = ~0u;
    }
  }
}
/// Schedule - Schedule the instruction range using list scheduling.
///
void SchedulePostRATDList::Schedule() {
  DEBUG(errs() << "********** List Scheduling **********\n");

  // Build the scheduling graph.
  BuildSchedGraph();

  if (EnableAntiDepBreaking) {
    if (BreakAntiDependencies()) {
      // We made changes. Update the dependency graph.
      // Theoretically we could update the graph in place:
      // When a live range is changed to use a different register, remove
      // the def's anti-dependence *and* output-dependence edges due to
      // that register, and add new anti-dependence and output-dependence
      // edges based on the next live range of the register.
      // For now, just clear the graph and rebuild it.
      SUnits.clear();
      EntrySU = SUnit();
      ExitSU = SUnit();
      BuildSchedGraph();
    }
  }
  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  AvailableQueue.initNodes(SUnits);

  ListScheduleTopDown();

  AvailableQueue.releaseState();
}
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
  assert(Count < InsertPosIndex && "Instruction index out of expected range!");

  // Any register which was defined within the previous scheduling region
  // may have been rescheduled and its lifetime may overlap with registers
  // in ways not reflected in our current liveness state. For each such
  // register, adjust the liveness state to be conservatively correct.
  for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg)
    if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
      assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
      // Mark this register to be non-renamable.
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      // Move the def index to the end of the previous region, to reflect
      // that the def could theoretically have been scheduled at the end.
      DefIndices[Reg] = InsertPosIndex;
    }

  PrescanInstruction(MI);
  ScanInstruction(MI, Count);
}
/// FinishBlock - Clean up register live-range state.
void SchedulePostRATDList::FinishBlock() {
  RegRefs.clear();

  // Call the superclass.
  ScheduleDAGInstrs::FinishBlock();
}
/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
static SDep *CriticalPathStep(SUnit *SU) {
  SDep *Next = 0;
  unsigned NextDepth = 0;
  // Find the predecessor edge with the greatest depth.
  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
       P != PE; ++P) {
    SUnit *PredSU = P->getSUnit();
    unsigned PredLatency = P->getLatency();
    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
    // In the case of a latency tie, prefer an anti-dependency edge over
    // other types of edges.
    if (NextDepth < PredTotalLatency ||
        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
      NextDepth = PredTotalLatency;
      Next = &*P;
    }
  }
  return Next;
}
void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) {
  // Scan the register operands for this instruction and update
  // Classes and RegRefs.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;
    const TargetRegisterClass *NewRC = 0;

    if (i < MI->getDesc().getNumOperands())
      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);

    // For now, only allow the register to be changed if its register
    // class is consistent across all uses.
    if (!Classes[Reg] && NewRC)
      Classes[Reg] = NewRC;
    else if (!NewRC || Classes[Reg] != NewRC)
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

    // Now check for aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      // If an alias of the reg is used during the live range, give up.
      // Note that this allows us to skip checking if AntiDepReg
      // overlaps with any of the aliases, among other things.
      unsigned AliasReg = *Alias;
      if (Classes[AliasReg]) {
        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      }
    }

    // If we're still willing to consider this register, note the reference.
    if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
      RegRefs.insert(std::make_pair(Reg, &MO));
  }
}
void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
                                           unsigned Count) {
  // Proceeding upwards, registers that are def'd but not used in this
  // instruction are now dead.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;
    if (!MO.isDef()) continue;
    // Ignore two-addr defs.
    if (MI->isRegTiedToUseOperand(i)) continue;

    DefIndices[Reg] = Count;
    KillIndices[Reg] = ~0u;
    assert(((KillIndices[Reg] == ~0u) !=
            (DefIndices[Reg] == ~0u)) &&
           "Kill and Def maps aren't consistent for Reg!");

    // Repeat, for all subregs.
    for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
         *Subreg; ++Subreg) {
      unsigned SubregReg = *Subreg;
      DefIndices[SubregReg] = Count;
      KillIndices[SubregReg] = ~0u;
      Classes[SubregReg] = 0;
      RegRefs.erase(SubregReg);
    }
    // Conservatively mark super-registers as unusable.
    for (const unsigned *Super = TRI->getSuperRegisters(Reg);
         *Super; ++Super) {
      unsigned SuperReg = *Super;
      Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
    }
  }
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;
    if (!MO.isUse()) continue;

    const TargetRegisterClass *NewRC = 0;
    if (i < MI->getDesc().getNumOperands())
      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);

    // For now, only allow the register to be changed if its register
    // class is consistent across all uses.
    if (!Classes[Reg] && NewRC)
      Classes[Reg] = NewRC;
    else if (!NewRC || Classes[Reg] != NewRC)
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

    RegRefs.insert(std::make_pair(Reg, &MO));

    // It wasn't previously live but now it is; this is a kill.
    if (KillIndices[Reg] == ~0u) {
      KillIndices[Reg] = Count;
      DefIndices[Reg] = ~0u;
      assert(((KillIndices[Reg] == ~0u) !=
              (DefIndices[Reg] == ~0u)) &&
             "Kill and Def maps aren't consistent for Reg!");
    }
    // Repeat, for all aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      unsigned AliasReg = *Alias;
      if (KillIndices[AliasReg] == ~0u) {
        KillIndices[AliasReg] = Count;
        DefIndices[AliasReg] = ~0u;
      }
    }
  }
}
unsigned
SchedulePostRATDList::findSuitableFreeRegister(unsigned AntiDepReg,
                                               unsigned LastNewReg,
                                               const TargetRegisterClass *RC) {
  for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
       RE = RC->allocation_order_end(MF); R != RE; ++R) {
    unsigned NewReg = *R;
    // Don't replace a register with itself.
    if (NewReg == AntiDepReg) continue;
    // Don't replace a register with one that was recently used to repair
    // an anti-dependence with this AntiDepReg, because that would
    // re-introduce that anti-dependence.
    if (NewReg == LastNewReg) continue;
    // If NewReg is dead and NewReg's most recent def is not before
    // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
    assert(((KillIndices[AntiDepReg] == ~0u) !=
            (DefIndices[AntiDepReg] == ~0u)) &&
           "Kill and Def maps aren't consistent for AntiDepReg!");
    assert(((KillIndices[NewReg] == ~0u) !=
            (DefIndices[NewReg] == ~0u)) &&
           "Kill and Def maps aren't consistent for NewReg!");
    if (KillIndices[NewReg] != ~0u ||
        Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
        KillIndices[AntiDepReg] > DefIndices[NewReg])
      continue;
    return NewReg;
  }

  // No registers are free and available!
  return 0;
}
/// BreakAntiDependencies - Identify anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
bool SchedulePostRATDList::BreakAntiDependencies() {
  // The code below assumes that there is at least one instruction,
  // so just duck out immediately if the block is empty.
  if (SUnits.empty()) return false;

  // Find the node at the bottom of the critical path.
  SUnit *Max = 0;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[i];
    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
      Max = SU;
  }

  DEBUG(errs() << "Critical path has total latency "
        << (Max->getDepth() + Max->Latency) << "\n");

  // Track progress along the critical path through the SUnit graph as we
  // walk the instructions.
  SUnit *CriticalPathSU = Max;
  MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
  // Consider this pattern:
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  // There are three anti-dependencies here, and without special care,
  // we'd break all of them using the same register:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  // because at each anti-dependence, B is the first register that
  // isn't A which is free. This re-introduces anti-dependencies
  // at all but one of the original anti-dependencies that we were
  // trying to break. To avoid this, keep track of the most recent
  // register that each register was replaced with, and avoid
  // using it to repair an anti-dependence on the same register.
  // This lets us produce this:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   C = ...
  //   ... = C
  //   B = ...
  //   ... = B
  // This still has an anti-dependence on B, but at least it isn't on the
  // original critical path.
  //
  // TODO: If we tracked more than one register here, we could potentially
  // fix that remaining critical edge too. This is a little more involved,
  // because unlike the most recent register, less recent registers should
  // still be considered, though only if no other registers are available.
  unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};
  // Attempt to break anti-dependence edges on the critical path. Walk the
  // instructions from the bottom up, tracking information about liveness
  // as we go to help determine which registers are available.
  bool Changed = false;
  unsigned Count = InsertPosIndex - 1;
  for (MachineBasicBlock::iterator I = InsertPos, E = Begin;
       I != E; --Count) {
    MachineInstr *MI = --I;
    // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as
    // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF
    // is left behind appearing to clobber the super-register, while the
    // subregister needs to remain live. So we just ignore them.
    if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
      continue;

    // Check if this instruction has a dependence on the critical path that
    // is an anti-dependence that we may be able to break. If it is, set
    // AntiDepReg to the non-zero register associated with the anti-dependence.
    //
    // We limit our attention to the critical path as a heuristic to avoid
    // breaking anti-dependence edges that aren't going to significantly
    // impact the overall schedule. There are a limited number of registers
    // and we want to save them for the important edges.
    //
    // TODO: Instructions with multiple defs could have multiple
    // anti-dependencies. The current code here only knows how to break one
    // edge per instruction. Note that we'd have to be able to break all of
    // the anti-dependencies in an instruction in order to be effective.
    unsigned AntiDepReg = 0;
    if (MI == CriticalPathMI) {
      if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
        SUnit *NextSU = Edge->getSUnit();

        // Only consider anti-dependence edges.
        if (Edge->getKind() == SDep::Anti) {
          AntiDepReg = Edge->getReg();
          assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
          // Don't break anti-dependencies on non-allocatable registers.
          if (!AllocatableSet.test(AntiDepReg))
            AntiDepReg = 0;
          else {
            // If the SUnit has other dependencies on the SUnit that it
            // anti-depends on, don't bother breaking the anti-dependency
            // since those edges would prevent such units from being
            // scheduled past each other regardless.
            //
            // Also, if there are dependencies on other SUnits with the
            // same register as the anti-dependency, don't attempt to
            // break it.
            for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
                 PE = CriticalPathSU->Preds.end(); P != PE; ++P)
              if (P->getSUnit() == NextSU ?
                    (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
                    (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
                AntiDepReg = 0;
                break;
              }
          }
        }
        CriticalPathSU = NextSU;
        CriticalPathMI = CriticalPathSU->getInstr();
      } else {
        // We've reached the end of the critical path.
        CriticalPathSU = 0;
        CriticalPathMI = 0;
      }
    }
    PrescanInstruction(MI);

    // If this instruction has a use of AntiDepReg, breaking it
    // is invalid.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (MO.isUse() && AntiDepReg == Reg) {
        AntiDepReg = 0;
        break;
      }
    }
    // Determine AntiDepReg's register class, if it is live and is
    // consistently used within a single class.
    const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
    assert((AntiDepReg == 0 || RC != NULL) &&
           "Register should be live if it's causing an anti-dependence!");
    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
      AntiDepReg = 0;
    // Look for a suitable register to use to break the anti-dependence.
    //
    // TODO: Instead of picking the first free register, consider which might
    // be the best.
    if (AntiDepReg != 0) {
      if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg,
                                                     LastNewReg[AntiDepReg],
                                                     RC)) {
        DEBUG(errs() << "Breaking anti-dependence edge on "
              << TRI->getName(AntiDepReg)
              << " with " << RegRefs.count(AntiDepReg) << " references"
              << " using " << TRI->getName(NewReg) << "!\n");

        // Update the references to the old register to refer to the new
        // register.
        std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
                  std::multimap<unsigned, MachineOperand *>::iterator>
           Range = RegRefs.equal_range(AntiDepReg);
        for (std::multimap<unsigned, MachineOperand *>::iterator
             Q = Range.first, QE = Range.second; Q != QE; ++Q)
          Q->second->setReg(NewReg);

        // We just went back in time and modified history; the
        // liveness information for the anti-dependence reg is now
        // inconsistent. Set the state as if it were dead.
        Classes[NewReg] = Classes[AntiDepReg];
        DefIndices[NewReg] = DefIndices[AntiDepReg];
        KillIndices[NewReg] = KillIndices[AntiDepReg];
        assert(((KillIndices[NewReg] == ~0u) !=
                (DefIndices[NewReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for NewReg!");

        Classes[AntiDepReg] = 0;
        DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
        KillIndices[AntiDepReg] = ~0u;
        assert(((KillIndices[AntiDepReg] == ~0u) !=
                (DefIndices[AntiDepReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for AntiDepReg!");

        RegRefs.erase(AntiDepReg);
        Changed = true;
        LastNewReg[AntiDepReg] = NewReg;
      }
    }
    ScanInstruction(MI, Count);
  }

  return Changed;
}
/// StartBlockForKills - Initialize register live-range state for updating
/// kills.
void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
  // Initialize the indices to indicate that no registers are live.
  std::fill(KillIndices, array_endof(KillIndices), ~0u);

  // Determine the live-out physregs for this block.
  if (!BB->empty() && BB->back().getDesc().isReturn()) {
    // In a return block, examine the function live-out regs.
    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
         E = MRI.liveout_end(); I != E; ++I) {
      unsigned Reg = *I;
      KillIndices[Reg] = BB->size();
      // Repeat, for all subregs.
      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg) {
        KillIndices[*Subreg] = BB->size();
      }
    }
  } else {
    // In a non-return block, examine the live-in regs of all successors.
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI) {
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
           E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        KillIndices[Reg] = BB->size();
        // Repeat, for all subregs.
        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
             *Subreg; ++Subreg) {
          KillIndices[*Subreg] = BB->size();
        }
      }
    }
  }
}
/// FixupKills - Fix the register kill flags; they may have been made
/// incorrect by instruction reordering.
void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
  DEBUG(errs() << "Fixup kills for BB ID#" << MBB->getNumber() << '\n');

  std::set<unsigned> killedRegs;
  BitVector ReservedRegs = TRI->getReservedRegs(MF);

  StartBlockForKills(MBB);

  // Examine block from end to start...
  unsigned Count = MBB->size();
  for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
       I != E; --Count) {
    MachineInstr *MI = --I;

    // Update liveness. Registers that are def'd but not used in this
    // instruction are now dead. Mark register and all subregs as they
    // are completely defined.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isDef()) continue;
      // Ignore two-addr defs.
      if (MI->isRegTiedToUseOperand(i)) continue;

      KillIndices[Reg] = ~0u;

      // Repeat for all subregs.
      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg) {
        KillIndices[*Subreg] = ~0u;
      }
    }

    // Examine all used registers and set the kill flag. When a register
    // is used multiple times we only set the kill flag on the first
    // use.
    killedRegs.clear();
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isUse()) continue;
      unsigned Reg = MO.getReg();
      if ((Reg == 0) || ReservedRegs.test(Reg)) continue;

      bool kill = false;
      if (killedRegs.find(Reg) == killedRegs.end()) {
        kill = true;
        // A register is not killed if any subregs are live...
        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
             *Subreg; ++Subreg) {
          if (KillIndices[*Subreg] != ~0u) {
            kill = false;
            break;
          }
        }

        // If no subreg is live, then the register is killed if it became
        // live in this instruction.
        if (kill)
          kill = (KillIndices[Reg] == ~0u);
      }

      if (MO.isKill() != kill) {
        DEBUG(errs() << "Fixed " << MO << " in ");
      }

      killedRegs.insert(Reg);
    }

    // Mark any used register (that is not using undef) and subregs as
    // now live...
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
      unsigned Reg = MO.getReg();
      if ((Reg == 0) || ReservedRegs.test(Reg)) continue;

      KillIndices[Reg] = Count;

      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg) {
        KillIndices[*Subreg] = Count;
      }
    }
  }
}
//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();
  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    errs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    errs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif

  // Compute how many cycles it will be before this actually becomes
  // available. This is the max of the start time of all predecessors plus
  // their latencies.
  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    PendingQueue.push_back(SuccSU);
}
/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(SU, &*I);
}
/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  Sequence.push_back(SU);
  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue.ScheduledNode(SU);
}
/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void SchedulePostRATDList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);
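
  // Outline of the loop below (descriptive summary): seed the Available queue
  // with nodes that have no predecessors; then, each cycle, (1) move Pending
  // nodes whose depth has been reached into Available, (2) pop nodes from
  // Available until one with no hazard is found, (3) schedule it, or else
  // stall or emit a noop as the hazard recognizer dictates, and (4) advance
  // CurCycle.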
  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue.push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // In any cycle where we can't schedule any instructions, we must
  // stall or emit a noop, depending on the target.
  bool CycleHasInsts = false;
  // While the Available queue is not empty, grab the node with the highest
  // priority. If it is not ready, put it back. Schedule the node.
  std::vector<SUnit*> NotReady;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
    // Check to see if any of the pending instructions are ready to issue. If
    // so, add them to the available queue.
    unsigned MinDepth = ~0u;
    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
      if (PendingQueue[i]->getDepth() <= CurCycle) {
        AvailableQueue.push(PendingQueue[i]);
        PendingQueue[i]->isAvailable = true;
        PendingQueue[i] = PendingQueue.back();
        PendingQueue.pop_back();
        --i; --e;
      } else if (PendingQueue[i]->getDepth() < MinDepth)
        MinDepth = PendingQueue[i]->getDepth();
    }
    DEBUG(errs() << "\n*** Examining Available\n";
          LatencyPriorityQueue q = AvailableQueue;
          while (!q.empty()) {
            SUnit *su = q.pop();
            errs() << "Height " << su->getHeight() << ": ";
            su->dump(this);
          });
*FoundSUnit
= 0;
1021 bool HasNoopHazards
= false;
1022 while (!AvailableQueue
.empty()) {
1023 SUnit
*CurSUnit
= AvailableQueue
.pop();
1025 ScheduleHazardRecognizer::HazardType HT
=
1026 HazardRec
->getHazardType(CurSUnit
);
1027 if (HT
== ScheduleHazardRecognizer::NoHazard
) {
1028 FoundSUnit
= CurSUnit
;
1032 // Remember if this is a noop hazard.
1033 HasNoopHazards
|= HT
== ScheduleHazardRecognizer::NoopHazard
;
1035 NotReady
.push_back(CurSUnit
);
    // Add the nodes that aren't ready back onto the available list.
    if (!NotReady.empty()) {
      AvailableQueue.push_all(NotReady);
      NotReady.clear();
    }
    // If we found a node to schedule, do it now.
    if (FoundSUnit) {
      ScheduleNodeTopDown(FoundSUnit, CurCycle);
      HazardRec->EmitInstruction(FoundSUnit);
      CycleHasInsts = true;

      // If we are using the target-specific hazards, then don't
      // advance the cycle time just because we schedule a node. If
      // the target allows it we can schedule multiple nodes in the
      // same cycle.
      if (!EnablePostRAHazardAvoidance) {
        if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
          ++CurCycle;
      }
    } else {
      if (CycleHasInsts) {
        DEBUG(errs() << "*** Finished cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
      } else if (!HasNoopHazards) {
        // Otherwise, we have a pipeline stall, but no other problem,
        // just advance the current cycle and try again.
        DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
        ++NumStalls;
      } else {
        // Otherwise, we have no instructions to issue and we have instructions
        // that will fault if we don't do this right. This is the case for
        // processors without pipeline interlocks and other cases.
        DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n');
        HazardRec->EmitNoop();
        Sequence.push_back(0);   // NULL here means noop
        ++NumNoops;
      }

      ++CurCycle;
      CycleHasInsts = false;
    }
  }

#ifndef NDEBUG
  VerifySchedule(/*isBottomUp=*/false);
#endif
}
//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//
FunctionPass *llvm::createPostRAScheduler() {
  return new PostRAScheduler();
}