//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a top-down list scheduler, using standard algorithms.
// The basic approach uses a priority queue of available nodes to schedule.
// One at a time, nodes are taken from the priority queue (thus in priority
// order), checked for legality to schedule, and emitted if legal.
//
// Nodes may not be legal to schedule either due to structural hazards (e.g.
// pipeline or resource constraints) or because an input to the instruction has
// not completed execution.
//
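// In outline (summarizing ListScheduleTopDown below): on each cycle the
// scheduler moves PendingQueue entries whose ready cycle has arrived onto the
// AvailableQueue, pops available nodes in priority order until the hazard
// recognizer accepts one (pushing the rest back), and then either emits the
// chosen node and releases its successors, or advances the cycle (emitting a
// noop when the target requires one) if nothing can issue.
//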
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sched"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Target/MRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/Statistic.h"
#include <queue>
using namespace llvm;

STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");

static RegisterScheduler
  tdListDAGScheduler("list-td", " Top-down list scheduler",
                     createTDListDAGScheduler);
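
// This registration makes the scheduler selectable by its "list-td" name
// through the scheduler registry (e.g. via llc's -pre-RA-sched=list-td,
// assuming the standard registry-backed scheduler selection flag).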

//===----------------------------------------------------------------------===//
/// ScheduleDAGList - The actual list scheduler implementation.  This supports
/// top-down scheduling.
///
namespace {
class VISIBILITY_HIDDEN ScheduleDAGList : public ScheduleDAG {
private:
  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation).  Once the operands become available, the instruction is
  /// added to the AvailableQueue.  This keeps track of each SUnit and the
  /// number of cycles left to execute before the operation is available.
  std::vector<std::pair<unsigned, SUnit*> > PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  HazardRecognizer *HazardRec;

public:
  ScheduleDAGList(SelectionDAG &dag, MachineBasicBlock *bb,
                  const TargetMachine &tm,
                  SchedulingPriorityQueue *availqueue,
                  HazardRecognizer *HR)
    : ScheduleDAG(dag, bb, tm),
      AvailableQueue(availqueue), HazardRec(HR) {
  }

  ~ScheduleDAGList() {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule();

private:
  void ReleaseSucc(SUnit *SuccSU, bool isChain);
  void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
  void ListScheduleTopDown();
};
}  // end anonymous namespace

HazardRecognizer::~HazardRecognizer() {}

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGList::Schedule() {
  DOUT << "********** List Scheduling **********\n";

  // Build scheduling units.
  BuildSchedUnits();

  AvailableQueue->initNodes(SUnitMap, SUnits);

  ListScheduleTopDown();

  AvailableQueue->releaseState();

  DOUT << "*** Final schedule ***\n";
  DEBUG(dumpSchedule());

  // Emit in scheduled order
  EmitSchedule();
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor.  Add it to
/// the PendingQueue if the count reaches zero.
void ScheduleDAGList::ReleaseSucc(SUnit *SuccSU, bool isChain) {
  if (!isChain)
    SuccSU->NumPredsLeft--;
  else
    SuccSU->NumChainPredsLeft--;

  assert(SuccSU->NumPredsLeft >= 0 && SuccSU->NumChainPredsLeft >= 0 &&
         "List scheduling internal error");

  if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) {
    // Compute how many cycles it will be before this actually becomes
    // available.  This is the max of the start time of all predecessors plus
    // their latencies.
    unsigned AvailableCycle = 0;
    for (SUnit::pred_iterator I = SuccSU->Preds.begin(),
         E = SuccSU->Preds.end(); I != E; ++I) {
      // If this is a token edge, we don't need to wait for the latency of the
      // preceding instruction (e.g. a long-latency load) unless there is also
      // some other data dependence.
      SUnit &Pred = *I->first;
      unsigned PredDoneCycle = Pred.Cycle;
      if (!I->second)
        PredDoneCycle += Pred.Latency;
      else if (Pred.Latency)
        PredDoneCycle += 1;
      AvailableCycle = std::max(AvailableCycle, PredDoneCycle);
    }

    PendingQueue.push_back(std::make_pair(AvailableCycle, SuccSU));
  }
}

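// For example (illustrative numbers only): if a load with Latency 3 was
// scheduled at cycle 2, a successor reached through a data edge becomes
// available at cycle 5 (2 + 3), while one reached only through a chain edge
// becomes available at cycle 3, one cycle after the load issues.
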
/// ScheduleNodeTopDown - Add the node to the schedule.  Decrement the pending
/// count of its successors.  If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(&DAG));

  Sequence.push_back(SU);
  SU->Cycle = CurCycle;

  // Top down: release successors.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(I->first, I->second);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGList::ListScheduleTopDown() {
  unsigned CurCycle = 0;
  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];

  // All leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = SUnits[i].isPending = true;
    }
  }

  // Emit the entry node first.
  ScheduleNodeTopDown(Entry, CurCycle);
  HazardRec->EmitInstruction(Entry->Node);

  // While Available queue is not empty, grab the node with the highest
  // priority.  If it is not ready put it back.  Schedule the node.
  std::vector<SUnit*> NotReady;
  while (!AvailableQueue->empty() || !PendingQueue.empty()) {
    // Check to see if any of the pending instructions are ready to issue.  If
    // so, add them to the available queue.
    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
      if (PendingQueue[i].first == CurCycle) {
        AvailableQueue->push(PendingQueue[i].second);
        PendingQueue[i].second->isAvailable = true;
        PendingQueue[i] = PendingQueue.back();
        PendingQueue.pop_back();
        --i; --e;
      } else {
        assert(PendingQueue[i].first > CurCycle && "Negative latency?");
      }
    }

    // If there are no instructions available, don't try to issue anything, and
    // don't advance the hazard recognizer.
    if (AvailableQueue->empty()) {
      ++CurCycle;
      continue;
    }

    SUnit *FoundSUnit = 0;
    SDNode *FoundNode = 0;

    bool HasNoopHazards = false;
    while (!AvailableQueue->empty()) {
      SUnit *CurSUnit = AvailableQueue->pop();

      // Get the node represented by this SUnit.
      FoundNode = CurSUnit->Node;

      // If this is a pseudo op, like copyfromreg, look to see if there is a
      // real target node flagged to it.  If so, use the target node.
      for (unsigned i = 0, e = CurSUnit->FlaggedNodes.size();
           FoundNode->getOpcode() < ISD::BUILTIN_OP_END && i != e; ++i)
        FoundNode = CurSUnit->FlaggedNodes[i];

      HazardRecognizer::HazardType HT = HazardRec->getHazardType(FoundNode);
      if (HT == HazardRecognizer::NoHazard) {
        FoundSUnit = CurSUnit;
        break;
      }

      // Remember if this is a noop hazard.
      HasNoopHazards |= HT == HazardRecognizer::NoopHazard;

      NotReady.push_back(CurSUnit);
    }

    // Add the nodes that aren't ready back onto the available list.
    if (!NotReady.empty()) {
      AvailableQueue->push_all(NotReady);
      NotReady.clear();
    }

    // If we found a node to schedule, do it now.
    if (FoundSUnit) {
      ScheduleNodeTopDown(FoundSUnit, CurCycle);
      HazardRec->EmitInstruction(FoundNode);
      FoundSUnit->isScheduled = true;
      AvailableQueue->ScheduledNode(FoundSUnit);

      // If this is a pseudo-op node, we don't want to increment the current
      // cycle.
      if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
        ++CurCycle;
    } else if (!HasNoopHazards) {
      // Otherwise, we have a pipeline stall, but no other problem, just advance
      // the current cycle and try again.
      DOUT << "*** Advancing cycle, no work to do\n";
      HazardRec->AdvanceCycle();
      ++NumStalls;
      ++CurCycle;
    } else {
      // Otherwise, we have no instructions to issue and we have instructions
      // that will fault if we don't do this right.  This is the case for
      // processors without pipeline interlocks and other cases.
      DOUT << "*** Emitting noop\n";
      HazardRec->EmitNoop();
      Sequence.push_back(0);   // NULL SUnit* -> noop
      ++NumNoops;
      ++CurCycle;
    }
  }

  // Verify that all SUnits were scheduled.
  bool AnyNotSched = false;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    if (SUnits[i].NumPredsLeft != 0 || SUnits[i].NumChainPredsLeft != 0) {
      if (!AnyNotSched)
        cerr << "*** List scheduling failed! ***\n";
      SUnits[i].dump(&DAG);
      cerr << "has not been scheduled!\n";
      AnyNotSched = true;
    }
  }
  assert(!AnyNotSched);
}

//===----------------------------------------------------------------------===//
//                    LatencyPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using latency information to
// reduce the length of the critical path through the basic block.
//
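// For example (illustrative numbers only): given a dependence chain A -> B -> C
// where A, B and C have operation latencies 3, 1 and 1, CalcLatency assigns
// priorities 5, 2 and 1 respectively (each node's own latency plus the largest
// priority among its successors), so A, the head of the critical path, sorts
// ahead of otherwise-equal candidates.
//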
namespace {
  class LatencyPriorityQueue;

  /// Sorting functions for the Available queue.
  struct latency_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    LatencyPriorityQueue *PQ;
    latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {}
    latency_sort(const latency_sort &RHS) : PQ(RHS.PQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace

namespace {
  class LatencyPriorityQueue : public SchedulingPriorityQueue {
    // SUnits - The SUnits for the current graph.
    std::vector<SUnit> *SUnits;

    // Latencies - The latency (max of latency from this node to the bb exit)
    // for each node.
    std::vector<int> Latencies;

    /// NumNodesSolelyBlocking - This vector contains, for every node in the
    /// Queue, the number of nodes that the node is the sole unscheduled
    /// predecessor for.  This is used as a tie-breaker heuristic for better
    /// mobility.
    std::vector<unsigned> NumNodesSolelyBlocking;

    std::priority_queue<SUnit*, std::vector<SUnit*>, latency_sort> Queue;

  public:
    LatencyPriorityQueue() : Queue(latency_sort(this)) {
    }

    void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
                   std::vector<SUnit> &sunits) {
      SUnits = &sunits;
      // Calculate node priorities.
      CalculatePriorities();
    }

    void releaseState() {
      SUnits = 0;
      Latencies.clear();
    }

    unsigned getLatency(unsigned NodeNum) const {
      assert(NodeNum < Latencies.size());
      return Latencies[NodeNum];
    }

    unsigned getNumSolelyBlockNodes(unsigned NodeNum) const {
      assert(NodeNum < NumNodesSolelyBlocking.size());
      return NumNodesSolelyBlocking[NodeNum];
    }

    bool empty() const { return Queue.empty(); }

    virtual void push(SUnit *U) {
      push_impl(U);
    }
    void push_impl(SUnit *U);

    void push_all(const std::vector<SUnit*> &Nodes) {
      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
        push_impl(Nodes[i]);
    }

    SUnit *pop() {
      if (empty()) return NULL;
      SUnit *V = Queue.top();
      Queue.pop();
      return V;
    }

    // ScheduledNode - As nodes are scheduled, we look to see if there are any
    // successor nodes that have a single unscheduled predecessor.  If so, that
    // single predecessor has a higher priority, since scheduling it will make
    // the node available.
    void ScheduledNode(SUnit *Node);

  private:
    void CalculatePriorities();
    int CalcLatency(const SUnit &SU);
    void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
    SUnit *getSingleUnscheduledPred(SUnit *SU);

    /// RemoveFromPriorityQueue - This is a really inefficient way to remove a
    /// node from a priority queue.  We should roll our own heap to make this
    /// better or something.
    void RemoveFromPriorityQueue(SUnit *SU) {
      std::vector<SUnit*> Temp;

      assert(!Queue.empty() && "Not in queue!");
      while (Queue.top() != SU) {
        Temp.push_back(Queue.top());
        Queue.pop();
        assert(!Queue.empty() && "Not in queue!");
      }

      // Remove the node from the PQ.
      Queue.pop();

      // Add all the other nodes back.
      for (unsigned i = 0, e = Temp.size(); i != e; ++i)
        Queue.push(Temp[i]);
    }
  };
}  // end anonymous namespace

bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
  unsigned LHSNum = LHS->NodeNum;
  unsigned RHSNum = RHS->NodeNum;

  // The most important heuristic is scheduling the critical path.
  unsigned LHSLatency = PQ->getLatency(LHSNum);
  unsigned RHSLatency = PQ->getLatency(RHSNum);
  if (LHSLatency < RHSLatency) return true;
  if (LHSLatency > RHSLatency) return false;

  // After that, if two nodes have identical latencies, look to see if one will
  // unblock more other nodes than the other.
  unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
  unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
  if (LHSBlocked < RHSBlocked) return true;
  if (LHSBlocked > RHSBlocked) return false;

  // Finally, just to provide a stable ordering, use the node number as a
  // tie-breaker.
  return LHSNum < RHSNum;
}

/// CalcLatency - Calculate the maximal path from the node to the exit.
///
int LatencyPriorityQueue::CalcLatency(const SUnit &SU) {
  int &Latency = Latencies[SU.NodeNum];
  if (Latency != -1)
    return Latency;

  int MaxSuccLatency = 0;
  for (SUnit::const_succ_iterator I = SU.Succs.begin(), E = SU.Succs.end();
       I != E; ++I)
    MaxSuccLatency = std::max(MaxSuccLatency, CalcLatency(*I->first));

  return Latency = MaxSuccLatency + SU.Latency;
}

/// CalculatePriorities - Calculate priorities of all scheduling units.
void LatencyPriorityQueue::CalculatePriorities() {
  Latencies.assign(SUnits->size(), -1);
  NumNodesSolelyBlocking.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcLatency((*SUnits)[i]);
}

/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
/// of SU, return it, otherwise return null.
SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
  SUnit *OnlyAvailablePred = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    SUnit &Pred = *I->first;
    if (!Pred.isScheduled) {
      // We found an available, but not scheduled, predecessor.  If it's the
      // only one we have found, keep track of it... otherwise give up.
      if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
        return 0;
      OnlyAvailablePred = &Pred;
    }
  }

  return OnlyAvailablePred;
}

void LatencyPriorityQueue::push_impl(SUnit *SU) {
  // Look at all of the successors of this node.  Count the number of nodes that
  // this node is the sole unscheduled node for.
  unsigned NumNodesBlocking = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    if (getSingleUnscheduledPred(I->first) == SU)
      ++NumNodesBlocking;
  NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;

  Queue.push(SU);
}

// ScheduledNode - As nodes are scheduled, we look to see if there are any
// successor nodes that have a single unscheduled predecessor.  If so, that
// single predecessor has a higher priority, since scheduling it will make
// the node available.
void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    AdjustPriorityOfUnscheduledPreds(I->first);
}

/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
/// scheduled.  If SU is not itself available, then there is at least one
/// predecessor node that has not been scheduled yet.  If SU has exactly ONE
/// unscheduled predecessor, we want to increase its priority: it getting
/// scheduled will make this node available, so it is better than some other
/// node of the same priority that will not make a node available.
void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
  if (SU->isPending) return;  // All preds scheduled.

  SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
  if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;

  // Okay, we found a single predecessor that is available, but not scheduled.
  // Since it is available, it must be in the priority queue.  First remove it.
  RemoveFromPriorityQueue(OnlyAvailablePred);

  // Reinsert the node into the priority queue, which recomputes its
  // NumNodesSolelyBlocking value.
  push(OnlyAvailablePred);
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

/// createTDListDAGScheduler - This creates a top-down list scheduler with a
/// new hazard recognizer.  This scheduler takes ownership of the hazard
/// recognizer and deletes it when done.
ScheduleDAG* llvm::createTDListDAGScheduler(SelectionDAGISel *IS,
                                            SelectionDAG *DAG,
                                            MachineBasicBlock *BB) {
  return new ScheduleDAGList(*DAG, BB, DAG->getTarget(),
                             new LatencyPriorityQueue(),
                             IS->CreateTargetHazardRecognizer());
}