1 //===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This implements a fast scheduler.
12 //===----------------------------------------------------------------------===//
14 #define DEBUG_TYPE "pre-RA-sched"
15 #include "ScheduleDAGSDNodes.h"
16 #include "llvm/CodeGen/SchedulerRegistry.h"
17 #include "llvm/CodeGen/SelectionDAGISel.h"
18 #include "llvm/Target/TargetRegisterInfo.h"
19 #include "llvm/Target/TargetData.h"
20 #include "llvm/Target/TargetInstrInfo.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/Compiler.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/Support/CommandLine.h"
29 STATISTIC(NumUnfolds
, "Number of nodes unfolded");
30 STATISTIC(NumDups
, "Number of duplicated nodes");
31 STATISTIC(NumPRCopies
, "Number of physical copies");
33 static RegisterScheduler
34 fastDAGScheduler("fast", "Fast suboptimal list scheduling",
35 createFastDAGScheduler
);
38 /// FastPriorityQueue - A degenerate priority queue that considers
39 /// all nodes to have the same priority.
41 struct VISIBILITY_HIDDEN FastPriorityQueue
{
42 SmallVector
<SUnit
*, 16> Queue
;
44 bool empty() const { return Queue
.empty(); }
51 if (empty()) return NULL
;
52 SUnit
*V
= Queue
.back();
58 //===----------------------------------------------------------------------===//
59 /// ScheduleDAGFast - The actual "fast" list scheduler implementation.
61 class VISIBILITY_HIDDEN ScheduleDAGFast
: public ScheduleDAGSDNodes
{
63 /// AvailableQueue - The priority queue to use for the available SUnits.
64 FastPriorityQueue AvailableQueue
;
66 /// LiveRegDefs - A set of physical registers and their definition
67 /// that are "live". These nodes must be scheduled before any other nodes that
68 /// modifies the registers can be scheduled.
70 std::vector
<SUnit
*> LiveRegDefs
;
71 std::vector
<unsigned> LiveRegCycles
;
74 ScheduleDAGFast(MachineFunction
&mf
)
75 : ScheduleDAGSDNodes(mf
) {}
79 /// AddPred - adds a predecessor edge to SUnit SU.
80 /// This returns true if this is a new predecessor.
81 void AddPred(SUnit
*SU
, const SDep
&D
) {
85 /// RemovePred - removes a predecessor edge from SUnit SU.
86 /// This returns true if an edge was removed.
87 void RemovePred(SUnit
*SU
, const SDep
&D
) {
92 void ReleasePred(SUnit
*SU
, SDep
*PredEdge
);
93 void ReleasePredecessors(SUnit
*SU
, unsigned CurCycle
);
94 void ScheduleNodeBottomUp(SUnit
*, unsigned);
95 SUnit
*CopyAndMoveSuccessors(SUnit
*);
96 void InsertCopiesAndMoveSuccs(SUnit
*, unsigned,
97 const TargetRegisterClass
*,
98 const TargetRegisterClass
*,
99 SmallVector
<SUnit
*, 2>&);
100 bool DelayForLiveRegsBottomUp(SUnit
*, SmallVector
<unsigned, 4>&);
101 void ListScheduleBottomUp();
103 /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
104 bool ForceUnitLatencies() const { return true; }
106 } // end anonymous namespace
109 /// Schedule - Schedule the DAG using list scheduling.
110 void ScheduleDAGFast::Schedule() {
111 DOUT
<< "********** List Scheduling **********\n";
114 LiveRegDefs
.resize(TRI
->getNumRegs(), NULL
);
115 LiveRegCycles
.resize(TRI
->getNumRegs(), 0);
117 // Build the scheduling graph.
120 DEBUG(for (unsigned su
= 0, e
= SUnits
.size(); su
!= e
; ++su
)
121 SUnits
[su
].dumpAll(this));
123 // Execute the actual scheduling loop.
124 ListScheduleBottomUp();
127 //===----------------------------------------------------------------------===//
128 // Bottom-Up Scheduling
129 //===----------------------------------------------------------------------===//
131 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
132 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
133 void ScheduleDAGFast::ReleasePred(SUnit
*SU
, SDep
*PredEdge
) {
134 SUnit
*PredSU
= PredEdge
->getSUnit();
135 --PredSU
->NumSuccsLeft
;
138 if (PredSU
->NumSuccsLeft
< 0) {
139 cerr
<< "*** Scheduling failed! ***\n";
141 cerr
<< " has been released too many times!\n";
146 // If all the node's successors are scheduled, this node is ready
147 // to be scheduled. Ignore the special EntrySU node.
148 if (PredSU
->NumSuccsLeft
== 0 && PredSU
!= &EntrySU
) {
149 PredSU
->isAvailable
= true;
150 AvailableQueue
.push(PredSU
);
154 void ScheduleDAGFast::ReleasePredecessors(SUnit
*SU
, unsigned CurCycle
) {
155 // Bottom up: release predecessors
156 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
158 ReleasePred(SU
, &*I
);
159 if (I
->isAssignedRegDep()) {
160 // This is a physical register dependency and it's impossible or
161 // expensive to copy the register. Make sure nothing that can
162 // clobber the register is scheduled between the predecessor and
164 if (!LiveRegDefs
[I
->getReg()]) {
166 LiveRegDefs
[I
->getReg()] = I
->getSUnit();
167 LiveRegCycles
[I
->getReg()] = CurCycle
;
173 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
174 /// count of its predecessors. If a predecessor pending count is zero, add it to
175 /// the Available queue.
176 void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit
*SU
, unsigned CurCycle
) {
177 DOUT
<< "*** Scheduling [" << CurCycle
<< "]: ";
178 DEBUG(SU
->dump(this));
180 assert(CurCycle
>= SU
->getHeight() && "Node scheduled below its height!");
181 SU
->setHeightToAtLeast(CurCycle
);
182 Sequence
.push_back(SU
);
184 ReleasePredecessors(SU
, CurCycle
);
186 // Release all the implicit physical register defs that are live.
187 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
189 if (I
->isAssignedRegDep()) {
190 if (LiveRegCycles
[I
->getReg()] == I
->getSUnit()->getHeight()) {
191 assert(NumLiveRegs
> 0 && "NumLiveRegs is already zero!");
192 assert(LiveRegDefs
[I
->getReg()] == SU
&&
193 "Physical register dependency violated?");
195 LiveRegDefs
[I
->getReg()] = NULL
;
196 LiveRegCycles
[I
->getReg()] = 0;
201 SU
->isScheduled
= true;
204 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
205 /// successors to the newly created node.
206 SUnit
*ScheduleDAGFast::CopyAndMoveSuccessors(SUnit
*SU
) {
207 if (SU
->getNode()->getFlaggedNode())
210 SDNode
*N
= SU
->getNode();
215 bool TryUnfold
= false;
216 for (unsigned i
= 0, e
= N
->getNumValues(); i
!= e
; ++i
) {
217 MVT VT
= N
->getValueType(i
);
220 else if (VT
== MVT::Other
)
223 for (unsigned i
= 0, e
= N
->getNumOperands(); i
!= e
; ++i
) {
224 const SDValue
&Op
= N
->getOperand(i
);
225 MVT VT
= Op
.getNode()->getValueType(Op
.getResNo());
231 SmallVector
<SDNode
*, 2> NewNodes
;
232 if (!TII
->unfoldMemoryOperand(*DAG
, N
, NewNodes
))
235 DOUT
<< "Unfolding SU # " << SU
->NodeNum
<< "\n";
236 assert(NewNodes
.size() == 2 && "Expected a load folding node!");
239 SDNode
*LoadNode
= NewNodes
[0];
240 unsigned NumVals
= N
->getNumValues();
241 unsigned OldNumVals
= SU
->getNode()->getNumValues();
242 for (unsigned i
= 0; i
!= NumVals
; ++i
)
243 DAG
->ReplaceAllUsesOfValueWith(SDValue(SU
->getNode(), i
), SDValue(N
, i
));
244 DAG
->ReplaceAllUsesOfValueWith(SDValue(SU
->getNode(), OldNumVals
-1),
245 SDValue(LoadNode
, 1));
247 SUnit
*NewSU
= NewSUnit(N
);
248 assert(N
->getNodeId() == -1 && "Node already inserted!");
249 N
->setNodeId(NewSU
->NodeNum
);
251 const TargetInstrDesc
&TID
= TII
->get(N
->getMachineOpcode());
252 for (unsigned i
= 0; i
!= TID
.getNumOperands(); ++i
) {
253 if (TID
.getOperandConstraint(i
, TOI::TIED_TO
) != -1) {
254 NewSU
->isTwoAddress
= true;
258 if (TID
.isCommutable())
259 NewSU
->isCommutable
= true;
261 // LoadNode may already exist. This can happen when there is another
262 // load from the same location and producing the same type of value
263 // but it has different alignment or volatileness.
264 bool isNewLoad
= true;
266 if (LoadNode
->getNodeId() != -1) {
267 LoadSU
= &SUnits
[LoadNode
->getNodeId()];
270 LoadSU
= NewSUnit(LoadNode
);
271 LoadNode
->setNodeId(LoadSU
->NodeNum
);
275 SmallVector
<SDep
, 4> ChainSuccs
;
276 SmallVector
<SDep
, 4> LoadPreds
;
277 SmallVector
<SDep
, 4> NodePreds
;
278 SmallVector
<SDep
, 4> NodeSuccs
;
279 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
283 else if (I
->getSUnit()->getNode() &&
284 I
->getSUnit()->getNode()->isOperandOf(LoadNode
))
285 LoadPreds
.push_back(*I
);
287 NodePreds
.push_back(*I
);
289 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
292 ChainSuccs
.push_back(*I
);
294 NodeSuccs
.push_back(*I
);
297 if (ChainPred
.getSUnit()) {
298 RemovePred(SU
, ChainPred
);
300 AddPred(LoadSU
, ChainPred
);
302 for (unsigned i
= 0, e
= LoadPreds
.size(); i
!= e
; ++i
) {
303 const SDep
&Pred
= LoadPreds
[i
];
304 RemovePred(SU
, Pred
);
306 AddPred(LoadSU
, Pred
);
309 for (unsigned i
= 0, e
= NodePreds
.size(); i
!= e
; ++i
) {
310 const SDep
&Pred
= NodePreds
[i
];
311 RemovePred(SU
, Pred
);
312 AddPred(NewSU
, Pred
);
314 for (unsigned i
= 0, e
= NodeSuccs
.size(); i
!= e
; ++i
) {
315 SDep D
= NodeSuccs
[i
];
316 SUnit
*SuccDep
= D
.getSUnit();
318 RemovePred(SuccDep
, D
);
322 for (unsigned i
= 0, e
= ChainSuccs
.size(); i
!= e
; ++i
) {
323 SDep D
= ChainSuccs
[i
];
324 SUnit
*SuccDep
= D
.getSUnit();
326 RemovePred(SuccDep
, D
);
333 AddPred(NewSU
, SDep(LoadSU
, SDep::Order
, LoadSU
->Latency
));
338 if (NewSU
->NumSuccsLeft
== 0) {
339 NewSU
->isAvailable
= true;
345 DOUT
<< "Duplicating SU # " << SU
->NodeNum
<< "\n";
348 // New SUnit has the exact same predecessors.
349 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
351 if (!I
->isArtificial())
354 // Only copy scheduled successors. Cut them from old node's successor
355 // list and move them over.
356 SmallVector
<std::pair
<SUnit
*, SDep
>, 4> DelDeps
;
357 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
359 if (I
->isArtificial())
361 SUnit
*SuccSU
= I
->getSUnit();
362 if (SuccSU
->isScheduled
) {
367 DelDeps
.push_back(std::make_pair(SuccSU
, D
));
370 for (unsigned i
= 0, e
= DelDeps
.size(); i
!= e
; ++i
)
371 RemovePred(DelDeps
[i
].first
, DelDeps
[i
].second
);
377 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
378 /// scheduled successors of the given SUnit to the last copy.
379 void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit
*SU
, unsigned Reg
,
380 const TargetRegisterClass
*DestRC
,
381 const TargetRegisterClass
*SrcRC
,
382 SmallVector
<SUnit
*, 2> &Copies
) {
383 SUnit
*CopyFromSU
= NewSUnit(static_cast<SDNode
*>(NULL
));
384 CopyFromSU
->CopySrcRC
= SrcRC
;
385 CopyFromSU
->CopyDstRC
= DestRC
;
387 SUnit
*CopyToSU
= NewSUnit(static_cast<SDNode
*>(NULL
));
388 CopyToSU
->CopySrcRC
= DestRC
;
389 CopyToSU
->CopyDstRC
= SrcRC
;
391 // Only copy scheduled successors. Cut them from old node's successor
392 // list and move them over.
393 SmallVector
<std::pair
<SUnit
*, SDep
>, 4> DelDeps
;
394 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
396 if (I
->isArtificial())
398 SUnit
*SuccSU
= I
->getSUnit();
399 if (SuccSU
->isScheduled
) {
401 D
.setSUnit(CopyToSU
);
403 DelDeps
.push_back(std::make_pair(SuccSU
, *I
));
406 for (unsigned i
= 0, e
= DelDeps
.size(); i
!= e
; ++i
) {
407 RemovePred(DelDeps
[i
].first
, DelDeps
[i
].second
);
410 AddPred(CopyFromSU
, SDep(SU
, SDep::Data
, SU
->Latency
, Reg
));
411 AddPred(CopyToSU
, SDep(CopyFromSU
, SDep::Data
, CopyFromSU
->Latency
, 0));
413 Copies
.push_back(CopyFromSU
);
414 Copies
.push_back(CopyToSU
);
419 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
420 /// definition of the specified node.
421 /// FIXME: Move to SelectionDAG?
422 static MVT
getPhysicalRegisterVT(SDNode
*N
, unsigned Reg
,
423 const TargetInstrInfo
*TII
) {
424 const TargetInstrDesc
&TID
= TII
->get(N
->getMachineOpcode());
425 assert(TID
.ImplicitDefs
&& "Physical reg def must be in implicit def list!");
426 unsigned NumRes
= TID
.getNumDefs();
427 for (const unsigned *ImpDef
= TID
.getImplicitDefs(); *ImpDef
; ++ImpDef
) {
432 return N
->getValueType(NumRes
);
435 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
436 /// scheduling of the given node to satisfy live physical register dependencies.
437 /// If the specific node is the last one that's available to schedule, do
438 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
439 bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit
*SU
,
440 SmallVector
<unsigned, 4> &LRegs
){
441 if (NumLiveRegs
== 0)
444 SmallSet
<unsigned, 4> RegAdded
;
445 // If this node would clobber any "live" register, then it's not ready.
446 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
448 if (I
->isAssignedRegDep()) {
449 unsigned Reg
= I
->getReg();
450 if (LiveRegDefs
[Reg
] && LiveRegDefs
[Reg
] != I
->getSUnit()) {
451 if (RegAdded
.insert(Reg
))
452 LRegs
.push_back(Reg
);
454 for (const unsigned *Alias
= TRI
->getAliasSet(Reg
);
456 if (LiveRegDefs
[*Alias
] && LiveRegDefs
[*Alias
] != I
->getSUnit()) {
457 if (RegAdded
.insert(*Alias
))
458 LRegs
.push_back(*Alias
);
463 for (SDNode
*Node
= SU
->getNode(); Node
; Node
= Node
->getFlaggedNode()) {
464 if (!Node
->isMachineOpcode())
466 const TargetInstrDesc
&TID
= TII
->get(Node
->getMachineOpcode());
467 if (!TID
.ImplicitDefs
)
469 for (const unsigned *Reg
= TID
.ImplicitDefs
; *Reg
; ++Reg
) {
470 if (LiveRegDefs
[*Reg
] && LiveRegDefs
[*Reg
] != SU
) {
471 if (RegAdded
.insert(*Reg
))
472 LRegs
.push_back(*Reg
);
474 for (const unsigned *Alias
= TRI
->getAliasSet(*Reg
);
476 if (LiveRegDefs
[*Alias
] && LiveRegDefs
[*Alias
] != SU
) {
477 if (RegAdded
.insert(*Alias
))
478 LRegs
.push_back(*Alias
);
482 return !LRegs
.empty();
486 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
488 void ScheduleDAGFast::ListScheduleBottomUp() {
489 unsigned CurCycle
= 0;
491 // Release any predecessors of the special Exit node.
492 ReleasePredecessors(&ExitSU
, CurCycle
);
494 // Add root to Available queue.
495 if (!SUnits
.empty()) {
496 SUnit
*RootSU
= &SUnits
[DAG
->getRoot().getNode()->getNodeId()];
497 assert(RootSU
->Succs
.empty() && "Graph root shouldn't have successors!");
498 RootSU
->isAvailable
= true;
499 AvailableQueue
.push(RootSU
);
502 // While Available queue is not empty, grab the node with the highest
503 // priority. If it is not ready put it back. Schedule the node.
504 SmallVector
<SUnit
*, 4> NotReady
;
505 DenseMap
<SUnit
*, SmallVector
<unsigned, 4> > LRegsMap
;
506 Sequence
.reserve(SUnits
.size());
507 while (!AvailableQueue
.empty()) {
508 bool Delayed
= false;
510 SUnit
*CurSU
= AvailableQueue
.pop();
512 SmallVector
<unsigned, 4> LRegs
;
513 if (!DelayForLiveRegsBottomUp(CurSU
, LRegs
))
516 LRegsMap
.insert(std::make_pair(CurSU
, LRegs
));
518 CurSU
->isPending
= true; // This SU is not in AvailableQueue right now.
519 NotReady
.push_back(CurSU
);
520 CurSU
= AvailableQueue
.pop();
523 // All candidates are delayed due to live physical reg dependencies.
524 // Try code duplication or inserting cross class copies
526 if (Delayed
&& !CurSU
) {
528 // Try duplicating the nodes that produces these
529 // "expensive to copy" values to break the dependency. In case even
530 // that doesn't work, insert cross class copies.
531 SUnit
*TrySU
= NotReady
[0];
532 SmallVector
<unsigned, 4> &LRegs
= LRegsMap
[TrySU
];
533 assert(LRegs
.size() == 1 && "Can't handle this yet!");
534 unsigned Reg
= LRegs
[0];
535 SUnit
*LRDef
= LiveRegDefs
[Reg
];
536 MVT VT
= getPhysicalRegisterVT(LRDef
->getNode(), Reg
, TII
);
537 const TargetRegisterClass
*RC
=
538 TRI
->getPhysicalRegisterRegClass(Reg
, VT
);
539 const TargetRegisterClass
*DestRC
= TRI
->getCrossCopyRegClass(RC
);
541 // If cross copy register class is null, then it must be possible copy
542 // the value directly. Do not try duplicate the def.
545 NewDef
= CopyAndMoveSuccessors(LRDef
);
549 // Issue copies, these can be expensive cross register class copies.
550 SmallVector
<SUnit
*, 2> Copies
;
551 InsertCopiesAndMoveSuccs(LRDef
, Reg
, DestRC
, RC
, Copies
);
552 DOUT
<< "Adding an edge from SU # " << TrySU
->NodeNum
553 << " to SU #" << Copies
.front()->NodeNum
<< "\n";
554 AddPred(TrySU
, SDep(Copies
.front(), SDep::Order
, /*Latency=*/1,
555 /*Reg=*/0, /*isNormalMemory=*/false,
556 /*isMustAlias=*/false, /*isArtificial=*/true));
557 NewDef
= Copies
.back();
560 DOUT
<< "Adding an edge from SU # " << NewDef
->NodeNum
561 << " to SU #" << TrySU
->NodeNum
<< "\n";
562 LiveRegDefs
[Reg
] = NewDef
;
563 AddPred(NewDef
, SDep(TrySU
, SDep::Order
, /*Latency=*/1,
564 /*Reg=*/0, /*isNormalMemory=*/false,
565 /*isMustAlias=*/false, /*isArtificial=*/true));
566 TrySU
->isAvailable
= false;
571 assert(false && "Unable to resolve live physical register dependencies!");
576 // Add the nodes that aren't ready back onto the available list.
577 for (unsigned i
= 0, e
= NotReady
.size(); i
!= e
; ++i
) {
578 NotReady
[i
]->isPending
= false;
579 // May no longer be available due to backtracking.
580 if (NotReady
[i
]->isAvailable
)
581 AvailableQueue
.push(NotReady
[i
]);
586 ScheduleNodeBottomUp(CurSU
, CurCycle
);
590 // Reverse the order if it is bottom up.
591 std::reverse(Sequence
.begin(), Sequence
.end());
595 // Verify that all SUnits were scheduled.
596 bool AnyNotSched
= false;
597 unsigned DeadNodes
= 0;
599 for (unsigned i
= 0, e
= SUnits
.size(); i
!= e
; ++i
) {
600 if (!SUnits
[i
].isScheduled
) {
601 if (SUnits
[i
].NumPreds
== 0 && SUnits
[i
].NumSuccs
== 0) {
606 cerr
<< "*** List scheduling failed! ***\n";
607 SUnits
[i
].dump(this);
608 cerr
<< "has not been scheduled!\n";
611 if (SUnits
[i
].NumSuccsLeft
!= 0) {
613 cerr
<< "*** List scheduling failed! ***\n";
614 SUnits
[i
].dump(this);
615 cerr
<< "has successors left!\n";
619 for (unsigned i
= 0, e
= Sequence
.size(); i
!= e
; ++i
)
622 assert(!AnyNotSched
);
623 assert(Sequence
.size() + DeadNodes
- Noops
== SUnits
.size() &&
624 "The number of nodes scheduled doesn't match the expected number!");
628 //===----------------------------------------------------------------------===//
629 // Public Constructor Functions
630 //===----------------------------------------------------------------------===//
632 llvm::ScheduleDAGSDNodes
*
633 llvm::createFastDAGScheduler(SelectionDAGISel
*IS
, CodeGenOpt::Level
) {
634 return new ScheduleDAGFast(*IS
->MF
);