1 //===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This implements a fast scheduler.
12 //===----------------------------------------------------------------------===//
14 #define DEBUG_TYPE "pre-RA-sched"
15 #include "ScheduleDAGSDNodes.h"
16 #include "llvm/CodeGen/SchedulerRegistry.h"
17 #include "llvm/CodeGen/SelectionDAGISel.h"
18 #include "llvm/Target/TargetRegisterInfo.h"
19 #include "llvm/Target/TargetData.h"
20 #include "llvm/Target/TargetInstrInfo.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/Compiler.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/ErrorHandling.h"
28 #include "llvm/Support/raw_ostream.h"
31 STATISTIC(NumUnfolds
, "Number of nodes unfolded");
32 STATISTIC(NumDups
, "Number of duplicated nodes");
33 STATISTIC(NumPRCopies
, "Number of physical copies");
35 static RegisterScheduler
36 fastDAGScheduler("fast", "Fast suboptimal list scheduling",
37 createFastDAGScheduler
);
40 /// FastPriorityQueue - A degenerate priority queue that considers
41 /// all nodes to have the same priority.
43 struct VISIBILITY_HIDDEN FastPriorityQueue
{
44 SmallVector
<SUnit
*, 16> Queue
;
46 bool empty() const { return Queue
.empty(); }
53 if (empty()) return NULL
;
54 SUnit
*V
= Queue
.back();
60 //===----------------------------------------------------------------------===//
61 /// ScheduleDAGFast - The actual "fast" list scheduler implementation.
63 class VISIBILITY_HIDDEN ScheduleDAGFast
: public ScheduleDAGSDNodes
{
65 /// AvailableQueue - The priority queue to use for the available SUnits.
66 FastPriorityQueue AvailableQueue
;
68 /// LiveRegDefs - A set of physical registers and their definition
69 /// that are "live". These nodes must be scheduled before any other nodes that
70 /// modifies the registers can be scheduled.
72 std::vector
<SUnit
*> LiveRegDefs
;
73 std::vector
<unsigned> LiveRegCycles
;
76 ScheduleDAGFast(MachineFunction
&mf
)
77 : ScheduleDAGSDNodes(mf
) {}
81 /// AddPred - adds a predecessor edge to SUnit SU.
82 /// This returns true if this is a new predecessor.
83 void AddPred(SUnit
*SU
, const SDep
&D
) {
87 /// RemovePred - removes a predecessor edge from SUnit SU.
88 /// This returns true if an edge was removed.
89 void RemovePred(SUnit
*SU
, const SDep
&D
) {
94 void ReleasePred(SUnit
*SU
, SDep
*PredEdge
);
95 void ReleasePredecessors(SUnit
*SU
, unsigned CurCycle
);
96 void ScheduleNodeBottomUp(SUnit
*, unsigned);
97 SUnit
*CopyAndMoveSuccessors(SUnit
*);
98 void InsertCopiesAndMoveSuccs(SUnit
*, unsigned,
99 const TargetRegisterClass
*,
100 const TargetRegisterClass
*,
101 SmallVector
<SUnit
*, 2>&);
102 bool DelayForLiveRegsBottomUp(SUnit
*, SmallVector
<unsigned, 4>&);
103 void ListScheduleBottomUp();
105 /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
106 bool ForceUnitLatencies() const { return true; }
108 } // end anonymous namespace
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGFast::Schedule() {
  DEBUG(errs() << "********** List Scheduling **********\n");

  // Reset the live-physical-register tracking state for this region.
  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegCycles.resize(TRI->getNumRegs(), 0);

  // Build the scheduling graph.
  // NOTE(review): this call was dropped by the extraction and is
  // reconstructed -- confirm BuildSchedGraph's parameter list (with or
  // without an AliasAnalysis argument) against this revision's
  // ScheduleDAGSDNodes.h.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();
}
129 //===----------------------------------------------------------------------===//
130 // Bottom-Up Scheduling
131 //===----------------------------------------------------------------------===//
133 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
134 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
135 void ScheduleDAGFast::ReleasePred(SUnit
*SU
, SDep
*PredEdge
) {
136 SUnit
*PredSU
= PredEdge
->getSUnit();
137 --PredSU
->NumSuccsLeft
;
140 if (PredSU
->NumSuccsLeft
< 0) {
141 errs() << "*** Scheduling failed! ***\n";
143 errs() << " has been released too many times!\n";
148 // If all the node's successors are scheduled, this node is ready
149 // to be scheduled. Ignore the special EntrySU node.
150 if (PredSU
->NumSuccsLeft
== 0 && PredSU
!= &EntrySU
) {
151 PredSU
->isAvailable
= true;
152 AvailableQueue
.push(PredSU
);
156 void ScheduleDAGFast::ReleasePredecessors(SUnit
*SU
, unsigned CurCycle
) {
157 // Bottom up: release predecessors
158 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
160 ReleasePred(SU
, &*I
);
161 if (I
->isAssignedRegDep()) {
162 // This is a physical register dependency and it's impossible or
163 // expensive to copy the register. Make sure nothing that can
164 // clobber the register is scheduled between the predecessor and
166 if (!LiveRegDefs
[I
->getReg()]) {
168 LiveRegDefs
[I
->getReg()] = I
->getSUnit();
169 LiveRegCycles
[I
->getReg()] = CurCycle
;
175 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
176 /// count of its predecessors. If a predecessor pending count is zero, add it to
177 /// the Available queue.
178 void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit
*SU
, unsigned CurCycle
) {
179 DEBUG(errs() << "*** Scheduling [" << CurCycle
<< "]: ");
180 DEBUG(SU
->dump(this));
182 assert(CurCycle
>= SU
->getHeight() && "Node scheduled below its height!");
183 SU
->setHeightToAtLeast(CurCycle
);
184 Sequence
.push_back(SU
);
186 ReleasePredecessors(SU
, CurCycle
);
188 // Release all the implicit physical register defs that are live.
189 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
191 if (I
->isAssignedRegDep()) {
192 if (LiveRegCycles
[I
->getReg()] == I
->getSUnit()->getHeight()) {
193 assert(NumLiveRegs
> 0 && "NumLiveRegs is already zero!");
194 assert(LiveRegDefs
[I
->getReg()] == SU
&&
195 "Physical register dependency violated?");
197 LiveRegDefs
[I
->getReg()] = NULL
;
198 LiveRegCycles
[I
->getReg()] = 0;
203 SU
->isScheduled
= true;
206 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
207 /// successors to the newly created node.
208 SUnit
*ScheduleDAGFast::CopyAndMoveSuccessors(SUnit
*SU
) {
209 if (SU
->getNode()->getFlaggedNode())
212 SDNode
*N
= SU
->getNode();
217 bool TryUnfold
= false;
218 for (unsigned i
= 0, e
= N
->getNumValues(); i
!= e
; ++i
) {
219 EVT VT
= N
->getValueType(i
);
222 else if (VT
== MVT::Other
)
225 for (unsigned i
= 0, e
= N
->getNumOperands(); i
!= e
; ++i
) {
226 const SDValue
&Op
= N
->getOperand(i
);
227 EVT VT
= Op
.getNode()->getValueType(Op
.getResNo());
233 SmallVector
<SDNode
*, 2> NewNodes
;
234 if (!TII
->unfoldMemoryOperand(*DAG
, N
, NewNodes
))
237 DEBUG(errs() << "Unfolding SU # " << SU
->NodeNum
<< "\n");
238 assert(NewNodes
.size() == 2 && "Expected a load folding node!");
241 SDNode
*LoadNode
= NewNodes
[0];
242 unsigned NumVals
= N
->getNumValues();
243 unsigned OldNumVals
= SU
->getNode()->getNumValues();
244 for (unsigned i
= 0; i
!= NumVals
; ++i
)
245 DAG
->ReplaceAllUsesOfValueWith(SDValue(SU
->getNode(), i
), SDValue(N
, i
));
246 DAG
->ReplaceAllUsesOfValueWith(SDValue(SU
->getNode(), OldNumVals
-1),
247 SDValue(LoadNode
, 1));
249 SUnit
*NewSU
= NewSUnit(N
);
250 assert(N
->getNodeId() == -1 && "Node already inserted!");
251 N
->setNodeId(NewSU
->NodeNum
);
253 const TargetInstrDesc
&TID
= TII
->get(N
->getMachineOpcode());
254 for (unsigned i
= 0; i
!= TID
.getNumOperands(); ++i
) {
255 if (TID
.getOperandConstraint(i
, TOI::TIED_TO
) != -1) {
256 NewSU
->isTwoAddress
= true;
260 if (TID
.isCommutable())
261 NewSU
->isCommutable
= true;
263 // LoadNode may already exist. This can happen when there is another
264 // load from the same location and producing the same type of value
265 // but it has different alignment or volatileness.
266 bool isNewLoad
= true;
268 if (LoadNode
->getNodeId() != -1) {
269 LoadSU
= &SUnits
[LoadNode
->getNodeId()];
272 LoadSU
= NewSUnit(LoadNode
);
273 LoadNode
->setNodeId(LoadSU
->NodeNum
);
277 SmallVector
<SDep
, 4> ChainSuccs
;
278 SmallVector
<SDep
, 4> LoadPreds
;
279 SmallVector
<SDep
, 4> NodePreds
;
280 SmallVector
<SDep
, 4> NodeSuccs
;
281 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
285 else if (I
->getSUnit()->getNode() &&
286 I
->getSUnit()->getNode()->isOperandOf(LoadNode
))
287 LoadPreds
.push_back(*I
);
289 NodePreds
.push_back(*I
);
291 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
294 ChainSuccs
.push_back(*I
);
296 NodeSuccs
.push_back(*I
);
299 if (ChainPred
.getSUnit()) {
300 RemovePred(SU
, ChainPred
);
302 AddPred(LoadSU
, ChainPred
);
304 for (unsigned i
= 0, e
= LoadPreds
.size(); i
!= e
; ++i
) {
305 const SDep
&Pred
= LoadPreds
[i
];
306 RemovePred(SU
, Pred
);
308 AddPred(LoadSU
, Pred
);
311 for (unsigned i
= 0, e
= NodePreds
.size(); i
!= e
; ++i
) {
312 const SDep
&Pred
= NodePreds
[i
];
313 RemovePred(SU
, Pred
);
314 AddPred(NewSU
, Pred
);
316 for (unsigned i
= 0, e
= NodeSuccs
.size(); i
!= e
; ++i
) {
317 SDep D
= NodeSuccs
[i
];
318 SUnit
*SuccDep
= D
.getSUnit();
320 RemovePred(SuccDep
, D
);
324 for (unsigned i
= 0, e
= ChainSuccs
.size(); i
!= e
; ++i
) {
325 SDep D
= ChainSuccs
[i
];
326 SUnit
*SuccDep
= D
.getSUnit();
328 RemovePred(SuccDep
, D
);
335 AddPred(NewSU
, SDep(LoadSU
, SDep::Order
, LoadSU
->Latency
));
340 if (NewSU
->NumSuccsLeft
== 0) {
341 NewSU
->isAvailable
= true;
347 DEBUG(errs() << "Duplicating SU # " << SU
->NodeNum
<< "\n");
350 // New SUnit has the exact same predecessors.
351 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
353 if (!I
->isArtificial())
356 // Only copy scheduled successors. Cut them from old node's successor
357 // list and move them over.
358 SmallVector
<std::pair
<SUnit
*, SDep
>, 4> DelDeps
;
359 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
361 if (I
->isArtificial())
363 SUnit
*SuccSU
= I
->getSUnit();
364 if (SuccSU
->isScheduled
) {
369 DelDeps
.push_back(std::make_pair(SuccSU
, D
));
372 for (unsigned i
= 0, e
= DelDeps
.size(); i
!= e
; ++i
)
373 RemovePred(DelDeps
[i
].first
, DelDeps
[i
].second
);
379 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
380 /// scheduled successors of the given SUnit to the last copy.
381 void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit
*SU
, unsigned Reg
,
382 const TargetRegisterClass
*DestRC
,
383 const TargetRegisterClass
*SrcRC
,
384 SmallVector
<SUnit
*, 2> &Copies
) {
385 SUnit
*CopyFromSU
= NewSUnit(static_cast<SDNode
*>(NULL
));
386 CopyFromSU
->CopySrcRC
= SrcRC
;
387 CopyFromSU
->CopyDstRC
= DestRC
;
389 SUnit
*CopyToSU
= NewSUnit(static_cast<SDNode
*>(NULL
));
390 CopyToSU
->CopySrcRC
= DestRC
;
391 CopyToSU
->CopyDstRC
= SrcRC
;
393 // Only copy scheduled successors. Cut them from old node's successor
394 // list and move them over.
395 SmallVector
<std::pair
<SUnit
*, SDep
>, 4> DelDeps
;
396 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
398 if (I
->isArtificial())
400 SUnit
*SuccSU
= I
->getSUnit();
401 if (SuccSU
->isScheduled
) {
403 D
.setSUnit(CopyToSU
);
405 DelDeps
.push_back(std::make_pair(SuccSU
, *I
));
408 for (unsigned i
= 0, e
= DelDeps
.size(); i
!= e
; ++i
) {
409 RemovePred(DelDeps
[i
].first
, DelDeps
[i
].second
);
412 AddPred(CopyFromSU
, SDep(SU
, SDep::Data
, SU
->Latency
, Reg
));
413 AddPred(CopyToSU
, SDep(CopyFromSU
, SDep::Data
, CopyFromSU
->Latency
, 0));
415 Copies
.push_back(CopyFromSU
);
416 Copies
.push_back(CopyToSU
);
421 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
422 /// definition of the specified node.
423 /// FIXME: Move to SelectionDAG?
424 static EVT
getPhysicalRegisterVT(SDNode
*N
, unsigned Reg
,
425 const TargetInstrInfo
*TII
) {
426 const TargetInstrDesc
&TID
= TII
->get(N
->getMachineOpcode());
427 assert(TID
.ImplicitDefs
&& "Physical reg def must be in implicit def list!");
428 unsigned NumRes
= TID
.getNumDefs();
429 for (const unsigned *ImpDef
= TID
.getImplicitDefs(); *ImpDef
; ++ImpDef
) {
434 return N
->getValueType(NumRes
);
437 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
438 /// scheduling of the given node to satisfy live physical register dependencies.
439 /// If the specific node is the last one that's available to schedule, do
440 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
441 bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit
*SU
,
442 SmallVector
<unsigned, 4> &LRegs
){
443 if (NumLiveRegs
== 0)
446 SmallSet
<unsigned, 4> RegAdded
;
447 // If this node would clobber any "live" register, then it's not ready.
448 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
450 if (I
->isAssignedRegDep()) {
451 unsigned Reg
= I
->getReg();
452 if (LiveRegDefs
[Reg
] && LiveRegDefs
[Reg
] != I
->getSUnit()) {
453 if (RegAdded
.insert(Reg
))
454 LRegs
.push_back(Reg
);
456 for (const unsigned *Alias
= TRI
->getAliasSet(Reg
);
458 if (LiveRegDefs
[*Alias
] && LiveRegDefs
[*Alias
] != I
->getSUnit()) {
459 if (RegAdded
.insert(*Alias
))
460 LRegs
.push_back(*Alias
);
465 for (SDNode
*Node
= SU
->getNode(); Node
; Node
= Node
->getFlaggedNode()) {
466 if (!Node
->isMachineOpcode())
468 const TargetInstrDesc
&TID
= TII
->get(Node
->getMachineOpcode());
469 if (!TID
.ImplicitDefs
)
471 for (const unsigned *Reg
= TID
.ImplicitDefs
; *Reg
; ++Reg
) {
472 if (LiveRegDefs
[*Reg
] && LiveRegDefs
[*Reg
] != SU
) {
473 if (RegAdded
.insert(*Reg
))
474 LRegs
.push_back(*Reg
);
476 for (const unsigned *Alias
= TRI
->getAliasSet(*Reg
);
478 if (LiveRegDefs
[*Alias
] && LiveRegDefs
[*Alias
] != SU
) {
479 if (RegAdded
.insert(*Alias
))
480 LRegs
.push_back(*Alias
);
484 return !LRegs
.empty();
488 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
490 void ScheduleDAGFast::ListScheduleBottomUp() {
491 unsigned CurCycle
= 0;
493 // Release any predecessors of the special Exit node.
494 ReleasePredecessors(&ExitSU
, CurCycle
);
496 // Add root to Available queue.
497 if (!SUnits
.empty()) {
498 SUnit
*RootSU
= &SUnits
[DAG
->getRoot().getNode()->getNodeId()];
499 assert(RootSU
->Succs
.empty() && "Graph root shouldn't have successors!");
500 RootSU
->isAvailable
= true;
501 AvailableQueue
.push(RootSU
);
504 // While Available queue is not empty, grab the node with the highest
505 // priority. If it is not ready put it back. Schedule the node.
506 SmallVector
<SUnit
*, 4> NotReady
;
507 DenseMap
<SUnit
*, SmallVector
<unsigned, 4> > LRegsMap
;
508 Sequence
.reserve(SUnits
.size());
509 while (!AvailableQueue
.empty()) {
510 bool Delayed
= false;
512 SUnit
*CurSU
= AvailableQueue
.pop();
514 SmallVector
<unsigned, 4> LRegs
;
515 if (!DelayForLiveRegsBottomUp(CurSU
, LRegs
))
518 LRegsMap
.insert(std::make_pair(CurSU
, LRegs
));
520 CurSU
->isPending
= true; // This SU is not in AvailableQueue right now.
521 NotReady
.push_back(CurSU
);
522 CurSU
= AvailableQueue
.pop();
525 // All candidates are delayed due to live physical reg dependencies.
526 // Try code duplication or inserting cross class copies
528 if (Delayed
&& !CurSU
) {
530 // Try duplicating the nodes that produces these
531 // "expensive to copy" values to break the dependency. In case even
532 // that doesn't work, insert cross class copies.
533 SUnit
*TrySU
= NotReady
[0];
534 SmallVector
<unsigned, 4> &LRegs
= LRegsMap
[TrySU
];
535 assert(LRegs
.size() == 1 && "Can't handle this yet!");
536 unsigned Reg
= LRegs
[0];
537 SUnit
*LRDef
= LiveRegDefs
[Reg
];
538 EVT VT
= getPhysicalRegisterVT(LRDef
->getNode(), Reg
, TII
);
539 const TargetRegisterClass
*RC
=
540 TRI
->getPhysicalRegisterRegClass(Reg
, VT
);
541 const TargetRegisterClass
*DestRC
= TRI
->getCrossCopyRegClass(RC
);
543 // If cross copy register class is null, then it must be possible copy
544 // the value directly. Do not try duplicate the def.
547 NewDef
= CopyAndMoveSuccessors(LRDef
);
551 // Issue copies, these can be expensive cross register class copies.
552 SmallVector
<SUnit
*, 2> Copies
;
553 InsertCopiesAndMoveSuccs(LRDef
, Reg
, DestRC
, RC
, Copies
);
554 DEBUG(errs() << "Adding an edge from SU # " << TrySU
->NodeNum
555 << " to SU #" << Copies
.front()->NodeNum
<< "\n");
556 AddPred(TrySU
, SDep(Copies
.front(), SDep::Order
, /*Latency=*/1,
557 /*Reg=*/0, /*isNormalMemory=*/false,
558 /*isMustAlias=*/false, /*isArtificial=*/true));
559 NewDef
= Copies
.back();
562 DEBUG(errs() << "Adding an edge from SU # " << NewDef
->NodeNum
563 << " to SU #" << TrySU
->NodeNum
<< "\n");
564 LiveRegDefs
[Reg
] = NewDef
;
565 AddPred(NewDef
, SDep(TrySU
, SDep::Order
, /*Latency=*/1,
566 /*Reg=*/0, /*isNormalMemory=*/false,
567 /*isMustAlias=*/false, /*isArtificial=*/true));
568 TrySU
->isAvailable
= false;
573 llvm_unreachable("Unable to resolve live physical register dependencies!");
577 // Add the nodes that aren't ready back onto the available list.
578 for (unsigned i
= 0, e
= NotReady
.size(); i
!= e
; ++i
) {
579 NotReady
[i
]->isPending
= false;
580 // May no longer be available due to backtracking.
581 if (NotReady
[i
]->isAvailable
)
582 AvailableQueue
.push(NotReady
[i
]);
587 ScheduleNodeBottomUp(CurSU
, CurCycle
);
591 // Reverse the order if it is bottom up.
592 std::reverse(Sequence
.begin(), Sequence
.end());
596 // Verify that all SUnits were scheduled.
597 bool AnyNotSched
= false;
598 unsigned DeadNodes
= 0;
600 for (unsigned i
= 0, e
= SUnits
.size(); i
!= e
; ++i
) {
601 if (!SUnits
[i
].isScheduled
) {
602 if (SUnits
[i
].NumPreds
== 0 && SUnits
[i
].NumSuccs
== 0) {
607 errs() << "*** List scheduling failed! ***\n";
608 SUnits
[i
].dump(this);
609 errs() << "has not been scheduled!\n";
612 if (SUnits
[i
].NumSuccsLeft
!= 0) {
614 errs() << "*** List scheduling failed! ***\n";
615 SUnits
[i
].dump(this);
616 errs() << "has successors left!\n";
620 for (unsigned i
= 0, e
= Sequence
.size(); i
!= e
; ++i
)
623 assert(!AnyNotSched
);
624 assert(Sequence
.size() + DeadNodes
- Noops
== SUnits
.size() &&
625 "The number of nodes scheduled doesn't match the expected number!");
629 //===----------------------------------------------------------------------===//
630 // Public Constructor Functions
631 //===----------------------------------------------------------------------===//
633 llvm::ScheduleDAGSDNodes
*
634 llvm::createFastDAGScheduler(SelectionDAGISel
*IS
, CodeGenOpt::Level
) {
635 return new ScheduleDAGFast(*IS
->MF
);