1 //===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This implements a fast scheduler.
12 //===----------------------------------------------------------------------===//
14 #define DEBUG_TYPE "pre-RA-sched"
15 #include "ScheduleDAGSDNodes.h"
16 #include "llvm/CodeGen/SchedulerRegistry.h"
17 #include "llvm/CodeGen/SelectionDAGISel.h"
18 #include "llvm/Target/TargetRegisterInfo.h"
19 #include "llvm/Target/TargetData.h"
20 #include "llvm/Target/TargetInstrInfo.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/Compiler.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/ErrorHandling.h"
30 STATISTIC(NumUnfolds
, "Number of nodes unfolded");
31 STATISTIC(NumDups
, "Number of duplicated nodes");
32 STATISTIC(NumPRCopies
, "Number of physical copies");
34 static RegisterScheduler
35 fastDAGScheduler("fast", "Fast suboptimal list scheduling",
36 createFastDAGScheduler
);
39 /// FastPriorityQueue - A degenerate priority queue that considers
40 /// all nodes to have the same priority.
42 struct VISIBILITY_HIDDEN FastPriorityQueue
{
43 SmallVector
<SUnit
*, 16> Queue
;
45 bool empty() const { return Queue
.empty(); }
52 if (empty()) return NULL
;
53 SUnit
*V
= Queue
.back();
59 //===----------------------------------------------------------------------===//
60 /// ScheduleDAGFast - The actual "fast" list scheduler implementation.
62 class VISIBILITY_HIDDEN ScheduleDAGFast
: public ScheduleDAGSDNodes
{
64 /// AvailableQueue - The priority queue to use for the available SUnits.
65 FastPriorityQueue AvailableQueue
;
67 /// LiveRegDefs - A set of physical registers and their definition
68 /// that are "live". These nodes must be scheduled before any other nodes that
69 /// modifies the registers can be scheduled.
71 std::vector
<SUnit
*> LiveRegDefs
;
72 std::vector
<unsigned> LiveRegCycles
;
75 ScheduleDAGFast(MachineFunction
&mf
)
76 : ScheduleDAGSDNodes(mf
) {}
80 /// AddPred - adds a predecessor edge to SUnit SU.
81 /// This returns true if this is a new predecessor.
82 void AddPred(SUnit
*SU
, const SDep
&D
) {
86 /// RemovePred - removes a predecessor edge from SUnit SU.
87 /// This returns true if an edge was removed.
88 void RemovePred(SUnit
*SU
, const SDep
&D
) {
93 void ReleasePred(SUnit
*SU
, SDep
*PredEdge
);
94 void ReleasePredecessors(SUnit
*SU
, unsigned CurCycle
);
95 void ScheduleNodeBottomUp(SUnit
*, unsigned);
96 SUnit
*CopyAndMoveSuccessors(SUnit
*);
97 void InsertCopiesAndMoveSuccs(SUnit
*, unsigned,
98 const TargetRegisterClass
*,
99 const TargetRegisterClass
*,
100 SmallVector
<SUnit
*, 2>&);
101 bool DelayForLiveRegsBottomUp(SUnit
*, SmallVector
<unsigned, 4>&);
102 void ListScheduleBottomUp();
104 /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
105 bool ForceUnitLatencies() const { return true; }
107 } // end anonymous namespace
110 /// Schedule - Schedule the DAG using list scheduling.
111 void ScheduleDAGFast::Schedule() {
112 DOUT
<< "********** List Scheduling **********\n";
115 LiveRegDefs
.resize(TRI
->getNumRegs(), NULL
);
116 LiveRegCycles
.resize(TRI
->getNumRegs(), 0);
118 // Build the scheduling graph.
121 DEBUG(for (unsigned su
= 0, e
= SUnits
.size(); su
!= e
; ++su
)
122 SUnits
[su
].dumpAll(this));
124 // Execute the actual scheduling loop.
125 ListScheduleBottomUp();
128 //===----------------------------------------------------------------------===//
129 // Bottom-Up Scheduling
130 //===----------------------------------------------------------------------===//
132 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
133 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
134 void ScheduleDAGFast::ReleasePred(SUnit
*SU
, SDep
*PredEdge
) {
135 SUnit
*PredSU
= PredEdge
->getSUnit();
136 --PredSU
->NumSuccsLeft
;
139 if (PredSU
->NumSuccsLeft
< 0) {
140 cerr
<< "*** Scheduling failed! ***\n";
142 cerr
<< " has been released too many times!\n";
147 // If all the node's successors are scheduled, this node is ready
148 // to be scheduled. Ignore the special EntrySU node.
149 if (PredSU
->NumSuccsLeft
== 0 && PredSU
!= &EntrySU
) {
150 PredSU
->isAvailable
= true;
151 AvailableQueue
.push(PredSU
);
155 void ScheduleDAGFast::ReleasePredecessors(SUnit
*SU
, unsigned CurCycle
) {
156 // Bottom up: release predecessors
157 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
159 ReleasePred(SU
, &*I
);
160 if (I
->isAssignedRegDep()) {
161 // This is a physical register dependency and it's impossible or
162 // expensive to copy the register. Make sure nothing that can
163 // clobber the register is scheduled between the predecessor and
165 if (!LiveRegDefs
[I
->getReg()]) {
167 LiveRegDefs
[I
->getReg()] = I
->getSUnit();
168 LiveRegCycles
[I
->getReg()] = CurCycle
;
174 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
175 /// count of its predecessors. If a predecessor pending count is zero, add it to
176 /// the Available queue.
177 void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit
*SU
, unsigned CurCycle
) {
178 DOUT
<< "*** Scheduling [" << CurCycle
<< "]: ";
179 DEBUG(SU
->dump(this));
181 assert(CurCycle
>= SU
->getHeight() && "Node scheduled below its height!");
182 SU
->setHeightToAtLeast(CurCycle
);
183 Sequence
.push_back(SU
);
185 ReleasePredecessors(SU
, CurCycle
);
187 // Release all the implicit physical register defs that are live.
188 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
190 if (I
->isAssignedRegDep()) {
191 if (LiveRegCycles
[I
->getReg()] == I
->getSUnit()->getHeight()) {
192 assert(NumLiveRegs
> 0 && "NumLiveRegs is already zero!");
193 assert(LiveRegDefs
[I
->getReg()] == SU
&&
194 "Physical register dependency violated?");
196 LiveRegDefs
[I
->getReg()] = NULL
;
197 LiveRegCycles
[I
->getReg()] = 0;
202 SU
->isScheduled
= true;
205 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
206 /// successors to the newly created node.
207 SUnit
*ScheduleDAGFast::CopyAndMoveSuccessors(SUnit
*SU
) {
208 if (SU
->getNode()->getFlaggedNode())
211 SDNode
*N
= SU
->getNode();
216 bool TryUnfold
= false;
217 for (unsigned i
= 0, e
= N
->getNumValues(); i
!= e
; ++i
) {
218 EVT VT
= N
->getValueType(i
);
221 else if (VT
== MVT::Other
)
224 for (unsigned i
= 0, e
= N
->getNumOperands(); i
!= e
; ++i
) {
225 const SDValue
&Op
= N
->getOperand(i
);
226 EVT VT
= Op
.getNode()->getValueType(Op
.getResNo());
232 SmallVector
<SDNode
*, 2> NewNodes
;
233 if (!TII
->unfoldMemoryOperand(*DAG
, N
, NewNodes
))
236 DOUT
<< "Unfolding SU # " << SU
->NodeNum
<< "\n";
237 assert(NewNodes
.size() == 2 && "Expected a load folding node!");
240 SDNode
*LoadNode
= NewNodes
[0];
241 unsigned NumVals
= N
->getNumValues();
242 unsigned OldNumVals
= SU
->getNode()->getNumValues();
243 for (unsigned i
= 0; i
!= NumVals
; ++i
)
244 DAG
->ReplaceAllUsesOfValueWith(SDValue(SU
->getNode(), i
), SDValue(N
, i
));
245 DAG
->ReplaceAllUsesOfValueWith(SDValue(SU
->getNode(), OldNumVals
-1),
246 SDValue(LoadNode
, 1));
248 SUnit
*NewSU
= NewSUnit(N
);
249 assert(N
->getNodeId() == -1 && "Node already inserted!");
250 N
->setNodeId(NewSU
->NodeNum
);
252 const TargetInstrDesc
&TID
= TII
->get(N
->getMachineOpcode());
253 for (unsigned i
= 0; i
!= TID
.getNumOperands(); ++i
) {
254 if (TID
.getOperandConstraint(i
, TOI::TIED_TO
) != -1) {
255 NewSU
->isTwoAddress
= true;
259 if (TID
.isCommutable())
260 NewSU
->isCommutable
= true;
262 // LoadNode may already exist. This can happen when there is another
263 // load from the same location and producing the same type of value
264 // but it has different alignment or volatileness.
265 bool isNewLoad
= true;
267 if (LoadNode
->getNodeId() != -1) {
268 LoadSU
= &SUnits
[LoadNode
->getNodeId()];
271 LoadSU
= NewSUnit(LoadNode
);
272 LoadNode
->setNodeId(LoadSU
->NodeNum
);
276 SmallVector
<SDep
, 4> ChainSuccs
;
277 SmallVector
<SDep
, 4> LoadPreds
;
278 SmallVector
<SDep
, 4> NodePreds
;
279 SmallVector
<SDep
, 4> NodeSuccs
;
280 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
284 else if (I
->getSUnit()->getNode() &&
285 I
->getSUnit()->getNode()->isOperandOf(LoadNode
))
286 LoadPreds
.push_back(*I
);
288 NodePreds
.push_back(*I
);
290 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
293 ChainSuccs
.push_back(*I
);
295 NodeSuccs
.push_back(*I
);
298 if (ChainPred
.getSUnit()) {
299 RemovePred(SU
, ChainPred
);
301 AddPred(LoadSU
, ChainPred
);
303 for (unsigned i
= 0, e
= LoadPreds
.size(); i
!= e
; ++i
) {
304 const SDep
&Pred
= LoadPreds
[i
];
305 RemovePred(SU
, Pred
);
307 AddPred(LoadSU
, Pred
);
310 for (unsigned i
= 0, e
= NodePreds
.size(); i
!= e
; ++i
) {
311 const SDep
&Pred
= NodePreds
[i
];
312 RemovePred(SU
, Pred
);
313 AddPred(NewSU
, Pred
);
315 for (unsigned i
= 0, e
= NodeSuccs
.size(); i
!= e
; ++i
) {
316 SDep D
= NodeSuccs
[i
];
317 SUnit
*SuccDep
= D
.getSUnit();
319 RemovePred(SuccDep
, D
);
323 for (unsigned i
= 0, e
= ChainSuccs
.size(); i
!= e
; ++i
) {
324 SDep D
= ChainSuccs
[i
];
325 SUnit
*SuccDep
= D
.getSUnit();
327 RemovePred(SuccDep
, D
);
334 AddPred(NewSU
, SDep(LoadSU
, SDep::Order
, LoadSU
->Latency
));
339 if (NewSU
->NumSuccsLeft
== 0) {
340 NewSU
->isAvailable
= true;
346 DOUT
<< "Duplicating SU # " << SU
->NodeNum
<< "\n";
349 // New SUnit has the exact same predecessors.
350 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
352 if (!I
->isArtificial())
355 // Only copy scheduled successors. Cut them from old node's successor
356 // list and move them over.
357 SmallVector
<std::pair
<SUnit
*, SDep
>, 4> DelDeps
;
358 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
360 if (I
->isArtificial())
362 SUnit
*SuccSU
= I
->getSUnit();
363 if (SuccSU
->isScheduled
) {
368 DelDeps
.push_back(std::make_pair(SuccSU
, D
));
371 for (unsigned i
= 0, e
= DelDeps
.size(); i
!= e
; ++i
)
372 RemovePred(DelDeps
[i
].first
, DelDeps
[i
].second
);
378 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
379 /// scheduled successors of the given SUnit to the last copy.
380 void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit
*SU
, unsigned Reg
,
381 const TargetRegisterClass
*DestRC
,
382 const TargetRegisterClass
*SrcRC
,
383 SmallVector
<SUnit
*, 2> &Copies
) {
384 SUnit
*CopyFromSU
= NewSUnit(static_cast<SDNode
*>(NULL
));
385 CopyFromSU
->CopySrcRC
= SrcRC
;
386 CopyFromSU
->CopyDstRC
= DestRC
;
388 SUnit
*CopyToSU
= NewSUnit(static_cast<SDNode
*>(NULL
));
389 CopyToSU
->CopySrcRC
= DestRC
;
390 CopyToSU
->CopyDstRC
= SrcRC
;
392 // Only copy scheduled successors. Cut them from old node's successor
393 // list and move them over.
394 SmallVector
<std::pair
<SUnit
*, SDep
>, 4> DelDeps
;
395 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end();
397 if (I
->isArtificial())
399 SUnit
*SuccSU
= I
->getSUnit();
400 if (SuccSU
->isScheduled
) {
402 D
.setSUnit(CopyToSU
);
404 DelDeps
.push_back(std::make_pair(SuccSU
, *I
));
407 for (unsigned i
= 0, e
= DelDeps
.size(); i
!= e
; ++i
) {
408 RemovePred(DelDeps
[i
].first
, DelDeps
[i
].second
);
411 AddPred(CopyFromSU
, SDep(SU
, SDep::Data
, SU
->Latency
, Reg
));
412 AddPred(CopyToSU
, SDep(CopyFromSU
, SDep::Data
, CopyFromSU
->Latency
, 0));
414 Copies
.push_back(CopyFromSU
);
415 Copies
.push_back(CopyToSU
);
420 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
421 /// definition of the specified node.
422 /// FIXME: Move to SelectionDAG?
423 static EVT
getPhysicalRegisterVT(SDNode
*N
, unsigned Reg
,
424 const TargetInstrInfo
*TII
) {
425 const TargetInstrDesc
&TID
= TII
->get(N
->getMachineOpcode());
426 assert(TID
.ImplicitDefs
&& "Physical reg def must be in implicit def list!");
427 unsigned NumRes
= TID
.getNumDefs();
428 for (const unsigned *ImpDef
= TID
.getImplicitDefs(); *ImpDef
; ++ImpDef
) {
433 return N
->getValueType(NumRes
);
436 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
437 /// scheduling of the given node to satisfy live physical register dependencies.
438 /// If the specific node is the last one that's available to schedule, do
439 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
440 bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit
*SU
,
441 SmallVector
<unsigned, 4> &LRegs
){
442 if (NumLiveRegs
== 0)
445 SmallSet
<unsigned, 4> RegAdded
;
446 // If this node would clobber any "live" register, then it's not ready.
447 for (SUnit::pred_iterator I
= SU
->Preds
.begin(), E
= SU
->Preds
.end();
449 if (I
->isAssignedRegDep()) {
450 unsigned Reg
= I
->getReg();
451 if (LiveRegDefs
[Reg
] && LiveRegDefs
[Reg
] != I
->getSUnit()) {
452 if (RegAdded
.insert(Reg
))
453 LRegs
.push_back(Reg
);
455 for (const unsigned *Alias
= TRI
->getAliasSet(Reg
);
457 if (LiveRegDefs
[*Alias
] && LiveRegDefs
[*Alias
] != I
->getSUnit()) {
458 if (RegAdded
.insert(*Alias
))
459 LRegs
.push_back(*Alias
);
464 for (SDNode
*Node
= SU
->getNode(); Node
; Node
= Node
->getFlaggedNode()) {
465 if (!Node
->isMachineOpcode())
467 const TargetInstrDesc
&TID
= TII
->get(Node
->getMachineOpcode());
468 if (!TID
.ImplicitDefs
)
470 for (const unsigned *Reg
= TID
.ImplicitDefs
; *Reg
; ++Reg
) {
471 if (LiveRegDefs
[*Reg
] && LiveRegDefs
[*Reg
] != SU
) {
472 if (RegAdded
.insert(*Reg
))
473 LRegs
.push_back(*Reg
);
475 for (const unsigned *Alias
= TRI
->getAliasSet(*Reg
);
477 if (LiveRegDefs
[*Alias
] && LiveRegDefs
[*Alias
] != SU
) {
478 if (RegAdded
.insert(*Alias
))
479 LRegs
.push_back(*Alias
);
483 return !LRegs
.empty();
487 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
489 void ScheduleDAGFast::ListScheduleBottomUp() {
490 unsigned CurCycle
= 0;
492 // Release any predecessors of the special Exit node.
493 ReleasePredecessors(&ExitSU
, CurCycle
);
495 // Add root to Available queue.
496 if (!SUnits
.empty()) {
497 SUnit
*RootSU
= &SUnits
[DAG
->getRoot().getNode()->getNodeId()];
498 assert(RootSU
->Succs
.empty() && "Graph root shouldn't have successors!");
499 RootSU
->isAvailable
= true;
500 AvailableQueue
.push(RootSU
);
503 // While Available queue is not empty, grab the node with the highest
504 // priority. If it is not ready put it back. Schedule the node.
505 SmallVector
<SUnit
*, 4> NotReady
;
506 DenseMap
<SUnit
*, SmallVector
<unsigned, 4> > LRegsMap
;
507 Sequence
.reserve(SUnits
.size());
508 while (!AvailableQueue
.empty()) {
509 bool Delayed
= false;
511 SUnit
*CurSU
= AvailableQueue
.pop();
513 SmallVector
<unsigned, 4> LRegs
;
514 if (!DelayForLiveRegsBottomUp(CurSU
, LRegs
))
517 LRegsMap
.insert(std::make_pair(CurSU
, LRegs
));
519 CurSU
->isPending
= true; // This SU is not in AvailableQueue right now.
520 NotReady
.push_back(CurSU
);
521 CurSU
= AvailableQueue
.pop();
524 // All candidates are delayed due to live physical reg dependencies.
525 // Try code duplication or inserting cross class copies
527 if (Delayed
&& !CurSU
) {
529 // Try duplicating the nodes that produces these
530 // "expensive to copy" values to break the dependency. In case even
531 // that doesn't work, insert cross class copies.
532 SUnit
*TrySU
= NotReady
[0];
533 SmallVector
<unsigned, 4> &LRegs
= LRegsMap
[TrySU
];
534 assert(LRegs
.size() == 1 && "Can't handle this yet!");
535 unsigned Reg
= LRegs
[0];
536 SUnit
*LRDef
= LiveRegDefs
[Reg
];
537 EVT VT
= getPhysicalRegisterVT(LRDef
->getNode(), Reg
, TII
);
538 const TargetRegisterClass
*RC
=
539 TRI
->getPhysicalRegisterRegClass(Reg
, VT
);
540 const TargetRegisterClass
*DestRC
= TRI
->getCrossCopyRegClass(RC
);
542 // If cross copy register class is null, then it must be possible copy
543 // the value directly. Do not try duplicate the def.
546 NewDef
= CopyAndMoveSuccessors(LRDef
);
550 // Issue copies, these can be expensive cross register class copies.
551 SmallVector
<SUnit
*, 2> Copies
;
552 InsertCopiesAndMoveSuccs(LRDef
, Reg
, DestRC
, RC
, Copies
);
553 DOUT
<< "Adding an edge from SU # " << TrySU
->NodeNum
554 << " to SU #" << Copies
.front()->NodeNum
<< "\n";
555 AddPred(TrySU
, SDep(Copies
.front(), SDep::Order
, /*Latency=*/1,
556 /*Reg=*/0, /*isNormalMemory=*/false,
557 /*isMustAlias=*/false, /*isArtificial=*/true));
558 NewDef
= Copies
.back();
561 DOUT
<< "Adding an edge from SU # " << NewDef
->NodeNum
562 << " to SU #" << TrySU
->NodeNum
<< "\n";
563 LiveRegDefs
[Reg
] = NewDef
;
564 AddPred(NewDef
, SDep(TrySU
, SDep::Order
, /*Latency=*/1,
565 /*Reg=*/0, /*isNormalMemory=*/false,
566 /*isMustAlias=*/false, /*isArtificial=*/true));
567 TrySU
->isAvailable
= false;
572 llvm_unreachable("Unable to resolve live physical register dependencies!");
576 // Add the nodes that aren't ready back onto the available list.
577 for (unsigned i
= 0, e
= NotReady
.size(); i
!= e
; ++i
) {
578 NotReady
[i
]->isPending
= false;
579 // May no longer be available due to backtracking.
580 if (NotReady
[i
]->isAvailable
)
581 AvailableQueue
.push(NotReady
[i
]);
586 ScheduleNodeBottomUp(CurSU
, CurCycle
);
590 // Reverse the order if it is bottom up.
591 std::reverse(Sequence
.begin(), Sequence
.end());
595 // Verify that all SUnits were scheduled.
596 bool AnyNotSched
= false;
597 unsigned DeadNodes
= 0;
599 for (unsigned i
= 0, e
= SUnits
.size(); i
!= e
; ++i
) {
600 if (!SUnits
[i
].isScheduled
) {
601 if (SUnits
[i
].NumPreds
== 0 && SUnits
[i
].NumSuccs
== 0) {
606 cerr
<< "*** List scheduling failed! ***\n";
607 SUnits
[i
].dump(this);
608 cerr
<< "has not been scheduled!\n";
611 if (SUnits
[i
].NumSuccsLeft
!= 0) {
613 cerr
<< "*** List scheduling failed! ***\n";
614 SUnits
[i
].dump(this);
615 cerr
<< "has successors left!\n";
619 for (unsigned i
= 0, e
= Sequence
.size(); i
!= e
; ++i
)
622 assert(!AnyNotSched
);
623 assert(Sequence
.size() + DeadNodes
- Noops
== SUnits
.size() &&
624 "The number of nodes scheduled doesn't match the expected number!");
628 //===----------------------------------------------------------------------===//
629 // Public Constructor Functions
630 //===----------------------------------------------------------------------===//
632 llvm::ScheduleDAGSDNodes
*
633 llvm::createFastDAGScheduler(SelectionDAGISel
*IS
, CodeGenOpt::Level
) {
634 return new ScheduleDAGFast(*IS
->MF
);