//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a fast scheduler.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(NumUnfolds,  "Number of nodes unfolded");
STATISTIC(NumDups,     "Number of duplicated nodes");
STATISTIC(NumPRCopies, "Number of physical copies");
static RegisterScheduler
  fastDAGScheduler("fast", "Fast suboptimal list scheduling",
                   createFastDAGScheduler);
namespace {
  /// FastPriorityQueue - A degenerate priority queue that considers
  /// all nodes to have the same priority.
  ///
  struct FastPriorityQueue {
    SmallVector<SUnit *, 16> Queue;

    bool empty() const { return Queue.empty(); }

    void push(SUnit *U) {
      Queue.push_back(U);
    }

    SUnit *pop() {
      if (empty()) return NULL;
      SUnit *V = Queue.back();
      Queue.pop_back();
      return V;
    }
  };
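  // Note: push/pop operate on the back of the SmallVector, so this "queue" is
  // really a LIFO stack. With every node given equal priority, the scheduler
  // simply takes whichever available node was released most recently.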
//===----------------------------------------------------------------------===//
/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
///
class ScheduleDAGFast : public ScheduleDAGSDNodes {
private:
  /// AvailableQueue - The priority queue to use for the available SUnits.
  FastPriorityQueue AvailableQueue;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other nodes
  /// that modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

public:
  ScheduleDAGFast(MachineFunction &mf)
    : ScheduleDAGSDNodes(mf) {}

  void Schedule();
  /// AddPred - Adds a predecessor edge to SUnit SU, delegating to
  /// SUnit::addPred.
  void AddPred(SUnit *SU, const SDep &D) {
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU, delegating to
  /// SUnit::removePred.
  void RemovePred(SUnit *SU, const SDep &D) {
    SU->removePred(D);
  }
private:
  void ReleasePred(SUnit *SU, SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleBottomUp();

  /// ForceUnitLatencies - The fast scheduler doesn't care about real
  /// latencies.
  bool ForceUnitLatencies() const { return true; }
};
}  // end anonymous namespace
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGFast::Schedule() {
  DEBUG(dbgs() << "********** List Scheduling **********\n");

  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegCycles.resize(TRI->getNumRegs(), 0);

  // Build the scheduling graph (no alias analysis is passed in).
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();
}
//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;
    AvailableQueue.push(PredSU);
  }
}
void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
  // Bottom up: release predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (!LiveRegDefs[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegDefs[I->getReg()] = I->getSUnit();
        LiveRegCycles[I->getReg()] = CurCycle;
      }
    }
  }
}
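// Because scheduling proceeds bottom-up, a physical register becomes "live"
// here when a *user* of its value is scheduled, and it stays live until the
// defining node itself is scheduled (ScheduleNodeBottomUp clears the entry).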
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it
/// to the Available queue.
void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleasePredecessors(SU, CurCycle);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[I->getReg()] == SU &&
               "Physical register dependency violated?");
        --NumLiveRegs;
        LiveRegDefs[I->getReg()] = NULL;
        LiveRegCycles[I->getReg()] = 0;
      }
    }
  }

  SU->isScheduled = true;
}
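// The LiveRegCycles comparison above appears to identify the successor whose
// scheduling originally made the register live: the entry is cleared only
// when SU, the recorded def for that same live range, is scheduled.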
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
  if (SU->getNode()->getFlaggedNode())
    return NULL;

  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Flag)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Flag)
      return NULL;
  }
  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));
    SUnit *NewSU = NewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID.isCommutable())
      NewSU->isCommutable = true;
    // LoadNode may already exist. This can happen when there is another
    // load from the same location that produces the same type of value
    // but with different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = NewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
    }
    SDep ChainPred;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPred = *I;
      else if (I->getSUnit()->getNode() &&
               I->getSUnit()->getNode()->isOperandOf(LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }
    if (ChainPred.getSUnit()) {
      RemovePred(SU, ChainPred);
      if (isNewLoad)
        AddPred(LoadSU, ChainPred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }
    if (isNewLoad) {
      AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
    }
    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
  NewSU = Clone(SU);
  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);
  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  ++NumDups;
  return NewSU;
}
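// In summary: when the node has folded a load, first try to unfold it so the
// load and the operation can be scheduled separately; then clone the
// (possibly unfolded) node and move its already-scheduled successors onto the
// clone, so the duplicate feeds those users and breaks the physical-register
// interference.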
/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                               const TargetRegisterClass *DestRC,
                                               const TargetRegisterClass *SrcRC,
                                               SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL));
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL));
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;
  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
    RemovePred(DelDeps[i].first, DelDeps[i].second);
  }

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}
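// CopyFromSU copies the physical register (SrcRC) into the cross-copy class
// DestRC; CopyToSU copies it back. Already-scheduled successors are rewired
// to read from CopyToSU, so the original physical-register def no longer has
// to stay live across them.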
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.getNumDefs();
  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}
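// The node's result list is laid out as the explicit defs followed by the
// implicit defs, so counting Reg's position in the implicit-def list past
// TID.getNumDefs() yields the index of the value it defines.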
/// CheckForLiveRegDef - Return true and update the live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  bool Added = false;
  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
    if (RegAdded.insert(Reg)) {
      LRegs.push_back(Reg);
      Added = true;
    }
  }
  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
      if (RegAdded.insert(*Alias)) {
        LRegs.push_back(*Alias);
        Added = true;
      }
    }
  return Added;
}
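// A def interferes with a live register either directly or through any
// aliasing register (e.g. a sub- or super-register), which is why the alias
// set from TargetRegisterInfo is scanned as well.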
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register
/// dependencies. If the specific node is the last one that's available to
/// schedule, do whatever is necessary (i.e. backtracking or cloning) to make
/// it possible.
bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
                                               SmallVector<unsigned, 4> &LRegs){
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
    }
  }
  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
        --NumOps;  // Ignore the flag operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i;  // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
    }
  }
  return !LRegs.empty();
}
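// The registers collected in LRegs are the live defs this node would clobber;
// the caller (ListScheduleBottomUp) stashes them in LRegsMap and sets the
// node aside until the interference is resolved.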
/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGFast::ListScheduleBottomUp() {
  unsigned CurCycle = 0;

  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU, CurCycle);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue.push(RootSU);
  }
  // While the Available queue is not empty, grab the node with the highest
  // priority. If it is not ready, put it back; otherwise schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue.empty()) {
    bool Delayed = false;
    LRegsMap.clear();
    SUnit *CurSU = AvailableQueue.pop();
    while (CurSU) {
      SmallVector<unsigned, 4> LRegs;
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      Delayed = true;
      LRegsMap.insert(std::make_pair(CurSU, LRegs));

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue.pop();
    }
    // All candidates are delayed due to live physical reg dependencies.
    // Try code duplication or inserting cross class copies to resolve it.
    if (Delayed && !CurSU) {
      // Try duplicating the nodes that produce these "expensive to copy"
      // values to break the dependency. In case even that doesn't work,
      // insert cross class copies.
      SUnit *TrySU = NotReady[0];
      SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
      assert(LRegs.size() == 1 && "Can't handle this yet!");
      unsigned Reg = LRegs[0];
      SUnit *LRDef = LiveRegDefs[Reg];
      EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
      const TargetRegisterClass *RC =
        TRI->getMinimalPhysRegClass(Reg, VT);
      const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

      // If the cross copy register class is null, then it must be possible to
      // copy the value directly. Do not try to duplicate the def.
      SUnit *NewDef = 0;
      if (DestRC)
        NewDef = CopyAndMoveSuccessors(LRDef);
      else
        DestRC = RC;

      if (!NewDef) {
        // Issue copies, these can be expensive cross register class copies.
        SmallVector<SUnit*, 2> Copies;
        InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
        DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
                     << " to SU #" << Copies.front()->NodeNum << "\n");
        AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                            /*Reg=*/0, /*isNormalMemory=*/false,
                            /*isMustAlias=*/false, /*isArtificial=*/true));
        NewDef = Copies.back();
      }

      DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
                   << " to SU #" << TrySU->NodeNum << "\n");
      LiveRegDefs[Reg] = NewDef;
      AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                           /*Reg=*/0, /*isNormalMemory=*/false,
                           /*isMustAlias=*/false, /*isArtificial=*/true));
      TrySU->isAvailable = false;
      CurSU = NewDef;
    }

    if (!CurSU)
      llvm_unreachable("Unable to resolve live physical register dependencies!");
    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue.push(NotReady[i]);
    }
    NotReady.clear();

    if (CurSU)
      ScheduleNodeBottomUp(CurSU, CurCycle);
    ++CurCycle;
  }
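  // Note that each trip through the outer loop schedules at most one node and
  // advances CurCycle by one, consistent with ForceUnitLatencies(): real
  // instruction latencies are ignored entirely.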
  // Reverse the order since it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(/*isBottomUp=*/true);
#endif
}
//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//
llvm::ScheduleDAGSDNodes *
llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  return new ScheduleDAGFast(*IS->MF);
}