1 //===- ScheduleDAG.cpp - Implement the ScheduleDAG class ------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file Implements the ScheduleDAG class, which is a base class used by
11 /// scheduling implementation classes.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/CodeGen/ScheduleDAG.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/iterator_range.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
21 #include "llvm/CodeGen/SelectionDAGNodes.h"
22 #include "llvm/CodeGen/TargetInstrInfo.h"
23 #include "llvm/CodeGen/TargetRegisterInfo.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/Config/llvm-config.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Compiler.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/raw_ostream.h"
39 #define DEBUG_TYPE "pre-RA-sched"
42 static cl::opt
<bool> StressSchedOpt(
43 "stress-sched", cl::Hidden
, cl::init(false),
44 cl::desc("Stress test instruction scheduling"));
47 void SchedulingPriorityQueue::anchor() {}
49 ScheduleDAG::ScheduleDAG(MachineFunction
&mf
)
50 : TM(mf
.getTarget()), TII(mf
.getSubtarget().getInstrInfo()),
51 TRI(mf
.getSubtarget().getRegisterInfo()), MF(mf
),
52 MRI(mf
.getRegInfo()) {
54 StressSched
= StressSchedOpt
;
58 ScheduleDAG::~ScheduleDAG() = default;
60 void ScheduleDAG::clearDAG() {
66 const MCInstrDesc
*ScheduleDAG::getNodeDesc(const SDNode
*Node
) const {
67 if (!Node
|| !Node
->isMachineOpcode()) return nullptr;
68 return &TII
->get(Node
->getMachineOpcode());
71 LLVM_DUMP_METHOD
void SDep::dump(const TargetRegisterInfo
*TRI
) const {
73 case Data
: dbgs() << "Data"; break;
74 case Anti
: dbgs() << "Anti"; break;
75 case Output
: dbgs() << "Out "; break;
76 case Order
: dbgs() << "Ord "; break;
81 dbgs() << " Latency=" << getLatency();
82 if (TRI
&& isAssignedRegDep())
83 dbgs() << " Reg=" << printReg(getReg(), TRI
);
87 dbgs() << " Latency=" << getLatency();
90 dbgs() << " Latency=" << getLatency();
91 switch(Contents
.OrdKind
) {
92 case Barrier
: dbgs() << " Barrier"; break;
94 case MustAliasMem
: dbgs() << " Memory"; break;
95 case Artificial
: dbgs() << " Artificial"; break;
96 case Weak
: dbgs() << " Weak"; break;
97 case Cluster
: dbgs() << " Cluster"; break;
103 bool SUnit::addPred(const SDep
&D
, bool Required
) {
104 // If this node already has this dependence, don't add a redundant one.
105 for (SDep
&PredDep
: Preds
) {
106 // Zero-latency weak edges may be added purely for heuristic ordering. Don't
107 // add them if another kind of edge already exists.
108 if (!Required
&& PredDep
.getSUnit() == D
.getSUnit())
110 if (PredDep
.overlaps(D
)) {
111 // Extend the latency if needed. Equivalent to
112 // removePred(PredDep) + addPred(D).
113 if (PredDep
.getLatency() < D
.getLatency()) {
114 SUnit
*PredSU
= PredDep
.getSUnit();
115 // Find the corresponding successor in N.
116 SDep ForwardD
= PredDep
;
117 ForwardD
.setSUnit(this);
118 for (SDep
&SuccDep
: PredSU
->Succs
) {
119 if (SuccDep
== ForwardD
) {
120 SuccDep
.setLatency(D
.getLatency());
124 PredDep
.setLatency(D
.getLatency());
129 // Now add a corresponding succ to N.
132 SUnit
*N
= D
.getSUnit();
133 // Update the bookkeeping.
134 if (D
.getKind() == SDep::Data
) {
135 assert(NumPreds
< std::numeric_limits
<unsigned>::max() &&
136 "NumPreds will overflow!");
137 assert(N
->NumSuccs
< std::numeric_limits
<unsigned>::max() &&
138 "NumSuccs will overflow!");
142 if (!N
->isScheduled
) {
147 assert(NumPredsLeft
< std::numeric_limits
<unsigned>::max() &&
148 "NumPredsLeft will overflow!");
157 assert(N
->NumSuccsLeft
< std::numeric_limits
<unsigned>::max() &&
158 "NumSuccsLeft will overflow!");
163 N
->Succs
.push_back(P
);
164 if (P
.getLatency() != 0) {
165 this->setDepthDirty();
171 void SUnit::removePred(const SDep
&D
) {
172 // Find the matching predecessor.
173 SmallVectorImpl
<SDep
>::iterator I
= llvm::find(Preds
, D
);
174 if (I
== Preds
.end())
176 // Find the corresponding successor in N.
179 SUnit
*N
= D
.getSUnit();
180 SmallVectorImpl
<SDep
>::iterator Succ
= llvm::find(N
->Succs
, P
);
181 assert(Succ
!= N
->Succs
.end() && "Mismatching preds / succs lists!");
182 N
->Succs
.erase(Succ
);
184 // Update the bookkeeping.
185 if (P
.getKind() == SDep::Data
) {
186 assert(NumPreds
> 0 && "NumPreds will underflow!");
187 assert(N
->NumSuccs
> 0 && "NumSuccs will underflow!");
191 if (!N
->isScheduled
) {
195 assert(NumPredsLeft
> 0 && "NumPredsLeft will underflow!");
203 assert(N
->NumSuccsLeft
> 0 && "NumSuccsLeft will underflow!");
207 if (P
.getLatency() != 0) {
208 this->setDepthDirty();
213 void SUnit::setDepthDirty() {
214 if (!isDepthCurrent
) return;
215 SmallVector
<SUnit
*, 8> WorkList
;
216 WorkList
.push_back(this);
218 SUnit
*SU
= WorkList
.pop_back_val();
219 SU
->isDepthCurrent
= false;
220 for (SDep
&SuccDep
: SU
->Succs
) {
221 SUnit
*SuccSU
= SuccDep
.getSUnit();
222 if (SuccSU
->isDepthCurrent
)
223 WorkList
.push_back(SuccSU
);
225 } while (!WorkList
.empty());
228 void SUnit::setHeightDirty() {
229 if (!isHeightCurrent
) return;
230 SmallVector
<SUnit
*, 8> WorkList
;
231 WorkList
.push_back(this);
233 SUnit
*SU
= WorkList
.pop_back_val();
234 SU
->isHeightCurrent
= false;
235 for (SDep
&PredDep
: SU
->Preds
) {
236 SUnit
*PredSU
= PredDep
.getSUnit();
237 if (PredSU
->isHeightCurrent
)
238 WorkList
.push_back(PredSU
);
240 } while (!WorkList
.empty());
243 void SUnit::setDepthToAtLeast(unsigned NewDepth
) {
244 if (NewDepth
<= getDepth())
248 isDepthCurrent
= true;
251 void SUnit::setHeightToAtLeast(unsigned NewHeight
) {
252 if (NewHeight
<= getHeight())
256 isHeightCurrent
= true;
259 /// Calculates the maximal path from the node to the exit.
260 void SUnit::ComputeDepth() {
261 SmallVector
<SUnit
*, 8> WorkList
;
262 WorkList
.push_back(this);
264 SUnit
*Cur
= WorkList
.back();
267 unsigned MaxPredDepth
= 0;
268 for (const SDep
&PredDep
: Cur
->Preds
) {
269 SUnit
*PredSU
= PredDep
.getSUnit();
270 if (PredSU
->isDepthCurrent
)
271 MaxPredDepth
= std::max(MaxPredDepth
,
272 PredSU
->Depth
+ PredDep
.getLatency());
275 WorkList
.push_back(PredSU
);
281 if (MaxPredDepth
!= Cur
->Depth
) {
282 Cur
->setDepthDirty();
283 Cur
->Depth
= MaxPredDepth
;
285 Cur
->isDepthCurrent
= true;
287 } while (!WorkList
.empty());
290 /// Calculates the maximal path from the node to the entry.
291 void SUnit::ComputeHeight() {
292 SmallVector
<SUnit
*, 8> WorkList
;
293 WorkList
.push_back(this);
295 SUnit
*Cur
= WorkList
.back();
298 unsigned MaxSuccHeight
= 0;
299 for (const SDep
&SuccDep
: Cur
->Succs
) {
300 SUnit
*SuccSU
= SuccDep
.getSUnit();
301 if (SuccSU
->isHeightCurrent
)
302 MaxSuccHeight
= std::max(MaxSuccHeight
,
303 SuccSU
->Height
+ SuccDep
.getLatency());
306 WorkList
.push_back(SuccSU
);
312 if (MaxSuccHeight
!= Cur
->Height
) {
313 Cur
->setHeightDirty();
314 Cur
->Height
= MaxSuccHeight
;
316 Cur
->isHeightCurrent
= true;
318 } while (!WorkList
.empty());
321 void SUnit::biasCriticalPath() {
325 SUnit::pred_iterator BestI
= Preds
.begin();
326 unsigned MaxDepth
= BestI
->getSUnit()->getDepth();
327 for (SUnit::pred_iterator I
= std::next(BestI
), E
= Preds
.end(); I
!= E
;
329 if (I
->getKind() == SDep::Data
&& I
->getSUnit()->getDepth() > MaxDepth
)
332 if (BestI
!= Preds
.begin())
333 std::swap(*Preds
.begin(), *BestI
);
336 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
337 LLVM_DUMP_METHOD
void SUnit::dumpAttributes() const {
338 dbgs() << " # preds left : " << NumPredsLeft
<< "\n";
339 dbgs() << " # succs left : " << NumSuccsLeft
<< "\n";
341 dbgs() << " # weak preds left : " << WeakPredsLeft
<< "\n";
343 dbgs() << " # weak succs left : " << WeakSuccsLeft
<< "\n";
344 dbgs() << " # rdefs left : " << NumRegDefsLeft
<< "\n";
345 dbgs() << " Latency : " << Latency
<< "\n";
346 dbgs() << " Depth : " << getDepth() << "\n";
347 dbgs() << " Height : " << getHeight() << "\n";
350 LLVM_DUMP_METHOD
void ScheduleDAG::dumpNodeName(const SUnit
&SU
) const {
353 else if (&SU
== &ExitSU
)
356 dbgs() << "SU(" << SU
.NodeNum
<< ")";
359 LLVM_DUMP_METHOD
void ScheduleDAG::dumpNodeAll(const SUnit
&SU
) const {
362 if (SU
.Preds
.size() > 0) {
363 dbgs() << " Predecessors:\n";
364 for (const SDep
&Dep
: SU
.Preds
) {
366 dumpNodeName(*Dep
.getSUnit());
372 if (SU
.Succs
.size() > 0) {
373 dbgs() << " Successors:\n";
374 for (const SDep
&Dep
: SU
.Succs
) {
376 dumpNodeName(*Dep
.getSUnit());
386 unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp
) {
387 bool AnyNotSched
= false;
388 unsigned DeadNodes
= 0;
389 for (const SUnit
&SUnit
: SUnits
) {
390 if (!SUnit
.isScheduled
) {
391 if (SUnit
.NumPreds
== 0 && SUnit
.NumSuccs
== 0) {
396 dbgs() << "*** Scheduling failed! ***\n";
398 dbgs() << "has not been scheduled!\n";
401 if (SUnit
.isScheduled
&&
402 (isBottomUp
? SUnit
.getHeight() : SUnit
.getDepth()) >
403 unsigned(std::numeric_limits
<int>::max())) {
405 dbgs() << "*** Scheduling failed! ***\n";
407 dbgs() << "has an unexpected "
408 << (isBottomUp
? "Height" : "Depth") << " value!\n";
412 if (SUnit
.NumSuccsLeft
!= 0) {
414 dbgs() << "*** Scheduling failed! ***\n";
416 dbgs() << "has successors left!\n";
420 if (SUnit
.NumPredsLeft
!= 0) {
422 dbgs() << "*** Scheduling failed! ***\n";
424 dbgs() << "has predecessors left!\n";
429 assert(!AnyNotSched
);
430 return SUnits
.size() - DeadNodes
;
434 void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
435 // The idea of the algorithm is taken from
436 // "Online algorithms for managing the topological order of
437 // a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
438 // This is the MNR algorithm, which was first introduced by
439 // A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
440 // "Maintaining a topological order under edge insertions".
442 // Short description of the algorithm:
444 // Topological ordering, ord, of a DAG maps each node to a topological
445 // index so that for all edges X->Y it is the case that ord(X) < ord(Y).
447 // This means that if there is a path from the node X to the node Z,
448 // then ord(X) < ord(Z).
450 // This property can be used to check for reachability of nodes:
451 // if Z is reachable from X, then an insertion of the edge Z->X would
454 // The algorithm first computes a topological ordering for the DAG by
455 // initializing the Index2Node and Node2Index arrays and then tries to keep
456 // the ordering up-to-date after edge insertions by reordering the DAG.
458 // On insertion of the edge X->Y, the algorithm first marks by calling DFS
459 // the nodes reachable from Y, and then shifts them using Shift to lie
460 // immediately after X in Index2Node.
461 unsigned DAGSize
= SUnits
.size();
462 std::vector
<SUnit
*> WorkList
;
463 WorkList
.reserve(DAGSize
);
465 Index2Node
.resize(DAGSize
);
466 Node2Index
.resize(DAGSize
);
468 // Initialize the data structures.
470 WorkList
.push_back(ExitSU
);
471 for (SUnit
&SU
: SUnits
) {
472 int NodeNum
= SU
.NodeNum
;
473 unsigned Degree
= SU
.Succs
.size();
474 // Temporarily use the Node2Index array as scratch space for degree counts.
475 Node2Index
[NodeNum
] = Degree
;
477 // Is it a node without dependencies?
479 assert(SU
.Succs
.empty() && "SUnit should have no successors");
480 // Collect leaf nodes.
481 WorkList
.push_back(&SU
);
486 while (!WorkList
.empty()) {
487 SUnit
*SU
= WorkList
.back();
489 if (SU
->NodeNum
< DAGSize
)
490 Allocate(SU
->NodeNum
, --Id
);
491 for (const SDep
&PredDep
: SU
->Preds
) {
492 SUnit
*SU
= PredDep
.getSUnit();
493 if (SU
->NodeNum
< DAGSize
&& !--Node2Index
[SU
->NodeNum
])
494 // If all dependencies of the node are processed already,
495 // then the node can be computed now.
496 WorkList
.push_back(SU
);
500 Visited
.resize(DAGSize
);
503 // Check correctness of the ordering
504 for (SUnit
&SU
: SUnits
) {
505 for (const SDep
&PD
: SU
.Preds
) {
506 assert(Node2Index
[SU
.NodeNum
] > Node2Index
[PD
.getSUnit()->NodeNum
] &&
507 "Wrong topological sorting");
513 void ScheduleDAGTopologicalSort::AddPred(SUnit
*Y
, SUnit
*X
) {
514 int UpperBound
, LowerBound
;
515 LowerBound
= Node2Index
[Y
->NodeNum
];
516 UpperBound
= Node2Index
[X
->NodeNum
];
517 bool HasLoop
= false;
518 // Is Ord(X) < Ord(Y) ?
519 if (LowerBound
< UpperBound
) {
520 // Update the topological order.
522 DFS(Y
, UpperBound
, HasLoop
);
523 assert(!HasLoop
&& "Inserted edge creates a loop!");
524 // Recompute topological indexes.
525 Shift(Visited
, LowerBound
, UpperBound
);
529 void ScheduleDAGTopologicalSort::RemovePred(SUnit
*M
, SUnit
*N
) {
530 // InitDAGTopologicalSorting();
533 void ScheduleDAGTopologicalSort::DFS(const SUnit
*SU
, int UpperBound
,
535 std::vector
<const SUnit
*> WorkList
;
536 WorkList
.reserve(SUnits
.size());
538 WorkList
.push_back(SU
);
540 SU
= WorkList
.back();
542 Visited
.set(SU
->NodeNum
);
543 for (const SDep
&SuccDep
544 : make_range(SU
->Succs
.rbegin(), SU
->Succs
.rend())) {
545 unsigned s
= SuccDep
.getSUnit()->NodeNum
;
546 // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
547 if (s
>= Node2Index
.size())
549 if (Node2Index
[s
] == UpperBound
) {
553 // Visit successors if not already and in affected region.
554 if (!Visited
.test(s
) && Node2Index
[s
] < UpperBound
) {
555 WorkList
.push_back(SuccDep
.getSUnit());
558 } while (!WorkList
.empty());
561 std::vector
<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit
&StartSU
,
562 const SUnit
&TargetSU
,
564 std::vector
<const SUnit
*> WorkList
;
565 int LowerBound
= Node2Index
[StartSU
.NodeNum
];
566 int UpperBound
= Node2Index
[TargetSU
.NodeNum
];
568 BitVector VisitedBack
;
569 std::vector
<int> Nodes
;
571 if (LowerBound
> UpperBound
) {
576 WorkList
.reserve(SUnits
.size());
579 // Starting from StartSU, visit all successors up
581 WorkList
.push_back(&StartSU
);
583 const SUnit
*SU
= WorkList
.back();
585 for (int I
= SU
->Succs
.size()-1; I
>= 0; --I
) {
586 const SUnit
*Succ
= SU
->Succs
[I
].getSUnit();
587 unsigned s
= Succ
->NodeNum
;
588 // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
589 if (Succ
->isBoundaryNode())
591 if (Node2Index
[s
] == UpperBound
) {
595 // Visit successors if not already and in affected region.
596 if (!Visited
.test(s
) && Node2Index
[s
] < UpperBound
) {
598 WorkList
.push_back(Succ
);
601 } while (!WorkList
.empty());
609 VisitedBack
.resize(SUnits
.size());
612 // Starting from TargetSU, visit all predecessors up
613 // to LowerBound. SUs that are visited by the two
614 // passes are added to Nodes.
615 WorkList
.push_back(&TargetSU
);
617 const SUnit
*SU
= WorkList
.back();
619 for (int I
= SU
->Preds
.size()-1; I
>= 0; --I
) {
620 const SUnit
*Pred
= SU
->Preds
[I
].getSUnit();
621 unsigned s
= Pred
->NodeNum
;
622 // Edges to non-SUnits are allowed but ignored (e.g. EntrySU).
623 if (Pred
->isBoundaryNode())
625 if (Node2Index
[s
] == LowerBound
) {
629 if (!VisitedBack
.test(s
) && Visited
.test(s
)) {
631 WorkList
.push_back(Pred
);
635 } while (!WorkList
.empty());
637 assert(Found
&& "Error in SUnit Graph!");
642 void ScheduleDAGTopologicalSort::Shift(BitVector
& Visited
, int LowerBound
,
648 for (i
= LowerBound
; i
<= UpperBound
; ++i
) {
649 // w is node at topological index i.
650 int w
= Index2Node
[i
];
651 if (Visited
.test(w
)) {
657 Allocate(w
, i
- shift
);
661 for (unsigned LI
: L
) {
662 Allocate(LI
, i
- shift
);
667 bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit
*TargetSU
, SUnit
*SU
) {
668 // Is SU reachable from TargetSU via successor edges?
669 if (IsReachable(SU
, TargetSU
))
671 for (const SDep
&PredDep
: TargetSU
->Preds
)
672 if (PredDep
.isAssignedRegDep() &&
673 IsReachable(SU
, PredDep
.getSUnit()))
678 bool ScheduleDAGTopologicalSort::IsReachable(const SUnit
*SU
,
679 const SUnit
*TargetSU
) {
680 // If insertion of the edge SU->TargetSU would create a cycle
681 // then there is a path from TargetSU to SU.
682 int UpperBound
, LowerBound
;
683 LowerBound
= Node2Index
[TargetSU
->NodeNum
];
684 UpperBound
= Node2Index
[SU
->NodeNum
];
685 bool HasLoop
= false;
686 // Is Ord(TargetSU) < Ord(SU) ?
687 if (LowerBound
< UpperBound
) {
689 // There may be a path from TargetSU to SU. Check for it.
690 DFS(TargetSU
, UpperBound
, HasLoop
);
695 void ScheduleDAGTopologicalSort::Allocate(int n
, int index
) {
696 Node2Index
[n
] = index
;
697 Index2Node
[index
] = n
;
700 ScheduleDAGTopologicalSort::
701 ScheduleDAGTopologicalSort(std::vector
<SUnit
> &sunits
, SUnit
*exitsu
)
702 : SUnits(sunits
), ExitSU(exitsu
) {}
704 ScheduleHazardRecognizer::~ScheduleHazardRecognizer() = default;