//===- ScheduleDAG.cpp - Implement the ScheduleDAG class ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Implements the ScheduleDAG class, which is a base class used by
/// scheduling implementation classes.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <limits>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "pre-RA-sched"

STATISTIC(NumNewPredsAdded, "Number of times a single predecessor was added");
STATISTIC(NumTopoInits,
          "Number of times the topological order has been recomputed");

#ifndef NDEBUG
static cl::opt<bool> StressSchedOpt(
    "stress-sched", cl::Hidden, cl::init(false),
    cl::desc("Stress test instruction scheduling"));
#endif

void SchedulingPriorityQueue::anchor() {}

ScheduleDAG::ScheduleDAG(MachineFunction &mf)
    : TM(mf.getTarget()), TII(mf.getSubtarget().getInstrInfo()),
      TRI(mf.getSubtarget().getRegisterInfo()), MF(mf),
      MRI(mf.getRegInfo()) {
#ifndef NDEBUG
  StressSched = StressSchedOpt;
#endif
}

ScheduleDAG::~ScheduleDAG() = default;

void ScheduleDAG::clearDAG() {
  SUnits.clear();
  EntrySU = SUnit();
  ExitSU = SUnit();
}

const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
  if (!Node || !Node->isMachineOpcode()) return nullptr;
  return &TII->get(Node->getMachineOpcode());
}

LLVM_DUMP_METHOD void SDep::dump(const TargetRegisterInfo *TRI) const {
  switch (getKind()) {
  case Data:   dbgs() << "Data"; break;
  case Anti:   dbgs() << "Anti"; break;
  case Output: dbgs() << "Out "; break;
  case Order:  dbgs() << "Ord "; break;
  }

  switch (getKind()) {
  case Data:
    dbgs() << " Latency=" << getLatency();
    if (TRI && isAssignedRegDep())
      dbgs() << " Reg=" << printReg(getReg(), TRI);
    break;
  case Anti:
  case Output:
    dbgs() << " Latency=" << getLatency();
    break;
  case Order:
    dbgs() << " Latency=" << getLatency();
    switch(Contents.OrdKind) {
    case Barrier:      dbgs() << " Barrier"; break;
    case MayAliasMem:
    case MustAliasMem: dbgs() << " Memory"; break;
    case Artificial:   dbgs() << " Artificial"; break;
    case Weak:         dbgs() << " Weak"; break;
    case Cluster:      dbgs() << " Cluster"; break;
    }
    break;
  }
}

bool SUnit::addPred(const SDep &D, bool Required) {
  // If this node already has this dependence, don't add a redundant one.
  for (SDep &PredDep : Preds) {
    // Zero-latency weak edges may be added purely for heuristic ordering. Don't
    // add them if another kind of edge already exists.
    if (!Required && PredDep.getSUnit() == D.getSUnit())
      return false;
    if (PredDep.overlaps(D)) {
      // Extend the latency if needed. Equivalent to
      // removePred(PredDep) + addPred(D).
      if (PredDep.getLatency() < D.getLatency()) {
        SUnit *PredSU = PredDep.getSUnit();
        // Find the corresponding successor in N.
        SDep ForwardD = PredDep;
        ForwardD.setSUnit(this);
        for (SDep &SuccDep : PredSU->Succs) {
          if (SuccDep == ForwardD) {
            SuccDep.setLatency(D.getLatency());
            break;
          }
        }
        PredDep.setLatency(D.getLatency());
      }
      return false;
    }
  }
  // Now add a corresponding succ to N.
  SDep P = D;
  P.setSUnit(this);
  SUnit *N = D.getSUnit();
  // Update the bookkeeping.
  if (D.getKind() == SDep::Data) {
    assert(NumPreds < std::numeric_limits<unsigned>::max() &&
           "NumPreds will overflow!");
    assert(N->NumSuccs < std::numeric_limits<unsigned>::max() &&
           "NumSuccs will overflow!");
    ++NumPreds;
    ++N->NumSuccs;
  }
  if (!N->isScheduled) {
    if (D.isWeak()) {
      ++WeakPredsLeft;
    } else {
      assert(NumPredsLeft < std::numeric_limits<unsigned>::max() &&
             "NumPredsLeft will overflow!");
      ++NumPredsLeft;
    }
  }
  if (!isScheduled) {
    if (D.isWeak()) {
      ++N->WeakSuccsLeft;
    } else {
      assert(N->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
             "NumSuccsLeft will overflow!");
      ++N->NumSuccsLeft;
    }
  }
  Preds.push_back(D);
  N->Succs.push_back(P);
  if (P.getLatency() != 0) {
    this->setDepthDirty();
    N->setHeightDirty();
  }
  return true;
}

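// A minimal usage sketch (illustrative only; SUa, SUb, and the latency value
// are hypothetical): edges are added on the consuming node, and addPred()
// keeps the mirrored Preds/Succs lists and the *Left counters in sync.
//
//   SUnit *SUa = ..., *SUb = ...;           // SUb consumes a value from SUa
//   SDep Edge(SUa, SDep::Data, /*Reg=*/0);  // data dependence SUa -> SUb
//   Edge.setLatency(2);
//   if (SUb->addPred(Edge)) {
//     // New edge recorded in SUb->Preds and SUa->Succs. For a redundant
//     // overlapping edge, addPred() instead extends the latency in place
//     // and returns false.
//   }
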
void SUnit::removePred(const SDep &D) {
  // Find the matching predecessor.
  SmallVectorImpl<SDep>::iterator I = llvm::find(Preds, D);
  if (I == Preds.end())
    return;
  // Find the corresponding successor in N.
  SDep P = D;
  P.setSUnit(this);
  SUnit *N = D.getSUnit();
  SmallVectorImpl<SDep>::iterator Succ = llvm::find(N->Succs, P);
  assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
  // Update the bookkeeping.
  if (P.getKind() == SDep::Data) {
    assert(NumPreds > 0 && "NumPreds will underflow!");
    assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
    --NumPreds;
    --N->NumSuccs;
  }
  if (!N->isScheduled) {
    if (D.isWeak()) {
      assert(WeakPredsLeft > 0 && "WeakPredsLeft will underflow!");
      --WeakPredsLeft;
    } else {
      assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
      --NumPredsLeft;
    }
  }
  if (!isScheduled) {
    if (D.isWeak()) {
      assert(N->WeakSuccsLeft > 0 && "WeakSuccsLeft will underflow!");
      --N->WeakSuccsLeft;
    } else {
      assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
      --N->NumSuccsLeft;
    }
  }
  N->Succs.erase(Succ);
  Preds.erase(I);
  if (P.getLatency() != 0) {
    this->setDepthDirty();
    N->setHeightDirty();
  }
}

void SUnit::setDepthDirty() {
  if (!isDepthCurrent) return;
  SmallVector<SUnit*, 8> WorkList;
  WorkList.push_back(this);
  do {
    SUnit *SU = WorkList.pop_back_val();
    SU->isDepthCurrent = false;
    for (SDep &SuccDep : SU->Succs) {
      SUnit *SuccSU = SuccDep.getSUnit();
      if (SuccSU->isDepthCurrent)
        WorkList.push_back(SuccSU);
    }
  } while (!WorkList.empty());
}

void SUnit::setHeightDirty() {
  if (!isHeightCurrent) return;
  SmallVector<SUnit*, 8> WorkList;
  WorkList.push_back(this);
  do {
    SUnit *SU = WorkList.pop_back_val();
    SU->isHeightCurrent = false;
    for (SDep &PredDep : SU->Preds) {
      SUnit *PredSU = PredDep.getSUnit();
      if (PredSU->isHeightCurrent)
        WorkList.push_back(PredSU);
    }
  } while (!WorkList.empty());
}

void SUnit::setDepthToAtLeast(unsigned NewDepth) {
  if (NewDepth <= getDepth())
    return;
  setDepthDirty();
  Depth = NewDepth;
  isDepthCurrent = true;
}

void SUnit::setHeightToAtLeast(unsigned NewHeight) {
  if (NewHeight <= getHeight())
    return;
  setHeightDirty();
  Height = NewHeight;
  isHeightCurrent = true;
}

/// Calculates the maximal path from the node back to the entry, i.e. the
/// node's depth, from the depths of its predecessors.
void SUnit::ComputeDepth() {
  SmallVector<SUnit*, 8> WorkList;
  WorkList.push_back(this);
  do {
    SUnit *Cur = WorkList.back();

    bool Done = true;
    unsigned MaxPredDepth = 0;
    for (const SDep &PredDep : Cur->Preds) {
      SUnit *PredSU = PredDep.getSUnit();
      if (PredSU->isDepthCurrent)
        MaxPredDepth = std::max(MaxPredDepth,
                                PredSU->Depth + PredDep.getLatency());
      else {
        Done = false;
        WorkList.push_back(PredSU);
      }
    }

    if (Done) {
      WorkList.pop_back();
      if (MaxPredDepth != Cur->Depth) {
        Cur->setDepthDirty();
        Cur->Depth = MaxPredDepth;
      }
      Cur->isDepthCurrent = true;
    }
  } while (!WorkList.empty());
}

/// Calculates the maximal path from the node to the exit, i.e. the node's
/// height, from the heights of its successors.
void SUnit::ComputeHeight() {
  SmallVector<SUnit*, 8> WorkList;
  WorkList.push_back(this);
  do {
    SUnit *Cur = WorkList.back();

    bool Done = true;
    unsigned MaxSuccHeight = 0;
    for (const SDep &SuccDep : Cur->Succs) {
      SUnit *SuccSU = SuccDep.getSUnit();
      if (SuccSU->isHeightCurrent)
        MaxSuccHeight = std::max(MaxSuccHeight,
                                 SuccSU->Height + SuccDep.getLatency());
      else {
        Done = false;
        WorkList.push_back(SuccSU);
      }
    }

    if (Done) {
      WorkList.pop_back();
      if (MaxSuccHeight != Cur->Height) {
        Cur->setHeightDirty();
        Cur->Height = MaxSuccHeight;
      }
      Cur->isHeightCurrent = true;
    }
  } while (!WorkList.empty());
}

void SUnit::biasCriticalPath() {
  if (NumPreds < 2)
    return;

  SUnit::pred_iterator BestI = Preds.begin();
  unsigned MaxDepth = BestI->getSUnit()->getDepth();
  for (SUnit::pred_iterator I = std::next(BestI), E = Preds.end(); I != E;
       ++I) {
    if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth)
      BestI = I;
  }

  if (BestI != Preds.begin())
    std::swap(*Preds.begin(), *BestI);
}

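// Worked example (hypothetical depths): if Preds holds data edges from SU(3)
// at depth 1 and SU(7) at depth 4, the loop above selects the SU(7) edge and
// swaps it into Preds[0], so a heuristic that only examines Preds.front()
// sees the deepest (likely critical) predecessor first.
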
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SUnit::dumpAttributes() const {
  dbgs() << "  # preds left       : " << NumPredsLeft << "\n";
  dbgs() << "  # succs left       : " << NumSuccsLeft << "\n";
  if (WeakPredsLeft)
    dbgs() << "  # weak preds left  : " << WeakPredsLeft << "\n";
  if (WeakSuccsLeft)
    dbgs() << "  # weak succs left  : " << WeakSuccsLeft << "\n";
  dbgs() << "  # rdefs left       : " << NumRegDefsLeft << "\n";
  dbgs() << "  Latency            : " << Latency << "\n";
  dbgs() << "  Depth              : " << getDepth() << "\n";
  dbgs() << "  Height             : " << getHeight() << "\n";
}

LLVM_DUMP_METHOD void ScheduleDAG::dumpNodeName(const SUnit &SU) const {
  if (&SU == &EntrySU)
    dbgs() << "EntrySU";
  else if (&SU == &ExitSU)
    dbgs() << "ExitSU";
  else
    dbgs() << "SU(" << SU.NodeNum << ")";
}

LLVM_DUMP_METHOD void ScheduleDAG::dumpNodeAll(const SUnit &SU) const {
  dumpNode(SU);
  SU.dumpAttributes();
  if (SU.Preds.size() > 0) {
    dbgs() << "  Predecessors:\n";
    for (const SDep &Dep : SU.Preds) {
      dbgs() << "    ";
      dumpNodeName(*Dep.getSUnit());
      dbgs() << ": ";
      Dep.dump(TRI);
      dbgs() << '\n';
    }
  }
  if (SU.Succs.size() > 0) {
    dbgs() << "  Successors:\n";
    for (const SDep &Dep : SU.Succs) {
      dbgs() << "    ";
      dumpNodeName(*Dep.getSUnit());
      dbgs() << ": ";
      Dep.dump(TRI);
      dbgs() << '\n';
    }
  }
}
#endif

#ifndef NDEBUG
unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
  bool AnyNotSched = false;
  unsigned DeadNodes = 0;
  for (const SUnit &SUnit : SUnits) {
    if (!SUnit.isScheduled) {
      if (SUnit.NumPreds == 0 && SUnit.NumSuccs == 0) {
        ++DeadNodes;
        continue;
      }
      if (!AnyNotSched)
        dbgs() << "*** Scheduling failed! ***\n";
      dumpNodeAll(SUnit);
      dbgs() << "has not been scheduled!\n";
      AnyNotSched = true;
    }
    if (SUnit.isScheduled &&
        (isBottomUp ? SUnit.getHeight() : SUnit.getDepth()) >
            unsigned(std::numeric_limits<int>::max())) {
      if (!AnyNotSched)
        dbgs() << "*** Scheduling failed! ***\n";
      dumpNodeAll(SUnit);
      dbgs() << "has an unexpected "
             << (isBottomUp ? "Height" : "Depth") << " value!\n";
      AnyNotSched = true;
    }
    if (isBottomUp) {
      if (SUnit.NumSuccsLeft != 0) {
        if (!AnyNotSched)
          dbgs() << "*** Scheduling failed! ***\n";
        dumpNodeAll(SUnit);
        dbgs() << "has successors left!\n";
        AnyNotSched = true;
      }
    } else {
      if (SUnit.NumPredsLeft != 0) {
        if (!AnyNotSched)
          dbgs() << "*** Scheduling failed! ***\n";
        dumpNodeAll(SUnit);
        dbgs() << "has predecessors left!\n";
        AnyNotSched = true;
      }
    }
  }
  assert(!AnyNotSched);
  return SUnits.size() - DeadNodes;
}
#endif

void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
  // The idea of the algorithm is taken from
  // "Online algorithms for managing the topological order of
  // a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly.
  // This is the MNR algorithm, which was first introduced by
  // A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
  // "Maintaining a topological order under edge insertions".
  //
  // Short description of the algorithm:
  //
  // Topological ordering, ord, of a DAG maps each node to a topological
  // index so that for all edges X->Y it is the case that ord(X) < ord(Y).
  //
  // This means that if there is a path from the node X to the node Z,
  // then ord(X) < ord(Z).
  //
  // This property can be used to check for reachability of nodes:
  // if Z is reachable from X, then an insertion of the edge Z->X would
  // create a cycle.
  //
  // The algorithm first computes a topological ordering for the DAG by
  // initializing the Index2Node and Node2Index arrays and then tries to keep
  // the ordering up-to-date after edge insertions by reordering the DAG.
  //
  // On insertion of the edge X->Y, the algorithm first marks by calling DFS
  // the nodes reachable from Y, and then shifts them using Shift to lie
  // immediately after X in Index2Node.

  // Cancel pending updates, mark as valid.
  Dirty = false;
  Updates.clear();

  unsigned DAGSize = SUnits.size();
  std::vector<SUnit*> WorkList;
  WorkList.reserve(DAGSize);

  Index2Node.resize(DAGSize);
  Node2Index.resize(DAGSize);

  // Initialize the data structures.
  if (ExitSU)
    WorkList.push_back(ExitSU);
  for (SUnit &SU : SUnits) {
    int NodeNum = SU.NodeNum;
    unsigned Degree = SU.Succs.size();
    // Temporarily use the Node2Index array as scratch space for degree counts.
    Node2Index[NodeNum] = Degree;

    // Is it a node without dependencies?
    if (Degree == 0) {
      assert(SU.Succs.empty() && "SUnit should have no successors");
      // Collect leaf nodes.
      WorkList.push_back(&SU);
    }
  }

  int Id = DAGSize;
  while (!WorkList.empty()) {
    SUnit *SU = WorkList.back();
    WorkList.pop_back();
    if (SU->NodeNum < DAGSize)
      Allocate(SU->NodeNum, --Id);
    for (const SDep &PredDep : SU->Preds) {
      SUnit *SU = PredDep.getSUnit();
      if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum])
        // If all dependencies of the node are processed already,
        // then the node can be computed now.
        WorkList.push_back(SU);
    }
  }

  Visited.resize(DAGSize);
  NumTopoInits++;

#ifndef NDEBUG
  // Check correctness of the ordering.
  for (SUnit &SU : SUnits) {
    for (const SDep &PD : SU.Preds) {
      assert(Node2Index[SU.NodeNum] > Node2Index[PD.getSUnit()->NodeNum] &&
             "Wrong topological sorting");
    }
  }
#endif
}

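// Small illustration (hypothetical three-node DAG, not from the upstream
// comments): for edges SU(0)->SU(1)->SU(2), the sweep above numbers nodes
// from Id = DAGSize downwards starting at the leaves, yielding
// Node2Index = {0, 1, 2}, so every edge X->Y satisfies
// Node2Index[X] < Node2Index[Y]; the #ifndef NDEBUG block re-checks exactly
// this invariant.
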
void ScheduleDAGTopologicalSort::FixOrder() {
  // Recompute from scratch after new nodes have been added.
  if (Dirty) {
    InitDAGTopologicalSorting();
    return;
  }

  // Otherwise apply updates one-by-one.
  for (auto &U : Updates)
    AddPred(U.first, U.second);
  Updates.clear();
}

void ScheduleDAGTopologicalSort::AddPredQueued(SUnit *Y, SUnit *X) {
  // Recomputing the order from scratch is likely more efficient than applying
  // updates one-by-one for too many updates. The current cut-off is arbitrarily
  // chosen.
  Dirty = Dirty || Updates.size() > 10;

  if (Dirty)
    return;

  Updates.emplace_back(Y, X);
}

void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
  int UpperBound, LowerBound;
  LowerBound = Node2Index[Y->NodeNum];
  UpperBound = Node2Index[X->NodeNum];
  bool HasLoop = false;
  // Is Ord(Y) < Ord(X)? If so, the new edge X->Y violates the current order.
  if (LowerBound < UpperBound) {
    // Update the topological order.
    Visited.reset();
    DFS(Y, UpperBound, HasLoop);
    assert(!HasLoop && "Inserted edge creates a loop!");
    // Recompute topological indexes.
    Shift(Visited, LowerBound, UpperBound);
  }

  NumNewPredsAdded++;
}

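// Sketch of one online update (hypothetical indices): inserting the edge
// X->Y when Node2Index[Y] == 2 and Node2Index[X] == 5 violates the order, so
// DFS() marks everything reachable from Y with an index below 5 (flagging a
// loop if it ever reaches index 5), and Shift() then re-files the marked
// nodes immediately after X, restoring ord(X) < ord(Y) without a full
// InitDAGTopologicalSorting().
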
void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
  // InitDAGTopologicalSorting();
}

void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
                                     bool &HasLoop) {
  std::vector<const SUnit*> WorkList;
  WorkList.reserve(SUnits.size());

  WorkList.push_back(SU);
  do {
    SU = WorkList.back();
    WorkList.pop_back();
    Visited.set(SU->NodeNum);
    for (const SDep &SuccDep : llvm::reverse(SU->Succs)) {
      unsigned s = SuccDep.getSUnit()->NodeNum;
      // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
      if (s >= Node2Index.size())
        continue;
      if (Node2Index[s] == UpperBound) {
        HasLoop = true;
        return;
      }
      // Visit successors if not already visited and in affected region.
      if (!Visited.test(s) && Node2Index[s] < UpperBound) {
        WorkList.push_back(SuccDep.getSUnit());
      }
    }
  } while (!WorkList.empty());
}

std::vector<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit &StartSU,
                                                         const SUnit &TargetSU,
                                                         bool &Success) {
  std::vector<const SUnit*> WorkList;
  int LowerBound = Node2Index[StartSU.NodeNum];
  int UpperBound = Node2Index[TargetSU.NodeNum];
  bool Found = false;
  BitVector VisitedBack;
  std::vector<int> Nodes;

  if (LowerBound > UpperBound) {
    Success = false;
    return Nodes;
  }

  WorkList.reserve(SUnits.size());
  Visited.reset();

  // Starting from StartSU, visit all successors up
  // to UpperBound.
  WorkList.push_back(&StartSU);
  do {
    const SUnit *SU = WorkList.back();
    WorkList.pop_back();
    for (const SDep &SD : llvm::reverse(SU->Succs)) {
      const SUnit *Succ = SD.getSUnit();
      unsigned s = Succ->NodeNum;
      // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
      if (Succ->isBoundaryNode())
        continue;
      if (Node2Index[s] == UpperBound) {
        Found = true;
        continue;
      }
      // Visit successors if not already visited and in affected region.
      if (!Visited.test(s) && Node2Index[s] < UpperBound) {
        Visited.set(s);
        WorkList.push_back(Succ);
      }
    }
  } while (!WorkList.empty());

  if (!Found) {
    Success = false;
    return Nodes;
  }

  WorkList.clear();
  VisitedBack.resize(SUnits.size());
  Found = false;

  // Starting from TargetSU, visit all predecessors up
  // to LowerBound. SUs that are visited by the two
  // passes are added to Nodes.
  WorkList.push_back(&TargetSU);
  do {
    const SUnit *SU = WorkList.back();
    WorkList.pop_back();
    for (const SDep &SD : llvm::reverse(SU->Preds)) {
      const SUnit *Pred = SD.getSUnit();
      unsigned s = Pred->NodeNum;
      // Edges to non-SUnits are allowed but ignored (e.g. EntrySU).
      if (Pred->isBoundaryNode())
        continue;
      if (Node2Index[s] == LowerBound) {
        Found = true;
        continue;
      }
      if (!VisitedBack.test(s) && Visited.test(s)) {
        VisitedBack.set(s);
        WorkList.push_back(Pred);
        Nodes.push_back(s);
      }
    }
  } while (!WorkList.empty());

  assert(Found && "Error in SUnit Graph!");
  Success = true;
  return Nodes;
}

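// Design note: the forward pass over Succs and the backward pass over Preds
// intersect exactly on the SUnits that lie on some StartSU -> TargetSU path,
// so Nodes collects the interior of that sub-graph; the Found flag checked by
// both passes guarantees the two frontiers actually met.
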
void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
                                       int UpperBound) {
  std::vector<int> L;
  int shift = 0;
  int i;

  for (i = LowerBound; i <= UpperBound; ++i) {
    // w is node at topological index i.
    int w = Index2Node[i];
    if (Visited.test(w)) {
      // Unmark.
      Visited.reset(w);
      L.push_back(w);
      shift = shift + 1;
    } else {
      Allocate(w, i - shift);
    }
  }

  for (unsigned LI : L) {
    Allocate(LI, i - shift);
    i = i + 1;
  }
}

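// Worked example (hypothetical): with Index2Node holding {A, B, C, D} at
// indices 2..5, LowerBound == 2, UpperBound == 5, and B and C marked in
// Visited, the first loop re-files A and D at indices 2 and 3, and the second
// loop appends B and C at indices 4 and 5, leaving the visited (reachable)
// nodes after the unvisited ones.
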
bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
  FixOrder();
  // Is SU reachable from TargetSU via successor edges?
  if (IsReachable(SU, TargetSU))
    return true;
  for (const SDep &PredDep : TargetSU->Preds)
    if (PredDep.isAssignedRegDep() &&
        IsReachable(SU, PredDep.getSUnit()))
      return true;
  return false;
}

void ScheduleDAGTopologicalSort::AddSUnitWithoutPredecessors(const SUnit *SU) {
  assert(SU->NodeNum == Index2Node.size() &&
         "Node can only be added at the end");
  assert(SU->NumPreds == 0 && "Can only add SU's with no predecessors");
  Node2Index.push_back(Index2Node.size());
  Index2Node.push_back(SU->NodeNum);
  Visited.resize(Node2Index.size());
}

bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
                                             const SUnit *TargetSU) {
  assert(TargetSU != nullptr && "Invalid target SUnit");
  assert(SU != nullptr && "Invalid SUnit");
  FixOrder();
  // If insertion of the edge SU->TargetSU would create a cycle
  // then there is a path from TargetSU to SU.
  int UpperBound, LowerBound;
  LowerBound = Node2Index[TargetSU->NodeNum];
  UpperBound = Node2Index[SU->NodeNum];
  bool HasLoop = false;
  // Is Ord(TargetSU) < Ord(SU)?
  if (LowerBound < UpperBound) {
    Visited.reset();
    // There may be a path from TargetSU to SU. Check for it.
    DFS(TargetSU, UpperBound, HasLoop);
  }
  return HasLoop;
}

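// A minimal usage sketch (Topo, SUa, and SUb are hypothetical): list
// schedulers query the topology before mutating the DAG, e.g.
//
//   ScheduleDAGTopologicalSort Topo(SUnits, &ExitSU);
//   Topo.InitDAGTopologicalSorting();
//   if (!Topo.WillCreateCycle(SUa, SUb))         // would SUb -> SUa cycle?
//     SUa->addPred(SDep(SUb, SDep::Artificial)); // safe to add the edge
//
// IsReachable() answers the underlying reachability query directly and only
// runs the DFS when the topological indices leave the answer in doubt.
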
void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
  Node2Index[n] = index;
  Index2Node[index] = n;
}

ScheduleDAGTopologicalSort::
ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu)
    : SUnits(sunits), ExitSU(exitsu) {}

ScheduleHazardRecognizer::~ScheduleHazardRecognizer() = default;