//===- ScheduleDAG.cpp - Implement the ScheduleDAG class ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Implements the ScheduleDAG class, which is a base class used by
/// scheduling implementation classes.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <limits>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "pre-RA-sched"

STATISTIC(NumNewPredsAdded, "Number of times a single predecessor was added");
STATISTIC(NumTopoInits,
          "Number of times the topological order has been recomputed");

static cl::opt<bool> StressSchedOpt(
    "stress-sched", cl::Hidden, cl::init(false),
    cl::desc("Stress test instruction scheduling"));

void SchedulingPriorityQueue::anchor() {}

ScheduleDAG::ScheduleDAG(MachineFunction &mf)
    : TM(mf.getTarget()), TII(mf.getSubtarget().getInstrInfo()),
      TRI(mf.getSubtarget().getRegisterInfo()), MF(mf),
      MRI(mf.getRegInfo()) {
#ifndef NDEBUG
  StressSched = StressSchedOpt;
#endif
}

ScheduleDAG::~ScheduleDAG() = default;

void ScheduleDAG::clearDAG() {
  SUnits.clear();
  EntrySU = SUnit();
  ExitSU = SUnit();
}

/// Returns the MCInstrDesc of this SDNode or NULL.
const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
  if (!Node || !Node->isMachineOpcode()) return nullptr;
  return &TII->get(Node->getMachineOpcode());
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SDep::dump(const TargetRegisterInfo *TRI) const {
  switch (getKind()) {
  case Data:   dbgs() << "Data"; break;
  case Anti:   dbgs() << "Anti"; break;
  case Output: dbgs() << "Out "; break;
  case Order:  dbgs() << "Ord "; break;
  }

  switch (getKind()) {
  case Data:
    dbgs() << " Latency=" << getLatency();
    if (TRI && isAssignedRegDep())
      dbgs() << " Reg=" << printReg(getReg(), TRI);
    break;
  case Anti:
  case Output:
    dbgs() << " Latency=" << getLatency();
    break;
  case Order:
    dbgs() << " Latency=" << getLatency();
    switch(Contents.OrdKind) {
    case Barrier:      dbgs() << " Barrier"; break;
    case MayAliasMem:
    case MustAliasMem: dbgs() << " Memory"; break;
    case Artificial:   dbgs() << " Artificial"; break;
    case Weak:         dbgs() << " Weak"; break;
    case Cluster:      dbgs() << " Cluster"; break;
    }
    break;
  }
}
#endif

/// Adds the specified edge as a pred of the current node if not already.
/// It also adds the current node as a successor of the specified node.
bool SUnit::addPred(const SDep &D, bool Required) {
  // If this node already has this dependence, don't add a redundant one.
  for (SDep &PredDep : Preds) {
    // Zero-latency weak edges may be added purely for heuristic ordering. Don't
    // add them if another kind of edge already exists.
    if (!Required && PredDep.getSUnit() == D.getSUnit())
      return false;
    if (PredDep.overlaps(D)) {
      // Extend the latency if needed. Equivalent to
      // removePred(PredDep) + addPred(D).
      if (PredDep.getLatency() < D.getLatency()) {
        SUnit *PredSU = PredDep.getSUnit();
        // Find the corresponding successor in N.
        SDep ForwardD = PredDep;
        ForwardD.setSUnit(this);
        for (SDep &SuccDep : PredSU->Succs) {
          if (SuccDep == ForwardD) {
            SuccDep.setLatency(D.getLatency());
            break;
          }
        }
        PredDep.setLatency(D.getLatency());
      }
      return false;
    }
  }
  // Now add a corresponding succ to N.
  SDep P = D;
  P.setSUnit(this);
  SUnit *N = D.getSUnit();
  // Update the bookkeeping.
  if (D.getKind() == SDep::Data) {
    assert(NumPreds < std::numeric_limits<unsigned>::max() &&
           "NumPreds will overflow!");
    assert(N->NumSuccs < std::numeric_limits<unsigned>::max() &&
           "NumSuccs will overflow!");
    ++NumPreds;
    ++N->NumSuccs;
  }
  if (!N->isScheduled) {
    if (D.isWeak()) {
      ++WeakPredsLeft;
    } else {
      assert(NumPredsLeft < std::numeric_limits<unsigned>::max() &&
             "NumPredsLeft will overflow!");
      ++NumPredsLeft;
    }
  }
  if (!isScheduled) {
    if (D.isWeak()) {
      ++N->WeakSuccsLeft;
    } else {
      assert(N->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
             "NumSuccsLeft will overflow!");
      ++N->NumSuccsLeft;
    }
  }
  Preds.push_back(D);
  N->Succs.push_back(P);
  if (P.getLatency() != 0) {
    this->setDepthDirty();
    N->setHeightDirty();
  }
  return true;
}

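// Illustrative sketch (editorial, not part of the original file): a scheduler
// materializes both directions of an edge with a single addPred call. The
// UseSU/DefSU pointers below are hypothetical and assumed to come from an
// existing DAG.
//
//   SDep Dep(DefSU, SDep::Artificial); // ordering-only edge, zero latency
//   if (UseSU->addPred(Dep))
//     ; // edge was new: UseSU->Preds and DefSU->Succs were both updated
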
/// Removes the specified edge as a pred of the current node if found.
/// It also removes the current node from the successor list of the node.
void SUnit::removePred(const SDep &D) {
  // Find the matching predecessor.
  SmallVectorImpl<SDep>::iterator I = llvm::find(Preds, D);
  if (I == Preds.end())
    return;
  // Find the corresponding successor in N.
  SDep P = D;
  P.setSUnit(this);
  SUnit *N = D.getSUnit();
  SmallVectorImpl<SDep>::iterator Succ = llvm::find(N->Succs, P);
  assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
  N->Succs.erase(Succ);
  Preds.erase(I);
  // Update the bookkeeping.
  if (P.getKind() == SDep::Data) {
    assert(NumPreds > 0 && "NumPreds will underflow!");
    assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
    --NumPreds;
    --N->NumSuccs;
  }
  if (!N->isScheduled) {
    if (D.isWeak()) {
      assert(WeakPredsLeft > 0 && "WeakPredsLeft will underflow!");
      --WeakPredsLeft;
    } else {
      assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
      --NumPredsLeft;
    }
  }
  if (!isScheduled) {
    if (D.isWeak()) {
      assert(N->WeakSuccsLeft > 0 && "WeakSuccsLeft will underflow!");
      --N->WeakSuccsLeft;
    } else {
      assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
      --N->NumSuccsLeft;
    }
  }
  if (P.getLatency() != 0) {
    this->setDepthDirty();
    N->setHeightDirty();
  }
}

void SUnit::setDepthDirty() {
  if (!isDepthCurrent) return;
  SmallVector<SUnit *, 8> WorkList;
  WorkList.push_back(this);
  do {
    SUnit *SU = WorkList.pop_back_val();
    SU->isDepthCurrent = false;
    for (SDep &SuccDep : SU->Succs) {
      SUnit *SuccSU = SuccDep.getSUnit();
      if (SuccSU->isDepthCurrent)
        WorkList.push_back(SuccSU);
    }
  } while (!WorkList.empty());
}

void SUnit::setHeightDirty() {
  if (!isHeightCurrent) return;
  SmallVector<SUnit *, 8> WorkList;
  WorkList.push_back(this);
  do {
    SUnit *SU = WorkList.pop_back_val();
    SU->isHeightCurrent = false;
    for (SDep &PredDep : SU->Preds) {
      SUnit *PredSU = PredDep.getSUnit();
      if (PredSU->isHeightCurrent)
        WorkList.push_back(PredSU);
    }
  } while (!WorkList.empty());
}

void SUnit::setDepthToAtLeast(unsigned NewDepth) {
  if (NewDepth <= getDepth())
    return;
  setDepthDirty();
  Depth = NewDepth;
  isDepthCurrent = true;
}

void SUnit::setHeightToAtLeast(unsigned NewHeight) {
  if (NewHeight <= getHeight())
    return;
  setHeightDirty();
  Height = NewHeight;
  isHeightCurrent = true;
}

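// Editorial note: the two setters above pair with setDepthDirty and
// setHeightDirty. Depth and Height are cached values; the dirty walks
// invalidate the affected region of the DAG, and getDepth()/getHeight()
// lazily call ComputeDepth()/ComputeHeight() below to refresh a stale value
// on demand.
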
/// Calculates the maximal path from the node to the entry.
void SUnit::ComputeDepth() {
  SmallVector<SUnit *, 8> WorkList;
  WorkList.push_back(this);
  do {
    SUnit *Cur = WorkList.back();

    bool Done = true;
    unsigned MaxPredDepth = 0;
    for (const SDep &PredDep : Cur->Preds) {
      SUnit *PredSU = PredDep.getSUnit();
      if (PredSU->isDepthCurrent)
        MaxPredDepth = std::max(MaxPredDepth,
                                PredSU->Depth + PredDep.getLatency());
      else {
        Done = false;
        WorkList.push_back(PredSU);
      }
    }

    if (Done) {
      WorkList.pop_back();
      if (MaxPredDepth != Cur->Depth) {
        Cur->setDepthDirty();
        Cur->Depth = MaxPredDepth;
      }
      Cur->isDepthCurrent = true;
    }
  } while (!WorkList.empty());
}

/// Calculates the maximal path from the node to the exit.
void SUnit::ComputeHeight() {
  SmallVector<SUnit *, 8> WorkList;
  WorkList.push_back(this);
  do {
    SUnit *Cur = WorkList.back();

    bool Done = true;
    unsigned MaxSuccHeight = 0;
    for (const SDep &SuccDep : Cur->Succs) {
      SUnit *SuccSU = SuccDep.getSUnit();
      if (SuccSU->isHeightCurrent)
        MaxSuccHeight = std::max(MaxSuccHeight,
                                 SuccSU->Height + SuccDep.getLatency());
      else {
        Done = false;
        WorkList.push_back(SuccSU);
      }
    }

    if (Done) {
      WorkList.pop_back();
      if (MaxSuccHeight != Cur->Height) {
        Cur->setHeightDirty();
        Cur->Height = MaxSuccHeight;
      }
      Cur->isHeightCurrent = true;
    }
  } while (!WorkList.empty());
}

/// Orders this node's predecessor edges such that the critical path edge
/// occurs first.
void SUnit::biasCriticalPath() {
  if (NumPreds < 2)
    return;

  SUnit::pred_iterator BestI = Preds.begin();
  unsigned MaxDepth = BestI->getSUnit()->getDepth();
  for (SUnit::pred_iterator I = std::next(BestI), E = Preds.end(); I != E;
       ++I) {
    if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth)
      BestI = I;
  }

  if (BestI != Preds.begin())
    std::swap(*Preds.begin(), *BestI);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SUnit::dumpAttributes() const {
  dbgs() << "  # preds left       : " << NumPredsLeft << "\n";
  dbgs() << "  # succs left       : " << NumSuccsLeft << "\n";
  if (WeakPredsLeft)
    dbgs() << "  # weak preds left  : " << WeakPredsLeft << "\n";
  if (WeakSuccsLeft)
    dbgs() << "  # weak succs left  : " << WeakSuccsLeft << "\n";
  dbgs() << "  # rdefs left       : " << NumRegDefsLeft << "\n";
  dbgs() << "  Latency            : " << Latency << "\n";
  dbgs() << "  Depth              : " << getDepth() << "\n";
  dbgs() << "  Height             : " << getHeight() << "\n";
}

LLVM_DUMP_METHOD void ScheduleDAG::dumpNodeName(const SUnit &SU) const {
  if (&SU == &EntrySU)
    dbgs() << "EntrySU";
  else if (&SU == &ExitSU)
    dbgs() << "ExitSU";
  else
    dbgs() << "SU(" << SU.NodeNum << ")";
}

LLVM_DUMP_METHOD void ScheduleDAG::dumpNodeAll(const SUnit &SU) const {
  dumpNode(SU);
  SU.dumpAttributes();
  if (SU.Preds.size() > 0) {
    dbgs() << "  Predecessors:\n";
    for (const SDep &Dep : SU.Preds) {
      dbgs() << "    ";
      dumpNodeName(*Dep.getSUnit());
      dbgs() << ": ";
      Dep.dump(TRI);
      dbgs() << '\n';
    }
  }
  if (SU.Succs.size() > 0) {
    dbgs() << "  Successors:\n";
    for (const SDep &Dep : SU.Succs) {
      dbgs() << "    ";
      dumpNodeName(*Dep.getSUnit());
      dbgs() << ": ";
      Dep.dump(TRI);
      dbgs() << '\n';
    }
  }
}
#endif

#ifndef NDEBUG
/// Verifies that all SUnits were scheduled and that their state is
/// consistent. Returns the number of scheduled SUnits.
unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
  bool AnyNotSched = false;
  unsigned DeadNodes = 0;
  for (const SUnit &SUnit : SUnits) {
    if (!SUnit.isScheduled) {
      if (SUnit.NumPreds == 0 && SUnit.NumSuccs == 0) {
        ++DeadNodes;
        continue;
      }
      if (!AnyNotSched)
        dbgs() << "*** Scheduling failed! ***\n";
      dumpNode(SUnit);
      dbgs() << "has not been scheduled!\n";
      AnyNotSched = true;
    }
    if (SUnit.isScheduled &&
        (isBottomUp ? SUnit.getHeight() : SUnit.getDepth()) >
          unsigned(std::numeric_limits<int>::max())) {
      if (!AnyNotSched)
        dbgs() << "*** Scheduling failed! ***\n";
      dumpNode(SUnit);
      dbgs() << "has an unexpected "
             << (isBottomUp ? "Height" : "Depth") << " value!\n";
      AnyNotSched = true;
    }
    if (isBottomUp) {
      if (SUnit.NumSuccsLeft != 0) {
        if (!AnyNotSched)
          dbgs() << "*** Scheduling failed! ***\n";
        dumpNode(SUnit);
        dbgs() << "has successors left!\n";
        AnyNotSched = true;
      }
    } else {
      if (SUnit.NumPredsLeft != 0) {
        if (!AnyNotSched)
          dbgs() << "*** Scheduling failed! ***\n";
        dumpNode(SUnit);
        dbgs() << "has predecessors left!\n";
        AnyNotSched = true;
      }
    }
  }
  assert(!AnyNotSched);
  return SUnits.size() - DeadNodes;
}
#endif

void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
  // The idea of the algorithm is taken from
  // "Online algorithms for managing the topological order of
  // a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
  // This is the MNR algorithm, which was first introduced by
  // A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
  // "Maintaining a topological order under edge insertions".
  //
  // Short description of the algorithm:
  //
  // Topological ordering, ord, of a DAG maps each node to a topological
  // index so that for all edges X->Y it is the case that ord(X) < ord(Y).
  //
  // This means that if there is a path from the node X to the node Z,
  // then ord(X) < ord(Z).
  //
  // This property can be used to check for reachability of nodes:
  // if Z is reachable from X, then an insertion of the edge Z->X would
  // create a cycle.
  //
  // The algorithm first computes a topological ordering for the DAG by
  // initializing the Index2Node and Node2Index arrays and then tries to keep
  // the ordering up-to-date after edge insertions by reordering the DAG.
  //
  // On insertion of the edge X->Y, the algorithm first marks by calling DFS
  // the nodes reachable from Y, and then shifts them using Shift to lie
  // immediately after X in Index2Node.
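  //
  // Editorial worked example (not from the original comment): take nodes
  // A, B, C with edges A->B and A->C, and ord(A)=0, ord(C)=1, ord(B)=2.
  // Inserting the edge B->C breaks the invariant, since ord(B) > ord(C).
  // DFS from C marks {C}, and Shift then moves C immediately after B,
  // giving ord(A)=0, ord(B)=1, ord(C)=2, so ord(X) < ord(Y) again holds
  // for every edge X->Y.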

  // Cancel pending updates, mark as valid.
  Dirty = false;
  Updates.clear();

  unsigned DAGSize = SUnits.size();
  std::vector<SUnit *> WorkList;
  WorkList.reserve(DAGSize);

  Index2Node.resize(DAGSize);
  Node2Index.resize(DAGSize);

  // Initialize the data structures.
  if (ExitSU)
    WorkList.push_back(ExitSU);
  for (SUnit &SU : SUnits) {
    int NodeNum = SU.NodeNum;
    unsigned Degree = SU.Succs.size();
    // Temporarily use the Node2Index array as scratch space for degree counts.
    Node2Index[NodeNum] = Degree;

    // Is it a node without dependencies?
    if (Degree == 0) {
      assert(SU.Succs.empty() && "SUnit should have no successors");
      // Collect leaf nodes.
      WorkList.push_back(&SU);
    }
  }

  int Id = DAGSize;
  while (!WorkList.empty()) {
    SUnit *SU = WorkList.back();
    WorkList.pop_back();
    if (SU->NodeNum < DAGSize)
      Allocate(SU->NodeNum, --Id);
    for (const SDep &PredDep : SU->Preds) {
      SUnit *SU = PredDep.getSUnit();
      if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum])
        // If all dependencies of the node are processed already,
        // then the node can be computed now.
        WorkList.push_back(SU);
    }
  }

  Visited.resize(DAGSize);
  NumTopoInits++;

#ifndef NDEBUG
  // Check correctness of the ordering.
  for (SUnit &SU : SUnits) {
    for (const SDep &PD : SU.Preds) {
      assert(Node2Index[SU.NodeNum] > Node2Index[PD.getSUnit()->NodeNum] &&
             "Wrong topological sorting");
    }
  }
#endif
}

void ScheduleDAGTopologicalSort::FixOrder() {
  // Recompute from scratch after new nodes have been added.
  if (Dirty) {
    InitDAGTopologicalSorting();
    return;
  }

  // Otherwise apply updates one-by-one.
  for (auto &U : Updates)
    AddPred(U.first, U.second);
  Updates.clear();
}

void ScheduleDAGTopologicalSort::AddPredQueued(SUnit *Y, SUnit *X) {
  // Recomputing the order from scratch is likely more efficient than applying
  // updates one-by-one for too many updates. The current cut-off is arbitrarily
  // chosen.
  Dirty = Dirty || Updates.size() > 10;

  if (Dirty)
    return;

  Updates.emplace_back(Y, X);
}

void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
  int UpperBound, LowerBound;
  LowerBound = Node2Index[Y->NodeNum];
  UpperBound = Node2Index[X->NodeNum];
  bool HasLoop = false;
  // Is Ord(X) < Ord(Y) ?
  if (LowerBound < UpperBound) {
    // Update the topological order.
    Visited.reset();
    DFS(Y, UpperBound, HasLoop);
    assert(!HasLoop && "Inserted edge creates a loop!");
    // Recompute topological indexes.
    Shift(Visited, LowerBound, UpperBound);
  }

  NumNewPredsAdded++;
}

void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
  // Removing an edge only relaxes the ordering constraints, so a valid
  // topological order stays valid and no update is required here.
  // InitDAGTopologicalSorting();
}

void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
                                     bool &HasLoop) {
  std::vector<const SUnit *> WorkList;
  WorkList.reserve(SUnits.size());

  WorkList.push_back(SU);
  do {
    SU = WorkList.back();
    WorkList.pop_back();
    Visited.set(SU->NodeNum);
    for (const SDep &SuccDep : llvm::reverse(SU->Succs)) {
      unsigned s = SuccDep.getSUnit()->NodeNum;
      // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
      if (s >= Node2Index.size())
        continue;
      if (Node2Index[s] == UpperBound) {
        HasLoop = true;
        return;
      }
      // Visit successors if not already and in affected region.
      if (!Visited.test(s) && Node2Index[s] < UpperBound) {
        WorkList.push_back(SuccDep.getSUnit());
      }
    }
  } while (!WorkList.empty());
}

std::vector<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit &StartSU,
                                                         const SUnit &TargetSU,
                                                         bool &Success) {
  std::vector<const SUnit *> WorkList;
  int LowerBound = Node2Index[StartSU.NodeNum];
  int UpperBound = Node2Index[TargetSU.NodeNum];
  bool Found = false;
  BitVector VisitedBack;
  std::vector<int> Nodes;

  if (LowerBound > UpperBound) {
    Success = false;
    return Nodes;
  }

  WorkList.reserve(SUnits.size());
  Visited.reset();

  // Starting from StartSU, visit all successors up
  // to UpperBound.
  WorkList.push_back(&StartSU);
  do {
    const SUnit *SU = WorkList.back();
    WorkList.pop_back();
    for (const SDep &SD : llvm::reverse(SU->Succs)) {
      const SUnit *Succ = SD.getSUnit();
      unsigned s = Succ->NodeNum;
      // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
      if (Succ->isBoundaryNode())
        continue;
      if (Node2Index[s] == UpperBound) {
        Found = true;
        continue;
      }
      // Visit successors if not already and in affected region.
      if (!Visited.test(s) && Node2Index[s] < UpperBound) {
        Visited.set(s);
        WorkList.push_back(Succ);
      }
    }
  } while (!WorkList.empty());

  if (!Found) {
    Success = false;
    return Nodes;
  }

  VisitedBack.resize(SUnits.size());
  Found = false;

  // Starting from TargetSU, visit all predecessors up
  // to LowerBound. SUs that are visited by the two
  // passes are added to Nodes.
  WorkList.push_back(&TargetSU);
  do {
    const SUnit *SU = WorkList.back();
    WorkList.pop_back();
    for (const SDep &SD : llvm::reverse(SU->Preds)) {
      const SUnit *Pred = SD.getSUnit();
      unsigned s = Pred->NodeNum;
      // Edges to non-SUnits are allowed but ignored (e.g. EntrySU).
      if (Pred->isBoundaryNode())
        continue;
      if (Node2Index[s] == LowerBound) {
        Found = true;
        continue;
      }
      if (!VisitedBack.test(s) && Visited.test(s)) {
        VisitedBack.set(s);
        WorkList.push_back(Pred);
        Nodes.push_back(s);
      }
    }
  } while (!WorkList.empty());

  assert(Found && "Error in SUnit Graph!");
  Success = true;
  return Nodes;
}

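// Editorial note: the forward pass over Succs and the backward pass over
// Preds intersect, so Nodes ends up holding exactly the units lying on some
// path from StartSU to TargetSU (endpoints excluded); Success is false when
// no such path exists within the [LowerBound, UpperBound] index window.
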
void ScheduleDAGTopologicalSort::Shift(BitVector &Visited, int LowerBound,
                                       int UpperBound) {
  std::vector<int> L;
  int shift = 0;
  int i;

  for (i = LowerBound; i <= UpperBound; ++i) {
    // w is node at topological index i.
    int w = Index2Node[i];
    if (Visited.test(w)) {
      // Unmark.
      Visited.reset(w);
      L.push_back(w);
      shift = shift + 1;
    } else {
      Allocate(w, i - shift);
    }
  }

  for (unsigned LI : L) {
    Allocate(LI, i - shift);
    i = i + 1;
  }
}

bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
  FixOrder();
  // Is SU reachable from TargetSU via successor edges?
  if (IsReachable(SU, TargetSU))
    return true;
  for (const SDep &PredDep : TargetSU->Preds)
    if (PredDep.isAssignedRegDep() &&
        IsReachable(SU, PredDep.getSUnit()))
      return true;
  return false;
}

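// Illustrative sketch (editorial): callers normally gate edge insertion on
// this check, e.g.
//
//   if (!Topo.WillCreateCycle(SuccSU, PredSU))
//     Topo.AddPredQueued(SuccSU, PredSU); // record edge PredSU -> SuccSU
//
// where Topo is a hypothetical name for the scheduler's
// ScheduleDAGTopologicalSort instance.
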
void ScheduleDAGTopologicalSort::AddSUnitWithoutPredecessors(const SUnit *SU) {
  assert(SU->NodeNum == Index2Node.size() &&
         "Node can only be added at the end");
  assert(SU->NumPreds == 0 && "Can only add SU's with no predecessors");
  Node2Index.push_back(Index2Node.size());
  Index2Node.push_back(SU->NodeNum);
  Visited.resize(Node2Index.size());
}

bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
                                             const SUnit *TargetSU) {
  FixOrder();
  // If insertion of the edge SU->TargetSU would create a cycle
  // then there is a path from TargetSU to SU.
  int UpperBound, LowerBound;
  LowerBound = Node2Index[TargetSU->NodeNum];
  UpperBound = Node2Index[SU->NodeNum];
  bool HasLoop = false;
  // Is Ord(TargetSU) < Ord(SU) ?
  if (LowerBound < UpperBound) {
    Visited.reset();
    // There may be a path from TargetSU to SU. Check for it.
    DFS(TargetSU, UpperBound, HasLoop);
  }
  return HasLoop;
}

void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
  Node2Index[n] = index;
  Index2Node[index] = n;
}

ScheduleDAGTopologicalSort::
ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu)
    : SUnits(sunits), ExitSU(exitsu) {}

ScheduleHazardRecognizer::~ScheduleHazardRecognizer() = default;