1 //===- ScheduleDAG.cpp - Implement the ScheduleDAG class ------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// \file Implements the ScheduleDAG class, which is a base class used by
10 /// scheduling implementation classes.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/CodeGen/ScheduleDAG.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/iterator_range.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
20 #include "llvm/CodeGen/SelectionDAGNodes.h"
21 #include "llvm/CodeGen/TargetInstrInfo.h"
22 #include "llvm/CodeGen/TargetRegisterInfo.h"
23 #include "llvm/CodeGen/TargetSubtargetInfo.h"
24 #include "llvm/Config/llvm-config.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Compiler.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/raw_ostream.h"
38 #define DEBUG_TYPE "pre-RA-sched"
41 static cl::opt
<bool> StressSchedOpt(
42 "stress-sched", cl::Hidden
, cl::init(false),
43 cl::desc("Stress test instruction scheduling"));
46 void SchedulingPriorityQueue::anchor() {}
48 ScheduleDAG::ScheduleDAG(MachineFunction
&mf
)
49 : TM(mf
.getTarget()), TII(mf
.getSubtarget().getInstrInfo()),
50 TRI(mf
.getSubtarget().getRegisterInfo()), MF(mf
),
51 MRI(mf
.getRegInfo()) {
53 StressSched
= StressSchedOpt
;
57 ScheduleDAG::~ScheduleDAG() = default;
59 void ScheduleDAG::clearDAG() {
65 const MCInstrDesc
*ScheduleDAG::getNodeDesc(const SDNode
*Node
) const {
66 if (!Node
|| !Node
->isMachineOpcode()) return nullptr;
67 return &TII
->get(Node
->getMachineOpcode());
70 LLVM_DUMP_METHOD
void SDep::dump(const TargetRegisterInfo
*TRI
) const {
72 case Data
: dbgs() << "Data"; break;
73 case Anti
: dbgs() << "Anti"; break;
74 case Output
: dbgs() << "Out "; break;
75 case Order
: dbgs() << "Ord "; break;
80 dbgs() << " Latency=" << getLatency();
81 if (TRI
&& isAssignedRegDep())
82 dbgs() << " Reg=" << printReg(getReg(), TRI
);
86 dbgs() << " Latency=" << getLatency();
89 dbgs() << " Latency=" << getLatency();
90 switch(Contents
.OrdKind
) {
91 case Barrier
: dbgs() << " Barrier"; break;
93 case MustAliasMem
: dbgs() << " Memory"; break;
94 case Artificial
: dbgs() << " Artificial"; break;
95 case Weak
: dbgs() << " Weak"; break;
96 case Cluster
: dbgs() << " Cluster"; break;
102 bool SUnit::addPred(const SDep
&D
, bool Required
) {
103 // If this node already has this dependence, don't add a redundant one.
104 for (SDep
&PredDep
: Preds
) {
105 // Zero-latency weak edges may be added purely for heuristic ordering. Don't
106 // add them if another kind of edge already exists.
107 if (!Required
&& PredDep
.getSUnit() == D
.getSUnit())
109 if (PredDep
.overlaps(D
)) {
110 // Extend the latency if needed. Equivalent to
111 // removePred(PredDep) + addPred(D).
112 if (PredDep
.getLatency() < D
.getLatency()) {
113 SUnit
*PredSU
= PredDep
.getSUnit();
114 // Find the corresponding successor in N.
115 SDep ForwardD
= PredDep
;
116 ForwardD
.setSUnit(this);
117 for (SDep
&SuccDep
: PredSU
->Succs
) {
118 if (SuccDep
== ForwardD
) {
119 SuccDep
.setLatency(D
.getLatency());
123 PredDep
.setLatency(D
.getLatency());
128 // Now add a corresponding succ to N.
131 SUnit
*N
= D
.getSUnit();
132 // Update the bookkeeping.
133 if (D
.getKind() == SDep::Data
) {
134 assert(NumPreds
< std::numeric_limits
<unsigned>::max() &&
135 "NumPreds will overflow!");
136 assert(N
->NumSuccs
< std::numeric_limits
<unsigned>::max() &&
137 "NumSuccs will overflow!");
141 if (!N
->isScheduled
) {
146 assert(NumPredsLeft
< std::numeric_limits
<unsigned>::max() &&
147 "NumPredsLeft will overflow!");
156 assert(N
->NumSuccsLeft
< std::numeric_limits
<unsigned>::max() &&
157 "NumSuccsLeft will overflow!");
162 N
->Succs
.push_back(P
);
163 if (P
.getLatency() != 0) {
164 this->setDepthDirty();
170 void SUnit::removePred(const SDep
&D
) {
171 // Find the matching predecessor.
172 SmallVectorImpl
<SDep
>::iterator I
= llvm::find(Preds
, D
);
173 if (I
== Preds
.end())
175 // Find the corresponding successor in N.
178 SUnit
*N
= D
.getSUnit();
179 SmallVectorImpl
<SDep
>::iterator Succ
= llvm::find(N
->Succs
, P
);
180 assert(Succ
!= N
->Succs
.end() && "Mismatching preds / succs lists!");
181 N
->Succs
.erase(Succ
);
183 // Update the bookkeeping.
184 if (P
.getKind() == SDep::Data
) {
185 assert(NumPreds
> 0 && "NumPreds will underflow!");
186 assert(N
->NumSuccs
> 0 && "NumSuccs will underflow!");
190 if (!N
->isScheduled
) {
194 assert(NumPredsLeft
> 0 && "NumPredsLeft will underflow!");
202 assert(N
->NumSuccsLeft
> 0 && "NumSuccsLeft will underflow!");
206 if (P
.getLatency() != 0) {
207 this->setDepthDirty();
212 void SUnit::setDepthDirty() {
213 if (!isDepthCurrent
) return;
214 SmallVector
<SUnit
*, 8> WorkList
;
215 WorkList
.push_back(this);
217 SUnit
*SU
= WorkList
.pop_back_val();
218 SU
->isDepthCurrent
= false;
219 for (SDep
&SuccDep
: SU
->Succs
) {
220 SUnit
*SuccSU
= SuccDep
.getSUnit();
221 if (SuccSU
->isDepthCurrent
)
222 WorkList
.push_back(SuccSU
);
224 } while (!WorkList
.empty());
227 void SUnit::setHeightDirty() {
228 if (!isHeightCurrent
) return;
229 SmallVector
<SUnit
*, 8> WorkList
;
230 WorkList
.push_back(this);
232 SUnit
*SU
= WorkList
.pop_back_val();
233 SU
->isHeightCurrent
= false;
234 for (SDep
&PredDep
: SU
->Preds
) {
235 SUnit
*PredSU
= PredDep
.getSUnit();
236 if (PredSU
->isHeightCurrent
)
237 WorkList
.push_back(PredSU
);
239 } while (!WorkList
.empty());
242 void SUnit::setDepthToAtLeast(unsigned NewDepth
) {
243 if (NewDepth
<= getDepth())
247 isDepthCurrent
= true;
250 void SUnit::setHeightToAtLeast(unsigned NewHeight
) {
251 if (NewHeight
<= getHeight())
255 isHeightCurrent
= true;
258 /// Calculates the maximal path from the node to the exit.
259 void SUnit::ComputeDepth() {
260 SmallVector
<SUnit
*, 8> WorkList
;
261 WorkList
.push_back(this);
263 SUnit
*Cur
= WorkList
.back();
266 unsigned MaxPredDepth
= 0;
267 for (const SDep
&PredDep
: Cur
->Preds
) {
268 SUnit
*PredSU
= PredDep
.getSUnit();
269 if (PredSU
->isDepthCurrent
)
270 MaxPredDepth
= std::max(MaxPredDepth
,
271 PredSU
->Depth
+ PredDep
.getLatency());
274 WorkList
.push_back(PredSU
);
280 if (MaxPredDepth
!= Cur
->Depth
) {
281 Cur
->setDepthDirty();
282 Cur
->Depth
= MaxPredDepth
;
284 Cur
->isDepthCurrent
= true;
286 } while (!WorkList
.empty());
289 /// Calculates the maximal path from the node to the entry.
290 void SUnit::ComputeHeight() {
291 SmallVector
<SUnit
*, 8> WorkList
;
292 WorkList
.push_back(this);
294 SUnit
*Cur
= WorkList
.back();
297 unsigned MaxSuccHeight
= 0;
298 for (const SDep
&SuccDep
: Cur
->Succs
) {
299 SUnit
*SuccSU
= SuccDep
.getSUnit();
300 if (SuccSU
->isHeightCurrent
)
301 MaxSuccHeight
= std::max(MaxSuccHeight
,
302 SuccSU
->Height
+ SuccDep
.getLatency());
305 WorkList
.push_back(SuccSU
);
311 if (MaxSuccHeight
!= Cur
->Height
) {
312 Cur
->setHeightDirty();
313 Cur
->Height
= MaxSuccHeight
;
315 Cur
->isHeightCurrent
= true;
317 } while (!WorkList
.empty());
320 void SUnit::biasCriticalPath() {
324 SUnit::pred_iterator BestI
= Preds
.begin();
325 unsigned MaxDepth
= BestI
->getSUnit()->getDepth();
326 for (SUnit::pred_iterator I
= std::next(BestI
), E
= Preds
.end(); I
!= E
;
328 if (I
->getKind() == SDep::Data
&& I
->getSUnit()->getDepth() > MaxDepth
)
331 if (BestI
!= Preds
.begin())
332 std::swap(*Preds
.begin(), *BestI
);
335 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
336 LLVM_DUMP_METHOD
void SUnit::dumpAttributes() const {
337 dbgs() << " # preds left : " << NumPredsLeft
<< "\n";
338 dbgs() << " # succs left : " << NumSuccsLeft
<< "\n";
340 dbgs() << " # weak preds left : " << WeakPredsLeft
<< "\n";
342 dbgs() << " # weak succs left : " << WeakSuccsLeft
<< "\n";
343 dbgs() << " # rdefs left : " << NumRegDefsLeft
<< "\n";
344 dbgs() << " Latency : " << Latency
<< "\n";
345 dbgs() << " Depth : " << getDepth() << "\n";
346 dbgs() << " Height : " << getHeight() << "\n";
349 LLVM_DUMP_METHOD
void ScheduleDAG::dumpNodeName(const SUnit
&SU
) const {
352 else if (&SU
== &ExitSU
)
355 dbgs() << "SU(" << SU
.NodeNum
<< ")";
358 LLVM_DUMP_METHOD
void ScheduleDAG::dumpNodeAll(const SUnit
&SU
) const {
361 if (SU
.Preds
.size() > 0) {
362 dbgs() << " Predecessors:\n";
363 for (const SDep
&Dep
: SU
.Preds
) {
365 dumpNodeName(*Dep
.getSUnit());
371 if (SU
.Succs
.size() > 0) {
372 dbgs() << " Successors:\n";
373 for (const SDep
&Dep
: SU
.Succs
) {
375 dumpNodeName(*Dep
.getSUnit());
385 unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp
) {
386 bool AnyNotSched
= false;
387 unsigned DeadNodes
= 0;
388 for (const SUnit
&SUnit
: SUnits
) {
389 if (!SUnit
.isScheduled
) {
390 if (SUnit
.NumPreds
== 0 && SUnit
.NumSuccs
== 0) {
395 dbgs() << "*** Scheduling failed! ***\n";
397 dbgs() << "has not been scheduled!\n";
400 if (SUnit
.isScheduled
&&
401 (isBottomUp
? SUnit
.getHeight() : SUnit
.getDepth()) >
402 unsigned(std::numeric_limits
<int>::max())) {
404 dbgs() << "*** Scheduling failed! ***\n";
406 dbgs() << "has an unexpected "
407 << (isBottomUp
? "Height" : "Depth") << " value!\n";
411 if (SUnit
.NumSuccsLeft
!= 0) {
413 dbgs() << "*** Scheduling failed! ***\n";
415 dbgs() << "has successors left!\n";
419 if (SUnit
.NumPredsLeft
!= 0) {
421 dbgs() << "*** Scheduling failed! ***\n";
423 dbgs() << "has predecessors left!\n";
428 assert(!AnyNotSched
);
429 return SUnits
.size() - DeadNodes
;
433 void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
434 // The idea of the algorithm is taken from
435 // "Online algorithms for managing the topological order of
436 // a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
437 // This is the MNR algorithm, which was first introduced by
438 // A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
439 // "Maintaining a topological order under edge insertions".
441 // Short description of the algorithm:
443 // Topological ordering, ord, of a DAG maps each node to a topological
444 // index so that for all edges X->Y it is the case that ord(X) < ord(Y).
446 // This means that if there is a path from the node X to the node Z,
447 // then ord(X) < ord(Z).
449 // This property can be used to check for reachability of nodes:
450 // if Z is reachable from X, then an insertion of the edge Z->X would
453 // The algorithm first computes a topological ordering for the DAG by
454 // initializing the Index2Node and Node2Index arrays and then tries to keep
455 // the ordering up-to-date after edge insertions by reordering the DAG.
457 // On insertion of the edge X->Y, the algorithm first marks by calling DFS
458 // the nodes reachable from Y, and then shifts them using Shift to lie
459 // immediately after X in Index2Node.
460 unsigned DAGSize
= SUnits
.size();
461 std::vector
<SUnit
*> WorkList
;
462 WorkList
.reserve(DAGSize
);
464 Index2Node
.resize(DAGSize
);
465 Node2Index
.resize(DAGSize
);
467 // Initialize the data structures.
469 WorkList
.push_back(ExitSU
);
470 for (SUnit
&SU
: SUnits
) {
471 int NodeNum
= SU
.NodeNum
;
472 unsigned Degree
= SU
.Succs
.size();
473 // Temporarily use the Node2Index array as scratch space for degree counts.
474 Node2Index
[NodeNum
] = Degree
;
476 // Is it a node without dependencies?
478 assert(SU
.Succs
.empty() && "SUnit should have no successors");
479 // Collect leaf nodes.
480 WorkList
.push_back(&SU
);
485 while (!WorkList
.empty()) {
486 SUnit
*SU
= WorkList
.back();
488 if (SU
->NodeNum
< DAGSize
)
489 Allocate(SU
->NodeNum
, --Id
);
490 for (const SDep
&PredDep
: SU
->Preds
) {
491 SUnit
*SU
= PredDep
.getSUnit();
492 if (SU
->NodeNum
< DAGSize
&& !--Node2Index
[SU
->NodeNum
])
493 // If all dependencies of the node are processed already,
494 // then the node can be computed now.
495 WorkList
.push_back(SU
);
499 Visited
.resize(DAGSize
);
502 // Check correctness of the ordering
503 for (SUnit
&SU
: SUnits
) {
504 for (const SDep
&PD
: SU
.Preds
) {
505 assert(Node2Index
[SU
.NodeNum
] > Node2Index
[PD
.getSUnit()->NodeNum
] &&
506 "Wrong topological sorting");
512 void ScheduleDAGTopologicalSort::AddPred(SUnit
*Y
, SUnit
*X
) {
513 int UpperBound
, LowerBound
;
514 LowerBound
= Node2Index
[Y
->NodeNum
];
515 UpperBound
= Node2Index
[X
->NodeNum
];
516 bool HasLoop
= false;
517 // Is Ord(X) < Ord(Y) ?
518 if (LowerBound
< UpperBound
) {
519 // Update the topological order.
521 DFS(Y
, UpperBound
, HasLoop
);
522 assert(!HasLoop
&& "Inserted edge creates a loop!");
523 // Recompute topological indexes.
524 Shift(Visited
, LowerBound
, UpperBound
);
528 void ScheduleDAGTopologicalSort::RemovePred(SUnit
*M
, SUnit
*N
) {
529 // InitDAGTopologicalSorting();
532 void ScheduleDAGTopologicalSort::DFS(const SUnit
*SU
, int UpperBound
,
534 std::vector
<const SUnit
*> WorkList
;
535 WorkList
.reserve(SUnits
.size());
537 WorkList
.push_back(SU
);
539 SU
= WorkList
.back();
541 Visited
.set(SU
->NodeNum
);
542 for (const SDep
&SuccDep
543 : make_range(SU
->Succs
.rbegin(), SU
->Succs
.rend())) {
544 unsigned s
= SuccDep
.getSUnit()->NodeNum
;
545 // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
546 if (s
>= Node2Index
.size())
548 if (Node2Index
[s
] == UpperBound
) {
552 // Visit successors if not already and in affected region.
553 if (!Visited
.test(s
) && Node2Index
[s
] < UpperBound
) {
554 WorkList
.push_back(SuccDep
.getSUnit());
557 } while (!WorkList
.empty());
560 std::vector
<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit
&StartSU
,
561 const SUnit
&TargetSU
,
563 std::vector
<const SUnit
*> WorkList
;
564 int LowerBound
= Node2Index
[StartSU
.NodeNum
];
565 int UpperBound
= Node2Index
[TargetSU
.NodeNum
];
567 BitVector VisitedBack
;
568 std::vector
<int> Nodes
;
570 if (LowerBound
> UpperBound
) {
575 WorkList
.reserve(SUnits
.size());
578 // Starting from StartSU, visit all successors up
580 WorkList
.push_back(&StartSU
);
582 const SUnit
*SU
= WorkList
.back();
584 for (int I
= SU
->Succs
.size()-1; I
>= 0; --I
) {
585 const SUnit
*Succ
= SU
->Succs
[I
].getSUnit();
586 unsigned s
= Succ
->NodeNum
;
587 // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
588 if (Succ
->isBoundaryNode())
590 if (Node2Index
[s
] == UpperBound
) {
594 // Visit successors if not already and in affected region.
595 if (!Visited
.test(s
) && Node2Index
[s
] < UpperBound
) {
597 WorkList
.push_back(Succ
);
600 } while (!WorkList
.empty());
608 VisitedBack
.resize(SUnits
.size());
611 // Starting from TargetSU, visit all predecessors up
612 // to LowerBound. SUs that are visited by the two
613 // passes are added to Nodes.
614 WorkList
.push_back(&TargetSU
);
616 const SUnit
*SU
= WorkList
.back();
618 for (int I
= SU
->Preds
.size()-1; I
>= 0; --I
) {
619 const SUnit
*Pred
= SU
->Preds
[I
].getSUnit();
620 unsigned s
= Pred
->NodeNum
;
621 // Edges to non-SUnits are allowed but ignored (e.g. EntrySU).
622 if (Pred
->isBoundaryNode())
624 if (Node2Index
[s
] == LowerBound
) {
628 if (!VisitedBack
.test(s
) && Visited
.test(s
)) {
630 WorkList
.push_back(Pred
);
634 } while (!WorkList
.empty());
636 assert(Found
&& "Error in SUnit Graph!");
641 void ScheduleDAGTopologicalSort::Shift(BitVector
& Visited
, int LowerBound
,
647 for (i
= LowerBound
; i
<= UpperBound
; ++i
) {
648 // w is node at topological index i.
649 int w
= Index2Node
[i
];
650 if (Visited
.test(w
)) {
656 Allocate(w
, i
- shift
);
660 for (unsigned LI
: L
) {
661 Allocate(LI
, i
- shift
);
666 bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit
*TargetSU
, SUnit
*SU
) {
667 // Is SU reachable from TargetSU via successor edges?
668 if (IsReachable(SU
, TargetSU
))
670 for (const SDep
&PredDep
: TargetSU
->Preds
)
671 if (PredDep
.isAssignedRegDep() &&
672 IsReachable(SU
, PredDep
.getSUnit()))
677 bool ScheduleDAGTopologicalSort::IsReachable(const SUnit
*SU
,
678 const SUnit
*TargetSU
) {
679 // If insertion of the edge SU->TargetSU would create a cycle
680 // then there is a path from TargetSU to SU.
681 int UpperBound
, LowerBound
;
682 LowerBound
= Node2Index
[TargetSU
->NodeNum
];
683 UpperBound
= Node2Index
[SU
->NodeNum
];
684 bool HasLoop
= false;
685 // Is Ord(TargetSU) < Ord(SU) ?
686 if (LowerBound
< UpperBound
) {
688 // There may be a path from TargetSU to SU. Check for it.
689 DFS(TargetSU
, UpperBound
, HasLoop
);
694 void ScheduleDAGTopologicalSort::Allocate(int n
, int index
) {
695 Node2Index
[n
] = index
;
696 Index2Node
[index
] = n
;
699 ScheduleDAGTopologicalSort::
700 ScheduleDAGTopologicalSort(std::vector
<SUnit
> &sunits
, SUnit
*exitsu
)
701 : SUnits(sunits
), ExitSU(exitsu
) {}
703 ScheduleHazardRecognizer::~ScheduleHazardRecognizer() = default;