1 //===- ScheduleDAG.cpp - Implement the ScheduleDAG class ------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file Implements the ScheduleDAG class, which is a base class used by
11 /// scheduling implementation classes.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/CodeGen/ScheduleDAG.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/iterator_range.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
21 #include "llvm/CodeGen/SelectionDAGNodes.h"
22 #include "llvm/CodeGen/TargetInstrInfo.h"
23 #include "llvm/CodeGen/TargetRegisterInfo.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/Config/llvm-config.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Compiler.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/raw_ostream.h"
39 #define DEBUG_TYPE "pre-RA-sched"
42 static cl::opt
<bool> StressSchedOpt(
43 "stress-sched", cl::Hidden
, cl::init(false),
44 cl::desc("Stress test instruction scheduling"));
47 void SchedulingPriorityQueue::anchor() {}
49 ScheduleDAG::ScheduleDAG(MachineFunction
&mf
)
50 : TM(mf
.getTarget()), TII(mf
.getSubtarget().getInstrInfo()),
51 TRI(mf
.getSubtarget().getRegisterInfo()), MF(mf
),
52 MRI(mf
.getRegInfo()) {
54 StressSched
= StressSchedOpt
;
58 ScheduleDAG::~ScheduleDAG() = default;
60 void ScheduleDAG::clearDAG() {
66 const MCInstrDesc
*ScheduleDAG::getNodeDesc(const SDNode
*Node
) const {
67 if (!Node
|| !Node
->isMachineOpcode()) return nullptr;
68 return &TII
->get(Node
->getMachineOpcode());
72 raw_ostream
&SDep::print(raw_ostream
&OS
, const TargetRegisterInfo
*TRI
) const {
74 case Data
: OS
<< "Data"; break;
75 case Anti
: OS
<< "Anti"; break;
76 case Output
: OS
<< "Out "; break;
77 case Order
: OS
<< "Ord "; break;
82 OS
<< " Latency=" << getLatency();
83 if (TRI
&& isAssignedRegDep())
84 OS
<< " Reg=" << printReg(getReg(), TRI
);
88 OS
<< " Latency=" << getLatency();
91 OS
<< " Latency=" << getLatency();
92 switch(Contents
.OrdKind
) {
93 case Barrier
: OS
<< " Barrier"; break;
95 case MustAliasMem
: OS
<< " Memory"; break;
96 case Artificial
: OS
<< " Artificial"; break;
97 case Weak
: OS
<< " Weak"; break;
98 case Cluster
: OS
<< " Cluster"; break;
106 bool SUnit::addPred(const SDep
&D
, bool Required
) {
107 // If this node already has this dependence, don't add a redundant one.
108 for (SDep
&PredDep
: Preds
) {
109 // Zero-latency weak edges may be added purely for heuristic ordering. Don't
110 // add them if another kind of edge already exists.
111 if (!Required
&& PredDep
.getSUnit() == D
.getSUnit())
113 if (PredDep
.overlaps(D
)) {
114 // Extend the latency if needed. Equivalent to
115 // removePred(PredDep) + addPred(D).
116 if (PredDep
.getLatency() < D
.getLatency()) {
117 SUnit
*PredSU
= PredDep
.getSUnit();
118 // Find the corresponding successor in N.
119 SDep ForwardD
= PredDep
;
120 ForwardD
.setSUnit(this);
121 for (SDep
&SuccDep
: PredSU
->Succs
) {
122 if (SuccDep
== ForwardD
) {
123 SuccDep
.setLatency(D
.getLatency());
127 PredDep
.setLatency(D
.getLatency());
132 // Now add a corresponding succ to N.
135 SUnit
*N
= D
.getSUnit();
136 // Update the bookkeeping.
137 if (D
.getKind() == SDep::Data
) {
138 assert(NumPreds
< std::numeric_limits
<unsigned>::max() &&
139 "NumPreds will overflow!");
140 assert(N
->NumSuccs
< std::numeric_limits
<unsigned>::max() &&
141 "NumSuccs will overflow!");
145 if (!N
->isScheduled
) {
150 assert(NumPredsLeft
< std::numeric_limits
<unsigned>::max() &&
151 "NumPredsLeft will overflow!");
160 assert(N
->NumSuccsLeft
< std::numeric_limits
<unsigned>::max() &&
161 "NumSuccsLeft will overflow!");
166 N
->Succs
.push_back(P
);
167 if (P
.getLatency() != 0) {
168 this->setDepthDirty();
174 void SUnit::removePred(const SDep
&D
) {
175 // Find the matching predecessor.
176 SmallVectorImpl
<SDep
>::iterator I
= llvm::find(Preds
, D
);
177 if (I
== Preds
.end())
179 // Find the corresponding successor in N.
182 SUnit
*N
= D
.getSUnit();
183 SmallVectorImpl
<SDep
>::iterator Succ
= llvm::find(N
->Succs
, P
);
184 assert(Succ
!= N
->Succs
.end() && "Mismatching preds / succs lists!");
185 N
->Succs
.erase(Succ
);
187 // Update the bookkeeping.
188 if (P
.getKind() == SDep::Data
) {
189 assert(NumPreds
> 0 && "NumPreds will underflow!");
190 assert(N
->NumSuccs
> 0 && "NumSuccs will underflow!");
194 if (!N
->isScheduled
) {
198 assert(NumPredsLeft
> 0 && "NumPredsLeft will underflow!");
206 assert(N
->NumSuccsLeft
> 0 && "NumSuccsLeft will underflow!");
210 if (P
.getLatency() != 0) {
211 this->setDepthDirty();
216 void SUnit::setDepthDirty() {
217 if (!isDepthCurrent
) return;
218 SmallVector
<SUnit
*, 8> WorkList
;
219 WorkList
.push_back(this);
221 SUnit
*SU
= WorkList
.pop_back_val();
222 SU
->isDepthCurrent
= false;
223 for (SDep
&SuccDep
: SU
->Succs
) {
224 SUnit
*SuccSU
= SuccDep
.getSUnit();
225 if (SuccSU
->isDepthCurrent
)
226 WorkList
.push_back(SuccSU
);
228 } while (!WorkList
.empty());
231 void SUnit::setHeightDirty() {
232 if (!isHeightCurrent
) return;
233 SmallVector
<SUnit
*, 8> WorkList
;
234 WorkList
.push_back(this);
236 SUnit
*SU
= WorkList
.pop_back_val();
237 SU
->isHeightCurrent
= false;
238 for (SDep
&PredDep
: SU
->Preds
) {
239 SUnit
*PredSU
= PredDep
.getSUnit();
240 if (PredSU
->isHeightCurrent
)
241 WorkList
.push_back(PredSU
);
243 } while (!WorkList
.empty());
246 void SUnit::setDepthToAtLeast(unsigned NewDepth
) {
247 if (NewDepth
<= getDepth())
251 isDepthCurrent
= true;
254 void SUnit::setHeightToAtLeast(unsigned NewHeight
) {
255 if (NewHeight
<= getHeight())
259 isHeightCurrent
= true;
262 /// Calculates the maximal path from the node to the exit.
263 void SUnit::ComputeDepth() {
264 SmallVector
<SUnit
*, 8> WorkList
;
265 WorkList
.push_back(this);
267 SUnit
*Cur
= WorkList
.back();
270 unsigned MaxPredDepth
= 0;
271 for (const SDep
&PredDep
: Cur
->Preds
) {
272 SUnit
*PredSU
= PredDep
.getSUnit();
273 if (PredSU
->isDepthCurrent
)
274 MaxPredDepth
= std::max(MaxPredDepth
,
275 PredSU
->Depth
+ PredDep
.getLatency());
278 WorkList
.push_back(PredSU
);
284 if (MaxPredDepth
!= Cur
->Depth
) {
285 Cur
->setDepthDirty();
286 Cur
->Depth
= MaxPredDepth
;
288 Cur
->isDepthCurrent
= true;
290 } while (!WorkList
.empty());
293 /// Calculates the maximal path from the node to the entry.
294 void SUnit::ComputeHeight() {
295 SmallVector
<SUnit
*, 8> WorkList
;
296 WorkList
.push_back(this);
298 SUnit
*Cur
= WorkList
.back();
301 unsigned MaxSuccHeight
= 0;
302 for (const SDep
&SuccDep
: Cur
->Succs
) {
303 SUnit
*SuccSU
= SuccDep
.getSUnit();
304 if (SuccSU
->isHeightCurrent
)
305 MaxSuccHeight
= std::max(MaxSuccHeight
,
306 SuccSU
->Height
+ SuccDep
.getLatency());
309 WorkList
.push_back(SuccSU
);
315 if (MaxSuccHeight
!= Cur
->Height
) {
316 Cur
->setHeightDirty();
317 Cur
->Height
= MaxSuccHeight
;
319 Cur
->isHeightCurrent
= true;
321 } while (!WorkList
.empty());
324 void SUnit::biasCriticalPath() {
328 SUnit::pred_iterator BestI
= Preds
.begin();
329 unsigned MaxDepth
= BestI
->getSUnit()->getDepth();
330 for (SUnit::pred_iterator I
= std::next(BestI
), E
= Preds
.end(); I
!= E
;
332 if (I
->getKind() == SDep::Data
&& I
->getSUnit()->getDepth() > MaxDepth
)
335 if (BestI
!= Preds
.begin())
336 std::swap(*Preds
.begin(), *BestI
);
339 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
341 raw_ostream
&SUnit::print(raw_ostream
&OS
,
342 const SUnit
*Entry
, const SUnit
*Exit
) const {
345 else if (this == Exit
)
348 OS
<< "SU(" << NodeNum
<< ")";
353 raw_ostream
&SUnit::print(raw_ostream
&OS
, const ScheduleDAG
*G
) const {
354 return print(OS
, &G
->EntrySU
, &G
->ExitSU
);
358 void SUnit::dump(const ScheduleDAG
*G
) const {
364 LLVM_DUMP_METHOD
void SUnit::dumpAll(const ScheduleDAG
*G
) const {
367 dbgs() << " # preds left : " << NumPredsLeft
<< "\n";
368 dbgs() << " # succs left : " << NumSuccsLeft
<< "\n";
370 dbgs() << " # weak preds left : " << WeakPredsLeft
<< "\n";
372 dbgs() << " # weak succs left : " << WeakSuccsLeft
<< "\n";
373 dbgs() << " # rdefs left : " << NumRegDefsLeft
<< "\n";
374 dbgs() << " Latency : " << Latency
<< "\n";
375 dbgs() << " Depth : " << getDepth() << "\n";
376 dbgs() << " Height : " << getHeight() << "\n";
378 if (Preds
.size() != 0) {
379 dbgs() << " Predecessors:\n";
380 for (const SDep
&Dep
: Preds
) {
382 Dep
.getSUnit()->print(dbgs(), G
); dbgs() << ": ";
383 Dep
.print(dbgs(), G
->TRI
); dbgs() << '\n';
386 if (Succs
.size() != 0) {
387 dbgs() << " Successors:\n";
388 for (const SDep
&Dep
: Succs
) {
390 Dep
.getSUnit()->print(dbgs(), G
); dbgs() << ": ";
391 Dep
.print(dbgs(), G
->TRI
); dbgs() << '\n';
398 unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp
) {
399 bool AnyNotSched
= false;
400 unsigned DeadNodes
= 0;
401 for (const SUnit
&SUnit
: SUnits
) {
402 if (!SUnit
.isScheduled
) {
403 if (SUnit
.NumPreds
== 0 && SUnit
.NumSuccs
== 0) {
408 dbgs() << "*** Scheduling failed! ***\n";
410 dbgs() << "has not been scheduled!\n";
413 if (SUnit
.isScheduled
&&
414 (isBottomUp
? SUnit
.getHeight() : SUnit
.getDepth()) >
415 unsigned(std::numeric_limits
<int>::max())) {
417 dbgs() << "*** Scheduling failed! ***\n";
419 dbgs() << "has an unexpected "
420 << (isBottomUp
? "Height" : "Depth") << " value!\n";
424 if (SUnit
.NumSuccsLeft
!= 0) {
426 dbgs() << "*** Scheduling failed! ***\n";
428 dbgs() << "has successors left!\n";
432 if (SUnit
.NumPredsLeft
!= 0) {
434 dbgs() << "*** Scheduling failed! ***\n";
436 dbgs() << "has predecessors left!\n";
441 assert(!AnyNotSched
);
442 return SUnits
.size() - DeadNodes
;
446 void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
447 // The idea of the algorithm is taken from
448 // "Online algorithms for managing the topological order of
449 // a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
450 // This is the MNR algorithm, which was first introduced by
451 // A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
452 // "Maintaining a topological order under edge insertions".
454 // Short description of the algorithm:
456 // Topological ordering, ord, of a DAG maps each node to a topological
457 // index so that for all edges X->Y it is the case that ord(X) < ord(Y).
459 // This means that if there is a path from the node X to the node Z,
460 // then ord(X) < ord(Z).
462 // This property can be used to check for reachability of nodes:
463 // if Z is reachable from X, then an insertion of the edge Z->X would
466 // The algorithm first computes a topological ordering for the DAG by
467 // initializing the Index2Node and Node2Index arrays and then tries to keep
468 // the ordering up-to-date after edge insertions by reordering the DAG.
470 // On insertion of the edge X->Y, the algorithm first marks by calling DFS
471 // the nodes reachable from Y, and then shifts them using Shift to lie
472 // immediately after X in Index2Node.
473 unsigned DAGSize
= SUnits
.size();
474 std::vector
<SUnit
*> WorkList
;
475 WorkList
.reserve(DAGSize
);
477 Index2Node
.resize(DAGSize
);
478 Node2Index
.resize(DAGSize
);
480 // Initialize the data structures.
482 WorkList
.push_back(ExitSU
);
483 for (SUnit
&SU
: SUnits
) {
484 int NodeNum
= SU
.NodeNum
;
485 unsigned Degree
= SU
.Succs
.size();
486 // Temporarily use the Node2Index array as scratch space for degree counts.
487 Node2Index
[NodeNum
] = Degree
;
489 // Is it a node without dependencies?
491 assert(SU
.Succs
.empty() && "SUnit should have no successors");
492 // Collect leaf nodes.
493 WorkList
.push_back(&SU
);
498 while (!WorkList
.empty()) {
499 SUnit
*SU
= WorkList
.back();
501 if (SU
->NodeNum
< DAGSize
)
502 Allocate(SU
->NodeNum
, --Id
);
503 for (const SDep
&PredDep
: SU
->Preds
) {
504 SUnit
*SU
= PredDep
.getSUnit();
505 if (SU
->NodeNum
< DAGSize
&& !--Node2Index
[SU
->NodeNum
])
506 // If all dependencies of the node are processed already,
507 // then the node can be computed now.
508 WorkList
.push_back(SU
);
512 Visited
.resize(DAGSize
);
515 // Check correctness of the ordering
516 for (SUnit
&SU
: SUnits
) {
517 for (const SDep
&PD
: SU
.Preds
) {
518 assert(Node2Index
[SU
.NodeNum
] > Node2Index
[PD
.getSUnit()->NodeNum
] &&
519 "Wrong topological sorting");
525 void ScheduleDAGTopologicalSort::AddPred(SUnit
*Y
, SUnit
*X
) {
526 int UpperBound
, LowerBound
;
527 LowerBound
= Node2Index
[Y
->NodeNum
];
528 UpperBound
= Node2Index
[X
->NodeNum
];
529 bool HasLoop
= false;
530 // Is Ord(X) < Ord(Y) ?
531 if (LowerBound
< UpperBound
) {
532 // Update the topological order.
534 DFS(Y
, UpperBound
, HasLoop
);
535 assert(!HasLoop
&& "Inserted edge creates a loop!");
536 // Recompute topological indexes.
537 Shift(Visited
, LowerBound
, UpperBound
);
541 void ScheduleDAGTopologicalSort::RemovePred(SUnit
*M
, SUnit
*N
) {
542 // InitDAGTopologicalSorting();
545 void ScheduleDAGTopologicalSort::DFS(const SUnit
*SU
, int UpperBound
,
547 std::vector
<const SUnit
*> WorkList
;
548 WorkList
.reserve(SUnits
.size());
550 WorkList
.push_back(SU
);
552 SU
= WorkList
.back();
554 Visited
.set(SU
->NodeNum
);
555 for (const SDep
&SuccDep
556 : make_range(SU
->Succs
.rbegin(), SU
->Succs
.rend())) {
557 unsigned s
= SuccDep
.getSUnit()->NodeNum
;
558 // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
559 if (s
>= Node2Index
.size())
561 if (Node2Index
[s
] == UpperBound
) {
565 // Visit successors if not already and in affected region.
566 if (!Visited
.test(s
) && Node2Index
[s
] < UpperBound
) {
567 WorkList
.push_back(SuccDep
.getSUnit());
570 } while (!WorkList
.empty());
573 std::vector
<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit
&StartSU
,
574 const SUnit
&TargetSU
,
576 std::vector
<const SUnit
*> WorkList
;
577 int LowerBound
= Node2Index
[StartSU
.NodeNum
];
578 int UpperBound
= Node2Index
[TargetSU
.NodeNum
];
580 BitVector VisitedBack
;
581 std::vector
<int> Nodes
;
583 if (LowerBound
> UpperBound
) {
588 WorkList
.reserve(SUnits
.size());
591 // Starting from StartSU, visit all successors up
593 WorkList
.push_back(&StartSU
);
595 const SUnit
*SU
= WorkList
.back();
597 for (int I
= SU
->Succs
.size()-1; I
>= 0; --I
) {
598 const SUnit
*Succ
= SU
->Succs
[I
].getSUnit();
599 unsigned s
= Succ
->NodeNum
;
600 // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
601 if (Succ
->isBoundaryNode())
603 if (Node2Index
[s
] == UpperBound
) {
607 // Visit successors if not already and in affected region.
608 if (!Visited
.test(s
) && Node2Index
[s
] < UpperBound
) {
610 WorkList
.push_back(Succ
);
613 } while (!WorkList
.empty());
621 VisitedBack
.resize(SUnits
.size());
624 // Starting from TargetSU, visit all predecessors up
625 // to LowerBound. SUs that are visited by the two
626 // passes are added to Nodes.
627 WorkList
.push_back(&TargetSU
);
629 const SUnit
*SU
= WorkList
.back();
631 for (int I
= SU
->Preds
.size()-1; I
>= 0; --I
) {
632 const SUnit
*Pred
= SU
->Preds
[I
].getSUnit();
633 unsigned s
= Pred
->NodeNum
;
634 // Edges to non-SUnits are allowed but ignored (e.g. EntrySU).
635 if (Pred
->isBoundaryNode())
637 if (Node2Index
[s
] == LowerBound
) {
641 if (!VisitedBack
.test(s
) && Visited
.test(s
)) {
643 WorkList
.push_back(Pred
);
647 } while (!WorkList
.empty());
649 assert(Found
&& "Error in SUnit Graph!");
654 void ScheduleDAGTopologicalSort::Shift(BitVector
& Visited
, int LowerBound
,
660 for (i
= LowerBound
; i
<= UpperBound
; ++i
) {
661 // w is node at topological index i.
662 int w
= Index2Node
[i
];
663 if (Visited
.test(w
)) {
669 Allocate(w
, i
- shift
);
673 for (unsigned LI
: L
) {
674 Allocate(LI
, i
- shift
);
679 bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit
*TargetSU
, SUnit
*SU
) {
680 // Is SU reachable from TargetSU via successor edges?
681 if (IsReachable(SU
, TargetSU
))
683 for (const SDep
&PredDep
: TargetSU
->Preds
)
684 if (PredDep
.isAssignedRegDep() &&
685 IsReachable(SU
, PredDep
.getSUnit()))
690 bool ScheduleDAGTopologicalSort::IsReachable(const SUnit
*SU
,
691 const SUnit
*TargetSU
) {
692 // If insertion of the edge SU->TargetSU would create a cycle
693 // then there is a path from TargetSU to SU.
694 int UpperBound
, LowerBound
;
695 LowerBound
= Node2Index
[TargetSU
->NodeNum
];
696 UpperBound
= Node2Index
[SU
->NodeNum
];
697 bool HasLoop
= false;
698 // Is Ord(TargetSU) < Ord(SU) ?
699 if (LowerBound
< UpperBound
) {
701 // There may be a path from TargetSU to SU. Check for it.
702 DFS(TargetSU
, UpperBound
, HasLoop
);
707 void ScheduleDAGTopologicalSort::Allocate(int n
, int index
) {
708 Node2Index
[n
] = index
;
709 Index2Node
[index
] = n
;
712 ScheduleDAGTopologicalSort::
713 ScheduleDAGTopologicalSort(std::vector
<SUnit
> &sunits
, SUnit
*exitsu
)
714 : SUnits(sunits
), ExitSU(exitsu
) {}
716 ScheduleHazardRecognizer::~ScheduleHazardRecognizer() = default;