1 //===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/CodeGen/VLIWMachineScheduler.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/CodeGen/DFAPacketizer.h"
17 #include "llvm/CodeGen/MachineBasicBlock.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineInstr.h"
20 #include "llvm/CodeGen/MachineLoopInfo.h"
21 #include "llvm/CodeGen/RegisterClassInfo.h"
22 #include "llvm/CodeGen/RegisterPressure.h"
23 #include "llvm/CodeGen/ScheduleDAG.h"
24 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
25 #include "llvm/CodeGen/TargetInstrInfo.h"
26 #include "llvm/CodeGen/TargetOpcodes.h"
27 #include "llvm/CodeGen/TargetRegisterInfo.h"
28 #include "llvm/CodeGen/TargetSchedule.h"
29 #include "llvm/CodeGen/TargetSubtargetInfo.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/raw_ostream.h"
42 #define DEBUG_TYPE "machine-scheduler"
44 static cl::opt
<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden
,
47 static cl::opt
<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden
,
50 static cl::opt
<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
51 cl::Hidden
, cl::init(1));
53 // Check if the scheduler should penalize instructions that are available to
54 // early due to a zero-latency dependence.
55 static cl::opt
<bool> CheckEarlyAvail("check-early-avail", cl::Hidden
,
58 // This value is used to determine if a register class is a high pressure set.
59 // We compute the maximum number of registers needed and divided by the total
60 // available. Then, we compare the result to this value.
61 static cl::opt
<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden
,
63 cl::desc("High register pressure threhold."));
65 VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo
&STI
,
66 const TargetSchedModel
*SM
)
67 : TII(STI
.getInstrInfo()), SchedModel(SM
) {
68 ResourcesModel
= createPacketizer(STI
);
70 // This hard requirement could be relaxed,
71 // but for now do not let it proceed.
72 assert(ResourcesModel
&& "Unimplemented CreateTargetScheduleState.");
74 Packet
.reserve(SchedModel
->getIssueWidth());
76 ResourcesModel
->clearResources();
79 void VLIWResourceModel::reset() {
81 ResourcesModel
->clearResources();
84 VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel
; }
86 /// Return true if there is a dependence between SUd and SUu.
87 bool VLIWResourceModel::hasDependence(const SUnit
*SUd
, const SUnit
*SUu
) {
88 if (SUd
->Succs
.size() == 0)
91 for (const auto &S
: SUd
->Succs
) {
92 // Since we do not add pseudos to packets, might as well
93 // ignore order dependencies.
97 if (S
.getSUnit() == SUu
&& S
.getLatency() > 0)
103 /// Check if scheduling of this SU is possible
104 /// in the current packet.
105 /// It is _not_ precise (statefull), it is more like
106 /// another heuristic. Many corner cases are figured
108 bool VLIWResourceModel::isResourceAvailable(SUnit
*SU
, bool IsTop
) {
109 if (!SU
|| !SU
->getInstr())
112 // First see if the pipeline could receive this instruction
113 // in the current cycle.
114 switch (SU
->getInstr()->getOpcode()) {
116 if (!ResourcesModel
->canReserveResources(*SU
->getInstr()))
119 case TargetOpcode::EXTRACT_SUBREG
:
120 case TargetOpcode::INSERT_SUBREG
:
121 case TargetOpcode::SUBREG_TO_REG
:
122 case TargetOpcode::REG_SEQUENCE
:
123 case TargetOpcode::IMPLICIT_DEF
:
124 case TargetOpcode::COPY
:
125 case TargetOpcode::INLINEASM
:
126 case TargetOpcode::INLINEASM_BR
:
130 // Now see if there are no other dependencies to instructions already
133 for (const SUnit
*U
: Packet
)
134 if (hasDependence(U
, SU
))
137 for (const SUnit
*U
: Packet
)
138 if (hasDependence(SU
, U
))
144 /// Keep track of available resources.
145 bool VLIWResourceModel::reserveResources(SUnit
*SU
, bool IsTop
) {
146 bool startNewCycle
= false;
147 // Artificially reset state.
153 // If this SU does not fit in the packet or the packet is now full
155 if (!isResourceAvailable(SU
, IsTop
) ||
156 Packet
.size() >= SchedModel
->getIssueWidth()) {
159 startNewCycle
= true;
162 switch (SU
->getInstr()->getOpcode()) {
164 ResourcesModel
->reserveResources(*SU
->getInstr());
166 case TargetOpcode::EXTRACT_SUBREG
:
167 case TargetOpcode::INSERT_SUBREG
:
168 case TargetOpcode::SUBREG_TO_REG
:
169 case TargetOpcode::REG_SEQUENCE
:
170 case TargetOpcode::IMPLICIT_DEF
:
171 case TargetOpcode::KILL
:
172 case TargetOpcode::CFI_INSTRUCTION
:
173 case TargetOpcode::EH_LABEL
:
174 case TargetOpcode::COPY
:
175 case TargetOpcode::INLINEASM
:
176 case TargetOpcode::INLINEASM_BR
:
179 Packet
.push_back(SU
);
182 LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets
<< "]:\n");
183 for (unsigned i
= 0, e
= Packet
.size(); i
!= e
; ++i
) {
184 LLVM_DEBUG(dbgs() << "\t[" << i
<< "] SU(");
185 LLVM_DEBUG(dbgs() << Packet
[i
]->NodeNum
<< ")\t");
186 LLVM_DEBUG(Packet
[i
]->getInstr()->dump());
190 return startNewCycle
;
194 VLIWResourceModel::createPacketizer(const TargetSubtargetInfo
&STI
) const {
195 return STI
.getInstrInfo()->CreateTargetScheduleState(STI
);
198 /// schedule - Called back from MachineScheduler::runOnMachineFunction
199 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
200 /// only includes instructions that have DAG nodes, not scheduling boundaries.
201 void VLIWMachineScheduler::schedule() {
202 LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
203 << printMBBReference(*BB
) << " " << BB
->getName()
204 << " in_func " << BB
->getParent()->getName()
205 << " at loop depth " << MLI
->getLoopDepth(BB
) << " \n");
207 buildDAGWithRegPressure();
209 Topo
.InitDAGTopologicalSorting();
211 // Postprocess the DAG to add platform-specific artificial dependencies.
214 SmallVector
<SUnit
*, 8> TopRoots
, BotRoots
;
215 findRootsAndBiasEdges(TopRoots
, BotRoots
);
217 // Initialize the strategy before modifying the DAG.
218 SchedImpl
->initialize(this);
222 for (const SUnit
&SU
: SUnits
)
223 if (SU
.getHeight() > maxH
)
224 maxH
= SU
.getHeight();
225 dbgs() << "Max Height " << maxH
<< "\n";
229 for (const SUnit
&SU
: SUnits
)
230 if (SU
.getDepth() > maxD
)
231 maxD
= SU
.getDepth();
232 dbgs() << "Max Depth " << maxD
<< "\n";
238 initQueues(TopRoots
, BotRoots
);
240 bool IsTopNode
= false;
243 dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
244 SUnit
*SU
= SchedImpl
->pickNode(IsTopNode
);
248 if (!checkSchedLimit())
251 scheduleMI(SU
, IsTopNode
);
253 // Notify the scheduling strategy after updating the DAG.
254 SchedImpl
->schedNode(SU
, IsTopNode
);
256 updateQueues(SU
, IsTopNode
);
258 assert(CurrentTop
== CurrentBottom
&& "Nonempty unscheduled zone.");
263 dbgs() << "*** Final schedule for "
264 << printMBBReference(*begin()->getParent()) << " ***\n";
270 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI
*dag
) {
271 DAG
= static_cast<VLIWMachineScheduler
*>(dag
);
272 SchedModel
= DAG
->getSchedModel();
274 Top
.init(DAG
, SchedModel
);
275 Bot
.init(DAG
, SchedModel
);
277 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
278 // are disabled, then these HazardRecs will be disabled.
279 const InstrItineraryData
*Itin
= DAG
->getSchedModel()->getInstrItineraries();
280 const TargetSubtargetInfo
&STI
= DAG
->MF
.getSubtarget();
281 const TargetInstrInfo
*TII
= STI
.getInstrInfo();
282 delete Top
.HazardRec
;
283 delete Bot
.HazardRec
;
284 Top
.HazardRec
= TII
->CreateTargetMIHazardRecognizer(Itin
, DAG
);
285 Bot
.HazardRec
= TII
->CreateTargetMIHazardRecognizer(Itin
, DAG
);
287 delete Top
.ResourceModel
;
288 delete Bot
.ResourceModel
;
289 Top
.ResourceModel
= createVLIWResourceModel(STI
, DAG
->getSchedModel());
290 Bot
.ResourceModel
= createVLIWResourceModel(STI
, DAG
->getSchedModel());
292 const std::vector
<unsigned> &MaxPressure
=
293 DAG
->getRegPressure().MaxSetPressure
;
294 HighPressureSets
.assign(MaxPressure
.size(), false);
295 for (unsigned i
= 0, e
= MaxPressure
.size(); i
< e
; ++i
) {
296 unsigned Limit
= DAG
->getRegClassInfo()->getRegPressureSetLimit(i
);
297 HighPressureSets
[i
] =
298 ((float)MaxPressure
[i
] > ((float)Limit
* RPThreshold
));
301 assert((!ForceTopDown
|| !ForceBottomUp
) &&
302 "-misched-topdown incompatible with -misched-bottomup");
305 VLIWResourceModel
*ConvergingVLIWScheduler::createVLIWResourceModel(
306 const TargetSubtargetInfo
&STI
, const TargetSchedModel
*SchedModel
) const {
307 return new VLIWResourceModel(STI
, SchedModel
);
310 void ConvergingVLIWScheduler::releaseTopNode(SUnit
*SU
) {
311 for (const SDep
&PI
: SU
->Preds
) {
312 unsigned PredReadyCycle
= PI
.getSUnit()->TopReadyCycle
;
313 unsigned MinLatency
= PI
.getLatency();
315 Top
.MaxMinLatency
= std::max(MinLatency
, Top
.MaxMinLatency
);
317 if (SU
->TopReadyCycle
< PredReadyCycle
+ MinLatency
)
318 SU
->TopReadyCycle
= PredReadyCycle
+ MinLatency
;
321 if (!SU
->isScheduled
)
322 Top
.releaseNode(SU
, SU
->TopReadyCycle
);
325 void ConvergingVLIWScheduler::releaseBottomNode(SUnit
*SU
) {
326 assert(SU
->getInstr() && "Scheduled SUnit must have instr");
328 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end(); I
!= E
;
330 unsigned SuccReadyCycle
= I
->getSUnit()->BotReadyCycle
;
331 unsigned MinLatency
= I
->getLatency();
333 Bot
.MaxMinLatency
= std::max(MinLatency
, Bot
.MaxMinLatency
);
335 if (SU
->BotReadyCycle
< SuccReadyCycle
+ MinLatency
)
336 SU
->BotReadyCycle
= SuccReadyCycle
+ MinLatency
;
339 if (!SU
->isScheduled
)
340 Bot
.releaseNode(SU
, SU
->BotReadyCycle
);
343 ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
344 delete ResourceModel
;
348 /// Does this SU have a hazard within the current instruction group.
350 /// The scheduler supports two modes of hazard recognition. The first is the
351 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
352 /// supports highly complicated in-order reservation tables
353 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
355 /// The second is a streamlined mechanism that checks for hazards based on
356 /// simple counters that the scheduler itself maintains. It explicitly checks
357 /// for instruction dispatch limitations, including the number of micro-ops that
358 /// can dispatch per cycle.
360 /// TODO: Also check whether the SU must start a new group.
361 bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit
*SU
) {
362 if (HazardRec
->isEnabled())
363 return HazardRec
->getHazardType(SU
) != ScheduleHazardRecognizer::NoHazard
;
365 unsigned uops
= SchedModel
->getNumMicroOps(SU
->getInstr());
366 if (IssueCount
+ uops
> SchedModel
->getIssueWidth())
372 void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
373 SUnit
*SU
, unsigned ReadyCycle
) {
374 if (ReadyCycle
< MinReadyCycle
)
375 MinReadyCycle
= ReadyCycle
;
377 // Check for interlocks first. For the purpose of other heuristics, an
378 // instruction that cannot issue appears as if it's not in the ReadyQueue.
379 if (ReadyCycle
> CurrCycle
|| checkHazard(SU
))
386 /// Move the boundary of scheduled code by one cycle.
387 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
388 unsigned Width
= SchedModel
->getIssueWidth();
389 IssueCount
= (IssueCount
<= Width
) ? 0 : IssueCount
- Width
;
391 assert(MinReadyCycle
< std::numeric_limits
<unsigned>::max() &&
392 "MinReadyCycle uninitialized");
393 unsigned NextCycle
= std::max(CurrCycle
+ 1, MinReadyCycle
);
395 if (!HazardRec
->isEnabled()) {
396 // Bypass HazardRec virtual calls.
397 CurrCycle
= NextCycle
;
399 // Bypass getHazardType calls in case of long latency.
400 for (; CurrCycle
!= NextCycle
; ++CurrCycle
) {
402 HazardRec
->AdvanceCycle();
404 HazardRec
->RecedeCycle();
409 LLVM_DEBUG(dbgs() << "*** Next cycle " << Available
.getName() << " cycle "
410 << CurrCycle
<< '\n');
413 /// Move the boundary of scheduled code by one SUnit.
414 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit
*SU
) {
415 bool startNewCycle
= false;
417 // Update the reservation table.
418 if (HazardRec
->isEnabled()) {
419 if (!isTop() && SU
->isCall
) {
420 // Calls are scheduled with their preceding instructions. For bottom-up
421 // scheduling, clear the pipeline state before emitting.
424 HazardRec
->EmitInstruction(SU
);
428 startNewCycle
= ResourceModel
->reserveResources(SU
, isTop());
430 // Check the instruction group dispatch limit.
431 // TODO: Check if this SU must end a dispatch group.
432 IssueCount
+= SchedModel
->getNumMicroOps(SU
->getInstr());
434 LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle
<< '\n');
437 LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount
<< " at cycle "
438 << CurrCycle
<< '\n');
441 /// Release pending ready nodes in to the available queue. This makes them
442 /// visible to heuristics.
443 void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
444 // If the available queue is empty, it is safe to reset MinReadyCycle.
445 if (Available
.empty())
446 MinReadyCycle
= std::numeric_limits
<unsigned>::max();
448 // Check to see if any of the pending instructions are ready to issue. If
449 // so, add them to the available queue.
450 for (unsigned i
= 0, e
= Pending
.size(); i
!= e
; ++i
) {
451 SUnit
*SU
= *(Pending
.begin() + i
);
452 unsigned ReadyCycle
= isTop() ? SU
->TopReadyCycle
: SU
->BotReadyCycle
;
454 if (ReadyCycle
< MinReadyCycle
)
455 MinReadyCycle
= ReadyCycle
;
457 if (ReadyCycle
> CurrCycle
)
464 Pending
.remove(Pending
.begin() + i
);
468 CheckPending
= false;
471 /// Remove SU from the ready set for this boundary.
472 void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit
*SU
) {
473 if (Available
.isInQueue(SU
))
474 Available
.remove(Available
.find(SU
));
476 assert(Pending
.isInQueue(SU
) && "bad ready count");
477 Pending
.remove(Pending
.find(SU
));
481 /// If this queue only has one ready candidate, return it. As a side effect,
482 /// advance the cycle until at least one node is ready. If multiple instructions
483 /// are ready, return NULL.
484 SUnit
*ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
488 auto AdvanceCycle
= [this]() {
489 if (Available
.empty())
491 if (Available
.size() == 1 && Pending
.size() > 0)
492 return !ResourceModel
->isResourceAvailable(*Available
.begin(), isTop()) ||
493 getWeakLeft(*Available
.begin(), isTop()) != 0;
496 for (unsigned i
= 0; AdvanceCycle(); ++i
) {
497 assert(i
<= (HazardRec
->getMaxLookAhead() + MaxMinLatency
) &&
500 ResourceModel
->reserveResources(nullptr, isTop());
504 if (Available
.size() == 1)
505 return *Available
.begin();
510 void ConvergingVLIWScheduler::traceCandidate(const char *Label
,
511 const ReadyQueue
&Q
, SUnit
*SU
,
512 int Cost
, PressureChange P
) {
513 dbgs() << Label
<< " " << Q
.getName() << " ";
515 dbgs() << DAG
->TRI
->getRegPressureSetName(P
.getPSet()) << ":"
516 << P
.getUnitInc() << " ";
519 dbgs() << "cost(" << Cost
<< ")\t";
523 // Very detailed queue dump, to be used with higher verbosity levels.
524 void ConvergingVLIWScheduler::readyQueueVerboseDump(
525 const RegPressureTracker
&RPTracker
, SchedCandidate
&Candidate
,
527 RegPressureTracker
&TempTracker
= const_cast<RegPressureTracker
&>(RPTracker
);
529 dbgs() << ">>> " << Q
.getName() << "\n";
530 for (ReadyQueue::iterator I
= Q
.begin(), E
= Q
.end(); I
!= E
; ++I
) {
531 RegPressureDelta RPDelta
;
532 TempTracker
.getMaxPressureDelta((*I
)->getInstr(), RPDelta
,
533 DAG
->getRegionCriticalPSets(),
534 DAG
->getRegPressure().MaxSetPressure
);
535 std::stringstream dbgstr
;
536 dbgstr
<< "SU(" << std::setw(3) << (*I
)->NodeNum
<< ")";
537 dbgs() << dbgstr
.str();
538 SchedulingCost(Q
, *I
, Candidate
, RPDelta
, true);
540 (*I
)->getInstr()->dump();
546 /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
547 /// of SU, return true (we may have duplicates)
548 static inline bool isSingleUnscheduledPred(SUnit
*SU
, SUnit
*SU2
) {
549 if (SU
->NumPredsLeft
== 0)
552 for (auto &Pred
: SU
->Preds
) {
553 // We found an available, but not scheduled, predecessor.
554 if (!Pred
.getSUnit()->isScheduled
&& (Pred
.getSUnit() != SU2
))
561 /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
562 /// of SU, return true (we may have duplicates)
563 static inline bool isSingleUnscheduledSucc(SUnit
*SU
, SUnit
*SU2
) {
564 if (SU
->NumSuccsLeft
== 0)
567 for (auto &Succ
: SU
->Succs
) {
568 // We found an available, but not scheduled, successor.
569 if (!Succ
.getSUnit()->isScheduled
&& (Succ
.getSUnit() != SU2
))
575 /// Check if the instruction changes the register pressure of a register in the
576 /// high pressure set. The function returns a negative value if the pressure
577 /// decreases and a positive value is the pressure increases. If the instruction
578 /// doesn't use a high pressure register or doesn't change the register
579 /// pressure, then return 0.
580 int ConvergingVLIWScheduler::pressureChange(const SUnit
*SU
, bool isBotUp
) {
581 PressureDiff
&PD
= DAG
->getPressureDiff(SU
);
582 for (const auto &P
: PD
) {
585 // The pressure differences are computed bottom-up, so the comparison for
586 // an increase is positive in the bottom direction, but negative in the
587 // top-down direction.
588 if (HighPressureSets
[P
.getPSet()])
589 return (isBotUp
? P
.getUnitInc() : -P
.getUnitInc());
594 /// Single point to compute overall scheduling cost.
595 /// TODO: More heuristics will be used soon.
596 int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue
&Q
, SUnit
*SU
,
597 SchedCandidate
&Candidate
,
598 RegPressureDelta
&Delta
,
600 // Initial trivial priority.
603 // Do not waste time on a node that is already scheduled.
604 if (!SU
|| SU
->isScheduled
)
607 LLVM_DEBUG(if (verbose
) dbgs()
608 << ((Q
.getID() == TopQID
) ? "(top|" : "(bot|"));
609 // Forced priority is high.
610 if (SU
->isScheduleHigh
) {
611 ResCount
+= PriorityOne
;
612 LLVM_DEBUG(dbgs() << "H|");
615 unsigned IsAvailableAmt
= 0;
616 // Critical path first.
617 if (Q
.getID() == TopQID
) {
618 if (Top
.isLatencyBound(SU
)) {
619 LLVM_DEBUG(if (verbose
) dbgs() << "LB|");
620 ResCount
+= (SU
->getHeight() * ScaleTwo
);
623 LLVM_DEBUG(if (verbose
) {
624 std::stringstream dbgstr
;
625 dbgstr
<< "h" << std::setw(3) << SU
->getHeight() << "|";
626 dbgs() << dbgstr
.str();
629 // If resources are available for it, multiply the
630 // chance of scheduling.
631 if (Top
.ResourceModel
->isResourceAvailable(SU
, true)) {
632 IsAvailableAmt
= (PriorityTwo
+ PriorityThree
);
633 ResCount
+= IsAvailableAmt
;
634 LLVM_DEBUG(if (verbose
) dbgs() << "A|");
636 LLVM_DEBUG(if (verbose
) dbgs() << " |");
638 if (Bot
.isLatencyBound(SU
)) {
639 LLVM_DEBUG(if (verbose
) dbgs() << "LB|");
640 ResCount
+= (SU
->getDepth() * ScaleTwo
);
643 LLVM_DEBUG(if (verbose
) {
644 std::stringstream dbgstr
;
645 dbgstr
<< "d" << std::setw(3) << SU
->getDepth() << "|";
646 dbgs() << dbgstr
.str();
649 // If resources are available for it, multiply the
650 // chance of scheduling.
651 if (Bot
.ResourceModel
->isResourceAvailable(SU
, false)) {
652 IsAvailableAmt
= (PriorityTwo
+ PriorityThree
);
653 ResCount
+= IsAvailableAmt
;
654 LLVM_DEBUG(if (verbose
) dbgs() << "A|");
656 LLVM_DEBUG(if (verbose
) dbgs() << " |");
659 unsigned NumNodesBlocking
= 0;
660 if (Q
.getID() == TopQID
) {
661 // How many SUs does it block from scheduling?
662 // Look at all of the successors of this node.
663 // Count the number of nodes that
664 // this node is the sole unscheduled node for.
665 if (Top
.isLatencyBound(SU
))
666 for (const SDep
&SI
: SU
->Succs
)
667 if (isSingleUnscheduledPred(SI
.getSUnit(), SU
))
670 // How many unscheduled predecessors block this node?
671 if (Bot
.isLatencyBound(SU
))
672 for (const SDep
&PI
: SU
->Preds
)
673 if (isSingleUnscheduledSucc(PI
.getSUnit(), SU
))
676 ResCount
+= (NumNodesBlocking
* ScaleTwo
);
678 LLVM_DEBUG(if (verbose
) {
679 std::stringstream dbgstr
;
680 dbgstr
<< "blk " << std::setw(2) << NumNodesBlocking
<< ")|";
681 dbgs() << dbgstr
.str();
684 // Factor in reg pressure as a heuristic.
685 if (!IgnoreBBRegPressure
) {
686 // Decrease priority by the amount that register pressure exceeds the limit.
687 ResCount
-= (Delta
.Excess
.getUnitInc() * PriorityOne
);
688 // Decrease priority if register pressure exceeds the limit.
689 ResCount
-= (Delta
.CriticalMax
.getUnitInc() * PriorityOne
);
690 // Decrease priority slightly if register pressure would increase over the
692 ResCount
-= (Delta
.CurrentMax
.getUnitInc() * PriorityTwo
);
693 // If there are register pressure issues, then we remove the value added for
694 // the instruction being available. The rationale is that we really don't
695 // want to schedule an instruction that causes a spill.
696 if (IsAvailableAmt
&& pressureChange(SU
, Q
.getID() != TopQID
) > 0 &&
697 (Delta
.Excess
.getUnitInc() || Delta
.CriticalMax
.getUnitInc() ||
698 Delta
.CurrentMax
.getUnitInc()))
699 ResCount
-= IsAvailableAmt
;
700 LLVM_DEBUG(if (verbose
) {
701 dbgs() << "RP " << Delta
.Excess
.getUnitInc() << "/"
702 << Delta
.CriticalMax
.getUnitInc() << "/"
703 << Delta
.CurrentMax
.getUnitInc() << ")|";
707 // Give preference to a zero latency instruction if the dependent
708 // instruction is in the current packet.
709 if (Q
.getID() == TopQID
&& getWeakLeft(SU
, true) == 0) {
710 for (const SDep
&PI
: SU
->Preds
) {
711 if (!PI
.getSUnit()->getInstr()->isPseudo() && PI
.isAssignedRegDep() &&
712 PI
.getLatency() == 0 &&
713 Top
.ResourceModel
->isInPacket(PI
.getSUnit())) {
714 ResCount
+= PriorityThree
;
715 LLVM_DEBUG(if (verbose
) dbgs() << "Z|");
718 } else if (Q
.getID() == BotQID
&& getWeakLeft(SU
, false) == 0) {
719 for (const SDep
&SI
: SU
->Succs
) {
720 if (!SI
.getSUnit()->getInstr()->isPseudo() && SI
.isAssignedRegDep() &&
721 SI
.getLatency() == 0 &&
722 Bot
.ResourceModel
->isInPacket(SI
.getSUnit())) {
723 ResCount
+= PriorityThree
;
724 LLVM_DEBUG(if (verbose
) dbgs() << "Z|");
729 // If the instruction has a non-zero latency dependence with an instruction in
730 // the current packet, then it should not be scheduled yet. The case occurs
731 // when the dependent instruction is scheduled in a new packet, so the
732 // scheduler updates the current cycle and pending instructions become
734 if (CheckEarlyAvail
) {
735 if (Q
.getID() == TopQID
) {
736 for (const auto &PI
: SU
->Preds
) {
737 if (PI
.getLatency() > 0 &&
738 Top
.ResourceModel
->isInPacket(PI
.getSUnit())) {
739 ResCount
-= PriorityOne
;
740 LLVM_DEBUG(if (verbose
) dbgs() << "D|");
744 for (const auto &SI
: SU
->Succs
) {
745 if (SI
.getLatency() > 0 &&
746 Bot
.ResourceModel
->isInPacket(SI
.getSUnit())) {
747 ResCount
-= PriorityOne
;
748 LLVM_DEBUG(if (verbose
) dbgs() << "D|");
754 LLVM_DEBUG(if (verbose
) {
755 std::stringstream dbgstr
;
756 dbgstr
<< "Total " << std::setw(4) << ResCount
<< ")";
757 dbgs() << dbgstr
.str();
763 /// Pick the best candidate from the top queue.
765 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
766 /// DAG building. To adjust for the current scheduling location we need to
767 /// maintain the number of vreg uses remaining to be top-scheduled.
768 ConvergingVLIWScheduler::CandResult
769 ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary
&Zone
,
770 const RegPressureTracker
&RPTracker
,
771 SchedCandidate
&Candidate
) {
772 ReadyQueue
&Q
= Zone
.Available
;
773 LLVM_DEBUG(if (SchedDebugVerboseLevel
> 1)
774 readyQueueVerboseDump(RPTracker
, Candidate
, Q
);
777 // getMaxPressureDelta temporarily modifies the tracker.
778 RegPressureTracker
&TempTracker
= const_cast<RegPressureTracker
&>(RPTracker
);
780 // BestSU remains NULL if no top candidates beat the best existing candidate.
781 CandResult FoundCandidate
= NoCand
;
782 for (ReadyQueue::iterator I
= Q
.begin(), E
= Q
.end(); I
!= E
; ++I
) {
783 RegPressureDelta RPDelta
;
784 TempTracker
.getMaxPressureDelta((*I
)->getInstr(), RPDelta
,
785 DAG
->getRegionCriticalPSets(),
786 DAG
->getRegPressure().MaxSetPressure
);
788 int CurrentCost
= SchedulingCost(Q
, *I
, Candidate
, RPDelta
, false);
790 // Initialize the candidate if needed.
792 LLVM_DEBUG(traceCandidate("DCAND", Q
, *I
, CurrentCost
));
794 Candidate
.RPDelta
= RPDelta
;
795 Candidate
.SCost
= CurrentCost
;
796 FoundCandidate
= NodeOrder
;
800 // Choose node order for negative cost candidates. There is no good
801 // candidate in this case.
802 if (CurrentCost
< 0 && Candidate
.SCost
< 0) {
803 if ((Q
.getID() == TopQID
&& (*I
)->NodeNum
< Candidate
.SU
->NodeNum
) ||
804 (Q
.getID() == BotQID
&& (*I
)->NodeNum
> Candidate
.SU
->NodeNum
)) {
805 LLVM_DEBUG(traceCandidate("NCAND", Q
, *I
, CurrentCost
));
807 Candidate
.RPDelta
= RPDelta
;
808 Candidate
.SCost
= CurrentCost
;
809 FoundCandidate
= NodeOrder
;
815 if (CurrentCost
> Candidate
.SCost
) {
816 LLVM_DEBUG(traceCandidate("CCAND", Q
, *I
, CurrentCost
));
818 Candidate
.RPDelta
= RPDelta
;
819 Candidate
.SCost
= CurrentCost
;
820 FoundCandidate
= BestCost
;
824 // Choose an instruction that does not depend on an artificial edge.
825 unsigned CurrWeak
= getWeakLeft(*I
, (Q
.getID() == TopQID
));
826 unsigned CandWeak
= getWeakLeft(Candidate
.SU
, (Q
.getID() == TopQID
));
827 if (CurrWeak
!= CandWeak
) {
828 if (CurrWeak
< CandWeak
) {
829 LLVM_DEBUG(traceCandidate("WCAND", Q
, *I
, CurrentCost
));
831 Candidate
.RPDelta
= RPDelta
;
832 Candidate
.SCost
= CurrentCost
;
833 FoundCandidate
= Weak
;
838 if (CurrentCost
== Candidate
.SCost
&& Zone
.isLatencyBound(*I
)) {
839 unsigned CurrSize
, CandSize
;
840 if (Q
.getID() == TopQID
) {
841 CurrSize
= (*I
)->Succs
.size();
842 CandSize
= Candidate
.SU
->Succs
.size();
844 CurrSize
= (*I
)->Preds
.size();
845 CandSize
= Candidate
.SU
->Preds
.size();
847 if (CurrSize
> CandSize
) {
848 LLVM_DEBUG(traceCandidate("SPCAND", Q
, *I
, CurrentCost
));
850 Candidate
.RPDelta
= RPDelta
;
851 Candidate
.SCost
= CurrentCost
;
852 FoundCandidate
= BestCost
;
854 // Keep the old candidate if it's a better candidate. That is, don't use
855 // the subsequent tie breaker.
856 if (CurrSize
!= CandSize
)
861 // To avoid scheduling indeterminism, we need a tie breaker
862 // for the case when cost is identical for two nodes.
863 if (UseNewerCandidate
&& CurrentCost
== Candidate
.SCost
) {
864 if ((Q
.getID() == TopQID
&& (*I
)->NodeNum
< Candidate
.SU
->NodeNum
) ||
865 (Q
.getID() == BotQID
&& (*I
)->NodeNum
> Candidate
.SU
->NodeNum
)) {
866 LLVM_DEBUG(traceCandidate("TCAND", Q
, *I
, CurrentCost
));
868 Candidate
.RPDelta
= RPDelta
;
869 Candidate
.SCost
= CurrentCost
;
870 FoundCandidate
= NodeOrder
;
875 // Fall through to original instruction order.
876 // Only consider node order if Candidate was chosen from this Q.
877 if (FoundCandidate
== NoCand
)
880 return FoundCandidate
;
883 /// Pick the best candidate node from either the top or bottom queue.
884 SUnit
*ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode
) {
885 // Schedule as far as possible in the direction of no choice. This is most
886 // efficient, but also provides the best heuristics for CriticalPSets.
887 if (SUnit
*SU
= Bot
.pickOnlyChoice()) {
888 LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
892 if (SUnit
*SU
= Top
.pickOnlyChoice()) {
893 LLVM_DEBUG(dbgs() << "Picked only Top\n");
897 SchedCandidate BotCand
;
898 // Prefer bottom scheduling when heuristics are silent.
899 CandResult BotResult
=
900 pickNodeFromQueue(Bot
, DAG
->getBotRPTracker(), BotCand
);
901 assert(BotResult
!= NoCand
&& "failed to find the first candidate");
903 // If either Q has a single candidate that provides the least increase in
904 // Excess pressure, we can immediately schedule from that Q.
906 // RegionCriticalPSets summarizes the pressure within the scheduled region and
907 // affects picking from either Q. If scheduling in one direction must
908 // increase pressure for one of the excess PSets, then schedule in that
909 // direction first to provide more freedom in the other direction.
910 if (BotResult
== SingleExcess
|| BotResult
== SingleCritical
) {
911 LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");
915 // Check if the top Q has a better candidate.
916 SchedCandidate TopCand
;
917 CandResult TopResult
=
918 pickNodeFromQueue(Top
, DAG
->getTopRPTracker(), TopCand
);
919 assert(TopResult
!= NoCand
&& "failed to find the first candidate");
921 if (TopResult
== SingleExcess
|| TopResult
== SingleCritical
) {
922 LLVM_DEBUG(dbgs() << "Prefered Top Node\n");
926 // If either Q has a single candidate that minimizes pressure above the
927 // original region's pressure pick it.
928 if (BotResult
== SingleMax
) {
929 LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
933 if (TopResult
== SingleMax
) {
934 LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
938 if (TopCand
.SCost
> BotCand
.SCost
) {
939 LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");
943 // Otherwise prefer the bottom candidate in node order.
944 LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");
949 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
950 SUnit
*ConvergingVLIWScheduler::pickNode(bool &IsTopNode
) {
951 if (DAG
->top() == DAG
->bottom()) {
952 assert(Top
.Available
.empty() && Top
.Pending
.empty() &&
953 Bot
.Available
.empty() && Bot
.Pending
.empty() && "ReadyQ garbage");
958 SU
= Top
.pickOnlyChoice();
960 SchedCandidate TopCand
;
961 CandResult TopResult
=
962 pickNodeFromQueue(Top
, DAG
->getTopRPTracker(), TopCand
);
963 assert(TopResult
!= NoCand
&& "failed to find the first candidate");
968 } else if (ForceBottomUp
) {
969 SU
= Bot
.pickOnlyChoice();
971 SchedCandidate BotCand
;
972 CandResult BotResult
=
973 pickNodeFromQueue(Bot
, DAG
->getBotRPTracker(), BotCand
);
974 assert(BotResult
!= NoCand
&& "failed to find the first candidate");
980 SU
= pickNodeBidrectional(IsTopNode
);
982 if (SU
->isTopReady())
984 if (SU
->isBottomReady())
987 LLVM_DEBUG(dbgs() << "*** " << (IsTopNode
? "Top" : "Bottom")
988 << " Scheduling instruction in cycle "
989 << (IsTopNode
? Top
.CurrCycle
: Bot
.CurrCycle
) << " ("
990 << reportPackets() << ")\n";
995 /// Update the scheduler's state after scheduling a node. This is the same node
996 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs
997 /// to update it's state based on the current cycle before MachineSchedStrategy
999 void ConvergingVLIWScheduler::schedNode(SUnit
*SU
, bool IsTopNode
) {
1002 SU
->TopReadyCycle
= Top
.CurrCycle
;
1005 SU
->BotReadyCycle
= Bot
.CurrCycle
;