1 //===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/CodeGen/VLIWMachineScheduler.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/CodeGen/DFAPacketizer.h"
17 #include "llvm/CodeGen/MachineBasicBlock.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineInstr.h"
20 #include "llvm/CodeGen/MachineLoopInfo.h"
21 #include "llvm/CodeGen/RegisterClassInfo.h"
22 #include "llvm/CodeGen/RegisterPressure.h"
23 #include "llvm/CodeGen/ScheduleDAG.h"
24 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
25 #include "llvm/CodeGen/TargetInstrInfo.h"
26 #include "llvm/CodeGen/TargetOpcodes.h"
27 #include "llvm/CodeGen/TargetRegisterInfo.h"
28 #include "llvm/CodeGen/TargetSchedule.h"
29 #include "llvm/CodeGen/TargetSubtargetInfo.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/raw_ostream.h"
43 #define DEBUG_TYPE "machine-scheduler"
45 static cl::opt
<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden
,
46 cl::ZeroOrMore
, cl::init(false));
48 static cl::opt
<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden
,
49 cl::ZeroOrMore
, cl::init(true));
51 static cl::opt
<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
52 cl::Hidden
, cl::ZeroOrMore
,
55 // Check if the scheduler should penalize instructions that are available to
56 // early due to a zero-latency dependence.
57 static cl::opt
<bool> CheckEarlyAvail("check-early-avail", cl::Hidden
,
58 cl::ZeroOrMore
, cl::init(true));
60 // This value is used to determine if a register class is a high pressure set.
61 // We compute the maximum number of registers needed and divided by the total
62 // available. Then, we compare the result to this value.
63 static cl::opt
<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden
,
65 cl::desc("High register pressure threhold."));
67 VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo
&STI
,
68 const TargetSchedModel
*SM
)
69 : TII(STI
.getInstrInfo()), SchedModel(SM
) {
70 ResourcesModel
= createPacketizer(STI
);
72 // This hard requirement could be relaxed,
73 // but for now do not let it proceed.
74 assert(ResourcesModel
&& "Unimplemented CreateTargetScheduleState.");
76 Packet
.reserve(SchedModel
->getIssueWidth());
78 ResourcesModel
->clearResources();
81 void VLIWResourceModel::reset() {
83 ResourcesModel
->clearResources();
86 VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel
; }
88 /// Return true if there is a dependence between SUd and SUu.
89 bool VLIWResourceModel::hasDependence(const SUnit
*SUd
, const SUnit
*SUu
) {
90 if (SUd
->Succs
.size() == 0)
93 for (const auto &S
: SUd
->Succs
) {
94 // Since we do not add pseudos to packets, might as well
95 // ignore order dependencies.
99 if (S
.getSUnit() == SUu
&& S
.getLatency() > 0)
105 /// Check if scheduling of this SU is possible
106 /// in the current packet.
107 /// It is _not_ precise (statefull), it is more like
108 /// another heuristic. Many corner cases are figured
110 bool VLIWResourceModel::isResourceAvailable(SUnit
*SU
, bool IsTop
) {
111 if (!SU
|| !SU
->getInstr())
114 // First see if the pipeline could receive this instruction
115 // in the current cycle.
116 switch (SU
->getInstr()->getOpcode()) {
118 if (!ResourcesModel
->canReserveResources(*SU
->getInstr()))
121 case TargetOpcode::EXTRACT_SUBREG
:
122 case TargetOpcode::INSERT_SUBREG
:
123 case TargetOpcode::SUBREG_TO_REG
:
124 case TargetOpcode::REG_SEQUENCE
:
125 case TargetOpcode::IMPLICIT_DEF
:
126 case TargetOpcode::COPY
:
127 case TargetOpcode::INLINEASM
:
128 case TargetOpcode::INLINEASM_BR
:
132 // Now see if there are no other dependencies to instructions already
135 for (unsigned i
= 0, e
= Packet
.size(); i
!= e
; ++i
)
136 if (hasDependence(Packet
[i
], SU
))
139 for (unsigned i
= 0, e
= Packet
.size(); i
!= e
; ++i
)
140 if (hasDependence(SU
, Packet
[i
]))
146 /// Keep track of available resources.
147 bool VLIWResourceModel::reserveResources(SUnit
*SU
, bool IsTop
) {
148 bool startNewCycle
= false;
149 // Artificially reset state.
155 // If this SU does not fit in the packet or the packet is now full
157 if (!isResourceAvailable(SU
, IsTop
) ||
158 Packet
.size() >= SchedModel
->getIssueWidth()) {
161 startNewCycle
= true;
164 switch (SU
->getInstr()->getOpcode()) {
166 ResourcesModel
->reserveResources(*SU
->getInstr());
168 case TargetOpcode::EXTRACT_SUBREG
:
169 case TargetOpcode::INSERT_SUBREG
:
170 case TargetOpcode::SUBREG_TO_REG
:
171 case TargetOpcode::REG_SEQUENCE
:
172 case TargetOpcode::IMPLICIT_DEF
:
173 case TargetOpcode::KILL
:
174 case TargetOpcode::CFI_INSTRUCTION
:
175 case TargetOpcode::EH_LABEL
:
176 case TargetOpcode::COPY
:
177 case TargetOpcode::INLINEASM
:
178 case TargetOpcode::INLINEASM_BR
:
181 Packet
.push_back(SU
);
184 LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets
<< "]:\n");
185 for (unsigned i
= 0, e
= Packet
.size(); i
!= e
; ++i
) {
186 LLVM_DEBUG(dbgs() << "\t[" << i
<< "] SU(");
187 LLVM_DEBUG(dbgs() << Packet
[i
]->NodeNum
<< ")\t");
188 LLVM_DEBUG(Packet
[i
]->getInstr()->dump());
192 return startNewCycle
;
196 VLIWResourceModel::createPacketizer(const TargetSubtargetInfo
&STI
) const {
197 return STI
.getInstrInfo()->CreateTargetScheduleState(STI
);
200 /// schedule - Called back from MachineScheduler::runOnMachineFunction
201 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
202 /// only includes instructions that have DAG nodes, not scheduling boundaries.
203 void VLIWMachineScheduler::schedule() {
204 LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
205 << printMBBReference(*BB
) << " " << BB
->getName()
206 << " in_func " << BB
->getParent()->getName()
207 << " at loop depth " << MLI
->getLoopDepth(BB
) << " \n");
209 buildDAGWithRegPressure();
211 Topo
.InitDAGTopologicalSorting();
213 // Postprocess the DAG to add platform-specific artificial dependencies.
216 SmallVector
<SUnit
*, 8> TopRoots
, BotRoots
;
217 findRootsAndBiasEdges(TopRoots
, BotRoots
);
219 // Initialize the strategy before modifying the DAG.
220 SchedImpl
->initialize(this);
224 for (const SUnit
&SU
: SUnits
)
225 if (SU
.getHeight() > maxH
)
226 maxH
= SU
.getHeight();
227 dbgs() << "Max Height " << maxH
<< "\n";
231 for (const SUnit
&SU
: SUnits
)
232 if (SU
.getDepth() > maxD
)
233 maxD
= SU
.getDepth();
234 dbgs() << "Max Depth " << maxD
<< "\n";
240 initQueues(TopRoots
, BotRoots
);
242 bool IsTopNode
= false;
245 dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
246 SUnit
*SU
= SchedImpl
->pickNode(IsTopNode
);
250 if (!checkSchedLimit())
253 scheduleMI(SU
, IsTopNode
);
255 // Notify the scheduling strategy after updating the DAG.
256 SchedImpl
->schedNode(SU
, IsTopNode
);
258 updateQueues(SU
, IsTopNode
);
260 assert(CurrentTop
== CurrentBottom
&& "Nonempty unscheduled zone.");
265 dbgs() << "*** Final schedule for "
266 << printMBBReference(*begin()->getParent()) << " ***\n";
272 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI
*dag
) {
273 DAG
= static_cast<VLIWMachineScheduler
*>(dag
);
274 SchedModel
= DAG
->getSchedModel();
276 Top
.init(DAG
, SchedModel
);
277 Bot
.init(DAG
, SchedModel
);
279 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
280 // are disabled, then these HazardRecs will be disabled.
281 const InstrItineraryData
*Itin
= DAG
->getSchedModel()->getInstrItineraries();
282 const TargetSubtargetInfo
&STI
= DAG
->MF
.getSubtarget();
283 const TargetInstrInfo
*TII
= STI
.getInstrInfo();
284 delete Top
.HazardRec
;
285 delete Bot
.HazardRec
;
286 Top
.HazardRec
= TII
->CreateTargetMIHazardRecognizer(Itin
, DAG
);
287 Bot
.HazardRec
= TII
->CreateTargetMIHazardRecognizer(Itin
, DAG
);
289 delete Top
.ResourceModel
;
290 delete Bot
.ResourceModel
;
291 Top
.ResourceModel
= createVLIWResourceModel(STI
, DAG
->getSchedModel());
292 Bot
.ResourceModel
= createVLIWResourceModel(STI
, DAG
->getSchedModel());
294 const std::vector
<unsigned> &MaxPressure
=
295 DAG
->getRegPressure().MaxSetPressure
;
296 HighPressureSets
.assign(MaxPressure
.size(), false);
297 for (unsigned i
= 0, e
= MaxPressure
.size(); i
< e
; ++i
) {
298 unsigned Limit
= DAG
->getRegClassInfo()->getRegPressureSetLimit(i
);
299 HighPressureSets
[i
] =
300 ((float)MaxPressure
[i
] > ((float)Limit
* RPThreshold
));
303 assert((!ForceTopDown
|| !ForceBottomUp
) &&
304 "-misched-topdown incompatible with -misched-bottomup");
307 VLIWResourceModel
*ConvergingVLIWScheduler::createVLIWResourceModel(
308 const TargetSubtargetInfo
&STI
, const TargetSchedModel
*SchedModel
) const {
309 return new VLIWResourceModel(STI
, SchedModel
);
312 void ConvergingVLIWScheduler::releaseTopNode(SUnit
*SU
) {
313 for (const SDep
&PI
: SU
->Preds
) {
314 unsigned PredReadyCycle
= PI
.getSUnit()->TopReadyCycle
;
315 unsigned MinLatency
= PI
.getLatency();
317 Top
.MaxMinLatency
= std::max(MinLatency
, Top
.MaxMinLatency
);
319 if (SU
->TopReadyCycle
< PredReadyCycle
+ MinLatency
)
320 SU
->TopReadyCycle
= PredReadyCycle
+ MinLatency
;
323 if (!SU
->isScheduled
)
324 Top
.releaseNode(SU
, SU
->TopReadyCycle
);
327 void ConvergingVLIWScheduler::releaseBottomNode(SUnit
*SU
) {
328 assert(SU
->getInstr() && "Scheduled SUnit must have instr");
330 for (SUnit::succ_iterator I
= SU
->Succs
.begin(), E
= SU
->Succs
.end(); I
!= E
;
332 unsigned SuccReadyCycle
= I
->getSUnit()->BotReadyCycle
;
333 unsigned MinLatency
= I
->getLatency();
335 Bot
.MaxMinLatency
= std::max(MinLatency
, Bot
.MaxMinLatency
);
337 if (SU
->BotReadyCycle
< SuccReadyCycle
+ MinLatency
)
338 SU
->BotReadyCycle
= SuccReadyCycle
+ MinLatency
;
341 if (!SU
->isScheduled
)
342 Bot
.releaseNode(SU
, SU
->BotReadyCycle
);
345 ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
346 delete ResourceModel
;
350 /// Does this SU have a hazard within the current instruction group.
352 /// The scheduler supports two modes of hazard recognition. The first is the
353 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
354 /// supports highly complicated in-order reservation tables
355 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
357 /// The second is a streamlined mechanism that checks for hazards based on
358 /// simple counters that the scheduler itself maintains. It explicitly checks
359 /// for instruction dispatch limitations, including the number of micro-ops that
360 /// can dispatch per cycle.
362 /// TODO: Also check whether the SU must start a new group.
363 bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit
*SU
) {
364 if (HazardRec
->isEnabled())
365 return HazardRec
->getHazardType(SU
) != ScheduleHazardRecognizer::NoHazard
;
367 unsigned uops
= SchedModel
->getNumMicroOps(SU
->getInstr());
368 if (IssueCount
+ uops
> SchedModel
->getIssueWidth())
374 void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
375 SUnit
*SU
, unsigned ReadyCycle
) {
376 if (ReadyCycle
< MinReadyCycle
)
377 MinReadyCycle
= ReadyCycle
;
379 // Check for interlocks first. For the purpose of other heuristics, an
380 // instruction that cannot issue appears as if it's not in the ReadyQueue.
381 if (ReadyCycle
> CurrCycle
|| checkHazard(SU
))
388 /// Move the boundary of scheduled code by one cycle.
389 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
390 unsigned Width
= SchedModel
->getIssueWidth();
391 IssueCount
= (IssueCount
<= Width
) ? 0 : IssueCount
- Width
;
393 assert(MinReadyCycle
< std::numeric_limits
<unsigned>::max() &&
394 "MinReadyCycle uninitialized");
395 unsigned NextCycle
= std::max(CurrCycle
+ 1, MinReadyCycle
);
397 if (!HazardRec
->isEnabled()) {
398 // Bypass HazardRec virtual calls.
399 CurrCycle
= NextCycle
;
401 // Bypass getHazardType calls in case of long latency.
402 for (; CurrCycle
!= NextCycle
; ++CurrCycle
) {
404 HazardRec
->AdvanceCycle();
406 HazardRec
->RecedeCycle();
411 LLVM_DEBUG(dbgs() << "*** Next cycle " << Available
.getName() << " cycle "
412 << CurrCycle
<< '\n');
415 /// Move the boundary of scheduled code by one SUnit.
416 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit
*SU
) {
417 bool startNewCycle
= false;
419 // Update the reservation table.
420 if (HazardRec
->isEnabled()) {
421 if (!isTop() && SU
->isCall
) {
422 // Calls are scheduled with their preceding instructions. For bottom-up
423 // scheduling, clear the pipeline state before emitting.
426 HazardRec
->EmitInstruction(SU
);
430 startNewCycle
= ResourceModel
->reserveResources(SU
, isTop());
432 // Check the instruction group dispatch limit.
433 // TODO: Check if this SU must end a dispatch group.
434 IssueCount
+= SchedModel
->getNumMicroOps(SU
->getInstr());
436 LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle
<< '\n');
439 LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount
<< " at cycle "
440 << CurrCycle
<< '\n');
443 /// Release pending ready nodes in to the available queue. This makes them
444 /// visible to heuristics.
445 void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
446 // If the available queue is empty, it is safe to reset MinReadyCycle.
447 if (Available
.empty())
448 MinReadyCycle
= std::numeric_limits
<unsigned>::max();
450 // Check to see if any of the pending instructions are ready to issue. If
451 // so, add them to the available queue.
452 for (unsigned i
= 0, e
= Pending
.size(); i
!= e
; ++i
) {
453 SUnit
*SU
= *(Pending
.begin() + i
);
454 unsigned ReadyCycle
= isTop() ? SU
->TopReadyCycle
: SU
->BotReadyCycle
;
456 if (ReadyCycle
< MinReadyCycle
)
457 MinReadyCycle
= ReadyCycle
;
459 if (ReadyCycle
> CurrCycle
)
466 Pending
.remove(Pending
.begin() + i
);
470 CheckPending
= false;
473 /// Remove SU from the ready set for this boundary.
474 void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit
*SU
) {
475 if (Available
.isInQueue(SU
))
476 Available
.remove(Available
.find(SU
));
478 assert(Pending
.isInQueue(SU
) && "bad ready count");
479 Pending
.remove(Pending
.find(SU
));
483 /// If this queue only has one ready candidate, return it. As a side effect,
484 /// advance the cycle until at least one node is ready. If multiple instructions
485 /// are ready, return NULL.
486 SUnit
*ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
490 auto AdvanceCycle
= [this]() {
491 if (Available
.empty())
493 if (Available
.size() == 1 && Pending
.size() > 0)
494 return !ResourceModel
->isResourceAvailable(*Available
.begin(), isTop()) ||
495 getWeakLeft(*Available
.begin(), isTop()) != 0;
498 for (unsigned i
= 0; AdvanceCycle(); ++i
) {
499 assert(i
<= (HazardRec
->getMaxLookAhead() + MaxMinLatency
) &&
502 ResourceModel
->reserveResources(nullptr, isTop());
506 if (Available
.size() == 1)
507 return *Available
.begin();
512 void ConvergingVLIWScheduler::traceCandidate(const char *Label
,
513 const ReadyQueue
&Q
, SUnit
*SU
,
514 int Cost
, PressureChange P
) {
515 dbgs() << Label
<< " " << Q
.getName() << " ";
517 dbgs() << DAG
->TRI
->getRegPressureSetName(P
.getPSet()) << ":"
518 << P
.getUnitInc() << " ";
521 dbgs() << "cost(" << Cost
<< ")\t";
525 // Very detailed queue dump, to be used with higher verbosity levels.
526 void ConvergingVLIWScheduler::readyQueueVerboseDump(
527 const RegPressureTracker
&RPTracker
, SchedCandidate
&Candidate
,
529 RegPressureTracker
&TempTracker
= const_cast<RegPressureTracker
&>(RPTracker
);
531 dbgs() << ">>> " << Q
.getName() << "\n";
532 for (ReadyQueue::iterator I
= Q
.begin(), E
= Q
.end(); I
!= E
; ++I
) {
533 RegPressureDelta RPDelta
;
534 TempTracker
.getMaxPressureDelta((*I
)->getInstr(), RPDelta
,
535 DAG
->getRegionCriticalPSets(),
536 DAG
->getRegPressure().MaxSetPressure
);
537 std::stringstream dbgstr
;
538 dbgstr
<< "SU(" << std::setw(3) << (*I
)->NodeNum
<< ")";
539 dbgs() << dbgstr
.str();
540 SchedulingCost(Q
, *I
, Candidate
, RPDelta
, true);
542 (*I
)->getInstr()->dump();
548 /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
549 /// of SU, return true (we may have duplicates)
550 static inline bool isSingleUnscheduledPred(SUnit
*SU
, SUnit
*SU2
) {
551 if (SU
->NumPredsLeft
== 0)
554 for (auto &Pred
: SU
->Preds
) {
555 // We found an available, but not scheduled, predecessor.
556 if (!Pred
.getSUnit()->isScheduled
&& (Pred
.getSUnit() != SU2
))
563 /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
564 /// of SU, return true (we may have duplicates)
565 static inline bool isSingleUnscheduledSucc(SUnit
*SU
, SUnit
*SU2
) {
566 if (SU
->NumSuccsLeft
== 0)
569 for (auto &Succ
: SU
->Succs
) {
570 // We found an available, but not scheduled, successor.
571 if (!Succ
.getSUnit()->isScheduled
&& (Succ
.getSUnit() != SU2
))
577 /// Check if the instruction changes the register pressure of a register in the
578 /// high pressure set. The function returns a negative value if the pressure
579 /// decreases and a positive value is the pressure increases. If the instruction
580 /// doesn't use a high pressure register or doesn't change the register
581 /// pressure, then return 0.
582 int ConvergingVLIWScheduler::pressureChange(const SUnit
*SU
, bool isBotUp
) {
583 PressureDiff
&PD
= DAG
->getPressureDiff(SU
);
587 // The pressure differences are computed bottom-up, so the comparision for
588 // an increase is positive in the bottom direction, but negative in the
589 // top-down direction.
590 if (HighPressureSets
[P
.getPSet()])
591 return (isBotUp
? P
.getUnitInc() : -P
.getUnitInc());
596 /// Single point to compute overall scheduling cost.
597 /// TODO: More heuristics will be used soon.
598 int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue
&Q
, SUnit
*SU
,
599 SchedCandidate
&Candidate
,
600 RegPressureDelta
&Delta
,
602 // Initial trivial priority.
605 // Do not waste time on a node that is already scheduled.
606 if (!SU
|| SU
->isScheduled
)
609 LLVM_DEBUG(if (verbose
) dbgs()
610 << ((Q
.getID() == TopQID
) ? "(top|" : "(bot|"));
611 // Forced priority is high.
612 if (SU
->isScheduleHigh
) {
613 ResCount
+= PriorityOne
;
614 LLVM_DEBUG(dbgs() << "H|");
617 unsigned IsAvailableAmt
= 0;
618 // Critical path first.
619 if (Q
.getID() == TopQID
) {
620 if (Top
.isLatencyBound(SU
)) {
621 LLVM_DEBUG(if (verbose
) dbgs() << "LB|");
622 ResCount
+= (SU
->getHeight() * ScaleTwo
);
625 LLVM_DEBUG(if (verbose
) {
626 std::stringstream dbgstr
;
627 dbgstr
<< "h" << std::setw(3) << SU
->getHeight() << "|";
628 dbgs() << dbgstr
.str();
631 // If resources are available for it, multiply the
632 // chance of scheduling.
633 if (Top
.ResourceModel
->isResourceAvailable(SU
, true)) {
634 IsAvailableAmt
= (PriorityTwo
+ PriorityThree
);
635 ResCount
+= IsAvailableAmt
;
636 LLVM_DEBUG(if (verbose
) dbgs() << "A|");
638 LLVM_DEBUG(if (verbose
) dbgs() << " |");
640 if (Bot
.isLatencyBound(SU
)) {
641 LLVM_DEBUG(if (verbose
) dbgs() << "LB|");
642 ResCount
+= (SU
->getDepth() * ScaleTwo
);
645 LLVM_DEBUG(if (verbose
) {
646 std::stringstream dbgstr
;
647 dbgstr
<< "d" << std::setw(3) << SU
->getDepth() << "|";
648 dbgs() << dbgstr
.str();
651 // If resources are available for it, multiply the
652 // chance of scheduling.
653 if (Bot
.ResourceModel
->isResourceAvailable(SU
, false)) {
654 IsAvailableAmt
= (PriorityTwo
+ PriorityThree
);
655 ResCount
+= IsAvailableAmt
;
656 LLVM_DEBUG(if (verbose
) dbgs() << "A|");
658 LLVM_DEBUG(if (verbose
) dbgs() << " |");
661 unsigned NumNodesBlocking
= 0;
662 if (Q
.getID() == TopQID
) {
663 // How many SUs does it block from scheduling?
664 // Look at all of the successors of this node.
665 // Count the number of nodes that
666 // this node is the sole unscheduled node for.
667 if (Top
.isLatencyBound(SU
))
668 for (const SDep
&SI
: SU
->Succs
)
669 if (isSingleUnscheduledPred(SI
.getSUnit(), SU
))
672 // How many unscheduled predecessors block this node?
673 if (Bot
.isLatencyBound(SU
))
674 for (const SDep
&PI
: SU
->Preds
)
675 if (isSingleUnscheduledSucc(PI
.getSUnit(), SU
))
678 ResCount
+= (NumNodesBlocking
* ScaleTwo
);
680 LLVM_DEBUG(if (verbose
) {
681 std::stringstream dbgstr
;
682 dbgstr
<< "blk " << std::setw(2) << NumNodesBlocking
<< ")|";
683 dbgs() << dbgstr
.str();
686 // Factor in reg pressure as a heuristic.
687 if (!IgnoreBBRegPressure
) {
688 // Decrease priority by the amount that register pressure exceeds the limit.
689 ResCount
-= (Delta
.Excess
.getUnitInc() * PriorityOne
);
690 // Decrease priority if register pressure exceeds the limit.
691 ResCount
-= (Delta
.CriticalMax
.getUnitInc() * PriorityOne
);
692 // Decrease priority slightly if register pressure would increase over the
694 ResCount
-= (Delta
.CurrentMax
.getUnitInc() * PriorityTwo
);
695 // If there are register pressure issues, then we remove the value added for
696 // the instruction being available. The rationale is that we really don't
697 // want to schedule an instruction that causes a spill.
698 if (IsAvailableAmt
&& pressureChange(SU
, Q
.getID() != TopQID
) > 0 &&
699 (Delta
.Excess
.getUnitInc() || Delta
.CriticalMax
.getUnitInc() ||
700 Delta
.CurrentMax
.getUnitInc()))
701 ResCount
-= IsAvailableAmt
;
702 LLVM_DEBUG(if (verbose
) {
703 dbgs() << "RP " << Delta
.Excess
.getUnitInc() << "/"
704 << Delta
.CriticalMax
.getUnitInc() << "/"
705 << Delta
.CurrentMax
.getUnitInc() << ")|";
709 // Give preference to a zero latency instruction if the dependent
710 // instruction is in the current packet.
711 if (Q
.getID() == TopQID
&& getWeakLeft(SU
, true) == 0) {
712 for (const SDep
&PI
: SU
->Preds
) {
713 if (!PI
.getSUnit()->getInstr()->isPseudo() && PI
.isAssignedRegDep() &&
714 PI
.getLatency() == 0 &&
715 Top
.ResourceModel
->isInPacket(PI
.getSUnit())) {
716 ResCount
+= PriorityThree
;
717 LLVM_DEBUG(if (verbose
) dbgs() << "Z|");
720 } else if (Q
.getID() == BotQID
&& getWeakLeft(SU
, false) == 0) {
721 for (const SDep
&SI
: SU
->Succs
) {
722 if (!SI
.getSUnit()->getInstr()->isPseudo() && SI
.isAssignedRegDep() &&
723 SI
.getLatency() == 0 &&
724 Bot
.ResourceModel
->isInPacket(SI
.getSUnit())) {
725 ResCount
+= PriorityThree
;
726 LLVM_DEBUG(if (verbose
) dbgs() << "Z|");
731 // If the instruction has a non-zero latency dependence with an instruction in
732 // the current packet, then it should not be scheduled yet. The case occurs
733 // when the dependent instruction is scheduled in a new packet, so the
734 // scheduler updates the current cycle and pending instructions become
736 if (CheckEarlyAvail
) {
737 if (Q
.getID() == TopQID
) {
738 for (const auto &PI
: SU
->Preds
) {
739 if (PI
.getLatency() > 0 &&
740 Top
.ResourceModel
->isInPacket(PI
.getSUnit())) {
741 ResCount
-= PriorityOne
;
742 LLVM_DEBUG(if (verbose
) dbgs() << "D|");
746 for (const auto &SI
: SU
->Succs
) {
747 if (SI
.getLatency() > 0 &&
748 Bot
.ResourceModel
->isInPacket(SI
.getSUnit())) {
749 ResCount
-= PriorityOne
;
750 LLVM_DEBUG(if (verbose
) dbgs() << "D|");
756 LLVM_DEBUG(if (verbose
) {
757 std::stringstream dbgstr
;
758 dbgstr
<< "Total " << std::setw(4) << ResCount
<< ")";
759 dbgs() << dbgstr
.str();
765 /// Pick the best candidate from the top queue.
767 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
768 /// DAG building. To adjust for the current scheduling location we need to
769 /// maintain the number of vreg uses remaining to be top-scheduled.
770 ConvergingVLIWScheduler::CandResult
771 ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary
&Zone
,
772 const RegPressureTracker
&RPTracker
,
773 SchedCandidate
&Candidate
) {
774 ReadyQueue
&Q
= Zone
.Available
;
775 LLVM_DEBUG(if (SchedDebugVerboseLevel
> 1)
776 readyQueueVerboseDump(RPTracker
, Candidate
, Q
);
779 // getMaxPressureDelta temporarily modifies the tracker.
780 RegPressureTracker
&TempTracker
= const_cast<RegPressureTracker
&>(RPTracker
);
782 // BestSU remains NULL if no top candidates beat the best existing candidate.
783 CandResult FoundCandidate
= NoCand
;
784 for (ReadyQueue::iterator I
= Q
.begin(), E
= Q
.end(); I
!= E
; ++I
) {
785 RegPressureDelta RPDelta
;
786 TempTracker
.getMaxPressureDelta((*I
)->getInstr(), RPDelta
,
787 DAG
->getRegionCriticalPSets(),
788 DAG
->getRegPressure().MaxSetPressure
);
790 int CurrentCost
= SchedulingCost(Q
, *I
, Candidate
, RPDelta
, false);
792 // Initialize the candidate if needed.
794 LLVM_DEBUG(traceCandidate("DCAND", Q
, *I
, CurrentCost
));
796 Candidate
.RPDelta
= RPDelta
;
797 Candidate
.SCost
= CurrentCost
;
798 FoundCandidate
= NodeOrder
;
802 // Choose node order for negative cost candidates. There is no good
803 // candidate in this case.
804 if (CurrentCost
< 0 && Candidate
.SCost
< 0) {
805 if ((Q
.getID() == TopQID
&& (*I
)->NodeNum
< Candidate
.SU
->NodeNum
) ||
806 (Q
.getID() == BotQID
&& (*I
)->NodeNum
> Candidate
.SU
->NodeNum
)) {
807 LLVM_DEBUG(traceCandidate("NCAND", Q
, *I
, CurrentCost
));
809 Candidate
.RPDelta
= RPDelta
;
810 Candidate
.SCost
= CurrentCost
;
811 FoundCandidate
= NodeOrder
;
817 if (CurrentCost
> Candidate
.SCost
) {
818 LLVM_DEBUG(traceCandidate("CCAND", Q
, *I
, CurrentCost
));
820 Candidate
.RPDelta
= RPDelta
;
821 Candidate
.SCost
= CurrentCost
;
822 FoundCandidate
= BestCost
;
826 // Choose an instruction that does not depend on an artificial edge.
827 unsigned CurrWeak
= getWeakLeft(*I
, (Q
.getID() == TopQID
));
828 unsigned CandWeak
= getWeakLeft(Candidate
.SU
, (Q
.getID() == TopQID
));
829 if (CurrWeak
!= CandWeak
) {
830 if (CurrWeak
< CandWeak
) {
831 LLVM_DEBUG(traceCandidate("WCAND", Q
, *I
, CurrentCost
));
833 Candidate
.RPDelta
= RPDelta
;
834 Candidate
.SCost
= CurrentCost
;
835 FoundCandidate
= Weak
;
840 if (CurrentCost
== Candidate
.SCost
&& Zone
.isLatencyBound(*I
)) {
841 unsigned CurrSize
, CandSize
;
842 if (Q
.getID() == TopQID
) {
843 CurrSize
= (*I
)->Succs
.size();
844 CandSize
= Candidate
.SU
->Succs
.size();
846 CurrSize
= (*I
)->Preds
.size();
847 CandSize
= Candidate
.SU
->Preds
.size();
849 if (CurrSize
> CandSize
) {
850 LLVM_DEBUG(traceCandidate("SPCAND", Q
, *I
, CurrentCost
));
852 Candidate
.RPDelta
= RPDelta
;
853 Candidate
.SCost
= CurrentCost
;
854 FoundCandidate
= BestCost
;
856 // Keep the old candidate if it's a better candidate. That is, don't use
857 // the subsequent tie breaker.
858 if (CurrSize
!= CandSize
)
863 // To avoid scheduling indeterminism, we need a tie breaker
864 // for the case when cost is identical for two nodes.
865 if (UseNewerCandidate
&& CurrentCost
== Candidate
.SCost
) {
866 if ((Q
.getID() == TopQID
&& (*I
)->NodeNum
< Candidate
.SU
->NodeNum
) ||
867 (Q
.getID() == BotQID
&& (*I
)->NodeNum
> Candidate
.SU
->NodeNum
)) {
868 LLVM_DEBUG(traceCandidate("TCAND", Q
, *I
, CurrentCost
));
870 Candidate
.RPDelta
= RPDelta
;
871 Candidate
.SCost
= CurrentCost
;
872 FoundCandidate
= NodeOrder
;
877 // Fall through to original instruction order.
878 // Only consider node order if Candidate was chosen from this Q.
879 if (FoundCandidate
== NoCand
)
882 return FoundCandidate
;
885 /// Pick the best candidate node from either the top or bottom queue.
886 SUnit
*ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode
) {
887 // Schedule as far as possible in the direction of no choice. This is most
888 // efficient, but also provides the best heuristics for CriticalPSets.
889 if (SUnit
*SU
= Bot
.pickOnlyChoice()) {
890 LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
894 if (SUnit
*SU
= Top
.pickOnlyChoice()) {
895 LLVM_DEBUG(dbgs() << "Picked only Top\n");
899 SchedCandidate BotCand
;
900 // Prefer bottom scheduling when heuristics are silent.
901 CandResult BotResult
=
902 pickNodeFromQueue(Bot
, DAG
->getBotRPTracker(), BotCand
);
903 assert(BotResult
!= NoCand
&& "failed to find the first candidate");
905 // If either Q has a single candidate that provides the least increase in
906 // Excess pressure, we can immediately schedule from that Q.
908 // RegionCriticalPSets summarizes the pressure within the scheduled region and
909 // affects picking from either Q. If scheduling in one direction must
910 // increase pressure for one of the excess PSets, then schedule in that
911 // direction first to provide more freedom in the other direction.
912 if (BotResult
== SingleExcess
|| BotResult
== SingleCritical
) {
913 LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");
917 // Check if the top Q has a better candidate.
918 SchedCandidate TopCand
;
919 CandResult TopResult
=
920 pickNodeFromQueue(Top
, DAG
->getTopRPTracker(), TopCand
);
921 assert(TopResult
!= NoCand
&& "failed to find the first candidate");
923 if (TopResult
== SingleExcess
|| TopResult
== SingleCritical
) {
924 LLVM_DEBUG(dbgs() << "Prefered Top Node\n");
928 // If either Q has a single candidate that minimizes pressure above the
929 // original region's pressure pick it.
930 if (BotResult
== SingleMax
) {
931 LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
935 if (TopResult
== SingleMax
) {
936 LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
940 if (TopCand
.SCost
> BotCand
.SCost
) {
941 LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");
945 // Otherwise prefer the bottom candidate in node order.
946 LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");
951 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
952 SUnit
*ConvergingVLIWScheduler::pickNode(bool &IsTopNode
) {
953 if (DAG
->top() == DAG
->bottom()) {
954 assert(Top
.Available
.empty() && Top
.Pending
.empty() &&
955 Bot
.Available
.empty() && Bot
.Pending
.empty() && "ReadyQ garbage");
960 SU
= Top
.pickOnlyChoice();
962 SchedCandidate TopCand
;
963 CandResult TopResult
=
964 pickNodeFromQueue(Top
, DAG
->getTopRPTracker(), TopCand
);
965 assert(TopResult
!= NoCand
&& "failed to find the first candidate");
970 } else if (ForceBottomUp
) {
971 SU
= Bot
.pickOnlyChoice();
973 SchedCandidate BotCand
;
974 CandResult BotResult
=
975 pickNodeFromQueue(Bot
, DAG
->getBotRPTracker(), BotCand
);
976 assert(BotResult
!= NoCand
&& "failed to find the first candidate");
982 SU
= pickNodeBidrectional(IsTopNode
);
984 if (SU
->isTopReady())
986 if (SU
->isBottomReady())
989 LLVM_DEBUG(dbgs() << "*** " << (IsTopNode
? "Top" : "Bottom")
990 << " Scheduling instruction in cycle "
991 << (IsTopNode
? Top
.CurrCycle
: Bot
.CurrCycle
) << " ("
992 << reportPackets() << ")\n";
997 /// Update the scheduler's state after scheduling a node. This is the same node
998 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs
999 /// to update it's state based on the current cycle before MachineSchedStrategy
1001 void ConvergingVLIWScheduler::schedNode(SUnit
*SU
, bool IsTopNode
) {
1004 SU
->TopReadyCycle
= Top
.CurrCycle
;
1007 SU
->BotReadyCycle
= Bot
.CurrCycle
;