//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//
#include "HexagonMachineScheduler.h"
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iomanip>
#include <limits>
#include <sstream>

using namespace llvm;
#define DEBUG_TYPE "machine-scheduler"

static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure",
    cl::Hidden, cl::ZeroOrMore, cl::init(false));

static cl::opt<bool> UseNewerCandidate("use-newer-candidate",
    cl::Hidden, cl::ZeroOrMore, cl::init(true));

static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
    cl::Hidden, cl::ZeroOrMore, cl::init(1));
// Check if the scheduler should penalize instructions that are available too
// early due to a zero-latency dependence.
static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
    cl::ZeroOrMore, cl::init(true));

// This value is used to determine if a register class is a high pressure set.
// We compute the maximum number of registers needed and divide it by the total
// available. Then, we compare the result to this value.
static cl::opt<float> RPThreshold("hexagon-reg-pressure", cl::Hidden,
    cl::init(0.75f), cl::desc("High register pressure threshold."));
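// For example (illustrative numbers): with a pressure set limit of 32
// registers and a maximum set pressure of 25 in the region, the set counts as
// high pressure because 25 > 32 * 0.75 = 24.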
/// Return true if there is a dependence between SUd and SUu.
static bool hasDependence(const SUnit *SUd, const SUnit *SUu,
                          const HexagonInstrInfo &QII) {
  if (SUd->Succs.size() == 0)
    return false;

  // Enable .cur formation.
  if (QII.mayBeCurLoad(*SUd->getInstr()))
    return false;

  if (QII.canExecuteInBundle(*SUd->getInstr(), *SUu->getInstr()))
    return false;

  for (const auto &S : SUd->Succs) {
    // Since we do not add pseudos to packets, might as well
    // ignore order dependencies.
    if (S.isCtrl())
      continue;

    if (S.getSUnit() == SUu && S.getLatency() > 0)
      return true;
  }
  return false;
}
/// Check if scheduling of this SU is possible
/// in the current packet.
/// It is _not_ precise (stateful), it is more like
/// another heuristic. Many corner cases are figured
/// empirically.
bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
  if (!SU || !SU->getInstr())
    return false;

  // First see if the pipeline could receive this instruction
  // in the current cycle.
  switch (SU->getInstr()->getOpcode()) {
  default:
    if (!ResourcesModel->canReserveResources(*SU->getInstr()))
      return false;
    break;
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
    break;
  }

  MachineBasicBlock *MBB = SU->getInstr()->getParent();
  auto &QST = MBB->getParent()->getSubtarget<HexagonSubtarget>();
  const auto &QII = *QST.getInstrInfo();

  // Now see if there are no other dependencies to instructions already
  // in the packet.
  if (IsTop) {
    for (unsigned i = 0, e = Packet.size(); i != e; ++i)
      if (hasDependence(Packet[i], SU, QII))
        return false;
  } else {
    for (unsigned i = 0, e = Packet.size(); i != e; ++i)
      if (hasDependence(SU, Packet[i], QII))
        return false;
  }
  return true;
}
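// Note the direction of the dependence test above: for the top-down zone the
// instructions already in the packet are the potential producers of SU, while
// for the bottom-up zone SU is the potential producer of the packet members.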
/// Keep track of available resources.
bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
  bool startNewCycle = false;
  // Artificially reset state.
  if (!SU) {
    ResourcesModel->clearResources();
    Packet.clear();
    TotalPackets++;
    return false;
  }
  // If this SU does not fit in the packet or the packet is now full
  // start a new one.
  if (!isResourceAvailable(SU, IsTop) ||
      Packet.size() >= SchedModel->getIssueWidth()) {
    ResourcesModel->clearResources();
    Packet.clear();
    TotalPackets++;
    startNewCycle = true;
  }

  switch (SU->getInstr()->getOpcode()) {
  default:
    ResourcesModel->reserveResources(*SU->getInstr());
    break;
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::CFI_INSTRUCTION:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
    break;
  }
  Packet.push_back(SU);

  LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
  for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
    LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
    LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
    LLVM_DEBUG(Packet[i]->getInstr()->dump());
  }

  return startNewCycle;
}
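// Note: reserveResources(nullptr, IsTop) takes the "artificially reset state"
// path above; pickOnlyChoice uses it to close the current packet and start a
// fresh one without adding an instruction.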
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
void VLIWMachineScheduler::schedule() {
  LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
                    << printMBBReference(*BB) << " " << BB->getName()
                    << " in_func " << BB->getParent()->getName()
                    << " at loop depth " << MLI->getLoopDepth(BB) << " \n");

  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  // Postprocess the DAG to add platform-specific artificial dependencies.
  postprocessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(this);

  LLVM_DEBUG(unsigned maxH = 0;
             for (unsigned su = 0, e = SUnits.size(); su != e;
                  ++su) if (SUnits[su].getHeight() > maxH) maxH =
                 SUnits[su].getHeight();
             dbgs() << "Max Height " << maxH << "\n";);
  LLVM_DEBUG(unsigned maxD = 0;
             for (unsigned su = 0, e = SUnits.size(); su != e;
                  ++su) if (SUnits[su].getDepth() > maxD) maxD =
                 SUnits[su].getDepth();
             dbgs() << "Max Depth " << maxD << "\n";);

  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(
        dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU)
      break;

    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}
void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
  DAG = static_cast<VLIWMachineScheduler*>(dag);
  SchedModel = DAG->getSchedModel();

  Top.init(DAG, SchedModel);
  Bot.init(DAG, SchedModel);

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
  // or are disabled, then these HazardRecs will be disabled.
  const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
  const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  delete Top.HazardRec;
  delete Bot.HazardRec;
  Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);

  delete Top.ResourceModel;
  delete Bot.ResourceModel;
  Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
  Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());

  const std::vector<unsigned> &MaxPressure =
      DAG->getRegPressure().MaxSetPressure;
  HighPressureSets.assign(MaxPressure.size(), 0);
  for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
    unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
    HighPressureSets[i] =
        ((float) MaxPressure[i] > ((float) Limit * RPThreshold));
  }

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}
void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  for (const SDep &PI : SU->Preds) {
    unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
    unsigned MinLatency = PI.getLatency();
    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
      SU->TopReadyCycle = PredReadyCycle + MinLatency;
  }
  Top.releaseNode(SU, SU->TopReadyCycle);
}
void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  assert(SU->getInstr() && "Scheduled SUnit must have instr");

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
    unsigned MinLatency = I->getLatency();
    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  }
  Bot.releaseNode(SU, SU->BotReadyCycle);
}
/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops that
/// can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
  if (HazardRec->isEnabled())
    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  if (IssueCount + uops > SchedModel->getIssueWidth())
    return true;

  return false;
}
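// For example (illustrative numbers): with an issue width of 4 and 3 micro-ops
// already issued in the current group, an SU that expands to 2 micro-ops is a
// hazard because 3 + 2 > 4, so it must wait for the next cycle.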
void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(SUnit *SU,
                                                             unsigned ReadyCycle) {
  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  if (ReadyCycle > CurrCycle || checkHazard(SU))
    Pending.push(SU);
  else
    Available.push(SU);
}
/// Move the boundary of scheduled code by one cycle.
void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
  unsigned Width = SchedModel->getIssueWidth();
  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;

  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
         "MinReadyCycle uninitialized");
  unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);

  if (!HazardRec->isEnabled()) {
    // Bypass HazardRec virtual calls.
    CurrCycle = NextCycle;
  } else {
    // Bypass getHazardType calls in case of long latency.
    for (; CurrCycle != NextCycle; ++CurrCycle) {
      if (isTop())
        HazardRec->AdvanceCycle();
      else
        HazardRec->RecedeCycle();
    }
  }
  CheckPending = true;

  LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
                    << CurrCycle << '\n');
}
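// For example (illustrative numbers): with an issue width of 4, an IssueCount
// of 6 carries over as 6 - 4 = 2 into the next cycle, while an IssueCount of 3
// resets to 0.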
/// Move the boundary of scheduled code by one SUnit.
void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
  bool startNewCycle = false;

  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }

  // Update the DFA model.
  startNewCycle = ResourceModel->reserveResources(SU, isTop());

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
  if (startNewCycle) {
    LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  } else
    LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
                      << CurrCycle << '\n');
}
/// Release pending ready nodes into the available queue. This makes them
/// visible to heuristics.
void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin()+i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    if (ReadyCycle > CurrCycle)
      continue;

    if (checkHazard(SU))
      continue;

    Available.push(SU);
    Pending.remove(Pending.begin()+i);
    --i;
    --e;
  }
  CheckPending = false;
}
/// Remove SU from the ready set for this boundary.
void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
  if (Available.isInQueue(SU))
    Available.remove(Available.find(SU));
  else {
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
  }
}
/// If this queue only has one ready candidate, return it. As a side effect,
/// advance the cycle until at least one node is ready. If multiple instructions
/// are ready, return NULL.
SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  auto AdvanceCycle = [this]() {
    if (Available.empty())
      return true;
    if (Available.size() == 1 && Pending.size() > 0)
      return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
             getWeakLeft(*Available.begin(), isTop()) != 0;
    return false;
  };
  for (unsigned i = 0; AdvanceCycle(); ++i) {
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard"); (void)i;
    ResourceModel->reserveResources(nullptr, isTop());
    bumpCycle();
    releasePending();
  }

  if (Available.size() == 1)
    return *Available.begin();
  return nullptr;
}
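// In pickOnlyChoice above, AdvanceCycle returns true while the zone cannot make
// useful progress: either nothing is available yet, or the single available SU
// cannot be packetized (no resources, or outstanding weak edges) while other
// SUs are still pending. Each iteration resets the in-flight packet and bumps
// the cycle until that changes.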
#ifndef NDEBUG
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
                                             const ReadyQueue &Q, SUnit *SU,
                                             int Cost, PressureChange P) {
  dbgs() << Label << " " << Q.getName() << " ";
  if (P.isValid())
    dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
           << P.getUnitInc() << " ";
  else
    dbgs() << "         ";
  dbgs() << "cost(" << Cost << ")\t";
  DAG->dumpNode(*SU);
}
// Very detailed queue dump, to be used with higher verbosity levels.
void ConvergingVLIWScheduler::readyQueueVerboseDump(
    const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
    ReadyQueue &Q) {
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  dbgs() << ">>> " << Q.getName() << "\n";
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);
    std::stringstream dbgstr;
    dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
    dbgs() << dbgstr.str();
    SchedulingCost(Q, *I, Candidate, RPDelta, true);
    dbgs() << "\t";
    (*I)->getInstr()->dump();
  }
}
#endif
/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
/// of SU, return true (we may have duplicates)
static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
  if (SU->NumPredsLeft == 0)
    return false;

  for (auto &Pred : SU->Preds) {
    // We found an available, but not scheduled, predecessor.
    if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
      return false;
  }

  return true;
}
/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
/// of SU, return true (we may have duplicates)
static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
  if (SU->NumSuccsLeft == 0)
    return false;

  for (auto &Succ : SU->Succs) {
    // We found an available, but not scheduled, successor.
    if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
      return false;
  }

  return true;
}
/// Check if the instruction changes the register pressure of a register in the
/// high pressure set. The function returns a negative value if the pressure
/// decreases and a positive value if the pressure increases. If the instruction
/// doesn't use a high pressure register or doesn't change the register
/// pressure, then return 0.
int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
  PressureDiff &PD = DAG->getPressureDiff(SU);
  for (auto &P : PD) {
    if (!P.isValid())
      continue;

    // The pressure differences are computed bottom-up, so the comparison for
    // an increase is positive in the bottom direction, but negative in the
    // top-down direction.
    if (HighPressureSets[P.getPSet()])
      return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
  }

  return 0;
}
// Constants used to denote relative importance of
// heuristic components for cost computation.
static const unsigned PriorityOne = 200;
static const unsigned PriorityTwo = 50;
static const unsigned PriorityThree = 75;
static const unsigned ScaleTwo = 10;
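// For example (illustrative numbers): in SchedulingCost below, a latency-bound
// node at height 5 with resources available in the current packet starts from
// the trivial priority of 1, gains 5 * ScaleTwo = 50 for the critical path and
// PriorityTwo + PriorityThree = 125 for availability, for a running total of
// 176 before the register pressure and packet heuristics are applied.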
/// Single point to compute overall scheduling cost.
/// TODO: More heuristics will be used soon.
int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
                                            SchedCandidate &Candidate,
                                            RegPressureDelta &Delta,
                                            bool verbose) {
  // Initial trivial priority.
  int ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (!SU || SU->isScheduled)
    return ResCount;

  LLVM_DEBUG(if (verbose) dbgs()
             << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
  // Forced priority is high.
  if (SU->isScheduleHigh) {
    ResCount += PriorityOne;
    LLVM_DEBUG(dbgs() << "H|");
  }
  unsigned IsAvailableAmt = 0;
  // Critical path first.
  if (Q.getID() == TopQID) {
    if (Top.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getHeight() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Top.ResourceModel->isResourceAvailable(SU, true)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  } else {
    if (Bot.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getDepth() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  }
  unsigned NumNodesBlocking = 0;
  if (Q.getID() == TopQID) {
    // How many SUs does it block from scheduling?
    // Look at all of the successors of this node.
    // Count the number of nodes that
    // this node is the sole unscheduled node for.
    if (Top.isLatencyBound(SU))
      for (const SDep &SI : SU->Succs)
        if (isSingleUnscheduledPred(SI.getSUnit(), SU))
          ++NumNodesBlocking;
  } else {
    // How many unscheduled predecessors block this node?
    if (Bot.isLatencyBound(SU))
      for (const SDep &PI : SU->Preds)
        if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
          ++NumNodesBlocking;
  }
  ResCount += (NumNodesBlocking * ScaleTwo);

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
    dbgs() << dbgstr.str();
  });
  // Factor in reg pressure as a heuristic.
  if (!IgnoreBBRegPressure) {
    // Decrease priority by the amount that register pressure exceeds the limit.
    ResCount -= (Delta.Excess.getUnitInc()*PriorityOne);
    // Decrease priority if register pressure exceeds the limit.
    ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityOne);
    // Decrease priority slightly if register pressure would increase over the
    // current maximum.
    ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo);
    // If there are register pressure issues, then we remove the value added for
    // the instruction being available. The rationale is that we really don't
    // want to schedule an instruction that causes a spill.
    if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
        (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
         Delta.CurrentMax.getUnitInc()))
      ResCount -= IsAvailableAmt;
    LLVM_DEBUG(if (verbose) {
      dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
             << Delta.CriticalMax.getUnitInc() << "/"
             << Delta.CurrentMax.getUnitInc() << ")|";
    });
  }
  // Give a little extra priority to a .cur instruction if there is a resource
  // available for it.
  auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
  auto &QII = *QST.getInstrInfo();
  if (SU->isInstr() && QII.mayBeCurLoad(*SU->getInstr())) {
    if (Q.getID() == TopQID &&
        Top.ResourceModel->isResourceAvailable(SU, true)) {
      ResCount += PriorityTwo;
      LLVM_DEBUG(if (verbose) dbgs() << "C|");
    } else if (Q.getID() == BotQID &&
               Bot.ResourceModel->isResourceAvailable(SU, false)) {
      ResCount += PriorityTwo;
      LLVM_DEBUG(if (verbose) dbgs() << "C|");
    }
  }
  // Give preference to a zero latency instruction if the dependent
  // instruction is in the current packet.
  if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
    for (const SDep &PI : SU->Preds) {
      if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
          PI.getLatency() == 0 &&
          Top.ResourceModel->isInPacket(PI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
    for (const SDep &SI : SU->Succs) {
      if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
          SI.getLatency() == 0 &&
          Bot.ResourceModel->isInPacket(SI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  }
  // If the instruction has a non-zero latency dependence with an instruction in
  // the current packet, then it should not be scheduled yet. The case occurs
  // when the dependent instruction is scheduled in a new packet, so the
  // scheduler updates the current cycle and pending instructions become
  // available.
  if (CheckEarlyAvail) {
    if (Q.getID() == TopQID) {
      for (const auto &PI : SU->Preds) {
        if (PI.getLatency() > 0 &&
            Top.ResourceModel->isInPacket(PI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    } else {
      for (const auto &SI : SU->Succs) {
        if (SI.getLatency() > 0 &&
            Bot.ResourceModel->isInPacket(SI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    }
  }
  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "Total " << std::setw(4) << ResCount << ")";
    dbgs() << dbgstr.str();
  });

  return ResCount;
}
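// In SchedulingCost above, ResCount starts at the trivial priority of 1 and is
// raised for forced-high priority, critical-path height/depth, the nodes this
// SU alone unblocks, packet availability, .cur formation, and zero-latency
// packet affinity; it is lowered for register-pressure excess and for non-zero
// latency dependences on instructions already in the current packet.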
/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
pickNodeFromQueue(VLIWSchedBoundary &Zone, const RegPressureTracker &RPTracker,
                  SchedCandidate &Candidate) {
  ReadyQueue &Q = Zone.Available;
  LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
                 readyQueueVerboseDump(RPTracker, Candidate, Q);
             else Q.dump(););

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  // BestSU remains NULL if no top candidates beat the best existing candidate.
  CandResult FoundCandidate = NoCand;
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);

    int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);

    // Initialize the candidate if needed.
    if (!Candidate.SU) {
      LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = NodeOrder;
      continue;
    }
    // Choose node order for negative cost candidates. There is no good
    // candidate in this case.
    if (CurrentCost < 0 && Candidate.SCost < 0) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum)
          || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
      }
      continue;
    }

    // Best cost.
    if (CurrentCost > Candidate.SCost) {
      LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = BestCost;
      continue;
    }
    // Choose an instruction that does not depend on an artificial edge.
    unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
    unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
    if (CurrWeak != CandWeak) {
      if (CurrWeak < CandWeak) {
        LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = Weak;
      }
      continue;
    }
    if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
      unsigned CurrSize, CandSize;
      if (Q.getID() == TopQID) {
        CurrSize = (*I)->Succs.size();
        CandSize = Candidate.SU->Succs.size();
      } else {
        CurrSize = (*I)->Preds.size();
        CandSize = Candidate.SU->Preds.size();
      }
      if (CurrSize > CandSize) {
        LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = BestCost;
      }
      // Keep the old candidate if it's a better candidate. That is, don't use
      // the subsequent tie breaker.
      if (CurrSize != CandSize)
        continue;
    }
    // To avoid scheduling indeterminism, we need a tie breaker
    // for the case when cost is identical for two nodes.
    if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum)
          || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
        continue;
      }
    }

    // Fall through to original instruction order.
    // Only consider node order if Candidate was chosen from this Q.
    if (FoundCandidate == NoCand)
      continue;
  }
  return FoundCandidate;
}
/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    LLVM_DEBUG(dbgs() << "Picked only Top\n");
    IsTopNode = true;
    return SU;
  }
  SchedCandidate BotCand;
  // Prefer bottom scheduling when heuristics are silent.
  CandResult BotResult = pickNodeFromQueue(Bot,
                                           DAG->getBotRPTracker(), BotCand);
  assert(BotResult != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region and
  // affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotResult == SingleExcess || BotResult == SingleCritical) {
    LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");
    IsTopNode = false;
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  SchedCandidate TopCand;
  CandResult TopResult = pickNodeFromQueue(Top,
                                           DAG->getTopRPTracker(), TopCand);
  assert(TopResult != NoCand && "failed to find the first candidate");

  if (TopResult == SingleExcess || TopResult == SingleCritical) {
    LLVM_DEBUG(dbgs() << "Prefered Top Node\n");
    IsTopNode = true;
    return TopCand.SU;
  }
  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure pick it.
  if (BotResult == SingleMax) {
    LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
    IsTopNode = false;
    return BotCand.SU;
  }
  if (TopResult == SingleMax) {
    LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
    IsTopNode = true;
    return TopCand.SU;
  }
  if (TopCand.SCost > BotCand.SCost) {
    LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");
    IsTopNode = true;
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate in node order.
  LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");
  IsTopNode = false;
  return BotCand.SU;
}
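// pickNodeBidrectional above prefers, in order: a zone with only one possible
// choice, a candidate that is the single best for excess or critical pressure,
// a candidate that is the single best for the region's maximum pressure, the
// higher SchedulingCost, and finally the bottom candidate in node order.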
/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  if (ForceTopDown) {
    SU = Top.pickOnlyChoice();
    if (!SU) {
      SchedCandidate TopCand;
      CandResult TopResult =
        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
      assert(TopResult != NoCand && "failed to find the first candidate");
      (void)TopResult;
      SU = TopCand.SU;
    }
    IsTopNode = true;
  } else if (ForceBottomUp) {
    SU = Bot.pickOnlyChoice();
    if (!SU) {
      SchedCandidate BotCand;
      CandResult BotResult =
        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
      assert(BotResult != NoCand && "failed to find the first candidate");
      (void)BotResult;
      SU = BotCand.SU;
    }
    IsTopNode = false;
  } else {
    SU = pickNodeBidrectional(IsTopNode);
  }
  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
                    << " Scheduling instruction in cycle "
                    << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
                    << reportPackets() << ")\n";
             DAG->dumpNode(*SU));
  return SU;
}
/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
/// to update its state based on the current cycle before MachineSchedStrategy
/// does.
void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    Top.bumpNode(SU);
    SU->TopReadyCycle = Top.CurrCycle;
  } else {
    Bot.bumpNode(SU);
    SU->BotReadyCycle = Bot.CurrCycle;
  }
}