1 //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the ResourcePriorityQueue class, which is a
10 // SchedulingPriorityQueue that prioritizes instructions using DFA state to
11 // reduce the length of the critical path through the basic block
13 // The scheduler is basically a top-down adaptable list scheduler with DFA
14 // resource tracking added to the cost function.
15 // DFA is queried as a state machine to model "packets/bundles" during
16 // schedule. Currently packets/bundles are discarded at the end of
17 // scheduling, affecting only order of instructions.
19 //===----------------------------------------------------------------------===//
21 #include "llvm/CodeGen/ResourcePriorityQueue.h"
22 #include "llvm/CodeGen/DFAPacketizer.h"
23 #include "llvm/CodeGen/MachineInstr.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/CodeGen/SelectionDAGNodes.h"
26 #include "llvm/CodeGen/TargetInstrInfo.h"
27 #include "llvm/CodeGen/TargetLowering.h"
28 #include "llvm/CodeGen/TargetRegisterInfo.h"
29 #include "llvm/CodeGen/TargetSubtargetInfo.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include "llvm/Target/TargetMachine.h"
37 #define DEBUG_TYPE "scheduler"
39 static cl::opt
<bool> DisableDFASched("disable-dfa-sched", cl::Hidden
,
40 cl::ZeroOrMore
, cl::init(false),
41 cl::desc("Disable use of DFA during scheduling"));
43 static cl::opt
<int> RegPressureThreshold(
44 "dfa-sched-reg-pressure-threshold", cl::Hidden
, cl::ZeroOrMore
, cl::init(5),
45 cl::desc("Track reg pressure and switch priority to in-depth"));
47 ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel
*IS
)
48 : Picker(this), InstrItins(IS
->MF
->getSubtarget().getInstrItineraryData()) {
49 const TargetSubtargetInfo
&STI
= IS
->MF
->getSubtarget();
50 TRI
= STI
.getRegisterInfo();
52 TII
= STI
.getInstrInfo();
53 ResourcesModel
.reset(TII
->CreateTargetScheduleState(STI
));
54 // This hard requirement could be relaxed, but for now
55 // do not let it proceed.
56 assert(ResourcesModel
&& "Unimplemented CreateTargetScheduleState.");
58 unsigned NumRC
= TRI
->getNumRegClasses();
59 RegLimit
.resize(NumRC
);
60 RegPressure
.resize(NumRC
);
61 std::fill(RegLimit
.begin(), RegLimit
.end(), 0);
62 std::fill(RegPressure
.begin(), RegPressure
.end(), 0);
63 for (const TargetRegisterClass
*RC
: TRI
->regclasses())
64 RegLimit
[RC
->getID()] = TRI
->getRegPressureLimit(RC
, *IS
->MF
);
66 ParallelLiveRanges
= 0;
67 HorizontalVerticalBalance
= 0;
71 ResourcePriorityQueue::numberRCValPredInSU(SUnit
*SU
, unsigned RCId
) {
72 unsigned NumberDeps
= 0;
73 for (SDep
&Pred
: SU
->Preds
) {
77 SUnit
*PredSU
= Pred
.getSUnit();
78 const SDNode
*ScegN
= PredSU
->getNode();
83 // If value is passed to CopyToReg, it is probably
85 switch (ScegN
->getOpcode()) {
87 case ISD::TokenFactor
: break;
88 case ISD::CopyFromReg
: NumberDeps
++; break;
89 case ISD::CopyToReg
: break;
90 case ISD::INLINEASM
: break;
91 case ISD::INLINEASM_BR
: break;
93 if (!ScegN
->isMachineOpcode())
96 for (unsigned i
= 0, e
= ScegN
->getNumValues(); i
!= e
; ++i
) {
97 MVT VT
= ScegN
->getSimpleValueType(i
);
98 if (TLI
->isTypeLegal(VT
)
99 && (TLI
->getRegClassFor(VT
)->getID() == RCId
)) {
108 unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit
*SU
,
110 unsigned NumberDeps
= 0;
111 for (const SDep
&Succ
: SU
->Succs
) {
115 SUnit
*SuccSU
= Succ
.getSUnit();
116 const SDNode
*ScegN
= SuccSU
->getNode();
120 // If value is passed to CopyToReg, it is probably
122 switch (ScegN
->getOpcode()) {
124 case ISD::TokenFactor
: break;
125 case ISD::CopyFromReg
: break;
126 case ISD::CopyToReg
: NumberDeps
++; break;
127 case ISD::INLINEASM
: break;
128 case ISD::INLINEASM_BR
: break;
130 if (!ScegN
->isMachineOpcode())
133 for (unsigned i
= 0, e
= ScegN
->getNumOperands(); i
!= e
; ++i
) {
134 const SDValue
&Op
= ScegN
->getOperand(i
);
135 MVT VT
= Op
.getNode()->getSimpleValueType(Op
.getResNo());
136 if (TLI
->isTypeLegal(VT
)
137 && (TLI
->getRegClassFor(VT
)->getID() == RCId
)) {
146 static unsigned numberCtrlDepsInSU(SUnit
*SU
) {
147 unsigned NumberDeps
= 0;
148 for (const SDep
&Succ
: SU
->Succs
)
155 static unsigned numberCtrlPredInSU(SUnit
*SU
) {
156 unsigned NumberDeps
= 0;
157 for (SDep
&Pred
: SU
->Preds
)
165 /// Initialize nodes.
167 void ResourcePriorityQueue::initNodes(std::vector
<SUnit
> &sunits
) {
169 NumNodesSolelyBlocking
.resize(SUnits
->size(), 0);
171 for (unsigned i
= 0, e
= SUnits
->size(); i
!= e
; ++i
) {
172 SUnit
*SU
= &(*SUnits
)[i
];
173 initNumRegDefsLeft(SU
);
178 /// This heuristic is used if DFA scheduling is not desired
179 /// for some VLIW platform.
180 bool resource_sort::operator()(const SUnit
*LHS
, const SUnit
*RHS
) const {
181 // The isScheduleHigh flag allows nodes with wraparound dependencies that
182 // cannot easily be modeled as edges with latencies to be scheduled as
183 // soon as possible in a top-down schedule.
184 if (LHS
->isScheduleHigh
&& !RHS
->isScheduleHigh
)
187 if (!LHS
->isScheduleHigh
&& RHS
->isScheduleHigh
)
190 unsigned LHSNum
= LHS
->NodeNum
;
191 unsigned RHSNum
= RHS
->NodeNum
;
193 // The most important heuristic is scheduling the critical path.
194 unsigned LHSLatency
= PQ
->getLatency(LHSNum
);
195 unsigned RHSLatency
= PQ
->getLatency(RHSNum
);
196 if (LHSLatency
< RHSLatency
) return true;
197 if (LHSLatency
> RHSLatency
) return false;
199 // After that, if two nodes have identical latencies, look to see if one will
200 // unblock more other nodes than the other.
201 unsigned LHSBlocked
= PQ
->getNumSolelyBlockNodes(LHSNum
);
202 unsigned RHSBlocked
= PQ
->getNumSolelyBlockNodes(RHSNum
);
203 if (LHSBlocked
< RHSBlocked
) return true;
204 if (LHSBlocked
> RHSBlocked
) return false;
206 // Finally, just to provide a stable ordering, use the node number as a
208 return LHSNum
< RHSNum
;
212 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
213 /// of SU, return it, otherwise return null.
214 SUnit
*ResourcePriorityQueue::getSingleUnscheduledPred(SUnit
*SU
) {
215 SUnit
*OnlyAvailablePred
= nullptr;
216 for (const SDep
&Pred
: SU
->Preds
) {
217 SUnit
&PredSU
= *Pred
.getSUnit();
218 if (!PredSU
.isScheduled
) {
219 // We found an available, but not scheduled, predecessor. If it's the
220 // only one we have found, keep track of it... otherwise give up.
221 if (OnlyAvailablePred
&& OnlyAvailablePred
!= &PredSU
)
223 OnlyAvailablePred
= &PredSU
;
226 return OnlyAvailablePred
;
229 void ResourcePriorityQueue::push(SUnit
*SU
) {
230 // Look at all of the successors of this node. Count the number of nodes that
231 // this node is the sole unscheduled node for.
232 unsigned NumNodesBlocking
= 0;
233 for (const SDep
&Succ
: SU
->Succs
)
234 if (getSingleUnscheduledPred(Succ
.getSUnit()) == SU
)
237 NumNodesSolelyBlocking
[SU
->NodeNum
] = NumNodesBlocking
;
241 /// Check if scheduling of this SU is possible
242 /// in the current packet.
243 bool ResourcePriorityQueue::isResourceAvailable(SUnit
*SU
) {
244 if (!SU
|| !SU
->getNode())
247 // If this is a compound instruction,
248 // it is likely to be a call. Do not delay it.
249 if (SU
->getNode()->getGluedNode())
252 // First see if the pipeline could receive this instruction
253 // in the current cycle.
254 if (SU
->getNode()->isMachineOpcode())
255 switch (SU
->getNode()->getMachineOpcode()) {
257 if (!ResourcesModel
->canReserveResources(&TII
->get(
258 SU
->getNode()->getMachineOpcode())))
261 case TargetOpcode::EXTRACT_SUBREG
:
262 case TargetOpcode::INSERT_SUBREG
:
263 case TargetOpcode::SUBREG_TO_REG
:
264 case TargetOpcode::REG_SEQUENCE
:
265 case TargetOpcode::IMPLICIT_DEF
:
269 // Now see if there are no other dependencies
270 // to instructions already in the packet.
271 for (unsigned i
= 0, e
= Packet
.size(); i
!= e
; ++i
)
272 for (const SDep
&Succ
: Packet
[i
]->Succs
) {
273 // Since we do not add pseudos to packets, might as well
274 // ignore order deps.
278 if (Succ
.getSUnit() == SU
)
285 /// Keep track of available resources.
286 void ResourcePriorityQueue::reserveResources(SUnit
*SU
) {
287 // If this SU does not fit in the packet
289 if (!isResourceAvailable(SU
) || SU
->getNode()->getGluedNode()) {
290 ResourcesModel
->clearResources();
294 if (SU
->getNode() && SU
->getNode()->isMachineOpcode()) {
295 switch (SU
->getNode()->getMachineOpcode()) {
297 ResourcesModel
->reserveResources(&TII
->get(
298 SU
->getNode()->getMachineOpcode()));
300 case TargetOpcode::EXTRACT_SUBREG
:
301 case TargetOpcode::INSERT_SUBREG
:
302 case TargetOpcode::SUBREG_TO_REG
:
303 case TargetOpcode::REG_SEQUENCE
:
304 case TargetOpcode::IMPLICIT_DEF
:
307 Packet
.push_back(SU
);
309 // Forcefully end packet for PseudoOps.
311 ResourcesModel
->clearResources();
315 // If packet is now full, reset the state so in the next cycle
317 if (Packet
.size() >= InstrItins
->SchedModel
.IssueWidth
) {
318 ResourcesModel
->clearResources();
323 int ResourcePriorityQueue::rawRegPressureDelta(SUnit
*SU
, unsigned RCId
) {
326 if (!SU
|| !SU
->getNode() || !SU
->getNode()->isMachineOpcode())
330 for (unsigned i
= 0, e
= SU
->getNode()->getNumValues(); i
!= e
; ++i
) {
331 MVT VT
= SU
->getNode()->getSimpleValueType(i
);
332 if (TLI
->isTypeLegal(VT
)
333 && TLI
->getRegClassFor(VT
)
334 && TLI
->getRegClassFor(VT
)->getID() == RCId
)
335 RegBalance
+= numberRCValSuccInSU(SU
, RCId
);
338 for (unsigned i
= 0, e
= SU
->getNode()->getNumOperands(); i
!= e
; ++i
) {
339 const SDValue
&Op
= SU
->getNode()->getOperand(i
);
340 MVT VT
= Op
.getNode()->getSimpleValueType(Op
.getResNo());
341 if (isa
<ConstantSDNode
>(Op
.getNode()))
344 if (TLI
->isTypeLegal(VT
) && TLI
->getRegClassFor(VT
)
345 && TLI
->getRegClassFor(VT
)->getID() == RCId
)
346 RegBalance
-= numberRCValPredInSU(SU
, RCId
);
351 /// Estimates change in reg pressure from this SU.
352 /// It is achieved by trivial tracking of defined
353 /// and used vregs in dependent instructions.
354 /// The RawPressure flag makes this function to ignore
355 /// existing reg file sizes, and report raw def/use
357 int ResourcePriorityQueue::regPressureDelta(SUnit
*SU
, bool RawPressure
) {
360 if (!SU
|| !SU
->getNode() || !SU
->getNode()->isMachineOpcode())
364 for (const TargetRegisterClass
*RC
: TRI
->regclasses())
365 RegBalance
+= rawRegPressureDelta(SU
, RC
->getID());
368 for (const TargetRegisterClass
*RC
: TRI
->regclasses()) {
369 if ((RegPressure
[RC
->getID()] +
370 rawRegPressureDelta(SU
, RC
->getID()) > 0) &&
371 (RegPressure
[RC
->getID()] +
372 rawRegPressureDelta(SU
, RC
->getID()) >= RegLimit
[RC
->getID()]))
373 RegBalance
+= rawRegPressureDelta(SU
, RC
->getID());
380 // Constants used to denote relative importance of
381 // heuristic components for cost computation.
382 static const unsigned PriorityOne
= 200;
383 static const unsigned PriorityTwo
= 50;
384 static const unsigned PriorityThree
= 15;
385 static const unsigned PriorityFour
= 5;
386 static const unsigned ScaleOne
= 20;
387 static const unsigned ScaleTwo
= 10;
388 static const unsigned ScaleThree
= 5;
389 static const unsigned FactorOne
= 2;
391 /// Returns single number reflecting benefit of scheduling SU
392 /// in the current cycle.
393 int ResourcePriorityQueue::SUSchedulingCost(SUnit
*SU
) {
394 // Initial trivial priority.
397 // Do not waste time on a node that is already scheduled.
401 // Forced priority is high.
402 if (SU
->isScheduleHigh
)
403 ResCount
+= PriorityOne
;
405 // Adaptable scheduling
406 // A small, but very parallel
407 // region, where reg pressure is an issue.
408 if (HorizontalVerticalBalance
> RegPressureThreshold
) {
409 // Critical path first
410 ResCount
+= (SU
->getHeight() * ScaleTwo
);
411 // If resources are available for it, multiply the
412 // chance of scheduling.
413 if (isResourceAvailable(SU
))
414 ResCount
<<= FactorOne
;
416 // Consider change to reg pressure from scheduling
418 ResCount
-= (regPressureDelta(SU
,true) * ScaleOne
);
420 // Default heuristic, greeady and
421 // critical path driven.
423 // Critical path first.
424 ResCount
+= (SU
->getHeight() * ScaleTwo
);
425 // Now see how many instructions is blocked by this SU.
426 ResCount
+= (NumNodesSolelyBlocking
[SU
->NodeNum
] * ScaleTwo
);
427 // If resources are available for it, multiply the
428 // chance of scheduling.
429 if (isResourceAvailable(SU
))
430 ResCount
<<= FactorOne
;
432 ResCount
-= (regPressureDelta(SU
) * ScaleTwo
);
435 // These are platform-specific things.
436 // Will need to go into the back end
437 // and accessed from here via a hook.
438 for (SDNode
*N
= SU
->getNode(); N
; N
= N
->getGluedNode()) {
439 if (N
->isMachineOpcode()) {
440 const MCInstrDesc
&TID
= TII
->get(N
->getMachineOpcode());
442 ResCount
+= (PriorityTwo
+ (ScaleThree
*N
->getNumValues()));
445 switch (N
->getOpcode()) {
447 case ISD::TokenFactor
:
448 case ISD::CopyFromReg
:
450 ResCount
+= PriorityFour
;
454 case ISD::INLINEASM_BR
:
455 ResCount
+= PriorityThree
;
463 /// Main resource tracking point.
464 void ResourcePriorityQueue::scheduledNode(SUnit
*SU
) {
465 // Use NULL entry as an event marker to reset
468 ResourcesModel
->clearResources();
473 const SDNode
*ScegN
= SU
->getNode();
474 // Update reg pressure tracking.
475 // First update current node.
476 if (ScegN
->isMachineOpcode()) {
477 // Estimate generated regs.
478 for (unsigned i
= 0, e
= ScegN
->getNumValues(); i
!= e
; ++i
) {
479 MVT VT
= ScegN
->getSimpleValueType(i
);
481 if (TLI
->isTypeLegal(VT
)) {
482 const TargetRegisterClass
*RC
= TLI
->getRegClassFor(VT
);
484 RegPressure
[RC
->getID()] += numberRCValSuccInSU(SU
, RC
->getID());
487 // Estimate killed regs.
488 for (unsigned i
= 0, e
= ScegN
->getNumOperands(); i
!= e
; ++i
) {
489 const SDValue
&Op
= ScegN
->getOperand(i
);
490 MVT VT
= Op
.getNode()->getSimpleValueType(Op
.getResNo());
492 if (TLI
->isTypeLegal(VT
)) {
493 const TargetRegisterClass
*RC
= TLI
->getRegClassFor(VT
);
495 if (RegPressure
[RC
->getID()] >
496 (numberRCValPredInSU(SU
, RC
->getID())))
497 RegPressure
[RC
->getID()] -= numberRCValPredInSU(SU
, RC
->getID());
498 else RegPressure
[RC
->getID()] = 0;
502 for (SDep
&Pred
: SU
->Preds
) {
503 if (Pred
.isCtrl() || (Pred
.getSUnit()->NumRegDefsLeft
== 0))
505 --Pred
.getSUnit()->NumRegDefsLeft
;
509 // Reserve resources for this SU.
510 reserveResources(SU
);
512 // Adjust number of parallel live ranges.
513 // Heuristic is simple - node with no data successors reduces
514 // number of live ranges. All others, increase it.
515 unsigned NumberNonControlDeps
= 0;
517 for (const SDep
&Succ
: SU
->Succs
) {
518 adjustPriorityOfUnscheduledPreds(Succ
.getSUnit());
520 NumberNonControlDeps
++;
523 if (!NumberNonControlDeps
) {
524 if (ParallelLiveRanges
>= SU
->NumPreds
)
525 ParallelLiveRanges
-= SU
->NumPreds
;
527 ParallelLiveRanges
= 0;
531 ParallelLiveRanges
+= SU
->NumRegDefsLeft
;
533 // Track parallel live chains.
534 HorizontalVerticalBalance
+= (SU
->Succs
.size() - numberCtrlDepsInSU(SU
));
535 HorizontalVerticalBalance
-= (SU
->Preds
.size() - numberCtrlPredInSU(SU
));
538 void ResourcePriorityQueue::initNumRegDefsLeft(SUnit
*SU
) {
539 unsigned NodeNumDefs
= 0;
540 for (SDNode
*N
= SU
->getNode(); N
; N
= N
->getGluedNode())
541 if (N
->isMachineOpcode()) {
542 const MCInstrDesc
&TID
= TII
->get(N
->getMachineOpcode());
543 // No register need be allocated for this.
544 if (N
->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF
) {
548 NodeNumDefs
= std::min(N
->getNumValues(), TID
.getNumDefs());
551 switch(N
->getOpcode()) {
553 case ISD::CopyFromReg
:
557 case ISD::INLINEASM_BR
:
562 SU
->NumRegDefsLeft
= NodeNumDefs
;
565 /// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
566 /// scheduled. If SU is not itself available, then there is at least one
567 /// predecessor node that has not been scheduled yet. If SU has exactly ONE
568 /// unscheduled predecessor, we want to increase its priority: it getting
569 /// scheduled will make this node available, so it is better than some other
570 /// node of the same priority that will not make a node available.
571 void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit
*SU
) {
572 if (SU
->isAvailable
) return; // All preds scheduled.
574 SUnit
*OnlyAvailablePred
= getSingleUnscheduledPred(SU
);
575 if (!OnlyAvailablePred
|| !OnlyAvailablePred
->isAvailable
)
578 // Okay, we found a single predecessor that is available, but not scheduled.
579 // Since it is available, it must be in the priority queue. First remove it.
580 remove(OnlyAvailablePred
);
582 // Reinsert the node into the priority queue, which recomputes its
583 // NumNodesSolelyBlocking value.
584 push(OnlyAvailablePred
);
588 /// Main access point - returns next instructions
589 /// to be placed in scheduling sequence.
590 SUnit
*ResourcePriorityQueue::pop() {
594 std::vector
<SUnit
*>::iterator Best
= Queue
.begin();
595 if (!DisableDFASched
) {
596 int BestCost
= SUSchedulingCost(*Best
);
597 for (auto I
= std::next(Queue
.begin()), E
= Queue
.end(); I
!= E
; ++I
) {
599 if (SUSchedulingCost(*I
) > BestCost
) {
600 BestCost
= SUSchedulingCost(*I
);
605 // Use default TD scheduling mechanism.
607 for (auto I
= std::next(Queue
.begin()), E
= Queue
.end(); I
!= E
; ++I
)
608 if (Picker(*Best
, *I
))
613 if (Best
!= std::prev(Queue
.end()))
614 std::swap(*Best
, Queue
.back());
622 void ResourcePriorityQueue::remove(SUnit
*SU
) {
623 assert(!Queue
.empty() && "Queue is empty!");
624 std::vector
<SUnit
*>::iterator I
= find(Queue
, SU
);
625 if (I
!= std::prev(Queue
.end()))
626 std::swap(*I
, Queue
.back());