1 //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the ResourcePriorityQueue class, which is a
10 // SchedulingPriorityQueue that prioritizes instructions using DFA state to
11 // reduce the length of the critical path through the basic block
13 // The scheduler is basically a top-down adaptable list scheduler with DFA
14 // resource tracking added to the cost function.
15 // DFA is queried as a state machine to model "packets/bundles" during
16 // schedule. Currently packets/bundles are discarded at the end of
17 // scheduling, affecting only order of instructions.
19 //===----------------------------------------------------------------------===//
21 #include "llvm/CodeGen/ResourcePriorityQueue.h"
22 #include "llvm/CodeGen/MachineInstr.h"
23 #include "llvm/CodeGen/SelectionDAGNodes.h"
24 #include "llvm/CodeGen/TargetLowering.h"
25 #include "llvm/CodeGen/TargetSubtargetInfo.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include "llvm/Target/TargetMachine.h"
33 #define DEBUG_TYPE "scheduler"
35 static cl::opt
<bool> DisableDFASched("disable-dfa-sched", cl::Hidden
,
36 cl::ZeroOrMore
, cl::init(false),
37 cl::desc("Disable use of DFA during scheduling"));
39 static cl::opt
<int> RegPressureThreshold(
40 "dfa-sched-reg-pressure-threshold", cl::Hidden
, cl::ZeroOrMore
, cl::init(5),
41 cl::desc("Track reg pressure and switch priority to in-depth"));
43 ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel
*IS
)
44 : Picker(this), InstrItins(IS
->MF
->getSubtarget().getInstrItineraryData()) {
45 const TargetSubtargetInfo
&STI
= IS
->MF
->getSubtarget();
46 TRI
= STI
.getRegisterInfo();
48 TII
= STI
.getInstrInfo();
49 ResourcesModel
.reset(TII
->CreateTargetScheduleState(STI
));
50 // This hard requirement could be relaxed, but for now
51 // do not let it proceed.
52 assert(ResourcesModel
&& "Unimplemented CreateTargetScheduleState.");
54 unsigned NumRC
= TRI
->getNumRegClasses();
55 RegLimit
.resize(NumRC
);
56 RegPressure
.resize(NumRC
);
57 std::fill(RegLimit
.begin(), RegLimit
.end(), 0);
58 std::fill(RegPressure
.begin(), RegPressure
.end(), 0);
59 for (const TargetRegisterClass
*RC
: TRI
->regclasses())
60 RegLimit
[RC
->getID()] = TRI
->getRegPressureLimit(RC
, *IS
->MF
);
62 ParallelLiveRanges
= 0;
63 HorizontalVerticalBalance
= 0;
67 ResourcePriorityQueue::numberRCValPredInSU(SUnit
*SU
, unsigned RCId
) {
68 unsigned NumberDeps
= 0;
69 for (SDep
&Pred
: SU
->Preds
) {
73 SUnit
*PredSU
= Pred
.getSUnit();
74 const SDNode
*ScegN
= PredSU
->getNode();
79 // If value is passed to CopyToReg, it is probably
81 switch (ScegN
->getOpcode()) {
83 case ISD::TokenFactor
: break;
84 case ISD::CopyFromReg
: NumberDeps
++; break;
85 case ISD::CopyToReg
: break;
86 case ISD::INLINEASM
: break;
87 case ISD::INLINEASM_BR
: break;
89 if (!ScegN
->isMachineOpcode())
92 for (unsigned i
= 0, e
= ScegN
->getNumValues(); i
!= e
; ++i
) {
93 MVT VT
= ScegN
->getSimpleValueType(i
);
94 if (TLI
->isTypeLegal(VT
)
95 && (TLI
->getRegClassFor(VT
)->getID() == RCId
)) {
104 unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit
*SU
,
106 unsigned NumberDeps
= 0;
107 for (const SDep
&Succ
: SU
->Succs
) {
111 SUnit
*SuccSU
= Succ
.getSUnit();
112 const SDNode
*ScegN
= SuccSU
->getNode();
116 // If value is passed to CopyToReg, it is probably
118 switch (ScegN
->getOpcode()) {
120 case ISD::TokenFactor
: break;
121 case ISD::CopyFromReg
: break;
122 case ISD::CopyToReg
: NumberDeps
++; break;
123 case ISD::INLINEASM
: break;
124 case ISD::INLINEASM_BR
: break;
126 if (!ScegN
->isMachineOpcode())
129 for (unsigned i
= 0, e
= ScegN
->getNumOperands(); i
!= e
; ++i
) {
130 const SDValue
&Op
= ScegN
->getOperand(i
);
131 MVT VT
= Op
.getNode()->getSimpleValueType(Op
.getResNo());
132 if (TLI
->isTypeLegal(VT
)
133 && (TLI
->getRegClassFor(VT
)->getID() == RCId
)) {
142 static unsigned numberCtrlDepsInSU(SUnit
*SU
) {
143 unsigned NumberDeps
= 0;
144 for (const SDep
&Succ
: SU
->Succs
)
151 static unsigned numberCtrlPredInSU(SUnit
*SU
) {
152 unsigned NumberDeps
= 0;
153 for (SDep
&Pred
: SU
->Preds
)
161 /// Initialize nodes.
163 void ResourcePriorityQueue::initNodes(std::vector
<SUnit
> &sunits
) {
165 NumNodesSolelyBlocking
.resize(SUnits
->size(), 0);
167 for (unsigned i
= 0, e
= SUnits
->size(); i
!= e
; ++i
) {
168 SUnit
*SU
= &(*SUnits
)[i
];
169 initNumRegDefsLeft(SU
);
174 /// This heuristic is used if DFA scheduling is not desired
175 /// for some VLIW platform.
176 bool resource_sort::operator()(const SUnit
*LHS
, const SUnit
*RHS
) const {
177 // The isScheduleHigh flag allows nodes with wraparound dependencies that
178 // cannot easily be modeled as edges with latencies to be scheduled as
179 // soon as possible in a top-down schedule.
180 if (LHS
->isScheduleHigh
&& !RHS
->isScheduleHigh
)
183 if (!LHS
->isScheduleHigh
&& RHS
->isScheduleHigh
)
186 unsigned LHSNum
= LHS
->NodeNum
;
187 unsigned RHSNum
= RHS
->NodeNum
;
189 // The most important heuristic is scheduling the critical path.
190 unsigned LHSLatency
= PQ
->getLatency(LHSNum
);
191 unsigned RHSLatency
= PQ
->getLatency(RHSNum
);
192 if (LHSLatency
< RHSLatency
) return true;
193 if (LHSLatency
> RHSLatency
) return false;
195 // After that, if two nodes have identical latencies, look to see if one will
196 // unblock more other nodes than the other.
197 unsigned LHSBlocked
= PQ
->getNumSolelyBlockNodes(LHSNum
);
198 unsigned RHSBlocked
= PQ
->getNumSolelyBlockNodes(RHSNum
);
199 if (LHSBlocked
< RHSBlocked
) return true;
200 if (LHSBlocked
> RHSBlocked
) return false;
202 // Finally, just to provide a stable ordering, use the node number as a
204 return LHSNum
< RHSNum
;
208 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
209 /// of SU, return it, otherwise return null.
210 SUnit
*ResourcePriorityQueue::getSingleUnscheduledPred(SUnit
*SU
) {
211 SUnit
*OnlyAvailablePred
= nullptr;
212 for (const SDep
&Pred
: SU
->Preds
) {
213 SUnit
&PredSU
= *Pred
.getSUnit();
214 if (!PredSU
.isScheduled
) {
215 // We found an available, but not scheduled, predecessor. If it's the
216 // only one we have found, keep track of it... otherwise give up.
217 if (OnlyAvailablePred
&& OnlyAvailablePred
!= &PredSU
)
219 OnlyAvailablePred
= &PredSU
;
222 return OnlyAvailablePred
;
225 void ResourcePriorityQueue::push(SUnit
*SU
) {
226 // Look at all of the successors of this node. Count the number of nodes that
227 // this node is the sole unscheduled node for.
228 unsigned NumNodesBlocking
= 0;
229 for (const SDep
&Succ
: SU
->Succs
)
230 if (getSingleUnscheduledPred(Succ
.getSUnit()) == SU
)
233 NumNodesSolelyBlocking
[SU
->NodeNum
] = NumNodesBlocking
;
237 /// Check if scheduling of this SU is possible
238 /// in the current packet.
239 bool ResourcePriorityQueue::isResourceAvailable(SUnit
*SU
) {
240 if (!SU
|| !SU
->getNode())
243 // If this is a compound instruction,
244 // it is likely to be a call. Do not delay it.
245 if (SU
->getNode()->getGluedNode())
248 // First see if the pipeline could receive this instruction
249 // in the current cycle.
250 if (SU
->getNode()->isMachineOpcode())
251 switch (SU
->getNode()->getMachineOpcode()) {
253 if (!ResourcesModel
->canReserveResources(&TII
->get(
254 SU
->getNode()->getMachineOpcode())))
257 case TargetOpcode::EXTRACT_SUBREG
:
258 case TargetOpcode::INSERT_SUBREG
:
259 case TargetOpcode::SUBREG_TO_REG
:
260 case TargetOpcode::REG_SEQUENCE
:
261 case TargetOpcode::IMPLICIT_DEF
:
265 // Now see if there are no other dependencies
266 // to instructions already in the packet.
267 for (unsigned i
= 0, e
= Packet
.size(); i
!= e
; ++i
)
268 for (const SDep
&Succ
: Packet
[i
]->Succs
) {
269 // Since we do not add pseudos to packets, might as well
270 // ignore order deps.
274 if (Succ
.getSUnit() == SU
)
281 /// Keep track of available resources.
282 void ResourcePriorityQueue::reserveResources(SUnit
*SU
) {
283 // If this SU does not fit in the packet
285 if (!isResourceAvailable(SU
) || SU
->getNode()->getGluedNode()) {
286 ResourcesModel
->clearResources();
290 if (SU
->getNode() && SU
->getNode()->isMachineOpcode()) {
291 switch (SU
->getNode()->getMachineOpcode()) {
293 ResourcesModel
->reserveResources(&TII
->get(
294 SU
->getNode()->getMachineOpcode()));
296 case TargetOpcode::EXTRACT_SUBREG
:
297 case TargetOpcode::INSERT_SUBREG
:
298 case TargetOpcode::SUBREG_TO_REG
:
299 case TargetOpcode::REG_SEQUENCE
:
300 case TargetOpcode::IMPLICIT_DEF
:
303 Packet
.push_back(SU
);
305 // Forcefully end packet for PseudoOps.
307 ResourcesModel
->clearResources();
311 // If packet is now full, reset the state so in the next cycle
313 if (Packet
.size() >= InstrItins
->SchedModel
.IssueWidth
) {
314 ResourcesModel
->clearResources();
319 int ResourcePriorityQueue::rawRegPressureDelta(SUnit
*SU
, unsigned RCId
) {
322 if (!SU
|| !SU
->getNode() || !SU
->getNode()->isMachineOpcode())
326 for (unsigned i
= 0, e
= SU
->getNode()->getNumValues(); i
!= e
; ++i
) {
327 MVT VT
= SU
->getNode()->getSimpleValueType(i
);
328 if (TLI
->isTypeLegal(VT
)
329 && TLI
->getRegClassFor(VT
)
330 && TLI
->getRegClassFor(VT
)->getID() == RCId
)
331 RegBalance
+= numberRCValSuccInSU(SU
, RCId
);
334 for (unsigned i
= 0, e
= SU
->getNode()->getNumOperands(); i
!= e
; ++i
) {
335 const SDValue
&Op
= SU
->getNode()->getOperand(i
);
336 MVT VT
= Op
.getNode()->getSimpleValueType(Op
.getResNo());
337 if (isa
<ConstantSDNode
>(Op
.getNode()))
340 if (TLI
->isTypeLegal(VT
) && TLI
->getRegClassFor(VT
)
341 && TLI
->getRegClassFor(VT
)->getID() == RCId
)
342 RegBalance
-= numberRCValPredInSU(SU
, RCId
);
347 /// Estimates change in reg pressure from this SU.
348 /// It is achieved by trivial tracking of defined
349 /// and used vregs in dependent instructions.
350 /// The RawPressure flag makes this function to ignore
351 /// existing reg file sizes, and report raw def/use
353 int ResourcePriorityQueue::regPressureDelta(SUnit
*SU
, bool RawPressure
) {
356 if (!SU
|| !SU
->getNode() || !SU
->getNode()->isMachineOpcode())
360 for (const TargetRegisterClass
*RC
: TRI
->regclasses())
361 RegBalance
+= rawRegPressureDelta(SU
, RC
->getID());
364 for (const TargetRegisterClass
*RC
: TRI
->regclasses()) {
365 if ((RegPressure
[RC
->getID()] +
366 rawRegPressureDelta(SU
, RC
->getID()) > 0) &&
367 (RegPressure
[RC
->getID()] +
368 rawRegPressureDelta(SU
, RC
->getID()) >= RegLimit
[RC
->getID()]))
369 RegBalance
+= rawRegPressureDelta(SU
, RC
->getID());
376 // Constants used to denote relative importance of
377 // heuristic components for cost computation.
378 static const unsigned PriorityOne
= 200;
379 static const unsigned PriorityTwo
= 50;
380 static const unsigned PriorityThree
= 15;
381 static const unsigned PriorityFour
= 5;
382 static const unsigned ScaleOne
= 20;
383 static const unsigned ScaleTwo
= 10;
384 static const unsigned ScaleThree
= 5;
385 static const unsigned FactorOne
= 2;
387 /// Returns single number reflecting benefit of scheduling SU
388 /// in the current cycle.
389 int ResourcePriorityQueue::SUSchedulingCost(SUnit
*SU
) {
390 // Initial trivial priority.
393 // Do not waste time on a node that is already scheduled.
397 // Forced priority is high.
398 if (SU
->isScheduleHigh
)
399 ResCount
+= PriorityOne
;
401 // Adaptable scheduling
402 // A small, but very parallel
403 // region, where reg pressure is an issue.
404 if (HorizontalVerticalBalance
> RegPressureThreshold
) {
405 // Critical path first
406 ResCount
+= (SU
->getHeight() * ScaleTwo
);
407 // If resources are available for it, multiply the
408 // chance of scheduling.
409 if (isResourceAvailable(SU
))
410 ResCount
<<= FactorOne
;
412 // Consider change to reg pressure from scheduling
414 ResCount
-= (regPressureDelta(SU
,true) * ScaleOne
);
416 // Default heuristic, greeady and
417 // critical path driven.
419 // Critical path first.
420 ResCount
+= (SU
->getHeight() * ScaleTwo
);
421 // Now see how many instructions is blocked by this SU.
422 ResCount
+= (NumNodesSolelyBlocking
[SU
->NodeNum
] * ScaleTwo
);
423 // If resources are available for it, multiply the
424 // chance of scheduling.
425 if (isResourceAvailable(SU
))
426 ResCount
<<= FactorOne
;
428 ResCount
-= (regPressureDelta(SU
) * ScaleTwo
);
431 // These are platform-specific things.
432 // Will need to go into the back end
433 // and accessed from here via a hook.
434 for (SDNode
*N
= SU
->getNode(); N
; N
= N
->getGluedNode()) {
435 if (N
->isMachineOpcode()) {
436 const MCInstrDesc
&TID
= TII
->get(N
->getMachineOpcode());
438 ResCount
+= (PriorityTwo
+ (ScaleThree
*N
->getNumValues()));
441 switch (N
->getOpcode()) {
443 case ISD::TokenFactor
:
444 case ISD::CopyFromReg
:
446 ResCount
+= PriorityFour
;
450 case ISD::INLINEASM_BR
:
451 ResCount
+= PriorityThree
;
459 /// Main resource tracking point.
460 void ResourcePriorityQueue::scheduledNode(SUnit
*SU
) {
461 // Use NULL entry as an event marker to reset
464 ResourcesModel
->clearResources();
469 const SDNode
*ScegN
= SU
->getNode();
470 // Update reg pressure tracking.
471 // First update current node.
472 if (ScegN
->isMachineOpcode()) {
473 // Estimate generated regs.
474 for (unsigned i
= 0, e
= ScegN
->getNumValues(); i
!= e
; ++i
) {
475 MVT VT
= ScegN
->getSimpleValueType(i
);
477 if (TLI
->isTypeLegal(VT
)) {
478 const TargetRegisterClass
*RC
= TLI
->getRegClassFor(VT
);
480 RegPressure
[RC
->getID()] += numberRCValSuccInSU(SU
, RC
->getID());
483 // Estimate killed regs.
484 for (unsigned i
= 0, e
= ScegN
->getNumOperands(); i
!= e
; ++i
) {
485 const SDValue
&Op
= ScegN
->getOperand(i
);
486 MVT VT
= Op
.getNode()->getSimpleValueType(Op
.getResNo());
488 if (TLI
->isTypeLegal(VT
)) {
489 const TargetRegisterClass
*RC
= TLI
->getRegClassFor(VT
);
491 if (RegPressure
[RC
->getID()] >
492 (numberRCValPredInSU(SU
, RC
->getID())))
493 RegPressure
[RC
->getID()] -= numberRCValPredInSU(SU
, RC
->getID());
494 else RegPressure
[RC
->getID()] = 0;
498 for (SDep
&Pred
: SU
->Preds
) {
499 if (Pred
.isCtrl() || (Pred
.getSUnit()->NumRegDefsLeft
== 0))
501 --Pred
.getSUnit()->NumRegDefsLeft
;
505 // Reserve resources for this SU.
506 reserveResources(SU
);
508 // Adjust number of parallel live ranges.
509 // Heuristic is simple - node with no data successors reduces
510 // number of live ranges. All others, increase it.
511 unsigned NumberNonControlDeps
= 0;
513 for (const SDep
&Succ
: SU
->Succs
) {
514 adjustPriorityOfUnscheduledPreds(Succ
.getSUnit());
516 NumberNonControlDeps
++;
519 if (!NumberNonControlDeps
) {
520 if (ParallelLiveRanges
>= SU
->NumPreds
)
521 ParallelLiveRanges
-= SU
->NumPreds
;
523 ParallelLiveRanges
= 0;
527 ParallelLiveRanges
+= SU
->NumRegDefsLeft
;
529 // Track parallel live chains.
530 HorizontalVerticalBalance
+= (SU
->Succs
.size() - numberCtrlDepsInSU(SU
));
531 HorizontalVerticalBalance
-= (SU
->Preds
.size() - numberCtrlPredInSU(SU
));
534 void ResourcePriorityQueue::initNumRegDefsLeft(SUnit
*SU
) {
535 unsigned NodeNumDefs
= 0;
536 for (SDNode
*N
= SU
->getNode(); N
; N
= N
->getGluedNode())
537 if (N
->isMachineOpcode()) {
538 const MCInstrDesc
&TID
= TII
->get(N
->getMachineOpcode());
539 // No register need be allocated for this.
540 if (N
->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF
) {
544 NodeNumDefs
= std::min(N
->getNumValues(), TID
.getNumDefs());
547 switch(N
->getOpcode()) {
549 case ISD::CopyFromReg
:
553 case ISD::INLINEASM_BR
:
558 SU
->NumRegDefsLeft
= NodeNumDefs
;
561 /// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
562 /// scheduled. If SU is not itself available, then there is at least one
563 /// predecessor node that has not been scheduled yet. If SU has exactly ONE
564 /// unscheduled predecessor, we want to increase its priority: it getting
565 /// scheduled will make this node available, so it is better than some other
566 /// node of the same priority that will not make a node available.
567 void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit
*SU
) {
568 if (SU
->isAvailable
) return; // All preds scheduled.
570 SUnit
*OnlyAvailablePred
= getSingleUnscheduledPred(SU
);
571 if (!OnlyAvailablePred
|| !OnlyAvailablePred
->isAvailable
)
574 // Okay, we found a single predecessor that is available, but not scheduled.
575 // Since it is available, it must be in the priority queue. First remove it.
576 remove(OnlyAvailablePred
);
578 // Reinsert the node into the priority queue, which recomputes its
579 // NumNodesSolelyBlocking value.
580 push(OnlyAvailablePred
);
584 /// Main access point - returns next instructions
585 /// to be placed in scheduling sequence.
586 SUnit
*ResourcePriorityQueue::pop() {
590 std::vector
<SUnit
*>::iterator Best
= Queue
.begin();
591 if (!DisableDFASched
) {
592 int BestCost
= SUSchedulingCost(*Best
);
593 for (auto I
= std::next(Queue
.begin()), E
= Queue
.end(); I
!= E
; ++I
) {
595 if (SUSchedulingCost(*I
) > BestCost
) {
596 BestCost
= SUSchedulingCost(*I
);
601 // Use default TD scheduling mechanism.
603 for (auto I
= std::next(Queue
.begin()), E
= Queue
.end(); I
!= E
; ++I
)
604 if (Picker(*Best
, *I
))
609 if (Best
!= std::prev(Queue
.end()))
610 std::swap(*Best
, Queue
.back());
618 void ResourcePriorityQueue::remove(SUnit
*SU
) {
619 assert(!Queue
.empty() && "Queue is empty!");
620 std::vector
<SUnit
*>::iterator I
= find(Queue
, SU
);
621 if (I
!= std::prev(Queue
.end()))
622 std::swap(*I
, Queue
.back());