[ORC] Add std::tuple support to SimplePackedSerialization.
[llvm-project.git] / llvm / lib / CodeGen / SelectionDAG / ResourcePriorityQueue.cpp
blob55fe26eb64cdab489e85bd8e9e3302c3f24ca4c9
1 //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the ResourcePriorityQueue class, which is a
10 // SchedulingPriorityQueue that prioritizes instructions using DFA state to
11 // reduce the length of the critical path through the basic block
12 // on VLIW platforms.
13 // The scheduler is basically a top-down adaptable list scheduler with DFA
14 // resource tracking added to the cost function.
15 // DFA is queried as a state machine to model "packets/bundles" during
16 // schedule. Currently packets/bundles are discarded at the end of
17 // scheduling, affecting only order of instructions.
19 //===----------------------------------------------------------------------===//
21 #include "llvm/CodeGen/ResourcePriorityQueue.h"
22 #include "llvm/CodeGen/DFAPacketizer.h"
23 #include "llvm/CodeGen/MachineInstr.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/CodeGen/SelectionDAGNodes.h"
26 #include "llvm/CodeGen/TargetInstrInfo.h"
27 #include "llvm/CodeGen/TargetLowering.h"
28 #include "llvm/CodeGen/TargetRegisterInfo.h"
29 #include "llvm/CodeGen/TargetSubtargetInfo.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include "llvm/Target/TargetMachine.h"
35 using namespace llvm;
37 #define DEBUG_TYPE "scheduler"
39 static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
40 cl::ZeroOrMore, cl::init(false),
41 cl::desc("Disable use of DFA during scheduling"));
43 static cl::opt<int> RegPressureThreshold(
44 "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
45 cl::desc("Track reg pressure and switch priority to in-depth"));
47 ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
48 : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
49 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
50 TRI = STI.getRegisterInfo();
51 TLI = IS->TLI;
52 TII = STI.getInstrInfo();
53 ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
54 // This hard requirement could be relaxed, but for now
55 // do not let it proceed.
56 assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
58 unsigned NumRC = TRI->getNumRegClasses();
59 RegLimit.resize(NumRC);
60 RegPressure.resize(NumRC);
61 std::fill(RegLimit.begin(), RegLimit.end(), 0);
62 std::fill(RegPressure.begin(), RegPressure.end(), 0);
63 for (const TargetRegisterClass *RC : TRI->regclasses())
64 RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);
66 ParallelLiveRanges = 0;
67 HorizontalVerticalBalance = 0;
70 unsigned
71 ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
72 unsigned NumberDeps = 0;
73 for (SDep &Pred : SU->Preds) {
74 if (Pred.isCtrl())
75 continue;
77 SUnit *PredSU = Pred.getSUnit();
78 const SDNode *ScegN = PredSU->getNode();
80 if (!ScegN)
81 continue;
83 // If value is passed to CopyToReg, it is probably
84 // live outside BB.
85 switch (ScegN->getOpcode()) {
86 default: break;
87 case ISD::TokenFactor: break;
88 case ISD::CopyFromReg: NumberDeps++; break;
89 case ISD::CopyToReg: break;
90 case ISD::INLINEASM: break;
91 case ISD::INLINEASM_BR: break;
93 if (!ScegN->isMachineOpcode())
94 continue;
96 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
97 MVT VT = ScegN->getSimpleValueType(i);
98 if (TLI->isTypeLegal(VT)
99 && (TLI->getRegClassFor(VT)->getID() == RCId)) {
100 NumberDeps++;
101 break;
105 return NumberDeps;
108 unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
109 unsigned RCId) {
110 unsigned NumberDeps = 0;
111 for (const SDep &Succ : SU->Succs) {
112 if (Succ.isCtrl())
113 continue;
115 SUnit *SuccSU = Succ.getSUnit();
116 const SDNode *ScegN = SuccSU->getNode();
117 if (!ScegN)
118 continue;
120 // If value is passed to CopyToReg, it is probably
121 // live outside BB.
122 switch (ScegN->getOpcode()) {
123 default: break;
124 case ISD::TokenFactor: break;
125 case ISD::CopyFromReg: break;
126 case ISD::CopyToReg: NumberDeps++; break;
127 case ISD::INLINEASM: break;
128 case ISD::INLINEASM_BR: break;
130 if (!ScegN->isMachineOpcode())
131 continue;
133 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
134 const SDValue &Op = ScegN->getOperand(i);
135 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
136 if (TLI->isTypeLegal(VT)
137 && (TLI->getRegClassFor(VT)->getID() == RCId)) {
138 NumberDeps++;
139 break;
143 return NumberDeps;
146 static unsigned numberCtrlDepsInSU(SUnit *SU) {
147 unsigned NumberDeps = 0;
148 for (const SDep &Succ : SU->Succs)
149 if (Succ.isCtrl())
150 NumberDeps++;
152 return NumberDeps;
155 static unsigned numberCtrlPredInSU(SUnit *SU) {
156 unsigned NumberDeps = 0;
157 for (SDep &Pred : SU->Preds)
158 if (Pred.isCtrl())
159 NumberDeps++;
161 return NumberDeps;
165 /// Initialize nodes.
167 void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
168 SUnits = &sunits;
169 NumNodesSolelyBlocking.resize(SUnits->size(), 0);
171 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
172 SUnit *SU = &(*SUnits)[i];
173 initNumRegDefsLeft(SU);
174 SU->NodeQueueId = 0;
178 /// This heuristic is used if DFA scheduling is not desired
179 /// for some VLIW platform.
180 bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
181 // The isScheduleHigh flag allows nodes with wraparound dependencies that
182 // cannot easily be modeled as edges with latencies to be scheduled as
183 // soon as possible in a top-down schedule.
184 if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
185 return false;
187 if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
188 return true;
190 unsigned LHSNum = LHS->NodeNum;
191 unsigned RHSNum = RHS->NodeNum;
193 // The most important heuristic is scheduling the critical path.
194 unsigned LHSLatency = PQ->getLatency(LHSNum);
195 unsigned RHSLatency = PQ->getLatency(RHSNum);
196 if (LHSLatency < RHSLatency) return true;
197 if (LHSLatency > RHSLatency) return false;
199 // After that, if two nodes have identical latencies, look to see if one will
200 // unblock more other nodes than the other.
201 unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
202 unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
203 if (LHSBlocked < RHSBlocked) return true;
204 if (LHSBlocked > RHSBlocked) return false;
206 // Finally, just to provide a stable ordering, use the node number as a
207 // deciding factor.
208 return LHSNum < RHSNum;
212 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
213 /// of SU, return it, otherwise return null.
214 SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
215 SUnit *OnlyAvailablePred = nullptr;
216 for (const SDep &Pred : SU->Preds) {
217 SUnit &PredSU = *Pred.getSUnit();
218 if (!PredSU.isScheduled) {
219 // We found an available, but not scheduled, predecessor. If it's the
220 // only one we have found, keep track of it... otherwise give up.
221 if (OnlyAvailablePred && OnlyAvailablePred != &PredSU)
222 return nullptr;
223 OnlyAvailablePred = &PredSU;
226 return OnlyAvailablePred;
229 void ResourcePriorityQueue::push(SUnit *SU) {
230 // Look at all of the successors of this node. Count the number of nodes that
231 // this node is the sole unscheduled node for.
232 unsigned NumNodesBlocking = 0;
233 for (const SDep &Succ : SU->Succs)
234 if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
235 ++NumNodesBlocking;
237 NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
238 Queue.push_back(SU);
241 /// Check if scheduling of this SU is possible
242 /// in the current packet.
243 bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
244 if (!SU || !SU->getNode())
245 return false;
247 // If this is a compound instruction,
248 // it is likely to be a call. Do not delay it.
249 if (SU->getNode()->getGluedNode())
250 return true;
252 // First see if the pipeline could receive this instruction
253 // in the current cycle.
254 if (SU->getNode()->isMachineOpcode())
255 switch (SU->getNode()->getMachineOpcode()) {
256 default:
257 if (!ResourcesModel->canReserveResources(&TII->get(
258 SU->getNode()->getMachineOpcode())))
259 return false;
260 break;
261 case TargetOpcode::EXTRACT_SUBREG:
262 case TargetOpcode::INSERT_SUBREG:
263 case TargetOpcode::SUBREG_TO_REG:
264 case TargetOpcode::REG_SEQUENCE:
265 case TargetOpcode::IMPLICIT_DEF:
266 break;
269 // Now see if there are no other dependencies
270 // to instructions already in the packet.
271 for (unsigned i = 0, e = Packet.size(); i != e; ++i)
272 for (const SDep &Succ : Packet[i]->Succs) {
273 // Since we do not add pseudos to packets, might as well
274 // ignore order deps.
275 if (Succ.isCtrl())
276 continue;
278 if (Succ.getSUnit() == SU)
279 return false;
282 return true;
285 /// Keep track of available resources.
286 void ResourcePriorityQueue::reserveResources(SUnit *SU) {
287 // If this SU does not fit in the packet
288 // start a new one.
289 if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
290 ResourcesModel->clearResources();
291 Packet.clear();
294 if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
295 switch (SU->getNode()->getMachineOpcode()) {
296 default:
297 ResourcesModel->reserveResources(&TII->get(
298 SU->getNode()->getMachineOpcode()));
299 break;
300 case TargetOpcode::EXTRACT_SUBREG:
301 case TargetOpcode::INSERT_SUBREG:
302 case TargetOpcode::SUBREG_TO_REG:
303 case TargetOpcode::REG_SEQUENCE:
304 case TargetOpcode::IMPLICIT_DEF:
305 break;
307 Packet.push_back(SU);
309 // Forcefully end packet for PseudoOps.
310 else {
311 ResourcesModel->clearResources();
312 Packet.clear();
315 // If packet is now full, reset the state so in the next cycle
316 // we start fresh.
317 if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
318 ResourcesModel->clearResources();
319 Packet.clear();
323 int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
324 int RegBalance = 0;
326 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
327 return RegBalance;
329 // Gen estimate.
330 for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
331 MVT VT = SU->getNode()->getSimpleValueType(i);
332 if (TLI->isTypeLegal(VT)
333 && TLI->getRegClassFor(VT)
334 && TLI->getRegClassFor(VT)->getID() == RCId)
335 RegBalance += numberRCValSuccInSU(SU, RCId);
337 // Kill estimate.
338 for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
339 const SDValue &Op = SU->getNode()->getOperand(i);
340 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
341 if (isa<ConstantSDNode>(Op.getNode()))
342 continue;
344 if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
345 && TLI->getRegClassFor(VT)->getID() == RCId)
346 RegBalance -= numberRCValPredInSU(SU, RCId);
348 return RegBalance;
351 /// Estimates change in reg pressure from this SU.
352 /// It is achieved by trivial tracking of defined
353 /// and used vregs in dependent instructions.
354 /// The RawPressure flag makes this function to ignore
355 /// existing reg file sizes, and report raw def/use
356 /// balance.
357 int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
358 int RegBalance = 0;
360 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
361 return RegBalance;
363 if (RawPressure) {
364 for (const TargetRegisterClass *RC : TRI->regclasses())
365 RegBalance += rawRegPressureDelta(SU, RC->getID());
367 else {
368 for (const TargetRegisterClass *RC : TRI->regclasses()) {
369 if ((RegPressure[RC->getID()] +
370 rawRegPressureDelta(SU, RC->getID()) > 0) &&
371 (RegPressure[RC->getID()] +
372 rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
373 RegBalance += rawRegPressureDelta(SU, RC->getID());
377 return RegBalance;
380 // Constants used to denote relative importance of
381 // heuristic components for cost computation.
382 static const unsigned PriorityOne = 200;
383 static const unsigned PriorityTwo = 50;
384 static const unsigned PriorityThree = 15;
385 static const unsigned PriorityFour = 5;
386 static const unsigned ScaleOne = 20;
387 static const unsigned ScaleTwo = 10;
388 static const unsigned ScaleThree = 5;
389 static const unsigned FactorOne = 2;
391 /// Returns single number reflecting benefit of scheduling SU
392 /// in the current cycle.
393 int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
394 // Initial trivial priority.
395 int ResCount = 1;
397 // Do not waste time on a node that is already scheduled.
398 if (SU->isScheduled)
399 return ResCount;
401 // Forced priority is high.
402 if (SU->isScheduleHigh)
403 ResCount += PriorityOne;
405 // Adaptable scheduling
406 // A small, but very parallel
407 // region, where reg pressure is an issue.
408 if (HorizontalVerticalBalance > RegPressureThreshold) {
409 // Critical path first
410 ResCount += (SU->getHeight() * ScaleTwo);
411 // If resources are available for it, multiply the
412 // chance of scheduling.
413 if (isResourceAvailable(SU))
414 ResCount <<= FactorOne;
416 // Consider change to reg pressure from scheduling
417 // this SU.
418 ResCount -= (regPressureDelta(SU,true) * ScaleOne);
420 // Default heuristic, greeady and
421 // critical path driven.
422 else {
423 // Critical path first.
424 ResCount += (SU->getHeight() * ScaleTwo);
425 // Now see how many instructions is blocked by this SU.
426 ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
427 // If resources are available for it, multiply the
428 // chance of scheduling.
429 if (isResourceAvailable(SU))
430 ResCount <<= FactorOne;
432 ResCount -= (regPressureDelta(SU) * ScaleTwo);
435 // These are platform-specific things.
436 // Will need to go into the back end
437 // and accessed from here via a hook.
438 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
439 if (N->isMachineOpcode()) {
440 const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
441 if (TID.isCall())
442 ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
444 else
445 switch (N->getOpcode()) {
446 default: break;
447 case ISD::TokenFactor:
448 case ISD::CopyFromReg:
449 case ISD::CopyToReg:
450 ResCount += PriorityFour;
451 break;
453 case ISD::INLINEASM:
454 case ISD::INLINEASM_BR:
455 ResCount += PriorityThree;
456 break;
459 return ResCount;
463 /// Main resource tracking point.
464 void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
465 // Use NULL entry as an event marker to reset
466 // the DFA state.
467 if (!SU) {
468 ResourcesModel->clearResources();
469 Packet.clear();
470 return;
473 const SDNode *ScegN = SU->getNode();
474 // Update reg pressure tracking.
475 // First update current node.
476 if (ScegN->isMachineOpcode()) {
477 // Estimate generated regs.
478 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
479 MVT VT = ScegN->getSimpleValueType(i);
481 if (TLI->isTypeLegal(VT)) {
482 const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
483 if (RC)
484 RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
487 // Estimate killed regs.
488 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
489 const SDValue &Op = ScegN->getOperand(i);
490 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
492 if (TLI->isTypeLegal(VT)) {
493 const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
494 if (RC) {
495 if (RegPressure[RC->getID()] >
496 (numberRCValPredInSU(SU, RC->getID())))
497 RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
498 else RegPressure[RC->getID()] = 0;
502 for (SDep &Pred : SU->Preds) {
503 if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0))
504 continue;
505 --Pred.getSUnit()->NumRegDefsLeft;
509 // Reserve resources for this SU.
510 reserveResources(SU);
512 // Adjust number of parallel live ranges.
513 // Heuristic is simple - node with no data successors reduces
514 // number of live ranges. All others, increase it.
515 unsigned NumberNonControlDeps = 0;
517 for (const SDep &Succ : SU->Succs) {
518 adjustPriorityOfUnscheduledPreds(Succ.getSUnit());
519 if (!Succ.isCtrl())
520 NumberNonControlDeps++;
523 if (!NumberNonControlDeps) {
524 if (ParallelLiveRanges >= SU->NumPreds)
525 ParallelLiveRanges -= SU->NumPreds;
526 else
527 ParallelLiveRanges = 0;
530 else
531 ParallelLiveRanges += SU->NumRegDefsLeft;
533 // Track parallel live chains.
534 HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
535 HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
538 void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
539 unsigned NodeNumDefs = 0;
540 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
541 if (N->isMachineOpcode()) {
542 const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
543 // No register need be allocated for this.
544 if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
545 NodeNumDefs = 0;
546 break;
548 NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
550 else
551 switch(N->getOpcode()) {
552 default: break;
553 case ISD::CopyFromReg:
554 NodeNumDefs++;
555 break;
556 case ISD::INLINEASM:
557 case ISD::INLINEASM_BR:
558 NodeNumDefs++;
559 break;
562 SU->NumRegDefsLeft = NodeNumDefs;
565 /// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
566 /// scheduled. If SU is not itself available, then there is at least one
567 /// predecessor node that has not been scheduled yet. If SU has exactly ONE
568 /// unscheduled predecessor, we want to increase its priority: it getting
569 /// scheduled will make this node available, so it is better than some other
570 /// node of the same priority that will not make a node available.
571 void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
572 if (SU->isAvailable) return; // All preds scheduled.
574 SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
575 if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
576 return;
578 // Okay, we found a single predecessor that is available, but not scheduled.
579 // Since it is available, it must be in the priority queue. First remove it.
580 remove(OnlyAvailablePred);
582 // Reinsert the node into the priority queue, which recomputes its
583 // NumNodesSolelyBlocking value.
584 push(OnlyAvailablePred);
588 /// Main access point - returns next instructions
589 /// to be placed in scheduling sequence.
590 SUnit *ResourcePriorityQueue::pop() {
591 if (empty())
592 return nullptr;
594 std::vector<SUnit *>::iterator Best = Queue.begin();
595 if (!DisableDFASched) {
596 int BestCost = SUSchedulingCost(*Best);
597 for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) {
599 if (SUSchedulingCost(*I) > BestCost) {
600 BestCost = SUSchedulingCost(*I);
601 Best = I;
605 // Use default TD scheduling mechanism.
606 else {
607 for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I)
608 if (Picker(*Best, *I))
609 Best = I;
612 SUnit *V = *Best;
613 if (Best != std::prev(Queue.end()))
614 std::swap(*Best, Queue.back());
616 Queue.pop_back();
618 return V;
622 void ResourcePriorityQueue::remove(SUnit *SU) {
623 assert(!Queue.empty() && "Queue is empty!");
624 std::vector<SUnit *>::iterator I = find(Queue, SU);
625 if (I != std::prev(Queue.end()))
626 std::swap(*I, Queue.back());
628 Queue.pop_back();