[x86] fix assert with horizontal math + broadcast of vector (PR43402)
[llvm-core.git] / lib / CodeGen / SelectionDAG / ResourcePriorityQueue.cpp
blob34660e3a48ec5ebc37a38e5a34b9718d08982c17
1 //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the ResourcePriorityQueue class, which is a
10 // SchedulingPriorityQueue that prioritizes instructions using DFA state to
11 // reduce the length of the critical path through the basic block
12 // on VLIW platforms.
13 // The scheduler is basically a top-down adaptable list scheduler with DFA
14 // resource tracking added to the cost function.
15 // DFA is queried as a state machine to model "packets/bundles" during
16 // schedule. Currently packets/bundles are discarded at the end of
17 // scheduling, affecting only order of instructions.
19 //===----------------------------------------------------------------------===//
21 #include "llvm/CodeGen/ResourcePriorityQueue.h"
22 #include "llvm/CodeGen/MachineInstr.h"
23 #include "llvm/CodeGen/SelectionDAGNodes.h"
24 #include "llvm/CodeGen/TargetLowering.h"
25 #include "llvm/CodeGen/TargetSubtargetInfo.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include "llvm/Target/TargetMachine.h"
31 using namespace llvm;
33 #define DEBUG_TYPE "scheduler"
35 static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
36 cl::ZeroOrMore, cl::init(false),
37 cl::desc("Disable use of DFA during scheduling"));
39 static cl::opt<int> RegPressureThreshold(
40 "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
41 cl::desc("Track reg pressure and switch priority to in-depth"));
43 ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
44 : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
45 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
46 TRI = STI.getRegisterInfo();
47 TLI = IS->TLI;
48 TII = STI.getInstrInfo();
49 ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
50 // This hard requirement could be relaxed, but for now
51 // do not let it proceed.
52 assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
54 unsigned NumRC = TRI->getNumRegClasses();
55 RegLimit.resize(NumRC);
56 RegPressure.resize(NumRC);
57 std::fill(RegLimit.begin(), RegLimit.end(), 0);
58 std::fill(RegPressure.begin(), RegPressure.end(), 0);
59 for (const TargetRegisterClass *RC : TRI->regclasses())
60 RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);
62 ParallelLiveRanges = 0;
63 HorizontalVerticalBalance = 0;
66 unsigned
67 ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
68 unsigned NumberDeps = 0;
69 for (SDep &Pred : SU->Preds) {
70 if (Pred.isCtrl())
71 continue;
73 SUnit *PredSU = Pred.getSUnit();
74 const SDNode *ScegN = PredSU->getNode();
76 if (!ScegN)
77 continue;
79 // If value is passed to CopyToReg, it is probably
80 // live outside BB.
81 switch (ScegN->getOpcode()) {
82 default: break;
83 case ISD::TokenFactor: break;
84 case ISD::CopyFromReg: NumberDeps++; break;
85 case ISD::CopyToReg: break;
86 case ISD::INLINEASM: break;
87 case ISD::INLINEASM_BR: break;
89 if (!ScegN->isMachineOpcode())
90 continue;
92 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
93 MVT VT = ScegN->getSimpleValueType(i);
94 if (TLI->isTypeLegal(VT)
95 && (TLI->getRegClassFor(VT)->getID() == RCId)) {
96 NumberDeps++;
97 break;
101 return NumberDeps;
104 unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
105 unsigned RCId) {
106 unsigned NumberDeps = 0;
107 for (const SDep &Succ : SU->Succs) {
108 if (Succ.isCtrl())
109 continue;
111 SUnit *SuccSU = Succ.getSUnit();
112 const SDNode *ScegN = SuccSU->getNode();
113 if (!ScegN)
114 continue;
116 // If value is passed to CopyToReg, it is probably
117 // live outside BB.
118 switch (ScegN->getOpcode()) {
119 default: break;
120 case ISD::TokenFactor: break;
121 case ISD::CopyFromReg: break;
122 case ISD::CopyToReg: NumberDeps++; break;
123 case ISD::INLINEASM: break;
124 case ISD::INLINEASM_BR: break;
126 if (!ScegN->isMachineOpcode())
127 continue;
129 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
130 const SDValue &Op = ScegN->getOperand(i);
131 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
132 if (TLI->isTypeLegal(VT)
133 && (TLI->getRegClassFor(VT)->getID() == RCId)) {
134 NumberDeps++;
135 break;
139 return NumberDeps;
142 static unsigned numberCtrlDepsInSU(SUnit *SU) {
143 unsigned NumberDeps = 0;
144 for (const SDep &Succ : SU->Succs)
145 if (Succ.isCtrl())
146 NumberDeps++;
148 return NumberDeps;
151 static unsigned numberCtrlPredInSU(SUnit *SU) {
152 unsigned NumberDeps = 0;
153 for (SDep &Pred : SU->Preds)
154 if (Pred.isCtrl())
155 NumberDeps++;
157 return NumberDeps;
161 /// Initialize nodes.
163 void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
164 SUnits = &sunits;
165 NumNodesSolelyBlocking.resize(SUnits->size(), 0);
167 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
168 SUnit *SU = &(*SUnits)[i];
169 initNumRegDefsLeft(SU);
170 SU->NodeQueueId = 0;
174 /// This heuristic is used if DFA scheduling is not desired
175 /// for some VLIW platform.
176 bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
177 // The isScheduleHigh flag allows nodes with wraparound dependencies that
178 // cannot easily be modeled as edges with latencies to be scheduled as
179 // soon as possible in a top-down schedule.
180 if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
181 return false;
183 if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
184 return true;
186 unsigned LHSNum = LHS->NodeNum;
187 unsigned RHSNum = RHS->NodeNum;
189 // The most important heuristic is scheduling the critical path.
190 unsigned LHSLatency = PQ->getLatency(LHSNum);
191 unsigned RHSLatency = PQ->getLatency(RHSNum);
192 if (LHSLatency < RHSLatency) return true;
193 if (LHSLatency > RHSLatency) return false;
195 // After that, if two nodes have identical latencies, look to see if one will
196 // unblock more other nodes than the other.
197 unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
198 unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
199 if (LHSBlocked < RHSBlocked) return true;
200 if (LHSBlocked > RHSBlocked) return false;
202 // Finally, just to provide a stable ordering, use the node number as a
203 // deciding factor.
204 return LHSNum < RHSNum;
208 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
209 /// of SU, return it, otherwise return null.
210 SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
211 SUnit *OnlyAvailablePred = nullptr;
212 for (const SDep &Pred : SU->Preds) {
213 SUnit &PredSU = *Pred.getSUnit();
214 if (!PredSU.isScheduled) {
215 // We found an available, but not scheduled, predecessor. If it's the
216 // only one we have found, keep track of it... otherwise give up.
217 if (OnlyAvailablePred && OnlyAvailablePred != &PredSU)
218 return nullptr;
219 OnlyAvailablePred = &PredSU;
222 return OnlyAvailablePred;
225 void ResourcePriorityQueue::push(SUnit *SU) {
226 // Look at all of the successors of this node. Count the number of nodes that
227 // this node is the sole unscheduled node for.
228 unsigned NumNodesBlocking = 0;
229 for (const SDep &Succ : SU->Succs)
230 if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
231 ++NumNodesBlocking;
233 NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
234 Queue.push_back(SU);
237 /// Check if scheduling of this SU is possible
238 /// in the current packet.
239 bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
240 if (!SU || !SU->getNode())
241 return false;
243 // If this is a compound instruction,
244 // it is likely to be a call. Do not delay it.
245 if (SU->getNode()->getGluedNode())
246 return true;
248 // First see if the pipeline could receive this instruction
249 // in the current cycle.
250 if (SU->getNode()->isMachineOpcode())
251 switch (SU->getNode()->getMachineOpcode()) {
252 default:
253 if (!ResourcesModel->canReserveResources(&TII->get(
254 SU->getNode()->getMachineOpcode())))
255 return false;
256 break;
257 case TargetOpcode::EXTRACT_SUBREG:
258 case TargetOpcode::INSERT_SUBREG:
259 case TargetOpcode::SUBREG_TO_REG:
260 case TargetOpcode::REG_SEQUENCE:
261 case TargetOpcode::IMPLICIT_DEF:
262 break;
265 // Now see if there are no other dependencies
266 // to instructions already in the packet.
267 for (unsigned i = 0, e = Packet.size(); i != e; ++i)
268 for (const SDep &Succ : Packet[i]->Succs) {
269 // Since we do not add pseudos to packets, might as well
270 // ignore order deps.
271 if (Succ.isCtrl())
272 continue;
274 if (Succ.getSUnit() == SU)
275 return false;
278 return true;
281 /// Keep track of available resources.
282 void ResourcePriorityQueue::reserveResources(SUnit *SU) {
283 // If this SU does not fit in the packet
284 // start a new one.
285 if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
286 ResourcesModel->clearResources();
287 Packet.clear();
290 if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
291 switch (SU->getNode()->getMachineOpcode()) {
292 default:
293 ResourcesModel->reserveResources(&TII->get(
294 SU->getNode()->getMachineOpcode()));
295 break;
296 case TargetOpcode::EXTRACT_SUBREG:
297 case TargetOpcode::INSERT_SUBREG:
298 case TargetOpcode::SUBREG_TO_REG:
299 case TargetOpcode::REG_SEQUENCE:
300 case TargetOpcode::IMPLICIT_DEF:
301 break;
303 Packet.push_back(SU);
305 // Forcefully end packet for PseudoOps.
306 else {
307 ResourcesModel->clearResources();
308 Packet.clear();
311 // If packet is now full, reset the state so in the next cycle
312 // we start fresh.
313 if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
314 ResourcesModel->clearResources();
315 Packet.clear();
319 int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
320 int RegBalance = 0;
322 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
323 return RegBalance;
325 // Gen estimate.
326 for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
327 MVT VT = SU->getNode()->getSimpleValueType(i);
328 if (TLI->isTypeLegal(VT)
329 && TLI->getRegClassFor(VT)
330 && TLI->getRegClassFor(VT)->getID() == RCId)
331 RegBalance += numberRCValSuccInSU(SU, RCId);
333 // Kill estimate.
334 for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
335 const SDValue &Op = SU->getNode()->getOperand(i);
336 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
337 if (isa<ConstantSDNode>(Op.getNode()))
338 continue;
340 if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
341 && TLI->getRegClassFor(VT)->getID() == RCId)
342 RegBalance -= numberRCValPredInSU(SU, RCId);
344 return RegBalance;
347 /// Estimates change in reg pressure from this SU.
348 /// It is achieved by trivial tracking of defined
349 /// and used vregs in dependent instructions.
350 /// The RawPressure flag makes this function to ignore
351 /// existing reg file sizes, and report raw def/use
352 /// balance.
353 int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
354 int RegBalance = 0;
356 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
357 return RegBalance;
359 if (RawPressure) {
360 for (const TargetRegisterClass *RC : TRI->regclasses())
361 RegBalance += rawRegPressureDelta(SU, RC->getID());
363 else {
364 for (const TargetRegisterClass *RC : TRI->regclasses()) {
365 if ((RegPressure[RC->getID()] +
366 rawRegPressureDelta(SU, RC->getID()) > 0) &&
367 (RegPressure[RC->getID()] +
368 rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
369 RegBalance += rawRegPressureDelta(SU, RC->getID());
373 return RegBalance;
376 // Constants used to denote relative importance of
377 // heuristic components for cost computation.
378 static const unsigned PriorityOne = 200;
379 static const unsigned PriorityTwo = 50;
380 static const unsigned PriorityThree = 15;
381 static const unsigned PriorityFour = 5;
382 static const unsigned ScaleOne = 20;
383 static const unsigned ScaleTwo = 10;
384 static const unsigned ScaleThree = 5;
385 static const unsigned FactorOne = 2;
387 /// Returns single number reflecting benefit of scheduling SU
388 /// in the current cycle.
389 int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
390 // Initial trivial priority.
391 int ResCount = 1;
393 // Do not waste time on a node that is already scheduled.
394 if (SU->isScheduled)
395 return ResCount;
397 // Forced priority is high.
398 if (SU->isScheduleHigh)
399 ResCount += PriorityOne;
401 // Adaptable scheduling
402 // A small, but very parallel
403 // region, where reg pressure is an issue.
404 if (HorizontalVerticalBalance > RegPressureThreshold) {
405 // Critical path first
406 ResCount += (SU->getHeight() * ScaleTwo);
407 // If resources are available for it, multiply the
408 // chance of scheduling.
409 if (isResourceAvailable(SU))
410 ResCount <<= FactorOne;
412 // Consider change to reg pressure from scheduling
413 // this SU.
414 ResCount -= (regPressureDelta(SU,true) * ScaleOne);
416 // Default heuristic, greeady and
417 // critical path driven.
418 else {
419 // Critical path first.
420 ResCount += (SU->getHeight() * ScaleTwo);
421 // Now see how many instructions is blocked by this SU.
422 ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
423 // If resources are available for it, multiply the
424 // chance of scheduling.
425 if (isResourceAvailable(SU))
426 ResCount <<= FactorOne;
428 ResCount -= (regPressureDelta(SU) * ScaleTwo);
431 // These are platform-specific things.
432 // Will need to go into the back end
433 // and accessed from here via a hook.
434 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
435 if (N->isMachineOpcode()) {
436 const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
437 if (TID.isCall())
438 ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
440 else
441 switch (N->getOpcode()) {
442 default: break;
443 case ISD::TokenFactor:
444 case ISD::CopyFromReg:
445 case ISD::CopyToReg:
446 ResCount += PriorityFour;
447 break;
449 case ISD::INLINEASM:
450 case ISD::INLINEASM_BR:
451 ResCount += PriorityThree;
452 break;
455 return ResCount;
459 /// Main resource tracking point.
460 void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
461 // Use NULL entry as an event marker to reset
462 // the DFA state.
463 if (!SU) {
464 ResourcesModel->clearResources();
465 Packet.clear();
466 return;
469 const SDNode *ScegN = SU->getNode();
470 // Update reg pressure tracking.
471 // First update current node.
472 if (ScegN->isMachineOpcode()) {
473 // Estimate generated regs.
474 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
475 MVT VT = ScegN->getSimpleValueType(i);
477 if (TLI->isTypeLegal(VT)) {
478 const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
479 if (RC)
480 RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
483 // Estimate killed regs.
484 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
485 const SDValue &Op = ScegN->getOperand(i);
486 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
488 if (TLI->isTypeLegal(VT)) {
489 const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
490 if (RC) {
491 if (RegPressure[RC->getID()] >
492 (numberRCValPredInSU(SU, RC->getID())))
493 RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
494 else RegPressure[RC->getID()] = 0;
498 for (SDep &Pred : SU->Preds) {
499 if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0))
500 continue;
501 --Pred.getSUnit()->NumRegDefsLeft;
505 // Reserve resources for this SU.
506 reserveResources(SU);
508 // Adjust number of parallel live ranges.
509 // Heuristic is simple - node with no data successors reduces
510 // number of live ranges. All others, increase it.
511 unsigned NumberNonControlDeps = 0;
513 for (const SDep &Succ : SU->Succs) {
514 adjustPriorityOfUnscheduledPreds(Succ.getSUnit());
515 if (!Succ.isCtrl())
516 NumberNonControlDeps++;
519 if (!NumberNonControlDeps) {
520 if (ParallelLiveRanges >= SU->NumPreds)
521 ParallelLiveRanges -= SU->NumPreds;
522 else
523 ParallelLiveRanges = 0;
526 else
527 ParallelLiveRanges += SU->NumRegDefsLeft;
529 // Track parallel live chains.
530 HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
531 HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
534 void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
535 unsigned NodeNumDefs = 0;
536 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
537 if (N->isMachineOpcode()) {
538 const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
539 // No register need be allocated for this.
540 if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
541 NodeNumDefs = 0;
542 break;
544 NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
546 else
547 switch(N->getOpcode()) {
548 default: break;
549 case ISD::CopyFromReg:
550 NodeNumDefs++;
551 break;
552 case ISD::INLINEASM:
553 case ISD::INLINEASM_BR:
554 NodeNumDefs++;
555 break;
558 SU->NumRegDefsLeft = NodeNumDefs;
561 /// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
562 /// scheduled. If SU is not itself available, then there is at least one
563 /// predecessor node that has not been scheduled yet. If SU has exactly ONE
564 /// unscheduled predecessor, we want to increase its priority: it getting
565 /// scheduled will make this node available, so it is better than some other
566 /// node of the same priority that will not make a node available.
567 void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
568 if (SU->isAvailable) return; // All preds scheduled.
570 SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
571 if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
572 return;
574 // Okay, we found a single predecessor that is available, but not scheduled.
575 // Since it is available, it must be in the priority queue. First remove it.
576 remove(OnlyAvailablePred);
578 // Reinsert the node into the priority queue, which recomputes its
579 // NumNodesSolelyBlocking value.
580 push(OnlyAvailablePred);
584 /// Main access point - returns next instructions
585 /// to be placed in scheduling sequence.
586 SUnit *ResourcePriorityQueue::pop() {
587 if (empty())
588 return nullptr;
590 std::vector<SUnit *>::iterator Best = Queue.begin();
591 if (!DisableDFASched) {
592 int BestCost = SUSchedulingCost(*Best);
593 for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) {
595 if (SUSchedulingCost(*I) > BestCost) {
596 BestCost = SUSchedulingCost(*I);
597 Best = I;
601 // Use default TD scheduling mechanism.
602 else {
603 for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I)
604 if (Picker(*Best, *I))
605 Best = I;
608 SUnit *V = *Best;
609 if (Best != std::prev(Queue.end()))
610 std::swap(*Best, Queue.back());
612 Queue.pop_back();
614 return V;
618 void ResourcePriorityQueue::remove(SUnit *SU) {
619 assert(!Queue.empty() && "Queue is empty!");
620 std::vector<SUnit *>::iterator I = find(Queue, SU);
621 if (I != std::prev(Queue.end()))
622 std::swap(*I, Queue.back());
624 Queue.pop_back();