llvm/lib/CodeGen/VLIWMachineScheduler.cpp

   1 //===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // MachineScheduler schedules machine instructions after phi elimination. It
  10 // preserves LiveIntervals so it can be invoked before register allocation.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/CodeGen/VLIWMachineScheduler.h"
  15 #include "llvm/ADT/SmallVector.h"
  16 #include "llvm/CodeGen/DFAPacketizer.h"
  17 #include "llvm/CodeGen/MachineBasicBlock.h"
  18 #include "llvm/CodeGen/MachineFunction.h"
  19 #include "llvm/CodeGen/MachineInstr.h"
  20 #include "llvm/CodeGen/MachineLoopInfo.h"
  21 #include "llvm/CodeGen/RegisterClassInfo.h"
  22 #include "llvm/CodeGen/RegisterPressure.h"
  23 #include "llvm/CodeGen/ScheduleDAG.h"
  24 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
  25 #include "llvm/CodeGen/TargetInstrInfo.h"
  26 #include "llvm/CodeGen/TargetOpcodes.h"
  27 #include "llvm/CodeGen/TargetRegisterInfo.h"
  28 #include "llvm/CodeGen/TargetSchedule.h"
  29 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  30 #include "llvm/Support/CommandLine.h"
  31 #include "llvm/Support/Debug.h"
  32 #include "llvm/Support/raw_ostream.h"
  33 #include <algorithm>
  34 #include <cassert>
  35 #include <iomanip>
  36 #include <limits>
  37 #include <memory>
  38 #include <sstream>
  39
  40 using namespace llvm;
  41
  42 #define DEBUG_TYPE "machine-scheduler"
  43
  44 static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
  45                                          cl::init(false));
  46
  47 static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
  48                                        cl::init(true));
  49
  50 static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
  51                                                 cl::Hidden, cl::init(1));
  52
  53 // Check if the scheduler should penalize instructions that are available to
  54 // early due to a zero-latency dependence.
  55 static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
  56                                      cl::init(true));
  57
  58 // This value is used to determine if a register class is a high pressure set.
  59 // We compute the maximum number of registers needed and divided by the total
  60 // available. Then, we compare the result to this value.
  61 static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden,
  62                                   cl::init(0.75f),
  63                                   cl::desc("High register pressure threhold."));
  64
  65 VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI,
  66                                      const TargetSchedModel *SM)
  67     : TII(STI.getInstrInfo()), SchedModel(SM) {
  68   ResourcesModel = createPacketizer(STI);
  69
  70   // This hard requirement could be relaxed,
  71   // but for now do not let it proceed.
  72   assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
  73
  74   Packet.reserve(SchedModel->getIssueWidth());
  75   Packet.clear();
  76   ResourcesModel->clearResources();
  77 }
  78
  79 void VLIWResourceModel::reset() {
  80   Packet.clear();
  81   ResourcesModel->clearResources();
  82 }
  83
  84 VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }
  85
  86 /// Return true if there is a dependence between SUd and SUu.
  87 bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) {
  88   if (SUd->Succs.size() == 0)
  89     return false;
  90
  91   for (const auto &S : SUd->Succs) {
  92     // Since we do not add pseudos to packets, might as well
  93     // ignore order dependencies.
  94     if (S.isCtrl())
  95       continue;
  96
  97     if (S.getSUnit() == SUu && S.getLatency() > 0)
  98       return true;
  99   }
 100   return false;
 101 }
 102
 103 /// Check if scheduling of this SU is possible
 104 /// in the current packet.
 105 /// It is _not_ precise (statefull), it is more like
 106 /// another heuristic. Many corner cases are figured
 107 /// empirically.
 108 bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
 109   if (!SU || !SU->getInstr())
 110     return false;
 111
 112   // First see if the pipeline could receive this instruction
 113   // in the current cycle.
 114   switch (SU->getInstr()->getOpcode()) {
 115   default:
 116     if (!ResourcesModel->canReserveResources(*SU->getInstr()))
 117       return false;
 118     break;
 119   case TargetOpcode::EXTRACT_SUBREG:
 120   case TargetOpcode::INSERT_SUBREG:
 121   case TargetOpcode::SUBREG_TO_REG:
 122   case TargetOpcode::REG_SEQUENCE:
 123   case TargetOpcode::IMPLICIT_DEF:
 124   case TargetOpcode::COPY:
 125   case TargetOpcode::INLINEASM:
 126   case TargetOpcode::INLINEASM_BR:
 127     break;
 128   }
 129
 130   // Now see if there are no other dependencies to instructions already
 131   // in the packet.
 132   if (IsTop) {
 133     for (const SUnit *U : Packet)
 134       if (hasDependence(U, SU))
 135         return false;
 136   } else {
 137     for (const SUnit *U : Packet)
 138       if (hasDependence(SU, U))
 139         return false;
 140   }
 141   return true;
 142 }
 143
 144 /// Keep track of available resources.
 145 bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
 146   bool startNewCycle = false;
 147   // Artificially reset state.
 148   if (!SU) {
 149     reset();
 150     TotalPackets++;
 151     return false;
 152   }
 153   // If this SU does not fit in the packet or the packet is now full
 154   // start a new one.
 155   if (!isResourceAvailable(SU, IsTop) ||
 156       Packet.size() >= SchedModel->getIssueWidth()) {
 157     reset();
 158     TotalPackets++;
 159     startNewCycle = true;
 160   }
 161
 162   switch (SU->getInstr()->getOpcode()) {
 163   default:
 164     ResourcesModel->reserveResources(*SU->getInstr());
 165     break;
 166   case TargetOpcode::EXTRACT_SUBREG:
 167   case TargetOpcode::INSERT_SUBREG:
 168   case TargetOpcode::SUBREG_TO_REG:
 169   case TargetOpcode::REG_SEQUENCE:
 170   case TargetOpcode::IMPLICIT_DEF:
 171   case TargetOpcode::KILL:
 172   case TargetOpcode::CFI_INSTRUCTION:
 173   case TargetOpcode::EH_LABEL:
 174   case TargetOpcode::COPY:
 175   case TargetOpcode::INLINEASM:
 176   case TargetOpcode::INLINEASM_BR:
 177     break;
 178   }
 179   Packet.push_back(SU);
 180
 181 #ifndef NDEBUG
 182   LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
 183   for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
 184     LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
 185     LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
 186     LLVM_DEBUG(Packet[i]->getInstr()->dump());
 187   }
 188 #endif
 189
 190   return startNewCycle;
 191 }
 192
 193 DFAPacketizer *
 194 VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {
 195   return STI.getInstrInfo()->CreateTargetScheduleState(STI);
 196 }
 197
 198 /// schedule - Called back from MachineScheduler::runOnMachineFunction
 199 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
 200 /// only includes instructions that have DAG nodes, not scheduling boundaries.
 201 void VLIWMachineScheduler::schedule() {
 202   LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
 203                     << printMBBReference(*BB) << " " << BB->getName()
 204                     << " in_func " << BB->getParent()->getName()
 205                     << " at loop depth " << MLI->getLoopDepth(BB) << " \n");
 206
 207   buildDAGWithRegPressure();
 208
 209   Topo.InitDAGTopologicalSorting();
 210
 211   // Postprocess the DAG to add platform-specific artificial dependencies.
 212   postProcessDAG();
 213
 214   SmallVector<SUnit *, 8> TopRoots, BotRoots;
 215   findRootsAndBiasEdges(TopRoots, BotRoots);
 216
 217   // Initialize the strategy before modifying the DAG.
 218   SchedImpl->initialize(this);
 219
 220   LLVM_DEBUG({
 221     unsigned maxH = 0;
 222     for (const SUnit &SU : SUnits)
 223       if (SU.getHeight() > maxH)
 224         maxH = SU.getHeight();
 225     dbgs() << "Max Height " << maxH << "\n";
 226   });
 227   LLVM_DEBUG({
 228     unsigned maxD = 0;
 229     for (const SUnit &SU : SUnits)
 230       if (SU.getDepth() > maxD)
 231         maxD = SU.getDepth();
 232     dbgs() << "Max Depth " << maxD << "\n";
 233   });
 234   LLVM_DEBUG(dump());
 235   if (ViewMISchedDAGs)
 236     viewGraph();
 237
 238   initQueues(TopRoots, BotRoots);
 239
 240   bool IsTopNode = false;
 241   while (true) {
 242     LLVM_DEBUG(
 243         dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
 244     SUnit *SU = SchedImpl->pickNode(IsTopNode);
 245     if (!SU)
 246       break;
 247
 248     if (!checkSchedLimit())
 249       break;
 250
 251     scheduleMI(SU, IsTopNode);
 252
 253     // Notify the scheduling strategy after updating the DAG.
 254     SchedImpl->schedNode(SU, IsTopNode);
 255
 256     updateQueues(SU, IsTopNode);
 257   }
 258   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 259
 260   placeDebugValues();
 261
 262   LLVM_DEBUG({
 263     dbgs() << "*** Final schedule for "
 264            << printMBBReference(*begin()->getParent()) << " ***\n";
 265     dumpSchedule();
 266     dbgs() << '\n';
 267   });
 268 }
 269
 270 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
 271   DAG = static_cast<VLIWMachineScheduler *>(dag);
 272   SchedModel = DAG->getSchedModel();
 273
 274   Top.init(DAG, SchedModel);
 275   Bot.init(DAG, SchedModel);
 276
 277   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
 278   // are disabled, then these HazardRecs will be disabled.
 279   const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
 280   const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
 281   const TargetInstrInfo *TII = STI.getInstrInfo();
 282   delete Top.HazardRec;
 283   delete Bot.HazardRec;
 284   Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
 285   Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
 286
 287   delete Top.ResourceModel;
 288   delete Bot.ResourceModel;
 289   Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
 290   Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
 291
 292   const std::vector<unsigned> &MaxPressure =
 293       DAG->getRegPressure().MaxSetPressure;
 294   HighPressureSets.assign(MaxPressure.size(), false);
 295   for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
 296     unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
 297     HighPressureSets[i] =
 298         ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
 299   }
 300
 301   assert((!ForceTopDown || !ForceBottomUp) &&
 302          "-misched-topdown incompatible with -misched-bottomup");
 303 }
 304
 305 VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
 306     const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
 307   return new VLIWResourceModel(STI, SchedModel);
 308 }
 309
 310 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
 311   for (const SDep &PI : SU->Preds) {
 312     unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
 313     unsigned MinLatency = PI.getLatency();
 314 #ifndef NDEBUG
 315     Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
 316 #endif
 317     if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
 318       SU->TopReadyCycle = PredReadyCycle + MinLatency;
 319   }
 320
 321   if (!SU->isScheduled)
 322     Top.releaseNode(SU, SU->TopReadyCycle);
 323 }
 324
 325 void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
 326   assert(SU->getInstr() && "Scheduled SUnit must have instr");
 327
 328   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E;
 329        ++I) {
 330     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
 331     unsigned MinLatency = I->getLatency();
 332 #ifndef NDEBUG
 333     Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
 334 #endif
 335     if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
 336       SU->BotReadyCycle = SuccReadyCycle + MinLatency;
 337   }
 338
 339   if (!SU->isScheduled)
 340     Bot.releaseNode(SU, SU->BotReadyCycle);
 341 }
 342
 343 ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
 344   delete ResourceModel;
 345   delete HazardRec;
 346 }
 347
 348 /// Does this SU have a hazard within the current instruction group.
 349 ///
 350 /// The scheduler supports two modes of hazard recognition. The first is the
 351 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
 352 /// supports highly complicated in-order reservation tables
 353 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
 354 ///
 355 /// The second is a streamlined mechanism that checks for hazards based on
 356 /// simple counters that the scheduler itself maintains. It explicitly checks
 357 /// for instruction dispatch limitations, including the number of micro-ops that
 358 /// can dispatch per cycle.
 359 ///
 360 /// TODO: Also check whether the SU must start a new group.
 361 bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
 362   if (HazardRec->isEnabled())
 363     return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
 364
 365   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
 366   if (IssueCount + uops > SchedModel->getIssueWidth())
 367     return true;
 368
 369   return false;
 370 }
 371
 372 void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
 373     SUnit *SU, unsigned ReadyCycle) {
 374   if (ReadyCycle < MinReadyCycle)
 375     MinReadyCycle = ReadyCycle;
 376
 377   // Check for interlocks first. For the purpose of other heuristics, an
 378   // instruction that cannot issue appears as if it's not in the ReadyQueue.
 379   if (ReadyCycle > CurrCycle || checkHazard(SU))
 380
 381     Pending.push(SU);
 382   else
 383     Available.push(SU);
 384 }
 385
 386 /// Move the boundary of scheduled code by one cycle.
 387 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
 388   unsigned Width = SchedModel->getIssueWidth();
 389   IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
 390
 391   assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
 392          "MinReadyCycle uninitialized");
 393   unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
 394
 395   if (!HazardRec->isEnabled()) {
 396     // Bypass HazardRec virtual calls.
 397     CurrCycle = NextCycle;
 398   } else {
 399     // Bypass getHazardType calls in case of long latency.
 400     for (; CurrCycle != NextCycle; ++CurrCycle) {
 401       if (isTop())
 402         HazardRec->AdvanceCycle();
 403       else
 404         HazardRec->RecedeCycle();
 405     }
 406   }
 407   CheckPending = true;
 408
 409   LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
 410                     << CurrCycle << '\n');
 411 }
 412
 413 /// Move the boundary of scheduled code by one SUnit.
 414 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
 415   bool startNewCycle = false;
 416
 417   // Update the reservation table.
 418   if (HazardRec->isEnabled()) {
 419     if (!isTop() && SU->isCall) {
 420       // Calls are scheduled with their preceding instructions. For bottom-up
 421       // scheduling, clear the pipeline state before emitting.
 422       HazardRec->Reset();
 423     }
 424     HazardRec->EmitInstruction(SU);
 425   }
 426
 427   // Update DFA model.
 428   startNewCycle = ResourceModel->reserveResources(SU, isTop());
 429
 430   // Check the instruction group dispatch limit.
 431   // TODO: Check if this SU must end a dispatch group.
 432   IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
 433   if (startNewCycle) {
 434     LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
 435     bumpCycle();
 436   } else
 437     LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
 438                       << CurrCycle << '\n');
 439 }
 440
 441 /// Release pending ready nodes in to the available queue. This makes them
 442 /// visible to heuristics.
 443 void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
 444   // If the available queue is empty, it is safe to reset MinReadyCycle.
 445   if (Available.empty())
 446     MinReadyCycle = std::numeric_limits<unsigned>::max();
 447
 448   // Check to see if any of the pending instructions are ready to issue.  If
 449   // so, add them to the available queue.
 450   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
 451     SUnit *SU = *(Pending.begin() + i);
 452     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
 453
 454     if (ReadyCycle < MinReadyCycle)
 455       MinReadyCycle = ReadyCycle;
 456
 457     if (ReadyCycle > CurrCycle)
 458       continue;
 459
 460     if (checkHazard(SU))
 461       continue;
 462
 463     Available.push(SU);
 464     Pending.remove(Pending.begin() + i);
 465     --i;
 466     --e;
 467   }
 468   CheckPending = false;
 469 }
 470
 471 /// Remove SU from the ready set for this boundary.
 472 void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
 473   if (Available.isInQueue(SU))
 474     Available.remove(Available.find(SU));
 475   else {
 476     assert(Pending.isInQueue(SU) && "bad ready count");
 477     Pending.remove(Pending.find(SU));
 478   }
 479 }
 480
 481 /// If this queue only has one ready candidate, return it. As a side effect,
 482 /// advance the cycle until at least one node is ready. If multiple instructions
 483 /// are ready, return NULL.
 484 SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
 485   if (CheckPending)
 486     releasePending();
 487
 488   auto AdvanceCycle = [this]() {
 489     if (Available.empty())
 490       return true;
 491     if (Available.size() == 1 && Pending.size() > 0)
 492       return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
 493              getWeakLeft(*Available.begin(), isTop()) != 0;
 494     return false;
 495   };
 496   for (unsigned i = 0; AdvanceCycle(); ++i) {
 497     assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
 498            "permanent hazard");
 499     (void)i;
 500     ResourceModel->reserveResources(nullptr, isTop());
 501     bumpCycle();
 502     releasePending();
 503   }
 504   if (Available.size() == 1)
 505     return *Available.begin();
 506   return nullptr;
 507 }
 508
 509 #ifndef NDEBUG
 510 void ConvergingVLIWScheduler::traceCandidate(const char *Label,
 511                                              const ReadyQueue &Q, SUnit *SU,
 512                                              int Cost, PressureChange P) {
 513   dbgs() << Label << " " << Q.getName() << " ";
 514   if (P.isValid())
 515     dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
 516            << P.getUnitInc() << " ";
 517   else
 518     dbgs() << "     ";
 519   dbgs() << "cost(" << Cost << ")\t";
 520   DAG->dumpNode(*SU);
 521 }
 522
 523 // Very detailed queue dump, to be used with higher verbosity levels.
 524 void ConvergingVLIWScheduler::readyQueueVerboseDump(
 525     const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
 526     ReadyQueue &Q) {
 527   RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
 528
 529   dbgs() << ">>> " << Q.getName() << "\n";
 530   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
 531     RegPressureDelta RPDelta;
 532     TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
 533                                     DAG->getRegionCriticalPSets(),
 534                                     DAG->getRegPressure().MaxSetPressure);
 535     std::stringstream dbgstr;
 536     dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
 537     dbgs() << dbgstr.str();
 538     SchedulingCost(Q, *I, Candidate, RPDelta, true);
 539     dbgs() << "\t";
 540     (*I)->getInstr()->dump();
 541   }
 542   dbgs() << "\n";
 543 }
 544 #endif
 545
 546 /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
 547 /// of SU, return true (we may have duplicates)
 548 static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
 549   if (SU->NumPredsLeft == 0)
 550     return false;
 551
 552   for (auto &Pred : SU->Preds) {
 553     // We found an available, but not scheduled, predecessor.
 554     if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
 555       return false;
 556   }
 557
 558   return true;
 559 }
 560
 561 /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
 562 /// of SU, return true (we may have duplicates)
 563 static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
 564   if (SU->NumSuccsLeft == 0)
 565     return false;
 566
 567   for (auto &Succ : SU->Succs) {
 568     // We found an available, but not scheduled, successor.
 569     if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
 570       return false;
 571   }
 572   return true;
 573 }
 574
 575 /// Check if the instruction changes the register pressure of a register in the
 576 /// high pressure set. The function returns a negative value if the pressure
 577 /// decreases and a positive value is the pressure increases. If the instruction
 578 /// doesn't use a high pressure register or doesn't change the register
 579 /// pressure, then return 0.
 580 int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
 581   PressureDiff &PD = DAG->getPressureDiff(SU);
 582   for (const auto &P : PD) {
 583     if (!P.isValid())
 584       continue;
 585     // The pressure differences are computed bottom-up, so the comparison for
 586     // an increase is positive in the bottom direction, but negative in the
 587     //  top-down direction.
 588     if (HighPressureSets[P.getPSet()])
 589       return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
 590   }
 591   return 0;
 592 }
 593
 594 /// Single point to compute overall scheduling cost.
 595 /// TODO: More heuristics will be used soon.
 596 int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
 597                                             SchedCandidate &Candidate,
 598                                             RegPressureDelta &Delta,
 599                                             bool verbose) {
 600   // Initial trivial priority.
 601   int ResCount = 1;
 602
 603   // Do not waste time on a node that is already scheduled.
 604   if (!SU || SU->isScheduled)
 605     return ResCount;
 606
 607   LLVM_DEBUG(if (verbose) dbgs()
 608              << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
 609   // Forced priority is high.
 610   if (SU->isScheduleHigh) {
 611     ResCount += PriorityOne;
 612     LLVM_DEBUG(dbgs() << "H|");
 613   }
 614
 615   unsigned IsAvailableAmt = 0;
 616   // Critical path first.
 617   if (Q.getID() == TopQID) {
 618     if (Top.isLatencyBound(SU)) {
 619       LLVM_DEBUG(if (verbose) dbgs() << "LB|");
 620       ResCount += (SU->getHeight() * ScaleTwo);
 621     }
 622
 623     LLVM_DEBUG(if (verbose) {
 624       std::stringstream dbgstr;
 625       dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
 626       dbgs() << dbgstr.str();
 627     });
 628
 629     // If resources are available for it, multiply the
 630     // chance of scheduling.
 631     if (Top.ResourceModel->isResourceAvailable(SU, true)) {
 632       IsAvailableAmt = (PriorityTwo + PriorityThree);
 633       ResCount += IsAvailableAmt;
 634       LLVM_DEBUG(if (verbose) dbgs() << "A|");
 635     } else
 636       LLVM_DEBUG(if (verbose) dbgs() << " |");
 637   } else {
 638     if (Bot.isLatencyBound(SU)) {
 639       LLVM_DEBUG(if (verbose) dbgs() << "LB|");
 640       ResCount += (SU->getDepth() * ScaleTwo);
 641     }
 642
 643     LLVM_DEBUG(if (verbose) {
 644       std::stringstream dbgstr;
 645       dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
 646       dbgs() << dbgstr.str();
 647     });
 648
 649     // If resources are available for it, multiply the
 650     // chance of scheduling.
 651     if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
 652       IsAvailableAmt = (PriorityTwo + PriorityThree);
 653       ResCount += IsAvailableAmt;
 654       LLVM_DEBUG(if (verbose) dbgs() << "A|");
 655     } else
 656       LLVM_DEBUG(if (verbose) dbgs() << " |");
 657   }
 658
 659   unsigned NumNodesBlocking = 0;
 660   if (Q.getID() == TopQID) {
 661     // How many SUs does it block from scheduling?
 662     // Look at all of the successors of this node.
 663     // Count the number of nodes that
 664     // this node is the sole unscheduled node for.
 665     if (Top.isLatencyBound(SU))
 666       for (const SDep &SI : SU->Succs)
 667         if (isSingleUnscheduledPred(SI.getSUnit(), SU))
 668           ++NumNodesBlocking;
 669   } else {
 670     // How many unscheduled predecessors block this node?
 671     if (Bot.isLatencyBound(SU))
 672       for (const SDep &PI : SU->Preds)
 673         if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
 674           ++NumNodesBlocking;
 675   }
 676   ResCount += (NumNodesBlocking * ScaleTwo);
 677
 678   LLVM_DEBUG(if (verbose) {
 679     std::stringstream dbgstr;
 680     dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
 681     dbgs() << dbgstr.str();
 682   });
 683
 684   // Factor in reg pressure as a heuristic.
 685   if (!IgnoreBBRegPressure) {
 686     // Decrease priority by the amount that register pressure exceeds the limit.
 687     ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
 688     // Decrease priority if register pressure exceeds the limit.
 689     ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
 690     // Decrease priority slightly if register pressure would increase over the
 691     // current maximum.
 692     ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
 693     // If there are register pressure issues, then we remove the value added for
 694     // the instruction being available. The rationale is that we really don't
 695     // want to schedule an instruction that causes a spill.
 696     if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
 697         (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
 698          Delta.CurrentMax.getUnitInc()))
 699       ResCount -= IsAvailableAmt;
 700     LLVM_DEBUG(if (verbose) {
 701       dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
 702              << Delta.CriticalMax.getUnitInc() << "/"
 703              << Delta.CurrentMax.getUnitInc() << ")|";
 704     });
 705   }
 706
 707   // Give preference to a zero latency instruction if the dependent
 708   // instruction is in the current packet.
 709   if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
 710     for (const SDep &PI : SU->Preds) {
 711       if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
 712           PI.getLatency() == 0 &&
 713           Top.ResourceModel->isInPacket(PI.getSUnit())) {
 714         ResCount += PriorityThree;
 715         LLVM_DEBUG(if (verbose) dbgs() << "Z|");
 716       }
 717     }
 718   } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
 719     for (const SDep &SI : SU->Succs) {
 720       if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
 721           SI.getLatency() == 0 &&
 722           Bot.ResourceModel->isInPacket(SI.getSUnit())) {
 723         ResCount += PriorityThree;
 724         LLVM_DEBUG(if (verbose) dbgs() << "Z|");
 725       }
 726     }
 727   }
 728
 729   // If the instruction has a non-zero latency dependence with an instruction in
 730   // the current packet, then it should not be scheduled yet. The case occurs
 731   // when the dependent instruction is scheduled in a new packet, so the
 732   // scheduler updates the current cycle and pending instructions become
 733   // available.
 734   if (CheckEarlyAvail) {
 735     if (Q.getID() == TopQID) {
 736       for (const auto &PI : SU->Preds) {
 737         if (PI.getLatency() > 0 &&
 738             Top.ResourceModel->isInPacket(PI.getSUnit())) {
 739           ResCount -= PriorityOne;
 740           LLVM_DEBUG(if (verbose) dbgs() << "D|");
 741         }
 742       }
 743     } else {
 744       for (const auto &SI : SU->Succs) {
 745         if (SI.getLatency() > 0 &&
 746             Bot.ResourceModel->isInPacket(SI.getSUnit())) {
 747           ResCount -= PriorityOne;
 748           LLVM_DEBUG(if (verbose) dbgs() << "D|");
 749         }
 750       }
 751     }
 752   }
 753
 754   LLVM_DEBUG(if (verbose) {
 755     std::stringstream dbgstr;
 756     dbgstr << "Total " << std::setw(4) << ResCount << ")";
 757     dbgs() << dbgstr.str();
 758   });
 759
 760   return ResCount;
 761 }
 762
 763 /// Pick the best candidate from the top queue.
 764 ///
 765 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
 766 /// DAG building. To adjust for the current scheduling location we need to
 767 /// maintain the number of vreg uses remaining to be top-scheduled.
 768 ConvergingVLIWScheduler::CandResult
 769 ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone,
 770                                            const RegPressureTracker &RPTracker,
 771                                            SchedCandidate &Candidate) {
 772   ReadyQueue &Q = Zone.Available;
 773   LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
 774                  readyQueueVerboseDump(RPTracker, Candidate, Q);
 775              else Q.dump(););
 776
 777   // getMaxPressureDelta temporarily modifies the tracker.
 778   RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
 779
 780   // BestSU remains NULL if no top candidates beat the best existing candidate.
 781   CandResult FoundCandidate = NoCand;
 782   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
 783     RegPressureDelta RPDelta;
 784     TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
 785                                     DAG->getRegionCriticalPSets(),
 786                                     DAG->getRegPressure().MaxSetPressure);
 787
 788     int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
 789
 790     // Initialize the candidate if needed.
 791     if (!Candidate.SU) {
 792       LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
 793       Candidate.SU = *I;
 794       Candidate.RPDelta = RPDelta;
 795       Candidate.SCost = CurrentCost;
 796       FoundCandidate = NodeOrder;
 797       continue;
 798     }
 799
 800     // Choose node order for negative cost candidates. There is no good
 801     // candidate in this case.
 802     if (CurrentCost < 0 && Candidate.SCost < 0) {
 803       if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
 804           (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
 805         LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
 806         Candidate.SU = *I;
 807         Candidate.RPDelta = RPDelta;
 808         Candidate.SCost = CurrentCost;
 809         FoundCandidate = NodeOrder;
 810       }
 811       continue;
 812     }
 813
 814     // Best cost.
 815     if (CurrentCost > Candidate.SCost) {
 816       LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
 817       Candidate.SU = *I;
 818       Candidate.RPDelta = RPDelta;
 819       Candidate.SCost = CurrentCost;
 820       FoundCandidate = BestCost;
 821       continue;
 822     }
 823
 824     // Choose an instruction that does not depend on an artificial edge.
 825     unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
 826     unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
 827     if (CurrWeak != CandWeak) {
 828       if (CurrWeak < CandWeak) {
 829         LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
 830         Candidate.SU = *I;
 831         Candidate.RPDelta = RPDelta;
 832         Candidate.SCost = CurrentCost;
 833         FoundCandidate = Weak;
 834       }
 835       continue;
 836     }
 837
 838     if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
 839       unsigned CurrSize, CandSize;
 840       if (Q.getID() == TopQID) {
 841         CurrSize = (*I)->Succs.size();
 842         CandSize = Candidate.SU->Succs.size();
 843       } else {
 844         CurrSize = (*I)->Preds.size();
 845         CandSize = Candidate.SU->Preds.size();
 846       }
 847       if (CurrSize > CandSize) {
 848         LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
 849         Candidate.SU = *I;
 850         Candidate.RPDelta = RPDelta;
 851         Candidate.SCost = CurrentCost;
 852         FoundCandidate = BestCost;
 853       }
 854       // Keep the old candidate if it's a better candidate. That is, don't use
 855       // the subsequent tie breaker.
 856       if (CurrSize != CandSize)
 857         continue;
 858     }
 859
 860     // Tie breaker.
 861     // To avoid scheduling indeterminism, we need a tie breaker
 862     // for the case when cost is identical for two nodes.
 863     if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
 864       if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
 865           (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
 866         LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
 867         Candidate.SU = *I;
 868         Candidate.RPDelta = RPDelta;
 869         Candidate.SCost = CurrentCost;
 870         FoundCandidate = NodeOrder;
 871         continue;
 872       }
 873     }
 874
 875     // Fall through to original instruction order.
 876     // Only consider node order if Candidate was chosen from this Q.
 877     if (FoundCandidate == NoCand)
 878       continue;
 879   }
 880   return FoundCandidate;
 881 }
 882
 883 /// Pick the best candidate node from either the top or bottom queue.
 884 SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
 885   // Schedule as far as possible in the direction of no choice. This is most
 886   // efficient, but also provides the best heuristics for CriticalPSets.
 887   if (SUnit *SU = Bot.pickOnlyChoice()) {
 888     LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
 889     IsTopNode = false;
 890     return SU;
 891   }
 892   if (SUnit *SU = Top.pickOnlyChoice()) {
 893     LLVM_DEBUG(dbgs() << "Picked only Top\n");
 894     IsTopNode = true;
 895     return SU;
 896   }
 897   SchedCandidate BotCand;
 898   // Prefer bottom scheduling when heuristics are silent.
 899   CandResult BotResult =
 900       pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
 901   assert(BotResult != NoCand && "failed to find the first candidate");
 902
 903   // If either Q has a single candidate that provides the least increase in
 904   // Excess pressure, we can immediately schedule from that Q.
 905   //
 906   // RegionCriticalPSets summarizes the pressure within the scheduled region and
 907   // affects picking from either Q. If scheduling in one direction must
 908   // increase pressure for one of the excess PSets, then schedule in that
 909   // direction first to provide more freedom in the other direction.
 910   if (BotResult == SingleExcess || BotResult == SingleCritical) {
 911     LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");
 912     IsTopNode = false;
 913     return BotCand.SU;
 914   }
 915   // Check if the top Q has a better candidate.
 916   SchedCandidate TopCand;
 917   CandResult TopResult =
 918       pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
 919   assert(TopResult != NoCand && "failed to find the first candidate");
 920
 921   if (TopResult == SingleExcess || TopResult == SingleCritical) {
 922     LLVM_DEBUG(dbgs() << "Prefered Top Node\n");
 923     IsTopNode = true;
 924     return TopCand.SU;
 925   }
 926   // If either Q has a single candidate that minimizes pressure above the
 927   // original region's pressure pick it.
 928   if (BotResult == SingleMax) {
 929     LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
 930     IsTopNode = false;
 931     return BotCand.SU;
 932   }
 933   if (TopResult == SingleMax) {
 934     LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
 935     IsTopNode = true;
 936     return TopCand.SU;
 937   }
 938   if (TopCand.SCost > BotCand.SCost) {
 939     LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");
 940     IsTopNode = true;
 941     return TopCand.SU;
 942   }
 943   // Otherwise prefer the bottom candidate in node order.
 944   LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");
 945   IsTopNode = false;
 946   return BotCand.SU;
 947 }
 948
 949 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
 950 SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
 951   if (DAG->top() == DAG->bottom()) {
 952     assert(Top.Available.empty() && Top.Pending.empty() &&
 953            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
 954     return nullptr;
 955   }
 956   SUnit *SU;
 957   if (ForceTopDown) {
 958     SU = Top.pickOnlyChoice();
 959     if (!SU) {
 960       SchedCandidate TopCand;
 961       CandResult TopResult =
 962           pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
 963       assert(TopResult != NoCand && "failed to find the first candidate");
 964       (void)TopResult;
 965       SU = TopCand.SU;
 966     }
 967     IsTopNode = true;
 968   } else if (ForceBottomUp) {
 969     SU = Bot.pickOnlyChoice();
 970     if (!SU) {
 971       SchedCandidate BotCand;
 972       CandResult BotResult =
 973           pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
 974       assert(BotResult != NoCand && "failed to find the first candidate");
 975       (void)BotResult;
 976       SU = BotCand.SU;
 977     }
 978     IsTopNode = false;
 979   } else {
 980     SU = pickNodeBidrectional(IsTopNode);
 981   }
 982   if (SU->isTopReady())
 983     Top.removeReady(SU);
 984   if (SU->isBottomReady())
 985     Bot.removeReady(SU);
 986
 987   LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
 988                     << " Scheduling instruction in cycle "
 989                     << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
 990                     << reportPackets() << ")\n";
 991              DAG->dumpNode(*SU));
 992   return SU;
 993 }
 994
 995 /// Update the scheduler's state after scheduling a node. This is the same node
 996 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs
 997 /// to update it's state based on the current cycle before MachineSchedStrategy
 998 /// does.
 999 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
1000   if (IsTopNode) {
1001     Top.bumpNode(SU);
1002     SU->TopReadyCycle = Top.CurrCycle;
1003   } else {
1004     Bot.bumpNode(SU);
1005     SU->BotReadyCycle = Bot.CurrCycle;
1006   }
1007 }