lib/Target/AMDGPU/GCNILPSched.cpp

   1 //===---------------------------- GCNILPSched.cpp - -----------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 /// \file
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "llvm/CodeGen/ScheduleDAG.h"
  14 #include "llvm/Support/Debug.h"
  15
  16 using namespace llvm;
  17
  18 #define DEBUG_TYPE "machine-scheduler"
  19
  20 namespace {
  21
  22 class GCNILPScheduler {
  23   struct Candidate : ilist_node<Candidate> {
  24     SUnit *SU;
  25
  26     Candidate(SUnit *SU_)
  27       : SU(SU_) {}
  28   };
  29
  30   SpecificBumpPtrAllocator<Candidate> Alloc;
  31   typedef simple_ilist<Candidate> Queue;
  32   Queue PendingQueue;
  33   Queue AvailQueue;
  34   unsigned CurQueueId = 0;
  35
  36   std::vector<unsigned> SUNumbers;
  37
  38   /// CurCycle - The current scheduler state corresponds to this cycle.
  39   unsigned CurCycle = 0;
  40
  41   unsigned getNodePriority(const SUnit *SU) const;
  42
  43   const SUnit *pickBest(const SUnit *left, const SUnit *right);
  44   Candidate* pickCandidate();
  45
  46   void releasePending();
  47   void advanceToCycle(unsigned NextCycle);
  48   void releasePredecessors(const SUnit* SU);
  49
  50 public:
  51   std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
  52                                      const ScheduleDAG &DAG);
  53 };
  54 } // namespace
  55
  56 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
  57 /// Smaller number is the higher priority.
  58 static unsigned
  59 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  60   unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  61   if (SethiUllmanNumber != 0)
  62     return SethiUllmanNumber;
  63
  64   unsigned Extra = 0;
  65   for (const SDep &Pred : SU->Preds) {
  66     if (Pred.isCtrl()) continue;  // ignore chain preds
  67     SUnit *PredSU = Pred.getSUnit();
  68     unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
  69     if (PredSethiUllman > SethiUllmanNumber) {
  70       SethiUllmanNumber = PredSethiUllman;
  71       Extra = 0;
  72     }
  73     else if (PredSethiUllman == SethiUllmanNumber)
  74       ++Extra;
  75   }
  76
  77   SethiUllmanNumber += Extra;
  78
  79   if (SethiUllmanNumber == 0)
  80     SethiUllmanNumber = 1;
  81
  82   return SethiUllmanNumber;
  83 }
  84
  85 // Lower priority means schedule further down. For bottom-up scheduling, lower
  86 // priority SUs are scheduled before higher priority SUs.
  87 unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
  88   assert(SU->NodeNum < SUNumbers.size());
  89   if (SU->NumSuccs == 0 && SU->NumPreds != 0)
  90     // If SU does not have a register use, i.e. it doesn't produce a value
  91     // that would be consumed (e.g. store), then it terminates a chain of
  92     // computation.  Give it a large SethiUllman number so it will be
  93     // scheduled right before its predecessors that it doesn't lengthen
  94     // their live ranges.
  95     return 0xffff;
  96
  97   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
  98     // If SU does not have a register def, schedule it close to its uses
  99     // because it does not lengthen any live ranges.
 100     return 0;
 101
 102   return SUNumbers[SU->NodeNum];
 103 }
 104
 105 /// closestSucc - Returns the scheduled cycle of the successor which is
 106 /// closest to the current cycle.
 107 static unsigned closestSucc(const SUnit *SU) {
 108   unsigned MaxHeight = 0;
 109   for (const SDep &Succ : SU->Succs) {
 110     if (Succ.isCtrl()) continue;  // ignore chain succs
 111     unsigned Height = Succ.getSUnit()->getHeight();
 112     // If there are bunch of CopyToRegs stacked up, they should be considered
 113     // to be at the same position.
 114     if (Height > MaxHeight)
 115       MaxHeight = Height;
 116   }
 117   return MaxHeight;
 118 }
 119
 120 /// calcMaxScratches - Returns an cost estimate of the worse case requirement
 121 /// for scratch registers, i.e. number of data dependencies.
 122 static unsigned calcMaxScratches(const SUnit *SU) {
 123   unsigned Scratches = 0;
 124   for (const SDep &Pred : SU->Preds) {
 125     if (Pred.isCtrl()) continue;  // ignore chain preds
 126     Scratches++;
 127   }
 128   return Scratches;
 129 }
 130
 131 // Return -1 if left has higher priority, 1 if right has higher priority.
 132 // Return 0 if latency-based priority is equivalent.
 133 static int BUCompareLatency(const SUnit *left, const SUnit *right) {
 134   // Scheduling an instruction that uses a VReg whose postincrement has not yet
 135   // been scheduled will induce a copy. Model this as an extra cycle of latency.
 136   int LHeight = (int)left->getHeight();
 137   int RHeight = (int)right->getHeight();
 138
 139   // If either node is scheduling for latency, sort them by height/depth
 140   // and latency.
 141
 142   // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
 143   // is enabled, grouping instructions by cycle, then its height is already
 144   // covered so only its depth matters. We also reach this point if both stall
 145   // but have the same height.
 146   if (LHeight != RHeight)
 147     return LHeight > RHeight ? 1 : -1;
 148
 149   int LDepth = left->getDepth();
 150   int RDepth = right->getDepth();
 151   if (LDepth != RDepth) {
 152     LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
 153                       << ") depth " << LDepth << " vs SU (" << right->NodeNum
 154                       << ") depth " << RDepth << "\n");
 155     return LDepth < RDepth ? 1 : -1;
 156   }
 157   if (left->Latency != right->Latency)
 158     return left->Latency > right->Latency ? 1 : -1;
 159
 160   return 0;
 161 }
 162
 163 const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
 164 {
 165   // TODO: add register pressure lowering checks
 166
 167   bool const DisableSchedCriticalPath = false;
 168   int MaxReorderWindow = 6;
 169   if (!DisableSchedCriticalPath) {
 170     int spread = (int)left->getDepth() - (int)right->getDepth();
 171     if (std::abs(spread) > MaxReorderWindow) {
 172       LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
 173                         << left->getDepth() << " != SU(" << right->NodeNum
 174                         << "): " << right->getDepth() << "\n");
 175       return left->getDepth() < right->getDepth() ? right : left;
 176     }
 177   }
 178
 179   bool const DisableSchedHeight = false;
 180   if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
 181     int spread = (int)left->getHeight() - (int)right->getHeight();
 182     if (std::abs(spread) > MaxReorderWindow)
 183       return left->getHeight() > right->getHeight() ? right : left;
 184   }
 185
 186   // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
 187   unsigned LPriority = getNodePriority(left);
 188   unsigned RPriority = getNodePriority(right);
 189
 190   if (LPriority != RPriority)
 191     return LPriority > RPriority ? right : left;
 192
 193   // Try schedule def + use closer when Sethi-Ullman numbers are the same.
 194   // e.g.
 195   // t1 = op t2, c1
 196   // t3 = op t4, c2
 197   //
 198   // and the following instructions are both ready.
 199   // t2 = op c3
 200   // t4 = op c4
 201   //
 202   // Then schedule t2 = op first.
 203   // i.e.
 204   // t4 = op c4
 205   // t2 = op c3
 206   // t1 = op t2, c1
 207   // t3 = op t4, c2
 208   //
 209   // This creates more short live intervals.
 210   unsigned LDist = closestSucc(left);
 211   unsigned RDist = closestSucc(right);
 212   if (LDist != RDist)
 213     return LDist < RDist ? right : left;
 214
 215   // How many registers becomes live when the node is scheduled.
 216   unsigned LScratch = calcMaxScratches(left);
 217   unsigned RScratch = calcMaxScratches(right);
 218   if (LScratch != RScratch)
 219     return LScratch > RScratch ? right : left;
 220
 221   bool const DisableSchedCycles = false;
 222   if (!DisableSchedCycles) {
 223     int result = BUCompareLatency(left, right);
 224     if (result != 0)
 225       return result > 0 ? right : left;
 226     return left;
 227   }
 228   else {
 229     if (left->getHeight() != right->getHeight())
 230       return (left->getHeight() > right->getHeight()) ? right : left;
 231
 232     if (left->getDepth() != right->getDepth())
 233       return (left->getDepth() < right->getDepth()) ? right : left;
 234   }
 235
 236   assert(left->NodeQueueId && right->NodeQueueId &&
 237         "NodeQueueId cannot be zero");
 238   return (left->NodeQueueId > right->NodeQueueId) ? right : left;
 239 }
 240
 241 GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
 242   if (AvailQueue.empty())
 243     return nullptr;
 244   auto Best = AvailQueue.begin();
 245   for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
 246     auto NewBestSU = pickBest(Best->SU, I->SU);
 247     if (NewBestSU != Best->SU) {
 248       assert(NewBestSU == I->SU);
 249       Best = I;
 250     }
 251   }
 252   return &*Best;
 253 }
 254
 255 void GCNILPScheduler::releasePending() {
 256   // Check to see if any of the pending instructions are ready to issue.  If
 257   // so, add them to the available queue.
 258   for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
 259     auto &C = *I++;
 260     if (C.SU->getHeight() <= CurCycle) {
 261       PendingQueue.remove(C);
 262       AvailQueue.push_back(C);
 263       C.SU->NodeQueueId = CurQueueId++;
 264     }
 265   }
 266 }
 267
 268 /// Move the scheduler state forward by the specified number of Cycles.
 269 void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
 270   if (NextCycle <= CurCycle)
 271     return;
 272   CurCycle = NextCycle;
 273   releasePending();
 274 }
 275
 276 void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
 277   for (const auto &PredEdge : SU->Preds) {
 278     auto PredSU = PredEdge.getSUnit();
 279     if (PredEdge.isWeak())
 280       continue;
 281     assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
 282
 283     PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
 284
 285     if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
 286       PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
 287   }
 288 }
 289
 290 std::vector<const SUnit*>
 291 GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
 292                           const ScheduleDAG &DAG) {
 293   auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
 294
 295   std::vector<SUnit> SUSavedCopy;
 296   SUSavedCopy.resize(SUnits.size());
 297
 298   // we cannot save only those fields we touch: some of them are private
 299   // so save units verbatim: this assumes SUnit should have value semantics
 300   for (const SUnit &SU : SUnits)
 301     SUSavedCopy[SU.NodeNum] = SU;
 302
 303   SUNumbers.assign(SUnits.size(), 0);
 304   for (const SUnit &SU : SUnits)
 305     CalcNodeSethiUllmanNumber(&SU, SUNumbers);
 306
 307   for (auto SU : BotRoots) {
 308     AvailQueue.push_back(
 309       *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
 310   }
 311   releasePredecessors(&DAG.ExitSU);
 312
 313   std::vector<const SUnit*> Schedule;
 314   Schedule.reserve(SUnits.size());
 315   while (true) {
 316     if (AvailQueue.empty() && !PendingQueue.empty()) {
 317       auto EarliestSU = std::min_element(
 318         PendingQueue.begin(), PendingQueue.end(),
 319         [=](const Candidate& C1, const Candidate& C2) {
 320         return C1.SU->getHeight() < C2.SU->getHeight();
 321       })->SU;
 322       advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
 323     }
 324     if (AvailQueue.empty())
 325       break;
 326
 327     LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
 328                          "Ready queue:";
 329                for (auto &C
 330                     : AvailQueue) dbgs()
 331                << ' ' << C.SU->NodeNum;
 332                dbgs() << '\n';);
 333
 334     auto C = pickCandidate();
 335     assert(C);
 336     AvailQueue.remove(*C);
 337     auto SU = C->SU;
 338     LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
 339
 340     advanceToCycle(SU->getHeight());
 341
 342     releasePredecessors(SU);
 343     Schedule.push_back(SU);
 344     SU->isScheduled = true;
 345   }
 346   assert(SUnits.size() == Schedule.size());
 347
 348   std::reverse(Schedule.begin(), Schedule.end());
 349
 350   // restore units
 351   for (auto &SU : SUnits)
 352     SU = SUSavedCopy[SU.NodeNum];
 353
 354   return Schedule;
 355 }
 356
 357 namespace llvm {
 358 std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
 359                                               const ScheduleDAG &DAG) {
 360   GCNILPScheduler S;
 361   return S.schedule(BotRoots, DAG);
 362 }
 363 }