lib/Target/AMDGPU/GCNILPSched.cpp

   1 //===---------------------------- GCNILPSched.cpp - -----------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
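/// \file
/// Implements GCNILPScheduler, a bottom-up list scheduler over a ScheduleDAG
/// that orders SUnits using depth, height, Sethi-Ullman numbers and latency.
//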
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "llvm/CodeGen/ScheduleDAG.h"
  14
  15 using namespace llvm;
  16
  17 #define DEBUG_TYPE "machine-scheduler"
  18
  19 namespace {
  20
  21 class GCNILPScheduler {
  22   struct Candidate : ilist_node<Candidate> {
  23     SUnit *SU;
  24
  25     Candidate(SUnit *SU_)
  26       : SU(SU_) {}
  27   };
  28
  29   SpecificBumpPtrAllocator<Candidate> Alloc;
  30   typedef simple_ilist<Candidate> Queue;
  31   Queue PendingQueue;
  32   Queue AvailQueue;
  33   unsigned CurQueueId = 0;
  34
  35   std::vector<unsigned> SUNumbers;
  36
  37   /// CurCycle - The current scheduler state corresponds to this cycle.
  38   unsigned CurCycle = 0;
  39
  40   unsigned getNodePriority(const SUnit *SU) const;
  41
  42   const SUnit *pickBest(const SUnit *left, const SUnit *right);
  43   Candidate* pickCandidate();
  44
  45   void releasePending();
  46   void advanceToCycle(unsigned NextCycle);
  47   void releasePredecessors(const SUnit* SU);
  48
  49 public:
  50   std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
  51                                      const ScheduleDAG &DAG);
  52 };
  53 } // namespace
  54
  55 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
  56 /// Smaller number is the higher priority.
  57 static unsigned
  58 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  59   unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  60   if (SethiUllmanNumber != 0)
  61     return SethiUllmanNumber;
  62
  63   unsigned Extra = 0;
  64   for (const SDep &Pred : SU->Preds) {
  65     if (Pred.isCtrl()) continue;  // ignore chain preds
  66     SUnit *PredSU = Pred.getSUnit();
  67     unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
  68     if (PredSethiUllman > SethiUllmanNumber) {
  69       SethiUllmanNumber = PredSethiUllman;
  70       Extra = 0;
  71     }
  72     else if (PredSethiUllman == SethiUllmanNumber)
  73       ++Extra;
  74   }
  75
  76   SethiUllmanNumber += Extra;
  77
  78   if (SethiUllmanNumber == 0)
  79     SethiUllmanNumber = 1;
  80
  81   return SethiUllmanNumber;
  82 }
  83
  84 // Lower priority means schedule further down. For bottom-up scheduling, lower
  85 // priority SUs are scheduled before higher priority SUs.
  86 unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
  87   assert(SU->NodeNum < SUNumbers.size());
  88   if (SU->NumSuccs == 0 && SU->NumPreds != 0)
  89     // If SU does not have a register use, i.e. it doesn't produce a value
  90     // that would be consumed (e.g. store), then it terminates a chain of
  91     // computation.  Give it a large SethiUllman number so it will be
  92     // scheduled right before its predecessors that it doesn't lengthen
  93     // their live ranges.
  94     return 0xffff;
  95
  96   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
  97     // If SU does not have a register def, schedule it close to its uses
  98     // because it does not lengthen any live ranges.
  99     return 0;
 100
 101   return SUNumbers[SU->NodeNum];
 102 }
 103
 104 /// closestSucc - Returns the scheduled cycle of the successor which is
 105 /// closest to the current cycle.
 106 static unsigned closestSucc(const SUnit *SU) {
 107   unsigned MaxHeight = 0;
 108   for (const SDep &Succ : SU->Succs) {
 109     if (Succ.isCtrl()) continue;  // ignore chain succs
 110     unsigned Height = Succ.getSUnit()->getHeight();
 111     // If there are bunch of CopyToRegs stacked up, they should be considered
 112     // to be at the same position.
 113     if (Height > MaxHeight)
 114       MaxHeight = Height;
 115   }
 116   return MaxHeight;
 117 }
 118
 119 /// calcMaxScratches - Returns an cost estimate of the worse case requirement
 120 /// for scratch registers, i.e. number of data dependencies.
 121 static unsigned calcMaxScratches(const SUnit *SU) {
 122   unsigned Scratches = 0;
 123   for (const SDep &Pred : SU->Preds) {
 124     if (Pred.isCtrl()) continue;  // ignore chain preds
 125     Scratches++;
 126   }
 127   return Scratches;
 128 }
 129
 130 // Return -1 if left has higher priority, 1 if right has higher priority.
 131 // Return 0 if latency-based priority is equivalent.
 132 static int BUCompareLatency(const SUnit *left, const SUnit *right) {
 133   // Scheduling an instruction that uses a VReg whose postincrement has not yet
 134   // been scheduled will induce a copy. Model this as an extra cycle of latency.
 135   int LHeight = (int)left->getHeight();
 136   int RHeight = (int)right->getHeight();
 137
 138   // If either node is scheduling for latency, sort them by height/depth
 139   // and latency.
 140
 141   // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
 142   // is enabled, grouping instructions by cycle, then its height is already
 143   // covered so only its depth matters. We also reach this point if both stall
 144   // but have the same height.
 145   if (LHeight != RHeight)
 146     return LHeight > RHeight ? 1 : -1;
 147
 148   int LDepth = left->getDepth();
 149   int RDepth = right->getDepth();
 150   if (LDepth != RDepth) {
 151     LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
 152                       << ") depth " << LDepth << " vs SU (" << right->NodeNum
 153                       << ") depth " << RDepth << "\n");
 154     return LDepth < RDepth ? 1 : -1;
 155   }
 156   if (left->Latency != right->Latency)
 157     return left->Latency > right->Latency ? 1 : -1;
 158
 159   return 0;
 160 }
 161
 162 const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
 163 {
 164   // TODO: add register pressure lowering checks
 165
 166   bool const DisableSchedCriticalPath = false;
 167   int MaxReorderWindow = 6;
 168   if (!DisableSchedCriticalPath) {
 169     int spread = (int)left->getDepth() - (int)right->getDepth();
 170     if (std::abs(spread) > MaxReorderWindow) {
 171       LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
 172                         << left->getDepth() << " != SU(" << right->NodeNum
 173                         << "): " << right->getDepth() << "\n");
 174       return left->getDepth() < right->getDepth() ? right : left;
 175     }
 176   }
 177
 178   bool const DisableSchedHeight = false;
 179   if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
 180     int spread = (int)left->getHeight() - (int)right->getHeight();
 181     if (std::abs(spread) > MaxReorderWindow)
 182       return left->getHeight() > right->getHeight() ? right : left;
 183   }
 184
 185   // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
 186   unsigned LPriority = getNodePriority(left);
 187   unsigned RPriority = getNodePriority(right);
 188
 189   if (LPriority != RPriority)
 190     return LPriority > RPriority ? right : left;
 191
 192   // Try schedule def + use closer when Sethi-Ullman numbers are the same.
 193   // e.g.
 194   // t1 = op t2, c1
 195   // t3 = op t4, c2
 196   //
 197   // and the following instructions are both ready.
 198   // t2 = op c3
 199   // t4 = op c4
 200   //
 201   // Then schedule t2 = op first.
 202   // i.e.
 203   // t4 = op c4
 204   // t2 = op c3
 205   // t1 = op t2, c1
 206   // t3 = op t4, c2
 207   //
 208   // This creates more short live intervals.
 209   unsigned LDist = closestSucc(left);
 210   unsigned RDist = closestSucc(right);
 211   if (LDist != RDist)
 212     return LDist < RDist ? right : left;
 213
 214   // How many registers becomes live when the node is scheduled.
 215   unsigned LScratch = calcMaxScratches(left);
 216   unsigned RScratch = calcMaxScratches(right);
 217   if (LScratch != RScratch)
 218     return LScratch > RScratch ? right : left;
 219
 220   bool const DisableSchedCycles = false;
 221   if (!DisableSchedCycles) {
 222     int result = BUCompareLatency(left, right);
 223     if (result != 0)
 224       return result > 0 ? right : left;
 225     return left;
 226   }
 227   else {
 228     if (left->getHeight() != right->getHeight())
 229       return (left->getHeight() > right->getHeight()) ? right : left;
 230
 231     if (left->getDepth() != right->getDepth())
 232       return (left->getDepth() < right->getDepth()) ? right : left;
 233   }
 234
 235   assert(left->NodeQueueId && right->NodeQueueId &&
 236         "NodeQueueId cannot be zero");
 237   return (left->NodeQueueId > right->NodeQueueId) ? right : left;
 238 }
 239
 240 GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
 241   if (AvailQueue.empty())
 242     return nullptr;
 243   auto Best = AvailQueue.begin();
 244   for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
 245     auto NewBestSU = pickBest(Best->SU, I->SU);
 246     if (NewBestSU != Best->SU) {
 247       assert(NewBestSU == I->SU);
 248       Best = I;
 249     }
 250   }
 251   return &*Best;
 252 }
 253
 254 void GCNILPScheduler::releasePending() {
 255   // Check to see if any of the pending instructions are ready to issue.  If
 256   // so, add them to the available queue.
 257   for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
 258     auto &C = *I++;
 259     if (C.SU->getHeight() <= CurCycle) {
 260       PendingQueue.remove(C);
 261       AvailQueue.push_back(C);
 262       C.SU->NodeQueueId = CurQueueId++;
 263     }
 264   }
 265 }
 266
 267 /// Move the scheduler state forward by the specified number of Cycles.
 268 void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
 269   if (NextCycle <= CurCycle)
 270     return;
 271   CurCycle = NextCycle;
 272   releasePending();
 273 }
 274
 275 void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
 276   for (const auto &PredEdge : SU->Preds) {
 277     auto PredSU = PredEdge.getSUnit();
 278     if (PredEdge.isWeak())
 279       continue;
 280     assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
 281
 282     PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
 283
 284     if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
 285       PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
 286   }
 287 }
 288
 289 std::vector<const SUnit*>
 290 GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
 291                           const ScheduleDAG &DAG) {
 292   auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
 293
 294   std::vector<SUnit> SUSavedCopy;
 295   SUSavedCopy.resize(SUnits.size());
 296
 297   // we cannot save only those fields we touch: some of them are private
 298   // so save units verbatim: this assumes SUnit should have value semantics
 299   for (const SUnit &SU : SUnits)
 300     SUSavedCopy[SU.NodeNum] = SU;
 301
 302   SUNumbers.assign(SUnits.size(), 0);
 303   for (const SUnit &SU : SUnits)
 304     CalcNodeSethiUllmanNumber(&SU, SUNumbers);
 305
 306   for (auto SU : BotRoots) {
 307     AvailQueue.push_back(
 308       *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
 309   }
 310   releasePredecessors(&DAG.ExitSU);
 311
 312   std::vector<const SUnit*> Schedule;
 313   Schedule.reserve(SUnits.size());
 314   while (true) {
 315     if (AvailQueue.empty() && !PendingQueue.empty()) {
 316       auto EarliestSU = std::min_element(
 317         PendingQueue.begin(), PendingQueue.end(),
 318         [=](const Candidate& C1, const Candidate& C2) {
 319         return C1.SU->getHeight() < C2.SU->getHeight();
 320       })->SU;
 321       advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
 322     }
 323     if (AvailQueue.empty())
 324       break;
 325
 326     LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
 327                          "Ready queue:";
 328                for (auto &C
 329                     : AvailQueue) dbgs()
 330                << ' ' << C.SU->NodeNum;
 331                dbgs() << '\n';);
 332
 333     auto C = pickCandidate();
 334     assert(C);
 335     AvailQueue.remove(*C);
 336     auto SU = C->SU;
 337     LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
 338
 339     advanceToCycle(SU->getHeight());
 340
 341     releasePredecessors(SU);
 342     Schedule.push_back(SU);
 343     SU->isScheduled = true;
 344   }
 345   assert(SUnits.size() == Schedule.size());
 346
 347   std::reverse(Schedule.begin(), Schedule.end());
 348
 349   // restore units
 350   for (auto &SU : SUnits)
 351     SU = SUSavedCopy[SU.NodeNum];
 352
 353   return Schedule;
 354 }
 355
 356 namespace llvm {
 357 std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
 358                                               const ScheduleDAG &DAG) {
 359   GCNILPScheduler S;
 360   return S.schedule(BotRoots, DAG);
 361 }
 362 }