[InstCombine] Signed saturation patterns
[llvm-complete.git] / lib / Target / AMDGPU / GCNILPSched.cpp
blob39072af7d871186eed3beeacabe930f3ccfef6e8
1 //===---------------------------- GCNILPSched.cpp - -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
11 //===----------------------------------------------------------------------===//
13 #include "llvm/CodeGen/ScheduleDAG.h"
14 #include "llvm/Support/Debug.h"
16 using namespace llvm;
18 #define DEBUG_TYPE "machine-scheduler"
20 namespace {
22 class GCNILPScheduler {
23 struct Candidate : ilist_node<Candidate> {
24 SUnit *SU;
26 Candidate(SUnit *SU_)
27 : SU(SU_) {}
30 SpecificBumpPtrAllocator<Candidate> Alloc;
31 typedef simple_ilist<Candidate> Queue;
32 Queue PendingQueue;
33 Queue AvailQueue;
34 unsigned CurQueueId = 0;
36 std::vector<unsigned> SUNumbers;
38 /// CurCycle - The current scheduler state corresponds to this cycle.
39 unsigned CurCycle = 0;
41 unsigned getNodePriority(const SUnit *SU) const;
43 const SUnit *pickBest(const SUnit *left, const SUnit *right);
44 Candidate* pickCandidate();
46 void releasePending();
47 void advanceToCycle(unsigned NextCycle);
48 void releasePredecessors(const SUnit* SU);
50 public:
51 std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
52 const ScheduleDAG &DAG);
54 } // namespace
56 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
57 /// Smaller number is the higher priority.
58 static unsigned
59 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
60 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
61 if (SethiUllmanNumber != 0)
62 return SethiUllmanNumber;
64 unsigned Extra = 0;
65 for (const SDep &Pred : SU->Preds) {
66 if (Pred.isCtrl()) continue; // ignore chain preds
67 SUnit *PredSU = Pred.getSUnit();
68 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
69 if (PredSethiUllman > SethiUllmanNumber) {
70 SethiUllmanNumber = PredSethiUllman;
71 Extra = 0;
73 else if (PredSethiUllman == SethiUllmanNumber)
74 ++Extra;
77 SethiUllmanNumber += Extra;
79 if (SethiUllmanNumber == 0)
80 SethiUllmanNumber = 1;
82 return SethiUllmanNumber;
85 // Lower priority means schedule further down. For bottom-up scheduling, lower
86 // priority SUs are scheduled before higher priority SUs.
87 unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
88 assert(SU->NodeNum < SUNumbers.size());
89 if (SU->NumSuccs == 0 && SU->NumPreds != 0)
90 // If SU does not have a register use, i.e. it doesn't produce a value
91 // that would be consumed (e.g. store), then it terminates a chain of
92 // computation. Give it a large SethiUllman number so it will be
93 // scheduled right before its predecessors that it doesn't lengthen
94 // their live ranges.
95 return 0xffff;
97 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
98 // If SU does not have a register def, schedule it close to its uses
99 // because it does not lengthen any live ranges.
100 return 0;
102 return SUNumbers[SU->NodeNum];
105 /// closestSucc - Returns the scheduled cycle of the successor which is
106 /// closest to the current cycle.
107 static unsigned closestSucc(const SUnit *SU) {
108 unsigned MaxHeight = 0;
109 for (const SDep &Succ : SU->Succs) {
110 if (Succ.isCtrl()) continue; // ignore chain succs
111 unsigned Height = Succ.getSUnit()->getHeight();
112 // If there are bunch of CopyToRegs stacked up, they should be considered
113 // to be at the same position.
114 if (Height > MaxHeight)
115 MaxHeight = Height;
117 return MaxHeight;
120 /// calcMaxScratches - Returns an cost estimate of the worse case requirement
121 /// for scratch registers, i.e. number of data dependencies.
122 static unsigned calcMaxScratches(const SUnit *SU) {
123 unsigned Scratches = 0;
124 for (const SDep &Pred : SU->Preds) {
125 if (Pred.isCtrl()) continue; // ignore chain preds
126 Scratches++;
128 return Scratches;
131 // Return -1 if left has higher priority, 1 if right has higher priority.
132 // Return 0 if latency-based priority is equivalent.
133 static int BUCompareLatency(const SUnit *left, const SUnit *right) {
134 // Scheduling an instruction that uses a VReg whose postincrement has not yet
135 // been scheduled will induce a copy. Model this as an extra cycle of latency.
136 int LHeight = (int)left->getHeight();
137 int RHeight = (int)right->getHeight();
139 // If either node is scheduling for latency, sort them by height/depth
140 // and latency.
142 // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
143 // is enabled, grouping instructions by cycle, then its height is already
144 // covered so only its depth matters. We also reach this point if both stall
145 // but have the same height.
146 if (LHeight != RHeight)
147 return LHeight > RHeight ? 1 : -1;
149 int LDepth = left->getDepth();
150 int RDepth = right->getDepth();
151 if (LDepth != RDepth) {
152 LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
153 << ") depth " << LDepth << " vs SU (" << right->NodeNum
154 << ") depth " << RDepth << "\n");
155 return LDepth < RDepth ? 1 : -1;
157 if (left->Latency != right->Latency)
158 return left->Latency > right->Latency ? 1 : -1;
160 return 0;
163 const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
165 // TODO: add register pressure lowering checks
167 bool const DisableSchedCriticalPath = false;
168 int MaxReorderWindow = 6;
169 if (!DisableSchedCriticalPath) {
170 int spread = (int)left->getDepth() - (int)right->getDepth();
171 if (std::abs(spread) > MaxReorderWindow) {
172 LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
173 << left->getDepth() << " != SU(" << right->NodeNum
174 << "): " << right->getDepth() << "\n");
175 return left->getDepth() < right->getDepth() ? right : left;
179 bool const DisableSchedHeight = false;
180 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
181 int spread = (int)left->getHeight() - (int)right->getHeight();
182 if (std::abs(spread) > MaxReorderWindow)
183 return left->getHeight() > right->getHeight() ? right : left;
186 // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
187 unsigned LPriority = getNodePriority(left);
188 unsigned RPriority = getNodePriority(right);
190 if (LPriority != RPriority)
191 return LPriority > RPriority ? right : left;
193 // Try schedule def + use closer when Sethi-Ullman numbers are the same.
194 // e.g.
195 // t1 = op t2, c1
196 // t3 = op t4, c2
198 // and the following instructions are both ready.
199 // t2 = op c3
200 // t4 = op c4
202 // Then schedule t2 = op first.
203 // i.e.
204 // t4 = op c4
205 // t2 = op c3
206 // t1 = op t2, c1
207 // t3 = op t4, c2
209 // This creates more short live intervals.
210 unsigned LDist = closestSucc(left);
211 unsigned RDist = closestSucc(right);
212 if (LDist != RDist)
213 return LDist < RDist ? right : left;
215 // How many registers becomes live when the node is scheduled.
216 unsigned LScratch = calcMaxScratches(left);
217 unsigned RScratch = calcMaxScratches(right);
218 if (LScratch != RScratch)
219 return LScratch > RScratch ? right : left;
221 bool const DisableSchedCycles = false;
222 if (!DisableSchedCycles) {
223 int result = BUCompareLatency(left, right);
224 if (result != 0)
225 return result > 0 ? right : left;
226 return left;
228 else {
229 if (left->getHeight() != right->getHeight())
230 return (left->getHeight() > right->getHeight()) ? right : left;
232 if (left->getDepth() != right->getDepth())
233 return (left->getDepth() < right->getDepth()) ? right : left;
236 assert(left->NodeQueueId && right->NodeQueueId &&
237 "NodeQueueId cannot be zero");
238 return (left->NodeQueueId > right->NodeQueueId) ? right : left;
241 GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
242 if (AvailQueue.empty())
243 return nullptr;
244 auto Best = AvailQueue.begin();
245 for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
246 auto NewBestSU = pickBest(Best->SU, I->SU);
247 if (NewBestSU != Best->SU) {
248 assert(NewBestSU == I->SU);
249 Best = I;
252 return &*Best;
255 void GCNILPScheduler::releasePending() {
256 // Check to see if any of the pending instructions are ready to issue. If
257 // so, add them to the available queue.
258 for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
259 auto &C = *I++;
260 if (C.SU->getHeight() <= CurCycle) {
261 PendingQueue.remove(C);
262 AvailQueue.push_back(C);
263 C.SU->NodeQueueId = CurQueueId++;
268 /// Move the scheduler state forward by the specified number of Cycles.
269 void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
270 if (NextCycle <= CurCycle)
271 return;
272 CurCycle = NextCycle;
273 releasePending();
276 void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
277 for (const auto &PredEdge : SU->Preds) {
278 auto PredSU = PredEdge.getSUnit();
279 if (PredEdge.isWeak())
280 continue;
281 assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
283 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
285 if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
286 PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
290 std::vector<const SUnit*>
291 GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
292 const ScheduleDAG &DAG) {
293 auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
295 std::vector<SUnit> SUSavedCopy;
296 SUSavedCopy.resize(SUnits.size());
298 // we cannot save only those fields we touch: some of them are private
299 // so save units verbatim: this assumes SUnit should have value semantics
300 for (const SUnit &SU : SUnits)
301 SUSavedCopy[SU.NodeNum] = SU;
303 SUNumbers.assign(SUnits.size(), 0);
304 for (const SUnit &SU : SUnits)
305 CalcNodeSethiUllmanNumber(&SU, SUNumbers);
307 for (auto SU : BotRoots) {
308 AvailQueue.push_back(
309 *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
311 releasePredecessors(&DAG.ExitSU);
313 std::vector<const SUnit*> Schedule;
314 Schedule.reserve(SUnits.size());
315 while (true) {
316 if (AvailQueue.empty() && !PendingQueue.empty()) {
317 auto EarliestSU = std::min_element(
318 PendingQueue.begin(), PendingQueue.end(),
319 [=](const Candidate& C1, const Candidate& C2) {
320 return C1.SU->getHeight() < C2.SU->getHeight();
321 })->SU;
322 advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
324 if (AvailQueue.empty())
325 break;
327 LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
328 "Ready queue:";
329 for (auto &C
330 : AvailQueue) dbgs()
331 << ' ' << C.SU->NodeNum;
332 dbgs() << '\n';);
334 auto C = pickCandidate();
335 assert(C);
336 AvailQueue.remove(*C);
337 auto SU = C->SU;
338 LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
340 advanceToCycle(SU->getHeight());
342 releasePredecessors(SU);
343 Schedule.push_back(SU);
344 SU->isScheduled = true;
346 assert(SUnits.size() == Schedule.size());
348 std::reverse(Schedule.begin(), Schedule.end());
350 // restore units
351 for (auto &SU : SUnits)
352 SU = SUSavedCopy[SU.NodeNum];
354 return Schedule;
357 namespace llvm {
358 std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
359 const ScheduleDAG &DAG) {
360 GCNILPScheduler S;
361 return S.schedule(BotRoots, DAG);