//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "SIMachineFunctionInfo.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C) :
    GenericScheduler(C), TargetOccupancy(0), HasClusteredNodes(false),
    HasExcessPressure(false), MF(nullptr) { }

void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  // FIXME: This is also necessary, because some passes that run after
  // scheduling and before regalloc increase register pressure.
  const int ErrorMargin = 3;
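
  // Two kinds of limits are computed below: the "excess" limits are the number
  // of allocatable registers (minus the margin), and going past them risks
  // spilling; the "critical" limits correspond to the occupancy target (or the
  // default pressure set limits) and going past them costs waves.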
  SGPRExcessLimit = Context->RegClassInfo
    ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin;
  VGPRExcessLimit = Context->RegClassInfo
    ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin;
  if (TargetOccupancy) {
    SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true);
    VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy);
  } else {
    SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
        AMDGPU::RegisterPressureSets::SReg_32);
    VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
        AMDGPU::RegisterPressureSets::VGPR_32);
  }

  SGPRCriticalLimit -= ErrorMargin;
  VGPRCriticalLimit -= ErrorMargin;
}

void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop, const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  Pressure.clear();
  MaxPressure.clear();

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom-up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs. This is rarely what we want, so
  // when we report excess/critical register pressure, we do it either
  // only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    HasExcessPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasExcessPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
  // have the same cost, so we don't need to prefer one over the other.

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasExcessPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
        PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
        PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {
    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
                  SGPRPressure, VGPRPressure);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);
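
  // Remember whether this region contains any clustered memory operations;
  // GCNScheduleDAGMILive uses this flag when deciding which regions are worth
  // revisiting in the later, unclustered rescheduling stage.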
  if (!HasClusteredNodes && SU->getInstr()->mayLoadOrStore()) {
    for (SDep &Dep : SU->Preds) {
      if (Dep.isCluster()) {
        HasClusteredNodes = true;
        break;
      }
    }
  }

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
    std::unique_ptr<MachineSchedStrategy> S) :
  ScheduleDAGMILive(C, std::move(S)),
  ST(MF.getSubtarget<GCNSubtarget>()),
  MFI(*MF.getInfo<SIMachineFunctionInfo>()),
  StartingOccupancy(MFI.getOccupancy()),
  MinOccupancy(StartingOccupancy), Stage(Collect), RegionIdx(0) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
}

void GCNScheduleDAGMILive::schedule() {
  if (Stage == Collect) {
    // Just record regions at the first pass.
    Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
    return;
  }

  std::vector<MachineInstr*> Unsched;
  Unsched.reserve(NumRegionInstrs);
  for (auto &I : *this) {
    Unsched.push_back(&I);
  }

  GCNRegPressure PressureBefore;
  if (LIS) {
    PressureBefore = Pressure[RegionIdx];

    LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
               GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
               dbgs() << "Region live-in pressure: ";
               llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
               dbgs() << "Region register pressure: ";
               PressureBefore.print(dbgs()));
  }

  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  // Set HasClusteredNodes to true for late stages where we have already
  // collected it. That way pickNode() will not scan SDep's when not needed.
  S.HasClusteredNodes = Stage > InitialSchedule;
  S.HasExcessPressure = false;
  ScheduleDAGMILive::schedule();
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
  RescheduleRegions[RegionIdx] = false;
  if (Stage == InitialSchedule && S.HasClusteredNodes)
    RegionsWithClusters[RegionIdx] = true;
  if (S.HasExcessPressure)
    RegionsWithHighRP[RegionIdx] = true;

  if (!LIS)
    return;

  // Check the results of scheduling.
  auto PressureAfter = getRealRegPressure();

  LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
             PressureAfter.print(dbgs()));

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
    Pressure[RegionIdx] = PressureAfter;
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }

  unsigned Occ = MFI.getOccupancy();
  unsigned WavesAfter = std::min(Occ, PressureAfter.getOccupancy(ST));
  unsigned WavesBefore = std::min(Occ, PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We could not keep current target occupancy because of the just scheduled
  // region. Record new occupancy for next scheduling cycle.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < MinOccupancy) {
    MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(MinOccupancy);
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << MinOccupancy << ".\n");
  }

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
      PressureAfter.getAGPRNum() > MaxVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs) {
    RescheduleRegions[RegionIdx] = true;
    RegionsWithHighRP[RegionIdx] = true;
  }

  if (WavesAfter >= MinOccupancy) {
    if (Stage == UnclusteredReschedule &&
        !PressureAfter.less(ST, PressureBefore)) {
      LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    } else if (WavesAfter > MFI.getMinWavesPerEU() ||
               PressureAfter.less(ST, PressureBefore) ||
               !RescheduleRegions[RegionIdx]) {
      Pressure[RegionIdx] = PressureAfter;
      if (!RegionsWithClusters[RegionIdx] &&
          (Stage + 1) == UnclusteredReschedule)
        RescheduleRegions[RegionIdx] = false;
      return;
    } else {
      LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    }
  }
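
  // The new schedule did not help; restore the original instruction order
  // recorded in Unsched and update the live intervals accordingly.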
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  RescheduleRegions[RegionIdx] = RegionsWithClusters[RegionIdx] ||
                                 (Stage + 1) != UnclusteredReschedule;
  RegionEnd = RegionBegin;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr())
      continue;

    if (MI->getIterator() != RegionEnd) {
      BB->remove(MI);
      BB->insert(RegionEnd, MI);
      if (!MI->isDebugInstr())
        LIS->handleMove(*MI, true);
    }
    // Reset read-undef flags and update them later.
    for (auto &Op : MI->operands())
      if (Op.isReg() && Op.isDef())
        Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *LIS);
      }
    }
    RegionEnd = MI->getIterator();
    ++RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }
  RegionBegin = Unsched.front()->getIterator();
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);

  placeDebugValues();
}
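
// Recompute the maximum register pressure of the current region directly from
// the live interval information.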
GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has only one successor then the live-ins of that successor
  // are the live-outs of the current block. We can reuse the calculated live
  // set if the successor will be scheduled after the current block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
    SlotIndexes *Ind = LIS->getSlotIndexes();
    if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
      OnlySucc = *MBB->succ_begin();
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    auto &Rgn = Regions[CurRegion];
    I = Rgn.first;
    auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }
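
  // Walk the block from top to bottom, recording each region's live-ins and
  // maximum register pressure as the tracker crosses the region boundaries.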
  for ( ; ; ) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}
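
// Build a map from the first non-debug instruction of each region to the set
// of registers live before it; computeBlockPressure() uses it to seed the
// per-block pressure tracker.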
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    BBStarters.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}

void GCNScheduleDAGMILive::finalizeSchedule() {
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RegionsWithClusters.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RescheduleRegions.set();
  RegionsWithClusters.reset();
  RegionsWithHighRP.reset();

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
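
  // Scheduling runs in stages: the initial pass over all regions, an optional
  // pass without clustering mutations for regions flagged for rescheduling,
  // and a final pass that retargets the lowest occupancy recorded so far.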
  do {
    Stage++;
    RegionIdx = 0;
    MachineBasicBlock *MBB = nullptr;

    if (Stage > InitialSchedule) {
      if (!LIS)
        break;

      // Retry function scheduling if the resulting occupancy is lower than the
      // one used for the first pass. This gives more freedom to schedule low
      // register pressure blocks.
      // Code is partially copied from MachineSchedulerBase::scheduleRegions().

      if (Stage == UnclusteredReschedule) {
        if (RescheduleRegions.none())
          continue;
        LLVM_DEBUG(dbgs() <<
          "Retrying function scheduling without clustering.\n");
      }

      if (Stage == ClusteredLowOccupancyReschedule) {
        if (StartingOccupancy <= MinOccupancy)
          break;

        LLVM_DEBUG(
            dbgs()
            << "Retrying function scheduling with lowest recorded occupancy "
            << MinOccupancy << ".\n");

        S.setTargetOccupancy(MinOccupancy);
      }
    }
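
    // Temporarily drop the DAG mutations (including load/store clustering) for
    // the unclustered stage; they are swapped back in once the stage finishes.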
    if (Stage == UnclusteredReschedule)
      SavedMutations.swap(Mutations);

    for (auto Region : Regions) {
      if ((Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) ||
          (Stage == ClusteredLowOccupancyReschedule &&
           !RegionsWithClusters[RegionIdx] && !RegionsWithHighRP[RegionIdx])) {
        ++RegionIdx;
        continue;
      }

      RegionBegin = Region.first;
      RegionEnd = Region.second;

      if (RegionBegin->getParent() != MBB) {
        if (MBB) finishBlock();
        MBB = RegionBegin->getParent();
        startBlock(MBB);
        if (Stage == InitialSchedule)
          computeBlockPressure(MBB);
      }

      unsigned NumRegionInstrs = std::distance(begin(), end());
      enterRegion(MBB, begin(), end(), NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (begin() == end() || begin() == std::prev(end())) {
        exitRegion();
        continue;
      }

      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
      LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
                        << MBB->getName() << "\n  From: " << *begin()
                        << "    To: ";
                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
                 else dbgs() << "End";
                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

      schedule();

      exitRegion();
      ++RegionIdx;
    }
    finishBlock();

    if (Stage == UnclusteredReschedule)
      SavedMutations.swap(Mutations);
  } while (Stage != LastStage);
}