//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
///
/// This pass will apply multiple scheduling stages to the same function.
/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
/// entry point for the scheduling of those regions is
/// GCNScheduleDAGMILive::runSchedStages.
///
/// Generally, the reason for having multiple scheduling stages is to account
/// for the kernel-wide effect of register usage on occupancy. Usually, only a
/// few scheduling regions will have register pressure high enough to limit
/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
/// other regions.
//
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "AMDGPUIGroupLP.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;
static cl::opt<bool> DisableUnclusterHighRP(
    "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
    cl::desc("Disable unclustered high register pressure "
             "reduction scheduling stage."),
    cl::init(false));

static cl::opt<bool> DisableClusteredLowOccupancy(
    "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
    cl::desc("Disable clustered low occupancy "
             "rescheduling for ILP scheduling stage."),
    cl::init(false));

static cl::opt<unsigned> ScheduleMetricBias(
    "amdgpu-schedule-metric-bias", cl::Hidden,
    cl::desc(
        "Sets the bias which adds weight to occupancy vs latency. Set it to "
        "100 to chase the occupancy only."),
    cl::init(10));

static cl::opt<bool>
    RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
               cl::desc("Relax occupancy targets for kernels which are memory "
                        "bound (amdgpu-membound-threshold), or "
                        "Wave Limited (amdgpu-limit-wave-threshold)."),
               cl::init(false));

static cl::opt<bool> GCNTrackers(
    "amdgpu-use-amdgpu-trackers", cl::Hidden,
    cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
    cl::init(false));
const unsigned ScheduleMetrics::ScaleFactor = 100;
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
      DownwardTracker(*C->LIS), UpwardTracker(*C->LIS),
      HasHighPressure(false) {}
void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  SGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);

  SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
  // Set the initial TargetOccupancy to the maximum occupancy that we can
  // achieve for this function. This effectively sets a lower bound on the
  // 'Critical' register limits in the scheduler.
  // Allow for lower occupancy targets if kernel is wave limited or memory
  // bound, and using the relaxed occupancy feature.
  TargetOccupancy =
      RelaxedOcc ? MFI.getMinAllowedOccupancy() : MFI.getOccupancy();
  SGPRCriticalLimit =
      std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);

  if (!KnownExcessRP) {
    VGPRCriticalLimit =
        std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
  } else {
    // This is similar to ST.getMaxNumVGPRs(TargetOccupancy) result except
    // returns a reasonably small number for targets with lots of VGPRs, such
    // as GFX10 and GFX11.
    LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
                         "VGPRCriticalLimit calculation method.\n");

    unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
    unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
    unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
    VGPRBudget = std::max(VGPRBudget, Granule);
    VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
  }

  // Subtract error margin and bias from register limits and avoid overflow.
  SGPRCriticalLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRCriticalLimit);
  VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRCriticalLimit);
  SGPRExcessLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRExcessLimit);
  VGPRExcessLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRExcessLimit);

  LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
                    << ", VGPRExcessLimit = " << VGPRExcessLimit
                    << ", SGPRCriticalLimit = " << SGPRCriticalLimit
                    << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
}
/// Checks whether \p SU can use the cached DAG pressure diffs to compute the
/// current register pressure.
///
/// This works for the common case, but it has a few exceptions that have been
/// observed through trial and error:
///   - Explicit physical register operands
///   - Subregister definitions
///
/// In both of those cases, PressureDiff doesn't represent the actual pressure,
/// and querying LiveIntervals through the RegPressureTracker is needed to get
/// an accurate value.
///
/// We should eventually only use PressureDiff for maximum performance, but this
/// already allows 80% of SUs to take the fast path without changing scheduling
/// at all. Further changes would either change scheduling, or require a lot
/// more logic to recover an accurate pressure estimate from the PressureDiffs.
static bool canUsePressureDiffs(const SUnit &SU) {
  if (!SU.isInstr())
    return false;

  // Cannot use pressure diffs for subregister defs or with physregs, it's
  // imprecise in both cases.
  for (const auto &Op : SU.getInstr()->operands()) {
    if (!Op.isReg() || Op.isImplicit())
      continue;
    if (Op.getReg().isPhysical() ||
        (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
      return false;
  }

  return true;
}
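// Compute the register pressure seen when scheduling \p SU at the top or
// bottom of the current region. When the AMDGPU-specific trackers are enabled
// (-amdgpu-use-amdgpu-trackers), pressure is bumped on temporary copies of the
// GCN RP trackers instead of the generic RegPressureTracker.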
static void getRegisterPressures(
    bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU,
    std::vector<unsigned> &Pressure, std::vector<unsigned> &MaxPressure,
    GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker,
    ScheduleDAGMI *DAG, const SIRegisterInfo *SRI) {
  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those function a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
  if (!GCNTrackers) {
    AtTop
        ? TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure)
        : TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);

    return;
  }

  // GCNTrackers
  Pressure.resize(4, 0);
  MachineInstr *MI = SU->getInstr();
  GCNRegPressure NewPressure;
  if (AtTop) {
    GCNDownwardRPTracker TempDownwardTracker(DownwardTracker);
    NewPressure = TempDownwardTracker.bumpDownwardPressure(MI, SRI);
  } else {
    GCNUpwardRPTracker TempUpwardTracker(UpwardTracker);
    TempUpwardTracker.recede(*MI);
    NewPressure = TempUpwardTracker.getPressure();
  }
  Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
      NewPressure.getArchVGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
}
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure, bool IsBottomUp) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  if (!DAG->isTrackingPressure())
    return;

  Pressure.clear();
  MaxPressure.clear();

  // We try to use the cached PressureDiffs in the ScheduleDAG whenever
  // possible over querying the RegPressureTracker.
  //
  // RegPressureTracker will make a lot of LIS queries which are very
  // expensive, it is considered a slow function in this context.
  //
  // PressureDiffs are precomputed and cached, and getPressureDiff is just a
  // trivial lookup into an array. It is pretty much free.
  //
  // In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of
  // the cached PressureDiffs.
  if (AtTop || !canUsePressureDiffs(*SU) || GCNTrackers) {
    getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure,
                         DownwardTracker, UpwardTracker, DAG, SRI);
  } else {
    Pressure.resize(4, 0);
    Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
    Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;

    for (const auto &Diff : DAG->getPressureDiff(SU)) {
      if (!Diff.isValid())
        continue;
      // PressureDiffs is always bottom-up so if we're working top-down we need
      // to invert its sign.
      Pressure[Diff.getPSet()] +=
          (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
    }

#ifdef EXPENSIVE_CHECKS
    std::vector<unsigned> CheckPressure, CheckMaxPressure;
    getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,
                         DownwardTracker, UpwardTracker, DAG, SRI);
    if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
        Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
      errs() << "Register Pressure is inaccurate when calculated through "
                "PressureDiff\n"
             << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
             << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
      report_fatal_error("inaccurate register pressure calculation");
    }
#endif
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs. This is rarely what we want, so
  // when we report excess/critical register pressure, we do it either
  // only for VGPRs or only for SGPRs.
  //
  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
  // has the same cost, so we don't need to prefer one over the other.

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasHighPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}
// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand,
                                         bool IsBottomUp) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  if (DAG->isTrackingPressure()) {
    if (!GCNTrackers) {
      SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
      VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
    } else {
      GCNRPTracker *T = IsBottomUp
                            ? static_cast<GCNRPTracker *>(&UpwardTracker)
                            : static_cast<GCNRPTracker *>(&DownwardTracker);
      SGPRPressure = T->getPressure().getSGPRNum();
      VGPRPressure = T->getPressure().getArchVGPRNum();
    }
  }
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {

    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                  VGPRPressure, IsBottomUp);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}
// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand,
                      /*IsBottomUp=*/true);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
                        /*IsBottomUp=*/true);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand,
                      /*IsBottomUp=*/false);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
                        /*IsBottomUp=*/false);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }

  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}
// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
                          /*IsBottomUp=*/false);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand,
                          /*IsBottomUp=*/true);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  if (GCNTrackers) {
    MachineInstr *MI = SU->getInstr();
    IsTopNode ? (void)DownwardTracker.advance(MI, false)
              : UpwardTracker.recede(*MI);
  }

  return GenericScheduler::schedNode(SU, IsTopNode);
}
GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
  assert(CurrentStage && CurrentStage != SchedStages.end());
  return *CurrentStage;
}

bool GCNSchedStrategy::advanceStage() {
  assert(CurrentStage != SchedStages.end());
  if (!CurrentStage)
    CurrentStage = SchedStages.begin();
  else
    CurrentStage++;

  return CurrentStage != SchedStages.end();
}

bool GCNSchedStrategy::hasNextStage() const {
  assert(CurrentStage);
  return std::next(CurrentStage) != SchedStages.end();
}

GCNSchedStageID GCNSchedStrategy::getNextStage() const {
  assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
  return *std::next(CurrentStage);
}
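// The maximum-occupancy strategy runs up to four stages over each function:
// an initial schedule, an unclustered reschedule of high-RP regions, a
// clustered reschedule of low-occupancy regions, and a pre-RA
// rematerialization stage. The AMDGPU-specific trackers are only used with
// the non-legacy scheduler.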
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C, bool IsLegacyScheduler)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
  SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
  SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
  SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
  GCNTrackers = GCNTrackers & !IsLegacyScheduler;
}

GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);
}
bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand,
                                          SchedBoundary *Zone) const {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Avoid spilling by exceeding the register limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Bias PhysReg Defs and copies to their uses and defined respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;

  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return TryCand.Reason != NoCand;

    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return TryCand.Reason != NoCand;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
                   ResourceDemand))
      return TryCand.Reason != NoCand;

    // Unconditionally try to reduce latency.
    if (tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return TryCand.Reason != NoCand;
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *CandNextClusterSU =
      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
      return true;
    }
  }
  return false;
}
GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
      RegionLiveOuts(this, /*IsLiveOut=*/true) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
  if (RelaxedOcc) {
    MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
    if (MinOccupancy != StartingOccupancy)
      LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
                        << ".\n");
  }
}
std::unique_ptr<GCNSchedStage>
GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
  switch (SchedStageID) {
  case GCNSchedStageID::OccInitialSchedule:
    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
  case GCNSchedStageID::PreRARematerialize:
    return std::make_unique<PreRARematStage>(SchedStageID, *this);
  case GCNSchedStageID::ILPInitialSchedule:
    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
  }

  llvm_unreachable("Unknown SchedStageID.");
}
void GCNScheduleDAGMILive::schedule() {
  // Collect all scheduling regions. The actual scheduling is performed in
  // GCNScheduleDAGMILive::finalizeSchedule.
  Regions.push_back(std::pair(RegionBegin, RegionEnd));
}
GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}
static MachineInstr *getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
                                        MachineBasicBlock::iterator RegionEnd) {
  auto REnd = RegionEnd == RegionBegin->getParent()->end()
                  ? std::prev(RegionEnd)
                  : RegionEnd;
  return &*skipDebugInstructionsBackward(REnd, RegionBegin);
}
void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has only one successor then live-ins of that successor are
  // live-outs of the current block. We can reuse the calculated live set if
  // the successor will be sent to scheduling after the current block.
  //
  // However, due to the bug in LiveInterval analysis it may happen that two
  // predecessors of the same successor block have different lane bitmasks for
  // a live-out register. Work around that by sticking to a one-to-one
  // relationship, i.e. one predecessor with one successor block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1) {
    auto *Candidate = *MBB->succ_begin();
    if (!Candidate->empty() && Candidate->pred_size() == 1) {
      SlotIndexes *Ind = LIS->getSlotIndexes();
      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
        OnlySucc = Candidate;
    }
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> RegionFirstMIs;
  RegionFirstMIs.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    RegionFirstMIs.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS);
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveOutMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> RegionLastMIs;
  RegionLastMIs.reserve(Regions.size());
  for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
    RegionLastMIs.push_back(getLastMIForRegion(RegionBegin, RegionEnd));

  return getLiveRegMap(RegionLastMIs, /*After=*/true, *LIS);
}
void RegionPressureMap::buildLiveRegMap() {
  IdxToInstruction.clear();

  RegionLiveRegMap =
      IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
  for (unsigned I = 0; I < DAG->Regions.size(); I++) {
    MachineInstr *RegionKey =
        IsLiveOut
            ? getLastMIForRegion(DAG->Regions[I].first, DAG->Regions[I].second)
            : &*DAG->Regions[I].first;
    IdxToInstruction[I] = RegionKey;
  }
}
void GCNScheduleDAGMILive::finalizeSchedule() {
  // Start actual scheduling here. This function is called by the base
  // MachineScheduler after all regions have been recorded by
  // GCNScheduleDAGMILive::schedule().
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithExcessRP.resize(Regions.size());
  RegionsWithMinOcc.resize(Regions.size());
  RegionsWithIGLPInstrs.resize(Regions.size());
  RescheduleRegions.set();
  RegionsWithHighRP.reset();
  RegionsWithExcessRP.reset();
  RegionsWithMinOcc.reset();
  RegionsWithIGLPInstrs.reset();

  runSchedStages();
}
void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  if (!Regions.empty()) {
    BBLiveInMap = getRegionLiveInMap();
    if (GCNTrackers)
      RegionLiveOuts.buildLiveRegMap();
  }

  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
  while (S.advanceStage()) {
    auto Stage = createSchedStage(S.getCurrentStage());
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;
      // Setup for scheduling the region and check whether it should be
      // skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      if (GCNTrackers) {
        GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
        GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
        GCNRPTracker::LiveRegSet *RegionLiveIns =
            &LiveIns[Stage->getRegionIdx()];

        reinterpret_cast<GCNRPTracker *>(DownwardTracker)
            ->reset(MRI, *RegionLiveIns);
        reinterpret_cast<GCNRPTracker *>(UpwardTracker)
            ->reset(MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
                             Stage->getRegionIdx()));
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    Stage->finalizeGCNSchedStage();
  }
}
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::OccInitialSchedule:
    OS << "Max Occupancy Initial Schedule";
    break;
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    OS << "Unclustered High Register Pressure Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  case GCNSchedStageID::ILPInitialSchedule:
    OS << "Max ILP Initial Schedule";
    break;
  }
  return OS;
}
GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
    : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
      MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}

bool GCNSchedStage::initGCNSchedStage() {
  if (!DAG.LIS)
    return false;

  LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
  return true;
}
bool UnclusteredHighRPStage::initGCNSchedStage() {
  if (DisableUnclusterHighRP)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
    return false;

  SavedMutations.swap(DAG.Mutations);
  DAG.addMutation(
      createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PreRAReentry));

  InitialOccupancy = DAG.MinOccupancy;
  // Aggressively try to reduce register pressure in the unclustered high RP
  // stage. Temporarily increase the occupancy target in the region.
  S.SGPRLimitBias = S.HighRPSGPRBias;
  S.VGPRLimitBias = S.HighRPVGPRBias;
  if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
    MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  LLVM_DEBUG(
      dbgs()
      << "Retrying function scheduling without clustering. "
         "Aggressively try to reduce register pressure to achieve occupancy "
      << DAG.MinOccupancy << ".\n");

  return true;
}
bool ClusteredLowOccStage::initGCNSchedStage() {
  if (DisableClusteredLowOccupancy)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  // Don't bother trying to improve ILP in lower RP regions if occupancy has
  // not been dropped. All regions will have already been scheduled with the
  // ideal occupancy targets.
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}
bool PreRARematStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
    return false;

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  // Check maximum occupancy
  if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
      DAG.MinOccupancy)
    return false;

  // FIXME: This pass will invalidate cached MBBLiveIns for regions
  // in between the defs and the region we sink the def to. Cached pressure
  // for regions where a def is sunk from will also be invalidated. Will
  // need to be fixed if there is another pass after this pass.
  assert(!S.hasNextStage());

  collectRematerializableInstructions();
  if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with improved occupancy of "
             << DAG.MinOccupancy << " from rematerializing\n");
  return true;
}
void GCNSchedStage::finalizeGCNSchedStage() {
  DAG.finishBlock();
  LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
}

void UnclusteredHighRPStage::finalizeGCNSchedStage() {
  SavedMutations.swap(DAG.Mutations);
  S.SGPRLimitBias = S.VGPRLimitBias = 0;
  if (DAG.MinOccupancy > InitialOccupancy) {
    for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
      DAG.RegionsWithMinOcc[IDX] =
          DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;

    LLVM_DEBUG(dbgs() << StageID
                      << " stage successfully increased occupancy to "
                      << DAG.MinOccupancy << '\n');
  }

  GCNSchedStage::finalizeGCNSchedStage();
}
bool GCNSchedStage::initGCNRegion() {
  // Check whether this new region is also a new block.
  if (DAG.RegionBegin->getParent() != CurrentMBB)
    setupNewBlock();

  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);

  // Skip empty scheduling regions (0 or 1 schedulable instructions).
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << " " << CurrentMBB->getName()
                    << "\n From: " << *DAG.begin() << " To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save original instruction order before scheduling for possible revert.
  Unsched.clear();
  Unsched.reserve(DAG.NumRegionInstrs);
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::ILPInitialSchedule) {
    for (auto &I : DAG) {
      Unsched.push_back(&I);
      if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
          I.getOpcode() == AMDGPU::IGLP_OPT)
        DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
    }
  } else {
    for (auto &I : DAG)
      Unsched.push_back(&I);
  }

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:"
             << print(DAG.LiveIns[RegionIdx], DAG.MRI)
             << "Region live-in pressure: "
             << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
             << "Region register pressure: " << print(PressureBefore));

  S.HasHighPressure = false;
  S.KnownExcessRP = isRegionWithExcessRP();

  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule) {
    SavedMutations.clear();
    SavedMutations.swap(DAG.Mutations);
    bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||
                          StageID == GCNSchedStageID::ILPInitialSchedule;
    DAG.addMutation(createIGroupLPDAGMutation(
        IsInitialStage ? AMDGPU::SchedulingPhase::Initial
                       : AMDGPU::SchedulingPhase::PreRAReentry));
  }

  return true;
}
bool UnclusteredHighRPStage::initGCNRegion() {
  // Only reschedule regions with the minimum occupancy or regions that may
  // have spilling (excess register pressure).
  if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
       DAG.MinOccupancy <= InitialOccupancy) &&
      !DAG.RegionsWithExcessRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool ClusteredLowOccStage::initGCNRegion() {
  // We may need to reschedule this region if it wasn't rescheduled in the last
  // stage, or if we found it was testing critical register pressure limits in
  // the unclustered reschedule stage. The latter is because we may not have
  // been able to raise the min occupancy in the previous stage so the region
  // may be overly constrained even if it was already rescheduled.
  if (!DAG.RegionsWithHighRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool PreRARematStage::initGCNRegion() {
  if (!DAG.RescheduleRegions[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}
void GCNSchedStage::setupNewBlock() {
  if (CurrentMBB)
    DAG.finishBlock();

  CurrentMBB = DAG.RegionBegin->getParent();
  DAG.startBlock(CurrentMBB);
  // Get real RP for the region if it hasn't been calculated before. After the
  // initial schedule stage real RP will be collected after scheduling.
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::ILPInitialSchedule)
    DAG.computeBlockPressure(RegionIdx, CurrentMBB);
}
void GCNSchedStage::finalizeGCNRegion() {
  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
  DAG.RescheduleRegions[RegionIdx] = false;
  if (S.HasHighPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;

  // Revert scheduling if we have dropped occupancy or there is some other
  // reason that the original schedule is better.
  checkScheduling();

  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule)
    SavedMutations.swap(DAG.Mutations);

  DAG.exitRegion();
  advanceRegion();
}
void GCNSchedStage::checkScheduling() {
  // Check the results of scheduling.
  PressureAfter = DAG.getRealRegPressure(RegionIdx);

  LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
  LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;

    // Early out if we have achieved the occupancy target.
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }

  unsigned TargetOccupancy =
      std::min(S.getTargetOccupancy(), ST.getOccupancyWithLocalMemSize(MF));
  unsigned WavesAfter =
      std::min(TargetOccupancy, PressureAfter.getOccupancy(ST));
  unsigned WavesBefore =
      std::min(TargetOccupancy, PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We may not be able to keep the current target occupancy because of the
  // just scheduled region. We might still be able to revert scheduling if the
  // occupancy before was higher, or if the current schedule has register
  // pressure higher than the excess limits which could lead to more spilling.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);

  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    DAG.RegionsWithMinOcc.reset();
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }

  // The maximum number of arch VGPR on non-unified register file, or the
  // maximum VGPR + AGPR in the unified register file case.
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  // The maximum number of arch VGPR for both unified and non-unified register
  // file.
  unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);

  if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
      PressureAfter.getVGPRNum(false) > MaxArchVGPRs ||
      PressureAfter.getAGPRNum() > MaxArchVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs) {
    DAG.RescheduleRegions[RegionIdx] = true;
    DAG.RegionsWithHighRP[RegionIdx] = true;
    DAG.RegionsWithExcessRP[RegionIdx] = true;
  }

  // Revert if this region's schedule would cause a drop in occupancy or
  // spilling.
  if (shouldRevertScheduling(WavesAfter)) {
    revertScheduling();
  } else {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
  }
}
unsigned
GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                      DenseMap<unsigned, unsigned> &ReadyCycles,
                                      const TargetSchedModel &SM) {
  unsigned ReadyCycle = CurrCycle;
  for (auto &D : SU.Preds) {
    if (D.isAssignedRegDep()) {
      MachineInstr *DefMI = D.getSUnit()->getInstr();
      unsigned Latency = SM.computeInstrLatency(DefMI);
      unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
      ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
    }
  }
  ReadyCycles[SU.NodeNum] = ReadyCycle;
  return ReadyCycle;
}
{
1209 bool operator()(std::pair
<MachineInstr
*, unsigned> A
,
1210 std::pair
<MachineInstr
*, unsigned> B
) const {
1211 return A
.second
< B
.second
;
1215 static void printScheduleModel(std::set
<std::pair
<MachineInstr
*, unsigned>,
1216 EarlierIssuingCycle
> &ReadyCycles
) {
1217 if (ReadyCycles
.empty())
1219 unsigned BBNum
= ReadyCycles
.begin()->first
->getParent()->getNumber();
1220 dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
1221 << " ##################\n# Cycle #\t\t\tInstruction "
1225 for (auto &I
: ReadyCycles
) {
1226 if (I
.second
> IPrev
+ 1)
1227 dbgs() << "****************************** BUBBLE OF " << I
.second
- IPrev
1228 << " CYCLES DETECTED ******************************\n\n";
1229 dbgs() << "[ " << I
.second
<< " ] : " << *I
.first
<< "\n";
ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &SU : InputSchedule) {
    unsigned ReadyCycle =
        computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
#ifndef NDEBUG
  LLVM_DEBUG(
      printScheduleModel(ReadyCyclesSorted);
      dbgs() << "\n\t"
             << "Metric: "
             << (SumBubbles
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
                     : 1)
             << "\n\n");
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}
ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &MI : DAG) {
    SUnit *SU = DAG.getSUnit(&MI);
    if (!SU)
      continue;
    unsigned ReadyCycle =
        computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
#ifndef NDEBUG
  LLVM_DEBUG(
      printScheduleModel(ReadyCyclesSorted);
      dbgs() << "\n\t"
             << "Metric: "
             << (SumBubbles
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
                     : 1)
             << "\n\n");
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}
bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (WavesAfter < DAG.MinOccupancy)
    return true;

  return false;
}

bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (PressureAfter == PressureBefore)
    return false;

  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}
) {
1325 // If RP is not reduced in the unclustered reschedule stage, revert to the
1327 if ((WavesAfter
<= PressureBefore
.getOccupancy(ST
) &&
1328 mayCauseSpilling(WavesAfter
)) ||
1329 GCNSchedStage::shouldRevertScheduling(WavesAfter
)) {
1330 LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
1334 // Do not attempt to relax schedule even more if we are already spilling.
1335 if (isRegionWithExcessRP())
1340 << "\n\t *** In shouldRevertScheduling ***\n"
1341 << " *********** BEFORE UnclusteredHighRPStage ***********\n");
1342 ScheduleMetrics MBefore
=
1343 getScheduleMetrics(DAG
.SUnits
);
1346 << "\n *********** AFTER UnclusteredHighRPStage ***********\n");
1347 ScheduleMetrics MAfter
= getScheduleMetrics(DAG
);
1348 unsigned OldMetric
= MBefore
.getMetric();
1349 unsigned NewMetric
= MAfter
.getMetric();
1350 unsigned WavesBefore
=
1351 std::min(S
.getTargetOccupancy(), PressureBefore
.getOccupancy(ST
));
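  // Profit combines two fixed-point ratios (scaled by ScaleFactor): the
  // occupancy ratio WavesAfter/WavesBefore and the latency-metric ratio
  // (OldMetric + ScheduleMetricBias)/NewMetric. The unclustered reschedule is
  // kept only if the combined Profit is at least ScaleFactor (i.e. >= 1.0).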
  unsigned Profit =
      ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
       ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
       NewMetric) /
      ScheduleMetrics::ScaleFactor;
  LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
                    << MAfter << "Profit: " << Profit << "\n");
  return Profit < ScheduleMetrics::ScaleFactor;
}
bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (PressureAfter == PressureBefore)
    return false;

  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
      !PressureAfter.less(MF, PressureBefore)) {
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    return true;
  }

  return false;
}
void GCNSchedStage::revertScheduling() {
  DAG.RegionsWithMinOcc[RegionIdx] =
      PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  DAG.RescheduleRegions[RegionIdx] =
      S.hasNextStage() &&
      S.getNextStage() != GCNSchedStageID::UnclusteredHighRPReschedule;
  DAG.RegionEnd = DAG.RegionBegin;
  int SkippedDebugInstr = 0;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr()) {
      ++SkippedDebugInstr;
      continue;
    }

    if (MI->getIterator() != DAG.RegionEnd) {
      DAG.BB->remove(MI);
      DAG.BB->insert(DAG.RegionEnd, MI);
      if (!MI->isDebugInstr())
        DAG.LIS->handleMove(*MI, true);
    }

    // Reset read-undef flags and update them later.
    for (auto &Op : MI->all_defs())
      Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (DAG.ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *DAG.LIS);
      }
    }
    DAG.RegionEnd = MI->getIterator();
    ++DAG.RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }

  // After reverting schedule, debug instrs will now be at the end of the block
  // and RegionEnd will point to the first debug instr. Increment RegionEnd
  // past debug instrs to the actual end of the scheduling region.
  while (SkippedDebugInstr-- > 0)
    ++DAG.RegionEnd;

  // If Unsched.front() instruction is a debug instruction, this will actually
  // shrink the region since we moved all debug instructions to the end of the
  // block. Find the first instruction that is not a debug instruction.
  DAG.RegionBegin = Unsched.front()->getIterator();
  if (DAG.RegionBegin->isDebugInstr()) {
    for (MachineInstr *MI : Unsched) {
      if (MI->isDebugInstr())
        continue;
      DAG.RegionBegin = MI->getIterator();
      break;
    }
  }

  // Then move the debug instructions back into their correct place and set
  // RegionBegin and RegionEnd if needed.
  DAG.placeDebugValues();

  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
}
void PreRARematStage::collectRematerializableInstructions() {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
  for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!DAG.LIS->hasInterval(Reg))
      continue;

    // TODO: Handle AGPR and SGPR rematerialization
    if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
        !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
      continue;

    MachineOperand *Op = DAG.MRI.getOneDef(Reg);
    MachineInstr *Def = Op->getParent();
    if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
      continue;

    MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
    if (Def->getParent() == UseI->getParent())
      continue;

    // We are only collecting defs that are defined in another block and are
    // live-through or used inside regions at MinOccupancy. This means that the
    // register must be in the live-in set for the region.
    bool AddedToRematList = false;
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      auto It = DAG.LiveIns[I].find(Reg);
      if (It != DAG.LiveIns[I].end() && !It->second.none()) {
        if (DAG.RegionsWithMinOcc[I]) {
          RematerializableInsts[I][Def] = UseI;
          AddedToRematList = true;
        }

        // Collect regions with rematerializable reg as live-in to avoid
        // searching later when updating RP.
        RematDefToLiveInRegions[Def].push_back(I);
      }
    }
    if (!AddedToRematList)
      RematDefToLiveInRegions.erase(Def);
  }
}
bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
                                              const TargetInstrInfo *TII) {
  // Temporary copies of cached variables we will be modifying and replacing if
  // sinking succeeds.
  SmallVector<
      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
      NewRegions;
  DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
  DenseMap<unsigned, GCNRegPressure> NewPressure;
  BitVector NewRescheduleRegions;
  LiveIntervals *LIS = DAG.LIS;

  NewRegions.resize(DAG.Regions.size());
  NewRescheduleRegions.resize(DAG.Regions.size());

  // Collect only regions that have a rematerializable def as a live-in.
  SmallSet<unsigned, 16> ImpactedRegions;
  for (const auto &It : RematDefToLiveInRegions)
    ImpactedRegions.insert(It.second.begin(), It.second.end());

  // Make copies of register pressure and live-ins cache that will be updated
  // as we rematerialize.
  for (auto Idx : ImpactedRegions) {
    NewPressure[Idx] = DAG.Pressure[Idx];
    NewLiveIns[Idx] = DAG.LiveIns[Idx];
  }
  NewRegions = DAG.Regions;
  NewRescheduleRegions.reset();

  DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
  bool Improved = false;
  for (auto I : ImpactedRegions) {
    if (!DAG.RegionsWithMinOcc[I])
      continue;

    Improved = false;
    int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
    int SGPRUsage = NewPressure[I].getSGPRNum();

    // TODO: Handle occupancy drop due to AGPR and SGPR.
    // Check if cause of occupancy drop is due to VGPR usage and not SGPR.
    if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
      break;

    // The occupancy of this region could have been improved by a previous
    // iteration's sinking of defs.
    if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
      NewRescheduleRegions[I] = true;
      Improved = true;
      continue;
    }

    // First check if we have enough trivially rematerializable instructions to
    // improve occupancy. Optimistically assume all instructions we are able to
    // sink decreased RP.
    int TotalSinkableRegs = 0;
    for (const auto &It : RematerializableInsts[I]) {
      MachineInstr *Def = It.first;
      Register DefReg = Def->getOperand(0).getReg();
      TotalSinkableRegs +=
          SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
    }
    int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
    unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
    // If in the most optimistic scenario, we cannot improve occupancy, then do
    // not attempt to sink any instructions.
    if (OptimisticOccupancy <= DAG.MinOccupancy)
      break;

    unsigned ImproveOccupancy = 0;
    SmallVector<MachineInstr *, 4> SinkedDefs;
    for (auto &It : RematerializableInsts[I]) {
      MachineInstr *Def = It.first;
      MachineBasicBlock::iterator InsertPos =
          MachineBasicBlock::iterator(It.second);
      Register Reg = Def->getOperand(0).getReg();
      // Rematerialize MI to its use block. Since we are only rematerializing
      // instructions that do not have any virtual reg uses, we do not need to
      // call LiveRangeEdit::allUsesAvailableAt() and
      // LiveRangeEdit::canRematerializeAt().
      TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                         Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
      MachineInstr *NewMI = &*std::prev(InsertPos);
      LIS->InsertMachineInstrInMaps(*NewMI);
      LIS->removeInterval(Reg);
      LIS->createAndComputeVirtRegInterval(Reg);
      InsertedMIToOldDef[NewMI] = Def;

      // Update region boundaries in scheduling region we sinked from since we
      // may sink an instruction that was at the beginning or end of its region.
      DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
                                 /*Removing =*/true);

      // Update region boundaries in region we sinked to.
      DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);

      LaneBitmask PrevMask = NewLiveIns[I][Reg];
      // FIXME: Also update cached pressure for where the def was sinked from.
      // Update RP for all regions that have this reg as a live-in and remove
      // the reg from all regions as a live-in.
      for (auto Idx : RematDefToLiveInRegions[Def]) {
        NewLiveIns[Idx].erase(Reg);
        if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
          // Def is live-through and not used in this block.
          NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
        } else {
          // Def is used and rematerialized into this block.
          GCNDownwardRPTracker RPT(*LIS);
          auto *NonDbgMI = &*skipDebugInstructionsForward(
              NewRegions[Idx].first, NewRegions[Idx].second);
          RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
          RPT.advance(NewRegions[Idx].second);
          NewPressure[Idx] = RPT.moveMaxPressure();
        }
      }

      SinkedDefs.push_back(Def);
      ImproveOccupancy = NewPressure[I].getOccupancy(ST);
      if (ImproveOccupancy > DAG.MinOccupancy)
        break;
    }

    // Remove defs we just sinked from all regions' list of sinkable defs.
    for (auto &Def : SinkedDefs)
      for (auto TrackedIdx : RematDefToLiveInRegions[Def])
        RematerializableInsts[TrackedIdx].erase(Def);

    if (ImproveOccupancy <= DAG.MinOccupancy)
      break;

    NewRescheduleRegions[I] = true;
    Improved = true;
  }

  if (!Improved) {
    // Occupancy was not improved for all regions that were at MinOccupancy.
    // Undo sinking and remove newly rematerialized instructions.
    for (auto &Entry : InsertedMIToOldDef) {
      MachineInstr *MI = Entry.first;
      MachineInstr *OldMI = Entry.second;
      Register Reg = MI->getOperand(0).getReg();
      LIS->RemoveMachineInstrFromMaps(*MI);
      MI->eraseFromParent();
      OldMI->clearRegisterDeads(Reg);
      LIS->removeInterval(Reg);
      LIS->createAndComputeVirtRegInterval(Reg);
    }
    return false;
  }

  // Occupancy was improved for all regions.
  for (auto &Entry : InsertedMIToOldDef) {
    MachineInstr *MI = Entry.first;
    MachineInstr *OldMI = Entry.second;

    // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
    DAG.BBLiveInMap.erase(OldMI);

    // Remove OldMI and update LIS.
    Register Reg = MI->getOperand(0).getReg();
    LIS->RemoveMachineInstrFromMaps(*OldMI);
    OldMI->eraseFromParent();
    LIS->removeInterval(Reg);
    LIS->createAndComputeVirtRegInterval(Reg);
  }

  // Update live-ins, register pressure, and regions caches.
  for (auto Idx : ImpactedRegions) {
    DAG.LiveIns[Idx] = NewLiveIns[Idx];
    DAG.Pressure[Idx] = NewPressure[Idx];
    DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
  }
  DAG.Regions = NewRegions;
  DAG.RescheduleRegions = NewRescheduleRegions;

  if (GCNTrackers)
    DAG.RegionLiveOuts.buildLiveRegMap();

  SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  return true;
}
// Copied from MachineLICM
bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
  if (!DAG.TII->isTriviallyReMaterializable(MI))
    return false;

  for (const MachineOperand &MO : MI.all_uses())
    if (MO.getReg().isVirtual())
      return false;

  return true;
}
// When removing, we will have to check both beginning and ending of the region.
// When inserting, we will only have to check if we are inserting NewMI in front
// of a scheduling region and do not need to check the ending since we will only
// ever be inserting before an already existing MI.
void GCNScheduleDAGMILive::updateRegionBoundaries(
    SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                              MachineBasicBlock::iterator>> &RegionBoundaries,
    MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
  unsigned I = 0, E = RegionBoundaries.size();
  // Search for first region of the block where MI is located.
  while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
    ++I;

  for (; I != E; ++I) {
    if (MI->getParent() != RegionBoundaries[I].first->getParent())
      return;

    if (Removing && MI == RegionBoundaries[I].first &&
        MI == RegionBoundaries[I].second) {
      // MI is in a region with size 1, after removing, the region will be
      // size 0, set RegionBegin and RegionEnd to pass end of block iterator.
      RegionBoundaries[I] =
          std::pair(MI->getParent()->end(), MI->getParent()->end());
      return;
    }
    if (MI == RegionBoundaries[I].first) {
      if (Removing)
        RegionBoundaries[I] =
            std::pair(std::next(MI), RegionBoundaries[I].second);
      else
        // Inserted NewMI in front of region, set new RegionBegin to NewMI.
        RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI),
                                        RegionBoundaries[I].second);
      return;
    }
    if (Removing && MI == RegionBoundaries[I].second) {
      RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(MI));
      return;
    }
  }
}
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
  return any_of(*DAG, [](MachineBasicBlock::iterator MI) {
    unsigned Opc = MI->getOpcode();
    return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
  });
}

GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
    bool RemoveKillFlags)
    : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}

void GCNPostScheduleDAGMILive::schedule() {
  HasIGLPInstrs = hasIGLPInstrs(this);
  if (HasIGLPInstrs) {
    SavedMutations.clear();
    SavedMutations.swap(Mutations);
    addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
  }

  ScheduleDAGMI::schedule();
}

void GCNPostScheduleDAGMILive::finalizeSchedule() {
  if (HasIGLPInstrs)
    SavedMutations.swap(Mutations);

  ScheduleDAGMI::finalizeSchedule();
}