1 //===- GCNIterativeScheduler.cpp ------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements the class GCNIterativeScheduler.
12 //===----------------------------------------------------------------------===//
14 #include "GCNIterativeScheduler.h"
15 #include "GCNSchedStrategy.h"
16 #include "SIMachineFunctionInfo.h"
20 #define DEBUG_TYPE "machine-scheduler"
24 std::vector
<const SUnit
*> makeMinRegSchedule(ArrayRef
<const SUnit
*> TopRoots
,
25 const ScheduleDAG
&DAG
);
27 std::vector
<const SUnit
*> makeGCNILPScheduler(ArrayRef
<const SUnit
*> BotRoots
,
28 const ScheduleDAG
&DAG
);
31 // shim accessors for different order containers
32 static inline MachineInstr
*getMachineInstr(MachineInstr
*MI
) {
35 static inline MachineInstr
*getMachineInstr(const SUnit
*SU
) {
36 return SU
->getInstr();
38 static inline MachineInstr
*getMachineInstr(const SUnit
&SU
) {
42 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
44 static void printRegion(raw_ostream
&OS
,
45 MachineBasicBlock::iterator Begin
,
46 MachineBasicBlock::iterator End
,
47 const LiveIntervals
*LIS
,
49 std::numeric_limits
<unsigned>::max()) {
50 auto BB
= Begin
->getParent();
51 OS
<< BB
->getParent()->getName() << ":" << printMBBReference(*BB
) << ' '
52 << BB
->getName() << ":\n";
54 MaxInstNum
= std::max(MaxInstNum
, 1u);
55 for (; I
!= End
&& MaxInstNum
; ++I
, --MaxInstNum
) {
56 if (!I
->isDebugInstr() && LIS
)
57 OS
<< LIS
->getInstructionIndex(*I
);
63 if (!I
->isDebugInstr() && LIS
)
64 OS
<< LIS
->getInstructionIndex(*I
);
67 if (End
!= BB
->end()) { // print boundary inst if present
69 if (LIS
) OS
<< LIS
->getInstructionIndex(*End
) << '\t';
75 static void printLivenessInfo(raw_ostream
&OS
,
76 MachineBasicBlock::iterator Begin
,
77 MachineBasicBlock::iterator End
,
78 const LiveIntervals
*LIS
) {
79 const auto BB
= Begin
->getParent();
80 const auto &MRI
= BB
->getParent()->getRegInfo();
82 const auto LiveIns
= getLiveRegsBefore(*Begin
, *LIS
);
84 getRegPressure(MRI
, LiveIns
).print(OS
);
86 const auto BottomMI
= End
== BB
->end() ? std::prev(End
) : End
;
87 const auto LiveOuts
= getLiveRegsAfter(*BottomMI
, *LIS
);
89 getRegPressure(MRI
, LiveOuts
).print(OS
);
93 void GCNIterativeScheduler::printRegions(raw_ostream
&OS
) const {
94 const auto &ST
= MF
.getSubtarget
<GCNSubtarget
>();
95 for (const auto R
: Regions
) {
96 OS
<< "Region to schedule ";
97 printRegion(OS
, R
->Begin
, R
->End
, LIS
, 1);
98 printLivenessInfo(OS
, R
->Begin
, R
->End
, LIS
);
100 R
->MaxPressure
.print(OS
, &ST
);
105 void GCNIterativeScheduler::printSchedResult(raw_ostream
&OS
,
107 const GCNRegPressure
&RP
) const {
108 OS
<< "\nAfter scheduling ";
109 printRegion(OS
, R
->Begin
, R
->End
, LIS
);
110 printSchedRP(OS
, R
->MaxPressure
, RP
);
115 void GCNIterativeScheduler::printSchedRP(raw_ostream
&OS
,
116 const GCNRegPressure
&Before
,
117 const GCNRegPressure
&After
) const {
118 const auto &ST
= MF
.getSubtarget
<GCNSubtarget
>();
120 Before
.print(OS
, &ST
);
122 After
.print(OS
, &ST
);
126 // DAG builder helper
127 class GCNIterativeScheduler::BuildDAG
{
128 GCNIterativeScheduler
&Sch
;
129 SmallVector
<SUnit
*, 8> TopRoots
;
131 SmallVector
<SUnit
*, 8> BotRoots
;
133 BuildDAG(const Region
&R
, GCNIterativeScheduler
&_Sch
)
135 auto BB
= R
.Begin
->getParent();
136 Sch
.BaseClass::startBlock(BB
);
137 Sch
.BaseClass::enterRegion(BB
, R
.Begin
, R
.End
, R
.NumRegionInstrs
);
139 Sch
.buildSchedGraph(Sch
.AA
, nullptr, nullptr, nullptr,
140 /*TrackLaneMask*/true);
141 Sch
.Topo
.InitDAGTopologicalSorting();
142 Sch
.findRootsAndBiasEdges(TopRoots
, BotRoots
);
146 Sch
.BaseClass::exitRegion();
147 Sch
.BaseClass::finishBlock();
150 ArrayRef
<const SUnit
*> getTopRoots() const {
153 ArrayRef
<SUnit
*> getBottomRoots() const {
158 class GCNIterativeScheduler::OverrideLegacyStrategy
{
159 GCNIterativeScheduler
&Sch
;
161 std::unique_ptr
<MachineSchedStrategy
> SaveSchedImpl
;
162 GCNRegPressure SaveMaxRP
;
165 OverrideLegacyStrategy(Region
&R
,
166 MachineSchedStrategy
&OverrideStrategy
,
167 GCNIterativeScheduler
&_Sch
)
170 , SaveSchedImpl(std::move(_Sch
.SchedImpl
))
171 , SaveMaxRP(R
.MaxPressure
) {
172 Sch
.SchedImpl
.reset(&OverrideStrategy
);
173 auto BB
= R
.Begin
->getParent();
174 Sch
.BaseClass::startBlock(BB
);
175 Sch
.BaseClass::enterRegion(BB
, R
.Begin
, R
.End
, R
.NumRegionInstrs
);
178 ~OverrideLegacyStrategy() {
179 Sch
.BaseClass::exitRegion();
180 Sch
.BaseClass::finishBlock();
181 Sch
.SchedImpl
.release();
182 Sch
.SchedImpl
= std::move(SaveSchedImpl
);
186 assert(Sch
.RegionBegin
== Rgn
.Begin
&& Sch
.RegionEnd
== Rgn
.End
);
187 LLVM_DEBUG(dbgs() << "\nScheduling ";
188 printRegion(dbgs(), Rgn
.Begin
, Rgn
.End
, Sch
.LIS
, 2));
189 Sch
.BaseClass::schedule();
191 // Unfortunatelly placeDebugValues incorrectly modifies RegionEnd, restore
192 Sch
.RegionEnd
= Rgn
.End
;
193 //assert(Rgn.End == Sch.RegionEnd);
194 Rgn
.Begin
= Sch
.RegionBegin
;
195 Rgn
.MaxPressure
.clear();
198 void restoreOrder() {
199 assert(Sch
.RegionBegin
== Rgn
.Begin
&& Sch
.RegionEnd
== Rgn
.End
);
200 // DAG SUnits are stored using original region's order
201 // so just use SUnits as the restoring schedule
202 Sch
.scheduleRegion(Rgn
, Sch
.SUnits
, SaveMaxRP
);
208 // just a stub to make base class happy
209 class SchedStrategyStub
: public MachineSchedStrategy
{
211 bool shouldTrackPressure() const override
{ return false; }
212 bool shouldTrackLaneMasks() const override
{ return false; }
213 void initialize(ScheduleDAGMI
*DAG
) override
{}
214 SUnit
*pickNode(bool &IsTopNode
) override
{ return nullptr; }
215 void schedNode(SUnit
*SU
, bool IsTopNode
) override
{}
216 void releaseTopNode(SUnit
*SU
) override
{}
217 void releaseBottomNode(SUnit
*SU
) override
{}
220 } // end anonymous namespace
222 GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext
*C
,
224 : BaseClass(C
, std::make_unique
<SchedStrategyStub
>())
230 // returns max pressure for a region
232 GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin
,
233 MachineBasicBlock::iterator End
)
235 // For the purpose of pressure tracking bottom inst of the region should
236 // be also processed. End is either BB end, BB terminator inst or sched
238 auto const BBEnd
= Begin
->getParent()->end();
239 auto const BottomMI
= End
== BBEnd
? std::prev(End
) : End
;
241 // scheduleRegions walks bottom to top, so its likely we just get next
242 // instruction to track
243 auto AfterBottomMI
= std::next(BottomMI
);
244 if (AfterBottomMI
== BBEnd
||
245 &*AfterBottomMI
!= UPTracker
.getLastTrackedMI()) {
246 UPTracker
.reset(*BottomMI
);
248 assert(UPTracker
.isValid());
251 for (auto I
= BottomMI
; I
!= Begin
; --I
)
252 UPTracker
.recede(*I
);
254 UPTracker
.recede(*Begin
);
256 assert(UPTracker
.isValid() ||
257 (dbgs() << "Tracked region ",
258 printRegion(dbgs(), Begin
, End
, LIS
), false));
259 return UPTracker
.moveMaxPressure();
262 // returns max pressure for a tentative schedule
263 template <typename Range
> GCNRegPressure
264 GCNIterativeScheduler::getSchedulePressure(const Region
&R
,
265 Range
&&Schedule
) const {
266 auto const BBEnd
= R
.Begin
->getParent()->end();
267 GCNUpwardRPTracker
RPTracker(*LIS
);
268 if (R
.End
!= BBEnd
) {
269 // R.End points to the boundary instruction but the
270 // schedule doesn't include it
271 RPTracker
.reset(*R
.End
);
272 RPTracker
.recede(*R
.End
);
274 // R.End doesn't point to the boundary instruction
275 RPTracker
.reset(*std::prev(BBEnd
));
277 for (auto I
= Schedule
.end(), B
= Schedule
.begin(); I
!= B
;) {
278 RPTracker
.recede(*getMachineInstr(*--I
));
280 return RPTracker
.moveMaxPressure();
283 void GCNIterativeScheduler::enterRegion(MachineBasicBlock
*BB
, // overriden
284 MachineBasicBlock::iterator Begin
,
285 MachineBasicBlock::iterator End
,
286 unsigned NumRegionInstrs
) {
287 BaseClass::enterRegion(BB
, Begin
, End
, NumRegionInstrs
);
288 if (NumRegionInstrs
> 2) {
290 new (Alloc
.Allocate())
291 Region
{ Begin
, End
, NumRegionInstrs
,
292 getRegionPressure(Begin
, End
), nullptr });
296 void GCNIterativeScheduler::schedule() { // overriden
298 LLVM_DEBUG(printLivenessInfo(dbgs(), RegionBegin
, RegionEnd
, LIS
);
299 if (!Regions
.empty() && Regions
.back()->Begin
== RegionBegin
) {
300 dbgs() << "Max RP: ";
301 Regions
.back()->MaxPressure
.print(
302 dbgs(), &MF
.getSubtarget
<GCNSubtarget
>());
307 void GCNIterativeScheduler::finalizeSchedule() { // overriden
311 case SCHEDULE_MINREGONLY
: scheduleMinReg(); break;
312 case SCHEDULE_MINREGFORCED
: scheduleMinReg(true); break;
313 case SCHEDULE_LEGACYMAXOCCUPANCY
: scheduleLegacyMaxOccupancy(); break;
314 case SCHEDULE_ILP
: scheduleILP(false); break;
318 // Detach schedule from SUnits and interleave it with debug values.
319 // Returned schedule becomes independent of DAG state.
320 std::vector
<MachineInstr
*>
321 GCNIterativeScheduler::detachSchedule(ScheduleRef Schedule
) const {
322 std::vector
<MachineInstr
*> Res
;
323 Res
.reserve(Schedule
.size() * 2);
326 Res
.push_back(FirstDbgValue
);
328 const auto DbgB
= DbgValues
.begin(), DbgE
= DbgValues
.end();
329 for (auto SU
: Schedule
) {
330 Res
.push_back(SU
->getInstr());
331 const auto &D
= std::find_if(DbgB
, DbgE
, [SU
](decltype(*DbgB
) &P
) {
332 return P
.second
== SU
->getInstr();
335 Res
.push_back(D
->first
);
340 void GCNIterativeScheduler::setBestSchedule(Region
&R
,
341 ScheduleRef Schedule
,
342 const GCNRegPressure
&MaxRP
) {
343 R
.BestSchedule
.reset(
344 new TentativeSchedule
{ detachSchedule(Schedule
), MaxRP
});
347 void GCNIterativeScheduler::scheduleBest(Region
&R
) {
348 assert(R
.BestSchedule
.get() && "No schedule specified");
349 scheduleRegion(R
, R
.BestSchedule
->Schedule
, R
.BestSchedule
->MaxPressure
);
350 R
.BestSchedule
.reset();
353 // minimal required region scheduler, works for ranges of SUnits*,
354 // SUnits or MachineIntrs*
355 template <typename Range
>
356 void GCNIterativeScheduler::scheduleRegion(Region
&R
, Range
&&Schedule
,
357 const GCNRegPressure
&MaxRP
) {
358 assert(RegionBegin
== R
.Begin
&& RegionEnd
== R
.End
);
359 assert(LIS
!= nullptr);
361 const auto SchedMaxRP
= getSchedulePressure(R
, Schedule
);
363 auto BB
= R
.Begin
->getParent();
365 for (const auto &I
: Schedule
) {
366 auto MI
= getMachineInstr(I
);
370 if (!MI
->isDebugInstr())
371 LIS
->handleMove(*MI
, true);
373 if (!MI
->isDebugInstr()) {
374 // Reset read - undef flags and update them later.
375 for (auto &Op
: MI
->operands())
376 if (Op
.isReg() && Op
.isDef())
377 Op
.setIsUndef(false);
379 RegisterOperands RegOpers
;
380 RegOpers
.collect(*MI
, *TRI
, MRI
, /*ShouldTrackLaneMasks*/true,
381 /*IgnoreDead*/false);
382 // Adjust liveness and add missing dead+read-undef flags.
383 auto SlotIdx
= LIS
->getInstructionIndex(*MI
).getRegSlot();
384 RegOpers
.adjustLaneLiveness(*LIS
, MRI
, SlotIdx
, MI
);
386 Top
= std::next(MI
->getIterator());
388 RegionBegin
= getMachineInstr(Schedule
.front());
390 // Schedule consisting of MachineInstr* is considered 'detached'
391 // and already interleaved with debug values
392 if (!std::is_same
<decltype(*Schedule
.begin()), MachineInstr
*>::value
) {
394 // Unfortunatelly placeDebugValues incorrectly modifies RegionEnd, restore
395 //assert(R.End == RegionEnd);
399 R
.Begin
= RegionBegin
;
400 R
.MaxPressure
= MaxRP
;
403 const auto RegionMaxRP
= getRegionPressure(R
);
404 const auto &ST
= MF
.getSubtarget
<GCNSubtarget
>();
406 assert((SchedMaxRP
== RegionMaxRP
&& (MaxRP
.empty() || SchedMaxRP
== MaxRP
))
407 || (dbgs() << "Max RP mismatch!!!\n"
408 "RP for schedule (calculated): ",
409 SchedMaxRP
.print(dbgs(), &ST
),
410 dbgs() << "RP for schedule (reported): ",
411 MaxRP
.print(dbgs(), &ST
),
412 dbgs() << "RP after scheduling: ",
413 RegionMaxRP
.print(dbgs(), &ST
),
417 // Sort recorded regions by pressure - highest at the front
418 void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc
) {
419 const auto &ST
= MF
.getSubtarget
<GCNSubtarget
>();
420 llvm::sort(Regions
, [&ST
, TargetOcc
](const Region
*R1
, const Region
*R2
) {
421 return R2
->MaxPressure
.less(ST
, R1
->MaxPressure
, TargetOcc
);
425 ///////////////////////////////////////////////////////////////////////////////
426 // Legacy MaxOccupancy Strategy
428 // Tries to increase occupancy applying minreg scheduler for a sequence of
429 // most demanding regions. Obtained schedules are saved as BestSchedule for a
431 // TargetOcc is the best achievable occupancy for a kernel.
432 // Returns better occupancy on success or current occupancy on fail.
433 // BestSchedules aren't deleted on fail.
434 unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc
) {
435 // TODO: assert Regions are sorted descending by pressure
436 const auto &ST
= MF
.getSubtarget
<GCNSubtarget
>();
437 const auto Occ
= Regions
.front()->MaxPressure
.getOccupancy(ST
);
438 LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
439 << ", current = " << Occ
<< '\n');
441 auto NewOcc
= TargetOcc
;
442 for (auto R
: Regions
) {
443 if (R
->MaxPressure
.getOccupancy(ST
) >= NewOcc
)
446 LLVM_DEBUG(printRegion(dbgs(), R
->Begin
, R
->End
, LIS
, 3);
447 printLivenessInfo(dbgs(), R
->Begin
, R
->End
, LIS
));
449 BuildDAG
DAG(*R
, *this);
450 const auto MinSchedule
= makeMinRegSchedule(DAG
.getTopRoots(), *this);
451 const auto MaxRP
= getSchedulePressure(*R
, MinSchedule
);
452 LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
453 printSchedRP(dbgs(), R
->MaxPressure
, MaxRP
));
455 NewOcc
= std::min(NewOcc
, MaxRP
.getOccupancy(ST
));
459 setBestSchedule(*R
, MinSchedule
, MaxRP
);
461 LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc
462 << ", prev occupancy = " << Occ
<< '\n');
464 SIMachineFunctionInfo
*MFI
= MF
.getInfo
<SIMachineFunctionInfo
>();
465 MFI
->increaseOccupancy(MF
, NewOcc
);
468 return std::max(NewOcc
, Occ
);
471 void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
472 bool TryMaximizeOccupancy
) {
473 const auto &ST
= MF
.getSubtarget
<GCNSubtarget
>();
474 SIMachineFunctionInfo
*MFI
= MF
.getInfo
<SIMachineFunctionInfo
>();
475 auto TgtOcc
= MFI
->getMinAllowedOccupancy();
477 sortRegionsByPressure(TgtOcc
);
478 auto Occ
= Regions
.front()->MaxPressure
.getOccupancy(ST
);
480 if (TryMaximizeOccupancy
&& Occ
< TgtOcc
)
481 Occ
= tryMaximizeOccupancy(TgtOcc
);
483 // This is really weird but for some magic scheduling regions twice
484 // gives performance improvement
485 const int NumPasses
= Occ
< TgtOcc
? 2 : 1;
487 TgtOcc
= std::min(Occ
, TgtOcc
);
488 LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
489 "target occupancy = "
491 GCNMaxOccupancySchedStrategy
LStrgy(Context
);
492 unsigned FinalOccupancy
= std::min(Occ
, MFI
->getOccupancy());
494 for (int I
= 0; I
< NumPasses
; ++I
) {
495 // running first pass with TargetOccupancy = 0 mimics previous scheduling
496 // approach and is a performance magic
497 LStrgy
.setTargetOccupancy(I
== 0 ? 0 : TgtOcc
);
498 for (auto R
: Regions
) {
499 OverrideLegacyStrategy
Ovr(*R
, LStrgy
, *this);
502 const auto RP
= getRegionPressure(*R
);
503 LLVM_DEBUG(printSchedRP(dbgs(), R
->MaxPressure
, RP
));
505 if (RP
.getOccupancy(ST
) < TgtOcc
) {
506 LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc
);
507 if (R
->BestSchedule
.get() &&
508 R
->BestSchedule
->MaxPressure
.getOccupancy(ST
) >= TgtOcc
) {
509 LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
512 LLVM_DEBUG(dbgs() << ", restoring\n");
514 assert(R
->MaxPressure
.getOccupancy(ST
) >= TgtOcc
);
517 FinalOccupancy
= std::min(FinalOccupancy
, RP
.getOccupancy(ST
));
520 MFI
->limitOccupancy(FinalOccupancy
);
523 ///////////////////////////////////////////////////////////////////////////////
524 // Minimal Register Strategy
526 void GCNIterativeScheduler::scheduleMinReg(bool force
) {
527 const auto &ST
= MF
.getSubtarget
<GCNSubtarget
>();
528 const SIMachineFunctionInfo
*MFI
= MF
.getInfo
<SIMachineFunctionInfo
>();
529 const auto TgtOcc
= MFI
->getOccupancy();
530 sortRegionsByPressure(TgtOcc
);
532 auto MaxPressure
= Regions
.front()->MaxPressure
;
533 for (auto R
: Regions
) {
534 if (!force
&& R
->MaxPressure
.less(ST
, MaxPressure
, TgtOcc
))
537 BuildDAG
DAG(*R
, *this);
538 const auto MinSchedule
= makeMinRegSchedule(DAG
.getTopRoots(), *this);
540 const auto RP
= getSchedulePressure(*R
, MinSchedule
);
541 LLVM_DEBUG(if (R
->MaxPressure
.less(ST
, RP
, TgtOcc
)) {
542 dbgs() << "\nWarning: Pressure becomes worse after minreg!";
543 printSchedRP(dbgs(), R
->MaxPressure
, RP
);
546 if (!force
&& MaxPressure
.less(ST
, RP
, TgtOcc
))
549 scheduleRegion(*R
, MinSchedule
, RP
);
550 LLVM_DEBUG(printSchedResult(dbgs(), R
, RP
));
556 ///////////////////////////////////////////////////////////////////////////////
557 // ILP scheduler port
559 void GCNIterativeScheduler::scheduleILP(
560 bool TryMaximizeOccupancy
) {
561 const auto &ST
= MF
.getSubtarget
<GCNSubtarget
>();
562 SIMachineFunctionInfo
*MFI
= MF
.getInfo
<SIMachineFunctionInfo
>();
563 auto TgtOcc
= MFI
->getMinAllowedOccupancy();
565 sortRegionsByPressure(TgtOcc
);
566 auto Occ
= Regions
.front()->MaxPressure
.getOccupancy(ST
);
568 if (TryMaximizeOccupancy
&& Occ
< TgtOcc
)
569 Occ
= tryMaximizeOccupancy(TgtOcc
);
571 TgtOcc
= std::min(Occ
, TgtOcc
);
572 LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
573 "target occupancy = "
576 unsigned FinalOccupancy
= std::min(Occ
, MFI
->getOccupancy());
577 for (auto R
: Regions
) {
578 BuildDAG
DAG(*R
, *this);
579 const auto ILPSchedule
= makeGCNILPScheduler(DAG
.getBottomRoots(), *this);
581 const auto RP
= getSchedulePressure(*R
, ILPSchedule
);
582 LLVM_DEBUG(printSchedRP(dbgs(), R
->MaxPressure
, RP
));
584 if (RP
.getOccupancy(ST
) < TgtOcc
) {
585 LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc
);
586 if (R
->BestSchedule
.get() &&
587 R
->BestSchedule
->MaxPressure
.getOccupancy(ST
) >= TgtOcc
) {
588 LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
592 scheduleRegion(*R
, ILPSchedule
, RP
);
593 LLVM_DEBUG(printSchedResult(dbgs(), R
, RP
));
594 FinalOccupancy
= std::min(FinalOccupancy
, RP
.getOccupancy(ST
));
597 MFI
->limitOccupancy(FinalOccupancy
);