1 //===- GCNIterativeScheduler.cpp ------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements the class GCNIterativeScheduler.
12 //===----------------------------------------------------------------------===//
14 #include "GCNIterativeScheduler.h"
15 #include "GCNSchedStrategy.h"
16 #include "SIMachineFunctionInfo.h"
20 #define DEBUG_TYPE "machine-scheduler"
24 std::vector
<const SUnit
*> makeMinRegSchedule(ArrayRef
<const SUnit
*> TopRoots
,
25 const ScheduleDAG
&DAG
);
27 std::vector
<const SUnit
*> makeGCNILPScheduler(ArrayRef
<const SUnit
*> BotRoots
,
28 const ScheduleDAG
&DAG
);
31 // shim accessors for different order containers
32 static inline MachineInstr
*getMachineInstr(MachineInstr
*MI
) {
35 static inline MachineInstr
*getMachineInstr(const SUnit
*SU
) {
36 return SU
->getInstr();
38 static inline MachineInstr
*getMachineInstr(const SUnit
&SU
) {
42 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
44 static void printRegion(raw_ostream
&OS
,
45 MachineBasicBlock::iterator Begin
,
46 MachineBasicBlock::iterator End
,
47 const LiveIntervals
*LIS
,
49 std::numeric_limits
<unsigned>::max()) {
50 auto BB
= Begin
->getParent();
51 OS
<< BB
->getParent()->getName() << ":" << printMBBReference(*BB
) << ' '
52 << BB
->getName() << ":\n";
54 MaxInstNum
= std::max(MaxInstNum
, 1u);
55 for (; I
!= End
&& MaxInstNum
; ++I
, --MaxInstNum
) {
56 if (!I
->isDebugInstr() && LIS
)
57 OS
<< LIS
->getInstructionIndex(*I
);
63 if (!I
->isDebugInstr() && LIS
)
64 OS
<< LIS
->getInstructionIndex(*I
);
67 if (End
!= BB
->end()) { // print boundary inst if present
69 if (LIS
) OS
<< LIS
->getInstructionIndex(*End
) << '\t';
75 static void printLivenessInfo(raw_ostream
&OS
,
76 MachineBasicBlock::iterator Begin
,
77 MachineBasicBlock::iterator End
,
78 const LiveIntervals
*LIS
) {
79 const auto BB
= Begin
->getParent();
80 const auto &MRI
= BB
->getParent()->getRegInfo();
82 const auto LiveIns
= getLiveRegsBefore(*Begin
, *LIS
);
83 OS
<< "LIn RP: " << print(getRegPressure(MRI
, LiveIns
));
85 const auto BottomMI
= End
== BB
->end() ? std::prev(End
) : End
;
86 const auto LiveOuts
= getLiveRegsAfter(*BottomMI
, *LIS
);
87 OS
<< "LOt RP: " << print(getRegPressure(MRI
, LiveOuts
));
91 void GCNIterativeScheduler::printRegions(raw_ostream
&OS
) const {
92 const auto &ST
= MF
.getSubtarget
<GCNSubtarget
>();
93 for (const auto R
: Regions
) {
94 OS
<< "Region to schedule ";
95 printRegion(OS
, R
->Begin
, R
->End
, LIS
, 1);
96 printLivenessInfo(OS
, R
->Begin
, R
->End
, LIS
);
97 OS
<< "Max RP: " << print(R
->MaxPressure
, &ST
);
102 void GCNIterativeScheduler::printSchedResult(raw_ostream
&OS
,
104 const GCNRegPressure
&RP
) const {
105 OS
<< "\nAfter scheduling ";
106 printRegion(OS
, R
->Begin
, R
->End
, LIS
);
107 printSchedRP(OS
, R
->MaxPressure
, RP
);
112 void GCNIterativeScheduler::printSchedRP(raw_ostream
&OS
,
113 const GCNRegPressure
&Before
,
114 const GCNRegPressure
&After
) const {
115 const auto &ST
= MF
.getSubtarget
<GCNSubtarget
>();
116 OS
<< "RP before: " << print(Before
, &ST
)
117 << "RP after: " << print(After
, &ST
);
121 // DAG builder helper
122 class GCNIterativeScheduler::BuildDAG
{
123 GCNIterativeScheduler
&Sch
;
124 SmallVector
<SUnit
*, 8> TopRoots
;
126 SmallVector
<SUnit
*, 8> BotRoots
;
128 BuildDAG(const Region
&R
, GCNIterativeScheduler
&_Sch
)
130 auto BB
= R
.Begin
->getParent();
131 Sch
.BaseClass::startBlock(BB
);
132 Sch
.BaseClass::enterRegion(BB
, R
.Begin
, R
.End
, R
.NumRegionInstrs
);
134 Sch
.buildSchedGraph(Sch
.AA
, nullptr, nullptr, nullptr,
135 /*TrackLaneMask*/true);
136 Sch
.Topo
.InitDAGTopologicalSorting();
137 Sch
.findRootsAndBiasEdges(TopRoots
, BotRoots
);
141 Sch
.BaseClass::exitRegion();
142 Sch
.BaseClass::finishBlock();
145 ArrayRef
<const SUnit
*> getTopRoots() const {
148 ArrayRef
<SUnit
*> getBottomRoots() const {
153 class GCNIterativeScheduler::OverrideLegacyStrategy
{
154 GCNIterativeScheduler
&Sch
;
156 std::unique_ptr
<MachineSchedStrategy
> SaveSchedImpl
;
157 GCNRegPressure SaveMaxRP
;
160 OverrideLegacyStrategy(Region
&R
,
161 MachineSchedStrategy
&OverrideStrategy
,
162 GCNIterativeScheduler
&_Sch
)
165 , SaveSchedImpl(std::move(_Sch
.SchedImpl
))
166 , SaveMaxRP(R
.MaxPressure
) {
167 Sch
.SchedImpl
.reset(&OverrideStrategy
);
168 auto BB
= R
.Begin
->getParent();
169 Sch
.BaseClass::startBlock(BB
);
170 Sch
.BaseClass::enterRegion(BB
, R
.Begin
, R
.End
, R
.NumRegionInstrs
);
173 ~OverrideLegacyStrategy() {
174 Sch
.BaseClass::exitRegion();
175 Sch
.BaseClass::finishBlock();
176 Sch
.SchedImpl
.release();
177 Sch
.SchedImpl
= std::move(SaveSchedImpl
);
181 assert(Sch
.RegionBegin
== Rgn
.Begin
&& Sch
.RegionEnd
== Rgn
.End
);
182 LLVM_DEBUG(dbgs() << "\nScheduling ";
183 printRegion(dbgs(), Rgn
.Begin
, Rgn
.End
, Sch
.LIS
, 2));
184 Sch
.BaseClass::schedule();
186 // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
187 Sch
.RegionEnd
= Rgn
.End
;
188 //assert(Rgn.End == Sch.RegionEnd);
189 Rgn
.Begin
= Sch
.RegionBegin
;
190 Rgn
.MaxPressure
.clear();
193 void restoreOrder() {
194 assert(Sch
.RegionBegin
== Rgn
.Begin
&& Sch
.RegionEnd
== Rgn
.End
);
195 // DAG SUnits are stored using original region's order
196 // so just use SUnits as the restoring schedule
197 Sch
.scheduleRegion(Rgn
, Sch
.SUnits
, SaveMaxRP
);
203 // just a stub to make base class happy
204 class SchedStrategyStub
: public MachineSchedStrategy
{
206 bool shouldTrackPressure() const override
{ return false; }
207 bool shouldTrackLaneMasks() const override
{ return false; }
208 void initialize(ScheduleDAGMI
*DAG
) override
{}
209 SUnit
*pickNode(bool &IsTopNode
) override
{ return nullptr; }
210 void schedNode(SUnit
*SU
, bool IsTopNode
) override
{}
211 void releaseTopNode(SUnit
*SU
) override
{}
212 void releaseBottomNode(SUnit
*SU
) override
{}
215 } // end anonymous namespace
217 GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext
*C
,
219 : BaseClass(C
, std::make_unique
<SchedStrategyStub
>())
225 // returns max pressure for a region
227 GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin
,
228 MachineBasicBlock::iterator End
)
230 // For the purpose of pressure tracking bottom inst of the region should
231 // be also processed. End is either BB end, BB terminator inst or sched
233 auto const BBEnd
= Begin
->getParent()->end();
234 auto const BottomMI
= End
== BBEnd
? std::prev(End
) : End
;
236 // scheduleRegions walks bottom to top, so its likely we just get next
237 // instruction to track
238 auto AfterBottomMI
= std::next(BottomMI
);
239 if (AfterBottomMI
== BBEnd
||
240 &*AfterBottomMI
!= UPTracker
.getLastTrackedMI()) {
241 UPTracker
.reset(*BottomMI
);
243 assert(UPTracker
.isValid());
246 for (auto I
= BottomMI
; I
!= Begin
; --I
)
247 UPTracker
.recede(*I
);
249 UPTracker
.recede(*Begin
);
251 assert(UPTracker
.isValid() ||
252 (dbgs() << "Tracked region ",
253 printRegion(dbgs(), Begin
, End
, LIS
), false));
254 return UPTracker
.getMaxPressureAndReset();
257 // returns max pressure for a tentative schedule
258 template <typename Range
> GCNRegPressure
259 GCNIterativeScheduler::getSchedulePressure(const Region
&R
,
260 Range
&&Schedule
) const {
261 auto const BBEnd
= R
.Begin
->getParent()->end();
262 GCNUpwardRPTracker
RPTracker(*LIS
);
263 if (R
.End
!= BBEnd
) {
264 // R.End points to the boundary instruction but the
265 // schedule doesn't include it
266 RPTracker
.reset(*R
.End
);
267 RPTracker
.recede(*R
.End
);
269 // R.End doesn't point to the boundary instruction
270 RPTracker
.reset(*std::prev(BBEnd
));
272 for (auto I
= Schedule
.end(), B
= Schedule
.begin(); I
!= B
;) {
273 RPTracker
.recede(*getMachineInstr(*--I
));
275 return RPTracker
.getMaxPressureAndReset();
278 void GCNIterativeScheduler::enterRegion(MachineBasicBlock
*BB
, // overridden
279 MachineBasicBlock::iterator Begin
,
280 MachineBasicBlock::iterator End
,
281 unsigned NumRegionInstrs
) {
282 BaseClass::enterRegion(BB
, Begin
, End
, NumRegionInstrs
);
283 if (NumRegionInstrs
> 2) {
285 new (Alloc
.Allocate())
286 Region
{ Begin
, End
, NumRegionInstrs
,
287 getRegionPressure(Begin
, End
), nullptr });
291 void GCNIterativeScheduler::schedule() { // overridden
293 LLVM_DEBUG(printLivenessInfo(dbgs(), RegionBegin
, RegionEnd
, LIS
);
294 if (!Regions
.empty() && Regions
.back()->Begin
== RegionBegin
) {
296 << print(Regions
.back()->MaxPressure
,
297 &MF
.getSubtarget
<GCNSubtarget
>());
302 void GCNIterativeScheduler::finalizeSchedule() { // overridden
306 case SCHEDULE_MINREGONLY
: scheduleMinReg(); break;
307 case SCHEDULE_MINREGFORCED
: scheduleMinReg(true); break;
308 case SCHEDULE_LEGACYMAXOCCUPANCY
: scheduleLegacyMaxOccupancy(); break;
309 case SCHEDULE_ILP
: scheduleILP(false); break;
313 // Detach schedule from SUnits and interleave it with debug values.
314 // Returned schedule becomes independent of DAG state.
315 std::vector
<MachineInstr
*>
316 GCNIterativeScheduler::detachSchedule(ScheduleRef Schedule
) const {
317 std::vector
<MachineInstr
*> Res
;
318 Res
.reserve(Schedule
.size() * 2);
321 Res
.push_back(FirstDbgValue
);
323 const auto DbgB
= DbgValues
.begin(), DbgE
= DbgValues
.end();
324 for (const auto *SU
: Schedule
) {
325 Res
.push_back(SU
->getInstr());
326 const auto &D
= std::find_if(DbgB
, DbgE
, [SU
](decltype(*DbgB
) &P
) {
327 return P
.second
== SU
->getInstr();
330 Res
.push_back(D
->first
);
335 void GCNIterativeScheduler::setBestSchedule(Region
&R
,
336 ScheduleRef Schedule
,
337 const GCNRegPressure
&MaxRP
) {
338 R
.BestSchedule
.reset(
339 new TentativeSchedule
{ detachSchedule(Schedule
), MaxRP
});
342 void GCNIterativeScheduler::scheduleBest(Region
&R
) {
343 assert(R
.BestSchedule
.get() && "No schedule specified");
344 scheduleRegion(R
, R
.BestSchedule
->Schedule
, R
.BestSchedule
->MaxPressure
);
345 R
.BestSchedule
.reset();
348 // minimal required region scheduler, works for ranges of SUnits*,
349 // SUnits or MachineIntrs*
350 template <typename Range
>
351 void GCNIterativeScheduler::scheduleRegion(Region
&R
, Range
&&Schedule
,
352 const GCNRegPressure
&MaxRP
) {
353 assert(RegionBegin
== R
.Begin
&& RegionEnd
== R
.End
);
354 assert(LIS
!= nullptr);
356 const auto SchedMaxRP
= getSchedulePressure(R
, Schedule
);
358 auto BB
= R
.Begin
->getParent();
360 for (const auto &I
: Schedule
) {
361 auto MI
= getMachineInstr(I
);
365 if (!MI
->isDebugInstr())
366 LIS
->handleMove(*MI
, true);
368 if (!MI
->isDebugInstr()) {
369 // Reset read - undef flags and update them later.
370 for (auto &Op
: MI
->all_defs())
371 Op
.setIsUndef(false);
373 RegisterOperands RegOpers
;
374 RegOpers
.collect(*MI
, *TRI
, MRI
, /*ShouldTrackLaneMasks*/true,
375 /*IgnoreDead*/false);
376 // Adjust liveness and add missing dead+read-undef flags.
377 auto SlotIdx
= LIS
->getInstructionIndex(*MI
).getRegSlot();
378 RegOpers
.adjustLaneLiveness(*LIS
, MRI
, SlotIdx
, MI
);
380 Top
= std::next(MI
->getIterator());
382 RegionBegin
= getMachineInstr(Schedule
.front());
384 // Schedule consisting of MachineInstr* is considered 'detached'
385 // and already interleaved with debug values
386 if (!std::is_same_v
<decltype(*Schedule
.begin()), MachineInstr
*>) {
388 // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
389 // assert(R.End == RegionEnd);
393 R
.Begin
= RegionBegin
;
394 R
.MaxPressure
= MaxRP
;
397 const auto RegionMaxRP
= getRegionPressure(R
);
398 const auto &ST
= MF
.getSubtarget
<GCNSubtarget
>();
401 (SchedMaxRP
== RegionMaxRP
&& (MaxRP
.empty() || SchedMaxRP
== MaxRP
)) ||
402 (dbgs() << "Max RP mismatch!!!\n"
403 "RP for schedule (calculated): "
404 << print(SchedMaxRP
, &ST
)
405 << "RP for schedule (reported): " << print(MaxRP
, &ST
)
406 << "RP after scheduling: " << print(RegionMaxRP
, &ST
),
410 // Sort recorded regions by pressure - highest at the front
411 void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc
) {
412 llvm::sort(Regions
, [this, TargetOcc
](const Region
*R1
, const Region
*R2
) {
413 return R2
->MaxPressure
.less(MF
, R1
->MaxPressure
, TargetOcc
);
417 ///////////////////////////////////////////////////////////////////////////////
418 // Legacy MaxOccupancy Strategy
420 // Tries to increase occupancy applying minreg scheduler for a sequence of
421 // most demanding regions. Obtained schedules are saved as BestSchedule for a
423 // TargetOcc is the best achievable occupancy for a kernel.
424 // Returns better occupancy on success or current occupancy on fail.
425 // BestSchedules aren't deleted on fail.
426 unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc
) {
427 // TODO: assert Regions are sorted descending by pressure
428 const auto &ST
= MF
.getSubtarget
<GCNSubtarget
>();
429 const auto Occ
= Regions
.front()->MaxPressure
.getOccupancy(ST
);
430 LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
431 << ", current = " << Occ
<< '\n');
433 auto NewOcc
= TargetOcc
;
434 for (auto *R
: Regions
) {
435 if (R
->MaxPressure
.getOccupancy(ST
) >= NewOcc
)
438 LLVM_DEBUG(printRegion(dbgs(), R
->Begin
, R
->End
, LIS
, 3);
439 printLivenessInfo(dbgs(), R
->Begin
, R
->End
, LIS
));
441 BuildDAG
DAG(*R
, *this);
442 const auto MinSchedule
= makeMinRegSchedule(DAG
.getTopRoots(), *this);
443 const auto MaxRP
= getSchedulePressure(*R
, MinSchedule
);
444 LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
445 printSchedRP(dbgs(), R
->MaxPressure
, MaxRP
));
447 NewOcc
= std::min(NewOcc
, MaxRP
.getOccupancy(ST
));
451 setBestSchedule(*R
, MinSchedule
, MaxRP
);
453 LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc
454 << ", prev occupancy = " << Occ
<< '\n');
456 SIMachineFunctionInfo
*MFI
= MF
.getInfo
<SIMachineFunctionInfo
>();
457 MFI
->increaseOccupancy(MF
, NewOcc
);
460 return std::max(NewOcc
, Occ
);
463 void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
464 bool TryMaximizeOccupancy
) {
465 const auto &ST
= MF
.getSubtarget
<GCNSubtarget
>();
466 SIMachineFunctionInfo
*MFI
= MF
.getInfo
<SIMachineFunctionInfo
>();
467 auto TgtOcc
= MFI
->getMinAllowedOccupancy();
469 sortRegionsByPressure(TgtOcc
);
470 auto Occ
= Regions
.front()->MaxPressure
.getOccupancy(ST
);
472 if (TryMaximizeOccupancy
&& Occ
< TgtOcc
)
473 Occ
= tryMaximizeOccupancy(TgtOcc
);
475 // This is really weird but for some magic scheduling regions twice
476 // gives performance improvement
477 const int NumPasses
= Occ
< TgtOcc
? 2 : 1;
479 TgtOcc
= std::min(Occ
, TgtOcc
);
480 LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
481 "target occupancy = "
483 GCNMaxOccupancySchedStrategy
LStrgy(Context
);
484 unsigned FinalOccupancy
= std::min(Occ
, MFI
->getOccupancy());
486 for (int I
= 0; I
< NumPasses
; ++I
) {
487 // running first pass with TargetOccupancy = 0 mimics previous scheduling
488 // approach and is a performance magic
489 LStrgy
.setTargetOccupancy(I
== 0 ? 0 : TgtOcc
);
490 for (auto *R
: Regions
) {
491 OverrideLegacyStrategy
Ovr(*R
, LStrgy
, *this);
494 const auto RP
= getRegionPressure(*R
);
495 LLVM_DEBUG(printSchedRP(dbgs(), R
->MaxPressure
, RP
));
497 if (RP
.getOccupancy(ST
) < TgtOcc
) {
498 LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc
);
499 if (R
->BestSchedule
.get() &&
500 R
->BestSchedule
->MaxPressure
.getOccupancy(ST
) >= TgtOcc
) {
501 LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
504 LLVM_DEBUG(dbgs() << ", restoring\n");
506 assert(R
->MaxPressure
.getOccupancy(ST
) >= TgtOcc
);
509 FinalOccupancy
= std::min(FinalOccupancy
, RP
.getOccupancy(ST
));
512 MFI
->limitOccupancy(FinalOccupancy
);
515 ///////////////////////////////////////////////////////////////////////////////
516 // Minimal Register Strategy
518 void GCNIterativeScheduler::scheduleMinReg(bool force
) {
519 const SIMachineFunctionInfo
*MFI
= MF
.getInfo
<SIMachineFunctionInfo
>();
520 const auto TgtOcc
= MFI
->getOccupancy();
521 sortRegionsByPressure(TgtOcc
);
523 auto MaxPressure
= Regions
.front()->MaxPressure
;
524 for (auto *R
: Regions
) {
525 if (!force
&& R
->MaxPressure
.less(MF
, MaxPressure
, TgtOcc
))
528 BuildDAG
DAG(*R
, *this);
529 const auto MinSchedule
= makeMinRegSchedule(DAG
.getTopRoots(), *this);
531 const auto RP
= getSchedulePressure(*R
, MinSchedule
);
532 LLVM_DEBUG(if (R
->MaxPressure
.less(MF
, RP
, TgtOcc
)) {
533 dbgs() << "\nWarning: Pressure becomes worse after minreg!";
534 printSchedRP(dbgs(), R
->MaxPressure
, RP
);
537 if (!force
&& MaxPressure
.less(MF
, RP
, TgtOcc
))
540 scheduleRegion(*R
, MinSchedule
, RP
);
541 LLVM_DEBUG(printSchedResult(dbgs(), R
, RP
));
547 ///////////////////////////////////////////////////////////////////////////////
548 // ILP scheduler port
550 void GCNIterativeScheduler::scheduleILP(
551 bool TryMaximizeOccupancy
) {
552 const auto &ST
= MF
.getSubtarget
<GCNSubtarget
>();
553 SIMachineFunctionInfo
*MFI
= MF
.getInfo
<SIMachineFunctionInfo
>();
554 auto TgtOcc
= MFI
->getMinAllowedOccupancy();
556 sortRegionsByPressure(TgtOcc
);
557 auto Occ
= Regions
.front()->MaxPressure
.getOccupancy(ST
);
559 if (TryMaximizeOccupancy
&& Occ
< TgtOcc
)
560 Occ
= tryMaximizeOccupancy(TgtOcc
);
562 TgtOcc
= std::min(Occ
, TgtOcc
);
563 LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
564 "target occupancy = "
567 unsigned FinalOccupancy
= std::min(Occ
, MFI
->getOccupancy());
568 for (auto *R
: Regions
) {
569 BuildDAG
DAG(*R
, *this);
570 const auto ILPSchedule
= makeGCNILPScheduler(DAG
.getBottomRoots(), *this);
572 const auto RP
= getSchedulePressure(*R
, ILPSchedule
);
573 LLVM_DEBUG(printSchedRP(dbgs(), R
->MaxPressure
, RP
));
575 if (RP
.getOccupancy(ST
) < TgtOcc
) {
576 LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc
);
577 if (R
->BestSchedule
.get() &&
578 R
->BestSchedule
->MaxPressure
.getOccupancy(ST
) >= TgtOcc
) {
579 LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
583 scheduleRegion(*R
, ILPSchedule
, RP
);
584 LLVM_DEBUG(printSchedResult(dbgs(), R
, RP
));
585 FinalOccupancy
= std::min(FinalOccupancy
, RP
.getOccupancy(ST
));
588 MFI
->limitOccupancy(FinalOccupancy
);