1 //===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// SI Machine Scheduler interface
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
17 #include "SIInstrInfo.h"
18 #include "llvm/CodeGen/MachineBasicBlock.h"
19 #include "llvm/CodeGen/MachineScheduler.h"
20 #include "llvm/CodeGen/RegisterPressure.h"
21 #include "llvm/CodeGen/ScheduleDAG.h"
31 enum SIScheduleCandReason
{
40 struct SISchedulerCandidate
{
41 // The reason for this candidate.
42 SIScheduleCandReason Reason
= NoCand
;
44 // Set of reasons that apply to multiple candidates.
45 uint32_t RepeatReasonSet
= 0;
47 SISchedulerCandidate() = default;
49 bool isRepeat(SIScheduleCandReason R
) { return RepeatReasonSet
& (1 << R
); }
50 void setRepeat(SIScheduleCandReason R
) { RepeatReasonSet
|= (1 << R
); }
53 class SIScheduleDAGMI
;
54 class SIScheduleBlockCreator
;
56 enum SIScheduleBlockLinkKind
{
61 class SIScheduleBlock
{
63 SIScheduleBlockCreator
*BC
;
65 std::vector
<SUnit
*> SUnits
;
66 std::map
<unsigned, unsigned> NodeNum2Index
;
67 std::vector
<SUnit
*> TopReadySUs
;
68 std::vector
<SUnit
*> ScheduledSUnits
;
70 /// The top of the unscheduled zone.
71 IntervalPressure TopPressure
;
72 RegPressureTracker TopRPTracker
;
74 // Pressure: number of said class of registers needed to
75 // store the live virtual and real registers.
76 // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
77 // Pressure of additional registers required inside the block.
78 std::vector
<unsigned> InternalAdditionnalPressure
;
79 // Pressure of input and output registers
80 std::vector
<unsigned> LiveInPressure
;
81 std::vector
<unsigned> LiveOutPressure
;
82 // Registers required by the block, and outputs.
83 // We do track only virtual registers.
84 // Note that some registers are not 32 bits,
85 // and thus the pressure is not equal
86 // to the number of live registers.
87 std::set
<unsigned> LiveInRegs
;
88 std::set
<unsigned> LiveOutRegs
;
90 bool Scheduled
= false;
91 bool HighLatencyBlock
= false;
93 std::vector
<unsigned> HasLowLatencyNonWaitedParent
;
95 // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
98 std::vector
<SIScheduleBlock
*> Preds
; // All blocks predecessors.
99 // All blocks successors, and the kind of link
100 std::vector
<std::pair
<SIScheduleBlock
*, SIScheduleBlockLinkKind
>> Succs
;
101 unsigned NumHighLatencySuccessors
= 0;
104 SIScheduleBlock(SIScheduleDAGMI
*DAG
, SIScheduleBlockCreator
*BC
,
106 DAG(DAG
), BC(BC
), TopRPTracker(TopPressure
), ID(ID
) {}
108 ~SIScheduleBlock() = default;
110 unsigned getID() const { return ID
; }
112 /// Functions for Block construction.
113 void addUnit(SUnit
*SU
);
115 // When all SUs have been added.
116 void finalizeUnits();
118 // Add block pred, which has instruction predecessor of SU.
119 void addPred(SIScheduleBlock
*Pred
);
120 void addSucc(SIScheduleBlock
*Succ
, SIScheduleBlockLinkKind Kind
);
122 const std::vector
<SIScheduleBlock
*>& getPreds() const { return Preds
; }
123 ArrayRef
<std::pair
<SIScheduleBlock
*, SIScheduleBlockLinkKind
>>
124 getSuccs() const { return Succs
; }
126 unsigned Height
; // Maximum topdown path length to block without outputs
127 unsigned Depth
; // Maximum bottomup path length to block without inputs
129 unsigned getNumHighLatencySuccessors() const {
130 return NumHighLatencySuccessors
;
133 bool isHighLatencyBlock() { return HighLatencyBlock
; }
135 // This is approximative.
136 // Ideally should take into accounts some instructions (rcp, etc)
137 // are 4 times slower.
138 int getCost() { return SUnits
.size(); }
140 // The block Predecessors and Successors must be all registered
141 // before fastSchedule().
142 // Fast schedule with no particular requirement.
145 std::vector
<SUnit
*> getScheduledUnits() { return ScheduledSUnits
; }
147 // Complete schedule that will try to minimize reg pressure and
148 // low latencies, and will fill liveins and liveouts.
149 // Needs all MIs to be grouped between BeginBlock and EndBlock.
150 // The MIs can be moved after the scheduling,
151 // it is just used to allow correct track of live registers.
152 void schedule(MachineBasicBlock::iterator BeginBlock
,
153 MachineBasicBlock::iterator EndBlock
);
155 bool isScheduled() { return Scheduled
; }
157 // Needs the block to be scheduled inside
158 // TODO: find a way to compute it.
159 std::vector
<unsigned> &getInternalAdditionnalRegUsage() {
160 return InternalAdditionnalPressure
;
163 std::set
<unsigned> &getInRegs() { return LiveInRegs
; }
164 std::set
<unsigned> &getOutRegs() { return LiveOutRegs
; }
166 void printDebug(bool Full
);
169 struct SISchedCandidate
: SISchedulerCandidate
{
170 // The best SUnit candidate.
176 unsigned LowLatencyOffset
;
177 bool HasLowLatencyNonWaitedParent
;
179 SISchedCandidate() = default;
181 bool isValid() const { return SU
; }
183 // Copy the status of another candidate without changing policy.
184 void setBest(SISchedCandidate
&Best
) {
185 assert(Best
.Reason
!= NoCand
&& "uninitialized Sched candidate");
187 Reason
= Best
.Reason
;
188 SGPRUsage
= Best
.SGPRUsage
;
189 VGPRUsage
= Best
.VGPRUsage
;
190 IsLowLatency
= Best
.IsLowLatency
;
191 LowLatencyOffset
= Best
.LowLatencyOffset
;
192 HasLowLatencyNonWaitedParent
= Best
.HasLowLatencyNonWaitedParent
;
198 void undoReleaseSucc(SUnit
*SU
, SDep
*SuccEdge
);
199 void releaseSucc(SUnit
*SU
, SDep
*SuccEdge
);
200 // InOrOutBlock: restrict to links pointing inside the block (true),
201 // or restrict to links pointing outside the block (false).
202 void releaseSuccessors(SUnit
*SU
, bool InOrOutBlock
);
204 void nodeScheduled(SUnit
*SU
);
205 void tryCandidateTopDown(SISchedCandidate
&Cand
, SISchedCandidate
&TryCand
);
206 void tryCandidateBottomUp(SISchedCandidate
&Cand
, SISchedCandidate
&TryCand
);
208 void traceCandidate(const SISchedCandidate
&Cand
);
209 void initRegPressure(MachineBasicBlock::iterator BeginBlock
,
210 MachineBasicBlock::iterator EndBlock
);
213 struct SIScheduleBlocks
{
214 std::vector
<SIScheduleBlock
*> Blocks
;
215 std::vector
<int> TopDownIndex2Block
;
216 std::vector
<int> TopDownBlock2Index
;
219 enum SISchedulerBlockCreatorVariant
{
222 LatenciesAlonePlusConsecutive
225 class SIScheduleBlockCreator
{
226 SIScheduleDAGMI
*DAG
;
227 // unique_ptr handles freeing memory for us.
228 std::vector
<std::unique_ptr
<SIScheduleBlock
>> BlockPtrs
;
229 std::map
<SISchedulerBlockCreatorVariant
,
230 SIScheduleBlocks
> Blocks
;
231 std::vector
<SIScheduleBlock
*> CurrentBlocks
;
232 std::vector
<int> Node2CurrentBlock
;
235 // Maps topological index to the node number.
236 std::vector
<int> TopDownIndex2Block
;
237 std::vector
<int> TopDownBlock2Index
;
238 std::vector
<int> BottomUpIndex2Block
;
240 // 0 -> Color not given.
241 // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
242 // Above -> Other groups.
244 int NextNonReservedID
;
245 std::vector
<int> CurrentColoring
;
246 std::vector
<int> CurrentTopDownReservedDependencyColoring
;
247 std::vector
<int> CurrentBottomUpReservedDependencyColoring
;
250 SIScheduleBlockCreator(SIScheduleDAGMI
*DAG
);
251 ~SIScheduleBlockCreator();
254 getBlocks(SISchedulerBlockCreatorVariant BlockVariant
);
256 bool isSUInBlock(SUnit
*SU
, unsigned ID
);
259 // Give a Reserved color to every high latency.
260 void colorHighLatenciesAlone();
262 // Create groups of high latencies with a Reserved color.
263 void colorHighLatenciesGroups();
265 // Compute coloring for topdown and bottom traversals with
266 // different colors depending on dependencies on Reserved colors.
267 void colorComputeReservedDependencies();
269 // Give color to all non-colored SUs according to Reserved groups dependencies.
270 void colorAccordingToReservedDependencies();
272 // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
273 // The new colors are computed according to the dependencies on the other blocks
274 // formed with colorAccordingToReservedDependencies.
275 void colorEndsAccordingToDependencies();
277 // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
278 void colorForceConsecutiveOrderInGroup();
280 // Merge Constant loads that have all their users into another group to the group.
281 // (TODO: else if all their users depend on the same group, put them there)
282 void colorMergeConstantLoadsNextGroup();
284 // Merge SUs that have all their users into another group to the group
285 void colorMergeIfPossibleNextGroup();
287 // Merge SUs that have all their users into another group to the group,
288 // but only for Reserved groups.
289 void colorMergeIfPossibleNextGroupOnlyForReserved();
291 // Merge SUs that have all their users into another group to the group,
292 // but only if the group is no more than a few SUs.
293 void colorMergeIfPossibleSmallGroupsToNextGroup();
295 // Divides Blocks with important size.
296 // Idea of implementation: attribute new colors depending on topdown and
297 // bottom up links to other blocks.
298 void cutHugeBlocks();
300 // Put in one group all instructions with no users in this scheduling region
301 // (we'd want these groups be at the end).
302 void regroupNoUserInstructions();
304 // Give Reserved color to export instructions
307 void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant
);
309 void topologicalSort();
311 void scheduleInsideBlocks();
316 enum SISchedulerBlockSchedulerVariant
{
317 BlockLatencyRegUsage
,
318 BlockRegUsageLatency
,
322 class SIScheduleBlockScheduler
{
323 SIScheduleDAGMI
*DAG
;
324 SISchedulerBlockSchedulerVariant Variant
;
325 std::vector
<SIScheduleBlock
*> Blocks
;
327 std::vector
<std::map
<unsigned, unsigned>> LiveOutRegsNumUsages
;
328 std::set
<unsigned> LiveRegs
;
329 // Num of schedulable unscheduled blocks reading the register.
330 std::map
<unsigned, unsigned> LiveRegsConsumers
;
332 std::vector
<unsigned> LastPosHighLatencyParentScheduled
;
333 int LastPosWaitedHighLatency
;
335 std::vector
<SIScheduleBlock
*> BlocksScheduled
;
336 unsigned NumBlockScheduled
;
337 std::vector
<SIScheduleBlock
*> ReadyBlocks
;
339 unsigned VregCurrentUsage
;
340 unsigned SregCurrentUsage
;
342 // Currently is only approximation.
343 unsigned maxVregUsage
;
344 unsigned maxSregUsage
;
346 std::vector
<unsigned> BlockNumPredsLeft
;
347 std::vector
<unsigned> BlockNumSuccsLeft
;
350 SIScheduleBlockScheduler(SIScheduleDAGMI
*DAG
,
351 SISchedulerBlockSchedulerVariant Variant
,
352 SIScheduleBlocks BlocksStruct
);
353 ~SIScheduleBlockScheduler() = default;
355 std::vector
<SIScheduleBlock
*> getBlocks() { return BlocksScheduled
; }
357 unsigned getVGPRUsage() { return maxVregUsage
; }
358 unsigned getSGPRUsage() { return maxSregUsage
; }
361 struct SIBlockSchedCandidate
: SISchedulerCandidate
{
362 // The best Block candidate.
363 SIScheduleBlock
*Block
= nullptr;
367 unsigned NumSuccessors
;
368 unsigned NumHighLatencySuccessors
;
369 unsigned LastPosHighLatParentScheduled
;
372 SIBlockSchedCandidate() = default;
374 bool isValid() const { return Block
; }
376 // Copy the status of another candidate without changing policy.
377 void setBest(SIBlockSchedCandidate
&Best
) {
378 assert(Best
.Reason
!= NoCand
&& "uninitialized Sched candidate");
380 Reason
= Best
.Reason
;
381 IsHighLatency
= Best
.IsHighLatency
;
382 VGPRUsageDiff
= Best
.VGPRUsageDiff
;
383 NumSuccessors
= Best
.NumSuccessors
;
384 NumHighLatencySuccessors
= Best
.NumHighLatencySuccessors
;
385 LastPosHighLatParentScheduled
= Best
.LastPosHighLatParentScheduled
;
386 Height
= Best
.Height
;
390 bool tryCandidateLatency(SIBlockSchedCandidate
&Cand
,
391 SIBlockSchedCandidate
&TryCand
);
392 bool tryCandidateRegUsage(SIBlockSchedCandidate
&Cand
,
393 SIBlockSchedCandidate
&TryCand
);
394 SIScheduleBlock
*pickBlock();
396 void addLiveRegs(std::set
<unsigned> &Regs
);
397 void decreaseLiveRegs(SIScheduleBlock
*Block
, std::set
<unsigned> &Regs
);
398 void releaseBlockSuccs(SIScheduleBlock
*Parent
);
399 void blockScheduled(SIScheduleBlock
*Block
);
401 // Check register pressure change
402 // by scheduling a block with these LiveIn and LiveOut.
403 std::vector
<int> checkRegUsageImpact(std::set
<unsigned> &InRegs
,
404 std::set
<unsigned> &OutRegs
);
409 struct SIScheduleBlockResult
{
410 std::vector
<unsigned> SUs
;
411 unsigned MaxSGPRUsage
;
412 unsigned MaxVGPRUsage
;
416 SIScheduleDAGMI
*DAG
;
417 SIScheduleBlockCreator BlockCreator
;
420 SIScheduler(SIScheduleDAGMI
*DAG
) : DAG(DAG
), BlockCreator(DAG
) {}
422 ~SIScheduler() = default;
424 struct SIScheduleBlockResult
425 scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant
,
426 SISchedulerBlockSchedulerVariant ScheduleVariant
);
429 class SIScheduleDAGMI final
: public ScheduleDAGMILive
{
430 const SIInstrInfo
*SITII
;
431 const SIRegisterInfo
*SITRI
;
433 std::vector
<SUnit
> SUnitsLinksBackup
;
435 // For moveLowLatencies. After all Scheduling variants are tested.
436 std::vector
<unsigned> ScheduledSUnits
;
437 std::vector
<unsigned> ScheduledSUnitsInv
;
443 SIScheduleDAGMI(MachineSchedContext
*C
);
445 ~SIScheduleDAGMI() override
;
447 // Entry point for the schedule.
448 void schedule() override
;
450 // To init Block's RPTracker.
451 void initRPTracker(RegPressureTracker
&RPTracker
) {
452 RPTracker
.init(&MF
, RegClassInfo
, LIS
, BB
, RegionBegin
, false, false);
455 MachineBasicBlock
*getBB() { return BB
; }
456 MachineBasicBlock::iterator
getCurrentTop() { return CurrentTop
; }
457 MachineBasicBlock::iterator
getCurrentBottom() { return CurrentBottom
; }
458 LiveIntervals
*getLIS() { return LIS
; }
459 MachineRegisterInfo
*getMRI() { return &MRI
; }
460 const TargetRegisterInfo
*getTRI() { return TRI
; }
461 ScheduleDAGTopologicalSort
*GetTopo() { return &Topo
; }
462 SUnit
& getEntrySU() { return EntrySU
; }
463 SUnit
& getExitSU() { return ExitSU
; }
465 void restoreSULinksLeft();
467 template<typename _Iterator
> void fillVgprSgprCost(_Iterator First
,
470 unsigned &SgprUsage
);
472 std::set
<unsigned> getInRegs() {
473 std::set
<unsigned> InRegs
;
474 for (const auto &RegMaskPair
: RPTracker
.getPressure().LiveInRegs
) {
475 InRegs
.insert(RegMaskPair
.RegUnit
);
480 std::set
<unsigned> getOutRegs() {
481 std::set
<unsigned> OutRegs
;
482 for (const auto &RegMaskPair
: RPTracker
.getPressure().LiveOutRegs
) {
483 OutRegs
.insert(RegMaskPair
.RegUnit
);
488 unsigned getVGPRSetID() const { return VGPRSetID
; }
489 unsigned getSGPRSetID() const { return SGPRSetID
; }
492 void topologicalSort();
493 // After scheduling is done, improve low latency placements.
494 void moveLowLatencies();
497 // Some stats for scheduling inside blocks.
498 std::vector
<unsigned> IsLowLatencySU
;
499 std::vector
<unsigned> LowLatencyOffset
;
500 std::vector
<unsigned> IsHighLatencySU
;
502 // Maps topological index to the node number.
503 std::vector
<int> TopDownIndex2SU
;
504 std::vector
<int> BottomUpIndex2SU
;
507 } // end namespace llvm
509 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H