1 //===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// SI Machine Scheduler interface
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
18 #include "SIInstrInfo.h"
19 #include "llvm/CodeGen/MachineBasicBlock.h"
20 #include "llvm/CodeGen/MachineScheduler.h"
21 #include "llvm/CodeGen/RegisterPressure.h"
22 #include "llvm/CodeGen/ScheduleDAG.h"
32 enum SIScheduleCandReason
{
41 struct SISchedulerCandidate
{
42 // The reason for this candidate.
43 SIScheduleCandReason Reason
= NoCand
;
45 // Set of reasons that apply to multiple candidates.
46 uint32_t RepeatReasonSet
= 0;
48 SISchedulerCandidate() = default;
50 bool isRepeat(SIScheduleCandReason R
) { return RepeatReasonSet
& (1 << R
); }
51 void setRepeat(SIScheduleCandReason R
) { RepeatReasonSet
|= (1 << R
); }
54 class SIScheduleDAGMI
;
55 class SIScheduleBlockCreator
;
57 enum SIScheduleBlockLinkKind
{
62 class SIScheduleBlock
{
64 SIScheduleBlockCreator
*BC
;
66 std::vector
<SUnit
*> SUnits
;
67 std::map
<unsigned, unsigned> NodeNum2Index
;
68 std::vector
<SUnit
*> TopReadySUs
;
69 std::vector
<SUnit
*> ScheduledSUnits
;
71 /// The top of the unscheduled zone.
72 IntervalPressure TopPressure
;
73 RegPressureTracker TopRPTracker
;
75 // Pressure: number of said class of registers needed to
76 // store the live virtual and real registers.
77 // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
78 // Pressure of additional registers required inside the block.
79 std::vector
<unsigned> InternalAdditionnalPressure
;
80 // Pressure of input and output registers
81 std::vector
<unsigned> LiveInPressure
;
82 std::vector
<unsigned> LiveOutPressure
;
83 // Registers required by the block, and outputs.
84 // We do track only virtual registers.
85 // Note that some registers are not 32 bits,
86 // and thus the pressure is not equal
87 // to the number of live registers.
88 std::set
<unsigned> LiveInRegs
;
89 std::set
<unsigned> LiveOutRegs
;
91 bool Scheduled
= false;
92 bool HighLatencyBlock
= false;
94 std::vector
<unsigned> HasLowLatencyNonWaitedParent
;
96 // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
99 std::vector
<SIScheduleBlock
*> Preds
; // All blocks predecessors.
100 // All blocks successors, and the kind of link
101 std::vector
<std::pair
<SIScheduleBlock
*, SIScheduleBlockLinkKind
>> Succs
;
102 unsigned NumHighLatencySuccessors
= 0;
105 SIScheduleBlock(SIScheduleDAGMI
*DAG
, SIScheduleBlockCreator
*BC
,
107 DAG(DAG
), BC(BC
), TopRPTracker(TopPressure
), ID(ID
) {}
109 ~SIScheduleBlock() = default;
111 unsigned getID() const { return ID
; }
113 /// Functions for Block construction.
114 void addUnit(SUnit
*SU
);
116 // When all SUs have been added.
117 void finalizeUnits();
119 // Add block pred, which has instruction predecessor of SU.
120 void addPred(SIScheduleBlock
*Pred
);
121 void addSucc(SIScheduleBlock
*Succ
, SIScheduleBlockLinkKind Kind
);
123 const std::vector
<SIScheduleBlock
*>& getPreds() const { return Preds
; }
124 ArrayRef
<std::pair
<SIScheduleBlock
*, SIScheduleBlockLinkKind
>>
125 getSuccs() const { return Succs
; }
127 unsigned Height
; // Maximum topdown path length to block without outputs
128 unsigned Depth
; // Maximum bottomup path length to block without inputs
130 unsigned getNumHighLatencySuccessors() const {
131 return NumHighLatencySuccessors
;
134 bool isHighLatencyBlock() { return HighLatencyBlock
; }
136 // This is approximative.
137 // Ideally should take into accounts some instructions (rcp, etc)
138 // are 4 times slower.
139 int getCost() { return SUnits
.size(); }
141 // The block Predecessors and Successors must be all registered
142 // before fastSchedule().
143 // Fast schedule with no particular requirement.
146 std::vector
<SUnit
*> getScheduledUnits() { return ScheduledSUnits
; }
148 // Complete schedule that will try to minimize reg pressure and
149 // low latencies, and will fill liveins and liveouts.
150 // Needs all MIs to be grouped between BeginBlock and EndBlock.
151 // The MIs can be moved after the scheduling,
152 // it is just used to allow correct track of live registers.
153 void schedule(MachineBasicBlock::iterator BeginBlock
,
154 MachineBasicBlock::iterator EndBlock
);
156 bool isScheduled() { return Scheduled
; }
158 // Needs the block to be scheduled inside
159 // TODO: find a way to compute it.
160 std::vector
<unsigned> &getInternalAdditionnalRegUsage() {
161 return InternalAdditionnalPressure
;
164 std::set
<unsigned> &getInRegs() { return LiveInRegs
; }
165 std::set
<unsigned> &getOutRegs() { return LiveOutRegs
; }
167 void printDebug(bool Full
);
170 struct SISchedCandidate
: SISchedulerCandidate
{
171 // The best SUnit candidate.
177 unsigned LowLatencyOffset
;
178 bool HasLowLatencyNonWaitedParent
;
180 SISchedCandidate() = default;
182 bool isValid() const { return SU
; }
184 // Copy the status of another candidate without changing policy.
185 void setBest(SISchedCandidate
&Best
) {
186 assert(Best
.Reason
!= NoCand
&& "uninitialized Sched candidate");
188 Reason
= Best
.Reason
;
189 SGPRUsage
= Best
.SGPRUsage
;
190 VGPRUsage
= Best
.VGPRUsage
;
191 IsLowLatency
= Best
.IsLowLatency
;
192 LowLatencyOffset
= Best
.LowLatencyOffset
;
193 HasLowLatencyNonWaitedParent
= Best
.HasLowLatencyNonWaitedParent
;
199 void undoReleaseSucc(SUnit
*SU
, SDep
*SuccEdge
);
200 void releaseSucc(SUnit
*SU
, SDep
*SuccEdge
);
201 // InOrOutBlock: restrict to links pointing inside the block (true),
202 // or restrict to links pointing outside the block (false).
203 void releaseSuccessors(SUnit
*SU
, bool InOrOutBlock
);
205 void nodeScheduled(SUnit
*SU
);
206 void tryCandidateTopDown(SISchedCandidate
&Cand
, SISchedCandidate
&TryCand
);
207 void tryCandidateBottomUp(SISchedCandidate
&Cand
, SISchedCandidate
&TryCand
);
209 void traceCandidate(const SISchedCandidate
&Cand
);
210 void initRegPressure(MachineBasicBlock::iterator BeginBlock
,
211 MachineBasicBlock::iterator EndBlock
);
214 struct SIScheduleBlocks
{
215 std::vector
<SIScheduleBlock
*> Blocks
;
216 std::vector
<int> TopDownIndex2Block
;
217 std::vector
<int> TopDownBlock2Index
;
220 enum SISchedulerBlockCreatorVariant
{
223 LatenciesAlonePlusConsecutive
226 class SIScheduleBlockCreator
{
227 SIScheduleDAGMI
*DAG
;
228 // unique_ptr handles freeing memory for us.
229 std::vector
<std::unique_ptr
<SIScheduleBlock
>> BlockPtrs
;
230 std::map
<SISchedulerBlockCreatorVariant
,
231 SIScheduleBlocks
> Blocks
;
232 std::vector
<SIScheduleBlock
*> CurrentBlocks
;
233 std::vector
<int> Node2CurrentBlock
;
236 // Maps topological index to the node number.
237 std::vector
<int> TopDownIndex2Block
;
238 std::vector
<int> TopDownBlock2Index
;
239 std::vector
<int> BottomUpIndex2Block
;
241 // 0 -> Color not given.
242 // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
243 // Above -> Other groups.
245 int NextNonReservedID
;
246 std::vector
<int> CurrentColoring
;
247 std::vector
<int> CurrentTopDownReservedDependencyColoring
;
248 std::vector
<int> CurrentBottomUpReservedDependencyColoring
;
251 SIScheduleBlockCreator(SIScheduleDAGMI
*DAG
);
252 ~SIScheduleBlockCreator();
255 getBlocks(SISchedulerBlockCreatorVariant BlockVariant
);
257 bool isSUInBlock(SUnit
*SU
, unsigned ID
);
260 // Give a Reserved color to every high latency.
261 void colorHighLatenciesAlone();
263 // Create groups of high latencies with a Reserved color.
264 void colorHighLatenciesGroups();
266 // Compute coloring for topdown and bottom traversals with
267 // different colors depending on dependencies on Reserved colors.
268 void colorComputeReservedDependencies();
270 // Give color to all non-colored SUs according to Reserved groups dependencies.
271 void colorAccordingToReservedDependencies();
273 // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
274 // The new colors are computed according to the dependencies on the other blocks
275 // formed with colorAccordingToReservedDependencies.
276 void colorEndsAccordingToDependencies();
278 // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
279 void colorForceConsecutiveOrderInGroup();
281 // Merge Constant loads that have all their users into another group to the group.
282 // (TODO: else if all their users depend on the same group, put them there)
283 void colorMergeConstantLoadsNextGroup();
285 // Merge SUs that have all their users into another group to the group
286 void colorMergeIfPossibleNextGroup();
288 // Merge SUs that have all their users into another group to the group,
289 // but only for Reserved groups.
290 void colorMergeIfPossibleNextGroupOnlyForReserved();
292 // Merge SUs that have all their users into another group to the group,
293 // but only if the group is no more than a few SUs.
294 void colorMergeIfPossibleSmallGroupsToNextGroup();
296 // Divides Blocks with important size.
297 // Idea of implementation: attribute new colors depending on topdown and
298 // bottom up links to other blocks.
299 void cutHugeBlocks();
301 // Put in one group all instructions with no users in this scheduling region
302 // (we'd want these groups be at the end).
303 void regroupNoUserInstructions();
305 // Give Reserved color to export instructions
308 void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant
);
310 void topologicalSort();
312 void scheduleInsideBlocks();
317 enum SISchedulerBlockSchedulerVariant
{
318 BlockLatencyRegUsage
,
319 BlockRegUsageLatency
,
323 class SIScheduleBlockScheduler
{
324 SIScheduleDAGMI
*DAG
;
325 SISchedulerBlockSchedulerVariant Variant
;
326 std::vector
<SIScheduleBlock
*> Blocks
;
328 std::vector
<std::map
<unsigned, unsigned>> LiveOutRegsNumUsages
;
329 std::set
<unsigned> LiveRegs
;
330 // Num of schedulable unscheduled blocks reading the register.
331 std::map
<unsigned, unsigned> LiveRegsConsumers
;
333 std::vector
<unsigned> LastPosHighLatencyParentScheduled
;
334 int LastPosWaitedHighLatency
;
336 std::vector
<SIScheduleBlock
*> BlocksScheduled
;
337 unsigned NumBlockScheduled
;
338 std::vector
<SIScheduleBlock
*> ReadyBlocks
;
340 unsigned VregCurrentUsage
;
341 unsigned SregCurrentUsage
;
343 // Currently is only approximation.
344 unsigned maxVregUsage
;
345 unsigned maxSregUsage
;
347 std::vector
<unsigned> BlockNumPredsLeft
;
348 std::vector
<unsigned> BlockNumSuccsLeft
;
351 SIScheduleBlockScheduler(SIScheduleDAGMI
*DAG
,
352 SISchedulerBlockSchedulerVariant Variant
,
353 SIScheduleBlocks BlocksStruct
);
354 ~SIScheduleBlockScheduler() = default;
356 std::vector
<SIScheduleBlock
*> getBlocks() { return BlocksScheduled
; }
358 unsigned getVGPRUsage() { return maxVregUsage
; }
359 unsigned getSGPRUsage() { return maxSregUsage
; }
362 struct SIBlockSchedCandidate
: SISchedulerCandidate
{
363 // The best Block candidate.
364 SIScheduleBlock
*Block
= nullptr;
368 unsigned NumSuccessors
;
369 unsigned NumHighLatencySuccessors
;
370 unsigned LastPosHighLatParentScheduled
;
373 SIBlockSchedCandidate() = default;
375 bool isValid() const { return Block
; }
377 // Copy the status of another candidate without changing policy.
378 void setBest(SIBlockSchedCandidate
&Best
) {
379 assert(Best
.Reason
!= NoCand
&& "uninitialized Sched candidate");
381 Reason
= Best
.Reason
;
382 IsHighLatency
= Best
.IsHighLatency
;
383 VGPRUsageDiff
= Best
.VGPRUsageDiff
;
384 NumSuccessors
= Best
.NumSuccessors
;
385 NumHighLatencySuccessors
= Best
.NumHighLatencySuccessors
;
386 LastPosHighLatParentScheduled
= Best
.LastPosHighLatParentScheduled
;
387 Height
= Best
.Height
;
391 bool tryCandidateLatency(SIBlockSchedCandidate
&Cand
,
392 SIBlockSchedCandidate
&TryCand
);
393 bool tryCandidateRegUsage(SIBlockSchedCandidate
&Cand
,
394 SIBlockSchedCandidate
&TryCand
);
395 SIScheduleBlock
*pickBlock();
397 void addLiveRegs(std::set
<unsigned> &Regs
);
398 void decreaseLiveRegs(SIScheduleBlock
*Block
, std::set
<unsigned> &Regs
);
399 void releaseBlockSuccs(SIScheduleBlock
*Parent
);
400 void blockScheduled(SIScheduleBlock
*Block
);
402 // Check register pressure change
403 // by scheduling a block with these LiveIn and LiveOut.
404 std::vector
<int> checkRegUsageImpact(std::set
<unsigned> &InRegs
,
405 std::set
<unsigned> &OutRegs
);
410 struct SIScheduleBlockResult
{
411 std::vector
<unsigned> SUs
;
412 unsigned MaxSGPRUsage
;
413 unsigned MaxVGPRUsage
;
417 SIScheduleDAGMI
*DAG
;
418 SIScheduleBlockCreator BlockCreator
;
421 SIScheduler(SIScheduleDAGMI
*DAG
) : DAG(DAG
), BlockCreator(DAG
) {}
423 ~SIScheduler() = default;
425 struct SIScheduleBlockResult
426 scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant
,
427 SISchedulerBlockSchedulerVariant ScheduleVariant
);
430 class SIScheduleDAGMI final
: public ScheduleDAGMILive
{
431 const SIInstrInfo
*SITII
;
432 const SIRegisterInfo
*SITRI
;
434 std::vector
<SUnit
> SUnitsLinksBackup
;
436 // For moveLowLatencies. After all Scheduling variants are tested.
437 std::vector
<unsigned> ScheduledSUnits
;
438 std::vector
<unsigned> ScheduledSUnitsInv
;
444 SIScheduleDAGMI(MachineSchedContext
*C
);
446 ~SIScheduleDAGMI() override
;
448 // Entry point for the schedule.
449 void schedule() override
;
451 // To init Block's RPTracker.
452 void initRPTracker(RegPressureTracker
&RPTracker
) {
453 RPTracker
.init(&MF
, RegClassInfo
, LIS
, BB
, RegionBegin
, false, false);
456 MachineBasicBlock
*getBB() { return BB
; }
457 MachineBasicBlock::iterator
getCurrentTop() { return CurrentTop
; }
458 MachineBasicBlock::iterator
getCurrentBottom() { return CurrentBottom
; }
459 LiveIntervals
*getLIS() { return LIS
; }
460 MachineRegisterInfo
*getMRI() { return &MRI
; }
461 const TargetRegisterInfo
*getTRI() { return TRI
; }
462 ScheduleDAGTopologicalSort
*GetTopo() { return &Topo
; }
463 SUnit
& getEntrySU() { return EntrySU
; }
464 SUnit
& getExitSU() { return ExitSU
; }
466 void restoreSULinksLeft();
468 template<typename _Iterator
> void fillVgprSgprCost(_Iterator First
,
471 unsigned &SgprUsage
);
473 std::set
<unsigned> getInRegs() {
474 std::set
<unsigned> InRegs
;
475 for (const auto &RegMaskPair
: RPTracker
.getPressure().LiveInRegs
) {
476 InRegs
.insert(RegMaskPair
.RegUnit
);
481 std::set
<unsigned> getOutRegs() {
482 std::set
<unsigned> OutRegs
;
483 for (const auto &RegMaskPair
: RPTracker
.getPressure().LiveOutRegs
) {
484 OutRegs
.insert(RegMaskPair
.RegUnit
);
489 unsigned getVGPRSetID() const { return VGPRSetID
; }
490 unsigned getSGPRSetID() const { return SGPRSetID
; }
493 void topologicalSort();
494 // After scheduling is done, improve low latency placements.
495 void moveLowLatencies();
498 // Some stats for scheduling inside blocks.
499 std::vector
<unsigned> IsLowLatencySU
;
500 std::vector
<unsigned> LowLatencyOffset
;
501 std::vector
<unsigned> IsHighLatencySU
;
503 // Maps topological index to the node number.
504 std::vector
<int> TopDownIndex2SU
;
505 std::vector
<int> BottomUpIndex2SU
;
508 } // end namespace llvm
510 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H