1 //======----------- WindowScheduler.cpp - window scheduler -------------======//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // An implementation of the Window Scheduling software pipelining algorithm.
11 // The fundamental concept of the window scheduling algorithm involves folding
12 // the original MBB at a specific position, followed by list scheduling on the
13 // folded MIs. The optimal scheduling result is then chosen from various folding
14 // positions as the final scheduling outcome.
16 // The primary challenge in this algorithm lies in generating the folded MIs and
17 // establishing their dependencies. We have innovatively employed a new MBB,
18 // created by copying the original MBB three times, known as TripleMBB. This
19 // TripleMBB enables the convenient implementation of MI folding and dependency
20 // establishment. To facilitate the algorithm's implementation, we have also
21 // devised data structures such as OriMIs, TriMIs, TriToOri, and OriToCycle.
23 // Another challenge in the algorithm is the scheduling of phis. Semantically,
24 // it is difficult to place the phis in the window and perform list scheduling.
25 // Therefore, we schedule these phis separately after each list scheduling.
27 // The provided implementation is designed for use before the Register Allocator
28 // (RA). If the target requires implementation after RA, it is recommended to
29 // reimplement analyseII(), schedulePhi(), and expand(). Additionally,
30 // target-specific logic can be added in initialize(), preProcess(), and
33 // Lastly, it is worth mentioning that getSearchIndexes() is an important
34 // function. We have experimented with more complex heuristics on downstream
35 // target and achieved favorable results.
37 //===----------------------------------------------------------------------===//
38 #include "llvm/CodeGen/WindowScheduler.h"
39 #include "llvm/ADT/Statistic.h"
40 #include "llvm/CodeGen/LiveIntervals.h"
41 #include "llvm/CodeGen/MachineLoopInfo.h"
42 #include "llvm/CodeGen/MachinePipeliner.h"
43 #include "llvm/CodeGen/ModuloSchedule.h"
44 #include "llvm/CodeGen/TargetPassConfig.h"
45 #include "llvm/Support/CommandLine.h"
46 #include "llvm/Support/Debug.h"
47 #include "llvm/Support/TimeProfiler.h"
51 #define DEBUG_TYPE "pipeliner"
54 STATISTIC(NumTryWindowSchedule
,
55 "Number of loops that we attempt to use window scheduling");
56 STATISTIC(NumTryWindowSearch
,
57 "Number of times that we run list schedule in the window scheduling");
58 STATISTIC(NumWindowSchedule
,
59 "Number of loops that we successfully use window scheduling");
60 STATISTIC(NumFailAnalyseII
,
61 "Window scheduling abort due to the failure of the II analysis");
64 WindowSearchNum("window-search-num",
65 cl::desc("The number of searches per loop in the window "
66 "algorithm. 0 means no search number limit."),
67 cl::Hidden
, cl::init(6));
69 cl::opt
<unsigned> WindowSearchRatio(
70 "window-search-ratio",
71 cl::desc("The ratio of searches per loop in the window algorithm. 100 "
72 "means search all positions in the loop, while 0 means not "
73 "performing any search."),
74 cl::Hidden
, cl::init(40));
76 cl::opt
<unsigned> WindowIICoeff(
79 "The coefficient used when initializing II in the window algorithm."),
80 cl::Hidden
, cl::init(5));
82 cl::opt
<unsigned> WindowRegionLimit(
83 "window-region-limit",
85 "The lower limit of the scheduling region in the window algorithm."),
86 cl::Hidden
, cl::init(3));
88 cl::opt
<unsigned> WindowDiffLimit(
90 cl::desc("The lower limit of the difference between best II and base II in "
91 "the window algorithm. If the difference is smaller than "
92 "this lower limit, window scheduling will not be performed."),
93 cl::Hidden
, cl::init(2));
96 // WindowIILimit serves as an indicator of abnormal scheduling results and could
97 // potentially be referenced by the derived target window scheduler.
99 WindowIILimit("window-ii-limit",
100 cl::desc("The upper limit of II in the window algorithm."),
101 cl::Hidden
, cl::init(1000));
103 WindowScheduler::WindowScheduler(MachineSchedContext
*C
, MachineLoop
&ML
)
104 : Context(C
), MF(C
->MF
), MBB(ML
.getHeader()), Loop(ML
),
105 Subtarget(&MF
->getSubtarget()), TII(Subtarget
->getInstrInfo()),
106 TRI(Subtarget
->getRegisterInfo()), MRI(&MF
->getRegInfo()) {
107 TripleDAG
= std::unique_ptr
<ScheduleDAGInstrs
>(
108 createMachineScheduler(/*OnlyBuildGraph=*/true));
111 bool WindowScheduler::run() {
113 LLVM_DEBUG(dbgs() << "The WindowScheduler failed to initialize!\n");
116 // The window algorithm is time-consuming, and its compilation time should be
117 // taken into consideration.
118 TimeTraceScope
Scope("WindowSearch");
119 ++NumTryWindowSchedule
;
120 // Performing the relevant processing before window scheduling.
122 // The main window scheduling begins.
123 std::unique_ptr
<ScheduleDAGInstrs
> SchedDAG(createMachineScheduler());
124 auto SearchIndexes
= getSearchIndexes(WindowSearchNum
, WindowSearchRatio
);
125 for (unsigned Idx
: SearchIndexes
) {
127 ++NumTryWindowSearch
;
128 // The scheduling starts with non-phi instruction, so SchedPhiNum needs to
130 unsigned Offset
= Idx
+ SchedPhiNum
;
131 auto Range
= getScheduleRange(Offset
, SchedInstrNum
);
132 SchedDAG
->startBlock(MBB
);
133 SchedDAG
->enterRegion(MBB
, Range
.begin(), Range
.end(), SchedInstrNum
);
134 SchedDAG
->schedule();
135 LLVM_DEBUG(SchedDAG
->dump());
136 unsigned II
= analyseII(*SchedDAG
, Offset
);
137 if (II
== WindowIILimit
) {
139 LLVM_DEBUG(dbgs() << "Can't find a valid II. Keep searching...\n");
143 schedulePhi(Offset
, II
);
144 updateScheduleResult(Offset
, II
);
146 LLVM_DEBUG(dbgs() << "Current window Offset is " << Offset
<< " and II is "
149 // Performing the relevant processing after window scheduling.
151 // Check whether the scheduling result is valid.
152 if (!isScheduleValid()) {
153 LLVM_DEBUG(dbgs() << "Window scheduling is not needed!\n");
156 LLVM_DEBUG(dbgs() << "\nBest window offset is " << BestOffset
157 << " and Best II is " << BestII
<< ".\n");
158 // Expand the scheduling result to prologue, kernel, and epilogue.
165 WindowScheduler::createMachineScheduler(bool OnlyBuildGraph
) {
166 return OnlyBuildGraph
168 Context
, std::make_unique
<PostGenericScheduler
>(Context
),
170 : Context
->PassConfig
->createMachineScheduler(Context
);
173 bool WindowScheduler::initialize() {
174 if (!Subtarget
->enableWindowScheduler()) {
175 LLVM_DEBUG(dbgs() << "Target disables the window scheduling!\n");
178 // Initialized the member variables used by window algorithm.
189 // List scheduling used in the window algorithm depends on LiveIntervals.
191 LLVM_DEBUG(dbgs() << "There is no LiveIntervals information!\n");
194 // Check each MI in MBB.
195 SmallSet
<Register
, 8> PrevDefs
;
196 SmallSet
<Register
, 8> PrevUses
;
197 auto IsLoopCarried
= [&](MachineInstr
&Phi
) {
198 // Two cases are checked here: (1)The virtual register defined by the
199 // preceding phi is used by the succeeding phi;(2)The preceding phi uses the
200 // virtual register defined by the succeeding phi.
201 if (PrevUses
.count(Phi
.getOperand(0).getReg()))
203 PrevDefs
.insert(Phi
.getOperand(0).getReg());
204 for (unsigned I
= 1, E
= Phi
.getNumOperands(); I
!= E
; I
+= 2) {
205 if (PrevDefs
.count(Phi
.getOperand(I
).getReg()))
207 PrevUses
.insert(Phi
.getOperand(I
).getReg());
211 auto PLI
= TII
->analyzeLoopForPipelining(MBB
);
212 for (auto &MI
: *MBB
) {
213 if (MI
.isMetaInstruction() || MI
.isTerminator())
216 if (IsLoopCarried(MI
)) {
217 LLVM_DEBUG(dbgs() << "Loop carried phis are not supported yet!\n");
224 if (TII
->isSchedulingBoundary(MI
, MBB
, *MF
)) {
226 dbgs() << "Boundary MI is not allowed in window scheduling!\n");
229 if (PLI
->shouldIgnoreForPipelining(&MI
)) {
230 LLVM_DEBUG(dbgs() << "Special MI defined by target is not allowed in "
231 "window scheduling!\n");
234 for (auto &Def
: MI
.all_defs())
235 if (Def
.isReg() && Def
.getReg().isPhysical()) {
236 LLVM_DEBUG(dbgs() << "Physical registers are not supported in "
237 "window scheduling!\n");
241 if (SchedInstrNum
<= WindowRegionLimit
) {
242 LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n");
248 void WindowScheduler::preProcess() {
249 // Prior to window scheduling, it's necessary to backup the original MBB,
250 // generate a new TripleMBB, and build a TripleDAG based on the TripleMBB.
253 TripleDAG
->startBlock(MBB
);
254 TripleDAG
->enterRegion(
255 MBB
, MBB
->begin(), MBB
->getFirstTerminator(),
256 std::distance(MBB
->begin(), MBB
->getFirstTerminator()));
257 TripleDAG
->buildSchedGraph(Context
->AA
);
260 void WindowScheduler::postProcess() {
261 // After window scheduling, it's necessary to clear the TripleDAG and restore
262 // to the original MBB.
263 TripleDAG
->exitRegion();
264 TripleDAG
->finishBlock();
268 void WindowScheduler::backupMBB() {
269 for (auto &MI
: MBB
->instrs())
270 OriMIs
.push_back(&MI
);
271 // Remove MIs and the corresponding live intervals.
272 for (auto &MI
: make_early_inc_range(*MBB
)) {
273 Context
->LIS
->getSlotIndexes()->removeMachineInstrFromMaps(MI
, true);
278 void WindowScheduler::restoreMBB() {
279 // Erase MIs and the corresponding live intervals.
280 for (auto &MI
: make_early_inc_range(*MBB
)) {
281 Context
->LIS
->getSlotIndexes()->removeMachineInstrFromMaps(MI
, true);
282 MI
.eraseFromParent();
284 // Restore MBB to the state before window scheduling.
285 for (auto *MI
: OriMIs
)
287 updateLiveIntervals();
290 void WindowScheduler::generateTripleMBB() {
291 const unsigned DuplicateNum
= 3;
294 assert(OriMIs
.size() > 0 && "The Original MIs were not backed up!");
295 // Step 1: Performing the first copy of MBB instructions, excluding
296 // terminators. At the same time, we back up the anti-register of phis.
297 // DefPairs hold the old and new define register pairs.
298 DenseMap
<Register
, Register
> DefPairs
;
299 for (auto *MI
: OriMIs
) {
300 if (MI
->isMetaInstruction() || MI
->isTerminator())
303 if (Register AntiReg
= getAntiRegister(MI
))
304 DefPairs
[MI
->getOperand(0).getReg()] = AntiReg
;
305 auto *NewMI
= MF
->CloneMachineInstr(MI
);
306 MBB
->push_back(NewMI
);
307 TriMIs
.push_back(NewMI
);
308 TriToOri
[NewMI
] = MI
;
310 // Step 2: Performing the remaining two copies of MBB instructions excluding
311 // phis, and the last one contains terminators. At the same time, registers
312 // are updated accordingly.
313 for (size_t Cnt
= 1; Cnt
< DuplicateNum
; ++Cnt
) {
314 for (auto *MI
: OriMIs
) {
315 if (MI
->isPHI() || MI
->isMetaInstruction() ||
316 (MI
->isTerminator() && Cnt
< DuplicateNum
- 1))
318 auto *NewMI
= MF
->CloneMachineInstr(MI
);
319 DenseMap
<Register
, Register
> NewDefs
;
320 // New defines are updated.
321 for (auto MO
: NewMI
->all_defs())
322 if (MO
.isReg() && MO
.getReg().isVirtual()) {
324 MRI
->createVirtualRegister(MRI
->getRegClass(MO
.getReg()));
325 NewMI
->substituteRegister(MO
.getReg(), NewDef
, 0, *TRI
);
326 NewDefs
[MO
.getReg()] = NewDef
;
328 // New uses are updated.
329 for (auto DefRegPair
: DefPairs
)
330 if (NewMI
->readsRegister(DefRegPair
.first
, TRI
)) {
331 Register NewUse
= DefRegPair
.second
;
332 // Note the update process for '%1 -> %9' in '%10 = sub i32 %9, %3':
335 // ==================================
336 // %1 = phi i32 [%2, %BB.1], [%7, %BB.3] (%1,%7)
338 // ==================================
340 // %4 = sub i32 %1, %3
342 // %7 = add i32 %5, %6
344 // ----------------------------------
346 // %8 = sub i32 %7, %3 (%1,%7),(%4,%8)
348 // %9 = add i32 %5, %6 (%1,%7),(%4,%8),(%7,%9)
350 // ----------------------------------
352 // %10 = sub i32 %9, %3 (%1,%7),(%4,%10),(%7,%9)
354 // %11 = add i32 %5, %6 (%1,%7),(%4,%10),(%7,%11)
356 // ==================================
358 // ==================================
359 if (DefPairs
.count(NewUse
))
360 NewUse
= DefPairs
[NewUse
];
361 NewMI
->substituteRegister(DefRegPair
.first
, NewUse
, 0, *TRI
);
363 // DefPairs is updated at last.
364 for (auto &NewDef
: NewDefs
)
365 DefPairs
[NewDef
.first
] = NewDef
.second
;
366 MBB
->push_back(NewMI
);
367 TriMIs
.push_back(NewMI
);
368 TriToOri
[NewMI
] = MI
;
371 // Step 3: The registers used by phis are updated, and they are generated in
372 // the third copy of MBB.
373 // In the privious example, the old phi is:
374 // %1 = phi i32 [%2, %BB.1], [%7, %BB.3]
376 // %1 = phi i32 [%2, %BB.1], [%11, %BB.3]
377 for (auto &Phi
: MBB
->phis()) {
378 for (auto DefRegPair
: DefPairs
)
379 if (Phi
.readsRegister(DefRegPair
.first
, TRI
))
380 Phi
.substituteRegister(DefRegPair
.first
, DefRegPair
.second
, 0, *TRI
);
382 updateLiveIntervals();
385 void WindowScheduler::restoreTripleMBB() {
386 // After list scheduling, the MBB is restored in one traversal.
387 for (size_t I
= 0; I
< TriMIs
.size(); ++I
) {
388 auto *MI
= TriMIs
[I
];
389 auto OldPos
= MBB
->begin();
390 std::advance(OldPos
, I
);
391 auto CurPos
= MI
->getIterator();
392 if (CurPos
!= OldPos
) {
393 MBB
->splice(OldPos
, MBB
, CurPos
);
394 Context
->LIS
->handleMove(*MI
, /*UpdateFlags=*/false);
399 SmallVector
<unsigned> WindowScheduler::getSearchIndexes(unsigned SearchNum
,
400 unsigned SearchRatio
) {
401 // We use SearchRatio to get the index range, and then evenly get the indexes
402 // according to the SearchNum. This is a simple huristic. Depending on the
403 // characteristics of the target, more complex algorithms can be used for both
404 // performance and compilation time.
405 assert(SearchRatio
<= 100 && "SearchRatio should be equal or less than 100!");
406 unsigned MaxIdx
= SchedInstrNum
* SearchRatio
/ 100;
407 unsigned Step
= SearchNum
> 0 && SearchNum
<= MaxIdx
? MaxIdx
/ SearchNum
: 1;
408 SmallVector
<unsigned> SearchIndexes
;
409 for (unsigned Idx
= 0; Idx
< MaxIdx
; Idx
+= Step
)
410 SearchIndexes
.push_back(Idx
);
411 return SearchIndexes
;
414 int WindowScheduler::getEstimatedII(ScheduleDAGInstrs
&DAG
) {
415 // Sometimes MaxDepth is 0, so it should be limited to the minimum of 1.
416 unsigned MaxDepth
= 1;
417 for (auto &SU
: DAG
.SUnits
)
418 MaxDepth
= std::max(SU
.getDepth() + SU
.Latency
, MaxDepth
);
419 return MaxDepth
* WindowIICoeff
;
422 int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs
&DAG
,
424 int InitII
= getEstimatedII(DAG
);
425 ResourceManager
RM(Subtarget
, &DAG
);
427 // ResourceManager and DAG are used to calculate the maximum cycle for the
428 // scheduled MIs. Since MIs in the Region have already been scheduled, the
429 // emit cycles can be estimated in order here.
431 auto Range
= getScheduleRange(Offset
, SchedInstrNum
);
432 for (auto &MI
: Range
) {
433 auto *SU
= DAG
.getSUnit(&MI
);
434 int ExpectCycle
= CurCycle
;
435 // The predecessors of current MI determine its earliest issue cycle.
436 for (auto &Pred
: SU
->Preds
) {
439 auto *PredMI
= Pred
.getSUnit()->getInstr();
440 int PredCycle
= getOriCycle(PredMI
);
441 ExpectCycle
= std::max(ExpectCycle
, PredCycle
+ (int)Pred
.getLatency());
443 // Zero cost instructions do not need to check resource.
444 if (!TII
->isZeroCost(MI
.getOpcode())) {
445 // ResourceManager can be used to detect resource conflicts between the
446 // current MI and the previously inserted MIs.
447 while (!RM
.canReserveResources(*SU
, CurCycle
) || CurCycle
< ExpectCycle
) {
449 if (CurCycle
== (int)WindowIILimit
)
452 RM
.reserveResources(*SU
, CurCycle
);
454 OriToCycle
[getOriMI(&MI
)] = CurCycle
;
455 LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle
<< " [S."
456 << getOriStage(getOriMI(&MI
), Offset
) << "]: " << MI
);
458 LLVM_DEBUG(dbgs() << "MaxCycle is " << CurCycle
<< ".\n");
462 // By utilizing TripleDAG, we can easily establish dependencies between A and B.
463 // Based on the MaxCycle and the issue cycle of A and B, we can determine
464 // whether it is necessary to add a stall cycle. This is because, without
465 // inserting the stall cycle, the latency constraint between A and B cannot be
466 // satisfied. The details are as follows:
469 // ========================================
471 // ======================================== (sliding direction)
475 // ~~~~~~~~~~~~~~~~~~~|~~~~~~~~~~~~~~~~~~~~ ----schedule window-----
477 // ===================V==================== |
478 // MBB copy 2 < MI B > |
481 // ~~~~~~~~~~~~~~~~~~~:~~~~~~~~~~~~~~~~~~~~ ------------------------
483 // ===================V====================
484 // MBB copy 3 < MI B'>
489 // ========================================
491 // ========================================
492 int WindowScheduler::calculateStallCycle(unsigned Offset
, int MaxCycle
) {
493 int MaxStallCycle
= 0;
494 int CurrentII
= MaxCycle
+ 1;
495 auto Range
= getScheduleRange(Offset
, SchedInstrNum
);
496 for (auto &MI
: Range
) {
497 auto *SU
= TripleDAG
->getSUnit(&MI
);
498 int DefCycle
= getOriCycle(&MI
);
499 for (auto &Succ
: SU
->Succs
) {
500 if (Succ
.isWeak() || Succ
.getSUnit() == &TripleDAG
->ExitSU
)
502 // If the expected cycle does not exceed CurrentII, no check is needed.
503 if (DefCycle
+ (int)Succ
.getLatency() <= CurrentII
)
505 // If the cycle of the scheduled MI A is less than that of the scheduled
506 // MI B, the scheduling will fail because the lifetime of the
507 // corresponding register exceeds II.
508 auto *SuccMI
= Succ
.getSUnit()->getInstr();
509 int UseCycle
= getOriCycle(SuccMI
);
510 if (DefCycle
< UseCycle
)
511 return WindowIILimit
;
512 // Get the stall cycle introduced by the register between two trips.
513 int StallCycle
= DefCycle
+ (int)Succ
.getLatency() - CurrentII
- UseCycle
;
514 MaxStallCycle
= std::max(MaxStallCycle
, StallCycle
);
517 LLVM_DEBUG(dbgs() << "MaxStallCycle is " << MaxStallCycle
<< ".\n");
518 return MaxStallCycle
;
521 unsigned WindowScheduler::analyseII(ScheduleDAGInstrs
&DAG
, unsigned Offset
) {
522 LLVM_DEBUG(dbgs() << "Start analyzing II:\n");
523 int MaxCycle
= calculateMaxCycle(DAG
, Offset
);
524 if (MaxCycle
== (int)WindowIILimit
)
526 int StallCycle
= calculateStallCycle(Offset
, MaxCycle
);
527 if (StallCycle
== (int)WindowIILimit
)
529 // The value of II is equal to the maximum execution cycle plus 1.
530 return MaxCycle
+ StallCycle
+ 1;
533 void WindowScheduler::schedulePhi(int Offset
, unsigned &II
) {
534 LLVM_DEBUG(dbgs() << "Start scheduling Phis:\n");
535 for (auto &Phi
: MBB
->phis()) {
536 int LateCycle
= INT_MAX
;
537 auto *SU
= TripleDAG
->getSUnit(&Phi
);
538 for (auto &Succ
: SU
->Succs
) {
539 // Phi doesn't have any Anti successors.
540 if (Succ
.getKind() != SDep::Data
)
542 // Phi is scheduled before the successor of stage 0. The issue cycle of
543 // phi is the latest cycle in this interval.
544 auto *SuccMI
= Succ
.getSUnit()->getInstr();
545 int Cycle
= getOriCycle(SuccMI
);
546 if (getOriStage(getOriMI(SuccMI
), Offset
) == 0)
547 LateCycle
= std::min(LateCycle
, Cycle
);
549 // The anti-dependency of phi need to be handled separately in the same way.
550 if (Register AntiReg
= getAntiRegister(&Phi
)) {
551 auto *AntiMI
= MRI
->getVRegDef(AntiReg
);
552 // AntiReg may be defined outside the kernel MBB.
553 if (AntiMI
->getParent() == MBB
) {
554 auto AntiCycle
= getOriCycle(AntiMI
);
555 if (getOriStage(getOriMI(AntiMI
), Offset
) == 0)
556 LateCycle
= std::min(LateCycle
, AntiCycle
);
559 // If there is no limit to the late cycle, a default value is given.
560 if (LateCycle
== INT_MAX
)
561 LateCycle
= (int)(II
- 1);
562 LLVM_DEBUG(dbgs() << "\tCycle range [0, " << LateCycle
<< "] " << Phi
);
563 // The issue cycle of phi is set to the latest cycle in the interval.
564 auto *OriPhi
= getOriMI(&Phi
);
565 OriToCycle
[OriPhi
] = LateCycle
;
569 DenseMap
<MachineInstr
*, int> WindowScheduler::getIssueOrder(unsigned Offset
,
571 // At each issue cycle, phi is placed before MIs in stage 0. So the simplest
572 // way is to put phi at the beginning of the current cycle.
573 DenseMap
<int, SmallVector
<MachineInstr
*>> CycleToMIs
;
574 auto Range
= getScheduleRange(Offset
, SchedInstrNum
);
575 for (auto &Phi
: MBB
->phis())
576 CycleToMIs
[getOriCycle(&Phi
)].push_back(getOriMI(&Phi
));
577 for (auto &MI
: Range
)
578 CycleToMIs
[getOriCycle(&MI
)].push_back(getOriMI(&MI
));
579 // Each MI is assigned a separate ordered Id, which is used as a sort marker
580 // in the following expand process.
581 DenseMap
<MachineInstr
*, int> IssueOrder
;
583 for (int Cycle
= 0; Cycle
< (int)II
; ++Cycle
) {
584 if (!CycleToMIs
.count(Cycle
))
586 for (auto *MI
: CycleToMIs
[Cycle
])
587 IssueOrder
[MI
] = Id
++;
592 void WindowScheduler::updateScheduleResult(unsigned Offset
, unsigned II
) {
593 // At the first update, Offset is equal to SchedPhiNum. At this time, only
594 // BestII, BestOffset, and BaseII need to be updated.
595 if (Offset
== SchedPhiNum
) {
597 BestOffset
= SchedPhiNum
;
601 // The update will only continue if the II is smaller than BestII and the II
602 // is sufficiently small.
603 if ((II
>= BestII
) || (II
+ WindowDiffLimit
> BaseII
))
607 // Record the result of the current list scheduling, noting that each MI is
608 // stored unordered in SchedResult.
610 auto IssueOrder
= getIssueOrder(Offset
, II
);
611 for (auto &Pair
: OriToCycle
) {
612 assert(IssueOrder
.count(Pair
.first
) && "Cannot find original MI!");
613 SchedResult
.push_back(std::make_tuple(Pair
.first
, Pair
.second
,
614 getOriStage(Pair
.first
, Offset
),
615 IssueOrder
[Pair
.first
]));
619 void WindowScheduler::expand() {
620 // The MIs in the SchedResult are sorted by the issue order ID.
621 llvm::stable_sort(SchedResult
,
622 [](const std::tuple
<MachineInstr
*, int, int, int> &A
,
623 const std::tuple
<MachineInstr
*, int, int, int> &B
) {
624 return std::get
<3>(A
) < std::get
<3>(B
);
626 // Use the scheduling infrastructure for expansion, noting that InstrChanges
627 // is not supported here.
628 DenseMap
<MachineInstr
*, int> Cycles
, Stages
;
629 std::vector
<MachineInstr
*> OrderedInsts
;
630 for (auto &Info
: SchedResult
) {
631 auto *MI
= std::get
<0>(Info
);
632 OrderedInsts
.push_back(MI
);
633 Cycles
[MI
] = std::get
<1>(Info
);
634 Stages
[MI
] = std::get
<2>(Info
);
635 LLVM_DEBUG(dbgs() << "\tCycle " << Cycles
[MI
] << " [S." << Stages
[MI
]
638 ModuloSchedule
MS(*MF
, &Loop
, std::move(OrderedInsts
), std::move(Cycles
),
640 ModuloScheduleExpander
MSE(*MF
, MS
, *Context
->LIS
,
641 ModuloScheduleExpander::InstrChangesTy());
646 void WindowScheduler::updateLiveIntervals() {
647 SmallVector
<Register
, 128> UsedRegs
;
648 for (MachineInstr
&MI
: *MBB
)
649 for (const MachineOperand
&MO
: MI
.operands()) {
650 if (!MO
.isReg() || MO
.getReg() == 0)
652 Register Reg
= MO
.getReg();
653 if (!is_contained(UsedRegs
, Reg
))
654 UsedRegs
.push_back(Reg
);
656 Context
->LIS
->repairIntervalsInRange(MBB
, MBB
->begin(), MBB
->end(), UsedRegs
);
659 iterator_range
<MachineBasicBlock::iterator
>
660 WindowScheduler::getScheduleRange(unsigned Offset
, unsigned Num
) {
661 auto RegionBegin
= MBB
->begin();
662 std::advance(RegionBegin
, Offset
);
663 auto RegionEnd
= RegionBegin
;
664 std::advance(RegionEnd
, Num
);
665 return make_range(RegionBegin
, RegionEnd
);
668 int WindowScheduler::getOriCycle(MachineInstr
*NewMI
) {
669 assert(TriToOri
.count(NewMI
) && "Cannot find original MI!");
670 auto *OriMI
= TriToOri
[NewMI
];
671 assert(OriToCycle
.count(OriMI
) && "Cannot find schedule cycle!");
672 return OriToCycle
[OriMI
];
675 MachineInstr
*WindowScheduler::getOriMI(MachineInstr
*NewMI
) {
676 assert(TriToOri
.count(NewMI
) && "Cannot find original MI!");
677 return TriToOri
[NewMI
];
680 unsigned WindowScheduler::getOriStage(MachineInstr
*OriMI
, unsigned Offset
) {
681 assert(llvm::find(OriMIs
, OriMI
) != OriMIs
.end() &&
682 "Cannot find OriMI in OriMIs!");
683 // If there is no instruction fold, all MI stages are 0.
684 if (Offset
== SchedPhiNum
)
686 // For those MIs with an ID less than the Offset, their stages are set to 0,
687 // while the rest are set to 1.
689 for (auto *MI
: OriMIs
) {
690 if (MI
->isMetaInstruction())
696 return Id
>= (size_t)Offset
? 1 : 0;
699 Register
WindowScheduler::getAntiRegister(MachineInstr
*Phi
) {
700 assert(Phi
->isPHI() && "Expecting PHI!");
702 for (auto MO
: Phi
->uses()) {
704 AntiReg
= MO
.getReg();
705 else if (MO
.isMBB() && MO
.getMBB() == MBB
)