//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ModuloSchedule.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "pipeliner"

using namespace llvm;
void ModuloSchedule::print(raw_ostream &OS) {
  for (MachineInstr *MI : ScheduledInstrs)
    OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
}
//===----------------------------------------------------------------------===//
// ModuloScheduleExpander implementation
//===----------------------------------------------------------------------===//
/// Return the register values for the operands of a Phi instruction.
/// This function assumes the instruction is a Phi.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
                       unsigned &InitVal, unsigned &LoopVal) {
  assert(Phi.isPHI() && "Expecting a Phi.");

  InitVal = 0;
  LoopVal = 0;
  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
    if (Phi.getOperand(i + 1).getMBB() != Loop)
      InitVal = Phi.getOperand(i).getReg();
    else
      LoopVal = Phi.getOperand(i).getReg();

  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
}
/// Return the Phi register value that comes from the incoming block.
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
    if (Phi.getOperand(i + 1).getMBB() != LoopBB)
      return Phi.getOperand(i).getReg();
  return 0;
}
/// Return the Phi register value that comes from the loop block.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
    if (Phi.getOperand(i + 1).getMBB() == LoopBB)
      return Phi.getOperand(i).getReg();
  return 0;
}
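
/// Expand the schedule into an explicitly pipelined loop: record the maximum
/// stage difference seen for each register definition, then emit the prolog,
/// kernel, and epilog blocks.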
void ModuloScheduleExpander::expand() {
  BB = Schedule.getLoop()->getTopBlock();
  Preheader = *BB->pred_begin();
  if (Preheader == BB)
    Preheader = *std::next(BB->pred_begin());

  // Iterate over the definitions in each instruction, and compute the
  // stage difference for each use. Keep the maximum value.
  for (MachineInstr *MI : Schedule.getInstructions()) {
    int DefStage = Schedule.getStage(MI);
    for (const MachineOperand &Op : MI->all_defs()) {
      Register Reg = Op.getReg();
      unsigned MaxDiff = 0;
      bool PhiIsSwapped = false;
      for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
        MachineInstr *UseMI = UseOp.getParent();
        int UseStage = Schedule.getStage(UseMI);
        unsigned Diff = 0;
        if (UseStage != -1 && UseStage >= DefStage)
          Diff = UseStage - DefStage;
        if (MI->isPHI()) {
          if (isLoopCarried(*MI))
            ++Diff;
          else
            PhiIsSwapped = true;
        }
        MaxDiff = std::max(Diff, MaxDiff);
      }
      RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
    }
  }

  generatePipelinedLoop();
}
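
/// Build the pipelined loop itself: clone the scheduled instructions into a
/// new kernel block, generate the surrounding prolog and epilog blocks, and
/// create the Phis that carry values between them.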
void ModuloScheduleExpander::generatePipelinedLoop() {
  LoopInfo = TII->analyzeLoopForPipelining(BB);
  assert(LoopInfo && "Must be able to analyze loop!");

  // Create a new basic block for the kernel and add it to the CFG.
  MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());

  unsigned MaxStageCount = Schedule.getNumStages() - 1;

  // Remember the registers that are used in different stages. The index is
  // the iteration, or stage, that the instruction is scheduled in. This is
  // a map between register names in the original block and the names created
  // in each stage of the pipelined loop.
  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];

  // The renaming destination by Phis for the registers across stages.
  // This map is updated during Phis generation to point to the most recent
  // renaming destination.
  ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];

  SmallVector<MachineBasicBlock *, 4> PrologBBs;

  // Generate the prolog instructions that set up the pipeline.
  generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
  MF.insert(BB->getIterator(), KernelBB);

  // Rearrange the instructions to generate the new, pipelined loop,
  // and update register names as needed.
  for (MachineInstr *CI : Schedule.getInstructions()) {
    if (CI->isPHI())
      continue;
    unsigned StageNum = Schedule.getStage(CI);
    MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
    updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
    KernelBB->push_back(NewMI);
    InstrMap[NewMI] = CI;
  }

  // Copy any terminator instructions to the new kernel, and update
  // names as needed.
  for (MachineInstr &MI : BB->terminators()) {
    MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
    updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
    KernelBB->push_back(NewMI);
    InstrMap[NewMI] = &MI;
  }

  NewKernel = KernelBB;
  KernelBB->transferSuccessors(BB);
  KernelBB->replaceSuccessor(BB, KernelBB);

  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
                       InstrMap, MaxStageCount, MaxStageCount, false);
  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
               InstrMap, MaxStageCount, MaxStageCount, false);

  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););

  SmallVector<MachineBasicBlock *, 4> EpilogBBs;
  // Generate the epilog instructions to complete the pipeline.
  generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
                 PrologBBs);

  // We need this step because the register allocation doesn't handle some
  // situations well, so we insert copies to help out.
  splitLifetimes(KernelBB, EpilogBBs);

  // Remove dead instructions due to loop induction variables.
  removeDeadInstructions(KernelBB, EpilogBBs);

  // Add branches between prolog and epilog blocks.
  addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);

  delete[] VRMap;
  delete[] VRMapPhi;
}
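
/// Erase the original loop block once the expanded prolog/kernel/epilog
/// blocks have replaced it.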
void ModuloScheduleExpander::cleanup() {
  // Remove the original loop since it's no longer referenced.
  for (auto &I : *BB)
    LIS.RemoveMachineInstrFromMaps(I);
  BB->clear();
  BB->eraseFromParent();
}
/// Generate the pipeline prolog code.
void ModuloScheduleExpander::generateProlog(unsigned LastStage,
                                            MachineBasicBlock *KernelBB,
                                            ValueMapTy *VRMap,
                                            MBBVectorTy &PrologBBs) {
  MachineBasicBlock *PredBB = Preheader;

  // Generate a basic block for each stage, not including the last stage,
  // which will be generated in the kernel. Each basic block may contain
  // instructions from multiple stages/iterations.
  for (unsigned i = 0; i < LastStage; ++i) {
    // Create and insert the prolog basic block prior to the original loop
    // basic block. The original loop is removed later.
    MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
    PrologBBs.push_back(NewBB);
    MF.insert(BB->getIterator(), NewBB);
    NewBB->transferSuccessors(PredBB);
    PredBB->addSuccessor(NewBB);
    PredBB = NewBB;

    // Generate instructions for each appropriate stage. Process instructions
    // in original program order.
    for (int StageNum = i; StageNum >= 0; --StageNum) {
      for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
                                       BBE = BB->getFirstTerminator();
           BBI != BBE; ++BBI) {
        if (Schedule.getStage(&*BBI) == StageNum) {
          if (BBI->isPHI())
            continue;
          MachineInstr *NewMI =
              cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
          updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
          NewBB->push_back(NewMI);
          InstrMap[NewMI] = &*BBI;
        }
      }
    }
    rewritePhiValues(NewBB, i, VRMap, InstrMap);
    LLVM_DEBUG({
      dbgs() << "prolog:\n";
      NewBB->dump();
    });
  }

  PredBB->replaceSuccessor(BB, KernelBB);

  // Check if we need to remove the branch from the preheader to the original
  // loop, and replace it with a branch to the new loop.
  unsigned numBranches = TII->removeBranch(*Preheader);
  if (numBranches) {
    SmallVector<MachineOperand, 0> Cond;
    TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
  }
}
/// Generate the pipeline epilog code. The epilog code finishes the iterations
/// that were started in either the prolog or the kernel. We create a basic
/// block for each stage that needs to complete.
void ModuloScheduleExpander::generateEpilog(
    unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
    ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
    MBBVectorTy &PrologBBs) {
  // We need to change the branch from the kernel to the first epilog block, so
  // this call to analyze branch uses the kernel rather than the original BB.
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
  (void)checkBranch;

  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
  if (*LoopExitI == KernelBB)
    ++LoopExitI;
  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
  MachineBasicBlock *LoopExitBB = *LoopExitI;

  MachineBasicBlock *PredBB = KernelBB;
  MachineBasicBlock *EpilogStart = LoopExitBB;

  // Generate a basic block for each stage, not including the last stage,
  // which was generated for the kernel. Each basic block may contain
  // instructions from multiple stages/iterations.
  int EpilogStage = LastStage + 1;
  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
    MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
    EpilogBBs.push_back(NewBB);
    MF.insert(BB->getIterator(), NewBB);

    PredBB->replaceSuccessor(LoopExitBB, NewBB);
    NewBB->addSuccessor(LoopExitBB);

    if (EpilogStart == LoopExitBB)
      EpilogStart = NewBB;

    // Add instructions to the epilog depending on the current block.
    // Process instructions in original program order.
    for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
      for (auto &BBI : *BB) {
        if (BBI.isPHI())
          continue;
        MachineInstr *In = &BBI;
        if ((unsigned)Schedule.getStage(In) == StageNum) {
          // Instructions with memoperands in the epilog are updated with
          // conservative values.
          MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
          updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
          NewBB->push_back(NewMI);
          InstrMap[NewMI] = In;
        }
      }
    }
    generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
                         InstrMap, LastStage, EpilogStage, i == 1);
    generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
                 InstrMap, LastStage, EpilogStage, i == 1);
    PredBB = NewBB;
    LLVM_DEBUG({
      dbgs() << "epilog:\n";
      NewBB->dump();
    });
  }

  // Fix any Phi nodes in the loop exit block.
  LoopExitBB->replacePhiUsesWith(BB, PredBB);

  // Create a branch to the new epilog from the kernel.
  // Remove the original branch and add a new branch to the epilog.
  TII->removeBranch(*KernelBB);
  assert((OrigBB == TBB || OrigBB == FBB) &&
         "Unable to determine looping branch direction");
  if (OrigBB != TBB)
    TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
  else
    TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
  // Add a branch to the loop exit.
  if (EpilogBBs.size() > 0) {
    MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
    SmallVector<MachineOperand, 4> Cond1;
    TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
  }
}
/// Replace all uses of FromReg that appear outside the specified
/// basic block with ToReg.
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
                                    MachineBasicBlock *MBB,
                                    MachineRegisterInfo &MRI,
                                    LiveIntervals &LIS) {
  for (MachineOperand &O :
       llvm::make_early_inc_range(MRI.use_operands(FromReg)))
    if (O.getParent()->getParent() != MBB)
      O.setReg(ToReg);
  if (!LIS.hasInterval(ToReg))
    LIS.createEmptyInterval(ToReg);
}
/// Return true if the register has a use that occurs outside the
/// loop.
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
                            MachineRegisterInfo &MRI) {
  for (const MachineOperand &MO : MRI.use_operands(Reg))
    if (MO.getParent()->getParent() != BB)
      return true;
  return false;
}
/// Generate Phis for the specific block in the generated pipelined code.
/// This function looks at the Phis from the original code to guide the
/// creation of new Phis.
void ModuloScheduleExpander::generateExistingPhis(
    MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
    MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
    unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
  // Compute the stage number for the initial value of the Phi, which
  // comes from the prolog. The prolog to use depends on to which kernel/
  // epilog that we're adding the Phi.
  unsigned PrologStage = 0;
  unsigned PrevStage = 0;
  bool InKernel = (LastStageNum == CurStageNum);
  if (InKernel) {
    PrologStage = LastStageNum - 1;
    PrevStage = CurStageNum;
  } else {
    PrologStage = LastStageNum - (CurStageNum - LastStageNum);
    PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
  }

  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
                                   BBE = BB->getFirstNonPHI();
       BBI != BBE; ++BBI) {
    Register Def = BBI->getOperand(0).getReg();

    unsigned InitVal = 0;
    unsigned LoopVal = 0;
    getPhiRegs(*BBI, BB, InitVal, LoopVal);

    unsigned PhiOp1 = 0;
    // The Phi value from the loop body typically is defined in the loop, but
    // not always. So, we need to check if the value is defined in the loop.
    unsigned PhiOp2 = LoopVal;
    if (VRMap[LastStageNum].count(LoopVal))
      PhiOp2 = VRMap[LastStageNum][LoopVal];

    int StageScheduled = Schedule.getStage(&*BBI);
    int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
    unsigned NumStages = getStagesForReg(Def, CurStageNum);
    if (NumStages == 0) {
      // We don't need to generate a Phi anymore, but we need to rename any uses
      // of the Phi value.
      unsigned NewReg = VRMap[PrevStage][LoopVal];
      rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
                            InitVal, NewReg);
      if (VRMap[CurStageNum].count(LoopVal))
        VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
    }
    // Adjust the number of Phis needed depending on the number of prologs left,
    // and the distance from where the Phi is first scheduled. The number of
    // Phis cannot exceed the number of prolog stages. Each stage can
    // potentially define two values.
    unsigned MaxPhis = PrologStage + 2;
    if (!InKernel && (int)PrologStage <= LoopValStage)
      MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
    unsigned NumPhis = std::min(NumStages, MaxPhis);

    unsigned NewReg = 0;
    unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
    // In the epilog, we may need to look back one stage to get the correct
    // Phi name, because the epilog and prolog blocks execute the same stage.
    // The correct name is from the previous block only when the Phi has
    // been completely scheduled prior to the epilog, and Phi value is not
    // needed in multiple stages.
    int StageDiff = 0;
    if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
        NumPhis <= 1)
      StageDiff = 1;
    // Adjust the computations below when the phi and the loop definition
    // are scheduled in different stages.
    if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
      StageDiff = StageScheduled - LoopValStage;
    for (unsigned np = 0; np < NumPhis; ++np) {
      // If the Phi hasn't been scheduled, then use the initial Phi operand
      // value. Otherwise, use the scheduled version of the instruction. This
      // is a little complicated when a Phi references another Phi.
      if (np > PrologStage || StageScheduled >= (int)LastStageNum)
        PhiOp1 = InitVal;
      // Check if the Phi has already been scheduled in a prolog stage.
      else if (PrologStage >= AccessStage + StageDiff + np &&
               VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
        PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
      // Check if the Phi has already been scheduled, but the loop instruction
      // is either another Phi, or doesn't occur in the loop.
      else if (PrologStage >= AccessStage + StageDiff + np) {
        // If the Phi references another Phi, we need to examine the other
        // Phi to get the correct value.
        PhiOp1 = LoopVal;
        MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
        int Indirects = 1;
        while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
          int PhiStage = Schedule.getStage(InstOp1);
          if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
            PhiOp1 = getInitPhiReg(*InstOp1, BB);
          else
            PhiOp1 = getLoopPhiReg(*InstOp1, BB);
          InstOp1 = MRI.getVRegDef(PhiOp1);
          int PhiOpStage = Schedule.getStage(InstOp1);
          int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
          if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
              VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
            PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
            break;
          }
          ++Indirects;
        }
      } else
        PhiOp1 = InitVal;
      // If this references a generated Phi in the kernel, get the Phi operand
      // from the incoming block.
      if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
        if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
          PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);

      MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
      bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
      // In the epilog, a map lookup is needed to get the value from the kernel,
      // or previous epilog block. How this is done depends on whether the
      // instruction is scheduled in the previous block.
      if (!InKernel) {
        int StageDiffAdj = 0;
        if (LoopValStage != -1 && StageScheduled > LoopValStage)
          StageDiffAdj = StageScheduled - LoopValStage;
        // Use the loop value defined in the kernel, unless the kernel
        // contains the last definition of the Phi.
        if (np == 0 && PrevStage == LastStageNum &&
            (StageScheduled != 0 || LoopValStage != 0) &&
            VRMap[PrevStage - StageDiffAdj].count(LoopVal))
          PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
        // Use the value defined by the Phi. We add one because we switch
        // from looking at the loop value to the Phi definition.
        else if (np > 0 && PrevStage == LastStageNum &&
                 VRMap[PrevStage - np + 1].count(Def))
          PhiOp2 = VRMap[PrevStage - np + 1][Def];
        // Use the loop value defined in the kernel.
        else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
                 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
          PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
        // Use the value defined by the Phi, unless we're generating the first
        // epilog and the Phi refers to a Phi in a different stage.
        else if (VRMap[PrevStage - np].count(Def) &&
                 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
                  (LoopValStage == StageScheduled)))
          PhiOp2 = VRMap[PrevStage - np][Def];
      }
      // Check if we can reuse an existing Phi. This occurs when a Phi
      // references another Phi, and the other Phi is scheduled in an
      // earlier stage. We can try to reuse an existing Phi up until the last
      // stage of the current Phi.
      if (LoopDefIsPhi) {
        if (static_cast<int>(PrologStage - np) >= StageScheduled) {
          int LVNumStages = getStagesForPhi(LoopVal);
          int StageDiff = (StageScheduled - LoopValStage);
          LVNumStages -= StageDiff;
          // Make sure the loop value Phi has been processed already.
          if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
            NewReg = PhiOp2;
            unsigned ReuseStage = CurStageNum;
            if (isLoopCarried(*PhiInst))
              ReuseStage -= LVNumStages;
            // Check if the Phi to reuse has been generated yet. If not, then
            // there is nothing to reuse.
            if (VRMap[ReuseStage - np].count(LoopVal)) {
              NewReg = VRMap[ReuseStage - np][LoopVal];

              rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
                                    Def, NewReg);
              // Update the map with the new Phi name.
              VRMap[CurStageNum - np][Def] = NewReg;
              PhiOp2 = NewReg;
              if (VRMap[LastStageNum - np - 1].count(LoopVal))
                PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];

              if (IsLast && np == NumPhis - 1)
                replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
              continue;
            }
          }
        }
        if (InKernel && StageDiff > 0 &&
            VRMap[CurStageNum - StageDiff - np].count(LoopVal))
          PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
      }

      const TargetRegisterClass *RC = MRI.getRegClass(Def);
      NewReg = MRI.createVirtualRegister(RC);

      MachineInstrBuilder NewPhi =
          BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
                  TII->get(TargetOpcode::PHI), NewReg);
      NewPhi.addReg(PhiOp1).addMBB(BB1);
      NewPhi.addReg(PhiOp2).addMBB(BB2);
      if (np == 0)
        InstrMap[NewPhi] = &*BBI;

      // We define the Phis after creating the new pipelined code, so
      // we need to rename the Phi values in scheduled instructions.

      unsigned PrevReg = 0;
      if (InKernel && VRMap[PrevStage - np].count(LoopVal))
        PrevReg = VRMap[PrevStage - np][LoopVal];
      rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
                            NewReg, PrevReg);
      // If the Phi has been scheduled, use the new name for rewriting.
      if (VRMap[CurStageNum - np].count(Def)) {
        unsigned R = VRMap[CurStageNum - np][Def];
        rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
                              NewReg);
      }

      // Check if we need to rename any uses that occur after the loop. The
      // register to replace depends on whether the Phi is scheduled in the
      // kernel, or in the epilog.
      if (IsLast && np == NumPhis - 1)
        replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);

      // In the kernel, a dependent Phi uses the value from this Phi.
      if (InKernel)
        PhiOp2 = NewReg;

      // Update the map with the new Phi name.
      VRMap[CurStageNum - np][Def] = NewReg;
    }

    while (NumPhis++ < NumStages) {
      rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
                            NewReg, 0);
    }

    // Check if we need to rename a Phi that has been eliminated due to
    // scheduling.
    if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
      replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
  }
}
/// Generate Phis for the specified block in the generated pipelined code.
/// These are new Phis needed because the definition is scheduled after the
/// use in the pipelined sequence.
void ModuloScheduleExpander::generatePhis(
    MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
    MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
    InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
    bool IsLast) {
  // Compute the stage number that contains the initial Phi value, and
  // the Phi from the previous stage.
  unsigned PrologStage = 0;
  unsigned PrevStage = 0;
  unsigned StageDiff = CurStageNum - LastStageNum;
  bool InKernel = (StageDiff == 0);
  if (InKernel) {
    PrologStage = LastStageNum - 1;
    PrevStage = CurStageNum;
  } else {
    PrologStage = LastStageNum - StageDiff;
    PrevStage = LastStageNum + StageDiff - 1;
  }

  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
                                   BBE = BB->instr_end();
       BBI != BBE; ++BBI) {
    for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = BBI->getOperand(i);
      if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
        continue;

      int StageScheduled = Schedule.getStage(&*BBI);
      assert(StageScheduled != -1 && "Expecting scheduled instruction.");
      Register Def = MO.getReg();
      unsigned NumPhis = getStagesForReg(Def, CurStageNum);
      // An instruction scheduled in stage 0 and used after the loop
      // requires a phi in the epilog for the last definition from either
      // the kernel or prolog.
      if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
          hasUseAfterLoop(Def, BB, MRI))
        NumPhis = 1;
      if (!InKernel && (unsigned)StageScheduled > PrologStage)
        continue;

      unsigned PhiOp2 = 0;
      if (InKernel) {
        PhiOp2 = VRMap[PrevStage][Def];
        if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
          if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
            PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
      }
      // The number of Phis can't exceed the number of prolog stages. The
      // prolog stage number is zero based.
      if (NumPhis > PrologStage + 1 - StageScheduled)
        NumPhis = PrologStage + 1 - StageScheduled;
      for (unsigned np = 0; np < NumPhis; ++np) {
        // Example:
        //   %Org = ... (Scheduled at Stage#0, NumPhi = 2)
        //
        //   Kernel:
        //     %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
        //     %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
        //   Epilog0:
        //     %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
        //     %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
        //   Epilog1:
        //     %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
        //
        //   VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
        //   VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
        //   VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
        unsigned PhiOp1 = VRMap[PrologStage][Def];
        if (np <= PrologStage)
          PhiOp1 = VRMap[PrologStage - np][Def];
        if (!InKernel) {
          if (PrevStage == LastStageNum && np == 0)
            PhiOp2 = VRMap[LastStageNum][Def];
          else
            PhiOp2 = VRMapPhi[PrevStage - np][Def];
        }

        const TargetRegisterClass *RC = MRI.getRegClass(Def);
        Register NewReg = MRI.createVirtualRegister(RC);

        MachineInstrBuilder NewPhi =
            BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
                    TII->get(TargetOpcode::PHI), NewReg);
        NewPhi.addReg(PhiOp1).addMBB(BB1);
        NewPhi.addReg(PhiOp2).addMBB(BB2);

        InstrMap[NewPhi] = &*BBI;

        // Rewrite uses and update the map. The actions depend upon whether
        // we are generating code for the kernel or epilog blocks.
        if (InKernel) {
          rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
                                NewReg);
          rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
                                NewReg);

          PhiOp2 = NewReg;
          VRMapPhi[PrevStage - np - 1][Def] = NewReg;
        } else {
          VRMapPhi[CurStageNum - np][Def] = NewReg;
          if (np == NumPhis - 1)
            rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
                                  NewReg);
        }
        if (IsLast && np == NumPhis - 1)
          replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
      }
    }
  }
}
/// Remove instructions that generate values with no uses.
/// Typically, these are induction variable operations that generate values
/// used in the loop itself. A dead instruction has a definition with
/// no uses, or uses that occur in the original loop only.
void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
                                                    MBBVectorTy &EpilogBBs) {
  // For each epilog block, check that the value defined by each instruction
  // is used. If not, delete it.
  for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
    for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(),
                                                   ME = MBB->instr_rend();
         MI != ME;) {
      // From DeadMachineInstructionElem. Don't delete inline assembly.
      if (MI->isInlineAsm()) {
        ++MI;
        continue;
      }
      bool SawStore = false;
      // Check if it's safe to remove the instruction due to side effects.
      // We can, and want to, remove Phis here.
      if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
        ++MI;
        continue;
      }
      bool used = true;
      for (const MachineOperand &MO : MI->all_defs()) {
        Register reg = MO.getReg();
        // Assume physical registers are used, unless they are marked dead.
        if (reg.isPhysical()) {
          used = !MO.isDead();
          if (used)
            break;
          continue;
        }
        unsigned realUses = 0;
        for (const MachineOperand &U : MRI.use_operands(reg)) {
          // Check if there are any uses that occur only in the original
          // loop. If so, that's not a real use.
          if (U.getParent()->getParent() != BB) {
            realUses++;
            used = true;
            break;
          }
        }
        if (realUses > 0)
          break;
        used = false;
      }
      if (!used) {
        LIS.RemoveMachineInstrFromMaps(*MI);
        MI++->eraseFromParent();
        continue;
      }
      ++MI;
    }
  // In the kernel block, check if we can remove a Phi that generates a value
  // used in an instruction removed in the epilog block.
  for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
    Register reg = MI.getOperand(0).getReg();
    if (MRI.use_begin(reg) == MRI.use_end()) {
      LIS.RemoveMachineInstrFromMaps(MI);
      MI.eraseFromParent();
    }
  }
}
/// For loop carried definitions, we split the lifetime of a virtual register
/// that has uses past the definition in the next iteration. A copy with a new
/// virtual register is inserted before the definition, which helps with
/// generating a better register assignment.
///
///   v1 = phi(a, v2)     v1 = phi(a, v2)
///   v2 = phi(b, v3)     v2 = phi(b, v3)
///   v3 = ..             v4 = copy v1
void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
                                            MBBVectorTy &EpilogBBs) {
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  for (auto &PHI : KernelBB->phis()) {
    Register Def = PHI.getOperand(0).getReg();
    // Check for any Phi definition that is used as an operand of another Phi
    // in the same block.
    for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
                                                 E = MRI.use_instr_end();
         I != E; ++I) {
      if (I->isPHI() && I->getParent() == KernelBB) {
        // Get the loop carried definition.
        unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
        if (!LCDef)
          continue;
        MachineInstr *MI = MRI.getVRegDef(LCDef);
        if (!MI || MI->getParent() != KernelBB || MI->isPHI())
          continue;
        // Search through the rest of the block looking for uses of the Phi
        // definition. If one occurs, then split the lifetime.
        unsigned SplitReg = 0;
        for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
                                    KernelBB->instr_end()))
          if (BBJ.readsRegister(Def)) {
            // We split the lifetime when we find the first use.
            if (SplitReg == 0) {
              SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
              BuildMI(*KernelBB, MI, MI->getDebugLoc(),
                      TII->get(TargetOpcode::COPY), SplitReg)
                  .addReg(Def);
            }
            BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
          }
        if (!SplitReg)
          continue;
        // Search through each of the epilog blocks for any uses to be renamed.
        for (auto &Epilog : EpilogBBs)
          for (auto &I : *Epilog)
            if (I.readsRegister(Def))
              I.substituteRegister(Def, SplitReg, 0, *TRI);
        break;
      }
    }
  }
}
/// Remove the incoming block from the Phis in a basic block.
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
  for (MachineInstr &MI : *BB) {
    if (!MI.isPHI())
      break;
    for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
      if (MI.getOperand(i + 1).getMBB() == Incoming) {
        MI.removeOperand(i + 1);
        MI.removeOperand(i);
        break;
      }
  }
}
/// Create branches from each prolog basic block to the appropriate epilog
/// block. These edges are needed if the loop ends before reaching the
/// kernel.
void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
                                         MBBVectorTy &PrologBBs,
                                         MachineBasicBlock *KernelBB,
                                         MBBVectorTy &EpilogBBs,
                                         ValueMapTy *VRMap) {
  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
  MachineBasicBlock *LastPro = KernelBB;
  MachineBasicBlock *LastEpi = KernelBB;

  // Start from the blocks connected to the kernel and work "out"
  // to the first prolog and the last epilog blocks.
  SmallVector<MachineInstr *, 4> PrevInsts;
  unsigned MaxIter = PrologBBs.size() - 1;
  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
    // Add branches to the prolog that go to the corresponding
    // epilog, and the fall-thru prolog/kernel block.
    MachineBasicBlock *Prolog = PrologBBs[j];
    MachineBasicBlock *Epilog = EpilogBBs[i];

    SmallVector<MachineOperand, 4> Cond;
    std::optional<bool> StaticallyGreater =
        LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
    unsigned numAdded = 0;
    if (!StaticallyGreater) {
      Prolog->addSuccessor(Epilog);
      numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
    } else if (*StaticallyGreater == false) {
      Prolog->addSuccessor(Epilog);
      Prolog->removeSuccessor(LastPro);
      LastEpi->removeSuccessor(Epilog);
      numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
      removePhis(Epilog, LastEpi);
      // Remove the blocks that are no longer referenced.
      if (LastPro != LastEpi) {
        LastEpi->clear();
        LastEpi->eraseFromParent();
      }
      if (LastPro == KernelBB) {
        LoopInfo->disposed();
        NewKernel = nullptr;
      }
      LastPro->clear();
      LastPro->eraseFromParent();
    } else {
      numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
      removePhis(Epilog, Prolog);
    }
    LastPro = Prolog;
    LastEpi = Epilog;
    for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
                                                   E = Prolog->instr_rend();
         I != E && numAdded > 0; ++I, --numAdded)
      updateInstruction(&*I, false, j, 0, VRMap);
  }

  if (NewKernel) {
    LoopInfo->setPreheader(PrologBBs[MaxIter]);
    LoopInfo->adjustTripCount(-(MaxIter + 1));
  }
}
/// Return true if we can compute the amount the instruction changes
/// during each iteration. Set Delta to the amount of the change.
bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const MachineOperand *BaseOp;
  int64_t Offset;
  bool OffsetIsScalable;
  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
    return false;

  // FIXME: This algorithm assumes instructions have fixed-size offsets.
  if (OffsetIsScalable)
    return false;

  if (!BaseOp->isReg())
    return false;

  Register BaseReg = BaseOp->getReg();

  MachineRegisterInfo &MRI = MF.getRegInfo();
  // Check if there is a Phi. If so, get the definition in the loop.
  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
  if (BaseDef && BaseDef->isPHI()) {
    BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
    BaseDef = MRI.getVRegDef(BaseReg);
  }
  if (!BaseDef)
    return false;

  int D = 0;
  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
    return false;

  Delta = D;
  return true;
}
/// Update the memory operand with a new offset when the pipeliner
/// generates a new copy of the instruction that refers to a
/// different memory location.
void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
                                               MachineInstr &OldMI,
                                               unsigned Num) {
  if (Num == 0)
    return;
  // If the instruction has memory operands, then adjust the offset
  // when the instruction appears in different stages.
  if (NewMI.memoperands_empty())
    return;
  SmallVector<MachineMemOperand *, 2> NewMMOs;
  for (MachineMemOperand *MMO : NewMI.memoperands()) {
    // TODO: Figure out whether isAtomic is really necessary (see D57601).
    if (MMO->isVolatile() || MMO->isAtomic() ||
        (MMO->isInvariant() && MMO->isDereferenceable()) ||
        (!MMO->getValue())) {
      NewMMOs.push_back(MMO);
      continue;
    }
    unsigned Delta;
    if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
      int64_t AdjOffset = Delta * Num;
      NewMMOs.push_back(
          MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
    } else {
      NewMMOs.push_back(
          MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
    }
  }
  NewMI.setMemRefs(MF, NewMMOs);
}
/// Clone the instruction for the new pipelined loop and update the
/// memory operands, if needed.
MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
                                                 unsigned CurStageNum,
                                                 unsigned InstStageNum) {
  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
  return NewMI;
}
/// Clone the instruction for the new pipelined loop. If needed, this
/// function updates the instruction using the values saved in the
/// InstrChanges structure.
MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
    MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
  auto It = InstrChanges.find(OldMI);
  if (It != InstrChanges.end()) {
    std::pair<unsigned, int64_t> RegAndOffset = It->second;
    unsigned BasePos, OffsetPos;
    if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
      return nullptr;
    int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
    MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
    if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
      NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
    NewMI->getOperand(OffsetPos).setImm(NewOffset);
  }
  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
  return NewMI;
}
/// Update the machine instruction with new virtual registers. This
/// function may change the definitions and/or uses.
void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
                                               bool LastDef,
                                               unsigned CurStageNum,
                                               unsigned InstrStageNum,
                                               ValueMapTy *VRMap) {
  for (MachineOperand &MO : NewMI->operands()) {
    if (!MO.isReg() || !MO.getReg().isVirtual())
      continue;
    Register reg = MO.getReg();
    if (MO.isDef()) {
      // Create a new virtual register for the definition.
      const TargetRegisterClass *RC = MRI.getRegClass(reg);
      Register NewReg = MRI.createVirtualRegister(RC);
      MO.setReg(NewReg);
      VRMap[CurStageNum][reg] = NewReg;
      if (LastDef)
        replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
    } else if (MO.isUse()) {
      MachineInstr *Def = MRI.getVRegDef(reg);
      // Compute the stage that contains the last definition for instruction.
      int DefStageNum = Schedule.getStage(Def);
      unsigned StageNum = CurStageNum;
      if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
        // Compute the difference in stages between the definition and the use.
        unsigned StageDiff = (InstrStageNum - DefStageNum);
        // Make an adjustment to get the last definition.
        StageNum -= StageDiff;
      }
      if (VRMap[StageNum].count(reg))
        MO.setReg(VRMap[StageNum][reg]);
    }
  }
}
/// Return the instruction in the loop that defines the register.
/// If the definition is a Phi, then follow the Phi operand to
/// the instruction in the loop.
MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
  SmallPtrSet<MachineInstr *, 8> Visited;
  MachineInstr *Def = MRI.getVRegDef(Reg);
  while (Def->isPHI()) {
    if (!Visited.insert(Def).second)
      break;
    for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
      if (Def->getOperand(i + 1).getMBB() == BB) {
        Def = MRI.getVRegDef(Def->getOperand(i).getReg());
        break;
      }
  }
  return Def;
}
/// Return the new name for the value from the previous stage.
unsigned ModuloScheduleExpander::getPrevMapVal(
    unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
    ValueMapTy *VRMap, MachineBasicBlock *BB) {
  unsigned PrevVal = 0;
  if (StageNum > PhiStage) {
    MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
    if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
      // The name is defined in the previous stage.
      PrevVal = VRMap[StageNum - 1][LoopVal];
    else if (VRMap[StageNum].count(LoopVal))
      // The previous name is defined in the current stage when the instruction
      // order is swapped.
      PrevVal = VRMap[StageNum][LoopVal];
    else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
      // The loop value hasn't yet been scheduled.
      PrevVal = LoopVal;
    else if (StageNum == PhiStage + 1)
      // The loop value is another phi, which has not been scheduled.
      PrevVal = getInitPhiReg(*LoopInst, BB);
    else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
      // The loop value is another phi, which has been scheduled.
      PrevVal =
          getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
                        LoopStage, VRMap, BB);
  }
  return PrevVal;
}
/// Rewrite the Phi values in the specified block to use the mappings
/// from the initial operand. Once the Phi is scheduled, we switch
/// to using the loop value instead of the Phi value, so those names
/// do not need to be rewritten.
void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
                                              unsigned StageNum,
                                              ValueMapTy *VRMap,
                                              InstrMapTy &InstrMap) {
  for (auto &PHI : BB->phis()) {
    unsigned InitVal = 0;
    unsigned LoopVal = 0;
    getPhiRegs(PHI, BB, InitVal, LoopVal);
    Register PhiDef = PHI.getOperand(0).getReg();

    unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
    unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
    unsigned NumPhis = getStagesForPhi(PhiDef);
    if (NumPhis > StageNum)
      NumPhis = StageNum;
    for (unsigned np = 0; np <= NumPhis; ++np) {
      unsigned NewVal =
          getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
      if (!NewVal)
        NewVal = InitVal;
      rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
                            NewVal);
    }
  }
}
/// Rewrite a previously scheduled instruction to use the register value
/// from the new instruction. Make sure the instruction occurs in the
/// basic block, and we don't change the uses in the new instruction.
void ModuloScheduleExpander::rewriteScheduledInstr(
    MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
    unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
    unsigned PrevReg) {
  bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
  int StagePhi = Schedule.getStage(Phi) + PhiNum;
  // Rewrite uses that have been scheduled already to use the new
  // Phi register.
  for (MachineOperand &UseOp :
       llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
    MachineInstr *UseMI = UseOp.getParent();
    if (UseMI->getParent() != BB)
      continue;
    if (UseMI->isPHI()) {
      if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
        continue;
      if (getLoopPhiReg(*UseMI, BB) != OldReg)
        continue;
    }
    InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
    assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
    MachineInstr *OrigMI = OrigInstr->second;
    int StageSched = Schedule.getStage(OrigMI);
    int CycleSched = Schedule.getCycle(OrigMI);
    unsigned ReplaceReg = 0;
    // This is the stage for the scheduled instruction.
    if (StagePhi == StageSched && Phi->isPHI()) {
      int CyclePhi = Schedule.getCycle(Phi);
      if (PrevReg && InProlog)
        ReplaceReg = PrevReg;
      else if (PrevReg && !isLoopCarried(*Phi) &&
               (CyclePhi <= CycleSched || OrigMI->isPHI()))
        ReplaceReg = PrevReg;
      else
        ReplaceReg = NewReg;
    }
    // The scheduled instruction occurs before the scheduled Phi, and the
    // Phi is not loop carried.
    if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
      ReplaceReg = NewReg;
    if (StagePhi > StageSched && Phi->isPHI())
      ReplaceReg = NewReg;
    if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
      ReplaceReg = NewReg;
    if (ReplaceReg) {
      const TargetRegisterClass *NRC =
          MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
      if (NRC)
        UseOp.setReg(ReplaceReg);
      else {
        Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
        BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
                SplitReg)
            .addReg(ReplaceReg);
        UseOp.setReg(SplitReg);
      }
    }
  }
}
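
/// Return true if the Phi is loop carried: the instruction that produces its
/// loop-body operand is scheduled after the Phi's cycle, or in the same or an
/// earlier stage.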
bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
  if (!Phi.isPHI())
    return false;
  int DefCycle = Schedule.getCycle(&Phi);
  int DefStage = Schedule.getStage(&Phi);

  unsigned InitVal = 0;
  unsigned LoopVal = 0;
  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
  MachineInstr *Use = MRI.getVRegDef(LoopVal);
  if (!Use || Use->isPHI())
    return true;
  int LoopCycle = Schedule.getCycle(Use);
  int LoopStage = Schedule.getStage(Use);
  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
}
//===----------------------------------------------------------------------===//
// PeelingModuloScheduleExpander implementation
//===----------------------------------------------------------------------===//
// This is a reimplementation of ModuloScheduleExpander that works by creating
// a fully correct steady-state kernel and peeling off the prolog and epilogs.
//===----------------------------------------------------------------------===//
// Remove any dead phis in MBB. Dead phis either have only one block as input
// (in which case they are the identity) or have no uses.
void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
                       LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
  bool Changed = true;
  while (Changed) {
    Changed = false;
    for (MachineInstr &MI : llvm::make_early_inc_range(MBB->phis())) {
      assert(MI.isPHI());
      if (MRI.use_empty(MI.getOperand(0).getReg())) {
        if (LIS)
          LIS->RemoveMachineInstrFromMaps(MI);
        MI.eraseFromParent();
        Changed = true;
      } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
        const TargetRegisterClass *ConstrainRegClass =
            MRI.constrainRegClass(MI.getOperand(1).getReg(),
                                  MRI.getRegClass(MI.getOperand(0).getReg()));
        assert(ConstrainRegClass &&
               "Expected a valid constrained register class!");
        (void)ConstrainRegClass;
        MRI.replaceRegWith(MI.getOperand(0).getReg(),
                           MI.getOperand(1).getReg());
        if (LIS)
          LIS->RemoveMachineInstrFromMaps(MI);
        MI.eraseFromParent();
        Changed = true;
      }
    }
  }
}
/// Rewrites the kernel block in-place to adhere to the given schedule.
/// KernelRewriter holds all of the state required to perform the rewriting.
class KernelRewriter {
  ModuloSchedule &S;
  MachineBasicBlock *BB;
  MachineBasicBlock *PreheaderBB, *ExitBB;
  MachineRegisterInfo &MRI;
  const TargetInstrInfo *TII;
  LiveIntervals *LIS;

  // Map from register class to canonical undef register for that class.
  DenseMap<const TargetRegisterClass *, Register> Undefs;
  // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
  // this map is only used when InitReg is non-undef.
  DenseMap<std::pair<unsigned, unsigned>, Register> Phis;
  // Map from LoopReg to phi register where the InitReg is undef.
  DenseMap<Register, Register> UndefPhis;

  // Reg is used by MI. Return the new register MI should use to adhere to the
  // schedule. Insert phis as necessary.
  Register remapUse(Register Reg, MachineInstr &MI);
  // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
  // If InitReg is not given it is chosen arbitrarily. It will either be undef
  // or will be chosen so as to share another phi.
  Register phi(Register LoopReg, std::optional<Register> InitReg = {},
               const TargetRegisterClass *RC = nullptr);
  // Create an undef register of the given register class.
  Register undef(const TargetRegisterClass *RC);

public:
  KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
                 LiveIntervals *LIS = nullptr);
  void rewrite();
};
KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
                               MachineBasicBlock *LoopBB, LiveIntervals *LIS)
    : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
      ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
      TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
  PreheaderBB = *BB->pred_begin();
  if (PreheaderBB == BB)
    PreheaderBB = *std::next(BB->pred_begin());
}
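
// Rearrange the kernel block into schedule order, delete instructions that
// were not scheduled, and remap every use to read from the correct stage,
// inserting loop-carried phis where needed.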
void KernelRewriter::rewrite() {
  // Rearrange the loop to be in schedule order. Note that the schedule may
  // contain instructions that are not owned by the loop block (InstrChanges and
  // friends), so we gracefully handle unowned instructions and delete any
  // instructions that weren't in the schedule.
  auto InsertPt = BB->getFirstTerminator();
  MachineInstr *FirstMI = nullptr;
  for (MachineInstr *MI : S.getInstructions()) {
    if (MI->isPHI())
      continue;
    if (MI->getParent())
      MI->removeFromParent();
    BB->insert(InsertPt, MI);
    if (!FirstMI)
      FirstMI = MI;
  }
  assert(FirstMI && "Failed to find first MI in schedule");

  // At this point all of the scheduled instructions are between FirstMI
  // and the end of the block. Kill from the first non-phi to FirstMI.
  for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
    if (LIS)
      LIS->RemoveMachineInstrFromMaps(*I);
    (I++)->eraseFromParent();
  }

  // Now remap every instruction in the loop.
  for (MachineInstr &MI : *BB) {
    if (MI.isPHI() || MI.isTerminator())
      continue;
    for (MachineOperand &MO : MI.uses()) {
      if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
        continue;
      Register Reg = remapUse(MO.getReg(), MI);
      MO.setReg(Reg);
    }
  }
  EliminateDeadPhis(BB, MRI, LIS);

  // Ensure a phi exists for all instructions that are either referenced by
  // an illegal phi or by an instruction outside the loop. This allows us to
  // treat remaps of these values the same as "normal" values that come from
  // loop-carried phis.
  for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
    if (MI->isPHI()) {
      Register R = MI->getOperand(0).getReg();
      phi(R);
      continue;
    }

    for (MachineOperand &Def : MI->defs()) {
      for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
        if (MI.getParent() != BB) {
          phi(Def.getReg());
          break;
        }
      }
    }
  }
}
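
// Walk the phi chain feeding Reg to work out how many stages separate its
// producer from the consumer MI, then return the (possibly newly created)
// register that carries the value that many iterations back.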
Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
  MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
  if (!Producer)
    return Reg;

  int ConsumerStage = S.getStage(&MI);
  if (!Producer->isPHI()) {
    // Non-phi producers are simple to remap. Insert as many phis as the
    // difference between the consumer and producer stages.
    if (Producer->getParent() != BB)
      // Producer was not inside the loop. Use the register as-is.
      return Reg;
    int ProducerStage = S.getStage(Producer);
    assert(ConsumerStage != -1 &&
           "In-loop consumer should always be scheduled!");
    assert(ConsumerStage >= ProducerStage);
    unsigned StageDiff = ConsumerStage - ProducerStage;

    for (unsigned I = 0; I < StageDiff; ++I)
      Reg = phi(Reg);
    return Reg;
  }

  // First, dive through the phi chain to find the defaults for the generated
  // phis.
  SmallVector<std::optional<Register>, 4> Defaults;
  Register LoopReg = Reg;
  auto LoopProducer = Producer;
  while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
    LoopReg = getLoopPhiReg(*LoopProducer, BB);
    Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
    LoopProducer = MRI.getUniqueVRegDef(LoopReg);
    assert(LoopProducer);
  }
  int LoopProducerStage = S.getStage(LoopProducer);

  std::optional<Register> IllegalPhiDefault;

  if (LoopProducerStage == -1) {
    // Do nothing.
  } else if (LoopProducerStage > ConsumerStage) {
    // This schedule is only representable if ProducerStage == ConsumerStage+1.
    // In addition, Consumer's cycle must be scheduled after Producer in the
    // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
    // functions.
#ifndef NDEBUG // Silence unused variables in non-asserts mode.
    int LoopProducerCycle = S.getCycle(LoopProducer);
    int ConsumerCycle = S.getCycle(&MI);
#endif
    assert(LoopProducerCycle <= ConsumerCycle);
    assert(LoopProducerStage == ConsumerStage + 1);
    // Peel off the first phi from Defaults and insert a phi between producer
    // and consumer. This phi will not be at the front of the block so we
    // consider it illegal. It will only exist during the rewrite process; it
    // needs to exist while we peel off prologs because these could take the
    // default value. After that we can replace all uses with the loop producer
    // value.
    IllegalPhiDefault = Defaults.front();
    Defaults.erase(Defaults.begin());
  } else {
    assert(ConsumerStage >= LoopProducerStage);
    int StageDiff = ConsumerStage - LoopProducerStage;
    if (StageDiff > 0) {
      LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
                        << " to " << (Defaults.size() + StageDiff) << "\n");
      // If we need more phis than we have defaults for, pad out with undefs for
      // the earliest phis, which are at the end of the defaults chain (the
      // chain is in reverse order).
      Defaults.resize(Defaults.size() + StageDiff,
                      Defaults.empty() ? std::optional<Register>()
                                       : Defaults.back());
    }
  }

  // Now we know the number of stages to jump back, insert the phi chain.
  auto DefaultI = Defaults.rbegin();
  while (DefaultI != Defaults.rend())
    LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));

  if (IllegalPhiDefault) {
    // The consumer optionally consumes LoopProducer in the same iteration
    // (because the producer is scheduled at an earlier cycle than the consumer)
    // or the initial value. To facilitate this we create an illegal block here
    // by embedding a phi in the middle of the block. We will fix this up
    // immediately prior to pruning.
    auto RC = MRI.getRegClass(Reg);
    Register R = MRI.createVirtualRegister(RC);
    MachineInstr *IllegalPhi =
        BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
            .addReg(*IllegalPhiDefault)
            .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
            .addReg(LoopReg)
            .addMBB(BB); // Block choice is arbitrary and has no effect.
    // Illegal phi should belong to the producer stage so that it can be
    // filtered correctly during peeling.
    S.setStage(IllegalPhi, LoopProducerStage);
    return R;
  }

  return LoopReg;
}
KernelRewriter::phi(Register LoopReg
, std::optional
<Register
> InitReg
,
1464 const TargetRegisterClass
*RC
) {
1465 // If the init register is not undef, try and find an existing phi.
1467 auto I
= Phis
.find({LoopReg
, *InitReg
});
1468 if (I
!= Phis
.end())
1471 for (auto &KV
: Phis
) {
1472 if (KV
.first
.first
== LoopReg
)
1477 // InitReg is either undef or no existing phi takes InitReg as input. Try and
1478 // find a phi that takes undef as input.
1479 auto I
= UndefPhis
.find(LoopReg
);
1480 if (I
!= UndefPhis
.end()) {
1481 Register R
= I
->second
;
1483 // Found a phi taking undef as input, and this input is undef so return
1484 // without any more changes.
1486 // Found a phi taking undef as input, so rewrite it to take InitReg.
1487 MachineInstr
*MI
= MRI
.getVRegDef(R
);
1488 MI
->getOperand(1).setReg(*InitReg
);
1489 Phis
.insert({{LoopReg
, *InitReg
}, R
});
1490 const TargetRegisterClass
*ConstrainRegClass
=
1491 MRI
.constrainRegClass(R
, MRI
.getRegClass(*InitReg
));
1492 assert(ConstrainRegClass
&& "Expected a valid constrained register class!");
1493 (void)ConstrainRegClass
;
1498 // Failed to find any existing phi to reuse, so create a new one.
1500 RC
= MRI
.getRegClass(LoopReg
);
1501 Register R
= MRI
.createVirtualRegister(RC
);
1503 const TargetRegisterClass
*ConstrainRegClass
=
1504 MRI
.constrainRegClass(R
, MRI
.getRegClass(*InitReg
));
1505 assert(ConstrainRegClass
&& "Expected a valid constrained register class!");
1506 (void)ConstrainRegClass
;
1508 BuildMI(*BB
, BB
->getFirstNonPHI(), DebugLoc(), TII
->get(TargetOpcode::PHI
), R
)
1509 .addReg(InitReg
? *InitReg
: undef(RC
))
1510 .addMBB(PreheaderBB
)
1514 UndefPhis
[LoopReg
] = R
;
1516 Phis
[{LoopReg
, *InitReg
}] = R
;
1520 Register
KernelRewriter::undef(const TargetRegisterClass
*RC
) {
1521 Register
&R
= Undefs
[RC
];
1523 // Create an IMPLICIT_DEF that defines this register if we need it.
1524 // All uses of this should be removed by the time we have finished unrolling
1525 // prologs and epilogs.
1526 R
= MRI
.createVirtualRegister(RC
);
1527 auto *InsertBB
= &PreheaderBB
->getParent()->front();
1528 BuildMI(*InsertBB
, InsertBB
->getFirstTerminator(), DebugLoc(),
1529 TII
->get(TargetOpcode::IMPLICIT_DEF
), R
);
/// Describes an operand in the kernel of a pipelined loop. Characteristics of
/// the operand are discovered, such as how many in-loop PHIs it has to jump
/// through and defaults for these phis.
class KernelOperandInfo {
  MachineBasicBlock *BB;
  MachineRegisterInfo &MRI;
  SmallVector<Register, 4> PhiDefaults;
  MachineOperand *Source;
  MachineOperand *Target;

public:
  KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
                    const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
      : MRI(MRI) {
    Source = MO;
    BB = MO->getParent()->getParent();
    while (isRegInLoop(MO)) {
      MachineInstr *MI = MRI.getVRegDef(MO->getReg());
      if (MI->isFullCopy()) {
        MO = &MI->getOperand(1);
        continue;
      }
      if (!MI->isPHI())
        break;
      // If this is an illegal phi, don't count it in distance.
      if (IllegalPhis.count(MI)) {
        MO = &MI->getOperand(3);
        continue;
      }

      Register Default = getInitPhiReg(*MI, BB);
      MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
                                            : &MI->getOperand(3);
      PhiDefaults.push_back(Default);
    }
    Target = MO;
  }

  bool operator==(const KernelOperandInfo &Other) const {
    return PhiDefaults.size() == Other.PhiDefaults.size();
  }

  void print(raw_ostream &OS) const {
    OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
       << *Source->getParent();
  }

private:
  bool isRegInLoop(MachineOperand *MO) {
    return MO->isReg() && MO->getReg().isVirtual() &&
           MRI.getVRegDef(MO->getReg())->getParent() == BB;
  }
};
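
/// Peel one iteration off the loop in the given direction and record the
/// mapping between the peeled instructions and their kernel originals.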
MachineBasicBlock *
PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
  MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
  if (LPD == LPD_Front)
    PeeledFront.push_back(NewBB);
  else
    PeeledBack.push_front(NewBB);
  for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
       ++I, ++NI) {
    CanonicalMIs[&*I] = &*I;
    CanonicalMIs[&*NI] = &*I;
    BlockMIs[{NewBB, &*I}] = &*NI;
    BlockMIs[{BB, &*I}] = &*I;
  }
  return NewBB;
}
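
// Remove from MB all instructions whose stage is below MinStage, redirecting
// any PHI uses of their results to the equivalent value defined in this block
// before erasing them.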
void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock *MB,
                                                       int MinStage) {
  for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
       I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
    MachineInstr *MI = &*I++;
    int Stage = getStage(MI);
    if (Stage == -1 || Stage >= MinStage)
      continue;

    for (MachineOperand &DefMO : MI->defs()) {
      SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
      for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
        // Only PHIs can use values from this block by construction.
        // Match with the equivalent PHI in B.
        assert(UseMI.isPHI());
        Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
                                               MI->getParent());
        Subs.emplace_back(&UseMI, Reg);
      }
      for (auto &Sub : Subs)
        Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
                                      *MRI.getTargetRegisterInfo());
    }
    if (LIS)
      LIS->RemoveMachineInstrFromMaps(*MI);
    MI->eraseFromParent();
  }
}
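
// Move every instruction belonging to 'Stage' from SourceBB into DestBB,
// cloning or creating PHIs as needed so values defined in SourceBB stay
// reachable, and keeping the CanonicalMIs/BlockMIs bookkeeping up to date.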
void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
    MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
  auto InsertPt = DestBB->getFirstNonPHI();
  DenseMap<Register, Register> Remaps;
  for (MachineInstr &MI : llvm::make_early_inc_range(
           llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
    if (MI.isPHI()) {
      // This is an illegal PHI. If we move any instructions using an illegal
      // PHI, we need to create a legal Phi.
      if (getStage(&MI) != Stage) {
        // The legal Phi is not necessary if the illegal phi's stage
        // is being moved.
        Register PhiR = MI.getOperand(0).getReg();
        auto RC = MRI.getRegClass(PhiR);
        Register NR = MRI.createVirtualRegister(RC);
        MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
                                   DebugLoc(), TII->get(TargetOpcode::PHI), NR)
                               .addReg(PhiR)
                               .addMBB(SourceBB);
        BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
        CanonicalMIs[NI] = CanonicalMIs[&MI];
        Remaps[PhiR] = NR;
      }
    }
    if (getStage(&MI) != Stage)
      continue;
    MI.removeFromParent();
    DestBB->insert(InsertPt, &MI);
    auto *KernelMI = CanonicalMIs[&MI];
    BlockMIs[{DestBB, KernelMI}] = &MI;
    BlockMIs.erase({SourceBB, KernelMI});
  }
  SmallVector<MachineInstr *, 4> PhiToDelete;
  for (MachineInstr &MI : DestBB->phis()) {
    assert(MI.getNumOperands() == 3);
    MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
    // If the instruction referenced by the phi is moved inside the block
    // we don't need the phi anymore.
    if (getStage(Def) == Stage) {
      Register PhiReg = MI.getOperand(0).getReg();
      assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg()) != -1);
      MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
      MI.getOperand(0).setReg(PhiReg);
      PhiToDelete.push_back(&MI);
    }
  }
  for (auto *P : PhiToDelete)
    P->eraseFromParent();
  InsertPt = DestBB->getFirstNonPHI();
  // Helper to clone Phi instructions into the destination block. We clone Phi
  // greedily to avoid combinatorial explosion of Phi instructions.
  auto clonePhi = [&](MachineInstr *Phi) {
    MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
    DestBB->insert(InsertPt, NewMI);
    Register OrigR = Phi->getOperand(0).getReg();
    Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
    NewMI->getOperand(0).setReg(R);
    NewMI->getOperand(1).setReg(OrigR);
    NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
    Remaps[OrigR] = R;
    CanonicalMIs[NewMI] = CanonicalMIs[Phi];
    BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
    PhiNodeLoopIteration[NewMI] = PhiNodeLoopIteration[Phi];
    return R;
  };
  for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
    for (MachineOperand &MO : I->uses()) {
      if (!MO.isReg())
        continue;
      if (Remaps.count(MO.getReg()))
        MO.setReg(Remaps[MO.getReg()]);
      else {
        // If we are using a phi from the source block we need to add a new phi
        // pointing to the old one.
        MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg());
        if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
          Register R = clonePhi(Use);
          MO.setReg(R);
        }
      }
    }
  }
}
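
// Walk the canonical phi chain back as many steps as the loop iteration
// recorded for Phi, returning the register feeding the chain at that distance.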
Register
PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi,
                                                  MachineInstr *Phi) {
  unsigned distance = PhiNodeLoopIteration[Phi];
  MachineInstr *CanonicalUse = CanonicalPhi;
  Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
  for (unsigned I = 0; I < distance; ++I) {
    assert(CanonicalUse->isPHI());
    assert(CanonicalUse->getNumOperands() == 5);
    unsigned LoopRegIdx = 3, InitRegIdx = 1;
    if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
      std::swap(LoopRegIdx, InitRegIdx);
    CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
    CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
  }
  return CanonicalUseReg;
}
void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
  BitVector LS(Schedule.getNumStages(), true);
  BitVector AS(Schedule.getNumStages(), true);
  LiveStages[BB] = LS;
  AvailableStages[BB] = AS;

  // Peel out the prologs.
  LS.reset();
  for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
    LS[I] = true;
    Prologs.push_back(peelKernel(LPD_Front));
    LiveStages[Prologs.back()] = LS;
    AvailableStages[Prologs.back()] = LS;
  }

  // Create a block that will end up as the new loop exiting block (dominated
  // by all prologs and epilogs). It will only contain PHIs, in the same order
  // as BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
  // that the exiting block is a (sub) clone of BB. This in turn gives us the
  // property that any value defined in BB but used outside of BB is used by a
  // PHI in the exiting block.
  MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
  EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
  // Push out the epilogs, again in reverse order.
  // We can't assume anything about the minimum loop trip count at this point,
  // so emit a fairly complex epilog.

  // We first peel number of stages minus one epilogue. Then we remove dead
  // stages and reorder instructions based on their stage. If we have 3 stages
  // we generate first:
  // E0_3
  // E1_3, E1_2
  // E2_3, E2_2, E2_1
  // And then we move instructions based on their stages to have:
  // E0_3
  // E1_3, E2_3
  // E2_2, E1_2, E2_1
  // The transformation is legal because we only move instructions past
  // instructions of a previous loop iteration.
  for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
    Epilogs.push_back(peelKernel(LPD_Back));
    MachineBasicBlock *B = Epilogs.back();
    filterInstructions(B, Schedule.getNumStages() - I);
    // Keep track of which iteration each phi belongs to. We need it to know
    // what version of the variable to use during prologue/epilogue stitching.
    EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
    for (MachineInstr &Phi : B->phis())
      PhiNodeLoopIteration[&Phi] = Schedule.getNumStages() - I;
  }
  for (size_t I = 0; I < Epilogs.size(); I++) {
    LS.reset();
    for (size_t J = I; J < Epilogs.size(); J++) {
      int Iteration = J;
      unsigned Stage = Schedule.getNumStages() - 1 + I - J;
      // Move stage one block at a time so that Phi nodes are updated correctly.
      for (size_t K = Iteration; K > I; K--)
        moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
      LS[Stage] = true;
    }
    LiveStages[Epilogs[I]] = LS;
    AvailableStages[Epilogs[I]] = AS;
  }

  // Now we've defined all the prolog and epilog blocks as a fallthrough
  // sequence, add the edges that will be followed if the loop trip count is
  // lower than the number of stages (connecting prologs directly with epilogs).
  auto PI = Prologs.begin();
  auto EI = Epilogs.begin();
  assert(Prologs.size() == Epilogs.size());
  for (; PI != Prologs.end(); ++PI, ++EI) {
    MachineBasicBlock *Pred = *(*EI)->pred_begin();
    (*PI)->addSuccessor(*EI);
    for (MachineInstr &MI : (*EI)->phis()) {
      Register Reg = MI.getOperand(1).getReg();
      MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
      if (Use && Use->getParent() == Pred) {
        MachineInstr *CanonicalUse = CanonicalMIs[Use];
        if (CanonicalUse->isPHI()) {
          // If the use comes from a phi we need to skip as many phis as the
          // distance between the epilogue and the kernel. Trace through the
          // phi chain to find the right value.
          Reg = getPhiCanonicalReg(CanonicalUse, Use);
        }
        Reg = getEquivalentRegisterIn(Reg, *PI);
      }
      MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
      MI.addOperand(MachineOperand::CreateMBB(*PI));
    }
  }

  // Create a list of all blocks in order.
  SmallVector<MachineBasicBlock *, 8> Blocks;
  llvm::copy(PeeledFront, std::back_inserter(Blocks));
  Blocks.push_back(BB);
  llvm::copy(PeeledBack, std::back_inserter(Blocks));

  // Iterate in reverse order over all instructions, remapping as we go.
  for (MachineBasicBlock *B : reverse(Blocks)) {
    for (auto I = B->instr_rbegin();
         I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
      MachineBasicBlock::reverse_instr_iterator MI = I++;
      rewriteUsesOf(&*MI);
    }
  }
  for (auto *MI : IllegalPhisToDelete) {
    if (LIS)
      LIS->RemoveMachineInstrFromMaps(*MI);
    MI->eraseFromParent();
  }
  IllegalPhisToDelete.clear();

  // Now all remapping has been done, we're free to optimize the generated code.
  for (MachineBasicBlock *B : reverse(Blocks))
    EliminateDeadPhis(B, MRI, LIS);
  EliminateDeadPhis(ExitingBB, MRI, LIS);
}
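
// Create the poor-man's LCSSA exiting block described above: a block holding
// only PHIs (one per PHI in BB) through which every out-of-loop use of a value
// defined in BB is routed.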
MachineBasicBlock *PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
  MachineFunction &MF = *BB->getParent();
  MachineBasicBlock *Exit = *BB->succ_begin();
  if (Exit == BB)
    Exit = *std::next(BB->succ_begin());

  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
  MF.insert(std::next(BB->getIterator()), NewBB);

  // Clone all phis in BB into NewBB and rewrite.
  for (MachineInstr &MI : BB->phis()) {
    auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
    Register OldR = MI.getOperand(3).getReg();
    Register R = MRI.createVirtualRegister(RC);
    SmallVector<MachineInstr *, 4> Uses;
    for (MachineInstr &Use : MRI.use_instructions(OldR))
      if (Use.getParent() != BB)
        Uses.push_back(&Use);
    for (MachineInstr *Use : Uses)
      Use->substituteRegister(OldR, R, /*SubIdx=*/0,
                              *MRI.getTargetRegisterInfo());
    MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
                           .addReg(OldR)
                           .addMBB(BB);
    BlockMIs[{NewBB, &MI}] = NI;
    CanonicalMIs[NI] = &MI;
  }
  BB->replaceSuccessor(Exit, NewBB);
  Exit->replacePhiUsesWith(BB, NewBB);
  NewBB->addSuccessor(Exit);

  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
  (void)CanAnalyzeBr;
  assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
  TII->removeBranch(*BB);
  TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
                    Cond, DebugLoc());
  TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
  return NewBB;
}
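
// Map Reg to the register defined by the corresponding (same canonical)
// instruction in block BB, using the CanonicalMIs/BlockMIs bookkeeping.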
Register
PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
                                                       MachineBasicBlock *BB) {
  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
  unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
  return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
}
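
// Rewrite or remove MI after peeling: illegal PHIs are replaced by the value
// they forward (and queued for later deletion), and instructions whose stage
// is not live in their block have their uses redirected before being erased.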
void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
  if (MI->isPHI()) {
    // This is an illegal PHI. The loop-carried (desired) value is operand 3,
    // and it is produced by this block.
    Register PhiR = MI->getOperand(0).getReg();
    Register R = MI->getOperand(3).getReg();
    int RMIStage = getStage(MRI.getUniqueVRegDef(R));
    if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
      R = MI->getOperand(1).getReg();
    MRI.setRegClass(R, MRI.getRegClass(PhiR));
    MRI.replaceRegWith(PhiR, R);
    // Postpone deleting the Phi as it may be referenced by BlockMIs and used
    // later to figure out how to remap registers.
    MI->getOperand(0).setReg(PhiR);
    IllegalPhisToDelete.push_back(MI);
    return;
  }

  int Stage = getStage(MI);
  if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
      LiveStages[MI->getParent()].test(Stage))
    // Instruction is live, no rewriting to do.
    return;

  for (MachineOperand &DefMO : MI->defs()) {
    SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
    for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
      // Only PHIs can use values from this block by construction.
      // Match with the equivalent PHI in B.
      assert(UseMI.isPHI());
      Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
                                             MI->getParent());
      Subs.emplace_back(&UseMI, Reg);
    }
    for (auto &Sub : Subs)
      Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
                                    *MRI.getTargetRegisterInfo());
  }
  if (LIS)
    LIS->RemoveMachineInstrFromMaps(*MI);
  MI->eraseFromParent();
}
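
// Insert branches between the peeled prologs and epilogs, choosing a dynamic
// or static branch based on what is known about the trip count at each prolog,
// and update LoopInfo (trip count, preheader, or disposal) accordingly.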
void PeelingModuloScheduleExpander::fixupBranches() {
  // Work outwards from the kernel.
  bool KernelDisposed = false;
  int TC = Schedule.getNumStages() - 1;
  for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
       ++PI, ++EI, --TC) {
    MachineBasicBlock *Prolog = *PI;
    MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
    MachineBasicBlock *Epilog = *EI;
    SmallVector<MachineOperand, 4> Cond;
    TII->removeBranch(*Prolog);
    std::optional<bool> StaticallyGreater =
        LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
    if (!StaticallyGreater) {
      LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
      // Dynamically branch based on Cond.
      TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
    } else if (*StaticallyGreater == false) {
      LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
      // Prolog never falls through; branch to epilog and orphan interior
      // blocks. Leave it to unreachable-block-elim to clean up.
      Prolog->removeSuccessor(Fallthrough);
      for (MachineInstr &P : Fallthrough->phis()) {
        P.removeOperand(2);
        P.removeOperand(1);
      }
      TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
      KernelDisposed = true;
    } else {
      LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
      // Prolog always falls through; remove incoming values in epilog.
      Prolog->removeSuccessor(Epilog);
      for (MachineInstr &P : Epilog->phis()) {
        P.removeOperand(4);
        P.removeOperand(3);
      }
    }
  }

  if (!KernelDisposed) {
    LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
    LoopInfo->setPreheader(Prologs.back());
  } else {
    LoopInfo->disposed();
  }
}
void PeelingModuloScheduleExpander::rewriteKernel() {
  KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
  KR.rewrite();
}
void PeelingModuloScheduleExpander::expand() {
  BB = Schedule.getLoop()->getTopBlock();
  Preheader = Schedule.getLoop()->getLoopPreheader();
  LLVM_DEBUG(Schedule.dump());
  LoopInfo = TII->analyzeLoopForPipelining(BB);
  assert(LoopInfo);

  rewriteKernel();
  peelPrologAndEpilogs();
  fixupBranches();
}
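
// Consistency check: run the original ModuloScheduleExpander and compare its
// rewritten kernel, operand by operand, against the kernel produced by the
// KernelRewriter, reporting a fatal error on any mismatch.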
void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
  BB = Schedule.getLoop()->getTopBlock();
  Preheader = Schedule.getLoop()->getLoopPreheader();

  // Dump the schedule before we invalidate and remap all its instructions.
  // Stash it in a string so we can print it if we found an error.
  std::string ScheduleDump;
  raw_string_ostream OS(ScheduleDump);
  Schedule.print(OS);
  OS.flush();

  // First, run the normal ModuloScheduleExpander. We don't support any
  // InstrChanges.
  assert(LIS && "Requires LiveIntervals!");
  ModuloScheduleExpander MSE(MF, Schedule, *LIS,
                             ModuloScheduleExpander::InstrChangesTy());
  MSE.expand();
  MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
  if (!ExpandedKernel) {
    // The expander optimized away the kernel. We can't do any useful checking.
    return;
  }

  // Before running the KernelRewriter, re-add BB into the CFG.
  Preheader->addSuccessor(BB);

  // Now run the new expansion algorithm.
  KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
  KR.rewrite();
  peelPrologAndEpilogs();

  // Collect all illegal phis that the new algorithm created. We'll give these
  // to KernelOperandInfo.
  SmallPtrSet<MachineInstr *, 4> IllegalPhis;
  for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
    if (NI->isPHI())
      IllegalPhis.insert(&*NI);
  }

  // Co-iterate across both kernels. We expect them to be identical apart from
  // phis and full COPYs (we look through both).
  SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs;
  auto OI = ExpandedKernel->begin();
  auto NI = BB->begin();
  for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
    while (OI->isPHI() || OI->isFullCopy())
      ++OI;
    while (NI->isPHI() || NI->isFullCopy())
      ++NI;
    assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
    // Analyze every operand separately.
    for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
         OOpI != OI->operands_end(); ++OOpI, ++NOpI)
      KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
                        KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
  }

  bool Failed = false;
  for (auto &OldAndNew : KOIs) {
    if (OldAndNew.first == OldAndNew.second)
      continue;
    Failed = true;
    errs() << "Modulo kernel validation error: [\n";
    errs() << " [golden] ";
    OldAndNew.first.print(errs());
    errs() << " [new]    ";
    OldAndNew.second.print(errs());
    errs() << "]\n";
  }

  if (Failed) {
    errs() << "Golden reference kernel:\n";
    ExpandedKernel->print(errs());
    errs() << "New kernel:\n";
    BB->print(errs());
    errs() << ScheduleDump;
    report_fatal_error(
        "Modulo kernel validation (-pipeliner-experimental-cg) failed");
  }

  // Cleanup by removing BB from the CFG again as the original
  // ModuloScheduleExpander intended.
  Preheader->removeSuccessor(BB);
  MSE.cleanup();
}
//===----------------------------------------------------------------------===//
// ModuloScheduleTestPass implementation
//===----------------------------------------------------------------------===//
// This pass constructs a ModuloSchedule from its module and runs
// ModuloScheduleExpander.
//
// The module is expected to contain a single-block analyzable loop.
// The total order of instructions is taken from the loop as-is.
// Instructions are expected to be annotated with a PostInstrSymbol.
// This PostInstrSymbol must have the following format:
//  "Stage-%d_Cycle-%d".
//===----------------------------------------------------------------------===//
namespace {
class ModuloScheduleTest : public MachineFunctionPass {
public:
  static char ID;

  ModuloScheduleTest() : MachineFunctionPass(ID) {
    initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void runOnLoop(MachineFunction &MF, MachineLoop &L);

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineLoopInfo>();
    AU.addRequired<LiveIntervals>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
} // namespace

char ModuloScheduleTest::ID = 0;
INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
                      "Modulo Schedule test pass", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
                    "Modulo Schedule test pass", false, false)
bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  for (auto *L : MLI) {
    if (L->getTopBlock() != L->getBottomBlock())
      continue;
    runOnLoop(MF, *L);
    return false;
  }
  return false;
}
static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
  std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
  std::pair<StringRef, StringRef> StageTokenAndValue =
      getToken(StageAndCycle.first, "-");
  std::pair<StringRef, StringRef> CycleTokenAndValue =
      getToken(StageAndCycle.second, "-");
  if (StageTokenAndValue.first != "Stage" ||
      CycleTokenAndValue.first != "_Cycle") {
    llvm_unreachable(
        "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
    return;
  }

  StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
  CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);

  dbgs() << "  Stage=" << Stage << ", Cycle=" << Cycle << "\n";
}
void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
  MachineBasicBlock *BB = L.getTopBlock();
  dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";

  DenseMap<MachineInstr *, int> Cycle, Stage;
  std::vector<MachineInstr *> Instrs;
  for (MachineInstr &MI : *BB) {
    if (MI.isTerminator())
      continue;
    Instrs.push_back(&MI);
    if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
      dbgs() << "Parsing post-instr symbol for " << MI;
      parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
    }
  }

  ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
                    std::move(Stage));
  ModuloScheduleExpander MSE(
      MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
  MSE.expand();
  MSE.cleanup();
}
//===----------------------------------------------------------------------===//
// ModuloScheduleTestAnnotater implementation
//===----------------------------------------------------------------------===//

void ModuloScheduleTestAnnotater::annotate() {
  for (MachineInstr *MI : S.getInstructions()) {
    SmallVector<char, 16> SV;
    raw_svector_ostream OS(SV);
    OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
    MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
    MI->setPostInstrSymbol(MF, Sym);
  }
}