1 //===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/CodeGen/ModuloSchedule.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/Analysis/MemoryLocation.h"
12 #include "llvm/CodeGen/LiveIntervals.h"
13 #include "llvm/CodeGen/MachineInstrBuilder.h"
14 #include "llvm/CodeGen/MachineLoopInfo.h"
15 #include "llvm/CodeGen/MachineRegisterInfo.h"
16 #include "llvm/InitializePasses.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
22 #define DEBUG_TYPE "pipeliner"
25 static cl::opt
<bool> SwapBranchTargetsMVE(
26 "pipeliner-swap-branch-targets-mve", cl::Hidden
, cl::init(false),
27 cl::desc("Swap target blocks of a conditional branch for MVE expander"));
29 void ModuloSchedule::print(raw_ostream
&OS
) {
30 for (MachineInstr
*MI
: ScheduledInstrs
)
31 OS
<< "[stage " << getStage(MI
) << " @" << getCycle(MI
) << "c] " << *MI
;
34 //===----------------------------------------------------------------------===//
35 // ModuloScheduleExpander implementation
36 //===----------------------------------------------------------------------===//
38 /// Return the register values for the operands of a Phi instruction.
39 /// This function assume the instruction is a Phi.
40 static void getPhiRegs(MachineInstr
&Phi
, MachineBasicBlock
*Loop
,
41 unsigned &InitVal
, unsigned &LoopVal
) {
42 assert(Phi
.isPHI() && "Expecting a Phi.");
46 for (unsigned i
= 1, e
= Phi
.getNumOperands(); i
!= e
; i
+= 2)
47 if (Phi
.getOperand(i
+ 1).getMBB() != Loop
)
48 InitVal
= Phi
.getOperand(i
).getReg();
50 LoopVal
= Phi
.getOperand(i
).getReg();
52 assert(InitVal
!= 0 && LoopVal
!= 0 && "Unexpected Phi structure.");
55 /// Return the Phi register value that comes from the incoming block.
56 static unsigned getInitPhiReg(MachineInstr
&Phi
, MachineBasicBlock
*LoopBB
) {
57 for (unsigned i
= 1, e
= Phi
.getNumOperands(); i
!= e
; i
+= 2)
58 if (Phi
.getOperand(i
+ 1).getMBB() != LoopBB
)
59 return Phi
.getOperand(i
).getReg();
63 /// Return the Phi register value that comes the loop block.
64 static unsigned getLoopPhiReg(MachineInstr
&Phi
, MachineBasicBlock
*LoopBB
) {
65 for (unsigned i
= 1, e
= Phi
.getNumOperands(); i
!= e
; i
+= 2)
66 if (Phi
.getOperand(i
+ 1).getMBB() == LoopBB
)
67 return Phi
.getOperand(i
).getReg();
71 void ModuloScheduleExpander::expand() {
72 BB
= Schedule
.getLoop()->getTopBlock();
73 Preheader
= *BB
->pred_begin();
75 Preheader
= *std::next(BB
->pred_begin());
77 // Iterate over the definitions in each instruction, and compute the
78 // stage difference for each use. Keep the maximum value.
79 for (MachineInstr
*MI
: Schedule
.getInstructions()) {
80 int DefStage
= Schedule
.getStage(MI
);
81 for (const MachineOperand
&Op
: MI
->all_defs()) {
82 Register Reg
= Op
.getReg();
84 bool PhiIsSwapped
= false;
85 for (MachineOperand
&UseOp
: MRI
.use_operands(Reg
)) {
86 MachineInstr
*UseMI
= UseOp
.getParent();
87 int UseStage
= Schedule
.getStage(UseMI
);
89 if (UseStage
!= -1 && UseStage
>= DefStage
)
90 Diff
= UseStage
- DefStage
;
92 if (isLoopCarried(*MI
))
97 MaxDiff
= std::max(Diff
, MaxDiff
);
99 RegToStageDiff
[Reg
] = std::make_pair(MaxDiff
, PhiIsSwapped
);
103 generatePipelinedLoop();
106 void ModuloScheduleExpander::generatePipelinedLoop() {
107 LoopInfo
= TII
->analyzeLoopForPipelining(BB
);
108 assert(LoopInfo
&& "Must be able to analyze loop!");
110 // Create a new basic block for the kernel and add it to the CFG.
111 MachineBasicBlock
*KernelBB
= MF
.CreateMachineBasicBlock(BB
->getBasicBlock());
113 unsigned MaxStageCount
= Schedule
.getNumStages() - 1;
115 // Remember the registers that are used in different stages. The index is
116 // the iteration, or stage, that the instruction is scheduled in. This is
117 // a map between register names in the original block and the names created
118 // in each stage of the pipelined loop.
119 ValueMapTy
*VRMap
= new ValueMapTy
[(MaxStageCount
+ 1) * 2];
121 // The renaming destination by Phis for the registers across stages.
122 // This map is updated during Phis generation to point to the most recent
123 // renaming destination.
124 ValueMapTy
*VRMapPhi
= new ValueMapTy
[(MaxStageCount
+ 1) * 2];
128 SmallVector
<MachineBasicBlock
*, 4> PrologBBs
;
130 // Generate the prolog instructions that set up the pipeline.
131 generateProlog(MaxStageCount
, KernelBB
, VRMap
, PrologBBs
);
132 MF
.insert(BB
->getIterator(), KernelBB
);
133 LIS
.insertMBBInMaps(KernelBB
);
135 // Rearrange the instructions to generate the new, pipelined loop,
136 // and update register names as needed.
137 for (MachineInstr
*CI
: Schedule
.getInstructions()) {
140 unsigned StageNum
= Schedule
.getStage(CI
);
141 MachineInstr
*NewMI
= cloneInstr(CI
, MaxStageCount
, StageNum
);
142 updateInstruction(NewMI
, false, MaxStageCount
, StageNum
, VRMap
);
143 KernelBB
->push_back(NewMI
);
144 InstrMap
[NewMI
] = CI
;
147 // Copy any terminator instructions to the new kernel, and update
149 for (MachineInstr
&MI
: BB
->terminators()) {
150 MachineInstr
*NewMI
= MF
.CloneMachineInstr(&MI
);
151 updateInstruction(NewMI
, false, MaxStageCount
, 0, VRMap
);
152 KernelBB
->push_back(NewMI
);
153 InstrMap
[NewMI
] = &MI
;
156 NewKernel
= KernelBB
;
157 KernelBB
->transferSuccessors(BB
);
158 KernelBB
->replaceSuccessor(BB
, KernelBB
);
160 generateExistingPhis(KernelBB
, PrologBBs
.back(), KernelBB
, KernelBB
, VRMap
,
161 InstrMap
, MaxStageCount
, MaxStageCount
, false);
162 generatePhis(KernelBB
, PrologBBs
.back(), KernelBB
, KernelBB
, VRMap
, VRMapPhi
,
163 InstrMap
, MaxStageCount
, MaxStageCount
, false);
165 LLVM_DEBUG(dbgs() << "New block\n"; KernelBB
->dump(););
167 SmallVector
<MachineBasicBlock
*, 4> EpilogBBs
;
168 // Generate the epilog instructions to complete the pipeline.
169 generateEpilog(MaxStageCount
, KernelBB
, BB
, VRMap
, VRMapPhi
, EpilogBBs
,
172 // We need this step because the register allocation doesn't handle some
173 // situations well, so we insert copies to help out.
174 splitLifetimes(KernelBB
, EpilogBBs
);
176 // Remove dead instructions due to loop induction variables.
177 removeDeadInstructions(KernelBB
, EpilogBBs
);
179 // Add branches between prolog and epilog blocks.
180 addBranches(*Preheader
, PrologBBs
, KernelBB
, EpilogBBs
, VRMap
);
186 void ModuloScheduleExpander::cleanup() {
187 // Remove the original loop since it's no longer referenced.
189 LIS
.RemoveMachineInstrFromMaps(I
);
191 BB
->eraseFromParent();
194 /// Generate the pipeline prolog code.
195 void ModuloScheduleExpander::generateProlog(unsigned LastStage
,
196 MachineBasicBlock
*KernelBB
,
198 MBBVectorTy
&PrologBBs
) {
199 MachineBasicBlock
*PredBB
= Preheader
;
202 // Generate a basic block for each stage, not including the last stage,
203 // which will be generated in the kernel. Each basic block may contain
204 // instructions from multiple stages/iterations.
205 for (unsigned i
= 0; i
< LastStage
; ++i
) {
206 // Create and insert the prolog basic block prior to the original loop
207 // basic block. The original loop is removed later.
208 MachineBasicBlock
*NewBB
= MF
.CreateMachineBasicBlock(BB
->getBasicBlock());
209 PrologBBs
.push_back(NewBB
);
210 MF
.insert(BB
->getIterator(), NewBB
);
211 NewBB
->transferSuccessors(PredBB
);
212 PredBB
->addSuccessor(NewBB
);
214 LIS
.insertMBBInMaps(NewBB
);
216 // Generate instructions for each appropriate stage. Process instructions
217 // in original program order.
218 for (int StageNum
= i
; StageNum
>= 0; --StageNum
) {
219 for (MachineBasicBlock::iterator BBI
= BB
->instr_begin(),
220 BBE
= BB
->getFirstTerminator();
222 if (Schedule
.getStage(&*BBI
) == StageNum
) {
225 MachineInstr
*NewMI
=
226 cloneAndChangeInstr(&*BBI
, i
, (unsigned)StageNum
);
227 updateInstruction(NewMI
, false, i
, (unsigned)StageNum
, VRMap
);
228 NewBB
->push_back(NewMI
);
229 InstrMap
[NewMI
] = &*BBI
;
233 rewritePhiValues(NewBB
, i
, VRMap
, InstrMap
);
235 dbgs() << "prolog:\n";
240 PredBB
->replaceSuccessor(BB
, KernelBB
);
242 // Check if we need to remove the branch from the preheader to the original
243 // loop, and replace it with a branch to the new loop.
244 unsigned numBranches
= TII
->removeBranch(*Preheader
);
246 SmallVector
<MachineOperand
, 0> Cond
;
247 TII
->insertBranch(*Preheader
, PrologBBs
[0], nullptr, Cond
, DebugLoc());
251 /// Generate the pipeline epilog code. The epilog code finishes the iterations
252 /// that were started in either the prolog or the kernel. We create a basic
253 /// block for each stage that needs to complete.
254 void ModuloScheduleExpander::generateEpilog(
255 unsigned LastStage
, MachineBasicBlock
*KernelBB
, MachineBasicBlock
*OrigBB
,
256 ValueMapTy
*VRMap
, ValueMapTy
*VRMapPhi
, MBBVectorTy
&EpilogBBs
,
257 MBBVectorTy
&PrologBBs
) {
258 // We need to change the branch from the kernel to the first epilog block, so
259 // this call to analyze branch uses the kernel rather than the original BB.
260 MachineBasicBlock
*TBB
= nullptr, *FBB
= nullptr;
261 SmallVector
<MachineOperand
, 4> Cond
;
262 bool checkBranch
= TII
->analyzeBranch(*KernelBB
, TBB
, FBB
, Cond
);
263 assert(!checkBranch
&& "generateEpilog must be able to analyze the branch");
267 MachineBasicBlock::succ_iterator LoopExitI
= KernelBB
->succ_begin();
268 if (*LoopExitI
== KernelBB
)
270 assert(LoopExitI
!= KernelBB
->succ_end() && "Expecting a successor");
271 MachineBasicBlock
*LoopExitBB
= *LoopExitI
;
273 MachineBasicBlock
*PredBB
= KernelBB
;
274 MachineBasicBlock
*EpilogStart
= LoopExitBB
;
277 // Generate a basic block for each stage, not including the last stage,
278 // which was generated for the kernel. Each basic block may contain
279 // instructions from multiple stages/iterations.
280 int EpilogStage
= LastStage
+ 1;
281 for (unsigned i
= LastStage
; i
>= 1; --i
, ++EpilogStage
) {
282 MachineBasicBlock
*NewBB
= MF
.CreateMachineBasicBlock();
283 EpilogBBs
.push_back(NewBB
);
284 MF
.insert(BB
->getIterator(), NewBB
);
286 PredBB
->replaceSuccessor(LoopExitBB
, NewBB
);
287 NewBB
->addSuccessor(LoopExitBB
);
288 LIS
.insertMBBInMaps(NewBB
);
290 if (EpilogStart
== LoopExitBB
)
293 // Add instructions to the epilog depending on the current block.
294 // Process instructions in original program order.
295 for (unsigned StageNum
= i
; StageNum
<= LastStage
; ++StageNum
) {
296 for (auto &BBI
: *BB
) {
299 MachineInstr
*In
= &BBI
;
300 if ((unsigned)Schedule
.getStage(In
) == StageNum
) {
301 // Instructions with memoperands in the epilog are updated with
302 // conservative values.
303 MachineInstr
*NewMI
= cloneInstr(In
, UINT_MAX
, 0);
304 updateInstruction(NewMI
, i
== 1, EpilogStage
, 0, VRMap
);
305 NewBB
->push_back(NewMI
);
306 InstrMap
[NewMI
] = In
;
310 generateExistingPhis(NewBB
, PrologBBs
[i
- 1], PredBB
, KernelBB
, VRMap
,
311 InstrMap
, LastStage
, EpilogStage
, i
== 1);
312 generatePhis(NewBB
, PrologBBs
[i
- 1], PredBB
, KernelBB
, VRMap
, VRMapPhi
,
313 InstrMap
, LastStage
, EpilogStage
, i
== 1);
317 dbgs() << "epilog:\n";
322 // Fix any Phi nodes in the loop exit block.
323 LoopExitBB
->replacePhiUsesWith(BB
, PredBB
);
325 // Create a branch to the new epilog from the kernel.
326 // Remove the original branch and add a new branch to the epilog.
327 TII
->removeBranch(*KernelBB
);
328 assert((OrigBB
== TBB
|| OrigBB
== FBB
) &&
329 "Unable to determine looping branch direction");
331 TII
->insertBranch(*KernelBB
, EpilogStart
, KernelBB
, Cond
, DebugLoc());
333 TII
->insertBranch(*KernelBB
, KernelBB
, EpilogStart
, Cond
, DebugLoc());
334 // Add a branch to the loop exit.
335 if (EpilogBBs
.size() > 0) {
336 MachineBasicBlock
*LastEpilogBB
= EpilogBBs
.back();
337 SmallVector
<MachineOperand
, 4> Cond1
;
338 TII
->insertBranch(*LastEpilogBB
, LoopExitBB
, nullptr, Cond1
, DebugLoc());
342 /// Replace all uses of FromReg that appear outside the specified
343 /// basic block with ToReg.
344 static void replaceRegUsesAfterLoop(unsigned FromReg
, unsigned ToReg
,
345 MachineBasicBlock
*MBB
,
346 MachineRegisterInfo
&MRI
,
347 LiveIntervals
&LIS
) {
348 for (MachineOperand
&O
:
349 llvm::make_early_inc_range(MRI
.use_operands(FromReg
)))
350 if (O
.getParent()->getParent() != MBB
)
352 if (!LIS
.hasInterval(ToReg
))
353 LIS
.createEmptyInterval(ToReg
);
356 /// Return true if the register has a use that occurs outside the
358 static bool hasUseAfterLoop(unsigned Reg
, MachineBasicBlock
*BB
,
359 MachineRegisterInfo
&MRI
) {
360 for (const MachineOperand
&MO
: MRI
.use_operands(Reg
))
361 if (MO
.getParent()->getParent() != BB
)
366 /// Generate Phis for the specific block in the generated pipelined code.
367 /// This function looks at the Phis from the original code to guide the
368 /// creation of new Phis.
369 void ModuloScheduleExpander::generateExistingPhis(
370 MachineBasicBlock
*NewBB
, MachineBasicBlock
*BB1
, MachineBasicBlock
*BB2
,
371 MachineBasicBlock
*KernelBB
, ValueMapTy
*VRMap
, InstrMapTy
&InstrMap
,
372 unsigned LastStageNum
, unsigned CurStageNum
, bool IsLast
) {
373 // Compute the stage number for the initial value of the Phi, which
374 // comes from the prolog. The prolog to use depends on to which kernel/
375 // epilog that we're adding the Phi.
376 unsigned PrologStage
= 0;
377 unsigned PrevStage
= 0;
378 bool InKernel
= (LastStageNum
== CurStageNum
);
380 PrologStage
= LastStageNum
- 1;
381 PrevStage
= CurStageNum
;
383 PrologStage
= LastStageNum
- (CurStageNum
- LastStageNum
);
384 PrevStage
= LastStageNum
+ (CurStageNum
- LastStageNum
) - 1;
387 for (MachineBasicBlock::iterator BBI
= BB
->instr_begin(),
388 BBE
= BB
->getFirstNonPHI();
390 Register Def
= BBI
->getOperand(0).getReg();
392 unsigned InitVal
= 0;
393 unsigned LoopVal
= 0;
394 getPhiRegs(*BBI
, BB
, InitVal
, LoopVal
);
397 // The Phi value from the loop body typically is defined in the loop, but
398 // not always. So, we need to check if the value is defined in the loop.
399 unsigned PhiOp2
= LoopVal
;
400 if (auto It
= VRMap
[LastStageNum
].find(LoopVal
);
401 It
!= VRMap
[LastStageNum
].end())
404 int StageScheduled
= Schedule
.getStage(&*BBI
);
405 int LoopValStage
= Schedule
.getStage(MRI
.getVRegDef(LoopVal
));
406 unsigned NumStages
= getStagesForReg(Def
, CurStageNum
);
407 if (NumStages
== 0) {
408 // We don't need to generate a Phi anymore, but we need to rename any uses
410 unsigned NewReg
= VRMap
[PrevStage
][LoopVal
];
411 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, 0, &*BBI
, Def
,
413 if (VRMap
[CurStageNum
].count(LoopVal
))
414 VRMap
[CurStageNum
][Def
] = VRMap
[CurStageNum
][LoopVal
];
416 // Adjust the number of Phis needed depending on the number of prologs left,
417 // and the distance from where the Phi is first scheduled. The number of
418 // Phis cannot exceed the number of prolog stages. Each stage can
419 // potentially define two values.
420 unsigned MaxPhis
= PrologStage
+ 2;
421 if (!InKernel
&& (int)PrologStage
<= LoopValStage
)
422 MaxPhis
= std::max((int)MaxPhis
- (int)LoopValStage
, 1);
423 unsigned NumPhis
= std::min(NumStages
, MaxPhis
);
426 unsigned AccessStage
= (LoopValStage
!= -1) ? LoopValStage
: StageScheduled
;
427 // In the epilog, we may need to look back one stage to get the correct
428 // Phi name, because the epilog and prolog blocks execute the same stage.
429 // The correct name is from the previous block only when the Phi has
430 // been completely scheduled prior to the epilog, and Phi value is not
431 // needed in multiple stages.
433 if (!InKernel
&& StageScheduled
>= LoopValStage
&& AccessStage
== 0 &&
436 // Adjust the computations below when the phi and the loop definition
437 // are scheduled in different stages.
438 if (InKernel
&& LoopValStage
!= -1 && StageScheduled
> LoopValStage
)
439 StageDiff
= StageScheduled
- LoopValStage
;
440 for (unsigned np
= 0; np
< NumPhis
; ++np
) {
441 // If the Phi hasn't been scheduled, then use the initial Phi operand
442 // value. Otherwise, use the scheduled version of the instruction. This
443 // is a little complicated when a Phi references another Phi.
444 if (np
> PrologStage
|| StageScheduled
>= (int)LastStageNum
)
446 // Check if the Phi has already been scheduled in a prolog stage.
447 else if (PrologStage
>= AccessStage
+ StageDiff
+ np
&&
448 VRMap
[PrologStage
- StageDiff
- np
].count(LoopVal
) != 0)
449 PhiOp1
= VRMap
[PrologStage
- StageDiff
- np
][LoopVal
];
450 // Check if the Phi has already been scheduled, but the loop instruction
451 // is either another Phi, or doesn't occur in the loop.
452 else if (PrologStage
>= AccessStage
+ StageDiff
+ np
) {
453 // If the Phi references another Phi, we need to examine the other
454 // Phi to get the correct value.
456 MachineInstr
*InstOp1
= MRI
.getVRegDef(PhiOp1
);
458 while (InstOp1
&& InstOp1
->isPHI() && InstOp1
->getParent() == BB
) {
459 int PhiStage
= Schedule
.getStage(InstOp1
);
460 if ((int)(PrologStage
- StageDiff
- np
) < PhiStage
+ Indirects
)
461 PhiOp1
= getInitPhiReg(*InstOp1
, BB
);
463 PhiOp1
= getLoopPhiReg(*InstOp1
, BB
);
464 InstOp1
= MRI
.getVRegDef(PhiOp1
);
465 int PhiOpStage
= Schedule
.getStage(InstOp1
);
466 int StageAdj
= (PhiOpStage
!= -1 ? PhiStage
- PhiOpStage
: 0);
467 if (PhiOpStage
!= -1 && PrologStage
- StageAdj
>= Indirects
+ np
&&
468 VRMap
[PrologStage
- StageAdj
- Indirects
- np
].count(PhiOp1
)) {
469 PhiOp1
= VRMap
[PrologStage
- StageAdj
- Indirects
- np
][PhiOp1
];
476 // If this references a generated Phi in the kernel, get the Phi operand
477 // from the incoming block.
478 if (MachineInstr
*InstOp1
= MRI
.getVRegDef(PhiOp1
))
479 if (InstOp1
->isPHI() && InstOp1
->getParent() == KernelBB
)
480 PhiOp1
= getInitPhiReg(*InstOp1
, KernelBB
);
482 MachineInstr
*PhiInst
= MRI
.getVRegDef(LoopVal
);
483 bool LoopDefIsPhi
= PhiInst
&& PhiInst
->isPHI();
484 // In the epilog, a map lookup is needed to get the value from the kernel,
485 // or previous epilog block. How is does this depends on if the
486 // instruction is scheduled in the previous block.
488 int StageDiffAdj
= 0;
489 if (LoopValStage
!= -1 && StageScheduled
> LoopValStage
)
490 StageDiffAdj
= StageScheduled
- LoopValStage
;
491 // Use the loop value defined in the kernel, unless the kernel
492 // contains the last definition of the Phi.
493 if (np
== 0 && PrevStage
== LastStageNum
&&
494 (StageScheduled
!= 0 || LoopValStage
!= 0) &&
495 VRMap
[PrevStage
- StageDiffAdj
].count(LoopVal
))
496 PhiOp2
= VRMap
[PrevStage
- StageDiffAdj
][LoopVal
];
497 // Use the value defined by the Phi. We add one because we switch
498 // from looking at the loop value to the Phi definition.
499 else if (np
> 0 && PrevStage
== LastStageNum
&&
500 VRMap
[PrevStage
- np
+ 1].count(Def
))
501 PhiOp2
= VRMap
[PrevStage
- np
+ 1][Def
];
502 // Use the loop value defined in the kernel.
503 else if (static_cast<unsigned>(LoopValStage
) > PrologStage
+ 1 &&
504 VRMap
[PrevStage
- StageDiffAdj
- np
].count(LoopVal
))
505 PhiOp2
= VRMap
[PrevStage
- StageDiffAdj
- np
][LoopVal
];
506 // Use the value defined by the Phi, unless we're generating the first
507 // epilog and the Phi refers to a Phi in a different stage.
508 else if (VRMap
[PrevStage
- np
].count(Def
) &&
509 (!LoopDefIsPhi
|| (PrevStage
!= LastStageNum
) ||
510 (LoopValStage
== StageScheduled
)))
511 PhiOp2
= VRMap
[PrevStage
- np
][Def
];
514 // Check if we can reuse an existing Phi. This occurs when a Phi
515 // references another Phi, and the other Phi is scheduled in an
516 // earlier stage. We can try to reuse an existing Phi up until the last
517 // stage of the current Phi.
519 if (static_cast<int>(PrologStage
- np
) >= StageScheduled
) {
520 int LVNumStages
= getStagesForPhi(LoopVal
);
521 int StageDiff
= (StageScheduled
- LoopValStage
);
522 LVNumStages
-= StageDiff
;
523 // Make sure the loop value Phi has been processed already.
524 if (LVNumStages
> (int)np
&& VRMap
[CurStageNum
].count(LoopVal
)) {
526 unsigned ReuseStage
= CurStageNum
;
527 if (isLoopCarried(*PhiInst
))
528 ReuseStage
-= LVNumStages
;
529 // Check if the Phi to reuse has been generated yet. If not, then
530 // there is nothing to reuse.
531 if (VRMap
[ReuseStage
- np
].count(LoopVal
)) {
532 NewReg
= VRMap
[ReuseStage
- np
][LoopVal
];
534 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
,
536 // Update the map with the new Phi name.
537 VRMap
[CurStageNum
- np
][Def
] = NewReg
;
539 if (VRMap
[LastStageNum
- np
- 1].count(LoopVal
))
540 PhiOp2
= VRMap
[LastStageNum
- np
- 1][LoopVal
];
542 if (IsLast
&& np
== NumPhis
- 1)
543 replaceRegUsesAfterLoop(Def
, NewReg
, BB
, MRI
, LIS
);
548 if (InKernel
&& StageDiff
> 0 &&
549 VRMap
[CurStageNum
- StageDiff
- np
].count(LoopVal
))
550 PhiOp2
= VRMap
[CurStageNum
- StageDiff
- np
][LoopVal
];
553 const TargetRegisterClass
*RC
= MRI
.getRegClass(Def
);
554 NewReg
= MRI
.createVirtualRegister(RC
);
556 MachineInstrBuilder NewPhi
=
557 BuildMI(*NewBB
, NewBB
->getFirstNonPHI(), DebugLoc(),
558 TII
->get(TargetOpcode::PHI
), NewReg
);
559 NewPhi
.addReg(PhiOp1
).addMBB(BB1
);
560 NewPhi
.addReg(PhiOp2
).addMBB(BB2
);
562 InstrMap
[NewPhi
] = &*BBI
;
564 // We define the Phis after creating the new pipelined code, so
565 // we need to rename the Phi values in scheduled instructions.
567 unsigned PrevReg
= 0;
568 if (InKernel
&& VRMap
[PrevStage
- np
].count(LoopVal
))
569 PrevReg
= VRMap
[PrevStage
- np
][LoopVal
];
570 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
, Def
,
572 // If the Phi has been scheduled, use the new name for rewriting.
573 if (VRMap
[CurStageNum
- np
].count(Def
)) {
574 unsigned R
= VRMap
[CurStageNum
- np
][Def
];
575 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
, R
,
579 // Check if we need to rename any uses that occurs after the loop. The
580 // register to replace depends on whether the Phi is scheduled in the
582 if (IsLast
&& np
== NumPhis
- 1)
583 replaceRegUsesAfterLoop(Def
, NewReg
, BB
, MRI
, LIS
);
585 // In the kernel, a dependent Phi uses the value from this Phi.
589 // Update the map with the new Phi name.
590 VRMap
[CurStageNum
- np
][Def
] = NewReg
;
593 while (NumPhis
++ < NumStages
) {
594 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, NumPhis
, &*BBI
, Def
,
598 // Check if we need to rename a Phi that has been eliminated due to
600 if (NumStages
== 0 && IsLast
&& VRMap
[CurStageNum
].count(LoopVal
))
601 replaceRegUsesAfterLoop(Def
, VRMap
[CurStageNum
][LoopVal
], BB
, MRI
, LIS
);
605 /// Generate Phis for the specified block in the generated pipelined code.
606 /// These are new Phis needed because the definition is scheduled after the
607 /// use in the pipelined sequence.
608 void ModuloScheduleExpander::generatePhis(
609 MachineBasicBlock
*NewBB
, MachineBasicBlock
*BB1
, MachineBasicBlock
*BB2
,
610 MachineBasicBlock
*KernelBB
, ValueMapTy
*VRMap
, ValueMapTy
*VRMapPhi
,
611 InstrMapTy
&InstrMap
, unsigned LastStageNum
, unsigned CurStageNum
,
613 // Compute the stage number that contains the initial Phi value, and
614 // the Phi from the previous stage.
615 unsigned PrologStage
= 0;
616 unsigned PrevStage
= 0;
617 unsigned StageDiff
= CurStageNum
- LastStageNum
;
618 bool InKernel
= (StageDiff
== 0);
620 PrologStage
= LastStageNum
- 1;
621 PrevStage
= CurStageNum
;
623 PrologStage
= LastStageNum
- StageDiff
;
624 PrevStage
= LastStageNum
+ StageDiff
- 1;
627 for (MachineBasicBlock::iterator BBI
= BB
->getFirstNonPHI(),
628 BBE
= BB
->instr_end();
630 for (unsigned i
= 0, e
= BBI
->getNumOperands(); i
!= e
; ++i
) {
631 MachineOperand
&MO
= BBI
->getOperand(i
);
632 if (!MO
.isReg() || !MO
.isDef() || !MO
.getReg().isVirtual())
635 int StageScheduled
= Schedule
.getStage(&*BBI
);
636 assert(StageScheduled
!= -1 && "Expecting scheduled instruction.");
637 Register Def
= MO
.getReg();
638 unsigned NumPhis
= getStagesForReg(Def
, CurStageNum
);
639 // An instruction scheduled in stage 0 and is used after the loop
640 // requires a phi in the epilog for the last definition from either
641 // the kernel or prolog.
642 if (!InKernel
&& NumPhis
== 0 && StageScheduled
== 0 &&
643 hasUseAfterLoop(Def
, BB
, MRI
))
645 if (!InKernel
&& (unsigned)StageScheduled
> PrologStage
)
650 PhiOp2
= VRMap
[PrevStage
][Def
];
651 if (MachineInstr
*InstOp2
= MRI
.getVRegDef(PhiOp2
))
652 if (InstOp2
->isPHI() && InstOp2
->getParent() == NewBB
)
653 PhiOp2
= getLoopPhiReg(*InstOp2
, BB2
);
655 // The number of Phis can't exceed the number of prolog stages. The
656 // prolog stage number is zero based.
657 if (NumPhis
> PrologStage
+ 1 - StageScheduled
)
658 NumPhis
= PrologStage
+ 1 - StageScheduled
;
659 for (unsigned np
= 0; np
< NumPhis
; ++np
) {
662 // %Org = ... (Scheduled at Stage#0, NumPhi = 2)
669 // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
670 // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
673 // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
674 // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
676 // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
678 // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
679 // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
680 // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
682 unsigned PhiOp1
= VRMap
[PrologStage
][Def
];
683 if (np
<= PrologStage
)
684 PhiOp1
= VRMap
[PrologStage
- np
][Def
];
686 if (PrevStage
== LastStageNum
&& np
== 0)
687 PhiOp2
= VRMap
[LastStageNum
][Def
];
689 PhiOp2
= VRMapPhi
[PrevStage
- np
][Def
];
692 const TargetRegisterClass
*RC
= MRI
.getRegClass(Def
);
693 Register NewReg
= MRI
.createVirtualRegister(RC
);
695 MachineInstrBuilder NewPhi
=
696 BuildMI(*NewBB
, NewBB
->getFirstNonPHI(), DebugLoc(),
697 TII
->get(TargetOpcode::PHI
), NewReg
);
698 NewPhi
.addReg(PhiOp1
).addMBB(BB1
);
699 NewPhi
.addReg(PhiOp2
).addMBB(BB2
);
701 InstrMap
[NewPhi
] = &*BBI
;
703 // Rewrite uses and update the map. The actions depend upon whether
704 // we generating code for the kernel or epilog blocks.
706 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
, PhiOp1
,
708 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
, PhiOp2
,
712 VRMapPhi
[PrevStage
- np
- 1][Def
] = NewReg
;
714 VRMapPhi
[CurStageNum
- np
][Def
] = NewReg
;
715 if (np
== NumPhis
- 1)
716 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
, Def
,
719 if (IsLast
&& np
== NumPhis
- 1)
720 replaceRegUsesAfterLoop(Def
, NewReg
, BB
, MRI
, LIS
);
726 /// Remove instructions that generate values with no uses.
727 /// Typically, these are induction variable operations that generate values
728 /// used in the loop itself. A dead instruction has a definition with
729 /// no uses, or uses that occur in the original loop only.
730 void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock
*KernelBB
,
731 MBBVectorTy
&EpilogBBs
) {
732 // For each epilog block, check that the value defined by each instruction
733 // is used. If not, delete it.
734 for (MachineBasicBlock
*MBB
: llvm::reverse(EpilogBBs
))
735 for (MachineBasicBlock::reverse_instr_iterator MI
= MBB
->instr_rbegin(),
736 ME
= MBB
->instr_rend();
738 // From DeadMachineInstructionElem. Don't delete inline assembly.
739 if (MI
->isInlineAsm()) {
743 bool SawStore
= false;
744 // Check if it's safe to remove the instruction due to side effects.
745 // We can, and want to, remove Phis here.
746 if (!MI
->isSafeToMove(SawStore
) && !MI
->isPHI()) {
751 for (const MachineOperand
&MO
: MI
->all_defs()) {
752 Register reg
= MO
.getReg();
753 // Assume physical registers are used, unless they are marked dead.
754 if (reg
.isPhysical()) {
760 unsigned realUses
= 0;
761 for (const MachineOperand
&U
: MRI
.use_operands(reg
)) {
762 // Check if there are any uses that occur only in the original
763 // loop. If so, that's not a real use.
764 if (U
.getParent()->getParent() != BB
) {
775 LIS
.RemoveMachineInstrFromMaps(*MI
);
776 MI
++->eraseFromParent();
781 // In the kernel block, check if we can remove a Phi that generates a value
782 // used in an instruction removed in the epilog block.
783 for (MachineInstr
&MI
: llvm::make_early_inc_range(KernelBB
->phis())) {
784 Register reg
= MI
.getOperand(0).getReg();
785 if (MRI
.use_begin(reg
) == MRI
.use_end()) {
786 LIS
.RemoveMachineInstrFromMaps(MI
);
787 MI
.eraseFromParent();
792 /// For loop carried definitions, we split the lifetime of a virtual register
793 /// that has uses past the definition in the next iteration. A copy with a new
794 /// virtual register is inserted before the definition, which helps with
795 /// generating a better register assignment.
797 /// v1 = phi(a, v2) v1 = phi(a, v2)
798 /// v2 = phi(b, v3) v2 = phi(b, v3)
799 /// v3 = .. v4 = copy v1
802 void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock
*KernelBB
,
803 MBBVectorTy
&EpilogBBs
) {
804 const TargetRegisterInfo
*TRI
= MF
.getSubtarget().getRegisterInfo();
805 for (auto &PHI
: KernelBB
->phis()) {
806 Register Def
= PHI
.getOperand(0).getReg();
807 // Check for any Phi definition that used as an operand of another Phi
808 // in the same block.
809 for (MachineRegisterInfo::use_instr_iterator I
= MRI
.use_instr_begin(Def
),
810 E
= MRI
.use_instr_end();
812 if (I
->isPHI() && I
->getParent() == KernelBB
) {
813 // Get the loop carried definition.
814 unsigned LCDef
= getLoopPhiReg(PHI
, KernelBB
);
817 MachineInstr
*MI
= MRI
.getVRegDef(LCDef
);
818 if (!MI
|| MI
->getParent() != KernelBB
|| MI
->isPHI())
820 // Search through the rest of the block looking for uses of the Phi
821 // definition. If one occurs, then split the lifetime.
822 unsigned SplitReg
= 0;
823 for (auto &BBJ
: make_range(MachineBasicBlock::instr_iterator(MI
),
824 KernelBB
->instr_end()))
825 if (BBJ
.readsRegister(Def
, /*TRI=*/nullptr)) {
826 // We split the lifetime when we find the first use.
828 SplitReg
= MRI
.createVirtualRegister(MRI
.getRegClass(Def
));
829 BuildMI(*KernelBB
, MI
, MI
->getDebugLoc(),
830 TII
->get(TargetOpcode::COPY
), SplitReg
)
833 BBJ
.substituteRegister(Def
, SplitReg
, 0, *TRI
);
837 // Search through each of the epilog blocks for any uses to be renamed.
838 for (auto &Epilog
: EpilogBBs
)
839 for (auto &I
: *Epilog
)
840 if (I
.readsRegister(Def
, /*TRI=*/nullptr))
841 I
.substituteRegister(Def
, SplitReg
, 0, *TRI
);
848 /// Remove the incoming block from the Phis in a basic block.
849 static void removePhis(MachineBasicBlock
*BB
, MachineBasicBlock
*Incoming
) {
850 for (MachineInstr
&MI
: *BB
) {
853 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2)
854 if (MI
.getOperand(i
+ 1).getMBB() == Incoming
) {
855 MI
.removeOperand(i
+ 1);
862 /// Create branches from each prolog basic block to the appropriate epilog
863 /// block. These edges are needed if the loop ends before reaching the
865 void ModuloScheduleExpander::addBranches(MachineBasicBlock
&PreheaderBB
,
866 MBBVectorTy
&PrologBBs
,
867 MachineBasicBlock
*KernelBB
,
868 MBBVectorTy
&EpilogBBs
,
870 assert(PrologBBs
.size() == EpilogBBs
.size() && "Prolog/Epilog mismatch");
871 MachineBasicBlock
*LastPro
= KernelBB
;
872 MachineBasicBlock
*LastEpi
= KernelBB
;
874 // Start from the blocks connected to the kernel and work "out"
875 // to the first prolog and the last epilog blocks.
876 SmallVector
<MachineInstr
*, 4> PrevInsts
;
877 unsigned MaxIter
= PrologBBs
.size() - 1;
878 for (unsigned i
= 0, j
= MaxIter
; i
<= MaxIter
; ++i
, --j
) {
879 // Add branches to the prolog that go to the corresponding
880 // epilog, and the fall-thru prolog/kernel block.
881 MachineBasicBlock
*Prolog
= PrologBBs
[j
];
882 MachineBasicBlock
*Epilog
= EpilogBBs
[i
];
884 SmallVector
<MachineOperand
, 4> Cond
;
885 std::optional
<bool> StaticallyGreater
=
886 LoopInfo
->createTripCountGreaterCondition(j
+ 1, *Prolog
, Cond
);
887 unsigned numAdded
= 0;
888 if (!StaticallyGreater
) {
889 Prolog
->addSuccessor(Epilog
);
890 numAdded
= TII
->insertBranch(*Prolog
, Epilog
, LastPro
, Cond
, DebugLoc());
891 } else if (*StaticallyGreater
== false) {
892 Prolog
->addSuccessor(Epilog
);
893 Prolog
->removeSuccessor(LastPro
);
894 LastEpi
->removeSuccessor(Epilog
);
895 numAdded
= TII
->insertBranch(*Prolog
, Epilog
, nullptr, Cond
, DebugLoc());
896 removePhis(Epilog
, LastEpi
);
897 // Remove the blocks that are no longer referenced.
898 if (LastPro
!= LastEpi
) {
900 LastEpi
->eraseFromParent();
902 if (LastPro
== KernelBB
) {
903 LoopInfo
->disposed(&LIS
);
907 LastPro
->eraseFromParent();
909 numAdded
= TII
->insertBranch(*Prolog
, LastPro
, nullptr, Cond
, DebugLoc());
910 removePhis(Epilog
, Prolog
);
914 for (MachineBasicBlock::reverse_instr_iterator I
= Prolog
->instr_rbegin(),
915 E
= Prolog
->instr_rend();
916 I
!= E
&& numAdded
> 0; ++I
, --numAdded
)
917 updateInstruction(&*I
, false, j
, 0, VRMap
);
921 LoopInfo
->setPreheader(PrologBBs
[MaxIter
]);
922 LoopInfo
->adjustTripCount(-(MaxIter
+ 1));
926 /// Return true if we can compute the amount the instruction changes
927 /// during each iteration. Set Delta to the amount of the change.
928 bool ModuloScheduleExpander::computeDelta(MachineInstr
&MI
, unsigned &Delta
) {
929 const TargetRegisterInfo
*TRI
= MF
.getSubtarget().getRegisterInfo();
930 const MachineOperand
*BaseOp
;
932 bool OffsetIsScalable
;
933 if (!TII
->getMemOperandWithOffset(MI
, BaseOp
, Offset
, OffsetIsScalable
, TRI
))
936 // FIXME: This algorithm assumes instructions have fixed-size offsets.
937 if (OffsetIsScalable
)
940 if (!BaseOp
->isReg())
943 Register BaseReg
= BaseOp
->getReg();
945 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
946 // Check if there is a Phi. If so, get the definition in the loop.
947 MachineInstr
*BaseDef
= MRI
.getVRegDef(BaseReg
);
948 if (BaseDef
&& BaseDef
->isPHI()) {
949 BaseReg
= getLoopPhiReg(*BaseDef
, MI
.getParent());
950 BaseDef
= MRI
.getVRegDef(BaseReg
);
956 if (!TII
->getIncrementValue(*BaseDef
, D
) && D
>= 0)
963 /// Update the memory operand with a new offset when the pipeliner
964 /// generates a new copy of the instruction that refers to a
965 /// different memory location.
966 void ModuloScheduleExpander::updateMemOperands(MachineInstr
&NewMI
,
971 // If the instruction has memory operands, then adjust the offset
972 // when the instruction appears in different stages.
973 if (NewMI
.memoperands_empty())
975 SmallVector
<MachineMemOperand
*, 2> NewMMOs
;
976 for (MachineMemOperand
*MMO
: NewMI
.memoperands()) {
977 // TODO: Figure out whether isAtomic is really necessary (see D57601).
978 if (MMO
->isVolatile() || MMO
->isAtomic() ||
979 (MMO
->isInvariant() && MMO
->isDereferenceable()) ||
980 (!MMO
->getValue())) {
981 NewMMOs
.push_back(MMO
);
985 if (Num
!= UINT_MAX
&& computeDelta(OldMI
, Delta
)) {
986 int64_t AdjOffset
= Delta
* Num
;
988 MF
.getMachineMemOperand(MMO
, AdjOffset
, MMO
->getSize()));
990 NewMMOs
.push_back(MF
.getMachineMemOperand(
991 MMO
, 0, LocationSize::beforeOrAfterPointer()));
994 NewMI
.setMemRefs(MF
, NewMMOs
);
997 /// Clone the instruction for the new pipelined loop and update the
998 /// memory operands, if needed.
999 MachineInstr
*ModuloScheduleExpander::cloneInstr(MachineInstr
*OldMI
,
1000 unsigned CurStageNum
,
1001 unsigned InstStageNum
) {
1002 MachineInstr
*NewMI
= MF
.CloneMachineInstr(OldMI
);
1003 updateMemOperands(*NewMI
, *OldMI
, CurStageNum
- InstStageNum
);
1007 /// Clone the instruction for the new pipelined loop. If needed, this
1008 /// function updates the instruction using the values saved in the
1009 /// InstrChanges structure.
1010 MachineInstr
*ModuloScheduleExpander::cloneAndChangeInstr(
1011 MachineInstr
*OldMI
, unsigned CurStageNum
, unsigned InstStageNum
) {
1012 MachineInstr
*NewMI
= MF
.CloneMachineInstr(OldMI
);
1013 auto It
= InstrChanges
.find(OldMI
);
1014 if (It
!= InstrChanges
.end()) {
1015 std::pair
<unsigned, int64_t> RegAndOffset
= It
->second
;
1016 unsigned BasePos
, OffsetPos
;
1017 if (!TII
->getBaseAndOffsetPosition(*OldMI
, BasePos
, OffsetPos
))
1019 int64_t NewOffset
= OldMI
->getOperand(OffsetPos
).getImm();
1020 MachineInstr
*LoopDef
= findDefInLoop(RegAndOffset
.first
);
1021 if (Schedule
.getStage(LoopDef
) > (signed)InstStageNum
)
1022 NewOffset
+= RegAndOffset
.second
* (CurStageNum
- InstStageNum
);
1023 NewMI
->getOperand(OffsetPos
).setImm(NewOffset
);
1025 updateMemOperands(*NewMI
, *OldMI
, CurStageNum
- InstStageNum
);
1029 /// Update the machine instruction with new virtual registers. This
1030 /// function may change the definitions and/or uses.
1031 void ModuloScheduleExpander::updateInstruction(MachineInstr
*NewMI
,
1033 unsigned CurStageNum
,
1034 unsigned InstrStageNum
,
1035 ValueMapTy
*VRMap
) {
1036 for (MachineOperand
&MO
: NewMI
->operands()) {
1037 if (!MO
.isReg() || !MO
.getReg().isVirtual())
1039 Register reg
= MO
.getReg();
1041 // Create a new virtual register for the definition.
1042 const TargetRegisterClass
*RC
= MRI
.getRegClass(reg
);
1043 Register NewReg
= MRI
.createVirtualRegister(RC
);
1045 VRMap
[CurStageNum
][reg
] = NewReg
;
1047 replaceRegUsesAfterLoop(reg
, NewReg
, BB
, MRI
, LIS
);
1048 } else if (MO
.isUse()) {
1049 MachineInstr
*Def
= MRI
.getVRegDef(reg
);
1050 // Compute the stage that contains the last definition for instruction.
1051 int DefStageNum
= Schedule
.getStage(Def
);
1052 unsigned StageNum
= CurStageNum
;
1053 if (DefStageNum
!= -1 && (int)InstrStageNum
> DefStageNum
) {
1054 // Compute the difference in stages between the defintion and the use.
1055 unsigned StageDiff
= (InstrStageNum
- DefStageNum
);
1056 // Make an adjustment to get the last definition.
1057 StageNum
-= StageDiff
;
1059 if (auto It
= VRMap
[StageNum
].find(reg
); It
!= VRMap
[StageNum
].end())
1060 MO
.setReg(It
->second
);
1065 /// Return the instruction in the loop that defines the register.
1066 /// If the definition is a Phi, then follow the Phi operand to
1067 /// the instruction in the loop.
1068 MachineInstr
*ModuloScheduleExpander::findDefInLoop(unsigned Reg
) {
1069 SmallPtrSet
<MachineInstr
*, 8> Visited
;
1070 MachineInstr
*Def
= MRI
.getVRegDef(Reg
);
1071 while (Def
->isPHI()) {
1072 if (!Visited
.insert(Def
).second
)
1074 for (unsigned i
= 1, e
= Def
->getNumOperands(); i
< e
; i
+= 2)
1075 if (Def
->getOperand(i
+ 1).getMBB() == BB
) {
1076 Def
= MRI
.getVRegDef(Def
->getOperand(i
).getReg());
1083 /// Return the new name for the value from the previous stage.
1084 unsigned ModuloScheduleExpander::getPrevMapVal(
1085 unsigned StageNum
, unsigned PhiStage
, unsigned LoopVal
, unsigned LoopStage
,
1086 ValueMapTy
*VRMap
, MachineBasicBlock
*BB
) {
1087 unsigned PrevVal
= 0;
1088 if (StageNum
> PhiStage
) {
1089 MachineInstr
*LoopInst
= MRI
.getVRegDef(LoopVal
);
1090 if (PhiStage
== LoopStage
&& VRMap
[StageNum
- 1].count(LoopVal
))
1091 // The name is defined in the previous stage.
1092 PrevVal
= VRMap
[StageNum
- 1][LoopVal
];
1093 else if (VRMap
[StageNum
].count(LoopVal
))
1094 // The previous name is defined in the current stage when the instruction
1095 // order is swapped.
1096 PrevVal
= VRMap
[StageNum
][LoopVal
];
1097 else if (!LoopInst
->isPHI() || LoopInst
->getParent() != BB
)
1098 // The loop value hasn't yet been scheduled.
1100 else if (StageNum
== PhiStage
+ 1)
1101 // The loop value is another phi, which has not been scheduled.
1102 PrevVal
= getInitPhiReg(*LoopInst
, BB
);
1103 else if (StageNum
> PhiStage
+ 1 && LoopInst
->getParent() == BB
)
1104 // The loop value is another phi, which has been scheduled.
1106 getPrevMapVal(StageNum
- 1, PhiStage
, getLoopPhiReg(*LoopInst
, BB
),
1107 LoopStage
, VRMap
, BB
);
1112 /// Rewrite the Phi values in the specified block to use the mappings
1113 /// from the initial operand. Once the Phi is scheduled, we switch
1114 /// to using the loop value instead of the Phi value, so those names
1115 /// do not need to be rewritten.
1116 void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock
*NewBB
,
1119 InstrMapTy
&InstrMap
) {
1120 for (auto &PHI
: BB
->phis()) {
1121 unsigned InitVal
= 0;
1122 unsigned LoopVal
= 0;
1123 getPhiRegs(PHI
, BB
, InitVal
, LoopVal
);
1124 Register PhiDef
= PHI
.getOperand(0).getReg();
1126 unsigned PhiStage
= (unsigned)Schedule
.getStage(MRI
.getVRegDef(PhiDef
));
1127 unsigned LoopStage
= (unsigned)Schedule
.getStage(MRI
.getVRegDef(LoopVal
));
1128 unsigned NumPhis
= getStagesForPhi(PhiDef
);
1129 if (NumPhis
> StageNum
)
1131 for (unsigned np
= 0; np
<= NumPhis
; ++np
) {
1133 getPrevMapVal(StageNum
- np
, PhiStage
, LoopVal
, LoopStage
, VRMap
, BB
);
1136 rewriteScheduledInstr(NewBB
, InstrMap
, StageNum
- np
, np
, &PHI
, PhiDef
,
1142 /// Rewrite a previously scheduled instruction to use the register value
1143 /// from the new instruction. Make sure the instruction occurs in the
1144 /// basic block, and we don't change the uses in the new instruction.
1145 void ModuloScheduleExpander::rewriteScheduledInstr(
1146 MachineBasicBlock
*BB
, InstrMapTy
&InstrMap
, unsigned CurStageNum
,
1147 unsigned PhiNum
, MachineInstr
*Phi
, unsigned OldReg
, unsigned NewReg
,
1149 bool InProlog
= (CurStageNum
< (unsigned)Schedule
.getNumStages() - 1);
1150 int StagePhi
= Schedule
.getStage(Phi
) + PhiNum
;
1151 // Rewrite uses that have been scheduled already to use the new
1153 for (MachineOperand
&UseOp
:
1154 llvm::make_early_inc_range(MRI
.use_operands(OldReg
))) {
1155 MachineInstr
*UseMI
= UseOp
.getParent();
1156 if (UseMI
->getParent() != BB
)
1158 if (UseMI
->isPHI()) {
1159 if (!Phi
->isPHI() && UseMI
->getOperand(0).getReg() == NewReg
)
1161 if (getLoopPhiReg(*UseMI
, BB
) != OldReg
)
1164 InstrMapTy::iterator OrigInstr
= InstrMap
.find(UseMI
);
1165 assert(OrigInstr
!= InstrMap
.end() && "Instruction not scheduled.");
1166 MachineInstr
*OrigMI
= OrigInstr
->second
;
1167 int StageSched
= Schedule
.getStage(OrigMI
);
1168 int CycleSched
= Schedule
.getCycle(OrigMI
);
1169 unsigned ReplaceReg
= 0;
1170 // This is the stage for the scheduled instruction.
1171 if (StagePhi
== StageSched
&& Phi
->isPHI()) {
1172 int CyclePhi
= Schedule
.getCycle(Phi
);
1173 if (PrevReg
&& InProlog
)
1174 ReplaceReg
= PrevReg
;
1175 else if (PrevReg
&& !isLoopCarried(*Phi
) &&
1176 (CyclePhi
<= CycleSched
|| OrigMI
->isPHI()))
1177 ReplaceReg
= PrevReg
;
1179 ReplaceReg
= NewReg
;
1181 // The scheduled instruction occurs before the scheduled Phi, and the
1182 // Phi is not loop carried.
1183 if (!InProlog
&& StagePhi
+ 1 == StageSched
&& !isLoopCarried(*Phi
))
1184 ReplaceReg
= NewReg
;
1185 if (StagePhi
> StageSched
&& Phi
->isPHI())
1186 ReplaceReg
= NewReg
;
1187 if (!InProlog
&& !Phi
->isPHI() && StagePhi
< StageSched
)
1188 ReplaceReg
= NewReg
;
1190 const TargetRegisterClass
*NRC
=
1191 MRI
.constrainRegClass(ReplaceReg
, MRI
.getRegClass(OldReg
));
1193 UseOp
.setReg(ReplaceReg
);
1195 Register SplitReg
= MRI
.createVirtualRegister(MRI
.getRegClass(OldReg
));
1196 BuildMI(*BB
, UseMI
, UseMI
->getDebugLoc(), TII
->get(TargetOpcode::COPY
),
1198 .addReg(ReplaceReg
);
1199 UseOp
.setReg(SplitReg
);
1205 bool ModuloScheduleExpander::isLoopCarried(MachineInstr
&Phi
) {
1208 int DefCycle
= Schedule
.getCycle(&Phi
);
1209 int DefStage
= Schedule
.getStage(&Phi
);
1211 unsigned InitVal
= 0;
1212 unsigned LoopVal
= 0;
1213 getPhiRegs(Phi
, Phi
.getParent(), InitVal
, LoopVal
);
1214 MachineInstr
*Use
= MRI
.getVRegDef(LoopVal
);
1215 if (!Use
|| Use
->isPHI())
1217 int LoopCycle
= Schedule
.getCycle(Use
);
1218 int LoopStage
= Schedule
.getStage(Use
);
1219 return (LoopCycle
> DefCycle
) || (LoopStage
<= DefStage
);
1222 //===----------------------------------------------------------------------===//
1223 // PeelingModuloScheduleExpander implementation
1224 //===----------------------------------------------------------------------===//
1225 // This is a reimplementation of ModuloScheduleExpander that works by creating
1226 // a fully correct steady-state kernel and peeling off the prolog and epilogs.
1227 //===----------------------------------------------------------------------===//
1230 // Remove any dead phis in MBB. Dead phis either have only one block as input
1231 // (in which case they are the identity) or have no uses.
1232 void EliminateDeadPhis(MachineBasicBlock
*MBB
, MachineRegisterInfo
&MRI
,
1233 LiveIntervals
*LIS
, bool KeepSingleSrcPhi
= false) {
1234 bool Changed
= true;
1237 for (MachineInstr
&MI
: llvm::make_early_inc_range(MBB
->phis())) {
1239 if (MRI
.use_empty(MI
.getOperand(0).getReg())) {
1241 LIS
->RemoveMachineInstrFromMaps(MI
);
1242 MI
.eraseFromParent();
1244 } else if (!KeepSingleSrcPhi
&& MI
.getNumExplicitOperands() == 3) {
1245 const TargetRegisterClass
*ConstrainRegClass
=
1246 MRI
.constrainRegClass(MI
.getOperand(1).getReg(),
1247 MRI
.getRegClass(MI
.getOperand(0).getReg()));
1248 assert(ConstrainRegClass
&&
1249 "Expected a valid constrained register class!");
1250 (void)ConstrainRegClass
;
1251 MRI
.replaceRegWith(MI
.getOperand(0).getReg(),
1252 MI
.getOperand(1).getReg());
1254 LIS
->RemoveMachineInstrFromMaps(MI
);
1255 MI
.eraseFromParent();
1262 /// Rewrites the kernel block in-place to adhere to the given schedule.
1263 /// KernelRewriter holds all of the state required to perform the rewriting.
1264 class KernelRewriter
{
1266 MachineBasicBlock
*BB
;
1267 MachineBasicBlock
*PreheaderBB
, *ExitBB
;
1268 MachineRegisterInfo
&MRI
;
1269 const TargetInstrInfo
*TII
;
1272 // Map from register class to canonical undef register for that class.
1273 DenseMap
<const TargetRegisterClass
*, Register
> Undefs
;
1274 // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1275 // this map is only used when InitReg is non-undef.
1276 DenseMap
<std::pair
<unsigned, unsigned>, Register
> Phis
;
1277 // Map from LoopReg to phi register where the InitReg is undef.
1278 DenseMap
<Register
, Register
> UndefPhis
;
1280 // Reg is used by MI. Return the new register MI should use to adhere to the
1281 // schedule. Insert phis as necessary.
1282 Register
remapUse(Register Reg
, MachineInstr
&MI
);
1283 // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1284 // If InitReg is not given it is chosen arbitrarily. It will either be undef
1285 // or will be chosen so as to share another phi.
1286 Register
phi(Register LoopReg
, std::optional
<Register
> InitReg
= {},
1287 const TargetRegisterClass
*RC
= nullptr);
1288 // Create an undef register of the given register class.
1289 Register
undef(const TargetRegisterClass
*RC
);
1292 KernelRewriter(MachineLoop
&L
, ModuloSchedule
&S
, MachineBasicBlock
*LoopBB
,
1293 LiveIntervals
*LIS
= nullptr);
1298 KernelRewriter::KernelRewriter(MachineLoop
&L
, ModuloSchedule
&S
,
1299 MachineBasicBlock
*LoopBB
, LiveIntervals
*LIS
)
1300 : S(S
), BB(LoopBB
), PreheaderBB(L
.getLoopPreheader()),
1301 ExitBB(L
.getExitBlock()), MRI(BB
->getParent()->getRegInfo()),
1302 TII(BB
->getParent()->getSubtarget().getInstrInfo()), LIS(LIS
) {
1303 PreheaderBB
= *BB
->pred_begin();
1304 if (PreheaderBB
== BB
)
1305 PreheaderBB
= *std::next(BB
->pred_begin());
1308 void KernelRewriter::rewrite() {
1309 // Rearrange the loop to be in schedule order. Note that the schedule may
1310 // contain instructions that are not owned by the loop block (InstrChanges and
1311 // friends), so we gracefully handle unowned instructions and delete any
1312 // instructions that weren't in the schedule.
1313 auto InsertPt
= BB
->getFirstTerminator();
1314 MachineInstr
*FirstMI
= nullptr;
1315 for (MachineInstr
*MI
: S
.getInstructions()) {
1318 if (MI
->getParent())
1319 MI
->removeFromParent();
1320 BB
->insert(InsertPt
, MI
);
1324 assert(FirstMI
&& "Failed to find first MI in schedule");
1326 // At this point all of the scheduled instructions are between FirstMI
1327 // and the end of the block. Kill from the first non-phi to FirstMI.
1328 for (auto I
= BB
->getFirstNonPHI(); I
!= FirstMI
->getIterator();) {
1330 LIS
->RemoveMachineInstrFromMaps(*I
);
1331 (I
++)->eraseFromParent();
1334 // Now remap every instruction in the loop.
1335 for (MachineInstr
&MI
: *BB
) {
1336 if (MI
.isPHI() || MI
.isTerminator())
1338 for (MachineOperand
&MO
: MI
.uses()) {
1339 if (!MO
.isReg() || MO
.getReg().isPhysical() || MO
.isImplicit())
1341 Register Reg
= remapUse(MO
.getReg(), MI
);
1345 EliminateDeadPhis(BB
, MRI
, LIS
);
1347 // Ensure a phi exists for all instructions that are either referenced by
1348 // an illegal phi or by an instruction outside the loop. This allows us to
1349 // treat remaps of these values the same as "normal" values that come from
1350 // loop-carried phis.
1351 for (auto MI
= BB
->getFirstNonPHI(); MI
!= BB
->end(); ++MI
) {
1353 Register R
= MI
->getOperand(0).getReg();
1358 for (MachineOperand
&Def
: MI
->defs()) {
1359 for (MachineInstr
&MI
: MRI
.use_instructions(Def
.getReg())) {
1360 if (MI
.getParent() != BB
) {
1369 Register
KernelRewriter::remapUse(Register Reg
, MachineInstr
&MI
) {
1370 MachineInstr
*Producer
= MRI
.getUniqueVRegDef(Reg
);
1374 int ConsumerStage
= S
.getStage(&MI
);
1375 if (!Producer
->isPHI()) {
1376 // Non-phi producers are simple to remap. Insert as many phis as the
1377 // difference between the consumer and producer stages.
1378 if (Producer
->getParent() != BB
)
1379 // Producer was not inside the loop. Use the register as-is.
1381 int ProducerStage
= S
.getStage(Producer
);
1382 assert(ConsumerStage
!= -1 &&
1383 "In-loop consumer should always be scheduled!");
1384 assert(ConsumerStage
>= ProducerStage
);
1385 unsigned StageDiff
= ConsumerStage
- ProducerStage
;
1387 for (unsigned I
= 0; I
< StageDiff
; ++I
)
1392 // First, dive through the phi chain to find the defaults for the generated
1394 SmallVector
<std::optional
<Register
>, 4> Defaults
;
1395 Register LoopReg
= Reg
;
1396 auto LoopProducer
= Producer
;
1397 while (LoopProducer
->isPHI() && LoopProducer
->getParent() == BB
) {
1398 LoopReg
= getLoopPhiReg(*LoopProducer
, BB
);
1399 Defaults
.emplace_back(getInitPhiReg(*LoopProducer
, BB
));
1400 LoopProducer
= MRI
.getUniqueVRegDef(LoopReg
);
1401 assert(LoopProducer
);
1403 int LoopProducerStage
= S
.getStage(LoopProducer
);
1405 std::optional
<Register
> IllegalPhiDefault
;
1407 if (LoopProducerStage
== -1) {
1409 } else if (LoopProducerStage
> ConsumerStage
) {
1410 // This schedule is only representable if ProducerStage == ConsumerStage+1.
1411 // In addition, Consumer's cycle must be scheduled after Producer in the
1412 // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1414 #ifndef NDEBUG // Silence unused variables in non-asserts mode.
1415 int LoopProducerCycle
= S
.getCycle(LoopProducer
);
1416 int ConsumerCycle
= S
.getCycle(&MI
);
1418 assert(LoopProducerCycle
<= ConsumerCycle
);
1419 assert(LoopProducerStage
== ConsumerStage
+ 1);
1420 // Peel off the first phi from Defaults and insert a phi between producer
1421 // and consumer. This phi will not be at the front of the block so we
1422 // consider it illegal. It will only exist during the rewrite process; it
1423 // needs to exist while we peel off prologs because these could take the
1424 // default value. After that we can replace all uses with the loop producer
1426 IllegalPhiDefault
= Defaults
.front();
1427 Defaults
.erase(Defaults
.begin());
1429 assert(ConsumerStage
>= LoopProducerStage
);
1430 int StageDiff
= ConsumerStage
- LoopProducerStage
;
1431 if (StageDiff
> 0) {
1432 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults
.size()
1433 << " to " << (Defaults
.size() + StageDiff
) << "\n");
1434 // If we need more phis than we have defaults for, pad out with undefs for
1435 // the earliest phis, which are at the end of the defaults chain (the
1436 // chain is in reverse order).
1437 Defaults
.resize(Defaults
.size() + StageDiff
,
1438 Defaults
.empty() ? std::optional
<Register
>()
1443 // Now we know the number of stages to jump back, insert the phi chain.
1444 auto DefaultI
= Defaults
.rbegin();
1445 while (DefaultI
!= Defaults
.rend())
1446 LoopReg
= phi(LoopReg
, *DefaultI
++, MRI
.getRegClass(Reg
));
1448 if (IllegalPhiDefault
) {
1449 // The consumer optionally consumes LoopProducer in the same iteration
1450 // (because the producer is scheduled at an earlier cycle than the consumer)
1451 // or the initial value. To facilitate this we create an illegal block here
1452 // by embedding a phi in the middle of the block. We will fix this up
1453 // immediately prior to pruning.
1454 auto RC
= MRI
.getRegClass(Reg
);
1455 Register R
= MRI
.createVirtualRegister(RC
);
1456 MachineInstr
*IllegalPhi
=
1457 BuildMI(*BB
, MI
, DebugLoc(), TII
->get(TargetOpcode::PHI
), R
)
1458 .addReg(*IllegalPhiDefault
)
1459 .addMBB(PreheaderBB
) // Block choice is arbitrary and has no effect.
1461 .addMBB(BB
); // Block choice is arbitrary and has no effect.
1462 // Illegal phi should belong to the producer stage so that it can be
1463 // filtered correctly during peeling.
1464 S
.setStage(IllegalPhi
, LoopProducerStage
);
1471 Register
KernelRewriter::phi(Register LoopReg
, std::optional
<Register
> InitReg
,
1472 const TargetRegisterClass
*RC
) {
1473 // If the init register is not undef, try and find an existing phi.
1475 auto I
= Phis
.find({LoopReg
, *InitReg
});
1476 if (I
!= Phis
.end())
1479 for (auto &KV
: Phis
) {
1480 if (KV
.first
.first
== LoopReg
)
1485 // InitReg is either undef or no existing phi takes InitReg as input. Try and
1486 // find a phi that takes undef as input.
1487 auto I
= UndefPhis
.find(LoopReg
);
1488 if (I
!= UndefPhis
.end()) {
1489 Register R
= I
->second
;
1491 // Found a phi taking undef as input, and this input is undef so return
1492 // without any more changes.
1494 // Found a phi taking undef as input, so rewrite it to take InitReg.
1495 MachineInstr
*MI
= MRI
.getVRegDef(R
);
1496 MI
->getOperand(1).setReg(*InitReg
);
1497 Phis
.insert({{LoopReg
, *InitReg
}, R
});
1498 const TargetRegisterClass
*ConstrainRegClass
=
1499 MRI
.constrainRegClass(R
, MRI
.getRegClass(*InitReg
));
1500 assert(ConstrainRegClass
&& "Expected a valid constrained register class!");
1501 (void)ConstrainRegClass
;
1506 // Failed to find any existing phi to reuse, so create a new one.
1508 RC
= MRI
.getRegClass(LoopReg
);
1509 Register R
= MRI
.createVirtualRegister(RC
);
1511 const TargetRegisterClass
*ConstrainRegClass
=
1512 MRI
.constrainRegClass(R
, MRI
.getRegClass(*InitReg
));
1513 assert(ConstrainRegClass
&& "Expected a valid constrained register class!");
1514 (void)ConstrainRegClass
;
1516 BuildMI(*BB
, BB
->getFirstNonPHI(), DebugLoc(), TII
->get(TargetOpcode::PHI
), R
)
1517 .addReg(InitReg
? *InitReg
: undef(RC
))
1518 .addMBB(PreheaderBB
)
1522 UndefPhis
[LoopReg
] = R
;
1524 Phis
[{LoopReg
, *InitReg
}] = R
;
1528 Register
KernelRewriter::undef(const TargetRegisterClass
*RC
) {
1529 Register
&R
= Undefs
[RC
];
1531 // Create an IMPLICIT_DEF that defines this register if we need it.
1532 // All uses of this should be removed by the time we have finished unrolling
1533 // prologs and epilogs.
1534 R
= MRI
.createVirtualRegister(RC
);
1535 auto *InsertBB
= &PreheaderBB
->getParent()->front();
1536 BuildMI(*InsertBB
, InsertBB
->getFirstTerminator(), DebugLoc(),
1537 TII
->get(TargetOpcode::IMPLICIT_DEF
), R
);
1543 /// Describes an operand in the kernel of a pipelined loop. Characteristics of
1544 /// the operand are discovered, such as how many in-loop PHIs it has to jump
1545 /// through and defaults for these phis.
1546 class KernelOperandInfo
{
1547 MachineBasicBlock
*BB
;
1548 MachineRegisterInfo
&MRI
;
1549 SmallVector
<Register
, 4> PhiDefaults
;
1550 MachineOperand
*Source
;
1551 MachineOperand
*Target
;
1554 KernelOperandInfo(MachineOperand
*MO
, MachineRegisterInfo
&MRI
,
1555 const SmallPtrSetImpl
<MachineInstr
*> &IllegalPhis
)
1558 BB
= MO
->getParent()->getParent();
1559 while (isRegInLoop(MO
)) {
1560 MachineInstr
*MI
= MRI
.getVRegDef(MO
->getReg());
1561 if (MI
->isFullCopy()) {
1562 MO
= &MI
->getOperand(1);
1567 // If this is an illegal phi, don't count it in distance.
1568 if (IllegalPhis
.count(MI
)) {
1569 MO
= &MI
->getOperand(3);
1573 Register Default
= getInitPhiReg(*MI
, BB
);
1574 MO
= MI
->getOperand(2).getMBB() == BB
? &MI
->getOperand(1)
1575 : &MI
->getOperand(3);
1576 PhiDefaults
.push_back(Default
);
1581 bool operator==(const KernelOperandInfo
&Other
) const {
1582 return PhiDefaults
.size() == Other
.PhiDefaults
.size();
1585 void print(raw_ostream
&OS
) const {
1586 OS
<< "use of " << *Source
<< ": distance(" << PhiDefaults
.size() << ") in "
1587 << *Source
->getParent();
1591 bool isRegInLoop(MachineOperand
*MO
) {
1592 return MO
->isReg() && MO
->getReg().isVirtual() &&
1593 MRI
.getVRegDef(MO
->getReg())->getParent() == BB
;
1599 PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD
) {
1600 MachineBasicBlock
*NewBB
= PeelSingleBlockLoop(LPD
, BB
, MRI
, TII
);
1601 if (LPD
== LPD_Front
)
1602 PeeledFront
.push_back(NewBB
);
1604 PeeledBack
.push_front(NewBB
);
1605 for (auto I
= BB
->begin(), NI
= NewBB
->begin(); !I
->isTerminator();
1607 CanonicalMIs
[&*I
] = &*I
;
1608 CanonicalMIs
[&*NI
] = &*I
;
1609 BlockMIs
[{NewBB
, &*I
}] = &*NI
;
1610 BlockMIs
[{BB
, &*I
}] = &*I
;
1615 void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock
*MB
,
1617 for (auto I
= MB
->getFirstInstrTerminator()->getReverseIterator();
1618 I
!= std::next(MB
->getFirstNonPHI()->getReverseIterator());) {
1619 MachineInstr
*MI
= &*I
++;
1620 int Stage
= getStage(MI
);
1621 if (Stage
== -1 || Stage
>= MinStage
)
1624 for (MachineOperand
&DefMO
: MI
->defs()) {
1625 SmallVector
<std::pair
<MachineInstr
*, Register
>, 4> Subs
;
1626 for (MachineInstr
&UseMI
: MRI
.use_instructions(DefMO
.getReg())) {
1627 // Only PHIs can use values from this block by construction.
1628 // Match with the equivalent PHI in B.
1629 assert(UseMI
.isPHI());
1630 Register Reg
= getEquivalentRegisterIn(UseMI
.getOperand(0).getReg(),
1632 Subs
.emplace_back(&UseMI
, Reg
);
1634 for (auto &Sub
: Subs
)
1635 Sub
.first
->substituteRegister(DefMO
.getReg(), Sub
.second
, /*SubIdx=*/0,
1636 *MRI
.getTargetRegisterInfo());
1639 LIS
->RemoveMachineInstrFromMaps(*MI
);
1640 MI
->eraseFromParent();
1644 void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
1645 MachineBasicBlock
*DestBB
, MachineBasicBlock
*SourceBB
, unsigned Stage
) {
1646 auto InsertPt
= DestBB
->getFirstNonPHI();
1647 DenseMap
<Register
, Register
> Remaps
;
1648 for (MachineInstr
&MI
: llvm::make_early_inc_range(
1649 llvm::make_range(SourceBB
->getFirstNonPHI(), SourceBB
->end()))) {
1651 // This is an illegal PHI. If we move any instructions using an illegal
1652 // PHI, we need to create a legal Phi.
1653 if (getStage(&MI
) != Stage
) {
1654 // The legal Phi is not necessary if the illegal phi's stage
1656 Register PhiR
= MI
.getOperand(0).getReg();
1657 auto RC
= MRI
.getRegClass(PhiR
);
1658 Register NR
= MRI
.createVirtualRegister(RC
);
1659 MachineInstr
*NI
= BuildMI(*DestBB
, DestBB
->getFirstNonPHI(),
1660 DebugLoc(), TII
->get(TargetOpcode::PHI
), NR
)
1663 BlockMIs
[{DestBB
, CanonicalMIs
[&MI
]}] = NI
;
1664 CanonicalMIs
[NI
] = CanonicalMIs
[&MI
];
1668 if (getStage(&MI
) != Stage
)
1670 MI
.removeFromParent();
1671 DestBB
->insert(InsertPt
, &MI
);
1672 auto *KernelMI
= CanonicalMIs
[&MI
];
1673 BlockMIs
[{DestBB
, KernelMI
}] = &MI
;
1674 BlockMIs
.erase({SourceBB
, KernelMI
});
1676 SmallVector
<MachineInstr
*, 4> PhiToDelete
;
1677 for (MachineInstr
&MI
: DestBB
->phis()) {
1678 assert(MI
.getNumOperands() == 3);
1679 MachineInstr
*Def
= MRI
.getVRegDef(MI
.getOperand(1).getReg());
1680 // If the instruction referenced by the phi is moved inside the block
1681 // we don't need the phi anymore.
1682 if (getStage(Def
) == Stage
) {
1683 Register PhiReg
= MI
.getOperand(0).getReg();
1684 assert(Def
->findRegisterDefOperandIdx(MI
.getOperand(1).getReg(),
1685 /*TRI=*/nullptr) != -1);
1686 MRI
.replaceRegWith(MI
.getOperand(0).getReg(), MI
.getOperand(1).getReg());
1687 MI
.getOperand(0).setReg(PhiReg
);
1688 PhiToDelete
.push_back(&MI
);
1691 for (auto *P
: PhiToDelete
)
1692 P
->eraseFromParent();
1693 InsertPt
= DestBB
->getFirstNonPHI();
1694 // Helper to clone Phi instructions into the destination block. We clone Phi
1695 // greedily to avoid combinatorial explosion of Phi instructions.
1696 auto clonePhi
= [&](MachineInstr
*Phi
) {
1697 MachineInstr
*NewMI
= MF
.CloneMachineInstr(Phi
);
1698 DestBB
->insert(InsertPt
, NewMI
);
1699 Register OrigR
= Phi
->getOperand(0).getReg();
1700 Register R
= MRI
.createVirtualRegister(MRI
.getRegClass(OrigR
));
1701 NewMI
->getOperand(0).setReg(R
);
1702 NewMI
->getOperand(1).setReg(OrigR
);
1703 NewMI
->getOperand(2).setMBB(*DestBB
->pred_begin());
1705 CanonicalMIs
[NewMI
] = CanonicalMIs
[Phi
];
1706 BlockMIs
[{DestBB
, CanonicalMIs
[Phi
]}] = NewMI
;
1707 PhiNodeLoopIteration
[NewMI
] = PhiNodeLoopIteration
[Phi
];
1710 for (auto I
= DestBB
->getFirstNonPHI(); I
!= DestBB
->end(); ++I
) {
1711 for (MachineOperand
&MO
: I
->uses()) {
1714 if (auto It
= Remaps
.find(MO
.getReg()); It
!= Remaps
.end())
1715 MO
.setReg(It
->second
);
1717 // If we are using a phi from the source block we need to add a new phi
1718 // pointing to the old one.
1719 MachineInstr
*Use
= MRI
.getUniqueVRegDef(MO
.getReg());
1720 if (Use
&& Use
->isPHI() && Use
->getParent() == SourceBB
) {
1721 Register R
= clonePhi(Use
);
1730 PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr
*CanonicalPhi
,
1731 MachineInstr
*Phi
) {
1732 unsigned distance
= PhiNodeLoopIteration
[Phi
];
1733 MachineInstr
*CanonicalUse
= CanonicalPhi
;
1734 Register CanonicalUseReg
= CanonicalUse
->getOperand(0).getReg();
1735 for (unsigned I
= 0; I
< distance
; ++I
) {
1736 assert(CanonicalUse
->isPHI());
1737 assert(CanonicalUse
->getNumOperands() == 5);
1738 unsigned LoopRegIdx
= 3, InitRegIdx
= 1;
1739 if (CanonicalUse
->getOperand(2).getMBB() == CanonicalUse
->getParent())
1740 std::swap(LoopRegIdx
, InitRegIdx
);
1741 CanonicalUseReg
= CanonicalUse
->getOperand(LoopRegIdx
).getReg();
1742 CanonicalUse
= MRI
.getVRegDef(CanonicalUseReg
);
1744 return CanonicalUseReg
;
1747 void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
1748 BitVector
LS(Schedule
.getNumStages(), true);
1749 BitVector
AS(Schedule
.getNumStages(), true);
1750 LiveStages
[BB
] = LS
;
1751 AvailableStages
[BB
] = AS
;
1753 // Peel out the prologs.
1755 for (int I
= 0; I
< Schedule
.getNumStages() - 1; ++I
) {
1757 Prologs
.push_back(peelKernel(LPD_Front
));
1758 LiveStages
[Prologs
.back()] = LS
;
1759 AvailableStages
[Prologs
.back()] = LS
;
1762 // Create a block that will end up as the new loop exiting block (dominated by
1763 // all prologs and epilogs). It will only contain PHIs, in the same order as
1764 // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1765 // that the exiting block is a (sub) clone of BB. This in turn gives us the
1766 // property that any value deffed in BB but used outside of BB is used by a
1767 // PHI in the exiting block.
1768 MachineBasicBlock
*ExitingBB
= CreateLCSSAExitingBlock();
1769 EliminateDeadPhis(ExitingBB
, MRI
, LIS
, /*KeepSingleSrcPhi=*/true);
1770 // Push out the epilogs, again in reverse order.
1771 // We can't assume anything about the minumum loop trip count at this point,
1772 // so emit a fairly complex epilog.
1774 // We first peel number of stages minus one epilogue. Then we remove dead
1775 // stages and reorder instructions based on their stage. If we have 3 stages
1776 // we generate first:
1780 // And then we move instructions based on their stages to have:
1784 // The transformation is legal because we only move instructions past
1785 // instructions of a previous loop iteration.
1786 for (int I
= 1; I
<= Schedule
.getNumStages() - 1; ++I
) {
1787 Epilogs
.push_back(peelKernel(LPD_Back
));
1788 MachineBasicBlock
*B
= Epilogs
.back();
1789 filterInstructions(B
, Schedule
.getNumStages() - I
);
1790 // Keep track at which iteration each phi belongs to. We need it to know
1791 // what version of the variable to use during prologue/epilogue stitching.
1792 EliminateDeadPhis(B
, MRI
, LIS
, /*KeepSingleSrcPhi=*/true);
1793 for (MachineInstr
&Phi
: B
->phis())
1794 PhiNodeLoopIteration
[&Phi
] = Schedule
.getNumStages() - I
;
1796 for (size_t I
= 0; I
< Epilogs
.size(); I
++) {
1798 for (size_t J
= I
; J
< Epilogs
.size(); J
++) {
1800 unsigned Stage
= Schedule
.getNumStages() - 1 + I
- J
;
1801 // Move stage one block at a time so that Phi nodes are updated correctly.
1802 for (size_t K
= Iteration
; K
> I
; K
--)
1803 moveStageBetweenBlocks(Epilogs
[K
- 1], Epilogs
[K
], Stage
);
1806 LiveStages
[Epilogs
[I
]] = LS
;
1807 AvailableStages
[Epilogs
[I
]] = AS
;
1810 // Now we've defined all the prolog and epilog blocks as a fallthrough
1811 // sequence, add the edges that will be followed if the loop trip count is
1812 // lower than the number of stages (connecting prologs directly with epilogs).
1813 auto PI
= Prologs
.begin();
1814 auto EI
= Epilogs
.begin();
1815 assert(Prologs
.size() == Epilogs
.size());
1816 for (; PI
!= Prologs
.end(); ++PI
, ++EI
) {
1817 MachineBasicBlock
*Pred
= *(*EI
)->pred_begin();
1818 (*PI
)->addSuccessor(*EI
);
1819 for (MachineInstr
&MI
: (*EI
)->phis()) {
1820 Register Reg
= MI
.getOperand(1).getReg();
1821 MachineInstr
*Use
= MRI
.getUniqueVRegDef(Reg
);
1822 if (Use
&& Use
->getParent() == Pred
) {
1823 MachineInstr
*CanonicalUse
= CanonicalMIs
[Use
];
1824 if (CanonicalUse
->isPHI()) {
1825 // If the use comes from a phi we need to skip as many phi as the
1826 // distance between the epilogue and the kernel. Trace through the phi
1827 // chain to find the right value.
1828 Reg
= getPhiCanonicalReg(CanonicalUse
, Use
);
1830 Reg
= getEquivalentRegisterIn(Reg
, *PI
);
1832 MI
.addOperand(MachineOperand::CreateReg(Reg
, /*isDef=*/false));
1833 MI
.addOperand(MachineOperand::CreateMBB(*PI
));
1837 // Create a list of all blocks in order.
1838 SmallVector
<MachineBasicBlock
*, 8> Blocks
;
1839 llvm::copy(PeeledFront
, std::back_inserter(Blocks
));
1840 Blocks
.push_back(BB
);
1841 llvm::copy(PeeledBack
, std::back_inserter(Blocks
));
1843 // Iterate in reverse order over all instructions, remapping as we go.
1844 for (MachineBasicBlock
*B
: reverse(Blocks
)) {
1845 for (auto I
= B
->instr_rbegin();
1846 I
!= std::next(B
->getFirstNonPHI()->getReverseIterator());) {
1847 MachineBasicBlock::reverse_instr_iterator MI
= I
++;
1848 rewriteUsesOf(&*MI
);
1851 for (auto *MI
: IllegalPhisToDelete
) {
1853 LIS
->RemoveMachineInstrFromMaps(*MI
);
1854 MI
->eraseFromParent();
1856 IllegalPhisToDelete
.clear();
1858 // Now all remapping has been done, we're free to optimize the generated code.
1859 for (MachineBasicBlock
*B
: reverse(Blocks
))
1860 EliminateDeadPhis(B
, MRI
, LIS
);
1861 EliminateDeadPhis(ExitingBB
, MRI
, LIS
);
1864 MachineBasicBlock
*PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
1865 MachineFunction
&MF
= *BB
->getParent();
1866 MachineBasicBlock
*Exit
= *BB
->succ_begin();
1868 Exit
= *std::next(BB
->succ_begin());
1870 MachineBasicBlock
*NewBB
= MF
.CreateMachineBasicBlock(BB
->getBasicBlock());
1871 MF
.insert(std::next(BB
->getIterator()), NewBB
);
1873 // Clone all phis in BB into NewBB and rewrite.
1874 for (MachineInstr
&MI
: BB
->phis()) {
1875 auto RC
= MRI
.getRegClass(MI
.getOperand(0).getReg());
1876 Register OldR
= MI
.getOperand(3).getReg();
1877 Register R
= MRI
.createVirtualRegister(RC
);
1878 SmallVector
<MachineInstr
*, 4> Uses
;
1879 for (MachineInstr
&Use
: MRI
.use_instructions(OldR
))
1880 if (Use
.getParent() != BB
)
1881 Uses
.push_back(&Use
);
1882 for (MachineInstr
*Use
: Uses
)
1883 Use
->substituteRegister(OldR
, R
, /*SubIdx=*/0,
1884 *MRI
.getTargetRegisterInfo());
1885 MachineInstr
*NI
= BuildMI(NewBB
, DebugLoc(), TII
->get(TargetOpcode::PHI
), R
)
1888 BlockMIs
[{NewBB
, &MI
}] = NI
;
1889 CanonicalMIs
[NI
] = &MI
;
1891 BB
->replaceSuccessor(Exit
, NewBB
);
1892 Exit
->replacePhiUsesWith(BB
, NewBB
);
1893 NewBB
->addSuccessor(Exit
);
1895 MachineBasicBlock
*TBB
= nullptr, *FBB
= nullptr;
1896 SmallVector
<MachineOperand
, 4> Cond
;
1897 bool CanAnalyzeBr
= !TII
->analyzeBranch(*BB
, TBB
, FBB
, Cond
);
1899 assert(CanAnalyzeBr
&& "Must be able to analyze the loop branch!");
1900 TII
->removeBranch(*BB
);
1901 TII
->insertBranch(*BB
, TBB
== Exit
? NewBB
: TBB
, FBB
== Exit
? NewBB
: FBB
,
1903 TII
->insertUnconditionalBranch(*NewBB
, Exit
, DebugLoc());
1908 PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg
,
1909 MachineBasicBlock
*BB
) {
1910 MachineInstr
*MI
= MRI
.getUniqueVRegDef(Reg
);
1911 unsigned OpIdx
= MI
->findRegisterDefOperandIdx(Reg
, /*TRI=*/nullptr);
1912 return BlockMIs
[{BB
, CanonicalMIs
[MI
]}]->getOperand(OpIdx
).getReg();
1915 void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr
*MI
) {
1917 // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1918 // and it is produced by this block.
1919 Register PhiR
= MI
->getOperand(0).getReg();
1920 Register R
= MI
->getOperand(3).getReg();
1921 int RMIStage
= getStage(MRI
.getUniqueVRegDef(R
));
1922 if (RMIStage
!= -1 && !AvailableStages
[MI
->getParent()].test(RMIStage
))
1923 R
= MI
->getOperand(1).getReg();
1924 MRI
.setRegClass(R
, MRI
.getRegClass(PhiR
));
1925 MRI
.replaceRegWith(PhiR
, R
);
1926 // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1927 // later to figure out how to remap registers.
1928 MI
->getOperand(0).setReg(PhiR
);
1929 IllegalPhisToDelete
.push_back(MI
);
1933 int Stage
= getStage(MI
);
1934 if (Stage
== -1 || LiveStages
.count(MI
->getParent()) == 0 ||
1935 LiveStages
[MI
->getParent()].test(Stage
))
1936 // Instruction is live, no rewriting to do.
1939 for (MachineOperand
&DefMO
: MI
->defs()) {
1940 SmallVector
<std::pair
<MachineInstr
*, Register
>, 4> Subs
;
1941 for (MachineInstr
&UseMI
: MRI
.use_instructions(DefMO
.getReg())) {
1942 // Only PHIs can use values from this block by construction.
1943 // Match with the equivalent PHI in B.
1944 assert(UseMI
.isPHI());
1945 Register Reg
= getEquivalentRegisterIn(UseMI
.getOperand(0).getReg(),
1947 Subs
.emplace_back(&UseMI
, Reg
);
1949 for (auto &Sub
: Subs
)
1950 Sub
.first
->substituteRegister(DefMO
.getReg(), Sub
.second
, /*SubIdx=*/0,
1951 *MRI
.getTargetRegisterInfo());
1954 LIS
->RemoveMachineInstrFromMaps(*MI
);
1955 MI
->eraseFromParent();
1958 void PeelingModuloScheduleExpander::fixupBranches() {
1959 // Work outwards from the kernel.
1960 bool KernelDisposed
= false;
1961 int TC
= Schedule
.getNumStages() - 1;
1962 for (auto PI
= Prologs
.rbegin(), EI
= Epilogs
.rbegin(); PI
!= Prologs
.rend();
1964 MachineBasicBlock
*Prolog
= *PI
;
1965 MachineBasicBlock
*Fallthrough
= *Prolog
->succ_begin();
1966 MachineBasicBlock
*Epilog
= *EI
;
1967 SmallVector
<MachineOperand
, 4> Cond
;
1968 TII
->removeBranch(*Prolog
);
1969 std::optional
<bool> StaticallyGreater
=
1970 LoopInfo
->createTripCountGreaterCondition(TC
, *Prolog
, Cond
);
1971 if (!StaticallyGreater
) {
1972 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC
<< "\n");
1973 // Dynamically branch based on Cond.
1974 TII
->insertBranch(*Prolog
, Epilog
, Fallthrough
, Cond
, DebugLoc());
1975 } else if (*StaticallyGreater
== false) {
1976 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC
<< "\n");
1977 // Prolog never falls through; branch to epilog and orphan interior
1978 // blocks. Leave it to unreachable-block-elim to clean up.
1979 Prolog
->removeSuccessor(Fallthrough
);
1980 for (MachineInstr
&P
: Fallthrough
->phis()) {
1984 TII
->insertUnconditionalBranch(*Prolog
, Epilog
, DebugLoc());
1985 KernelDisposed
= true;
1987 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC
<< "\n");
1988 // Prolog always falls through; remove incoming values in epilog.
1989 Prolog
->removeSuccessor(Epilog
);
1990 for (MachineInstr
&P
: Epilog
->phis()) {
1997 if (!KernelDisposed
) {
1998 LoopInfo
->adjustTripCount(-(Schedule
.getNumStages() - 1));
1999 LoopInfo
->setPreheader(Prologs
.back());
2001 LoopInfo
->disposed();
2005 void PeelingModuloScheduleExpander::rewriteKernel() {
2006 KernelRewriter
KR(*Schedule
.getLoop(), Schedule
, BB
);
2010 void PeelingModuloScheduleExpander::expand() {
2011 BB
= Schedule
.getLoop()->getTopBlock();
2012 Preheader
= Schedule
.getLoop()->getLoopPreheader();
2013 LLVM_DEBUG(Schedule
.dump());
2014 LoopInfo
= TII
->analyzeLoopForPipelining(BB
);
2018 peelPrologAndEpilogs();
2022 void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
2023 BB
= Schedule
.getLoop()->getTopBlock();
2024 Preheader
= Schedule
.getLoop()->getLoopPreheader();
2026 // Dump the schedule before we invalidate and remap all its instructions.
2027 // Stash it in a string so we can print it if we found an error.
2028 std::string ScheduleDump
;
2029 raw_string_ostream
OS(ScheduleDump
);
2033 // First, run the normal ModuleScheduleExpander. We don't support any
2035 assert(LIS
&& "Requires LiveIntervals!");
2036 ModuloScheduleExpander
MSE(MF
, Schedule
, *LIS
,
2037 ModuloScheduleExpander::InstrChangesTy());
2039 MachineBasicBlock
*ExpandedKernel
= MSE
.getRewrittenKernel();
2040 if (!ExpandedKernel
) {
2041 // The expander optimized away the kernel. We can't do any useful checking.
2045 // Before running the KernelRewriter, re-add BB into the CFG.
2046 Preheader
->addSuccessor(BB
);
2048 // Now run the new expansion algorithm.
2049 KernelRewriter
KR(*Schedule
.getLoop(), Schedule
, BB
);
2051 peelPrologAndEpilogs();
2053 // Collect all illegal phis that the new algorithm created. We'll give these
2054 // to KernelOperandInfo.
2055 SmallPtrSet
<MachineInstr
*, 4> IllegalPhis
;
2056 for (auto NI
= BB
->getFirstNonPHI(); NI
!= BB
->end(); ++NI
) {
2058 IllegalPhis
.insert(&*NI
);
2061 // Co-iterate across both kernels. We expect them to be identical apart from
2062 // phis and full COPYs (we look through both).
2063 SmallVector
<std::pair
<KernelOperandInfo
, KernelOperandInfo
>, 8> KOIs
;
2064 auto OI
= ExpandedKernel
->begin();
2065 auto NI
= BB
->begin();
2066 for (; !OI
->isTerminator() && !NI
->isTerminator(); ++OI
, ++NI
) {
2067 while (OI
->isPHI() || OI
->isFullCopy())
2069 while (NI
->isPHI() || NI
->isFullCopy())
2071 assert(OI
->getOpcode() == NI
->getOpcode() && "Opcodes don't match?!");
2072 // Analyze every operand separately.
2073 for (auto OOpI
= OI
->operands_begin(), NOpI
= NI
->operands_begin();
2074 OOpI
!= OI
->operands_end(); ++OOpI
, ++NOpI
)
2075 KOIs
.emplace_back(KernelOperandInfo(&*OOpI
, MRI
, IllegalPhis
),
2076 KernelOperandInfo(&*NOpI
, MRI
, IllegalPhis
));
2079 bool Failed
= false;
2080 for (auto &OldAndNew
: KOIs
) {
2081 if (OldAndNew
.first
== OldAndNew
.second
)
2084 errs() << "Modulo kernel validation error: [\n";
2085 errs() << " [golden] ";
2086 OldAndNew
.first
.print(errs());
2088 OldAndNew
.second
.print(errs());
2093 errs() << "Golden reference kernel:\n";
2094 ExpandedKernel
->print(errs());
2095 errs() << "New kernel:\n";
2097 errs() << ScheduleDump
;
2099 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2102 // Cleanup by removing BB from the CFG again as the original
2103 // ModuloScheduleExpander intended.
2104 Preheader
->removeSuccessor(BB
);
2108 MachineInstr
*ModuloScheduleExpanderMVE::cloneInstr(MachineInstr
*OldMI
) {
2109 MachineInstr
*NewMI
= MF
.CloneMachineInstr(OldMI
);
2111 // TODO: Offset information needs to be corrected.
2112 NewMI
->dropMemRefs(MF
);
2117 /// Create a dedicated exit for Loop. Exit is the original exit for Loop.
2118 /// If it is already dedicated exit, return it. Otherwise, insert a new
2119 /// block between them and return the new block.
2120 static MachineBasicBlock
*createDedicatedExit(MachineBasicBlock
*Loop
,
2121 MachineBasicBlock
*Exit
) {
2122 if (Exit
->pred_size() == 1)
2125 MachineFunction
*MF
= Loop
->getParent();
2126 const TargetInstrInfo
*TII
= MF
->getSubtarget().getInstrInfo();
2128 MachineBasicBlock
*NewExit
=
2129 MF
->CreateMachineBasicBlock(Loop
->getBasicBlock());
2130 MF
->insert(Loop
->getIterator(), NewExit
);
2132 MachineBasicBlock
*TBB
= nullptr, *FBB
= nullptr;
2133 SmallVector
<MachineOperand
, 4> Cond
;
2134 TII
->analyzeBranch(*Loop
, TBB
, FBB
, Cond
);
2137 else if (FBB
== Loop
)
2140 llvm_unreachable("unexpected loop structure");
2141 TII
->removeBranch(*Loop
);
2142 TII
->insertBranch(*Loop
, TBB
, FBB
, Cond
, DebugLoc());
2143 Loop
->replaceSuccessor(Exit
, NewExit
);
2144 TII
->insertUnconditionalBranch(*NewExit
, Exit
, DebugLoc());
2145 NewExit
->addSuccessor(Exit
);
2147 Exit
->replacePhiUsesWith(Loop
, NewExit
);
2152 /// Insert branch code into the end of MBB. It branches to GreaterThan if the
2153 /// remaining trip count for instructions in LastStage0Insts is greater than
2154 /// RequiredTC, and to Otherwise otherwise.
2155 void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock
&MBB
,
2157 InstrMapTy
&LastStage0Insts
,
2158 MachineBasicBlock
&GreaterThan
,
2159 MachineBasicBlock
&Otherwise
) {
2160 SmallVector
<MachineOperand
, 4> Cond
;
2161 LoopInfo
->createRemainingIterationsGreaterCondition(RequiredTC
, MBB
, Cond
,
2164 if (SwapBranchTargetsMVE
) {
2165 // Set SwapBranchTargetsMVE to true if a target prefers to replace TBB and
2166 // FBB for optimal performance.
2167 if (TII
->reverseBranchCondition(Cond
))
2168 llvm_unreachable("can not reverse branch condition");
2169 TII
->insertBranch(MBB
, &Otherwise
, &GreaterThan
, Cond
, DebugLoc());
2171 TII
->insertBranch(MBB
, &GreaterThan
, &Otherwise
, Cond
, DebugLoc());
2175 /// Generate a pipelined loop that is unrolled by using MVE algorithm and any
2176 /// other necessary blocks. The control flow is modified to execute the
2177 /// pipelined loop if the trip count satisfies the condition, otherwise the
2178 /// original loop. The original loop is also used to execute the remainder
2179 /// iterations which occur due to unrolling.
2180 void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
2181 // The control flow for pipelining with MVE:
2184 // // The block that is originally the loop preheader
2188 // // Check whether the trip count satisfies the requirements to pipeline.
2189 // if (LoopCounter > NumStages + NumUnroll - 2)
2190 // // The minimum number of iterations to pipeline =
2191 // // iterations executed in prolog/epilog (NumStages-1) +
2192 // // iterations executed in one kernel run (NumUnroll)
2194 // // fallback to the original loop
2195 // goto NewPreheader
2198 // // All prolog stages. There are no direct branches to the epilogue.
2202 // // NumUnroll copies of the kernel
2203 // if (LoopCounter > MVE-1)
2208 // // All epilog stages.
2209 // if (LoopCounter > 0)
2210 // // The remainder is executed in the original loop
2211 // goto NewPreheader
2215 // // Newly created preheader for the original loop.
2216 // // The initial values of the phis in the loop are merged from two paths.
2217 // NewInitVal = Phi OrigInitVal, Check, PipelineLastVal, Epilog
2221 // // The original loop block.
2222 // if (LoopCounter != 0)
2227 // // Newly created dedicated exit for the original loop.
2228 // // Merge values which are referenced after the loop
2229 // Merged = Phi OrigVal, OrigKernel, PipelineVal, Epilog
2233 // // The block that is originally the loop exit.
2234 // // If it is already deicated exit, NewExit is not created.
2236 // An example of where each stage is executed:
2237 // Assume #Stages 3, #MVE 4, #Iterations 12
2238 // Iter 0 1 2 3 4 5 6 7 8 9 10-11
2239 // -------------------------------------------------
2241 // Stage 1 0 Prolog#1
2242 // Stage 2 1 0 Kernel Unroll#0 Iter#0
2243 // Stage 2 1 0 Kernel Unroll#1 Iter#0
2244 // Stage 2 1 0 Kernel Unroll#2 Iter#0
2245 // Stage 2 1 0 Kernel Unroll#3 Iter#0
2246 // Stage 2 1 0 Kernel Unroll#0 Iter#1
2247 // Stage 2 1 0 Kernel Unroll#1 Iter#1
2248 // Stage 2 1 0 Kernel Unroll#2 Iter#1
2249 // Stage 2 1 0 Kernel Unroll#3 Iter#1
2250 // Stage 2 1 Epilog#0
2252 // Stage 0-2 OrigKernel
2254 LoopInfo
= TII
->analyzeLoopForPipelining(OrigKernel
);
2255 assert(LoopInfo
&& "Must be able to analyze loop!");
2259 Check
= MF
.CreateMachineBasicBlock(OrigKernel
->getBasicBlock());
2260 Prolog
= MF
.CreateMachineBasicBlock(OrigKernel
->getBasicBlock());
2261 NewKernel
= MF
.CreateMachineBasicBlock(OrigKernel
->getBasicBlock());
2262 Epilog
= MF
.CreateMachineBasicBlock(OrigKernel
->getBasicBlock());
2263 NewPreheader
= MF
.CreateMachineBasicBlock(OrigKernel
->getBasicBlock());
2265 MF
.insert(OrigKernel
->getIterator(), Check
);
2266 MF
.insert(OrigKernel
->getIterator(), Prolog
);
2267 MF
.insert(OrigKernel
->getIterator(), NewKernel
);
2268 MF
.insert(OrigKernel
->getIterator(), Epilog
);
2269 MF
.insert(OrigKernel
->getIterator(), NewPreheader
);
2271 NewExit
= createDedicatedExit(OrigKernel
, OrigExit
);
2273 NewPreheader
->transferSuccessorsAndUpdatePHIs(OrigPreheader
);
2274 TII
->insertUnconditionalBranch(*NewPreheader
, OrigKernel
, DebugLoc());
2276 OrigPreheader
->addSuccessor(Check
);
2277 TII
->removeBranch(*OrigPreheader
);
2278 TII
->insertUnconditionalBranch(*OrigPreheader
, Check
, DebugLoc());
2280 Check
->addSuccessor(Prolog
);
2281 Check
->addSuccessor(NewPreheader
);
2283 Prolog
->addSuccessor(NewKernel
);
2285 NewKernel
->addSuccessor(NewKernel
);
2286 NewKernel
->addSuccessor(Epilog
);
2288 Epilog
->addSuccessor(NewPreheader
);
2289 Epilog
->addSuccessor(NewExit
);
2291 InstrMapTy LastStage0Insts
;
2292 insertCondBranch(*Check
, Schedule
.getNumStages() + NumUnroll
- 2,
2293 LastStage0Insts
, *Prolog
, *NewPreheader
);
2295 // VRMaps map (prolog/kernel/epilog phase#, original register#) to new
2297 SmallVector
<ValueMapTy
> PrologVRMap
, KernelVRMap
, EpilogVRMap
;
2298 generateProlog(PrologVRMap
);
2299 generateKernel(PrologVRMap
, KernelVRMap
, LastStage0Insts
);
2300 generateEpilog(KernelVRMap
, EpilogVRMap
, LastStage0Insts
);
2303 /// Replace MI's use operands according to the maps.
2304 void ModuloScheduleExpanderMVE::updateInstrUse(
2305 MachineInstr
*MI
, int StageNum
, int PhaseNum
,
2306 SmallVectorImpl
<ValueMapTy
> &CurVRMap
,
2307 SmallVectorImpl
<ValueMapTy
> *PrevVRMap
) {
2308 // If MI is in the prolog/kernel/epilog block, CurVRMap is
2309 // PrologVRMap/KernelVRMap/EpilogVRMap respectively.
2310 // PrevVRMap is nullptr/PhiVRMap/KernelVRMap respectively.
2311 // Refer to the appropriate map according to the stage difference between
2312 // MI and the definition of an operand.
2314 for (MachineOperand
&UseMO
: MI
->uses()) {
2315 if (!UseMO
.isReg() || !UseMO
.getReg().isVirtual())
2318 Register OrigReg
= UseMO
.getReg();
2319 MachineInstr
*DefInst
= MRI
.getVRegDef(OrigReg
);
2320 if (!DefInst
|| DefInst
->getParent() != OrigKernel
)
2322 unsigned InitReg
= 0;
2323 unsigned DefReg
= OrigReg
;
2324 if (DefInst
->isPHI()) {
2327 getPhiRegs(*DefInst
, OrigKernel
, InitReg
, LoopReg
);
2328 // LoopReg is guaranteed to be defined within the loop by canApply()
2330 DefInst
= MRI
.getVRegDef(LoopReg
);
2332 unsigned DefStageNum
= Schedule
.getStage(DefInst
);
2333 DiffStage
+= StageNum
- DefStageNum
;
2335 if (PhaseNum
>= DiffStage
&& CurVRMap
[PhaseNum
- DiffStage
].count(DefReg
))
2336 // NewReg is defined in a previous phase of the same block
2337 NewReg
= CurVRMap
[PhaseNum
- DiffStage
][DefReg
];
2338 else if (!PrevVRMap
)
2339 // Since this is the first iteration, refer the initial register of the
2343 // Cases where DiffStage is larger than PhaseNum.
2344 // If MI is in the kernel block, the value is defined by the previous
2345 // iteration and PhiVRMap is referenced. If MI is in the epilog block, the
2346 // value is defined in the kernel block and KernelVRMap is referenced.
2347 NewReg
= (*PrevVRMap
)[PrevVRMap
->size() - (DiffStage
- PhaseNum
)][DefReg
];
2349 const TargetRegisterClass
*NRC
=
2350 MRI
.constrainRegClass(NewReg
, MRI
.getRegClass(OrigReg
));
2352 UseMO
.setReg(NewReg
);
2354 Register SplitReg
= MRI
.createVirtualRegister(MRI
.getRegClass(OrigReg
));
2355 BuildMI(*OrigKernel
, MI
, MI
->getDebugLoc(), TII
->get(TargetOpcode::COPY
),
2358 UseMO
.setReg(SplitReg
);
2363 /// Return a phi if Reg is referenced by the phi.
2364 /// canApply() guarantees that at most only one such phi exists.
2365 static MachineInstr
*getLoopPhiUser(Register Reg
, MachineBasicBlock
*Loop
) {
2366 for (MachineInstr
&Phi
: Loop
->phis()) {
2367 unsigned InitVal
, LoopVal
;
2368 getPhiRegs(Phi
, Loop
, InitVal
, LoopVal
);
2375 /// Generate phis for registers defined by OrigMI.
2376 void ModuloScheduleExpanderMVE::generatePhi(
2377 MachineInstr
*OrigMI
, int UnrollNum
,
2378 SmallVectorImpl
<ValueMapTy
> &PrologVRMap
,
2379 SmallVectorImpl
<ValueMapTy
> &KernelVRMap
,
2380 SmallVectorImpl
<ValueMapTy
> &PhiVRMap
) {
2381 int StageNum
= Schedule
.getStage(OrigMI
);
2383 if (Schedule
.getNumStages() - NumUnroll
+ UnrollNum
- 1 >= StageNum
)
2384 UsePrologReg
= true;
2385 else if (Schedule
.getNumStages() - NumUnroll
+ UnrollNum
== StageNum
)
2386 UsePrologReg
= false;
2390 // Examples that show which stages are merged by phi.
2391 // Meaning of the symbol following the stage number:
2392 // a/b: Stages with the same letter are merged (UsePrologReg == true)
2393 // +: Merged with the initial value (UsePrologReg == false)
2394 // *: No phis required
2396 // #Stages 3, #MVE 4
2397 // Iter 0 1 2 3 4 5 6 7 8
2398 // -----------------------------------------
2399 // Stage 0a Prolog#0
2400 // Stage 1a 0b Prolog#1
2401 // Stage 2* 1* 0* Kernel Unroll#0
2402 // Stage 2* 1* 0+ Kernel Unroll#1
2403 // Stage 2* 1+ 0a Kernel Unroll#2
2404 // Stage 2+ 1a 0b Kernel Unroll#3
2406 // #Stages 3, #MVE 2
2407 // Iter 0 1 2 3 4 5 6 7 8
2408 // -----------------------------------------
2409 // Stage 0a Prolog#0
2410 // Stage 1a 0b Prolog#1
2411 // Stage 2* 1+ 0a Kernel Unroll#0
2412 // Stage 2+ 1a 0b Kernel Unroll#1
2414 // #Stages 3, #MVE 1
2415 // Iter 0 1 2 3 4 5 6 7 8
2416 // -----------------------------------------
2417 // Stage 0* Prolog#0
2418 // Stage 1a 0b Prolog#1
2419 // Stage 2+ 1a 0b Kernel Unroll#0
2421 for (MachineOperand
&DefMO
: OrigMI
->defs()) {
2422 if (!DefMO
.isReg() || DefMO
.isDead())
2424 Register OrigReg
= DefMO
.getReg();
2425 auto NewReg
= KernelVRMap
[UnrollNum
].find(OrigReg
);
2426 if (NewReg
== KernelVRMap
[UnrollNum
].end())
2428 Register CorrespondReg
;
2430 int PrologNum
= Schedule
.getNumStages() - NumUnroll
+ UnrollNum
- 1;
2431 CorrespondReg
= PrologVRMap
[PrologNum
][OrigReg
];
2433 MachineInstr
*Phi
= getLoopPhiUser(OrigReg
, OrigKernel
);
2436 CorrespondReg
= getInitPhiReg(*Phi
, OrigKernel
);
2439 assert(CorrespondReg
.isValid());
2440 Register PhiReg
= MRI
.createVirtualRegister(MRI
.getRegClass(OrigReg
));
2441 BuildMI(*NewKernel
, NewKernel
->getFirstNonPHI(), DebugLoc(),
2442 TII
->get(TargetOpcode::PHI
), PhiReg
)
2443 .addReg(NewReg
->second
)
2445 .addReg(CorrespondReg
)
2447 PhiVRMap
[UnrollNum
][OrigReg
] = PhiReg
;
2451 static void replacePhiSrc(MachineInstr
&Phi
, Register OrigReg
, Register NewReg
,
2452 MachineBasicBlock
*NewMBB
) {
2453 for (unsigned Idx
= 1; Idx
< Phi
.getNumOperands(); Idx
+= 2) {
2454 if (Phi
.getOperand(Idx
).getReg() == OrigReg
) {
2455 Phi
.getOperand(Idx
).setReg(NewReg
);
2456 Phi
.getOperand(Idx
+ 1).setMBB(NewMBB
);
2462 /// Generate phis that merge values from multiple routes
2463 void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg
,
2465 SmallVector
<MachineOperand
*> UsesAfterLoop
;
2466 SmallVector
<MachineInstr
*> LoopPhis
;
2467 for (MachineRegisterInfo::use_iterator I
= MRI
.use_begin(OrigReg
),
2470 MachineOperand
&O
= *I
;
2471 if (O
.getParent()->getParent() != OrigKernel
&&
2472 O
.getParent()->getParent() != Prolog
&&
2473 O
.getParent()->getParent() != NewKernel
&&
2474 O
.getParent()->getParent() != Epilog
)
2475 UsesAfterLoop
.push_back(&O
);
2476 if (O
.getParent()->getParent() == OrigKernel
&& O
.getParent()->isPHI())
2477 LoopPhis
.push_back(O
.getParent());
2480 // Merge the route that only execute the pipelined loop (when there are no
2481 // remaining iterations) with the route that execute the original loop.
2482 if (!UsesAfterLoop
.empty()) {
2483 Register PhiReg
= MRI
.createVirtualRegister(MRI
.getRegClass(OrigReg
));
2484 BuildMI(*NewExit
, NewExit
->getFirstNonPHI(), DebugLoc(),
2485 TII
->get(TargetOpcode::PHI
), PhiReg
)
2491 for (MachineOperand
*MO
: UsesAfterLoop
)
2494 if (!LIS
.hasInterval(PhiReg
))
2495 LIS
.createEmptyInterval(PhiReg
);
2498 // Merge routes from the pipelined loop and the bypassed route before the
2500 if (!LoopPhis
.empty()) {
2501 for (MachineInstr
*Phi
: LoopPhis
) {
2502 unsigned InitReg
, LoopReg
;
2503 getPhiRegs(*Phi
, OrigKernel
, InitReg
, LoopReg
);
2504 Register NewInit
= MRI
.createVirtualRegister(MRI
.getRegClass(InitReg
));
2505 BuildMI(*NewPreheader
, NewPreheader
->getFirstNonPHI(), Phi
->getDebugLoc(),
2506 TII
->get(TargetOpcode::PHI
), NewInit
)
2511 replacePhiSrc(*Phi
, InitReg
, NewInit
, NewPreheader
);
2516 void ModuloScheduleExpanderMVE::generateProlog(
2517 SmallVectorImpl
<ValueMapTy
> &PrologVRMap
) {
2518 PrologVRMap
.clear();
2519 PrologVRMap
.resize(Schedule
.getNumStages() - 1);
2520 DenseMap
<MachineInstr
*, std::pair
<int, int>> NewMIMap
;
2521 for (int PrologNum
= 0; PrologNum
< Schedule
.getNumStages() - 1;
2523 for (MachineInstr
*MI
: Schedule
.getInstructions()) {
2526 int StageNum
= Schedule
.getStage(MI
);
2527 if (StageNum
> PrologNum
)
2529 MachineInstr
*NewMI
= cloneInstr(MI
);
2530 updateInstrDef(NewMI
, PrologVRMap
[PrologNum
], false);
2531 NewMIMap
[NewMI
] = {PrologNum
, StageNum
};
2532 Prolog
->push_back(NewMI
);
2536 for (auto I
: NewMIMap
) {
2537 MachineInstr
*MI
= I
.first
;
2538 int PrologNum
= I
.second
.first
;
2539 int StageNum
= I
.second
.second
;
2540 updateInstrUse(MI
, StageNum
, PrologNum
, PrologVRMap
, nullptr);
2544 dbgs() << "prolog:\n";
2549 void ModuloScheduleExpanderMVE::generateKernel(
2550 SmallVectorImpl
<ValueMapTy
> &PrologVRMap
,
2551 SmallVectorImpl
<ValueMapTy
> &KernelVRMap
, InstrMapTy
&LastStage0Insts
) {
2552 KernelVRMap
.clear();
2553 KernelVRMap
.resize(NumUnroll
);
2554 SmallVector
<ValueMapTy
> PhiVRMap
;
2555 PhiVRMap
.resize(NumUnroll
);
2556 DenseMap
<MachineInstr
*, std::pair
<int, int>> NewMIMap
;
2557 DenseMap
<MachineInstr
*, MachineInstr
*> MIMapLastStage0
;
2558 for (int UnrollNum
= 0; UnrollNum
< NumUnroll
; ++UnrollNum
) {
2559 for (MachineInstr
*MI
: Schedule
.getInstructions()) {
2562 int StageNum
= Schedule
.getStage(MI
);
2563 MachineInstr
*NewMI
= cloneInstr(MI
);
2564 if (UnrollNum
== NumUnroll
- 1)
2565 LastStage0Insts
[MI
] = NewMI
;
2566 updateInstrDef(NewMI
, KernelVRMap
[UnrollNum
],
2567 (UnrollNum
== NumUnroll
- 1 && StageNum
== 0));
2568 generatePhi(MI
, UnrollNum
, PrologVRMap
, KernelVRMap
, PhiVRMap
);
2569 NewMIMap
[NewMI
] = {UnrollNum
, StageNum
};
2570 NewKernel
->push_back(NewMI
);
2574 for (auto I
: NewMIMap
) {
2575 MachineInstr
*MI
= I
.first
;
2576 int UnrollNum
= I
.second
.first
;
2577 int StageNum
= I
.second
.second
;
2578 updateInstrUse(MI
, StageNum
, UnrollNum
, KernelVRMap
, &PhiVRMap
);
2581 // If remaining trip count is greater than NumUnroll-1, loop continues
2582 insertCondBranch(*NewKernel
, NumUnroll
- 1, LastStage0Insts
, *NewKernel
,
2586 dbgs() << "kernel:\n";
2591 void ModuloScheduleExpanderMVE::generateEpilog(
2592 SmallVectorImpl
<ValueMapTy
> &KernelVRMap
,
2593 SmallVectorImpl
<ValueMapTy
> &EpilogVRMap
, InstrMapTy
&LastStage0Insts
) {
2594 EpilogVRMap
.clear();
2595 EpilogVRMap
.resize(Schedule
.getNumStages() - 1);
2596 DenseMap
<MachineInstr
*, std::pair
<int, int>> NewMIMap
;
2597 for (int EpilogNum
= 0; EpilogNum
< Schedule
.getNumStages() - 1;
2599 for (MachineInstr
*MI
: Schedule
.getInstructions()) {
2602 int StageNum
= Schedule
.getStage(MI
);
2603 if (StageNum
<= EpilogNum
)
2605 MachineInstr
*NewMI
= cloneInstr(MI
);
2606 updateInstrDef(NewMI
, EpilogVRMap
[EpilogNum
], StageNum
- 1 == EpilogNum
);
2607 NewMIMap
[NewMI
] = {EpilogNum
, StageNum
};
2608 Epilog
->push_back(NewMI
);
2612 for (auto I
: NewMIMap
) {
2613 MachineInstr
*MI
= I
.first
;
2614 int EpilogNum
= I
.second
.first
;
2615 int StageNum
= I
.second
.second
;
2616 updateInstrUse(MI
, StageNum
, EpilogNum
, EpilogVRMap
, &KernelVRMap
);
2619 // If there are remaining iterations, they are executed in the original loop.
2620 // Instructions related to loop control, such as loop counter comparison,
2621 // are indicated by shouldIgnoreForPipelining() and are assumed to be placed
2622 // in stage 0. Thus, the map is for the last one in the kernel.
2623 insertCondBranch(*Epilog
, 0, LastStage0Insts
, *NewPreheader
, *NewExit
);
2626 dbgs() << "epilog:\n";
2631 /// Calculate the number of unroll required and set it to NumUnroll
2632 void ModuloScheduleExpanderMVE::calcNumUnroll() {
2633 DenseMap
<MachineInstr
*, unsigned> Inst2Idx
;
2635 for (unsigned I
= 0; I
< Schedule
.getInstructions().size(); ++I
)
2636 Inst2Idx
[Schedule
.getInstructions()[I
]] = I
;
2638 for (MachineInstr
*MI
: Schedule
.getInstructions()) {
2641 int StageNum
= Schedule
.getStage(MI
);
2642 for (const MachineOperand
&MO
: MI
->uses()) {
2643 if (!MO
.isReg() || !MO
.getReg().isVirtual())
2645 MachineInstr
*DefMI
= MRI
.getVRegDef(MO
.getReg());
2646 if (DefMI
->getParent() != OrigKernel
)
2649 int NumUnrollLocal
= 1;
2650 if (DefMI
->isPHI()) {
2652 // canApply() guarantees that DefMI is not phi and is an instruction in
2654 DefMI
= MRI
.getVRegDef(getLoopPhiReg(*DefMI
, OrigKernel
));
2656 NumUnrollLocal
+= StageNum
- Schedule
.getStage(DefMI
);
2657 if (Inst2Idx
[MI
] <= Inst2Idx
[DefMI
])
2659 NumUnroll
= std::max(NumUnroll
, NumUnrollLocal
);
2662 LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll
<< "\n");
2665 /// Create new virtual registers for definitions of NewMI and update NewMI.
2666 /// If the definitions are referenced after the pipelined loop, phis are
2667 /// created to merge with other routes.
2668 void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr
*NewMI
,
2671 for (MachineOperand
&MO
: NewMI
->all_defs()) {
2672 if (!MO
.getReg().isVirtual())
2674 Register Reg
= MO
.getReg();
2675 const TargetRegisterClass
*RC
= MRI
.getRegClass(Reg
);
2676 Register NewReg
= MRI
.createVirtualRegister(RC
);
2678 VRMap
[Reg
] = NewReg
;
2680 mergeRegUsesAfterPipeline(Reg
, NewReg
);
2684 void ModuloScheduleExpanderMVE::expand() {
2685 OrigKernel
= Schedule
.getLoop()->getTopBlock();
2686 OrigPreheader
= Schedule
.getLoop()->getLoopPreheader();
2687 OrigExit
= Schedule
.getLoop()->getExitBlock();
2689 LLVM_DEBUG(Schedule
.dump());
2691 generatePipelinedLoop();
2694 /// Check if ModuloScheduleExpanderMVE can be applied to L
2695 bool ModuloScheduleExpanderMVE::canApply(MachineLoop
&L
) {
2696 if (!L
.getExitBlock()) {
2697 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: No single exit block.\n");
2701 MachineBasicBlock
*BB
= L
.getTopBlock();
2702 MachineRegisterInfo
&MRI
= BB
->getParent()->getRegInfo();
2704 // Put some constraints on the operands of the phis to simplify the
2706 DenseSet
<unsigned> UsedByPhi
;
2707 for (MachineInstr
&MI
: BB
->phis()) {
2708 // Registers defined by phis must be used only inside the loop and be never
2710 for (MachineOperand
&MO
: MI
.defs())
2712 for (MachineInstr
&Ref
: MRI
.use_instructions(MO
.getReg()))
2713 if (Ref
.getParent() != BB
|| Ref
.isPHI()) {
2714 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A phi result is "
2715 "referenced outside of the loop or by phi.\n");
2719 // A source register from the loop block must be defined inside the loop.
2720 // A register defined inside the loop must be referenced by only one phi at
2722 unsigned InitVal
, LoopVal
;
2723 getPhiRegs(MI
, MI
.getParent(), InitVal
, LoopVal
);
2724 if (!Register(LoopVal
).isVirtual() ||
2725 MRI
.getVRegDef(LoopVal
)->getParent() != BB
) {
2727 dbgs() << "Can not apply MVE expander: A phi source value coming "
2728 "from the loop is not defined in the loop.\n");
2731 if (UsedByPhi
.count(LoopVal
)) {
2732 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
2733 "loop is referenced by two or more phis.\n");
2736 UsedByPhi
.insert(LoopVal
);
2742 //===----------------------------------------------------------------------===//
2743 // ModuloScheduleTestPass implementation
2744 //===----------------------------------------------------------------------===//
2745 // This pass constructs a ModuloSchedule from its module and runs
2746 // ModuloScheduleExpander.
2748 // The module is expected to contain a single-block analyzable loop.
2749 // The total order of instructions is taken from the loop as-is.
2750 // Instructions are expected to be annotated with a PostInstrSymbol.
2751 // This PostInstrSymbol must have the following format:
2752 // "Stage=%d Cycle=%d".
2753 //===----------------------------------------------------------------------===//
2756 class ModuloScheduleTest
: public MachineFunctionPass
{
2760 ModuloScheduleTest() : MachineFunctionPass(ID
) {
2761 initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
2764 bool runOnMachineFunction(MachineFunction
&MF
) override
;
2765 void runOnLoop(MachineFunction
&MF
, MachineLoop
&L
);
2767 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
2768 AU
.addRequired
<MachineLoopInfoWrapperPass
>();
2769 AU
.addRequired
<LiveIntervalsWrapperPass
>();
2770 MachineFunctionPass::getAnalysisUsage(AU
);
2775 char ModuloScheduleTest::ID
= 0;
2777 INITIALIZE_PASS_BEGIN(ModuloScheduleTest
, "modulo-schedule-test",
2778 "Modulo Schedule test pass", false, false)
2779 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass
)
2780 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass
)
2781 INITIALIZE_PASS_END(ModuloScheduleTest
, "modulo-schedule-test",
2782 "Modulo Schedule test pass", false, false)
2784 bool ModuloScheduleTest::runOnMachineFunction(MachineFunction
&MF
) {
2785 MachineLoopInfo
&MLI
= getAnalysis
<MachineLoopInfoWrapperPass
>().getLI();
2786 for (auto *L
: MLI
) {
2787 if (L
->getTopBlock() != L
->getBottomBlock())
2795 static void parseSymbolString(StringRef S
, int &Cycle
, int &Stage
) {
2796 std::pair
<StringRef
, StringRef
> StageAndCycle
= getToken(S
, "_");
2797 std::pair
<StringRef
, StringRef
> StageTokenAndValue
=
2798 getToken(StageAndCycle
.first
, "-");
2799 std::pair
<StringRef
, StringRef
> CycleTokenAndValue
=
2800 getToken(StageAndCycle
.second
, "-");
2801 if (StageTokenAndValue
.first
!= "Stage" ||
2802 CycleTokenAndValue
.first
!= "_Cycle") {
2804 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2808 StageTokenAndValue
.second
.drop_front().getAsInteger(10, Stage
);
2809 CycleTokenAndValue
.second
.drop_front().getAsInteger(10, Cycle
);
2811 dbgs() << " Stage=" << Stage
<< ", Cycle=" << Cycle
<< "\n";
2814 void ModuloScheduleTest::runOnLoop(MachineFunction
&MF
, MachineLoop
&L
) {
2815 LiveIntervals
&LIS
= getAnalysis
<LiveIntervalsWrapperPass
>().getLIS();
2816 MachineBasicBlock
*BB
= L
.getTopBlock();
2817 dbgs() << "--- ModuloScheduleTest running on BB#" << BB
->getNumber() << "\n";
2819 DenseMap
<MachineInstr
*, int> Cycle
, Stage
;
2820 std::vector
<MachineInstr
*> Instrs
;
2821 for (MachineInstr
&MI
: *BB
) {
2822 if (MI
.isTerminator())
2824 Instrs
.push_back(&MI
);
2825 if (MCSymbol
*Sym
= MI
.getPostInstrSymbol()) {
2826 dbgs() << "Parsing post-instr symbol for " << MI
;
2827 parseSymbolString(Sym
->getName(), Cycle
[&MI
], Stage
[&MI
]);
2831 ModuloSchedule
MS(MF
, &L
, std::move(Instrs
), std::move(Cycle
),
2833 ModuloScheduleExpander
MSE(
2834 MF
, MS
, LIS
, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2839 //===----------------------------------------------------------------------===//
2840 // ModuloScheduleTestAnnotater implementation
2841 //===----------------------------------------------------------------------===//
2843 void ModuloScheduleTestAnnotater::annotate() {
2844 for (MachineInstr
*MI
: S
.getInstructions()) {
2845 SmallVector
<char, 16> SV
;
2846 raw_svector_ostream
OS(SV
);
2847 OS
<< "Stage-" << S
.getStage(MI
) << "_Cycle-" << S
.getCycle(MI
);
2848 MCSymbol
*Sym
= MF
.getContext().getOrCreateSymbol(OS
.str());
2849 MI
->setPostInstrSymbol(MF
, Sym
);