1 //===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/CodeGen/ModuloSchedule.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/CodeGen/LiveIntervals.h"
12 #include "llvm/CodeGen/MachineInstrBuilder.h"
13 #include "llvm/CodeGen/MachineRegisterInfo.h"
14 #include "llvm/CodeGen/TargetInstrInfo.h"
15 #include "llvm/MC/MCContext.h"
16 #include "llvm/Support/Debug.h"
17 #include "llvm/Support/ErrorHandling.h"
18 #include "llvm/Support/raw_ostream.h"
20 #define DEBUG_TYPE "pipeliner"
23 void ModuloSchedule::print(raw_ostream
&OS
) {
24 for (MachineInstr
*MI
: ScheduledInstrs
)
25 OS
<< "[stage " << getStage(MI
) << " @" << getCycle(MI
) << "c] " << *MI
;
28 //===----------------------------------------------------------------------===//
29 // ModuloScheduleExpander implementation
30 //===----------------------------------------------------------------------===//
32 /// Return the register values for the operands of a Phi instruction.
33 /// This function assume the instruction is a Phi.
34 static void getPhiRegs(MachineInstr
&Phi
, MachineBasicBlock
*Loop
,
35 unsigned &InitVal
, unsigned &LoopVal
) {
36 assert(Phi
.isPHI() && "Expecting a Phi.");
40 for (unsigned i
= 1, e
= Phi
.getNumOperands(); i
!= e
; i
+= 2)
41 if (Phi
.getOperand(i
+ 1).getMBB() != Loop
)
42 InitVal
= Phi
.getOperand(i
).getReg();
44 LoopVal
= Phi
.getOperand(i
).getReg();
46 assert(InitVal
!= 0 && LoopVal
!= 0 && "Unexpected Phi structure.");
49 /// Return the Phi register value that comes from the incoming block.
50 static unsigned getInitPhiReg(MachineInstr
&Phi
, MachineBasicBlock
*LoopBB
) {
51 for (unsigned i
= 1, e
= Phi
.getNumOperands(); i
!= e
; i
+= 2)
52 if (Phi
.getOperand(i
+ 1).getMBB() != LoopBB
)
53 return Phi
.getOperand(i
).getReg();
57 /// Return the Phi register value that comes the loop block.
58 static unsigned getLoopPhiReg(MachineInstr
&Phi
, MachineBasicBlock
*LoopBB
) {
59 for (unsigned i
= 1, e
= Phi
.getNumOperands(); i
!= e
; i
+= 2)
60 if (Phi
.getOperand(i
+ 1).getMBB() == LoopBB
)
61 return Phi
.getOperand(i
).getReg();
65 void ModuloScheduleExpander::expand() {
66 BB
= Schedule
.getLoop()->getTopBlock();
67 Preheader
= *BB
->pred_begin();
69 Preheader
= *std::next(BB
->pred_begin());
71 // Iterate over the definitions in each instruction, and compute the
72 // stage difference for each use. Keep the maximum value.
73 for (MachineInstr
*MI
: Schedule
.getInstructions()) {
74 int DefStage
= Schedule
.getStage(MI
);
75 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
< e
; ++i
) {
76 MachineOperand
&Op
= MI
->getOperand(i
);
77 if (!Op
.isReg() || !Op
.isDef())
80 Register Reg
= Op
.getReg();
82 bool PhiIsSwapped
= false;
83 for (MachineRegisterInfo::use_iterator UI
= MRI
.use_begin(Reg
),
86 MachineOperand
&UseOp
= *UI
;
87 MachineInstr
*UseMI
= UseOp
.getParent();
88 int UseStage
= Schedule
.getStage(UseMI
);
90 if (UseStage
!= -1 && UseStage
>= DefStage
)
91 Diff
= UseStage
- DefStage
;
93 if (isLoopCarried(*MI
))
98 MaxDiff
= std::max(Diff
, MaxDiff
);
100 RegToStageDiff
[Reg
] = std::make_pair(MaxDiff
, PhiIsSwapped
);
104 generatePipelinedLoop();
107 void ModuloScheduleExpander::generatePipelinedLoop() {
108 LoopInfo
= TII
->analyzeLoopForPipelining(BB
);
109 assert(LoopInfo
&& "Must be able to analyze loop!");
111 // Create a new basic block for the kernel and add it to the CFG.
112 MachineBasicBlock
*KernelBB
= MF
.CreateMachineBasicBlock(BB
->getBasicBlock());
114 unsigned MaxStageCount
= Schedule
.getNumStages() - 1;
116 // Remember the registers that are used in different stages. The index is
117 // the iteration, or stage, that the instruction is scheduled in. This is
118 // a map between register names in the original block and the names created
119 // in each stage of the pipelined loop.
120 ValueMapTy
*VRMap
= new ValueMapTy
[(MaxStageCount
+ 1) * 2];
123 SmallVector
<MachineBasicBlock
*, 4> PrologBBs
;
125 // Generate the prolog instructions that set up the pipeline.
126 generateProlog(MaxStageCount
, KernelBB
, VRMap
, PrologBBs
);
127 MF
.insert(BB
->getIterator(), KernelBB
);
129 // Rearrange the instructions to generate the new, pipelined loop,
130 // and update register names as needed.
131 for (MachineInstr
*CI
: Schedule
.getInstructions()) {
134 unsigned StageNum
= Schedule
.getStage(CI
);
135 MachineInstr
*NewMI
= cloneInstr(CI
, MaxStageCount
, StageNum
);
136 updateInstruction(NewMI
, false, MaxStageCount
, StageNum
, VRMap
);
137 KernelBB
->push_back(NewMI
);
138 InstrMap
[NewMI
] = CI
;
141 // Copy any terminator instructions to the new kernel, and update
143 for (MachineBasicBlock::iterator I
= BB
->getFirstTerminator(),
146 MachineInstr
*NewMI
= MF
.CloneMachineInstr(&*I
);
147 updateInstruction(NewMI
, false, MaxStageCount
, 0, VRMap
);
148 KernelBB
->push_back(NewMI
);
149 InstrMap
[NewMI
] = &*I
;
152 NewKernel
= KernelBB
;
153 KernelBB
->transferSuccessors(BB
);
154 KernelBB
->replaceSuccessor(BB
, KernelBB
);
156 generateExistingPhis(KernelBB
, PrologBBs
.back(), KernelBB
, KernelBB
, VRMap
,
157 InstrMap
, MaxStageCount
, MaxStageCount
, false);
158 generatePhis(KernelBB
, PrologBBs
.back(), KernelBB
, KernelBB
, VRMap
, InstrMap
,
159 MaxStageCount
, MaxStageCount
, false);
161 LLVM_DEBUG(dbgs() << "New block\n"; KernelBB
->dump(););
163 SmallVector
<MachineBasicBlock
*, 4> EpilogBBs
;
164 // Generate the epilog instructions to complete the pipeline.
165 generateEpilog(MaxStageCount
, KernelBB
, VRMap
, EpilogBBs
, PrologBBs
);
167 // We need this step because the register allocation doesn't handle some
168 // situations well, so we insert copies to help out.
169 splitLifetimes(KernelBB
, EpilogBBs
);
171 // Remove dead instructions due to loop induction variables.
172 removeDeadInstructions(KernelBB
, EpilogBBs
);
174 // Add branches between prolog and epilog blocks.
175 addBranches(*Preheader
, PrologBBs
, KernelBB
, EpilogBBs
, VRMap
);
180 void ModuloScheduleExpander::cleanup() {
181 // Remove the original loop since it's no longer referenced.
183 LIS
.RemoveMachineInstrFromMaps(I
);
185 BB
->eraseFromParent();
188 /// Generate the pipeline prolog code.
189 void ModuloScheduleExpander::generateProlog(unsigned LastStage
,
190 MachineBasicBlock
*KernelBB
,
192 MBBVectorTy
&PrologBBs
) {
193 MachineBasicBlock
*PredBB
= Preheader
;
196 // Generate a basic block for each stage, not including the last stage,
197 // which will be generated in the kernel. Each basic block may contain
198 // instructions from multiple stages/iterations.
199 for (unsigned i
= 0; i
< LastStage
; ++i
) {
200 // Create and insert the prolog basic block prior to the original loop
201 // basic block. The original loop is removed later.
202 MachineBasicBlock
*NewBB
= MF
.CreateMachineBasicBlock(BB
->getBasicBlock());
203 PrologBBs
.push_back(NewBB
);
204 MF
.insert(BB
->getIterator(), NewBB
);
205 NewBB
->transferSuccessors(PredBB
);
206 PredBB
->addSuccessor(NewBB
);
209 // Generate instructions for each appropriate stage. Process instructions
210 // in original program order.
211 for (int StageNum
= i
; StageNum
>= 0; --StageNum
) {
212 for (MachineBasicBlock::iterator BBI
= BB
->instr_begin(),
213 BBE
= BB
->getFirstTerminator();
215 if (Schedule
.getStage(&*BBI
) == StageNum
) {
218 MachineInstr
*NewMI
=
219 cloneAndChangeInstr(&*BBI
, i
, (unsigned)StageNum
);
220 updateInstruction(NewMI
, false, i
, (unsigned)StageNum
, VRMap
);
221 NewBB
->push_back(NewMI
);
222 InstrMap
[NewMI
] = &*BBI
;
226 rewritePhiValues(NewBB
, i
, VRMap
, InstrMap
);
228 dbgs() << "prolog:\n";
233 PredBB
->replaceSuccessor(BB
, KernelBB
);
235 // Check if we need to remove the branch from the preheader to the original
236 // loop, and replace it with a branch to the new loop.
237 unsigned numBranches
= TII
->removeBranch(*Preheader
);
239 SmallVector
<MachineOperand
, 0> Cond
;
240 TII
->insertBranch(*Preheader
, PrologBBs
[0], nullptr, Cond
, DebugLoc());
244 /// Generate the pipeline epilog code. The epilog code finishes the iterations
245 /// that were started in either the prolog or the kernel. We create a basic
246 /// block for each stage that needs to complete.
247 void ModuloScheduleExpander::generateEpilog(unsigned LastStage
,
248 MachineBasicBlock
*KernelBB
,
250 MBBVectorTy
&EpilogBBs
,
251 MBBVectorTy
&PrologBBs
) {
252 // We need to change the branch from the kernel to the first epilog block, so
253 // this call to analyze branch uses the kernel rather than the original BB.
254 MachineBasicBlock
*TBB
= nullptr, *FBB
= nullptr;
255 SmallVector
<MachineOperand
, 4> Cond
;
256 bool checkBranch
= TII
->analyzeBranch(*KernelBB
, TBB
, FBB
, Cond
);
257 assert(!checkBranch
&& "generateEpilog must be able to analyze the branch");
261 MachineBasicBlock::succ_iterator LoopExitI
= KernelBB
->succ_begin();
262 if (*LoopExitI
== KernelBB
)
264 assert(LoopExitI
!= KernelBB
->succ_end() && "Expecting a successor");
265 MachineBasicBlock
*LoopExitBB
= *LoopExitI
;
267 MachineBasicBlock
*PredBB
= KernelBB
;
268 MachineBasicBlock
*EpilogStart
= LoopExitBB
;
271 // Generate a basic block for each stage, not including the last stage,
272 // which was generated for the kernel. Each basic block may contain
273 // instructions from multiple stages/iterations.
274 int EpilogStage
= LastStage
+ 1;
275 for (unsigned i
= LastStage
; i
>= 1; --i
, ++EpilogStage
) {
276 MachineBasicBlock
*NewBB
= MF
.CreateMachineBasicBlock();
277 EpilogBBs
.push_back(NewBB
);
278 MF
.insert(BB
->getIterator(), NewBB
);
280 PredBB
->replaceSuccessor(LoopExitBB
, NewBB
);
281 NewBB
->addSuccessor(LoopExitBB
);
283 if (EpilogStart
== LoopExitBB
)
286 // Add instructions to the epilog depending on the current block.
287 // Process instructions in original program order.
288 for (unsigned StageNum
= i
; StageNum
<= LastStage
; ++StageNum
) {
289 for (auto &BBI
: *BB
) {
292 MachineInstr
*In
= &BBI
;
293 if ((unsigned)Schedule
.getStage(In
) == StageNum
) {
294 // Instructions with memoperands in the epilog are updated with
295 // conservative values.
296 MachineInstr
*NewMI
= cloneInstr(In
, UINT_MAX
, 0);
297 updateInstruction(NewMI
, i
== 1, EpilogStage
, 0, VRMap
);
298 NewBB
->push_back(NewMI
);
299 InstrMap
[NewMI
] = In
;
303 generateExistingPhis(NewBB
, PrologBBs
[i
- 1], PredBB
, KernelBB
, VRMap
,
304 InstrMap
, LastStage
, EpilogStage
, i
== 1);
305 generatePhis(NewBB
, PrologBBs
[i
- 1], PredBB
, KernelBB
, VRMap
, InstrMap
,
306 LastStage
, EpilogStage
, i
== 1);
310 dbgs() << "epilog:\n";
315 // Fix any Phi nodes in the loop exit block.
316 LoopExitBB
->replacePhiUsesWith(BB
, PredBB
);
318 // Create a branch to the new epilog from the kernel.
319 // Remove the original branch and add a new branch to the epilog.
320 TII
->removeBranch(*KernelBB
);
321 TII
->insertBranch(*KernelBB
, KernelBB
, EpilogStart
, Cond
, DebugLoc());
322 // Add a branch to the loop exit.
323 if (EpilogBBs
.size() > 0) {
324 MachineBasicBlock
*LastEpilogBB
= EpilogBBs
.back();
325 SmallVector
<MachineOperand
, 4> Cond1
;
326 TII
->insertBranch(*LastEpilogBB
, LoopExitBB
, nullptr, Cond1
, DebugLoc());
330 /// Replace all uses of FromReg that appear outside the specified
331 /// basic block with ToReg.
332 static void replaceRegUsesAfterLoop(unsigned FromReg
, unsigned ToReg
,
333 MachineBasicBlock
*MBB
,
334 MachineRegisterInfo
&MRI
,
335 LiveIntervals
&LIS
) {
336 for (MachineRegisterInfo::use_iterator I
= MRI
.use_begin(FromReg
),
339 MachineOperand
&O
= *I
;
341 if (O
.getParent()->getParent() != MBB
)
344 if (!LIS
.hasInterval(ToReg
))
345 LIS
.createEmptyInterval(ToReg
);
348 /// Return true if the register has a use that occurs outside the
350 static bool hasUseAfterLoop(unsigned Reg
, MachineBasicBlock
*BB
,
351 MachineRegisterInfo
&MRI
) {
352 for (MachineRegisterInfo::use_iterator I
= MRI
.use_begin(Reg
),
355 if (I
->getParent()->getParent() != BB
)
360 /// Generate Phis for the specific block in the generated pipelined code.
361 /// This function looks at the Phis from the original code to guide the
362 /// creation of new Phis.
363 void ModuloScheduleExpander::generateExistingPhis(
364 MachineBasicBlock
*NewBB
, MachineBasicBlock
*BB1
, MachineBasicBlock
*BB2
,
365 MachineBasicBlock
*KernelBB
, ValueMapTy
*VRMap
, InstrMapTy
&InstrMap
,
366 unsigned LastStageNum
, unsigned CurStageNum
, bool IsLast
) {
367 // Compute the stage number for the initial value of the Phi, which
368 // comes from the prolog. The prolog to use depends on to which kernel/
369 // epilog that we're adding the Phi.
370 unsigned PrologStage
= 0;
371 unsigned PrevStage
= 0;
372 bool InKernel
= (LastStageNum
== CurStageNum
);
374 PrologStage
= LastStageNum
- 1;
375 PrevStage
= CurStageNum
;
377 PrologStage
= LastStageNum
- (CurStageNum
- LastStageNum
);
378 PrevStage
= LastStageNum
+ (CurStageNum
- LastStageNum
) - 1;
381 for (MachineBasicBlock::iterator BBI
= BB
->instr_begin(),
382 BBE
= BB
->getFirstNonPHI();
384 Register Def
= BBI
->getOperand(0).getReg();
386 unsigned InitVal
= 0;
387 unsigned LoopVal
= 0;
388 getPhiRegs(*BBI
, BB
, InitVal
, LoopVal
);
391 // The Phi value from the loop body typically is defined in the loop, but
392 // not always. So, we need to check if the value is defined in the loop.
393 unsigned PhiOp2
= LoopVal
;
394 if (VRMap
[LastStageNum
].count(LoopVal
))
395 PhiOp2
= VRMap
[LastStageNum
][LoopVal
];
397 int StageScheduled
= Schedule
.getStage(&*BBI
);
398 int LoopValStage
= Schedule
.getStage(MRI
.getVRegDef(LoopVal
));
399 unsigned NumStages
= getStagesForReg(Def
, CurStageNum
);
400 if (NumStages
== 0) {
401 // We don't need to generate a Phi anymore, but we need to rename any uses
403 unsigned NewReg
= VRMap
[PrevStage
][LoopVal
];
404 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, 0, &*BBI
, Def
,
406 if (VRMap
[CurStageNum
].count(LoopVal
))
407 VRMap
[CurStageNum
][Def
] = VRMap
[CurStageNum
][LoopVal
];
409 // Adjust the number of Phis needed depending on the number of prologs left,
410 // and the distance from where the Phi is first scheduled. The number of
411 // Phis cannot exceed the number of prolog stages. Each stage can
412 // potentially define two values.
413 unsigned MaxPhis
= PrologStage
+ 2;
414 if (!InKernel
&& (int)PrologStage
<= LoopValStage
)
415 MaxPhis
= std::max((int)MaxPhis
- (int)LoopValStage
, 1);
416 unsigned NumPhis
= std::min(NumStages
, MaxPhis
);
419 unsigned AccessStage
= (LoopValStage
!= -1) ? LoopValStage
: StageScheduled
;
420 // In the epilog, we may need to look back one stage to get the correct
421 // Phi name because the epilog and prolog blocks execute the same stage.
422 // The correct name is from the previous block only when the Phi has
423 // been completely scheduled prior to the epilog, and Phi value is not
424 // needed in multiple stages.
426 if (!InKernel
&& StageScheduled
>= LoopValStage
&& AccessStage
== 0 &&
429 // Adjust the computations below when the phi and the loop definition
430 // are scheduled in different stages.
431 if (InKernel
&& LoopValStage
!= -1 && StageScheduled
> LoopValStage
)
432 StageDiff
= StageScheduled
- LoopValStage
;
433 for (unsigned np
= 0; np
< NumPhis
; ++np
) {
434 // If the Phi hasn't been scheduled, then use the initial Phi operand
435 // value. Otherwise, use the scheduled version of the instruction. This
436 // is a little complicated when a Phi references another Phi.
437 if (np
> PrologStage
|| StageScheduled
>= (int)LastStageNum
)
439 // Check if the Phi has already been scheduled in a prolog stage.
440 else if (PrologStage
>= AccessStage
+ StageDiff
+ np
&&
441 VRMap
[PrologStage
- StageDiff
- np
].count(LoopVal
) != 0)
442 PhiOp1
= VRMap
[PrologStage
- StageDiff
- np
][LoopVal
];
443 // Check if the Phi has already been scheduled, but the loop instruction
444 // is either another Phi, or doesn't occur in the loop.
445 else if (PrologStage
>= AccessStage
+ StageDiff
+ np
) {
446 // If the Phi references another Phi, we need to examine the other
447 // Phi to get the correct value.
449 MachineInstr
*InstOp1
= MRI
.getVRegDef(PhiOp1
);
451 while (InstOp1
&& InstOp1
->isPHI() && InstOp1
->getParent() == BB
) {
452 int PhiStage
= Schedule
.getStage(InstOp1
);
453 if ((int)(PrologStage
- StageDiff
- np
) < PhiStage
+ Indirects
)
454 PhiOp1
= getInitPhiReg(*InstOp1
, BB
);
456 PhiOp1
= getLoopPhiReg(*InstOp1
, BB
);
457 InstOp1
= MRI
.getVRegDef(PhiOp1
);
458 int PhiOpStage
= Schedule
.getStage(InstOp1
);
459 int StageAdj
= (PhiOpStage
!= -1 ? PhiStage
- PhiOpStage
: 0);
460 if (PhiOpStage
!= -1 && PrologStage
- StageAdj
>= Indirects
+ np
&&
461 VRMap
[PrologStage
- StageAdj
- Indirects
- np
].count(PhiOp1
)) {
462 PhiOp1
= VRMap
[PrologStage
- StageAdj
- Indirects
- np
][PhiOp1
];
469 // If this references a generated Phi in the kernel, get the Phi operand
470 // from the incoming block.
471 if (MachineInstr
*InstOp1
= MRI
.getVRegDef(PhiOp1
))
472 if (InstOp1
->isPHI() && InstOp1
->getParent() == KernelBB
)
473 PhiOp1
= getInitPhiReg(*InstOp1
, KernelBB
);
475 MachineInstr
*PhiInst
= MRI
.getVRegDef(LoopVal
);
476 bool LoopDefIsPhi
= PhiInst
&& PhiInst
->isPHI();
477 // In the epilog, a map lookup is needed to get the value from the kernel,
478 // or previous epilog block. How is does this depends on if the
479 // instruction is scheduled in the previous block.
481 int StageDiffAdj
= 0;
482 if (LoopValStage
!= -1 && StageScheduled
> LoopValStage
)
483 StageDiffAdj
= StageScheduled
- LoopValStage
;
484 // Use the loop value defined in the kernel, unless the kernel
485 // contains the last definition of the Phi.
486 if (np
== 0 && PrevStage
== LastStageNum
&&
487 (StageScheduled
!= 0 || LoopValStage
!= 0) &&
488 VRMap
[PrevStage
- StageDiffAdj
].count(LoopVal
))
489 PhiOp2
= VRMap
[PrevStage
- StageDiffAdj
][LoopVal
];
490 // Use the value defined by the Phi. We add one because we switch
491 // from looking at the loop value to the Phi definition.
492 else if (np
> 0 && PrevStage
== LastStageNum
&&
493 VRMap
[PrevStage
- np
+ 1].count(Def
))
494 PhiOp2
= VRMap
[PrevStage
- np
+ 1][Def
];
495 // Use the loop value defined in the kernel.
496 else if (static_cast<unsigned>(LoopValStage
) > PrologStage
+ 1 &&
497 VRMap
[PrevStage
- StageDiffAdj
- np
].count(LoopVal
))
498 PhiOp2
= VRMap
[PrevStage
- StageDiffAdj
- np
][LoopVal
];
499 // Use the value defined by the Phi, unless we're generating the first
500 // epilog and the Phi refers to a Phi in a different stage.
501 else if (VRMap
[PrevStage
- np
].count(Def
) &&
502 (!LoopDefIsPhi
|| (PrevStage
!= LastStageNum
) ||
503 (LoopValStage
== StageScheduled
)))
504 PhiOp2
= VRMap
[PrevStage
- np
][Def
];
507 // Check if we can reuse an existing Phi. This occurs when a Phi
508 // references another Phi, and the other Phi is scheduled in an
509 // earlier stage. We can try to reuse an existing Phi up until the last
510 // stage of the current Phi.
512 if (static_cast<int>(PrologStage
- np
) >= StageScheduled
) {
513 int LVNumStages
= getStagesForPhi(LoopVal
);
514 int StageDiff
= (StageScheduled
- LoopValStage
);
515 LVNumStages
-= StageDiff
;
516 // Make sure the loop value Phi has been processed already.
517 if (LVNumStages
> (int)np
&& VRMap
[CurStageNum
].count(LoopVal
)) {
519 unsigned ReuseStage
= CurStageNum
;
520 if (isLoopCarried(*PhiInst
))
521 ReuseStage
-= LVNumStages
;
522 // Check if the Phi to reuse has been generated yet. If not, then
523 // there is nothing to reuse.
524 if (VRMap
[ReuseStage
- np
].count(LoopVal
)) {
525 NewReg
= VRMap
[ReuseStage
- np
][LoopVal
];
527 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
,
529 // Update the map with the new Phi name.
530 VRMap
[CurStageNum
- np
][Def
] = NewReg
;
532 if (VRMap
[LastStageNum
- np
- 1].count(LoopVal
))
533 PhiOp2
= VRMap
[LastStageNum
- np
- 1][LoopVal
];
535 if (IsLast
&& np
== NumPhis
- 1)
536 replaceRegUsesAfterLoop(Def
, NewReg
, BB
, MRI
, LIS
);
541 if (InKernel
&& StageDiff
> 0 &&
542 VRMap
[CurStageNum
- StageDiff
- np
].count(LoopVal
))
543 PhiOp2
= VRMap
[CurStageNum
- StageDiff
- np
][LoopVal
];
546 const TargetRegisterClass
*RC
= MRI
.getRegClass(Def
);
547 NewReg
= MRI
.createVirtualRegister(RC
);
549 MachineInstrBuilder NewPhi
=
550 BuildMI(*NewBB
, NewBB
->getFirstNonPHI(), DebugLoc(),
551 TII
->get(TargetOpcode::PHI
), NewReg
);
552 NewPhi
.addReg(PhiOp1
).addMBB(BB1
);
553 NewPhi
.addReg(PhiOp2
).addMBB(BB2
);
555 InstrMap
[NewPhi
] = &*BBI
;
557 // We define the Phis after creating the new pipelined code, so
558 // we need to rename the Phi values in scheduled instructions.
560 unsigned PrevReg
= 0;
561 if (InKernel
&& VRMap
[PrevStage
- np
].count(LoopVal
))
562 PrevReg
= VRMap
[PrevStage
- np
][LoopVal
];
563 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
, Def
,
565 // If the Phi has been scheduled, use the new name for rewriting.
566 if (VRMap
[CurStageNum
- np
].count(Def
)) {
567 unsigned R
= VRMap
[CurStageNum
- np
][Def
];
568 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
, R
,
572 // Check if we need to rename any uses that occurs after the loop. The
573 // register to replace depends on whether the Phi is scheduled in the
575 if (IsLast
&& np
== NumPhis
- 1)
576 replaceRegUsesAfterLoop(Def
, NewReg
, BB
, MRI
, LIS
);
578 // In the kernel, a dependent Phi uses the value from this Phi.
582 // Update the map with the new Phi name.
583 VRMap
[CurStageNum
- np
][Def
] = NewReg
;
586 while (NumPhis
++ < NumStages
) {
587 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, NumPhis
, &*BBI
, Def
,
591 // Check if we need to rename a Phi that has been eliminated due to
593 if (NumStages
== 0 && IsLast
&& VRMap
[CurStageNum
].count(LoopVal
))
594 replaceRegUsesAfterLoop(Def
, VRMap
[CurStageNum
][LoopVal
], BB
, MRI
, LIS
);
598 /// Generate Phis for the specified block in the generated pipelined code.
599 /// These are new Phis needed because the definition is scheduled after the
600 /// use in the pipelined sequence.
601 void ModuloScheduleExpander::generatePhis(
602 MachineBasicBlock
*NewBB
, MachineBasicBlock
*BB1
, MachineBasicBlock
*BB2
,
603 MachineBasicBlock
*KernelBB
, ValueMapTy
*VRMap
, InstrMapTy
&InstrMap
,
604 unsigned LastStageNum
, unsigned CurStageNum
, bool IsLast
) {
605 // Compute the stage number that contains the initial Phi value, and
606 // the Phi from the previous stage.
607 unsigned PrologStage
= 0;
608 unsigned PrevStage
= 0;
609 unsigned StageDiff
= CurStageNum
- LastStageNum
;
610 bool InKernel
= (StageDiff
== 0);
612 PrologStage
= LastStageNum
- 1;
613 PrevStage
= CurStageNum
;
615 PrologStage
= LastStageNum
- StageDiff
;
616 PrevStage
= LastStageNum
+ StageDiff
- 1;
619 for (MachineBasicBlock::iterator BBI
= BB
->getFirstNonPHI(),
620 BBE
= BB
->instr_end();
622 for (unsigned i
= 0, e
= BBI
->getNumOperands(); i
!= e
; ++i
) {
623 MachineOperand
&MO
= BBI
->getOperand(i
);
624 if (!MO
.isReg() || !MO
.isDef() ||
625 !Register::isVirtualRegister(MO
.getReg()))
628 int StageScheduled
= Schedule
.getStage(&*BBI
);
629 assert(StageScheduled
!= -1 && "Expecting scheduled instruction.");
630 Register Def
= MO
.getReg();
631 unsigned NumPhis
= getStagesForReg(Def
, CurStageNum
);
632 // An instruction scheduled in stage 0 and is used after the loop
633 // requires a phi in the epilog for the last definition from either
634 // the kernel or prolog.
635 if (!InKernel
&& NumPhis
== 0 && StageScheduled
== 0 &&
636 hasUseAfterLoop(Def
, BB
, MRI
))
638 if (!InKernel
&& (unsigned)StageScheduled
> PrologStage
)
641 unsigned PhiOp2
= VRMap
[PrevStage
][Def
];
642 if (MachineInstr
*InstOp2
= MRI
.getVRegDef(PhiOp2
))
643 if (InstOp2
->isPHI() && InstOp2
->getParent() == NewBB
)
644 PhiOp2
= getLoopPhiReg(*InstOp2
, BB2
);
645 // The number of Phis can't exceed the number of prolog stages. The
646 // prolog stage number is zero based.
647 if (NumPhis
> PrologStage
+ 1 - StageScheduled
)
648 NumPhis
= PrologStage
+ 1 - StageScheduled
;
649 for (unsigned np
= 0; np
< NumPhis
; ++np
) {
650 unsigned PhiOp1
= VRMap
[PrologStage
][Def
];
651 if (np
<= PrologStage
)
652 PhiOp1
= VRMap
[PrologStage
- np
][Def
];
653 if (MachineInstr
*InstOp1
= MRI
.getVRegDef(PhiOp1
)) {
654 if (InstOp1
->isPHI() && InstOp1
->getParent() == KernelBB
)
655 PhiOp1
= getInitPhiReg(*InstOp1
, KernelBB
);
656 if (InstOp1
->isPHI() && InstOp1
->getParent() == NewBB
)
657 PhiOp1
= getInitPhiReg(*InstOp1
, NewBB
);
660 PhiOp2
= VRMap
[PrevStage
- np
][Def
];
662 const TargetRegisterClass
*RC
= MRI
.getRegClass(Def
);
663 Register NewReg
= MRI
.createVirtualRegister(RC
);
665 MachineInstrBuilder NewPhi
=
666 BuildMI(*NewBB
, NewBB
->getFirstNonPHI(), DebugLoc(),
667 TII
->get(TargetOpcode::PHI
), NewReg
);
668 NewPhi
.addReg(PhiOp1
).addMBB(BB1
);
669 NewPhi
.addReg(PhiOp2
).addMBB(BB2
);
671 InstrMap
[NewPhi
] = &*BBI
;
673 // Rewrite uses and update the map. The actions depend upon whether
674 // we generating code for the kernel or epilog blocks.
676 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
, PhiOp1
,
678 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
, PhiOp2
,
682 VRMap
[PrevStage
- np
- 1][Def
] = NewReg
;
684 VRMap
[CurStageNum
- np
][Def
] = NewReg
;
685 if (np
== NumPhis
- 1)
686 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
, Def
,
689 if (IsLast
&& np
== NumPhis
- 1)
690 replaceRegUsesAfterLoop(Def
, NewReg
, BB
, MRI
, LIS
);
696 /// Remove instructions that generate values with no uses.
697 /// Typically, these are induction variable operations that generate values
698 /// used in the loop itself. A dead instruction has a definition with
699 /// no uses, or uses that occur in the original loop only.
700 void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock
*KernelBB
,
701 MBBVectorTy
&EpilogBBs
) {
702 // For each epilog block, check that the value defined by each instruction
703 // is used. If not, delete it.
704 for (MBBVectorTy::reverse_iterator MBB
= EpilogBBs
.rbegin(),
705 MBE
= EpilogBBs
.rend();
707 for (MachineBasicBlock::reverse_instr_iterator MI
= (*MBB
)->instr_rbegin(),
708 ME
= (*MBB
)->instr_rend();
710 // From DeadMachineInstructionElem. Don't delete inline assembly.
711 if (MI
->isInlineAsm()) {
715 bool SawStore
= false;
716 // Check if it's safe to remove the instruction due to side effects.
717 // We can, and want to, remove Phis here.
718 if (!MI
->isSafeToMove(nullptr, SawStore
) && !MI
->isPHI()) {
723 for (MachineInstr::mop_iterator MOI
= MI
->operands_begin(),
724 MOE
= MI
->operands_end();
726 if (!MOI
->isReg() || !MOI
->isDef())
728 Register reg
= MOI
->getReg();
729 // Assume physical registers are used, unless they are marked dead.
730 if (Register::isPhysicalRegister(reg
)) {
731 used
= !MOI
->isDead();
736 unsigned realUses
= 0;
737 for (MachineRegisterInfo::use_iterator UI
= MRI
.use_begin(reg
),
740 // Check if there are any uses that occur only in the original
741 // loop. If so, that's not a real use.
742 if (UI
->getParent()->getParent() != BB
) {
753 LIS
.RemoveMachineInstrFromMaps(*MI
);
754 MI
++->eraseFromParent();
759 // In the kernel block, check if we can remove a Phi that generates a value
760 // used in an instruction removed in the epilog block.
761 for (MachineBasicBlock::iterator BBI
= KernelBB
->instr_begin(),
762 BBE
= KernelBB
->getFirstNonPHI();
764 MachineInstr
*MI
= &*BBI
;
766 Register reg
= MI
->getOperand(0).getReg();
767 if (MRI
.use_begin(reg
) == MRI
.use_end()) {
768 LIS
.RemoveMachineInstrFromMaps(*MI
);
769 MI
->eraseFromParent();
774 /// For loop carried definitions, we split the lifetime of a virtual register
775 /// that has uses past the definition in the next iteration. A copy with a new
776 /// virtual register is inserted before the definition, which helps with
777 /// generating a better register assignment.
779 /// v1 = phi(a, v2) v1 = phi(a, v2)
780 /// v2 = phi(b, v3) v2 = phi(b, v3)
781 /// v3 = .. v4 = copy v1
784 void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock
*KernelBB
,
785 MBBVectorTy
&EpilogBBs
) {
786 const TargetRegisterInfo
*TRI
= MF
.getSubtarget().getRegisterInfo();
787 for (auto &PHI
: KernelBB
->phis()) {
788 Register Def
= PHI
.getOperand(0).getReg();
789 // Check for any Phi definition that used as an operand of another Phi
790 // in the same block.
791 for (MachineRegisterInfo::use_instr_iterator I
= MRI
.use_instr_begin(Def
),
792 E
= MRI
.use_instr_end();
794 if (I
->isPHI() && I
->getParent() == KernelBB
) {
795 // Get the loop carried definition.
796 unsigned LCDef
= getLoopPhiReg(PHI
, KernelBB
);
799 MachineInstr
*MI
= MRI
.getVRegDef(LCDef
);
800 if (!MI
|| MI
->getParent() != KernelBB
|| MI
->isPHI())
802 // Search through the rest of the block looking for uses of the Phi
803 // definition. If one occurs, then split the lifetime.
804 unsigned SplitReg
= 0;
805 for (auto &BBJ
: make_range(MachineBasicBlock::instr_iterator(MI
),
806 KernelBB
->instr_end()))
807 if (BBJ
.readsRegister(Def
)) {
808 // We split the lifetime when we find the first use.
810 SplitReg
= MRI
.createVirtualRegister(MRI
.getRegClass(Def
));
811 BuildMI(*KernelBB
, MI
, MI
->getDebugLoc(),
812 TII
->get(TargetOpcode::COPY
), SplitReg
)
815 BBJ
.substituteRegister(Def
, SplitReg
, 0, *TRI
);
819 // Search through each of the epilog blocks for any uses to be renamed.
820 for (auto &Epilog
: EpilogBBs
)
821 for (auto &I
: *Epilog
)
822 if (I
.readsRegister(Def
))
823 I
.substituteRegister(Def
, SplitReg
, 0, *TRI
);
830 /// Remove the incoming block from the Phis in a basic block.
831 static void removePhis(MachineBasicBlock
*BB
, MachineBasicBlock
*Incoming
) {
832 for (MachineInstr
&MI
: *BB
) {
835 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2)
836 if (MI
.getOperand(i
+ 1).getMBB() == Incoming
) {
837 MI
.RemoveOperand(i
+ 1);
844 /// Create branches from each prolog basic block to the appropriate epilog
845 /// block. These edges are needed if the loop ends before reaching the
847 void ModuloScheduleExpander::addBranches(MachineBasicBlock
&PreheaderBB
,
848 MBBVectorTy
&PrologBBs
,
849 MachineBasicBlock
*KernelBB
,
850 MBBVectorTy
&EpilogBBs
,
852 assert(PrologBBs
.size() == EpilogBBs
.size() && "Prolog/Epilog mismatch");
853 MachineBasicBlock
*LastPro
= KernelBB
;
854 MachineBasicBlock
*LastEpi
= KernelBB
;
856 // Start from the blocks connected to the kernel and work "out"
857 // to the first prolog and the last epilog blocks.
858 SmallVector
<MachineInstr
*, 4> PrevInsts
;
859 unsigned MaxIter
= PrologBBs
.size() - 1;
860 for (unsigned i
= 0, j
= MaxIter
; i
<= MaxIter
; ++i
, --j
) {
861 // Add branches to the prolog that go to the corresponding
862 // epilog, and the fall-thru prolog/kernel block.
863 MachineBasicBlock
*Prolog
= PrologBBs
[j
];
864 MachineBasicBlock
*Epilog
= EpilogBBs
[i
];
866 SmallVector
<MachineOperand
, 4> Cond
;
867 Optional
<bool> StaticallyGreater
=
868 LoopInfo
->createTripCountGreaterCondition(j
+ 1, *Prolog
, Cond
);
869 unsigned numAdded
= 0;
870 if (!StaticallyGreater
.hasValue()) {
871 Prolog
->addSuccessor(Epilog
);
872 numAdded
= TII
->insertBranch(*Prolog
, Epilog
, LastPro
, Cond
, DebugLoc());
873 } else if (*StaticallyGreater
== false) {
874 Prolog
->addSuccessor(Epilog
);
875 Prolog
->removeSuccessor(LastPro
);
876 LastEpi
->removeSuccessor(Epilog
);
877 numAdded
= TII
->insertBranch(*Prolog
, Epilog
, nullptr, Cond
, DebugLoc());
878 removePhis(Epilog
, LastEpi
);
879 // Remove the blocks that are no longer referenced.
880 if (LastPro
!= LastEpi
) {
882 LastEpi
->eraseFromParent();
884 if (LastPro
== KernelBB
) {
885 LoopInfo
->disposed();
889 LastPro
->eraseFromParent();
891 numAdded
= TII
->insertBranch(*Prolog
, LastPro
, nullptr, Cond
, DebugLoc());
892 removePhis(Epilog
, Prolog
);
896 for (MachineBasicBlock::reverse_instr_iterator I
= Prolog
->instr_rbegin(),
897 E
= Prolog
->instr_rend();
898 I
!= E
&& numAdded
> 0; ++I
, --numAdded
)
899 updateInstruction(&*I
, false, j
, 0, VRMap
);
903 LoopInfo
->setPreheader(PrologBBs
[MaxIter
]);
904 LoopInfo
->adjustTripCount(-(MaxIter
+ 1));
908 /// Return true if we can compute the amount the instruction changes
909 /// during each iteration. Set Delta to the amount of the change.
910 bool ModuloScheduleExpander::computeDelta(MachineInstr
&MI
, unsigned &Delta
) {
911 const TargetRegisterInfo
*TRI
= MF
.getSubtarget().getRegisterInfo();
912 const MachineOperand
*BaseOp
;
914 if (!TII
->getMemOperandWithOffset(MI
, BaseOp
, Offset
, TRI
))
917 if (!BaseOp
->isReg())
920 Register BaseReg
= BaseOp
->getReg();
922 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
923 // Check if there is a Phi. If so, get the definition in the loop.
924 MachineInstr
*BaseDef
= MRI
.getVRegDef(BaseReg
);
925 if (BaseDef
&& BaseDef
->isPHI()) {
926 BaseReg
= getLoopPhiReg(*BaseDef
, MI
.getParent());
927 BaseDef
= MRI
.getVRegDef(BaseReg
);
933 if (!TII
->getIncrementValue(*BaseDef
, D
) && D
>= 0)
940 /// Update the memory operand with a new offset when the pipeliner
941 /// generates a new copy of the instruction that refers to a
942 /// different memory location.
943 void ModuloScheduleExpander::updateMemOperands(MachineInstr
&NewMI
,
948 // If the instruction has memory operands, then adjust the offset
949 // when the instruction appears in different stages.
950 if (NewMI
.memoperands_empty())
952 SmallVector
<MachineMemOperand
*, 2> NewMMOs
;
953 for (MachineMemOperand
*MMO
: NewMI
.memoperands()) {
954 // TODO: Figure out whether isAtomic is really necessary (see D57601).
955 if (MMO
->isVolatile() || MMO
->isAtomic() ||
956 (MMO
->isInvariant() && MMO
->isDereferenceable()) ||
957 (!MMO
->getValue())) {
958 NewMMOs
.push_back(MMO
);
962 if (Num
!= UINT_MAX
&& computeDelta(OldMI
, Delta
)) {
963 int64_t AdjOffset
= Delta
* Num
;
965 MF
.getMachineMemOperand(MMO
, AdjOffset
, MMO
->getSize()));
968 MF
.getMachineMemOperand(MMO
, 0, MemoryLocation::UnknownSize
));
971 NewMI
.setMemRefs(MF
, NewMMOs
);
974 /// Clone the instruction for the new pipelined loop and update the
975 /// memory operands, if needed.
976 MachineInstr
*ModuloScheduleExpander::cloneInstr(MachineInstr
*OldMI
,
977 unsigned CurStageNum
,
978 unsigned InstStageNum
) {
979 MachineInstr
*NewMI
= MF
.CloneMachineInstr(OldMI
);
980 // Check for tied operands in inline asm instructions. This should be handled
981 // elsewhere, but I'm not sure of the best solution.
982 if (OldMI
->isInlineAsm())
983 for (unsigned i
= 0, e
= OldMI
->getNumOperands(); i
!= e
; ++i
) {
984 const auto &MO
= OldMI
->getOperand(i
);
985 if (MO
.isReg() && MO
.isUse())
988 if (OldMI
->isRegTiedToUseOperand(i
, &UseIdx
))
989 NewMI
->tieOperands(i
, UseIdx
);
991 updateMemOperands(*NewMI
, *OldMI
, CurStageNum
- InstStageNum
);
995 /// Clone the instruction for the new pipelined loop. If needed, this
996 /// function updates the instruction using the values saved in the
997 /// InstrChanges structure.
998 MachineInstr
*ModuloScheduleExpander::cloneAndChangeInstr(
999 MachineInstr
*OldMI
, unsigned CurStageNum
, unsigned InstStageNum
) {
1000 MachineInstr
*NewMI
= MF
.CloneMachineInstr(OldMI
);
1001 auto It
= InstrChanges
.find(OldMI
);
1002 if (It
!= InstrChanges
.end()) {
1003 std::pair
<unsigned, int64_t> RegAndOffset
= It
->second
;
1004 unsigned BasePos
, OffsetPos
;
1005 if (!TII
->getBaseAndOffsetPosition(*OldMI
, BasePos
, OffsetPos
))
1007 int64_t NewOffset
= OldMI
->getOperand(OffsetPos
).getImm();
1008 MachineInstr
*LoopDef
= findDefInLoop(RegAndOffset
.first
);
1009 if (Schedule
.getStage(LoopDef
) > (signed)InstStageNum
)
1010 NewOffset
+= RegAndOffset
.second
* (CurStageNum
- InstStageNum
);
1011 NewMI
->getOperand(OffsetPos
).setImm(NewOffset
);
1013 updateMemOperands(*NewMI
, *OldMI
, CurStageNum
- InstStageNum
);
1017 /// Update the machine instruction with new virtual registers. This
1018 /// function may change the defintions and/or uses.
1019 void ModuloScheduleExpander::updateInstruction(MachineInstr
*NewMI
,
1021 unsigned CurStageNum
,
1022 unsigned InstrStageNum
,
1023 ValueMapTy
*VRMap
) {
1024 for (unsigned i
= 0, e
= NewMI
->getNumOperands(); i
!= e
; ++i
) {
1025 MachineOperand
&MO
= NewMI
->getOperand(i
);
1026 if (!MO
.isReg() || !Register::isVirtualRegister(MO
.getReg()))
1028 Register reg
= MO
.getReg();
1030 // Create a new virtual register for the definition.
1031 const TargetRegisterClass
*RC
= MRI
.getRegClass(reg
);
1032 Register NewReg
= MRI
.createVirtualRegister(RC
);
1034 VRMap
[CurStageNum
][reg
] = NewReg
;
1036 replaceRegUsesAfterLoop(reg
, NewReg
, BB
, MRI
, LIS
);
1037 } else if (MO
.isUse()) {
1038 MachineInstr
*Def
= MRI
.getVRegDef(reg
);
1039 // Compute the stage that contains the last definition for instruction.
1040 int DefStageNum
= Schedule
.getStage(Def
);
1041 unsigned StageNum
= CurStageNum
;
1042 if (DefStageNum
!= -1 && (int)InstrStageNum
> DefStageNum
) {
1043 // Compute the difference in stages between the defintion and the use.
1044 unsigned StageDiff
= (InstrStageNum
- DefStageNum
);
1045 // Make an adjustment to get the last definition.
1046 StageNum
-= StageDiff
;
1048 if (VRMap
[StageNum
].count(reg
))
1049 MO
.setReg(VRMap
[StageNum
][reg
]);
1054 /// Return the instruction in the loop that defines the register.
1055 /// If the definition is a Phi, then follow the Phi operand to
1056 /// the instruction in the loop.
1057 MachineInstr
*ModuloScheduleExpander::findDefInLoop(unsigned Reg
) {
1058 SmallPtrSet
<MachineInstr
*, 8> Visited
;
1059 MachineInstr
*Def
= MRI
.getVRegDef(Reg
);
1060 while (Def
->isPHI()) {
1061 if (!Visited
.insert(Def
).second
)
1063 for (unsigned i
= 1, e
= Def
->getNumOperands(); i
< e
; i
+= 2)
1064 if (Def
->getOperand(i
+ 1).getMBB() == BB
) {
1065 Def
= MRI
.getVRegDef(Def
->getOperand(i
).getReg());
1072 /// Return the new name for the value from the previous stage.
1073 unsigned ModuloScheduleExpander::getPrevMapVal(
1074 unsigned StageNum
, unsigned PhiStage
, unsigned LoopVal
, unsigned LoopStage
,
1075 ValueMapTy
*VRMap
, MachineBasicBlock
*BB
) {
1076 unsigned PrevVal
= 0;
1077 if (StageNum
> PhiStage
) {
1078 MachineInstr
*LoopInst
= MRI
.getVRegDef(LoopVal
);
1079 if (PhiStage
== LoopStage
&& VRMap
[StageNum
- 1].count(LoopVal
))
1080 // The name is defined in the previous stage.
1081 PrevVal
= VRMap
[StageNum
- 1][LoopVal
];
1082 else if (VRMap
[StageNum
].count(LoopVal
))
1083 // The previous name is defined in the current stage when the instruction
1084 // order is swapped.
1085 PrevVal
= VRMap
[StageNum
][LoopVal
];
1086 else if (!LoopInst
->isPHI() || LoopInst
->getParent() != BB
)
1087 // The loop value hasn't yet been scheduled.
1089 else if (StageNum
== PhiStage
+ 1)
1090 // The loop value is another phi, which has not been scheduled.
1091 PrevVal
= getInitPhiReg(*LoopInst
, BB
);
1092 else if (StageNum
> PhiStage
+ 1 && LoopInst
->getParent() == BB
)
1093 // The loop value is another phi, which has been scheduled.
1095 getPrevMapVal(StageNum
- 1, PhiStage
, getLoopPhiReg(*LoopInst
, BB
),
1096 LoopStage
, VRMap
, BB
);
1101 /// Rewrite the Phi values in the specified block to use the mappings
1102 /// from the initial operand. Once the Phi is scheduled, we switch
1103 /// to using the loop value instead of the Phi value, so those names
1104 /// do not need to be rewritten.
1105 void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock
*NewBB
,
1108 InstrMapTy
&InstrMap
) {
1109 for (auto &PHI
: BB
->phis()) {
1110 unsigned InitVal
= 0;
1111 unsigned LoopVal
= 0;
1112 getPhiRegs(PHI
, BB
, InitVal
, LoopVal
);
1113 Register PhiDef
= PHI
.getOperand(0).getReg();
1115 unsigned PhiStage
= (unsigned)Schedule
.getStage(MRI
.getVRegDef(PhiDef
));
1116 unsigned LoopStage
= (unsigned)Schedule
.getStage(MRI
.getVRegDef(LoopVal
));
1117 unsigned NumPhis
= getStagesForPhi(PhiDef
);
1118 if (NumPhis
> StageNum
)
1120 for (unsigned np
= 0; np
<= NumPhis
; ++np
) {
1122 getPrevMapVal(StageNum
- np
, PhiStage
, LoopVal
, LoopStage
, VRMap
, BB
);
1125 rewriteScheduledInstr(NewBB
, InstrMap
, StageNum
- np
, np
, &PHI
, PhiDef
,
1131 /// Rewrite a previously scheduled instruction to use the register value
1132 /// from the new instruction. Make sure the instruction occurs in the
1133 /// basic block, and we don't change the uses in the new instruction.
1134 void ModuloScheduleExpander::rewriteScheduledInstr(
1135 MachineBasicBlock
*BB
, InstrMapTy
&InstrMap
, unsigned CurStageNum
,
1136 unsigned PhiNum
, MachineInstr
*Phi
, unsigned OldReg
, unsigned NewReg
,
1138 bool InProlog
= (CurStageNum
< (unsigned)Schedule
.getNumStages() - 1);
1139 int StagePhi
= Schedule
.getStage(Phi
) + PhiNum
;
1140 // Rewrite uses that have been scheduled already to use the new
1142 for (MachineRegisterInfo::use_iterator UI
= MRI
.use_begin(OldReg
),
1145 MachineOperand
&UseOp
= *UI
;
1146 MachineInstr
*UseMI
= UseOp
.getParent();
1148 if (UseMI
->getParent() != BB
)
1150 if (UseMI
->isPHI()) {
1151 if (!Phi
->isPHI() && UseMI
->getOperand(0).getReg() == NewReg
)
1153 if (getLoopPhiReg(*UseMI
, BB
) != OldReg
)
1156 InstrMapTy::iterator OrigInstr
= InstrMap
.find(UseMI
);
1157 assert(OrigInstr
!= InstrMap
.end() && "Instruction not scheduled.");
1158 MachineInstr
*OrigMI
= OrigInstr
->second
;
1159 int StageSched
= Schedule
.getStage(OrigMI
);
1160 int CycleSched
= Schedule
.getCycle(OrigMI
);
1161 unsigned ReplaceReg
= 0;
1162 // This is the stage for the scheduled instruction.
1163 if (StagePhi
== StageSched
&& Phi
->isPHI()) {
1164 int CyclePhi
= Schedule
.getCycle(Phi
);
1165 if (PrevReg
&& InProlog
)
1166 ReplaceReg
= PrevReg
;
1167 else if (PrevReg
&& !isLoopCarried(*Phi
) &&
1168 (CyclePhi
<= CycleSched
|| OrigMI
->isPHI()))
1169 ReplaceReg
= PrevReg
;
1171 ReplaceReg
= NewReg
;
1173 // The scheduled instruction occurs before the scheduled Phi, and the
1174 // Phi is not loop carried.
1175 if (!InProlog
&& StagePhi
+ 1 == StageSched
&& !isLoopCarried(*Phi
))
1176 ReplaceReg
= NewReg
;
1177 if (StagePhi
> StageSched
&& Phi
->isPHI())
1178 ReplaceReg
= NewReg
;
1179 if (!InProlog
&& !Phi
->isPHI() && StagePhi
< StageSched
)
1180 ReplaceReg
= NewReg
;
1182 MRI
.constrainRegClass(ReplaceReg
, MRI
.getRegClass(OldReg
));
1183 UseOp
.setReg(ReplaceReg
);
1188 bool ModuloScheduleExpander::isLoopCarried(MachineInstr
&Phi
) {
1191 unsigned DefCycle
= Schedule
.getCycle(&Phi
);
1192 int DefStage
= Schedule
.getStage(&Phi
);
1194 unsigned InitVal
= 0;
1195 unsigned LoopVal
= 0;
1196 getPhiRegs(Phi
, Phi
.getParent(), InitVal
, LoopVal
);
1197 MachineInstr
*Use
= MRI
.getVRegDef(LoopVal
);
1198 if (!Use
|| Use
->isPHI())
1200 unsigned LoopCycle
= Schedule
.getCycle(Use
);
1201 int LoopStage
= Schedule
.getStage(Use
);
1202 return (LoopCycle
> DefCycle
) || (LoopStage
<= DefStage
);
1205 //===----------------------------------------------------------------------===//
1206 // PeelingModuloScheduleExpander implementation
1207 //===----------------------------------------------------------------------===//
1208 // This is a reimplementation of ModuloScheduleExpander that works by creating
1209 // a fully correct steady-state kernel and peeling off the prolog and epilogs.
1210 //===----------------------------------------------------------------------===//
1213 // Remove any dead phis in MBB. Dead phis either have only one block as input
1214 // (in which case they are the identity) or have no uses.
1215 void EliminateDeadPhis(MachineBasicBlock
*MBB
, MachineRegisterInfo
&MRI
,
1216 LiveIntervals
*LIS
) {
1217 bool Changed
= true;
1220 for (auto I
= MBB
->begin(); I
!= MBB
->getFirstNonPHI();) {
1221 MachineInstr
&MI
= *I
++;
1223 if (MRI
.use_empty(MI
.getOperand(0).getReg())) {
1225 LIS
->RemoveMachineInstrFromMaps(MI
);
1226 MI
.eraseFromParent();
1228 } else if (MI
.getNumExplicitOperands() == 3) {
1229 MRI
.constrainRegClass(MI
.getOperand(1).getReg(),
1230 MRI
.getRegClass(MI
.getOperand(0).getReg()));
1231 MRI
.replaceRegWith(MI
.getOperand(0).getReg(),
1232 MI
.getOperand(1).getReg());
1234 LIS
->RemoveMachineInstrFromMaps(MI
);
1235 MI
.eraseFromParent();
1242 /// Rewrites the kernel block in-place to adhere to the given schedule.
1243 /// KernelRewriter holds all of the state required to perform the rewriting.
1244 class KernelRewriter
{
1246 MachineBasicBlock
*BB
;
1247 MachineBasicBlock
*PreheaderBB
, *ExitBB
;
1248 MachineRegisterInfo
&MRI
;
1249 const TargetInstrInfo
*TII
;
1252 // Map from register class to canonical undef register for that class.
1253 DenseMap
<const TargetRegisterClass
*, Register
> Undefs
;
1254 // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1255 // this map is only used when InitReg is non-undef.
1256 DenseMap
<std::pair
<unsigned, unsigned>, Register
> Phis
;
1257 // Map from LoopReg to phi register where the InitReg is undef.
1258 DenseMap
<Register
, Register
> UndefPhis
;
1260 // Reg is used by MI. Return the new register MI should use to adhere to the
1261 // schedule. Insert phis as necessary.
1262 Register
remapUse(Register Reg
, MachineInstr
&MI
);
1263 // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1264 // If InitReg is not given it is chosen arbitrarily. It will either be undef
1265 // or will be chosen so as to share another phi.
1266 Register
phi(Register LoopReg
, Optional
<Register
> InitReg
= {},
1267 const TargetRegisterClass
*RC
= nullptr);
1268 // Create an undef register of the given register class.
1269 Register
undef(const TargetRegisterClass
*RC
);
1272 KernelRewriter(MachineLoop
&L
, ModuloSchedule
&S
,
1273 LiveIntervals
*LIS
= nullptr);
1278 KernelRewriter::KernelRewriter(MachineLoop
&L
, ModuloSchedule
&S
,
1280 : S(S
), BB(L
.getTopBlock()), PreheaderBB(L
.getLoopPreheader()),
1281 ExitBB(L
.getExitBlock()), MRI(BB
->getParent()->getRegInfo()),
1282 TII(BB
->getParent()->getSubtarget().getInstrInfo()), LIS(LIS
) {
1283 PreheaderBB
= *BB
->pred_begin();
1284 if (PreheaderBB
== BB
)
1285 PreheaderBB
= *std::next(BB
->pred_begin());
1288 void KernelRewriter::rewrite() {
1289 // Rearrange the loop to be in schedule order. Note that the schedule may
1290 // contain instructions that are not owned by the loop block (InstrChanges and
1291 // friends), so we gracefully handle unowned instructions and delete any
1292 // instructions that weren't in the schedule.
1293 auto InsertPt
= BB
->getFirstTerminator();
1294 MachineInstr
*FirstMI
= nullptr;
1295 for (MachineInstr
*MI
: S
.getInstructions()) {
1298 if (MI
->getParent())
1299 MI
->removeFromParent();
1300 BB
->insert(InsertPt
, MI
);
1304 assert(FirstMI
&& "Failed to find first MI in schedule");
1306 // At this point all of the scheduled instructions are between FirstMI
1307 // and the end of the block. Kill from the first non-phi to FirstMI.
1308 for (auto I
= BB
->getFirstNonPHI(); I
!= FirstMI
->getIterator();) {
1310 LIS
->RemoveMachineInstrFromMaps(*I
);
1311 (I
++)->eraseFromParent();
1314 // Now remap every instruction in the loop.
1315 for (MachineInstr
&MI
: *BB
) {
1318 for (MachineOperand
&MO
: MI
.uses()) {
1319 if (!MO
.isReg() || MO
.getReg().isPhysical() || MO
.isImplicit())
1321 Register Reg
= remapUse(MO
.getReg(), MI
);
1325 EliminateDeadPhis(BB
, MRI
, LIS
);
1327 // Ensure a phi exists for all instructions that are either referenced by
1328 // an illegal phi or by an instruction outside the loop. This allows us to
1329 // treat remaps of these values the same as "normal" values that come from
1330 // loop-carried phis.
1331 for (auto MI
= BB
->getFirstNonPHI(); MI
!= BB
->end(); ++MI
) {
1333 Register R
= MI
->getOperand(0).getReg();
1338 for (MachineOperand
&Def
: MI
->defs()) {
1339 for (MachineInstr
&MI
: MRI
.use_instructions(Def
.getReg())) {
1340 if (MI
.getParent() != BB
) {
1349 Register
KernelRewriter::remapUse(Register Reg
, MachineInstr
&MI
) {
1350 MachineInstr
*Producer
= MRI
.getUniqueVRegDef(Reg
);
1354 int ConsumerStage
= S
.getStage(&MI
);
1355 if (!Producer
->isPHI()) {
1356 // Non-phi producers are simple to remap. Insert as many phis as the
1357 // difference between the consumer and producer stages.
1358 if (Producer
->getParent() != BB
)
1359 // Producer was not inside the loop. Use the register as-is.
1361 int ProducerStage
= S
.getStage(Producer
);
1362 assert(ConsumerStage
!= -1 &&
1363 "In-loop consumer should always be scheduled!");
1364 assert(ConsumerStage
>= ProducerStage
);
1365 unsigned StageDiff
= ConsumerStage
- ProducerStage
;
1367 for (unsigned I
= 0; I
< StageDiff
; ++I
)
1372 // First, dive through the phi chain to find the defaults for the generated
1374 SmallVector
<Optional
<Register
>, 4> Defaults
;
1375 Register LoopReg
= Reg
;
1376 auto LoopProducer
= Producer
;
1377 while (LoopProducer
->isPHI() && LoopProducer
->getParent() == BB
) {
1378 LoopReg
= getLoopPhiReg(*LoopProducer
, BB
);
1379 Defaults
.emplace_back(getInitPhiReg(*LoopProducer
, BB
));
1380 LoopProducer
= MRI
.getUniqueVRegDef(LoopReg
);
1381 assert(LoopProducer
);
1383 int LoopProducerStage
= S
.getStage(LoopProducer
);
1385 Optional
<Register
> IllegalPhiDefault
;
1387 if (LoopProducerStage
== -1) {
1389 } else if (LoopProducerStage
> ConsumerStage
) {
1390 // This schedule is only representable if ProducerStage == ConsumerStage+1.
1391 // In addition, Consumer's cycle must be scheduled after Producer in the
1392 // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1394 #ifndef NDEBUG // Silence unused variables in non-asserts mode.
1395 int LoopProducerCycle
= S
.getCycle(LoopProducer
);
1396 int ConsumerCycle
= S
.getCycle(&MI
);
1398 assert(LoopProducerCycle
<= ConsumerCycle
);
1399 assert(LoopProducerStage
== ConsumerStage
+ 1);
1400 // Peel off the first phi from Defaults and insert a phi between producer
1401 // and consumer. This phi will not be at the front of the block so we
1402 // consider it illegal. It will only exist during the rewrite process; it
1403 // needs to exist while we peel off prologs because these could take the
1404 // default value. After that we can replace all uses with the loop producer
1406 IllegalPhiDefault
= Defaults
.front();
1407 Defaults
.erase(Defaults
.begin());
1409 assert(ConsumerStage
>= LoopProducerStage
);
1410 int StageDiff
= ConsumerStage
- LoopProducerStage
;
1411 if (StageDiff
> 0) {
1412 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults
.size()
1413 << " to " << (Defaults
.size() + StageDiff
) << "\n");
1414 // If we need more phis than we have defaults for, pad out with undefs for
1415 // the earliest phis, which are at the end of the defaults chain (the
1416 // chain is in reverse order).
1417 Defaults
.resize(Defaults
.size() + StageDiff
, Defaults
.empty()
1418 ? Optional
<Register
>()
1423 // Now we know the number of stages to jump back, insert the phi chain.
1424 auto DefaultI
= Defaults
.rbegin();
1425 while (DefaultI
!= Defaults
.rend())
1426 LoopReg
= phi(LoopReg
, *DefaultI
++, MRI
.getRegClass(Reg
));
1428 if (IllegalPhiDefault
.hasValue()) {
1429 // The consumer optionally consumes LoopProducer in the same iteration
1430 // (because the producer is scheduled at an earlier cycle than the consumer)
1431 // or the initial value. To facilitate this we create an illegal block here
1432 // by embedding a phi in the middle of the block. We will fix this up
1433 // immediately prior to pruning.
1434 auto RC
= MRI
.getRegClass(Reg
);
1435 Register R
= MRI
.createVirtualRegister(RC
);
1436 BuildMI(*BB
, MI
, DebugLoc(), TII
->get(TargetOpcode::PHI
), R
)
1437 .addReg(IllegalPhiDefault
.getValue())
1438 .addMBB(PreheaderBB
) // Block choice is arbitrary and has no effect.
1440 .addMBB(BB
); // Block choice is arbitrary and has no effect.
1447 Register
KernelRewriter::phi(Register LoopReg
, Optional
<Register
> InitReg
,
1448 const TargetRegisterClass
*RC
) {
1449 // If the init register is not undef, try and find an existing phi.
1450 if (InitReg
.hasValue()) {
1451 auto I
= Phis
.find({LoopReg
, InitReg
.getValue()});
1452 if (I
!= Phis
.end())
1455 for (auto &KV
: Phis
) {
1456 if (KV
.first
.first
== LoopReg
)
1461 // InitReg is either undef or no existing phi takes InitReg as input. Try and
1462 // find a phi that takes undef as input.
1463 auto I
= UndefPhis
.find(LoopReg
);
1464 if (I
!= UndefPhis
.end()) {
1465 Register R
= I
->second
;
1466 if (!InitReg
.hasValue())
1467 // Found a phi taking undef as input, and this input is undef so return
1468 // without any more changes.
1470 // Found a phi taking undef as input, so rewrite it to take InitReg.
1471 MachineInstr
*MI
= MRI
.getVRegDef(R
);
1472 MI
->getOperand(1).setReg(InitReg
.getValue());
1473 Phis
.insert({{LoopReg
, InitReg
.getValue()}, R
});
1474 MRI
.constrainRegClass(R
, MRI
.getRegClass(InitReg
.getValue()));
1479 // Failed to find any existing phi to reuse, so create a new one.
1481 RC
= MRI
.getRegClass(LoopReg
);
1482 Register R
= MRI
.createVirtualRegister(RC
);
1483 if (InitReg
.hasValue())
1484 MRI
.constrainRegClass(R
, MRI
.getRegClass(*InitReg
));
1485 BuildMI(*BB
, BB
->getFirstNonPHI(), DebugLoc(), TII
->get(TargetOpcode::PHI
), R
)
1486 .addReg(InitReg
.hasValue() ? *InitReg
: undef(RC
))
1487 .addMBB(PreheaderBB
)
1490 if (!InitReg
.hasValue())
1491 UndefPhis
[LoopReg
] = R
;
1493 Phis
[{LoopReg
, *InitReg
}] = R
;
1497 Register
KernelRewriter::undef(const TargetRegisterClass
*RC
) {
1498 Register
&R
= Undefs
[RC
];
1500 // Create an IMPLICIT_DEF that defines this register if we need it.
1501 // All uses of this should be removed by the time we have finished unrolling
1502 // prologs and epilogs.
1503 R
= MRI
.createVirtualRegister(RC
);
1504 auto *InsertBB
= &PreheaderBB
->getParent()->front();
1505 BuildMI(*InsertBB
, InsertBB
->getFirstTerminator(), DebugLoc(),
1506 TII
->get(TargetOpcode::IMPLICIT_DEF
), R
);
1512 /// Describes an operand in the kernel of a pipelined loop. Characteristics of
1513 /// the operand are discovered, such as how many in-loop PHIs it has to jump
1514 /// through and defaults for these phis.
1515 class KernelOperandInfo
{
1516 MachineBasicBlock
*BB
;
1517 MachineRegisterInfo
&MRI
;
1518 SmallVector
<Register
, 4> PhiDefaults
;
1519 MachineOperand
*Source
;
1520 MachineOperand
*Target
;
1523 KernelOperandInfo(MachineOperand
*MO
, MachineRegisterInfo
&MRI
,
1524 const SmallPtrSetImpl
<MachineInstr
*> &IllegalPhis
)
1527 BB
= MO
->getParent()->getParent();
1528 while (isRegInLoop(MO
)) {
1529 MachineInstr
*MI
= MRI
.getVRegDef(MO
->getReg());
1530 if (MI
->isFullCopy()) {
1531 MO
= &MI
->getOperand(1);
1536 // If this is an illegal phi, don't count it in distance.
1537 if (IllegalPhis
.count(MI
)) {
1538 MO
= &MI
->getOperand(3);
1542 Register Default
= getInitPhiReg(*MI
, BB
);
1543 MO
= MI
->getOperand(2).getMBB() == BB
? &MI
->getOperand(1)
1544 : &MI
->getOperand(3);
1545 PhiDefaults
.push_back(Default
);
1550 bool operator==(const KernelOperandInfo
&Other
) const {
1551 return PhiDefaults
.size() == Other
.PhiDefaults
.size();
1554 void print(raw_ostream
&OS
) const {
1555 OS
<< "use of " << *Source
<< ": distance(" << PhiDefaults
.size() << ") in "
1556 << *Source
->getParent();
1560 bool isRegInLoop(MachineOperand
*MO
) {
1561 return MO
->isReg() && MO
->getReg().isVirtual() &&
1562 MRI
.getVRegDef(MO
->getReg())->getParent() == BB
;
1567 void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
1568 BB
= Schedule
.getLoop()->getTopBlock();
1569 Preheader
= Schedule
.getLoop()->getLoopPreheader();
1571 // Dump the schedule before we invalidate and remap all its instructions.
1572 // Stash it in a string so we can print it if we found an error.
1573 std::string ScheduleDump
;
1574 raw_string_ostream
OS(ScheduleDump
);
1578 // First, run the normal ModuleScheduleExpander. We don't support any
1580 assert(LIS
&& "Requires LiveIntervals!");
1581 ModuloScheduleExpander
MSE(MF
, Schedule
, *LIS
,
1582 ModuloScheduleExpander::InstrChangesTy());
1584 MachineBasicBlock
*ExpandedKernel
= MSE
.getRewrittenKernel();
1585 if (!ExpandedKernel
) {
1586 // The expander optimized away the kernel. We can't do any useful checking.
1590 // Before running the KernelRewriter, re-add BB into the CFG.
1591 Preheader
->addSuccessor(BB
);
1593 // Now run the new expansion algorithm.
1594 KernelRewriter
KR(*Schedule
.getLoop(), Schedule
);
1597 // Collect all illegal phis that the new algorithm created. We'll give these
1598 // to KernelOperandInfo.
1599 SmallPtrSet
<MachineInstr
*, 4> IllegalPhis
;
1600 for (auto NI
= BB
->getFirstNonPHI(); NI
!= BB
->end(); ++NI
) {
1602 IllegalPhis
.insert(&*NI
);
1605 // Co-iterate across both kernels. We expect them to be identical apart from
1606 // phis and full COPYs (we look through both).
1607 SmallVector
<std::pair
<KernelOperandInfo
, KernelOperandInfo
>, 8> KOIs
;
1608 auto OI
= ExpandedKernel
->begin();
1609 auto NI
= BB
->begin();
1610 for (; !OI
->isTerminator() && !NI
->isTerminator(); ++OI
, ++NI
) {
1611 while (OI
->isPHI() || OI
->isFullCopy())
1613 while (NI
->isPHI() || NI
->isFullCopy())
1615 assert(OI
->getOpcode() == NI
->getOpcode() && "Opcodes don't match?!");
1616 // Analyze every operand separately.
1617 for (auto OOpI
= OI
->operands_begin(), NOpI
= NI
->operands_begin();
1618 OOpI
!= OI
->operands_end(); ++OOpI
, ++NOpI
)
1619 KOIs
.emplace_back(KernelOperandInfo(&*OOpI
, MRI
, IllegalPhis
),
1620 KernelOperandInfo(&*NOpI
, MRI
, IllegalPhis
));
1623 bool Failed
= false;
1624 for (auto &OldAndNew
: KOIs
) {
1625 if (OldAndNew
.first
== OldAndNew
.second
)
1628 errs() << "Modulo kernel validation error: [\n";
1629 errs() << " [golden] ";
1630 OldAndNew
.first
.print(errs());
1632 OldAndNew
.second
.print(errs());
1637 errs() << "Golden reference kernel:\n";
1638 ExpandedKernel
->print(errs());
1639 errs() << "New kernel:\n";
1641 errs() << ScheduleDump
;
1643 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
1646 // Cleanup by removing BB from the CFG again as the original
1647 // ModuloScheduleExpander intended.
1648 Preheader
->removeSuccessor(BB
);
1652 //===----------------------------------------------------------------------===//
1653 // ModuloScheduleTestPass implementation
1654 //===----------------------------------------------------------------------===//
1655 // This pass constructs a ModuloSchedule from its module and runs
1656 // ModuloScheduleExpander.
1658 // The module is expected to contain a single-block analyzable loop.
1659 // The total order of instructions is taken from the loop as-is.
1660 // Instructions are expected to be annotated with a PostInstrSymbol.
1661 // This PostInstrSymbol must have the following format:
1662 // "Stage=%d Cycle=%d".
1663 //===----------------------------------------------------------------------===//
1666 class ModuloScheduleTest
: public MachineFunctionPass
{
1670 ModuloScheduleTest() : MachineFunctionPass(ID
) {
1671 initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
1674 bool runOnMachineFunction(MachineFunction
&MF
) override
;
1675 void runOnLoop(MachineFunction
&MF
, MachineLoop
&L
);
1677 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
1678 AU
.addRequired
<MachineLoopInfo
>();
1679 AU
.addRequired
<LiveIntervals
>();
1680 MachineFunctionPass::getAnalysisUsage(AU
);
1685 char ModuloScheduleTest::ID
= 0;
1687 INITIALIZE_PASS_BEGIN(ModuloScheduleTest
, "modulo-schedule-test",
1688 "Modulo Schedule test pass", false, false)
1689 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo
)
1690 INITIALIZE_PASS_DEPENDENCY(LiveIntervals
)
1691 INITIALIZE_PASS_END(ModuloScheduleTest
, "modulo-schedule-test",
1692 "Modulo Schedule test pass", false, false)
1694 bool ModuloScheduleTest::runOnMachineFunction(MachineFunction
&MF
) {
1695 MachineLoopInfo
&MLI
= getAnalysis
<MachineLoopInfo
>();
1696 for (auto *L
: MLI
) {
1697 if (L
->getTopBlock() != L
->getBottomBlock())
1705 static void parseSymbolString(StringRef S
, int &Cycle
, int &Stage
) {
1706 std::pair
<StringRef
, StringRef
> StageAndCycle
= getToken(S
, "_");
1707 std::pair
<StringRef
, StringRef
> StageTokenAndValue
=
1708 getToken(StageAndCycle
.first
, "-");
1709 std::pair
<StringRef
, StringRef
> CycleTokenAndValue
=
1710 getToken(StageAndCycle
.second
, "-");
1711 if (StageTokenAndValue
.first
!= "Stage" ||
1712 CycleTokenAndValue
.first
!= "_Cycle") {
1714 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
1718 StageTokenAndValue
.second
.drop_front().getAsInteger(10, Stage
);
1719 CycleTokenAndValue
.second
.drop_front().getAsInteger(10, Cycle
);
1721 dbgs() << " Stage=" << Stage
<< ", Cycle=" << Cycle
<< "\n";
1724 void ModuloScheduleTest::runOnLoop(MachineFunction
&MF
, MachineLoop
&L
) {
1725 LiveIntervals
&LIS
= getAnalysis
<LiveIntervals
>();
1726 MachineBasicBlock
*BB
= L
.getTopBlock();
1727 dbgs() << "--- ModuloScheduleTest running on BB#" << BB
->getNumber() << "\n";
1729 DenseMap
<MachineInstr
*, int> Cycle
, Stage
;
1730 std::vector
<MachineInstr
*> Instrs
;
1731 for (MachineInstr
&MI
: *BB
) {
1732 if (MI
.isTerminator())
1734 Instrs
.push_back(&MI
);
1735 if (MCSymbol
*Sym
= MI
.getPostInstrSymbol()) {
1736 dbgs() << "Parsing post-instr symbol for " << MI
;
1737 parseSymbolString(Sym
->getName(), Cycle
[&MI
], Stage
[&MI
]);
1741 ModuloSchedule
MS(MF
, &L
, std::move(Instrs
), std::move(Cycle
),
1743 ModuloScheduleExpander
MSE(
1744 MF
, MS
, LIS
, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
1749 //===----------------------------------------------------------------------===//
1750 // ModuloScheduleTestAnnotater implementation
1751 //===----------------------------------------------------------------------===//
1753 void ModuloScheduleTestAnnotater::annotate() {
1754 for (MachineInstr
*MI
: S
.getInstructions()) {
1755 SmallVector
<char, 16> SV
;
1756 raw_svector_ostream
OS(SV
);
1757 OS
<< "Stage-" << S
.getStage(MI
) << "_Cycle-" << S
.getCycle(MI
);
1758 MCSymbol
*Sym
= MF
.getContext().getOrCreateSymbol(OS
.str());
1759 MI
->setPostInstrSymbol(MF
, Sym
);