1 //===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/CodeGen/ModuloSchedule.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/Analysis/MemoryLocation.h"
12 #include "llvm/CodeGen/LiveIntervals.h"
13 #include "llvm/CodeGen/MachineInstrBuilder.h"
14 #include "llvm/CodeGen/MachineRegisterInfo.h"
15 #include "llvm/InitializePasses.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/ErrorHandling.h"
19 #include "llvm/Support/raw_ostream.h"
21 #define DEBUG_TYPE "pipeliner"
24 void ModuloSchedule::print(raw_ostream
&OS
) {
25 for (MachineInstr
*MI
: ScheduledInstrs
)
26 OS
<< "[stage " << getStage(MI
) << " @" << getCycle(MI
) << "c] " << *MI
;
29 //===----------------------------------------------------------------------===//
30 // ModuloScheduleExpander implementation
31 //===----------------------------------------------------------------------===//
33 /// Return the register values for the operands of a Phi instruction.
34 /// This function assume the instruction is a Phi.
35 static void getPhiRegs(MachineInstr
&Phi
, MachineBasicBlock
*Loop
,
36 unsigned &InitVal
, unsigned &LoopVal
) {
37 assert(Phi
.isPHI() && "Expecting a Phi.");
41 for (unsigned i
= 1, e
= Phi
.getNumOperands(); i
!= e
; i
+= 2)
42 if (Phi
.getOperand(i
+ 1).getMBB() != Loop
)
43 InitVal
= Phi
.getOperand(i
).getReg();
45 LoopVal
= Phi
.getOperand(i
).getReg();
47 assert(InitVal
!= 0 && LoopVal
!= 0 && "Unexpected Phi structure.");
50 /// Return the Phi register value that comes from the incoming block.
51 static unsigned getInitPhiReg(MachineInstr
&Phi
, MachineBasicBlock
*LoopBB
) {
52 for (unsigned i
= 1, e
= Phi
.getNumOperands(); i
!= e
; i
+= 2)
53 if (Phi
.getOperand(i
+ 1).getMBB() != LoopBB
)
54 return Phi
.getOperand(i
).getReg();
58 /// Return the Phi register value that comes the loop block.
59 static unsigned getLoopPhiReg(MachineInstr
&Phi
, MachineBasicBlock
*LoopBB
) {
60 for (unsigned i
= 1, e
= Phi
.getNumOperands(); i
!= e
; i
+= 2)
61 if (Phi
.getOperand(i
+ 1).getMBB() == LoopBB
)
62 return Phi
.getOperand(i
).getReg();
66 void ModuloScheduleExpander::expand() {
67 BB
= Schedule
.getLoop()->getTopBlock();
68 Preheader
= *BB
->pred_begin();
70 Preheader
= *std::next(BB
->pred_begin());
72 // Iterate over the definitions in each instruction, and compute the
73 // stage difference for each use. Keep the maximum value.
74 for (MachineInstr
*MI
: Schedule
.getInstructions()) {
75 int DefStage
= Schedule
.getStage(MI
);
76 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
< e
; ++i
) {
77 MachineOperand
&Op
= MI
->getOperand(i
);
78 if (!Op
.isReg() || !Op
.isDef())
81 Register Reg
= Op
.getReg();
83 bool PhiIsSwapped
= false;
84 for (MachineRegisterInfo::use_iterator UI
= MRI
.use_begin(Reg
),
87 MachineOperand
&UseOp
= *UI
;
88 MachineInstr
*UseMI
= UseOp
.getParent();
89 int UseStage
= Schedule
.getStage(UseMI
);
91 if (UseStage
!= -1 && UseStage
>= DefStage
)
92 Diff
= UseStage
- DefStage
;
94 if (isLoopCarried(*MI
))
99 MaxDiff
= std::max(Diff
, MaxDiff
);
101 RegToStageDiff
[Reg
] = std::make_pair(MaxDiff
, PhiIsSwapped
);
105 generatePipelinedLoop();
108 void ModuloScheduleExpander::generatePipelinedLoop() {
109 LoopInfo
= TII
->analyzeLoopForPipelining(BB
);
110 assert(LoopInfo
&& "Must be able to analyze loop!");
112 // Create a new basic block for the kernel and add it to the CFG.
113 MachineBasicBlock
*KernelBB
= MF
.CreateMachineBasicBlock(BB
->getBasicBlock());
115 unsigned MaxStageCount
= Schedule
.getNumStages() - 1;
117 // Remember the registers that are used in different stages. The index is
118 // the iteration, or stage, that the instruction is scheduled in. This is
119 // a map between register names in the original block and the names created
120 // in each stage of the pipelined loop.
121 ValueMapTy
*VRMap
= new ValueMapTy
[(MaxStageCount
+ 1) * 2];
124 SmallVector
<MachineBasicBlock
*, 4> PrologBBs
;
126 // Generate the prolog instructions that set up the pipeline.
127 generateProlog(MaxStageCount
, KernelBB
, VRMap
, PrologBBs
);
128 MF
.insert(BB
->getIterator(), KernelBB
);
130 // Rearrange the instructions to generate the new, pipelined loop,
131 // and update register names as needed.
132 for (MachineInstr
*CI
: Schedule
.getInstructions()) {
135 unsigned StageNum
= Schedule
.getStage(CI
);
136 MachineInstr
*NewMI
= cloneInstr(CI
, MaxStageCount
, StageNum
);
137 updateInstruction(NewMI
, false, MaxStageCount
, StageNum
, VRMap
);
138 KernelBB
->push_back(NewMI
);
139 InstrMap
[NewMI
] = CI
;
142 // Copy any terminator instructions to the new kernel, and update
144 for (MachineBasicBlock::iterator I
= BB
->getFirstTerminator(),
147 MachineInstr
*NewMI
= MF
.CloneMachineInstr(&*I
);
148 updateInstruction(NewMI
, false, MaxStageCount
, 0, VRMap
);
149 KernelBB
->push_back(NewMI
);
150 InstrMap
[NewMI
] = &*I
;
153 NewKernel
= KernelBB
;
154 KernelBB
->transferSuccessors(BB
);
155 KernelBB
->replaceSuccessor(BB
, KernelBB
);
157 generateExistingPhis(KernelBB
, PrologBBs
.back(), KernelBB
, KernelBB
, VRMap
,
158 InstrMap
, MaxStageCount
, MaxStageCount
, false);
159 generatePhis(KernelBB
, PrologBBs
.back(), KernelBB
, KernelBB
, VRMap
, InstrMap
,
160 MaxStageCount
, MaxStageCount
, false);
162 LLVM_DEBUG(dbgs() << "New block\n"; KernelBB
->dump(););
164 SmallVector
<MachineBasicBlock
*, 4> EpilogBBs
;
165 // Generate the epilog instructions to complete the pipeline.
166 generateEpilog(MaxStageCount
, KernelBB
, VRMap
, EpilogBBs
, PrologBBs
);
168 // We need this step because the register allocation doesn't handle some
169 // situations well, so we insert copies to help out.
170 splitLifetimes(KernelBB
, EpilogBBs
);
172 // Remove dead instructions due to loop induction variables.
173 removeDeadInstructions(KernelBB
, EpilogBBs
);
175 // Add branches between prolog and epilog blocks.
176 addBranches(*Preheader
, PrologBBs
, KernelBB
, EpilogBBs
, VRMap
);
181 void ModuloScheduleExpander::cleanup() {
182 // Remove the original loop since it's no longer referenced.
184 LIS
.RemoveMachineInstrFromMaps(I
);
186 BB
->eraseFromParent();
189 /// Generate the pipeline prolog code.
190 void ModuloScheduleExpander::generateProlog(unsigned LastStage
,
191 MachineBasicBlock
*KernelBB
,
193 MBBVectorTy
&PrologBBs
) {
194 MachineBasicBlock
*PredBB
= Preheader
;
197 // Generate a basic block for each stage, not including the last stage,
198 // which will be generated in the kernel. Each basic block may contain
199 // instructions from multiple stages/iterations.
200 for (unsigned i
= 0; i
< LastStage
; ++i
) {
201 // Create and insert the prolog basic block prior to the original loop
202 // basic block. The original loop is removed later.
203 MachineBasicBlock
*NewBB
= MF
.CreateMachineBasicBlock(BB
->getBasicBlock());
204 PrologBBs
.push_back(NewBB
);
205 MF
.insert(BB
->getIterator(), NewBB
);
206 NewBB
->transferSuccessors(PredBB
);
207 PredBB
->addSuccessor(NewBB
);
210 // Generate instructions for each appropriate stage. Process instructions
211 // in original program order.
212 for (int StageNum
= i
; StageNum
>= 0; --StageNum
) {
213 for (MachineBasicBlock::iterator BBI
= BB
->instr_begin(),
214 BBE
= BB
->getFirstTerminator();
216 if (Schedule
.getStage(&*BBI
) == StageNum
) {
219 MachineInstr
*NewMI
=
220 cloneAndChangeInstr(&*BBI
, i
, (unsigned)StageNum
);
221 updateInstruction(NewMI
, false, i
, (unsigned)StageNum
, VRMap
);
222 NewBB
->push_back(NewMI
);
223 InstrMap
[NewMI
] = &*BBI
;
227 rewritePhiValues(NewBB
, i
, VRMap
, InstrMap
);
229 dbgs() << "prolog:\n";
234 PredBB
->replaceSuccessor(BB
, KernelBB
);
236 // Check if we need to remove the branch from the preheader to the original
237 // loop, and replace it with a branch to the new loop.
238 unsigned numBranches
= TII
->removeBranch(*Preheader
);
240 SmallVector
<MachineOperand
, 0> Cond
;
241 TII
->insertBranch(*Preheader
, PrologBBs
[0], nullptr, Cond
, DebugLoc());
245 /// Generate the pipeline epilog code. The epilog code finishes the iterations
246 /// that were started in either the prolog or the kernel. We create a basic
247 /// block for each stage that needs to complete.
248 void ModuloScheduleExpander::generateEpilog(unsigned LastStage
,
249 MachineBasicBlock
*KernelBB
,
251 MBBVectorTy
&EpilogBBs
,
252 MBBVectorTy
&PrologBBs
) {
253 // We need to change the branch from the kernel to the first epilog block, so
254 // this call to analyze branch uses the kernel rather than the original BB.
255 MachineBasicBlock
*TBB
= nullptr, *FBB
= nullptr;
256 SmallVector
<MachineOperand
, 4> Cond
;
257 bool checkBranch
= TII
->analyzeBranch(*KernelBB
, TBB
, FBB
, Cond
);
258 assert(!checkBranch
&& "generateEpilog must be able to analyze the branch");
262 MachineBasicBlock::succ_iterator LoopExitI
= KernelBB
->succ_begin();
263 if (*LoopExitI
== KernelBB
)
265 assert(LoopExitI
!= KernelBB
->succ_end() && "Expecting a successor");
266 MachineBasicBlock
*LoopExitBB
= *LoopExitI
;
268 MachineBasicBlock
*PredBB
= KernelBB
;
269 MachineBasicBlock
*EpilogStart
= LoopExitBB
;
272 // Generate a basic block for each stage, not including the last stage,
273 // which was generated for the kernel. Each basic block may contain
274 // instructions from multiple stages/iterations.
275 int EpilogStage
= LastStage
+ 1;
276 for (unsigned i
= LastStage
; i
>= 1; --i
, ++EpilogStage
) {
277 MachineBasicBlock
*NewBB
= MF
.CreateMachineBasicBlock();
278 EpilogBBs
.push_back(NewBB
);
279 MF
.insert(BB
->getIterator(), NewBB
);
281 PredBB
->replaceSuccessor(LoopExitBB
, NewBB
);
282 NewBB
->addSuccessor(LoopExitBB
);
284 if (EpilogStart
== LoopExitBB
)
287 // Add instructions to the epilog depending on the current block.
288 // Process instructions in original program order.
289 for (unsigned StageNum
= i
; StageNum
<= LastStage
; ++StageNum
) {
290 for (auto &BBI
: *BB
) {
293 MachineInstr
*In
= &BBI
;
294 if ((unsigned)Schedule
.getStage(In
) == StageNum
) {
295 // Instructions with memoperands in the epilog are updated with
296 // conservative values.
297 MachineInstr
*NewMI
= cloneInstr(In
, UINT_MAX
, 0);
298 updateInstruction(NewMI
, i
== 1, EpilogStage
, 0, VRMap
);
299 NewBB
->push_back(NewMI
);
300 InstrMap
[NewMI
] = In
;
304 generateExistingPhis(NewBB
, PrologBBs
[i
- 1], PredBB
, KernelBB
, VRMap
,
305 InstrMap
, LastStage
, EpilogStage
, i
== 1);
306 generatePhis(NewBB
, PrologBBs
[i
- 1], PredBB
, KernelBB
, VRMap
, InstrMap
,
307 LastStage
, EpilogStage
, i
== 1);
311 dbgs() << "epilog:\n";
316 // Fix any Phi nodes in the loop exit block.
317 LoopExitBB
->replacePhiUsesWith(BB
, PredBB
);
319 // Create a branch to the new epilog from the kernel.
320 // Remove the original branch and add a new branch to the epilog.
321 TII
->removeBranch(*KernelBB
);
322 TII
->insertBranch(*KernelBB
, KernelBB
, EpilogStart
, Cond
, DebugLoc());
323 // Add a branch to the loop exit.
324 if (EpilogBBs
.size() > 0) {
325 MachineBasicBlock
*LastEpilogBB
= EpilogBBs
.back();
326 SmallVector
<MachineOperand
, 4> Cond1
;
327 TII
->insertBranch(*LastEpilogBB
, LoopExitBB
, nullptr, Cond1
, DebugLoc());
331 /// Replace all uses of FromReg that appear outside the specified
332 /// basic block with ToReg.
333 static void replaceRegUsesAfterLoop(unsigned FromReg
, unsigned ToReg
,
334 MachineBasicBlock
*MBB
,
335 MachineRegisterInfo
&MRI
,
336 LiveIntervals
&LIS
) {
337 for (MachineRegisterInfo::use_iterator I
= MRI
.use_begin(FromReg
),
340 MachineOperand
&O
= *I
;
342 if (O
.getParent()->getParent() != MBB
)
345 if (!LIS
.hasInterval(ToReg
))
346 LIS
.createEmptyInterval(ToReg
);
349 /// Return true if the register has a use that occurs outside the
351 static bool hasUseAfterLoop(unsigned Reg
, MachineBasicBlock
*BB
,
352 MachineRegisterInfo
&MRI
) {
353 for (MachineRegisterInfo::use_iterator I
= MRI
.use_begin(Reg
),
356 if (I
->getParent()->getParent() != BB
)
361 /// Generate Phis for the specific block in the generated pipelined code.
362 /// This function looks at the Phis from the original code to guide the
363 /// creation of new Phis.
364 void ModuloScheduleExpander::generateExistingPhis(
365 MachineBasicBlock
*NewBB
, MachineBasicBlock
*BB1
, MachineBasicBlock
*BB2
,
366 MachineBasicBlock
*KernelBB
, ValueMapTy
*VRMap
, InstrMapTy
&InstrMap
,
367 unsigned LastStageNum
, unsigned CurStageNum
, bool IsLast
) {
368 // Compute the stage number for the initial value of the Phi, which
369 // comes from the prolog. The prolog to use depends on to which kernel/
370 // epilog that we're adding the Phi.
371 unsigned PrologStage
= 0;
372 unsigned PrevStage
= 0;
373 bool InKernel
= (LastStageNum
== CurStageNum
);
375 PrologStage
= LastStageNum
- 1;
376 PrevStage
= CurStageNum
;
378 PrologStage
= LastStageNum
- (CurStageNum
- LastStageNum
);
379 PrevStage
= LastStageNum
+ (CurStageNum
- LastStageNum
) - 1;
382 for (MachineBasicBlock::iterator BBI
= BB
->instr_begin(),
383 BBE
= BB
->getFirstNonPHI();
385 Register Def
= BBI
->getOperand(0).getReg();
387 unsigned InitVal
= 0;
388 unsigned LoopVal
= 0;
389 getPhiRegs(*BBI
, BB
, InitVal
, LoopVal
);
392 // The Phi value from the loop body typically is defined in the loop, but
393 // not always. So, we need to check if the value is defined in the loop.
394 unsigned PhiOp2
= LoopVal
;
395 if (VRMap
[LastStageNum
].count(LoopVal
))
396 PhiOp2
= VRMap
[LastStageNum
][LoopVal
];
398 int StageScheduled
= Schedule
.getStage(&*BBI
);
399 int LoopValStage
= Schedule
.getStage(MRI
.getVRegDef(LoopVal
));
400 unsigned NumStages
= getStagesForReg(Def
, CurStageNum
);
401 if (NumStages
== 0) {
402 // We don't need to generate a Phi anymore, but we need to rename any uses
404 unsigned NewReg
= VRMap
[PrevStage
][LoopVal
];
405 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, 0, &*BBI
, Def
,
407 if (VRMap
[CurStageNum
].count(LoopVal
))
408 VRMap
[CurStageNum
][Def
] = VRMap
[CurStageNum
][LoopVal
];
410 // Adjust the number of Phis needed depending on the number of prologs left,
411 // and the distance from where the Phi is first scheduled. The number of
412 // Phis cannot exceed the number of prolog stages. Each stage can
413 // potentially define two values.
414 unsigned MaxPhis
= PrologStage
+ 2;
415 if (!InKernel
&& (int)PrologStage
<= LoopValStage
)
416 MaxPhis
= std::max((int)MaxPhis
- (int)LoopValStage
, 1);
417 unsigned NumPhis
= std::min(NumStages
, MaxPhis
);
420 unsigned AccessStage
= (LoopValStage
!= -1) ? LoopValStage
: StageScheduled
;
421 // In the epilog, we may need to look back one stage to get the correct
422 // Phi name, because the epilog and prolog blocks execute the same stage.
423 // The correct name is from the previous block only when the Phi has
424 // been completely scheduled prior to the epilog, and Phi value is not
425 // needed in multiple stages.
427 if (!InKernel
&& StageScheduled
>= LoopValStage
&& AccessStage
== 0 &&
430 // Adjust the computations below when the phi and the loop definition
431 // are scheduled in different stages.
432 if (InKernel
&& LoopValStage
!= -1 && StageScheduled
> LoopValStage
)
433 StageDiff
= StageScheduled
- LoopValStage
;
434 for (unsigned np
= 0; np
< NumPhis
; ++np
) {
435 // If the Phi hasn't been scheduled, then use the initial Phi operand
436 // value. Otherwise, use the scheduled version of the instruction. This
437 // is a little complicated when a Phi references another Phi.
438 if (np
> PrologStage
|| StageScheduled
>= (int)LastStageNum
)
440 // Check if the Phi has already been scheduled in a prolog stage.
441 else if (PrologStage
>= AccessStage
+ StageDiff
+ np
&&
442 VRMap
[PrologStage
- StageDiff
- np
].count(LoopVal
) != 0)
443 PhiOp1
= VRMap
[PrologStage
- StageDiff
- np
][LoopVal
];
444 // Check if the Phi has already been scheduled, but the loop instruction
445 // is either another Phi, or doesn't occur in the loop.
446 else if (PrologStage
>= AccessStage
+ StageDiff
+ np
) {
447 // If the Phi references another Phi, we need to examine the other
448 // Phi to get the correct value.
450 MachineInstr
*InstOp1
= MRI
.getVRegDef(PhiOp1
);
452 while (InstOp1
&& InstOp1
->isPHI() && InstOp1
->getParent() == BB
) {
453 int PhiStage
= Schedule
.getStage(InstOp1
);
454 if ((int)(PrologStage
- StageDiff
- np
) < PhiStage
+ Indirects
)
455 PhiOp1
= getInitPhiReg(*InstOp1
, BB
);
457 PhiOp1
= getLoopPhiReg(*InstOp1
, BB
);
458 InstOp1
= MRI
.getVRegDef(PhiOp1
);
459 int PhiOpStage
= Schedule
.getStage(InstOp1
);
460 int StageAdj
= (PhiOpStage
!= -1 ? PhiStage
- PhiOpStage
: 0);
461 if (PhiOpStage
!= -1 && PrologStage
- StageAdj
>= Indirects
+ np
&&
462 VRMap
[PrologStage
- StageAdj
- Indirects
- np
].count(PhiOp1
)) {
463 PhiOp1
= VRMap
[PrologStage
- StageAdj
- Indirects
- np
][PhiOp1
];
470 // If this references a generated Phi in the kernel, get the Phi operand
471 // from the incoming block.
472 if (MachineInstr
*InstOp1
= MRI
.getVRegDef(PhiOp1
))
473 if (InstOp1
->isPHI() && InstOp1
->getParent() == KernelBB
)
474 PhiOp1
= getInitPhiReg(*InstOp1
, KernelBB
);
476 MachineInstr
*PhiInst
= MRI
.getVRegDef(LoopVal
);
477 bool LoopDefIsPhi
= PhiInst
&& PhiInst
->isPHI();
478 // In the epilog, a map lookup is needed to get the value from the kernel,
479 // or previous epilog block. How is does this depends on if the
480 // instruction is scheduled in the previous block.
482 int StageDiffAdj
= 0;
483 if (LoopValStage
!= -1 && StageScheduled
> LoopValStage
)
484 StageDiffAdj
= StageScheduled
- LoopValStage
;
485 // Use the loop value defined in the kernel, unless the kernel
486 // contains the last definition of the Phi.
487 if (np
== 0 && PrevStage
== LastStageNum
&&
488 (StageScheduled
!= 0 || LoopValStage
!= 0) &&
489 VRMap
[PrevStage
- StageDiffAdj
].count(LoopVal
))
490 PhiOp2
= VRMap
[PrevStage
- StageDiffAdj
][LoopVal
];
491 // Use the value defined by the Phi. We add one because we switch
492 // from looking at the loop value to the Phi definition.
493 else if (np
> 0 && PrevStage
== LastStageNum
&&
494 VRMap
[PrevStage
- np
+ 1].count(Def
))
495 PhiOp2
= VRMap
[PrevStage
- np
+ 1][Def
];
496 // Use the loop value defined in the kernel.
497 else if (static_cast<unsigned>(LoopValStage
) > PrologStage
+ 1 &&
498 VRMap
[PrevStage
- StageDiffAdj
- np
].count(LoopVal
))
499 PhiOp2
= VRMap
[PrevStage
- StageDiffAdj
- np
][LoopVal
];
500 // Use the value defined by the Phi, unless we're generating the first
501 // epilog and the Phi refers to a Phi in a different stage.
502 else if (VRMap
[PrevStage
- np
].count(Def
) &&
503 (!LoopDefIsPhi
|| (PrevStage
!= LastStageNum
) ||
504 (LoopValStage
== StageScheduled
)))
505 PhiOp2
= VRMap
[PrevStage
- np
][Def
];
508 // Check if we can reuse an existing Phi. This occurs when a Phi
509 // references another Phi, and the other Phi is scheduled in an
510 // earlier stage. We can try to reuse an existing Phi up until the last
511 // stage of the current Phi.
513 if (static_cast<int>(PrologStage
- np
) >= StageScheduled
) {
514 int LVNumStages
= getStagesForPhi(LoopVal
);
515 int StageDiff
= (StageScheduled
- LoopValStage
);
516 LVNumStages
-= StageDiff
;
517 // Make sure the loop value Phi has been processed already.
518 if (LVNumStages
> (int)np
&& VRMap
[CurStageNum
].count(LoopVal
)) {
520 unsigned ReuseStage
= CurStageNum
;
521 if (isLoopCarried(*PhiInst
))
522 ReuseStage
-= LVNumStages
;
523 // Check if the Phi to reuse has been generated yet. If not, then
524 // there is nothing to reuse.
525 if (VRMap
[ReuseStage
- np
].count(LoopVal
)) {
526 NewReg
= VRMap
[ReuseStage
- np
][LoopVal
];
528 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
,
530 // Update the map with the new Phi name.
531 VRMap
[CurStageNum
- np
][Def
] = NewReg
;
533 if (VRMap
[LastStageNum
- np
- 1].count(LoopVal
))
534 PhiOp2
= VRMap
[LastStageNum
- np
- 1][LoopVal
];
536 if (IsLast
&& np
== NumPhis
- 1)
537 replaceRegUsesAfterLoop(Def
, NewReg
, BB
, MRI
, LIS
);
542 if (InKernel
&& StageDiff
> 0 &&
543 VRMap
[CurStageNum
- StageDiff
- np
].count(LoopVal
))
544 PhiOp2
= VRMap
[CurStageNum
- StageDiff
- np
][LoopVal
];
547 const TargetRegisterClass
*RC
= MRI
.getRegClass(Def
);
548 NewReg
= MRI
.createVirtualRegister(RC
);
550 MachineInstrBuilder NewPhi
=
551 BuildMI(*NewBB
, NewBB
->getFirstNonPHI(), DebugLoc(),
552 TII
->get(TargetOpcode::PHI
), NewReg
);
553 NewPhi
.addReg(PhiOp1
).addMBB(BB1
);
554 NewPhi
.addReg(PhiOp2
).addMBB(BB2
);
556 InstrMap
[NewPhi
] = &*BBI
;
558 // We define the Phis after creating the new pipelined code, so
559 // we need to rename the Phi values in scheduled instructions.
561 unsigned PrevReg
= 0;
562 if (InKernel
&& VRMap
[PrevStage
- np
].count(LoopVal
))
563 PrevReg
= VRMap
[PrevStage
- np
][LoopVal
];
564 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
, Def
,
566 // If the Phi has been scheduled, use the new name for rewriting.
567 if (VRMap
[CurStageNum
- np
].count(Def
)) {
568 unsigned R
= VRMap
[CurStageNum
- np
][Def
];
569 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
, R
,
573 // Check if we need to rename any uses that occurs after the loop. The
574 // register to replace depends on whether the Phi is scheduled in the
576 if (IsLast
&& np
== NumPhis
- 1)
577 replaceRegUsesAfterLoop(Def
, NewReg
, BB
, MRI
, LIS
);
579 // In the kernel, a dependent Phi uses the value from this Phi.
583 // Update the map with the new Phi name.
584 VRMap
[CurStageNum
- np
][Def
] = NewReg
;
587 while (NumPhis
++ < NumStages
) {
588 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, NumPhis
, &*BBI
, Def
,
592 // Check if we need to rename a Phi that has been eliminated due to
594 if (NumStages
== 0 && IsLast
&& VRMap
[CurStageNum
].count(LoopVal
))
595 replaceRegUsesAfterLoop(Def
, VRMap
[CurStageNum
][LoopVal
], BB
, MRI
, LIS
);
599 /// Generate Phis for the specified block in the generated pipelined code.
600 /// These are new Phis needed because the definition is scheduled after the
601 /// use in the pipelined sequence.
602 void ModuloScheduleExpander::generatePhis(
603 MachineBasicBlock
*NewBB
, MachineBasicBlock
*BB1
, MachineBasicBlock
*BB2
,
604 MachineBasicBlock
*KernelBB
, ValueMapTy
*VRMap
, InstrMapTy
&InstrMap
,
605 unsigned LastStageNum
, unsigned CurStageNum
, bool IsLast
) {
606 // Compute the stage number that contains the initial Phi value, and
607 // the Phi from the previous stage.
608 unsigned PrologStage
= 0;
609 unsigned PrevStage
= 0;
610 unsigned StageDiff
= CurStageNum
- LastStageNum
;
611 bool InKernel
= (StageDiff
== 0);
613 PrologStage
= LastStageNum
- 1;
614 PrevStage
= CurStageNum
;
616 PrologStage
= LastStageNum
- StageDiff
;
617 PrevStage
= LastStageNum
+ StageDiff
- 1;
620 for (MachineBasicBlock::iterator BBI
= BB
->getFirstNonPHI(),
621 BBE
= BB
->instr_end();
623 for (unsigned i
= 0, e
= BBI
->getNumOperands(); i
!= e
; ++i
) {
624 MachineOperand
&MO
= BBI
->getOperand(i
);
625 if (!MO
.isReg() || !MO
.isDef() ||
626 !Register::isVirtualRegister(MO
.getReg()))
629 int StageScheduled
= Schedule
.getStage(&*BBI
);
630 assert(StageScheduled
!= -1 && "Expecting scheduled instruction.");
631 Register Def
= MO
.getReg();
632 unsigned NumPhis
= getStagesForReg(Def
, CurStageNum
);
633 // An instruction scheduled in stage 0 and is used after the loop
634 // requires a phi in the epilog for the last definition from either
635 // the kernel or prolog.
636 if (!InKernel
&& NumPhis
== 0 && StageScheduled
== 0 &&
637 hasUseAfterLoop(Def
, BB
, MRI
))
639 if (!InKernel
&& (unsigned)StageScheduled
> PrologStage
)
642 unsigned PhiOp2
= VRMap
[PrevStage
][Def
];
643 if (MachineInstr
*InstOp2
= MRI
.getVRegDef(PhiOp2
))
644 if (InstOp2
->isPHI() && InstOp2
->getParent() == NewBB
)
645 PhiOp2
= getLoopPhiReg(*InstOp2
, BB2
);
646 // The number of Phis can't exceed the number of prolog stages. The
647 // prolog stage number is zero based.
648 if (NumPhis
> PrologStage
+ 1 - StageScheduled
)
649 NumPhis
= PrologStage
+ 1 - StageScheduled
;
650 for (unsigned np
= 0; np
< NumPhis
; ++np
) {
651 unsigned PhiOp1
= VRMap
[PrologStage
][Def
];
652 if (np
<= PrologStage
)
653 PhiOp1
= VRMap
[PrologStage
- np
][Def
];
654 if (MachineInstr
*InstOp1
= MRI
.getVRegDef(PhiOp1
)) {
655 if (InstOp1
->isPHI() && InstOp1
->getParent() == KernelBB
)
656 PhiOp1
= getInitPhiReg(*InstOp1
, KernelBB
);
657 if (InstOp1
->isPHI() && InstOp1
->getParent() == NewBB
)
658 PhiOp1
= getInitPhiReg(*InstOp1
, NewBB
);
661 PhiOp2
= VRMap
[PrevStage
- np
][Def
];
663 const TargetRegisterClass
*RC
= MRI
.getRegClass(Def
);
664 Register NewReg
= MRI
.createVirtualRegister(RC
);
666 MachineInstrBuilder NewPhi
=
667 BuildMI(*NewBB
, NewBB
->getFirstNonPHI(), DebugLoc(),
668 TII
->get(TargetOpcode::PHI
), NewReg
);
669 NewPhi
.addReg(PhiOp1
).addMBB(BB1
);
670 NewPhi
.addReg(PhiOp2
).addMBB(BB2
);
672 InstrMap
[NewPhi
] = &*BBI
;
674 // Rewrite uses and update the map. The actions depend upon whether
675 // we generating code for the kernel or epilog blocks.
677 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
, PhiOp1
,
679 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
, PhiOp2
,
683 VRMap
[PrevStage
- np
- 1][Def
] = NewReg
;
685 VRMap
[CurStageNum
- np
][Def
] = NewReg
;
686 if (np
== NumPhis
- 1)
687 rewriteScheduledInstr(NewBB
, InstrMap
, CurStageNum
, np
, &*BBI
, Def
,
690 if (IsLast
&& np
== NumPhis
- 1)
691 replaceRegUsesAfterLoop(Def
, NewReg
, BB
, MRI
, LIS
);
697 /// Remove instructions that generate values with no uses.
698 /// Typically, these are induction variable operations that generate values
699 /// used in the loop itself. A dead instruction has a definition with
700 /// no uses, or uses that occur in the original loop only.
701 void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock
*KernelBB
,
702 MBBVectorTy
&EpilogBBs
) {
703 // For each epilog block, check that the value defined by each instruction
704 // is used. If not, delete it.
705 for (MBBVectorTy::reverse_iterator MBB
= EpilogBBs
.rbegin(),
706 MBE
= EpilogBBs
.rend();
708 for (MachineBasicBlock::reverse_instr_iterator MI
= (*MBB
)->instr_rbegin(),
709 ME
= (*MBB
)->instr_rend();
711 // From DeadMachineInstructionElem. Don't delete inline assembly.
712 if (MI
->isInlineAsm()) {
716 bool SawStore
= false;
717 // Check if it's safe to remove the instruction due to side effects.
718 // We can, and want to, remove Phis here.
719 if (!MI
->isSafeToMove(nullptr, SawStore
) && !MI
->isPHI()) {
724 for (MachineInstr::mop_iterator MOI
= MI
->operands_begin(),
725 MOE
= MI
->operands_end();
727 if (!MOI
->isReg() || !MOI
->isDef())
729 Register reg
= MOI
->getReg();
730 // Assume physical registers are used, unless they are marked dead.
731 if (Register::isPhysicalRegister(reg
)) {
732 used
= !MOI
->isDead();
737 unsigned realUses
= 0;
738 for (MachineRegisterInfo::use_iterator UI
= MRI
.use_begin(reg
),
741 // Check if there are any uses that occur only in the original
742 // loop. If so, that's not a real use.
743 if (UI
->getParent()->getParent() != BB
) {
754 LIS
.RemoveMachineInstrFromMaps(*MI
);
755 MI
++->eraseFromParent();
760 // In the kernel block, check if we can remove a Phi that generates a value
761 // used in an instruction removed in the epilog block.
762 for (MachineBasicBlock::iterator BBI
= KernelBB
->instr_begin(),
763 BBE
= KernelBB
->getFirstNonPHI();
765 MachineInstr
*MI
= &*BBI
;
767 Register reg
= MI
->getOperand(0).getReg();
768 if (MRI
.use_begin(reg
) == MRI
.use_end()) {
769 LIS
.RemoveMachineInstrFromMaps(*MI
);
770 MI
->eraseFromParent();
775 /// For loop carried definitions, we split the lifetime of a virtual register
776 /// that has uses past the definition in the next iteration. A copy with a new
777 /// virtual register is inserted before the definition, which helps with
778 /// generating a better register assignment.
780 /// v1 = phi(a, v2) v1 = phi(a, v2)
781 /// v2 = phi(b, v3) v2 = phi(b, v3)
782 /// v3 = .. v4 = copy v1
785 void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock
*KernelBB
,
786 MBBVectorTy
&EpilogBBs
) {
787 const TargetRegisterInfo
*TRI
= MF
.getSubtarget().getRegisterInfo();
788 for (auto &PHI
: KernelBB
->phis()) {
789 Register Def
= PHI
.getOperand(0).getReg();
790 // Check for any Phi definition that used as an operand of another Phi
791 // in the same block.
792 for (MachineRegisterInfo::use_instr_iterator I
= MRI
.use_instr_begin(Def
),
793 E
= MRI
.use_instr_end();
795 if (I
->isPHI() && I
->getParent() == KernelBB
) {
796 // Get the loop carried definition.
797 unsigned LCDef
= getLoopPhiReg(PHI
, KernelBB
);
800 MachineInstr
*MI
= MRI
.getVRegDef(LCDef
);
801 if (!MI
|| MI
->getParent() != KernelBB
|| MI
->isPHI())
803 // Search through the rest of the block looking for uses of the Phi
804 // definition. If one occurs, then split the lifetime.
805 unsigned SplitReg
= 0;
806 for (auto &BBJ
: make_range(MachineBasicBlock::instr_iterator(MI
),
807 KernelBB
->instr_end()))
808 if (BBJ
.readsRegister(Def
)) {
809 // We split the lifetime when we find the first use.
811 SplitReg
= MRI
.createVirtualRegister(MRI
.getRegClass(Def
));
812 BuildMI(*KernelBB
, MI
, MI
->getDebugLoc(),
813 TII
->get(TargetOpcode::COPY
), SplitReg
)
816 BBJ
.substituteRegister(Def
, SplitReg
, 0, *TRI
);
820 // Search through each of the epilog blocks for any uses to be renamed.
821 for (auto &Epilog
: EpilogBBs
)
822 for (auto &I
: *Epilog
)
823 if (I
.readsRegister(Def
))
824 I
.substituteRegister(Def
, SplitReg
, 0, *TRI
);
831 /// Remove the incoming block from the Phis in a basic block.
832 static void removePhis(MachineBasicBlock
*BB
, MachineBasicBlock
*Incoming
) {
833 for (MachineInstr
&MI
: *BB
) {
836 for (unsigned i
= 1, e
= MI
.getNumOperands(); i
!= e
; i
+= 2)
837 if (MI
.getOperand(i
+ 1).getMBB() == Incoming
) {
838 MI
.RemoveOperand(i
+ 1);
845 /// Create branches from each prolog basic block to the appropriate epilog
846 /// block. These edges are needed if the loop ends before reaching the
848 void ModuloScheduleExpander::addBranches(MachineBasicBlock
&PreheaderBB
,
849 MBBVectorTy
&PrologBBs
,
850 MachineBasicBlock
*KernelBB
,
851 MBBVectorTy
&EpilogBBs
,
853 assert(PrologBBs
.size() == EpilogBBs
.size() && "Prolog/Epilog mismatch");
854 MachineBasicBlock
*LastPro
= KernelBB
;
855 MachineBasicBlock
*LastEpi
= KernelBB
;
857 // Start from the blocks connected to the kernel and work "out"
858 // to the first prolog and the last epilog blocks.
859 SmallVector
<MachineInstr
*, 4> PrevInsts
;
860 unsigned MaxIter
= PrologBBs
.size() - 1;
861 for (unsigned i
= 0, j
= MaxIter
; i
<= MaxIter
; ++i
, --j
) {
862 // Add branches to the prolog that go to the corresponding
863 // epilog, and the fall-thru prolog/kernel block.
864 MachineBasicBlock
*Prolog
= PrologBBs
[j
];
865 MachineBasicBlock
*Epilog
= EpilogBBs
[i
];
867 SmallVector
<MachineOperand
, 4> Cond
;
868 Optional
<bool> StaticallyGreater
=
869 LoopInfo
->createTripCountGreaterCondition(j
+ 1, *Prolog
, Cond
);
870 unsigned numAdded
= 0;
871 if (!StaticallyGreater
.hasValue()) {
872 Prolog
->addSuccessor(Epilog
);
873 numAdded
= TII
->insertBranch(*Prolog
, Epilog
, LastPro
, Cond
, DebugLoc());
874 } else if (*StaticallyGreater
== false) {
875 Prolog
->addSuccessor(Epilog
);
876 Prolog
->removeSuccessor(LastPro
);
877 LastEpi
->removeSuccessor(Epilog
);
878 numAdded
= TII
->insertBranch(*Prolog
, Epilog
, nullptr, Cond
, DebugLoc());
879 removePhis(Epilog
, LastEpi
);
880 // Remove the blocks that are no longer referenced.
881 if (LastPro
!= LastEpi
) {
883 LastEpi
->eraseFromParent();
885 if (LastPro
== KernelBB
) {
886 LoopInfo
->disposed();
890 LastPro
->eraseFromParent();
892 numAdded
= TII
->insertBranch(*Prolog
, LastPro
, nullptr, Cond
, DebugLoc());
893 removePhis(Epilog
, Prolog
);
897 for (MachineBasicBlock::reverse_instr_iterator I
= Prolog
->instr_rbegin(),
898 E
= Prolog
->instr_rend();
899 I
!= E
&& numAdded
> 0; ++I
, --numAdded
)
900 updateInstruction(&*I
, false, j
, 0, VRMap
);
904 LoopInfo
->setPreheader(PrologBBs
[MaxIter
]);
905 LoopInfo
->adjustTripCount(-(MaxIter
+ 1));
909 /// Return true if we can compute the amount the instruction changes
910 /// during each iteration. Set Delta to the amount of the change.
911 bool ModuloScheduleExpander::computeDelta(MachineInstr
&MI
, unsigned &Delta
) {
912 const TargetRegisterInfo
*TRI
= MF
.getSubtarget().getRegisterInfo();
913 const MachineOperand
*BaseOp
;
915 bool OffsetIsScalable
;
916 if (!TII
->getMemOperandWithOffset(MI
, BaseOp
, Offset
, OffsetIsScalable
, TRI
))
919 // FIXME: This algorithm assumes instructions have fixed-size offsets.
920 if (OffsetIsScalable
)
923 if (!BaseOp
->isReg())
926 Register BaseReg
= BaseOp
->getReg();
928 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
929 // Check if there is a Phi. If so, get the definition in the loop.
930 MachineInstr
*BaseDef
= MRI
.getVRegDef(BaseReg
);
931 if (BaseDef
&& BaseDef
->isPHI()) {
932 BaseReg
= getLoopPhiReg(*BaseDef
, MI
.getParent());
933 BaseDef
= MRI
.getVRegDef(BaseReg
);
939 if (!TII
->getIncrementValue(*BaseDef
, D
) && D
>= 0)
946 /// Update the memory operand with a new offset when the pipeliner
947 /// generates a new copy of the instruction that refers to a
948 /// different memory location.
949 void ModuloScheduleExpander::updateMemOperands(MachineInstr
&NewMI
,
954 // If the instruction has memory operands, then adjust the offset
955 // when the instruction appears in different stages.
956 if (NewMI
.memoperands_empty())
958 SmallVector
<MachineMemOperand
*, 2> NewMMOs
;
959 for (MachineMemOperand
*MMO
: NewMI
.memoperands()) {
960 // TODO: Figure out whether isAtomic is really necessary (see D57601).
961 if (MMO
->isVolatile() || MMO
->isAtomic() ||
962 (MMO
->isInvariant() && MMO
->isDereferenceable()) ||
963 (!MMO
->getValue())) {
964 NewMMOs
.push_back(MMO
);
968 if (Num
!= UINT_MAX
&& computeDelta(OldMI
, Delta
)) {
969 int64_t AdjOffset
= Delta
* Num
;
971 MF
.getMachineMemOperand(MMO
, AdjOffset
, MMO
->getSize()));
974 MF
.getMachineMemOperand(MMO
, 0, MemoryLocation::UnknownSize
));
977 NewMI
.setMemRefs(MF
, NewMMOs
);
980 /// Clone the instruction for the new pipelined loop and update the
981 /// memory operands, if needed.
982 MachineInstr
*ModuloScheduleExpander::cloneInstr(MachineInstr
*OldMI
,
983 unsigned CurStageNum
,
984 unsigned InstStageNum
) {
985 MachineInstr
*NewMI
= MF
.CloneMachineInstr(OldMI
);
986 // Check for tied operands in inline asm instructions. This should be handled
987 // elsewhere, but I'm not sure of the best solution.
988 if (OldMI
->isInlineAsm())
989 for (unsigned i
= 0, e
= OldMI
->getNumOperands(); i
!= e
; ++i
) {
990 const auto &MO
= OldMI
->getOperand(i
);
991 if (MO
.isReg() && MO
.isUse())
994 if (OldMI
->isRegTiedToUseOperand(i
, &UseIdx
))
995 NewMI
->tieOperands(i
, UseIdx
);
997 updateMemOperands(*NewMI
, *OldMI
, CurStageNum
- InstStageNum
);
1001 /// Clone the instruction for the new pipelined loop. If needed, this
1002 /// function updates the instruction using the values saved in the
1003 /// InstrChanges structure.
1004 MachineInstr
*ModuloScheduleExpander::cloneAndChangeInstr(
1005 MachineInstr
*OldMI
, unsigned CurStageNum
, unsigned InstStageNum
) {
1006 MachineInstr
*NewMI
= MF
.CloneMachineInstr(OldMI
);
1007 auto It
= InstrChanges
.find(OldMI
);
1008 if (It
!= InstrChanges
.end()) {
1009 std::pair
<unsigned, int64_t> RegAndOffset
= It
->second
;
1010 unsigned BasePos
, OffsetPos
;
1011 if (!TII
->getBaseAndOffsetPosition(*OldMI
, BasePos
, OffsetPos
))
1013 int64_t NewOffset
= OldMI
->getOperand(OffsetPos
).getImm();
1014 MachineInstr
*LoopDef
= findDefInLoop(RegAndOffset
.first
);
1015 if (Schedule
.getStage(LoopDef
) > (signed)InstStageNum
)
1016 NewOffset
+= RegAndOffset
.second
* (CurStageNum
- InstStageNum
);
1017 NewMI
->getOperand(OffsetPos
).setImm(NewOffset
);
1019 updateMemOperands(*NewMI
, *OldMI
, CurStageNum
- InstStageNum
);
1023 /// Update the machine instruction with new virtual registers. This
1024 /// function may change the defintions and/or uses.
1025 void ModuloScheduleExpander::updateInstruction(MachineInstr
*NewMI
,
1027 unsigned CurStageNum
,
1028 unsigned InstrStageNum
,
1029 ValueMapTy
*VRMap
) {
1030 for (unsigned i
= 0, e
= NewMI
->getNumOperands(); i
!= e
; ++i
) {
1031 MachineOperand
&MO
= NewMI
->getOperand(i
);
1032 if (!MO
.isReg() || !Register::isVirtualRegister(MO
.getReg()))
1034 Register reg
= MO
.getReg();
1036 // Create a new virtual register for the definition.
1037 const TargetRegisterClass
*RC
= MRI
.getRegClass(reg
);
1038 Register NewReg
= MRI
.createVirtualRegister(RC
);
1040 VRMap
[CurStageNum
][reg
] = NewReg
;
1042 replaceRegUsesAfterLoop(reg
, NewReg
, BB
, MRI
, LIS
);
1043 } else if (MO
.isUse()) {
1044 MachineInstr
*Def
= MRI
.getVRegDef(reg
);
1045 // Compute the stage that contains the last definition for instruction.
1046 int DefStageNum
= Schedule
.getStage(Def
);
1047 unsigned StageNum
= CurStageNum
;
1048 if (DefStageNum
!= -1 && (int)InstrStageNum
> DefStageNum
) {
1049 // Compute the difference in stages between the defintion and the use.
1050 unsigned StageDiff
= (InstrStageNum
- DefStageNum
);
1051 // Make an adjustment to get the last definition.
1052 StageNum
-= StageDiff
;
1054 if (VRMap
[StageNum
].count(reg
))
1055 MO
.setReg(VRMap
[StageNum
][reg
]);
1060 /// Return the instruction in the loop that defines the register.
1061 /// If the definition is a Phi, then follow the Phi operand to
1062 /// the instruction in the loop.
1063 MachineInstr
*ModuloScheduleExpander::findDefInLoop(unsigned Reg
) {
1064 SmallPtrSet
<MachineInstr
*, 8> Visited
;
1065 MachineInstr
*Def
= MRI
.getVRegDef(Reg
);
1066 while (Def
->isPHI()) {
1067 if (!Visited
.insert(Def
).second
)
1069 for (unsigned i
= 1, e
= Def
->getNumOperands(); i
< e
; i
+= 2)
1070 if (Def
->getOperand(i
+ 1).getMBB() == BB
) {
1071 Def
= MRI
.getVRegDef(Def
->getOperand(i
).getReg());
1078 /// Return the new name for the value from the previous stage.
1079 unsigned ModuloScheduleExpander::getPrevMapVal(
1080 unsigned StageNum
, unsigned PhiStage
, unsigned LoopVal
, unsigned LoopStage
,
1081 ValueMapTy
*VRMap
, MachineBasicBlock
*BB
) {
1082 unsigned PrevVal
= 0;
1083 if (StageNum
> PhiStage
) {
1084 MachineInstr
*LoopInst
= MRI
.getVRegDef(LoopVal
);
1085 if (PhiStage
== LoopStage
&& VRMap
[StageNum
- 1].count(LoopVal
))
1086 // The name is defined in the previous stage.
1087 PrevVal
= VRMap
[StageNum
- 1][LoopVal
];
1088 else if (VRMap
[StageNum
].count(LoopVal
))
1089 // The previous name is defined in the current stage when the instruction
1090 // order is swapped.
1091 PrevVal
= VRMap
[StageNum
][LoopVal
];
1092 else if (!LoopInst
->isPHI() || LoopInst
->getParent() != BB
)
1093 // The loop value hasn't yet been scheduled.
1095 else if (StageNum
== PhiStage
+ 1)
1096 // The loop value is another phi, which has not been scheduled.
1097 PrevVal
= getInitPhiReg(*LoopInst
, BB
);
1098 else if (StageNum
> PhiStage
+ 1 && LoopInst
->getParent() == BB
)
1099 // The loop value is another phi, which has been scheduled.
1101 getPrevMapVal(StageNum
- 1, PhiStage
, getLoopPhiReg(*LoopInst
, BB
),
1102 LoopStage
, VRMap
, BB
);
1107 /// Rewrite the Phi values in the specified block to use the mappings
1108 /// from the initial operand. Once the Phi is scheduled, we switch
1109 /// to using the loop value instead of the Phi value, so those names
1110 /// do not need to be rewritten.
1111 void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock
*NewBB
,
1114 InstrMapTy
&InstrMap
) {
1115 for (auto &PHI
: BB
->phis()) {
1116 unsigned InitVal
= 0;
1117 unsigned LoopVal
= 0;
1118 getPhiRegs(PHI
, BB
, InitVal
, LoopVal
);
1119 Register PhiDef
= PHI
.getOperand(0).getReg();
1121 unsigned PhiStage
= (unsigned)Schedule
.getStage(MRI
.getVRegDef(PhiDef
));
1122 unsigned LoopStage
= (unsigned)Schedule
.getStage(MRI
.getVRegDef(LoopVal
));
1123 unsigned NumPhis
= getStagesForPhi(PhiDef
);
1124 if (NumPhis
> StageNum
)
1126 for (unsigned np
= 0; np
<= NumPhis
; ++np
) {
1128 getPrevMapVal(StageNum
- np
, PhiStage
, LoopVal
, LoopStage
, VRMap
, BB
);
1131 rewriteScheduledInstr(NewBB
, InstrMap
, StageNum
- np
, np
, &PHI
, PhiDef
,
1137 /// Rewrite a previously scheduled instruction to use the register value
1138 /// from the new instruction. Make sure the instruction occurs in the
1139 /// basic block, and we don't change the uses in the new instruction.
1140 void ModuloScheduleExpander::rewriteScheduledInstr(
1141 MachineBasicBlock
*BB
, InstrMapTy
&InstrMap
, unsigned CurStageNum
,
1142 unsigned PhiNum
, MachineInstr
*Phi
, unsigned OldReg
, unsigned NewReg
,
1144 bool InProlog
= (CurStageNum
< (unsigned)Schedule
.getNumStages() - 1);
1145 int StagePhi
= Schedule
.getStage(Phi
) + PhiNum
;
1146 // Rewrite uses that have been scheduled already to use the new
1148 for (MachineRegisterInfo::use_iterator UI
= MRI
.use_begin(OldReg
),
1151 MachineOperand
&UseOp
= *UI
;
1152 MachineInstr
*UseMI
= UseOp
.getParent();
1154 if (UseMI
->getParent() != BB
)
1156 if (UseMI
->isPHI()) {
1157 if (!Phi
->isPHI() && UseMI
->getOperand(0).getReg() == NewReg
)
1159 if (getLoopPhiReg(*UseMI
, BB
) != OldReg
)
1162 InstrMapTy::iterator OrigInstr
= InstrMap
.find(UseMI
);
1163 assert(OrigInstr
!= InstrMap
.end() && "Instruction not scheduled.");
1164 MachineInstr
*OrigMI
= OrigInstr
->second
;
1165 int StageSched
= Schedule
.getStage(OrigMI
);
1166 int CycleSched
= Schedule
.getCycle(OrigMI
);
1167 unsigned ReplaceReg
= 0;
1168 // This is the stage for the scheduled instruction.
1169 if (StagePhi
== StageSched
&& Phi
->isPHI()) {
1170 int CyclePhi
= Schedule
.getCycle(Phi
);
1171 if (PrevReg
&& InProlog
)
1172 ReplaceReg
= PrevReg
;
1173 else if (PrevReg
&& !isLoopCarried(*Phi
) &&
1174 (CyclePhi
<= CycleSched
|| OrigMI
->isPHI()))
1175 ReplaceReg
= PrevReg
;
1177 ReplaceReg
= NewReg
;
1179 // The scheduled instruction occurs before the scheduled Phi, and the
1180 // Phi is not loop carried.
1181 if (!InProlog
&& StagePhi
+ 1 == StageSched
&& !isLoopCarried(*Phi
))
1182 ReplaceReg
= NewReg
;
1183 if (StagePhi
> StageSched
&& Phi
->isPHI())
1184 ReplaceReg
= NewReg
;
1185 if (!InProlog
&& !Phi
->isPHI() && StagePhi
< StageSched
)
1186 ReplaceReg
= NewReg
;
1188 MRI
.constrainRegClass(ReplaceReg
, MRI
.getRegClass(OldReg
));
1189 UseOp
.setReg(ReplaceReg
);
1194 bool ModuloScheduleExpander::isLoopCarried(MachineInstr
&Phi
) {
1197 int DefCycle
= Schedule
.getCycle(&Phi
);
1198 int DefStage
= Schedule
.getStage(&Phi
);
1200 unsigned InitVal
= 0;
1201 unsigned LoopVal
= 0;
1202 getPhiRegs(Phi
, Phi
.getParent(), InitVal
, LoopVal
);
1203 MachineInstr
*Use
= MRI
.getVRegDef(LoopVal
);
1204 if (!Use
|| Use
->isPHI())
1206 int LoopCycle
= Schedule
.getCycle(Use
);
1207 int LoopStage
= Schedule
.getStage(Use
);
1208 return (LoopCycle
> DefCycle
) || (LoopStage
<= DefStage
);
1211 //===----------------------------------------------------------------------===//
1212 // PeelingModuloScheduleExpander implementation
1213 //===----------------------------------------------------------------------===//
1214 // This is a reimplementation of ModuloScheduleExpander that works by creating
1215 // a fully correct steady-state kernel and peeling off the prolog and epilogs.
1216 //===----------------------------------------------------------------------===//
1219 // Remove any dead phis in MBB. Dead phis either have only one block as input
1220 // (in which case they are the identity) or have no uses.
1221 void EliminateDeadPhis(MachineBasicBlock
*MBB
, MachineRegisterInfo
&MRI
,
1222 LiveIntervals
*LIS
, bool KeepSingleSrcPhi
= false) {
1223 bool Changed
= true;
1226 for (auto I
= MBB
->begin(); I
!= MBB
->getFirstNonPHI();) {
1227 MachineInstr
&MI
= *I
++;
1229 if (MRI
.use_empty(MI
.getOperand(0).getReg())) {
1231 LIS
->RemoveMachineInstrFromMaps(MI
);
1232 MI
.eraseFromParent();
1234 } else if (!KeepSingleSrcPhi
&& MI
.getNumExplicitOperands() == 3) {
1235 MRI
.constrainRegClass(MI
.getOperand(1).getReg(),
1236 MRI
.getRegClass(MI
.getOperand(0).getReg()));
1237 MRI
.replaceRegWith(MI
.getOperand(0).getReg(),
1238 MI
.getOperand(1).getReg());
1240 LIS
->RemoveMachineInstrFromMaps(MI
);
1241 MI
.eraseFromParent();
1248 /// Rewrites the kernel block in-place to adhere to the given schedule.
1249 /// KernelRewriter holds all of the state required to perform the rewriting.
1250 class KernelRewriter
{
1252 MachineBasicBlock
*BB
;
1253 MachineBasicBlock
*PreheaderBB
, *ExitBB
;
1254 MachineRegisterInfo
&MRI
;
1255 const TargetInstrInfo
*TII
;
1258 // Map from register class to canonical undef register for that class.
1259 DenseMap
<const TargetRegisterClass
*, Register
> Undefs
;
1260 // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1261 // this map is only used when InitReg is non-undef.
1262 DenseMap
<std::pair
<unsigned, unsigned>, Register
> Phis
;
1263 // Map from LoopReg to phi register where the InitReg is undef.
1264 DenseMap
<Register
, Register
> UndefPhis
;
1266 // Reg is used by MI. Return the new register MI should use to adhere to the
1267 // schedule. Insert phis as necessary.
1268 Register
remapUse(Register Reg
, MachineInstr
&MI
);
1269 // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1270 // If InitReg is not given it is chosen arbitrarily. It will either be undef
1271 // or will be chosen so as to share another phi.
1272 Register
phi(Register LoopReg
, Optional
<Register
> InitReg
= {},
1273 const TargetRegisterClass
*RC
= nullptr);
1274 // Create an undef register of the given register class.
1275 Register
undef(const TargetRegisterClass
*RC
);
1278 KernelRewriter(MachineLoop
&L
, ModuloSchedule
&S
, MachineBasicBlock
*LoopBB
,
1279 LiveIntervals
*LIS
= nullptr);
1284 KernelRewriter::KernelRewriter(MachineLoop
&L
, ModuloSchedule
&S
,
1285 MachineBasicBlock
*LoopBB
, LiveIntervals
*LIS
)
1286 : S(S
), BB(LoopBB
), PreheaderBB(L
.getLoopPreheader()),
1287 ExitBB(L
.getExitBlock()), MRI(BB
->getParent()->getRegInfo()),
1288 TII(BB
->getParent()->getSubtarget().getInstrInfo()), LIS(LIS
) {
1289 PreheaderBB
= *BB
->pred_begin();
1290 if (PreheaderBB
== BB
)
1291 PreheaderBB
= *std::next(BB
->pred_begin());
1294 void KernelRewriter::rewrite() {
1295 // Rearrange the loop to be in schedule order. Note that the schedule may
1296 // contain instructions that are not owned by the loop block (InstrChanges and
1297 // friends), so we gracefully handle unowned instructions and delete any
1298 // instructions that weren't in the schedule.
1299 auto InsertPt
= BB
->getFirstTerminator();
1300 MachineInstr
*FirstMI
= nullptr;
1301 for (MachineInstr
*MI
: S
.getInstructions()) {
1304 if (MI
->getParent())
1305 MI
->removeFromParent();
1306 BB
->insert(InsertPt
, MI
);
1310 assert(FirstMI
&& "Failed to find first MI in schedule");
1312 // At this point all of the scheduled instructions are between FirstMI
1313 // and the end of the block. Kill from the first non-phi to FirstMI.
1314 for (auto I
= BB
->getFirstNonPHI(); I
!= FirstMI
->getIterator();) {
1316 LIS
->RemoveMachineInstrFromMaps(*I
);
1317 (I
++)->eraseFromParent();
1320 // Now remap every instruction in the loop.
1321 for (MachineInstr
&MI
: *BB
) {
1322 if (MI
.isPHI() || MI
.isTerminator())
1324 for (MachineOperand
&MO
: MI
.uses()) {
1325 if (!MO
.isReg() || MO
.getReg().isPhysical() || MO
.isImplicit())
1327 Register Reg
= remapUse(MO
.getReg(), MI
);
1331 EliminateDeadPhis(BB
, MRI
, LIS
);
1333 // Ensure a phi exists for all instructions that are either referenced by
1334 // an illegal phi or by an instruction outside the loop. This allows us to
1335 // treat remaps of these values the same as "normal" values that come from
1336 // loop-carried phis.
1337 for (auto MI
= BB
->getFirstNonPHI(); MI
!= BB
->end(); ++MI
) {
1339 Register R
= MI
->getOperand(0).getReg();
1344 for (MachineOperand
&Def
: MI
->defs()) {
1345 for (MachineInstr
&MI
: MRI
.use_instructions(Def
.getReg())) {
1346 if (MI
.getParent() != BB
) {
1355 Register
KernelRewriter::remapUse(Register Reg
, MachineInstr
&MI
) {
1356 MachineInstr
*Producer
= MRI
.getUniqueVRegDef(Reg
);
1360 int ConsumerStage
= S
.getStage(&MI
);
1361 if (!Producer
->isPHI()) {
1362 // Non-phi producers are simple to remap. Insert as many phis as the
1363 // difference between the consumer and producer stages.
1364 if (Producer
->getParent() != BB
)
1365 // Producer was not inside the loop. Use the register as-is.
1367 int ProducerStage
= S
.getStage(Producer
);
1368 assert(ConsumerStage
!= -1 &&
1369 "In-loop consumer should always be scheduled!");
1370 assert(ConsumerStage
>= ProducerStage
);
1371 unsigned StageDiff
= ConsumerStage
- ProducerStage
;
1373 for (unsigned I
= 0; I
< StageDiff
; ++I
)
1378 // First, dive through the phi chain to find the defaults for the generated
1380 SmallVector
<Optional
<Register
>, 4> Defaults
;
1381 Register LoopReg
= Reg
;
1382 auto LoopProducer
= Producer
;
1383 while (LoopProducer
->isPHI() && LoopProducer
->getParent() == BB
) {
1384 LoopReg
= getLoopPhiReg(*LoopProducer
, BB
);
1385 Defaults
.emplace_back(getInitPhiReg(*LoopProducer
, BB
));
1386 LoopProducer
= MRI
.getUniqueVRegDef(LoopReg
);
1387 assert(LoopProducer
);
1389 int LoopProducerStage
= S
.getStage(LoopProducer
);
1391 Optional
<Register
> IllegalPhiDefault
;
1393 if (LoopProducerStage
== -1) {
1395 } else if (LoopProducerStage
> ConsumerStage
) {
1396 // This schedule is only representable if ProducerStage == ConsumerStage+1.
1397 // In addition, Consumer's cycle must be scheduled after Producer in the
1398 // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1400 #ifndef NDEBUG // Silence unused variables in non-asserts mode.
1401 int LoopProducerCycle
= S
.getCycle(LoopProducer
);
1402 int ConsumerCycle
= S
.getCycle(&MI
);
1404 assert(LoopProducerCycle
<= ConsumerCycle
);
1405 assert(LoopProducerStage
== ConsumerStage
+ 1);
1406 // Peel off the first phi from Defaults and insert a phi between producer
1407 // and consumer. This phi will not be at the front of the block so we
1408 // consider it illegal. It will only exist during the rewrite process; it
1409 // needs to exist while we peel off prologs because these could take the
1410 // default value. After that we can replace all uses with the loop producer
1412 IllegalPhiDefault
= Defaults
.front();
1413 Defaults
.erase(Defaults
.begin());
1415 assert(ConsumerStage
>= LoopProducerStage
);
1416 int StageDiff
= ConsumerStage
- LoopProducerStage
;
1417 if (StageDiff
> 0) {
1418 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults
.size()
1419 << " to " << (Defaults
.size() + StageDiff
) << "\n");
1420 // If we need more phis than we have defaults for, pad out with undefs for
1421 // the earliest phis, which are at the end of the defaults chain (the
1422 // chain is in reverse order).
1423 Defaults
.resize(Defaults
.size() + StageDiff
, Defaults
.empty()
1424 ? Optional
<Register
>()
1429 // Now we know the number of stages to jump back, insert the phi chain.
1430 auto DefaultI
= Defaults
.rbegin();
1431 while (DefaultI
!= Defaults
.rend())
1432 LoopReg
= phi(LoopReg
, *DefaultI
++, MRI
.getRegClass(Reg
));
1434 if (IllegalPhiDefault
.hasValue()) {
1435 // The consumer optionally consumes LoopProducer in the same iteration
1436 // (because the producer is scheduled at an earlier cycle than the consumer)
1437 // or the initial value. To facilitate this we create an illegal block here
1438 // by embedding a phi in the middle of the block. We will fix this up
1439 // immediately prior to pruning.
1440 auto RC
= MRI
.getRegClass(Reg
);
1441 Register R
= MRI
.createVirtualRegister(RC
);
1442 MachineInstr
*IllegalPhi
=
1443 BuildMI(*BB
, MI
, DebugLoc(), TII
->get(TargetOpcode::PHI
), R
)
1444 .addReg(IllegalPhiDefault
.getValue())
1445 .addMBB(PreheaderBB
) // Block choice is arbitrary and has no effect.
1447 .addMBB(BB
); // Block choice is arbitrary and has no effect.
1448 // Illegal phi should belong to the producer stage so that it can be
1449 // filtered correctly during peeling.
1450 S
.setStage(IllegalPhi
, LoopProducerStage
);
1457 Register
KernelRewriter::phi(Register LoopReg
, Optional
<Register
> InitReg
,
1458 const TargetRegisterClass
*RC
) {
1459 // If the init register is not undef, try and find an existing phi.
1460 if (InitReg
.hasValue()) {
1461 auto I
= Phis
.find({LoopReg
, InitReg
.getValue()});
1462 if (I
!= Phis
.end())
1465 for (auto &KV
: Phis
) {
1466 if (KV
.first
.first
== LoopReg
)
1471 // InitReg is either undef or no existing phi takes InitReg as input. Try and
1472 // find a phi that takes undef as input.
1473 auto I
= UndefPhis
.find(LoopReg
);
1474 if (I
!= UndefPhis
.end()) {
1475 Register R
= I
->second
;
1476 if (!InitReg
.hasValue())
1477 // Found a phi taking undef as input, and this input is undef so return
1478 // without any more changes.
1480 // Found a phi taking undef as input, so rewrite it to take InitReg.
1481 MachineInstr
*MI
= MRI
.getVRegDef(R
);
1482 MI
->getOperand(1).setReg(InitReg
.getValue());
1483 Phis
.insert({{LoopReg
, InitReg
.getValue()}, R
});
1484 MRI
.constrainRegClass(R
, MRI
.getRegClass(InitReg
.getValue()));
1489 // Failed to find any existing phi to reuse, so create a new one.
1491 RC
= MRI
.getRegClass(LoopReg
);
1492 Register R
= MRI
.createVirtualRegister(RC
);
1493 if (InitReg
.hasValue())
1494 MRI
.constrainRegClass(R
, MRI
.getRegClass(*InitReg
));
1495 BuildMI(*BB
, BB
->getFirstNonPHI(), DebugLoc(), TII
->get(TargetOpcode::PHI
), R
)
1496 .addReg(InitReg
.hasValue() ? *InitReg
: undef(RC
))
1497 .addMBB(PreheaderBB
)
1500 if (!InitReg
.hasValue())
1501 UndefPhis
[LoopReg
] = R
;
1503 Phis
[{LoopReg
, *InitReg
}] = R
;
1507 Register
KernelRewriter::undef(const TargetRegisterClass
*RC
) {
1508 Register
&R
= Undefs
[RC
];
1510 // Create an IMPLICIT_DEF that defines this register if we need it.
1511 // All uses of this should be removed by the time we have finished unrolling
1512 // prologs and epilogs.
1513 R
= MRI
.createVirtualRegister(RC
);
1514 auto *InsertBB
= &PreheaderBB
->getParent()->front();
1515 BuildMI(*InsertBB
, InsertBB
->getFirstTerminator(), DebugLoc(),
1516 TII
->get(TargetOpcode::IMPLICIT_DEF
), R
);
1522 /// Describes an operand in the kernel of a pipelined loop. Characteristics of
1523 /// the operand are discovered, such as how many in-loop PHIs it has to jump
1524 /// through and defaults for these phis.
1525 class KernelOperandInfo
{
1526 MachineBasicBlock
*BB
;
1527 MachineRegisterInfo
&MRI
;
1528 SmallVector
<Register
, 4> PhiDefaults
;
1529 MachineOperand
*Source
;
1530 MachineOperand
*Target
;
1533 KernelOperandInfo(MachineOperand
*MO
, MachineRegisterInfo
&MRI
,
1534 const SmallPtrSetImpl
<MachineInstr
*> &IllegalPhis
)
1537 BB
= MO
->getParent()->getParent();
1538 while (isRegInLoop(MO
)) {
1539 MachineInstr
*MI
= MRI
.getVRegDef(MO
->getReg());
1540 if (MI
->isFullCopy()) {
1541 MO
= &MI
->getOperand(1);
1546 // If this is an illegal phi, don't count it in distance.
1547 if (IllegalPhis
.count(MI
)) {
1548 MO
= &MI
->getOperand(3);
1552 Register Default
= getInitPhiReg(*MI
, BB
);
1553 MO
= MI
->getOperand(2).getMBB() == BB
? &MI
->getOperand(1)
1554 : &MI
->getOperand(3);
1555 PhiDefaults
.push_back(Default
);
1560 bool operator==(const KernelOperandInfo
&Other
) const {
1561 return PhiDefaults
.size() == Other
.PhiDefaults
.size();
1564 void print(raw_ostream
&OS
) const {
1565 OS
<< "use of " << *Source
<< ": distance(" << PhiDefaults
.size() << ") in "
1566 << *Source
->getParent();
1570 bool isRegInLoop(MachineOperand
*MO
) {
1571 return MO
->isReg() && MO
->getReg().isVirtual() &&
1572 MRI
.getVRegDef(MO
->getReg())->getParent() == BB
;
1578 PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD
) {
1579 MachineBasicBlock
*NewBB
= PeelSingleBlockLoop(LPD
, BB
, MRI
, TII
);
1580 if (LPD
== LPD_Front
)
1581 PeeledFront
.push_back(NewBB
);
1583 PeeledBack
.push_front(NewBB
);
1584 for (auto I
= BB
->begin(), NI
= NewBB
->begin(); !I
->isTerminator();
1586 CanonicalMIs
[&*I
] = &*I
;
1587 CanonicalMIs
[&*NI
] = &*I
;
1588 BlockMIs
[{NewBB
, &*I
}] = &*NI
;
1589 BlockMIs
[{BB
, &*I
}] = &*I
;
1594 void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock
*MB
,
1596 for (auto I
= MB
->getFirstInstrTerminator()->getReverseIterator();
1597 I
!= std::next(MB
->getFirstNonPHI()->getReverseIterator());) {
1598 MachineInstr
*MI
= &*I
++;
1599 int Stage
= getStage(MI
);
1600 if (Stage
== -1 || Stage
>= MinStage
)
1603 for (MachineOperand
&DefMO
: MI
->defs()) {
1604 SmallVector
<std::pair
<MachineInstr
*, Register
>, 4> Subs
;
1605 for (MachineInstr
&UseMI
: MRI
.use_instructions(DefMO
.getReg())) {
1606 // Only PHIs can use values from this block by construction.
1607 // Match with the equivalent PHI in B.
1608 assert(UseMI
.isPHI());
1609 Register Reg
= getEquivalentRegisterIn(UseMI
.getOperand(0).getReg(),
1611 Subs
.emplace_back(&UseMI
, Reg
);
1613 for (auto &Sub
: Subs
)
1614 Sub
.first
->substituteRegister(DefMO
.getReg(), Sub
.second
, /*SubIdx=*/0,
1615 *MRI
.getTargetRegisterInfo());
1618 LIS
->RemoveMachineInstrFromMaps(*MI
);
1619 MI
->eraseFromParent();
1623 void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
1624 MachineBasicBlock
*DestBB
, MachineBasicBlock
*SourceBB
, unsigned Stage
) {
1625 auto InsertPt
= DestBB
->getFirstNonPHI();
1626 DenseMap
<Register
, Register
> Remaps
;
1627 for (auto I
= SourceBB
->getFirstNonPHI(); I
!= SourceBB
->end();) {
1628 MachineInstr
*MI
= &*I
++;
1630 // This is an illegal PHI. If we move any instructions using an illegal
1631 // PHI, we need to create a legal Phi.
1632 if (getStage(MI
) != Stage
) {
1633 // The legal Phi is not necessary if the illegal phi's stage
1635 Register PhiR
= MI
->getOperand(0).getReg();
1636 auto RC
= MRI
.getRegClass(PhiR
);
1637 Register NR
= MRI
.createVirtualRegister(RC
);
1638 MachineInstr
*NI
= BuildMI(*DestBB
, DestBB
->getFirstNonPHI(),
1639 DebugLoc(), TII
->get(TargetOpcode::PHI
), NR
)
1642 BlockMIs
[{DestBB
, CanonicalMIs
[MI
]}] = NI
;
1643 CanonicalMIs
[NI
] = CanonicalMIs
[MI
];
1647 if (getStage(MI
) != Stage
)
1649 MI
->removeFromParent();
1650 DestBB
->insert(InsertPt
, MI
);
1651 auto *KernelMI
= CanonicalMIs
[MI
];
1652 BlockMIs
[{DestBB
, KernelMI
}] = MI
;
1653 BlockMIs
.erase({SourceBB
, KernelMI
});
1655 SmallVector
<MachineInstr
*, 4> PhiToDelete
;
1656 for (MachineInstr
&MI
: DestBB
->phis()) {
1657 assert(MI
.getNumOperands() == 3);
1658 MachineInstr
*Def
= MRI
.getVRegDef(MI
.getOperand(1).getReg());
1659 // If the instruction referenced by the phi is moved inside the block
1660 // we don't need the phi anymore.
1661 if (getStage(Def
) == Stage
) {
1662 Register PhiReg
= MI
.getOperand(0).getReg();
1663 assert(Def
->findRegisterDefOperandIdx(MI
.getOperand(1).getReg()) != -1);
1664 MRI
.replaceRegWith(MI
.getOperand(0).getReg(), MI
.getOperand(1).getReg());
1665 MI
.getOperand(0).setReg(PhiReg
);
1666 PhiToDelete
.push_back(&MI
);
1669 for (auto *P
: PhiToDelete
)
1670 P
->eraseFromParent();
1671 InsertPt
= DestBB
->getFirstNonPHI();
1672 // Helper to clone Phi instructions into the destination block. We clone Phi
1673 // greedily to avoid combinatorial explosion of Phi instructions.
1674 auto clonePhi
= [&](MachineInstr
*Phi
) {
1675 MachineInstr
*NewMI
= MF
.CloneMachineInstr(Phi
);
1676 DestBB
->insert(InsertPt
, NewMI
);
1677 Register OrigR
= Phi
->getOperand(0).getReg();
1678 Register R
= MRI
.createVirtualRegister(MRI
.getRegClass(OrigR
));
1679 NewMI
->getOperand(0).setReg(R
);
1680 NewMI
->getOperand(1).setReg(OrigR
);
1681 NewMI
->getOperand(2).setMBB(*DestBB
->pred_begin());
1683 CanonicalMIs
[NewMI
] = CanonicalMIs
[Phi
];
1684 BlockMIs
[{DestBB
, CanonicalMIs
[Phi
]}] = NewMI
;
1685 PhiNodeLoopIteration
[NewMI
] = PhiNodeLoopIteration
[Phi
];
1688 for (auto I
= DestBB
->getFirstNonPHI(); I
!= DestBB
->end(); ++I
) {
1689 for (MachineOperand
&MO
: I
->uses()) {
1692 if (Remaps
.count(MO
.getReg()))
1693 MO
.setReg(Remaps
[MO
.getReg()]);
1695 // If we are using a phi from the source block we need to add a new phi
1696 // pointing to the old one.
1697 MachineInstr
*Use
= MRI
.getUniqueVRegDef(MO
.getReg());
1698 if (Use
&& Use
->isPHI() && Use
->getParent() == SourceBB
) {
1699 Register R
= clonePhi(Use
);
1708 PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr
*CanonicalPhi
,
1709 MachineInstr
*Phi
) {
1710 unsigned distance
= PhiNodeLoopIteration
[Phi
];
1711 MachineInstr
*CanonicalUse
= CanonicalPhi
;
1712 Register CanonicalUseReg
= CanonicalUse
->getOperand(0).getReg();
1713 for (unsigned I
= 0; I
< distance
; ++I
) {
1714 assert(CanonicalUse
->isPHI());
1715 assert(CanonicalUse
->getNumOperands() == 5);
1716 unsigned LoopRegIdx
= 3, InitRegIdx
= 1;
1717 if (CanonicalUse
->getOperand(2).getMBB() == CanonicalUse
->getParent())
1718 std::swap(LoopRegIdx
, InitRegIdx
);
1719 CanonicalUseReg
= CanonicalUse
->getOperand(LoopRegIdx
).getReg();
1720 CanonicalUse
= MRI
.getVRegDef(CanonicalUseReg
);
1722 return CanonicalUseReg
;
1725 void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
1726 BitVector
LS(Schedule
.getNumStages(), true);
1727 BitVector
AS(Schedule
.getNumStages(), true);
1728 LiveStages
[BB
] = LS
;
1729 AvailableStages
[BB
] = AS
;
1731 // Peel out the prologs.
1733 for (int I
= 0; I
< Schedule
.getNumStages() - 1; ++I
) {
1735 Prologs
.push_back(peelKernel(LPD_Front
));
1736 LiveStages
[Prologs
.back()] = LS
;
1737 AvailableStages
[Prologs
.back()] = LS
;
1740 // Create a block that will end up as the new loop exiting block (dominated by
1741 // all prologs and epilogs). It will only contain PHIs, in the same order as
1742 // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1743 // that the exiting block is a (sub) clone of BB. This in turn gives us the
1744 // property that any value deffed in BB but used outside of BB is used by a
1745 // PHI in the exiting block.
1746 MachineBasicBlock
*ExitingBB
= CreateLCSSAExitingBlock();
1747 EliminateDeadPhis(ExitingBB
, MRI
, LIS
, /*KeepSingleSrcPhi=*/true);
1748 // Push out the epilogs, again in reverse order.
1749 // We can't assume anything about the minumum loop trip count at this point,
1750 // so emit a fairly complex epilog.
1752 // We first peel number of stages minus one epilogue. Then we remove dead
1753 // stages and reorder instructions based on their stage. If we have 3 stages
1754 // we generate first:
1758 // And then we move instructions based on their stages to have:
1762 // The transformation is legal because we only move instructions past
1763 // instructions of a previous loop iteration.
1764 for (int I
= 1; I
<= Schedule
.getNumStages() - 1; ++I
) {
1765 Epilogs
.push_back(peelKernel(LPD_Back
));
1766 MachineBasicBlock
*B
= Epilogs
.back();
1767 filterInstructions(B
, Schedule
.getNumStages() - I
);
1768 // Keep track at which iteration each phi belongs to. We need it to know
1769 // what version of the variable to use during prologue/epilogue stitching.
1770 EliminateDeadPhis(B
, MRI
, LIS
, /*KeepSingleSrcPhi=*/true);
1771 for (auto Phi
= B
->begin(), IE
= B
->getFirstNonPHI(); Phi
!= IE
; ++Phi
)
1772 PhiNodeLoopIteration
[&*Phi
] = Schedule
.getNumStages() - I
;
1774 for (size_t I
= 0; I
< Epilogs
.size(); I
++) {
1776 for (size_t J
= I
; J
< Epilogs
.size(); J
++) {
1778 unsigned Stage
= Schedule
.getNumStages() - 1 + I
- J
;
1779 // Move stage one block at a time so that Phi nodes are updated correctly.
1780 for (size_t K
= Iteration
; K
> I
; K
--)
1781 moveStageBetweenBlocks(Epilogs
[K
- 1], Epilogs
[K
], Stage
);
1784 LiveStages
[Epilogs
[I
]] = LS
;
1785 AvailableStages
[Epilogs
[I
]] = AS
;
1788 // Now we've defined all the prolog and epilog blocks as a fallthrough
1789 // sequence, add the edges that will be followed if the loop trip count is
1790 // lower than the number of stages (connecting prologs directly with epilogs).
1791 auto PI
= Prologs
.begin();
1792 auto EI
= Epilogs
.begin();
1793 assert(Prologs
.size() == Epilogs
.size());
1794 for (; PI
!= Prologs
.end(); ++PI
, ++EI
) {
1795 MachineBasicBlock
*Pred
= *(*EI
)->pred_begin();
1796 (*PI
)->addSuccessor(*EI
);
1797 for (MachineInstr
&MI
: (*EI
)->phis()) {
1798 Register Reg
= MI
.getOperand(1).getReg();
1799 MachineInstr
*Use
= MRI
.getUniqueVRegDef(Reg
);
1800 if (Use
&& Use
->getParent() == Pred
) {
1801 MachineInstr
*CanonicalUse
= CanonicalMIs
[Use
];
1802 if (CanonicalUse
->isPHI()) {
1803 // If the use comes from a phi we need to skip as many phi as the
1804 // distance between the epilogue and the kernel. Trace through the phi
1805 // chain to find the right value.
1806 Reg
= getPhiCanonicalReg(CanonicalUse
, Use
);
1808 Reg
= getEquivalentRegisterIn(Reg
, *PI
);
1810 MI
.addOperand(MachineOperand::CreateReg(Reg
, /*isDef=*/false));
1811 MI
.addOperand(MachineOperand::CreateMBB(*PI
));
1815 // Create a list of all blocks in order.
1816 SmallVector
<MachineBasicBlock
*, 8> Blocks
;
1817 llvm::copy(PeeledFront
, std::back_inserter(Blocks
));
1818 Blocks
.push_back(BB
);
1819 llvm::copy(PeeledBack
, std::back_inserter(Blocks
));
1821 // Iterate in reverse order over all instructions, remapping as we go.
1822 for (MachineBasicBlock
*B
: reverse(Blocks
)) {
1823 for (auto I
= B
->getFirstInstrTerminator()->getReverseIterator();
1824 I
!= std::next(B
->getFirstNonPHI()->getReverseIterator());) {
1825 MachineInstr
*MI
= &*I
++;
1829 for (auto *MI
: IllegalPhisToDelete
) {
1831 LIS
->RemoveMachineInstrFromMaps(*MI
);
1832 MI
->eraseFromParent();
1834 IllegalPhisToDelete
.clear();
1836 // Now all remapping has been done, we're free to optimize the generated code.
1837 for (MachineBasicBlock
*B
: reverse(Blocks
))
1838 EliminateDeadPhis(B
, MRI
, LIS
);
1839 EliminateDeadPhis(ExitingBB
, MRI
, LIS
);
1842 MachineBasicBlock
*PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
1843 MachineFunction
&MF
= *BB
->getParent();
1844 MachineBasicBlock
*Exit
= *BB
->succ_begin();
1846 Exit
= *std::next(BB
->succ_begin());
1848 MachineBasicBlock
*NewBB
= MF
.CreateMachineBasicBlock(BB
->getBasicBlock());
1849 MF
.insert(std::next(BB
->getIterator()), NewBB
);
1851 // Clone all phis in BB into NewBB and rewrite.
1852 for (MachineInstr
&MI
: BB
->phis()) {
1853 auto RC
= MRI
.getRegClass(MI
.getOperand(0).getReg());
1854 Register OldR
= MI
.getOperand(3).getReg();
1855 Register R
= MRI
.createVirtualRegister(RC
);
1856 SmallVector
<MachineInstr
*, 4> Uses
;
1857 for (MachineInstr
&Use
: MRI
.use_instructions(OldR
))
1858 if (Use
.getParent() != BB
)
1859 Uses
.push_back(&Use
);
1860 for (MachineInstr
*Use
: Uses
)
1861 Use
->substituteRegister(OldR
, R
, /*SubIdx=*/0,
1862 *MRI
.getTargetRegisterInfo());
1863 MachineInstr
*NI
= BuildMI(NewBB
, DebugLoc(), TII
->get(TargetOpcode::PHI
), R
)
1866 BlockMIs
[{NewBB
, &MI
}] = NI
;
1867 CanonicalMIs
[NI
] = &MI
;
1869 BB
->replaceSuccessor(Exit
, NewBB
);
1870 Exit
->replacePhiUsesWith(BB
, NewBB
);
1871 NewBB
->addSuccessor(Exit
);
1873 MachineBasicBlock
*TBB
= nullptr, *FBB
= nullptr;
1874 SmallVector
<MachineOperand
, 4> Cond
;
1875 bool CanAnalyzeBr
= !TII
->analyzeBranch(*BB
, TBB
, FBB
, Cond
);
1877 assert(CanAnalyzeBr
&& "Must be able to analyze the loop branch!");
1878 TII
->removeBranch(*BB
);
1879 TII
->insertBranch(*BB
, TBB
== Exit
? NewBB
: TBB
, FBB
== Exit
? NewBB
: FBB
,
1881 TII
->insertUnconditionalBranch(*NewBB
, Exit
, DebugLoc());
1886 PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg
,
1887 MachineBasicBlock
*BB
) {
1888 MachineInstr
*MI
= MRI
.getUniqueVRegDef(Reg
);
1889 unsigned OpIdx
= MI
->findRegisterDefOperandIdx(Reg
);
1890 return BlockMIs
[{BB
, CanonicalMIs
[MI
]}]->getOperand(OpIdx
).getReg();
1893 void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr
*MI
) {
1895 // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1896 // and it is produced by this block.
1897 Register PhiR
= MI
->getOperand(0).getReg();
1898 Register R
= MI
->getOperand(3).getReg();
1899 int RMIStage
= getStage(MRI
.getUniqueVRegDef(R
));
1900 if (RMIStage
!= -1 && !AvailableStages
[MI
->getParent()].test(RMIStage
))
1901 R
= MI
->getOperand(1).getReg();
1902 MRI
.setRegClass(R
, MRI
.getRegClass(PhiR
));
1903 MRI
.replaceRegWith(PhiR
, R
);
1904 // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1905 // later to figure out how to remap registers.
1906 MI
->getOperand(0).setReg(PhiR
);
1907 IllegalPhisToDelete
.push_back(MI
);
1911 int Stage
= getStage(MI
);
1912 if (Stage
== -1 || LiveStages
.count(MI
->getParent()) == 0 ||
1913 LiveStages
[MI
->getParent()].test(Stage
))
1914 // Instruction is live, no rewriting to do.
1917 for (MachineOperand
&DefMO
: MI
->defs()) {
1918 SmallVector
<std::pair
<MachineInstr
*, Register
>, 4> Subs
;
1919 for (MachineInstr
&UseMI
: MRI
.use_instructions(DefMO
.getReg())) {
1920 // Only PHIs can use values from this block by construction.
1921 // Match with the equivalent PHI in B.
1922 assert(UseMI
.isPHI());
1923 Register Reg
= getEquivalentRegisterIn(UseMI
.getOperand(0).getReg(),
1925 Subs
.emplace_back(&UseMI
, Reg
);
1927 for (auto &Sub
: Subs
)
1928 Sub
.first
->substituteRegister(DefMO
.getReg(), Sub
.second
, /*SubIdx=*/0,
1929 *MRI
.getTargetRegisterInfo());
1932 LIS
->RemoveMachineInstrFromMaps(*MI
);
1933 MI
->eraseFromParent();
1936 void PeelingModuloScheduleExpander::fixupBranches() {
1937 // Work outwards from the kernel.
1938 bool KernelDisposed
= false;
1939 int TC
= Schedule
.getNumStages() - 1;
1940 for (auto PI
= Prologs
.rbegin(), EI
= Epilogs
.rbegin(); PI
!= Prologs
.rend();
1942 MachineBasicBlock
*Prolog
= *PI
;
1943 MachineBasicBlock
*Fallthrough
= *Prolog
->succ_begin();
1944 MachineBasicBlock
*Epilog
= *EI
;
1945 SmallVector
<MachineOperand
, 4> Cond
;
1946 TII
->removeBranch(*Prolog
);
1947 Optional
<bool> StaticallyGreater
=
1948 LoopInfo
->createTripCountGreaterCondition(TC
, *Prolog
, Cond
);
1949 if (!StaticallyGreater
.hasValue()) {
1950 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC
<< "\n");
1951 // Dynamically branch based on Cond.
1952 TII
->insertBranch(*Prolog
, Epilog
, Fallthrough
, Cond
, DebugLoc());
1953 } else if (*StaticallyGreater
== false) {
1954 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC
<< "\n");
1955 // Prolog never falls through; branch to epilog and orphan interior
1956 // blocks. Leave it to unreachable-block-elim to clean up.
1957 Prolog
->removeSuccessor(Fallthrough
);
1958 for (MachineInstr
&P
: Fallthrough
->phis()) {
1962 TII
->insertUnconditionalBranch(*Prolog
, Epilog
, DebugLoc());
1963 KernelDisposed
= true;
1965 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC
<< "\n");
1966 // Prolog always falls through; remove incoming values in epilog.
1967 Prolog
->removeSuccessor(Epilog
);
1968 for (MachineInstr
&P
: Epilog
->phis()) {
1975 if (!KernelDisposed
) {
1976 LoopInfo
->adjustTripCount(-(Schedule
.getNumStages() - 1));
1977 LoopInfo
->setPreheader(Prologs
.back());
1979 LoopInfo
->disposed();
1983 void PeelingModuloScheduleExpander::rewriteKernel() {
1984 KernelRewriter
KR(*Schedule
.getLoop(), Schedule
, BB
);
1988 void PeelingModuloScheduleExpander::expand() {
1989 BB
= Schedule
.getLoop()->getTopBlock();
1990 Preheader
= Schedule
.getLoop()->getLoopPreheader();
1991 LLVM_DEBUG(Schedule
.dump());
1992 LoopInfo
= TII
->analyzeLoopForPipelining(BB
);
1996 peelPrologAndEpilogs();
2000 void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
2001 BB
= Schedule
.getLoop()->getTopBlock();
2002 Preheader
= Schedule
.getLoop()->getLoopPreheader();
2004 // Dump the schedule before we invalidate and remap all its instructions.
2005 // Stash it in a string so we can print it if we found an error.
2006 std::string ScheduleDump
;
2007 raw_string_ostream
OS(ScheduleDump
);
2011 // First, run the normal ModuleScheduleExpander. We don't support any
2013 assert(LIS
&& "Requires LiveIntervals!");
2014 ModuloScheduleExpander
MSE(MF
, Schedule
, *LIS
,
2015 ModuloScheduleExpander::InstrChangesTy());
2017 MachineBasicBlock
*ExpandedKernel
= MSE
.getRewrittenKernel();
2018 if (!ExpandedKernel
) {
2019 // The expander optimized away the kernel. We can't do any useful checking.
2023 // Before running the KernelRewriter, re-add BB into the CFG.
2024 Preheader
->addSuccessor(BB
);
2026 // Now run the new expansion algorithm.
2027 KernelRewriter
KR(*Schedule
.getLoop(), Schedule
, BB
);
2029 peelPrologAndEpilogs();
2031 // Collect all illegal phis that the new algorithm created. We'll give these
2032 // to KernelOperandInfo.
2033 SmallPtrSet
<MachineInstr
*, 4> IllegalPhis
;
2034 for (auto NI
= BB
->getFirstNonPHI(); NI
!= BB
->end(); ++NI
) {
2036 IllegalPhis
.insert(&*NI
);
2039 // Co-iterate across both kernels. We expect them to be identical apart from
2040 // phis and full COPYs (we look through both).
2041 SmallVector
<std::pair
<KernelOperandInfo
, KernelOperandInfo
>, 8> KOIs
;
2042 auto OI
= ExpandedKernel
->begin();
2043 auto NI
= BB
->begin();
2044 for (; !OI
->isTerminator() && !NI
->isTerminator(); ++OI
, ++NI
) {
2045 while (OI
->isPHI() || OI
->isFullCopy())
2047 while (NI
->isPHI() || NI
->isFullCopy())
2049 assert(OI
->getOpcode() == NI
->getOpcode() && "Opcodes don't match?!");
2050 // Analyze every operand separately.
2051 for (auto OOpI
= OI
->operands_begin(), NOpI
= NI
->operands_begin();
2052 OOpI
!= OI
->operands_end(); ++OOpI
, ++NOpI
)
2053 KOIs
.emplace_back(KernelOperandInfo(&*OOpI
, MRI
, IllegalPhis
),
2054 KernelOperandInfo(&*NOpI
, MRI
, IllegalPhis
));
2057 bool Failed
= false;
2058 for (auto &OldAndNew
: KOIs
) {
2059 if (OldAndNew
.first
== OldAndNew
.second
)
2062 errs() << "Modulo kernel validation error: [\n";
2063 errs() << " [golden] ";
2064 OldAndNew
.first
.print(errs());
2066 OldAndNew
.second
.print(errs());
2071 errs() << "Golden reference kernel:\n";
2072 ExpandedKernel
->print(errs());
2073 errs() << "New kernel:\n";
2075 errs() << ScheduleDump
;
2077 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2080 // Cleanup by removing BB from the CFG again as the original
2081 // ModuloScheduleExpander intended.
2082 Preheader
->removeSuccessor(BB
);
2086 //===----------------------------------------------------------------------===//
2087 // ModuloScheduleTestPass implementation
2088 //===----------------------------------------------------------------------===//
2089 // This pass constructs a ModuloSchedule from its module and runs
2090 // ModuloScheduleExpander.
2092 // The module is expected to contain a single-block analyzable loop.
2093 // The total order of instructions is taken from the loop as-is.
2094 // Instructions are expected to be annotated with a PostInstrSymbol.
2095 // This PostInstrSymbol must have the following format:
2096 // "Stage=%d Cycle=%d".
2097 //===----------------------------------------------------------------------===//
2100 class ModuloScheduleTest
: public MachineFunctionPass
{
2104 ModuloScheduleTest() : MachineFunctionPass(ID
) {
2105 initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
2108 bool runOnMachineFunction(MachineFunction
&MF
) override
;
2109 void runOnLoop(MachineFunction
&MF
, MachineLoop
&L
);
2111 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
2112 AU
.addRequired
<MachineLoopInfo
>();
2113 AU
.addRequired
<LiveIntervals
>();
2114 MachineFunctionPass::getAnalysisUsage(AU
);
2119 char ModuloScheduleTest::ID
= 0;
2121 INITIALIZE_PASS_BEGIN(ModuloScheduleTest
, "modulo-schedule-test",
2122 "Modulo Schedule test pass", false, false)
2123 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo
)
2124 INITIALIZE_PASS_DEPENDENCY(LiveIntervals
)
2125 INITIALIZE_PASS_END(ModuloScheduleTest
, "modulo-schedule-test",
2126 "Modulo Schedule test pass", false, false)
2128 bool ModuloScheduleTest::runOnMachineFunction(MachineFunction
&MF
) {
2129 MachineLoopInfo
&MLI
= getAnalysis
<MachineLoopInfo
>();
2130 for (auto *L
: MLI
) {
2131 if (L
->getTopBlock() != L
->getBottomBlock())
2139 static void parseSymbolString(StringRef S
, int &Cycle
, int &Stage
) {
2140 std::pair
<StringRef
, StringRef
> StageAndCycle
= getToken(S
, "_");
2141 std::pair
<StringRef
, StringRef
> StageTokenAndValue
=
2142 getToken(StageAndCycle
.first
, "-");
2143 std::pair
<StringRef
, StringRef
> CycleTokenAndValue
=
2144 getToken(StageAndCycle
.second
, "-");
2145 if (StageTokenAndValue
.first
!= "Stage" ||
2146 CycleTokenAndValue
.first
!= "_Cycle") {
2148 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2152 StageTokenAndValue
.second
.drop_front().getAsInteger(10, Stage
);
2153 CycleTokenAndValue
.second
.drop_front().getAsInteger(10, Cycle
);
2155 dbgs() << " Stage=" << Stage
<< ", Cycle=" << Cycle
<< "\n";
2158 void ModuloScheduleTest::runOnLoop(MachineFunction
&MF
, MachineLoop
&L
) {
2159 LiveIntervals
&LIS
= getAnalysis
<LiveIntervals
>();
2160 MachineBasicBlock
*BB
= L
.getTopBlock();
2161 dbgs() << "--- ModuloScheduleTest running on BB#" << BB
->getNumber() << "\n";
2163 DenseMap
<MachineInstr
*, int> Cycle
, Stage
;
2164 std::vector
<MachineInstr
*> Instrs
;
2165 for (MachineInstr
&MI
: *BB
) {
2166 if (MI
.isTerminator())
2168 Instrs
.push_back(&MI
);
2169 if (MCSymbol
*Sym
= MI
.getPostInstrSymbol()) {
2170 dbgs() << "Parsing post-instr symbol for " << MI
;
2171 parseSymbolString(Sym
->getName(), Cycle
[&MI
], Stage
[&MI
]);
2175 ModuloSchedule
MS(MF
, &L
, std::move(Instrs
), std::move(Cycle
),
2177 ModuloScheduleExpander
MSE(
2178 MF
, MS
, LIS
, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2183 //===----------------------------------------------------------------------===//
2184 // ModuloScheduleTestAnnotater implementation
2185 //===----------------------------------------------------------------------===//
2187 void ModuloScheduleTestAnnotater::annotate() {
2188 for (MachineInstr
*MI
: S
.getInstructions()) {
2189 SmallVector
<char, 16> SV
;
2190 raw_svector_ostream
OS(SV
);
2191 OS
<< "Stage-" << S
.getStage(MI
) << "_Cycle-" << S
.getCycle(MI
);
2192 MCSymbol
*Sym
= MF
.getContext().getOrCreateSymbol(OS
.str());
2193 MI
->setPostInstrSymbol(MF
, Sym
);