1 //===- ModuloSchedule.h - Software pipeline schedule expansion ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Software pipelining (SWP) is an instruction scheduling technique for loops
10 // that overlaps loop iterations and exploits ILP via compiler transformations.
12 // There are multiple methods for analyzing a loop and creating a schedule.
13 // An example algorithm is Swing Modulo Scheduling (implemented by the
14 // MachinePipeliner). The details of how a schedule is arrived at are irrelevant
15 // for the task of actually rewriting a loop to adhere to the schedule, which
16 // is what this file does.
18 // A schedule is, for every instruction in a block, a Cycle and a Stage. Note
19 // that we only support single-block loops, so "block" and "loop" can be used
22 // The Cycle of an instruction defines a partial order of the instructions in
23 // the remapped loop. Instructions within a cycle must not consume the output
24 // of any instruction in the same cycle. Cycle information is assumed to have
25 // been calculated such that the processor will execute instructions in
26 // lock-step (for example in a VLIW ISA).
28 // The Stage of an instruction defines the mapping between logical loop
29 // iterations and pipelined loop iterations. An example (unrolled) pipeline
30 // may look something like:
32 // I0[0] Execute instruction I0 of iteration 0
33 // I1[0], I0[1] Execute I0 of iteration 1 and I1 of iteration 1
37 // In the schedule for this unrolled sequence we would say that I0 was scheduled
38 // in stage 0 and I1 in stage 1:
42 // [stage 1] I1 x (from stage 0)
44 // And to actually generate valid code we must insert a phi:
51 // This is a simple example; the rules for how to generate correct code given
52 // an arbitrary schedule containing loop-carried values are complex.
54 // Note that these examples only mention the steady-state kernel of the
55 // generated loop; prologs and epilogs must be generated also that prime and
56 // flush the pipeline. Doing so is nontrivial.
58 //===----------------------------------------------------------------------===//
60 #ifndef LLVM_LIB_CODEGEN_MODULOSCHEDULE_H
61 #define LLVM_LIB_CODEGEN_MODULOSCHEDULE_H
63 #include "llvm/CodeGen/MachineFunction.h"
64 #include "llvm/CodeGen/MachineLoopInfo.h"
65 #include "llvm/CodeGen/TargetInstrInfo.h"
66 #include "llvm/CodeGen/TargetSubtargetInfo.h"
70 class MachineBasicBlock
;
74 /// Represents a schedule for a single-block loop. For every instruction we
75 /// maintain a Cycle and Stage.
76 class ModuloSchedule
{
78 /// The block containing the loop instructions.
81 /// The instructions to be generated, in total order. Cycle provides a partial
82 /// order; the total order within cycles has been decided by the schedule
84 std::vector
<MachineInstr
*> ScheduledInstrs
;
86 /// The cycle for each instruction.
87 DenseMap
<MachineInstr
*, int> Cycle
;
89 /// The stage for each instruction.
90 DenseMap
<MachineInstr
*, int> Stage
;
92 /// The number of stages in this schedule (Max(Stage) + 1).
96 /// Create a new ModuloSchedule.
97 /// \arg ScheduledInstrs The new loop instructions, in total resequenced
99 /// \arg Cycle Cycle index for all instructions in ScheduledInstrs. Cycle does
100 /// not need to start at zero. ScheduledInstrs must be partially ordered by
102 /// \arg Stage Stage index for all instructions in ScheduleInstrs.
103 ModuloSchedule(MachineFunction
&MF
, MachineLoop
*Loop
,
104 std::vector
<MachineInstr
*> ScheduledInstrs
,
105 DenseMap
<MachineInstr
*, int> Cycle
,
106 DenseMap
<MachineInstr
*, int> Stage
)
107 : Loop(Loop
), ScheduledInstrs(ScheduledInstrs
), Cycle(std::move(Cycle
)),
108 Stage(std::move(Stage
)) {
110 for (auto &KV
: this->Stage
)
111 NumStages
= std::max(NumStages
, KV
.second
);
115 /// Return the single-block loop being scheduled.
116 MachineLoop
*getLoop() const { return Loop
; }
118 /// Return the number of stages contained in this schedule, which is the
119 /// largest stage index + 1.
120 int getNumStages() const { return NumStages
; }
122 /// Return the first cycle in the schedule, which is the cycle index of the
123 /// first instruction.
124 int getFirstCycle() { return Cycle
[ScheduledInstrs
.front()]; }
126 /// Return the final cycle in the schedule, which is the cycle index of the
127 /// last instruction.
128 int getFinalCycle() { return Cycle
[ScheduledInstrs
.back()]; }
130 /// Return the stage that MI is scheduled in, or -1.
131 int getStage(MachineInstr
*MI
) {
132 auto I
= Stage
.find(MI
);
133 return I
== Stage
.end() ? -1 : I
->second
;
136 /// Return the cycle that MI is scheduled at, or -1.
137 int getCycle(MachineInstr
*MI
) {
138 auto I
= Cycle
.find(MI
);
139 return I
== Cycle
.end() ? -1 : I
->second
;
142 /// Return the rescheduled instructions in order.
143 ArrayRef
<MachineInstr
*> getInstructions() { return ScheduledInstrs
; }
148 void print(raw_ostream
&OS
);
151 /// The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place,
152 /// rewriting the old loop and inserting prologs and epilogs as required.
153 class ModuloScheduleExpander
{
155 using InstrChangesTy
= DenseMap
<MachineInstr
*, std::pair
<unsigned, int64_t>>;
158 using ValueMapTy
= DenseMap
<unsigned, unsigned>;
159 using MBBVectorTy
= SmallVectorImpl
<MachineBasicBlock
*>;
160 using InstrMapTy
= DenseMap
<MachineInstr
*, MachineInstr
*>;
162 ModuloSchedule
&Schedule
;
164 const TargetSubtargetInfo
&ST
;
165 MachineRegisterInfo
&MRI
;
166 const TargetInstrInfo
*TII
;
169 MachineBasicBlock
*BB
;
170 MachineBasicBlock
*Preheader
;
171 MachineBasicBlock
*NewKernel
= nullptr;
172 std::unique_ptr
<TargetInstrInfo::PipelinerLoopInfo
> LoopInfo
;
174 /// Map for each register and the max difference between its uses and def.
175 /// The first element in the pair is the max difference in stages. The
176 /// second is true if the register defines a Phi value and loop value is
177 /// scheduled before the Phi.
178 std::map
<unsigned, std::pair
<unsigned, bool>> RegToStageDiff
;
180 /// Instructions to change when emitting the final schedule.
181 InstrChangesTy InstrChanges
;
183 void generatePipelinedLoop();
184 void generateProlog(unsigned LastStage
, MachineBasicBlock
*KernelBB
,
185 ValueMapTy
*VRMap
, MBBVectorTy
&PrologBBs
);
186 void generateEpilog(unsigned LastStage
, MachineBasicBlock
*KernelBB
,
187 ValueMapTy
*VRMap
, MBBVectorTy
&EpilogBBs
,
188 MBBVectorTy
&PrologBBs
);
189 void generateExistingPhis(MachineBasicBlock
*NewBB
, MachineBasicBlock
*BB1
,
190 MachineBasicBlock
*BB2
, MachineBasicBlock
*KernelBB
,
191 ValueMapTy
*VRMap
, InstrMapTy
&InstrMap
,
192 unsigned LastStageNum
, unsigned CurStageNum
,
194 void generatePhis(MachineBasicBlock
*NewBB
, MachineBasicBlock
*BB1
,
195 MachineBasicBlock
*BB2
, MachineBasicBlock
*KernelBB
,
196 ValueMapTy
*VRMap
, InstrMapTy
&InstrMap
,
197 unsigned LastStageNum
, unsigned CurStageNum
, bool IsLast
);
198 void removeDeadInstructions(MachineBasicBlock
*KernelBB
,
199 MBBVectorTy
&EpilogBBs
);
200 void splitLifetimes(MachineBasicBlock
*KernelBB
, MBBVectorTy
&EpilogBBs
);
201 void addBranches(MachineBasicBlock
&PreheaderBB
, MBBVectorTy
&PrologBBs
,
202 MachineBasicBlock
*KernelBB
, MBBVectorTy
&EpilogBBs
,
204 bool computeDelta(MachineInstr
&MI
, unsigned &Delta
);
205 void updateMemOperands(MachineInstr
&NewMI
, MachineInstr
&OldMI
,
207 MachineInstr
*cloneInstr(MachineInstr
*OldMI
, unsigned CurStageNum
,
208 unsigned InstStageNum
);
209 MachineInstr
*cloneAndChangeInstr(MachineInstr
*OldMI
, unsigned CurStageNum
,
210 unsigned InstStageNum
);
211 void updateInstruction(MachineInstr
*NewMI
, bool LastDef
,
212 unsigned CurStageNum
, unsigned InstrStageNum
,
214 MachineInstr
*findDefInLoop(unsigned Reg
);
215 unsigned getPrevMapVal(unsigned StageNum
, unsigned PhiStage
, unsigned LoopVal
,
216 unsigned LoopStage
, ValueMapTy
*VRMap
,
217 MachineBasicBlock
*BB
);
218 void rewritePhiValues(MachineBasicBlock
*NewBB
, unsigned StageNum
,
219 ValueMapTy
*VRMap
, InstrMapTy
&InstrMap
);
220 void rewriteScheduledInstr(MachineBasicBlock
*BB
, InstrMapTy
&InstrMap
,
221 unsigned CurStageNum
, unsigned PhiNum
,
222 MachineInstr
*Phi
, unsigned OldReg
,
223 unsigned NewReg
, unsigned PrevReg
= 0);
224 bool isLoopCarried(MachineInstr
&Phi
);
226 /// Return the max. number of stages/iterations that can occur between a
227 /// register definition and its uses.
228 unsigned getStagesForReg(int Reg
, unsigned CurStage
) {
229 std::pair
<unsigned, bool> Stages
= RegToStageDiff
[Reg
];
230 if ((int)CurStage
> Schedule
.getNumStages() - 1 && Stages
.first
== 0 &&
236 /// The number of stages for a Phi is a little different than other
237 /// instructions. The minimum value computed in RegToStageDiff is 1
238 /// because we assume the Phi is needed for at least 1 iteration.
239 /// This is not the case if the loop value is scheduled prior to the
240 /// Phi in the same stage. This function returns the number of stages
241 /// or iterations needed between the Phi definition and any uses.
242 unsigned getStagesForPhi(int Reg
) {
243 std::pair
<unsigned, bool> Stages
= RegToStageDiff
[Reg
];
246 return Stages
.first
- 1;
250 /// Create a new ModuloScheduleExpander.
251 /// \arg InstrChanges Modifications to make to instructions with memory
253 /// FIXME: InstrChanges is opaque and is an implementation detail of an
254 /// optimization in MachinePipeliner that crosses abstraction boundaries.
255 ModuloScheduleExpander(MachineFunction
&MF
, ModuloSchedule
&S
,
256 LiveIntervals
&LIS
, InstrChangesTy InstrChanges
)
257 : Schedule(S
), MF(MF
), ST(MF
.getSubtarget()), MRI(MF
.getRegInfo()),
258 TII(ST
.getInstrInfo()), LIS(LIS
),
259 InstrChanges(std::move(InstrChanges
)) {}
261 /// Performs the actual expansion.
263 /// Performs final cleanup after expansion.
266 /// Returns the newly rewritten kernel block, or nullptr if this was
268 MachineBasicBlock
*getRewrittenKernel() { return NewKernel
; }
271 /// A reimplementation of ModuloScheduleExpander. It works by generating a
272 /// standalone kernel loop and peeling out the prologs and epilogs.
274 /// FIXME: This implementation cannot yet generate valid code. It can generate
275 /// a correct kernel but cannot peel out prologs and epilogs.
276 class PeelingModuloScheduleExpander
{
277 ModuloSchedule
&Schedule
;
279 const TargetSubtargetInfo
&ST
;
280 MachineRegisterInfo
&MRI
;
281 const TargetInstrInfo
*TII
;
284 MachineBasicBlock
*BB
;
285 MachineBasicBlock
*Preheader
;
287 PeelingModuloScheduleExpander(MachineFunction
&MF
, ModuloSchedule
&S
,
289 : Schedule(S
), MF(MF
), ST(MF
.getSubtarget()), MRI(MF
.getRegInfo()),
290 TII(ST
.getInstrInfo()), LIS(LIS
) {}
292 /// Runs ModuloScheduleExpander and treats it as a golden input to validate
293 /// aspects of the code generated by PeelingModuloScheduleExpander.
294 void validateAgainstModuloScheduleExpander();
297 /// Expander that simply annotates each scheduled instruction with a post-instr
298 /// symbol that can be consumed by the ModuloScheduleTest pass.
300 /// The post-instr symbol is a way of annotating an instruction that can be
301 /// roundtripped in MIR. The syntax is:
302 /// MYINST %0, post-instr-symbol <mcsymbol Stage-1_Cycle-5>
303 class ModuloScheduleTestAnnotater
{
308 ModuloScheduleTestAnnotater(MachineFunction
&MF
, ModuloSchedule
&S
)
311 /// Performs the annotation.
315 } // end namespace llvm
317 #endif // LLVM_LIB_CODEGEN_MODULOSCHEDULE_H