1 //===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements Loop Rotation Pass.
12 //===----------------------------------------------------------------------===//
14 #define DEBUG_TYPE "loop-rotate"
15 #include "llvm/Transforms/Scalar.h"
16 #include "llvm/Function.h"
17 #include "llvm/IntrinsicInst.h"
18 #include "llvm/Analysis/CodeMetrics.h"
19 #include "llvm/Analysis/LoopPass.h"
20 #include "llvm/Analysis/InstructionSimplify.h"
21 #include "llvm/Analysis/ScalarEvolution.h"
22 #include "llvm/Transforms/Utils/Local.h"
23 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
24 #include "llvm/Transforms/Utils/SSAUpdater.h"
25 #include "llvm/Transforms/Utils/ValueMapper.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/ADT/Statistic.h"
30 #define MAX_HEADER_SIZE 16
32 STATISTIC(NumRotated
, "Number of loops rotated");
35 class LoopRotate
: public LoopPass
{
37 static char ID
; // Pass ID, replacement for typeid
38 LoopRotate() : LoopPass(ID
) {
39 initializeLoopRotatePass(*PassRegistry::getPassRegistry());
42 // LCSSA form makes instruction renaming easier.
43 virtual void getAnalysisUsage(AnalysisUsage
&AU
) const {
44 AU
.addPreserved
<DominatorTree
>();
45 AU
.addRequired
<LoopInfo
>();
46 AU
.addPreserved
<LoopInfo
>();
47 AU
.addRequiredID(LoopSimplifyID
);
48 AU
.addPreservedID(LoopSimplifyID
);
49 AU
.addRequiredID(LCSSAID
);
50 AU
.addPreservedID(LCSSAID
);
51 AU
.addPreserved
<ScalarEvolution
>();
54 bool runOnLoop(Loop
*L
, LPPassManager
&LPM
);
55 bool rotateLoop(Loop
*L
);
62 char LoopRotate::ID
= 0;
63 INITIALIZE_PASS_BEGIN(LoopRotate
, "loop-rotate", "Rotate Loops", false, false)
64 INITIALIZE_PASS_DEPENDENCY(LoopInfo
)
65 INITIALIZE_PASS_DEPENDENCY(LoopSimplify
)
66 INITIALIZE_PASS_DEPENDENCY(LCSSA
)
67 INITIALIZE_PASS_END(LoopRotate
, "loop-rotate", "Rotate Loops", false, false)
69 Pass
*llvm::createLoopRotatePass() { return new LoopRotate(); }
71 /// Rotate Loop L as many times as possible. Return true if
72 /// the loop is rotated at least once.
73 bool LoopRotate::runOnLoop(Loop
*L
, LPPassManager
&LPM
) {
74 LI
= &getAnalysis
<LoopInfo
>();
76 // One loop can be rotated multiple times.
77 bool MadeChange
= false;
84 /// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
85 /// old header into the preheader. If there were uses of the values produced by
86 /// these instruction that were outside of the loop, we have to insert PHI nodes
87 /// to merge the two values. Do this now.
88 static void RewriteUsesOfClonedInstructions(BasicBlock
*OrigHeader
,
89 BasicBlock
*OrigPreheader
,
90 ValueToValueMapTy
&ValueMap
) {
91 // Remove PHI node entries that are no longer live.
92 BasicBlock::iterator I
, E
= OrigHeader
->end();
93 for (I
= OrigHeader
->begin(); PHINode
*PN
= dyn_cast
<PHINode
>(I
); ++I
)
94 PN
->removeIncomingValue(PN
->getBasicBlockIndex(OrigPreheader
));
96 // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
99 for (I
= OrigHeader
->begin(); I
!= E
; ++I
) {
100 Value
*OrigHeaderVal
= I
;
102 // If there are no uses of the value (e.g. because it returns void), there
103 // is nothing to rewrite.
104 if (OrigHeaderVal
->use_empty())
107 Value
*OrigPreHeaderVal
= ValueMap
[OrigHeaderVal
];
109 // The value now exits in two versions: the initial value in the preheader
110 // and the loop "next" value in the original header.
111 SSA
.Initialize(OrigHeaderVal
->getType(), OrigHeaderVal
->getName());
112 SSA
.AddAvailableValue(OrigHeader
, OrigHeaderVal
);
113 SSA
.AddAvailableValue(OrigPreheader
, OrigPreHeaderVal
);
115 // Visit each use of the OrigHeader instruction.
116 for (Value::use_iterator UI
= OrigHeaderVal
->use_begin(),
117 UE
= OrigHeaderVal
->use_end(); UI
!= UE
; ) {
118 // Grab the use before incrementing the iterator.
119 Use
&U
= UI
.getUse();
121 // Increment the iterator before removing the use from the list.
124 // SSAUpdater can't handle a non-PHI use in the same block as an
125 // earlier def. We can easily handle those cases manually.
126 Instruction
*UserInst
= cast
<Instruction
>(U
.getUser());
127 if (!isa
<PHINode
>(UserInst
)) {
128 BasicBlock
*UserBB
= UserInst
->getParent();
130 // The original users in the OrigHeader are already using the
131 // original definitions.
132 if (UserBB
== OrigHeader
)
135 // Users in the OrigPreHeader need to use the value to which the
136 // original definitions are mapped.
137 if (UserBB
== OrigPreheader
) {
138 U
= OrigPreHeaderVal
;
143 // Anything else can be handled by SSAUpdater.
149 /// Rotate loop LP. Return true if the loop is rotated.
150 bool LoopRotate::rotateLoop(Loop
*L
) {
151 // If the loop has only one block then there is not much to rotate.
152 if (L
->getBlocks().size() == 1)
155 BasicBlock
*OrigHeader
= L
->getHeader();
157 BranchInst
*BI
= dyn_cast
<BranchInst
>(OrigHeader
->getTerminator());
158 if (BI
== 0 || BI
->isUnconditional())
161 // If the loop header is not one of the loop exiting blocks then
162 // either this loop is already rotated or it is not
163 // suitable for loop rotation transformations.
164 if (!L
->isLoopExiting(OrigHeader
))
167 // Updating PHInodes in loops with multiple exits adds complexity.
168 // Keep it simple, and restrict loop rotation to loops with one exit only.
169 // In future, lift this restriction and support for multiple exits if
171 SmallVector
<BasicBlock
*, 8> ExitBlocks
;
172 L
->getExitBlocks(ExitBlocks
);
173 if (ExitBlocks
.size() > 1)
176 // Check size of original header and reject loop if it is very big.
179 Metrics
.analyzeBasicBlock(OrigHeader
);
180 if (Metrics
.NumInsts
> MAX_HEADER_SIZE
)
184 // Now, this loop is suitable for rotation.
185 BasicBlock
*OrigPreheader
= L
->getLoopPreheader();
186 BasicBlock
*OrigLatch
= L
->getLoopLatch();
188 // If the loop could not be converted to canonical form, it must have an
189 // indirectbr in it, just give up.
190 if (OrigPreheader
== 0 || OrigLatch
== 0)
193 // Anything ScalarEvolution may know about this loop or the PHI nodes
194 // in its header will soon be invalidated.
195 if (ScalarEvolution
*SE
= getAnalysisIfAvailable
<ScalarEvolution
>())
198 // Find new Loop header. NewHeader is a Header's one and only successor
199 // that is inside loop. Header's other successor is outside the
200 // loop. Otherwise loop is not suitable for rotation.
201 BasicBlock
*Exit
= BI
->getSuccessor(0);
202 BasicBlock
*NewHeader
= BI
->getSuccessor(1);
203 if (L
->contains(Exit
))
204 std::swap(Exit
, NewHeader
);
205 assert(NewHeader
&& "Unable to determine new loop header");
206 assert(L
->contains(NewHeader
) && !L
->contains(Exit
) &&
207 "Unable to determine loop header and exit blocks");
209 // This code assumes that the new header has exactly one predecessor.
210 // Remove any single-entry PHI nodes in it.
211 assert(NewHeader
->getSinglePredecessor() &&
212 "New header doesn't have one pred!");
213 FoldSingleEntryPHINodes(NewHeader
);
215 // Begin by walking OrigHeader and populating ValueMap with an entry for
217 BasicBlock::iterator I
= OrigHeader
->begin(), E
= OrigHeader
->end();
218 ValueToValueMapTy ValueMap
;
220 // For PHI nodes, the value available in OldPreHeader is just the
221 // incoming value from OldPreHeader.
222 for (; PHINode
*PN
= dyn_cast
<PHINode
>(I
); ++I
)
223 ValueMap
[PN
] = PN
->getIncomingValue(PN
->getBasicBlockIndex(OrigPreheader
));
225 // For the rest of the instructions, either hoist to the OrigPreheader if
226 // possible or create a clone in the OldPreHeader if not.
227 TerminatorInst
*LoopEntryBranch
= OrigPreheader
->getTerminator();
229 Instruction
*Inst
= I
++;
231 // If the instruction's operands are invariant and it doesn't read or write
232 // memory, then it is safe to hoist. Doing this doesn't change the order of
233 // execution in the preheader, but does prevent the instruction from
234 // executing in each iteration of the loop. This means it is safe to hoist
235 // something that might trap, but isn't safe to hoist something that reads
236 // memory (without proving that the loop doesn't write).
237 if (L
->hasLoopInvariantOperands(Inst
) &&
238 !Inst
->mayReadFromMemory() && !Inst
->mayWriteToMemory() &&
239 !isa
<TerminatorInst
>(Inst
) && !isa
<DbgInfoIntrinsic
>(Inst
)) {
240 Inst
->moveBefore(LoopEntryBranch
);
244 // Otherwise, create a duplicate of the instruction.
245 Instruction
*C
= Inst
->clone();
247 // Eagerly remap the operands of the instruction.
248 RemapInstruction(C
, ValueMap
,
249 RF_NoModuleLevelChanges
|RF_IgnoreMissingEntries
);
251 // With the operands remapped, see if the instruction constant folds or is
252 // otherwise simplifyable. This commonly occurs because the entry from PHI
253 // nodes allows icmps and other instructions to fold.
254 Value
*V
= SimplifyInstruction(C
);
255 if (V
&& LI
->replacementPreservesLCSSAForm(C
, V
)) {
256 // If so, then delete the temporary instruction and stick the folded value
261 // Otherwise, stick the new instruction into the new block!
262 C
->setName(Inst
->getName());
263 C
->insertBefore(LoopEntryBranch
);
268 // Along with all the other instructions, we just cloned OrigHeader's
269 // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
270 // successors by duplicating their incoming values for OrigHeader.
271 TerminatorInst
*TI
= OrigHeader
->getTerminator();
272 for (unsigned i
= 0, e
= TI
->getNumSuccessors(); i
!= e
; ++i
)
273 for (BasicBlock::iterator BI
= TI
->getSuccessor(i
)->begin();
274 PHINode
*PN
= dyn_cast
<PHINode
>(BI
); ++BI
)
275 PN
->addIncoming(PN
->getIncomingValueForBlock(OrigHeader
), OrigPreheader
);
277 // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
278 // OrigPreHeader's old terminator (the original branch into the loop), and
279 // remove the corresponding incoming values from the PHI nodes in OrigHeader.
280 LoopEntryBranch
->eraseFromParent();
282 // If there were any uses of instructions in the duplicated block outside the
283 // loop, update them, inserting PHI nodes as required
284 RewriteUsesOfClonedInstructions(OrigHeader
, OrigPreheader
, ValueMap
);
286 // NewHeader is now the header of the loop.
287 L
->moveToHeader(NewHeader
);
288 assert(L
->getHeader() == NewHeader
&& "Latch block is our new header");
291 // At this point, we've finished our major CFG changes. As part of cloning
292 // the loop into the preheader we've simplified instructions and the
293 // duplicated conditional branch may now be branching on a constant. If it is
294 // branching on a constant and if that constant means that we enter the loop,
295 // then we fold away the cond branch to an uncond branch. This simplifies the
296 // loop in cases important for nested loops, and it also means we don't have
297 // to split as many edges.
298 BranchInst
*PHBI
= cast
<BranchInst
>(OrigPreheader
->getTerminator());
299 assert(PHBI
->isConditional() && "Should be clone of BI condbr!");
300 if (!isa
<ConstantInt
>(PHBI
->getCondition()) ||
301 PHBI
->getSuccessor(cast
<ConstantInt
>(PHBI
->getCondition())->isZero())
303 // The conditional branch can't be folded, handle the general case.
304 // Update DominatorTree to reflect the CFG change we just made. Then split
305 // edges as necessary to preserve LoopSimplify form.
306 if (DominatorTree
*DT
= getAnalysisIfAvailable
<DominatorTree
>()) {
307 // Since OrigPreheader now has the conditional branch to Exit block, it is
308 // the dominator of Exit.
309 DT
->changeImmediateDominator(Exit
, OrigPreheader
);
310 DT
->changeImmediateDominator(NewHeader
, OrigPreheader
);
312 // Update OrigHeader to be dominated by the new header block.
313 DT
->changeImmediateDominator(OrigHeader
, OrigLatch
);
316 // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
317 // thus is not a preheader anymore. Split the edge to form a real preheader.
318 BasicBlock
*NewPH
= SplitCriticalEdge(OrigPreheader
, NewHeader
, this);
319 NewPH
->setName(NewHeader
->getName() + ".lr.ph");
321 // Preserve canonical loop form, which means that 'Exit' should have only one
323 BasicBlock
*ExitSplit
= SplitCriticalEdge(L
->getLoopLatch(), Exit
, this);
324 ExitSplit
->moveBefore(Exit
);
326 // We can fold the conditional branch in the preheader, this makes things
327 // simpler. The first step is to remove the extra edge to the Exit block.
328 Exit
->removePredecessor(OrigPreheader
, true /*preserve LCSSA*/);
329 BranchInst::Create(NewHeader
, PHBI
);
330 PHBI
->eraseFromParent();
332 // With our CFG finalized, update DomTree if it is available.
333 if (DominatorTree
*DT
= getAnalysisIfAvailable
<DominatorTree
>()) {
334 // Update OrigHeader to be dominated by the new header block.
335 DT
->changeImmediateDominator(NewHeader
, OrigPreheader
);
336 DT
->changeImmediateDominator(OrigHeader
, OrigLatch
);
340 assert(L
->getLoopPreheader() && "Invalid loop preheader after loop rotation");
341 assert(L
->getLoopLatch() && "Invalid loop latch after loop rotation");
343 // Now that the CFG and DomTree are in a consistent state again, try to merge
344 // the OrigHeader block into OrigLatch. This will succeed if they are
345 // connected by an unconditional branch. This is just a cleanup so the
346 // emitted code isn't too gross in this common case.
347 MergeBlockIntoPredecessor(OrigHeader
, this);