1 //===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements some loop unrolling utilities. It does not define any
10 // actual pass or policy, but provides a single function to perform loop
13 // The process of unrolling can produce extraneous basic blocks linked with
14 // unconditional branches. This will be corrected in the future.
16 //===----------------------------------------------------------------------===//
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/AssumptionCache.h"
21 #include "llvm/Analysis/InstructionSimplify.h"
22 #include "llvm/Analysis/LoopIterator.h"
23 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
24 #include "llvm/Analysis/ScalarEvolution.h"
25 #include "llvm/Transforms/Utils/Local.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/DataLayout.h"
28 #include "llvm/IR/DebugInfoMetadata.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/IntrinsicInst.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
35 #include "llvm/Transforms/Utils/Cloning.h"
36 #include "llvm/Transforms/Utils/LoopSimplify.h"
37 #include "llvm/Transforms/Utils/LoopUtils.h"
38 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
39 #include "llvm/Transforms/Utils/UnrollLoop.h"
42 #define DEBUG_TYPE "loop-unroll"
44 // TODO: Should these be here or in LoopUnroll?
45 STATISTIC(NumCompletelyUnrolled
, "Number of loops completely unrolled");
46 STATISTIC(NumUnrolled
, "Number of loops unrolled (completely or otherwise)");
47 STATISTIC(NumUnrolledWithHeader
, "Number of loops unrolled without a "
48 "conditional latch (completely or otherwise)");
51 UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden
,
52 cl::desc("Allow runtime unrolled loops to be unrolled "
53 "with epilog instead of prolog."));
56 UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden
,
57 cl::desc("Verify domtree after unrolling"),
58 #ifdef EXPENSIVE_CHECKS
65 /// Convert the instruction operands from referencing the current values into
66 /// those specified by VMap.
67 void llvm::remapInstruction(Instruction
*I
, ValueToValueMapTy
&VMap
) {
68 for (unsigned op
= 0, E
= I
->getNumOperands(); op
!= E
; ++op
) {
69 Value
*Op
= I
->getOperand(op
);
71 // Unwrap arguments of dbg.value intrinsics.
73 if (auto *V
= dyn_cast
<MetadataAsValue
>(Op
))
74 if (auto *Unwrapped
= dyn_cast
<ValueAsMetadata
>(V
->getMetadata())) {
75 Op
= Unwrapped
->getValue();
79 auto wrap
= [&](Value
*V
) {
80 auto &C
= I
->getContext();
81 return Wrapped
? MetadataAsValue::get(C
, ValueAsMetadata::get(V
)) : V
;
84 ValueToValueMapTy::iterator It
= VMap
.find(Op
);
86 I
->setOperand(op
, wrap(It
->second
));
89 if (PHINode
*PN
= dyn_cast
<PHINode
>(I
)) {
90 for (unsigned i
= 0, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
) {
91 ValueToValueMapTy::iterator It
= VMap
.find(PN
->getIncomingBlock(i
));
93 PN
->setIncomingBlock(i
, cast
<BasicBlock
>(It
->second
));
98 /// Check if unrolling created a situation where we need to insert phi nodes to
99 /// preserve LCSSA form.
100 /// \param Blocks is a vector of basic blocks representing unrolled loop.
101 /// \param L is the outer loop.
102 /// It's possible that some of the blocks are in L, and some are not. In this
103 /// case, if there is a use is outside L, and definition is inside L, we need to
104 /// insert a phi-node, otherwise LCSSA will be broken.
105 /// The function is just a helper function for llvm::UnrollLoop that returns
106 /// true if this situation occurs, indicating that LCSSA needs to be fixed.
107 static bool needToInsertPhisForLCSSA(Loop
*L
, std::vector
<BasicBlock
*> Blocks
,
109 for (BasicBlock
*BB
: Blocks
) {
110 if (LI
->getLoopFor(BB
) == L
)
112 for (Instruction
&I
: *BB
) {
113 for (Use
&U
: I
.operands()) {
114 if (auto Def
= dyn_cast
<Instruction
>(U
)) {
115 Loop
*DefLoop
= LI
->getLoopFor(Def
->getParent());
118 if (DefLoop
->contains(L
))
127 /// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary
128 /// and adds a mapping from the original loop to the new loop to NewLoops.
129 /// Returns nullptr if no new loop was created and a pointer to the
130 /// original loop OriginalBB was part of otherwise.
131 const Loop
* llvm::addClonedBlockToLoopInfo(BasicBlock
*OriginalBB
,
132 BasicBlock
*ClonedBB
, LoopInfo
*LI
,
133 NewLoopsMap
&NewLoops
) {
134 // Figure out which loop New is in.
135 const Loop
*OldLoop
= LI
->getLoopFor(OriginalBB
);
136 assert(OldLoop
&& "Should (at least) be in the loop being unrolled!");
138 Loop
*&NewLoop
= NewLoops
[OldLoop
];
140 // Found a new sub-loop.
141 assert(OriginalBB
== OldLoop
->getHeader() &&
142 "Header should be first in RPO");
144 NewLoop
= LI
->AllocateLoop();
145 Loop
*NewLoopParent
= NewLoops
.lookup(OldLoop
->getParentLoop());
148 NewLoopParent
->addChildLoop(NewLoop
);
150 LI
->addTopLevelLoop(NewLoop
);
152 NewLoop
->addBasicBlockToLoop(ClonedBB
, *LI
);
155 NewLoop
->addBasicBlockToLoop(ClonedBB
, *LI
);
160 /// The function chooses which type of unroll (epilog or prolog) is more
162 /// Epilog unroll is more profitable when there is PHI that starts from
163 /// constant. In this case epilog will leave PHI start from constant,
164 /// but prolog will convert it to non-constant.
167 /// PN = PHI [I, Latch], [CI, PreHeader]
171 /// Epilog unroll case.
173 /// PN = PHI [I2, Latch], [CI, PreHeader]
177 /// Prolog unroll case.
178 /// NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
180 /// PN = PHI [I2, Latch], [NewPN, PreHeader]
185 static bool isEpilogProfitable(Loop
*L
) {
186 BasicBlock
*PreHeader
= L
->getLoopPreheader();
187 BasicBlock
*Header
= L
->getHeader();
188 assert(PreHeader
&& Header
);
189 for (const PHINode
&PN
: Header
->phis()) {
190 if (isa
<ConstantInt
>(PN
.getIncomingValueForBlock(PreHeader
)))
196 /// Perform some cleanup and simplifications on loops after unrolling. It is
197 /// useful to simplify the IV's in the new loop, as well as do a quick
198 /// simplify/dce pass of the instructions.
199 void llvm::simplifyLoopAfterUnroll(Loop
*L
, bool SimplifyIVs
, LoopInfo
*LI
,
200 ScalarEvolution
*SE
, DominatorTree
*DT
,
201 AssumptionCache
*AC
) {
202 // Simplify any new induction variables in the partially unrolled loop.
203 if (SE
&& SimplifyIVs
) {
204 SmallVector
<WeakTrackingVH
, 16> DeadInsts
;
205 simplifyLoopIVs(L
, SE
, DT
, LI
, DeadInsts
);
207 // Aggressively clean up dead instructions that simplifyLoopIVs already
208 // identified. Any remaining should be cleaned up below.
209 while (!DeadInsts
.empty())
210 if (Instruction
*Inst
=
211 dyn_cast_or_null
<Instruction
>(&*DeadInsts
.pop_back_val()))
212 RecursivelyDeleteTriviallyDeadInstructions(Inst
);
215 // At this point, the code is well formed. We now do a quick sweep over the
216 // inserted code, doing constant propagation and dead code elimination as we
218 const DataLayout
&DL
= L
->getHeader()->getModule()->getDataLayout();
219 for (BasicBlock
*BB
: L
->getBlocks()) {
220 for (BasicBlock::iterator I
= BB
->begin(), E
= BB
->end(); I
!= E
;) {
221 Instruction
*Inst
= &*I
++;
223 if (Value
*V
= SimplifyInstruction(Inst
, {DL
, nullptr, DT
, AC
}))
224 if (LI
->replacementPreservesLCSSAForm(Inst
, V
))
225 Inst
->replaceAllUsesWith(V
);
226 if (isInstructionTriviallyDead(Inst
))
227 BB
->getInstList().erase(Inst
);
231 // TODO: after peeling or unrolling, previously loop variant conditions are
232 // likely to fold to constants, eagerly propagating those here will require
233 // fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be
237 /// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
238 /// can only fail when the loop's latch block is not terminated by a conditional
239 /// branch instruction. However, if the trip count (and multiple) are not known,
240 /// loop unrolling will mostly produce more code that is no faster.
242 /// TripCount is the upper bound of the iteration on which control exits
243 /// LatchBlock. Control may exit the loop prior to TripCount iterations either
244 /// via an early branch in other loop block or via LatchBlock terminator. This
245 /// is relaxed from the general definition of trip count which is the number of
246 /// times the loop header executes. Note that UnrollLoop assumes that the loop
247 /// counter test is in LatchBlock in order to remove unnecesssary instances of
248 /// the test. If control can exit the loop from the LatchBlock's terminator
249 /// prior to TripCount iterations, flag PreserveCondBr needs to be set.
251 /// PreserveCondBr indicates whether the conditional branch of the LatchBlock
252 /// needs to be preserved. It is needed when we use trip count upper bound to
253 /// fully unroll the loop. If PreserveOnlyFirst is also set then only the first
254 /// conditional branch needs to be preserved.
256 /// Similarly, TripMultiple divides the number of times that the LatchBlock may
257 /// execute without exiting the loop.
259 /// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
260 /// have a runtime (i.e. not compile time constant) trip count. Unrolling these
261 /// loops require a unroll "prologue" that runs "RuntimeTripCount % Count"
262 /// iterations before branching into the unrolled loop. UnrollLoop will not
263 /// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
264 /// AllowExpensiveTripCount is false.
266 /// If we want to perform PGO-based loop peeling, PeelCount is set to the
267 /// number of iterations we want to peel off.
269 /// The LoopInfo Analysis that is passed will be kept consistent.
271 /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
272 /// DominatorTree if they are non-null.
274 /// If RemainderLoop is non-null, it will receive the remainder loop (if
275 /// required and not fully unrolled).
276 LoopUnrollResult
llvm::UnrollLoop(Loop
*L
, UnrollLoopOptions ULO
, LoopInfo
*LI
,
277 ScalarEvolution
*SE
, DominatorTree
*DT
,
279 OptimizationRemarkEmitter
*ORE
,
280 bool PreserveLCSSA
, Loop
**RemainderLoop
) {
282 BasicBlock
*Preheader
= L
->getLoopPreheader();
284 LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
285 return LoopUnrollResult::Unmodified
;
288 BasicBlock
*LatchBlock
= L
->getLoopLatch();
290 LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
291 return LoopUnrollResult::Unmodified
;
294 // Loops with indirectbr cannot be cloned.
295 if (!L
->isSafeToClone()) {
296 LLVM_DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
297 return LoopUnrollResult::Unmodified
;
300 // The current loop unroll pass can unroll loops with a single latch or header
301 // that's a conditional branch exiting the loop.
302 // FIXME: The implementation can be extended to work with more complicated
303 // cases, e.g. loops with multiple latches.
304 BasicBlock
*Header
= L
->getHeader();
305 BranchInst
*HeaderBI
= dyn_cast
<BranchInst
>(Header
->getTerminator());
306 BranchInst
*BI
= dyn_cast
<BranchInst
>(LatchBlock
->getTerminator());
308 // FIXME: Support loops without conditional latch and multiple exiting blocks.
310 (BI
->isUnconditional() && (!HeaderBI
|| HeaderBI
->isUnconditional() ||
311 L
->getExitingBlock() != Header
))) {
312 LLVM_DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional "
313 "branch in the latch or header.\n");
314 return LoopUnrollResult::Unmodified
;
317 auto CheckLatchSuccessors
= [&](unsigned S1
, unsigned S2
) {
318 return BI
->isConditional() && BI
->getSuccessor(S1
) == Header
&&
319 !L
->contains(BI
->getSuccessor(S2
));
322 // If we have a conditional latch, it must exit the loop.
323 if (BI
&& BI
->isConditional() && !CheckLatchSuccessors(0, 1) &&
324 !CheckLatchSuccessors(1, 0)) {
326 dbgs() << "Can't unroll; a conditional latch must exit the loop");
327 return LoopUnrollResult::Unmodified
;
330 auto CheckHeaderSuccessors
= [&](unsigned S1
, unsigned S2
) {
331 return HeaderBI
&& HeaderBI
->isConditional() &&
332 L
->contains(HeaderBI
->getSuccessor(S1
)) &&
333 !L
->contains(HeaderBI
->getSuccessor(S2
));
336 // If we do not have a conditional latch, the header must exit the loop.
337 if (BI
&& !BI
->isConditional() && HeaderBI
&& HeaderBI
->isConditional() &&
338 !CheckHeaderSuccessors(0, 1) && !CheckHeaderSuccessors(1, 0)) {
339 LLVM_DEBUG(dbgs() << "Can't unroll; conditional header must exit the loop");
340 return LoopUnrollResult::Unmodified
;
343 if (Header
->hasAddressTaken()) {
344 // The loop-rotate pass can be helpful to avoid this in many cases.
346 dbgs() << " Won't unroll loop: address of header block is taken.\n");
347 return LoopUnrollResult::Unmodified
;
350 if (ULO
.TripCount
!= 0)
351 LLVM_DEBUG(dbgs() << " Trip Count = " << ULO
.TripCount
<< "\n");
352 if (ULO
.TripMultiple
!= 1)
353 LLVM_DEBUG(dbgs() << " Trip Multiple = " << ULO
.TripMultiple
<< "\n");
355 // Effectively "DCE" unrolled iterations that are beyond the tripcount
356 // and will never be executed.
357 if (ULO
.TripCount
!= 0 && ULO
.Count
> ULO
.TripCount
)
358 ULO
.Count
= ULO
.TripCount
;
360 // Don't enter the unroll code if there is nothing to do.
361 if (ULO
.TripCount
== 0 && ULO
.Count
< 2 && ULO
.PeelCount
== 0) {
362 LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
363 return LoopUnrollResult::Unmodified
;
366 assert(ULO
.Count
> 0);
367 assert(ULO
.TripMultiple
> 0);
368 assert(ULO
.TripCount
== 0 || ULO
.TripCount
% ULO
.TripMultiple
== 0);
370 // Are we eliminating the loop control altogether?
371 bool CompletelyUnroll
= ULO
.Count
== ULO
.TripCount
;
372 SmallVector
<BasicBlock
*, 4> ExitBlocks
;
373 L
->getExitBlocks(ExitBlocks
);
374 std::vector
<BasicBlock
*> OriginalLoopBlocks
= L
->getBlocks();
376 // Go through all exits of L and see if there are any phi-nodes there. We just
377 // conservatively assume that they're inserted to preserve LCSSA form, which
378 // means that complete unrolling might break this form. We need to either fix
379 // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
380 // now we just recompute LCSSA for the outer loop, but it should be possible
381 // to fix it in-place.
382 bool NeedToFixLCSSA
= PreserveLCSSA
&& CompletelyUnroll
&&
383 any_of(ExitBlocks
, [](const BasicBlock
*BB
) {
384 return isa
<PHINode
>(BB
->begin());
387 // We assume a run-time trip count if the compiler cannot
388 // figure out the loop trip count and the unroll-runtime
389 // flag is specified.
390 bool RuntimeTripCount
=
391 (ULO
.TripCount
== 0 && ULO
.Count
> 0 && ULO
.AllowRuntime
);
393 assert((!RuntimeTripCount
|| !ULO
.PeelCount
) &&
394 "Did not expect runtime trip-count unrolling "
395 "and peeling for the same loop");
399 Peeled
= peelLoop(L
, ULO
.PeelCount
, LI
, SE
, DT
, AC
, PreserveLCSSA
);
401 // Successful peeling may result in a change in the loop preheader/trip
402 // counts. If we later unroll the loop, we want these to be updated.
404 // According to our guards and profitability checks the only
405 // meaningful exit should be latch block. Other exits go to deopt,
406 // so we do not worry about them.
407 BasicBlock
*ExitingBlock
= L
->getLoopLatch();
408 assert(ExitingBlock
&& "Loop without exiting block?");
409 assert(L
->isLoopExiting(ExitingBlock
) && "Latch is not exiting?");
410 Preheader
= L
->getLoopPreheader();
411 ULO
.TripCount
= SE
->getSmallConstantTripCount(L
, ExitingBlock
);
412 ULO
.TripMultiple
= SE
->getSmallConstantTripMultiple(L
, ExitingBlock
);
416 // Loops containing convergent instructions must have a count that divides
417 // their TripMultiple.
420 bool HasConvergent
= false;
421 for (auto &BB
: L
->blocks())
423 if (auto CS
= CallSite(&I
))
424 HasConvergent
|= CS
.isConvergent();
425 assert((!HasConvergent
|| ULO
.TripMultiple
% ULO
.Count
== 0) &&
426 "Unroll count must divide trip multiple if loop contains a "
427 "convergent operation.");
430 bool EpilogProfitability
=
431 UnrollRuntimeEpilog
.getNumOccurrences() ? UnrollRuntimeEpilog
432 : isEpilogProfitable(L
);
434 if (RuntimeTripCount
&& ULO
.TripMultiple
% ULO
.Count
!= 0 &&
435 !UnrollRuntimeLoopRemainder(L
, ULO
.Count
, ULO
.AllowExpensiveTripCount
,
436 EpilogProfitability
, ULO
.UnrollRemainder
,
437 ULO
.ForgetAllSCEV
, LI
, SE
, DT
, AC
,
438 PreserveLCSSA
, RemainderLoop
)) {
440 RuntimeTripCount
= false;
442 LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
443 "generated when assuming runtime trip count\n");
444 return LoopUnrollResult::Unmodified
;
448 // If we know the trip count, we know the multiple...
449 unsigned BreakoutTrip
= 0;
450 if (ULO
.TripCount
!= 0) {
451 BreakoutTrip
= ULO
.TripCount
% ULO
.Count
;
452 ULO
.TripMultiple
= 0;
454 // Figure out what multiple to use.
455 BreakoutTrip
= ULO
.TripMultiple
=
456 (unsigned)GreatestCommonDivisor64(ULO
.Count
, ULO
.TripMultiple
);
460 // Report the unrolling decision.
461 if (CompletelyUnroll
) {
462 LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header
->getName()
463 << " with trip count " << ULO
.TripCount
<< "!\n");
466 return OptimizationRemark(DEBUG_TYPE
, "FullyUnrolled", L
->getStartLoc(),
468 << "completely unrolled loop with "
469 << NV("UnrollCount", ULO
.TripCount
) << " iterations";
471 } else if (ULO
.PeelCount
) {
472 LLVM_DEBUG(dbgs() << "PEELING loop %" << Header
->getName()
473 << " with iteration count " << ULO
.PeelCount
<< "!\n");
476 return OptimizationRemark(DEBUG_TYPE
, "Peeled", L
->getStartLoc(),
478 << " peeled loop by " << NV("PeelCount", ULO
.PeelCount
)
482 auto DiagBuilder
= [&]() {
483 OptimizationRemark
Diag(DEBUG_TYPE
, "PartialUnrolled", L
->getStartLoc(),
485 return Diag
<< "unrolled loop by a factor of "
486 << NV("UnrollCount", ULO
.Count
);
489 LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header
->getName() << " by "
491 if (ULO
.TripMultiple
== 0 || BreakoutTrip
!= ULO
.TripMultiple
) {
492 LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip
);
495 return DiagBuilder() << " with a breakout at trip "
496 << NV("BreakoutTrip", BreakoutTrip
);
498 } else if (ULO
.TripMultiple
!= 1) {
499 LLVM_DEBUG(dbgs() << " with " << ULO
.TripMultiple
<< " trips per branch");
503 << " with " << NV("TripMultiple", ULO
.TripMultiple
)
504 << " trips per branch";
506 } else if (RuntimeTripCount
) {
507 LLVM_DEBUG(dbgs() << " with run-time trip count");
510 [&]() { return DiagBuilder() << " with run-time trip count"; });
512 LLVM_DEBUG(dbgs() << "!\n");
515 // We are going to make changes to this loop. SCEV may be keeping cached info
516 // about it, in particular about backedge taken count. The changes we make
517 // are guaranteed to invalidate this information for our loop. It is tempting
518 // to only invalidate the loop being unrolled, but it is incorrect as long as
519 // all exiting branches from all inner loops have impact on the outer loops,
520 // and if something changes inside them then any of outer loops may also
521 // change. When we forget outermost loop, we also forget all contained loops
522 // and this is what we need here.
524 if (ULO
.ForgetAllSCEV
)
525 SE
->forgetAllLoops();
527 SE
->forgetTopmostLoop(L
);
531 bool LatchIsExiting
= BI
->isConditional();
532 BasicBlock
*LoopExit
= nullptr;
533 if (LatchIsExiting
) {
534 ContinueOnTrue
= L
->contains(BI
->getSuccessor(0));
535 LoopExit
= BI
->getSuccessor(ContinueOnTrue
);
537 NumUnrolledWithHeader
++;
538 ContinueOnTrue
= L
->contains(HeaderBI
->getSuccessor(0));
539 LoopExit
= HeaderBI
->getSuccessor(ContinueOnTrue
);
542 // For the first iteration of the loop, we should use the precloned values for
543 // PHI nodes. Insert associations now.
544 ValueToValueMapTy LastValueMap
;
545 std::vector
<PHINode
*> OrigPHINode
;
546 for (BasicBlock::iterator I
= Header
->begin(); isa
<PHINode
>(I
); ++I
) {
547 OrigPHINode
.push_back(cast
<PHINode
>(I
));
550 std::vector
<BasicBlock
*> Headers
;
551 std::vector
<BasicBlock
*> HeaderSucc
;
552 std::vector
<BasicBlock
*> Latches
;
553 Headers
.push_back(Header
);
554 Latches
.push_back(LatchBlock
);
556 if (!LatchIsExiting
) {
557 auto *Term
= cast
<BranchInst
>(Header
->getTerminator());
558 if (Term
->isUnconditional() || L
->contains(Term
->getSuccessor(0))) {
559 assert(L
->contains(Term
->getSuccessor(0)));
560 HeaderSucc
.push_back(Term
->getSuccessor(0));
562 assert(L
->contains(Term
->getSuccessor(1)));
563 HeaderSucc
.push_back(Term
->getSuccessor(1));
567 // The current on-the-fly SSA update requires blocks to be processed in
568 // reverse postorder so that LastValueMap contains the correct value at each
570 LoopBlocksDFS
DFS(L
);
573 // Stash the DFS iterators before adding blocks to the loop.
574 LoopBlocksDFS::RPOIterator BlockBegin
= DFS
.beginRPO();
575 LoopBlocksDFS::RPOIterator BlockEnd
= DFS
.endRPO();
577 std::vector
<BasicBlock
*> UnrolledLoopBlocks
= L
->getBlocks();
579 // Loop Unrolling might create new loops. While we do preserve LoopInfo, we
580 // might break loop-simplified form for these loops (as they, e.g., would
581 // share the same exit blocks). We'll keep track of loops for which we can
582 // break this so that later we can re-simplify them.
583 SmallSetVector
<Loop
*, 4> LoopsToSimplify
;
584 for (Loop
*SubLoop
: *L
)
585 LoopsToSimplify
.insert(SubLoop
);
587 if (Header
->getParent()->isDebugInfoForProfiling())
588 for (BasicBlock
*BB
: L
->getBlocks())
589 for (Instruction
&I
: *BB
)
590 if (!isa
<DbgInfoIntrinsic
>(&I
))
591 if (const DILocation
*DIL
= I
.getDebugLoc()) {
592 auto NewDIL
= DIL
->cloneByMultiplyingDuplicationFactor(ULO
.Count
);
594 I
.setDebugLoc(NewDIL
.getValue());
597 << "Failed to create new discriminator: "
598 << DIL
->getFilename() << " Line: " << DIL
->getLine());
601 for (unsigned It
= 1; It
!= ULO
.Count
; ++It
) {
602 std::vector
<BasicBlock
*> NewBlocks
;
603 SmallDenseMap
<const Loop
*, Loop
*, 4> NewLoops
;
606 for (LoopBlocksDFS::RPOIterator BB
= BlockBegin
; BB
!= BlockEnd
; ++BB
) {
607 ValueToValueMapTy VMap
;
608 BasicBlock
*New
= CloneBasicBlock(*BB
, VMap
, "." + Twine(It
));
609 Header
->getParent()->getBasicBlockList().push_back(New
);
611 assert((*BB
!= Header
|| LI
->getLoopFor(*BB
) == L
) &&
612 "Header should not be in a sub-loop");
613 // Tell LI about New.
614 const Loop
*OldLoop
= addClonedBlockToLoopInfo(*BB
, New
, LI
, NewLoops
);
616 LoopsToSimplify
.insert(NewLoops
[OldLoop
]);
619 // Loop over all of the PHI nodes in the block, changing them to use
620 // the incoming values from the previous block.
621 for (PHINode
*OrigPHI
: OrigPHINode
) {
622 PHINode
*NewPHI
= cast
<PHINode
>(VMap
[OrigPHI
]);
623 Value
*InVal
= NewPHI
->getIncomingValueForBlock(LatchBlock
);
624 if (Instruction
*InValI
= dyn_cast
<Instruction
>(InVal
))
625 if (It
> 1 && L
->contains(InValI
))
626 InVal
= LastValueMap
[InValI
];
627 VMap
[OrigPHI
] = InVal
;
628 New
->getInstList().erase(NewPHI
);
631 // Update our running map of newest clones
632 LastValueMap
[*BB
] = New
;
633 for (ValueToValueMapTy::iterator VI
= VMap
.begin(), VE
= VMap
.end();
635 LastValueMap
[VI
->first
] = VI
->second
;
637 // Add phi entries for newly created values to all exit blocks.
638 for (BasicBlock
*Succ
: successors(*BB
)) {
639 if (L
->contains(Succ
))
641 for (PHINode
&PHI
: Succ
->phis()) {
642 Value
*Incoming
= PHI
.getIncomingValueForBlock(*BB
);
643 ValueToValueMapTy::iterator It
= LastValueMap
.find(Incoming
);
644 if (It
!= LastValueMap
.end())
645 Incoming
= It
->second
;
646 PHI
.addIncoming(Incoming
, New
);
649 // Keep track of new headers and latches as we create them, so that
650 // we can insert the proper branches later.
652 Headers
.push_back(New
);
653 if (*BB
== LatchBlock
)
654 Latches
.push_back(New
);
656 // Keep track of the successor of the new header in the current iteration.
657 for (auto *Pred
: predecessors(*BB
))
658 if (Pred
== Header
) {
659 HeaderSucc
.push_back(New
);
663 NewBlocks
.push_back(New
);
664 UnrolledLoopBlocks
.push_back(New
);
666 // Update DomTree: since we just copy the loop body, and each copy has a
667 // dedicated entry block (copy of the header block), this header's copy
668 // dominates all copied blocks. That means, dominance relations in the
669 // copied body are the same as in the original body.
672 DT
->addNewBlock(New
, Latches
[It
- 1]);
674 auto BBDomNode
= DT
->getNode(*BB
);
675 auto BBIDom
= BBDomNode
->getIDom();
676 BasicBlock
*OriginalBBIDom
= BBIDom
->getBlock();
678 New
, cast
<BasicBlock
>(LastValueMap
[cast
<Value
>(OriginalBBIDom
)]));
683 // Remap all instructions in the most recent iteration
684 for (BasicBlock
*NewBlock
: NewBlocks
) {
685 for (Instruction
&I
: *NewBlock
) {
686 ::remapInstruction(&I
, LastValueMap
);
687 if (auto *II
= dyn_cast
<IntrinsicInst
>(&I
))
688 if (II
->getIntrinsicID() == Intrinsic::assume
)
689 AC
->registerAssumption(II
);
694 // Loop over the PHI nodes in the original block, setting incoming values.
695 for (PHINode
*PN
: OrigPHINode
) {
696 if (CompletelyUnroll
) {
697 PN
->replaceAllUsesWith(PN
->getIncomingValueForBlock(Preheader
));
698 Header
->getInstList().erase(PN
);
699 } else if (ULO
.Count
> 1) {
700 Value
*InVal
= PN
->removeIncomingValue(LatchBlock
, false);
701 // If this value was defined in the loop, take the value defined by the
702 // last iteration of the loop.
703 if (Instruction
*InValI
= dyn_cast
<Instruction
>(InVal
)) {
704 if (L
->contains(InValI
))
705 InVal
= LastValueMap
[InVal
];
707 assert(Latches
.back() == LastValueMap
[LatchBlock
] && "bad last latch");
708 PN
->addIncoming(InVal
, Latches
.back());
712 auto setDest
= [LoopExit
, ContinueOnTrue
](BasicBlock
*Src
, BasicBlock
*Dest
,
713 ArrayRef
<BasicBlock
*> NextBlocks
,
714 BasicBlock
*BlockInLoop
,
715 bool NeedConditional
) {
716 auto *Term
= cast
<BranchInst
>(Src
->getTerminator());
717 if (NeedConditional
) {
718 // Update the conditional branch's successor for the following
720 Term
->setSuccessor(!ContinueOnTrue
, Dest
);
722 // Remove phi operands at this loop exit
723 if (Dest
!= LoopExit
) {
724 BasicBlock
*BB
= Src
;
725 for (BasicBlock
*Succ
: successors(BB
)) {
726 // Preserve the incoming value from BB if we are jumping to the block
727 // in the current loop.
728 if (Succ
== BlockInLoop
)
730 for (PHINode
&Phi
: Succ
->phis())
731 Phi
.removeIncomingValue(BB
, false);
734 // Replace the conditional branch with an unconditional one.
735 BranchInst::Create(Dest
, Term
);
736 Term
->eraseFromParent();
740 // Now that all the basic blocks for the unrolled iterations are in place,
741 // set up the branches to connect them.
742 if (LatchIsExiting
) {
743 // Set up latches to branch to the new header in the unrolled iterations or
744 // the loop exit for the last latch in a fully unrolled loop.
745 for (unsigned i
= 0, e
= Latches
.size(); i
!= e
; ++i
) {
746 // The branch destination.
747 unsigned j
= (i
+ 1) % e
;
748 BasicBlock
*Dest
= Headers
[j
];
749 bool NeedConditional
= true;
751 if (RuntimeTripCount
&& j
!= 0) {
752 NeedConditional
= false;
755 // For a complete unroll, make the last iteration end with a branch
756 // to the exit block.
757 if (CompletelyUnroll
) {
760 // If using trip count upper bound to completely unroll, we need to keep
761 // the conditional branch except the last one because the loop may exit
762 // after any iteration.
763 assert(NeedConditional
&&
764 "NeedCondition cannot be modified by both complete "
765 "unrolling and runtime unrolling");
767 (ULO
.PreserveCondBr
&& j
&& !(ULO
.PreserveOnlyFirst
&& i
!= 0));
768 } else if (j
!= BreakoutTrip
&&
769 (ULO
.TripMultiple
== 0 || j
% ULO
.TripMultiple
!= 0)) {
770 // If we know the trip count or a multiple of it, we can safely use an
771 // unconditional branch for some iterations.
772 NeedConditional
= false;
775 setDest(Latches
[i
], Dest
, Headers
, Headers
[i
], NeedConditional
);
778 // Setup headers to branch to their new successors in the unrolled
780 for (unsigned i
= 0, e
= Headers
.size(); i
!= e
; ++i
) {
781 // The branch destination.
782 unsigned j
= (i
+ 1) % e
;
783 BasicBlock
*Dest
= HeaderSucc
[i
];
784 bool NeedConditional
= true;
786 if (RuntimeTripCount
&& j
!= 0)
787 NeedConditional
= false;
789 if (CompletelyUnroll
)
790 // We cannot drop the conditional branch for the last condition, as we
791 // may have to execute the loop body depending on the condition.
792 NeedConditional
= j
== 0 || ULO
.PreserveCondBr
;
793 else if (j
!= BreakoutTrip
&&
794 (ULO
.TripMultiple
== 0 || j
% ULO
.TripMultiple
!= 0))
795 // If we know the trip count or a multiple of it, we can safely use an
796 // unconditional branch for some iterations.
797 NeedConditional
= false;
799 setDest(Headers
[i
], Dest
, Headers
, HeaderSucc
[i
], NeedConditional
);
802 // Set up latches to branch to the new header in the unrolled iterations or
803 // the loop exit for the last latch in a fully unrolled loop.
805 for (unsigned i
= 0, e
= Latches
.size(); i
!= e
; ++i
) {
806 // The original branch was replicated in each unrolled iteration.
807 BranchInst
*Term
= cast
<BranchInst
>(Latches
[i
]->getTerminator());
809 // The branch destination.
810 unsigned j
= (i
+ 1) % e
;
811 BasicBlock
*Dest
= Headers
[j
];
813 // When completely unrolling, the last latch becomes unreachable.
814 if (CompletelyUnroll
&& j
== 0)
815 new UnreachableInst(Term
->getContext(), Term
);
817 // Replace the conditional branch with an unconditional one.
818 BranchInst::Create(Dest
, Term
);
820 Term
->eraseFromParent();
824 // Update dominators of blocks we might reach through exits.
825 // Immediate dominator of such block might change, because we add more
826 // routes which can lead to the exit: we can now reach it from the copied
828 if (DT
&& ULO
.Count
> 1) {
829 for (auto *BB
: OriginalLoopBlocks
) {
830 auto *BBDomNode
= DT
->getNode(BB
);
831 SmallVector
<BasicBlock
*, 16> ChildrenToUpdate
;
832 for (auto *ChildDomNode
: BBDomNode
->getChildren()) {
833 auto *ChildBB
= ChildDomNode
->getBlock();
834 if (!L
->contains(ChildBB
))
835 ChildrenToUpdate
.push_back(ChildBB
);
838 BasicBlock
*&TermBlock
= LatchIsExiting
? LatchBlock
: Header
;
839 auto &TermBlocks
= LatchIsExiting
? Latches
: Headers
;
840 if (BB
== TermBlock
) {
841 // The latch is special because we emit unconditional branches in
842 // some cases where the original loop contained a conditional branch.
843 // Since the latch is always at the bottom of the loop, if the latch
844 // dominated an exit before unrolling, the new dominator of that exit
845 // must also be a latch. Specifically, the dominator is the first
846 // latch which ends in a conditional branch, or the last latch if
847 // there is no such latch.
848 // For loops exiting from the header, we limit the supported loops
849 // to have a single exiting block.
850 NewIDom
= TermBlocks
.back();
851 for (BasicBlock
*Iter
: TermBlocks
) {
852 Instruction
*Term
= Iter
->getTerminator();
853 if (isa
<BranchInst
>(Term
) && cast
<BranchInst
>(Term
)->isConditional()) {
859 // The new idom of the block will be the nearest common dominator
860 // of all copies of the previous idom. This is equivalent to the
861 // nearest common dominator of the previous idom and the first latch,
862 // which dominates all copies of the previous idom.
863 NewIDom
= DT
->findNearestCommonDominator(BB
, LatchBlock
);
865 for (auto *ChildBB
: ChildrenToUpdate
)
866 DT
->changeImmediateDominator(ChildBB
, NewIDom
);
870 assert(!DT
|| !UnrollVerifyDomtree
||
871 DT
->verify(DominatorTree::VerificationLevel::Fast
));
873 DomTreeUpdater
DTU(DT
, DomTreeUpdater::UpdateStrategy::Lazy
);
874 // Merge adjacent basic blocks, if possible.
875 for (BasicBlock
*Latch
: Latches
) {
876 BranchInst
*Term
= dyn_cast
<BranchInst
>(Latch
->getTerminator());
878 (CompletelyUnroll
&& !LatchIsExiting
&& Latch
== Latches
.back())) &&
879 "Need a branch as terminator, except when fully unrolling with "
880 "unconditional latch");
881 if (Term
&& Term
->isUnconditional()) {
882 BasicBlock
*Dest
= Term
->getSuccessor(0);
883 BasicBlock
*Fold
= Dest
->getUniquePredecessor();
884 if (MergeBlockIntoPredecessor(Dest
, &DTU
, LI
)) {
885 // Dest has been folded into Fold. Update our worklists accordingly.
886 std::replace(Latches
.begin(), Latches
.end(), Dest
, Fold
);
887 UnrolledLoopBlocks
.erase(std::remove(UnrolledLoopBlocks
.begin(),
888 UnrolledLoopBlocks
.end(), Dest
),
889 UnrolledLoopBlocks
.end());
893 // Apply updates to the DomTree.
894 DT
= &DTU
.getDomTree();
896 // At this point, the code is well formed. We now simplify the unrolled loop,
897 // doing constant propagation and dead code elimination as we go.
898 simplifyLoopAfterUnroll(L
, !CompletelyUnroll
&& (ULO
.Count
> 1 || Peeled
), LI
,
901 NumCompletelyUnrolled
+= CompletelyUnroll
;
904 Loop
*OuterL
= L
->getParentLoop();
905 // Update LoopInfo if the loop is completely removed.
906 if (CompletelyUnroll
)
909 // After complete unrolling most of the blocks should be contained in OuterL.
910 // However, some of them might happen to be out of OuterL (e.g. if they
911 // precede a loop exit). In this case we might need to insert PHI nodes in
912 // order to preserve LCSSA form.
913 // We don't need to check this if we already know that we need to fix LCSSA
915 // TODO: For now we just recompute LCSSA for the outer loop in this case, but
916 // it should be possible to fix it in-place.
917 if (PreserveLCSSA
&& OuterL
&& CompletelyUnroll
&& !NeedToFixLCSSA
)
918 NeedToFixLCSSA
|= ::needToInsertPhisForLCSSA(OuterL
, UnrolledLoopBlocks
, LI
);
920 // If we have a pass and a DominatorTree we should re-simplify impacted loops
921 // to ensure subsequent analyses can rely on this form. We want to simplify
922 // at least one layer outside of the loop that was unrolled so that any
923 // changes to the parent loop exposed by the unrolling are considered.
926 // OuterL includes all loops for which we can break loop-simplify, so
927 // it's sufficient to simplify only it (it'll recursively simplify inner
929 if (NeedToFixLCSSA
) {
930 // LCSSA must be performed on the outermost affected loop. The unrolled
931 // loop's last loop latch is guaranteed to be in the outermost loop
932 // after LoopInfo's been updated by LoopInfo::erase.
933 Loop
*LatchLoop
= LI
->getLoopFor(Latches
.back());
934 Loop
*FixLCSSALoop
= OuterL
;
935 if (!FixLCSSALoop
->contains(LatchLoop
))
936 while (FixLCSSALoop
->getParentLoop() != LatchLoop
)
937 FixLCSSALoop
= FixLCSSALoop
->getParentLoop();
939 formLCSSARecursively(*FixLCSSALoop
, *DT
, LI
, SE
);
940 } else if (PreserveLCSSA
) {
941 assert(OuterL
->isLCSSAForm(*DT
) &&
942 "Loops should be in LCSSA form after loop-unroll.");
945 // TODO: That potentially might be compile-time expensive. We should try
946 // to fix the loop-simplified form incrementally.
947 simplifyLoop(OuterL
, DT
, LI
, SE
, AC
, nullptr, PreserveLCSSA
);
949 // Simplify loops for which we might've broken loop-simplify form.
950 for (Loop
*SubLoop
: LoopsToSimplify
)
951 simplifyLoop(SubLoop
, DT
, LI
, SE
, AC
, nullptr, PreserveLCSSA
);
955 return CompletelyUnroll
? LoopUnrollResult::FullyUnrolled
956 : LoopUnrollResult::PartiallyUnrolled
;
959 /// Given an llvm.loop loop id metadata node, returns the loop hint metadata
960 /// node with the given name (for example, "llvm.loop.unroll.count"). If no
961 /// such metadata node exists, then nullptr is returned.
962 MDNode
*llvm::GetUnrollMetadata(MDNode
*LoopID
, StringRef Name
) {
963 // First operand should refer to the loop id itself.
964 assert(LoopID
->getNumOperands() > 0 && "requires at least one operand");
965 assert(LoopID
->getOperand(0) == LoopID
&& "invalid loop id");
967 for (unsigned i
= 1, e
= LoopID
->getNumOperands(); i
< e
; ++i
) {
968 MDNode
*MD
= dyn_cast
<MDNode
>(LoopID
->getOperand(i
));
972 MDString
*S
= dyn_cast
<MDString
>(MD
->getOperand(0));
976 if (Name
.equals(S
->getString()))