[ARM] Rejig MVE load store tests. NFC
[llvm-core.git] / lib / Transforms / Utils / LCSSA.cpp
blob29e7c5260f46174736f15460320cd9945af104e3
1 //===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass transforms loops by placing phi nodes at the end of the loops for
10 // all values that are live across the loop boundary. For example, it turns
11 // the left into the right code:
//
//   for (...)                for (...)
//     if (c)                   if (c)
//       X1 = ...                 X1 = ...
//     else                     else
//       X2 = ...                 X2 = ...
//     X3 = phi(X1, X2)         X3 = phi(X1, X2)
//   ... = X3 + 4             X4 = phi(X3)
//                            ... = X4 + 4
//
22 // This is still valid LLVM; the extra phi nodes are purely redundant, and will
23 // be trivially eliminated by InstCombine. The major benefit of this
24 // transformation is that it makes many other loop optimizations, such as
25 // LoopUnswitching, simpler.
27 //===----------------------------------------------------------------------===//
29 #include "llvm/Transforms/Utils/LCSSA.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/AliasAnalysis.h"
33 #include "llvm/Analysis/BasicAliasAnalysis.h"
34 #include "llvm/Analysis/BranchProbabilityInfo.h"
35 #include "llvm/Analysis/GlobalsModRef.h"
36 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/Analysis/MemorySSA.h"
38 #include "llvm/Analysis/ScalarEvolution.h"
39 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/Dominators.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/IntrinsicInst.h"
45 #include "llvm/IR/PredIteratorCache.h"
46 #include "llvm/Pass.h"
47 #include "llvm/Transforms/Utils.h"
48 #include "llvm/Transforms/Utils/Local.h"
49 #include "llvm/Transforms/Utils/LoopUtils.h"
50 #include "llvm/Transforms/Utils/SSAUpdater.h"
51 using namespace llvm;
53 #define DEBUG_TYPE "lcssa"
55 STATISTIC(NumLCSSA, "Number of live out of a loop variables");
57 #ifdef EXPENSIVE_CHECKS
58 static bool VerifyLoopLCSSA = true;
59 #else
60 static bool VerifyLoopLCSSA = false;
61 #endif
62 static cl::opt<bool, true>
63 VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA),
64 cl::Hidden,
65 cl::desc("Verify loop lcssa form (time consuming)"));
67 /// Return true if the specified block is in the list.
68 static bool isExitBlock(BasicBlock *BB,
69 const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
70 return is_contained(ExitBlocks, BB);
73 /// For every instruction from the worklist, check to see if it has any uses
74 /// that are outside the current loop. If so, insert LCSSA PHI nodes and
75 /// rewrite the uses.
76 bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
77 DominatorTree &DT, LoopInfo &LI) {
78 SmallVector<Use *, 16> UsesToRewrite;
79 SmallSetVector<PHINode *, 16> PHIsToRemove;
80 PredIteratorCache PredCache;
81 bool Changed = false;
83 // Cache the Loop ExitBlocks across this loop. We expect to get a lot of
84 // instructions within the same loops, computing the exit blocks is
85 // expensive, and we're not mutating the loop structure.
86 SmallDenseMap<Loop*, SmallVector<BasicBlock *,1>> LoopExitBlocks;
88 while (!Worklist.empty()) {
89 UsesToRewrite.clear();
91 Instruction *I = Worklist.pop_back_val();
92 assert(!I->getType()->isTokenTy() && "Tokens shouldn't be in the worklist");
93 BasicBlock *InstBB = I->getParent();
94 Loop *L = LI.getLoopFor(InstBB);
95 assert(L && "Instruction belongs to a BB that's not part of a loop");
96 if (!LoopExitBlocks.count(L))
97 L->getExitBlocks(LoopExitBlocks[L]);
98 assert(LoopExitBlocks.count(L));
99 const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L];
101 if (ExitBlocks.empty())
102 continue;
104 for (Use &U : I->uses()) {
105 Instruction *User = cast<Instruction>(U.getUser());
106 BasicBlock *UserBB = User->getParent();
107 if (auto *PN = dyn_cast<PHINode>(User))
108 UserBB = PN->getIncomingBlock(U);
110 if (InstBB != UserBB && !L->contains(UserBB))
111 UsesToRewrite.push_back(&U);
114 // If there are no uses outside the loop, exit with no change.
115 if (UsesToRewrite.empty())
116 continue;
118 ++NumLCSSA; // We are applying the transformation
120 // Invoke instructions are special in that their result value is not
121 // available along their unwind edge. The code below tests to see whether
122 // DomBB dominates the value, so adjust DomBB to the normal destination
123 // block, which is effectively where the value is first usable.
124 BasicBlock *DomBB = InstBB;
125 if (auto *Inv = dyn_cast<InvokeInst>(I))
126 DomBB = Inv->getNormalDest();
128 DomTreeNode *DomNode = DT.getNode(DomBB);
130 SmallVector<PHINode *, 16> AddedPHIs;
131 SmallVector<PHINode *, 8> PostProcessPHIs;
133 SmallVector<PHINode *, 4> InsertedPHIs;
134 SSAUpdater SSAUpdate(&InsertedPHIs);
135 SSAUpdate.Initialize(I->getType(), I->getName());
137 // Insert the LCSSA phi's into all of the exit blocks dominated by the
138 // value, and add them to the Phi's map.
139 for (BasicBlock *ExitBB : ExitBlocks) {
140 if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
141 continue;
143 // If we already inserted something for this BB, don't reprocess it.
144 if (SSAUpdate.HasValueForBlock(ExitBB))
145 continue;
147 PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
148 I->getName() + ".lcssa", &ExitBB->front());
149 // Get the debug location from the original instruction.
150 PN->setDebugLoc(I->getDebugLoc());
151 // Add inputs from inside the loop for this PHI.
152 for (BasicBlock *Pred : PredCache.get(ExitBB)) {
153 PN->addIncoming(I, Pred);
155 // If the exit block has a predecessor not within the loop, arrange for
156 // the incoming value use corresponding to that predecessor to be
157 // rewritten in terms of a different LCSSA PHI.
158 if (!L->contains(Pred))
159 UsesToRewrite.push_back(
160 &PN->getOperandUse(PN->getOperandNumForIncomingValue(
161 PN->getNumIncomingValues() - 1)));
164 AddedPHIs.push_back(PN);
166 // Remember that this phi makes the value alive in this block.
167 SSAUpdate.AddAvailableValue(ExitBB, PN);
169 // LoopSimplify might fail to simplify some loops (e.g. when indirect
170 // branches are involved). In such situations, it might happen that an
171 // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we
172 // create PHIs in such an exit block, we are also inserting PHIs into L2's
173 // header. This could break LCSSA form for L2 because these inserted PHIs
174 // can also have uses outside of L2. Remember all PHIs in such situation
175 // as to revisit than later on. FIXME: Remove this if indirectbr support
176 // into LoopSimplify gets improved.
177 if (auto *OtherLoop = LI.getLoopFor(ExitBB))
178 if (!L->contains(OtherLoop))
179 PostProcessPHIs.push_back(PN);
182 // Rewrite all uses outside the loop in terms of the new PHIs we just
183 // inserted.
184 for (Use *UseToRewrite : UsesToRewrite) {
185 // If this use is in an exit block, rewrite to use the newly inserted PHI.
186 // This is required for correctness because SSAUpdate doesn't handle uses
187 // in the same block. It assumes the PHI we inserted is at the end of the
188 // block.
189 Instruction *User = cast<Instruction>(UseToRewrite->getUser());
190 BasicBlock *UserBB = User->getParent();
191 if (auto *PN = dyn_cast<PHINode>(User))
192 UserBB = PN->getIncomingBlock(*UseToRewrite);
194 if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
195 // Tell the VHs that the uses changed. This updates SCEV's caches.
196 if (UseToRewrite->get()->hasValueHandle())
197 ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front());
198 UseToRewrite->set(&UserBB->front());
199 continue;
202 // If we added a single PHI, it must dominate all uses and we can directly
203 // rename it.
204 if (AddedPHIs.size() == 1) {
205 // Tell the VHs that the uses changed. This updates SCEV's caches.
206 // We might call ValueIsRAUWd multiple times for the same value.
207 if (UseToRewrite->get()->hasValueHandle())
208 ValueHandleBase::ValueIsRAUWd(*UseToRewrite, AddedPHIs[0]);
209 UseToRewrite->set(AddedPHIs[0]);
210 continue;
213 // Otherwise, do full PHI insertion.
214 SSAUpdate.RewriteUse(*UseToRewrite);
217 SmallVector<DbgValueInst *, 4> DbgValues;
218 llvm::findDbgValues(DbgValues, I);
220 // Update pre-existing debug value uses that reside outside the loop.
221 auto &Ctx = I->getContext();
222 for (auto DVI : DbgValues) {
223 BasicBlock *UserBB = DVI->getParent();
224 if (InstBB == UserBB || L->contains(UserBB))
225 continue;
226 // We currently only handle debug values residing in blocks that were
227 // traversed while rewriting the uses. If we inserted just a single PHI,
228 // we will handle all relevant debug values.
229 Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0]
230 : SSAUpdate.FindValueForBlock(UserBB);
231 if (V)
232 DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V)));
235 // SSAUpdater might have inserted phi-nodes inside other loops. We'll need
236 // to post-process them to keep LCSSA form.
237 for (PHINode *InsertedPN : InsertedPHIs) {
238 if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
239 if (!L->contains(OtherLoop))
240 PostProcessPHIs.push_back(InsertedPN);
243 // Post process PHI instructions that were inserted into another disjoint
244 // loop and update their exits properly.
245 for (auto *PostProcessPN : PostProcessPHIs)
246 if (!PostProcessPN->use_empty())
247 Worklist.push_back(PostProcessPN);
249 // Keep track of PHI nodes that we want to remove because they did not have
250 // any uses rewritten. If the new PHI is used, store it so that we can
251 // try to propagate dbg.value intrinsics to it.
252 SmallVector<PHINode *, 2> NeedDbgValues;
253 for (PHINode *PN : AddedPHIs)
254 if (PN->use_empty())
255 PHIsToRemove.insert(PN);
256 else
257 NeedDbgValues.push_back(PN);
258 insertDebugValuesForPHIs(InstBB, NeedDbgValues);
259 Changed = true;
261 // Remove PHI nodes that did not have any uses rewritten. We need to redo the
262 // use_empty() check here, because even if the PHI node wasn't used when added
263 // to PHIsToRemove, later added PHI nodes can be using it. This cleanup is
264 // not guaranteed to handle trees/cycles of PHI nodes that only are used by
265 // each other. Such situations has only been noticed when the input IR
266 // contains unreachable code, and leaving some extra redundant PHI nodes in
267 // such situations is considered a minor problem.
268 for (PHINode *PN : PHIsToRemove)
269 if (PN->use_empty())
270 PN->eraseFromParent();
271 return Changed;
274 // Compute the set of BasicBlocks in the loop `L` dominating at least one exit.
275 static void computeBlocksDominatingExits(
276 Loop &L, DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
277 SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) {
278 SmallVector<BasicBlock *, 8> BBWorklist;
280 // We start from the exit blocks, as every block trivially dominates itself
281 // (not strictly).
282 for (BasicBlock *BB : ExitBlocks)
283 BBWorklist.push_back(BB);
285 while (!BBWorklist.empty()) {
286 BasicBlock *BB = BBWorklist.pop_back_val();
288 // Check if this is a loop header. If this is the case, we're done.
289 if (L.getHeader() == BB)
290 continue;
292 // Otherwise, add its immediate predecessor in the dominator tree to the
293 // worklist, unless we visited it already.
294 BasicBlock *IDomBB = DT.getNode(BB)->getIDom()->getBlock();
296 // Exit blocks can have an immediate dominator not beloinging to the
297 // loop. For an exit block to be immediately dominated by another block
298 // outside the loop, it implies not all paths from that dominator, to the
299 // exit block, go through the loop.
300 // Example:
302 // |---- A
303 // | |
304 // | B<--
305 // | | |
306 // |---> C --
307 // |
308 // D
310 // C is the exit block of the loop and it's immediately dominated by A,
311 // which doesn't belong to the loop.
312 if (!L.contains(IDomBB))
313 continue;
315 if (BlocksDominatingExits.insert(IDomBB))
316 BBWorklist.push_back(IDomBB);
320 bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
321 ScalarEvolution *SE) {
322 bool Changed = false;
324 #ifdef EXPENSIVE_CHECKS
325 // Verify all sub-loops are in LCSSA form already.
326 for (Loop *SubLoop: L)
327 assert(SubLoop->isRecursivelyLCSSAForm(DT, *LI) && "Subloop not in LCSSA!");
328 #endif
330 SmallVector<BasicBlock *, 8> ExitBlocks;
331 L.getExitBlocks(ExitBlocks);
332 if (ExitBlocks.empty())
333 return false;
335 SmallSetVector<BasicBlock *, 8> BlocksDominatingExits;
337 // We want to avoid use-scanning leveraging dominance informations.
338 // If a block doesn't dominate any of the loop exits, the none of the values
339 // defined in the loop can be used outside.
340 // We compute the set of blocks fullfilling the conditions in advance
341 // walking the dominator tree upwards until we hit a loop header.
342 computeBlocksDominatingExits(L, DT, ExitBlocks, BlocksDominatingExits);
344 SmallVector<Instruction *, 8> Worklist;
346 // Look at all the instructions in the loop, checking to see if they have uses
347 // outside the loop. If so, put them into the worklist to rewrite those uses.
348 for (BasicBlock *BB : BlocksDominatingExits) {
349 // Skip blocks that are part of any sub-loops, they must be in LCSSA
350 // already.
351 if (LI->getLoopFor(BB) != &L)
352 continue;
353 for (Instruction &I : *BB) {
354 // Reject two common cases fast: instructions with no uses (like stores)
355 // and instructions with one use that is in the same block as this.
356 if (I.use_empty() ||
357 (I.hasOneUse() && I.user_back()->getParent() == BB &&
358 !isa<PHINode>(I.user_back())))
359 continue;
361 // Tokens cannot be used in PHI nodes, so we skip over them.
362 // We can run into tokens which are live out of a loop with catchswitch
363 // instructions in Windows EH if the catchswitch has one catchpad which
364 // is inside the loop and another which is not.
365 if (I.getType()->isTokenTy())
366 continue;
368 Worklist.push_back(&I);
371 Changed = formLCSSAForInstructions(Worklist, DT, *LI);
373 // If we modified the code, remove any caches about the loop from SCEV to
374 // avoid dangling entries.
375 // FIXME: This is a big hammer, can we clear the cache more selectively?
376 if (SE && Changed)
377 SE->forgetLoop(&L);
379 assert(L.isLCSSAForm(DT));
381 return Changed;
384 /// Process a loop nest depth first.
385 bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
386 ScalarEvolution *SE) {
387 bool Changed = false;
389 // Recurse depth-first through inner loops.
390 for (Loop *SubLoop : L.getSubLoops())
391 Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE);
393 Changed |= formLCSSA(L, DT, LI, SE);
394 return Changed;
397 /// Process all loops in the function, inner-most out.
398 static bool formLCSSAOnAllLoops(LoopInfo *LI, DominatorTree &DT,
399 ScalarEvolution *SE) {
400 bool Changed = false;
401 for (auto &L : *LI)
402 Changed |= formLCSSARecursively(*L, DT, LI, SE);
403 return Changed;
406 namespace {
407 struct LCSSAWrapperPass : public FunctionPass {
408 static char ID; // Pass identification, replacement for typeid
409 LCSSAWrapperPass() : FunctionPass(ID) {
410 initializeLCSSAWrapperPassPass(*PassRegistry::getPassRegistry());
413 // Cached analysis information for the current function.
414 DominatorTree *DT;
415 LoopInfo *LI;
416 ScalarEvolution *SE;
418 bool runOnFunction(Function &F) override;
419 void verifyAnalysis() const override {
420 // This check is very expensive. On the loop intensive compiles it may cause
421 // up to 10x slowdown. Currently it's disabled by default. LPPassManager
422 // always does limited form of the LCSSA verification. Similar reasoning
423 // was used for the LoopInfo verifier.
424 if (VerifyLoopLCSSA) {
425 assert(all_of(*LI,
426 [&](Loop *L) {
427 return L->isRecursivelyLCSSAForm(*DT, *LI);
428 }) &&
429 "LCSSA form is broken!");
433 /// This transformation requires natural loop information & requires that
434 /// loop preheaders be inserted into the CFG. It maintains both of these,
435 /// as well as the CFG. It also requires dominator information.
436 void getAnalysisUsage(AnalysisUsage &AU) const override {
437 AU.setPreservesCFG();
439 AU.addRequired<DominatorTreeWrapperPass>();
440 AU.addRequired<LoopInfoWrapperPass>();
441 AU.addPreservedID(LoopSimplifyID);
442 AU.addPreserved<AAResultsWrapperPass>();
443 AU.addPreserved<BasicAAWrapperPass>();
444 AU.addPreserved<GlobalsAAWrapperPass>();
445 AU.addPreserved<ScalarEvolutionWrapperPass>();
446 AU.addPreserved<SCEVAAWrapperPass>();
447 AU.addPreserved<BranchProbabilityInfoWrapperPass>();
448 AU.addPreserved<MemorySSAWrapperPass>();
450 // This is needed to perform LCSSA verification inside LPPassManager
451 AU.addRequired<LCSSAVerificationPass>();
452 AU.addPreserved<LCSSAVerificationPass>();
457 char LCSSAWrapperPass::ID = 0;
458 INITIALIZE_PASS_BEGIN(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
459 false, false)
460 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
461 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
462 INITIALIZE_PASS_DEPENDENCY(LCSSAVerificationPass)
463 INITIALIZE_PASS_END(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
464 false, false)
466 Pass *llvm::createLCSSAPass() { return new LCSSAWrapperPass(); }
467 char &llvm::LCSSAID = LCSSAWrapperPass::ID;
469 /// Transform \p F into loop-closed SSA form.
470 bool LCSSAWrapperPass::runOnFunction(Function &F) {
471 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
472 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
473 auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
474 SE = SEWP ? &SEWP->getSE() : nullptr;
476 return formLCSSAOnAllLoops(LI, *DT, SE);
479 PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) {
480 auto &LI = AM.getResult<LoopAnalysis>(F);
481 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
482 auto *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
483 if (!formLCSSAOnAllLoops(&LI, DT, SE))
484 return PreservedAnalyses::all();
486 PreservedAnalyses PA;
487 PA.preserveSet<CFGAnalyses>();
488 PA.preserve<BasicAA>();
489 PA.preserve<GlobalsAA>();
490 PA.preserve<SCEVAA>();
491 PA.preserve<ScalarEvolutionAnalysis>();
492 // BPI maps terminators to probabilities, since we don't modify the CFG, no
493 // updates are needed to preserve it.
494 PA.preserve<BranchProbabilityAnalysis>();
495 PA.preserve<MemorySSAAnalysis>();
496 return PA;