[RISCV] Fix mgather -> riscv.masked.strided.load combine not extending indices (...
[llvm-project.git] / llvm / lib / Transforms / Scalar / LoopDeletion.cpp
blobbfe9374cf2f8c863ea9c8cf24c66983819538650
1 //===- LoopDeletion.cpp - Dead Loop Deletion Pass ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Dead Loop Deletion Pass. This pass is responsible
10 // for eliminating loops with non-infinite computable trip counts that have no
11 // side effects or volatile instructions, and do not contribute to the
12 // computation of the function's return value.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/Transforms/Scalar/LoopDeletion.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/CFG.h"
20 #include "llvm/Analysis/InstructionSimplify.h"
21 #include "llvm/Analysis/LoopIterator.h"
22 #include "llvm/Analysis/LoopPass.h"
23 #include "llvm/Analysis/MemorySSA.h"
24 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
25 #include "llvm/Analysis/ScalarEvolution.h"
26 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/PatternMatch.h"
29 #include "llvm/Transforms/Scalar/LoopPassManager.h"
30 #include "llvm/Transforms/Utils/LoopUtils.h"
32 using namespace llvm;
34 #define DEBUG_TYPE "loop-delete"
36 STATISTIC(NumDeleted, "Number of loops deleted");
37 STATISTIC(NumBackedgesBroken,
38 "Number of loops for which we managed to break the backedge");
40 static cl::opt<bool> EnableSymbolicExecution(
41 "loop-deletion-enable-symbolic-execution", cl::Hidden, cl::init(true),
42 cl::desc("Break backedge through symbolic execution of 1st iteration "
43 "attempting to prove that the backedge is never taken"));
45 enum class LoopDeletionResult {
46 Unmodified,
47 Modified,
48 Deleted,
51 static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B) {
52 if (A == LoopDeletionResult::Deleted || B == LoopDeletionResult::Deleted)
53 return LoopDeletionResult::Deleted;
54 if (A == LoopDeletionResult::Modified || B == LoopDeletionResult::Modified)
55 return LoopDeletionResult::Modified;
56 return LoopDeletionResult::Unmodified;
59 /// Determines if a loop is dead.
60 ///
61 /// This assumes that we've already checked for unique exit and exiting blocks,
62 /// and that the code is in LCSSA form.
63 static bool isLoopDead(Loop *L, ScalarEvolution &SE,
64 SmallVectorImpl<BasicBlock *> &ExitingBlocks,
65 BasicBlock *ExitBlock, bool &Changed,
66 BasicBlock *Preheader, LoopInfo &LI) {
67 // Make sure that all PHI entries coming from the loop are loop invariant.
68 // Because the code is in LCSSA form, any values used outside of the loop
69 // must pass through a PHI in the exit block, meaning that this check is
70 // sufficient to guarantee that no loop-variant values are used outside
71 // of the loop.
72 bool AllEntriesInvariant = true;
73 bool AllOutgoingValuesSame = true;
74 if (ExitBlock) {
75 for (PHINode &P : ExitBlock->phis()) {
76 Value *incoming = P.getIncomingValueForBlock(ExitingBlocks[0]);
78 // Make sure all exiting blocks produce the same incoming value for the
79 // block. If there are different incoming values for different exiting
80 // blocks, then it is impossible to statically determine which value
81 // should be used.
82 AllOutgoingValuesSame =
83 all_of(ArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) {
84 return incoming == P.getIncomingValueForBlock(BB);
85 });
87 if (!AllOutgoingValuesSame)
88 break;
90 if (Instruction *I = dyn_cast<Instruction>(incoming)) {
91 if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator(),
92 /*MSSAU=*/nullptr, &SE)) {
93 AllEntriesInvariant = false;
94 break;
100 if (!AllEntriesInvariant || !AllOutgoingValuesSame)
101 return false;
103 // Make sure that no instructions in the block have potential side-effects.
104 // This includes instructions that could write to memory, and loads that are
105 // marked volatile.
106 for (const auto &I : L->blocks())
107 if (any_of(*I, [](Instruction &I) {
108 return I.mayHaveSideEffects() && !I.isDroppable();
110 return false;
112 // The loop or any of its sub-loops looping infinitely is legal. The loop can
113 // only be considered dead if either
114 // a. the function is mustprogress.
115 // b. all (sub-)loops are mustprogress or have a known trip-count.
116 if (L->getHeader()->getParent()->mustProgress())
117 return true;
119 LoopBlocksRPO RPOT(L);
120 RPOT.perform(&LI);
121 // If the loop contains an irreducible cycle, it may loop infinitely.
122 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
123 return false;
125 SmallVector<Loop *, 8> WorkList;
126 WorkList.push_back(L);
127 while (!WorkList.empty()) {
128 Loop *Current = WorkList.pop_back_val();
129 if (hasMustProgress(Current))
130 continue;
132 const SCEV *S = SE.getConstantMaxBackedgeTakenCount(Current);
133 if (isa<SCEVCouldNotCompute>(S)) {
134 LLVM_DEBUG(
135 dbgs() << "Could not compute SCEV MaxBackedgeTakenCount and was "
136 "not required to make progress.\n");
137 return false;
139 WorkList.append(Current->begin(), Current->end());
141 return true;
144 /// This function returns true if there is no viable path from the
145 /// entry block to the header of \p L. Right now, it only does
146 /// a local search to save compile time.
147 static bool isLoopNeverExecuted(Loop *L) {
148 using namespace PatternMatch;
150 auto *Preheader = L->getLoopPreheader();
151 // TODO: We can relax this constraint, since we just need a loop
152 // predecessor.
153 assert(Preheader && "Needs preheader!");
155 if (Preheader->isEntryBlock())
156 return false;
157 // All predecessors of the preheader should have a constant conditional
158 // branch, with the loop's preheader as not-taken.
159 for (auto *Pred: predecessors(Preheader)) {
160 BasicBlock *Taken, *NotTaken;
161 ConstantInt *Cond;
162 if (!match(Pred->getTerminator(),
163 m_Br(m_ConstantInt(Cond), Taken, NotTaken)))
164 return false;
165 if (!Cond->getZExtValue())
166 std::swap(Taken, NotTaken);
167 if (Taken == Preheader)
168 return false;
170 assert(!pred_empty(Preheader) &&
171 "Preheader should have predecessors at this point!");
172 // All the predecessors have the loop preheader as not-taken target.
173 return true;
176 static Value *
177 getValueOnFirstIteration(Value *V, DenseMap<Value *, Value *> &FirstIterValue,
178 const SimplifyQuery &SQ) {
179 // Quick hack: do not flood cache with non-instruction values.
180 if (!isa<Instruction>(V))
181 return V;
182 // Do we already know cached result?
183 auto Existing = FirstIterValue.find(V);
184 if (Existing != FirstIterValue.end())
185 return Existing->second;
186 Value *FirstIterV = nullptr;
187 if (auto *BO = dyn_cast<BinaryOperator>(V)) {
188 Value *LHS =
189 getValueOnFirstIteration(BO->getOperand(0), FirstIterValue, SQ);
190 Value *RHS =
191 getValueOnFirstIteration(BO->getOperand(1), FirstIterValue, SQ);
192 FirstIterV = simplifyBinOp(BO->getOpcode(), LHS, RHS, SQ);
193 } else if (auto *Cmp = dyn_cast<ICmpInst>(V)) {
194 Value *LHS =
195 getValueOnFirstIteration(Cmp->getOperand(0), FirstIterValue, SQ);
196 Value *RHS =
197 getValueOnFirstIteration(Cmp->getOperand(1), FirstIterValue, SQ);
198 FirstIterV = simplifyICmpInst(Cmp->getPredicate(), LHS, RHS, SQ);
199 } else if (auto *Select = dyn_cast<SelectInst>(V)) {
200 Value *Cond =
201 getValueOnFirstIteration(Select->getCondition(), FirstIterValue, SQ);
202 if (auto *C = dyn_cast<ConstantInt>(Cond)) {
203 auto *Selected = C->isAllOnesValue() ? Select->getTrueValue()
204 : Select->getFalseValue();
205 FirstIterV = getValueOnFirstIteration(Selected, FirstIterValue, SQ);
208 if (!FirstIterV)
209 FirstIterV = V;
210 FirstIterValue[V] = FirstIterV;
211 return FirstIterV;
214 // Try to prove that one of conditions that dominates the latch must exit on 1st
215 // iteration.
216 static bool canProveExitOnFirstIteration(Loop *L, DominatorTree &DT,
217 LoopInfo &LI) {
218 // Disabled by option.
219 if (!EnableSymbolicExecution)
220 return false;
222 BasicBlock *Predecessor = L->getLoopPredecessor();
223 BasicBlock *Latch = L->getLoopLatch();
225 if (!Predecessor || !Latch)
226 return false;
228 LoopBlocksRPO RPOT(L);
229 RPOT.perform(&LI);
231 // For the optimization to be correct, we need RPOT to have a property that
232 // each block is processed after all its predecessors, which may only be
233 // violated for headers of the current loop and all nested loops. Irreducible
234 // CFG provides multiple ways to break this assumption, so we do not want to
235 // deal with it.
236 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
237 return false;
239 BasicBlock *Header = L->getHeader();
240 // Blocks that are reachable on the 1st iteration.
241 SmallPtrSet<BasicBlock *, 4> LiveBlocks;
242 // Edges that are reachable on the 1st iteration.
243 DenseSet<BasicBlockEdge> LiveEdges;
244 LiveBlocks.insert(Header);
246 SmallPtrSet<BasicBlock *, 4> Visited;
247 auto MarkLiveEdge = [&](BasicBlock *From, BasicBlock *To) {
248 assert(LiveBlocks.count(From) && "Must be live!");
249 assert((LI.isLoopHeader(To) || !Visited.count(To)) &&
250 "Only canonical backedges are allowed. Irreducible CFG?");
251 assert((LiveBlocks.count(To) || !Visited.count(To)) &&
252 "We already discarded this block as dead!");
253 LiveBlocks.insert(To);
254 LiveEdges.insert({ From, To });
257 auto MarkAllSuccessorsLive = [&](BasicBlock *BB) {
258 for (auto *Succ : successors(BB))
259 MarkLiveEdge(BB, Succ);
262 // Check if there is only one value coming from all live predecessor blocks.
263 // Note that because we iterate in RPOT, we have already visited all its
264 // (non-latch) predecessors.
265 auto GetSoleInputOnFirstIteration = [&](PHINode & PN)->Value * {
266 BasicBlock *BB = PN.getParent();
267 bool HasLivePreds = false;
268 (void)HasLivePreds;
269 if (BB == Header)
270 return PN.getIncomingValueForBlock(Predecessor);
271 Value *OnlyInput = nullptr;
272 for (auto *Pred : predecessors(BB))
273 if (LiveEdges.count({ Pred, BB })) {
274 HasLivePreds = true;
275 Value *Incoming = PN.getIncomingValueForBlock(Pred);
276 // Skip undefs. If they are present, we can assume they are equal to
277 // the non-undef input.
278 if (isa<UndefValue>(Incoming))
279 continue;
280 // Two inputs.
281 if (OnlyInput && OnlyInput != Incoming)
282 return nullptr;
283 OnlyInput = Incoming;
286 assert(HasLivePreds && "No live predecessors?");
287 // If all incoming live value were undefs, return undef.
288 return OnlyInput ? OnlyInput : UndefValue::get(PN.getType());
290 DenseMap<Value *, Value *> FirstIterValue;
292 // Use the following algorithm to prove we never take the latch on the 1st
293 // iteration:
294 // 1. Traverse in topological order, so that whenever we visit a block, all
295 // its predecessors are already visited.
296 // 2. If we can prove that the block may have only 1 predecessor on the 1st
297 // iteration, map all its phis onto input from this predecessor.
298 // 3a. If we can prove which successor of out block is taken on the 1st
299 // iteration, mark this successor live.
300 // 3b. If we cannot prove it, conservatively assume that all successors are
301 // live.
302 auto &DL = Header->getModule()->getDataLayout();
303 const SimplifyQuery SQ(DL);
304 for (auto *BB : RPOT) {
305 Visited.insert(BB);
307 // This block is not reachable on the 1st iterations.
308 if (!LiveBlocks.count(BB))
309 continue;
311 // Skip inner loops.
312 if (LI.getLoopFor(BB) != L) {
313 MarkAllSuccessorsLive(BB);
314 continue;
317 // If Phi has only one input from all live input blocks, use it.
318 for (auto &PN : BB->phis()) {
319 if (!PN.getType()->isIntegerTy())
320 continue;
321 auto *Incoming = GetSoleInputOnFirstIteration(PN);
322 if (Incoming && DT.dominates(Incoming, BB->getTerminator())) {
323 Value *FirstIterV =
324 getValueOnFirstIteration(Incoming, FirstIterValue, SQ);
325 FirstIterValue[&PN] = FirstIterV;
329 using namespace PatternMatch;
330 Value *Cond;
331 BasicBlock *IfTrue, *IfFalse;
332 auto *Term = BB->getTerminator();
333 if (match(Term, m_Br(m_Value(Cond),
334 m_BasicBlock(IfTrue), m_BasicBlock(IfFalse)))) {
335 auto *ICmp = dyn_cast<ICmpInst>(Cond);
336 if (!ICmp || !ICmp->getType()->isIntegerTy()) {
337 MarkAllSuccessorsLive(BB);
338 continue;
341 // Can we prove constant true or false for this condition?
342 auto *KnownCondition = getValueOnFirstIteration(ICmp, FirstIterValue, SQ);
343 if (KnownCondition == ICmp) {
344 // Failed to simplify.
345 MarkAllSuccessorsLive(BB);
346 continue;
348 if (isa<UndefValue>(KnownCondition)) {
349 // TODO: According to langref, branching by undef is undefined behavior.
350 // It means that, theoretically, we should be able to just continue
351 // without marking any successors as live. However, we are not certain
352 // how correct our compiler is at handling such cases. So we are being
353 // very conservative here.
355 // If there is a non-loop successor, always assume this branch leaves the
356 // loop. Otherwise, arbitrarily take IfTrue.
358 // Once we are certain that branching by undef is handled correctly by
359 // other transforms, we should not mark any successors live here.
360 if (L->contains(IfTrue) && L->contains(IfFalse))
361 MarkLiveEdge(BB, IfTrue);
362 continue;
364 auto *ConstCondition = dyn_cast<ConstantInt>(KnownCondition);
365 if (!ConstCondition) {
366 // Non-constant condition, cannot analyze any further.
367 MarkAllSuccessorsLive(BB);
368 continue;
370 if (ConstCondition->isAllOnesValue())
371 MarkLiveEdge(BB, IfTrue);
372 else
373 MarkLiveEdge(BB, IfFalse);
374 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) {
375 auto *SwitchValue = SI->getCondition();
376 auto *SwitchValueOnFirstIter =
377 getValueOnFirstIteration(SwitchValue, FirstIterValue, SQ);
378 auto *ConstSwitchValue = dyn_cast<ConstantInt>(SwitchValueOnFirstIter);
379 if (!ConstSwitchValue) {
380 MarkAllSuccessorsLive(BB);
381 continue;
383 auto CaseIterator = SI->findCaseValue(ConstSwitchValue);
384 MarkLiveEdge(BB, CaseIterator->getCaseSuccessor());
385 } else {
386 MarkAllSuccessorsLive(BB);
387 continue;
391 // We can break the latch if it wasn't live.
392 return !LiveEdges.count({ Latch, Header });
395 /// If we can prove the backedge is untaken, remove it. This destroys the
396 /// loop, but leaves the (now trivially loop invariant) control flow and
397 /// side effects (if any) in place.
398 static LoopDeletionResult
399 breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
400 LoopInfo &LI, MemorySSA *MSSA,
401 OptimizationRemarkEmitter &ORE) {
402 assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
404 if (!L->getLoopLatch())
405 return LoopDeletionResult::Unmodified;
407 auto *BTCMax = SE.getConstantMaxBackedgeTakenCount(L);
408 if (!BTCMax->isZero()) {
409 auto *BTC = SE.getBackedgeTakenCount(L);
410 if (!BTC->isZero()) {
411 if (!isa<SCEVCouldNotCompute>(BTC) && SE.isKnownNonZero(BTC))
412 return LoopDeletionResult::Unmodified;
413 if (!canProveExitOnFirstIteration(L, DT, LI))
414 return LoopDeletionResult::Unmodified;
417 ++NumBackedgesBroken;
418 breakLoopBackedge(L, DT, SE, LI, MSSA);
419 return LoopDeletionResult::Deleted;
422 /// Remove a loop if it is dead.
424 /// A loop is considered dead either if it does not impact the observable
425 /// behavior of the program other than finite running time, or if it is
426 /// required to make progress by an attribute such as 'mustprogress' or
427 /// 'llvm.loop.mustprogress' and does not make any. This may remove
428 /// infinite loops that have been required to make progress.
430 /// This entire process relies pretty heavily on LoopSimplify form and LCSSA in
431 /// order to make various safety checks work.
433 /// \returns true if any changes were made. This may mutate the loop even if it
434 /// is unable to delete it due to hoisting trivially loop invariant
435 /// instructions out of the loop.
436 static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
437 ScalarEvolution &SE, LoopInfo &LI,
438 MemorySSA *MSSA,
439 OptimizationRemarkEmitter &ORE) {
440 assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
442 // We can only remove the loop if there is a preheader that we can branch from
443 // after removing it. Also, if LoopSimplify form is not available, stay out
444 // of trouble.
445 BasicBlock *Preheader = L->getLoopPreheader();
446 if (!Preheader || !L->hasDedicatedExits()) {
447 LLVM_DEBUG(
448 dbgs()
449 << "Deletion requires Loop with preheader and dedicated exits.\n");
450 return LoopDeletionResult::Unmodified;
453 BasicBlock *ExitBlock = L->getUniqueExitBlock();
455 // We can't directly branch to an EH pad. Don't bother handling this edge
456 // case.
457 if (ExitBlock && ExitBlock->isEHPad()) {
458 LLVM_DEBUG(dbgs() << "Cannot delete loop exiting to EH pad.\n");
459 return LoopDeletionResult::Unmodified;
462 if (ExitBlock && isLoopNeverExecuted(L)) {
463 LLVM_DEBUG(dbgs() << "Loop is proven to never execute, delete it!\n");
464 // We need to forget the loop before setting the incoming values of the exit
465 // phis to poison, so we properly invalidate the SCEV expressions for those
466 // phis.
467 SE.forgetLoop(L);
468 // Set incoming value to poison for phi nodes in the exit block.
469 for (PHINode &P : ExitBlock->phis()) {
470 std::fill(P.incoming_values().begin(), P.incoming_values().end(),
471 PoisonValue::get(P.getType()));
473 ORE.emit([&]() {
474 return OptimizationRemark(DEBUG_TYPE, "NeverExecutes", L->getStartLoc(),
475 L->getHeader())
476 << "Loop deleted because it never executes";
478 deleteDeadLoop(L, &DT, &SE, &LI, MSSA);
479 ++NumDeleted;
480 return LoopDeletionResult::Deleted;
483 // The remaining checks below are for a loop being dead because all statements
484 // in the loop are invariant.
485 SmallVector<BasicBlock *, 4> ExitingBlocks;
486 L->getExitingBlocks(ExitingBlocks);
488 // We require that the loop has at most one exit block. Otherwise, we'd be in
489 // the situation of needing to be able to solve statically which exit block
490 // will be branched to, or trying to preserve the branching logic in a loop
491 // invariant manner.
492 if (!ExitBlock && !L->hasNoExitBlocks()) {
493 LLVM_DEBUG(dbgs() << "Deletion requires at most one exit block.\n");
494 return LoopDeletionResult::Unmodified;
497 // Finally, we have to check that the loop really is dead.
498 bool Changed = false;
499 if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader, LI)) {
500 LLVM_DEBUG(dbgs() << "Loop is not invariant, cannot delete.\n");
501 return Changed ? LoopDeletionResult::Modified
502 : LoopDeletionResult::Unmodified;
505 LLVM_DEBUG(dbgs() << "Loop is invariant, delete it!\n");
506 ORE.emit([&]() {
507 return OptimizationRemark(DEBUG_TYPE, "Invariant", L->getStartLoc(),
508 L->getHeader())
509 << "Loop deleted because it is invariant";
511 deleteDeadLoop(L, &DT, &SE, &LI, MSSA);
512 ++NumDeleted;
514 return LoopDeletionResult::Deleted;
517 PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM,
518 LoopStandardAnalysisResults &AR,
519 LPMUpdater &Updater) {
521 LLVM_DEBUG(dbgs() << "Analyzing Loop for deletion: ");
522 LLVM_DEBUG(L.dump());
523 std::string LoopName = std::string(L.getName());
524 // For the new PM, we can't use OptimizationRemarkEmitter as an analysis
525 // pass. Function analyses need to be preserved across loop transformations
526 // but ORE cannot be preserved (see comment before the pass definition).
527 OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
528 auto Result = deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, ORE);
530 // If we can prove the backedge isn't taken, just break it and be done. This
531 // leaves the loop structure in place which means it can handle dispatching
532 // to the right exit based on whatever loop invariant structure remains.
533 if (Result != LoopDeletionResult::Deleted)
534 Result = merge(Result, breakBackedgeIfNotTaken(&L, AR.DT, AR.SE, AR.LI,
535 AR.MSSA, ORE));
537 if (Result == LoopDeletionResult::Unmodified)
538 return PreservedAnalyses::all();
540 if (Result == LoopDeletionResult::Deleted)
541 Updater.markLoopAsDeleted(L, LoopName);
543 auto PA = getLoopPassPreservedAnalyses();
544 if (AR.MSSA)
545 PA.preserve<MemorySSAAnalysis>();
546 return PA;