[RISCV] Fix mgather -> riscv.masked.strided.load combine not extending indices (...
[llvm-project.git] / llvm / lib / Analysis / LoopNestAnalysis.cpp
blobfe6d270b9ac53cd65201b28c21fdb39b4ada9df3
1 //===- LoopNestAnalysis.cpp - Loop Nest Analysis --------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// The implementation for the loop nest analysis.
11 ///
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Analysis/LoopNestAnalysis.h"
15 #include "llvm/ADT/BreadthFirstIterator.h"
16 #include "llvm/ADT/DepthFirstIterator.h"
17 #include "llvm/Analysis/ValueTracking.h"
19 using namespace llvm;
21 #define DEBUG_TYPE "loopnest"
22 #ifndef NDEBUG
23 static const char *VerboseDebug = DEBUG_TYPE "-verbose";
24 #endif
26 /// Determine whether the loops structure violates basic requirements for
27 /// perfect nesting:
28 /// - the inner loop should be the outer loop's only child
29 /// - the outer loop header should 'flow' into the inner loop preheader
30 /// or jump around the inner loop to the outer loop latch
31 /// - if the inner loop latch exits the inner loop, it should 'flow' into
32 /// the outer loop latch.
33 /// Returns true if the loop structure satisfies the basic requirements and
34 /// false otherwise.
35 static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop,
36 ScalarEvolution &SE);
38 //===----------------------------------------------------------------------===//
39 // LoopNest implementation
42 LoopNest::LoopNest(Loop &Root, ScalarEvolution &SE)
43 : MaxPerfectDepth(getMaxPerfectDepth(Root, SE)) {
44 append_range(Loops, breadth_first(&Root));
47 std::unique_ptr<LoopNest> LoopNest::getLoopNest(Loop &Root,
48 ScalarEvolution &SE) {
49 return std::make_unique<LoopNest>(Root, SE);
52 static CmpInst *getOuterLoopLatchCmp(const Loop &OuterLoop) {
54 const BasicBlock *Latch = OuterLoop.getLoopLatch();
55 assert(Latch && "Expecting a valid loop latch");
57 const BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
58 assert(BI && BI->isConditional() &&
59 "Expecting loop latch terminator to be a branch instruction");
61 CmpInst *OuterLoopLatchCmp = dyn_cast<CmpInst>(BI->getCondition());
62 DEBUG_WITH_TYPE(
63 VerboseDebug, if (OuterLoopLatchCmp) {
64 dbgs() << "Outer loop latch compare instruction: " << *OuterLoopLatchCmp
65 << "\n";
66 });
67 return OuterLoopLatchCmp;
70 static CmpInst *getInnerLoopGuardCmp(const Loop &InnerLoop) {
72 BranchInst *InnerGuard = InnerLoop.getLoopGuardBranch();
73 CmpInst *InnerLoopGuardCmp =
74 (InnerGuard) ? dyn_cast<CmpInst>(InnerGuard->getCondition()) : nullptr;
76 DEBUG_WITH_TYPE(
77 VerboseDebug, if (InnerLoopGuardCmp) {
78 dbgs() << "Inner loop guard compare instruction: " << *InnerLoopGuardCmp
79 << "\n";
80 });
81 return InnerLoopGuardCmp;
84 static bool checkSafeInstruction(const Instruction &I,
85 const CmpInst *InnerLoopGuardCmp,
86 const CmpInst *OuterLoopLatchCmp,
87 std::optional<Loop::LoopBounds> OuterLoopLB) {
89 bool IsAllowed =
90 isSafeToSpeculativelyExecute(&I) || isa<PHINode>(I) || isa<BranchInst>(I);
91 if (!IsAllowed)
92 return false;
93 // The only binary instruction allowed is the outer loop step instruction,
94 // the only comparison instructions allowed are the inner loop guard
95 // compare instruction and the outer loop latch compare instruction.
96 if ((isa<BinaryOperator>(I) && &I != &OuterLoopLB->getStepInst()) ||
97 (isa<CmpInst>(I) && &I != OuterLoopLatchCmp && &I != InnerLoopGuardCmp)) {
98 return false;
100 return true;
103 bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop,
104 ScalarEvolution &SE) {
105 return (analyzeLoopNestForPerfectNest(OuterLoop, InnerLoop, SE) ==
106 PerfectLoopNest);
109 LoopNest::LoopNestEnum LoopNest::analyzeLoopNestForPerfectNest(
110 const Loop &OuterLoop, const Loop &InnerLoop, ScalarEvolution &SE) {
112 assert(!OuterLoop.isInnermost() && "Outer loop should have subloops");
113 assert(!InnerLoop.isOutermost() && "Inner loop should have a parent");
114 LLVM_DEBUG(dbgs() << "Checking whether loop '" << OuterLoop.getName()
115 << "' and '" << InnerLoop.getName()
116 << "' are perfectly nested.\n");
118 // Determine whether the loops structure satisfies the following requirements:
119 // - the inner loop should be the outer loop's only child
120 // - the outer loop header should 'flow' into the inner loop preheader
121 // or jump around the inner loop to the outer loop latch
122 // - if the inner loop latch exits the inner loop, it should 'flow' into
123 // the outer loop latch.
124 if (!checkLoopsStructure(OuterLoop, InnerLoop, SE)) {
125 LLVM_DEBUG(dbgs() << "Not perfectly nested: invalid loop structure.\n");
126 return InvalidLoopStructure;
129 // Bail out if we cannot retrieve the outer loop bounds.
130 auto OuterLoopLB = OuterLoop.getBounds(SE);
131 if (OuterLoopLB == std::nullopt) {
132 LLVM_DEBUG(dbgs() << "Cannot compute loop bounds of OuterLoop: "
133 << OuterLoop << "\n";);
134 return OuterLoopLowerBoundUnknown;
137 CmpInst *OuterLoopLatchCmp = getOuterLoopLatchCmp(OuterLoop);
138 CmpInst *InnerLoopGuardCmp = getInnerLoopGuardCmp(InnerLoop);
140 // Determine whether instructions in a basic block are one of:
141 // - the inner loop guard comparison
142 // - the outer loop latch comparison
143 // - the outer loop induction variable increment
144 // - a phi node, a cast or a branch
145 auto containsOnlySafeInstructions = [&](const BasicBlock &BB) {
146 return llvm::all_of(BB, [&](const Instruction &I) {
147 bool IsSafeInstr = checkSafeInstruction(I, InnerLoopGuardCmp,
148 OuterLoopLatchCmp, OuterLoopLB);
149 if (IsSafeInstr) {
150 DEBUG_WITH_TYPE(VerboseDebug, {
151 dbgs() << "Instruction: " << I << "\nin basic block:" << BB
152 << "is unsafe.\n";
155 return IsSafeInstr;
159 // Check the code surrounding the inner loop for instructions that are deemed
160 // unsafe.
161 const BasicBlock *OuterLoopHeader = OuterLoop.getHeader();
162 const BasicBlock *OuterLoopLatch = OuterLoop.getLoopLatch();
163 const BasicBlock *InnerLoopPreHeader = InnerLoop.getLoopPreheader();
165 if (!containsOnlySafeInstructions(*OuterLoopHeader) ||
166 !containsOnlySafeInstructions(*OuterLoopLatch) ||
167 (InnerLoopPreHeader != OuterLoopHeader &&
168 !containsOnlySafeInstructions(*InnerLoopPreHeader)) ||
169 !containsOnlySafeInstructions(*InnerLoop.getExitBlock())) {
170 LLVM_DEBUG(dbgs() << "Not perfectly nested: code surrounding inner loop is "
171 "unsafe\n";);
172 return ImperfectLoopNest;
175 LLVM_DEBUG(dbgs() << "Loop '" << OuterLoop.getName() << "' and '"
176 << InnerLoop.getName() << "' are perfectly nested.\n");
178 return PerfectLoopNest;
181 LoopNest::InstrVectorTy LoopNest::getInterveningInstructions(
182 const Loop &OuterLoop, const Loop &InnerLoop, ScalarEvolution &SE) {
183 InstrVectorTy Instr;
184 switch (analyzeLoopNestForPerfectNest(OuterLoop, InnerLoop, SE)) {
185 case PerfectLoopNest:
186 LLVM_DEBUG(dbgs() << "The loop Nest is Perfect, returning empty "
187 "instruction vector. \n";);
188 return Instr;
190 case InvalidLoopStructure:
191 LLVM_DEBUG(dbgs() << "Not perfectly nested: invalid loop structure. "
192 "Instruction vector is empty.\n";);
193 return Instr;
195 case OuterLoopLowerBoundUnknown:
196 LLVM_DEBUG(dbgs() << "Cannot compute loop bounds of OuterLoop: "
197 << OuterLoop << "\nInstruction vector is empty.\n";);
198 return Instr;
200 case ImperfectLoopNest:
201 break;
204 // Identify the outer loop latch comparison instruction.
205 auto OuterLoopLB = OuterLoop.getBounds(SE);
207 CmpInst *OuterLoopLatchCmp = getOuterLoopLatchCmp(OuterLoop);
208 CmpInst *InnerLoopGuardCmp = getInnerLoopGuardCmp(InnerLoop);
210 auto GetUnsafeInstructions = [&](const BasicBlock &BB) {
211 for (const Instruction &I : BB) {
212 if (!checkSafeInstruction(I, InnerLoopGuardCmp, OuterLoopLatchCmp,
213 OuterLoopLB)) {
214 Instr.push_back(&I);
215 DEBUG_WITH_TYPE(VerboseDebug, {
216 dbgs() << "Instruction: " << I << "\nin basic block:" << BB
217 << "is unsafe.\n";
223 // Check the code surrounding the inner loop for instructions that are deemed
224 // unsafe.
225 const BasicBlock *OuterLoopHeader = OuterLoop.getHeader();
226 const BasicBlock *OuterLoopLatch = OuterLoop.getLoopLatch();
227 const BasicBlock *InnerLoopPreHeader = InnerLoop.getLoopPreheader();
228 const BasicBlock *InnerLoopExitBlock = InnerLoop.getExitBlock();
230 GetUnsafeInstructions(*OuterLoopHeader);
231 GetUnsafeInstructions(*OuterLoopLatch);
232 GetUnsafeInstructions(*InnerLoopExitBlock);
234 if (InnerLoopPreHeader != OuterLoopHeader) {
235 GetUnsafeInstructions(*InnerLoopPreHeader);
237 return Instr;
240 SmallVector<LoopVectorTy, 4>
241 LoopNest::getPerfectLoops(ScalarEvolution &SE) const {
242 SmallVector<LoopVectorTy, 4> LV;
243 LoopVectorTy PerfectNest;
245 for (Loop *L : depth_first(const_cast<Loop *>(Loops.front()))) {
246 if (PerfectNest.empty())
247 PerfectNest.push_back(L);
249 auto &SubLoops = L->getSubLoops();
250 if (SubLoops.size() == 1 && arePerfectlyNested(*L, *SubLoops.front(), SE)) {
251 PerfectNest.push_back(SubLoops.front());
252 } else {
253 LV.push_back(PerfectNest);
254 PerfectNest.clear();
258 return LV;
261 unsigned LoopNest::getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE) {
262 LLVM_DEBUG(dbgs() << "Get maximum perfect depth of loop nest rooted by loop '"
263 << Root.getName() << "'\n");
265 const Loop *CurrentLoop = &Root;
266 const auto *SubLoops = &CurrentLoop->getSubLoops();
267 unsigned CurrentDepth = 1;
269 while (SubLoops->size() == 1) {
270 const Loop *InnerLoop = SubLoops->front();
271 if (!arePerfectlyNested(*CurrentLoop, *InnerLoop, SE)) {
272 LLVM_DEBUG({
273 dbgs() << "Not a perfect nest: loop '" << CurrentLoop->getName()
274 << "' is not perfectly nested with loop '"
275 << InnerLoop->getName() << "'\n";
277 break;
280 CurrentLoop = InnerLoop;
281 SubLoops = &CurrentLoop->getSubLoops();
282 ++CurrentDepth;
285 return CurrentDepth;
288 const BasicBlock &LoopNest::skipEmptyBlockUntil(const BasicBlock *From,
289 const BasicBlock *End,
290 bool CheckUniquePred) {
291 assert(From && "Expecting valid From");
292 assert(End && "Expecting valid End");
294 if (From == End || !From->getUniqueSuccessor())
295 return *From;
297 auto IsEmpty = [](const BasicBlock *BB) {
298 return (BB->size() == 1);
301 // Visited is used to avoid running into an infinite loop.
302 SmallPtrSet<const BasicBlock *, 4> Visited;
303 const BasicBlock *BB = From->getUniqueSuccessor();
304 const BasicBlock *PredBB = From;
305 while (BB && BB != End && IsEmpty(BB) && !Visited.count(BB) &&
306 (!CheckUniquePred || BB->getUniquePredecessor())) {
307 Visited.insert(BB);
308 PredBB = BB;
309 BB = BB->getUniqueSuccessor();
312 return (BB == End) ? *End : *PredBB;
315 static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop,
316 ScalarEvolution &SE) {
317 // The inner loop must be the only outer loop's child.
318 if ((OuterLoop.getSubLoops().size() != 1) ||
319 (InnerLoop.getParentLoop() != &OuterLoop))
320 return false;
322 // We expect loops in normal form which have a preheader, header, latch...
323 if (!OuterLoop.isLoopSimplifyForm() || !InnerLoop.isLoopSimplifyForm())
324 return false;
326 const BasicBlock *OuterLoopHeader = OuterLoop.getHeader();
327 const BasicBlock *OuterLoopLatch = OuterLoop.getLoopLatch();
328 const BasicBlock *InnerLoopPreHeader = InnerLoop.getLoopPreheader();
329 const BasicBlock *InnerLoopLatch = InnerLoop.getLoopLatch();
330 const BasicBlock *InnerLoopExit = InnerLoop.getExitBlock();
332 // We expect rotated loops. The inner loop should have a single exit block.
333 if (OuterLoop.getExitingBlock() != OuterLoopLatch ||
334 InnerLoop.getExitingBlock() != InnerLoopLatch || !InnerLoopExit)
335 return false;
337 // Returns whether the block `ExitBlock` contains at least one LCSSA Phi node.
338 auto ContainsLCSSAPhi = [](const BasicBlock &ExitBlock) {
339 return any_of(ExitBlock.phis(), [](const PHINode &PN) {
340 return PN.getNumIncomingValues() == 1;
344 // Returns whether the block `BB` qualifies for being an extra Phi block. The
345 // extra Phi block is the additional block inserted after the exit block of an
346 // "guarded" inner loop which contains "only" Phi nodes corresponding to the
347 // LCSSA Phi nodes in the exit block.
348 auto IsExtraPhiBlock = [&](const BasicBlock &BB) {
349 return BB.getFirstNonPHI() == BB.getTerminator() &&
350 all_of(BB.phis(), [&](const PHINode &PN) {
351 return all_of(PN.blocks(), [&](const BasicBlock *IncomingBlock) {
352 return IncomingBlock == InnerLoopExit ||
353 IncomingBlock == OuterLoopHeader;
358 const BasicBlock *ExtraPhiBlock = nullptr;
359 // Ensure the only branch that may exist between the loops is the inner loop
360 // guard.
361 if (OuterLoopHeader != InnerLoopPreHeader) {
362 const BasicBlock &SingleSucc =
363 LoopNest::skipEmptyBlockUntil(OuterLoopHeader, InnerLoopPreHeader);
365 // no conditional branch present
366 if (&SingleSucc != InnerLoopPreHeader) {
367 const BranchInst *BI = dyn_cast<BranchInst>(SingleSucc.getTerminator());
369 if (!BI || BI != InnerLoop.getLoopGuardBranch())
370 return false;
372 bool InnerLoopExitContainsLCSSA = ContainsLCSSAPhi(*InnerLoopExit);
374 // The successors of the inner loop guard should be the inner loop
375 // preheader or the outer loop latch possibly through empty blocks.
376 for (const BasicBlock *Succ : BI->successors()) {
377 const BasicBlock *PotentialInnerPreHeader = Succ;
378 const BasicBlock *PotentialOuterLatch = Succ;
380 // Ensure the inner loop guard successor is empty before skipping
381 // blocks.
382 if (Succ->size() == 1) {
383 PotentialInnerPreHeader =
384 &LoopNest::skipEmptyBlockUntil(Succ, InnerLoopPreHeader);
385 PotentialOuterLatch =
386 &LoopNest::skipEmptyBlockUntil(Succ, OuterLoopLatch);
389 if (PotentialInnerPreHeader == InnerLoopPreHeader)
390 continue;
391 if (PotentialOuterLatch == OuterLoopLatch)
392 continue;
394 // If `InnerLoopExit` contains LCSSA Phi instructions, additional block
395 // may be inserted before the `OuterLoopLatch` to which `BI` jumps. The
396 // loops are still considered perfectly nested if the extra block only
397 // contains Phi instructions from InnerLoopExit and OuterLoopHeader.
398 if (InnerLoopExitContainsLCSSA && IsExtraPhiBlock(*Succ) &&
399 Succ->getSingleSuccessor() == OuterLoopLatch) {
400 // Points to the extra block so that we can reference it later in the
401 // final check. We can also conclude that the inner loop is
402 // guarded and there exists LCSSA Phi node in the exit block later if
403 // we see a non-null `ExtraPhiBlock`.
404 ExtraPhiBlock = Succ;
405 continue;
408 DEBUG_WITH_TYPE(VerboseDebug, {
409 dbgs() << "Inner loop guard successor " << Succ->getName()
410 << " doesn't lead to inner loop preheader or "
411 "outer loop latch.\n";
413 return false;
418 // Ensure the inner loop exit block lead to the outer loop latch possibly
419 // through empty blocks.
420 if ((!ExtraPhiBlock ||
421 &LoopNest::skipEmptyBlockUntil(InnerLoop.getExitBlock(),
422 ExtraPhiBlock) != ExtraPhiBlock) &&
423 (&LoopNest::skipEmptyBlockUntil(InnerLoop.getExitBlock(),
424 OuterLoopLatch) != OuterLoopLatch)) {
425 DEBUG_WITH_TYPE(
426 VerboseDebug,
427 dbgs() << "Inner loop exit block " << *InnerLoopExit
428 << " does not directly lead to the outer loop latch.\n";);
429 return false;
432 return true;
435 AnalysisKey LoopNestAnalysis::Key;
437 raw_ostream &llvm::operator<<(raw_ostream &OS, const LoopNest &LN) {
438 OS << "IsPerfect=";
439 if (LN.getMaxPerfectDepth() == LN.getNestDepth())
440 OS << "true";
441 else
442 OS << "false";
443 OS << ", Depth=" << LN.getNestDepth();
444 OS << ", OutermostLoop: " << LN.getOutermostLoop().getName();
445 OS << ", Loops: ( ";
446 for (const Loop *L : LN.getLoops())
447 OS << L->getName() << " ";
448 OS << ")";
450 return OS;
453 //===----------------------------------------------------------------------===//
454 // LoopNestPrinterPass implementation
457 PreservedAnalyses LoopNestPrinterPass::run(Loop &L, LoopAnalysisManager &AM,
458 LoopStandardAnalysisResults &AR,
459 LPMUpdater &U) {
460 if (auto LN = LoopNest::getLoopNest(L, AR.SE))
461 OS << *LN << "\n";
463 return PreservedAnalyses::all();