[RISCV] Fix mgather -> riscv.masked.strided.load combine not extending indices (...
[llvm-project.git] / llvm / lib / Transforms / Scalar / LoopVersioningLICM.cpp
blobf39c24484840cd76623e53b1e55fca77c64cb9c0
1 //===- LoopVersioningLICM.cpp - LICM Loop Versioning ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // When alias analysis is uncertain about the aliasing between any two accesses,
10 // it will return MayAlias. This uncertainty from alias analysis restricts LICM
11 // from proceeding further. In cases where alias analysis is uncertain we might
12 // use loop versioning as an alternative.
14 // Loop Versioning will create a version of the loop with aggressive aliasing
15 // assumptions in addition to the original with conservative (default) aliasing
16 // assumptions. The version of the loop making aggressive aliasing assumptions
17 // will have all the memory accesses marked as no-alias. These two versions of
18 // loop will be preceded by a memory runtime check. This runtime check consists
19 // of bound checks for all unique memory accessed in loop, and it ensures the
20 // lack of memory aliasing. The result of the runtime check determines which of
21 // the loop versions is executed: If the runtime check detects any memory
22 // aliasing, then the original loop is executed. Otherwise, the version with
23 // aggressive aliasing assumptions is used.
25 // Following are the top level steps:
27 // a) Perform LoopVersioningLICM's feasibility check.
28 // b) If loop is a candidate for versioning then create a memory bound check,
29 // by considering all the memory accesses in loop body.
30 // c) Clone original loop and set all memory accesses as no-alias in new loop.
31 // d) Set original loop & versioned loop as a branch target of the runtime check
32 // result.
// It transforms loop as shown below:
//
//                        +----------------+
//                        |Runtime Memcheck|
//                        +----------------+
//                                |
//             +----------+----------------+----------+
//             |                                      |
//   +---------+----------+               +-----------+----------+
//   |Orig Loop Preheader |               |Cloned Loop Preheader |
//   +--------------------+               +----------------------+
//             |                                      |
//   +--------------------+               +----------------------+
//   |Orig Loop Body      |               |Cloned Loop Body      |
//   +--------------------+               +----------------------+
//             |                                      |
//   +--------------------+               +----------------------+
//   |Orig Loop Exit Block|               |Cloned Loop Exit Block|
//   +--------------------+               +-----------+----------+
//             |                                      |
//             +----------+--------------+-----------+
//                                |
//                          +-----+----+
//                          |Join Block|
//                          +----------+
60 //===----------------------------------------------------------------------===//
62 #include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
63 #include "llvm/ADT/SmallVector.h"
64 #include "llvm/ADT/StringRef.h"
65 #include "llvm/Analysis/AliasAnalysis.h"
66 #include "llvm/Analysis/AliasSetTracker.h"
67 #include "llvm/Analysis/GlobalsModRef.h"
68 #include "llvm/Analysis/LoopAccessAnalysis.h"
69 #include "llvm/Analysis/LoopInfo.h"
70 #include "llvm/Analysis/LoopPass.h"
71 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
72 #include "llvm/Analysis/ScalarEvolution.h"
73 #include "llvm/IR/Dominators.h"
74 #include "llvm/IR/Instruction.h"
75 #include "llvm/IR/Instructions.h"
76 #include "llvm/IR/LLVMContext.h"
77 #include "llvm/IR/MDBuilder.h"
78 #include "llvm/IR/Metadata.h"
79 #include "llvm/IR/Value.h"
80 #include "llvm/Support/Casting.h"
81 #include "llvm/Support/CommandLine.h"
82 #include "llvm/Support/Debug.h"
83 #include "llvm/Support/raw_ostream.h"
84 #include "llvm/Transforms/Utils.h"
85 #include "llvm/Transforms/Utils/LoopUtils.h"
86 #include "llvm/Transforms/Utils/LoopVersioning.h"
87 #include <cassert>
88 #include <memory>
90 using namespace llvm;
92 #define DEBUG_TYPE "loop-versioning-licm"
/// Name of the loop metadata string this pass attaches to both the original
/// and the versioned loop after versioning, and looks for before versioning,
/// so that a loop is never versioned twice. The pointer itself is const: the
/// name is a fixed identifier and must never be re-pointed at run time.
static const char *const LICMVersioningMetaData =
    "llvm.loop.licm_versioning.disable";
96 /// Threshold minimum allowed percentage for possible
97 /// invariant instructions in a loop.
98 static cl::opt<float>
99 LVInvarThreshold("licm-versioning-invariant-threshold",
100 cl::desc("LoopVersioningLICM's minimum allowed percentage"
101 "of possible invariant instructions per loop"),
102 cl::init(25), cl::Hidden);
104 /// Threshold for maximum allowed loop nest/depth
105 static cl::opt<unsigned> LVLoopDepthThreshold(
106 "licm-versioning-max-depth-threshold",
107 cl::desc(
108 "LoopVersioningLICM's threshold for maximum allowed loop nest/depth"),
109 cl::init(2), cl::Hidden);
111 namespace {
113 struct LoopVersioningLICM {
114 // We don't explicitly pass in LoopAccessInfo to the constructor since the
115 // loop versioning might return early due to instructions that are not safe
116 // for versioning. By passing the proxy instead the construction of
117 // LoopAccessInfo will take place only when it's necessary.
118 LoopVersioningLICM(AliasAnalysis *AA, ScalarEvolution *SE,
119 OptimizationRemarkEmitter *ORE,
120 LoopAccessInfoManager &LAIs, LoopInfo &LI,
121 Loop *CurLoop)
122 : AA(AA), SE(SE), LAIs(LAIs), LI(LI), CurLoop(CurLoop),
123 LoopDepthThreshold(LVLoopDepthThreshold),
124 InvariantThreshold(LVInvarThreshold), ORE(ORE) {}
126 bool run(DominatorTree *DT);
128 private:
129 // Current AliasAnalysis information
130 AliasAnalysis *AA;
132 // Current ScalarEvolution
133 ScalarEvolution *SE;
135 // Current Loop's LoopAccessInfo
136 const LoopAccessInfo *LAI = nullptr;
138 // Proxy for retrieving LoopAccessInfo.
139 LoopAccessInfoManager &LAIs;
141 LoopInfo &LI;
143 // The current loop we are working on.
144 Loop *CurLoop;
146 // Maximum loop nest threshold
147 unsigned LoopDepthThreshold;
149 // Minimum invariant threshold
150 float InvariantThreshold;
152 // Counter to track num of load & store
153 unsigned LoadAndStoreCounter = 0;
155 // Counter to track num of invariant
156 unsigned InvariantCounter = 0;
158 // Read only loop marker.
159 bool IsReadOnlyLoop = true;
161 // OptimizationRemarkEmitter
162 OptimizationRemarkEmitter *ORE;
164 bool isLegalForVersioning();
165 bool legalLoopStructure();
166 bool legalLoopInstructions();
167 bool legalLoopMemoryAccesses();
168 bool isLoopAlreadyVisited();
169 void setNoAliasToLoop(Loop *VerLoop);
170 bool instructionSafeForVersioning(Instruction *I);
173 } // end anonymous namespace
175 /// Check loop structure and confirms it's good for LoopVersioningLICM.
176 bool LoopVersioningLICM::legalLoopStructure() {
177 // Loop must be in loop simplify form.
178 if (!CurLoop->isLoopSimplifyForm()) {
179 LLVM_DEBUG(dbgs() << " loop is not in loop-simplify form.\n");
180 return false;
182 // Loop should be innermost loop, if not return false.
183 if (!CurLoop->getSubLoops().empty()) {
184 LLVM_DEBUG(dbgs() << " loop is not innermost\n");
185 return false;
187 // Loop should have a single backedge, if not return false.
188 if (CurLoop->getNumBackEdges() != 1) {
189 LLVM_DEBUG(dbgs() << " loop has multiple backedges\n");
190 return false;
192 // Loop must have a single exiting block, if not return false.
193 if (!CurLoop->getExitingBlock()) {
194 LLVM_DEBUG(dbgs() << " loop has multiple exiting block\n");
195 return false;
197 // We only handle bottom-tested loop, i.e. loop in which the condition is
198 // checked at the end of each iteration. With that we can assume that all
199 // instructions in the loop are executed the same number of times.
200 if (CurLoop->getExitingBlock() != CurLoop->getLoopLatch()) {
201 LLVM_DEBUG(dbgs() << " loop is not bottom tested\n");
202 return false;
204 // Parallel loops must not have aliasing loop-invariant memory accesses.
205 // Hence we don't need to version anything in this case.
206 if (CurLoop->isAnnotatedParallel()) {
207 LLVM_DEBUG(dbgs() << " Parallel loop is not worth versioning\n");
208 return false;
210 // Loop depth more then LoopDepthThreshold are not allowed
211 if (CurLoop->getLoopDepth() > LoopDepthThreshold) {
212 LLVM_DEBUG(dbgs() << " loop depth is more then threshold\n");
213 return false;
215 // We need to be able to compute the loop trip count in order
216 // to generate the bound checks.
217 const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop);
218 if (isa<SCEVCouldNotCompute>(ExitCount)) {
219 LLVM_DEBUG(dbgs() << " loop does not has trip count\n");
220 return false;
222 return true;
225 /// Check memory accesses in loop and confirms it's good for
226 /// LoopVersioningLICM.
227 bool LoopVersioningLICM::legalLoopMemoryAccesses() {
228 // Loop over the body of this loop, construct AST.
229 BatchAAResults BAA(*AA);
230 AliasSetTracker AST(BAA);
231 for (auto *Block : CurLoop->getBlocks()) {
232 // Ignore blocks in subloops.
233 if (LI.getLoopFor(Block) == CurLoop)
234 AST.add(*Block);
237 // Memory check:
238 // Transform phase will generate a versioned loop and also a runtime check to
239 // ensure the pointers are independent and they don’t alias.
240 // In version variant of loop, alias meta data asserts that all access are
241 // mutually independent.
243 // Pointers aliasing in alias domain are avoided because with multiple
244 // aliasing domains we may not be able to hoist potential loop invariant
245 // access out of the loop.
247 // Iterate over alias tracker sets, and confirm AliasSets doesn't have any
248 // must alias set.
249 bool HasMayAlias = false;
250 bool TypeSafety = false;
251 bool HasMod = false;
252 for (const auto &I : AST) {
253 const AliasSet &AS = I;
254 // Skip Forward Alias Sets, as this should be ignored as part of
255 // the AliasSetTracker object.
256 if (AS.isForwardingAliasSet())
257 continue;
258 // With MustAlias its not worth adding runtime bound check.
259 if (AS.isMustAlias())
260 return false;
261 const Value *SomePtr = AS.begin()->Ptr;
262 bool TypeCheck = true;
263 // Check for Mod & MayAlias
264 HasMayAlias |= AS.isMayAlias();
265 HasMod |= AS.isMod();
266 for (const auto &MemLoc : AS) {
267 const Value *Ptr = MemLoc.Ptr;
268 // Alias tracker should have pointers of same data type.
270 // FIXME: check no longer effective since opaque pointers?
271 // If the intent is to check that the memory accesses use the
272 // same data type (such that LICM can promote them), then we
273 // can no longer see this from the pointer value types.
274 TypeCheck = (TypeCheck && (SomePtr->getType() == Ptr->getType()));
276 // At least one alias tracker should have pointers of same data type.
277 TypeSafety |= TypeCheck;
279 // Ensure types should be of same type.
280 if (!TypeSafety) {
281 LLVM_DEBUG(dbgs() << " Alias tracker type safety failed!\n");
282 return false;
284 // Ensure loop body shouldn't be read only.
285 if (!HasMod) {
286 LLVM_DEBUG(dbgs() << " No memory modified in loop body\n");
287 return false;
289 // Make sure alias set has may alias case.
290 // If there no alias memory ambiguity, return false.
291 if (!HasMayAlias) {
292 LLVM_DEBUG(dbgs() << " No ambiguity in memory access.\n");
293 return false;
295 return true;
298 /// Check loop instructions safe for Loop versioning.
299 /// It returns true if it's safe else returns false.
300 /// Consider following:
301 /// 1) Check all load store in loop body are non atomic & non volatile.
302 /// 2) Check function call safety, by ensuring its not accessing memory.
303 /// 3) Loop body shouldn't have any may throw instruction.
304 /// 4) Loop body shouldn't have any convergent or noduplicate instructions.
305 bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
306 assert(I != nullptr && "Null instruction found!");
307 // Check function call safety
308 if (auto *Call = dyn_cast<CallBase>(I)) {
309 if (Call->isConvergent() || Call->cannotDuplicate()) {
310 LLVM_DEBUG(dbgs() << " Convergent call site found.\n");
311 return false;
314 if (!AA->doesNotAccessMemory(Call)) {
315 LLVM_DEBUG(dbgs() << " Unsafe call site found.\n");
316 return false;
320 // Avoid loops with possiblity of throw
321 if (I->mayThrow()) {
322 LLVM_DEBUG(dbgs() << " May throw instruction found in loop body\n");
323 return false;
325 // If current instruction is load instructions
326 // make sure it's a simple load (non atomic & non volatile)
327 if (I->mayReadFromMemory()) {
328 LoadInst *Ld = dyn_cast<LoadInst>(I);
329 if (!Ld || !Ld->isSimple()) {
330 LLVM_DEBUG(dbgs() << " Found a non-simple load.\n");
331 return false;
333 LoadAndStoreCounter++;
334 Value *Ptr = Ld->getPointerOperand();
335 // Check loop invariant.
336 if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
337 InvariantCounter++;
339 // If current instruction is store instruction
340 // make sure it's a simple store (non atomic & non volatile)
341 else if (I->mayWriteToMemory()) {
342 StoreInst *St = dyn_cast<StoreInst>(I);
343 if (!St || !St->isSimple()) {
344 LLVM_DEBUG(dbgs() << " Found a non-simple store.\n");
345 return false;
347 LoadAndStoreCounter++;
348 Value *Ptr = St->getPointerOperand();
349 // Check loop invariant.
350 if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
351 InvariantCounter++;
353 IsReadOnlyLoop = false;
355 return true;
358 /// Check loop instructions and confirms it's good for
359 /// LoopVersioningLICM.
360 bool LoopVersioningLICM::legalLoopInstructions() {
361 // Resetting counters.
362 LoadAndStoreCounter = 0;
363 InvariantCounter = 0;
364 IsReadOnlyLoop = true;
365 using namespace ore;
366 // Iterate over loop blocks and instructions of each block and check
367 // instruction safety.
368 for (auto *Block : CurLoop->getBlocks())
369 for (auto &Inst : *Block) {
370 // If instruction is unsafe just return false.
371 if (!instructionSafeForVersioning(&Inst)) {
372 ORE->emit([&]() {
373 return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopInst", &Inst)
374 << " Unsafe Loop Instruction";
376 return false;
379 // Get LoopAccessInfo from current loop via the proxy.
380 LAI = &LAIs.getInfo(*CurLoop);
381 // Check LoopAccessInfo for need of runtime check.
382 if (LAI->getRuntimePointerChecking()->getChecks().empty()) {
383 LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n");
384 return false;
386 // Number of runtime-checks should be less then RuntimeMemoryCheckThreshold
387 if (LAI->getNumRuntimePointerChecks() >
388 VectorizerParams::RuntimeMemoryCheckThreshold) {
389 LLVM_DEBUG(
390 dbgs() << " LAA: Runtime checks are more than threshold !!\n");
391 ORE->emit([&]() {
392 return OptimizationRemarkMissed(DEBUG_TYPE, "RuntimeCheck",
393 CurLoop->getStartLoc(),
394 CurLoop->getHeader())
395 << "Number of runtime checks "
396 << NV("RuntimeChecks", LAI->getNumRuntimePointerChecks())
397 << " exceeds threshold "
398 << NV("Threshold", VectorizerParams::RuntimeMemoryCheckThreshold);
400 return false;
402 // Loop should have at least one invariant load or store instruction.
403 if (!InvariantCounter) {
404 LLVM_DEBUG(dbgs() << " Invariant not found !!\n");
405 return false;
407 // Read only loop not allowed.
408 if (IsReadOnlyLoop) {
409 LLVM_DEBUG(dbgs() << " Found a read-only loop!\n");
410 return false;
412 // Profitablity check:
413 // Check invariant threshold, should be in limit.
414 if (InvariantCounter * 100 < InvariantThreshold * LoadAndStoreCounter) {
415 LLVM_DEBUG(
416 dbgs()
417 << " Invariant load & store are less then defined threshold\n");
418 LLVM_DEBUG(dbgs() << " Invariant loads & stores: "
419 << ((InvariantCounter * 100) / LoadAndStoreCounter)
420 << "%\n");
421 LLVM_DEBUG(dbgs() << " Invariant loads & store threshold: "
422 << InvariantThreshold << "%\n");
423 ORE->emit([&]() {
424 return OptimizationRemarkMissed(DEBUG_TYPE, "InvariantThreshold",
425 CurLoop->getStartLoc(),
426 CurLoop->getHeader())
427 << "Invariant load & store "
428 << NV("LoadAndStoreCounter",
429 ((InvariantCounter * 100) / LoadAndStoreCounter))
430 << " are less then defined threshold "
431 << NV("Threshold", InvariantThreshold);
433 return false;
435 return true;
438 /// It checks loop is already visited or not.
439 /// check loop meta data, if loop revisited return true
440 /// else false.
441 bool LoopVersioningLICM::isLoopAlreadyVisited() {
442 // Check LoopVersioningLICM metadata into loop
443 if (findStringMetadataForLoop(CurLoop, LICMVersioningMetaData)) {
444 return true;
446 return false;
449 /// Checks legality for LoopVersioningLICM by considering following:
450 /// a) loop structure legality b) loop instruction legality
451 /// c) loop memory access legality.
452 /// Return true if legal else returns false.
453 bool LoopVersioningLICM::isLegalForVersioning() {
454 using namespace ore;
455 LLVM_DEBUG(dbgs() << "Loop: " << *CurLoop);
456 // Make sure not re-visiting same loop again.
457 if (isLoopAlreadyVisited()) {
458 LLVM_DEBUG(
459 dbgs() << " Revisiting loop in LoopVersioningLICM not allowed.\n\n");
460 return false;
462 // Check loop structure leagality.
463 if (!legalLoopStructure()) {
464 LLVM_DEBUG(
465 dbgs() << " Loop structure not suitable for LoopVersioningLICM\n\n");
466 ORE->emit([&]() {
467 return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopStruct",
468 CurLoop->getStartLoc(),
469 CurLoop->getHeader())
470 << " Unsafe Loop structure";
472 return false;
474 // Check loop instruction leagality.
475 if (!legalLoopInstructions()) {
476 LLVM_DEBUG(
477 dbgs()
478 << " Loop instructions not suitable for LoopVersioningLICM\n\n");
479 return false;
481 // Check loop memory access leagality.
482 if (!legalLoopMemoryAccesses()) {
483 LLVM_DEBUG(
484 dbgs()
485 << " Loop memory access not suitable for LoopVersioningLICM\n\n");
486 ORE->emit([&]() {
487 return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopMemoryAccess",
488 CurLoop->getStartLoc(),
489 CurLoop->getHeader())
490 << " Unsafe Loop memory access";
492 return false;
494 // Loop versioning is feasible, return true.
495 LLVM_DEBUG(dbgs() << " Loop Versioning found to be beneficial\n\n");
496 ORE->emit([&]() {
497 return OptimizationRemark(DEBUG_TYPE, "IsLegalForVersioning",
498 CurLoop->getStartLoc(), CurLoop->getHeader())
499 << " Versioned loop for LICM."
500 << " Number of runtime checks we had to insert "
501 << NV("RuntimeChecks", LAI->getNumRuntimePointerChecks());
503 return true;
506 /// Update loop with aggressive aliasing assumptions.
507 /// It marks no-alias to any pairs of memory operations by assuming
508 /// loop should not have any must-alias memory accesses pairs.
509 /// During LoopVersioningLICM legality we ignore loops having must
510 /// aliasing memory accesses.
511 void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) {
512 // Get latch terminator instruction.
513 Instruction *I = VerLoop->getLoopLatch()->getTerminator();
514 // Create alias scope domain.
515 MDBuilder MDB(I->getContext());
516 MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain");
517 StringRef Name = "LVAliasScope";
518 MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
519 SmallVector<Metadata *, 4> Scopes{NewScope}, NoAliases{NewScope};
520 // Iterate over each instruction of loop.
521 // set no-alias for all load & store instructions.
522 for (auto *Block : CurLoop->getBlocks()) {
523 for (auto &Inst : *Block) {
524 // Only interested in instruction that may modify or read memory.
525 if (!Inst.mayReadFromMemory() && !Inst.mayWriteToMemory())
526 continue;
527 // Set no-alias for current instruction.
528 Inst.setMetadata(
529 LLVMContext::MD_noalias,
530 MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_noalias),
531 MDNode::get(Inst.getContext(), NoAliases)));
532 // set alias-scope for current instruction.
533 Inst.setMetadata(
534 LLVMContext::MD_alias_scope,
535 MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_alias_scope),
536 MDNode::get(Inst.getContext(), Scopes)));
541 bool LoopVersioningLICM::run(DominatorTree *DT) {
542 // Do not do the transformation if disabled by metadata.
543 if (hasLICMVersioningTransformation(CurLoop) & TM_Disable)
544 return false;
546 bool Changed = false;
548 // Check feasiblity of LoopVersioningLICM.
549 // If versioning found to be feasible and beneficial then proceed
550 // else simply return, by cleaning up memory.
551 if (isLegalForVersioning()) {
552 // Do loop versioning.
553 // Create memcheck for memory accessed inside loop.
554 // Clone original loop, and set blocks properly.
555 LoopVersioning LVer(*LAI, LAI->getRuntimePointerChecking()->getChecks(),
556 CurLoop, &LI, DT, SE);
557 LVer.versionLoop();
558 // Set Loop Versioning metaData for original loop.
559 addStringMetadataToLoop(LVer.getNonVersionedLoop(), LICMVersioningMetaData);
560 // Set Loop Versioning metaData for version loop.
561 addStringMetadataToLoop(LVer.getVersionedLoop(), LICMVersioningMetaData);
562 // Set "llvm.mem.parallel_loop_access" metaData to versioned loop.
563 // FIXME: "llvm.mem.parallel_loop_access" annotates memory access
564 // instructions, not loops.
565 addStringMetadataToLoop(LVer.getVersionedLoop(),
566 "llvm.mem.parallel_loop_access");
567 // Update version loop with aggressive aliasing assumption.
568 setNoAliasToLoop(LVer.getVersionedLoop());
569 Changed = true;
571 return Changed;
574 namespace llvm {
576 PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
577 LoopStandardAnalysisResults &LAR,
578 LPMUpdater &U) {
579 AliasAnalysis *AA = &LAR.AA;
580 ScalarEvolution *SE = &LAR.SE;
581 DominatorTree *DT = &LAR.DT;
582 const Function *F = L.getHeader()->getParent();
583 OptimizationRemarkEmitter ORE(F);
585 LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr);
586 if (!LoopVersioningLICM(AA, SE, &ORE, LAIs, LAR.LI, &L).run(DT))
587 return PreservedAnalyses::all();
588 return getLoopPassPreservedAnalyses();
590 } // namespace llvm