[clang] Document the return value of __builtin_COLUMN (#118360)
[llvm-project.git] / llvm / lib / Transforms / Scalar / LoopVersioningLICM.cpp
blobf58dcb51f64fef0171ba4eda51194ef06aac196a
1 //===- LoopVersioningLICM.cpp - LICM Loop Versioning ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // When alias analysis is uncertain about the aliasing between any two accesses,
10 // it will return MayAlias. This uncertainty from alias analysis restricts LICM
11 // from proceeding further. In cases where alias analysis is uncertain we might
12 // use loop versioning as an alternative.
14 // Loop Versioning will create a version of the loop with aggressive aliasing
15 // assumptions in addition to the original with conservative (default) aliasing
16 // assumptions. The version of the loop making aggressive aliasing assumptions
17 // will have all the memory accesses marked as no-alias. These two versions of
18 // loop will be preceded by a memory runtime check. This runtime check consists
19 // of bound checks for all unique memory accessed in loop, and it ensures the
20 // lack of memory aliasing. The result of the runtime check determines which of
21 // the loop versions is executed: If the runtime check detects any memory
22 // aliasing, then the original loop is executed. Otherwise, the version with
23 // aggressive aliasing assumptions is used.
25 // Following are the top level steps:
27 // a) Perform LoopVersioningLICM's feasibility check.
28 // b) If loop is a candidate for versioning then create a memory bound check,
29 // by considering all the memory accesses in loop body.
30 // c) Clone original loop and set all memory accesses as no-alias in new loop.
31 // d) Set original loop & versioned loop as a branch target of the runtime check
32 // result.
34 // It transforms loop as shown below:
36 // +----------------+
37 // |Runtime Memcheck|
38 // +----------------+
39 // |
40 // +----------+----------------+----------+
41 // | |
42 // +---------+----------+ +-----------+----------+
43 // |Orig Loop Preheader | |Cloned Loop Preheader |
44 // +--------------------+ +----------------------+
45 // | |
46 // +--------------------+ +----------------------+
47 // |Orig Loop Body | |Cloned Loop Body |
48 // +--------------------+ +----------------------+
49 // | |
50 // +--------------------+ +----------------------+
51 // |Orig Loop Exit Block| |Cloned Loop Exit Block|
52 // +--------------------+ +-----------+----------+
53 // | |
54 // +----------+--------------+-----------+
55 // |
56 // +-----+----+
57 // |Join Block|
58 // +----------+
60 //===----------------------------------------------------------------------===//
62 #include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
63 #include "llvm/ADT/SmallVector.h"
64 #include "llvm/ADT/StringRef.h"
65 #include "llvm/Analysis/AliasAnalysis.h"
66 #include "llvm/Analysis/AliasSetTracker.h"
67 #include "llvm/Analysis/GlobalsModRef.h"
68 #include "llvm/Analysis/LoopAccessAnalysis.h"
69 #include "llvm/Analysis/LoopInfo.h"
70 #include "llvm/Analysis/LoopPass.h"
71 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
72 #include "llvm/Analysis/ScalarEvolution.h"
73 #include "llvm/IR/Dominators.h"
74 #include "llvm/IR/Instruction.h"
75 #include "llvm/IR/Instructions.h"
76 #include "llvm/IR/LLVMContext.h"
77 #include "llvm/IR/MDBuilder.h"
78 #include "llvm/IR/Metadata.h"
79 #include "llvm/IR/Value.h"
80 #include "llvm/Support/Casting.h"
81 #include "llvm/Support/CommandLine.h"
82 #include "llvm/Support/Debug.h"
83 #include "llvm/Support/raw_ostream.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/LoopVersioning.h"
86 #include <cassert>
88 using namespace llvm;
90 #define DEBUG_TYPE "loop-versioning-licm"
92 static const char *LICMVersioningMetaData = "llvm.loop.licm_versioning.disable";
94 /// Threshold minimum allowed percentage for possible
95 /// invariant instructions in a loop.
96 static cl::opt<float>
97 LVInvarThreshold("licm-versioning-invariant-threshold",
98 cl::desc("LoopVersioningLICM's minimum allowed percentage"
99 "of possible invariant instructions per loop"),
100 cl::init(25), cl::Hidden);
102 /// Threshold for maximum allowed loop nest/depth
103 static cl::opt<unsigned> LVLoopDepthThreshold(
104 "licm-versioning-max-depth-threshold",
105 cl::desc(
106 "LoopVersioningLICM's threshold for maximum allowed loop nest/depth"),
107 cl::init(2), cl::Hidden);
109 namespace {
111 struct LoopVersioningLICM {
112 // We don't explicitly pass in LoopAccessInfo to the constructor since the
113 // loop versioning might return early due to instructions that are not safe
114 // for versioning. By passing the proxy instead the construction of
115 // LoopAccessInfo will take place only when it's necessary.
116 LoopVersioningLICM(AliasAnalysis *AA, ScalarEvolution *SE,
117 OptimizationRemarkEmitter *ORE,
118 LoopAccessInfoManager &LAIs, LoopInfo &LI,
119 Loop *CurLoop)
120 : AA(AA), SE(SE), LAIs(LAIs), LI(LI), CurLoop(CurLoop),
121 LoopDepthThreshold(LVLoopDepthThreshold),
122 InvariantThreshold(LVInvarThreshold), ORE(ORE) {}
124 bool run(DominatorTree *DT);
126 private:
127 // Current AliasAnalysis information
128 AliasAnalysis *AA;
130 // Current ScalarEvolution
131 ScalarEvolution *SE;
133 // Current Loop's LoopAccessInfo
134 const LoopAccessInfo *LAI = nullptr;
136 // Proxy for retrieving LoopAccessInfo.
137 LoopAccessInfoManager &LAIs;
139 LoopInfo &LI;
141 // The current loop we are working on.
142 Loop *CurLoop;
144 // Maximum loop nest threshold
145 unsigned LoopDepthThreshold;
147 // Minimum invariant threshold
148 float InvariantThreshold;
150 // Counter to track num of load & store
151 unsigned LoadAndStoreCounter = 0;
153 // Counter to track num of invariant
154 unsigned InvariantCounter = 0;
156 // Read only loop marker.
157 bool IsReadOnlyLoop = true;
159 // OptimizationRemarkEmitter
160 OptimizationRemarkEmitter *ORE;
162 bool isLegalForVersioning();
163 bool legalLoopStructure();
164 bool legalLoopInstructions();
165 bool legalLoopMemoryAccesses();
166 bool isLoopAlreadyVisited();
167 void setNoAliasToLoop(Loop *VerLoop);
168 bool instructionSafeForVersioning(Instruction *I);
171 } // end anonymous namespace
173 /// Check loop structure and confirms it's good for LoopVersioningLICM.
174 bool LoopVersioningLICM::legalLoopStructure() {
175 // Loop must be in loop simplify form.
176 if (!CurLoop->isLoopSimplifyForm()) {
177 LLVM_DEBUG(dbgs() << " loop is not in loop-simplify form.\n");
178 return false;
180 // Loop should be innermost loop, if not return false.
181 if (!CurLoop->getSubLoops().empty()) {
182 LLVM_DEBUG(dbgs() << " loop is not innermost\n");
183 return false;
185 // Loop should have a single backedge, if not return false.
186 if (CurLoop->getNumBackEdges() != 1) {
187 LLVM_DEBUG(dbgs() << " loop has multiple backedges\n");
188 return false;
190 // Loop must have a single exiting block, if not return false.
191 if (!CurLoop->getExitingBlock()) {
192 LLVM_DEBUG(dbgs() << " loop has multiple exiting block\n");
193 return false;
195 // We only handle bottom-tested loop, i.e. loop in which the condition is
196 // checked at the end of each iteration. With that we can assume that all
197 // instructions in the loop are executed the same number of times.
198 if (CurLoop->getExitingBlock() != CurLoop->getLoopLatch()) {
199 LLVM_DEBUG(dbgs() << " loop is not bottom tested\n");
200 return false;
202 // Parallel loops must not have aliasing loop-invariant memory accesses.
203 // Hence we don't need to version anything in this case.
204 if (CurLoop->isAnnotatedParallel()) {
205 LLVM_DEBUG(dbgs() << " Parallel loop is not worth versioning\n");
206 return false;
208 // Loop depth more then LoopDepthThreshold are not allowed
209 if (CurLoop->getLoopDepth() > LoopDepthThreshold) {
210 LLVM_DEBUG(dbgs() << " loop depth is more than threshold\n");
211 return false;
213 // We need to be able to compute the loop trip count in order
214 // to generate the bound checks.
215 const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop);
216 if (isa<SCEVCouldNotCompute>(ExitCount)) {
217 LLVM_DEBUG(dbgs() << " loop does not have trip count\n");
218 return false;
220 return true;
223 /// Check memory accesses in loop and confirms it's good for
224 /// LoopVersioningLICM.
225 bool LoopVersioningLICM::legalLoopMemoryAccesses() {
226 // Loop over the body of this loop, construct AST.
227 BatchAAResults BAA(*AA);
228 AliasSetTracker AST(BAA);
229 for (auto *Block : CurLoop->getBlocks()) {
230 // Ignore blocks in subloops.
231 if (LI.getLoopFor(Block) == CurLoop)
232 AST.add(*Block);
235 // Memory check:
236 // Transform phase will generate a versioned loop and also a runtime check to
237 // ensure the pointers are independent and they don’t alias.
238 // In version variant of loop, alias meta data asserts that all access are
239 // mutually independent.
241 // Pointers aliasing in alias domain are avoided because with multiple
242 // aliasing domains we may not be able to hoist potential loop invariant
243 // access out of the loop.
245 // Iterate over alias tracker sets, and confirm AliasSets doesn't have any
246 // must alias set.
247 bool HasMayAlias = false;
248 bool TypeSafety = false;
249 bool HasMod = false;
250 for (const auto &I : AST) {
251 const AliasSet &AS = I;
252 // Skip Forward Alias Sets, as this should be ignored as part of
253 // the AliasSetTracker object.
254 if (AS.isForwardingAliasSet())
255 continue;
256 // With MustAlias its not worth adding runtime bound check.
257 if (AS.isMustAlias())
258 return false;
259 const Value *SomePtr = AS.begin()->Ptr;
260 bool TypeCheck = true;
261 // Check for Mod & MayAlias
262 HasMayAlias |= AS.isMayAlias();
263 HasMod |= AS.isMod();
264 for (const auto &MemLoc : AS) {
265 const Value *Ptr = MemLoc.Ptr;
266 // Alias tracker should have pointers of same data type.
268 // FIXME: check no longer effective since opaque pointers?
269 // If the intent is to check that the memory accesses use the
270 // same data type (such that LICM can promote them), then we
271 // can no longer see this from the pointer value types.
272 TypeCheck = (TypeCheck && (SomePtr->getType() == Ptr->getType()));
274 // At least one alias tracker should have pointers of same data type.
275 TypeSafety |= TypeCheck;
277 // Ensure types should be of same type.
278 if (!TypeSafety) {
279 LLVM_DEBUG(dbgs() << " Alias tracker type safety failed!\n");
280 return false;
282 // Ensure loop body shouldn't be read only.
283 if (!HasMod) {
284 LLVM_DEBUG(dbgs() << " No memory modified in loop body\n");
285 return false;
287 // Make sure alias set has may alias case.
288 // If there no alias memory ambiguity, return false.
289 if (!HasMayAlias) {
290 LLVM_DEBUG(dbgs() << " No ambiguity in memory access.\n");
291 return false;
293 return true;
296 /// Check loop instructions safe for Loop versioning.
297 /// It returns true if it's safe else returns false.
298 /// Consider following:
299 /// 1) Check all load store in loop body are non atomic & non volatile.
300 /// 2) Check function call safety, by ensuring its not accessing memory.
301 /// 3) Loop body shouldn't have any may throw instruction.
302 /// 4) Loop body shouldn't have any convergent or noduplicate instructions.
303 bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
304 assert(I != nullptr && "Null instruction found!");
305 // Check function call safety
306 if (auto *Call = dyn_cast<CallBase>(I)) {
307 if (Call->isConvergent() || Call->cannotDuplicate()) {
308 LLVM_DEBUG(dbgs() << " Convergent call site found.\n");
309 return false;
312 if (!AA->doesNotAccessMemory(Call)) {
313 LLVM_DEBUG(dbgs() << " Unsafe call site found.\n");
314 return false;
318 // Avoid loops with possiblity of throw
319 if (I->mayThrow()) {
320 LLVM_DEBUG(dbgs() << " May throw instruction found in loop body\n");
321 return false;
323 // If current instruction is load instructions
324 // make sure it's a simple load (non atomic & non volatile)
325 if (I->mayReadFromMemory()) {
326 LoadInst *Ld = dyn_cast<LoadInst>(I);
327 if (!Ld || !Ld->isSimple()) {
328 LLVM_DEBUG(dbgs() << " Found a non-simple load.\n");
329 return false;
331 LoadAndStoreCounter++;
332 Value *Ptr = Ld->getPointerOperand();
333 // Check loop invariant.
334 if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
335 InvariantCounter++;
337 // If current instruction is store instruction
338 // make sure it's a simple store (non atomic & non volatile)
339 else if (I->mayWriteToMemory()) {
340 StoreInst *St = dyn_cast<StoreInst>(I);
341 if (!St || !St->isSimple()) {
342 LLVM_DEBUG(dbgs() << " Found a non-simple store.\n");
343 return false;
345 LoadAndStoreCounter++;
346 Value *Ptr = St->getPointerOperand();
347 // Check loop invariant.
348 if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
349 InvariantCounter++;
351 IsReadOnlyLoop = false;
353 return true;
356 /// Check loop instructions and confirms it's good for
357 /// LoopVersioningLICM.
358 bool LoopVersioningLICM::legalLoopInstructions() {
359 // Resetting counters.
360 LoadAndStoreCounter = 0;
361 InvariantCounter = 0;
362 IsReadOnlyLoop = true;
363 using namespace ore;
364 // Iterate over loop blocks and instructions of each block and check
365 // instruction safety.
366 for (auto *Block : CurLoop->getBlocks())
367 for (auto &Inst : *Block) {
368 // If instruction is unsafe just return false.
369 if (!instructionSafeForVersioning(&Inst)) {
370 ORE->emit([&]() {
371 return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopInst", &Inst)
372 << " Unsafe Loop Instruction";
374 return false;
377 // Get LoopAccessInfo from current loop via the proxy.
378 LAI = &LAIs.getInfo(*CurLoop);
379 // Check LoopAccessInfo for need of runtime check.
380 if (LAI->getRuntimePointerChecking()->getChecks().empty()) {
381 LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n");
382 return false;
384 // Number of runtime-checks should be less then RuntimeMemoryCheckThreshold
385 if (LAI->getNumRuntimePointerChecks() >
386 VectorizerParams::RuntimeMemoryCheckThreshold) {
387 LLVM_DEBUG(
388 dbgs() << " LAA: Runtime checks are more than threshold !!\n");
389 ORE->emit([&]() {
390 return OptimizationRemarkMissed(DEBUG_TYPE, "RuntimeCheck",
391 CurLoop->getStartLoc(),
392 CurLoop->getHeader())
393 << "Number of runtime checks "
394 << NV("RuntimeChecks", LAI->getNumRuntimePointerChecks())
395 << " exceeds threshold "
396 << NV("Threshold", VectorizerParams::RuntimeMemoryCheckThreshold);
398 return false;
400 // Loop should have at least one invariant load or store instruction.
401 if (!InvariantCounter) {
402 LLVM_DEBUG(dbgs() << " Invariant not found !!\n");
403 return false;
405 // Read only loop not allowed.
406 if (IsReadOnlyLoop) {
407 LLVM_DEBUG(dbgs() << " Found a read-only loop!\n");
408 return false;
410 // Profitablity check:
411 // Check invariant threshold, should be in limit.
412 if (InvariantCounter * 100 < InvariantThreshold * LoadAndStoreCounter) {
413 LLVM_DEBUG(
414 dbgs()
415 << " Invariant load & store are less then defined threshold\n");
416 LLVM_DEBUG(dbgs() << " Invariant loads & stores: "
417 << ((InvariantCounter * 100) / LoadAndStoreCounter)
418 << "%\n");
419 LLVM_DEBUG(dbgs() << " Invariant loads & store threshold: "
420 << InvariantThreshold << "%\n");
421 ORE->emit([&]() {
422 return OptimizationRemarkMissed(DEBUG_TYPE, "InvariantThreshold",
423 CurLoop->getStartLoc(),
424 CurLoop->getHeader())
425 << "Invariant load & store "
426 << NV("LoadAndStoreCounter",
427 ((InvariantCounter * 100) / LoadAndStoreCounter))
428 << " are less then defined threshold "
429 << NV("Threshold", InvariantThreshold);
431 return false;
433 return true;
436 /// It checks loop is already visited or not.
437 /// check loop meta data, if loop revisited return true
438 /// else false.
439 bool LoopVersioningLICM::isLoopAlreadyVisited() {
440 // Check LoopVersioningLICM metadata into loop
441 if (findStringMetadataForLoop(CurLoop, LICMVersioningMetaData)) {
442 return true;
444 return false;
447 /// Checks legality for LoopVersioningLICM by considering following:
448 /// a) loop structure legality b) loop instruction legality
449 /// c) loop memory access legality.
450 /// Return true if legal else returns false.
451 bool LoopVersioningLICM::isLegalForVersioning() {
452 using namespace ore;
453 LLVM_DEBUG(dbgs() << "Loop: " << *CurLoop);
454 // Make sure not re-visiting same loop again.
455 if (isLoopAlreadyVisited()) {
456 LLVM_DEBUG(
457 dbgs() << " Revisiting loop in LoopVersioningLICM not allowed.\n\n");
458 return false;
460 // Check loop structure leagality.
461 if (!legalLoopStructure()) {
462 LLVM_DEBUG(
463 dbgs() << " Loop structure not suitable for LoopVersioningLICM\n\n");
464 ORE->emit([&]() {
465 return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopStruct",
466 CurLoop->getStartLoc(),
467 CurLoop->getHeader())
468 << " Unsafe Loop structure";
470 return false;
472 // Check loop instruction leagality.
473 if (!legalLoopInstructions()) {
474 LLVM_DEBUG(
475 dbgs()
476 << " Loop instructions not suitable for LoopVersioningLICM\n\n");
477 return false;
479 // Check loop memory access leagality.
480 if (!legalLoopMemoryAccesses()) {
481 LLVM_DEBUG(
482 dbgs()
483 << " Loop memory access not suitable for LoopVersioningLICM\n\n");
484 ORE->emit([&]() {
485 return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopMemoryAccess",
486 CurLoop->getStartLoc(),
487 CurLoop->getHeader())
488 << " Unsafe Loop memory access";
490 return false;
492 // Loop versioning is feasible, return true.
493 LLVM_DEBUG(dbgs() << " Loop Versioning found to be beneficial\n\n");
494 ORE->emit([&]() {
495 return OptimizationRemark(DEBUG_TYPE, "IsLegalForVersioning",
496 CurLoop->getStartLoc(), CurLoop->getHeader())
497 << " Versioned loop for LICM."
498 << " Number of runtime checks we had to insert "
499 << NV("RuntimeChecks", LAI->getNumRuntimePointerChecks());
501 return true;
504 /// Update loop with aggressive aliasing assumptions.
505 /// It marks no-alias to any pairs of memory operations by assuming
506 /// loop should not have any must-alias memory accesses pairs.
507 /// During LoopVersioningLICM legality we ignore loops having must
508 /// aliasing memory accesses.
509 void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) {
510 // Get latch terminator instruction.
511 Instruction *I = VerLoop->getLoopLatch()->getTerminator();
512 // Create alias scope domain.
513 MDBuilder MDB(I->getContext());
514 MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain");
515 StringRef Name = "LVAliasScope";
516 MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
517 SmallVector<Metadata *, 4> Scopes{NewScope}, NoAliases{NewScope};
518 // Iterate over each instruction of loop.
519 // set no-alias for all load & store instructions.
520 for (auto *Block : CurLoop->getBlocks()) {
521 for (auto &Inst : *Block) {
522 // Only interested in instruction that may modify or read memory.
523 if (!Inst.mayReadFromMemory() && !Inst.mayWriteToMemory())
524 continue;
525 // Set no-alias for current instruction.
526 Inst.setMetadata(
527 LLVMContext::MD_noalias,
528 MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_noalias),
529 MDNode::get(Inst.getContext(), NoAliases)));
530 // set alias-scope for current instruction.
531 Inst.setMetadata(
532 LLVMContext::MD_alias_scope,
533 MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_alias_scope),
534 MDNode::get(Inst.getContext(), Scopes)));
539 bool LoopVersioningLICM::run(DominatorTree *DT) {
540 // Do not do the transformation if disabled by metadata.
541 if (hasLICMVersioningTransformation(CurLoop) & TM_Disable)
542 return false;
544 bool Changed = false;
546 // Check feasiblity of LoopVersioningLICM.
547 // If versioning found to be feasible and beneficial then proceed
548 // else simply return, by cleaning up memory.
549 if (isLegalForVersioning()) {
550 // Do loop versioning.
551 // Create memcheck for memory accessed inside loop.
552 // Clone original loop, and set blocks properly.
553 LoopVersioning LVer(*LAI, LAI->getRuntimePointerChecking()->getChecks(),
554 CurLoop, &LI, DT, SE);
555 LVer.versionLoop();
556 // Set Loop Versioning metaData for original loop.
557 addStringMetadataToLoop(LVer.getNonVersionedLoop(), LICMVersioningMetaData);
558 // Set Loop Versioning metaData for version loop.
559 addStringMetadataToLoop(LVer.getVersionedLoop(), LICMVersioningMetaData);
560 // Set "llvm.mem.parallel_loop_access" metaData to versioned loop.
561 // FIXME: "llvm.mem.parallel_loop_access" annotates memory access
562 // instructions, not loops.
563 addStringMetadataToLoop(LVer.getVersionedLoop(),
564 "llvm.mem.parallel_loop_access");
565 // Update version loop with aggressive aliasing assumption.
566 setNoAliasToLoop(LVer.getVersionedLoop());
567 Changed = true;
569 return Changed;
572 namespace llvm {
574 PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
575 LoopStandardAnalysisResults &LAR,
576 LPMUpdater &U) {
577 AliasAnalysis *AA = &LAR.AA;
578 ScalarEvolution *SE = &LAR.SE;
579 DominatorTree *DT = &LAR.DT;
580 const Function *F = L.getHeader()->getParent();
581 OptimizationRemarkEmitter ORE(F);
583 LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr);
584 if (!LoopVersioningLICM(AA, SE, &ORE, LAIs, LAR.LI, &L).run(DT))
585 return PreservedAnalyses::all();
586 return getLoopPassPreservedAnalyses();
588 } // namespace llvm