1 //===- LoopVersioningLICM.cpp - LICM Loop Versioning ----------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // When alias analysis is uncertain about the aliasing between any two accesses,
10 // it will return MayAlias. This uncertainty from alias analysis restricts LICM
11 // from proceeding further. In cases where alias analysis is uncertain we might
12 // use loop versioning as an alternative.
14 // Loop Versioning will create a version of the loop with aggressive aliasing
15 // assumptions in addition to the original with conservative (default) aliasing
16 // assumptions. The version of the loop making aggressive aliasing assumptions
17 // will have all the memory accesses marked as no-alias. These two versions of
18 // loop will be preceded by a memory runtime check. This runtime check consists
19 // of bound checks for all unique memory accessed in loop, and it ensures the
20 // lack of memory aliasing. The result of the runtime check determines which of
21 // the loop versions is executed: If the runtime check detects any memory
22 // aliasing, then the original loop is executed. Otherwise, the version with
23 // aggressive aliasing assumptions is used.
25 // Following are the top level steps:
27 // a) Perform LoopVersioningLICM's feasibility check.
28 // b) If loop is a candidate for versioning then create a memory bound check,
29 // by considering all the memory accesses in loop body.
30 // c) Clone original loop and set all memory accesses as no-alias in new loop.
// d) Set original loop & versioned loop as a branch target of the runtime check
//    result.
//
// It transforms loop as shown below:
//
//                         +----------------+
//                         |Runtime Memcheck|
//                         +----------------+
//                                 |
//              +----------+----------------+----------+
//              |                                      |
//    +---------+----------+               +-----------+----------+
//    |Orig Loop Preheader |               |Cloned Loop Preheader |
//    +--------------------+               +----------------------+
//              |                                      |
//    +--------------------+               +----------------------+
//    |Orig Loop Body      |               |Cloned Loop Body      |
//    +--------------------+               +----------------------+
//              |                                      |
//    +--------------------+               +----------------------+
//    |Orig Loop Exit Block|               |Cloned Loop Exit Block|
//    +--------------------+               +-----------+----------+
//              |                                      |
//              +----------+--------------+-----------+
//                                 |
60 //===----------------------------------------------------------------------===//
62 #include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
63 #include "llvm/ADT/SmallVector.h"
64 #include "llvm/ADT/StringRef.h"
65 #include "llvm/Analysis/AliasAnalysis.h"
66 #include "llvm/Analysis/AliasSetTracker.h"
67 #include "llvm/Analysis/GlobalsModRef.h"
68 #include "llvm/Analysis/LoopAccessAnalysis.h"
69 #include "llvm/Analysis/LoopInfo.h"
70 #include "llvm/Analysis/LoopPass.h"
71 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
72 #include "llvm/Analysis/ScalarEvolution.h"
73 #include "llvm/IR/Dominators.h"
74 #include "llvm/IR/Instruction.h"
75 #include "llvm/IR/Instructions.h"
76 #include "llvm/IR/LLVMContext.h"
77 #include "llvm/IR/MDBuilder.h"
78 #include "llvm/IR/Metadata.h"
79 #include "llvm/IR/Value.h"
80 #include "llvm/Support/Casting.h"
81 #include "llvm/Support/CommandLine.h"
82 #include "llvm/Support/Debug.h"
83 #include "llvm/Support/raw_ostream.h"
84 #include "llvm/Transforms/Utils.h"
85 #include "llvm/Transforms/Utils/LoopUtils.h"
86 #include "llvm/Transforms/Utils/LoopVersioning.h"
92 #define DEBUG_TYPE "loop-versioning-licm"
/// Loop metadata string this pass looks up before processing a loop and
/// attaches to both loop versions after versioning.
static const char *const LICMVersioningMetaData =
    "llvm.loop.licm_versioning.disable";
96 /// Threshold minimum allowed percentage for possible
97 /// invariant instructions in a loop.
99 LVInvarThreshold("licm-versioning-invariant-threshold",
100 cl::desc("LoopVersioningLICM's minimum allowed percentage"
101 "of possible invariant instructions per loop"),
102 cl::init(25), cl::Hidden
);
104 /// Threshold for maximum allowed loop nest/depth
105 static cl::opt
<unsigned> LVLoopDepthThreshold(
106 "licm-versioning-max-depth-threshold",
108 "LoopVersioningLICM's threshold for maximum allowed loop nest/depth"),
109 cl::init(2), cl::Hidden
);
113 struct LoopVersioningLICM
{
114 // We don't explicitly pass in LoopAccessInfo to the constructor since the
115 // loop versioning might return early due to instructions that are not safe
116 // for versioning. By passing the proxy instead the construction of
117 // LoopAccessInfo will take place only when it's necessary.
118 LoopVersioningLICM(AliasAnalysis
*AA
, ScalarEvolution
*SE
,
119 OptimizationRemarkEmitter
*ORE
,
120 LoopAccessInfoManager
&LAIs
, LoopInfo
&LI
,
122 : AA(AA
), SE(SE
), LAIs(LAIs
), LI(LI
), CurLoop(CurLoop
),
123 LoopDepthThreshold(LVLoopDepthThreshold
),
124 InvariantThreshold(LVInvarThreshold
), ORE(ORE
) {}
126 bool run(DominatorTree
*DT
);
129 // Current AliasAnalysis information
132 // Current ScalarEvolution
135 // Current Loop's LoopAccessInfo
136 const LoopAccessInfo
*LAI
= nullptr;
138 // Proxy for retrieving LoopAccessInfo.
139 LoopAccessInfoManager
&LAIs
;
143 // The current loop we are working on.
146 // Maximum loop nest threshold
147 unsigned LoopDepthThreshold
;
149 // Minimum invariant threshold
150 float InvariantThreshold
;
152 // Counter to track num of load & store
153 unsigned LoadAndStoreCounter
= 0;
155 // Counter to track num of invariant
156 unsigned InvariantCounter
= 0;
158 // Read only loop marker.
159 bool IsReadOnlyLoop
= true;
161 // OptimizationRemarkEmitter
162 OptimizationRemarkEmitter
*ORE
;
164 bool isLegalForVersioning();
165 bool legalLoopStructure();
166 bool legalLoopInstructions();
167 bool legalLoopMemoryAccesses();
168 bool isLoopAlreadyVisited();
169 void setNoAliasToLoop(Loop
*VerLoop
);
170 bool instructionSafeForVersioning(Instruction
*I
);
173 } // end anonymous namespace
175 /// Check loop structure and confirms it's good for LoopVersioningLICM.
176 bool LoopVersioningLICM::legalLoopStructure() {
177 // Loop must be in loop simplify form.
178 if (!CurLoop
->isLoopSimplifyForm()) {
179 LLVM_DEBUG(dbgs() << " loop is not in loop-simplify form.\n");
182 // Loop should be innermost loop, if not return false.
183 if (!CurLoop
->getSubLoops().empty()) {
184 LLVM_DEBUG(dbgs() << " loop is not innermost\n");
187 // Loop should have a single backedge, if not return false.
188 if (CurLoop
->getNumBackEdges() != 1) {
189 LLVM_DEBUG(dbgs() << " loop has multiple backedges\n");
192 // Loop must have a single exiting block, if not return false.
193 if (!CurLoop
->getExitingBlock()) {
194 LLVM_DEBUG(dbgs() << " loop has multiple exiting block\n");
197 // We only handle bottom-tested loop, i.e. loop in which the condition is
198 // checked at the end of each iteration. With that we can assume that all
199 // instructions in the loop are executed the same number of times.
200 if (CurLoop
->getExitingBlock() != CurLoop
->getLoopLatch()) {
201 LLVM_DEBUG(dbgs() << " loop is not bottom tested\n");
204 // Parallel loops must not have aliasing loop-invariant memory accesses.
205 // Hence we don't need to version anything in this case.
206 if (CurLoop
->isAnnotatedParallel()) {
207 LLVM_DEBUG(dbgs() << " Parallel loop is not worth versioning\n");
210 // Loop depth more then LoopDepthThreshold are not allowed
211 if (CurLoop
->getLoopDepth() > LoopDepthThreshold
) {
212 LLVM_DEBUG(dbgs() << " loop depth is more then threshold\n");
215 // We need to be able to compute the loop trip count in order
216 // to generate the bound checks.
217 const SCEV
*ExitCount
= SE
->getBackedgeTakenCount(CurLoop
);
218 if (isa
<SCEVCouldNotCompute
>(ExitCount
)) {
219 LLVM_DEBUG(dbgs() << " loop does not has trip count\n");
225 /// Check memory accesses in loop and confirms it's good for
226 /// LoopVersioningLICM.
227 bool LoopVersioningLICM::legalLoopMemoryAccesses() {
228 // Loop over the body of this loop, construct AST.
229 BatchAAResults
BAA(*AA
);
230 AliasSetTracker
AST(BAA
);
231 for (auto *Block
: CurLoop
->getBlocks()) {
232 // Ignore blocks in subloops.
233 if (LI
.getLoopFor(Block
) == CurLoop
)
238 // Transform phase will generate a versioned loop and also a runtime check to
239 // ensure the pointers are independent and they don’t alias.
240 // In version variant of loop, alias meta data asserts that all access are
241 // mutually independent.
243 // Pointers aliasing in alias domain are avoided because with multiple
244 // aliasing domains we may not be able to hoist potential loop invariant
245 // access out of the loop.
247 // Iterate over alias tracker sets, and confirm AliasSets doesn't have any
249 bool HasMayAlias
= false;
250 bool TypeSafety
= false;
252 for (const auto &I
: AST
) {
253 const AliasSet
&AS
= I
;
254 // Skip Forward Alias Sets, as this should be ignored as part of
255 // the AliasSetTracker object.
256 if (AS
.isForwardingAliasSet())
258 // With MustAlias its not worth adding runtime bound check.
259 if (AS
.isMustAlias())
261 const Value
*SomePtr
= AS
.begin()->Ptr
;
262 bool TypeCheck
= true;
263 // Check for Mod & MayAlias
264 HasMayAlias
|= AS
.isMayAlias();
265 HasMod
|= AS
.isMod();
266 for (const auto &MemLoc
: AS
) {
267 const Value
*Ptr
= MemLoc
.Ptr
;
268 // Alias tracker should have pointers of same data type.
270 // FIXME: check no longer effective since opaque pointers?
271 // If the intent is to check that the memory accesses use the
272 // same data type (such that LICM can promote them), then we
273 // can no longer see this from the pointer value types.
274 TypeCheck
= (TypeCheck
&& (SomePtr
->getType() == Ptr
->getType()));
276 // At least one alias tracker should have pointers of same data type.
277 TypeSafety
|= TypeCheck
;
279 // Ensure types should be of same type.
281 LLVM_DEBUG(dbgs() << " Alias tracker type safety failed!\n");
284 // Ensure loop body shouldn't be read only.
286 LLVM_DEBUG(dbgs() << " No memory modified in loop body\n");
289 // Make sure alias set has may alias case.
290 // If there no alias memory ambiguity, return false.
292 LLVM_DEBUG(dbgs() << " No ambiguity in memory access.\n");
298 /// Check loop instructions safe for Loop versioning.
299 /// It returns true if it's safe else returns false.
300 /// Consider following:
301 /// 1) Check all load store in loop body are non atomic & non volatile.
302 /// 2) Check function call safety, by ensuring its not accessing memory.
303 /// 3) Loop body shouldn't have any may throw instruction.
304 /// 4) Loop body shouldn't have any convergent or noduplicate instructions.
305 bool LoopVersioningLICM::instructionSafeForVersioning(Instruction
*I
) {
306 assert(I
!= nullptr && "Null instruction found!");
307 // Check function call safety
308 if (auto *Call
= dyn_cast
<CallBase
>(I
)) {
309 if (Call
->isConvergent() || Call
->cannotDuplicate()) {
310 LLVM_DEBUG(dbgs() << " Convergent call site found.\n");
314 if (!AA
->doesNotAccessMemory(Call
)) {
315 LLVM_DEBUG(dbgs() << " Unsafe call site found.\n");
320 // Avoid loops with possiblity of throw
322 LLVM_DEBUG(dbgs() << " May throw instruction found in loop body\n");
325 // If current instruction is load instructions
326 // make sure it's a simple load (non atomic & non volatile)
327 if (I
->mayReadFromMemory()) {
328 LoadInst
*Ld
= dyn_cast
<LoadInst
>(I
);
329 if (!Ld
|| !Ld
->isSimple()) {
330 LLVM_DEBUG(dbgs() << " Found a non-simple load.\n");
333 LoadAndStoreCounter
++;
334 Value
*Ptr
= Ld
->getPointerOperand();
335 // Check loop invariant.
336 if (SE
->isLoopInvariant(SE
->getSCEV(Ptr
), CurLoop
))
339 // If current instruction is store instruction
340 // make sure it's a simple store (non atomic & non volatile)
341 else if (I
->mayWriteToMemory()) {
342 StoreInst
*St
= dyn_cast
<StoreInst
>(I
);
343 if (!St
|| !St
->isSimple()) {
344 LLVM_DEBUG(dbgs() << " Found a non-simple store.\n");
347 LoadAndStoreCounter
++;
348 Value
*Ptr
= St
->getPointerOperand();
349 // Check loop invariant.
350 if (SE
->isLoopInvariant(SE
->getSCEV(Ptr
), CurLoop
))
353 IsReadOnlyLoop
= false;
358 /// Check loop instructions and confirms it's good for
359 /// LoopVersioningLICM.
360 bool LoopVersioningLICM::legalLoopInstructions() {
361 // Resetting counters.
362 LoadAndStoreCounter
= 0;
363 InvariantCounter
= 0;
364 IsReadOnlyLoop
= true;
366 // Iterate over loop blocks and instructions of each block and check
367 // instruction safety.
368 for (auto *Block
: CurLoop
->getBlocks())
369 for (auto &Inst
: *Block
) {
370 // If instruction is unsafe just return false.
371 if (!instructionSafeForVersioning(&Inst
)) {
373 return OptimizationRemarkMissed(DEBUG_TYPE
, "IllegalLoopInst", &Inst
)
374 << " Unsafe Loop Instruction";
379 // Get LoopAccessInfo from current loop via the proxy.
380 LAI
= &LAIs
.getInfo(*CurLoop
);
381 // Check LoopAccessInfo for need of runtime check.
382 if (LAI
->getRuntimePointerChecking()->getChecks().empty()) {
383 LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n");
386 // Number of runtime-checks should be less then RuntimeMemoryCheckThreshold
387 if (LAI
->getNumRuntimePointerChecks() >
388 VectorizerParams::RuntimeMemoryCheckThreshold
) {
390 dbgs() << " LAA: Runtime checks are more than threshold !!\n");
392 return OptimizationRemarkMissed(DEBUG_TYPE
, "RuntimeCheck",
393 CurLoop
->getStartLoc(),
394 CurLoop
->getHeader())
395 << "Number of runtime checks "
396 << NV("RuntimeChecks", LAI
->getNumRuntimePointerChecks())
397 << " exceeds threshold "
398 << NV("Threshold", VectorizerParams::RuntimeMemoryCheckThreshold
);
402 // Loop should have at least one invariant load or store instruction.
403 if (!InvariantCounter
) {
404 LLVM_DEBUG(dbgs() << " Invariant not found !!\n");
407 // Read only loop not allowed.
408 if (IsReadOnlyLoop
) {
409 LLVM_DEBUG(dbgs() << " Found a read-only loop!\n");
412 // Profitablity check:
413 // Check invariant threshold, should be in limit.
414 if (InvariantCounter
* 100 < InvariantThreshold
* LoadAndStoreCounter
) {
417 << " Invariant load & store are less then defined threshold\n");
418 LLVM_DEBUG(dbgs() << " Invariant loads & stores: "
419 << ((InvariantCounter
* 100) / LoadAndStoreCounter
)
421 LLVM_DEBUG(dbgs() << " Invariant loads & store threshold: "
422 << InvariantThreshold
<< "%\n");
424 return OptimizationRemarkMissed(DEBUG_TYPE
, "InvariantThreshold",
425 CurLoop
->getStartLoc(),
426 CurLoop
->getHeader())
427 << "Invariant load & store "
428 << NV("LoadAndStoreCounter",
429 ((InvariantCounter
* 100) / LoadAndStoreCounter
))
430 << " are less then defined threshold "
431 << NV("Threshold", InvariantThreshold
);
438 /// It checks loop is already visited or not.
439 /// check loop meta data, if loop revisited return true
441 bool LoopVersioningLICM::isLoopAlreadyVisited() {
442 // Check LoopVersioningLICM metadata into loop
443 if (findStringMetadataForLoop(CurLoop
, LICMVersioningMetaData
)) {
449 /// Checks legality for LoopVersioningLICM by considering following:
450 /// a) loop structure legality b) loop instruction legality
451 /// c) loop memory access legality.
452 /// Return true if legal else returns false.
453 bool LoopVersioningLICM::isLegalForVersioning() {
455 LLVM_DEBUG(dbgs() << "Loop: " << *CurLoop
);
456 // Make sure not re-visiting same loop again.
457 if (isLoopAlreadyVisited()) {
459 dbgs() << " Revisiting loop in LoopVersioningLICM not allowed.\n\n");
462 // Check loop structure leagality.
463 if (!legalLoopStructure()) {
465 dbgs() << " Loop structure not suitable for LoopVersioningLICM\n\n");
467 return OptimizationRemarkMissed(DEBUG_TYPE
, "IllegalLoopStruct",
468 CurLoop
->getStartLoc(),
469 CurLoop
->getHeader())
470 << " Unsafe Loop structure";
474 // Check loop instruction leagality.
475 if (!legalLoopInstructions()) {
478 << " Loop instructions not suitable for LoopVersioningLICM\n\n");
481 // Check loop memory access leagality.
482 if (!legalLoopMemoryAccesses()) {
485 << " Loop memory access not suitable for LoopVersioningLICM\n\n");
487 return OptimizationRemarkMissed(DEBUG_TYPE
, "IllegalLoopMemoryAccess",
488 CurLoop
->getStartLoc(),
489 CurLoop
->getHeader())
490 << " Unsafe Loop memory access";
494 // Loop versioning is feasible, return true.
495 LLVM_DEBUG(dbgs() << " Loop Versioning found to be beneficial\n\n");
497 return OptimizationRemark(DEBUG_TYPE
, "IsLegalForVersioning",
498 CurLoop
->getStartLoc(), CurLoop
->getHeader())
499 << " Versioned loop for LICM."
500 << " Number of runtime checks we had to insert "
501 << NV("RuntimeChecks", LAI
->getNumRuntimePointerChecks());
506 /// Update loop with aggressive aliasing assumptions.
507 /// It marks no-alias to any pairs of memory operations by assuming
508 /// loop should not have any must-alias memory accesses pairs.
509 /// During LoopVersioningLICM legality we ignore loops having must
510 /// aliasing memory accesses.
511 void LoopVersioningLICM::setNoAliasToLoop(Loop
*VerLoop
) {
512 // Get latch terminator instruction.
513 Instruction
*I
= VerLoop
->getLoopLatch()->getTerminator();
514 // Create alias scope domain.
515 MDBuilder
MDB(I
->getContext());
516 MDNode
*NewDomain
= MDB
.createAnonymousAliasScopeDomain("LVDomain");
517 StringRef Name
= "LVAliasScope";
518 MDNode
*NewScope
= MDB
.createAnonymousAliasScope(NewDomain
, Name
);
519 SmallVector
<Metadata
*, 4> Scopes
{NewScope
}, NoAliases
{NewScope
};
520 // Iterate over each instruction of loop.
521 // set no-alias for all load & store instructions.
522 for (auto *Block
: CurLoop
->getBlocks()) {
523 for (auto &Inst
: *Block
) {
524 // Only interested in instruction that may modify or read memory.
525 if (!Inst
.mayReadFromMemory() && !Inst
.mayWriteToMemory())
527 // Set no-alias for current instruction.
529 LLVMContext::MD_noalias
,
530 MDNode::concatenate(Inst
.getMetadata(LLVMContext::MD_noalias
),
531 MDNode::get(Inst
.getContext(), NoAliases
)));
532 // set alias-scope for current instruction.
534 LLVMContext::MD_alias_scope
,
535 MDNode::concatenate(Inst
.getMetadata(LLVMContext::MD_alias_scope
),
536 MDNode::get(Inst
.getContext(), Scopes
)));
541 bool LoopVersioningLICM::run(DominatorTree
*DT
) {
542 // Do not do the transformation if disabled by metadata.
543 if (hasLICMVersioningTransformation(CurLoop
) & TM_Disable
)
546 bool Changed
= false;
548 // Check feasiblity of LoopVersioningLICM.
549 // If versioning found to be feasible and beneficial then proceed
550 // else simply return, by cleaning up memory.
551 if (isLegalForVersioning()) {
552 // Do loop versioning.
553 // Create memcheck for memory accessed inside loop.
554 // Clone original loop, and set blocks properly.
555 LoopVersioning
LVer(*LAI
, LAI
->getRuntimePointerChecking()->getChecks(),
556 CurLoop
, &LI
, DT
, SE
);
558 // Set Loop Versioning metaData for original loop.
559 addStringMetadataToLoop(LVer
.getNonVersionedLoop(), LICMVersioningMetaData
);
560 // Set Loop Versioning metaData for version loop.
561 addStringMetadataToLoop(LVer
.getVersionedLoop(), LICMVersioningMetaData
);
562 // Set "llvm.mem.parallel_loop_access" metaData to versioned loop.
563 // FIXME: "llvm.mem.parallel_loop_access" annotates memory access
564 // instructions, not loops.
565 addStringMetadataToLoop(LVer
.getVersionedLoop(),
566 "llvm.mem.parallel_loop_access");
567 // Update version loop with aggressive aliasing assumption.
568 setNoAliasToLoop(LVer
.getVersionedLoop());
576 PreservedAnalyses
LoopVersioningLICMPass::run(Loop
&L
, LoopAnalysisManager
&AM
,
577 LoopStandardAnalysisResults
&LAR
,
579 AliasAnalysis
*AA
= &LAR
.AA
;
580 ScalarEvolution
*SE
= &LAR
.SE
;
581 DominatorTree
*DT
= &LAR
.DT
;
582 const Function
*F
= L
.getHeader()->getParent();
583 OptimizationRemarkEmitter
ORE(F
);
585 LoopAccessInfoManager
LAIs(*SE
, *AA
, *DT
, LAR
.LI
, nullptr);
586 if (!LoopVersioningLICM(AA
, SE
, &ORE
, LAIs
, LAR
.LI
, &L
).run(DT
))
587 return PreservedAnalyses::all();
588 return getLoopPassPreservedAnalyses();