//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs loop invariant code motion, attempting to remove as much
// code from the body of a loop as possible. It does this by either hoisting
// code into the preheader block, or by sinking code to the exit blocks if it is
// safe. This pass also promotes must-aliased memory locations in the loop to
// live in registers, thus hoisting and sinking "invariant" loads and stores.
//
// This pass uses alias analysis for two purposes:
//
//  1. Moving loop invariant loads and calls out of loops. If we can determine
//     that a load or call inside of a loop never aliases anything stored to,
//     we can hoist it or sink it like any other instruction.
//  2. Scalar Promotion of Memory - If there is a store instruction inside of
//     the loop, we try to move the store to happen AFTER the loop instead of
//     inside of the loop. This can only happen if a few conditions are true:
//       A. The pointer stored through is loop invariant
//       B. There are no stores or loads in the loop which _may_ alias the
//          pointer. There are no calls in the loop which mod/ref the pointer.
//     If these conditions are true, we can promote the loads and stores in the
//     loop of the pointer to use a temporary alloca'd variable. We then use
//     the SSAUpdater to construct the appropriate SSA form for the value.
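//
//     As an illustrative (hypothetical) example, a loop such as
//
//       for (i = 0; i < n; i++)
//         *p += a[i];            // p loop invariant, nothing may-aliases *p
//
//     is rewritten so the running value of *p lives in a register inside the
//     loop and a single store to *p is emitted after the loop.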
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"

using namespace llvm;

#define DEBUG_TYPE "licm"

STATISTIC(NumCreatedBlocks, "Number of blocks created");
STATISTIC(NumClonedBranches, "Number of branches cloned");
STATISTIC(NumSunk, "Number of instructions sunk out of loop");
STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
STATISTIC(NumPromoted, "Number of memory locations promoted to registers");

/// Memory promotion is enabled by default.
static cl::opt<bool>
    DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
                     cl::desc("Disable memory promotion in LICM pass"));

static cl::opt<bool> ControlFlowHoisting(
    "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
    cl::desc("Enable control flow (and PHI) hoisting in LICM"));

static cl::opt<uint32_t> MaxNumUsesTraversed(
    "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
    cl::desc("Max num uses visited for identifying load "
             "invariance in loop using invariant start (default = 8)"));

// Default value of zero implies we use the regular alias set tracker mechanism
// instead of the cross product using AA to identify aliasing of the memory
// location we are interested in.
static cl::opt<int>
    LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
                   cl::desc("How many instructions to cross product using AA"));

// Experimental option to allow imprecision in LICM in pathological cases, in
// exchange for faster compile. This is to be removed if MemorySSA starts to
// address the same issue. This flag applies only when LICM uses MemorySSA
// instead of AliasSetTracker. LICM calls MemorySSAWalker's
// getClobberingMemoryAccess, up to the value of the Cap, getting perfect
// accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
// which may not be precise, since optimizeUses is capped. The result is
// correct, but we may not get as "far up" as possible to get which access is
// clobbering the one queried.
static cl::opt<int> LicmMssaOptCap(
    "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
    cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
             "for faster compile. Caps the MemorySSA clobbering calls."));

// Experimentally, memory promotion carries less importance than sinking and
// hoisting. Limit when we do promotion when using MemorySSA, in order to save
// compile time.
static cl::opt<unsigned> AccessCapForMSSAPromotion(
    "max-acc-licm-promotion", cl::init(250), cl::Hidden,
    cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
             "effect. When MSSA in LICM is enabled, then this is the maximum "
             "number of accesses allowed to be present in a loop in order to "
             "enable memory promotion."));

static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
                                  const LoopSafetyInfo *SafetyInfo,
                                  TargetTransformInfo *TTI, bool &FreeInLoop);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
                  MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
                 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
                 bool FreeInLoop);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
                                           const DominatorTree *DT,
                                           const Loop *CurLoop,
                                           const LoopSafetyInfo *SafetyInfo,
                                           OptimizationRemarkEmitter *ORE,
                                           const Instruction *CtxI = nullptr);
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
                                     AliasSetTracker *CurAST, Loop *CurLoop,
                                     AAResults *AA);
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
                                             Loop *CurLoop,
                                             int &LicmMssaOptCounter);
static Instruction *CloneInstructionInExitBlock(
    Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
    const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);

static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
                             AliasSetTracker *AST, MemorySSAUpdater *MSSAU);

static void moveInstructionBefore(Instruction &I, Instruction &Dest,
                                  ICFLoopSafetyInfo &SafetyInfo,
                                  MemorySSAUpdater *MSSAU);

struct LoopInvariantCodeMotion {
  using ASTrackerMapTy = DenseMap<Loop *, std::unique_ptr<AliasSetTracker>>;
  bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
                 TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
                 ScalarEvolution *SE, MemorySSA *MSSA,
                 OptimizationRemarkEmitter *ORE, bool DeleteAST);

  ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }

private:
  ASTrackerMapTy LoopToAliasSetMap;

  std::unique_ptr<AliasSetTracker>
  collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
  std::unique_ptr<AliasSetTracker>
  collectAliasInfoForLoopWithMSSA(Loop *L, AliasAnalysis *AA,
                                  MemorySSAUpdater *MSSAU);
};

struct LegacyLICMPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid
  LegacyLICMPass() : LoopPass(ID) {
    initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L)) {
      // If we have run LICM on a previous loop but now we are skipping
      // (because we've hit the opt-bisect limit), we need to clear the
      // loop alias information.
      LICM.getLoopToAliasSetMap().clear();
      return false;
    }

    auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
    MemorySSA *MSSA = EnableMSSALoopDependency
                          ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
                          : nullptr;
    // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
    // pass. Function analyses need to be preserved across loop transformations
    // but ORE cannot be preserved (see comment before the pass definition).
    OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
    return LICM.runOnLoop(L,
                          &getAnalysis<AAResultsWrapperPass>().getAAResults(),
                          &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
                          &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
                          &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
                          &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
                              *L->getHeader()->getParent()),
                          SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
  }

  /// This transformation requires natural loop information & requires that
  /// loop preheaders be inserted into the CFG...
  ///
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    if (EnableMSSALoopDependency) {
      AU.addRequired<MemorySSAWrapperPass>();
      AU.addPreserved<MemorySSAWrapperPass>();
    }
    AU.addRequired<TargetTransformInfoWrapperPass>();
    getLoopAnalysisUsage(AU);
  }

  using llvm::Pass::doFinalization;

  bool doFinalization() override {
    assert(LICM.getLoopToAliasSetMap().empty() &&
           "Didn't free loop alias sets");
    return false;
  }

private:
  LoopInvariantCodeMotion LICM;

  /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
  void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
                               Loop *L) override;

  /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
  /// set.
  void deleteAnalysisValue(Value *V, Loop *L) override;

  /// Simple Analysis hook. Delete loop L from alias set map.
  void deleteAnalysisLoop(Loop *L) override;
};

PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
                                LoopStandardAnalysisResults &AR, LPMUpdater &) {
  const auto &FAM =
      AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
  Function *F = L.getHeader()->getParent();

  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
  // FIXME: This should probably be optional rather than required.
  if (!ORE)
    report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
                       "cached at a higher level");

  LoopInvariantCodeMotion LICM;
  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
                      AR.MSSA, ORE, true))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();

  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<LoopAnalysis>();

  return PA;
}

char LegacyLICMPass::ID = 0;
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
                    false)

Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }

/// Hoist expressions out of the specified loop. Note, alias info for inner
/// loop is not preserved so it is not a good idea to run LICM multiple
/// times on one loop.
/// We should delete AST for inner loops in the new pass manager to avoid
/// memory leak.
///
bool LoopInvariantCodeMotion::runOnLoop(
    Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
    TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE,
    MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
  bool Changed = false;

  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");

  std::unique_ptr<AliasSetTracker> CurAST;
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  bool NoOfMemAccTooLarge = false;
  int LicmMssaOptCounter = 0;

  if (!MSSA) {
    LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
    CurAST = collectAliasInfoForLoop(L, LI, AA);
  } else {
    LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
    MSSAU = make_unique<MemorySSAUpdater>(MSSA);

    unsigned AccessCapCount = 0;
    for (auto *BB : L->getBlocks()) {
      if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
        for (const auto &MA : *Accesses) {
          (void)MA;
          ++AccessCapCount;
          if (AccessCapCount > AccessCapForMSSAPromotion) {
            NoOfMemAccTooLarge = true;
            break;
          }
        }
      }
      if (NoOfMemAccTooLarge)
        break;
    }
  }

  // Get the preheader block to move instructions into...
  BasicBlock *Preheader = L->getLoopPreheader();

  // Compute loop safety information.
  ICFLoopSafetyInfo SafetyInfo(DT);
  SafetyInfo.computeLoopSafetyInfo(L);

  // We want to visit all of the instructions in this loop... that are not parts
  // of our subloops (they have already had their invariants hoisted out of
  // their loop, into this loop, so there is no need to process the BODIES of
  // the subloops).
  //
  // Traverse the body of the loop in depth first order on the dominator tree so
  // that we are guaranteed to see definitions before we see uses. This allows
  // us to sink instructions in one pass, without iteration. After sinking
  // instructions, we perform another pass to hoist them out of the loop.
  if (L->hasDedicatedExits())
    Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
                          CurAST.get(), MSSAU.get(), &SafetyInfo,
                          NoOfMemAccTooLarge, LicmMssaOptCounter, ORE);
  if (Preheader)
    Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
                           CurAST.get(), MSSAU.get(), &SafetyInfo,
                           NoOfMemAccTooLarge, LicmMssaOptCounter, ORE);

  // Now that all loop invariants have been removed from the loop, promote any
  // memory references to scalars that we can.
  // Don't sink stores from loops without dedicated block exits. Exits
  // containing indirect branches are not transformed by loop simplify,
  // make sure we catch that. An additional load may be generated in the
  // preheader for SSA updater, so also avoid sinking when no preheader
  // is available.
  if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
      !NoOfMemAccTooLarge) {
    // Figure out the loop exits and their insertion points
    SmallVector<BasicBlock *, 8> ExitBlocks;
    L->getUniqueExitBlocks(ExitBlocks);

    // We can't insert into a catchswitch.
    bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
      return isa<CatchSwitchInst>(Exit->getTerminator());
    });

    if (!HasCatchSwitch) {
      SmallVector<Instruction *, 8> InsertPts;
      SmallVector<MemoryAccess *, 8> MSSAInsertPts;
      InsertPts.reserve(ExitBlocks.size());
      if (MSSAU)
        MSSAInsertPts.reserve(ExitBlocks.size());
      for (BasicBlock *ExitBlock : ExitBlocks) {
        InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
        if (MSSAU)
          MSSAInsertPts.push_back(nullptr);
      }

      PredIteratorCache PIC;

      bool Promoted = false;

      // Build an AST using MSSA.
      if (MSSAU)
        CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get());

      // Loop over all of the alias sets in the tracker object.
      for (AliasSet &AS : *CurAST) {
        // We can promote this alias set if it has a store, if it is a "Must"
        // alias set, if the pointer is loop invariant, and if we are not
        // eliminating any volatile loads or stores.
        if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
            !L->isLoopInvariant(AS.begin()->getValue()))
          continue;

        assert(
            !AS.empty() &&
            "Must alias set should have at least one pointer element in it!");

        SmallSetVector<Value *, 8> PointerMustAliases;
        for (const auto &ASI : AS)
          PointerMustAliases.insert(ASI.getValue());

        Promoted |= promoteLoopAccessesToScalars(
            PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
            DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
      }

      // Once we have promoted values across the loop body we have to
      // recursively reform LCSSA as any nested loop may now have values defined
      // within the loop used in the outer loop.
      // FIXME: This is really heavy handed. It would be a bit better to use an
      // SSAUpdater strategy during promotion that was LCSSA aware and reformed
      // it as it went.
      if (Promoted)
        formLCSSARecursively(*L, *DT, LI, SE);

      Changed |= Promoted;
    }
  }

  // Check that neither this loop nor its parent have had LCSSA broken. LICM is
  // specifically moving instructions across the loop boundary and so it is
  // especially in need of sanity checking here.
  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
  assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
         "Parent loop not left in LCSSA form after LICM!");

  // If this loop is nested inside of another one, save the alias information
  // for when we process the outer loop.
  if (!MSSAU.get() && CurAST.get() && L->getParentLoop() && !DeleteAST)
    LoopToAliasSetMap[L] = std::move(CurAST);

  if (MSSAU.get() && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  if (Changed && SE)
    SE->forgetLoopDispositions(L);
  return Changed;
}

/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in reverse depth
/// first order w.r.t the DominatorTree. This allows us to visit uses before
/// definitions, allowing us to sink a loop body in one pass without iteration.
///
bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                      DominatorTree *DT, TargetLibraryInfo *TLI,
                      TargetTransformInfo *TTI, Loop *CurLoop,
                      AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
                      ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
                      int &LicmMssaOptCounter, OptimizationRemarkEmitter *ORE) {

  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
         CurLoop != nullptr && SafetyInfo != nullptr &&
         "Unexpected input to sinkRegion.");
  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
         "Either AliasSetTracker or MemorySSA should be initialized.");

  // We want to visit children before parents. We will enque all the parents
  // before their children in the worklist and process the worklist in reverse
  // order.
  SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);

  bool Changed = false;
  for (DomTreeNode *DTN : reverse(Worklist)) {
    BasicBlock *BB = DTN->getBlock();
    // Only need to process the contents of this block if it is not part of a
    // subloop (which would already have been processed).
    if (inSubLoop(BB, CurLoop, LI))
      continue;

    for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
      Instruction &I = *--II;

      // If the instruction is dead, we would try to sink it because it isn't
      // used in the loop, instead, just delete it.
      if (isInstructionTriviallyDead(&I, TLI)) {
        LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
        ++II;
        eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
        Changed = true;
        continue;
      }

      // Check to see if we can sink this instruction to the exit blocks
      // of the loop. We can do this if all the users of the instruction are
      // outside of the loop. In this case, it doesn't even matter if the
      // operands of the instruction are loop invariant.
      //
      bool FreeInLoop = false;
      if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
                             NoOfMemAccTooLarge, &LicmMssaOptCounter, ORE) &&
          !I.mayHaveSideEffects()) {
        if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) {
          if (!FreeInLoop) {
            ++II;
            eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
          }
          Changed = true;
        }
      }
    }
  }
  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();
  return Changed;
}

// This is a helper class for hoistRegion to make it able to hoist control flow
// in order to be able to hoist phis. The way this works is that we initially
// start hoisting to the loop preheader, and when we see a loop invariant branch
// we make note of this. When we then come to hoist an instruction that's
// conditional on such a branch we duplicate the branch and the relevant control
// flow, then hoist the instruction into the block corresponding to its original
// block in the duplicated control flow.
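// As a rough illustration (hypothetical CFG, not from a specific test): for a
// loop body shaped like
//
//   header:  br i1 %inv, label %a, label %b   ; %inv is loop-invariant
//   a:       %x = add i32 %c1, %c2            ; operands loop-invariant
//            br label %b
//   b:       ...
//
// the branch on %inv can be duplicated in front of the loop, and %x is then
// hoisted into the duplicated copy of block %a rather than into the preheader
// itself.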
class ControlFlowHoister {
private:
  // Information about the loop we are hoisting from
  LoopInfo *LI;
  DominatorTree *DT;
  Loop *CurLoop;
  MemorySSAUpdater *MSSAU;

  // A map of blocks in the loop to the block their instructions will be hoisted
  // to.
  DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;

  // The branches that we can hoist, mapped to the block that marks a
  // convergence point of their control flow.
  DenseMap<BranchInst *, BasicBlock *> HoistableBranches;

public:
  ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
                     MemorySSAUpdater *MSSAU)
      : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}

  void registerPossiblyHoistableBranch(BranchInst *BI) {
    // We can only hoist conditional branches with loop invariant operands.
    if (!ControlFlowHoisting || !BI->isConditional() ||
        !CurLoop->hasLoopInvariantOperands(BI))
      return;

    // The branch destinations need to be in the loop, and we don't gain
    // anything by duplicating conditional branches with duplicate successors,
    // as it's essentially the same as an unconditional branch.
    BasicBlock *TrueDest = BI->getSuccessor(0);
    BasicBlock *FalseDest = BI->getSuccessor(1);
    if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
        TrueDest == FalseDest)
      return;

    // We can hoist BI if one branch destination is the successor of the other,
    // or both have a common successor which we check by seeing if the
    // intersection of their successors is non-empty.
    // TODO: This could be expanded to allowing branches where both ends
    // eventually converge to a single block.
    SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
    TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
    FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
    BasicBlock *CommonSucc = nullptr;
    if (TrueDestSucc.count(FalseDest)) {
      CommonSucc = FalseDest;
    } else if (FalseDestSucc.count(TrueDest)) {
      CommonSucc = TrueDest;
    } else {
      set_intersect(TrueDestSucc, FalseDestSucc);
      // If there's one common successor use that.
      if (TrueDestSucc.size() == 1)
        CommonSucc = *TrueDestSucc.begin();
      // If there's more than one pick whichever appears first in the block list
      // (we can't use the value returned by TrueDestSucc.begin() as it's
      // unpredictable which element gets returned).
      else if (!TrueDestSucc.empty()) {
        Function *F = TrueDest->getParent();
        auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
        auto It = std::find_if(F->begin(), F->end(), IsSucc);
        assert(It != F->end() && "Could not find successor in function");
        CommonSucc = &*It;
      }
    }
    // The common successor has to be dominated by the branch, as otherwise
    // there will be some other path to the successor that will not be
    // controlled by this branch so any phi we hoist would be controlled by the
    // wrong condition. This also takes care of avoiding hoisting of loop back
    // edges.
    // TODO: In some cases this could be relaxed if the successor is dominated
    // by another block that's been hoisted and we can guarantee that the
    // control flow has been replicated exactly.
    if (CommonSucc && DT->dominates(BI, CommonSucc))
      HoistableBranches[BI] = CommonSucc;
  }

  bool canHoistPHI(PHINode *PN) {
    // The phi must have loop invariant operands.
    if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
      return false;
    // We can hoist phis if the block they are in is the target of hoistable
    // branches which cover all of the predecessors of the block.
    SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
    BasicBlock *BB = PN->getParent();
    for (BasicBlock *PredBB : predecessors(BB))
      PredecessorBlocks.insert(PredBB);
    // If we have less predecessor blocks than predecessors then the phi will
    // have more than one incoming value for the same block which we can't
    // handle.
    // TODO: This could be handled by erasing some of the duplicate incoming
    // values.
    if (PredecessorBlocks.size() != pred_size(BB))
      return false;
    for (auto &Pair : HoistableBranches) {
      if (Pair.second == BB) {
        // Which blocks are predecessors via this branch depends on if the
        // branch is triangle-like or diamond-like.
        if (Pair.first->getSuccessor(0) == BB) {
          PredecessorBlocks.erase(Pair.first->getParent());
          PredecessorBlocks.erase(Pair.first->getSuccessor(1));
        } else if (Pair.first->getSuccessor(1) == BB) {
          PredecessorBlocks.erase(Pair.first->getParent());
          PredecessorBlocks.erase(Pair.first->getSuccessor(0));
        } else {
          PredecessorBlocks.erase(Pair.first->getSuccessor(0));
          PredecessorBlocks.erase(Pair.first->getSuccessor(1));
        }
      }
    }
    // PredecessorBlocks will now be empty if for every predecessor of BB we
    // found a hoistable branch source.
    return PredecessorBlocks.empty();
  }

  BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
    if (!ControlFlowHoisting)
      return CurLoop->getLoopPreheader();
    // If BB has already been hoisted, return that
    if (HoistDestinationMap.count(BB))
      return HoistDestinationMap[BB];

    // Check if this block is conditional based on a pending branch
    auto HasBBAsSuccessor =
        [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
          return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
                                       Pair.first->getSuccessor(1) == BB);
        };
    auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(),
                           HasBBAsSuccessor);

    // If not involved in a pending branch, hoist to preheader
    BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
    if (It == HoistableBranches.end()) {
      LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()
                        << " as hoist destination for " << BB->getName()
                        << "\n");
      HoistDestinationMap[BB] = InitialPreheader;
      return InitialPreheader;
    }

    BranchInst *BI = It->first;
    assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
               HoistableBranches.end() &&
           "BB is expected to be the target of at most one branch");

    LLVMContext &C = BB->getContext();
    BasicBlock *TrueDest = BI->getSuccessor(0);
    BasicBlock *FalseDest = BI->getSuccessor(1);
    BasicBlock *CommonSucc = HoistableBranches[BI];
    BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());

    // Create hoisted versions of blocks that currently don't have them
    auto CreateHoistedBlock = [&](BasicBlock *Orig) {
      if (HoistDestinationMap.count(Orig))
        return HoistDestinationMap[Orig];
      BasicBlock *New =
          BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
      HoistDestinationMap[Orig] = New;
      DT->addNewBlock(New, HoistTarget);
      if (CurLoop->getParentLoop())
        CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
      LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
                        << " as hoist destination for " << Orig->getName()
                        << "\n");
      return New;
    };
    BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
    BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
    BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);

    // Link up these blocks with branches.
    if (!HoistCommonSucc->getTerminator()) {
      // The new common successor we've generated will branch to whatever that
      // hoist target branched to.
      BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
      assert(TargetSucc && "Expected hoist target to have a single successor");
      HoistCommonSucc->moveBefore(TargetSucc);
      BranchInst::Create(TargetSucc, HoistCommonSucc);
    }
    if (!HoistTrueDest->getTerminator()) {
      HoistTrueDest->moveBefore(HoistCommonSucc);
      BranchInst::Create(HoistCommonSucc, HoistTrueDest);
    }
    if (!HoistFalseDest->getTerminator()) {
      HoistFalseDest->moveBefore(HoistCommonSucc);
      BranchInst::Create(HoistCommonSucc, HoistFalseDest);
    }

    // If BI is being cloned to what was originally the preheader then
    // HoistCommonSucc will now be the new preheader.
    if (HoistTarget == InitialPreheader) {
      // Phis in the loop header now need to use the new preheader.
      InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
      if (MSSAU)
        MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
            HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
      // The new preheader dominates the loop header.
      DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
      DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
      DT->changeImmediateDominator(HeaderNode, PreheaderNode);
      // The preheader hoist destination is now the new preheader, with the
      // exception of the hoist destination of this branch.
      for (auto &Pair : HoistDestinationMap)
        if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
          Pair.second = HoistCommonSucc;
    }

    // Now finally clone BI.
    ReplaceInstWithInst(
        HoistTarget->getTerminator(),
        BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));

    assert(CurLoop->getLoopPreheader() &&
           "Hoisting blocks should not have destroyed preheader");
    return HoistDestinationMap[BB];
  }
};

/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
///
bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                       DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
                       AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
                       ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
                       int &LicmMssaOptCounter,
                       OptimizationRemarkEmitter *ORE) {
  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
         CurLoop != nullptr && SafetyInfo != nullptr &&
         "Unexpected input to hoistRegion.");
  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
         "Either AliasSetTracker or MemorySSA should be initialized.");

  ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);

  // Keep track of instructions that have been hoisted, as they may need to be
  // re-hoisted if they end up not dominating all of their uses.
  SmallVector<Instruction *, 16> HoistedInstructions;

  // For PHI hoisting to work we need to hoist blocks before their successors.
  // We can do this by iterating through the blocks in the loop in reverse
  // post-order.
  LoopBlocksRPO Worklist(CurLoop);
  Worklist.perform(LI);
  bool Changed = false;
  for (BasicBlock *BB : Worklist) {
    // Only need to process the contents of this block if it is not part of a
    // subloop (which would already have been processed).
    if (inSubLoop(BB, CurLoop, LI))
      continue;

    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
      Instruction &I = *II++;
      // Try constant folding this instruction. If all the operands are
      // constants, it is technically hoistable, but it would be better to
      // just fold this instruction.
      if (Constant *C = ConstantFoldInstruction(
              &I, I.getModule()->getDataLayout(), TLI)) {
        LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
                          << '\n');
        if (CurAST)
          CurAST->copyValue(&I, C);
        // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
        I.replaceAllUsesWith(C);
        if (isInstructionTriviallyDead(&I, TLI))
          eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
        Changed = true;
        continue;
      }

      // Try hoisting the instruction out to the preheader. We can only do
      // this if all of the operands of the instruction are loop invariant and
      // if it is safe to hoist the instruction.
      // TODO: It may be safe to hoist if we are hoisting to a conditional block
      // and we have accurately duplicated the control flow from the loop header
      // to that block.
      if (CurLoop->hasLoopInvariantOperands(&I) &&
          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
                             NoOfMemAccTooLarge, &LicmMssaOptCounter, ORE) &&
          isSafeToExecuteUnconditionally(
              I, DT, CurLoop, SafetyInfo, ORE,
              CurLoop->getLoopPreheader()->getTerminator())) {
        hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
              MSSAU, ORE);
        HoistedInstructions.push_back(&I);
        Changed = true;
        continue;
      }

      // Attempt to remove floating point division out of the loop by
      // converting it to a reciprocal multiplication.
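      // Illustrative shape of the rewrite (hypothetical operands): with the
      // 'arcp' fast-math flag,
      //   %q = fdiv arcp double %x, %d            ; %d loop-invariant
      // becomes
      //   %r = fdiv arcp double 1.000000e+00, %d  ; hoistable reciprocal
      //   %q = fmul arcp double %x, %r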
      if (I.getOpcode() == Instruction::FDiv &&
          CurLoop->isLoopInvariant(I.getOperand(1)) &&
          I.hasAllowReciprocal()) {
        auto Divisor = I.getOperand(1);
        auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
        auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
        ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
        SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
        ReciprocalDivisor->insertBefore(&I);

        auto Product =
            BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
        Product->setFastMathFlags(I.getFastMathFlags());
        SafetyInfo->insertInstructionTo(Product, I.getParent());
        Product->insertAfter(&I);
        I.replaceAllUsesWith(Product);
        eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);

        hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
              SafetyInfo, MSSAU, ORE);
        HoistedInstructions.push_back(ReciprocalDivisor);
        Changed = true;
        continue;
      }

      auto IsInvariantStart = [&](Instruction &I) {
        using namespace PatternMatch;
        return I.use_empty() &&
               match(&I, m_Intrinsic<Intrinsic::invariant_start>());
      };
      auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
        return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
               SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
      };
      if ((IsInvariantStart(I) || isGuard(&I)) &&
          CurLoop->hasLoopInvariantOperands(&I) &&
          MustExecuteWithoutWritesBefore(I)) {
        hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
              MSSAU, ORE);
        HoistedInstructions.push_back(&I);
        Changed = true;
        continue;
      }

      if (PHINode *PN = dyn_cast<PHINode>(&I)) {
        if (CFH.canHoistPHI(PN)) {
          // Redirect incoming blocks first to ensure that we create hoisted
          // versions of those blocks before we hoist the phi.
          for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
            PN->setIncomingBlock(
                i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
          hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
                MSSAU, ORE);
          assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
          Changed = true;
          continue;
        }
      }

      // Remember possibly hoistable branches so we can actually hoist them
      // later if needed.
      if (BranchInst *BI = dyn_cast<BranchInst>(&I))
        CFH.registerPossiblyHoistableBranch(BI);
    }
  }

  // If we hoisted instructions to a conditional block they may not dominate
  // their uses that weren't hoisted (such as phis where some operands are not
  // loop invariant). If so make them unconditional by moving them to their
  // immediate dominator. We iterate through the instructions in reverse order
  // which ensures that when we rehoist an instruction we rehoist its operands,
  // and also keep track of where in the block we are rehoisting to, to make sure
  // that we rehoist instructions before the instructions that use them.
  Instruction *HoistPoint = nullptr;
  if (ControlFlowHoisting) {
    for (Instruction *I : reverse(HoistedInstructions)) {
      if (!llvm::all_of(I->uses(),
                        [&](Use &U) { return DT->dominates(I, U); })) {
        BasicBlock *Dominator =
            DT->getNode(I->getParent())->getIDom()->getBlock();
        if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
          if (HoistPoint)
            assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
                   "New hoist point expected to dominate old hoist point");
          HoistPoint = Dominator->getTerminator();
        }
        LLVM_DEBUG(dbgs() << "LICM rehoisting to "
                          << HoistPoint->getParent()->getName()
                          << ": " << *I << "\n");
        moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU);
        HoistPoint = I;
        Changed = true;
      }
    }
  }
  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  // Now that we've finished hoisting make sure that LI and DT are still
  // valid.
  assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
         "Dominator tree verification failed");

  return Changed;
}

// Return true if LI is invariant within scope of the loop. LI is invariant if
// CurLoop is dominated by an invariant.start representing the same memory
// location and size as the memory location LI loads from, and also the
// invariant.start has no uses.
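// An illustrative IR pattern (hypothetical values) that satisfies this:
//   %p8  = bitcast i32* %p to i8*
//   %inv = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %p8)
//   ...loop containing:  %v = load i32, i32* %p
// Here the invariant.start covers at least as many bits as the load, its
// result has no uses, and its block properly dominates the loop header.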
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
                                  Loop *CurLoop) {
  Value *Addr = LI->getOperand(0);
  const DataLayout &DL = LI->getModule()->getDataLayout();
  const uint32_t LocSizeInBits = DL.getTypeSizeInBits(
      cast<PointerType>(Addr->getType())->getElementType());

  // if the type is i8 addrspace(x)*, we know this is the type of
  // llvm.invariant.start operand
  auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
                                     LI->getPointerAddressSpace());
  unsigned BitcastsVisited = 0;
  // Look through bitcasts until we reach the i8* type (this is invariant.start
  // operand type).
  while (Addr->getType() != PtrInt8Ty) {
    auto *BC = dyn_cast<BitCastInst>(Addr);
    // Avoid traversing high number of bitcast uses.
    if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
      return false;
    Addr = BC->getOperand(0);
  }

  unsigned UsesVisited = 0;
  // Traverse all uses of the load operand value, to see if invariant.start is
  // one of the uses, and whether it dominates the load instruction.
  for (auto *U : Addr->users()) {
    // Avoid traversing for Load operand with high number of users.
    if (++UsesVisited > MaxNumUsesTraversed)
      return false;
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    // If there are escaping uses of the invariant.start instruction, the load
    // may be non-invariant.
    if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
        !II->use_empty())
      continue;
    unsigned InvariantSizeInBits =
        cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
    // Confirm the invariant.start location size contains the load operand size
    // in bits. Also, the invariant.start should dominate the load, and we
    // should not hoist the load out of a loop that contains this dominating
    // invariant.start.
    if (LocSizeInBits <= InvariantSizeInBits &&
        DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
      return true;
  }

  return false;
}

/// Return true if-and-only-if we know how to (mechanically) both hoist and
/// sink a given instruction out of a loop. Does not address legality
/// concerns such as aliasing or speculation safety.
bool isHoistableAndSinkableInst(Instruction &I) {
  // Only these instructions are hoistable/sinkable.
  return (isa<LoadInst>(I) || isa<StoreInst>(I) ||
          isa<CallInst>(I) || isa<FenceInst>(I) ||
          isa<BinaryOperator>(I) || isa<CastInst>(I) ||
          isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
          isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
          isa<ExtractElementInst>(I) || isa<ShuffleVectorInst>(I) ||
          isa<ExtractValueInst>(I) || isa<InsertValueInst>(I));
}

/// Return true if all of the alias sets within this AST are known not to
/// contain a Mod, or if MSSA knows there are no MemoryDefs in the loop.
bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
                const Loop *L) {
  if (CurAST) {
    for (AliasSet &AS : *CurAST) {
      if (!AS.isForwardingAliasSet() && AS.isMod()) {
        return false;
      }
    }
    return true;
  } else { /*MSSAU*/
    for (auto *BB : L->getBlocks())
      if (MSSAU->getMemorySSA()->getBlockDefs(BB))
        return false;
    return true;
  }
}

/// Return true if I is the only Instruction with a MemoryAccess in L.
bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
                        const MemorySSAUpdater *MSSAU) {
  for (auto *BB : L->getBlocks())
    if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
      int NotAPhi = 0;
      for (const auto &Acc : *Accs) {
        if (isa<MemoryPhi>(&Acc))
          continue;
        const auto *MUD = cast<MemoryUseOrDef>(&Acc);
        if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
          return false;
      }
    }
  return true;
}

bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                              Loop *CurLoop, AliasSetTracker *CurAST,
                              MemorySSAUpdater *MSSAU,
                              bool TargetExecutesOncePerLoop,
                              bool NoOfMemAccTooLarge, int *LicmMssaOptCounter,
                              OptimizationRemarkEmitter *ORE) {
  // If we don't understand the instruction, bail early.
  if (!isHoistableAndSinkableInst(I))
    return false;

  MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
  if (MSSA)
    assert(LicmMssaOptCounter != nullptr && "Counter cannot be null.");

  // Loads have extra constraints we have to verify before we can hoist them.
  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
    if (!LI->isUnordered())
      return false; // Don't sink/hoist volatile or ordered atomic loads!

    // Loads from constant memory are always safe to move, even if they end up
    // in the same alias set as something that ends up being modified.
    if (AA->pointsToConstantMemory(LI->getOperand(0)))
      return true;
    if (LI->getMetadata(LLVMContext::MD_invariant_load))
      return true;

    if (LI->isAtomic() && !TargetExecutesOncePerLoop)
      return false; // Don't risk duplicating unordered loads

    // This checks for an invariant.start dominating the load.
    if (isLoadInvariantInLoop(LI, DT, CurLoop))
      return true;

    bool Invalidated;
    if (CurAST)
      Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
                                             CurLoop, AA);
    else
      Invalidated = pointerInvalidatedByLoopWithMSSA(
          MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop,
          *LicmMssaOptCounter);
    // Check loop-invariant address because this may also be a sinkable load
    // whose address is not necessarily loop-invariant.
    if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
      ORE->emit([&]() {
        return OptimizationRemarkMissed(
                   DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
               << "failed to move load with loop-invariant address "
                  "because the loop may invalidate its value";
      });

    return !Invalidated;
  } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
    // Don't sink or hoist dbg info; it's legal, but not useful.
    if (isa<DbgInfoIntrinsic>(I))
      return false;

    // Don't sink calls which can throw.
    if (CI->mayThrow())
      return false;

    using namespace PatternMatch;
    if (match(CI, m_Intrinsic<Intrinsic::assume>()))
      // Assumes don't actually alias anything or throw
      return true;

    // Handle simple cases by querying alias analysis.
    FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
    if (Behavior == FMRB_DoesNotAccessMemory)
      return true;
    if (AliasAnalysis::onlyReadsMemory(Behavior)) {
      // A readonly argmemonly function only reads from memory pointed to by
      // its arguments with arbitrary offsets. If we can prove there are no
      // writes to this memory in the loop, we can hoist or sink.
      if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
        // TODO: expand to writeable arguments
        for (Value *Op : CI->arg_operands())
          if (Op->getType()->isPointerTy()) {
            bool Invalidated;
            if (CurAST)
              Invalidated = pointerInvalidatedByLoop(
                  MemoryLocation(Op, LocationSize::unknown(), AAMDNodes()),
                  CurAST, CurLoop, AA);
            else
              Invalidated = pointerInvalidatedByLoopWithMSSA(
                  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop,
                  *LicmMssaOptCounter);
            if (Invalidated)
              return false;
          }
        return true;
      }

      // If this call only reads from memory and there are no writes to memory
      // in the loop, we can hoist or sink the call as appropriate.
      if (isReadOnly(CurAST, MSSAU, CurLoop))
        return true;
    }

    // FIXME: This should use mod/ref information to see if we can hoist or
    // sink the call.

    return false;
  } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
    // Fences alias (most) everything to provide ordering. For the moment,
    // just give up if there are any other memory operations in the loop.
    if (CurAST) {
      auto Begin = CurAST->begin();
      assert(Begin != CurAST->end() && "must contain FI");
      if (std::next(Begin) != CurAST->end())
        // constant memory for instance, TODO: handle better
        return false;
      auto *UniqueI = Begin->getUniqueInstruction();
      if (!UniqueI)
        // other memory op, give up
        return false;
      (void)FI; // suppress unused variable warning
      assert(UniqueI == FI && "AS must contain FI");
      return true;
    } else // MSSAU
      return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
  } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
    if (!SI->isUnordered())
      return false; // Don't sink/hoist volatile or ordered atomic store!

    // We can only hoist a store that we can prove writes a value which is not
    // read or overwritten within the loop. For those cases, we fallback to
    // load store promotion instead. TODO: We can extend this to cases where
    // there is exactly one write to the location and that write dominates an
    // arbitrary number of reads in the loop.
    if (CurAST) {
      auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));

      if (AS.isRef() || !AS.isMustAlias())
        // Quick exit test, handled by the full path below as well.
        return false;
      auto *UniqueI = AS.getUniqueInstruction();
      if (!UniqueI)
        // other memory op, give up
        return false;
      assert(UniqueI == SI && "AS must contain SI");
      return true;
    } else { // MSSAU
      if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
        return true;
      if (*LicmMssaOptCounter < LicmMssaOptCap) {
        auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
        (*LicmMssaOptCounter)++;
        // If there are no clobbering Defs in the loop, we still need to check
        // for interfering Uses. If there are more accesses than the Promotion
        // cap, give up, we're not walking a list that long. Otherwise, walk the
        // list, check each Use if it's optimized to an access outside the loop.
        // If yes, store is safe to hoist. This is fairly restrictive, but
        // conservatively correct.
        // TODO: Cache set of Uses on the first walk in runOnLoop, update when
        // moving accesses. Can also extend to dominating uses.
        if ((!MSSA->isLiveOnEntryDef(Source) &&
             CurLoop->contains(Source->getBlock())) ||
            NoOfMemAccTooLarge)
          return false;
        for (auto *BB : CurLoop->getBlocks())
          if (auto *Accesses = MSSA->getBlockAccesses(BB))
            for (const auto &MA : *Accesses)
              if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
                auto *MD = MU->getDefiningAccess();
                if (!MSSA->isLiveOnEntryDef(MD) &&
                    CurLoop->contains(MD->getBlock()))
                  return false;
              }
        return true;
      }
      return false;
    }
  }

  assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");

  // We've established mechanical ability and aliasing, it's up to the caller
  // to check fault safety
  return true;
}

/// Returns true if a PHINode is trivially replaceable with an Instruction.
/// This is true when all incoming values are that instruction.
/// This pattern occurs most often with LCSSA PHI nodes.
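/// For example (illustrative), an LCSSA exit phi like
///   %x.lcssa = phi i32 [ %x, %loop.bb1 ], [ %x, %loop.bb2 ]
/// has every incoming value equal to the single in-loop instruction %x, so
/// the phi can simply be rewritten in terms of (a sunk clone of) %x.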
static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
  for (const Value *IncValue : PN.incoming_values())
    if (IncValue != &I)
      return false;

  return true;
}

/// Return true if the instruction is free in the loop.
static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
                         const TargetTransformInfo *TTI) {

  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
    if (TTI->getUserCost(GEP) != TargetTransformInfo::TCC_Free)
      return false;
    // For a GEP, we cannot simply use getUserCost because currently it
    // optimistically assumes that a GEP will fold into addressing mode
    // regardless of its users.
    const BasicBlock *BB = GEP->getParent();
    for (const User *U : GEP->users()) {
      const Instruction *UI = cast<Instruction>(U);
      if (CurLoop->contains(UI) &&
          (BB != UI->getParent() ||
           (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
        return false;
    }
    return true;
  } else
    return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
}

/// Return true if the only users of this instruction are outside of
/// the loop. If this is true, we can sink the instruction to the exit
/// blocks of the loop.
///
/// We also return true if the instruction could be folded away in lowering.
/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
                                  const LoopSafetyInfo *SafetyInfo,
                                  TargetTransformInfo *TTI, bool &FreeInLoop) {
  const auto &BlockColors = SafetyInfo->getBlockColors();
  bool IsFree = isFreeInLoop(I, CurLoop, TTI);
  for (const User *U : I.users()) {
    const Instruction *UI = cast<Instruction>(U);
    if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
      const BasicBlock *BB = PN->getParent();
      // We cannot sink uses in catchswitches.
      if (isa<CatchSwitchInst>(BB->getTerminator()))
        return false;

      // We need to sink a callsite to a unique funclet. Avoid sinking if the
      // phi use is too muddled.
      if (isa<CallInst>(I))
        if (!BlockColors.empty() &&
            BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
          return false;
    }

    if (CurLoop->contains(UI)) {
      if (IsFree) {
        FreeInLoop = true;
        continue;
      } else
        return false;
    }
  }
  return true;
}

static Instruction *CloneInstructionInExitBlock(
    Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
    const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
  Instruction *New;
  if (auto *CI = dyn_cast<CallInst>(&I)) {
    const auto &BlockColors = SafetyInfo->getBlockColors();

    // Sinking call-sites need to be handled differently from other
    // instructions. The cloned call-site needs a funclet bundle operand
    // appropriate for its location in the CFG.
    SmallVector<OperandBundleDef, 1> OpBundles;
    for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
         BundleIdx != BundleEnd; ++BundleIdx) {
      OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
      if (Bundle.getTagID() == LLVMContext::OB_funclet)
        continue;

      OpBundles.emplace_back(Bundle);
    }

    if (!BlockColors.empty()) {
      const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
      assert(CV.size() == 1 && "non-unique color for exit block!");
      BasicBlock *BBColor = CV.front();
      Instruction *EHPad = BBColor->getFirstNonPHI();
      if (EHPad->isEHPad())
        OpBundles.emplace_back("funclet", EHPad);
    }

    New = CallInst::Create(CI, OpBundles);
  } else {
    New = I.clone();
  }

  ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
  if (!I.getName().empty())
    New->setName(I.getName() + ".le");

  MemoryAccess *OldMemAcc;
  if (MSSAU && (OldMemAcc = MSSAU->getMemorySSA()->getMemoryAccess(&I))) {
    // Create a new MemoryAccess and let MemorySSA set its defining access.
    MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
        New, nullptr, New->getParent(), MemorySSA::Beginning);
    if (NewMemAcc) {
      if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
        MSSAU->insertDef(MemDef, /*RenameUses=*/true);
      else {
        auto *MemUse = cast<MemoryUse>(NewMemAcc);
        MSSAU->insertUse(MemUse);
      }
    }
  }

  // Build LCSSA PHI nodes for any in-loop operands. Note that this is
  // particularly cheap because we can rip off the PHI node that we're
  // replacing for the number and blocks of the predecessors.
  // OPT: If this shows up in a profile, we can instead finish sinking all
  // invariant instructions, and then walk their operands to re-establish
  // LCSSA. That will eliminate creating PHI nodes just to nuke them when
  // sinking bottom-up.
  for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
       ++OI)
    if (Instruction *OInst = dyn_cast<Instruction>(*OI))
      if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
        if (!OLoop->contains(&PN)) {
          PHINode *OpPN =
              PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
                              OInst->getName() + ".lcssa", &ExitBlock.front());
          for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
            OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
          *OI = OpPN;
        }
  return New;
}

static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
                             AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
  if (AST)
    AST->deleteValue(&I);
  if (MSSAU)
    MSSAU->removeMemoryAccess(&I);
  SafetyInfo.removeInstruction(&I);
  I.eraseFromParent();
}

static void moveInstructionBefore(Instruction &I, Instruction &Dest,
                                  ICFLoopSafetyInfo &SafetyInfo,
                                  MemorySSAUpdater *MSSAU) {
  SafetyInfo.removeInstruction(&I);
  SafetyInfo.insertInstructionTo(&I, Dest.getParent());
  I.moveBefore(&Dest);
  if (MSSAU)
    if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
            MSSAU->getMemorySSA()->getMemoryAccess(&I)))
      MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::End);
}

static Instruction *sinkThroughTriviallyReplaceablePHI(
    PHINode *TPN, Instruction *I, LoopInfo *LI,
    SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
    const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
    MemorySSAUpdater *MSSAU) {
  assert(isTriviallyReplaceablePHI(*TPN, *I) &&
         "Expect only trivially replaceable PHI");
  BasicBlock *ExitBlock = TPN->getParent();
  Instruction *New;
  auto It = SunkCopies.find(ExitBlock);
  if (It != SunkCopies.end())
    New = It->second;
  else
    New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock(
        *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
  return New;
}

static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
  BasicBlock *BB = PN->getParent();
  if (!BB->canSplitPredecessors())
    return false;
  // It's not impossible to split EHPad blocks, but if BlockColors already exist
  // it requires updating BlockColors for all offspring blocks accordingly. By
  // skipping such corner case, we can make updating BlockColors after splitting
  // predecessor fairly simple.
  if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
    return false;
  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
    BasicBlock *BBPred = *PI;
    if (isa<IndirectBrInst>(BBPred->getTerminator()))
      return false;
  }
  return true;
}

static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
                                        LoopInfo *LI, const Loop *CurLoop,
                                        LoopSafetyInfo *SafetyInfo,
                                        MemorySSAUpdater *MSSAU) {
  SmallVector<BasicBlock *, 32> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
                                             ExitBlocks.end());
  BasicBlock *ExitBB = PN->getParent();
  assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");

  // Split predecessors of the loop exit so that instructions in the loop are
  // exposed to exit blocks through trivially replaceable PHIs while keeping the
  // loop in the canonical form where each predecessor of each exit block should
  // be contained within the loop. For example, this will convert an exit block
  // containing a non-trivially replaceable PHI such as
  //
  //   %p = phi [%v1, %LB1], [%v2, %LB2]            <-- non-trivially replaceable
  //
  // into split exit blocks reached via
  //
  //   br %LE.split, %LB2
  //   ...
  //   br %LE.split2, %LB1
  //
  // each containing a trivially replaceable PHI:
  //
  //   %p1 = phi [%v1, %LB1]                        <-- trivially replaceable
  //   %p2 = phi [%v2, %LB2]                        <-- trivially replaceable
  //   %p  = phi [%p1, %LE.split], [%p2, %LE.split2]
  //
  const auto &BlockColors = SafetyInfo->getBlockColors();
  SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
  while (!PredBBs.empty()) {
    BasicBlock *PredBB = *PredBBs.begin();
    assert(CurLoop->contains(PredBB) &&
           "Expect all predecessors are in the loop");
    if (PN->getBasicBlockIndex(PredBB) >= 0) {
      BasicBlock *NewPred = SplitBlockPredecessors(
          ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
      // Since we do not allow splitting EH-block with BlockColors in
      // canSplitPredecessors(), we can simply assign predecessor's color to
      // the new block.
      if (!BlockColors.empty())
        // Grab a reference to the ColorVector to be inserted before getting the
        // reference to the vector we are copying because inserting the new
        // element in BlockColors might cause the map to be reallocated.
        SafetyInfo->copyColors(NewPred, PredBB);
    }
    PredBBs.remove(PredBB);
  }
}


/// When an instruction is found to only be used outside of the loop, this
/// function moves it to the exit blocks and patches up SSA form as needed.
/// This method is guaranteed to remove the original instruction from its
/// position, and may either delete it or move it to outside of the loop.
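///
/// For example (illustrative IR): if the only user of
///   %a = add i32 %x, 1
/// inside the loop is the LCSSA PHI "%a.lcssa = phi i32 [ %a, %loop ]" in an
/// exit block, the add is cloned into that exit block and %a.lcssa is
/// replaced with the clone, so the original add no longer has to execute on
/// every iteration.
///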
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
                 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
                 bool FreeInLoop) {
  LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
  ORE->emit([&]() {
    return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
           << "sinking " << ore::NV("Inst", &I);
  });
  bool Changed = false;
  if (isa<LoadInst>(I))
    ++NumMovedLoads;
  else if (isa<CallInst>(I))
    ++NumMovedCalls;
  ++NumSunk;

  // Iterate over users to be ready for actual sinking. Replace users in
  // unreachable blocks with undef and make all user PHIs trivially
  // replaceable.
  SmallPtrSet<Instruction *, 8> VisitedUsers;
  for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
    auto *User = cast<Instruction>(*UI);
    Use &U = UI.getUse();
    ++UI;

    if (VisitedUsers.count(User) || CurLoop->contains(User))
      continue;

    if (!DT->isReachableFromEntry(User->getParent())) {
      U = UndefValue::get(I.getType());
      Changed = true;
      continue;
    }

    // The user must be a PHI node.
    PHINode *PN = cast<PHINode>(User);

    // Surprisingly, instructions can be used outside of loops without any
    // exits. This can only happen in PHI nodes if the incoming block is
    // unreachable.
    BasicBlock *BB = PN->getIncomingBlock(U);
    if (!DT->isReachableFromEntry(BB)) {
      U = UndefValue::get(I.getType());
      Changed = true;
      continue;
    }

    VisitedUsers.insert(PN);
    if (isTriviallyReplaceablePHI(*PN, I))
      continue;

    if (!canSplitPredecessors(PN, SafetyInfo))
      return Changed;

    // Split predecessors of the PHI so that we can make users trivially
    // replaceable.
    splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);

    // Should rebuild the iterators, as they may be invalidated by
    // splitPredecessorsOfLoopExit().
    UI = I.user_begin();
    UE = I.user_end();
  }

  if (VisitedUsers.empty())
    return Changed;

#ifndef NDEBUG
  SmallVector<BasicBlock *, 32> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
                                             ExitBlocks.end());
#endif

  // Clones of this instruction. Don't create more than one per exit block!
  SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;

  // If this instruction is only used outside of the loop, then all users are
  // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
  // the instruction.
  SmallSetVector<User *, 8> Users(I.user_begin(), I.user_end());
  for (auto *UI : Users) {
    auto *User = cast<Instruction>(UI);

    if (CurLoop->contains(User))
      continue;

    PHINode *PN = cast<PHINode>(User);
    assert(ExitBlockSet.count(PN->getParent()) &&
           "The LCSSA PHI is not in an exit block!");
    // The PHI must be trivially replaceable.
    Instruction *New = sinkThroughTriviallyReplaceablePHI(
        PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
    PN->replaceAllUsesWith(New);
    eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
    Changed = true;
  }
  return Changed;
}

/// When an instruction is found to use only loop-invariant operands and it is
/// safe to hoist, this function is called to do the dirty work.
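///
/// For example, in
///   for (i = 0; i != n; ++i) A[i] = x * y;
/// the multiply "x * y" uses only operands defined outside the loop, so it
/// can be computed once in the preheader instead of on every iteration.
///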
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
                  MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
  LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
                    << "\n");
  ORE->emit([&]() {
    return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
                                                         << ore::NV("Inst", &I);
  });

  // Metadata can be dependent on conditions we are hoisting above.
  // Conservatively strip all metadata on the instruction unless we were
  // guaranteed to execute I if we entered the loop, in which case the metadata
  // is valid in the loop preheader.
  if (I.hasMetadataOtherThanDebugLoc() &&
      // The check on hasMetadataOtherThanDebugLoc is to prevent us from
      // burning time in isGuaranteedToExecute if we don't actually have
      // anything to drop. It is a compile time optimization, not required for
      // correctness.
      !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
    I.dropUnknownNonDebugMetadata();

  if (isa<PHINode>(I))
    // Move the new node to the end of the phi list in the destination block.
    moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU);
  else
    // Move the new node to the destination block, before its terminator.
    moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU);

  // Do not retain debug locations when we are moving instructions to different
  // basic blocks, because we want to avoid jumpy line tables. Calls, however,
  // need to retain their debug locs because they may be inlined.
  // FIXME: How do we retain source locations without causing poor debugging
  // behavior?
  if (!isa<CallInst>(I))
    I.setDebugLoc(DebugLoc());

  if (isa<LoadInst>(I))
    ++NumMovedLoads;
  else if (isa<CallInst>(I))
    ++NumMovedCalls;
  ++NumHoisted;
}

/// Only sink or hoist an instruction if it is not a trapping instruction, if
/// it is known not to trap when moved to the preheader, or if it is a
/// trapping instruction that is guaranteed to execute.
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
                                           const DominatorTree *DT,
                                           const Loop *CurLoop,
                                           const LoopSafetyInfo *SafetyInfo,
                                           OptimizationRemarkEmitter *ORE,
                                           const Instruction *CtxI) {
  if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT))
    return true;

  bool GuaranteedToExecute =
      SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);

  if (!GuaranteedToExecute) {
    auto *LI = dyn_cast<LoadInst>(&Inst);
    if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
      ORE->emit([&]() {
        return OptimizationRemarkMissed(
                   DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
               << "failed to hoist load with loop-invariant address "
                  "because load is conditionally executed";
      });
  }

  return GuaranteedToExecute;
}

namespace {
class LoopPromoter : public LoadAndStorePromoter {
  Value *SomePtr; // Designated pointer to store to.
  const SmallSetVector<Value *, 8> &PointerMustAliases;
  SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
  SmallVectorImpl<Instruction *> &LoopInsertPts;
  SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
  PredIteratorCache &PredCache;
  AliasSetTracker &AST;
  MemorySSAUpdater *MSSAU;
  LoopInfo &LI;
  DebugLoc DL;
  int Alignment;
  bool UnorderedAtomic;
  AAMDNodes AATags;
  ICFLoopSafetyInfo &SafetyInfo;

  Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
    if (Instruction *I = dyn_cast<Instruction>(V))
      if (Loop *L = LI.getLoopFor(I->getParent()))
        if (!L->contains(BB)) {
          // We need to create an LCSSA PHI node for the incoming value and
          // store that.
          PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
                                        I->getName() + ".lcssa", &BB->front());
          for (BasicBlock *Pred : PredCache.get(BB))
            PN->addIncoming(I, Pred);
          return PN;
        }
    return V;
  }

public:
  LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
               const SmallSetVector<Value *, 8> &PMA,
               SmallVectorImpl<BasicBlock *> &LEB,
               SmallVectorImpl<Instruction *> &LIP,
               SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
               AliasSetTracker &ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
               DebugLoc dl, int alignment, bool UnorderedAtomic,
               const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
      : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
        LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
        PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
        Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
        SafetyInfo(SafetyInfo) {}

  bool isInstInList(Instruction *I,
                    const SmallVectorImpl<Instruction *> &) const override {
    Value *Ptr;
    if (LoadInst *LI = dyn_cast<LoadInst>(I))
      Ptr = LI->getOperand(0);
    else
      Ptr = cast<StoreInst>(I)->getPointerOperand();
    return PointerMustAliases.count(Ptr);
  }

  void doExtraRewritesBeforeFinalDeletion() override {
    // Insert stores into the loop exit blocks. Each exit block gets a store
    // of the live-out value that feeds it. Since we've already told the SSA
    // updater about the defs in the loop and the preheader definition, it is
    // all set and we can start using it.
    for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
      BasicBlock *ExitBlock = LoopExitBlocks[i];
      Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
      LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
      Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
      Instruction *InsertPos = LoopInsertPts[i];
      StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
      if (UnorderedAtomic)
        NewSI->setOrdering(AtomicOrdering::Unordered);
      NewSI->setAlignment(Alignment);
      NewSI->setDebugLoc(DL);
      if (AATags)
        NewSI->setAAMetadata(AATags);

      if (MSSAU) {
        MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
        MemoryAccess *NewMemAcc;
        if (!MSSAInsertPoint) {
          NewMemAcc = MSSAU->createMemoryAccessInBB(
              NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
        } else {
          NewMemAcc =
              MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
        }
        MSSAInsertPts[i] = NewMemAcc;
        MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
        // FIXME: true for safety, false may still be correct.
      }
    }
  }

  void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
    // Update alias analysis.
    AST.copyValue(LI, V);
  }
  void instructionDeleted(Instruction *I) const override {
    SafetyInfo.removeInstruction(I);
    AST.deleteValue(I);
    if (MSSAU)
      MSSAU->removeMemoryAccess(I);
  }
};

} // namespace

/// Return true iff we can prove that a caller of this function cannot inspect
/// the contents of the provided object in a well defined program.
bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
  if (isa<AllocaInst>(Object))
    // Since the alloca goes out of scope, we know the caller can't retain a
    // reference to it and be well defined. Thus, we don't need to check for
    // capture.
    return true;

  // For all other objects we need to know that the caller can't possibly
  // have gotten a reference to the object. There are two components of
  // that:
  //   1) Object can't be escaped by this function. This is what
  //      PointerMayBeCaptured checks.
  //   2) Object can't have been captured at definition site. For this, we
  //      need to know the return value is noalias. At the moment, we use a
  //      weaker condition and handle only AllocLikeFunctions (which are
  //      known to be noalias). TODO
  return isAllocLikeFn(Object, TLI) &&
         !PointerMayBeCaptured(Object, true, true);
}

/// Try to promote memory values to scalars by sinking stores out of the
/// loop and moving loads to before the loop. We do this by looping over
/// the stores in the loop, looking for stores to Must pointers which are
/// loop invariant.
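///
/// Illustrative sketch of the rewrite for a promoted pointer %p (names made
/// up for exposition):
///
///   preheader: %p.promoted = load i32, i32* %p
///   loop:      %v = phi i32 [ %p.promoted, %preheader ], [ %v.next, %loop ]
///              %v.next = add i32 %v, 1   ; was: load/add/store through %p
///   exit:      store i32 %v.lcssa, i32* %p
///
/// The preheader load, the PHIs built by the SSAUpdater, and the compensating
/// stores in the exit blocks are what this function inserts below.
///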
bool llvm::promoteLoopAccessesToScalars(
    const SmallSetVector<Value *, 8> &PointerMustAliases,
    SmallVectorImpl<BasicBlock *> &ExitBlocks,
    SmallVectorImpl<Instruction *> &InsertPts,
    SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
    LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
    Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
    ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
  // Verify inputs.
  assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
         CurAST != nullptr && SafetyInfo != nullptr &&
         "Unexpected Input to promoteLoopAccessesToScalars");

  Value *SomePtr = *PointerMustAliases.begin();
  BasicBlock *Preheader = CurLoop->getLoopPreheader();

  // It is not safe to promote a load/store from the loop if the load/store is
  // conditional. For example, turning:
  //
  //    for () { if (c) *P += 1; }
  //
  // into:
  //
  //    tmp = *P;  for () { if (c) tmp += 1; } *P = tmp;
  //
  // is not safe, because *P may only be valid to access if 'c' is true.
  //
  // The safety property divides into two parts:
  // p1) The memory may not be dereferenceable on entry to the loop. In this
  //     case, we can't insert the required load in the preheader.
  // p2) The memory model does not allow us to insert a store along any dynamic
  //     path which did not originally have one.
  //
  // If at least one store is guaranteed to execute, both properties are
  // satisfied, and promotion is legal.
  //
  // This, however, is not a necessary condition. Even if no store/load is
  // guaranteed to execute, we can still establish these properties.
  // We can establish (p1) by proving that hoisting the load into the preheader
  // is safe (i.e. proving dereferenceability on all paths through the loop).
  // We can use any access within the alias set to prove dereferenceability,
  // since they're all must alias.
  //
  // There are two ways to establish (p2):
  // a) Prove the location is thread-local. In this case the memory model
  //    requirement does not apply, and stores are safe to insert.
  // b) Prove a store dominates every exit block. In this case, if an exit
  //    block is reached, the original dynamic path would have taken us through
  //    the store, so inserting a store into the exit block is safe. Note that
  //    this is different from the store being guaranteed to execute. For
  //    instance, if an exception is thrown on the first iteration of the loop,
  //    the original store is never executed, but the exit blocks are not
  //    executed either.
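  //
  // For instance, in a loop of the form
  //
  //    do { *P = v; } while (c);
  //
  // the store dominates the single exit block, so reaching that exit implies
  // the store has already executed at least once, and inserting a copy of it
  // into the exit block does not add a store along any path that lacked one.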

  bool DereferenceableInPH = false;
  bool SafeToInsertStore = false;

  SmallVector<Instruction *, 64> LoopUses;

  // We start with an alignment of one and try to find instructions that allow
  // us to prove better alignment.
  unsigned Alignment = 1;
  // Keep track of which types of access we see.
  bool SawUnorderedAtomic = false;
  bool SawNotAtomic = false;
  AAMDNodes AATags;

  const DataLayout &MDL = Preheader->getModule()->getDataLayout();

  bool IsKnownThreadLocalObject = false;
  if (SafetyInfo->anyBlockMayThrow()) {
    // If a loop can throw, we have to insert a store along each unwind edge.
    // That said, we can't actually make the unwind edge explicit. Therefore,
    // we have to prove that the store is dead along the unwind edge. We do
    // this by proving that the caller can't have a reference to the object
    // after return and thus can't possibly load from the object.
    Value *Object = GetUnderlyingObject(SomePtr, MDL);
    if (!isKnownNonEscaping(Object, TLI))
      return false;
    // Subtlety: Alloca's aren't visible to callers, but *are* potentially
    // visible to other threads if captured and used during their lifetimes.
    IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
  }

  // Check that all of the pointers in the alias set have the same type. We
  // cannot (yet) promote a memory location that is loaded and stored in
  // different sizes. While we are at it, collect alignment and AA info.
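  // (For instance, a location accessed through both an i32* and an i64*
  // pointer in the same must-alias set is not promoted.)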
  for (Value *ASIV : PointerMustAliases) {
    // Check that all of the pointers in the alias set have the same type. We
    // cannot (yet) promote a memory location that is loaded and stored in
    // different sizes.
    if (SomePtr->getType() != ASIV->getType())
      return false;

    for (User *U : ASIV->users()) {
      // Ignore instructions that are outside the loop.
      Instruction *UI = dyn_cast<Instruction>(U);
      if (!UI || !CurLoop->contains(UI))
        continue;

      // If there is a non-load/store instruction in the loop, we can't promote
      // it.
      if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
        if (!Load->isUnordered())
          return false;

        SawUnorderedAtomic |= Load->isAtomic();
        SawNotAtomic |= !Load->isAtomic();

        if (!DereferenceableInPH)
          DereferenceableInPH = isSafeToExecuteUnconditionally(
              *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator());
      } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
        // Stores *of* the pointer are not interesting, only stores *to* the
        // pointer.
        if (UI->getOperand(1) != ASIV)
          continue;
        if (!Store->isUnordered())
          return false;

        SawUnorderedAtomic |= Store->isAtomic();
        SawNotAtomic |= !Store->isAtomic();

        // If the store is guaranteed to execute, both properties are
        // satisfied. We may want to check if a store is guaranteed to execute
        // even if we already know that promotion is safe, since it may have
        // higher alignment than any other guaranteed stores, in which case we
        // can raise the alignment on the promoted store.
        unsigned InstAlignment = Store->getAlignment();
        if (!InstAlignment)
          InstAlignment =
              MDL.getABITypeAlignment(Store->getValueOperand()->getType());

        if (!DereferenceableInPH || !SafeToInsertStore ||
            (InstAlignment > Alignment)) {
          if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
            DereferenceableInPH = true;
            SafeToInsertStore = true;
            Alignment = std::max(Alignment, InstAlignment);
          }
        }

        // If a store dominates all exit blocks, it is safe to sink.
        // As explained above, if an exit block was executed, a dominating
        // store must have been executed at least once, so we are not
        // introducing stores on paths that did not have them.
        // Note that this only looks at explicit exit blocks. If we ever
        // start sinking stores into unwind edges (see above), this will break.
        if (!SafeToInsertStore)
          SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
            return DT->dominates(Store->getParent(), Exit);
          });

        // If the store is not guaranteed to execute, we may still get
        // deref info through it.
        if (!DereferenceableInPH) {
          DereferenceableInPH = isDereferenceableAndAlignedPointer(
              Store->getPointerOperand(), Store->getAlignment(), MDL,
              Preheader->getTerminator(), DT);
        }
      } else
        return false; // Not a load or store.

      // Merge the AA tags.
      if (LoopUses.empty()) {
        // On the first load/store, just take its AA tags.
        UI->getAAMetadata(AATags);
      } else if (AATags) {
        UI->getAAMetadata(AATags, /* Merge = */ true);
      }

      LoopUses.push_back(UI);
    }
  }

  // If we found both an unordered atomic instruction and a non-atomic memory
  // access, bail. We can't blindly promote non-atomic to atomic since we
  // might not be able to lower the result. We can't downgrade since that
  // would violate the memory model. Also, align 0 is an error for atomics.
  if (SawUnorderedAtomic && SawNotAtomic)
    return false;

  // If we couldn't prove we can hoist the load, bail.
  if (!DereferenceableInPH)
    return false;

  // We know we can hoist the load, but don't have a guaranteed store.
  // Check whether the location is thread-local. If it is, then we can insert
  // stores along paths which originally didn't have them without violating
  // the memory model.
  if (!SafeToInsertStore) {
    if (IsKnownThreadLocalObject)
      SafeToInsertStore = true;
    else {
      Value *Object = GetUnderlyingObject(SomePtr, MDL);
      SafeToInsertStore =
          (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
          !PointerMayBeCaptured(Object, true, true);
    }
  }

  // If we've still failed to prove we can sink the store, give up.
  if (!SafeToInsertStore)
    return false;

  // Otherwise, this is safe to promote, lets do it!
  LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
                    << '\n');
  ORE->emit([&]() {
    return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
                              LoopUses[0])
           << "Moving accesses to memory location out of the loop";
  });
  ++NumPromoted;

  // Grab a debug location for the inserted loads/stores; given that the
  // inserted loads/stores have little relation to the original loads/stores,
  // this code just arbitrarily picks a location from one, since any debug
  // location is better than none.
  DebugLoc DL = LoopUses[0]->getDebugLoc();

  // We use the SSAUpdater interface to insert phi nodes as required.
  SmallVector<PHINode *, 16> NewPHIs;
  SSAUpdater SSA(&NewPHIs);
  LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
                        InsertPts, MSSAInsertPts, PIC, *CurAST, MSSAU, *LI, DL,
                        Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);

  // Set up the preheader to have a definition of the value. It is the live-out
  // value from the preheader that uses in the loop will use.
  LoadInst *PreheaderLoad = new LoadInst(
      SomePtr->getType()->getPointerElementType(), SomePtr,
      SomePtr->getName() + ".promoted", Preheader->getTerminator());
  if (SawUnorderedAtomic)
    PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
  PreheaderLoad->setAlignment(Alignment);
  PreheaderLoad->setDebugLoc(DL);
  if (AATags)
    PreheaderLoad->setAAMetadata(AATags);
  SSA.AddAvailableValue(Preheader, PreheaderLoad);

  MemoryAccess *PreheaderLoadMemoryAccess;
  if (MSSAU) {
    PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
        PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
    MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
    MSSAU->insertUse(NewMemUse);
  }

  // Rewrite all the loads in the loop and remember all the definitions from
  // stores in the loop.
  Promoter.run(LoopUses);

  // If the SSAUpdater didn't use the load in the preheader, just zap it now.
  if (PreheaderLoad->use_empty())
    eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);

  return true;
}

/// Returns an owning pointer to an alias set which incorporates aliasing info
/// from L and all subloops of L.
/// FIXME: In new pass manager, there is no helper function to handle loop
/// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed
/// from scratch for every loop. Hook up with the helper functions when
/// available in the new pass manager to avoid redundant computation.
std::unique_ptr<AliasSetTracker>
LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
                                                 AliasAnalysis *AA) {
  std::unique_ptr<AliasSetTracker> CurAST;
  SmallVector<Loop *, 4> RecomputeLoops;
  for (Loop *InnerL : L->getSubLoops()) {
    auto MapI = LoopToAliasSetMap.find(InnerL);
    // If the AST for this inner loop is missing it may have been merged into
    // some other loop's AST and then that loop unrolled, and so we need to
    // recompute it.
    if (MapI == LoopToAliasSetMap.end()) {
      RecomputeLoops.push_back(InnerL);
      continue;
    }
    std::unique_ptr<AliasSetTracker> InnerAST = std::move(MapI->second);

    if (CurAST) {
      // What if InnerLoop was modified by other passes ?
      // Once we've incorporated the inner loop's AST into ours, we don't need
      // the subloop's anymore.
      CurAST->add(*InnerAST);
    } else {
      CurAST = std::move(InnerAST);
    }
    LoopToAliasSetMap.erase(MapI);
  }
  if (!CurAST)
    CurAST = make_unique<AliasSetTracker>(*AA);

  // Add everything from the sub loops that are no longer directly available.
  for (Loop *InnerL : RecomputeLoops)
    for (BasicBlock *BB : InnerL->blocks())
      CurAST->add(*BB);

  // And merge in this loop (without anything from inner loops).
  for (BasicBlock *BB : L->blocks())
    if (LI->getLoopFor(BB) == L)
      CurAST->add(*BB);

  return CurAST;
}

std::unique_ptr<AliasSetTracker>
LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA(
    Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) {
  auto *MSSA = MSSAU->getMemorySSA();
  auto CurAST = make_unique<AliasSetTracker>(*AA, MSSA, L);
  CurAST->addAllInstructionsInLoopUsingMSSA();
  return CurAST;
}

/// Simple analysis hook. Clone alias set info.
///
void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
                                             Loop *L) {
  auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
  if (ASTIt == LICM.getLoopToAliasSetMap().end())
    return;

  ASTIt->second->copyValue(From, To);
}

/// Simple analysis hook. Delete value V from alias set.
///
void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) {
  auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
  if (ASTIt == LICM.getLoopToAliasSetMap().end())
    return;

  ASTIt->second->deleteValue(V);
}

/// Simple analysis hook. Delete value L from alias set map.
///
void LegacyLICMPass::deleteAnalysisLoop(Loop *L) {
  if (!LICM.getLoopToAliasSetMap().count(L))
    return;

  LICM.getLoopToAliasSetMap().erase(L);
}

static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
                                     AliasSetTracker *CurAST, Loop *CurLoop,
                                     AliasAnalysis *AA) {
  // First check to see if any of the basic blocks in CurLoop invalidate *V.
  bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();

  if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
    return isInvalidatedAccordingToAST;

  // Check with a diagnostic analysis if we can refine the information above.
  // This is to identify the limitations of using the AST.
  // The alias set mechanism used by LICM has a major weakness in that it
  // combines all things which may alias into a single set *before* asking
  // modref questions. As a result, a single readonly call within a loop will
  // collapse all loads and stores into a single alias set and report
  // invalidation if the loop contains any store. For example, readonly calls
  // with deopt states have this form and create a general alias set with all
  // loads and stores. In order to get any LICM in loops containing possible
  // deopt states we need a more precise invalidation of checking the mod ref
  // info of each instruction within the loop and LI. This has a complexity of
  // O(N^2), so currently, it is used only as a diagnostic tool since the
  // default value of LICMN2Theshold is zero.

  // Don't look at nested loops.
  if (CurLoop->begin() != CurLoop->end())
    return true;

  int N = 0;
  for (BasicBlock *BB : CurLoop->getBlocks())
    for (Instruction &I : *BB) {
      if (N >= LICMN2Theshold) {
        LLVM_DEBUG(dbgs() << "Aliasing N2 threshold exhausted for "
                          << *(MemLoc.Ptr) << "\n");
        return true;
      }
      N++;
      auto Res = AA->getModRefInfo(&I, MemLoc);
      if (isModSet(Res)) {
        LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
                          << *(MemLoc.Ptr) << "\n");
        return true;
      }
    }
  LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
  return false;
}

static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
                                             Loop *CurLoop,
                                             int &LicmMssaOptCounter) {
  MemoryAccess *Source;
  // See declaration of LicmMssaOptCap for usage details.
  if (LicmMssaOptCounter >= LicmMssaOptCap)
    Source = MU->getDefiningAccess();
  else {
    Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
    LicmMssaOptCounter++;
  }
  return !MSSA->isLiveOnEntryDef(Source) &&
         CurLoop->contains(Source->getBlock());
}

/// Little predicate that returns true if the specified basic block is in
/// a subloop of the current one, not the current one itself.
///
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
  assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
  return LI->getLoopFor(BB) != CurLoop;
}