Revert r354244 "[DAGCombiner] Eliminate dead stores to stack."
[llvm-complete.git] / lib / Transforms / Scalar / LICM.cpp
blob6d43afaa1931eb68b14abb572a541a48b77f8f7b
1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion, attempting to remove as much
10 // code from the body of a loop as possible. It does this by either hoisting
11 // code into the preheader block, or by sinking code to the exit blocks if it is
12 // safe. This pass also promotes must-aliased memory locations in the loop to
13 // live in registers, thus hoisting and sinking "invariant" loads and stores.
15 // This pass uses alias analysis for two purposes:
17 // 1. Moving loop invariant loads and calls out of loops. If we can determine
18 // that a load or call inside of a loop never aliases anything stored to,
19 // we can hoist it or sink it like any other instruction.
20 // 2. Scalar Promotion of Memory - If there is a store instruction inside of
21 // the loop, we try to move the store to happen AFTER the loop instead of
22 // inside of the loop. This can only happen if a few conditions are true:
23 // A. The pointer stored through is loop invariant
24 // B. There are no stores or loads in the loop which _may_ alias the
25 // pointer. There are no calls in the loop which mod/ref the pointer.
26 // If these conditions are true, we can promote the loads and stores in the
27 // loop of the pointer to use a temporary alloca'd variable. We then use
28 // the SSAUpdater to construct the appropriate SSA form for the value.
30 //===----------------------------------------------------------------------===//
32 #include "llvm/Transforms/Scalar/LICM.h"
33 #include "llvm/ADT/SetOperations.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/Analysis/AliasAnalysis.h"
36 #include "llvm/Analysis/AliasSetTracker.h"
37 #include "llvm/Analysis/BasicAliasAnalysis.h"
38 #include "llvm/Analysis/CaptureTracking.h"
39 #include "llvm/Analysis/ConstantFolding.h"
40 #include "llvm/Analysis/GlobalsModRef.h"
41 #include "llvm/Analysis/GuardUtils.h"
42 #include "llvm/Analysis/Loads.h"
43 #include "llvm/Analysis/LoopInfo.h"
44 #include "llvm/Analysis/LoopIterator.h"
45 #include "llvm/Analysis/LoopPass.h"
46 #include "llvm/Analysis/MemoryBuiltins.h"
47 #include "llvm/Analysis/MemorySSA.h"
48 #include "llvm/Analysis/MemorySSAUpdater.h"
49 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
50 #include "llvm/Analysis/ScalarEvolution.h"
51 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
52 #include "llvm/Analysis/TargetLibraryInfo.h"
53 #include "llvm/Analysis/ValueTracking.h"
54 #include "llvm/IR/CFG.h"
55 #include "llvm/IR/Constants.h"
56 #include "llvm/IR/DataLayout.h"
57 #include "llvm/IR/DerivedTypes.h"
58 #include "llvm/IR/Dominators.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/LLVMContext.h"
62 #include "llvm/IR/Metadata.h"
63 #include "llvm/IR/PatternMatch.h"
64 #include "llvm/IR/PredIteratorCache.h"
65 #include "llvm/Support/CommandLine.h"
66 #include "llvm/Support/Debug.h"
67 #include "llvm/Support/raw_ostream.h"
68 #include "llvm/Transforms/Scalar.h"
69 #include "llvm/Transforms/Scalar/LoopPassManager.h"
70 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
71 #include "llvm/Transforms/Utils/Local.h"
72 #include "llvm/Transforms/Utils/LoopUtils.h"
73 #include "llvm/Transforms/Utils/SSAUpdater.h"
74 #include <algorithm>
75 #include <utility>
76 using namespace llvm;
78 #define DEBUG_TYPE "licm"
80 STATISTIC(NumCreatedBlocks, "Number of blocks created");
81 STATISTIC(NumClonedBranches, "Number of branches cloned");
82 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
83 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
84 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
85 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
86 STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
88 /// Memory promotion is enabled by default.
89 static cl::opt<bool>
90 DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
91 cl::desc("Disable memory promotion in LICM pass"));
93 static cl::opt<bool> ControlFlowHoisting(
94 "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
95 cl::desc("Enable control flow (and PHI) hoisting in LICM"));
97 static cl::opt<uint32_t> MaxNumUsesTraversed(
98 "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
99 cl::desc("Max num uses visited for identifying load "
100 "invariance in loop using invariant start (default = 8)"));
102 // Default value of zero implies we use the regular alias set tracker mechanism
103 // instead of the cross product using AA to identify aliasing of the memory
104 // location we are interested in.
105 static cl::opt<int>
106 LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
107 cl::desc("How many instruction to cross product using AA"));
109 // Experimental option to allow imprecision in LICM in pathological cases, in
110 // exchange for faster compile. This is to be removed if MemorySSA starts to
111 // address the same issue. This flag applies only when LICM uses MemorySSA
112 // instead on AliasSetTracker. LICM calls MemorySSAWalker's
113 // getClobberingMemoryAccess, up to the value of the Cap, getting perfect
114 // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
115 // which may not be precise, since optimizeUses is capped. The result is
116 // correct, but we may not get as "far up" as possible to get which access is
117 // clobbering the one queried.
118 static cl::opt<int> LicmMssaOptCap(
119 "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
120 cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
121 "for faster compile. Caps the MemorySSA clobbering calls."));
123 // Experimentally, memory promotion carries less importance than sinking and
124 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
125 // compile time.
126 static cl::opt<unsigned> AccessCapForMSSAPromotion(
127 "max-acc-licm-promotion", cl::init(250), cl::Hidden,
128 cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
129 "effect. When MSSA in LICM is enabled, then this is the maximum "
130 "number of accesses allowed to be present in a loop in order to "
131 "enable memory promotion."));
133 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
134 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
135 const LoopSafetyInfo *SafetyInfo,
136 TargetTransformInfo *TTI, bool &FreeInLoop);
137 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
138 BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
139 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
140 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
141 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
142 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
143 bool FreeInLoop);
144 static bool isSafeToExecuteUnconditionally(Instruction &Inst,
145 const DominatorTree *DT,
146 const Loop *CurLoop,
147 const LoopSafetyInfo *SafetyInfo,
148 OptimizationRemarkEmitter *ORE,
149 const Instruction *CtxI = nullptr);
150 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
151 AliasSetTracker *CurAST, Loop *CurLoop,
152 AliasAnalysis *AA);
153 static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
154 Loop *CurLoop,
155 int &LicmMssaOptCounter);
156 static Instruction *CloneInstructionInExitBlock(
157 Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
158 const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
160 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
161 AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
163 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
164 ICFLoopSafetyInfo &SafetyInfo,
165 MemorySSAUpdater *MSSAU);
167 namespace {
168 struct LoopInvariantCodeMotion {
169 using ASTrackerMapTy = DenseMap<Loop *, std::unique_ptr<AliasSetTracker>>;
170 bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
171 TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
172 ScalarEvolution *SE, MemorySSA *MSSA,
173 OptimizationRemarkEmitter *ORE, bool DeleteAST);
175 ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }
177 private:
178 ASTrackerMapTy LoopToAliasSetMap;
180 std::unique_ptr<AliasSetTracker>
181 collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
182 std::unique_ptr<AliasSetTracker>
183 collectAliasInfoForLoopWithMSSA(Loop *L, AliasAnalysis *AA,
184 MemorySSAUpdater *MSSAU);
187 struct LegacyLICMPass : public LoopPass {
188 static char ID; // Pass identification, replacement for typeid
189 LegacyLICMPass() : LoopPass(ID) {
190 initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
193 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
194 if (skipLoop(L)) {
195 // If we have run LICM on a previous loop but now we are skipping
196 // (because we've hit the opt-bisect limit), we need to clear the
197 // loop alias information.
198 LICM.getLoopToAliasSetMap().clear();
199 return false;
202 auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
203 MemorySSA *MSSA = EnableMSSALoopDependency
204 ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
205 : nullptr;
206 // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
207 // pass. Function analyses need to be preserved across loop transformations
208 // but ORE cannot be preserved (see comment before the pass definition).
209 OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
210 return LICM.runOnLoop(L,
211 &getAnalysis<AAResultsWrapperPass>().getAAResults(),
212 &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
213 &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
214 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
215 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
216 *L->getHeader()->getParent()),
217 SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
220 /// This transformation requires natural loop information & requires that
221 /// loop preheaders be inserted into the CFG...
223 void getAnalysisUsage(AnalysisUsage &AU) const override {
224 AU.addPreserved<DominatorTreeWrapperPass>();
225 AU.addPreserved<LoopInfoWrapperPass>();
226 AU.addRequired<TargetLibraryInfoWrapperPass>();
227 if (EnableMSSALoopDependency) {
228 AU.addRequired<MemorySSAWrapperPass>();
229 AU.addPreserved<MemorySSAWrapperPass>();
231 AU.addRequired<TargetTransformInfoWrapperPass>();
232 getLoopAnalysisUsage(AU);
235 using llvm::Pass::doFinalization;
237 bool doFinalization() override {
238 assert(LICM.getLoopToAliasSetMap().empty() &&
239 "Didn't free loop alias sets");
240 return false;
243 private:
244 LoopInvariantCodeMotion LICM;
246 /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
247 void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
248 Loop *L) override;
250 /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
251 /// set.
252 void deleteAnalysisValue(Value *V, Loop *L) override;
254 /// Simple Analysis hook. Delete loop L from alias set map.
255 void deleteAnalysisLoop(Loop *L) override;
257 } // namespace
259 PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
260 LoopStandardAnalysisResults &AR, LPMUpdater &) {
261 const auto &FAM =
262 AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
263 Function *F = L.getHeader()->getParent();
265 auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
266 // FIXME: This should probably be optional rather than required.
267 if (!ORE)
268 report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
269 "cached at a higher level");
271 LoopInvariantCodeMotion LICM;
272 if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
273 AR.MSSA, ORE, true))
274 return PreservedAnalyses::all();
276 auto PA = getLoopPassPreservedAnalyses();
278 PA.preserve<DominatorTreeAnalysis>();
279 PA.preserve<LoopAnalysis>();
281 return PA;
284 char LegacyLICMPass::ID = 0;
285 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
286 false, false)
287 INITIALIZE_PASS_DEPENDENCY(LoopPass)
288 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
289 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
290 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
291 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
292 false)
294 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
296 /// Hoist expressions out of the specified loop. Note, alias info for inner
297 /// loop is not preserved so it is not a good idea to run LICM multiple
298 /// times on one loop.
299 /// We should delete AST for inner loops in the new pass manager to avoid
300 /// memory leak.
302 bool LoopInvariantCodeMotion::runOnLoop(
303 Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
304 TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE,
305 MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
306 bool Changed = false;
308 assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
310 std::unique_ptr<AliasSetTracker> CurAST;
311 std::unique_ptr<MemorySSAUpdater> MSSAU;
312 bool NoOfMemAccTooLarge = false;
313 int LicmMssaOptCounter = 0;
315 if (!MSSA) {
316 LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
317 CurAST = collectAliasInfoForLoop(L, LI, AA);
318 } else {
319 LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
320 MSSAU = make_unique<MemorySSAUpdater>(MSSA);
322 unsigned AccessCapCount = 0;
323 for (auto *BB : L->getBlocks()) {
324 if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
325 for (const auto &MA : *Accesses) {
326 (void)MA;
327 AccessCapCount++;
328 if (AccessCapCount > AccessCapForMSSAPromotion) {
329 NoOfMemAccTooLarge = true;
330 break;
334 if (NoOfMemAccTooLarge)
335 break;
339 // Get the preheader block to move instructions into...
340 BasicBlock *Preheader = L->getLoopPreheader();
342 // Compute loop safety information.
343 ICFLoopSafetyInfo SafetyInfo(DT);
344 SafetyInfo.computeLoopSafetyInfo(L);
346 // We want to visit all of the instructions in this loop... that are not parts
347 // of our subloops (they have already had their invariants hoisted out of
348 // their loop, into this loop, so there is no need to process the BODIES of
349 // the subloops).
351 // Traverse the body of the loop in depth first order on the dominator tree so
352 // that we are guaranteed to see definitions before we see uses. This allows
353 // us to sink instructions in one pass, without iteration. After sinking
354 // instructions, we perform another pass to hoist them out of the loop.
356 if (L->hasDedicatedExits())
357 Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
358 CurAST.get(), MSSAU.get(), &SafetyInfo,
359 NoOfMemAccTooLarge, LicmMssaOptCounter, ORE);
360 if (Preheader)
361 Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
362 CurAST.get(), MSSAU.get(), &SafetyInfo,
363 NoOfMemAccTooLarge, LicmMssaOptCounter, ORE);
365 // Now that all loop invariants have been removed from the loop, promote any
366 // memory references to scalars that we can.
367 // Don't sink stores from loops without dedicated block exits. Exits
368 // containing indirect branches are not transformed by loop simplify,
369 // make sure we catch that. An additional load may be generated in the
370 // preheader for SSA updater, so also avoid sinking when no preheader
371 // is available.
372 if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
373 !NoOfMemAccTooLarge) {
374 // Figure out the loop exits and their insertion points
375 SmallVector<BasicBlock *, 8> ExitBlocks;
376 L->getUniqueExitBlocks(ExitBlocks);
378 // We can't insert into a catchswitch.
379 bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
380 return isa<CatchSwitchInst>(Exit->getTerminator());
383 if (!HasCatchSwitch) {
384 SmallVector<Instruction *, 8> InsertPts;
385 SmallVector<MemoryAccess *, 8> MSSAInsertPts;
386 InsertPts.reserve(ExitBlocks.size());
387 if (MSSAU)
388 MSSAInsertPts.reserve(ExitBlocks.size());
389 for (BasicBlock *ExitBlock : ExitBlocks) {
390 InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
391 if (MSSAU)
392 MSSAInsertPts.push_back(nullptr);
395 PredIteratorCache PIC;
397 bool Promoted = false;
399 // Build an AST using MSSA.
400 if (!CurAST.get())
401 CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get());
403 // Loop over all of the alias sets in the tracker object.
404 for (AliasSet &AS : *CurAST) {
405 // We can promote this alias set if it has a store, if it is a "Must"
406 // alias set, if the pointer is loop invariant, and if we are not
407 // eliminating any volatile loads or stores.
408 if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
409 !L->isLoopInvariant(AS.begin()->getValue()))
410 continue;
412 assert(
413 !AS.empty() &&
414 "Must alias set should have at least one pointer element in it!");
416 SmallSetVector<Value *, 8> PointerMustAliases;
417 for (const auto &ASI : AS)
418 PointerMustAliases.insert(ASI.getValue());
420 Promoted |= promoteLoopAccessesToScalars(
421 PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
422 DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
425 // Once we have promoted values across the loop body we have to
426 // recursively reform LCSSA as any nested loop may now have values defined
427 // within the loop used in the outer loop.
428 // FIXME: This is really heavy handed. It would be a bit better to use an
429 // SSAUpdater strategy during promotion that was LCSSA aware and reformed
430 // it as it went.
431 if (Promoted)
432 formLCSSARecursively(*L, *DT, LI, SE);
434 Changed |= Promoted;
438 // Check that neither this loop nor its parent have had LCSSA broken. LICM is
439 // specifically moving instructions across the loop boundary and so it is
440 // especially in need of sanity checking here.
441 assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
442 assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
443 "Parent loop not left in LCSSA form after LICM!");
445 // If this loop is nested inside of another one, save the alias information
446 // for when we process the outer loop.
447 if (!MSSAU.get() && CurAST.get() && L->getParentLoop() && !DeleteAST)
448 LoopToAliasSetMap[L] = std::move(CurAST);
450 if (MSSAU.get() && VerifyMemorySSA)
451 MSSAU->getMemorySSA()->verifyMemorySSA();
453 if (Changed && SE)
454 SE->forgetLoopDispositions(L);
455 return Changed;
458 /// Walk the specified region of the CFG (defined by all blocks dominated by
459 /// the specified block, and that are in the current loop) in reverse depth
460 /// first order w.r.t the DominatorTree. This allows us to visit uses before
461 /// definitions, allowing us to sink a loop body in one pass without iteration.
463 bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
464 DominatorTree *DT, TargetLibraryInfo *TLI,
465 TargetTransformInfo *TTI, Loop *CurLoop,
466 AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
467 ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
468 int &LicmMssaOptCounter, OptimizationRemarkEmitter *ORE) {
470 // Verify inputs.
471 assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
472 CurLoop != nullptr && SafetyInfo != nullptr &&
473 "Unexpected input to sinkRegion.");
474 assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
475 "Either AliasSetTracker or MemorySSA should be initialized.");
477 // We want to visit children before parents. We will enque all the parents
478 // before their children in the worklist and process the worklist in reverse
479 // order.
480 SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
482 bool Changed = false;
483 for (DomTreeNode *DTN : reverse(Worklist)) {
484 BasicBlock *BB = DTN->getBlock();
485 // Only need to process the contents of this block if it is not part of a
486 // subloop (which would already have been processed).
487 if (inSubLoop(BB, CurLoop, LI))
488 continue;
490 for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
491 Instruction &I = *--II;
493 // If the instruction is dead, we would try to sink it because it isn't
494 // used in the loop, instead, just delete it.
495 if (isInstructionTriviallyDead(&I, TLI)) {
496 LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
497 salvageDebugInfo(I);
498 ++II;
499 eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
500 Changed = true;
501 continue;
504 // Check to see if we can sink this instruction to the exit blocks
505 // of the loop. We can do this if the all users of the instruction are
506 // outside of the loop. In this case, it doesn't even matter if the
507 // operands of the instruction are loop invariant.
509 bool FreeInLoop = false;
510 if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
511 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
512 NoOfMemAccTooLarge, &LicmMssaOptCounter, ORE) &&
513 !I.mayHaveSideEffects()) {
514 if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) {
515 if (!FreeInLoop) {
516 ++II;
517 eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
519 Changed = true;
524 if (MSSAU && VerifyMemorySSA)
525 MSSAU->getMemorySSA()->verifyMemorySSA();
526 return Changed;
529 namespace {
530 // This is a helper class for hoistRegion to make it able to hoist control flow
531 // in order to be able to hoist phis. The way this works is that we initially
532 // start hoisting to the loop preheader, and when we see a loop invariant branch
533 // we make note of this. When we then come to hoist an instruction that's
534 // conditional on such a branch we duplicate the branch and the relevant control
535 // flow, then hoist the instruction into the block corresponding to its original
536 // block in the duplicated control flow.
537 class ControlFlowHoister {
538 private:
539 // Information about the loop we are hoisting from
540 LoopInfo *LI;
541 DominatorTree *DT;
542 Loop *CurLoop;
543 MemorySSAUpdater *MSSAU;
545 // A map of blocks in the loop to the block their instructions will be hoisted
546 // to.
547 DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
549 // The branches that we can hoist, mapped to the block that marks a
550 // convergence point of their control flow.
551 DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
553 public:
554 ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
555 MemorySSAUpdater *MSSAU)
556 : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
558 void registerPossiblyHoistableBranch(BranchInst *BI) {
559 // We can only hoist conditional branches with loop invariant operands.
560 if (!ControlFlowHoisting || !BI->isConditional() ||
561 !CurLoop->hasLoopInvariantOperands(BI))
562 return;
564 // The branch destinations need to be in the loop, and we don't gain
565 // anything by duplicating conditional branches with duplicate successors,
566 // as it's essentially the same as an unconditional branch.
567 BasicBlock *TrueDest = BI->getSuccessor(0);
568 BasicBlock *FalseDest = BI->getSuccessor(1);
569 if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
570 TrueDest == FalseDest)
571 return;
573 // We can hoist BI if one branch destination is the successor of the other,
574 // or both have common successor which we check by seeing if the
575 // intersection of their successors is non-empty.
576 // TODO: This could be expanded to allowing branches where both ends
577 // eventually converge to a single block.
578 SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
579 TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
580 FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
581 BasicBlock *CommonSucc = nullptr;
582 if (TrueDestSucc.count(FalseDest)) {
583 CommonSucc = FalseDest;
584 } else if (FalseDestSucc.count(TrueDest)) {
585 CommonSucc = TrueDest;
586 } else {
587 set_intersect(TrueDestSucc, FalseDestSucc);
588 // If there's one common successor use that.
589 if (TrueDestSucc.size() == 1)
590 CommonSucc = *TrueDestSucc.begin();
591 // If there's more than one pick whichever appears first in the block list
592 // (we can't use the value returned by TrueDestSucc.begin() as it's
593 // unpredicatable which element gets returned).
594 else if (!TrueDestSucc.empty()) {
595 Function *F = TrueDest->getParent();
596 auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
597 auto It = std::find_if(F->begin(), F->end(), IsSucc);
598 assert(It != F->end() && "Could not find successor in function");
599 CommonSucc = &*It;
602 // The common successor has to be dominated by the branch, as otherwise
603 // there will be some other path to the successor that will not be
604 // controlled by this branch so any phi we hoist would be controlled by the
605 // wrong condition. This also takes care of avoiding hoisting of loop back
606 // edges.
607 // TODO: In some cases this could be relaxed if the successor is dominated
608 // by another block that's been hoisted and we can guarantee that the
609 // control flow has been replicated exactly.
610 if (CommonSucc && DT->dominates(BI, CommonSucc))
611 HoistableBranches[BI] = CommonSucc;
614 bool canHoistPHI(PHINode *PN) {
615 // The phi must have loop invariant operands.
616 if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
617 return false;
618 // We can hoist phis if the block they are in is the target of hoistable
619 // branches which cover all of the predecessors of the block.
620 SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
621 BasicBlock *BB = PN->getParent();
622 for (BasicBlock *PredBB : predecessors(BB))
623 PredecessorBlocks.insert(PredBB);
624 // If we have less predecessor blocks than predecessors then the phi will
625 // have more than one incoming value for the same block which we can't
626 // handle.
627 // TODO: This could be handled be erasing some of the duplicate incoming
628 // values.
629 if (PredecessorBlocks.size() != pred_size(BB))
630 return false;
631 for (auto &Pair : HoistableBranches) {
632 if (Pair.second == BB) {
633 // Which blocks are predecessors via this branch depends on if the
634 // branch is triangle-like or diamond-like.
635 if (Pair.first->getSuccessor(0) == BB) {
636 PredecessorBlocks.erase(Pair.first->getParent());
637 PredecessorBlocks.erase(Pair.first->getSuccessor(1));
638 } else if (Pair.first->getSuccessor(1) == BB) {
639 PredecessorBlocks.erase(Pair.first->getParent());
640 PredecessorBlocks.erase(Pair.first->getSuccessor(0));
641 } else {
642 PredecessorBlocks.erase(Pair.first->getSuccessor(0));
643 PredecessorBlocks.erase(Pair.first->getSuccessor(1));
647 // PredecessorBlocks will now be empty if for every predecessor of BB we
648 // found a hoistable branch source.
649 return PredecessorBlocks.empty();
652 BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
653 if (!ControlFlowHoisting)
654 return CurLoop->getLoopPreheader();
655 // If BB has already been hoisted, return that
656 if (HoistDestinationMap.count(BB))
657 return HoistDestinationMap[BB];
659 // Check if this block is conditional based on a pending branch
660 auto HasBBAsSuccessor =
661 [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
662 return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
663 Pair.first->getSuccessor(1) == BB);
665 auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(),
666 HasBBAsSuccessor);
668 // If not involved in a pending branch, hoist to preheader
669 BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
670 if (It == HoistableBranches.end()) {
671 LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()
672 << " as hoist destination for " << BB->getName()
673 << "\n");
674 HoistDestinationMap[BB] = InitialPreheader;
675 return InitialPreheader;
677 BranchInst *BI = It->first;
678 assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
679 HoistableBranches.end() &&
680 "BB is expected to be the target of at most one branch");
682 LLVMContext &C = BB->getContext();
683 BasicBlock *TrueDest = BI->getSuccessor(0);
684 BasicBlock *FalseDest = BI->getSuccessor(1);
685 BasicBlock *CommonSucc = HoistableBranches[BI];
686 BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
688 // Create hoisted versions of blocks that currently don't have them
689 auto CreateHoistedBlock = [&](BasicBlock *Orig) {
690 if (HoistDestinationMap.count(Orig))
691 return HoistDestinationMap[Orig];
692 BasicBlock *New =
693 BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
694 HoistDestinationMap[Orig] = New;
695 DT->addNewBlock(New, HoistTarget);
696 if (CurLoop->getParentLoop())
697 CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
698 ++NumCreatedBlocks;
699 LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
700 << " as hoist destination for " << Orig->getName()
701 << "\n");
702 return New;
704 BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
705 BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
706 BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
708 // Link up these blocks with branches.
709 if (!HoistCommonSucc->getTerminator()) {
710 // The new common successor we've generated will branch to whatever that
711 // hoist target branched to.
712 BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
713 assert(TargetSucc && "Expected hoist target to have a single successor");
714 HoistCommonSucc->moveBefore(TargetSucc);
715 BranchInst::Create(TargetSucc, HoistCommonSucc);
717 if (!HoistTrueDest->getTerminator()) {
718 HoistTrueDest->moveBefore(HoistCommonSucc);
719 BranchInst::Create(HoistCommonSucc, HoistTrueDest);
721 if (!HoistFalseDest->getTerminator()) {
722 HoistFalseDest->moveBefore(HoistCommonSucc);
723 BranchInst::Create(HoistCommonSucc, HoistFalseDest);
726 // If BI is being cloned to what was originally the preheader then
727 // HoistCommonSucc will now be the new preheader.
728 if (HoistTarget == InitialPreheader) {
729 // Phis in the loop header now need to use the new preheader.
730 InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
731 if (MSSAU)
732 MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
733 HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
734 // The new preheader dominates the loop header.
735 DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
736 DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
737 DT->changeImmediateDominator(HeaderNode, PreheaderNode);
738 // The preheader hoist destination is now the new preheader, with the
739 // exception of the hoist destination of this branch.
740 for (auto &Pair : HoistDestinationMap)
741 if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
742 Pair.second = HoistCommonSucc;
745 // Now finally clone BI.
746 ReplaceInstWithInst(
747 HoistTarget->getTerminator(),
748 BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
749 ++NumClonedBranches;
751 assert(CurLoop->getLoopPreheader() &&
752 "Hoisting blocks should not have destroyed preheader");
753 return HoistDestinationMap[BB];
756 } // namespace
758 /// Walk the specified region of the CFG (defined by all blocks dominated by
759 /// the specified block, and that are in the current loop) in depth first
760 /// order w.r.t the DominatorTree. This allows us to visit definitions before
761 /// uses, allowing us to hoist a loop body in one pass without iteration.
763 bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
764 DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
765 AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
766 ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
767 int &LicmMssaOptCounter,
768 OptimizationRemarkEmitter *ORE) {
769 // Verify inputs.
770 assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
771 CurLoop != nullptr && SafetyInfo != nullptr &&
772 "Unexpected input to hoistRegion.");
773 assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
774 "Either AliasSetTracker or MemorySSA should be initialized.");
776 ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
778 // Keep track of instructions that have been hoisted, as they may need to be
779 // re-hoisted if they end up not dominating all of their uses.
780 SmallVector<Instruction *, 16> HoistedInstructions;
782 // For PHI hoisting to work we need to hoist blocks before their successors.
783 // We can do this by iterating through the blocks in the loop in reverse
784 // post-order.
785 LoopBlocksRPO Worklist(CurLoop);
786 Worklist.perform(LI);
787 bool Changed = false;
788 for (BasicBlock *BB : Worklist) {
789 // Only need to process the contents of this block if it is not part of a
790 // subloop (which would already have been processed).
791 if (inSubLoop(BB, CurLoop, LI))
792 continue;
794 for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
795 Instruction &I = *II++;
796 // Try constant folding this instruction. If all the operands are
797 // constants, it is technically hoistable, but it would be better to
798 // just fold it.
799 if (Constant *C = ConstantFoldInstruction(
800 &I, I.getModule()->getDataLayout(), TLI)) {
801 LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
802 << '\n');
803 if (CurAST)
804 CurAST->copyValue(&I, C);
805 // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
806 I.replaceAllUsesWith(C);
807 if (isInstructionTriviallyDead(&I, TLI))
808 eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
809 Changed = true;
810 continue;
813 // Try hoisting the instruction out to the preheader. We can only do
814 // this if all of the operands of the instruction are loop invariant and
815 // if it is safe to hoist the instruction.
816 // TODO: It may be safe to hoist if we are hoisting to a conditional block
817 // and we have accurately duplicated the control flow from the loop header
818 // to that block.
819 if (CurLoop->hasLoopInvariantOperands(&I) &&
820 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
821 NoOfMemAccTooLarge, &LicmMssaOptCounter, ORE) &&
822 isSafeToExecuteUnconditionally(
823 I, DT, CurLoop, SafetyInfo, ORE,
824 CurLoop->getLoopPreheader()->getTerminator())) {
825 hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
826 MSSAU, ORE);
827 HoistedInstructions.push_back(&I);
828 Changed = true;
829 continue;
832 // Attempt to remove floating point division out of the loop by
833 // converting it to a reciprocal multiplication.
834 if (I.getOpcode() == Instruction::FDiv &&
835 CurLoop->isLoopInvariant(I.getOperand(1)) &&
836 I.hasAllowReciprocal()) {
837 auto Divisor = I.getOperand(1);
838 auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
839 auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
840 ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
841 SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
842 ReciprocalDivisor->insertBefore(&I);
844 auto Product =
845 BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
846 Product->setFastMathFlags(I.getFastMathFlags());
847 SafetyInfo->insertInstructionTo(Product, I.getParent());
848 Product->insertAfter(&I);
849 I.replaceAllUsesWith(Product);
850 eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
852 hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
853 SafetyInfo, MSSAU, ORE);
854 HoistedInstructions.push_back(ReciprocalDivisor);
855 Changed = true;
856 continue;
859 auto IsInvariantStart = [&](Instruction &I) {
860 using namespace PatternMatch;
861 return I.use_empty() &&
862 match(&I, m_Intrinsic<Intrinsic::invariant_start>());
864 auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
865 return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
866 SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
868 if ((IsInvariantStart(I) || isGuard(&I)) &&
869 CurLoop->hasLoopInvariantOperands(&I) &&
870 MustExecuteWithoutWritesBefore(I)) {
871 hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
872 MSSAU, ORE);
873 HoistedInstructions.push_back(&I);
874 Changed = true;
875 continue;
878 if (PHINode *PN = dyn_cast<PHINode>(&I)) {
879 if (CFH.canHoistPHI(PN)) {
880 // Redirect incoming blocks first to ensure that we create hoisted
881 // versions of those blocks before we hoist the phi.
882 for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
883 PN->setIncomingBlock(
884 i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
885 hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
886 MSSAU, ORE);
887 assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
888 Changed = true;
889 continue;
893 // Remember possibly hoistable branches so we can actually hoist them
894 // later if needed.
895 if (BranchInst *BI = dyn_cast<BranchInst>(&I))
896 CFH.registerPossiblyHoistableBranch(BI);
900 // If we hoisted instructions to a conditional block they may not dominate
901 // their uses that weren't hoisted (such as phis where some operands are not
902 // loop invariant). If so make them unconditional by moving them to their
903 // immediate dominator. We iterate through the instructions in reverse order
904 // which ensures that when we rehoist an instruction we rehoist its operands,
905 // and also keep track of where in the block we are rehoisting to to make sure
906 // that we rehoist instructions before the instructions that use them.
907 Instruction *HoistPoint = nullptr;
908 if (ControlFlowHoisting) {
909 for (Instruction *I : reverse(HoistedInstructions)) {
910 if (!llvm::all_of(I->uses(),
911 [&](Use &U) { return DT->dominates(I, U); })) {
912 BasicBlock *Dominator =
913 DT->getNode(I->getParent())->getIDom()->getBlock();
914 if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
915 if (HoistPoint)
916 assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
917 "New hoist point expected to dominate old hoist point");
918 HoistPoint = Dominator->getTerminator();
920 LLVM_DEBUG(dbgs() << "LICM rehoisting to "
921 << HoistPoint->getParent()->getName()
922 << ": " << *I << "\n");
923 moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU);
924 HoistPoint = I;
925 Changed = true;
929 if (MSSAU && VerifyMemorySSA)
930 MSSAU->getMemorySSA()->verifyMemorySSA();
932 // Now that we've finished hoisting make sure that LI and DT are still
933 // valid.
934 #ifndef NDEBUG
935 if (Changed) {
936 assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
937 "Dominator tree verification failed");
938 LI->verify(*DT);
940 #endif
942 return Changed;
945 // Return true if LI is invariant within scope of the loop. LI is invariant if
946 // CurLoop is dominated by an invariant.start representing the same memory
947 // location and size as the memory location LI loads from, and also the
948 // invariant.start has no uses.
949 static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
950 Loop *CurLoop) {
951 Value *Addr = LI->getOperand(0);
952 const DataLayout &DL = LI->getModule()->getDataLayout();
953 const uint32_t LocSizeInBits = DL.getTypeSizeInBits(
954 cast<PointerType>(Addr->getType())->getElementType());
956 // if the type is i8 addrspace(x)*, we know this is the type of
957 // llvm.invariant.start operand
958 auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
959 LI->getPointerAddressSpace());
960 unsigned BitcastsVisited = 0;
961 // Look through bitcasts until we reach the i8* type (this is invariant.start
962 // operand type).
963 while (Addr->getType() != PtrInt8Ty) {
964 auto *BC = dyn_cast<BitCastInst>(Addr);
965 // Avoid traversing high number of bitcast uses.
966 if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
967 return false;
968 Addr = BC->getOperand(0);
971 unsigned UsesVisited = 0;
972 // Traverse all uses of the load operand value, to see if invariant.start is
973 // one of the uses, and whether it dominates the load instruction.
974 for (auto *U : Addr->users()) {
975 // Avoid traversing for Load operand with high number of users.
976 if (++UsesVisited > MaxNumUsesTraversed)
977 return false;
978 IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
979 // If there are escaping uses of invariant.start instruction, the load maybe
980 // non-invariant.
981 if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
982 !II->use_empty())
983 continue;
984 unsigned InvariantSizeInBits =
985 cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
986 // Confirm the invariant.start location size contains the load operand size
987 // in bits. Also, the invariant.start should dominate the load, and we
988 // should not hoist the load out of a loop that contains this dominating
989 // invariant.start.
990 if (LocSizeInBits <= InvariantSizeInBits &&
991 DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
992 return true;
995 return false;
998 namespace {
999 /// Return true if-and-only-if we know how to (mechanically) both hoist and
1000 /// sink a given instruction out of a loop. Does not address legality
1001 /// concerns such as aliasing or speculation safety.
1002 bool isHoistableAndSinkableInst(Instruction &I) {
1003 // Only these instructions are hoistable/sinkable.
1004 return (isa<LoadInst>(I) || isa<StoreInst>(I) ||
1005 isa<CallInst>(I) || isa<FenceInst>(I) ||
1006 isa<BinaryOperator>(I) || isa<CastInst>(I) ||
1007 isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
1008 isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
1009 isa<ExtractElementInst>(I) || isa<ShuffleVectorInst>(I) ||
1010 isa<ExtractValueInst>(I) || isa<InsertValueInst>(I));
1012 /// Return true if all of the alias sets within this AST are known not to
1013 /// contain a Mod, or if MSSA knows thare are no MemoryDefs in the loop.
1014 bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
1015 const Loop *L) {
1016 if (CurAST) {
1017 for (AliasSet &AS : *CurAST) {
1018 if (!AS.isForwardingAliasSet() && AS.isMod()) {
1019 return false;
1022 return true;
1023 } else { /*MSSAU*/
1024 for (auto *BB : L->getBlocks())
1025 if (MSSAU->getMemorySSA()->getBlockDefs(BB))
1026 return false;
1027 return true;
1031 /// Return true if I is the only Instruction with a MemoryAccess in L.
1032 bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1033 const MemorySSAUpdater *MSSAU) {
1034 for (auto *BB : L->getBlocks())
1035 if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
1036 int NotAPhi = 0;
1037 for (const auto &Acc : *Accs) {
1038 if (isa<MemoryPhi>(&Acc))
1039 continue;
1040 const auto *MUD = cast<MemoryUseOrDef>(&Acc);
1041 if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1042 return false;
1045 return true;
1049 bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
1050 Loop *CurLoop, AliasSetTracker *CurAST,
1051 MemorySSAUpdater *MSSAU,
1052 bool TargetExecutesOncePerLoop,
1053 bool NoOfMemAccTooLarge, int *LicmMssaOptCounter,
1054 OptimizationRemarkEmitter *ORE) {
1055 // If we don't understand the instruction, bail early.
1056 if (!isHoistableAndSinkableInst(I))
1057 return false;
1059 MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
1060 if (MSSA)
1061 assert(LicmMssaOptCounter != nullptr && "Counter cannot be null.");
1063 // Loads have extra constraints we have to verify before we can hoist them.
1064 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
1065 if (!LI->isUnordered())
1066 return false; // Don't sink/hoist volatile or ordered atomic loads!
1068 // Loads from constant memory are always safe to move, even if they end up
1069 // in the same alias set as something that ends up being modified.
1070 if (AA->pointsToConstantMemory(LI->getOperand(0)))
1071 return true;
1072 if (LI->getMetadata(LLVMContext::MD_invariant_load))
1073 return true;
1075 if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1076 return false; // Don't risk duplicating unordered loads
1078 // This checks for an invariant.start dominating the load.
1079 if (isLoadInvariantInLoop(LI, DT, CurLoop))
1080 return true;
1082 bool Invalidated;
1083 if (CurAST)
1084 Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
1085 CurLoop, AA);
1086 else
1087 Invalidated = pointerInvalidatedByLoopWithMSSA(
1088 MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop,
1089 *LicmMssaOptCounter);
1090 // Check loop-invariant address because this may also be a sinkable load
1091 // whose address is not necessarily loop-invariant.
1092 if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1093 ORE->emit([&]() {
1094 return OptimizationRemarkMissed(
1095 DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
1096 << "failed to move load with loop-invariant address "
1097 "because the loop may invalidate its value";
1100 return !Invalidated;
1101 } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1102 // Don't sink or hoist dbg info; it's legal, but not useful.
1103 if (isa<DbgInfoIntrinsic>(I))
1104 return false;
1106 // Don't sink calls which can throw.
1107 if (CI->mayThrow())
1108 return false;
1110 using namespace PatternMatch;
1111 if (match(CI, m_Intrinsic<Intrinsic::assume>()))
1112 // Assumes don't actually alias anything or throw
1113 return true;
1115 // Handle simple cases by querying alias analysis.
1116 FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
1117 if (Behavior == FMRB_DoesNotAccessMemory)
1118 return true;
1119 if (AliasAnalysis::onlyReadsMemory(Behavior)) {
1120 // A readonly argmemonly function only reads from memory pointed to by
1121 // it's arguments with arbitrary offsets. If we can prove there are no
1122 // writes to this memory in the loop, we can hoist or sink.
1123 if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
1124 // TODO: expand to writeable arguments
1125 for (Value *Op : CI->arg_operands())
1126 if (Op->getType()->isPointerTy()) {
1127 bool Invalidated;
1128 if (CurAST)
1129 Invalidated = pointerInvalidatedByLoop(
1130 MemoryLocation(Op, LocationSize::unknown(), AAMDNodes()),
1131 CurAST, CurLoop, AA);
1132 else
1133 Invalidated = pointerInvalidatedByLoopWithMSSA(
1134 MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop,
1135 *LicmMssaOptCounter);
1136 if (Invalidated)
1137 return false;
1139 return true;
1142 // If this call only reads from memory and there are no writes to memory
1143 // in the loop, we can hoist or sink the call as appropriate.
1144 if (isReadOnly(CurAST, MSSAU, CurLoop))
1145 return true;
1148 // FIXME: This should use mod/ref information to see if we can hoist or
1149 // sink the call.
1151 return false;
1152 } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
1153 // Fences alias (most) everything to provide ordering. For the moment,
1154 // just give up if there are any other memory operations in the loop.
1155 if (CurAST) {
1156 auto Begin = CurAST->begin();
1157 assert(Begin != CurAST->end() && "must contain FI");
1158 if (std::next(Begin) != CurAST->end())
1159 // constant memory for instance, TODO: handle better
1160 return false;
1161 auto *UniqueI = Begin->getUniqueInstruction();
1162 if (!UniqueI)
1163 // other memory op, give up
1164 return false;
1165 (void)FI; // suppress unused variable warning
1166 assert(UniqueI == FI && "AS must contain FI");
1167 return true;
1168 } else // MSSAU
1169 return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
1170 } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
1171 if (!SI->isUnordered())
1172 return false; // Don't sink/hoist volatile or ordered atomic store!
1174 // We can only hoist a store that we can prove writes a value which is not
1175 // read or overwritten within the loop. For those cases, we fallback to
1176 // load store promotion instead. TODO: We can extend this to cases where
1177 // there is exactly one write to the location and that write dominates an
1178 // arbitrary number of reads in the loop.
1179 if (CurAST) {
1180 auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
1182 if (AS.isRef() || !AS.isMustAlias())
1183 // Quick exit test, handled by the full path below as well.
1184 return false;
1185 auto *UniqueI = AS.getUniqueInstruction();
1186 if (!UniqueI)
1187 // other memory op, give up
1188 return false;
1189 assert(UniqueI == SI && "AS must contain SI");
1190 return true;
1191 } else { // MSSAU
1192 if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
1193 return true;
1194 if (*LicmMssaOptCounter < LicmMssaOptCap) {
1195 auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
1196 (*LicmMssaOptCounter)++;
1197 // If there are no clobbering Defs in the loop, we still need to check
1198 // for interfering Uses. If there are more accesses than the Promotion
1199 // cap, give up, we're not walking a list that long. Otherwise, walk the
1200 // list, check each Use if it's optimized to an access outside the loop.
1201 // If yes, store is safe to hoist. This is fairly restrictive, but
1202 // conservatively correct.
1203 // TODO: Cache set of Uses on the first walk in runOnLoop, update when
1204 // moving accesses. Can also extend to dominating uses.
1205 if ((!MSSA->isLiveOnEntryDef(Source) &&
1206 CurLoop->contains(Source->getBlock())) ||
1207 NoOfMemAccTooLarge)
1208 return false;
1209 for (auto *BB : CurLoop->getBlocks())
1210 if (auto *Accesses = MSSA->getBlockAccesses(BB))
1211 for (const auto &MA : *Accesses)
1212 if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
1213 auto *MD = MU->getDefiningAccess();
1214 if (!MSSA->isLiveOnEntryDef(MD) &&
1215 CurLoop->contains(MD->getBlock()))
1216 return false;
1218 return true;
1220 return false;
1224 assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1226 // We've established mechanical ability and aliasing, it's up to the caller
1227 // to check fault safety
1228 return true;
1231 /// Returns true if a PHINode is a trivially replaceable with an
1232 /// Instruction.
1233 /// This is true when all incoming values are that instruction.
1234 /// This pattern occurs most often with LCSSA PHI nodes.
1236 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1237 for (const Value *IncValue : PN.incoming_values())
1238 if (IncValue != &I)
1239 return false;
1241 return true;
1244 /// Return true if the instruction is free in the loop.
1245 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
1246 const TargetTransformInfo *TTI) {
1248 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1249 if (TTI->getUserCost(GEP) != TargetTransformInfo::TCC_Free)
1250 return false;
1251 // For a GEP, we cannot simply use getUserCost because currently it
1252 // optimistically assume that a GEP will fold into addressing mode
1253 // regardless of its users.
1254 const BasicBlock *BB = GEP->getParent();
1255 for (const User *U : GEP->users()) {
1256 const Instruction *UI = cast<Instruction>(U);
1257 if (CurLoop->contains(UI) &&
1258 (BB != UI->getParent() ||
1259 (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
1260 return false;
1262 return true;
1263 } else
1264 return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
1267 /// Return true if the only users of this instruction are outside of
1268 /// the loop. If this is true, we can sink the instruction to the exit
1269 /// blocks of the loop.
1271 /// We also return true if the instruction could be folded away in lowering.
1272 /// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
1273 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
1274 const LoopSafetyInfo *SafetyInfo,
1275 TargetTransformInfo *TTI, bool &FreeInLoop) {
1276 const auto &BlockColors = SafetyInfo->getBlockColors();
1277 bool IsFree = isFreeInLoop(I, CurLoop, TTI);
1278 for (const User *U : I.users()) {
1279 const Instruction *UI = cast<Instruction>(U);
1280 if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1281 const BasicBlock *BB = PN->getParent();
1282 // We cannot sink uses in catchswitches.
1283 if (isa<CatchSwitchInst>(BB->getTerminator()))
1284 return false;
1286 // We need to sink a callsite to a unique funclet. Avoid sinking if the
1287 // phi use is too muddled.
1288 if (isa<CallInst>(I))
1289 if (!BlockColors.empty() &&
1290 BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
1291 return false;
1294 if (CurLoop->contains(UI)) {
1295 if (IsFree) {
1296 FreeInLoop = true;
1297 continue;
1299 return false;
1302 return true;
1305 static Instruction *CloneInstructionInExitBlock(
1306 Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1307 const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
1308 Instruction *New;
1309 if (auto *CI = dyn_cast<CallInst>(&I)) {
1310 const auto &BlockColors = SafetyInfo->getBlockColors();
1312 // Sinking call-sites need to be handled differently from other
1313 // instructions. The cloned call-site needs a funclet bundle operand
1314 // appropriate for it's location in the CFG.
1315 SmallVector<OperandBundleDef, 1> OpBundles;
1316 for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
1317 BundleIdx != BundleEnd; ++BundleIdx) {
1318 OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
1319 if (Bundle.getTagID() == LLVMContext::OB_funclet)
1320 continue;
1322 OpBundles.emplace_back(Bundle);
1325 if (!BlockColors.empty()) {
1326 const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
1327 assert(CV.size() == 1 && "non-unique color for exit block!");
1328 BasicBlock *BBColor = CV.front();
1329 Instruction *EHPad = BBColor->getFirstNonPHI();
1330 if (EHPad->isEHPad())
1331 OpBundles.emplace_back("funclet", EHPad);
1334 New = CallInst::Create(CI, OpBundles);
1335 } else {
1336 New = I.clone();
1339 ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
1340 if (!I.getName().empty())
1341 New->setName(I.getName() + ".le");
1343 MemoryAccess *OldMemAcc;
1344 if (MSSAU && (OldMemAcc = MSSAU->getMemorySSA()->getMemoryAccess(&I))) {
1345 // Create a new MemoryAccess and let MemorySSA set its defining access.
1346 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1347 New, nullptr, New->getParent(), MemorySSA::Beginning);
1348 if (NewMemAcc) {
1349 if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
1350 MSSAU->insertDef(MemDef, /*RenameUses=*/true);
1351 else {
1352 auto *MemUse = cast<MemoryUse>(NewMemAcc);
1353 MSSAU->insertUse(MemUse);
1358 // Build LCSSA PHI nodes for any in-loop operands. Note that this is
1359 // particularly cheap because we can rip off the PHI node that we're
1360 // replacing for the number and blocks of the predecessors.
1361 // OPT: If this shows up in a profile, we can instead finish sinking all
1362 // invariant instructions, and then walk their operands to re-establish
1363 // LCSSA. That will eliminate creating PHI nodes just to nuke them when
1364 // sinking bottom-up.
1365 for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
1366 ++OI)
1367 if (Instruction *OInst = dyn_cast<Instruction>(*OI))
1368 if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
1369 if (!OLoop->contains(&PN)) {
1370 PHINode *OpPN =
1371 PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
1372 OInst->getName() + ".lcssa", &ExitBlock.front());
1373 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1374 OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
1375 *OI = OpPN;
1377 return New;
1380 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
1381 AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
1382 if (AST)
1383 AST->deleteValue(&I);
1384 if (MSSAU)
1385 MSSAU->removeMemoryAccess(&I);
1386 SafetyInfo.removeInstruction(&I);
1387 I.eraseFromParent();
1390 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
1391 ICFLoopSafetyInfo &SafetyInfo,
1392 MemorySSAUpdater *MSSAU) {
1393 SafetyInfo.removeInstruction(&I);
1394 SafetyInfo.insertInstructionTo(&I, Dest.getParent());
1395 I.moveBefore(&Dest);
1396 if (MSSAU)
1397 if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1398 MSSAU->getMemorySSA()->getMemoryAccess(&I)))
1399 MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::End);
1402 static Instruction *sinkThroughTriviallyReplaceablePHI(
1403 PHINode *TPN, Instruction *I, LoopInfo *LI,
1404 SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
1405 const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1406 MemorySSAUpdater *MSSAU) {
1407 assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1408 "Expect only trivially replaceable PHI");
1409 BasicBlock *ExitBlock = TPN->getParent();
1410 Instruction *New;
1411 auto It = SunkCopies.find(ExitBlock);
1412 if (It != SunkCopies.end())
1413 New = It->second;
1414 else
1415 New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock(
1416 *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
1417 return New;
1420 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1421 BasicBlock *BB = PN->getParent();
1422 if (!BB->canSplitPredecessors())
1423 return false;
1424 // It's not impossible to split EHPad blocks, but if BlockColors already exist
1425 // it require updating BlockColors for all offspring blocks accordingly. By
1426 // skipping such corner case, we can make updating BlockColors after splitting
1427 // predecessor fairly simple.
1428 if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
1429 return false;
1430 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
1431 BasicBlock *BBPred = *PI;
1432 if (isa<IndirectBrInst>(BBPred->getTerminator()))
1433 return false;
1435 return true;
1438 static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
1439 LoopInfo *LI, const Loop *CurLoop,
1440 LoopSafetyInfo *SafetyInfo,
1441 MemorySSAUpdater *MSSAU) {
1442 #ifndef NDEBUG
1443 SmallVector<BasicBlock *, 32> ExitBlocks;
1444 CurLoop->getUniqueExitBlocks(ExitBlocks);
1445 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1446 ExitBlocks.end());
1447 #endif
1448 BasicBlock *ExitBB = PN->getParent();
1449 assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1451 // Split predecessors of the loop exit to make instructions in the loop are
1452 // exposed to exit blocks through trivially replaceable PHIs while keeping the
1453 // loop in the canonical form where each predecessor of each exit block should
1454 // be contained within the loop. For example, this will convert the loop below
1455 // from
1457 // LB1:
1458 // %v1 =
1459 // br %LE, %LB2
1460 // LB2:
1461 // %v2 =
1462 // br %LE, %LB1
1463 // LE:
1464 // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1466 // to
1468 // LB1:
1469 // %v1 =
1470 // br %LE.split, %LB2
1471 // LB2:
1472 // %v2 =
1473 // br %LE.split2, %LB1
1474 // LE.split:
1475 // %p1 = phi [%v1, %LB1] <-- trivially replaceable
1476 // br %LE
1477 // LE.split2:
1478 // %p2 = phi [%v2, %LB2] <-- trivially replaceable
1479 // br %LE
1480 // LE:
1481 // %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1483 const auto &BlockColors = SafetyInfo->getBlockColors();
1484 SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
1485 while (!PredBBs.empty()) {
1486 BasicBlock *PredBB = *PredBBs.begin();
1487 assert(CurLoop->contains(PredBB) &&
1488 "Expect all predecessors are in the loop");
1489 if (PN->getBasicBlockIndex(PredBB) >= 0) {
1490 BasicBlock *NewPred = SplitBlockPredecessors(
1491 ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
1492 // Since we do not allow splitting EH-block with BlockColors in
1493 // canSplitPredecessors(), we can simply assign predecessor's color to
1494 // the new block.
1495 if (!BlockColors.empty())
1496 // Grab a reference to the ColorVector to be inserted before getting the
1497 // reference to the vector we are copying because inserting the new
1498 // element in BlockColors might cause the map to be reallocated.
1499 SafetyInfo->copyColors(NewPred, PredBB);
1501 PredBBs.remove(PredBB);
1505 /// When an instruction is found to only be used outside of the loop, this
1506 /// function moves it to the exit blocks and patches up SSA form as needed.
1507 /// This method is guaranteed to remove the original instruction from its
1508 /// position, and may either delete it or move it to outside of the loop.
1510 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1511 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
1512 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
1513 bool FreeInLoop) {
1514 LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1515 ORE->emit([&]() {
1516 return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1517 << "sinking " << ore::NV("Inst", &I);
1519 bool Changed = false;
1520 if (isa<LoadInst>(I))
1521 ++NumMovedLoads;
1522 else if (isa<CallInst>(I))
1523 ++NumMovedCalls;
1524 ++NumSunk;
1526 // Iterate over users to be ready for actual sinking. Replace users via
1527 // unrechable blocks with undef and make all user PHIs trivially replcable.
1528 SmallPtrSet<Instruction *, 8> VisitedUsers;
1529 for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1530 auto *User = cast<Instruction>(*UI);
1531 Use &U = UI.getUse();
1532 ++UI;
1534 if (VisitedUsers.count(User) || CurLoop->contains(User))
1535 continue;
1537 if (!DT->isReachableFromEntry(User->getParent())) {
1538 U = UndefValue::get(I.getType());
1539 Changed = true;
1540 continue;
1543 // The user must be a PHI node.
1544 PHINode *PN = cast<PHINode>(User);
1546 // Surprisingly, instructions can be used outside of loops without any
1547 // exits. This can only happen in PHI nodes if the incoming block is
1548 // unreachable.
1549 BasicBlock *BB = PN->getIncomingBlock(U);
1550 if (!DT->isReachableFromEntry(BB)) {
1551 U = UndefValue::get(I.getType());
1552 Changed = true;
1553 continue;
1556 VisitedUsers.insert(PN);
1557 if (isTriviallyReplaceablePHI(*PN, I))
1558 continue;
1560 if (!canSplitPredecessors(PN, SafetyInfo))
1561 return Changed;
1563 // Split predecessors of the PHI so that we can make users trivially
1564 // replaceable.
1565 splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
1567 // Should rebuild the iterators, as they may be invalidated by
1568 // splitPredecessorsOfLoopExit().
1569 UI = I.user_begin();
1570 UE = I.user_end();
1573 if (VisitedUsers.empty())
1574 return Changed;
1576 #ifndef NDEBUG
1577 SmallVector<BasicBlock *, 32> ExitBlocks;
1578 CurLoop->getUniqueExitBlocks(ExitBlocks);
1579 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1580 ExitBlocks.end());
1581 #endif
1583 // Clones of this instruction. Don't create more than one per exit block!
1584 SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
1586 // If this instruction is only used outside of the loop, then all users are
1587 // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1588 // the instruction.
1589 SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
1590 for (auto *UI : Users) {
1591 auto *User = cast<Instruction>(UI);
1593 if (CurLoop->contains(User))
1594 continue;
1596 PHINode *PN = cast<PHINode>(User);
1597 assert(ExitBlockSet.count(PN->getParent()) &&
1598 "The LCSSA PHI is not in an exit block!");
1599 // The PHI must be trivially replaceable.
1600 Instruction *New = sinkThroughTriviallyReplaceablePHI(
1601 PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1602 PN->replaceAllUsesWith(New);
1603 eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
1604 Changed = true;
1606 return Changed;
1609 /// When an instruction is found to only use loop invariant operands that
1610 /// is safe to hoist, this instruction is called to do the dirty work.
1612 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1613 BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
1614 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
1615 LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
1616 << "\n");
1617 ORE->emit([&]() {
1618 return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1619 << ore::NV("Inst", &I);
1622 // Metadata can be dependent on conditions we are hoisting above.
1623 // Conservatively strip all metadata on the instruction unless we were
1624 // guaranteed to execute I if we entered the loop, in which case the metadata
1625 // is valid in the loop preheader.
1626 if (I.hasMetadataOtherThanDebugLoc() &&
1627 // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1628 // time in isGuaranteedToExecute if we don't actually have anything to
1629 // drop. It is a compile time optimization, not required for correctness.
1630 !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
1631 I.dropUnknownNonDebugMetadata();
1633 if (isa<PHINode>(I))
1634 // Move the new node to the end of the phi list in the destination block.
1635 moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU);
1636 else
1637 // Move the new node to the destination block, before its terminator.
1638 moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU);
1640 // Do not retain debug locations when we are moving instructions to different
1641 // basic blocks, because we want to avoid jumpy line tables. Calls, however,
1642 // need to retain their debug locs because they may be inlined.
1643 // FIXME: How do we retain source locations without causing poor debugging
1644 // behavior?
1645 if (!isa<CallInst>(I))
1646 I.setDebugLoc(DebugLoc());
1648 if (isa<LoadInst>(I))
1649 ++NumMovedLoads;
1650 else if (isa<CallInst>(I))
1651 ++NumMovedCalls;
1652 ++NumHoisted;
1655 /// Only sink or hoist an instruction if it is not a trapping instruction,
1656 /// or if the instruction is known not to trap when moved to the preheader.
1657 /// or if it is a trapping instruction and is guaranteed to execute.
1658 static bool isSafeToExecuteUnconditionally(Instruction &Inst,
1659 const DominatorTree *DT,
1660 const Loop *CurLoop,
1661 const LoopSafetyInfo *SafetyInfo,
1662 OptimizationRemarkEmitter *ORE,
1663 const Instruction *CtxI) {
1664 if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT))
1665 return true;
1667 bool GuaranteedToExecute =
1668 SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1670 if (!GuaranteedToExecute) {
1671 auto *LI = dyn_cast<LoadInst>(&Inst);
1672 if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1673 ORE->emit([&]() {
1674 return OptimizationRemarkMissed(
1675 DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1676 << "failed to hoist load with loop-invariant address "
1677 "because load is conditionally executed";
1681 return GuaranteedToExecute;
1684 namespace {
1685 class LoopPromoter : public LoadAndStorePromoter {
1686 Value *SomePtr; // Designated pointer to store to.
1687 const SmallSetVector<Value *, 8> &PointerMustAliases;
1688 SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1689 SmallVectorImpl<Instruction *> &LoopInsertPts;
1690 SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1691 PredIteratorCache &PredCache;
1692 AliasSetTracker &AST;
1693 MemorySSAUpdater *MSSAU;
1694 LoopInfo &LI;
1695 DebugLoc DL;
1696 int Alignment;
1697 bool UnorderedAtomic;
1698 AAMDNodes AATags;
1699 ICFLoopSafetyInfo &SafetyInfo;
1701 Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1702 if (Instruction *I = dyn_cast<Instruction>(V))
1703 if (Loop *L = LI.getLoopFor(I->getParent()))
1704 if (!L->contains(BB)) {
1705 // We need to create an LCSSA PHI node for the incoming value and
1706 // store that.
1707 PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1708 I->getName() + ".lcssa", &BB->front());
1709 for (BasicBlock *Pred : PredCache.get(BB))
1710 PN->addIncoming(I, Pred);
1711 return PN;
1713 return V;
1716 public:
1717 LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1718 const SmallSetVector<Value *, 8> &PMA,
1719 SmallVectorImpl<BasicBlock *> &LEB,
1720 SmallVectorImpl<Instruction *> &LIP,
1721 SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
1722 AliasSetTracker &ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
1723 DebugLoc dl, int alignment, bool UnorderedAtomic,
1724 const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
1725 : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
1726 LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
1727 PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
1728 Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1729 SafetyInfo(SafetyInfo) {}
1731 bool isInstInList(Instruction *I,
1732 const SmallVectorImpl<Instruction *> &) const override {
1733 Value *Ptr;
1734 if (LoadInst *LI = dyn_cast<LoadInst>(I))
1735 Ptr = LI->getOperand(0);
1736 else
1737 Ptr = cast<StoreInst>(I)->getPointerOperand();
1738 return PointerMustAliases.count(Ptr);
1741 void doExtraRewritesBeforeFinalDeletion() override {
1742 // Insert stores after in the loop exit blocks. Each exit block gets a
1743 // store of the live-out values that feed them. Since we've already told
1744 // the SSA updater about the defs in the loop and the preheader
1745 // definition, it is all set and we can start using it.
1746 for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1747 BasicBlock *ExitBlock = LoopExitBlocks[i];
1748 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
1749 LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
1750 Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
1751 Instruction *InsertPos = LoopInsertPts[i];
1752 StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1753 if (UnorderedAtomic)
1754 NewSI->setOrdering(AtomicOrdering::Unordered);
1755 NewSI->setAlignment(Alignment);
1756 NewSI->setDebugLoc(DL);
1757 if (AATags)
1758 NewSI->setAAMetadata(AATags);
1760 if (MSSAU) {
1761 MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1762 MemoryAccess *NewMemAcc;
1763 if (!MSSAInsertPoint) {
1764 NewMemAcc = MSSAU->createMemoryAccessInBB(
1765 NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
1766 } else {
1767 NewMemAcc =
1768 MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
1770 MSSAInsertPts[i] = NewMemAcc;
1771 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1772 // FIXME: true for safety, false may still be correct.
1777 void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
1778 // Update alias analysis.
1779 AST.copyValue(LI, V);
1781 void instructionDeleted(Instruction *I) const override {
1782 SafetyInfo.removeInstruction(I);
1783 AST.deleteValue(I);
1784 if (MSSAU)
1785 MSSAU->removeMemoryAccess(I);
1790 /// Return true iff we can prove that a caller of this function can not inspect
1791 /// the contents of the provided object in a well defined program.
1792 bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
1793 if (isa<AllocaInst>(Object))
1794 // Since the alloca goes out of scope, we know the caller can't retain a
1795 // reference to it and be well defined. Thus, we don't need to check for
1796 // capture.
1797 return true;
1799 // For all other objects we need to know that the caller can't possibly
1800 // have gotten a reference to the object. There are two components of
1801 // that:
1802 // 1) Object can't be escaped by this function. This is what
1803 // PointerMayBeCaptured checks.
1804 // 2) Object can't have been captured at definition site. For this, we
1805 // need to know the return value is noalias. At the moment, we use a
1806 // weaker condition and handle only AllocLikeFunctions (which are
1807 // known to be noalias). TODO
1808 return isAllocLikeFn(Object, TLI) &&
1809 !PointerMayBeCaptured(Object, true, true);
1812 } // namespace
1814 /// Try to promote memory values to scalars by sinking stores out of the
1815 /// loop and moving loads to before the loop. We do this by looping over
1816 /// the stores in the loop, looking for stores to Must pointers which are
1817 /// loop invariant.
1819 bool llvm::promoteLoopAccessesToScalars(
1820 const SmallSetVector<Value *, 8> &PointerMustAliases,
1821 SmallVectorImpl<BasicBlock *> &ExitBlocks,
1822 SmallVectorImpl<Instruction *> &InsertPts,
1823 SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
1824 LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
1825 Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
1826 ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
1827 // Verify inputs.
1828 assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1829 CurAST != nullptr && SafetyInfo != nullptr &&
1830 "Unexpected Input to promoteLoopAccessesToScalars");
1832 Value *SomePtr = *PointerMustAliases.begin();
1833 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1835 // It is not safe to promote a load/store from the loop if the load/store is
1836 // conditional. For example, turning:
1838 // for () { if (c) *P += 1; }
1840 // into:
1842 // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
1844 // is not safe, because *P may only be valid to access if 'c' is true.
1846 // The safety property divides into two parts:
1847 // p1) The memory may not be dereferenceable on entry to the loop. In this
1848 // case, we can't insert the required load in the preheader.
1849 // p2) The memory model does not allow us to insert a store along any dynamic
1850 // path which did not originally have one.
1852 // If at least one store is guaranteed to execute, both properties are
1853 // satisfied, and promotion is legal.
1855 // This, however, is not a necessary condition. Even if no store/load is
1856 // guaranteed to execute, we can still establish these properties.
1857 // We can establish (p1) by proving that hoisting the load into the preheader
1858 // is safe (i.e. proving dereferenceability on all paths through the loop). We
1859 // can use any access within the alias set to prove dereferenceability,
1860 // since they're all must alias.
1862 // There are two ways establish (p2):
1863 // a) Prove the location is thread-local. In this case the memory model
1864 // requirement does not apply, and stores are safe to insert.
1865 // b) Prove a store dominates every exit block. In this case, if an exit
1866 // blocks is reached, the original dynamic path would have taken us through
1867 // the store, so inserting a store into the exit block is safe. Note that this
1868 // is different from the store being guaranteed to execute. For instance,
1869 // if an exception is thrown on the first iteration of the loop, the original
1870 // store is never executed, but the exit blocks are not executed either.
1872 bool DereferenceableInPH = false;
1873 bool SafeToInsertStore = false;
1875 SmallVector<Instruction *, 64> LoopUses;
1877 // We start with an alignment of one and try to find instructions that allow
1878 // us to prove better alignment.
1879 unsigned Alignment = 1;
1880 // Keep track of which types of access we see
1881 bool SawUnorderedAtomic = false;
1882 bool SawNotAtomic = false;
1883 AAMDNodes AATags;
1885 const DataLayout &MDL = Preheader->getModule()->getDataLayout();
1887 bool IsKnownThreadLocalObject = false;
1888 if (SafetyInfo->anyBlockMayThrow()) {
1889 // If a loop can throw, we have to insert a store along each unwind edge.
1890 // That said, we can't actually make the unwind edge explicit. Therefore,
1891 // we have to prove that the store is dead along the unwind edge. We do
1892 // this by proving that the caller can't have a reference to the object
1893 // after return and thus can't possibly load from the object.
1894 Value *Object = GetUnderlyingObject(SomePtr, MDL);
1895 if (!isKnownNonEscaping(Object, TLI))
1896 return false;
1897 // Subtlety: Alloca's aren't visible to callers, but *are* potentially
1898 // visible to other threads if captured and used during their lifetimes.
1899 IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
1902 // Check that all of the pointers in the alias set have the same type. We
1903 // cannot (yet) promote a memory location that is loaded and stored in
1904 // different sizes. While we are at it, collect alignment and AA info.
1905 for (Value *ASIV : PointerMustAliases) {
1906 // Check that all of the pointers in the alias set have the same type. We
1907 // cannot (yet) promote a memory location that is loaded and stored in
1908 // different sizes.
1909 if (SomePtr->getType() != ASIV->getType())
1910 return false;
1912 for (User *U : ASIV->users()) {
1913 // Ignore instructions that are outside the loop.
1914 Instruction *UI = dyn_cast<Instruction>(U);
1915 if (!UI || !CurLoop->contains(UI))
1916 continue;
1918 // If there is an non-load/store instruction in the loop, we can't promote
1919 // it.
1920 if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
1921 if (!Load->isUnordered())
1922 return false;
1924 SawUnorderedAtomic |= Load->isAtomic();
1925 SawNotAtomic |= !Load->isAtomic();
1927 if (!DereferenceableInPH)
1928 DereferenceableInPH = isSafeToExecuteUnconditionally(
1929 *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator());
1930 } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
1931 // Stores *of* the pointer are not interesting, only stores *to* the
1932 // pointer.
1933 if (UI->getOperand(1) != ASIV)
1934 continue;
1935 if (!Store->isUnordered())
1936 return false;
1938 SawUnorderedAtomic |= Store->isAtomic();
1939 SawNotAtomic |= !Store->isAtomic();
1941 // If the store is guaranteed to execute, both properties are satisfied.
1942 // We may want to check if a store is guaranteed to execute even if we
1943 // already know that promotion is safe, since it may have higher
1944 // alignment than any other guaranteed stores, in which case we can
1945 // raise the alignment on the promoted store.
1946 unsigned InstAlignment = Store->getAlignment();
1947 if (!InstAlignment)
1948 InstAlignment =
1949 MDL.getABITypeAlignment(Store->getValueOperand()->getType());
1951 if (!DereferenceableInPH || !SafeToInsertStore ||
1952 (InstAlignment > Alignment)) {
1953 if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
1954 DereferenceableInPH = true;
1955 SafeToInsertStore = true;
1956 Alignment = std::max(Alignment, InstAlignment);
1960 // If a store dominates all exit blocks, it is safe to sink.
1961 // As explained above, if an exit block was executed, a dominating
1962 // store must have been executed at least once, so we are not
1963 // introducing stores on paths that did not have them.
1964 // Note that this only looks at explicit exit blocks. If we ever
1965 // start sinking stores into unwind edges (see above), this will break.
1966 if (!SafeToInsertStore)
1967 SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
1968 return DT->dominates(Store->getParent(), Exit);
1971 // If the store is not guaranteed to execute, we may still get
1972 // deref info through it.
1973 if (!DereferenceableInPH) {
1974 DereferenceableInPH = isDereferenceableAndAlignedPointer(
1975 Store->getPointerOperand(), Store->getAlignment(), MDL,
1976 Preheader->getTerminator(), DT);
1978 } else
1979 return false; // Not a load or store.
1981 // Merge the AA tags.
1982 if (LoopUses.empty()) {
1983 // On the first load/store, just take its AA tags.
1984 UI->getAAMetadata(AATags);
1985 } else if (AATags) {
1986 UI->getAAMetadata(AATags, /* Merge = */ true);
1989 LoopUses.push_back(UI);
1993 // If we found both an unordered atomic instruction and a non-atomic memory
1994 // access, bail. We can't blindly promote non-atomic to atomic since we
1995 // might not be able to lower the result. We can't downgrade since that
1996 // would violate memory model. Also, align 0 is an error for atomics.
1997 if (SawUnorderedAtomic && SawNotAtomic)
1998 return false;
2000 // If we couldn't prove we can hoist the load, bail.
2001 if (!DereferenceableInPH)
2002 return false;
2004 // We know we can hoist the load, but don't have a guaranteed store.
2005 // Check whether the location is thread-local. If it is, then we can insert
2006 // stores along paths which originally didn't have them without violating the
2007 // memory model.
2008 if (!SafeToInsertStore) {
2009 if (IsKnownThreadLocalObject)
2010 SafeToInsertStore = true;
2011 else {
2012 Value *Object = GetUnderlyingObject(SomePtr, MDL);
2013 SafeToInsertStore =
2014 (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
2015 !PointerMayBeCaptured(Object, true, true);
2019 // If we've still failed to prove we can sink the store, give up.
2020 if (!SafeToInsertStore)
2021 return false;
2023 // Otherwise, this is safe to promote, lets do it!
2024 LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
2025 << '\n');
2026 ORE->emit([&]() {
2027 return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2028 LoopUses[0])
2029 << "Moving accesses to memory location out of the loop";
2031 ++NumPromoted;
2033 // Grab a debug location for the inserted loads/stores; given that the
2034 // inserted loads/stores have little relation to the original loads/stores,
2035 // this code just arbitrarily picks a location from one, since any debug
2036 // location is better than none.
2037 DebugLoc DL = LoopUses[0]->getDebugLoc();
2039 // We use the SSAUpdater interface to insert phi nodes as required.
2040 SmallVector<PHINode *, 16> NewPHIs;
2041 SSAUpdater SSA(&NewPHIs);
2042 LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
2043 InsertPts, MSSAInsertPts, PIC, *CurAST, MSSAU, *LI, DL,
2044 Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
2046 // Set up the preheader to have a definition of the value. It is the live-out
2047 // value from the preheader that uses in the loop will use.
2048 LoadInst *PreheaderLoad = new LoadInst(
2049 SomePtr->getType()->getPointerElementType(), SomePtr,
2050 SomePtr->getName() + ".promoted", Preheader->getTerminator());
2051 if (SawUnorderedAtomic)
2052 PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2053 PreheaderLoad->setAlignment(Alignment);
2054 PreheaderLoad->setDebugLoc(DL);
2055 if (AATags)
2056 PreheaderLoad->setAAMetadata(AATags);
2057 SSA.AddAvailableValue(Preheader, PreheaderLoad);
2059 MemoryAccess *PreheaderLoadMemoryAccess;
2060 if (MSSAU) {
2061 PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
2062 PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
2063 MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
2064 MSSAU->insertUse(NewMemUse);
2067 // Rewrite all the loads in the loop and remember all the definitions from
2068 // stores in the loop.
2069 Promoter.run(LoopUses);
2071 // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2072 if (PreheaderLoad->use_empty())
2073 eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);
2075 return true;
2078 /// Returns an owning pointer to an alias set which incorporates aliasing info
2079 /// from L and all subloops of L.
2080 /// FIXME: In new pass manager, there is no helper function to handle loop
2081 /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed
2082 /// from scratch for every loop. Hook up with the helper functions when
2083 /// available in the new pass manager to avoid redundant computation.
2084 std::unique_ptr<AliasSetTracker>
2085 LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
2086 AliasAnalysis *AA) {
2087 std::unique_ptr<AliasSetTracker> CurAST;
2088 SmallVector<Loop *, 4> RecomputeLoops;
2089 for (Loop *InnerL : L->getSubLoops()) {
2090 auto MapI = LoopToAliasSetMap.find(InnerL);
2091 // If the AST for this inner loop is missing it may have been merged into
2092 // some other loop's AST and then that loop unrolled, and so we need to
2093 // recompute it.
2094 if (MapI == LoopToAliasSetMap.end()) {
2095 RecomputeLoops.push_back(InnerL);
2096 continue;
2098 std::unique_ptr<AliasSetTracker> InnerAST = std::move(MapI->second);
2100 if (CurAST) {
2101 // What if InnerLoop was modified by other passes ?
2102 // Once we've incorporated the inner loop's AST into ours, we don't need
2103 // the subloop's anymore.
2104 CurAST->add(*InnerAST);
2105 } else {
2106 CurAST = std::move(InnerAST);
2108 LoopToAliasSetMap.erase(MapI);
2110 if (!CurAST)
2111 CurAST = make_unique<AliasSetTracker>(*AA);
2113 // Add everything from the sub loops that are no longer directly available.
2114 for (Loop *InnerL : RecomputeLoops)
2115 for (BasicBlock *BB : InnerL->blocks())
2116 CurAST->add(*BB);
2118 // And merge in this loop (without anything from inner loops).
2119 for (BasicBlock *BB : L->blocks())
2120 if (LI->getLoopFor(BB) == L)
2121 CurAST->add(*BB);
2123 return CurAST;
2126 std::unique_ptr<AliasSetTracker>
2127 LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA(
2128 Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) {
2129 auto *MSSA = MSSAU->getMemorySSA();
2130 auto CurAST = make_unique<AliasSetTracker>(*AA, MSSA, L);
2131 CurAST->addAllInstructionsInLoopUsingMSSA();
2132 return CurAST;
2135 /// Simple analysis hook. Clone alias set info.
2137 void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
2138 Loop *L) {
2139 auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
2140 if (ASTIt == LICM.getLoopToAliasSetMap().end())
2141 return;
2143 ASTIt->second->copyValue(From, To);
2146 /// Simple Analysis hook. Delete value V from alias set
2148 void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) {
2149 auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
2150 if (ASTIt == LICM.getLoopToAliasSetMap().end())
2151 return;
2153 ASTIt->second->deleteValue(V);
2156 /// Simple Analysis hook. Delete value L from alias set map.
2158 void LegacyLICMPass::deleteAnalysisLoop(Loop *L) {
2159 if (!LICM.getLoopToAliasSetMap().count(L))
2160 return;
2162 LICM.getLoopToAliasSetMap().erase(L);
2165 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
2166 AliasSetTracker *CurAST, Loop *CurLoop,
2167 AliasAnalysis *AA) {
2168 // First check to see if any of the basic blocks in CurLoop invalidate *V.
2169 bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();
2171 if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
2172 return isInvalidatedAccordingToAST;
2174 // Check with a diagnostic analysis if we can refine the information above.
2175 // This is to identify the limitations of using the AST.
2176 // The alias set mechanism used by LICM has a major weakness in that it
2177 // combines all things which may alias into a single set *before* asking
2178 // modref questions. As a result, a single readonly call within a loop will
2179 // collapse all loads and stores into a single alias set and report
2180 // invalidation if the loop contains any store. For example, readonly calls
2181 // with deopt states have this form and create a general alias set with all
2182 // loads and stores. In order to get any LICM in loops containing possible
2183 // deopt states we need a more precise invalidation of checking the mod ref
2184 // info of each instruction within the loop and LI. This has a complexity of
2185 // O(N^2), so currently, it is used only as a diagnostic tool since the
2186 // default value of LICMN2Threshold is zero.
2188 // Don't look at nested loops.
2189 if (CurLoop->begin() != CurLoop->end())
2190 return true;
2192 int N = 0;
2193 for (BasicBlock *BB : CurLoop->getBlocks())
2194 for (Instruction &I : *BB) {
2195 if (N >= LICMN2Theshold) {
2196 LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for "
2197 << *(MemLoc.Ptr) << "\n");
2198 return true;
2200 N++;
2201 auto Res = AA->getModRefInfo(&I, MemLoc);
2202 if (isModSet(Res)) {
2203 LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
2204 << *(MemLoc.Ptr) << "\n");
2205 return true;
2208 LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
2209 return false;
2212 static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
2213 Loop *CurLoop,
2214 int &LicmMssaOptCounter) {
2215 MemoryAccess *Source;
2216 // See declaration of LicmMssaOptCap for usage details.
2217 if (LicmMssaOptCounter >= LicmMssaOptCap)
2218 Source = MU->getDefiningAccess();
2219 else {
2220 Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
2221 LicmMssaOptCounter++;
2223 return !MSSA->isLiveOnEntryDef(Source) &&
2224 CurLoop->contains(Source->getBlock());
2227 /// Little predicate that returns true if the specified basic block is in
2228 /// a subloop of the current one, not the current one itself.
2230 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2231 assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2232 return LI->getLoopFor(BB) != CurLoop;