//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs a simple dominator tree walk that eliminates trivially
// redundant instructions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
#include <cassert>
#include <deque>
#include <memory>
#include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "early-cse"

STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE,      "Number of instructions CSE'd");
STATISTIC(NumCSECVP,   "Number of compare instructions CVP'd");
STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
STATISTIC(NumDSE,      "Number of trivial dead stores removed");

DEBUG_COUNTER(CSECounter, "early-cse",
              "Controls which instructions are removed");

static cl::opt<unsigned> EarlyCSEMssaOptCap(
    "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
    cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
             "for faster compile. Caps the MemorySSA clobbering calls."));

//===----------------------------------------------------------------------===//
// SimpleValue
//===----------------------------------------------------------------------===//

namespace {

/// Struct representing the available values in the scoped hash table.
struct SimpleValue {
  Instruction *Inst;

  SimpleValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // This can only handle non-void readnone functions.
    if (CallInst *CI = dyn_cast<CallInst>(Inst))
      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
    return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
           isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
           isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
           isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
           isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
  }
};

} // end anonymous namespace

namespace llvm {

template <> struct DenseMapInfo<SimpleValue> {
  static inline SimpleValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline SimpleValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(SimpleValue Val);
  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
};

} // end namespace llvm

unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash in all of the operands as pointers.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
    Value *LHS = BinOp->getOperand(0);
    Value *RHS = BinOp->getOperand(1);
    if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
      std::swap(LHS, RHS);

    return hash_combine(BinOp->getOpcode(), LHS, RHS);
  }
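
  // Note (illustrative comment): the canonical operand order chosen above
  // means commuted forms such as 'add %a, %b' and 'add %b, %a' hash
  // identically, so isEqual below can treat them as the same SimpleValue.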

  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
    Value *LHS = CI->getOperand(0);
    Value *RHS = CI->getOperand(1);
    CmpInst::Predicate Pred = CI->getPredicate();
    if (Inst->getOperand(0) > Inst->getOperand(1)) {
      std::swap(LHS, RHS);
      Pred = CI->getSwappedPredicate();
    }
    return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
  }

  // Hash min/max/abs (cmp + select) to allow for commuted operands.
  // Min/max may also have non-canonical compare predicate (eg, the compare for
  // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
  // compare.
  Value *A, *B;
  SelectPatternFlavor SPF = matchSelectPattern(Inst, A, B).Flavor;
  // TODO: We should also detect FP min/max.
  if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
      SPF == SPF_UMIN || SPF == SPF_UMAX) {
    if (A > B)
      std::swap(A, B);
    return hash_combine(Inst->getOpcode(), SPF, A, B);
  }
  if (SPF == SPF_ABS || SPF == SPF_NABS) {
    // ABS/NABS always puts the input in A and its negation in B.
    return hash_combine(Inst->getOpcode(), SPF, A, B);
  }

  if (CastInst *CI = dyn_cast<CastInst>(Inst))
    return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));

  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
    return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
                        hash_combine_range(EVI->idx_begin(), EVI->idx_end()));

  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
    return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
                        IVI->getOperand(1),
                        hash_combine_range(IVI->idx_begin(), IVI->idx_end()));

  assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
          isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
          isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
          isa<ShuffleVectorInst>(Inst)) &&
         "Invalid/unknown instruction");

  // Mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;

  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;

  if (LHSI->getOpcode() != RHSI->getOpcode())
    return false;
  if (LHSI->isIdenticalToWhenDefined(RHSI))
    return true;

  // If we're not strictly identical, we still might be a commutable instruction
  if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
    if (!LHSBinOp->isCommutative())
      return false;

    assert(isa<BinaryOperator>(RHSI) &&
           "same opcode, but different instruction type?");
    BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);

    // Commuted equality check.
    return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
           LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
  }
  if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
    assert(isa<CmpInst>(RHSI) &&
           "same opcode, but different instruction type?");
    CmpInst *RHSCmp = cast<CmpInst>(RHSI);
    // Commuted equality check with a swapped predicate.
    return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
           LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
           LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
  }

  // Min/max/abs can occur with commuted operands, non-canonical predicates,
  // and/or non-canonical operands.
  Value *LHSA, *LHSB;
  SelectPatternFlavor LSPF = matchSelectPattern(LHSI, LHSA, LHSB).Flavor;
  // TODO: We should also detect FP min/max.
  if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
      LSPF == SPF_UMIN || LSPF == SPF_UMAX ||
      LSPF == SPF_ABS || LSPF == SPF_NABS) {
    Value *RHSA, *RHSB;
    SelectPatternFlavor RSPF = matchSelectPattern(RHSI, RHSA, RHSB).Flavor;
    if (LSPF == RSPF) {
      // Abs results are placed in a defined order by matchSelectPattern.
      if (LSPF == SPF_ABS || LSPF == SPF_NABS)
        return LHSA == RHSA && LHSB == RHSB;
      return ((LHSA == RHSA && LHSB == RHSB) ||
              (LHSA == RHSB && LHSB == RHSA));
    }
  }

  return false;
}

//===----------------------------------------------------------------------===//
// CallValue
//===----------------------------------------------------------------------===//

namespace {

/// Struct representing the available call values in the scoped hash
/// table.
struct CallValue {
  Instruction *Inst;

  CallValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // Don't value number anything that returns void.
    if (Inst->getType()->isVoidTy())
      return false;

    CallInst *CI = dyn_cast<CallInst>(Inst);
    if (!CI || !CI->onlyReadsMemory())
      return false;
    return true;
  }
};

} // end anonymous namespace

namespace llvm {

template <> struct DenseMapInfo<CallValue> {
  static inline CallValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline CallValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(CallValue Val);
  static bool isEqual(CallValue LHS, CallValue RHS);
};

} // end namespace llvm

unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash all of the operands as pointers and mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;
  return LHSI->isIdenticalTo(RHSI);
}

//===----------------------------------------------------------------------===//
// EarlyCSE implementation
//===----------------------------------------------------------------------===//

namespace {

/// A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
class EarlyCSE {
public:
  const TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  DominatorTree &DT;
  AssumptionCache &AC;
  const SimplifyQuery SQ;
  MemorySSA *MSSA;
  std::unique_ptr<MemorySSAUpdater> MSSAUpdater;

  using AllocatorTy =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<SimpleValue, Value *>>;
  using ScopedHTType =
      ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
                      AllocatorTy>;

  /// A scoped hash table of the current values of all of our simple
  /// scalar expressions.
  ///
  /// As we walk down the domtree, we look to see if instructions are in this:
  /// if so, we replace them with what we find, otherwise we insert them so
  /// that dominated values can succeed in their lookup.
  ScopedHTType AvailableValues;

  /// A scoped hash table of the current values of previously encountered
  /// memory locations.
  ///
  /// This allows us to get efficient access to dominating loads or stores when
  /// we have a fully redundant load. In addition to the most recent load, we
  /// keep track of a generation count of the read, which is compared against
  /// the current generation count. The current generation count is incremented
  /// after every possibly writing memory operation, which ensures that we only
  /// CSE loads with other loads that have no intervening store. Ordering
  /// events (such as fences or atomic instructions) increment the generation
  /// count as well; essentially, we model these as writes to all possible
  /// locations. Note that atomic and/or volatile loads and stores can be
  /// present in the table; it is the responsibility of the consumer to inspect
  /// the atomicity/volatility if needed.
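  ///
  /// For example (illustrative IR):
  ///
  ///   %v1 = load i32, i32* %p     ; recorded at generation G
  ///   store i32 0, i32* %q        ; may write memory, generation becomes G+1
  ///   %v2 = load i32, i32* %p     ; observed at generation G+1
  ///
  /// %v2 is only CSE'd to %v1 if the generations match, or if MemorySSA (or
  /// the invariant tables) proves the intervening store cannot clobber %p.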
  struct LoadValue {
    Instruction *DefInst = nullptr;
    unsigned Generation = 0;
    int MatchingId = -1;
    bool IsAtomic = false;

    LoadValue() = default;
    LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
              bool IsAtomic)
        : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
          IsAtomic(IsAtomic) {}
  };

  using LoadMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<Value *, LoadValue>>;
  using LoadHTType =
      ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
                      LoadMapAllocator>;

  LoadHTType AvailableLoads;

  // A scoped hash table mapping memory locations (represented as typed
  // addresses) to generation numbers at which that memory location became
  // (henceforth indefinitely) invariant.
  using InvariantMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<MemoryLocation, unsigned>>;
  using InvariantHTType =
      ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
                      InvariantMapAllocator>;
  InvariantHTType AvailableInvariants;

  /// A scoped hash table of the current values of read-only call
  /// values.
  ///
  /// It uses the same generation count as loads.
  using CallHTType =
      ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
  CallHTType AvailableCalls;

  /// This is the current generation of the memory value.
  unsigned CurrentGeneration = 0;

  /// Set up the EarlyCSE runner for a particular function.
  EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
           const TargetTransformInfo &TTI, DominatorTree &DT,
           AssumptionCache &AC, MemorySSA *MSSA)
      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
        MSSAUpdater(llvm::make_unique<MemorySSAUpdater>(MSSA)) {}

  bool run();

private:
  unsigned ClobberCounter = 0;
  // Almost a POD, but needs to call the constructors for the scoped hash
  // tables so that a new scope gets pushed on. These are RAII so that the
  // scope gets popped when the NodeScope is destroyed.
  class NodeScope {
  public:
    NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
        : Scope(AvailableValues), LoadScope(AvailableLoads),
          InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
    NodeScope(const NodeScope &) = delete;
    NodeScope &operator=(const NodeScope &) = delete;

  private:
    ScopedHTType::ScopeTy Scope;
    LoadHTType::ScopeTy LoadScope;
    InvariantHTType::ScopeTy InvariantScope;
    CallHTType::ScopeTy CallScope;
  };

  // Contains all the needed information to create a stack for doing a depth
  // first traversal of the tree. This includes scopes for values, loads, and
  // calls as well as the generation. There is a child iterator so that the
  // children do not need to be stored separately.
  class StackNode {
  public:
    StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
              unsigned cg, DomTreeNode *n, DomTreeNode::iterator child,
              DomTreeNode::iterator end)
        : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
          EndIter(end),
          Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
                 AvailableCalls) {}
    StackNode(const StackNode &) = delete;
    StackNode &operator=(const StackNode &) = delete;

    // Accessors.
    unsigned currentGeneration() { return CurrentGeneration; }
    unsigned childGeneration() { return ChildGeneration; }
    void childGeneration(unsigned generation) { ChildGeneration = generation; }
    DomTreeNode *node() { return Node; }
    DomTreeNode::iterator childIter() { return ChildIter; }

    DomTreeNode *nextChild() {
      DomTreeNode *child = *ChildIter;
      ++ChildIter;
      return child;
    }

    DomTreeNode::iterator end() { return EndIter; }
    bool isProcessed() { return Processed; }
    void process() { Processed = true; }

  private:
    unsigned CurrentGeneration;
    unsigned ChildGeneration;
    DomTreeNode *Node;
    DomTreeNode::iterator ChildIter;
    DomTreeNode::iterator EndIter;
    NodeScope Scopes;
    bool Processed = false;
  };

  /// Wrapper class to handle memory instructions, including loads,
  /// stores and intrinsic loads and stores defined by the target.
  class ParseMemoryInst {
  public:
    ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
        : Inst(Inst) {
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
        if (TTI.getTgtMemIntrinsic(II, Info))
          IsTargetMemInst = true;
    }

    bool isLoad() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return isa<LoadInst>(Inst);
    }

    bool isStore() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return isa<StoreInst>(Inst);
    }

    bool isAtomic() const {
      if (IsTargetMemInst)
        return Info.Ordering != AtomicOrdering::NotAtomic;
      return Inst->isAtomic();
    }

    bool isUnordered() const {
      if (IsTargetMemInst)
        return Info.isUnordered();

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isUnordered();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isUnordered();
      }
      // Conservative answer
      return !Inst->isAtomic();
    }

    bool isVolatile() const {
      if (IsTargetMemInst)
        return Info.IsVolatile;

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isVolatile();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isVolatile();
      }
      // Conservative answer
      return true;
    }

    bool isInvariantLoad() const {
      if (auto *LI = dyn_cast<LoadInst>(Inst))
        return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
      return false;
    }

    bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
      return (getPointerOperand() == Inst.getPointerOperand() &&
              getMatchingId() == Inst.getMatchingId());
    }

    bool isValid() const { return getPointerOperand() != nullptr; }

    // For regular (non-intrinsic) loads/stores, this is set to -1. For
    // intrinsic loads/stores, the id is retrieved from the corresponding
    // field in the MemIntrinsicInfo structure. That field contains
    // non-negative values only.
    int getMatchingId() const {
      if (IsTargetMemInst) return Info.MatchingId;
      return -1;
    }

    Value *getPointerOperand() const {
      if (IsTargetMemInst) return Info.PtrVal;
      return getLoadStorePointerOperand(Inst);
    }

    bool mayReadFromMemory() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return Inst->mayReadFromMemory();
    }

    bool mayWriteToMemory() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return Inst->mayWriteToMemory();
    }

  private:
    bool IsTargetMemInst = false;
    MemIntrinsicInfo Info;
    Instruction *Inst;
  };

  bool processNode(DomTreeNode *Node);

  bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
                             const BasicBlock *BB, const BasicBlock *Pred);

  Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
    if (auto *LI = dyn_cast<LoadInst>(Inst))
      return LI;
    if (auto *SI = dyn_cast<StoreInst>(Inst))
      return SI->getValueOperand();
    assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
    return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
                                                 ExpectedType);
  }

  /// Return true if the instruction is known to only operate on memory
  /// provably invariant in the given "generation".
  bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);

  bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
                           Instruction *EarlierInst, Instruction *LaterInst);

  void removeMSSA(Instruction *Inst) {
    if (!MSSA)
      return;
    if (VerifyMemorySSA)
      MSSA->verifyMemorySSA();
    // Removing a store here can leave MemorySSA in an unoptimized state by
    // creating MemoryPhis that have identical arguments and by creating
    // MemoryUses whose defining access is not an actual clobber. The phi case
    // is handled by MemorySSA when passing OptimizePhis = true to
    // removeMemoryAccess. The non-optimized MemoryUse case is lazily updated
    // by MemorySSA's getClobberingMemoryAccess.
    MSSAUpdater->removeMemoryAccess(Inst, true);
  }
};

} // end anonymous namespace

/// Determine if the memory referenced by LaterInst is from the same heap
/// version as EarlierInst.
/// This is currently called in two scenarios:
///
///   load p
///   ...
///   load p
///
/// and
///
///   x = load p
///   ...
///   store x, p
///
/// in both cases we want to verify that there are no possible writes to the
/// memory referenced by p between the earlier and later instruction.
bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
                                   unsigned LaterGeneration,
                                   Instruction *EarlierInst,
                                   Instruction *LaterInst) {
  // Check the simple memory generation tracking first.
  if (EarlierGeneration == LaterGeneration)
    return true;

  // Without MemorySSA we cannot prove anything beyond the generation counts.
  if (!MSSA)
    return false;

  // If MemorySSA has determined that one of EarlierInst or LaterInst does not
  // read/write memory, then we can safely return true here.
  // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
  // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
  // by also checking the MemorySSA MemoryAccess on the instruction. Initial
  // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
  // with the default optimization pipeline.
  auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
  if (!EarlierMA)
    return true;
  auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
  if (!LaterMA)
    return true;

  // Since we know LaterDef dominates LaterInst and EarlierInst dominates
  // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
  // EarlierInst and LaterInst and neither can any other write that potentially
  // clobbers LaterInst.
  MemoryAccess *LaterDef;
  if (ClobberCounter < EarlyCSEMssaOptCap) {
    LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
    ClobberCounter++;
  } else
    LaterDef = LaterMA->getDefiningAccess();

  return MSSA->dominates(LaterDef, EarlierMA);
}

bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
  // A location loaded from with an invariant_load is assumed to *never* change
  // within the visible scope of the compilation.
  if (auto *LI = dyn_cast<LoadInst>(I))
    if (LI->getMetadata(LLVMContext::MD_invariant_load))
      return true;

  auto MemLocOpt = MemoryLocation::getOrNone(I);
  if (!MemLocOpt)
    // "target" intrinsic forms of loads aren't currently known to
    // MemoryLocation::get. TODO
    return false;
  MemoryLocation MemLoc = *MemLocOpt;
  if (!AvailableInvariants.count(MemLoc))
    return false;

  // Is the generation at which this became invariant older than the
  // current one?
  return AvailableInvariants.lookup(MemLoc) <= GenAt;
}

bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
                                     const BranchInst *BI, const BasicBlock *BB,
                                     const BasicBlock *Pred) {
  assert(BI->isConditional() && "Should be a conditional branch!");
  assert(BI->getCondition() == CondInst && "Wrong condition?");
  assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
  auto *TorF = (BI->getSuccessor(0) == BB)
                   ? ConstantInt::getTrue(BB->getContext())
                   : ConstantInt::getFalse(BB->getContext());
  auto MatchBinOp = [](Instruction *I, unsigned Opcode) {
    if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(I))
      return BOp->getOpcode() == Opcode;
    return false;
  };
  // If the condition is an AND operation, we can propagate its operands into
  // the true branch. If it is an OR operation, we can propagate them into the
  // false branch.
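  //
  // For example (illustrative): with 'br i1 %c, label %then, label %else' and
  // '%c = and i1 %a, %b', both %a and %b are known true on the edge into
  // %then; with an 'or' condition, both operands are known false on the edge
  // into %else.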
  unsigned PropagateOpcode =
      (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;

  bool MadeChanges = false;
  SmallVector<Instruction *, 4> WorkList;
  SmallPtrSet<Instruction *, 4> Visited;
  WorkList.push_back(CondInst);
  while (!WorkList.empty()) {
    Instruction *Curr = WorkList.pop_back_val();

    AvailableValues.insert(Curr, TorF);
    LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
                      << Curr->getName() << "' as " << *TorF << " in "
                      << BB->getName() << "\n");
    if (!DebugCounter::shouldExecute(CSECounter)) {
      LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
    } else {
      // Replace all dominated uses with the known value.
      if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
                                                    BasicBlockEdge(Pred, BB))) {
        NumCSECVP += Count;
        MadeChanges = true;
      }
    }

    if (MatchBinOp(Curr, PropagateOpcode))
      for (auto &Op : cast<BinaryOperator>(Curr)->operands())
        if (Instruction *OPI = dyn_cast<Instruction>(Op))
          if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
            WorkList.push_back(OPI);
  }

  return MadeChanges;
}
*Node
) {
755 bool Changed
= false;
756 BasicBlock
*BB
= Node
->getBlock();
758 // If this block has a single predecessor, then the predecessor is the parent
759 // of the domtree node and all of the live out memory values are still current
760 // in this block. If this block has multiple predecessors, then they could
761 // have invalidated the live-out memory values of our parent value. For now,
762 // just be conservative and invalidate memory if this block has multiple
764 if (!BB
->getSinglePredecessor())
767 // If this node has a single predecessor which ends in a conditional branch,
768 // we can infer the value of the branch condition given that we took this
769 // path. We need the single predecessor to ensure there's not another path
770 // which reaches this block where the condition might hold a different
771 // value. Since we're adding this to the scoped hash table (like any other
772 // def), it will have been popped if we encounter a future merge block.
773 if (BasicBlock
*Pred
= BB
->getSinglePredecessor()) {
774 auto *BI
= dyn_cast
<BranchInst
>(Pred
->getTerminator());
775 if (BI
&& BI
->isConditional()) {
776 auto *CondInst
= dyn_cast
<Instruction
>(BI
->getCondition());
777 if (CondInst
&& SimpleValue::canHandle(CondInst
))
778 Changed
|= handleBranchCondition(CondInst
, BI
, BB
, Pred
);
782 /// LastStore - Keep track of the last non-volatile store that we saw... for
783 /// as long as there in no instruction that reads memory. If we see a store
784 /// to the same location, we delete the dead store. This zaps trivial dead
785 /// stores which can occur in bitfield code among other things.
786 Instruction
*LastStore
= nullptr;

  // See if any instructions in the block can be eliminated. If so, do it. If
  // not, add them to AvailableValues.
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
    Instruction *Inst = &*I++;

    // Dead instructions should just be removed.
    if (isInstructionTriviallyDead(Inst, &TLI)) {
      LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
      if (!DebugCounter::shouldExecute(CSECounter)) {
        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
        continue;
      }
      if (!salvageDebugInfo(*Inst))
        replaceDbgUsesWithUndef(Inst);
      removeMSSA(Inst);
      Inst->eraseFromParent();
      Changed = true;
      ++NumSimplify;
      continue;
    }

    // Skip assume intrinsics, they don't really have side effects (although
    // they're marked as such to ensure preservation of control dependencies),
    // and this pass will not bother with its removal. However, we should mark
    // its condition as true for all dominated blocks.
    if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
      auto *CondI =
          dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0));
      if (CondI && SimpleValue::canHandle(CondI)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst
                          << '\n');
        AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
      } else
        LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
      continue;
    }

    // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
    if (match(Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
      LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << *Inst << '\n');
      continue;
    }

    // We can skip all invariant.start intrinsics since they only read memory,
    // and we can forward values across it. For invariant starts without
    // invariant ends, we can use the fact that the invariantness never ends to
    // start a scope in the current generation which is true for all future
    // generations. Also, we don't need to consume the last store since the
    // semantics of invariant.start allow us to perform DSE of the last
    // store, if there was a store following invariant.start. Consider:
    //   store 30, i8* p
    //   invariant.start(p)
    //   store 40, i8* p
    // We can DSE the store to 30, since the store 40 to invariant location p
    // causes undefined behaviour.
    if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
      // If there are any uses, the scope might end.
      if (!Inst->use_empty())
        continue;
      auto *CI = cast<CallInst>(Inst);
      MemoryLocation MemLoc = MemoryLocation::getForArgument(CI, 1, TLI);
      // Don't start a scope if we already have a better one pushed.
      if (!AvailableInvariants.count(MemLoc))
        AvailableInvariants.insert(MemLoc, CurrentGeneration);
      continue;
    }

    if (isGuard(Inst)) {
      if (auto *CondI =
              dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
        if (SimpleValue::canHandle(CondI)) {
          // Do we already know the actual value of this condition?
          if (auto *KnownCond = AvailableValues.lookup(CondI)) {
            // Is the condition known to be true?
            if (isa<ConstantInt>(KnownCond) &&
                cast<ConstantInt>(KnownCond)->isOne()) {
              LLVM_DEBUG(dbgs()
                         << "EarlyCSE removing guard: " << *Inst << '\n');
              removeMSSA(Inst);
              Inst->eraseFromParent();
              Changed = true;
              continue;
            } else
              // Use the known value if it wasn't true.
              cast<CallInst>(Inst)->setArgOperand(0, KnownCond);
          }
          // The condition we're guarding on here is true for all dominated
          // blocks.
          AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
        }
      }

      // Guard intrinsics read all memory, but don't write any memory.
      // Accordingly, don't update the generation but consume the last store (to
      // avoid an incorrect DSE).
      LastStore = nullptr;
      continue;
    }

    // If the instruction can be simplified (e.g. X+0 = X) then replace it with
    // its simpler value.
    if (Value *V = SimplifyInstruction(Inst, SQ)) {
      LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << "  to: " << *V
                        << '\n');
      if (!DebugCounter::shouldExecute(CSECounter)) {
        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
      } else {
        bool Killed = false;
        if (!Inst->use_empty()) {
          Inst->replaceAllUsesWith(V);
          Changed = true;
        }
        if (isInstructionTriviallyDead(Inst, &TLI)) {
          removeMSSA(Inst);
          Inst->eraseFromParent();
          Changed = true;
          Killed = true;
        }
        if (Changed)
          ++NumSimplify;
        if (Killed)
          continue;
      }
    }
915 if (SimpleValue::canHandle(Inst
)) {
916 // See if the instruction has an available value. If so, use it.
917 if (Value
*V
= AvailableValues
.lookup(Inst
)) {
918 LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst
<< " to: " << *V
920 if (!DebugCounter::shouldExecute(CSECounter
)) {
921 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
924 if (auto *I
= dyn_cast
<Instruction
>(V
))
926 Inst
->replaceAllUsesWith(V
);
928 Inst
->eraseFromParent();
934 // Otherwise, just remember that this value is available.
935 AvailableValues
.insert(Inst
, Inst
);

    ParseMemoryInst MemInst(Inst, TTI);
    // If this is a non-volatile load, process it.
    if (MemInst.isValid() && MemInst.isLoad()) {
      // (conservatively) we can't peek past the ordering implied by this
      // operation, but we can add this load to our set of available values
      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
        LastStore = nullptr;
        ++CurrentGeneration;
      }

      if (MemInst.isInvariantLoad()) {
        // If we pass an invariant load, we know that memory location is
        // indefinitely constant from the moment of first dereferenceability.
        // We conservatively treat the invariant_load as that moment. If we
        // pass an invariant load after already establishing a scope, don't
        // restart it since we want to preserve the earliest point seen.
        auto MemLoc = MemoryLocation::get(Inst);
        if (!AvailableInvariants.count(MemLoc))
          AvailableInvariants.insert(MemLoc, CurrentGeneration);
      }

      // If we have an available version of this load, and if it is the right
      // generation or the load is known to be from an invariant location,
      // replace this instruction.
      //
      // If either the dominating load or the current load are invariant, then
      // we can assume the current load loads the same value as the dominating
      // load.
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst != nullptr &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing loads with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered() &&
          // We can't replace an atomic load with one which isn't also atomic.
          InVal.IsAtomic >= MemInst.isAtomic() &&
          (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
           isSameMemGeneration(InVal.Generation, CurrentGeneration,
                               InVal.DefInst, Inst))) {
        Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType());
        if (Op != nullptr) {
          LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
                            << "  to: " << *InVal.DefInst << '\n');
          if (!DebugCounter::shouldExecute(CSECounter)) {
            LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
            continue;
          }
          if (!Inst->use_empty())
            Inst->replaceAllUsesWith(Op);
          removeMSSA(Inst);
          Inst->eraseFromParent();
          Changed = true;
          ++NumCSELoad;
          continue;
        }
      }

      // Otherwise, remember that we have this instruction.
      AvailableLoads.insert(
          MemInst.getPointerOperand(),
          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                    MemInst.isAtomic()));
      LastStore = nullptr;
      continue;
    }

    // If this instruction may read from memory or throw (and potentially read
    // from memory in the exception handler), forget LastStore. Load/store
    // intrinsics will indicate both a read and a write to memory. The target
    // may override this (e.g. so that a store intrinsic does not read from
    // memory, and thus will be treated the same as a regular store for
    // commoning purposes).
    if ((Inst->mayReadFromMemory() || Inst->mayThrow()) &&
        !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
      LastStore = nullptr;

    // If this is a read-only call, process it.
    if (CallValue::canHandle(Inst)) {
      // If we have an available version of this call, and if it is the right
      // generation, replace this instruction.
      std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst);
      if (InVal.first != nullptr &&
          isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
                              Inst)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
                          << "  to: " << *InVal.first << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        if (!Inst->use_empty())
          Inst->replaceAllUsesWith(InVal.first);
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSECall;
        continue;
      }

      // Otherwise, remember that we have this instruction.
      AvailableCalls.insert(
          Inst, std::pair<Instruction *, unsigned>(Inst, CurrentGeneration));
      continue;
    }

    // A release fence requires that all stores complete before it, but does
    // not prevent the reordering of following loads 'before' the fence. As a
    // result, we don't need to consider it as writing to memory and don't need
    // to advance the generation. We do need to prevent DSE across the fence,
    // but that's handled above.
    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
      if (FI->getOrdering() == AtomicOrdering::Release) {
        assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
        continue;
      }

    // write back DSE - If we write back the same value we just loaded from
    // the same location and haven't passed any intervening writes or ordering
    // operations, we can remove the write. The primary benefit is in allowing
    // the available load table to remain valid and value forward past where
    // the store originally was.
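    //
    // For example (illustrative):
    //   %v = load i32, i32* %p
    //   ...                        ; no intervening clobber of %p
    //   store i32 %v, i32* %p      ; removable write-back store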
    if (MemInst.isValid() && MemInst.isStore()) {
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst &&
          InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing stores with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered() &&
          (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
           isSameMemGeneration(InVal.Generation, CurrentGeneration,
                               InVal.DefInst, Inst))) {
        // It is okay to have a LastStore to a different pointer here if MemorySSA
        // tells us that the load and store are from the same memory generation.
        // In that case, LastStore should keep its present value since we're
        // removing the current store.
        assert((!LastStore ||
                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
                    MemInst.getPointerOperand() ||
                MSSA) &&
               "can't have an intervening store if not using MemorySSA!");
        LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumDSE;
        // We can avoid incrementing the generation count since we were able
        // to eliminate this store.
        continue;
      }
    }

    // Okay, this isn't something we can CSE at all. Check to see if it is
    // something that could modify memory. If so, our available memory values
    // cannot be used so bump the generation count.
    if (Inst->mayWriteToMemory()) {
      ++CurrentGeneration;

      if (MemInst.isValid() && MemInst.isStore()) {
        // We do a trivial form of DSE if there are two stores to the same
        // location with no intervening loads. Delete the earlier store.
        // At the moment, we don't remove ordered stores, but do remove
        // unordered atomic stores. There's no special requirement (for
        // unordered atomics) about removing atomic stores only in favor of
        // other atomic stores since we're going to execute the non-atomic
        // one anyway and the atomic one might never have become visible.
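        //
        // For example (illustrative):
        //   store i32 1, i32* %p   ; earlier store (LastStore), now dead
        //   store i32 2, i32* %p   ; current store, no load of %p in between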
        if (LastStore) {
          ParseMemoryInst LastStoreMemInst(LastStore, TTI);
          assert(LastStoreMemInst.isUnordered() &&
                 !LastStoreMemInst.isVolatile() &&
                 "Violated invariant");
          if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
            LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                              << "  due to: " << *Inst << '\n');
            if (!DebugCounter::shouldExecute(CSECounter)) {
              LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
            } else {
              removeMSSA(LastStore);
              LastStore->eraseFromParent();
              Changed = true;
              ++NumDSE;
              LastStore = nullptr;
            }
          }
          // fallthrough - we can exploit information about this store
        }

        // Okay, we just invalidated anything we knew about loaded values. Try
        // to salvage *something* by remembering that the stored value is a live
        // version of the pointer. It is safe to forward from volatile stores
        // to non-volatile loads, so we don't have to check for volatility of
        // the store.
        AvailableLoads.insert(
            MemInst.getPointerOperand(),
            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                      MemInst.isAtomic()));

        // Remember that this was the last unordered store we saw for DSE. We
        // don't yet handle DSE on ordered or volatile stores since we don't
        // have a good way to model the ordering requirement for following
        // passes once the store is removed. We could insert a fence, but
        // since fences are slightly stronger than stores in their ordering,
        // it's not clear this is a profitable transform. Another option would
        // be to merge the ordering with that of the post dominating store.
        if (MemInst.isUnordered() && !MemInst.isVolatile())
          LastStore = Inst;
        else
          LastStore = nullptr;
      }
    }
  }

  return Changed;
}

bool EarlyCSE::run() {
  // Note, deque is being used here because there are significant performance
  // gains over vector when the container becomes very large due to the
  // specific access patterns. For more information see the mailing list
  // discussion on this:
  // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
  std::deque<StackNode *> nodesToProcess;

  bool Changed = false;

  // Process the root node.
  nodesToProcess.push_back(new StackNode(
      AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
      CurrentGeneration, DT.getRootNode(),
      DT.getRootNode()->begin(), DT.getRootNode()->end()));

  // Save the current generation.
  unsigned LiveOutGeneration = CurrentGeneration;

  // Process the stack.
  while (!nodesToProcess.empty()) {
    // Grab the first item off the stack. Set the current generation, remove
    // the node from the stack, and process it.
    StackNode *NodeToProcess = nodesToProcess.back();

    // Initialize class members.
    CurrentGeneration = NodeToProcess->currentGeneration();

    // Check if the node needs to be processed.
    if (!NodeToProcess->isProcessed()) {
      // Process the node.
      Changed |= processNode(NodeToProcess->node());
      NodeToProcess->childGeneration(CurrentGeneration);
      NodeToProcess->process();
    } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
      // Push the next child onto the stack.
      DomTreeNode *child = NodeToProcess->nextChild();
      nodesToProcess.push_back(
          new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
                        AvailableCalls, NodeToProcess->childGeneration(),
                        child, child->begin(), child->end()));
    } else {
      // It has been processed, and there are no more children to process,
      // so delete it and pop it off the stack.
      delete NodeToProcess;
      nodesToProcess.pop_back();
    }
  } // while (!nodes...)

  // Reset the current generation.
  CurrentGeneration = LiveOutGeneration;

  return Changed;
}

PreservedAnalyses EarlyCSEPass::run(Function &F,
                                    FunctionAnalysisManager &AM) {
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto *MSSA =
      UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;

  EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);

  if (!CSE.run())
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<GlobalsAA>();
  if (UseMemorySSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

namespace {

/// A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
template<bool UseMemorySSA>
class EarlyCSELegacyCommonPass : public FunctionPass {
public:
  static char ID;

  EarlyCSELegacyCommonPass() : FunctionPass(ID) {
    if (UseMemorySSA)
      initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
    else
      initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    auto *MSSA =
        UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;

    EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);

    return CSE.run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    if (UseMemorySSA) {
      AU.addRequired<MemorySSAWrapperPass>();
      AU.addPreserved<MemorySSAWrapperPass>();
    }
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace

using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;

template<>
char EarlyCSELegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)

using EarlyCSEMemSSALegacyPass =
    EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;

template<>
char EarlyCSEMemSSALegacyPass::ID = 0;

FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
  if (UseMemorySSA)
    return new EarlyCSEMemSSALegacyPass();
  else
    return new EarlyCSELegacyPass();
}

INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                      "Early CSE w/ MemorySSA", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                    "Early CSE w/ MemorySSA", false, false)