1 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass performs a simple dominator tree walk that eliminates trivially
10 // redundant instructions.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Transforms/Scalar/EarlyCSE.h"
15 #include "llvm/ADT/DenseMapInfo.h"
16 #include "llvm/ADT/Hashing.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/ScopedHashTable.h"
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/AssumptionCache.h"
23 #include "llvm/Analysis/GlobalsModRef.h"
24 #include "llvm/Analysis/GuardUtils.h"
25 #include "llvm/Analysis/InstructionSimplify.h"
26 #include "llvm/Analysis/MemorySSA.h"
27 #include "llvm/Analysis/MemorySSAUpdater.h"
28 #include "llvm/Analysis/TargetLibraryInfo.h"
29 #include "llvm/Analysis/TargetTransformInfo.h"
30 #include "llvm/Analysis/ValueTracking.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DataLayout.h"
34 #include "llvm/IR/Dominators.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/InstrTypes.h"
37 #include "llvm/IR/Instruction.h"
38 #include "llvm/IR/Instructions.h"
39 #include "llvm/IR/IntrinsicInst.h"
40 #include "llvm/IR/Intrinsics.h"
41 #include "llvm/IR/LLVMContext.h"
42 #include "llvm/IR/PassManager.h"
43 #include "llvm/IR/PatternMatch.h"
44 #include "llvm/IR/Type.h"
45 #include "llvm/IR/Use.h"
46 #include "llvm/IR/Value.h"
47 #include "llvm/InitializePasses.h"
48 #include "llvm/Pass.h"
49 #include "llvm/Support/Allocator.h"
50 #include "llvm/Support/AtomicOrdering.h"
51 #include "llvm/Support/Casting.h"
52 #include "llvm/Support/Debug.h"
53 #include "llvm/Support/DebugCounter.h"
54 #include "llvm/Support/RecyclingAllocator.h"
55 #include "llvm/Support/raw_ostream.h"
56 #include "llvm/Transforms/Scalar.h"
57 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
58 #include "llvm/Transforms/Utils/GuardUtils.h"
59 #include "llvm/Transforms/Utils/Local.h"
66 using namespace llvm::PatternMatch
;
68 #define DEBUG_TYPE "early-cse"
70 STATISTIC(NumSimplify
, "Number of instructions simplified or DCE'd");
71 STATISTIC(NumCSE
, "Number of instructions CSE'd");
72 STATISTIC(NumCSECVP
, "Number of compare instructions CVP'd");
73 STATISTIC(NumCSELoad
, "Number of load instructions CSE'd");
74 STATISTIC(NumCSECall
, "Number of call instructions CSE'd");
75 STATISTIC(NumDSE
, "Number of trivial dead stores removed");
77 DEBUG_COUNTER(CSECounter
, "early-cse",
78 "Controls which instructions are removed");
80 static cl::opt
<unsigned> EarlyCSEMssaOptCap(
81 "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden
,
82 cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
83 "for faster compile. Caps the MemorySSA clobbering calls."));
85 static cl::opt
<bool> EarlyCSEDebugHash(
86 "earlycse-debug-hash", cl::init(false), cl::Hidden
,
87 cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
88 "function is well-behaved w.r.t. its isEqual predicate"));
90 //===----------------------------------------------------------------------===//
92 //===----------------------------------------------------------------------===//
96 /// Struct representing the available values in the scoped hash table.
100 SimpleValue(Instruction
*I
) : Inst(I
) {
101 assert((isSentinel() || canHandle(I
)) && "Inst can't be handled!");
104 bool isSentinel() const {
105 return Inst
== DenseMapInfo
<Instruction
*>::getEmptyKey() ||
106 Inst
== DenseMapInfo
<Instruction
*>::getTombstoneKey();
109 static bool canHandle(Instruction
*Inst
) {
110 // This can only handle non-void readnone functions.
111 // Also handled are constrained intrinsic that look like the types
112 // of instruction handled below (UnaryOperator, etc.).
113 if (CallInst
*CI
= dyn_cast
<CallInst
>(Inst
)) {
114 if (Function
*F
= CI
->getCalledFunction()) {
115 switch ((Intrinsic::ID
)F
->getIntrinsicID()) {
116 case Intrinsic::experimental_constrained_fadd
:
117 case Intrinsic::experimental_constrained_fsub
:
118 case Intrinsic::experimental_constrained_fmul
:
119 case Intrinsic::experimental_constrained_fdiv
:
120 case Intrinsic::experimental_constrained_frem
:
121 case Intrinsic::experimental_constrained_fptosi
:
122 case Intrinsic::experimental_constrained_sitofp
:
123 case Intrinsic::experimental_constrained_fptoui
:
124 case Intrinsic::experimental_constrained_uitofp
:
125 case Intrinsic::experimental_constrained_fcmp
:
126 case Intrinsic::experimental_constrained_fcmps
: {
127 auto *CFP
= cast
<ConstrainedFPIntrinsic
>(CI
);
128 return CFP
->isDefaultFPEnvironment();
132 return CI
->doesNotAccessMemory() && !CI
->getType()->isVoidTy();
134 return isa
<CastInst
>(Inst
) || isa
<UnaryOperator
>(Inst
) ||
135 isa
<BinaryOperator
>(Inst
) || isa
<GetElementPtrInst
>(Inst
) ||
136 isa
<CmpInst
>(Inst
) || isa
<SelectInst
>(Inst
) ||
137 isa
<ExtractElementInst
>(Inst
) || isa
<InsertElementInst
>(Inst
) ||
138 isa
<ShuffleVectorInst
>(Inst
) || isa
<ExtractValueInst
>(Inst
) ||
139 isa
<InsertValueInst
>(Inst
) || isa
<FreezeInst
>(Inst
);
143 } // end anonymous namespace
147 template <> struct DenseMapInfo
<SimpleValue
> {
148 static inline SimpleValue
getEmptyKey() {
149 return DenseMapInfo
<Instruction
*>::getEmptyKey();
152 static inline SimpleValue
getTombstoneKey() {
153 return DenseMapInfo
<Instruction
*>::getTombstoneKey();
156 static unsigned getHashValue(SimpleValue Val
);
157 static bool isEqual(SimpleValue LHS
, SimpleValue RHS
);
160 } // end namespace llvm
162 /// Match a 'select' including an optional 'not's of the condition.
163 static bool matchSelectWithOptionalNotCond(Value
*V
, Value
*&Cond
, Value
*&A
,
165 SelectPatternFlavor
&Flavor
) {
166 // Return false if V is not even a select.
167 if (!match(V
, m_Select(m_Value(Cond
), m_Value(A
), m_Value(B
))))
170 // Look through a 'not' of the condition operand by swapping A/B.
172 if (match(Cond
, m_Not(m_Value(CondNot
)))) {
177 // Match canonical forms of min/max. We are not using ValueTracking's
178 // more powerful matchSelectPattern() because it may rely on instruction flags
179 // such as "nsw". That would be incompatible with the current hashing
180 // mechanism that may remove flags to increase the likelihood of CSE.
182 Flavor
= SPF_UNKNOWN
;
183 CmpInst::Predicate Pred
;
185 if (!match(Cond
, m_ICmp(Pred
, m_Specific(A
), m_Specific(B
)))) {
186 // Check for commuted variants of min/max by swapping predicate.
187 // If we do not match the standard or commuted patterns, this is not a
188 // recognized form of min/max, but it is still a select, so return true.
189 if (!match(Cond
, m_ICmp(Pred
, m_Specific(B
), m_Specific(A
))))
191 Pred
= ICmpInst::getSwappedPredicate(Pred
);
195 case CmpInst::ICMP_UGT
: Flavor
= SPF_UMAX
; break;
196 case CmpInst::ICMP_ULT
: Flavor
= SPF_UMIN
; break;
197 case CmpInst::ICMP_SGT
: Flavor
= SPF_SMAX
; break;
198 case CmpInst::ICMP_SLT
: Flavor
= SPF_SMIN
; break;
199 // Non-strict inequalities.
200 case CmpInst::ICMP_ULE
: Flavor
= SPF_UMIN
; break;
201 case CmpInst::ICMP_UGE
: Flavor
= SPF_UMAX
; break;
202 case CmpInst::ICMP_SLE
: Flavor
= SPF_SMIN
; break;
203 case CmpInst::ICMP_SGE
: Flavor
= SPF_SMAX
; break;
210 static unsigned getHashValueImpl(SimpleValue Val
) {
211 Instruction
*Inst
= Val
.Inst
;
212 // Hash in all of the operands as pointers.
213 if (BinaryOperator
*BinOp
= dyn_cast
<BinaryOperator
>(Inst
)) {
214 Value
*LHS
= BinOp
->getOperand(0);
215 Value
*RHS
= BinOp
->getOperand(1);
216 if (BinOp
->isCommutative() && BinOp
->getOperand(0) > BinOp
->getOperand(1))
219 return hash_combine(BinOp
->getOpcode(), LHS
, RHS
);
222 if (CmpInst
*CI
= dyn_cast
<CmpInst
>(Inst
)) {
223 // Compares can be commuted by swapping the comparands and
224 // updating the predicate. Choose the form that has the
225 // comparands in sorted order, or in the case of a tie, the
226 // one with the lower predicate.
227 Value
*LHS
= CI
->getOperand(0);
228 Value
*RHS
= CI
->getOperand(1);
229 CmpInst::Predicate Pred
= CI
->getPredicate();
230 CmpInst::Predicate SwappedPred
= CI
->getSwappedPredicate();
231 if (std::tie(LHS
, Pred
) > std::tie(RHS
, SwappedPred
)) {
235 return hash_combine(Inst
->getOpcode(), Pred
, LHS
, RHS
);
238 // Hash general selects to allow matching commuted true/false operands.
239 SelectPatternFlavor SPF
;
241 if (matchSelectWithOptionalNotCond(Inst
, Cond
, A
, B
, SPF
)) {
242 // Hash min/max (cmp + select) to allow for commuted operands.
243 // Min/max may also have non-canonical compare predicate (eg, the compare for
244 // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
246 // TODO: We should also detect FP min/max.
247 if (SPF
== SPF_SMIN
|| SPF
== SPF_SMAX
||
248 SPF
== SPF_UMIN
|| SPF
== SPF_UMAX
) {
251 return hash_combine(Inst
->getOpcode(), SPF
, A
, B
);
254 // Hash general selects to allow matching commuted true/false operands.
256 // If we do not have a compare as the condition, just hash in the condition.
257 CmpInst::Predicate Pred
;
259 if (!match(Cond
, m_Cmp(Pred
, m_Value(X
), m_Value(Y
))))
260 return hash_combine(Inst
->getOpcode(), Cond
, A
, B
);
262 // Similar to cmp normalization (above) - canonicalize the predicate value:
263 // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
264 if (CmpInst::getInversePredicate(Pred
) < Pred
) {
265 Pred
= CmpInst::getInversePredicate(Pred
);
268 return hash_combine(Inst
->getOpcode(), Pred
, X
, Y
, A
, B
);
271 if (CastInst
*CI
= dyn_cast
<CastInst
>(Inst
))
272 return hash_combine(CI
->getOpcode(), CI
->getType(), CI
->getOperand(0));
274 if (FreezeInst
*FI
= dyn_cast
<FreezeInst
>(Inst
))
275 return hash_combine(FI
->getOpcode(), FI
->getOperand(0));
277 if (const ExtractValueInst
*EVI
= dyn_cast
<ExtractValueInst
>(Inst
))
278 return hash_combine(EVI
->getOpcode(), EVI
->getOperand(0),
279 hash_combine_range(EVI
->idx_begin(), EVI
->idx_end()));
281 if (const InsertValueInst
*IVI
= dyn_cast
<InsertValueInst
>(Inst
))
282 return hash_combine(IVI
->getOpcode(), IVI
->getOperand(0),
284 hash_combine_range(IVI
->idx_begin(), IVI
->idx_end()));
286 assert((isa
<CallInst
>(Inst
) || isa
<GetElementPtrInst
>(Inst
) ||
287 isa
<ExtractElementInst
>(Inst
) || isa
<InsertElementInst
>(Inst
) ||
288 isa
<ShuffleVectorInst
>(Inst
) || isa
<UnaryOperator
>(Inst
) ||
289 isa
<FreezeInst
>(Inst
)) &&
290 "Invalid/unknown instruction");
292 // Handle intrinsics with commutative operands.
293 // TODO: Extend this to handle intrinsics with >2 operands where the 1st
294 // 2 operands are commutative.
295 auto *II
= dyn_cast
<IntrinsicInst
>(Inst
);
296 if (II
&& II
->isCommutative() && II
->getNumArgOperands() == 2) {
297 Value
*LHS
= II
->getArgOperand(0), *RHS
= II
->getArgOperand(1);
300 return hash_combine(II
->getOpcode(), LHS
, RHS
);
303 // gc.relocate is 'special' call: its second and third operands are
304 // not real values, but indices into statepoint's argument list.
305 // Get values they point to.
306 if (const GCRelocateInst
*GCR
= dyn_cast
<GCRelocateInst
>(Inst
))
307 return hash_combine(GCR
->getOpcode(), GCR
->getOperand(0),
308 GCR
->getBasePtr(), GCR
->getDerivedPtr());
310 // Mix in the opcode.
313 hash_combine_range(Inst
->value_op_begin(), Inst
->value_op_end()));
316 unsigned DenseMapInfo
<SimpleValue
>::getHashValue(SimpleValue Val
) {
318 // If -earlycse-debug-hash was specified, return a constant -- this
319 // will force all hashing to collide, so we'll exhaustively search
320 // the table for a match, and the assertion in isEqual will fire if
321 // there's a bug causing equal keys to hash differently.
322 if (EarlyCSEDebugHash
)
325 return getHashValueImpl(Val
);
328 static bool isEqualImpl(SimpleValue LHS
, SimpleValue RHS
) {
329 Instruction
*LHSI
= LHS
.Inst
, *RHSI
= RHS
.Inst
;
331 if (LHS
.isSentinel() || RHS
.isSentinel())
334 if (LHSI
->getOpcode() != RHSI
->getOpcode())
336 if (LHSI
->isIdenticalToWhenDefined(RHSI
))
339 // If we're not strictly identical, we still might be a commutable instruction
340 if (BinaryOperator
*LHSBinOp
= dyn_cast
<BinaryOperator
>(LHSI
)) {
341 if (!LHSBinOp
->isCommutative())
344 assert(isa
<BinaryOperator
>(RHSI
) &&
345 "same opcode, but different instruction type?");
346 BinaryOperator
*RHSBinOp
= cast
<BinaryOperator
>(RHSI
);
349 return LHSBinOp
->getOperand(0) == RHSBinOp
->getOperand(1) &&
350 LHSBinOp
->getOperand(1) == RHSBinOp
->getOperand(0);
352 if (CmpInst
*LHSCmp
= dyn_cast
<CmpInst
>(LHSI
)) {
353 assert(isa
<CmpInst
>(RHSI
) &&
354 "same opcode, but different instruction type?");
355 CmpInst
*RHSCmp
= cast
<CmpInst
>(RHSI
);
357 return LHSCmp
->getOperand(0) == RHSCmp
->getOperand(1) &&
358 LHSCmp
->getOperand(1) == RHSCmp
->getOperand(0) &&
359 LHSCmp
->getSwappedPredicate() == RHSCmp
->getPredicate();
362 // TODO: Extend this for >2 args by matching the trailing N-2 args.
363 auto *LII
= dyn_cast
<IntrinsicInst
>(LHSI
);
364 auto *RII
= dyn_cast
<IntrinsicInst
>(RHSI
);
365 if (LII
&& RII
&& LII
->getIntrinsicID() == RII
->getIntrinsicID() &&
366 LII
->isCommutative() && LII
->getNumArgOperands() == 2) {
367 return LII
->getArgOperand(0) == RII
->getArgOperand(1) &&
368 LII
->getArgOperand(1) == RII
->getArgOperand(0);
371 // See comment above in `getHashValue()`.
372 if (const GCRelocateInst
*GCR1
= dyn_cast
<GCRelocateInst
>(LHSI
))
373 if (const GCRelocateInst
*GCR2
= dyn_cast
<GCRelocateInst
>(RHSI
))
374 return GCR1
->getOperand(0) == GCR2
->getOperand(0) &&
375 GCR1
->getBasePtr() == GCR2
->getBasePtr() &&
376 GCR1
->getDerivedPtr() == GCR2
->getDerivedPtr();
378 // Min/max can occur with commuted operands, non-canonical predicates,
379 // and/or non-canonical operands.
380 // Selects can be non-trivially equivalent via inverted conditions and swaps.
381 SelectPatternFlavor LSPF
, RSPF
;
382 Value
*CondL
, *CondR
, *LHSA
, *RHSA
, *LHSB
, *RHSB
;
383 if (matchSelectWithOptionalNotCond(LHSI
, CondL
, LHSA
, LHSB
, LSPF
) &&
384 matchSelectWithOptionalNotCond(RHSI
, CondR
, RHSA
, RHSB
, RSPF
)) {
386 // TODO: We should also detect FP min/max.
387 if (LSPF
== SPF_SMIN
|| LSPF
== SPF_SMAX
||
388 LSPF
== SPF_UMIN
|| LSPF
== SPF_UMAX
)
389 return ((LHSA
== RHSA
&& LHSB
== RHSB
) ||
390 (LHSA
== RHSB
&& LHSB
== RHSA
));
392 // select Cond, A, B <--> select not(Cond), B, A
393 if (CondL
== CondR
&& LHSA
== RHSA
&& LHSB
== RHSB
)
397 // If the true/false operands are swapped and the conditions are compares
398 // with inverted predicates, the selects are equal:
399 // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
401 // This also handles patterns with a double-negation in the sense of not +
402 // inverse, because we looked through a 'not' in the matching function and
404 // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
406 // This intentionally does NOT handle patterns with a double-negation in
407 // the sense of not + not, because doing so could result in values
409 // as equal that hash differently in the min/max cases like:
410 // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
411 // ^ hashes as min ^ would not hash as min
412 // In the context of the EarlyCSE pass, however, such cases never reach
413 // this code, as we simplify the double-negation before hashing the second
414 // select (and so still succeed at CSEing them).
415 if (LHSA
== RHSB
&& LHSB
== RHSA
) {
416 CmpInst::Predicate PredL
, PredR
;
418 if (match(CondL
, m_Cmp(PredL
, m_Value(X
), m_Value(Y
))) &&
419 match(CondR
, m_Cmp(PredR
, m_Specific(X
), m_Specific(Y
))) &&
420 CmpInst::getInversePredicate(PredL
) == PredR
)
428 bool DenseMapInfo
<SimpleValue
>::isEqual(SimpleValue LHS
, SimpleValue RHS
) {
429 // These comparisons are nontrivial, so assert that equality implies
430 // hash equality (DenseMap demands this as an invariant).
431 bool Result
= isEqualImpl(LHS
, RHS
);
432 assert(!Result
|| (LHS
.isSentinel() && LHS
.Inst
== RHS
.Inst
) ||
433 getHashValueImpl(LHS
) == getHashValueImpl(RHS
));
437 //===----------------------------------------------------------------------===//
439 //===----------------------------------------------------------------------===//
443 /// Struct representing the available call values in the scoped hash
448 CallValue(Instruction
*I
) : Inst(I
) {
449 assert((isSentinel() || canHandle(I
)) && "Inst can't be handled!");
452 bool isSentinel() const {
453 return Inst
== DenseMapInfo
<Instruction
*>::getEmptyKey() ||
454 Inst
== DenseMapInfo
<Instruction
*>::getTombstoneKey();
457 static bool canHandle(Instruction
*Inst
) {
458 // Don't value number anything that returns void.
459 if (Inst
->getType()->isVoidTy())
462 CallInst
*CI
= dyn_cast
<CallInst
>(Inst
);
463 if (!CI
|| !CI
->onlyReadsMemory())
469 } // end anonymous namespace
473 template <> struct DenseMapInfo
<CallValue
> {
474 static inline CallValue
getEmptyKey() {
475 return DenseMapInfo
<Instruction
*>::getEmptyKey();
478 static inline CallValue
getTombstoneKey() {
479 return DenseMapInfo
<Instruction
*>::getTombstoneKey();
482 static unsigned getHashValue(CallValue Val
);
483 static bool isEqual(CallValue LHS
, CallValue RHS
);
486 } // end namespace llvm
488 unsigned DenseMapInfo
<CallValue
>::getHashValue(CallValue Val
) {
489 Instruction
*Inst
= Val
.Inst
;
491 // Hash all of the operands as pointers and mix in the opcode.
494 hash_combine_range(Inst
->value_op_begin(), Inst
->value_op_end()));
497 bool DenseMapInfo
<CallValue
>::isEqual(CallValue LHS
, CallValue RHS
) {
498 Instruction
*LHSI
= LHS
.Inst
, *RHSI
= RHS
.Inst
;
499 if (LHS
.isSentinel() || RHS
.isSentinel())
502 return LHSI
->isIdenticalTo(RHSI
);
505 //===----------------------------------------------------------------------===//
506 // EarlyCSE implementation
507 //===----------------------------------------------------------------------===//
511 /// A simple and fast domtree-based CSE pass.
513 /// This pass does a simple depth-first walk over the dominator tree,
514 /// eliminating trivially redundant instructions and using instsimplify to
515 /// canonicalize things as it goes. It is intended to be fast and catch obvious
516 /// cases so that instcombine and other passes are more effective. It is
517 /// expected that a later pass of GVN will catch the interesting/hard cases.
520 const TargetLibraryInfo
&TLI
;
521 const TargetTransformInfo
&TTI
;
524 const SimplifyQuery SQ
;
526 std::unique_ptr
<MemorySSAUpdater
> MSSAUpdater
;
529 RecyclingAllocator
<BumpPtrAllocator
,
530 ScopedHashTableVal
<SimpleValue
, Value
*>>;
532 ScopedHashTable
<SimpleValue
, Value
*, DenseMapInfo
<SimpleValue
>,
535 /// A scoped hash table of the current values of all of our simple
536 /// scalar expressions.
538 /// As we walk down the domtree, we look to see if instructions are in this:
539 /// if so, we replace them with what we find, otherwise we insert them so
540 /// that dominated values can succeed in their lookup.
541 ScopedHTType AvailableValues
;
543 /// A scoped hash table of the current values of previously encountered
544 /// memory locations.
546 /// This allows us to get efficient access to dominating loads or stores when
547 /// we have a fully redundant load. In addition to the most recent load, we
548 /// keep track of a generation count of the read, which is compared against
549 /// the current generation count. The current generation count is incremented
550 /// after every possibly writing memory operation, which ensures that we only
551 /// CSE loads with other loads that have no intervening store. Ordering
552 /// events (such as fences or atomic instructions) increment the generation
553 /// count as well; essentially, we model these as writes to all possible
554 /// locations. Note that atomic and/or volatile loads and stores can be
555 /// present the table; it is the responsibility of the consumer to inspect
556 /// the atomicity/volatility if needed.
558 Instruction
*DefInst
= nullptr;
559 unsigned Generation
= 0;
561 bool IsAtomic
= false;
563 LoadValue() = default;
564 LoadValue(Instruction
*Inst
, unsigned Generation
, unsigned MatchingId
,
566 : DefInst(Inst
), Generation(Generation
), MatchingId(MatchingId
),
567 IsAtomic(IsAtomic
) {}
570 using LoadMapAllocator
=
571 RecyclingAllocator
<BumpPtrAllocator
,
572 ScopedHashTableVal
<Value
*, LoadValue
>>;
574 ScopedHashTable
<Value
*, LoadValue
, DenseMapInfo
<Value
*>,
577 LoadHTType AvailableLoads
;
579 // A scoped hash table mapping memory locations (represented as typed
580 // addresses) to generation numbers at which that memory location became
581 // (henceforth indefinitely) invariant.
582 using InvariantMapAllocator
=
583 RecyclingAllocator
<BumpPtrAllocator
,
584 ScopedHashTableVal
<MemoryLocation
, unsigned>>;
585 using InvariantHTType
=
586 ScopedHashTable
<MemoryLocation
, unsigned, DenseMapInfo
<MemoryLocation
>,
587 InvariantMapAllocator
>;
588 InvariantHTType AvailableInvariants
;
590 /// A scoped hash table of the current values of read-only call
593 /// It uses the same generation count as loads.
595 ScopedHashTable
<CallValue
, std::pair
<Instruction
*, unsigned>>;
596 CallHTType AvailableCalls
;
598 /// This is the current generation of the memory value.
599 unsigned CurrentGeneration
= 0;
601 /// Set up the EarlyCSE runner for a particular function.
602 EarlyCSE(const DataLayout
&DL
, const TargetLibraryInfo
&TLI
,
603 const TargetTransformInfo
&TTI
, DominatorTree
&DT
,
604 AssumptionCache
&AC
, MemorySSA
*MSSA
)
605 : TLI(TLI
), TTI(TTI
), DT(DT
), AC(AC
), SQ(DL
, &TLI
, &DT
, &AC
), MSSA(MSSA
),
606 MSSAUpdater(std::make_unique
<MemorySSAUpdater
>(MSSA
)) {}
611 unsigned ClobberCounter
= 0;
612 // Almost a POD, but needs to call the constructors for the scoped hash
613 // tables so that a new scope gets pushed on. These are RAII so that the
614 // scope gets popped when the NodeScope is destroyed.
617 NodeScope(ScopedHTType
&AvailableValues
, LoadHTType
&AvailableLoads
,
618 InvariantHTType
&AvailableInvariants
, CallHTType
&AvailableCalls
)
619 : Scope(AvailableValues
), LoadScope(AvailableLoads
),
620 InvariantScope(AvailableInvariants
), CallScope(AvailableCalls
) {}
621 NodeScope(const NodeScope
&) = delete;
622 NodeScope
&operator=(const NodeScope
&) = delete;
625 ScopedHTType::ScopeTy Scope
;
626 LoadHTType::ScopeTy LoadScope
;
627 InvariantHTType::ScopeTy InvariantScope
;
628 CallHTType::ScopeTy CallScope
;
631 // Contains all the needed information to create a stack for doing a depth
632 // first traversal of the tree. This includes scopes for values, loads, and
633 // calls as well as the generation. There is a child iterator so that the
634 // children do not need to be store separately.
637 StackNode(ScopedHTType
&AvailableValues
, LoadHTType
&AvailableLoads
,
638 InvariantHTType
&AvailableInvariants
, CallHTType
&AvailableCalls
,
639 unsigned cg
, DomTreeNode
*n
, DomTreeNode::const_iterator child
,
640 DomTreeNode::const_iterator end
)
641 : CurrentGeneration(cg
), ChildGeneration(cg
), Node(n
), ChildIter(child
),
643 Scopes(AvailableValues
, AvailableLoads
, AvailableInvariants
,
646 StackNode(const StackNode
&) = delete;
647 StackNode
&operator=(const StackNode
&) = delete;
650 unsigned currentGeneration() const { return CurrentGeneration
; }
651 unsigned childGeneration() const { return ChildGeneration
; }
652 void childGeneration(unsigned generation
) { ChildGeneration
= generation
; }
653 DomTreeNode
*node() { return Node
; }
654 DomTreeNode::const_iterator
childIter() const { return ChildIter
; }
656 DomTreeNode
*nextChild() {
657 DomTreeNode
*child
= *ChildIter
;
662 DomTreeNode::const_iterator
end() const { return EndIter
; }
663 bool isProcessed() const { return Processed
; }
664 void process() { Processed
= true; }
667 unsigned CurrentGeneration
;
668 unsigned ChildGeneration
;
670 DomTreeNode::const_iterator ChildIter
;
671 DomTreeNode::const_iterator EndIter
;
673 bool Processed
= false;
676 /// Wrapper class to handle memory instructions, including loads,
677 /// stores and intrinsic loads and stores defined by the target.
678 class ParseMemoryInst
{
680 ParseMemoryInst(Instruction
*Inst
, const TargetTransformInfo
&TTI
)
682 if (IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(Inst
)) {
683 IntrID
= II
->getIntrinsicID();
684 if (TTI
.getTgtMemIntrinsic(II
, Info
))
686 if (isHandledNonTargetIntrinsic(IntrID
)) {
688 case Intrinsic::masked_load
:
689 Info
.PtrVal
= Inst
->getOperand(0);
690 Info
.MatchingId
= Intrinsic::masked_load
;
692 Info
.WriteMem
= false;
693 Info
.IsVolatile
= false;
695 case Intrinsic::masked_store
:
696 Info
.PtrVal
= Inst
->getOperand(1);
697 // Use the ID of masked load as the "matching id". This will
698 // prevent matching non-masked loads/stores with masked ones
699 // (which could be done), but at the moment, the code here
700 // does not support matching intrinsics with non-intrinsics,
701 // so keep the MatchingIds specific to masked instructions
703 Info
.MatchingId
= Intrinsic::masked_load
;
704 Info
.ReadMem
= false;
705 Info
.WriteMem
= true;
706 Info
.IsVolatile
= false;
713 Instruction
*get() { return Inst
; }
714 const Instruction
*get() const { return Inst
; }
716 bool isLoad() const {
719 return isa
<LoadInst
>(Inst
);
722 bool isStore() const {
724 return Info
.WriteMem
;
725 return isa
<StoreInst
>(Inst
);
728 bool isAtomic() const {
730 return Info
.Ordering
!= AtomicOrdering::NotAtomic
;
731 return Inst
->isAtomic();
734 bool isUnordered() const {
736 return Info
.isUnordered();
738 if (LoadInst
*LI
= dyn_cast
<LoadInst
>(Inst
)) {
739 return LI
->isUnordered();
740 } else if (StoreInst
*SI
= dyn_cast
<StoreInst
>(Inst
)) {
741 return SI
->isUnordered();
743 // Conservative answer
744 return !Inst
->isAtomic();
747 bool isVolatile() const {
749 return Info
.IsVolatile
;
751 if (LoadInst
*LI
= dyn_cast
<LoadInst
>(Inst
)) {
752 return LI
->isVolatile();
753 } else if (StoreInst
*SI
= dyn_cast
<StoreInst
>(Inst
)) {
754 return SI
->isVolatile();
756 // Conservative answer
760 bool isInvariantLoad() const {
761 if (auto *LI
= dyn_cast
<LoadInst
>(Inst
))
762 return LI
->hasMetadata(LLVMContext::MD_invariant_load
);
766 bool isValid() const { return getPointerOperand() != nullptr; }
768 // For regular (non-intrinsic) loads/stores, this is set to -1. For
769 // intrinsic loads/stores, the id is retrieved from the corresponding
770 // field in the MemIntrinsicInfo structure. That field contains
771 // non-negative values only.
772 int getMatchingId() const {
774 return Info
.MatchingId
;
778 Value
*getPointerOperand() const {
781 return getLoadStorePointerOperand(Inst
);
784 bool mayReadFromMemory() const {
787 return Inst
->mayReadFromMemory();
790 bool mayWriteToMemory() const {
792 return Info
.WriteMem
;
793 return Inst
->mayWriteToMemory();
797 Intrinsic::ID IntrID
= 0;
798 MemIntrinsicInfo Info
;
802 // This function is to prevent accidentally passing a non-target
803 // intrinsic ID to TargetTransformInfo.
804 static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID
) {
806 case Intrinsic::masked_load
:
807 case Intrinsic::masked_store
:
812 static bool isHandledNonTargetIntrinsic(const Value
*V
) {
813 if (auto *II
= dyn_cast
<IntrinsicInst
>(V
))
814 return isHandledNonTargetIntrinsic(II
->getIntrinsicID());
818 bool processNode(DomTreeNode
*Node
);
820 bool handleBranchCondition(Instruction
*CondInst
, const BranchInst
*BI
,
821 const BasicBlock
*BB
, const BasicBlock
*Pred
);
823 Value
*getMatchingValue(LoadValue
&InVal
, ParseMemoryInst
&MemInst
,
824 unsigned CurrentGeneration
);
826 bool overridingStores(const ParseMemoryInst
&Earlier
,
827 const ParseMemoryInst
&Later
);
829 Value
*getOrCreateResult(Value
*Inst
, Type
*ExpectedType
) const {
830 if (auto *LI
= dyn_cast
<LoadInst
>(Inst
))
832 if (auto *SI
= dyn_cast
<StoreInst
>(Inst
))
833 return SI
->getValueOperand();
834 assert(isa
<IntrinsicInst
>(Inst
) && "Instruction not supported");
835 auto *II
= cast
<IntrinsicInst
>(Inst
);
836 if (isHandledNonTargetIntrinsic(II
->getIntrinsicID()))
837 return getOrCreateResultNonTargetMemIntrinsic(II
, ExpectedType
);
838 return TTI
.getOrCreateResultFromMemIntrinsic(II
, ExpectedType
);
841 Value
*getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst
*II
,
842 Type
*ExpectedType
) const {
843 switch (II
->getIntrinsicID()) {
844 case Intrinsic::masked_load
:
846 case Intrinsic::masked_store
:
847 return II
->getOperand(0);
852 /// Return true if the instruction is known to only operate on memory
853 /// provably invariant in the given "generation".
854 bool isOperatingOnInvariantMemAt(Instruction
*I
, unsigned GenAt
);
856 bool isSameMemGeneration(unsigned EarlierGeneration
, unsigned LaterGeneration
,
857 Instruction
*EarlierInst
, Instruction
*LaterInst
);
859 bool isNonTargetIntrinsicMatch(const IntrinsicInst
*Earlier
,
860 const IntrinsicInst
*Later
) {
861 auto IsSubmask
= [](const Value
*Mask0
, const Value
*Mask1
) {
862 // Is Mask0 a submask of Mask1?
865 if (isa
<UndefValue
>(Mask0
) || isa
<UndefValue
>(Mask1
))
867 auto *Vec0
= dyn_cast
<ConstantVector
>(Mask0
);
868 auto *Vec1
= dyn_cast
<ConstantVector
>(Mask1
);
871 assert(Vec0
->getType() == Vec1
->getType() &&
872 "Masks should have the same type");
873 for (int i
= 0, e
= Vec0
->getNumOperands(); i
!= e
; ++i
) {
874 Constant
*Elem0
= Vec0
->getOperand(i
);
875 Constant
*Elem1
= Vec1
->getOperand(i
);
876 auto *Int0
= dyn_cast
<ConstantInt
>(Elem0
);
877 if (Int0
&& Int0
->isZero())
879 auto *Int1
= dyn_cast
<ConstantInt
>(Elem1
);
880 if (Int1
&& !Int1
->isZero())
882 if (isa
<UndefValue
>(Elem0
) || isa
<UndefValue
>(Elem1
))
890 auto PtrOp
= [](const IntrinsicInst
*II
) {
891 if (II
->getIntrinsicID() == Intrinsic::masked_load
)
892 return II
->getOperand(0);
893 if (II
->getIntrinsicID() == Intrinsic::masked_store
)
894 return II
->getOperand(1);
895 llvm_unreachable("Unexpected IntrinsicInst");
897 auto MaskOp
= [](const IntrinsicInst
*II
) {
898 if (II
->getIntrinsicID() == Intrinsic::masked_load
)
899 return II
->getOperand(2);
900 if (II
->getIntrinsicID() == Intrinsic::masked_store
)
901 return II
->getOperand(3);
902 llvm_unreachable("Unexpected IntrinsicInst");
904 auto ThruOp
= [](const IntrinsicInst
*II
) {
905 if (II
->getIntrinsicID() == Intrinsic::masked_load
)
906 return II
->getOperand(3);
907 llvm_unreachable("Unexpected IntrinsicInst");
910 if (PtrOp(Earlier
) != PtrOp(Later
))
913 Intrinsic::ID IDE
= Earlier
->getIntrinsicID();
914 Intrinsic::ID IDL
= Later
->getIntrinsicID();
915 // We could really use specific intrinsic classes for masked loads
916 // and stores in IntrinsicInst.h.
917 if (IDE
== Intrinsic::masked_load
&& IDL
== Intrinsic::masked_load
) {
918 // Trying to replace later masked load with the earlier one.
919 // Check that the pointers are the same, and
920 // - masks and pass-throughs are the same, or
921 // - replacee's pass-through is "undef" and replacer's mask is a
922 // super-set of the replacee's mask.
923 if (MaskOp(Earlier
) == MaskOp(Later
) && ThruOp(Earlier
) == ThruOp(Later
))
925 if (!isa
<UndefValue
>(ThruOp(Later
)))
927 return IsSubmask(MaskOp(Later
), MaskOp(Earlier
));
929 if (IDE
== Intrinsic::masked_store
&& IDL
== Intrinsic::masked_load
) {
930 // Trying to replace a load of a stored value with the store's value.
931 // Check that the pointers are the same, and
932 // - load's mask is a subset of store's mask, and
933 // - load's pass-through is "undef".
934 if (!IsSubmask(MaskOp(Later
), MaskOp(Earlier
)))
936 return isa
<UndefValue
>(ThruOp(Later
));
938 if (IDE
== Intrinsic::masked_load
&& IDL
== Intrinsic::masked_store
) {
939 // Trying to remove a store of the loaded value.
940 // Check that the pointers are the same, and
941 // - store's mask is a subset of the load's mask.
942 return IsSubmask(MaskOp(Later
), MaskOp(Earlier
));
944 if (IDE
== Intrinsic::masked_store
&& IDL
== Intrinsic::masked_store
) {
945 // Trying to remove a dead store (earlier).
946 // Check that the pointers are the same,
947 // - the to-be-removed store's mask is a subset of the other store's
949 return IsSubmask(MaskOp(Earlier
), MaskOp(Later
));
954 void removeMSSA(Instruction
&Inst
) {
958 MSSA
->verifyMemorySSA();
959 // Removing a store here can leave MemorySSA in an unoptimized state by
960 // creating MemoryPhis that have identical arguments and by creating
961 // MemoryUses whose defining access is not an actual clobber. The phi case
962 // is handled by MemorySSA when passing OptimizePhis = true to
963 // removeMemoryAccess. The non-optimized MemoryUse case is lazily updated
964 // by MemorySSA's getClobberingMemoryAccess.
965 MSSAUpdater
->removeMemoryAccess(&Inst
, true);
969 } // end anonymous namespace
971 /// Determine if the memory referenced by LaterInst is from the same heap
972 /// version as EarlierInst.
973 /// This is currently called in two scenarios:
985 /// in both cases we want to verify that there are no possible writes to the
986 /// memory referenced by p between the earlier and later instruction.
987 bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration
,
988 unsigned LaterGeneration
,
989 Instruction
*EarlierInst
,
990 Instruction
*LaterInst
) {
991 // Check the simple memory generation tracking first.
992 if (EarlierGeneration
== LaterGeneration
)
998 // If MemorySSA has determined that one of EarlierInst or LaterInst does not
999 // read/write memory, then we can safely return true here.
1000 // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
1001 // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
1002 // by also checking the MemorySSA MemoryAccess on the instruction. Initial
1003 // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
1004 // with the default optimization pipeline.
1005 auto *EarlierMA
= MSSA
->getMemoryAccess(EarlierInst
);
1008 auto *LaterMA
= MSSA
->getMemoryAccess(LaterInst
);
1012 // Since we know LaterDef dominates LaterInst and EarlierInst dominates
1013 // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
1014 // EarlierInst and LaterInst and neither can any other write that potentially
1015 // clobbers LaterInst.
1016 MemoryAccess
*LaterDef
;
1017 if (ClobberCounter
< EarlyCSEMssaOptCap
) {
1018 LaterDef
= MSSA
->getWalker()->getClobberingMemoryAccess(LaterInst
);
1021 LaterDef
= LaterMA
->getDefiningAccess();
1023 return MSSA
->dominates(LaterDef
, EarlierMA
);
1026 bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction
*I
, unsigned GenAt
) {
1027 // A location loaded from with an invariant_load is assumed to *never* change
1028 // within the visible scope of the compilation.
1029 if (auto *LI
= dyn_cast
<LoadInst
>(I
))
1030 if (LI
->hasMetadata(LLVMContext::MD_invariant_load
))
1033 auto MemLocOpt
= MemoryLocation::getOrNone(I
);
1035 // "target" intrinsic forms of loads aren't currently known to
1036 // MemoryLocation::get. TODO
1038 MemoryLocation MemLoc
= *MemLocOpt
;
1039 if (!AvailableInvariants
.count(MemLoc
))
1042 // Is the generation at which this became invariant older than the
1044 return AvailableInvariants
.lookup(MemLoc
) <= GenAt
;
1047 bool EarlyCSE::handleBranchCondition(Instruction
*CondInst
,
1048 const BranchInst
*BI
, const BasicBlock
*BB
,
1049 const BasicBlock
*Pred
) {
1050 assert(BI
->isConditional() && "Should be a conditional branch!");
1051 assert(BI
->getCondition() == CondInst
&& "Wrong condition?");
1052 assert(BI
->getSuccessor(0) == BB
|| BI
->getSuccessor(1) == BB
);
1053 auto *TorF
= (BI
->getSuccessor(0) == BB
)
1054 ? ConstantInt::getTrue(BB
->getContext())
1055 : ConstantInt::getFalse(BB
->getContext());
1056 auto MatchBinOp
= [](Instruction
*I
, unsigned Opcode
, Value
*&LHS
,
1058 if (Opcode
== Instruction::And
&&
1059 match(I
, m_LogicalAnd(m_Value(LHS
), m_Value(RHS
))))
1061 else if (Opcode
== Instruction::Or
&&
1062 match(I
, m_LogicalOr(m_Value(LHS
), m_Value(RHS
))))
1066 // If the condition is AND operation, we can propagate its operands into the
1067 // true branch. If it is OR operation, we can propagate them into the false
1069 unsigned PropagateOpcode
=
1070 (BI
->getSuccessor(0) == BB
) ? Instruction::And
: Instruction::Or
;
1072 bool MadeChanges
= false;
1073 SmallVector
<Instruction
*, 4> WorkList
;
1074 SmallPtrSet
<Instruction
*, 4> Visited
;
1075 WorkList
.push_back(CondInst
);
1076 while (!WorkList
.empty()) {
1077 Instruction
*Curr
= WorkList
.pop_back_val();
1079 AvailableValues
.insert(Curr
, TorF
);
1080 LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
1081 << Curr
->getName() << "' as " << *TorF
<< " in "
1082 << BB
->getName() << "\n");
1083 if (!DebugCounter::shouldExecute(CSECounter
)) {
1084 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1086 // Replace all dominated uses with the known value.
1087 if (unsigned Count
= replaceDominatedUsesWith(Curr
, TorF
, DT
,
1088 BasicBlockEdge(Pred
, BB
))) {
1095 if (MatchBinOp(Curr
, PropagateOpcode
, LHS
, RHS
))
1096 for (auto &Op
: { LHS
, RHS
})
1097 if (Instruction
*OPI
= dyn_cast
<Instruction
>(Op
))
1098 if (SimpleValue::canHandle(OPI
) && Visited
.insert(OPI
).second
)
1099 WorkList
.push_back(OPI
);
1105 Value
*EarlyCSE::getMatchingValue(LoadValue
&InVal
, ParseMemoryInst
&MemInst
,
1106 unsigned CurrentGeneration
) {
1107 if (InVal
.DefInst
== nullptr)
1109 if (InVal
.MatchingId
!= MemInst
.getMatchingId())
1111 // We don't yet handle removing loads with ordering of any kind.
1112 if (MemInst
.isVolatile() || !MemInst
.isUnordered())
1114 // We can't replace an atomic load with one which isn't also atomic.
1115 if (MemInst
.isLoad() && !InVal
.IsAtomic
&& MemInst
.isAtomic())
1117 // The value V returned from this function is used differently depending
1118 // on whether MemInst is a load or a store. If it's a load, we will replace
1119 // MemInst with V, if it's a store, we will check if V is the same as the
1121 bool MemInstMatching
= !MemInst
.isLoad();
1122 Instruction
*Matching
= MemInstMatching
? MemInst
.get() : InVal
.DefInst
;
1123 Instruction
*Other
= MemInstMatching
? InVal
.DefInst
: MemInst
.get();
1125 // For stores check the result values before checking memory generation
1126 // (otherwise isSameMemGeneration may crash).
1127 Value
*Result
= MemInst
.isStore()
1128 ? getOrCreateResult(Matching
, Other
->getType())
1130 if (MemInst
.isStore() && InVal
.DefInst
!= Result
)
1133 // Deal with non-target memory intrinsics.
1134 bool MatchingNTI
= isHandledNonTargetIntrinsic(Matching
);
1135 bool OtherNTI
= isHandledNonTargetIntrinsic(Other
);
1136 if (OtherNTI
!= MatchingNTI
)
1138 if (OtherNTI
&& MatchingNTI
) {
1139 if (!isNonTargetIntrinsicMatch(cast
<IntrinsicInst
>(InVal
.DefInst
),
1140 cast
<IntrinsicInst
>(MemInst
.get())))
1144 if (!isOperatingOnInvariantMemAt(MemInst
.get(), InVal
.Generation
) &&
1145 !isSameMemGeneration(InVal
.Generation
, CurrentGeneration
, InVal
.DefInst
,
1150 Result
= getOrCreateResult(Matching
, Other
->getType());
1154 bool EarlyCSE::overridingStores(const ParseMemoryInst
&Earlier
,
1155 const ParseMemoryInst
&Later
) {
1156 // Can we remove Earlier store because of Later store?
1158 assert(Earlier
.isUnordered() && !Earlier
.isVolatile() &&
1159 "Violated invariant");
1160 if (Earlier
.getPointerOperand() != Later
.getPointerOperand())
1162 if (Earlier
.getMatchingId() != Later
.getMatchingId())
1164 // At the moment, we don't remove ordered stores, but do remove
1165 // unordered atomic stores. There's no special requirement (for
1166 // unordered atomics) about removing atomic stores only in favor of
1167 // other atomic stores since we were going to execute the non-atomic
1168 // one anyway and the atomic one might never have become visible.
1169 if (!Earlier
.isUnordered() || !Later
.isUnordered())
1172 // Deal with non-target memory intrinsics.
1173 bool ENTI
= isHandledNonTargetIntrinsic(Earlier
.get());
1174 bool LNTI
= isHandledNonTargetIntrinsic(Later
.get());
1176 return isNonTargetIntrinsicMatch(cast
<IntrinsicInst
>(Earlier
.get()),
1177 cast
<IntrinsicInst
>(Later
.get()));
1179 // Because of the check above, at least one of them is false.
1180 // For now disallow matching intrinsics with non-intrinsics,
1181 // so assume that the stores match if neither is an intrinsic.
1182 return ENTI
== LNTI
;
1185 bool EarlyCSE::processNode(DomTreeNode
*Node
) {
1186 bool Changed
= false;
1187 BasicBlock
*BB
= Node
->getBlock();
1189 // If this block has a single predecessor, then the predecessor is the parent
1190 // of the domtree node and all of the live out memory values are still current
1191 // in this block. If this block has multiple predecessors, then they could
1192 // have invalidated the live-out memory values of our parent value. For now,
1193 // just be conservative and invalidate memory if this block has multiple
1195 if (!BB
->getSinglePredecessor())
1196 ++CurrentGeneration
;
1198 // If this node has a single predecessor which ends in a conditional branch,
1199 // we can infer the value of the branch condition given that we took this
1200 // path. We need the single predecessor to ensure there's not another path
1201 // which reaches this block where the condition might hold a different
1202 // value. Since we're adding this to the scoped hash table (like any other
1203 // def), it will have been popped if we encounter a future merge block.
1204 if (BasicBlock
*Pred
= BB
->getSinglePredecessor()) {
1205 auto *BI
= dyn_cast
<BranchInst
>(Pred
->getTerminator());
1206 if (BI
&& BI
->isConditional()) {
1207 auto *CondInst
= dyn_cast
<Instruction
>(BI
->getCondition());
1208 if (CondInst
&& SimpleValue::canHandle(CondInst
))
1209 Changed
|= handleBranchCondition(CondInst
, BI
, BB
, Pred
);
1213 /// LastStore - Keep track of the last non-volatile store that we saw... for
1214 /// as long as there in no instruction that reads memory. If we see a store
1215 /// to the same location, we delete the dead store. This zaps trivial dead
1216 /// stores which can occur in bitfield code among other things.
1217 Instruction
*LastStore
= nullptr;
1219 // See if any instructions in the block can be eliminated. If so, do it. If
1220 // not, add them to AvailableValues.
1221 for (Instruction
&Inst
: make_early_inc_range(BB
->getInstList())) {
1222 // Dead instructions should just be removed.
1223 if (isInstructionTriviallyDead(&Inst
, &TLI
)) {
1224 LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst
<< '\n');
1225 if (!DebugCounter::shouldExecute(CSECounter
)) {
1226 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1230 salvageKnowledge(&Inst
, &AC
);
1231 salvageDebugInfo(Inst
);
1233 Inst
.eraseFromParent();
1239 // Skip assume intrinsics, they don't really have side effects (although
1240 // they're marked as such to ensure preservation of control dependencies),
1241 // and this pass will not bother with its removal. However, we should mark
1242 // its condition as true for all dominated blocks.
1243 if (auto *Assume
= dyn_cast
<AssumeInst
>(&Inst
)) {
1244 auto *CondI
= dyn_cast
<Instruction
>(Assume
->getArgOperand(0));
1245 if (CondI
&& SimpleValue::canHandle(CondI
)) {
1246 LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
1248 AvailableValues
.insert(CondI
, ConstantInt::getTrue(BB
->getContext()));
1250 LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst
<< '\n');
1254 // Likewise, noalias intrinsics don't actually write.
1256 m_Intrinsic
<Intrinsic::experimental_noalias_scope_decl
>())) {
1257 LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
1262 // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
1263 if (match(&Inst
, m_Intrinsic
<Intrinsic::sideeffect
>())) {
1264 LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst
<< '\n');
1268 // We can skip all invariant.start intrinsics since they only read memory,
1269 // and we can forward values across it. For invariant starts without
1270 // invariant ends, we can use the fact that the invariantness never ends to
1271 // start a scope in the current generaton which is true for all future
1272 // generations. Also, we dont need to consume the last store since the
1273 // semantics of invariant.start allow us to perform DSE of the last
1274 // store, if there was a store following invariant.start. Consider:
1277 // invariant.start(p)
1279 // We can DSE the store to 30, since the store 40 to invariant location p
1280 // causes undefined behaviour.
1281 if (match(&Inst
, m_Intrinsic
<Intrinsic::invariant_start
>())) {
1282 // If there are any uses, the scope might end.
1283 if (!Inst
.use_empty())
1285 MemoryLocation MemLoc
=
1286 MemoryLocation::getForArgument(&cast
<CallInst
>(Inst
), 1, TLI
);
1287 // Don't start a scope if we already have a better one pushed
1288 if (!AvailableInvariants
.count(MemLoc
))
1289 AvailableInvariants
.insert(MemLoc
, CurrentGeneration
);
1293 if (isGuard(&Inst
)) {
1295 dyn_cast
<Instruction
>(cast
<CallInst
>(Inst
).getArgOperand(0))) {
1296 if (SimpleValue::canHandle(CondI
)) {
1297 // Do we already know the actual value of this condition?
1298 if (auto *KnownCond
= AvailableValues
.lookup(CondI
)) {
1299 // Is the condition known to be true?
1300 if (isa
<ConstantInt
>(KnownCond
) &&
1301 cast
<ConstantInt
>(KnownCond
)->isOne()) {
1303 << "EarlyCSE removing guard: " << Inst
<< '\n');
1304 salvageKnowledge(&Inst
, &AC
);
1306 Inst
.eraseFromParent();
1310 // Use the known value if it wasn't true.
1311 cast
<CallInst
>(Inst
).setArgOperand(0, KnownCond
);
1313 // The condition we're on guarding here is true for all dominated
1315 AvailableValues
.insert(CondI
, ConstantInt::getTrue(BB
->getContext()));
1319 // Guard intrinsics read all memory, but don't write any memory.
1320 // Accordingly, don't update the generation but consume the last store (to
1321 // avoid an incorrect DSE).
1322 LastStore
= nullptr;
1326 // If the instruction can be simplified (e.g. X+0 = X) then replace it with
1327 // its simpler value.
1328 if (Value
*V
= SimplifyInstruction(&Inst
, SQ
)) {
1329 LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst
<< " to: " << *V
1331 if (!DebugCounter::shouldExecute(CSECounter
)) {
1332 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1334 bool Killed
= false;
1335 if (!Inst
.use_empty()) {
1336 Inst
.replaceAllUsesWith(V
);
1339 if (isInstructionTriviallyDead(&Inst
, &TLI
)) {
1340 salvageKnowledge(&Inst
, &AC
);
1342 Inst
.eraseFromParent();
1353 // If this is a simple instruction that we can value number, process it.
1354 if (SimpleValue::canHandle(&Inst
)) {
1355 // See if the instruction has an available value. If so, use it.
1356 if (Value
*V
= AvailableValues
.lookup(&Inst
)) {
1357 LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst
<< " to: " << *V
1359 if (!DebugCounter::shouldExecute(CSECounter
)) {
1360 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1363 if (auto *I
= dyn_cast
<Instruction
>(V
))
1364 I
->andIRFlags(&Inst
);
1365 Inst
.replaceAllUsesWith(V
);
1366 salvageKnowledge(&Inst
, &AC
);
1368 Inst
.eraseFromParent();
1374 // Otherwise, just remember that this value is available.
1375 AvailableValues
.insert(&Inst
, &Inst
);
1379 ParseMemoryInst
MemInst(&Inst
, TTI
);
1380 // If this is a non-volatile load, process it.
1381 if (MemInst
.isValid() && MemInst
.isLoad()) {
1382 // (conservatively) we can't peak past the ordering implied by this
1383 // operation, but we can add this load to our set of available values
1384 if (MemInst
.isVolatile() || !MemInst
.isUnordered()) {
1385 LastStore
= nullptr;
1386 ++CurrentGeneration
;
1389 if (MemInst
.isInvariantLoad()) {
1390 // If we pass an invariant load, we know that memory location is
1391 // indefinitely constant from the moment of first dereferenceability.
1392 // We conservatively treat the invariant_load as that moment. If we
1393 // pass a invariant load after already establishing a scope, don't
1394 // restart it since we want to preserve the earliest point seen.
1395 auto MemLoc
= MemoryLocation::get(&Inst
);
1396 if (!AvailableInvariants
.count(MemLoc
))
1397 AvailableInvariants
.insert(MemLoc
, CurrentGeneration
);
1400 // If we have an available version of this load, and if it is the right
1401 // generation or the load is known to be from an invariant location,
1402 // replace this instruction.
1404 // If either the dominating load or the current load are invariant, then
1405 // we can assume the current load loads the same value as the dominating
1407 LoadValue InVal
= AvailableLoads
.lookup(MemInst
.getPointerOperand());
1408 if (Value
*Op
= getMatchingValue(InVal
, MemInst
, CurrentGeneration
)) {
1409 LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
1410 << " to: " << *InVal
.DefInst
<< '\n');
1411 if (!DebugCounter::shouldExecute(CSECounter
)) {
1412 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1415 if (!Inst
.use_empty())
1416 Inst
.replaceAllUsesWith(Op
);
1417 salvageKnowledge(&Inst
, &AC
);
1419 Inst
.eraseFromParent();
1425 // Otherwise, remember that we have this instruction.
1426 AvailableLoads
.insert(MemInst
.getPointerOperand(),
1427 LoadValue(&Inst
, CurrentGeneration
,
1428 MemInst
.getMatchingId(),
1429 MemInst
.isAtomic()));
1430 LastStore
= nullptr;
1434 // If this instruction may read from memory or throw (and potentially read
1435 // from memory in the exception handler), forget LastStore. Load/store
1436 // intrinsics will indicate both a read and a write to memory. The target
1437 // may override this (e.g. so that a store intrinsic does not read from
1438 // memory, and thus will be treated the same as a regular store for
1439 // commoning purposes).
1440 if ((Inst
.mayReadFromMemory() || Inst
.mayThrow()) &&
1441 !(MemInst
.isValid() && !MemInst
.mayReadFromMemory()))
1442 LastStore
= nullptr;
1444 // If this is a read-only call, process it.
1445 if (CallValue::canHandle(&Inst
)) {
1446 // If we have an available version of this call, and if it is the right
1447 // generation, replace this instruction.
1448 std::pair
<Instruction
*, unsigned> InVal
= AvailableCalls
.lookup(&Inst
);
1449 if (InVal
.first
!= nullptr &&
1450 isSameMemGeneration(InVal
.second
, CurrentGeneration
, InVal
.first
,
1452 LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
1453 << " to: " << *InVal
.first
<< '\n');
1454 if (!DebugCounter::shouldExecute(CSECounter
)) {
1455 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1458 if (!Inst
.use_empty())
1459 Inst
.replaceAllUsesWith(InVal
.first
);
1460 salvageKnowledge(&Inst
, &AC
);
1462 Inst
.eraseFromParent();
1468 // Otherwise, remember that we have this instruction.
1469 AvailableCalls
.insert(&Inst
, std::make_pair(&Inst
, CurrentGeneration
));
1473 // A release fence requires that all stores complete before it, but does
1474 // not prevent the reordering of following loads 'before' the fence. As a
1475 // result, we don't need to consider it as writing to memory and don't need
1476 // to advance the generation. We do need to prevent DSE across the fence,
1477 // but that's handled above.
1478 if (auto *FI
= dyn_cast
<FenceInst
>(&Inst
))
1479 if (FI
->getOrdering() == AtomicOrdering::Release
) {
1480 assert(Inst
.mayReadFromMemory() && "relied on to prevent DSE above");
1484 // write back DSE - If we write back the same value we just loaded from
1485 // the same location and haven't passed any intervening writes or ordering
1486 // operations, we can remove the write. The primary benefit is in allowing
1487 // the available load table to remain valid and value forward past where
1488 // the store originally was.
1489 if (MemInst
.isValid() && MemInst
.isStore()) {
1490 LoadValue InVal
= AvailableLoads
.lookup(MemInst
.getPointerOperand());
1491 if (InVal
.DefInst
&&
1492 InVal
.DefInst
== getMatchingValue(InVal
, MemInst
, CurrentGeneration
)) {
1493 // It is okay to have a LastStore to a different pointer here if MemorySSA
1494 // tells us that the load and store are from the same memory generation.
1495 // In that case, LastStore should keep its present value since we're
1496 // removing the current store.
1497 assert((!LastStore
||
1498 ParseMemoryInst(LastStore
, TTI
).getPointerOperand() ==
1499 MemInst
.getPointerOperand() ||
1501 "can't have an intervening store if not using MemorySSA!");
1502 LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst
<< '\n');
1503 if (!DebugCounter::shouldExecute(CSECounter
)) {
1504 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1507 salvageKnowledge(&Inst
, &AC
);
1509 Inst
.eraseFromParent();
1512 // We can avoid incrementing the generation count since we were able
1513 // to eliminate this store.
1518 // Okay, this isn't something we can CSE at all. Check to see if it is
1519 // something that could modify memory. If so, our available memory values
1520 // cannot be used so bump the generation count.
1521 if (Inst
.mayWriteToMemory()) {
1522 ++CurrentGeneration
;
1524 if (MemInst
.isValid() && MemInst
.isStore()) {
1525 // We do a trivial form of DSE if there are two stores to the same
1526 // location with no intervening loads. Delete the earlier store.
1528 if (overridingStores(ParseMemoryInst(LastStore
, TTI
), MemInst
)) {
1529 LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
1530 << " due to: " << Inst
<< '\n');
1531 if (!DebugCounter::shouldExecute(CSECounter
)) {
1532 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1534 salvageKnowledge(&Inst
, &AC
);
1535 removeMSSA(*LastStore
);
1536 LastStore
->eraseFromParent();
1539 LastStore
= nullptr;
1542 // fallthrough - we can exploit information about this store
1545 // Okay, we just invalidated anything we knew about loaded values. Try
1546 // to salvage *something* by remembering that the stored value is a live
1547 // version of the pointer. It is safe to forward from volatile stores
1548 // to non-volatile loads, so we don't have to check for volatility of
1550 AvailableLoads
.insert(MemInst
.getPointerOperand(),
1551 LoadValue(&Inst
, CurrentGeneration
,
1552 MemInst
.getMatchingId(),
1553 MemInst
.isAtomic()));
1555 // Remember that this was the last unordered store we saw for DSE. We
1556 // don't yet handle DSE on ordered or volatile stores since we don't
1557 // have a good way to model the ordering requirement for following
1558 // passes once the store is removed. We could insert a fence, but
1559 // since fences are slightly stronger than stores in their ordering,
1560 // it's not clear this is a profitable transform. Another option would
1561 // be to merge the ordering with that of the post dominating store.
1562 if (MemInst
.isUnordered() && !MemInst
.isVolatile())
1565 LastStore
= nullptr;
1573 bool EarlyCSE::run() {
1574 // Note, deque is being used here because there is significant performance
1575 // gains over vector when the container becomes very large due to the
1576 // specific access patterns. For more information see the mailing list
1577 // discussion on this:
1578 // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
1579 std::deque
<StackNode
*> nodesToProcess
;
1581 bool Changed
= false;
1583 // Process the root node.
1584 nodesToProcess
.push_back(new StackNode(
1585 AvailableValues
, AvailableLoads
, AvailableInvariants
, AvailableCalls
,
1586 CurrentGeneration
, DT
.getRootNode(),
1587 DT
.getRootNode()->begin(), DT
.getRootNode()->end()));
1589 assert(!CurrentGeneration
&& "Create a new EarlyCSE instance to rerun it.");
1591 // Process the stack.
1592 while (!nodesToProcess
.empty()) {
1593 // Grab the first item off the stack. Set the current generation, remove
1594 // the node from the stack, and process it.
1595 StackNode
*NodeToProcess
= nodesToProcess
.back();
1597 // Initialize class members.
1598 CurrentGeneration
= NodeToProcess
->currentGeneration();
1600 // Check if the node needs to be processed.
1601 if (!NodeToProcess
->isProcessed()) {
1602 // Process the node.
1603 Changed
|= processNode(NodeToProcess
->node());
1604 NodeToProcess
->childGeneration(CurrentGeneration
);
1605 NodeToProcess
->process();
1606 } else if (NodeToProcess
->childIter() != NodeToProcess
->end()) {
1607 // Push the next child onto the stack.
1608 DomTreeNode
*child
= NodeToProcess
->nextChild();
1609 nodesToProcess
.push_back(
1610 new StackNode(AvailableValues
, AvailableLoads
, AvailableInvariants
,
1611 AvailableCalls
, NodeToProcess
->childGeneration(),
1612 child
, child
->begin(), child
->end()));
1614 // It has been processed, and there are no more children to process,
1615 // so delete it and pop it off the stack.
1616 delete NodeToProcess
;
1617 nodesToProcess
.pop_back();
1619 } // while (!nodes...)
1624 PreservedAnalyses
EarlyCSEPass::run(Function
&F
,
1625 FunctionAnalysisManager
&AM
) {
1626 auto &TLI
= AM
.getResult
<TargetLibraryAnalysis
>(F
);
1627 auto &TTI
= AM
.getResult
<TargetIRAnalysis
>(F
);
1628 auto &DT
= AM
.getResult
<DominatorTreeAnalysis
>(F
);
1629 auto &AC
= AM
.getResult
<AssumptionAnalysis
>(F
);
1631 UseMemorySSA
? &AM
.getResult
<MemorySSAAnalysis
>(F
).getMSSA() : nullptr;
1633 EarlyCSE
CSE(F
.getParent()->getDataLayout(), TLI
, TTI
, DT
, AC
, MSSA
);
1636 return PreservedAnalyses::all();
1638 PreservedAnalyses PA
;
1639 PA
.preserveSet
<CFGAnalyses
>();
1641 PA
.preserve
<MemorySSAAnalysis
>();
1647 /// A simple and fast domtree-based CSE pass.
1649 /// This pass does a simple depth-first walk over the dominator tree,
1650 /// eliminating trivially redundant instructions and using instsimplify to
1651 /// canonicalize things as it goes. It is intended to be fast and catch obvious
1652 /// cases so that instcombine and other passes are more effective. It is
1653 /// expected that a later pass of GVN will catch the interesting/hard cases.
1654 template<bool UseMemorySSA
>
1655 class EarlyCSELegacyCommonPass
: public FunctionPass
{
1659 EarlyCSELegacyCommonPass() : FunctionPass(ID
) {
1661 initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
1663 initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
1666 bool runOnFunction(Function
&F
) override
{
1667 if (skipFunction(F
))
1670 auto &TLI
= getAnalysis
<TargetLibraryInfoWrapperPass
>().getTLI(F
);
1671 auto &TTI
= getAnalysis
<TargetTransformInfoWrapperPass
>().getTTI(F
);
1672 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
1673 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(F
);
1675 UseMemorySSA
? &getAnalysis
<MemorySSAWrapperPass
>().getMSSA() : nullptr;
1677 EarlyCSE
CSE(F
.getParent()->getDataLayout(), TLI
, TTI
, DT
, AC
, MSSA
);
1682 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
1683 AU
.addRequired
<AssumptionCacheTracker
>();
1684 AU
.addRequired
<DominatorTreeWrapperPass
>();
1685 AU
.addRequired
<TargetLibraryInfoWrapperPass
>();
1686 AU
.addRequired
<TargetTransformInfoWrapperPass
>();
1688 AU
.addRequired
<AAResultsWrapperPass
>();
1689 AU
.addRequired
<MemorySSAWrapperPass
>();
1690 AU
.addPreserved
<MemorySSAWrapperPass
>();
1692 AU
.addPreserved
<GlobalsAAWrapperPass
>();
1693 AU
.addPreserved
<AAResultsWrapperPass
>();
1694 AU
.setPreservesCFG();
1698 } // end anonymous namespace
1700 using EarlyCSELegacyPass
= EarlyCSELegacyCommonPass
</*UseMemorySSA=*/false>;
1703 char EarlyCSELegacyPass::ID
= 0;
1705 INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass
, "early-cse", "Early CSE", false,
1707 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass
)
1708 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
)
1709 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
1710 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
1711 INITIALIZE_PASS_END(EarlyCSELegacyPass
, "early-cse", "Early CSE", false, false)
1713 using EarlyCSEMemSSALegacyPass
=
1714 EarlyCSELegacyCommonPass
</*UseMemorySSA=*/true>;
1717 char EarlyCSEMemSSALegacyPass::ID
= 0;
1719 FunctionPass
*llvm::createEarlyCSEPass(bool UseMemorySSA
) {
1721 return new EarlyCSEMemSSALegacyPass();
1723 return new EarlyCSELegacyPass();
1726 INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass
, "early-cse-memssa",
1727 "Early CSE w/ MemorySSA", false, false)
1728 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass
)
1729 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
)
1730 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
)
1731 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
1732 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
1733 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass
)
1734 INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass
, "early-cse-memssa",
1735 "Early CSE w/ MemorySSA", false, false)