1 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass performs a simple dominator tree walk that eliminates trivially
10 // redundant instructions.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Transforms/Scalar/EarlyCSE.h"
15 #include "llvm/ADT/DenseMapInfo.h"
16 #include "llvm/ADT/Hashing.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/ScopedHashTable.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/Analysis/AssumptionCache.h"
22 #include "llvm/Analysis/GlobalsModRef.h"
23 #include "llvm/Analysis/GuardUtils.h"
24 #include "llvm/Analysis/InstructionSimplify.h"
25 #include "llvm/Analysis/MemorySSA.h"
26 #include "llvm/Analysis/MemorySSAUpdater.h"
27 #include "llvm/Analysis/TargetLibraryInfo.h"
28 #include "llvm/Analysis/TargetTransformInfo.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/IR/BasicBlock.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/Dominators.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/InstrTypes.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/LLVMContext.h"
39 #include "llvm/IR/PassManager.h"
40 #include "llvm/IR/PatternMatch.h"
41 #include "llvm/IR/Type.h"
42 #include "llvm/IR/Value.h"
43 #include "llvm/InitializePasses.h"
44 #include "llvm/Pass.h"
45 #include "llvm/Support/Allocator.h"
46 #include "llvm/Support/AtomicOrdering.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/Debug.h"
49 #include "llvm/Support/DebugCounter.h"
50 #include "llvm/Support/RecyclingAllocator.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include "llvm/Transforms/Scalar.h"
53 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
54 #include "llvm/Transforms/Utils/Local.h"
61 using namespace llvm::PatternMatch
;
63 #define DEBUG_TYPE "early-cse"
65 STATISTIC(NumSimplify
, "Number of instructions simplified or DCE'd");
66 STATISTIC(NumCSE
, "Number of instructions CSE'd");
67 STATISTIC(NumCSECVP
, "Number of compare instructions CVP'd");
68 STATISTIC(NumCSELoad
, "Number of load instructions CSE'd");
69 STATISTIC(NumCSECall
, "Number of call instructions CSE'd");
70 STATISTIC(NumCSEGEP
, "Number of GEP instructions CSE'd");
71 STATISTIC(NumDSE
, "Number of trivial dead stores removed");
73 DEBUG_COUNTER(CSECounter
, "early-cse",
74 "Controls which instructions are removed");
76 static cl::opt
<unsigned> EarlyCSEMssaOptCap(
77 "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden
,
78 cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
79 "for faster compile. Caps the MemorySSA clobbering calls."));
81 static cl::opt
<bool> EarlyCSEDebugHash(
82 "earlycse-debug-hash", cl::init(false), cl::Hidden
,
83 cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
84 "function is well-behaved w.r.t. its isEqual predicate"));
86 //===----------------------------------------------------------------------===//
88 //===----------------------------------------------------------------------===//
92 /// Struct representing the available values in the scoped hash table.
96 SimpleValue(Instruction
*I
) : Inst(I
) {
97 assert((isSentinel() || canHandle(I
)) && "Inst can't be handled!");
100 bool isSentinel() const {
101 return Inst
== DenseMapInfo
<Instruction
*>::getEmptyKey() ||
102 Inst
== DenseMapInfo
<Instruction
*>::getTombstoneKey();
105 static bool canHandle(Instruction
*Inst
) {
106 // This can only handle non-void readnone functions.
107 // Also handled are constrained intrinsic that look like the types
108 // of instruction handled below (UnaryOperator, etc.).
109 if (CallInst
*CI
= dyn_cast
<CallInst
>(Inst
)) {
110 if (Function
*F
= CI
->getCalledFunction()) {
111 switch ((Intrinsic::ID
)F
->getIntrinsicID()) {
112 case Intrinsic::experimental_constrained_fadd
:
113 case Intrinsic::experimental_constrained_fsub
:
114 case Intrinsic::experimental_constrained_fmul
:
115 case Intrinsic::experimental_constrained_fdiv
:
116 case Intrinsic::experimental_constrained_frem
:
117 case Intrinsic::experimental_constrained_fptosi
:
118 case Intrinsic::experimental_constrained_sitofp
:
119 case Intrinsic::experimental_constrained_fptoui
:
120 case Intrinsic::experimental_constrained_uitofp
:
121 case Intrinsic::experimental_constrained_fcmp
:
122 case Intrinsic::experimental_constrained_fcmps
: {
123 auto *CFP
= cast
<ConstrainedFPIntrinsic
>(CI
);
124 if (CFP
->getExceptionBehavior() &&
125 CFP
->getExceptionBehavior() == fp::ebStrict
)
127 // Since we CSE across function calls we must not allow
128 // the rounding mode to change.
129 if (CFP
->getRoundingMode() &&
130 CFP
->getRoundingMode() == RoundingMode::Dynamic
)
136 return CI
->doesNotAccessMemory() && !CI
->getType()->isVoidTy() &&
137 // FIXME: Currently the calls which may access the thread id may
138 // be considered as not accessing the memory. But this is
139 // problematic for coroutines, since coroutines may resume in a
140 // different thread. So we disable the optimization here for the
141 // correctness. However, it may block many other correct
142 // optimizations. Revert this one when we detect the memory
143 // accessing kind more precisely.
144 !CI
->getFunction()->isPresplitCoroutine();
146 return isa
<CastInst
>(Inst
) || isa
<UnaryOperator
>(Inst
) ||
147 isa
<BinaryOperator
>(Inst
) || isa
<CmpInst
>(Inst
) ||
148 isa
<SelectInst
>(Inst
) || isa
<ExtractElementInst
>(Inst
) ||
149 isa
<InsertElementInst
>(Inst
) || isa
<ShuffleVectorInst
>(Inst
) ||
150 isa
<ExtractValueInst
>(Inst
) || isa
<InsertValueInst
>(Inst
) ||
151 isa
<FreezeInst
>(Inst
);
155 } // end anonymous namespace
159 template <> struct DenseMapInfo
<SimpleValue
> {
160 static inline SimpleValue
getEmptyKey() {
161 return DenseMapInfo
<Instruction
*>::getEmptyKey();
164 static inline SimpleValue
getTombstoneKey() {
165 return DenseMapInfo
<Instruction
*>::getTombstoneKey();
168 static unsigned getHashValue(SimpleValue Val
);
169 static bool isEqual(SimpleValue LHS
, SimpleValue RHS
);
172 } // end namespace llvm
174 /// Match a 'select' including an optional 'not's of the condition.
175 static bool matchSelectWithOptionalNotCond(Value
*V
, Value
*&Cond
, Value
*&A
,
177 SelectPatternFlavor
&Flavor
) {
178 // Return false if V is not even a select.
179 if (!match(V
, m_Select(m_Value(Cond
), m_Value(A
), m_Value(B
))))
182 // Look through a 'not' of the condition operand by swapping A/B.
184 if (match(Cond
, m_Not(m_Value(CondNot
)))) {
189 // Match canonical forms of min/max. We are not using ValueTracking's
190 // more powerful matchSelectPattern() because it may rely on instruction flags
191 // such as "nsw". That would be incompatible with the current hashing
192 // mechanism that may remove flags to increase the likelihood of CSE.
194 Flavor
= SPF_UNKNOWN
;
195 CmpInst::Predicate Pred
;
197 if (!match(Cond
, m_ICmp(Pred
, m_Specific(A
), m_Specific(B
)))) {
198 // Check for commuted variants of min/max by swapping predicate.
199 // If we do not match the standard or commuted patterns, this is not a
200 // recognized form of min/max, but it is still a select, so return true.
201 if (!match(Cond
, m_ICmp(Pred
, m_Specific(B
), m_Specific(A
))))
203 Pred
= ICmpInst::getSwappedPredicate(Pred
);
207 case CmpInst::ICMP_UGT
: Flavor
= SPF_UMAX
; break;
208 case CmpInst::ICMP_ULT
: Flavor
= SPF_UMIN
; break;
209 case CmpInst::ICMP_SGT
: Flavor
= SPF_SMAX
; break;
210 case CmpInst::ICMP_SLT
: Flavor
= SPF_SMIN
; break;
211 // Non-strict inequalities.
212 case CmpInst::ICMP_ULE
: Flavor
= SPF_UMIN
; break;
213 case CmpInst::ICMP_UGE
: Flavor
= SPF_UMAX
; break;
214 case CmpInst::ICMP_SLE
: Flavor
= SPF_SMIN
; break;
215 case CmpInst::ICMP_SGE
: Flavor
= SPF_SMAX
; break;
222 static unsigned hashCallInst(CallInst
*CI
) {
223 // Don't CSE convergent calls in different basic blocks, because they
224 // implicitly depend on the set of threads that is currently executing.
225 if (CI
->isConvergent()) {
227 CI
->getOpcode(), CI
->getParent(),
228 hash_combine_range(CI
->value_op_begin(), CI
->value_op_end()));
232 hash_combine_range(CI
->value_op_begin(), CI
->value_op_end()));
235 static unsigned getHashValueImpl(SimpleValue Val
) {
236 Instruction
*Inst
= Val
.Inst
;
237 // Hash in all of the operands as pointers.
238 if (BinaryOperator
*BinOp
= dyn_cast
<BinaryOperator
>(Inst
)) {
239 Value
*LHS
= BinOp
->getOperand(0);
240 Value
*RHS
= BinOp
->getOperand(1);
241 if (BinOp
->isCommutative() && BinOp
->getOperand(0) > BinOp
->getOperand(1))
244 return hash_combine(BinOp
->getOpcode(), LHS
, RHS
);
247 if (CmpInst
*CI
= dyn_cast
<CmpInst
>(Inst
)) {
248 // Compares can be commuted by swapping the comparands and
249 // updating the predicate. Choose the form that has the
250 // comparands in sorted order, or in the case of a tie, the
251 // one with the lower predicate.
252 Value
*LHS
= CI
->getOperand(0);
253 Value
*RHS
= CI
->getOperand(1);
254 CmpInst::Predicate Pred
= CI
->getPredicate();
255 CmpInst::Predicate SwappedPred
= CI
->getSwappedPredicate();
256 if (std::tie(LHS
, Pred
) > std::tie(RHS
, SwappedPred
)) {
260 return hash_combine(Inst
->getOpcode(), Pred
, LHS
, RHS
);
263 // Hash general selects to allow matching commuted true/false operands.
264 SelectPatternFlavor SPF
;
266 if (matchSelectWithOptionalNotCond(Inst
, Cond
, A
, B
, SPF
)) {
267 // Hash min/max (cmp + select) to allow for commuted operands.
268 // Min/max may also have non-canonical compare predicate (eg, the compare for
269 // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
271 // TODO: We should also detect FP min/max.
272 if (SPF
== SPF_SMIN
|| SPF
== SPF_SMAX
||
273 SPF
== SPF_UMIN
|| SPF
== SPF_UMAX
) {
276 return hash_combine(Inst
->getOpcode(), SPF
, A
, B
);
279 // Hash general selects to allow matching commuted true/false operands.
281 // If we do not have a compare as the condition, just hash in the condition.
282 CmpInst::Predicate Pred
;
284 if (!match(Cond
, m_Cmp(Pred
, m_Value(X
), m_Value(Y
))))
285 return hash_combine(Inst
->getOpcode(), Cond
, A
, B
);
287 // Similar to cmp normalization (above) - canonicalize the predicate value:
288 // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
289 if (CmpInst::getInversePredicate(Pred
) < Pred
) {
290 Pred
= CmpInst::getInversePredicate(Pred
);
293 return hash_combine(Inst
->getOpcode(), Pred
, X
, Y
, A
, B
);
296 if (CastInst
*CI
= dyn_cast
<CastInst
>(Inst
))
297 return hash_combine(CI
->getOpcode(), CI
->getType(), CI
->getOperand(0));
299 if (FreezeInst
*FI
= dyn_cast
<FreezeInst
>(Inst
))
300 return hash_combine(FI
->getOpcode(), FI
->getOperand(0));
302 if (const ExtractValueInst
*EVI
= dyn_cast
<ExtractValueInst
>(Inst
))
303 return hash_combine(EVI
->getOpcode(), EVI
->getOperand(0),
304 hash_combine_range(EVI
->idx_begin(), EVI
->idx_end()));
306 if (const InsertValueInst
*IVI
= dyn_cast
<InsertValueInst
>(Inst
))
307 return hash_combine(IVI
->getOpcode(), IVI
->getOperand(0),
309 hash_combine_range(IVI
->idx_begin(), IVI
->idx_end()));
311 assert((isa
<CallInst
>(Inst
) || isa
<ExtractElementInst
>(Inst
) ||
312 isa
<InsertElementInst
>(Inst
) || isa
<ShuffleVectorInst
>(Inst
) ||
313 isa
<UnaryOperator
>(Inst
) || isa
<FreezeInst
>(Inst
)) &&
314 "Invalid/unknown instruction");
316 // Handle intrinsics with commutative operands.
317 auto *II
= dyn_cast
<IntrinsicInst
>(Inst
);
318 if (II
&& II
->isCommutative() && II
->arg_size() >= 2) {
319 Value
*LHS
= II
->getArgOperand(0), *RHS
= II
->getArgOperand(1);
323 II
->getOpcode(), LHS
, RHS
,
324 hash_combine_range(II
->value_op_begin() + 2, II
->value_op_end()));
327 // gc.relocate is 'special' call: its second and third operands are
328 // not real values, but indices into statepoint's argument list.
329 // Get values they point to.
330 if (const GCRelocateInst
*GCR
= dyn_cast
<GCRelocateInst
>(Inst
))
331 return hash_combine(GCR
->getOpcode(), GCR
->getOperand(0),
332 GCR
->getBasePtr(), GCR
->getDerivedPtr());
334 // Don't CSE convergent calls in different basic blocks, because they
335 // implicitly depend on the set of threads that is currently executing.
336 if (CallInst
*CI
= dyn_cast
<CallInst
>(Inst
))
337 return hashCallInst(CI
);
339 // Mix in the opcode.
342 hash_combine_range(Inst
->value_op_begin(), Inst
->value_op_end()));
345 unsigned DenseMapInfo
<SimpleValue
>::getHashValue(SimpleValue Val
) {
347 // If -earlycse-debug-hash was specified, return a constant -- this
348 // will force all hashing to collide, so we'll exhaustively search
349 // the table for a match, and the assertion in isEqual will fire if
350 // there's a bug causing equal keys to hash differently.
351 if (EarlyCSEDebugHash
)
354 return getHashValueImpl(Val
);
357 static bool isEqualImpl(SimpleValue LHS
, SimpleValue RHS
) {
358 Instruction
*LHSI
= LHS
.Inst
, *RHSI
= RHS
.Inst
;
360 if (LHS
.isSentinel() || RHS
.isSentinel())
363 if (LHSI
->getOpcode() != RHSI
->getOpcode())
365 if (LHSI
->isIdenticalToWhenDefined(RHSI
)) {
366 // Convergent calls implicitly depend on the set of threads that is
367 // currently executing, so conservatively return false if they are in
368 // different basic blocks.
369 if (CallInst
*CI
= dyn_cast
<CallInst
>(LHSI
);
370 CI
&& CI
->isConvergent() && LHSI
->getParent() != RHSI
->getParent())
376 // If we're not strictly identical, we still might be a commutable instruction
377 if (BinaryOperator
*LHSBinOp
= dyn_cast
<BinaryOperator
>(LHSI
)) {
378 if (!LHSBinOp
->isCommutative())
381 assert(isa
<BinaryOperator
>(RHSI
) &&
382 "same opcode, but different instruction type?");
383 BinaryOperator
*RHSBinOp
= cast
<BinaryOperator
>(RHSI
);
386 return LHSBinOp
->getOperand(0) == RHSBinOp
->getOperand(1) &&
387 LHSBinOp
->getOperand(1) == RHSBinOp
->getOperand(0);
389 if (CmpInst
*LHSCmp
= dyn_cast
<CmpInst
>(LHSI
)) {
390 assert(isa
<CmpInst
>(RHSI
) &&
391 "same opcode, but different instruction type?");
392 CmpInst
*RHSCmp
= cast
<CmpInst
>(RHSI
);
394 return LHSCmp
->getOperand(0) == RHSCmp
->getOperand(1) &&
395 LHSCmp
->getOperand(1) == RHSCmp
->getOperand(0) &&
396 LHSCmp
->getSwappedPredicate() == RHSCmp
->getPredicate();
399 auto *LII
= dyn_cast
<IntrinsicInst
>(LHSI
);
400 auto *RII
= dyn_cast
<IntrinsicInst
>(RHSI
);
401 if (LII
&& RII
&& LII
->getIntrinsicID() == RII
->getIntrinsicID() &&
402 LII
->isCommutative() && LII
->arg_size() >= 2) {
403 return LII
->getArgOperand(0) == RII
->getArgOperand(1) &&
404 LII
->getArgOperand(1) == RII
->getArgOperand(0) &&
405 std::equal(LII
->arg_begin() + 2, LII
->arg_end(),
406 RII
->arg_begin() + 2, RII
->arg_end());
409 // See comment above in `getHashValue()`.
410 if (const GCRelocateInst
*GCR1
= dyn_cast
<GCRelocateInst
>(LHSI
))
411 if (const GCRelocateInst
*GCR2
= dyn_cast
<GCRelocateInst
>(RHSI
))
412 return GCR1
->getOperand(0) == GCR2
->getOperand(0) &&
413 GCR1
->getBasePtr() == GCR2
->getBasePtr() &&
414 GCR1
->getDerivedPtr() == GCR2
->getDerivedPtr();
416 // Min/max can occur with commuted operands, non-canonical predicates,
417 // and/or non-canonical operands.
418 // Selects can be non-trivially equivalent via inverted conditions and swaps.
419 SelectPatternFlavor LSPF
, RSPF
;
420 Value
*CondL
, *CondR
, *LHSA
, *RHSA
, *LHSB
, *RHSB
;
421 if (matchSelectWithOptionalNotCond(LHSI
, CondL
, LHSA
, LHSB
, LSPF
) &&
422 matchSelectWithOptionalNotCond(RHSI
, CondR
, RHSA
, RHSB
, RSPF
)) {
424 // TODO: We should also detect FP min/max.
425 if (LSPF
== SPF_SMIN
|| LSPF
== SPF_SMAX
||
426 LSPF
== SPF_UMIN
|| LSPF
== SPF_UMAX
)
427 return ((LHSA
== RHSA
&& LHSB
== RHSB
) ||
428 (LHSA
== RHSB
&& LHSB
== RHSA
));
430 // select Cond, A, B <--> select not(Cond), B, A
431 if (CondL
== CondR
&& LHSA
== RHSA
&& LHSB
== RHSB
)
435 // If the true/false operands are swapped and the conditions are compares
436 // with inverted predicates, the selects are equal:
437 // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
439 // This also handles patterns with a double-negation in the sense of not +
440 // inverse, because we looked through a 'not' in the matching function and
442 // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
444 // This intentionally does NOT handle patterns with a double-negation in
445 // the sense of not + not, because doing so could result in values
447 // as equal that hash differently in the min/max cases like:
448 // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
449 // ^ hashes as min ^ would not hash as min
450 // In the context of the EarlyCSE pass, however, such cases never reach
451 // this code, as we simplify the double-negation before hashing the second
452 // select (and so still succeed at CSEing them).
453 if (LHSA
== RHSB
&& LHSB
== RHSA
) {
454 CmpInst::Predicate PredL
, PredR
;
456 if (match(CondL
, m_Cmp(PredL
, m_Value(X
), m_Value(Y
))) &&
457 match(CondR
, m_Cmp(PredR
, m_Specific(X
), m_Specific(Y
))) &&
458 CmpInst::getInversePredicate(PredL
) == PredR
)
466 bool DenseMapInfo
<SimpleValue
>::isEqual(SimpleValue LHS
, SimpleValue RHS
) {
467 // These comparisons are nontrivial, so assert that equality implies
468 // hash equality (DenseMap demands this as an invariant).
469 bool Result
= isEqualImpl(LHS
, RHS
);
470 assert(!Result
|| (LHS
.isSentinel() && LHS
.Inst
== RHS
.Inst
) ||
471 getHashValueImpl(LHS
) == getHashValueImpl(RHS
));
475 //===----------------------------------------------------------------------===//
477 //===----------------------------------------------------------------------===//
481 /// Struct representing the available call values in the scoped hash
486 CallValue(Instruction
*I
) : Inst(I
) {
487 assert((isSentinel() || canHandle(I
)) && "Inst can't be handled!");
490 bool isSentinel() const {
491 return Inst
== DenseMapInfo
<Instruction
*>::getEmptyKey() ||
492 Inst
== DenseMapInfo
<Instruction
*>::getTombstoneKey();
495 static bool canHandle(Instruction
*Inst
) {
496 // Don't value number anything that returns void.
497 if (Inst
->getType()->isVoidTy())
500 CallInst
*CI
= dyn_cast
<CallInst
>(Inst
);
501 if (!CI
|| !CI
->onlyReadsMemory() ||
502 // FIXME: Currently the calls which may access the thread id may
503 // be considered as not accessing the memory. But this is
504 // problematic for coroutines, since coroutines may resume in a
505 // different thread. So we disable the optimization here for the
506 // correctness. However, it may block many other correct
507 // optimizations. Revert this one when we detect the memory
508 // accessing kind more precisely.
509 CI
->getFunction()->isPresplitCoroutine())
515 } // end anonymous namespace
519 template <> struct DenseMapInfo
<CallValue
> {
520 static inline CallValue
getEmptyKey() {
521 return DenseMapInfo
<Instruction
*>::getEmptyKey();
524 static inline CallValue
getTombstoneKey() {
525 return DenseMapInfo
<Instruction
*>::getTombstoneKey();
528 static unsigned getHashValue(CallValue Val
);
529 static bool isEqual(CallValue LHS
, CallValue RHS
);
532 } // end namespace llvm
534 unsigned DenseMapInfo
<CallValue
>::getHashValue(CallValue Val
) {
535 Instruction
*Inst
= Val
.Inst
;
537 // Hash all of the operands as pointers and mix in the opcode.
538 return hashCallInst(cast
<CallInst
>(Inst
));
541 bool DenseMapInfo
<CallValue
>::isEqual(CallValue LHS
, CallValue RHS
) {
542 if (LHS
.isSentinel() || RHS
.isSentinel())
543 return LHS
.Inst
== RHS
.Inst
;
545 CallInst
*LHSI
= cast
<CallInst
>(LHS
.Inst
);
546 CallInst
*RHSI
= cast
<CallInst
>(RHS
.Inst
);
548 // Convergent calls implicitly depend on the set of threads that is
549 // currently executing, so conservatively return false if they are in
550 // different basic blocks.
551 if (LHSI
->isConvergent() && LHSI
->getParent() != RHSI
->getParent())
554 return LHSI
->isIdenticalTo(RHSI
);
557 //===----------------------------------------------------------------------===//
559 //===----------------------------------------------------------------------===//
565 std::optional
<int64_t> ConstantOffset
;
567 GEPValue(Instruction
*I
) : Inst(I
) {
568 assert((isSentinel() || canHandle(I
)) && "Inst can't be handled!");
571 GEPValue(Instruction
*I
, std::optional
<int64_t> ConstantOffset
)
572 : Inst(I
), ConstantOffset(ConstantOffset
) {
573 assert((isSentinel() || canHandle(I
)) && "Inst can't be handled!");
576 bool isSentinel() const {
577 return Inst
== DenseMapInfo
<Instruction
*>::getEmptyKey() ||
578 Inst
== DenseMapInfo
<Instruction
*>::getTombstoneKey();
581 static bool canHandle(Instruction
*Inst
) {
582 return isa
<GetElementPtrInst
>(Inst
);
590 template <> struct DenseMapInfo
<GEPValue
> {
591 static inline GEPValue
getEmptyKey() {
592 return DenseMapInfo
<Instruction
*>::getEmptyKey();
595 static inline GEPValue
getTombstoneKey() {
596 return DenseMapInfo
<Instruction
*>::getTombstoneKey();
599 static unsigned getHashValue(const GEPValue
&Val
);
600 static bool isEqual(const GEPValue
&LHS
, const GEPValue
&RHS
);
603 } // end namespace llvm
605 unsigned DenseMapInfo
<GEPValue
>::getHashValue(const GEPValue
&Val
) {
606 auto *GEP
= cast
<GetElementPtrInst
>(Val
.Inst
);
607 if (Val
.ConstantOffset
.has_value())
608 return hash_combine(GEP
->getOpcode(), GEP
->getPointerOperand(),
609 Val
.ConstantOffset
.value());
612 hash_combine_range(GEP
->value_op_begin(), GEP
->value_op_end()));
615 bool DenseMapInfo
<GEPValue
>::isEqual(const GEPValue
&LHS
, const GEPValue
&RHS
) {
616 if (LHS
.isSentinel() || RHS
.isSentinel())
617 return LHS
.Inst
== RHS
.Inst
;
618 auto *LGEP
= cast
<GetElementPtrInst
>(LHS
.Inst
);
619 auto *RGEP
= cast
<GetElementPtrInst
>(RHS
.Inst
);
620 if (LGEP
->getPointerOperand() != RGEP
->getPointerOperand())
622 if (LHS
.ConstantOffset
.has_value() && RHS
.ConstantOffset
.has_value())
623 return LHS
.ConstantOffset
.value() == RHS
.ConstantOffset
.value();
624 return LGEP
->isIdenticalToWhenDefined(RGEP
);
627 //===----------------------------------------------------------------------===//
628 // EarlyCSE implementation
629 //===----------------------------------------------------------------------===//
633 /// A simple and fast domtree-based CSE pass.
635 /// This pass does a simple depth-first walk over the dominator tree,
636 /// eliminating trivially redundant instructions and using instsimplify to
637 /// canonicalize things as it goes. It is intended to be fast and catch obvious
638 /// cases so that instcombine and other passes are more effective. It is
639 /// expected that a later pass of GVN will catch the interesting/hard cases.
642 const TargetLibraryInfo
&TLI
;
643 const TargetTransformInfo
&TTI
;
646 const SimplifyQuery SQ
;
648 std::unique_ptr
<MemorySSAUpdater
> MSSAUpdater
;
651 RecyclingAllocator
<BumpPtrAllocator
,
652 ScopedHashTableVal
<SimpleValue
, Value
*>>;
654 ScopedHashTable
<SimpleValue
, Value
*, DenseMapInfo
<SimpleValue
>,
657 /// A scoped hash table of the current values of all of our simple
658 /// scalar expressions.
660 /// As we walk down the domtree, we look to see if instructions are in this:
661 /// if so, we replace them with what we find, otherwise we insert them so
662 /// that dominated values can succeed in their lookup.
663 ScopedHTType AvailableValues
;
665 /// A scoped hash table of the current values of previously encountered
666 /// memory locations.
668 /// This allows us to get efficient access to dominating loads or stores when
669 /// we have a fully redundant load. In addition to the most recent load, we
670 /// keep track of a generation count of the read, which is compared against
671 /// the current generation count. The current generation count is incremented
672 /// after every possibly writing memory operation, which ensures that we only
673 /// CSE loads with other loads that have no intervening store. Ordering
674 /// events (such as fences or atomic instructions) increment the generation
675 /// count as well; essentially, we model these as writes to all possible
676 /// locations. Note that atomic and/or volatile loads and stores can be
677 /// present the table; it is the responsibility of the consumer to inspect
678 /// the atomicity/volatility if needed.
680 Instruction
*DefInst
= nullptr;
681 unsigned Generation
= 0;
683 bool IsAtomic
= false;
686 LoadValue() = default;
687 LoadValue(Instruction
*Inst
, unsigned Generation
, unsigned MatchingId
,
688 bool IsAtomic
, bool IsLoad
)
689 : DefInst(Inst
), Generation(Generation
), MatchingId(MatchingId
),
690 IsAtomic(IsAtomic
), IsLoad(IsLoad
) {}
693 using LoadMapAllocator
=
694 RecyclingAllocator
<BumpPtrAllocator
,
695 ScopedHashTableVal
<Value
*, LoadValue
>>;
697 ScopedHashTable
<Value
*, LoadValue
, DenseMapInfo
<Value
*>,
700 LoadHTType AvailableLoads
;
702 // A scoped hash table mapping memory locations (represented as typed
703 // addresses) to generation numbers at which that memory location became
704 // (henceforth indefinitely) invariant.
705 using InvariantMapAllocator
=
706 RecyclingAllocator
<BumpPtrAllocator
,
707 ScopedHashTableVal
<MemoryLocation
, unsigned>>;
708 using InvariantHTType
=
709 ScopedHashTable
<MemoryLocation
, unsigned, DenseMapInfo
<MemoryLocation
>,
710 InvariantMapAllocator
>;
711 InvariantHTType AvailableInvariants
;
713 /// A scoped hash table of the current values of read-only call
716 /// It uses the same generation count as loads.
718 ScopedHashTable
<CallValue
, std::pair
<Instruction
*, unsigned>>;
719 CallHTType AvailableCalls
;
721 using GEPMapAllocatorTy
=
722 RecyclingAllocator
<BumpPtrAllocator
,
723 ScopedHashTableVal
<GEPValue
, Value
*>>;
724 using GEPHTType
= ScopedHashTable
<GEPValue
, Value
*, DenseMapInfo
<GEPValue
>,
726 GEPHTType AvailableGEPs
;
728 /// This is the current generation of the memory value.
729 unsigned CurrentGeneration
= 0;
731 /// Set up the EarlyCSE runner for a particular function.
732 EarlyCSE(const DataLayout
&DL
, const TargetLibraryInfo
&TLI
,
733 const TargetTransformInfo
&TTI
, DominatorTree
&DT
,
734 AssumptionCache
&AC
, MemorySSA
*MSSA
)
735 : TLI(TLI
), TTI(TTI
), DT(DT
), AC(AC
), SQ(DL
, &TLI
, &DT
, &AC
), MSSA(MSSA
),
736 MSSAUpdater(std::make_unique
<MemorySSAUpdater
>(MSSA
)) {}
741 unsigned ClobberCounter
= 0;
742 // Almost a POD, but needs to call the constructors for the scoped hash
743 // tables so that a new scope gets pushed on. These are RAII so that the
744 // scope gets popped when the NodeScope is destroyed.
747 NodeScope(ScopedHTType
&AvailableValues
, LoadHTType
&AvailableLoads
,
748 InvariantHTType
&AvailableInvariants
, CallHTType
&AvailableCalls
,
749 GEPHTType
&AvailableGEPs
)
750 : Scope(AvailableValues
), LoadScope(AvailableLoads
),
751 InvariantScope(AvailableInvariants
), CallScope(AvailableCalls
),
752 GEPScope(AvailableGEPs
) {}
753 NodeScope(const NodeScope
&) = delete;
754 NodeScope
&operator=(const NodeScope
&) = delete;
757 ScopedHTType::ScopeTy Scope
;
758 LoadHTType::ScopeTy LoadScope
;
759 InvariantHTType::ScopeTy InvariantScope
;
760 CallHTType::ScopeTy CallScope
;
761 GEPHTType::ScopeTy GEPScope
;
764 // Contains all the needed information to create a stack for doing a depth
765 // first traversal of the tree. This includes scopes for values, loads, and
766 // calls as well as the generation. There is a child iterator so that the
767 // children do not need to be store separately.
770 StackNode(ScopedHTType
&AvailableValues
, LoadHTType
&AvailableLoads
,
771 InvariantHTType
&AvailableInvariants
, CallHTType
&AvailableCalls
,
772 GEPHTType
&AvailableGEPs
, unsigned cg
, DomTreeNode
*n
,
773 DomTreeNode::const_iterator child
,
774 DomTreeNode::const_iterator end
)
775 : CurrentGeneration(cg
), ChildGeneration(cg
), Node(n
), ChildIter(child
),
777 Scopes(AvailableValues
, AvailableLoads
, AvailableInvariants
,
778 AvailableCalls
, AvailableGEPs
) {}
779 StackNode(const StackNode
&) = delete;
780 StackNode
&operator=(const StackNode
&) = delete;
783 unsigned currentGeneration() const { return CurrentGeneration
; }
784 unsigned childGeneration() const { return ChildGeneration
; }
785 void childGeneration(unsigned generation
) { ChildGeneration
= generation
; }
786 DomTreeNode
*node() { return Node
; }
787 DomTreeNode::const_iterator
childIter() const { return ChildIter
; }
789 DomTreeNode
*nextChild() {
790 DomTreeNode
*child
= *ChildIter
;
795 DomTreeNode::const_iterator
end() const { return EndIter
; }
796 bool isProcessed() const { return Processed
; }
797 void process() { Processed
= true; }
800 unsigned CurrentGeneration
;
801 unsigned ChildGeneration
;
803 DomTreeNode::const_iterator ChildIter
;
804 DomTreeNode::const_iterator EndIter
;
806 bool Processed
= false;
809 /// Wrapper class to handle memory instructions, including loads,
810 /// stores and intrinsic loads and stores defined by the target.
811 class ParseMemoryInst
{
813 ParseMemoryInst(Instruction
*Inst
, const TargetTransformInfo
&TTI
)
815 if (IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(Inst
)) {
816 IntrID
= II
->getIntrinsicID();
817 if (TTI
.getTgtMemIntrinsic(II
, Info
))
819 if (isHandledNonTargetIntrinsic(IntrID
)) {
821 case Intrinsic::masked_load
:
822 Info
.PtrVal
= Inst
->getOperand(0);
823 Info
.MatchingId
= Intrinsic::masked_load
;
825 Info
.WriteMem
= false;
826 Info
.IsVolatile
= false;
828 case Intrinsic::masked_store
:
829 Info
.PtrVal
= Inst
->getOperand(1);
830 // Use the ID of masked load as the "matching id". This will
831 // prevent matching non-masked loads/stores with masked ones
832 // (which could be done), but at the moment, the code here
833 // does not support matching intrinsics with non-intrinsics,
834 // so keep the MatchingIds specific to masked instructions
836 Info
.MatchingId
= Intrinsic::masked_load
;
837 Info
.ReadMem
= false;
838 Info
.WriteMem
= true;
839 Info
.IsVolatile
= false;
846 Instruction
*get() { return Inst
; }
847 const Instruction
*get() const { return Inst
; }
849 bool isLoad() const {
852 return isa
<LoadInst
>(Inst
);
855 bool isStore() const {
857 return Info
.WriteMem
;
858 return isa
<StoreInst
>(Inst
);
861 bool isAtomic() const {
863 return Info
.Ordering
!= AtomicOrdering::NotAtomic
;
864 return Inst
->isAtomic();
867 bool isUnordered() const {
869 return Info
.isUnordered();
871 if (LoadInst
*LI
= dyn_cast
<LoadInst
>(Inst
)) {
872 return LI
->isUnordered();
873 } else if (StoreInst
*SI
= dyn_cast
<StoreInst
>(Inst
)) {
874 return SI
->isUnordered();
876 // Conservative answer
877 return !Inst
->isAtomic();
880 bool isVolatile() const {
882 return Info
.IsVolatile
;
884 if (LoadInst
*LI
= dyn_cast
<LoadInst
>(Inst
)) {
885 return LI
->isVolatile();
886 } else if (StoreInst
*SI
= dyn_cast
<StoreInst
>(Inst
)) {
887 return SI
->isVolatile();
889 // Conservative answer
893 bool isInvariantLoad() const {
894 if (auto *LI
= dyn_cast
<LoadInst
>(Inst
))
895 return LI
->hasMetadata(LLVMContext::MD_invariant_load
);
899 bool isValid() const { return getPointerOperand() != nullptr; }
901 // For regular (non-intrinsic) loads/stores, this is set to -1. For
902 // intrinsic loads/stores, the id is retrieved from the corresponding
903 // field in the MemIntrinsicInfo structure. That field contains
904 // non-negative values only.
905 int getMatchingId() const {
907 return Info
.MatchingId
;
911 Value
*getPointerOperand() const {
914 return getLoadStorePointerOperand(Inst
);
917 Type
*getValueType() const {
918 // TODO: handle target-specific intrinsics.
919 return Inst
->getAccessType();
922 bool mayReadFromMemory() const {
925 return Inst
->mayReadFromMemory();
928 bool mayWriteToMemory() const {
930 return Info
.WriteMem
;
931 return Inst
->mayWriteToMemory();
935 Intrinsic::ID IntrID
= 0;
936 MemIntrinsicInfo Info
;
940 // This function is to prevent accidentally passing a non-target
941 // intrinsic ID to TargetTransformInfo.
942 static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID
) {
944 case Intrinsic::masked_load
:
945 case Intrinsic::masked_store
:
950 static bool isHandledNonTargetIntrinsic(const Value
*V
) {
951 if (auto *II
= dyn_cast
<IntrinsicInst
>(V
))
952 return isHandledNonTargetIntrinsic(II
->getIntrinsicID());
956 bool processNode(DomTreeNode
*Node
);
958 bool handleBranchCondition(Instruction
*CondInst
, const BranchInst
*BI
,
959 const BasicBlock
*BB
, const BasicBlock
*Pred
);
961 Value
*getMatchingValue(LoadValue
&InVal
, ParseMemoryInst
&MemInst
,
962 unsigned CurrentGeneration
);
964 bool overridingStores(const ParseMemoryInst
&Earlier
,
965 const ParseMemoryInst
&Later
);
967 Value
*getOrCreateResult(Value
*Inst
, Type
*ExpectedType
) const {
968 // TODO: We could insert relevant casts on type mismatch here.
969 if (auto *LI
= dyn_cast
<LoadInst
>(Inst
))
970 return LI
->getType() == ExpectedType
? LI
: nullptr;
971 if (auto *SI
= dyn_cast
<StoreInst
>(Inst
)) {
972 Value
*V
= SI
->getValueOperand();
973 return V
->getType() == ExpectedType
? V
: nullptr;
975 assert(isa
<IntrinsicInst
>(Inst
) && "Instruction not supported");
976 auto *II
= cast
<IntrinsicInst
>(Inst
);
977 if (isHandledNonTargetIntrinsic(II
->getIntrinsicID()))
978 return getOrCreateResultNonTargetMemIntrinsic(II
, ExpectedType
);
979 return TTI
.getOrCreateResultFromMemIntrinsic(II
, ExpectedType
);
982 Value
*getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst
*II
,
983 Type
*ExpectedType
) const {
984 // TODO: We could insert relevant casts on type mismatch here.
985 switch (II
->getIntrinsicID()) {
986 case Intrinsic::masked_load
:
987 return II
->getType() == ExpectedType
? II
: nullptr;
988 case Intrinsic::masked_store
: {
989 Value
*V
= II
->getOperand(0);
990 return V
->getType() == ExpectedType
? V
: nullptr;
996 /// Return true if the instruction is known to only operate on memory
997 /// provably invariant in the given "generation".
998 bool isOperatingOnInvariantMemAt(Instruction
*I
, unsigned GenAt
);
1000 bool isSameMemGeneration(unsigned EarlierGeneration
, unsigned LaterGeneration
,
1001 Instruction
*EarlierInst
, Instruction
*LaterInst
);
1003 bool isNonTargetIntrinsicMatch(const IntrinsicInst
*Earlier
,
1004 const IntrinsicInst
*Later
) {
1005 auto IsSubmask
= [](const Value
*Mask0
, const Value
*Mask1
) {
1006 // Is Mask0 a submask of Mask1?
1009 if (isa
<UndefValue
>(Mask0
) || isa
<UndefValue
>(Mask1
))
1011 auto *Vec0
= dyn_cast
<ConstantVector
>(Mask0
);
1012 auto *Vec1
= dyn_cast
<ConstantVector
>(Mask1
);
1015 if (Vec0
->getType() != Vec1
->getType())
1017 for (int i
= 0, e
= Vec0
->getNumOperands(); i
!= e
; ++i
) {
1018 Constant
*Elem0
= Vec0
->getOperand(i
);
1019 Constant
*Elem1
= Vec1
->getOperand(i
);
1020 auto *Int0
= dyn_cast
<ConstantInt
>(Elem0
);
1021 if (Int0
&& Int0
->isZero())
1023 auto *Int1
= dyn_cast
<ConstantInt
>(Elem1
);
1024 if (Int1
&& !Int1
->isZero())
1026 if (isa
<UndefValue
>(Elem0
) || isa
<UndefValue
>(Elem1
))
1034 auto PtrOp
= [](const IntrinsicInst
*II
) {
1035 if (II
->getIntrinsicID() == Intrinsic::masked_load
)
1036 return II
->getOperand(0);
1037 if (II
->getIntrinsicID() == Intrinsic::masked_store
)
1038 return II
->getOperand(1);
1039 llvm_unreachable("Unexpected IntrinsicInst");
1041 auto MaskOp
= [](const IntrinsicInst
*II
) {
1042 if (II
->getIntrinsicID() == Intrinsic::masked_load
)
1043 return II
->getOperand(2);
1044 if (II
->getIntrinsicID() == Intrinsic::masked_store
)
1045 return II
->getOperand(3);
1046 llvm_unreachable("Unexpected IntrinsicInst");
1048 auto ThruOp
= [](const IntrinsicInst
*II
) {
1049 if (II
->getIntrinsicID() == Intrinsic::masked_load
)
1050 return II
->getOperand(3);
1051 llvm_unreachable("Unexpected IntrinsicInst");
1054 if (PtrOp(Earlier
) != PtrOp(Later
))
1057 Intrinsic::ID IDE
= Earlier
->getIntrinsicID();
1058 Intrinsic::ID IDL
= Later
->getIntrinsicID();
1059 // We could really use specific intrinsic classes for masked loads
1060 // and stores in IntrinsicInst.h.
1061 if (IDE
== Intrinsic::masked_load
&& IDL
== Intrinsic::masked_load
) {
1062 // Trying to replace later masked load with the earlier one.
1063 // Check that the pointers are the same, and
1064 // - masks and pass-throughs are the same, or
1065 // - replacee's pass-through is "undef" and replacer's mask is a
1066 // super-set of the replacee's mask.
1067 if (MaskOp(Earlier
) == MaskOp(Later
) && ThruOp(Earlier
) == ThruOp(Later
))
1069 if (!isa
<UndefValue
>(ThruOp(Later
)))
1071 return IsSubmask(MaskOp(Later
), MaskOp(Earlier
));
1073 if (IDE
== Intrinsic::masked_store
&& IDL
== Intrinsic::masked_load
) {
1074 // Trying to replace a load of a stored value with the store's value.
1075 // Check that the pointers are the same, and
1076 // - load's mask is a subset of store's mask, and
1077 // - load's pass-through is "undef".
1078 if (!IsSubmask(MaskOp(Later
), MaskOp(Earlier
)))
1080 return isa
<UndefValue
>(ThruOp(Later
));
1082 if (IDE
== Intrinsic::masked_load
&& IDL
== Intrinsic::masked_store
) {
1083 // Trying to remove a store of the loaded value.
1084 // Check that the pointers are the same, and
1085 // - store's mask is a subset of the load's mask.
1086 return IsSubmask(MaskOp(Later
), MaskOp(Earlier
));
1088 if (IDE
== Intrinsic::masked_store
&& IDL
== Intrinsic::masked_store
) {
1089 // Trying to remove a dead store (earlier).
1090 // Check that the pointers are the same,
1091 // - the to-be-removed store's mask is a subset of the other store's
1093 return IsSubmask(MaskOp(Earlier
), MaskOp(Later
));
1098 void removeMSSA(Instruction
&Inst
) {
1101 if (VerifyMemorySSA
)
1102 MSSA
->verifyMemorySSA();
1103 // Removing a store here can leave MemorySSA in an unoptimized state by
1104 // creating MemoryPhis that have identical arguments and by creating
1105 // MemoryUses whose defining access is not an actual clobber. The phi case
1106 // is handled by MemorySSA when passing OptimizePhis = true to
1107 // removeMemoryAccess. The non-optimized MemoryUse case is lazily updated
1108 // by MemorySSA's getClobberingMemoryAccess.
1109 MSSAUpdater
->removeMemoryAccess(&Inst
, true);
1113 } // end anonymous namespace
1115 /// Determine if the memory referenced by LaterInst is from the same heap
1116 /// version as EarlierInst.
1117 /// This is currently called in two scenarios:
1129 /// in both cases we want to verify that there are no possible writes to the
1130 /// memory referenced by p between the earlier and later instruction.
1131 bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration
,
1132 unsigned LaterGeneration
,
1133 Instruction
*EarlierInst
,
1134 Instruction
*LaterInst
) {
1135 // Check the simple memory generation tracking first.
1136 if (EarlierGeneration
== LaterGeneration
)
1142 // If MemorySSA has determined that one of EarlierInst or LaterInst does not
1143 // read/write memory, then we can safely return true here.
1144 // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
1145 // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
1146 // by also checking the MemorySSA MemoryAccess on the instruction. Initial
1147 // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
1148 // with the default optimization pipeline.
1149 auto *EarlierMA
= MSSA
->getMemoryAccess(EarlierInst
);
1152 auto *LaterMA
= MSSA
->getMemoryAccess(LaterInst
);
1156 // Since we know LaterDef dominates LaterInst and EarlierInst dominates
1157 // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
1158 // EarlierInst and LaterInst and neither can any other write that potentially
1159 // clobbers LaterInst.
1160 MemoryAccess
*LaterDef
;
1161 if (ClobberCounter
< EarlyCSEMssaOptCap
) {
1162 LaterDef
= MSSA
->getWalker()->getClobberingMemoryAccess(LaterInst
);
1165 LaterDef
= LaterMA
->getDefiningAccess();
1167 return MSSA
->dominates(LaterDef
, EarlierMA
);
1170 bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction
*I
, unsigned GenAt
) {
1171 // A location loaded from with an invariant_load is assumed to *never* change
1172 // within the visible scope of the compilation.
1173 if (auto *LI
= dyn_cast
<LoadInst
>(I
))
1174 if (LI
->hasMetadata(LLVMContext::MD_invariant_load
))
1177 auto MemLocOpt
= MemoryLocation::getOrNone(I
);
1179 // "target" intrinsic forms of loads aren't currently known to
1180 // MemoryLocation::get. TODO
1182 MemoryLocation MemLoc
= *MemLocOpt
;
1183 if (!AvailableInvariants
.count(MemLoc
))
1186 // Is the generation at which this became invariant older than the
1188 return AvailableInvariants
.lookup(MemLoc
) <= GenAt
;
1191 bool EarlyCSE::handleBranchCondition(Instruction
*CondInst
,
1192 const BranchInst
*BI
, const BasicBlock
*BB
,
1193 const BasicBlock
*Pred
) {
1194 assert(BI
->isConditional() && "Should be a conditional branch!");
1195 assert(BI
->getCondition() == CondInst
&& "Wrong condition?");
1196 assert(BI
->getSuccessor(0) == BB
|| BI
->getSuccessor(1) == BB
);
1197 auto *TorF
= (BI
->getSuccessor(0) == BB
)
1198 ? ConstantInt::getTrue(BB
->getContext())
1199 : ConstantInt::getFalse(BB
->getContext());
1200 auto MatchBinOp
= [](Instruction
*I
, unsigned Opcode
, Value
*&LHS
,
1202 if (Opcode
== Instruction::And
&&
1203 match(I
, m_LogicalAnd(m_Value(LHS
), m_Value(RHS
))))
1205 else if (Opcode
== Instruction::Or
&&
1206 match(I
, m_LogicalOr(m_Value(LHS
), m_Value(RHS
))))
1210 // If the condition is AND operation, we can propagate its operands into the
1211 // true branch. If it is OR operation, we can propagate them into the false
1213 unsigned PropagateOpcode
=
1214 (BI
->getSuccessor(0) == BB
) ? Instruction::And
: Instruction::Or
;
1216 bool MadeChanges
= false;
1217 SmallVector
<Instruction
*, 4> WorkList
;
1218 SmallPtrSet
<Instruction
*, 4> Visited
;
1219 WorkList
.push_back(CondInst
);
1220 while (!WorkList
.empty()) {
1221 Instruction
*Curr
= WorkList
.pop_back_val();
1223 AvailableValues
.insert(Curr
, TorF
);
1224 LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
1225 << Curr
->getName() << "' as " << *TorF
<< " in "
1226 << BB
->getName() << "\n");
1227 if (!DebugCounter::shouldExecute(CSECounter
)) {
1228 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1230 // Replace all dominated uses with the known value.
1231 if (unsigned Count
= replaceDominatedUsesWith(Curr
, TorF
, DT
,
1232 BasicBlockEdge(Pred
, BB
))) {
1239 if (MatchBinOp(Curr
, PropagateOpcode
, LHS
, RHS
))
1240 for (auto *Op
: { LHS
, RHS
})
1241 if (Instruction
*OPI
= dyn_cast
<Instruction
>(Op
))
1242 if (SimpleValue::canHandle(OPI
) && Visited
.insert(OPI
).second
)
1243 WorkList
.push_back(OPI
);
1249 Value
*EarlyCSE::getMatchingValue(LoadValue
&InVal
, ParseMemoryInst
&MemInst
,
1250 unsigned CurrentGeneration
) {
1251 if (InVal
.DefInst
== nullptr)
1253 if (InVal
.MatchingId
!= MemInst
.getMatchingId())
1255 // We don't yet handle removing loads with ordering of any kind.
1256 if (MemInst
.isVolatile() || !MemInst
.isUnordered())
1258 // We can't replace an atomic load with one which isn't also atomic.
1259 if (MemInst
.isLoad() && !InVal
.IsAtomic
&& MemInst
.isAtomic())
1261 // The value V returned from this function is used differently depending
1262 // on whether MemInst is a load or a store. If it's a load, we will replace
1263 // MemInst with V, if it's a store, we will check if V is the same as the
1265 bool MemInstMatching
= !MemInst
.isLoad();
1266 Instruction
*Matching
= MemInstMatching
? MemInst
.get() : InVal
.DefInst
;
1267 Instruction
*Other
= MemInstMatching
? InVal
.DefInst
: MemInst
.get();
1269 // For stores check the result values before checking memory generation
1270 // (otherwise isSameMemGeneration may crash).
1271 Value
*Result
= MemInst
.isStore()
1272 ? getOrCreateResult(Matching
, Other
->getType())
1274 if (MemInst
.isStore() && InVal
.DefInst
!= Result
)
1277 // Deal with non-target memory intrinsics.
1278 bool MatchingNTI
= isHandledNonTargetIntrinsic(Matching
);
1279 bool OtherNTI
= isHandledNonTargetIntrinsic(Other
);
1280 if (OtherNTI
!= MatchingNTI
)
1282 if (OtherNTI
&& MatchingNTI
) {
1283 if (!isNonTargetIntrinsicMatch(cast
<IntrinsicInst
>(InVal
.DefInst
),
1284 cast
<IntrinsicInst
>(MemInst
.get())))
1288 if (!isOperatingOnInvariantMemAt(MemInst
.get(), InVal
.Generation
) &&
1289 !isSameMemGeneration(InVal
.Generation
, CurrentGeneration
, InVal
.DefInst
,
1294 Result
= getOrCreateResult(Matching
, Other
->getType());
1298 static void combineIRFlags(Instruction
&From
, Value
*To
) {
1299 if (auto *I
= dyn_cast
<Instruction
>(To
)) {
1300 // If I being poison triggers UB, there is no need to drop those
1301 // flags. Otherwise, only retain flags present on both I and Inst.
1302 // TODO: Currently some fast-math flags are not treated as
1303 // poison-generating even though they should. Until this is fixed,
1304 // always retain flags present on both I and Inst for floating point
1306 if (isa
<FPMathOperator
>(I
) ||
1307 (I
->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I
)))
1308 I
->andIRFlags(&From
);
1312 bool EarlyCSE::overridingStores(const ParseMemoryInst
&Earlier
,
1313 const ParseMemoryInst
&Later
) {
1314 // Can we remove Earlier store because of Later store?
1316 assert(Earlier
.isUnordered() && !Earlier
.isVolatile() &&
1317 "Violated invariant");
1318 if (Earlier
.getPointerOperand() != Later
.getPointerOperand())
1320 if (!Earlier
.getValueType() || !Later
.getValueType() ||
1321 Earlier
.getValueType() != Later
.getValueType())
1323 if (Earlier
.getMatchingId() != Later
.getMatchingId())
1325 // At the moment, we don't remove ordered stores, but do remove
1326 // unordered atomic stores. There's no special requirement (for
1327 // unordered atomics) about removing atomic stores only in favor of
1328 // other atomic stores since we were going to execute the non-atomic
1329 // one anyway and the atomic one might never have become visible.
1330 if (!Earlier
.isUnordered() || !Later
.isUnordered())
1333 // Deal with non-target memory intrinsics.
1334 bool ENTI
= isHandledNonTargetIntrinsic(Earlier
.get());
1335 bool LNTI
= isHandledNonTargetIntrinsic(Later
.get());
1337 return isNonTargetIntrinsicMatch(cast
<IntrinsicInst
>(Earlier
.get()),
1338 cast
<IntrinsicInst
>(Later
.get()));
1340 // Because of the check above, at least one of them is false.
1341 // For now disallow matching intrinsics with non-intrinsics,
1342 // so assume that the stores match if neither is an intrinsic.
1343 return ENTI
== LNTI
;
1346 bool EarlyCSE::processNode(DomTreeNode
*Node
) {
1347 bool Changed
= false;
1348 BasicBlock
*BB
= Node
->getBlock();
1350 // If this block has a single predecessor, then the predecessor is the parent
1351 // of the domtree node and all of the live out memory values are still current
1352 // in this block. If this block has multiple predecessors, then they could
1353 // have invalidated the live-out memory values of our parent value. For now,
1354 // just be conservative and invalidate memory if this block has multiple
1356 if (!BB
->getSinglePredecessor())
1357 ++CurrentGeneration
;
1359 // If this node has a single predecessor which ends in a conditional branch,
1360 // we can infer the value of the branch condition given that we took this
1361 // path. We need the single predecessor to ensure there's not another path
1362 // which reaches this block where the condition might hold a different
1363 // value. Since we're adding this to the scoped hash table (like any other
1364 // def), it will have been popped if we encounter a future merge block.
1365 if (BasicBlock
*Pred
= BB
->getSinglePredecessor()) {
1366 auto *BI
= dyn_cast
<BranchInst
>(Pred
->getTerminator());
1367 if (BI
&& BI
->isConditional()) {
1368 auto *CondInst
= dyn_cast
<Instruction
>(BI
->getCondition());
1369 if (CondInst
&& SimpleValue::canHandle(CondInst
))
1370 Changed
|= handleBranchCondition(CondInst
, BI
, BB
, Pred
);
1374 /// LastStore - Keep track of the last non-volatile store that we saw... for
1375 /// as long as there in no instruction that reads memory. If we see a store
1376 /// to the same location, we delete the dead store. This zaps trivial dead
1377 /// stores which can occur in bitfield code among other things.
1378 Instruction
*LastStore
= nullptr;
1380 // See if any instructions in the block can be eliminated. If so, do it. If
1381 // not, add them to AvailableValues.
1382 for (Instruction
&Inst
: make_early_inc_range(*BB
)) {
1383 // Dead instructions should just be removed.
1384 if (isInstructionTriviallyDead(&Inst
, &TLI
)) {
1385 LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst
<< '\n');
1386 if (!DebugCounter::shouldExecute(CSECounter
)) {
1387 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1391 salvageKnowledge(&Inst
, &AC
);
1392 salvageDebugInfo(Inst
);
1394 Inst
.eraseFromParent();
1400 // Skip assume intrinsics, they don't really have side effects (although
1401 // they're marked as such to ensure preservation of control dependencies),
1402 // and this pass will not bother with its removal. However, we should mark
1403 // its condition as true for all dominated blocks.
1404 if (auto *Assume
= dyn_cast
<AssumeInst
>(&Inst
)) {
1405 auto *CondI
= dyn_cast
<Instruction
>(Assume
->getArgOperand(0));
1406 if (CondI
&& SimpleValue::canHandle(CondI
)) {
1407 LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
1409 AvailableValues
.insert(CondI
, ConstantInt::getTrue(BB
->getContext()));
1411 LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst
<< '\n');
1415 // Likewise, noalias intrinsics don't actually write.
1417 m_Intrinsic
<Intrinsic::experimental_noalias_scope_decl
>())) {
1418 LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
1423 // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
1424 if (match(&Inst
, m_Intrinsic
<Intrinsic::sideeffect
>())) {
1425 LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst
<< '\n');
1429 // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
1430 if (match(&Inst
, m_Intrinsic
<Intrinsic::pseudoprobe
>())) {
1431 LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst
<< '\n');
1435 // We can skip all invariant.start intrinsics since they only read memory,
1436 // and we can forward values across it. For invariant starts without
1437 // invariant ends, we can use the fact that the invariantness never ends to
1438 // start a scope in the current generaton which is true for all future
1439 // generations. Also, we dont need to consume the last store since the
1440 // semantics of invariant.start allow us to perform DSE of the last
1441 // store, if there was a store following invariant.start. Consider:
1444 // invariant.start(p)
1446 // We can DSE the store to 30, since the store 40 to invariant location p
1447 // causes undefined behaviour.
1448 if (match(&Inst
, m_Intrinsic
<Intrinsic::invariant_start
>())) {
1449 // If there are any uses, the scope might end.
1450 if (!Inst
.use_empty())
1452 MemoryLocation MemLoc
=
1453 MemoryLocation::getForArgument(&cast
<CallInst
>(Inst
), 1, TLI
);
1454 // Don't start a scope if we already have a better one pushed
1455 if (!AvailableInvariants
.count(MemLoc
))
1456 AvailableInvariants
.insert(MemLoc
, CurrentGeneration
);
1460 if (isGuard(&Inst
)) {
1462 dyn_cast
<Instruction
>(cast
<CallInst
>(Inst
).getArgOperand(0))) {
1463 if (SimpleValue::canHandle(CondI
)) {
1464 // Do we already know the actual value of this condition?
1465 if (auto *KnownCond
= AvailableValues
.lookup(CondI
)) {
1466 // Is the condition known to be true?
1467 if (isa
<ConstantInt
>(KnownCond
) &&
1468 cast
<ConstantInt
>(KnownCond
)->isOne()) {
1470 << "EarlyCSE removing guard: " << Inst
<< '\n');
1471 salvageKnowledge(&Inst
, &AC
);
1473 Inst
.eraseFromParent();
1477 // Use the known value if it wasn't true.
1478 cast
<CallInst
>(Inst
).setArgOperand(0, KnownCond
);
1480 // The condition we're on guarding here is true for all dominated
1482 AvailableValues
.insert(CondI
, ConstantInt::getTrue(BB
->getContext()));
1486 // Guard intrinsics read all memory, but don't write any memory.
1487 // Accordingly, don't update the generation but consume the last store (to
1488 // avoid an incorrect DSE).
1489 LastStore
= nullptr;
1493 // If the instruction can be simplified (e.g. X+0 = X) then replace it with
1494 // its simpler value.
1495 if (Value
*V
= simplifyInstruction(&Inst
, SQ
)) {
1496 LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst
<< " to: " << *V
1498 if (!DebugCounter::shouldExecute(CSECounter
)) {
1499 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1501 bool Killed
= false;
1502 if (!Inst
.use_empty()) {
1503 Inst
.replaceAllUsesWith(V
);
1506 if (isInstructionTriviallyDead(&Inst
, &TLI
)) {
1507 salvageKnowledge(&Inst
, &AC
);
1509 Inst
.eraseFromParent();
1520 // If this is a simple instruction that we can value number, process it.
1521 if (SimpleValue::canHandle(&Inst
)) {
1522 if ([[maybe_unused
]] auto *CI
= dyn_cast
<ConstrainedFPIntrinsic
>(&Inst
)) {
1523 assert(CI
->getExceptionBehavior() != fp::ebStrict
&&
1524 "Unexpected ebStrict from SimpleValue::canHandle()");
1525 assert((!CI
->getRoundingMode() ||
1526 CI
->getRoundingMode() != RoundingMode::Dynamic
) &&
1527 "Unexpected dynamic rounding from SimpleValue::canHandle()");
1529 // See if the instruction has an available value. If so, use it.
1530 if (Value
*V
= AvailableValues
.lookup(&Inst
)) {
1531 LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst
<< " to: " << *V
1533 if (!DebugCounter::shouldExecute(CSECounter
)) {
1534 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1537 combineIRFlags(Inst
, V
);
1538 Inst
.replaceAllUsesWith(V
);
1539 salvageKnowledge(&Inst
, &AC
);
1541 Inst
.eraseFromParent();
1547 // Otherwise, just remember that this value is available.
1548 AvailableValues
.insert(&Inst
, &Inst
);
1552 ParseMemoryInst
MemInst(&Inst
, TTI
);
1553 // If this is a non-volatile load, process it.
1554 if (MemInst
.isValid() && MemInst
.isLoad()) {
1555 // (conservatively) we can't peak past the ordering implied by this
1556 // operation, but we can add this load to our set of available values
1557 if (MemInst
.isVolatile() || !MemInst
.isUnordered()) {
1558 LastStore
= nullptr;
1559 ++CurrentGeneration
;
1562 if (MemInst
.isInvariantLoad()) {
1563 // If we pass an invariant load, we know that memory location is
1564 // indefinitely constant from the moment of first dereferenceability.
1565 // We conservatively treat the invariant_load as that moment. If we
1566 // pass a invariant load after already establishing a scope, don't
1567 // restart it since we want to preserve the earliest point seen.
1568 auto MemLoc
= MemoryLocation::get(&Inst
);
1569 if (!AvailableInvariants
.count(MemLoc
))
1570 AvailableInvariants
.insert(MemLoc
, CurrentGeneration
);
1573 // If we have an available version of this load, and if it is the right
1574 // generation or the load is known to be from an invariant location,
1575 // replace this instruction.
1577 // If either the dominating load or the current load are invariant, then
1578 // we can assume the current load loads the same value as the dominating
1580 LoadValue InVal
= AvailableLoads
.lookup(MemInst
.getPointerOperand());
1581 if (Value
*Op
= getMatchingValue(InVal
, MemInst
, CurrentGeneration
)) {
1582 LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
1583 << " to: " << *InVal
.DefInst
<< '\n');
1584 if (!DebugCounter::shouldExecute(CSECounter
)) {
1585 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1589 if (auto *I
= dyn_cast
<Instruction
>(Op
))
1590 combineMetadataForCSE(I
, &Inst
, false);
1591 if (!Inst
.use_empty())
1592 Inst
.replaceAllUsesWith(Op
);
1593 salvageKnowledge(&Inst
, &AC
);
1595 Inst
.eraseFromParent();
1601 // Otherwise, remember that we have this instruction.
1602 AvailableLoads
.insert(MemInst
.getPointerOperand(),
1603 LoadValue(&Inst
, CurrentGeneration
,
1604 MemInst
.getMatchingId(),
1607 LastStore
= nullptr;
1611 // If this instruction may read from memory or throw (and potentially read
1612 // from memory in the exception handler), forget LastStore. Load/store
1613 // intrinsics will indicate both a read and a write to memory. The target
1614 // may override this (e.g. so that a store intrinsic does not read from
1615 // memory, and thus will be treated the same as a regular store for
1616 // commoning purposes).
1617 if ((Inst
.mayReadFromMemory() || Inst
.mayThrow()) &&
1618 !(MemInst
.isValid() && !MemInst
.mayReadFromMemory()))
1619 LastStore
= nullptr;
1621 // If this is a read-only call, process it.
1622 if (CallValue::canHandle(&Inst
)) {
1623 // If we have an available version of this call, and if it is the right
1624 // generation, replace this instruction.
1625 std::pair
<Instruction
*, unsigned> InVal
= AvailableCalls
.lookup(&Inst
);
1626 if (InVal
.first
!= nullptr &&
1627 isSameMemGeneration(InVal
.second
, CurrentGeneration
, InVal
.first
,
1629 LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
1630 << " to: " << *InVal
.first
<< '\n');
1631 if (!DebugCounter::shouldExecute(CSECounter
)) {
1632 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1635 if (!Inst
.use_empty())
1636 Inst
.replaceAllUsesWith(InVal
.first
);
1637 salvageKnowledge(&Inst
, &AC
);
1639 Inst
.eraseFromParent();
1645 // Otherwise, remember that we have this instruction.
1646 AvailableCalls
.insert(&Inst
, std::make_pair(&Inst
, CurrentGeneration
));
1650 // Compare GEP instructions based on offset.
1651 if (GEPValue::canHandle(&Inst
)) {
1652 auto *GEP
= cast
<GetElementPtrInst
>(&Inst
);
1653 APInt Offset
= APInt(SQ
.DL
.getIndexTypeSizeInBits(GEP
->getType()), 0);
1654 GEPValue
GEPVal(GEP
, GEP
->accumulateConstantOffset(SQ
.DL
, Offset
)
1655 ? Offset
.trySExtValue()
1657 if (Value
*V
= AvailableGEPs
.lookup(GEPVal
)) {
1658 LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst
<< " to: " << *V
1660 combineIRFlags(Inst
, V
);
1661 Inst
.replaceAllUsesWith(V
);
1662 salvageKnowledge(&Inst
, &AC
);
1664 Inst
.eraseFromParent();
1670 // Otherwise, just remember that we have this GEP.
1671 AvailableGEPs
.insert(GEPVal
, &Inst
);
1675 // A release fence requires that all stores complete before it, but does
1676 // not prevent the reordering of following loads 'before' the fence. As a
1677 // result, we don't need to consider it as writing to memory and don't need
1678 // to advance the generation. We do need to prevent DSE across the fence,
1679 // but that's handled above.
1680 if (auto *FI
= dyn_cast
<FenceInst
>(&Inst
))
1681 if (FI
->getOrdering() == AtomicOrdering::Release
) {
1682 assert(Inst
.mayReadFromMemory() && "relied on to prevent DSE above");
1686 // write back DSE - If we write back the same value we just loaded from
1687 // the same location and haven't passed any intervening writes or ordering
1688 // operations, we can remove the write. The primary benefit is in allowing
1689 // the available load table to remain valid and value forward past where
1690 // the store originally was.
1691 if (MemInst
.isValid() && MemInst
.isStore()) {
1692 LoadValue InVal
= AvailableLoads
.lookup(MemInst
.getPointerOperand());
1693 if (InVal
.DefInst
&&
1694 InVal
.DefInst
== getMatchingValue(InVal
, MemInst
, CurrentGeneration
)) {
1695 // It is okay to have a LastStore to a different pointer here if MemorySSA
1696 // tells us that the load and store are from the same memory generation.
1697 // In that case, LastStore should keep its present value since we're
1698 // removing the current store.
1699 assert((!LastStore
||
1700 ParseMemoryInst(LastStore
, TTI
).getPointerOperand() ==
1701 MemInst
.getPointerOperand() ||
1703 "can't have an intervening store if not using MemorySSA!");
1704 LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst
<< '\n');
1705 if (!DebugCounter::shouldExecute(CSECounter
)) {
1706 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1709 salvageKnowledge(&Inst
, &AC
);
1711 Inst
.eraseFromParent();
1714 // We can avoid incrementing the generation count since we were able
1715 // to eliminate this store.
1720 // Okay, this isn't something we can CSE at all. Check to see if it is
1721 // something that could modify memory. If so, our available memory values
1722 // cannot be used so bump the generation count.
1723 if (Inst
.mayWriteToMemory()) {
1724 ++CurrentGeneration
;
1726 if (MemInst
.isValid() && MemInst
.isStore()) {
1727 // We do a trivial form of DSE if there are two stores to the same
1728 // location with no intervening loads. Delete the earlier store.
1730 if (overridingStores(ParseMemoryInst(LastStore
, TTI
), MemInst
)) {
1731 LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
1732 << " due to: " << Inst
<< '\n');
1733 if (!DebugCounter::shouldExecute(CSECounter
)) {
1734 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1736 salvageKnowledge(&Inst
, &AC
);
1737 removeMSSA(*LastStore
);
1738 LastStore
->eraseFromParent();
1741 LastStore
= nullptr;
1744 // fallthrough - we can exploit information about this store
1747 // Okay, we just invalidated anything we knew about loaded values. Try
1748 // to salvage *something* by remembering that the stored value is a live
1749 // version of the pointer. It is safe to forward from volatile stores
1750 // to non-volatile loads, so we don't have to check for volatility of
1752 AvailableLoads
.insert(MemInst
.getPointerOperand(),
1753 LoadValue(&Inst
, CurrentGeneration
,
1754 MemInst
.getMatchingId(),
1758 // Remember that this was the last unordered store we saw for DSE. We
1759 // don't yet handle DSE on ordered or volatile stores since we don't
1760 // have a good way to model the ordering requirement for following
1761 // passes once the store is removed. We could insert a fence, but
1762 // since fences are slightly stronger than stores in their ordering,
1763 // it's not clear this is a profitable transform. Another option would
1764 // be to merge the ordering with that of the post dominating store.
1765 if (MemInst
.isUnordered() && !MemInst
.isVolatile())
1768 LastStore
= nullptr;
1776 bool EarlyCSE::run() {
1777 // Note, deque is being used here because there is significant performance
1778 // gains over vector when the container becomes very large due to the
1779 // specific access patterns. For more information see the mailing list
1780 // discussion on this:
1781 // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
1782 std::deque
<StackNode
*> nodesToProcess
;
1784 bool Changed
= false;
1786 // Process the root node.
1787 nodesToProcess
.push_back(new StackNode(
1788 AvailableValues
, AvailableLoads
, AvailableInvariants
, AvailableCalls
,
1789 AvailableGEPs
, CurrentGeneration
, DT
.getRootNode(),
1790 DT
.getRootNode()->begin(), DT
.getRootNode()->end()));
1792 assert(!CurrentGeneration
&& "Create a new EarlyCSE instance to rerun it.");
1794 // Process the stack.
1795 while (!nodesToProcess
.empty()) {
1796 // Grab the first item off the stack. Set the current generation, remove
1797 // the node from the stack, and process it.
1798 StackNode
*NodeToProcess
= nodesToProcess
.back();
1800 // Initialize class members.
1801 CurrentGeneration
= NodeToProcess
->currentGeneration();
1803 // Check if the node needs to be processed.
1804 if (!NodeToProcess
->isProcessed()) {
1805 // Process the node.
1806 Changed
|= processNode(NodeToProcess
->node());
1807 NodeToProcess
->childGeneration(CurrentGeneration
);
1808 NodeToProcess
->process();
1809 } else if (NodeToProcess
->childIter() != NodeToProcess
->end()) {
1810 // Push the next child onto the stack.
1811 DomTreeNode
*child
= NodeToProcess
->nextChild();
1812 nodesToProcess
.push_back(new StackNode(
1813 AvailableValues
, AvailableLoads
, AvailableInvariants
, AvailableCalls
,
1814 AvailableGEPs
, NodeToProcess
->childGeneration(), child
,
1815 child
->begin(), child
->end()));
1817 // It has been processed, and there are no more children to process,
1818 // so delete it and pop it off the stack.
1819 delete NodeToProcess
;
1820 nodesToProcess
.pop_back();
1822 } // while (!nodes...)
1827 PreservedAnalyses
EarlyCSEPass::run(Function
&F
,
1828 FunctionAnalysisManager
&AM
) {
1829 auto &TLI
= AM
.getResult
<TargetLibraryAnalysis
>(F
);
1830 auto &TTI
= AM
.getResult
<TargetIRAnalysis
>(F
);
1831 auto &DT
= AM
.getResult
<DominatorTreeAnalysis
>(F
);
1832 auto &AC
= AM
.getResult
<AssumptionAnalysis
>(F
);
1834 UseMemorySSA
? &AM
.getResult
<MemorySSAAnalysis
>(F
).getMSSA() : nullptr;
1836 EarlyCSE
CSE(F
.getParent()->getDataLayout(), TLI
, TTI
, DT
, AC
, MSSA
);
1839 return PreservedAnalyses::all();
1841 PreservedAnalyses PA
;
1842 PA
.preserveSet
<CFGAnalyses
>();
1844 PA
.preserve
<MemorySSAAnalysis
>();
1848 void EarlyCSEPass::printPipeline(
1849 raw_ostream
&OS
, function_ref
<StringRef(StringRef
)> MapClassName2PassName
) {
1850 static_cast<PassInfoMixin
<EarlyCSEPass
> *>(this)->printPipeline(
1851 OS
, MapClassName2PassName
);
1860 /// A simple and fast domtree-based CSE pass.
1862 /// This pass does a simple depth-first walk over the dominator tree,
1863 /// eliminating trivially redundant instructions and using instsimplify to
1864 /// canonicalize things as it goes. It is intended to be fast and catch obvious
1865 /// cases so that instcombine and other passes are more effective. It is
1866 /// expected that a later pass of GVN will catch the interesting/hard cases.
1867 template<bool UseMemorySSA
>
1868 class EarlyCSELegacyCommonPass
: public FunctionPass
{
1872 EarlyCSELegacyCommonPass() : FunctionPass(ID
) {
1874 initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
1876 initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
1879 bool runOnFunction(Function
&F
) override
{
1880 if (skipFunction(F
))
1883 auto &TLI
= getAnalysis
<TargetLibraryInfoWrapperPass
>().getTLI(F
);
1884 auto &TTI
= getAnalysis
<TargetTransformInfoWrapperPass
>().getTTI(F
);
1885 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
1886 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(F
);
1888 UseMemorySSA
? &getAnalysis
<MemorySSAWrapperPass
>().getMSSA() : nullptr;
1890 EarlyCSE
CSE(F
.getParent()->getDataLayout(), TLI
, TTI
, DT
, AC
, MSSA
);
1895 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
1896 AU
.addRequired
<AssumptionCacheTracker
>();
1897 AU
.addRequired
<DominatorTreeWrapperPass
>();
1898 AU
.addRequired
<TargetLibraryInfoWrapperPass
>();
1899 AU
.addRequired
<TargetTransformInfoWrapperPass
>();
1901 AU
.addRequired
<AAResultsWrapperPass
>();
1902 AU
.addRequired
<MemorySSAWrapperPass
>();
1903 AU
.addPreserved
<MemorySSAWrapperPass
>();
1905 AU
.addPreserved
<GlobalsAAWrapperPass
>();
1906 AU
.addPreserved
<AAResultsWrapperPass
>();
1907 AU
.setPreservesCFG();
1911 } // end anonymous namespace
1913 using EarlyCSELegacyPass
= EarlyCSELegacyCommonPass
</*UseMemorySSA=*/false>;
1916 char EarlyCSELegacyPass::ID
= 0;
1918 INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass
, "early-cse", "Early CSE", false,
1920 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass
)
1921 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
)
1922 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
1923 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
1924 INITIALIZE_PASS_END(EarlyCSELegacyPass
, "early-cse", "Early CSE", false, false)
1926 using EarlyCSEMemSSALegacyPass
=
1927 EarlyCSELegacyCommonPass
</*UseMemorySSA=*/true>;
1930 char EarlyCSEMemSSALegacyPass::ID
= 0;
1932 FunctionPass
*llvm::createEarlyCSEPass(bool UseMemorySSA
) {
1934 return new EarlyCSEMemSSALegacyPass();
1936 return new EarlyCSELegacyPass();
1939 INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass
, "early-cse-memssa",
1940 "Early CSE w/ MemorySSA", false, false)
1941 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass
)
1942 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
)
1943 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
)
1944 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
1945 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
1946 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass
)
1947 INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass
, "early-cse-memssa",
1948 "Early CSE w/ MemorySSA", false, false)