1 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass performs a simple dominator tree walk that eliminates trivially
10 // redundant instructions.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Transforms/Scalar/EarlyCSE.h"
15 #include "llvm/ADT/DenseMapInfo.h"
16 #include "llvm/ADT/Hashing.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/ScopedHashTable.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/Analysis/AssumptionCache.h"
22 #include "llvm/Analysis/GlobalsModRef.h"
23 #include "llvm/Analysis/GuardUtils.h"
24 #include "llvm/Analysis/InstructionSimplify.h"
25 #include "llvm/Analysis/MemorySSA.h"
26 #include "llvm/Analysis/MemorySSAUpdater.h"
27 #include "llvm/Analysis/TargetLibraryInfo.h"
28 #include "llvm/Analysis/TargetTransformInfo.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/IR/BasicBlock.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/Dominators.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/InstrTypes.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/LLVMContext.h"
39 #include "llvm/IR/PassManager.h"
40 #include "llvm/IR/PatternMatch.h"
41 #include "llvm/IR/Type.h"
42 #include "llvm/IR/Value.h"
43 #include "llvm/InitializePasses.h"
44 #include "llvm/Pass.h"
45 #include "llvm/Support/Allocator.h"
46 #include "llvm/Support/AtomicOrdering.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/Debug.h"
49 #include "llvm/Support/DebugCounter.h"
50 #include "llvm/Support/RecyclingAllocator.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include "llvm/Transforms/Scalar.h"
53 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
54 #include "llvm/Transforms/Utils/Local.h"
61 using namespace llvm::PatternMatch
;
63 #define DEBUG_TYPE "early-cse"
65 STATISTIC(NumSimplify
, "Number of instructions simplified or DCE'd");
66 STATISTIC(NumCSE
, "Number of instructions CSE'd");
67 STATISTIC(NumCSECVP
, "Number of compare instructions CVP'd");
68 STATISTIC(NumCSELoad
, "Number of load instructions CSE'd");
69 STATISTIC(NumCSECall
, "Number of call instructions CSE'd");
70 STATISTIC(NumCSEGEP
, "Number of GEP instructions CSE'd");
71 STATISTIC(NumDSE
, "Number of trivial dead stores removed");
73 DEBUG_COUNTER(CSECounter
, "early-cse",
74 "Controls which instructions are removed");
76 static cl::opt
<unsigned> EarlyCSEMssaOptCap(
77 "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden
,
78 cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
79 "for faster compile. Caps the MemorySSA clobbering calls."));
81 static cl::opt
<bool> EarlyCSEDebugHash(
82 "earlycse-debug-hash", cl::init(false), cl::Hidden
,
83 cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
84 "function is well-behaved w.r.t. its isEqual predicate"));
86 //===----------------------------------------------------------------------===//
88 //===----------------------------------------------------------------------===//
92 /// Struct representing the available values in the scoped hash table.
96 SimpleValue(Instruction
*I
) : Inst(I
) {
97 assert((isSentinel() || canHandle(I
)) && "Inst can't be handled!");
100 bool isSentinel() const {
101 return Inst
== DenseMapInfo
<Instruction
*>::getEmptyKey() ||
102 Inst
== DenseMapInfo
<Instruction
*>::getTombstoneKey();
105 static bool canHandle(Instruction
*Inst
) {
106 // This can only handle non-void readnone functions.
107 // Also handled are constrained intrinsic that look like the types
108 // of instruction handled below (UnaryOperator, etc.).
109 if (CallInst
*CI
= dyn_cast
<CallInst
>(Inst
)) {
110 if (Function
*F
= CI
->getCalledFunction()) {
111 switch ((Intrinsic::ID
)F
->getIntrinsicID()) {
112 case Intrinsic::experimental_constrained_fadd
:
113 case Intrinsic::experimental_constrained_fsub
:
114 case Intrinsic::experimental_constrained_fmul
:
115 case Intrinsic::experimental_constrained_fdiv
:
116 case Intrinsic::experimental_constrained_frem
:
117 case Intrinsic::experimental_constrained_fptosi
:
118 case Intrinsic::experimental_constrained_sitofp
:
119 case Intrinsic::experimental_constrained_fptoui
:
120 case Intrinsic::experimental_constrained_uitofp
:
121 case Intrinsic::experimental_constrained_fcmp
:
122 case Intrinsic::experimental_constrained_fcmps
: {
123 auto *CFP
= cast
<ConstrainedFPIntrinsic
>(CI
);
124 if (CFP
->getExceptionBehavior() &&
125 CFP
->getExceptionBehavior() == fp::ebStrict
)
127 // Since we CSE across function calls we must not allow
128 // the rounding mode to change.
129 if (CFP
->getRoundingMode() &&
130 CFP
->getRoundingMode() == RoundingMode::Dynamic
)
136 return CI
->doesNotAccessMemory() && !CI
->getType()->isVoidTy() &&
137 // FIXME: Currently the calls which may access the thread id may
138 // be considered as not accessing the memory. But this is
139 // problematic for coroutines, since coroutines may resume in a
140 // different thread. So we disable the optimization here for the
141 // correctness. However, it may block many other correct
142 // optimizations. Revert this one when we detect the memory
143 // accessing kind more precisely.
144 !CI
->getFunction()->isPresplitCoroutine();
146 return isa
<CastInst
>(Inst
) || isa
<UnaryOperator
>(Inst
) ||
147 isa
<BinaryOperator
>(Inst
) || isa
<CmpInst
>(Inst
) ||
148 isa
<SelectInst
>(Inst
) || isa
<ExtractElementInst
>(Inst
) ||
149 isa
<InsertElementInst
>(Inst
) || isa
<ShuffleVectorInst
>(Inst
) ||
150 isa
<ExtractValueInst
>(Inst
) || isa
<InsertValueInst
>(Inst
) ||
151 isa
<FreezeInst
>(Inst
);
155 } // end anonymous namespace
159 template <> struct DenseMapInfo
<SimpleValue
> {
160 static inline SimpleValue
getEmptyKey() {
161 return DenseMapInfo
<Instruction
*>::getEmptyKey();
164 static inline SimpleValue
getTombstoneKey() {
165 return DenseMapInfo
<Instruction
*>::getTombstoneKey();
168 static unsigned getHashValue(SimpleValue Val
);
169 static bool isEqual(SimpleValue LHS
, SimpleValue RHS
);
172 } // end namespace llvm
174 /// Match a 'select' including an optional 'not's of the condition.
175 static bool matchSelectWithOptionalNotCond(Value
*V
, Value
*&Cond
, Value
*&A
,
177 SelectPatternFlavor
&Flavor
) {
178 // Return false if V is not even a select.
179 if (!match(V
, m_Select(m_Value(Cond
), m_Value(A
), m_Value(B
))))
182 // Look through a 'not' of the condition operand by swapping A/B.
184 if (match(Cond
, m_Not(m_Value(CondNot
)))) {
189 // Match canonical forms of min/max. We are not using ValueTracking's
190 // more powerful matchSelectPattern() because it may rely on instruction flags
191 // such as "nsw". That would be incompatible with the current hashing
192 // mechanism that may remove flags to increase the likelihood of CSE.
194 Flavor
= SPF_UNKNOWN
;
195 CmpInst::Predicate Pred
;
197 if (!match(Cond
, m_ICmp(Pred
, m_Specific(A
), m_Specific(B
)))) {
198 // Check for commuted variants of min/max by swapping predicate.
199 // If we do not match the standard or commuted patterns, this is not a
200 // recognized form of min/max, but it is still a select, so return true.
201 if (!match(Cond
, m_ICmp(Pred
, m_Specific(B
), m_Specific(A
))))
203 Pred
= ICmpInst::getSwappedPredicate(Pred
);
207 case CmpInst::ICMP_UGT
: Flavor
= SPF_UMAX
; break;
208 case CmpInst::ICMP_ULT
: Flavor
= SPF_UMIN
; break;
209 case CmpInst::ICMP_SGT
: Flavor
= SPF_SMAX
; break;
210 case CmpInst::ICMP_SLT
: Flavor
= SPF_SMIN
; break;
211 // Non-strict inequalities.
212 case CmpInst::ICMP_ULE
: Flavor
= SPF_UMIN
; break;
213 case CmpInst::ICMP_UGE
: Flavor
= SPF_UMAX
; break;
214 case CmpInst::ICMP_SLE
: Flavor
= SPF_SMIN
; break;
215 case CmpInst::ICMP_SGE
: Flavor
= SPF_SMAX
; break;
222 static unsigned hashCallInst(CallInst
*CI
) {
223 // Don't CSE convergent calls in different basic blocks, because they
224 // implicitly depend on the set of threads that is currently executing.
225 if (CI
->isConvergent()) {
227 CI
->getOpcode(), CI
->getParent(),
228 hash_combine_range(CI
->value_op_begin(), CI
->value_op_end()));
232 hash_combine_range(CI
->value_op_begin(), CI
->value_op_end()));
235 static unsigned getHashValueImpl(SimpleValue Val
) {
236 Instruction
*Inst
= Val
.Inst
;
237 // Hash in all of the operands as pointers.
238 if (BinaryOperator
*BinOp
= dyn_cast
<BinaryOperator
>(Inst
)) {
239 Value
*LHS
= BinOp
->getOperand(0);
240 Value
*RHS
= BinOp
->getOperand(1);
241 if (BinOp
->isCommutative() && BinOp
->getOperand(0) > BinOp
->getOperand(1))
244 return hash_combine(BinOp
->getOpcode(), LHS
, RHS
);
247 if (CmpInst
*CI
= dyn_cast
<CmpInst
>(Inst
)) {
248 // Compares can be commuted by swapping the comparands and
249 // updating the predicate. Choose the form that has the
250 // comparands in sorted order, or in the case of a tie, the
251 // one with the lower predicate.
252 Value
*LHS
= CI
->getOperand(0);
253 Value
*RHS
= CI
->getOperand(1);
254 CmpInst::Predicate Pred
= CI
->getPredicate();
255 CmpInst::Predicate SwappedPred
= CI
->getSwappedPredicate();
256 if (std::tie(LHS
, Pred
) > std::tie(RHS
, SwappedPred
)) {
260 return hash_combine(Inst
->getOpcode(), Pred
, LHS
, RHS
);
263 // Hash general selects to allow matching commuted true/false operands.
264 SelectPatternFlavor SPF
;
266 if (matchSelectWithOptionalNotCond(Inst
, Cond
, A
, B
, SPF
)) {
267 // Hash min/max (cmp + select) to allow for commuted operands.
268 // Min/max may also have non-canonical compare predicate (eg, the compare for
269 // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
271 // TODO: We should also detect FP min/max.
272 if (SPF
== SPF_SMIN
|| SPF
== SPF_SMAX
||
273 SPF
== SPF_UMIN
|| SPF
== SPF_UMAX
) {
276 return hash_combine(Inst
->getOpcode(), SPF
, A
, B
);
279 // Hash general selects to allow matching commuted true/false operands.
281 // If we do not have a compare as the condition, just hash in the condition.
282 CmpInst::Predicate Pred
;
284 if (!match(Cond
, m_Cmp(Pred
, m_Value(X
), m_Value(Y
))))
285 return hash_combine(Inst
->getOpcode(), Cond
, A
, B
);
287 // Similar to cmp normalization (above) - canonicalize the predicate value:
288 // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
289 if (CmpInst::getInversePredicate(Pred
) < Pred
) {
290 Pred
= CmpInst::getInversePredicate(Pred
);
293 return hash_combine(Inst
->getOpcode(), Pred
, X
, Y
, A
, B
);
296 if (CastInst
*CI
= dyn_cast
<CastInst
>(Inst
))
297 return hash_combine(CI
->getOpcode(), CI
->getType(), CI
->getOperand(0));
299 if (FreezeInst
*FI
= dyn_cast
<FreezeInst
>(Inst
))
300 return hash_combine(FI
->getOpcode(), FI
->getOperand(0));
302 if (const ExtractValueInst
*EVI
= dyn_cast
<ExtractValueInst
>(Inst
))
303 return hash_combine(EVI
->getOpcode(), EVI
->getOperand(0),
304 hash_combine_range(EVI
->idx_begin(), EVI
->idx_end()));
306 if (const InsertValueInst
*IVI
= dyn_cast
<InsertValueInst
>(Inst
))
307 return hash_combine(IVI
->getOpcode(), IVI
->getOperand(0),
309 hash_combine_range(IVI
->idx_begin(), IVI
->idx_end()));
311 assert((isa
<CallInst
>(Inst
) || isa
<ExtractElementInst
>(Inst
) ||
312 isa
<InsertElementInst
>(Inst
) || isa
<ShuffleVectorInst
>(Inst
) ||
313 isa
<UnaryOperator
>(Inst
) || isa
<FreezeInst
>(Inst
)) &&
314 "Invalid/unknown instruction");
316 // Handle intrinsics with commutative operands.
317 auto *II
= dyn_cast
<IntrinsicInst
>(Inst
);
318 if (II
&& II
->isCommutative() && II
->arg_size() >= 2) {
319 Value
*LHS
= II
->getArgOperand(0), *RHS
= II
->getArgOperand(1);
323 II
->getOpcode(), LHS
, RHS
,
324 hash_combine_range(II
->value_op_begin() + 2, II
->value_op_end()));
327 // gc.relocate is 'special' call: its second and third operands are
328 // not real values, but indices into statepoint's argument list.
329 // Get values they point to.
330 if (const GCRelocateInst
*GCR
= dyn_cast
<GCRelocateInst
>(Inst
))
331 return hash_combine(GCR
->getOpcode(), GCR
->getOperand(0),
332 GCR
->getBasePtr(), GCR
->getDerivedPtr());
334 // Don't CSE convergent calls in different basic blocks, because they
335 // implicitly depend on the set of threads that is currently executing.
336 if (CallInst
*CI
= dyn_cast
<CallInst
>(Inst
))
337 return hashCallInst(CI
);
339 // Mix in the opcode.
342 hash_combine_range(Inst
->value_op_begin(), Inst
->value_op_end()));
345 unsigned DenseMapInfo
<SimpleValue
>::getHashValue(SimpleValue Val
) {
347 // If -earlycse-debug-hash was specified, return a constant -- this
348 // will force all hashing to collide, so we'll exhaustively search
349 // the table for a match, and the assertion in isEqual will fire if
350 // there's a bug causing equal keys to hash differently.
351 if (EarlyCSEDebugHash
)
354 return getHashValueImpl(Val
);
357 static bool isEqualImpl(SimpleValue LHS
, SimpleValue RHS
) {
358 Instruction
*LHSI
= LHS
.Inst
, *RHSI
= RHS
.Inst
;
360 if (LHS
.isSentinel() || RHS
.isSentinel())
363 if (LHSI
->getOpcode() != RHSI
->getOpcode())
365 if (LHSI
->isIdenticalToWhenDefined(RHSI
, /*IntersectAttrs=*/true)) {
366 // Convergent calls implicitly depend on the set of threads that is
367 // currently executing, so conservatively return false if they are in
368 // different basic blocks.
369 if (CallInst
*CI
= dyn_cast
<CallInst
>(LHSI
);
370 CI
&& CI
->isConvergent() && LHSI
->getParent() != RHSI
->getParent())
376 // If we're not strictly identical, we still might be a commutable instruction
377 if (BinaryOperator
*LHSBinOp
= dyn_cast
<BinaryOperator
>(LHSI
)) {
378 if (!LHSBinOp
->isCommutative())
381 assert(isa
<BinaryOperator
>(RHSI
) &&
382 "same opcode, but different instruction type?");
383 BinaryOperator
*RHSBinOp
= cast
<BinaryOperator
>(RHSI
);
386 return LHSBinOp
->getOperand(0) == RHSBinOp
->getOperand(1) &&
387 LHSBinOp
->getOperand(1) == RHSBinOp
->getOperand(0);
389 if (CmpInst
*LHSCmp
= dyn_cast
<CmpInst
>(LHSI
)) {
390 assert(isa
<CmpInst
>(RHSI
) &&
391 "same opcode, but different instruction type?");
392 CmpInst
*RHSCmp
= cast
<CmpInst
>(RHSI
);
394 return LHSCmp
->getOperand(0) == RHSCmp
->getOperand(1) &&
395 LHSCmp
->getOperand(1) == RHSCmp
->getOperand(0) &&
396 LHSCmp
->getSwappedPredicate() == RHSCmp
->getPredicate();
399 auto *LII
= dyn_cast
<IntrinsicInst
>(LHSI
);
400 auto *RII
= dyn_cast
<IntrinsicInst
>(RHSI
);
401 if (LII
&& RII
&& LII
->getIntrinsicID() == RII
->getIntrinsicID() &&
402 LII
->isCommutative() && LII
->arg_size() >= 2) {
403 return LII
->getArgOperand(0) == RII
->getArgOperand(1) &&
404 LII
->getArgOperand(1) == RII
->getArgOperand(0) &&
405 std::equal(LII
->arg_begin() + 2, LII
->arg_end(),
406 RII
->arg_begin() + 2, RII
->arg_end());
409 // See comment above in `getHashValue()`.
410 if (const GCRelocateInst
*GCR1
= dyn_cast
<GCRelocateInst
>(LHSI
))
411 if (const GCRelocateInst
*GCR2
= dyn_cast
<GCRelocateInst
>(RHSI
))
412 return GCR1
->getOperand(0) == GCR2
->getOperand(0) &&
413 GCR1
->getBasePtr() == GCR2
->getBasePtr() &&
414 GCR1
->getDerivedPtr() == GCR2
->getDerivedPtr();
416 // Min/max can occur with commuted operands, non-canonical predicates,
417 // and/or non-canonical operands.
418 // Selects can be non-trivially equivalent via inverted conditions and swaps.
419 SelectPatternFlavor LSPF
, RSPF
;
420 Value
*CondL
, *CondR
, *LHSA
, *RHSA
, *LHSB
, *RHSB
;
421 if (matchSelectWithOptionalNotCond(LHSI
, CondL
, LHSA
, LHSB
, LSPF
) &&
422 matchSelectWithOptionalNotCond(RHSI
, CondR
, RHSA
, RHSB
, RSPF
)) {
424 // TODO: We should also detect FP min/max.
425 if (LSPF
== SPF_SMIN
|| LSPF
== SPF_SMAX
||
426 LSPF
== SPF_UMIN
|| LSPF
== SPF_UMAX
)
427 return ((LHSA
== RHSA
&& LHSB
== RHSB
) ||
428 (LHSA
== RHSB
&& LHSB
== RHSA
));
430 // select Cond, A, B <--> select not(Cond), B, A
431 if (CondL
== CondR
&& LHSA
== RHSA
&& LHSB
== RHSB
)
435 // If the true/false operands are swapped and the conditions are compares
436 // with inverted predicates, the selects are equal:
437 // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
439 // This also handles patterns with a double-negation in the sense of not +
440 // inverse, because we looked through a 'not' in the matching function and
442 // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
444 // This intentionally does NOT handle patterns with a double-negation in
445 // the sense of not + not, because doing so could result in values
447 // as equal that hash differently in the min/max cases like:
448 // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
449 // ^ hashes as min ^ would not hash as min
450 // In the context of the EarlyCSE pass, however, such cases never reach
451 // this code, as we simplify the double-negation before hashing the second
452 // select (and so still succeed at CSEing them).
453 if (LHSA
== RHSB
&& LHSB
== RHSA
) {
454 CmpInst::Predicate PredL
, PredR
;
456 if (match(CondL
, m_Cmp(PredL
, m_Value(X
), m_Value(Y
))) &&
457 match(CondR
, m_Cmp(PredR
, m_Specific(X
), m_Specific(Y
))) &&
458 CmpInst::getInversePredicate(PredL
) == PredR
)
466 bool DenseMapInfo
<SimpleValue
>::isEqual(SimpleValue LHS
, SimpleValue RHS
) {
467 // These comparisons are nontrivial, so assert that equality implies
468 // hash equality (DenseMap demands this as an invariant).
469 bool Result
= isEqualImpl(LHS
, RHS
);
470 assert(!Result
|| (LHS
.isSentinel() && LHS
.Inst
== RHS
.Inst
) ||
471 getHashValueImpl(LHS
) == getHashValueImpl(RHS
));
475 //===----------------------------------------------------------------------===//
477 //===----------------------------------------------------------------------===//
481 /// Struct representing the available call values in the scoped hash
486 CallValue(Instruction
*I
) : Inst(I
) {
487 assert((isSentinel() || canHandle(I
)) && "Inst can't be handled!");
490 bool isSentinel() const {
491 return Inst
== DenseMapInfo
<Instruction
*>::getEmptyKey() ||
492 Inst
== DenseMapInfo
<Instruction
*>::getTombstoneKey();
495 static bool canHandle(Instruction
*Inst
) {
496 // Don't value number anything that returns void.
497 if (Inst
->getType()->isVoidTy())
500 CallInst
*CI
= dyn_cast
<CallInst
>(Inst
);
501 if (!CI
|| !CI
->onlyReadsMemory() ||
502 // FIXME: Currently the calls which may access the thread id may
503 // be considered as not accessing the memory. But this is
504 // problematic for coroutines, since coroutines may resume in a
505 // different thread. So we disable the optimization here for the
506 // correctness. However, it may block many other correct
507 // optimizations. Revert this one when we detect the memory
508 // accessing kind more precisely.
509 CI
->getFunction()->isPresplitCoroutine())
515 } // end anonymous namespace
519 template <> struct DenseMapInfo
<CallValue
> {
520 static inline CallValue
getEmptyKey() {
521 return DenseMapInfo
<Instruction
*>::getEmptyKey();
524 static inline CallValue
getTombstoneKey() {
525 return DenseMapInfo
<Instruction
*>::getTombstoneKey();
528 static unsigned getHashValue(CallValue Val
);
529 static bool isEqual(CallValue LHS
, CallValue RHS
);
532 } // end namespace llvm
534 unsigned DenseMapInfo
<CallValue
>::getHashValue(CallValue Val
) {
535 Instruction
*Inst
= Val
.Inst
;
537 // Hash all of the operands as pointers and mix in the opcode.
538 return hashCallInst(cast
<CallInst
>(Inst
));
541 bool DenseMapInfo
<CallValue
>::isEqual(CallValue LHS
, CallValue RHS
) {
542 if (LHS
.isSentinel() || RHS
.isSentinel())
543 return LHS
.Inst
== RHS
.Inst
;
545 CallInst
*LHSI
= cast
<CallInst
>(LHS
.Inst
);
546 CallInst
*RHSI
= cast
<CallInst
>(RHS
.Inst
);
548 // Convergent calls implicitly depend on the set of threads that is
549 // currently executing, so conservatively return false if they are in
550 // different basic blocks.
551 if (LHSI
->isConvergent() && LHSI
->getParent() != RHSI
->getParent())
554 return LHSI
->isIdenticalToWhenDefined(RHSI
, /*IntersectAttrs=*/true);
557 //===----------------------------------------------------------------------===//
559 //===----------------------------------------------------------------------===//
565 std::optional
<int64_t> ConstantOffset
;
567 GEPValue(Instruction
*I
) : Inst(I
) {
568 assert((isSentinel() || canHandle(I
)) && "Inst can't be handled!");
571 GEPValue(Instruction
*I
, std::optional
<int64_t> ConstantOffset
)
572 : Inst(I
), ConstantOffset(ConstantOffset
) {
573 assert((isSentinel() || canHandle(I
)) && "Inst can't be handled!");
576 bool isSentinel() const {
577 return Inst
== DenseMapInfo
<Instruction
*>::getEmptyKey() ||
578 Inst
== DenseMapInfo
<Instruction
*>::getTombstoneKey();
581 static bool canHandle(Instruction
*Inst
) {
582 return isa
<GetElementPtrInst
>(Inst
);
590 template <> struct DenseMapInfo
<GEPValue
> {
591 static inline GEPValue
getEmptyKey() {
592 return DenseMapInfo
<Instruction
*>::getEmptyKey();
595 static inline GEPValue
getTombstoneKey() {
596 return DenseMapInfo
<Instruction
*>::getTombstoneKey();
599 static unsigned getHashValue(const GEPValue
&Val
);
600 static bool isEqual(const GEPValue
&LHS
, const GEPValue
&RHS
);
603 } // end namespace llvm
605 unsigned DenseMapInfo
<GEPValue
>::getHashValue(const GEPValue
&Val
) {
606 auto *GEP
= cast
<GetElementPtrInst
>(Val
.Inst
);
607 if (Val
.ConstantOffset
.has_value())
608 return hash_combine(GEP
->getOpcode(), GEP
->getPointerOperand(),
609 Val
.ConstantOffset
.value());
612 hash_combine_range(GEP
->value_op_begin(), GEP
->value_op_end()));
615 bool DenseMapInfo
<GEPValue
>::isEqual(const GEPValue
&LHS
, const GEPValue
&RHS
) {
616 if (LHS
.isSentinel() || RHS
.isSentinel())
617 return LHS
.Inst
== RHS
.Inst
;
618 auto *LGEP
= cast
<GetElementPtrInst
>(LHS
.Inst
);
619 auto *RGEP
= cast
<GetElementPtrInst
>(RHS
.Inst
);
620 if (LGEP
->getPointerOperand() != RGEP
->getPointerOperand())
622 if (LHS
.ConstantOffset
.has_value() && RHS
.ConstantOffset
.has_value())
623 return LHS
.ConstantOffset
.value() == RHS
.ConstantOffset
.value();
624 return LGEP
->isIdenticalToWhenDefined(RGEP
);
627 //===----------------------------------------------------------------------===//
628 // EarlyCSE implementation
629 //===----------------------------------------------------------------------===//
633 /// A simple and fast domtree-based CSE pass.
635 /// This pass does a simple depth-first walk over the dominator tree,
636 /// eliminating trivially redundant instructions and using instsimplify to
637 /// canonicalize things as it goes. It is intended to be fast and catch obvious
638 /// cases so that instcombine and other passes are more effective. It is
639 /// expected that a later pass of GVN will catch the interesting/hard cases.
642 const TargetLibraryInfo
&TLI
;
643 const TargetTransformInfo
&TTI
;
646 const SimplifyQuery SQ
;
648 std::unique_ptr
<MemorySSAUpdater
> MSSAUpdater
;
651 RecyclingAllocator
<BumpPtrAllocator
,
652 ScopedHashTableVal
<SimpleValue
, Value
*>>;
654 ScopedHashTable
<SimpleValue
, Value
*, DenseMapInfo
<SimpleValue
>,
657 /// A scoped hash table of the current values of all of our simple
658 /// scalar expressions.
660 /// As we walk down the domtree, we look to see if instructions are in this:
661 /// if so, we replace them with what we find, otherwise we insert them so
662 /// that dominated values can succeed in their lookup.
663 ScopedHTType AvailableValues
;
665 /// A scoped hash table of the current values of previously encountered
666 /// memory locations.
668 /// This allows us to get efficient access to dominating loads or stores when
669 /// we have a fully redundant load. In addition to the most recent load, we
670 /// keep track of a generation count of the read, which is compared against
671 /// the current generation count. The current generation count is incremented
672 /// after every possibly writing memory operation, which ensures that we only
673 /// CSE loads with other loads that have no intervening store. Ordering
674 /// events (such as fences or atomic instructions) increment the generation
675 /// count as well; essentially, we model these as writes to all possible
676 /// locations. Note that atomic and/or volatile loads and stores can be
677 /// present the table; it is the responsibility of the consumer to inspect
678 /// the atomicity/volatility if needed.
680 Instruction
*DefInst
= nullptr;
681 unsigned Generation
= 0;
683 bool IsAtomic
= false;
686 LoadValue() = default;
687 LoadValue(Instruction
*Inst
, unsigned Generation
, unsigned MatchingId
,
688 bool IsAtomic
, bool IsLoad
)
689 : DefInst(Inst
), Generation(Generation
), MatchingId(MatchingId
),
690 IsAtomic(IsAtomic
), IsLoad(IsLoad
) {}
693 using LoadMapAllocator
=
694 RecyclingAllocator
<BumpPtrAllocator
,
695 ScopedHashTableVal
<Value
*, LoadValue
>>;
697 ScopedHashTable
<Value
*, LoadValue
, DenseMapInfo
<Value
*>,
700 LoadHTType AvailableLoads
;
702 // A scoped hash table mapping memory locations (represented as typed
703 // addresses) to generation numbers at which that memory location became
704 // (henceforth indefinitely) invariant.
705 using InvariantMapAllocator
=
706 RecyclingAllocator
<BumpPtrAllocator
,
707 ScopedHashTableVal
<MemoryLocation
, unsigned>>;
708 using InvariantHTType
=
709 ScopedHashTable
<MemoryLocation
, unsigned, DenseMapInfo
<MemoryLocation
>,
710 InvariantMapAllocator
>;
711 InvariantHTType AvailableInvariants
;
713 /// A scoped hash table of the current values of read-only call
716 /// It uses the same generation count as loads.
718 ScopedHashTable
<CallValue
, std::pair
<Instruction
*, unsigned>>;
719 CallHTType AvailableCalls
;
721 using GEPMapAllocatorTy
=
722 RecyclingAllocator
<BumpPtrAllocator
,
723 ScopedHashTableVal
<GEPValue
, Value
*>>;
724 using GEPHTType
= ScopedHashTable
<GEPValue
, Value
*, DenseMapInfo
<GEPValue
>,
726 GEPHTType AvailableGEPs
;
728 /// This is the current generation of the memory value.
729 unsigned CurrentGeneration
= 0;
731 /// Set up the EarlyCSE runner for a particular function.
732 EarlyCSE(const DataLayout
&DL
, const TargetLibraryInfo
&TLI
,
733 const TargetTransformInfo
&TTI
, DominatorTree
&DT
,
734 AssumptionCache
&AC
, MemorySSA
*MSSA
)
735 : TLI(TLI
), TTI(TTI
), DT(DT
), AC(AC
), SQ(DL
, &TLI
, &DT
, &AC
), MSSA(MSSA
),
736 MSSAUpdater(std::make_unique
<MemorySSAUpdater
>(MSSA
)) {}
741 unsigned ClobberCounter
= 0;
742 // Almost a POD, but needs to call the constructors for the scoped hash
743 // tables so that a new scope gets pushed on. These are RAII so that the
744 // scope gets popped when the NodeScope is destroyed.
747 NodeScope(ScopedHTType
&AvailableValues
, LoadHTType
&AvailableLoads
,
748 InvariantHTType
&AvailableInvariants
, CallHTType
&AvailableCalls
,
749 GEPHTType
&AvailableGEPs
)
750 : Scope(AvailableValues
), LoadScope(AvailableLoads
),
751 InvariantScope(AvailableInvariants
), CallScope(AvailableCalls
),
752 GEPScope(AvailableGEPs
) {}
753 NodeScope(const NodeScope
&) = delete;
754 NodeScope
&operator=(const NodeScope
&) = delete;
757 ScopedHTType::ScopeTy Scope
;
758 LoadHTType::ScopeTy LoadScope
;
759 InvariantHTType::ScopeTy InvariantScope
;
760 CallHTType::ScopeTy CallScope
;
761 GEPHTType::ScopeTy GEPScope
;
764 // Contains all the needed information to create a stack for doing a depth
765 // first traversal of the tree. This includes scopes for values, loads, and
766 // calls as well as the generation. There is a child iterator so that the
767 // children do not need to be store separately.
770 StackNode(ScopedHTType
&AvailableValues
, LoadHTType
&AvailableLoads
,
771 InvariantHTType
&AvailableInvariants
, CallHTType
&AvailableCalls
,
772 GEPHTType
&AvailableGEPs
, unsigned cg
, DomTreeNode
*n
,
773 DomTreeNode::const_iterator child
,
774 DomTreeNode::const_iterator end
)
775 : CurrentGeneration(cg
), ChildGeneration(cg
), Node(n
), ChildIter(child
),
777 Scopes(AvailableValues
, AvailableLoads
, AvailableInvariants
,
778 AvailableCalls
, AvailableGEPs
) {}
779 StackNode(const StackNode
&) = delete;
780 StackNode
&operator=(const StackNode
&) = delete;
783 unsigned currentGeneration() const { return CurrentGeneration
; }
784 unsigned childGeneration() const { return ChildGeneration
; }
785 void childGeneration(unsigned generation
) { ChildGeneration
= generation
; }
786 DomTreeNode
*node() { return Node
; }
787 DomTreeNode::const_iterator
childIter() const { return ChildIter
; }
789 DomTreeNode
*nextChild() {
790 DomTreeNode
*child
= *ChildIter
;
795 DomTreeNode::const_iterator
end() const { return EndIter
; }
796 bool isProcessed() const { return Processed
; }
797 void process() { Processed
= true; }
800 unsigned CurrentGeneration
;
801 unsigned ChildGeneration
;
803 DomTreeNode::const_iterator ChildIter
;
804 DomTreeNode::const_iterator EndIter
;
806 bool Processed
= false;
809 /// Wrapper class to handle memory instructions, including loads,
810 /// stores and intrinsic loads and stores defined by the target.
811 class ParseMemoryInst
{
813 ParseMemoryInst(Instruction
*Inst
, const TargetTransformInfo
&TTI
)
815 if (IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(Inst
)) {
816 IntrID
= II
->getIntrinsicID();
817 if (TTI
.getTgtMemIntrinsic(II
, Info
))
819 if (isHandledNonTargetIntrinsic(IntrID
)) {
821 case Intrinsic::masked_load
:
822 Info
.PtrVal
= Inst
->getOperand(0);
823 Info
.MatchingId
= Intrinsic::masked_load
;
825 Info
.WriteMem
= false;
826 Info
.IsVolatile
= false;
828 case Intrinsic::masked_store
:
829 Info
.PtrVal
= Inst
->getOperand(1);
830 // Use the ID of masked load as the "matching id". This will
831 // prevent matching non-masked loads/stores with masked ones
832 // (which could be done), but at the moment, the code here
833 // does not support matching intrinsics with non-intrinsics,
834 // so keep the MatchingIds specific to masked instructions
836 Info
.MatchingId
= Intrinsic::masked_load
;
837 Info
.ReadMem
= false;
838 Info
.WriteMem
= true;
839 Info
.IsVolatile
= false;
846 Instruction
*get() { return Inst
; }
847 const Instruction
*get() const { return Inst
; }
849 bool isLoad() const {
852 return isa
<LoadInst
>(Inst
);
855 bool isStore() const {
857 return Info
.WriteMem
;
858 return isa
<StoreInst
>(Inst
);
861 bool isAtomic() const {
863 return Info
.Ordering
!= AtomicOrdering::NotAtomic
;
864 return Inst
->isAtomic();
867 bool isUnordered() const {
869 return Info
.isUnordered();
871 if (LoadInst
*LI
= dyn_cast
<LoadInst
>(Inst
)) {
872 return LI
->isUnordered();
873 } else if (StoreInst
*SI
= dyn_cast
<StoreInst
>(Inst
)) {
874 return SI
->isUnordered();
876 // Conservative answer
877 return !Inst
->isAtomic();
880 bool isVolatile() const {
882 return Info
.IsVolatile
;
884 if (LoadInst
*LI
= dyn_cast
<LoadInst
>(Inst
)) {
885 return LI
->isVolatile();
886 } else if (StoreInst
*SI
= dyn_cast
<StoreInst
>(Inst
)) {
887 return SI
->isVolatile();
889 // Conservative answer
893 bool isInvariantLoad() const {
894 if (auto *LI
= dyn_cast
<LoadInst
>(Inst
))
895 return LI
->hasMetadata(LLVMContext::MD_invariant_load
);
899 bool isValid() const { return getPointerOperand() != nullptr; }
901 // For regular (non-intrinsic) loads/stores, this is set to -1. For
902 // intrinsic loads/stores, the id is retrieved from the corresponding
903 // field in the MemIntrinsicInfo structure. That field contains
904 // non-negative values only.
905 int getMatchingId() const {
907 return Info
.MatchingId
;
911 Value
*getPointerOperand() const {
914 return getLoadStorePointerOperand(Inst
);
917 Type
*getValueType() const {
918 // TODO: handle target-specific intrinsics.
919 return Inst
->getAccessType();
922 bool mayReadFromMemory() const {
925 return Inst
->mayReadFromMemory();
928 bool mayWriteToMemory() const {
930 return Info
.WriteMem
;
931 return Inst
->mayWriteToMemory();
935 Intrinsic::ID IntrID
= 0;
936 MemIntrinsicInfo Info
;
940 // This function is to prevent accidentally passing a non-target
941 // intrinsic ID to TargetTransformInfo.
942 static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID
) {
944 case Intrinsic::masked_load
:
945 case Intrinsic::masked_store
:
950 static bool isHandledNonTargetIntrinsic(const Value
*V
) {
951 if (auto *II
= dyn_cast
<IntrinsicInst
>(V
))
952 return isHandledNonTargetIntrinsic(II
->getIntrinsicID());
956 bool processNode(DomTreeNode
*Node
);
958 bool handleBranchCondition(Instruction
*CondInst
, const BranchInst
*BI
,
959 const BasicBlock
*BB
, const BasicBlock
*Pred
);
961 Value
*getMatchingValue(LoadValue
&InVal
, ParseMemoryInst
&MemInst
,
962 unsigned CurrentGeneration
);
964 bool overridingStores(const ParseMemoryInst
&Earlier
,
965 const ParseMemoryInst
&Later
);
967 Value
*getOrCreateResult(Instruction
*Inst
, Type
*ExpectedType
) const {
968 // TODO: We could insert relevant casts on type mismatch.
969 // The load or the store's first operand.
971 if (auto *II
= dyn_cast
<IntrinsicInst
>(Inst
)) {
972 switch (II
->getIntrinsicID()) {
973 case Intrinsic::masked_load
:
976 case Intrinsic::masked_store
:
977 V
= II
->getOperand(0);
980 return TTI
.getOrCreateResultFromMemIntrinsic(II
, ExpectedType
);
983 V
= isa
<LoadInst
>(Inst
) ? Inst
: cast
<StoreInst
>(Inst
)->getValueOperand();
986 return V
->getType() == ExpectedType
? V
: nullptr;
989 /// Return true if the instruction is known to only operate on memory
990 /// provably invariant in the given "generation".
991 bool isOperatingOnInvariantMemAt(Instruction
*I
, unsigned GenAt
);
993 bool isSameMemGeneration(unsigned EarlierGeneration
, unsigned LaterGeneration
,
994 Instruction
*EarlierInst
, Instruction
*LaterInst
);
996 bool isNonTargetIntrinsicMatch(const IntrinsicInst
*Earlier
,
997 const IntrinsicInst
*Later
) {
998 auto IsSubmask
= [](const Value
*Mask0
, const Value
*Mask1
) {
999 // Is Mask0 a submask of Mask1?
1002 if (isa
<UndefValue
>(Mask0
) || isa
<UndefValue
>(Mask1
))
1004 auto *Vec0
= dyn_cast
<ConstantVector
>(Mask0
);
1005 auto *Vec1
= dyn_cast
<ConstantVector
>(Mask1
);
1008 if (Vec0
->getType() != Vec1
->getType())
1010 for (int i
= 0, e
= Vec0
->getNumOperands(); i
!= e
; ++i
) {
1011 Constant
*Elem0
= Vec0
->getOperand(i
);
1012 Constant
*Elem1
= Vec1
->getOperand(i
);
1013 auto *Int0
= dyn_cast
<ConstantInt
>(Elem0
);
1014 if (Int0
&& Int0
->isZero())
1016 auto *Int1
= dyn_cast
<ConstantInt
>(Elem1
);
1017 if (Int1
&& !Int1
->isZero())
1019 if (isa
<UndefValue
>(Elem0
) || isa
<UndefValue
>(Elem1
))
1027 auto PtrOp
= [](const IntrinsicInst
*II
) {
1028 if (II
->getIntrinsicID() == Intrinsic::masked_load
)
1029 return II
->getOperand(0);
1030 if (II
->getIntrinsicID() == Intrinsic::masked_store
)
1031 return II
->getOperand(1);
1032 llvm_unreachable("Unexpected IntrinsicInst");
1034 auto MaskOp
= [](const IntrinsicInst
*II
) {
1035 if (II
->getIntrinsicID() == Intrinsic::masked_load
)
1036 return II
->getOperand(2);
1037 if (II
->getIntrinsicID() == Intrinsic::masked_store
)
1038 return II
->getOperand(3);
1039 llvm_unreachable("Unexpected IntrinsicInst");
1041 auto ThruOp
= [](const IntrinsicInst
*II
) {
1042 if (II
->getIntrinsicID() == Intrinsic::masked_load
)
1043 return II
->getOperand(3);
1044 llvm_unreachable("Unexpected IntrinsicInst");
1047 if (PtrOp(Earlier
) != PtrOp(Later
))
1050 Intrinsic::ID IDE
= Earlier
->getIntrinsicID();
1051 Intrinsic::ID IDL
= Later
->getIntrinsicID();
1052 // We could really use specific intrinsic classes for masked loads
1053 // and stores in IntrinsicInst.h.
1054 if (IDE
== Intrinsic::masked_load
&& IDL
== Intrinsic::masked_load
) {
1055 // Trying to replace later masked load with the earlier one.
1056 // Check that the pointers are the same, and
1057 // - masks and pass-throughs are the same, or
1058 // - replacee's pass-through is "undef" and replacer's mask is a
1059 // super-set of the replacee's mask.
1060 if (MaskOp(Earlier
) == MaskOp(Later
) && ThruOp(Earlier
) == ThruOp(Later
))
1062 if (!isa
<UndefValue
>(ThruOp(Later
)))
1064 return IsSubmask(MaskOp(Later
), MaskOp(Earlier
));
1066 if (IDE
== Intrinsic::masked_store
&& IDL
== Intrinsic::masked_load
) {
1067 // Trying to replace a load of a stored value with the store's value.
1068 // Check that the pointers are the same, and
1069 // - load's mask is a subset of store's mask, and
1070 // - load's pass-through is "undef".
1071 if (!IsSubmask(MaskOp(Later
), MaskOp(Earlier
)))
1073 return isa
<UndefValue
>(ThruOp(Later
));
1075 if (IDE
== Intrinsic::masked_load
&& IDL
== Intrinsic::masked_store
) {
1076 // Trying to remove a store of the loaded value.
1077 // Check that the pointers are the same, and
1078 // - store's mask is a subset of the load's mask.
1079 return IsSubmask(MaskOp(Later
), MaskOp(Earlier
));
1081 if (IDE
== Intrinsic::masked_store
&& IDL
== Intrinsic::masked_store
) {
1082 // Trying to remove a dead store (earlier).
1083 // Check that the pointers are the same,
1084 // - the to-be-removed store's mask is a subset of the other store's
1086 return IsSubmask(MaskOp(Earlier
), MaskOp(Later
));
1091 void removeMSSA(Instruction
&Inst
) {
1094 if (VerifyMemorySSA
)
1095 MSSA
->verifyMemorySSA();
1096 // Removing a store here can leave MemorySSA in an unoptimized state by
1097 // creating MemoryPhis that have identical arguments and by creating
1098 // MemoryUses whose defining access is not an actual clobber. The phi case
1099 // is handled by MemorySSA when passing OptimizePhis = true to
1100 // removeMemoryAccess. The non-optimized MemoryUse case is lazily updated
1101 // by MemorySSA's getClobberingMemoryAccess.
1102 MSSAUpdater
->removeMemoryAccess(&Inst
, true);
1106 } // end anonymous namespace
1108 /// Determine if the memory referenced by LaterInst is from the same heap
1109 /// version as EarlierInst.
1110 /// This is currently called in two scenarios:
1122 /// in both cases we want to verify that there are no possible writes to the
1123 /// memory referenced by p between the earlier and later instruction.
1124 bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration
,
1125 unsigned LaterGeneration
,
1126 Instruction
*EarlierInst
,
1127 Instruction
*LaterInst
) {
1128 // Check the simple memory generation tracking first.
1129 if (EarlierGeneration
== LaterGeneration
)
1135 // If MemorySSA has determined that one of EarlierInst or LaterInst does not
1136 // read/write memory, then we can safely return true here.
1137 // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
1138 // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
1139 // by also checking the MemorySSA MemoryAccess on the instruction. Initial
1140 // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
1141 // with the default optimization pipeline.
1142 auto *EarlierMA
= MSSA
->getMemoryAccess(EarlierInst
);
1145 auto *LaterMA
= MSSA
->getMemoryAccess(LaterInst
);
1149 // Since we know LaterDef dominates LaterInst and EarlierInst dominates
1150 // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
1151 // EarlierInst and LaterInst and neither can any other write that potentially
1152 // clobbers LaterInst.
1153 MemoryAccess
*LaterDef
;
1154 if (ClobberCounter
< EarlyCSEMssaOptCap
) {
1155 LaterDef
= MSSA
->getWalker()->getClobberingMemoryAccess(LaterInst
);
1158 LaterDef
= LaterMA
->getDefiningAccess();
1160 return MSSA
->dominates(LaterDef
, EarlierMA
);
1163 bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction
*I
, unsigned GenAt
) {
1164 // A location loaded from with an invariant_load is assumed to *never* change
1165 // within the visible scope of the compilation.
1166 if (auto *LI
= dyn_cast
<LoadInst
>(I
))
1167 if (LI
->hasMetadata(LLVMContext::MD_invariant_load
))
1170 auto MemLocOpt
= MemoryLocation::getOrNone(I
);
1172 // "target" intrinsic forms of loads aren't currently known to
1173 // MemoryLocation::get. TODO
1175 MemoryLocation MemLoc
= *MemLocOpt
;
1176 if (!AvailableInvariants
.count(MemLoc
))
1179 // Is the generation at which this became invariant older than the
1181 return AvailableInvariants
.lookup(MemLoc
) <= GenAt
;
1184 bool EarlyCSE::handleBranchCondition(Instruction
*CondInst
,
1185 const BranchInst
*BI
, const BasicBlock
*BB
,
1186 const BasicBlock
*Pred
) {
1187 assert(BI
->isConditional() && "Should be a conditional branch!");
1188 assert(BI
->getCondition() == CondInst
&& "Wrong condition?");
1189 assert(BI
->getSuccessor(0) == BB
|| BI
->getSuccessor(1) == BB
);
1190 auto *TorF
= (BI
->getSuccessor(0) == BB
)
1191 ? ConstantInt::getTrue(BB
->getContext())
1192 : ConstantInt::getFalse(BB
->getContext());
1193 auto MatchBinOp
= [](Instruction
*I
, unsigned Opcode
, Value
*&LHS
,
1195 if (Opcode
== Instruction::And
&&
1196 match(I
, m_LogicalAnd(m_Value(LHS
), m_Value(RHS
))))
1198 else if (Opcode
== Instruction::Or
&&
1199 match(I
, m_LogicalOr(m_Value(LHS
), m_Value(RHS
))))
1203 // If the condition is AND operation, we can propagate its operands into the
1204 // true branch. If it is OR operation, we can propagate them into the false
1206 unsigned PropagateOpcode
=
1207 (BI
->getSuccessor(0) == BB
) ? Instruction::And
: Instruction::Or
;
1209 bool MadeChanges
= false;
1210 SmallVector
<Instruction
*, 4> WorkList
;
1211 SmallPtrSet
<Instruction
*, 4> Visited
;
1212 WorkList
.push_back(CondInst
);
1213 while (!WorkList
.empty()) {
1214 Instruction
*Curr
= WorkList
.pop_back_val();
1216 AvailableValues
.insert(Curr
, TorF
);
1217 LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
1218 << Curr
->getName() << "' as " << *TorF
<< " in "
1219 << BB
->getName() << "\n");
1220 if (!DebugCounter::shouldExecute(CSECounter
)) {
1221 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1223 // Replace all dominated uses with the known value.
1224 if (unsigned Count
= replaceDominatedUsesWith(Curr
, TorF
, DT
,
1225 BasicBlockEdge(Pred
, BB
))) {
1232 if (MatchBinOp(Curr
, PropagateOpcode
, LHS
, RHS
))
1233 for (auto *Op
: { LHS
, RHS
})
1234 if (Instruction
*OPI
= dyn_cast
<Instruction
>(Op
))
1235 if (SimpleValue::canHandle(OPI
) && Visited
.insert(OPI
).second
)
1236 WorkList
.push_back(OPI
);
1242 Value
*EarlyCSE::getMatchingValue(LoadValue
&InVal
, ParseMemoryInst
&MemInst
,
1243 unsigned CurrentGeneration
) {
1244 if (InVal
.DefInst
== nullptr)
1246 if (InVal
.MatchingId
!= MemInst
.getMatchingId())
1248 // We don't yet handle removing loads with ordering of any kind.
1249 if (MemInst
.isVolatile() || !MemInst
.isUnordered())
1251 // We can't replace an atomic load with one which isn't also atomic.
1252 if (MemInst
.isLoad() && !InVal
.IsAtomic
&& MemInst
.isAtomic())
1254 // The value V returned from this function is used differently depending
1255 // on whether MemInst is a load or a store. If it's a load, we will replace
1256 // MemInst with V, if it's a store, we will check if V is the same as the
1258 bool MemInstMatching
= !MemInst
.isLoad();
1259 Instruction
*Matching
= MemInstMatching
? MemInst
.get() : InVal
.DefInst
;
1260 Instruction
*Other
= MemInstMatching
? InVal
.DefInst
: MemInst
.get();
1262 // For stores check the result values before checking memory generation
1263 // (otherwise isSameMemGeneration may crash).
1264 Value
*Result
= MemInst
.isStore()
1265 ? getOrCreateResult(Matching
, Other
->getType())
1267 if (MemInst
.isStore() && InVal
.DefInst
!= Result
)
1270 // Deal with non-target memory intrinsics.
1271 bool MatchingNTI
= isHandledNonTargetIntrinsic(Matching
);
1272 bool OtherNTI
= isHandledNonTargetIntrinsic(Other
);
1273 if (OtherNTI
!= MatchingNTI
)
1275 if (OtherNTI
&& MatchingNTI
) {
1276 if (!isNonTargetIntrinsicMatch(cast
<IntrinsicInst
>(InVal
.DefInst
),
1277 cast
<IntrinsicInst
>(MemInst
.get())))
1281 if (!isOperatingOnInvariantMemAt(MemInst
.get(), InVal
.Generation
) &&
1282 !isSameMemGeneration(InVal
.Generation
, CurrentGeneration
, InVal
.DefInst
,
1287 Result
= getOrCreateResult(Matching
, Other
->getType());
1291 static void combineIRFlags(Instruction
&From
, Value
*To
) {
1292 if (auto *I
= dyn_cast
<Instruction
>(To
)) {
1293 // If I being poison triggers UB, there is no need to drop those
1294 // flags. Otherwise, only retain flags present on both I and Inst.
1295 // TODO: Currently some fast-math flags are not treated as
1296 // poison-generating even though they should. Until this is fixed,
1297 // always retain flags present on both I and Inst for floating point
1299 if (isa
<FPMathOperator
>(I
) ||
1300 (I
->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I
)))
1301 I
->andIRFlags(&From
);
1303 if (isa
<CallBase
>(&From
) && isa
<CallBase
>(To
)) {
1304 // NB: Intersection of attrs between InVal.first and Inst is overly
1305 // conservative. Since we only CSE readonly functions that have the same
1306 // memory state, we can preserve (or possibly in some cases combine)
1307 // more attributes. Likewise this implies when checking equality of
1308 // callsite for CSEing, we can probably ignore more attributes.
1309 // Generally poison generating attributes need to be handled with more
1310 // care as they can create *new* UB if preserved/combined and violated.
1311 // Attributes that imply immediate UB on the other hand would have been
1312 // violated either way.
1314 cast
<CallBase
>(To
)->tryIntersectAttributes(cast
<CallBase
>(&From
));
1315 assert(Success
&& "Failed to intersect attributes in callsites that "
1316 "passed identical check");
1317 // For NDEBUG Compile.
1322 bool EarlyCSE::overridingStores(const ParseMemoryInst
&Earlier
,
1323 const ParseMemoryInst
&Later
) {
1324 // Can we remove Earlier store because of Later store?
1326 assert(Earlier
.isUnordered() && !Earlier
.isVolatile() &&
1327 "Violated invariant");
1328 if (Earlier
.getPointerOperand() != Later
.getPointerOperand())
1330 if (!Earlier
.getValueType() || !Later
.getValueType() ||
1331 Earlier
.getValueType() != Later
.getValueType())
1333 if (Earlier
.getMatchingId() != Later
.getMatchingId())
1335 // At the moment, we don't remove ordered stores, but do remove
1336 // unordered atomic stores. There's no special requirement (for
1337 // unordered atomics) about removing atomic stores only in favor of
1338 // other atomic stores since we were going to execute the non-atomic
1339 // one anyway and the atomic one might never have become visible.
1340 if (!Earlier
.isUnordered() || !Later
.isUnordered())
1343 // Deal with non-target memory intrinsics.
1344 bool ENTI
= isHandledNonTargetIntrinsic(Earlier
.get());
1345 bool LNTI
= isHandledNonTargetIntrinsic(Later
.get());
1347 return isNonTargetIntrinsicMatch(cast
<IntrinsicInst
>(Earlier
.get()),
1348 cast
<IntrinsicInst
>(Later
.get()));
1350 // Because of the check above, at least one of them is false.
1351 // For now disallow matching intrinsics with non-intrinsics,
1352 // so assume that the stores match if neither is an intrinsic.
1353 return ENTI
== LNTI
;
1356 bool EarlyCSE::processNode(DomTreeNode
*Node
) {
1357 bool Changed
= false;
1358 BasicBlock
*BB
= Node
->getBlock();
1360 // If this block has a single predecessor, then the predecessor is the parent
1361 // of the domtree node and all of the live out memory values are still current
1362 // in this block. If this block has multiple predecessors, then they could
1363 // have invalidated the live-out memory values of our parent value. For now,
1364 // just be conservative and invalidate memory if this block has multiple
1366 if (!BB
->getSinglePredecessor())
1367 ++CurrentGeneration
;
1369 // If this node has a single predecessor which ends in a conditional branch,
1370 // we can infer the value of the branch condition given that we took this
1371 // path. We need the single predecessor to ensure there's not another path
1372 // which reaches this block where the condition might hold a different
1373 // value. Since we're adding this to the scoped hash table (like any other
1374 // def), it will have been popped if we encounter a future merge block.
1375 if (BasicBlock
*Pred
= BB
->getSinglePredecessor()) {
1376 auto *BI
= dyn_cast
<BranchInst
>(Pred
->getTerminator());
1377 if (BI
&& BI
->isConditional()) {
1378 auto *CondInst
= dyn_cast
<Instruction
>(BI
->getCondition());
1379 if (CondInst
&& SimpleValue::canHandle(CondInst
))
1380 Changed
|= handleBranchCondition(CondInst
, BI
, BB
, Pred
);
1384 /// LastStore - Keep track of the last non-volatile store that we saw... for
1385 /// as long as there in no instruction that reads memory. If we see a store
1386 /// to the same location, we delete the dead store. This zaps trivial dead
1387 /// stores which can occur in bitfield code among other things.
1388 Instruction
*LastStore
= nullptr;
1390 // See if any instructions in the block can be eliminated. If so, do it. If
1391 // not, add them to AvailableValues.
1392 for (Instruction
&Inst
: make_early_inc_range(*BB
)) {
1393 // Dead instructions should just be removed.
1394 if (isInstructionTriviallyDead(&Inst
, &TLI
)) {
1395 LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst
<< '\n');
1396 if (!DebugCounter::shouldExecute(CSECounter
)) {
1397 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1401 salvageKnowledge(&Inst
, &AC
);
1402 salvageDebugInfo(Inst
);
1404 Inst
.eraseFromParent();
1410 // Skip assume intrinsics, they don't really have side effects (although
1411 // they're marked as such to ensure preservation of control dependencies),
1412 // and this pass will not bother with its removal. However, we should mark
1413 // its condition as true for all dominated blocks.
1414 if (auto *Assume
= dyn_cast
<AssumeInst
>(&Inst
)) {
1415 auto *CondI
= dyn_cast
<Instruction
>(Assume
->getArgOperand(0));
1416 if (CondI
&& SimpleValue::canHandle(CondI
)) {
1417 LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
1419 AvailableValues
.insert(CondI
, ConstantInt::getTrue(BB
->getContext()));
1421 LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst
<< '\n');
1425 // Likewise, noalias intrinsics don't actually write.
1427 m_Intrinsic
<Intrinsic::experimental_noalias_scope_decl
>())) {
1428 LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
1433 // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
1434 if (match(&Inst
, m_Intrinsic
<Intrinsic::sideeffect
>())) {
1435 LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst
<< '\n');
1439 // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
1440 if (match(&Inst
, m_Intrinsic
<Intrinsic::pseudoprobe
>())) {
1441 LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst
<< '\n');
1445 // We can skip all invariant.start intrinsics since they only read memory,
1446 // and we can forward values across it. For invariant starts without
1447 // invariant ends, we can use the fact that the invariantness never ends to
1448 // start a scope in the current generaton which is true for all future
1449 // generations. Also, we dont need to consume the last store since the
1450 // semantics of invariant.start allow us to perform DSE of the last
1451 // store, if there was a store following invariant.start. Consider:
1454 // invariant.start(p)
1456 // We can DSE the store to 30, since the store 40 to invariant location p
1457 // causes undefined behaviour.
1458 if (match(&Inst
, m_Intrinsic
<Intrinsic::invariant_start
>())) {
1459 // If there are any uses, the scope might end.
1460 if (!Inst
.use_empty())
1462 MemoryLocation MemLoc
=
1463 MemoryLocation::getForArgument(&cast
<CallInst
>(Inst
), 1, TLI
);
1464 // Don't start a scope if we already have a better one pushed
1465 if (!AvailableInvariants
.count(MemLoc
))
1466 AvailableInvariants
.insert(MemLoc
, CurrentGeneration
);
1470 if (isGuard(&Inst
)) {
1472 dyn_cast
<Instruction
>(cast
<CallInst
>(Inst
).getArgOperand(0))) {
1473 if (SimpleValue::canHandle(CondI
)) {
1474 // Do we already know the actual value of this condition?
1475 if (auto *KnownCond
= AvailableValues
.lookup(CondI
)) {
1476 // Is the condition known to be true?
1477 if (isa
<ConstantInt
>(KnownCond
) &&
1478 cast
<ConstantInt
>(KnownCond
)->isOne()) {
1480 << "EarlyCSE removing guard: " << Inst
<< '\n');
1481 salvageKnowledge(&Inst
, &AC
);
1483 Inst
.eraseFromParent();
1487 // Use the known value if it wasn't true.
1488 cast
<CallInst
>(Inst
).setArgOperand(0, KnownCond
);
1490 // The condition we're on guarding here is true for all dominated
1492 AvailableValues
.insert(CondI
, ConstantInt::getTrue(BB
->getContext()));
1496 // Guard intrinsics read all memory, but don't write any memory.
1497 // Accordingly, don't update the generation but consume the last store (to
1498 // avoid an incorrect DSE).
1499 LastStore
= nullptr;
1503 // If the instruction can be simplified (e.g. X+0 = X) then replace it with
1504 // its simpler value.
1505 if (Value
*V
= simplifyInstruction(&Inst
, SQ
)) {
1506 LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst
<< " to: " << *V
1508 if (!DebugCounter::shouldExecute(CSECounter
)) {
1509 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1511 bool Killed
= false;
1512 if (!Inst
.use_empty()) {
1513 Inst
.replaceAllUsesWith(V
);
1516 if (isInstructionTriviallyDead(&Inst
, &TLI
)) {
1517 salvageKnowledge(&Inst
, &AC
);
1519 Inst
.eraseFromParent();
1530 // If this is a simple instruction that we can value number, process it.
1531 if (SimpleValue::canHandle(&Inst
)) {
1532 if ([[maybe_unused
]] auto *CI
= dyn_cast
<ConstrainedFPIntrinsic
>(&Inst
)) {
1533 assert(CI
->getExceptionBehavior() != fp::ebStrict
&&
1534 "Unexpected ebStrict from SimpleValue::canHandle()");
1535 assert((!CI
->getRoundingMode() ||
1536 CI
->getRoundingMode() != RoundingMode::Dynamic
) &&
1537 "Unexpected dynamic rounding from SimpleValue::canHandle()");
1539 // See if the instruction has an available value. If so, use it.
1540 if (Value
*V
= AvailableValues
.lookup(&Inst
)) {
1541 LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst
<< " to: " << *V
1543 if (!DebugCounter::shouldExecute(CSECounter
)) {
1544 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1547 combineIRFlags(Inst
, V
);
1548 Inst
.replaceAllUsesWith(V
);
1549 salvageKnowledge(&Inst
, &AC
);
1551 Inst
.eraseFromParent();
1557 // Otherwise, just remember that this value is available.
1558 AvailableValues
.insert(&Inst
, &Inst
);
1562 ParseMemoryInst
MemInst(&Inst
, TTI
);
1563 // If this is a non-volatile load, process it.
1564 if (MemInst
.isValid() && MemInst
.isLoad()) {
1565 // (conservatively) we can't peak past the ordering implied by this
1566 // operation, but we can add this load to our set of available values
1567 if (MemInst
.isVolatile() || !MemInst
.isUnordered()) {
1568 LastStore
= nullptr;
1569 ++CurrentGeneration
;
1572 if (MemInst
.isInvariantLoad()) {
1573 // If we pass an invariant load, we know that memory location is
1574 // indefinitely constant from the moment of first dereferenceability.
1575 // We conservatively treat the invariant_load as that moment. If we
1576 // pass a invariant load after already establishing a scope, don't
1577 // restart it since we want to preserve the earliest point seen.
1578 auto MemLoc
= MemoryLocation::get(&Inst
);
1579 if (!AvailableInvariants
.count(MemLoc
))
1580 AvailableInvariants
.insert(MemLoc
, CurrentGeneration
);
1583 // If we have an available version of this load, and if it is the right
1584 // generation or the load is known to be from an invariant location,
1585 // replace this instruction.
1587 // If either the dominating load or the current load are invariant, then
1588 // we can assume the current load loads the same value as the dominating
1590 LoadValue InVal
= AvailableLoads
.lookup(MemInst
.getPointerOperand());
1591 if (Value
*Op
= getMatchingValue(InVal
, MemInst
, CurrentGeneration
)) {
1592 LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
1593 << " to: " << *InVal
.DefInst
<< '\n');
1594 if (!DebugCounter::shouldExecute(CSECounter
)) {
1595 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1599 if (auto *I
= dyn_cast
<Instruction
>(Op
))
1600 combineMetadataForCSE(I
, &Inst
, false);
1601 if (!Inst
.use_empty())
1602 Inst
.replaceAllUsesWith(Op
);
1603 salvageKnowledge(&Inst
, &AC
);
1605 Inst
.eraseFromParent();
1611 // Otherwise, remember that we have this instruction.
1612 AvailableLoads
.insert(MemInst
.getPointerOperand(),
1613 LoadValue(&Inst
, CurrentGeneration
,
1614 MemInst
.getMatchingId(),
1617 LastStore
= nullptr;
1621 // If this instruction may read from memory or throw (and potentially read
1622 // from memory in the exception handler), forget LastStore. Load/store
1623 // intrinsics will indicate both a read and a write to memory. The target
1624 // may override this (e.g. so that a store intrinsic does not read from
1625 // memory, and thus will be treated the same as a regular store for
1626 // commoning purposes).
1627 if ((Inst
.mayReadFromMemory() || Inst
.mayThrow()) &&
1628 !(MemInst
.isValid() && !MemInst
.mayReadFromMemory()))
1629 LastStore
= nullptr;
1631 // If this is a read-only call, process it.
1632 if (CallValue::canHandle(&Inst
)) {
1633 // If we have an available version of this call, and if it is the right
1634 // generation, replace this instruction.
1635 std::pair
<Instruction
*, unsigned> InVal
= AvailableCalls
.lookup(&Inst
);
1636 if (InVal
.first
!= nullptr &&
1637 isSameMemGeneration(InVal
.second
, CurrentGeneration
, InVal
.first
,
1639 LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
1640 << " to: " << *InVal
.first
<< '\n');
1641 if (!DebugCounter::shouldExecute(CSECounter
)) {
1642 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1645 combineIRFlags(Inst
, InVal
.first
);
1646 if (!Inst
.use_empty())
1647 Inst
.replaceAllUsesWith(InVal
.first
);
1648 salvageKnowledge(&Inst
, &AC
);
1650 Inst
.eraseFromParent();
1656 // Otherwise, remember that we have this instruction.
1657 AvailableCalls
.insert(&Inst
, std::make_pair(&Inst
, CurrentGeneration
));
1661 // Compare GEP instructions based on offset.
1662 if (GEPValue::canHandle(&Inst
)) {
1663 auto *GEP
= cast
<GetElementPtrInst
>(&Inst
);
1664 APInt Offset
= APInt(SQ
.DL
.getIndexTypeSizeInBits(GEP
->getType()), 0);
1665 GEPValue
GEPVal(GEP
, GEP
->accumulateConstantOffset(SQ
.DL
, Offset
)
1666 ? Offset
.trySExtValue()
1668 if (Value
*V
= AvailableGEPs
.lookup(GEPVal
)) {
1669 LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst
<< " to: " << *V
1671 combineIRFlags(Inst
, V
);
1672 Inst
.replaceAllUsesWith(V
);
1673 salvageKnowledge(&Inst
, &AC
);
1675 Inst
.eraseFromParent();
1681 // Otherwise, just remember that we have this GEP.
1682 AvailableGEPs
.insert(GEPVal
, &Inst
);
1686 // A release fence requires that all stores complete before it, but does
1687 // not prevent the reordering of following loads 'before' the fence. As a
1688 // result, we don't need to consider it as writing to memory and don't need
1689 // to advance the generation. We do need to prevent DSE across the fence,
1690 // but that's handled above.
1691 if (auto *FI
= dyn_cast
<FenceInst
>(&Inst
))
1692 if (FI
->getOrdering() == AtomicOrdering::Release
) {
1693 assert(Inst
.mayReadFromMemory() && "relied on to prevent DSE above");
1697 // write back DSE - If we write back the same value we just loaded from
1698 // the same location and haven't passed any intervening writes or ordering
1699 // operations, we can remove the write. The primary benefit is in allowing
1700 // the available load table to remain valid and value forward past where
1701 // the store originally was.
1702 if (MemInst
.isValid() && MemInst
.isStore()) {
1703 LoadValue InVal
= AvailableLoads
.lookup(MemInst
.getPointerOperand());
1704 if (InVal
.DefInst
&&
1705 InVal
.DefInst
== getMatchingValue(InVal
, MemInst
, CurrentGeneration
)) {
1706 // It is okay to have a LastStore to a different pointer here if MemorySSA
1707 // tells us that the load and store are from the same memory generation.
1708 // In that case, LastStore should keep its present value since we're
1709 // removing the current store.
1710 assert((!LastStore
||
1711 ParseMemoryInst(LastStore
, TTI
).getPointerOperand() ==
1712 MemInst
.getPointerOperand() ||
1714 "can't have an intervening store if not using MemorySSA!");
1715 LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst
<< '\n');
1716 if (!DebugCounter::shouldExecute(CSECounter
)) {
1717 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1720 salvageKnowledge(&Inst
, &AC
);
1722 Inst
.eraseFromParent();
1725 // We can avoid incrementing the generation count since we were able
1726 // to eliminate this store.
1731 // Okay, this isn't something we can CSE at all. Check to see if it is
1732 // something that could modify memory. If so, our available memory values
1733 // cannot be used so bump the generation count.
1734 if (Inst
.mayWriteToMemory()) {
1735 ++CurrentGeneration
;
1737 if (MemInst
.isValid() && MemInst
.isStore()) {
1738 // We do a trivial form of DSE if there are two stores to the same
1739 // location with no intervening loads. Delete the earlier store.
1741 if (overridingStores(ParseMemoryInst(LastStore
, TTI
), MemInst
)) {
1742 LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
1743 << " due to: " << Inst
<< '\n');
1744 if (!DebugCounter::shouldExecute(CSECounter
)) {
1745 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1747 salvageKnowledge(&Inst
, &AC
);
1748 removeMSSA(*LastStore
);
1749 LastStore
->eraseFromParent();
1752 LastStore
= nullptr;
1755 // fallthrough - we can exploit information about this store
1758 // Okay, we just invalidated anything we knew about loaded values. Try
1759 // to salvage *something* by remembering that the stored value is a live
1760 // version of the pointer. It is safe to forward from volatile stores
1761 // to non-volatile loads, so we don't have to check for volatility of
1763 AvailableLoads
.insert(MemInst
.getPointerOperand(),
1764 LoadValue(&Inst
, CurrentGeneration
,
1765 MemInst
.getMatchingId(),
1769 // Remember that this was the last unordered store we saw for DSE. We
1770 // don't yet handle DSE on ordered or volatile stores since we don't
1771 // have a good way to model the ordering requirement for following
1772 // passes once the store is removed. We could insert a fence, but
1773 // since fences are slightly stronger than stores in their ordering,
1774 // it's not clear this is a profitable transform. Another option would
1775 // be to merge the ordering with that of the post dominating store.
1776 if (MemInst
.isUnordered() && !MemInst
.isVolatile())
1779 LastStore
= nullptr;
1787 bool EarlyCSE::run() {
1788 // Note, deque is being used here because there is significant performance
1789 // gains over vector when the container becomes very large due to the
1790 // specific access patterns. For more information see the mailing list
1791 // discussion on this:
1792 // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
1793 std::deque
<StackNode
*> nodesToProcess
;
1795 bool Changed
= false;
1797 // Process the root node.
1798 nodesToProcess
.push_back(new StackNode(
1799 AvailableValues
, AvailableLoads
, AvailableInvariants
, AvailableCalls
,
1800 AvailableGEPs
, CurrentGeneration
, DT
.getRootNode(),
1801 DT
.getRootNode()->begin(), DT
.getRootNode()->end()));
1803 assert(!CurrentGeneration
&& "Create a new EarlyCSE instance to rerun it.");
1805 // Process the stack.
1806 while (!nodesToProcess
.empty()) {
1807 // Grab the first item off the stack. Set the current generation, remove
1808 // the node from the stack, and process it.
1809 StackNode
*NodeToProcess
= nodesToProcess
.back();
1811 // Initialize class members.
1812 CurrentGeneration
= NodeToProcess
->currentGeneration();
1814 // Check if the node needs to be processed.
1815 if (!NodeToProcess
->isProcessed()) {
1816 // Process the node.
1817 Changed
|= processNode(NodeToProcess
->node());
1818 NodeToProcess
->childGeneration(CurrentGeneration
);
1819 NodeToProcess
->process();
1820 } else if (NodeToProcess
->childIter() != NodeToProcess
->end()) {
1821 // Push the next child onto the stack.
1822 DomTreeNode
*child
= NodeToProcess
->nextChild();
1823 nodesToProcess
.push_back(new StackNode(
1824 AvailableValues
, AvailableLoads
, AvailableInvariants
, AvailableCalls
,
1825 AvailableGEPs
, NodeToProcess
->childGeneration(), child
,
1826 child
->begin(), child
->end()));
1828 // It has been processed, and there are no more children to process,
1829 // so delete it and pop it off the stack.
1830 delete NodeToProcess
;
1831 nodesToProcess
.pop_back();
1833 } // while (!nodes...)
1838 PreservedAnalyses
EarlyCSEPass::run(Function
&F
,
1839 FunctionAnalysisManager
&AM
) {
1840 auto &TLI
= AM
.getResult
<TargetLibraryAnalysis
>(F
);
1841 auto &TTI
= AM
.getResult
<TargetIRAnalysis
>(F
);
1842 auto &DT
= AM
.getResult
<DominatorTreeAnalysis
>(F
);
1843 auto &AC
= AM
.getResult
<AssumptionAnalysis
>(F
);
1845 UseMemorySSA
? &AM
.getResult
<MemorySSAAnalysis
>(F
).getMSSA() : nullptr;
1847 EarlyCSE
CSE(F
.getDataLayout(), TLI
, TTI
, DT
, AC
, MSSA
);
1850 return PreservedAnalyses::all();
1852 PreservedAnalyses PA
;
1853 PA
.preserveSet
<CFGAnalyses
>();
1855 PA
.preserve
<MemorySSAAnalysis
>();
1859 void EarlyCSEPass::printPipeline(
1860 raw_ostream
&OS
, function_ref
<StringRef(StringRef
)> MapClassName2PassName
) {
1861 static_cast<PassInfoMixin
<EarlyCSEPass
> *>(this)->printPipeline(
1862 OS
, MapClassName2PassName
);
1871 /// A simple and fast domtree-based CSE pass.
1873 /// This pass does a simple depth-first walk over the dominator tree,
1874 /// eliminating trivially redundant instructions and using instsimplify to
1875 /// canonicalize things as it goes. It is intended to be fast and catch obvious
1876 /// cases so that instcombine and other passes are more effective. It is
1877 /// expected that a later pass of GVN will catch the interesting/hard cases.
1878 template<bool UseMemorySSA
>
1879 class EarlyCSELegacyCommonPass
: public FunctionPass
{
1883 EarlyCSELegacyCommonPass() : FunctionPass(ID
) {
1885 initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
1887 initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
1890 bool runOnFunction(Function
&F
) override
{
1891 if (skipFunction(F
))
1894 auto &TLI
= getAnalysis
<TargetLibraryInfoWrapperPass
>().getTLI(F
);
1895 auto &TTI
= getAnalysis
<TargetTransformInfoWrapperPass
>().getTTI(F
);
1896 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
1897 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(F
);
1899 UseMemorySSA
? &getAnalysis
<MemorySSAWrapperPass
>().getMSSA() : nullptr;
1901 EarlyCSE
CSE(F
.getDataLayout(), TLI
, TTI
, DT
, AC
, MSSA
);
1906 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
1907 AU
.addRequired
<AssumptionCacheTracker
>();
1908 AU
.addRequired
<DominatorTreeWrapperPass
>();
1909 AU
.addRequired
<TargetLibraryInfoWrapperPass
>();
1910 AU
.addRequired
<TargetTransformInfoWrapperPass
>();
1912 AU
.addRequired
<AAResultsWrapperPass
>();
1913 AU
.addRequired
<MemorySSAWrapperPass
>();
1914 AU
.addPreserved
<MemorySSAWrapperPass
>();
1916 AU
.addPreserved
<GlobalsAAWrapperPass
>();
1917 AU
.addPreserved
<AAResultsWrapperPass
>();
1918 AU
.setPreservesCFG();
1922 } // end anonymous namespace
1924 using EarlyCSELegacyPass
= EarlyCSELegacyCommonPass
</*UseMemorySSA=*/false>;
1927 char EarlyCSELegacyPass::ID
= 0;
1929 INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass
, "early-cse", "Early CSE", false,
1931 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass
)
1932 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
)
1933 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
1934 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
1935 INITIALIZE_PASS_END(EarlyCSELegacyPass
, "early-cse", "Early CSE", false, false)
1937 using EarlyCSEMemSSALegacyPass
=
1938 EarlyCSELegacyCommonPass
</*UseMemorySSA=*/true>;
1941 char EarlyCSEMemSSALegacyPass::ID
= 0;
1943 FunctionPass
*llvm::createEarlyCSEPass(bool UseMemorySSA
) {
1945 return new EarlyCSEMemSSALegacyPass();
1947 return new EarlyCSELegacyPass();
1950 INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass
, "early-cse-memssa",
1951 "Early CSE w/ MemorySSA", false, false)
1952 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass
)
1953 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
)
1954 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
)
1955 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
1956 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
1957 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass
)
1958 INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass
, "early-cse-memssa",
1959 "Early CSE w/ MemorySSA", false, false)