//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions. It also performs simple dead load elimination.
//
// Note that this pass does the value numbering itself; it does not use the
// ValueNumbering analysis passes.
//
//===----------------------------------------------------------------------===//
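//
// Illustrative sketch (not part of the upstream header): given IR such as
//
//   %a = add i32 %x, %y
//   %b = add i32 %x, %y        ; same opcode and operands as %a
//   %v = load i32, i32* %p
//   %w = load i32, i32* %p     ; no intervening store to %p
//
// GVN assigns %b the same value number as %a and rewrites its uses to %a, and,
// when memory dependence analysis proves there is no clobber in between,
// replaces %w with %v.
//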

#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/VNCoercion.h"

using namespace llvm;
using namespace llvm::gvn;
using namespace llvm::VNCoercion;
using namespace PatternMatch;

#define DEBUG_TYPE "gvn"

STATISTIC(NumGVNInstr, "Number of instructions deleted");
STATISTIC(NumGVNLoad, "Number of loads deleted");
STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumGVNSimpl, "Number of instructions simplified");
STATISTIC(NumGVNEqProp, "Number of equalities propagated");
STATISTIC(NumPRELoad, "Number of loads PRE'd");
STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd");

STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
          "Number of blocks speculated as available in "
          "IsValueFullyAvailableInBlock(), max");
STATISTIC(MaxBBSpeculationCutoffReachedTimes,
          "Number of times we reached gvn-max-block-speculations cut-off "
          "preventing further exploration");

static cl::opt<bool> GVNEnablePRE("enable-pre", cl::init(true), cl::Hidden);
static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(true));
static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
                                            cl::init(true));
static cl::opt<bool>
    GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
                                    cl::init(false));
static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));

static cl::opt<uint32_t> MaxNumDeps(
    "gvn-max-num-deps", cl::Hidden, cl::init(100), cl::ZeroOrMore,
    cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));

// This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
static cl::opt<uint32_t> MaxBBSpeculations(
    "gvn-max-block-speculations", cl::Hidden, cl::init(600), cl::ZeroOrMore,
    cl::desc("Max number of blocks we're willing to speculate on (and recurse "
             "into) when deducing if a value is fully available or not in GVN "
             "(default = 600)"));

struct llvm::GVN::Expression {
  uint32_t opcode;
  bool commutative = false;
  Type *type = nullptr;
  SmallVector<uint32_t, 4> varargs;

  Expression(uint32_t o = ~2U) : opcode(o) {}

  bool operator==(const Expression &other) const {
    if (opcode != other.opcode)
      return false;
    if (opcode == ~0U || opcode == ~1U)
      return true;
    if (type != other.type)
      return false;
    if (varargs != other.varargs)
      return false;
    return true;
  }

  friend hash_code hash_value(const Expression &Value) {
    return hash_combine(
        Value.opcode, Value.type,
        hash_combine_range(Value.varargs.begin(), Value.varargs.end()));
  }
};

namespace llvm {

template <> struct DenseMapInfo<GVN::Expression> {
  static inline GVN::Expression getEmptyKey() { return ~0U; }
  static inline GVN::Expression getTombstoneKey() { return ~1U; }

  static unsigned getHashValue(const GVN::Expression &e) {
    using llvm::hash_value;

    return static_cast<unsigned>(hash_value(e));
  }

  static bool isEqual(const GVN::Expression &LHS, const GVN::Expression &RHS) {
    return LHS == RHS;
  }
};

} // end namespace llvm

/// Represents a particular available value that we know how to materialize.
/// Materialization of an AvailableValue never fails. An AvailableValue is
/// implicitly associated with a rematerialization point which is the
/// location of the instruction from which it was formed.
struct llvm::gvn::AvailableValue {
  enum ValType {
    SimpleVal, // A simple offsetted value that is accessed.
    LoadVal,   // A value produced by a load.
    MemIntrin, // A memory intrinsic which is loaded from.
    UndefVal   // An UndefValue representing a value from a dead block (which
               // is not yet physically removed from the CFG).
  };

  /// V - The value that is live out of the block.
  PointerIntPair<Value *, 2, ValType> Val;

  /// Offset - The byte offset in Val that is interesting for the load query.
  unsigned Offset = 0;

  static AvailableValue get(Value *V, unsigned Offset = 0) {
    AvailableValue Res;
    Res.Val.setPointer(V);
    Res.Val.setInt(SimpleVal);
    Res.Offset = Offset;
    return Res;
  }

  static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
    AvailableValue Res;
    Res.Val.setPointer(MI);
    Res.Val.setInt(MemIntrin);
    Res.Offset = Offset;
    return Res;
  }

  static AvailableValue getLoad(LoadInst *Load, unsigned Offset = 0) {
    AvailableValue Res;
    Res.Val.setPointer(Load);
    Res.Val.setInt(LoadVal);
    Res.Offset = Offset;
    return Res;
  }

  static AvailableValue getUndef() {
    AvailableValue Res;
    Res.Val.setPointer(nullptr);
    Res.Val.setInt(UndefVal);
    Res.Offset = 0;
    return Res;
  }

  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
  bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
  bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
  bool isUndefValue() const { return Val.getInt() == UndefVal; }

  Value *getSimpleValue() const {
    assert(isSimpleValue() && "Wrong accessor");
    return Val.getPointer();
  }

  LoadInst *getCoercedLoadValue() const {
    assert(isCoercedLoadValue() && "Wrong accessor");
    return cast<LoadInst>(Val.getPointer());
  }

  MemIntrinsic *getMemIntrinValue() const {
    assert(isMemIntrinValue() && "Wrong accessor");
    return cast<MemIntrinsic>(Val.getPointer());
  }

  /// Emit code at the specified insertion point to adjust the value defined
  /// here to the specified type. This handles various coercion cases.
  Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt,
                                  GVN &gvn) const;
};

/// Represents an AvailableValue which can be rematerialized at the end of
/// the associated BasicBlock.
struct llvm::gvn::AvailableValueInBlock {
  /// BB - The basic block in question.
  BasicBlock *BB = nullptr;

  /// AV - The actual available value.
  AvailableValue AV;

  static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.AV = std::move(AV);
    return Res;
  }

  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                   unsigned Offset = 0) {
    return get(BB, AvailableValue::get(V, Offset));
  }

  static AvailableValueInBlock getUndef(BasicBlock *BB) {
    return get(BB, AvailableValue::getUndef());
  }

  /// Emit code at the end of this block to adjust the value defined here to
  /// the specified type. This handles various coercion cases.
  Value *MaterializeAdjustedValue(LoadInst *Load, GVN &gvn) const {
    return AV.MaterializeAdjustedValue(Load, BB->getTerminator(), gvn);
  }
};

//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//

GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
  Expression e;
  e.type = I->getType();
  e.opcode = I->getOpcode();
  if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(I)) {
    // gc.relocate is a 'special' call: its second and third operands are
    // not real values, but indices into the statepoint's argument list.
    // Use the referred-to values for purposes of identity.
    e.varargs.push_back(lookupOrAdd(GCR->getOperand(0)));
    e.varargs.push_back(lookupOrAdd(GCR->getBasePtr()));
    e.varargs.push_back(lookupOrAdd(GCR->getDerivedPtr()));
  } else {
    for (Use &Op : I->operands())
      e.varargs.push_back(lookupOrAdd(Op));
  }
  if (I->isCommutative()) {
    // Ensure that commutative instructions that only differ by a permutation
    // of their operands get the same value number by sorting the operand value
    // numbers. Since commutative operands are the 1st two operands it is more
    // efficient to sort by hand rather than using, say, std::sort.
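    // For example (illustrative, assuming %x and %y already carry value
    // numbers 5 and 7): both "add i32 %x, %y" and "add i32 %y, %x" end up with
    // varargs {5, 7} after the swap below, and therefore the same Expression.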
    assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!");
    if (e.varargs[0] > e.varargs[1])
      std::swap(e.varargs[0], e.varargs[1]);
    e.commutative = true;
  }

  if (auto *C = dyn_cast<CmpInst>(I)) {
    // Sort the operand value numbers so x<y and y>x get the same value number.
    CmpInst::Predicate Predicate = C->getPredicate();
    if (e.varargs[0] > e.varargs[1]) {
      std::swap(e.varargs[0], e.varargs[1]);
      Predicate = CmpInst::getSwappedPredicate(Predicate);
    }
    e.opcode = (C->getOpcode() << 8) | Predicate;
    e.commutative = true;
  } else if (auto *E = dyn_cast<InsertValueInst>(I)) {
    e.varargs.append(E->idx_begin(), E->idx_end());
  } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
    ArrayRef<int> ShuffleMask = SVI->getShuffleMask();
    e.varargs.append(ShuffleMask.begin(), ShuffleMask.end());
  }

  return e;
}

GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
                                               CmpInst::Predicate Predicate,
                                               Value *LHS, Value *RHS) {
  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Not a comparison!");
  Expression e;
  e.type = CmpInst::makeCmpResultType(LHS->getType());
  e.varargs.push_back(lookupOrAdd(LHS));
  e.varargs.push_back(lookupOrAdd(RHS));

  // Sort the operand value numbers so x<y and y>x get the same value number.
  if (e.varargs[0] > e.varargs[1]) {
    std::swap(e.varargs[0], e.varargs[1]);
    Predicate = CmpInst::getSwappedPredicate(Predicate);
  }
  e.opcode = (Opcode << 8) | Predicate;
  e.commutative = true;
  return e;
}

GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
  assert(EI && "Not an ExtractValueInst?");
  Expression e;
  e.type = EI->getType();

  WithOverflowInst *WO = dyn_cast<WithOverflowInst>(EI->getAggregateOperand());
  if (WO != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
    // EI is an extract from one of our with.overflow intrinsics. Synthesize
    // a semantically equivalent expression instead of an extract value
    // expression.
    e.opcode = WO->getBinaryOp();
    e.varargs.push_back(lookupOrAdd(WO->getLHS()));
    e.varargs.push_back(lookupOrAdd(WO->getRHS()));
    return e;
  }

  // Not a recognised intrinsic. Fall back to producing an extract value
  // expression.
  e.opcode = EI->getOpcode();
  for (Use &Op : EI->operands())
    e.varargs.push_back(lookupOrAdd(Op));

  append_range(e.varargs, EI->indices());

  return e;
}

//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

GVN::ValueTable::ValueTable() = default;
GVN::ValueTable::ValueTable(const ValueTable &) = default;
GVN::ValueTable::ValueTable(ValueTable &&) = default;
GVN::ValueTable::~ValueTable() = default;
GVN::ValueTable &
GVN::ValueTable::operator=(const GVN::ValueTable &Arg) = default;

/// add - Insert a value into the table with a specified value number.
void GVN::ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
  if (PHINode *PN = dyn_cast<PHINode>(V))
    NumberingPhi[num] = PN;
}

uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
  if (AA->doesNotAccessMemory(C)) {
    Expression exp = createExpr(C);
    uint32_t e = assignExpNewValueNum(exp).first;
    valueNumbering[C] = e;
    return e;
  } else if (MD && AA->onlyReadsMemory(C)) {
    Expression exp = createExpr(C);
    auto ValNum = assignExpNewValueNum(exp);
    if (ValNum.second) {
      valueNumbering[C] = ValNum.first;
      return ValNum.first;
    }

    MemDepResult local_dep = MD->getDependency(C);

    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (local_dep.isDef()) {
      // For masked load/store intrinsics, the local_dep may actually be
      // a normal load or store instruction.
      CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst());

      if (!local_cdep ||
          local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }

      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
        uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
        uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
        }
      }

      uint32_t v = lookupOrAdd(local_cdep);
      valueNumbering[C] = v;
      return v;
    }

    // Non-local case.
    const MemoryDependenceResults::NonLocalDepInfo &deps =
        MD->getNonLocalCallDependency(C);
    // FIXME: Move the checking logic to MemDep!
    CallInst *cdep = nullptr;

    // Check to see if we have a single dominating call instruction that is
    // identical to C.
    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
      const NonLocalDepEntry *I = &deps[i];
      if (I->getResult().isNonLocal())
        continue;

      // We don't handle non-definitions. If we already have a call, reject
      // instruction dependencies.
      if (!I->getResult().isDef() || cdep != nullptr) {
        cdep = nullptr;
        break;
      }

      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
      // FIXME: All duplicated with non-local case.
      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())) {
        cdep = NonLocalDepCall;
        continue;
      }

      cdep = nullptr;
      break;
    }

    if (!cdep) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }
    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
      uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
      uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i));
      if (c_vn != cd_vn) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }
    }

    uint32_t v = lookupOrAdd(cdep);
    valueNumbering[C] = v;
    return v;
  } else {
    valueNumbering[C] = nextValueNumber;
    return nextValueNumber++;
  }
}

/// Returns true if a value number exists for the specified value.
bool GVN::ValueTable::exists(Value *V) const {
  return valueNumbering.count(V) != 0;
}

/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
  DenseMap<Value *, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (!isa<Instruction>(V)) {
    valueNumbering[V] = nextValueNumber;
    return nextValueNumber++;
  }

  Instruction *I = cast<Instruction>(V);
  Expression exp;
  switch (I->getOpcode()) {
    case Instruction::Call:
      return lookupOrAddCall(cast<CallInst>(I));
    case Instruction::FNeg:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::AddrSpaceCast:
    case Instruction::BitCast:
    case Instruction::Select:
    case Instruction::Freeze:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    case Instruction::ExtractValue:
      exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
      break;
    case Instruction::PHI:
      valueNumbering[V] = nextValueNumber;
      NumberingPhi[nextValueNumber] = cast<PHINode>(V);
      return nextValueNumber++;
    default:
      valueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
  }

  uint32_t e = assignExpNewValueNum(exp).first;
  valueNumbering[V] = e;
  return e;
}

/// Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
  DenseMap<Value *, uint32_t>::const_iterator VI = valueNumbering.find(V);
  if (Verify) {
    assert(VI != valueNumbering.end() && "Value not numbered?");
    return VI->second;
  }
  return (VI != valueNumbering.end()) ? VI->second : 0;
}

/// Returns the value number of the given comparison,
/// assigning it a new number if it did not have one before. Useful when
/// we deduced the result of a comparison, but don't immediately have an
/// instruction realizing that comparison to hand.
uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode,
                                         CmpInst::Predicate Predicate,
                                         Value *LHS, Value *RHS) {
  Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
  return assignExpNewValueNum(exp).first;
}
605 void GVN::ValueTable::clear() {
606 valueNumbering
.clear();
607 expressionNumbering
.clear();
608 NumberingPhi
.clear();
609 PhiTranslateTable
.clear();
616 /// Remove a value from the value numbering.
617 void GVN::ValueTable::erase(Value
*V
) {
618 uint32_t Num
= valueNumbering
.lookup(V
);
619 valueNumbering
.erase(V
);
620 // If V is PHINode, V <--> value number is an one-to-one mapping.
622 NumberingPhi
.erase(Num
);
625 /// verifyRemoved - Verify that the value is removed from all internal data
627 void GVN::ValueTable::verifyRemoved(const Value
*V
) const {
628 for (DenseMap
<Value
*, uint32_t>::const_iterator
629 I
= valueNumbering
.begin(), E
= valueNumbering
.end(); I
!= E
; ++I
) {
630 assert(I
->first
!= V
&& "Inst still occurs in value numbering map!");

//===----------------------------------------------------------------------===//
//                                GVN Pass
//===----------------------------------------------------------------------===//

bool GVN::isPREEnabled() const {
  return Options.AllowPRE.getValueOr(GVNEnablePRE);
}

bool GVN::isLoadPREEnabled() const {
  return Options.AllowLoadPRE.getValueOr(GVNEnableLoadPRE);
}

bool GVN::isLoadInLoopPREEnabled() const {
  return Options.AllowLoadInLoopPRE.getValueOr(GVNEnableLoadInLoopPRE);
}

bool GVN::isLoadPRESplitBackedgeEnabled() const {
  return Options.AllowLoadPRESplitBackedge.getValueOr(
      GVNEnableSplitBackedgeInLoadPRE);
}

bool GVN::isMemDepEnabled() const {
  return Options.AllowMemDep.getValueOr(GVNEnableMemDep);
}

PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
  // FIXME: The order of evaluation of these 'getResult' calls is very
  // significant! Re-ordering these variables will cause GVN when run alone to
  // be less effective! We should fix memdep and basic-aa to not exhibit this
  // behavior, but until then don't change the order here.
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &AA = AM.getResult<AAManager>(F);
  auto *MemDep =
      isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(F) : nullptr;
  auto *LI = AM.getCachedResult<LoopAnalysis>(F);
  auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
                         MSSA ? &MSSA->getMSSA() : nullptr);
  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<TargetLibraryAnalysis>();
  if (MSSA)
    PA.preserve<MemorySSAAnalysis>();
  if (LI)
    PA.preserve<LoopAnalysis>();
  return PA;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value *> &d) const {
  errs() << "{\n";
  for (auto &I : d) {
    errs() << I.first << "\n";
    I.second->dump();
  }
  errs() << "}\n";
}
#endif

enum class AvailabilityState : char {
  /// We know the block *is not* fully available. This is a fixpoint.
  Unavailable = 0,
  /// We know the block *is* fully available. This is a fixpoint.
  Available = 1,
  /// We do not know whether the block is fully available or not,
  /// but we are currently speculating that it will be.
  /// If it would have turned out that the block was, in fact, not fully
  /// available, this would have been cleaned up into an Unavailable.
  SpeculativelyAvailable = 2,
};

/// Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
/// map is actually a tri-state map with the following values:
///   0) we know the block *is not* fully available.
///   1) we know the block *is* fully available.
///   2) we do not know whether the block is fully available or not, but we are
///      currently speculating that it will be.
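//
// Illustrative walk-through: when querying a loop header whose predecessors
// are the preheader (value known available) and the not-yet-visited latch,
// the header and latch are first marked SpeculativelyAvailable; once the walk
// reaches the preheader without finding an unavailable block, the speculative
// entries reachable from the available blocks are fixed up to Available.
//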
static bool IsValueFullyAvailableInBlock(
    BasicBlock *BB,
    DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) {
  SmallVector<BasicBlock *, 32> Worklist;
  Optional<BasicBlock *> UnavailableBB;

  // The number of times we didn't find an entry for a block in a map and
  // optimistically inserted an entry marking block as speculatively available.
  unsigned NumNewNewSpeculativelyAvailableBBs = 0;

#ifndef NDEBUG
  SmallSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs;
  SmallVector<BasicBlock *, 32> AvailableBBs;
#endif

  Worklist.emplace_back(BB);
  while (!Worklist.empty()) {
    BasicBlock *CurrBB = Worklist.pop_back_val(); // LIFO - depth-first!
    // Optimistically assume that the block is Speculatively Available and check
    // to see if we already know about this block in one lookup.
    std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV =
        FullyAvailableBlocks.try_emplace(
            CurrBB, AvailabilityState::SpeculativelyAvailable);
    AvailabilityState &State = IV.first->second;

    // Did the entry already exist for this block?
    if (!IV.second) {
      if (State == AvailabilityState::Unavailable) {
        UnavailableBB = CurrBB;
        break; // Backpropagate unavailability info.
      }

#ifndef NDEBUG
      AvailableBBs.emplace_back(CurrBB);
#endif
      continue; // Don't recurse further, but continue processing worklist.
    }

    // No entry found for block.
    ++NumNewNewSpeculativelyAvailableBBs;
    bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations;

    // If we have exhausted our budget, mark this block as unavailable.
    // Also, if this block has no predecessors, the value isn't live-in here.
    if (OutOfBudget || pred_empty(CurrBB)) {
      MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget;
      State = AvailabilityState::Unavailable;
      UnavailableBB = CurrBB;
      break; // Backpropagate unavailability info.
    }

    // Tentatively consider this block as speculatively available.
#ifndef NDEBUG
    NewSpeculativelyAvailableBBs.insert(CurrBB);
#endif
    // And further recurse into block's predecessors, in depth-first order!
    Worklist.append(pred_begin(CurrBB), pred_end(CurrBB));
  }

#if LLVM_ENABLE_STATS
  IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax(
      NumNewNewSpeculativelyAvailableBBs);
#endif

  // If the block isn't marked as fixpoint yet
  // (the Unavailable and Available states are fixpoints)
  auto MarkAsFixpointAndEnqueueSuccessors =
      [&](BasicBlock *BB, AvailabilityState FixpointState) {
        auto It = FullyAvailableBlocks.find(BB);
        if (It == FullyAvailableBlocks.end())
          return; // Never queried this block, leave as-is.
        switch (AvailabilityState &State = It->second) {
        case AvailabilityState::Unavailable:
        case AvailabilityState::Available:
          return; // Don't backpropagate further, continue processing worklist.
        case AvailabilityState::SpeculativelyAvailable: // Fix it!
          State = FixpointState;
#ifndef NDEBUG
          assert(NewSpeculativelyAvailableBBs.erase(BB) &&
                 "Found a speculatively available successor leftover?");
#endif
          // Queue successors for further processing.
          Worklist.append(succ_begin(BB), succ_end(BB));
          return;
        }
      };

  if (UnavailableBB) {
    // Okay, we have encountered an unavailable block.
    // Mark speculatively available blocks reachable from UnavailableBB as
    // unavailable as well. Paths are terminated when they reach blocks not in
    // FullyAvailableBlocks or they are not marked as speculatively available.
    Worklist.clear();
    Worklist.append(succ_begin(*UnavailableBB), succ_end(*UnavailableBB));
    while (!Worklist.empty())
      MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
                                         AvailabilityState::Unavailable);
  }

#ifndef NDEBUG
  Worklist.clear();
  for (BasicBlock *AvailableBB : AvailableBBs)
    Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB));
  while (!Worklist.empty())
    MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
                                       AvailabilityState::Available);

  assert(NewSpeculativelyAvailableBBs.empty() &&
         "Must have fixed all the new speculatively available blocks.");
#endif

  return !UnavailableBB;
}

/// Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate Load. This returns the value
/// that should be used at Load's definition site.
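//
// Illustrative example (block and value names hypothetical): if the load's
// block has two predecessors BB1 and BB2, and ValuesPerBlock records %v1
// available in BB1 and %v2 in BB2, this builds
//   %merge = phi i32 [ %v1, %BB1 ], [ %v2, %BB2 ]
// in the load's block and returns %merge; with a single dominating available
// value, no phi is built and that value is returned directly.
//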
static Value *
ConstructSSAForLoadSet(LoadInst *Load,
                       SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                       GVN &gvn) {
  // Check for the fully redundant, dominating load case. In this case, we can
  // just use the dominating value directly.
  if (ValuesPerBlock.size() == 1 &&
      gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
                                               Load->getParent())) {
    assert(!ValuesPerBlock[0].AV.isUndefValue() &&
           "Dead BB dominate this block");
    return ValuesPerBlock[0].MaterializeAdjustedValue(Load, gvn);
  }

  // Otherwise, we have to construct SSA form.
  SmallVector<PHINode *, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(Load->getType(), Load->getName());

  for (const AvailableValueInBlock &AV : ValuesPerBlock) {
    BasicBlock *BB = AV.BB;

    if (AV.AV.isUndefValue())
      continue;

    if (SSAUpdate.HasValueForBlock(BB))
      continue;

    // If the value is the load that we will be eliminating, and the block it's
    // available in is the block that the load is in, then don't add it as
    // SSAUpdater will resolve the value to the relevant phi which may let it
    // avoid phi construction entirely if there's actually only one value.
    if (BB == Load->getParent() &&
        ((AV.AV.isSimpleValue() && AV.AV.getSimpleValue() == Load) ||
         (AV.AV.isCoercedLoadValue() && AV.AV.getCoercedLoadValue() == Load)))
      continue;

    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(Load, gvn));
  }

  // Perform PHI construction.
  return SSAUpdate.GetValueInMiddleOfBlock(Load->getParent());
}

Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
                                                Instruction *InsertPt,
                                                GVN &gvn) const {
  Value *Res;
  Type *LoadTy = Load->getType();
  const DataLayout &DL = Load->getModule()->getDataLayout();
  if (isSimpleValue()) {
    Res = getSimpleValue();
    if (Res->getType() != LoadTy) {
      Res = getStoreValueForLoad(Res, Offset, LoadTy, InsertPt, DL);

      LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset
                        << "  " << *getSimpleValue() << '\n'
                        << *Res << '\n' << "\n\n\n");
    }
  } else if (isCoercedLoadValue()) {
    LoadInst *CoercedLoad = getCoercedLoadValue();
    if (CoercedLoad->getType() == LoadTy && Offset == 0) {
      Res = CoercedLoad;
    } else {
      Res = getLoadValueForLoad(CoercedLoad, Offset, LoadTy, InsertPt, DL);
      // We would like to use gvn.markInstructionForDeletion here, but we can't
      // because the load is already memoized into the leader map table that GVN
      // tracks. It is potentially possible to remove the load from the table,
      // but then all of the operations based on it would need to be rehashed.
      // Just leave the dead load around.
      gvn.getMemDep().removeInstruction(CoercedLoad);
      LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset
                        << "  " << *getCoercedLoadValue() << '\n'
                        << *Res << '\n' << "\n\n\n");
    }
  } else if (isMemIntrinValue()) {
    Res = getMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
                                 InsertPt, DL);
    LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
                      << "  " << *getMemIntrinValue() << '\n'
                      << *Res << '\n' << "\n\n\n");
  } else {
    llvm_unreachable("Should not materialize value from dead block");
  }
  assert(Res && "failed to materialize?");
  return Res;
}

static bool isLifetimeStart(const Instruction *Inst) {
  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
    return II->getIntrinsicID() == Intrinsic::lifetime_start;
  return false;
}

/// Assuming To can be reached from both From and Between, does Between lie on
/// every path from From to To?
static bool liesBetween(const Instruction *From, Instruction *Between,
                        const Instruction *To, DominatorTree *DT) {
  if (From->getParent() == Between->getParent())
    return DT->dominates(From, Between);
  SmallSet<BasicBlock *, 1> Exclusion;
  Exclusion.insert(Between->getParent());
  return !isPotentiallyReachable(From, To, &Exclusion, DT);
}

/// Try to locate the three instructions involved in a missed
/// load-elimination case that is due to an intervening store.
static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
                                   DominatorTree *DT,
                                   OptimizationRemarkEmitter *ORE) {
  using namespace ore;

  User *OtherAccess = nullptr;

  OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", Load);
  R << "load of type " << NV("Type", Load->getType()) << " not eliminated"
    << setExtraArgs();

  for (auto *U : Load->getPointerOperand()->users()) {
    if (U != Load && (isa<LoadInst>(U) || isa<StoreInst>(U)) &&
        cast<Instruction>(U)->getFunction() == Load->getFunction() &&
        DT->dominates(cast<Instruction>(U), Load)) {
      // Use the most immediately dominating value.
      if (OtherAccess) {
        if (DT->dominates(cast<Instruction>(OtherAccess), cast<Instruction>(U)))
          OtherAccess = U;
        else
          assert(DT->dominates(cast<Instruction>(U),
                               cast<Instruction>(OtherAccess)));
      } else
        OtherAccess = U;
    }
  }

  if (!OtherAccess) {
    // There is no dominating use, check if we can find a closest non-dominating
    // use that lies between any other potentially available use and Load.
    for (auto *U : Load->getPointerOperand()->users()) {
      if (U != Load && (isa<LoadInst>(U) || isa<StoreInst>(U)) &&
          cast<Instruction>(U)->getFunction() == Load->getFunction() &&
          isPotentiallyReachable(cast<Instruction>(U), Load, nullptr, DT)) {
        if (OtherAccess) {
          if (liesBetween(cast<Instruction>(OtherAccess), cast<Instruction>(U),
                          Load, DT)) {
            OtherAccess = U;
          } else if (!liesBetween(cast<Instruction>(U),
                                  cast<Instruction>(OtherAccess), Load, DT)) {
            // These uses are both partially available at Load were it not for
            // the clobber, but neither lies strictly after the other.
            OtherAccess = nullptr;
            break;
          } // else: keep current OtherAccess since it lies between U and Load.
        } else {
          OtherAccess = U;
        }
      }
    }
  }

  if (OtherAccess)
    R << " in favor of " << NV("OtherAccess", OtherAccess);

  R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst());

  ORE->emit(R);
}

bool GVN::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
                                  Value *Address, AvailableValue &Res) {
  assert((DepInfo.isDef() || DepInfo.isClobber()) &&
         "expected a local dependence");
  assert(Load->isUnordered() && "rules below are incorrect for ordered access");

  const DataLayout &DL = Load->getModule()->getDataLayout();

  Instruction *DepInst = DepInfo.getInst();
  if (DepInfo.isClobber()) {
    // If the dependence is to a store that writes to a superset of the bits
    // read by the load, we can extract the bits we need for the load from the
    // stored value.
*DepSI
= dyn_cast
<StoreInst
>(DepInst
)) {
1019 // Can't forward from non-atomic to atomic without violating memory model.
1020 if (Address
&& Load
->isAtomic() <= DepSI
->isAtomic()) {
1022 analyzeLoadFromClobberingStore(Load
->getType(), Address
, DepSI
, DL
);
1024 Res
= AvailableValue::get(DepSI
->getValueOperand(), Offset
);
1030 // Check to see if we have something like this:
1033 // if we have this, replace the later with an extraction from the former.
1034 if (LoadInst
*DepLoad
= dyn_cast
<LoadInst
>(DepInst
)) {
1035 // If this is a clobber and L is the first instruction in its block, then
1036 // we have the first instruction in the entry block.
1037 // Can't forward from non-atomic to atomic without violating memory model.
1038 if (DepLoad
!= Load
&& Address
&&
1039 Load
->isAtomic() <= DepLoad
->isAtomic()) {
1040 Type
*LoadType
= Load
->getType();
1043 // If MD reported clobber, check it was nested.
1044 if (DepInfo
.isClobber() &&
1045 canCoerceMustAliasedValueToLoad(DepLoad
, LoadType
, DL
)) {
1046 const auto ClobberOff
= MD
->getClobberOffset(DepLoad
);
1047 // GVN has no deal with a negative offset.
1048 Offset
= (ClobberOff
== None
|| ClobberOff
.getValue() < 0)
1050 : ClobberOff
.getValue();
1054 analyzeLoadFromClobberingLoad(LoadType
, Address
, DepLoad
, DL
);
1056 Res
= AvailableValue::getLoad(DepLoad
, Offset
);
1062 // If the clobbering value is a memset/memcpy/memmove, see if we can
1063 // forward a value on from it.
1064 if (MemIntrinsic
*DepMI
= dyn_cast
<MemIntrinsic
>(DepInst
)) {
1065 if (Address
&& !Load
->isAtomic()) {
1066 int Offset
= analyzeLoadFromClobberingMemInst(Load
->getType(), Address
,
1069 Res
= AvailableValue::getMI(DepMI
, Offset
);
1074 // Nothing known about this clobber, have to be conservative
1076 // fast print dep, using operator<< on instruction is too slow.
1077 dbgs() << "GVN: load "; Load
->printAsOperand(dbgs());
1078 dbgs() << " is clobbered by " << *DepInst
<< '\n';);
1079 if (ORE
->allowExtraAnalysis(DEBUG_TYPE
))
1080 reportMayClobberedLoad(Load
, DepInfo
, DT
, ORE
);
1084 assert(DepInfo
.isDef() && "follows from above");
1086 // Loading the allocation -> undef.
1087 if (isa
<AllocaInst
>(DepInst
) || isMallocLikeFn(DepInst
, TLI
) ||
1088 isAlignedAllocLikeFn(DepInst
, TLI
) ||
1089 // Loading immediately after lifetime begin -> undef.
1090 isLifetimeStart(DepInst
)) {
1091 Res
= AvailableValue::get(UndefValue::get(Load
->getType()));
1095 // Loading from calloc (which zero initializes memory) -> zero
1096 if (isCallocLikeFn(DepInst
, TLI
)) {
1097 Res
= AvailableValue::get(Constant::getNullValue(Load
->getType()));
1101 if (StoreInst
*S
= dyn_cast
<StoreInst
>(DepInst
)) {
1102 // Reject loads and stores that are to the same address but are of
1103 // different types if we have to. If the stored value is convertable to
1104 // the loaded value, we can reuse it.
1105 if (!canCoerceMustAliasedValueToLoad(S
->getValueOperand(), Load
->getType(),
1109 // Can't forward from non-atomic to atomic without violating memory model.
1110 if (S
->isAtomic() < Load
->isAtomic())
1113 Res
= AvailableValue::get(S
->getValueOperand());
1117 if (LoadInst
*LD
= dyn_cast
<LoadInst
>(DepInst
)) {
1118 // If the types mismatch and we can't handle it, reject reuse of the load.
1119 // If the stored value is larger or equal to the loaded value, we can reuse
1121 if (!canCoerceMustAliasedValueToLoad(LD
, Load
->getType(), DL
))
1124 // Can't forward from non-atomic to atomic without violating memory model.
1125 if (LD
->isAtomic() < Load
->isAtomic())
1128 Res
= AvailableValue::getLoad(LD
);
1132 // Unknown def - must be conservative
1134 // fast print dep, using operator<< on instruction is too slow.
1135 dbgs() << "GVN: load "; Load
->printAsOperand(dbgs());
1136 dbgs() << " has unknown def " << *DepInst
<< '\n';);

void GVN::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
                                  AvailValInBlkVect &ValuesPerBlock,
                                  UnavailBlkVect &UnavailableBlocks) {
  // Filter out useless results (non-locals, etc). Keep track of the blocks
  // where we have a value available in repl, also keep track of whether we see
  // dependencies that produce an unknown value for the load (such as a call
  // that could potentially clobber the load).
  unsigned NumDeps = Deps.size();
  for (unsigned i = 0, e = NumDeps; i != e; ++i) {
    BasicBlock *DepBB = Deps[i].getBB();
    MemDepResult DepInfo = Deps[i].getResult();

    if (DeadBlocks.count(DepBB)) {
      // A dead dependent mem-op is disguised as a load evaluating the same
      // value as the load in question.
      ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
      continue;
    }

    if (!DepInfo.isDef() && !DepInfo.isClobber()) {
      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    // The address being loaded in this non-local block may not be the same as
    // the pointer operand of the load if PHI translation occurs. Make sure
    // to consider the right address.
    Value *Address = Deps[i].getAddress();

    AvailableValue AV;
    if (AnalyzeLoadAvailability(Load, DepInfo, Address, AV)) {
      // subtlety: because we know this was a non-local dependency, we know
      // it's safe to materialize anywhere between the instruction within
      // DepInfo and the end of its block.
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                          std::move(AV)));
    } else {
      UnavailableBlocks.push_back(DepBB);
    }
  }

  assert(NumDeps == ValuesPerBlock.size() + UnavailableBlocks.size() &&
         "post condition violation");
}

void GVN::eliminatePartiallyRedundantLoad(
    LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
    MapVector<BasicBlock *, Value *> &AvailableLoads) {
  for (const auto &AvailableLoad : AvailableLoads) {
    BasicBlock *UnavailableBlock = AvailableLoad.first;
    Value *LoadPtr = AvailableLoad.second;

    auto *NewLoad =
        new LoadInst(Load->getType(), LoadPtr, Load->getName() + ".pre",
                     Load->isVolatile(), Load->getAlign(), Load->getOrdering(),
                     Load->getSyncScopeID(), UnavailableBlock->getTerminator());
    NewLoad->setDebugLoc(Load->getDebugLoc());
    if (MSSAU) {
      auto *MSSA = MSSAU->getMemorySSA();
      // Get the defining access of the original load or use the load if it is a
      // MemoryDef (e.g. because it is volatile). The inserted loads are
      // guaranteed to load from the same definition.
      auto *LoadAcc = MSSA->getMemoryAccess(Load);
      auto *DefiningAcc =
          isa<MemoryDef>(LoadAcc) ? LoadAcc : LoadAcc->getDefiningAccess();
      auto *NewAccess = MSSAU->createMemoryAccessInBB(
          NewLoad, DefiningAcc, NewLoad->getParent(),
          MemorySSA::BeforeTerminator);
      if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess))
        MSSAU->insertDef(NewDef, /*RenameUses=*/true);
      else
        MSSAU->insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true);
    }

    // Transfer the old load's AA tags to the new load.
    AAMDNodes Tags;
    Load->getAAMetadata(Tags);
    if (Tags)
      NewLoad->setAAMetadata(Tags);

    if (auto *MD = Load->getMetadata(LLVMContext::MD_invariant_load))
      NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD);
    if (auto *InvGroupMD = Load->getMetadata(LLVMContext::MD_invariant_group))
      NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD);
    if (auto *RangeMD = Load->getMetadata(LLVMContext::MD_range))
      NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
    if (auto *AccessMD = Load->getMetadata(LLVMContext::MD_access_group))
      if (LI &&
          LI->getLoopFor(Load->getParent()) == LI->getLoopFor(UnavailableBlock))
        NewLoad->setMetadata(LLVMContext::MD_access_group, AccessMD);

    // We do not propagate the old load's debug location, because the new
    // load now lives in a different BB, and we want to avoid a jumpy line
    // table.
    // FIXME: How do we retain source locations without causing poor debugging
    // behavior?

    // Add the newly created load.
    ValuesPerBlock.push_back(
        AvailableValueInBlock::get(UnavailableBlock, NewLoad));
    MD->invalidateCachedPointerInfo(LoadPtr);
    LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
  }

  // Perform PHI construction.
  Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
  Load->replaceAllUsesWith(V);
  if (isa<PHINode>(V))
    V->takeName(Load);
  if (Instruction *I = dyn_cast<Instruction>(V))
    I->setDebugLoc(Load->getDebugLoc());
  if (V->getType()->isPtrOrPtrVectorTy())
    MD->invalidateCachedPointerInfo(V);
  markInstructionForDeletion(Load);
  ORE->emit([&]() {
    return OptimizationRemark(DEBUG_TYPE, "LoadPRE", Load)
           << "load eliminated by PRE";
  });
}

bool GVN::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
                         UnavailBlkVect &UnavailableBlocks) {
  // Okay, we have *some* definitions of the value. This means that the value
  // is available in some of our (transitive) predecessors. Let's think about
  // doing PRE of this load. This will involve inserting a new load into the
  // predecessor when it's not available. We could do this in general, but
  // prefer to not increase code size. As such, we only do this when we know
  // that we only have to insert *one* load (which means we're basically moving
  // the load, not inserting a new one).
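  //
  // Illustrative example (block names hypothetical): if "%v = load i32, i32* %p"
  // in block Join is already available along the edge from Pred1 but not along
  // the edge from Pred2, load PRE inserts "%v.pre = load i32, i32* %p" at the
  // end of Pred2 and rewrites %v to
  //   phi i32 [ %available, %Pred1 ], [ %v.pre, %Pred2 ]
  //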

  SmallPtrSet<BasicBlock *, 4> Blockers(UnavailableBlocks.begin(),
                                        UnavailableBlocks.end());

  // Let's find the first basic block with more than one predecessor. Walk
  // backwards through predecessors if needed.
  BasicBlock *LoadBB = Load->getParent();
  BasicBlock *TmpBB = LoadBB;

  // Check that there are no implicit control flow instructions above our load
  // in its block. If there is an instruction that doesn't always pass the
  // execution to the following instruction, then moving through it may become
  // invalid. For example:
  //
  // int arr[LEN];
  // int index = ???;
  // ...
  // guard(0 <= index && index < LEN);
  // use(arr[index]);
  //
  // It is illegal to move the array access to any point above the guard,
  // because if the index is out of bounds we should deoptimize rather than
  // access the array.
  // Check that there is no guard in this block above our instruction.
  bool MustEnsureSafetyOfSpeculativeExecution =
      ICF->isDominatedByICFIFromSameBlock(Load);

  while (TmpBB->getSinglePredecessor()) {
    TmpBB = TmpBB->getSinglePredecessor();
    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
      return false;
    if (Blockers.count(TmpBB))
      return false;

    // If any of these blocks has more than one successor (i.e. if the edge we
    // just traversed was critical), then there are other paths through this
    // block along which the load may not be anticipated. Hoisting the load
    // above this block would be adding the load to execution paths along
    // which it was not previously executed.
    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
      return false;

    // Check that there is no implicit control flow in a block above.
    MustEnsureSafetyOfSpeculativeExecution =
        MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB);
  }

  assert(TmpBB);
  LoadBB = TmpBB;

  // Check to see how many predecessors have the loaded value fully
  // available.
  MapVector<BasicBlock *, Value *> PredLoads;
  DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks;
  for (const AvailableValueInBlock &AV : ValuesPerBlock)
    FullyAvailableBlocks[AV.BB] = AvailabilityState::Available;
  for (BasicBlock *UnavailableBB : UnavailableBlocks)
    FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;

  SmallVector<BasicBlock *, 4> CriticalEdgePred;
  for (BasicBlock *Pred : predecessors(LoadBB)) {
    // If any predecessor block is an EH pad that does not allow non-PHI
    // instructions before the terminator, we can't PRE the load.
    if (Pred->getTerminator()->isEHPad()) {
      LLVM_DEBUG(
          dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
                 << Pred->getName() << "': " << *Load << '\n');
      return false;
    }

    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
      continue;
    }

    if (Pred->getTerminator()->getNumSuccessors() != 1) {
      if (isa<IndirectBrInst>(Pred->getTerminator())) {
        LLVM_DEBUG(
            dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
                   << Pred->getName() << "': " << *Load << '\n');
        return false;
      }

      // FIXME: Can we support the fallthrough edge?
      if (isa<CallBrInst>(Pred->getTerminator())) {
        LLVM_DEBUG(
            dbgs() << "COULD NOT PRE LOAD BECAUSE OF CALLBR CRITICAL EDGE '"
                   << Pred->getName() << "': " << *Load << '\n');
        return false;
      }

      if (LoadBB->isEHPad()) {
        LLVM_DEBUG(
            dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
                   << Pred->getName() << "': " << *Load << '\n');
        return false;
      }

      // Do not split backedge as it will break the canonical loop form.
      if (!isLoadPRESplitBackedgeEnabled())
        if (DT->dominates(LoadBB, Pred)) {
          LLVM_DEBUG(
              dbgs()
              << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
              << Pred->getName() << "': " << *Load << '\n');
          return false;
        }

      CriticalEdgePred.push_back(Pred);
    } else {
      // Only add the predecessors that will not be split for now.
      PredLoads[Pred] = nullptr;
    }
  }

  // Decide whether PRE is profitable for this load.
  unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size();
  assert(NumUnavailablePreds != 0 &&
         "Fully available value should already be eliminated!");

  // If this load is unavailable in multiple predecessors, reject it.
  // FIXME: If we could restructure the CFG, we could make a common pred with
  // all the preds that don't have an available Load and insert a new load into
  // that one block.
  if (NumUnavailablePreds != 1)
    return false;

  // Now we know where we will insert the load. We must ensure that it is safe
  // to speculatively execute the load at that point.
  if (MustEnsureSafetyOfSpeculativeExecution) {
    if (CriticalEdgePred.size())
      if (!isSafeToSpeculativelyExecute(Load, LoadBB->getFirstNonPHI(), DT))
        return false;
    for (auto &PL : PredLoads)
      if (!isSafeToSpeculativelyExecute(Load, PL.first->getTerminator(), DT))
        return false;
  }

  // Split critical edges, and update the unavailable predecessors accordingly.
  for (BasicBlock *OrigPred : CriticalEdgePred) {
    BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
    assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
    PredLoads[NewPred] = nullptr;
    LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
                      << LoadBB->getName() << '\n');
  }

  // Check if the load can safely be moved to all the unavailable predecessors.
  bool CanDoPRE = true;
  const DataLayout &DL = Load->getModule()->getDataLayout();
  SmallVector<Instruction *, 8> NewInsts;
  for (auto &PredLoad : PredLoads) {
    BasicBlock *UnavailablePred = PredLoad.first;

    // Do PHI translation to get its value in the predecessor if necessary. The
    // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
    // We do the translation for each edge we skipped by going from Load's block
    // to LoadBB, otherwise we might miss pieces needing translation.

    // If all preds have a single successor, then we know it is safe to insert
    // the load on the pred (?!?), so we can insert code to materialize the
    // pointer if it is not available.
    Value *LoadPtr = Load->getPointerOperand();
    BasicBlock *Cur = Load->getParent();
    while (Cur != LoadBB) {
      PHITransAddr Address(LoadPtr, DL, AC);
      LoadPtr = Address.PHITranslateWithInsertion(
          Cur, Cur->getSinglePredecessor(), *DT, NewInsts);
      if (!LoadPtr) {
        CanDoPRE = false;
        break;
      }
      Cur = Cur->getSinglePredecessor();
    }

    if (LoadPtr) {
      PHITransAddr Address(LoadPtr, DL, AC);
      LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, *DT,
                                                  NewInsts);
    }
    // If we couldn't find or insert a computation of this phi translated value,
    // we fail PRE.
    if (!LoadPtr) {
      LLVM_DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
                        << *Load->getPointerOperand() << "\n");
      CanDoPRE = false;
      break;
    }

    PredLoad.second = LoadPtr;
  }

  if (!CanDoPRE) {
    while (!NewInsts.empty()) {
      // Erase instructions generated by the failed PHI translation before
      // trying to number them. PHI translation might insert instructions
      // in basic blocks other than the current one, and we delete them
      // directly, as markInstructionForDeletion only allows removing from the
      // current basic block.
      NewInsts.pop_back_val()->eraseFromParent();
    }
    // HINT: Don't revert the edge-splitting as following transformation may
    // also need to split these critical edges.
    return !CriticalEdgePred.empty();
  }

  // Okay, we can eliminate this load by inserting a reload in the predecessor
  // and using PHI construction to get the value in the other predecessors, do
  // it!
  LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *Load << '\n');
  LLVM_DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size()
                                           << " INSTS: " << *NewInsts.back()
                                           << '\n');

  // Assign value numbers to the new instructions.
  for (Instruction *I : NewInsts) {
    // Instructions that have been inserted in predecessor(s) to materialize
    // the load address do not retain their original debug locations. Doing
    // so could lead to confusing (but correct) source attributions.
    I->updateLocationAfterHoist();

    // FIXME: We really _ought_ to insert these value numbers into their
    // parent's availability map. However, in doing so, we risk getting into
    // ordering issues. If a block hasn't been processed yet, we would be
    // marking a value as AVAIL-IN, which isn't what we intend.
    VN.lookupOrAdd(I);
  }

  eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, PredLoads);
  ++NumPRELoad;
  return true;
}

bool GVN::performLoopLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
                             UnavailBlkVect &UnavailableBlocks) {
  if (!LI)
    return false;

  const Loop *L = LI->getLoopFor(Load->getParent());
  // TODO: Generalize to other loop blocks that dominate the latch.
  if (!L || L->getHeader() != Load->getParent())
    return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  BasicBlock *Latch = L->getLoopLatch();
  if (!Preheader || !Latch)
    return false;

  Value *LoadPtr = Load->getPointerOperand();
  // Must be available in preheader.
  if (!L->isLoopInvariant(LoadPtr))
    return false;

  // We plan to hoist the load to the preheader without introducing a new fault.
  // In order to do it, we need to prove that we cannot side-exit the loop
  // once the loop header is first entered before execution of the load.
  if (ICF->isDominatedByICFIFromSameBlock(Load))
    return false;

  BasicBlock *LoopBlock = nullptr;
  for (auto *Blocker : UnavailableBlocks) {
    // Blockers from outside the loop are handled in preheader.
    if (!L->contains(Blocker))
      continue;

    // Only allow one loop block. The loop header is not less frequently
    // executed than each loop block, and likely it is much more frequently
    // executed. But in case of multiple loop blocks, we need extra information
    // (such as block frequency info) to understand whether it is profitable to
    // PRE into multiple loop blocks.
    if (LoopBlock)
      return false;

    // Do not sink into inner loops. This may be non-profitable.
    if (L != LI->getLoopFor(Blocker))
      return false;

    // Blocks that dominate the latch execute on every single iteration, maybe
    // except the last one. So PREing into these blocks doesn't make much sense
    // in most cases. But the blocks that do not necessarily execute on each
    // iteration are sometimes much colder than the header, and this is when
    // PRE is potentially profitable.
    if (DT->dominates(Blocker, Latch))
      return false;

    // Make sure that the terminator itself doesn't clobber.
    if (Blocker->getTerminator()->mayWriteToMemory())
      return false;

    LoopBlock = Blocker;
  }

  if (!LoopBlock)
    return false;

  // Make sure the memory at this pointer cannot be freed, therefore we can
  // safely reload from it after clobber.
  if (LoadPtr->canBeFreed())
    return false;

  // TODO: Support critical edge splitting if blocker has more than 1 successor.
  MapVector<BasicBlock *, Value *> AvailableLoads;
  AvailableLoads[LoopBlock] = LoadPtr;
  AvailableLoads[Preheader] = LoadPtr;

  LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load << '\n');
  eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads);
  ++NumPRELoopLoad;
  return true;
}

static void reportLoadElim(LoadInst *Load, Value *AvailableValue,
                           OptimizationRemarkEmitter *ORE) {
  using namespace ore;

  ORE->emit([&]() {
    return OptimizationRemark(DEBUG_TYPE, "LoadElim", Load)
           << "load of type " << NV("Type", Load->getType()) << " eliminated"
           << setExtraArgs() << " in favor of "
           << NV("InfavorOfValue", AvailableValue);
  });
}
/// Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *Load) {
  // Non-local speculations are not allowed under asan.
  if (Load->getParent()->getParent()->hasFnAttribute(
          Attribute::SanitizeAddress) ||
      Load->getParent()->getParent()->hasFnAttribute(
          Attribute::SanitizeHWAddress))
    return false;

  // Step 1: Find the non-local dependencies of the load.
  LoadDepVect Deps;
  MD->getNonLocalPointerDependency(Load, Deps);

  // If we had to process more than one hundred blocks to find the
  // dependencies, this load isn't worth worrying about. Optimizing
  // it will be too expensive.
  unsigned NumDeps = Deps.size();
  if (NumDeps > MaxNumDeps)
    return false;

  // If we had a phi translation failure, we'll have a single entry which is a
  // clobber in the current block. Reject this early.
  if (NumDeps == 1 &&
      !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
    LLVM_DEBUG(dbgs() << "GVN: non-local load "; Load->printAsOperand(dbgs());
               dbgs() << " has unknown dependencies\n";);
    return false;
  }

  bool Changed = false;
  // If this load follows a GEP, see if we can PRE the indices before analyzing.
  if (GetElementPtrInst *GEP =
          dyn_cast<GetElementPtrInst>(Load->getOperand(0))) {
    for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(),
                                        OE = GEP->idx_end();
         OI != OE; ++OI)
      if (Instruction *I = dyn_cast<Instruction>(OI->get()))
        Changed |= performScalarPRE(I);
  }

  // Step 2: Analyze the availability of the load
  AvailValInBlkVect ValuesPerBlock;
  UnavailBlkVect UnavailableBlocks;
  AnalyzeLoadAvailability(Load, Deps, ValuesPerBlock, UnavailableBlocks);

  // If we have no predecessors that produce a known value for this load, exit
  // early.
  if (ValuesPerBlock.empty())
    return Changed;

  // Step 3: Eliminate full redundancy.
  //
  // If all of the instructions we depend on produce a known value for this
  // load, then it is fully redundant and we can use PHI insertion to compute
  // its value. Insert PHIs and remove the fully redundant value now.
  if (UnavailableBlocks.empty()) {
    LLVM_DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *Load << '\n');

    // Perform PHI construction.
    Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
    Load->replaceAllUsesWith(V);

    if (isa<PHINode>(V))
      V->takeName(Load);
    if (Instruction *I = dyn_cast<Instruction>(V))
      // If instruction I has debug info, then we should not update it.
      // Also, if I has a null DebugLoc, then it is still potentially incorrect
      // to propagate Load's DebugLoc because Load may not post-dominate I.
      if (Load->getDebugLoc() && Load->getParent() == I->getParent())
        I->setDebugLoc(Load->getDebugLoc());
    if (V->getType()->isPtrOrPtrVectorTy())
      MD->invalidateCachedPointerInfo(V);
    markInstructionForDeletion(Load);
    ++NumGVNLoad;
    reportLoadElim(Load, V, ORE);
    return true;
  }

  // Step 4: Eliminate partial redundancy.
  if (!isPREEnabled() || !isLoadPREEnabled())
    return Changed;
  if (!isLoadInLoopPREEnabled() && LI && LI->getLoopFor(Load->getParent()))
    return Changed;

  return Changed || PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
         performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks);
}
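// Illustrative sketch (invented names): a fully redundant non-local load.
// Both predecessors make the value stored at %p available, so Step 3 above
// replaces the load with a phi built by ConstructSSAForLoadSet:
//
//   then:
//     store i32 1, i32* %p
//     br label %merge
//   else:
//     store i32 2, i32* %p
//     br label %merge
//   merge:
//     %v = load i32, i32* %p   ; becomes %v = phi i32 [ 1, %then ], [ 2, %else ]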
static bool impliesEquivalanceIfTrue(CmpInst* Cmp) {
  if (Cmp->getPredicate() == CmpInst::Predicate::ICMP_EQ)
    return true;

  // Floating point comparisons can be equal, but not equivalent. Cases:
  // NaNs for unordered operators
  // +0.0 vs 0.0 for all operators
  if (Cmp->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
      (Cmp->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
       Cmp->getFastMathFlags().noNaNs())) {
    Value *LHS = Cmp->getOperand(0);
    Value *RHS = Cmp->getOperand(1);
    // If we can prove either side non-zero, then equality must imply
    // equivalence.
    // FIXME: We should do this optimization if 'no signed zeros' is
    // applicable via an instruction-level fast-math-flag or some other
    // indicator that relaxed FP semantics are being used.
    if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
      return true;
    if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
      return true;
    // TODO: Handle vector floating point constants
  }
  return false;
}

static bool impliesEquivalanceIfFalse(CmpInst* Cmp) {
  if (Cmp->getPredicate() == CmpInst::Predicate::ICMP_NE)
    return true;

  // Floating point comparisons can be equal, but not equivalent. Cases:
  // NaNs for unordered operators
  // +0.0 vs 0.0 for all operators
  if ((Cmp->getPredicate() == CmpInst::Predicate::FCMP_ONE &&
       Cmp->getFastMathFlags().noNaNs()) ||
      Cmp->getPredicate() == CmpInst::Predicate::FCMP_UNE) {
    Value *LHS = Cmp->getOperand(0);
    Value *RHS = Cmp->getOperand(1);
    // If we can prove either side non-zero, then equality must imply
    // equivalence.
    // FIXME: We should do this optimization if 'no signed zeros' is
    // applicable via an instruction-level fast-math-flag or some other
    // indicator that relaxed FP semantics are being used.
    if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
      return true;
    if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
      return true;
    // TODO: Handle vector floating point constants
  }
  return false;
}
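// Illustrative sketch (invented names) of why FP equality is not equivalence:
// knowing %cmp is true below does not license replacing %x with 0.0, because
// %x may be -0.0 (and, for unordered predicates, NaN also compares "equal"):
//
//   %cmp = fcmp oeq double %x, 0.0
//   call void @llvm.assume(i1 %cmp)
//   %y = fdiv double 1.0, %x          ; 1.0/+0.0 and 1.0/-0.0 differ in sign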
static bool hasUsersIn(Value *V, BasicBlock *BB) {
  for (User *U : V->users())
    if (isa<Instruction>(U) &&
        cast<Instruction>(U)->getParent() == BB)
      return true;
  return false;
}
bool GVN::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
  Value *V = IntrinsicI->getArgOperand(0);

  if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
    if (Cond->isZero()) {
      Type *Int8Ty = Type::getInt8Ty(V->getContext());
      // Insert a new store to null before the assume to indicate that this
      // code is not reachable. FIXME: We could insert an unreachable
      // instruction directly because we can modify the CFG.
      auto *NewS = new StoreInst(UndefValue::get(Int8Ty),
                                 Constant::getNullValue(Int8Ty->getPointerTo()),
                                 IntrinsicI);
      if (MSSAU) {
        const MemoryUseOrDef *FirstNonDom = nullptr;
        const auto *AL =
            MSSAU->getMemorySSA()->getBlockAccesses(IntrinsicI->getParent());

        // If there are accesses in the current basic block, find the first one
        // that does not come before NewS. The new memory access is inserted
        // after the found access or before the terminator if no such access is
        // found.
        if (AL) {
          for (auto &Acc : *AL) {
            if (auto *Current = dyn_cast<MemoryUseOrDef>(&Acc))
              if (!Current->getMemoryInst()->comesBefore(NewS)) {
                FirstNonDom = Current;
                break;
              }
          }
        }

        // This added store is to null, so it will never be executed and we can
        // just use the LiveOnEntry def as defining access.
        auto *NewDef =
            FirstNonDom ? MSSAU->createMemoryAccessBefore(
                              NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(),
                              const_cast<MemoryUseOrDef *>(FirstNonDom))
                        : MSSAU->createMemoryAccessInBB(
                              NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(),
                              NewS->getParent(), MemorySSA::BeforeTerminator);

        MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false);
      }
    }
    if (isAssumeWithEmptyBundle(*IntrinsicI))
      markInstructionForDeletion(IntrinsicI);
    return false;
  } else if (isa<Constant>(V)) {
    // If it's not false, and constant, it must evaluate to true. This means our
    // assume is assume(true), and thus, pointless, and we don't want to do
    // anything more here.
    return false;
  }

  Constant *True = ConstantInt::getTrue(V->getContext());
  bool Changed = false;

  for (BasicBlock *Successor : successors(IntrinsicI->getParent())) {
    BasicBlockEdge Edge(IntrinsicI->getParent(), Successor);

    // This property is only true in dominated successors, propagateEquality
    // will check dominance for us.
    Changed |= propagateEquality(V, True, Edge, false);
  }

  // We can replace assume value with true, which covers cases like this:
  // call void @llvm.assume(i1 %cmp)
  // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
  ReplaceOperandsWithMap[V] = True;

  // Similarly, after assume(!NotV) we know that NotV == false.
  Value *NotV;
  if (match(V, m_Not(m_Value(NotV))))
    ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(V->getContext());

  // If we find an equality fact, canonicalize all dominated uses in this block
  // to one of the two values. We heuristically choose the "oldest" of the
  // two where age is determined by value number. (Note that propagateEquality
  // above handles the cross block case.)
  //
  // Key cases to cover are:
  // 1)
  // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
  // call void @llvm.assume(i1 %cmp)
  // ret float %0 ; will change it to ret float 3.000000e+00
  // 2)
  // %load = load float, float* %addr
  // %cmp = fcmp oeq float %load, %0
  // call void @llvm.assume(i1 %cmp)
  // ret float %load ; will change it to ret float %0
  if (auto *CmpI = dyn_cast<CmpInst>(V)) {
    if (impliesEquivalanceIfTrue(CmpI)) {
      Value *CmpLHS = CmpI->getOperand(0);
      Value *CmpRHS = CmpI->getOperand(1);
      // Heuristically pick the better replacement -- the choice of heuristic
      // isn't terribly important here, but the fact we canonicalize on some
      // replacement is for exposing other simplifications.
      // TODO: pull this out as a helper function and reuse w/existing
      // (slightly different) logic.
      if (isa<Constant>(CmpLHS) && !isa<Constant>(CmpRHS))
        std::swap(CmpLHS, CmpRHS);
      if (!isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))
        std::swap(CmpLHS, CmpRHS);
      if ((isa<Argument>(CmpLHS) && isa<Argument>(CmpRHS)) ||
          (isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))) {
        // Move the 'oldest' value to the right-hand side, using the value
        // number as a proxy for age.
        uint32_t LVN = VN.lookupOrAdd(CmpLHS);
        uint32_t RVN = VN.lookupOrAdd(CmpRHS);
        if (LVN < RVN)
          std::swap(CmpLHS, CmpRHS);
      }

      // Handle the degenerate case where we either haven't pruned a dead path
      // or removed a trivial assume yet.
      if (isa<Constant>(CmpLHS) && isa<Constant>(CmpRHS))
        return Changed;

      LLVM_DEBUG(dbgs() << "Replacing dominated uses of "
                        << *CmpLHS << " with "
                        << *CmpRHS << " in block "
                        << IntrinsicI->getParent()->getName() << "\n");

      // Setup the replacement map - this handles uses within the same block.
      if (hasUsersIn(CmpLHS, IntrinsicI->getParent()))
        ReplaceOperandsWithMap[CmpLHS] = CmpRHS;

      // NOTE: The non-block local cases are handled by the call to
      // propagateEquality above; this block is just about handling the block
      // local cases. TODO: There's a bunch of logic in propagateEquality which
      // isn't duplicated for the block local case, can we share it somehow?
    }
  }
  return Changed;
}
static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
  patchReplacementInstruction(I, Repl);
  I->replaceAllUsesWith(Repl);
}
/// Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L) {
  if (!MD)
    return false;

  // This code hasn't been audited for ordered or volatile memory access.
  if (!L->isUnordered())
    return false;

  if (L->use_empty()) {
    markInstructionForDeletion(L);
    return true;
  }

  // ... to a pointer that has been loaded from before...
  MemDepResult Dep = MD->getDependency(L);

  // If it is defined in another block, try harder.
  if (Dep.isNonLocal())
    return processNonLocalLoad(L);

  // Only handle the local case below.
  if (!Dep.isDef() && !Dep.isClobber()) {
    // This might be a NonFuncLocal or an Unknown.
    LLVM_DEBUG(
        // Fast print dep; using operator<< on instruction is too slow.
        dbgs() << "GVN: load "; L->printAsOperand(dbgs());
        dbgs() << " has unknown dependence\n";);
    return false;
  }

  AvailableValue AV;
  if (AnalyzeLoadAvailability(L, Dep, L->getPointerOperand(), AV)) {
    Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this);

    // Replace the load!
    patchAndReplaceAllUsesWith(L, AvailableValue);
    markInstructionForDeletion(L);
    if (MSSAU)
      MSSAU->removeMemoryAccess(L);
    ++NumGVNLoad;
    reportLoadElim(L, AvailableValue, ORE);
    // Tell MDA to reexamine the reused pointer since we might have more
    // information after forwarding it.
    if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy())
      MD->invalidateCachedPointerInfo(AvailableValue);
    return true;
  }

  return false;
}
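// Illustrative sketch (invented names): a local Def dependency the load can be
// replaced with. AnalyzeLoadAvailability recognizes the store, and
// MaterializeAdjustedValue coerces the stored value to the load's type:
//
//   store i32 42, i32* %p
//   %f = bitcast i32* %p to float*
//   %v = load float, float* %f   ; roughly forwarded as "bitcast i32 42 to float"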
/// Return a pair whose first field is the value number of \p Exp and whose
/// second field indicates whether it is a newly created value number.
std::pair<uint32_t, bool>
GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
  uint32_t &e = expressionNumbering[Exp];
  bool CreateNewValNum = !e;
  if (CreateNewValNum) {
    Expressions.push_back(Exp);
    if (ExprIdx.size() < nextValueNumber + 1)
      ExprIdx.resize(nextValueNumber * 2);
    e = nextValueNumber;
    ExprIdx[nextValueNumber++] = nextExprNumber++;
  }
  return {e, CreateNewValNum};
}
/// Return whether all the values related with the same \p num are
/// defined in \p BB.
bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
                                     GVN &Gvn) {
  LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
  while (Vals && Vals->BB == BB)
    Vals = Vals->Next;
  return !Vals;
}
/// Wrap phiTranslateImpl to provide caching functionality.
uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
                                       const BasicBlock *PhiBlock, uint32_t Num,
                                       GVN &Gvn) {
  auto FindRes = PhiTranslateTable.find({Num, Pred});
  if (FindRes != PhiTranslateTable.end())
    return FindRes->second;
  uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn);
  PhiTranslateTable.insert({{Num, Pred}, NewNum});
  return NewNum;
}
// Return true if the value number \p Num and NewNum have equal value.
// Return false if the result is unknown.
bool GVN::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
                                       const BasicBlock *Pred,
                                       const BasicBlock *PhiBlock, GVN &Gvn) {
  CallInst *Call = nullptr;
  LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
  while (Vals) {
    Call = dyn_cast<CallInst>(Vals->Val);
    if (Call && Call->getParent() == PhiBlock)
      break;
    Vals = Vals->Next;
  }

  if (AA->doesNotAccessMemory(Call))
    return true;

  if (!MD || !AA->onlyReadsMemory(Call))
    return false;

  MemDepResult local_dep = MD->getDependency(Call);
  if (!local_dep.isNonLocal())
    return false;

  const MemoryDependenceResults::NonLocalDepInfo &deps =
      MD->getNonLocalCallDependency(Call);

  // Check to see if the Call has no function local clobber.
  for (const NonLocalDepEntry &D : deps) {
    if (D.getResult().isNonFuncLocal())
      return true;
  }
  return false;
}
/// Translate value number \p Num using phis, so that it has the values of
/// the phis.
uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
                                           const BasicBlock *PhiBlock,
                                           uint32_t Num, GVN &Gvn) {
  if (PHINode *PN = NumberingPhi[Num]) {
    for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
      if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
        if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false))
          return TransVal;
    }
    return Num;
  }

  // If any value related with Num is defined in a BB other than PhiBlock, it
  // cannot depend on a phi in PhiBlock without going through a backedge. We
  // can do an early exit in that case to save compile time.
  if (!areAllValsInBB(Num, PhiBlock, Gvn))
    return Num;

  if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
    return Num;
  Expression Exp = Expressions[ExprIdx[Num]];

  for (unsigned i = 0; i < Exp.varargs.size(); i++) {
    // For InsertValue and ExtractValue, some varargs are index numbers
    // instead of value numbers. Those index numbers should not be
    // translated.
    if ((i > 1 && Exp.opcode == Instruction::InsertValue) ||
        (i > 0 && Exp.opcode == Instruction::ExtractValue) ||
        (i > 1 && Exp.opcode == Instruction::ShuffleVector))
      continue;
    Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
  }

  if (Exp.commutative) {
    assert(Exp.varargs.size() >= 2 && "Unsupported commutative instruction!");
    if (Exp.varargs[0] > Exp.varargs[1]) {
      std::swap(Exp.varargs[0], Exp.varargs[1]);
      uint32_t Opcode = Exp.opcode >> 8;
      if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
        Exp.opcode = (Opcode << 8) |
                     CmpInst::getSwappedPredicate(
                         static_cast<CmpInst::Predicate>(Exp.opcode & 255));
    }
  }

  if (uint32_t NewNum = expressionNumbering[Exp]) {
    if (Exp.opcode == Instruction::Call && NewNum != Num)
      return areCallValsEqual(Num, NewNum, Pred, PhiBlock, Gvn) ? NewNum : Num;
    return NewNum;
  }
  return Num;
}
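// Illustrative sketch (invented names): phi translation of a value number.
// Asking for the number of %sum in %merge as seen from predecessor %left
// rewrites the phi operand %a to its incoming value %a0, so the translated
// number is the one an "add i32 %a0, %b" would have in %left (if any):
//
//   merge:
//     %a = phi i32 [ %a0, %left ], [ %a1, %right ]
//     %sum = add i32 %a, %b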
/// Erase stale entry from phiTranslate cache so phiTranslate can be computed
/// again.
void GVN::ValueTable::eraseTranslateCacheEntry(uint32_t Num,
                                               const BasicBlock &CurrBlock) {
  for (const BasicBlock *Pred : predecessors(&CurrBlock))
    PhiTranslateTable.erase({Num, Pred});
}
// In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that number,
// and then scan the list to find one whose block dominates the block in
// question. This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
  LeaderTableEntry Vals = LeaderTable[num];
  if (!Vals.Val) return nullptr;

  Value *Val = nullptr;
  if (DT->dominates(Vals.BB, BB)) {
    Val = Vals.Val;
    if (isa<Constant>(Val)) return Val;
  }

  LeaderTableEntry* Next = Vals.Next;
  while (Next) {
    if (DT->dominates(Next->BB, BB)) {
      if (isa<Constant>(Next->Val)) return Next->Val;
      if (!Val) Val = Next->Val;
    }
    Next = Next->Next;
  }

  return Val;
}
/// There is an edge from 'Src' to 'Dst'. Return
/// true if every path from the entry block to 'Dst' passes via this edge. In
/// particular 'Dst' must not be reachable via another edge from 'Src'.
static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
                                       DominatorTree *DT) {
  // While in theory it is interesting to consider the case in which Dst has
  // more than one predecessor, because Dst might be part of a loop which is
  // only reachable from Src, in practice it is pointless since at the time
  // GVN runs all such loops have preheaders, which means that Dst will have
  // been changed to have only one predecessor, namely Src.
  const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
  assert((!Pred || Pred == E.getStart()) &&
         "No edge between these basic blocks!");
  return Pred != nullptr;
}
void GVN::assignBlockRPONumber(Function &F) {
  BlockRPONumber.clear();
  uint32_t NextBlockNumber = 1;
  ReversePostOrderTraversal<Function *> RPOT(&F);
  for (BasicBlock *BB : RPOT)
    BlockRPONumber[BB] = NextBlockNumber++;
  InvalidBlockRPONumbers = false;
}
bool GVN::replaceOperandsForInBlockEquality(Instruction *Instr) const {
  bool Changed = false;
  for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
    Value *Operand = Instr->getOperand(OpNum);
    auto it = ReplaceOperandsWithMap.find(Operand);
    if (it != ReplaceOperandsWithMap.end()) {
      LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with "
                        << *it->second << " in instruction " << *Instr << '\n');
      Instr->setOperand(OpNum, it->second);
      Changed = true;
    }
  }
  return Changed;
}
/// The given values are known to be equal in every block
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
/// If DominatesByEdge is false, then it means that we will propagate the RHS
/// value starting from the end of Root.Start.
bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
                            bool DominatesByEdge) {
  SmallVector<std::pair<Value *, Value *>, 4> Worklist;
  Worklist.push_back(std::make_pair(LHS, RHS));
  bool Changed = false;
  // For speed, compute a conservative fast approximation to
  // DT->dominates(Root, Root.getEnd());
  const bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);

  while (!Worklist.empty()) {
    std::pair<Value *, Value *> Item = Worklist.pop_back_val();
    LHS = Item.first; RHS = Item.second;

    if (LHS == RHS)
      continue;
    assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");

    // Don't try to propagate equalities between constants.
    if (isa<Constant>(LHS) && isa<Constant>(RHS))
      continue;

    // Prefer a constant on the right-hand side, or an Argument if no constants.
    if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
      std::swap(LHS, RHS);
    assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");

    // If there is no obvious reason to prefer the left-hand side over the
    // right-hand side, ensure the longest lived term is on the right-hand side,
    // so the shortest lived term will be replaced by the longest lived.
    // This tends to expose more simplifications.
    uint32_t LVN = VN.lookupOrAdd(LHS);
    if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
        (isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
      // Move the 'oldest' value to the right-hand side, using the value number
      // as a proxy for age.
      uint32_t RVN = VN.lookupOrAdd(RHS);
      if (LVN < RVN) {
        std::swap(LHS, RHS);
        LVN = RVN;
      }
    }

    // If value numbering later sees that an instruction in the scope is equal
    // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
    // the invariant that instructions only occur in the leader table for their
    // own value number (this is used by removeFromLeaderTable), do not do this
    // if RHS is an instruction (if an instruction in the scope is morphed into
    // LHS then it will be turned into RHS by the next GVN iteration anyway, so
    // using the leader table is about compiling faster, not optimizing better).
    // The leader table only tracks basic blocks, not edges. Only add to it if
    // we have the simple case where the edge dominates the end.
    if (RootDominatesEnd && !isa<Instruction>(RHS))
      addToLeaderTable(LVN, RHS, Root.getEnd());

    // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
    // LHS always has at least one use that is not dominated by Root, this will
    // never do anything if LHS has only one use.
    if (!LHS->hasOneUse()) {
      unsigned NumReplacements =
          DominatesByEdge
              ? replaceDominatedUsesWith(LHS, RHS, *DT, Root)
              : replaceDominatedUsesWith(LHS, RHS, *DT, Root.getStart());

      Changed |= NumReplacements > 0;
      NumGVNEqProp += NumReplacements;
      // Cached information for anything that uses LHS will be invalid.
      if (MD)
        MD->invalidateCachedPointerInfo(LHS);
    }

    // Now try to deduce additional equalities from this one. For example, if
    // the known equality was "(A != B)" == "false" then it follows that A and B
    // are equal in the scope. Only boolean equalities with an explicit true or
    // false RHS are currently supported.
    if (!RHS->getType()->isIntegerTy(1))
      // Not a boolean equality - bail out.
      continue;
    ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
    if (!CI)
      // RHS neither 'true' nor 'false' - bail out.
      continue;
    // Whether RHS equals 'true'. Otherwise it equals 'false'.
    bool isKnownTrue = CI->isMinusOne();
    bool isKnownFalse = !isKnownTrue;

    // If "A && B" is known true then both A and B are known true. If "A || B"
    // is known false then both A and B are known false.
    Value *A, *B;
    if ((isKnownTrue && match(LHS, m_LogicalAnd(m_Value(A), m_Value(B)))) ||
        (isKnownFalse && match(LHS, m_LogicalOr(m_Value(A), m_Value(B))))) {
      Worklist.push_back(std::make_pair(A, RHS));
      Worklist.push_back(std::make_pair(B, RHS));
      continue;
    }

    // If we are propagating an equality like "(A == B)" == "true" then also
    // propagate the equality A == B. When propagating a comparison such as
    // "(A >= B)" == "true", replace all instances of "A < B" with "false".
    if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) {
      Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);

      // If "A == B" is known true, or "A != B" is known false, then replace
      // A with B everywhere in the scope. For floating point operations, we
      // have to be careful since equality does not always imply equivalence.
      if ((isKnownTrue && impliesEquivalanceIfTrue(Cmp)) ||
          (isKnownFalse && impliesEquivalanceIfFalse(Cmp)))
        Worklist.push_back(std::make_pair(Op0, Op1));

      // If "A >= B" is known true, replace "A < B" with false everywhere.
      CmpInst::Predicate NotPred = Cmp->getInversePredicate();
      Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
      // Since we don't have the instruction "A < B" immediately to hand, work
      // out the value number that it would have and use that to find an
      // appropriate instruction (if any).
      uint32_t NextNum = VN.getNextUnusedValueNumber();
      uint32_t Num = VN.lookupOrAddCmp(Cmp->getOpcode(), NotPred, Op0, Op1);
      // If the number we were assigned was brand new then there is no point in
      // looking for an instruction realizing it: there cannot be one!
      if (Num < NextNum) {
        Value *NotCmp = findLeader(Root.getEnd(), Num);
        if (NotCmp && isa<Instruction>(NotCmp)) {
          unsigned NumReplacements =
              DominatesByEdge
                  ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root)
                  : replaceDominatedUsesWith(NotCmp, NotVal, *DT,
                                             Root.getStart());
          Changed |= NumReplacements > 0;
          NumGVNEqProp += NumReplacements;
          // Cached information for anything that uses NotCmp will be invalid.
          if (MD)
            MD->invalidateCachedPointerInfo(NotCmp);
        }
      }
      // Ensure that any instruction in scope that gets the "A < B" value number
      // is replaced with false.
      // The leader table only tracks basic blocks, not edges. Only add to it if
      // we have the simple case where the edge dominates the end.
      if (RootDominatesEnd)
        addToLeaderTable(Num, NotVal, Root.getEnd());

      continue;
    }
  }

  return Changed;
}
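// Illustrative sketch (invented names): propagating a branch condition. On
// the edge into %taken the condition is known true, so %x can be replaced by
// 42 in every block dominated by that edge:
//
//   %cmp = icmp eq i32 %x, 42
//   br i1 %cmp, label %taken, label %other
//   taken:
//     %use = add i32 %x, 1      ; %x is replaced by 42 here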
/// When calculating availability, handle an instruction
/// by inserting it into the appropriate sets.
bool GVN::processInstruction(Instruction *I) {
  // Ignore dbg info intrinsics.
  if (isa<DbgInfoIntrinsic>(I))
    return false;

  // If the instruction can be easily simplified then do so now in preference
  // to value numbering it. Value numbering often exposes redundancies, for
  // example if it determines that %y is equal to %x then the instruction
  // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
  const DataLayout &DL = I->getModule()->getDataLayout();
  if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) {
    bool Changed = false;
    if (!I->use_empty()) {
      // Simplification can cause a special instruction to become not special.
      // For example, devirtualization to a willreturn function.
      ICF->removeUsersOf(I);
      I->replaceAllUsesWith(V);
      Changed = true;
    }
    if (isInstructionTriviallyDead(I, TLI)) {
      markInstructionForDeletion(I);
      Changed = true;
    }
    if (Changed) {
      if (MD && V->getType()->isPtrOrPtrVectorTy())
        MD->invalidateCachedPointerInfo(V);
      ++NumGVNSimpl;
      return true;
    }
  }

  if (auto *Assume = dyn_cast<AssumeInst>(I))
    return processAssumeIntrinsic(Assume);

  if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
    if (processLoad(Load))
      return true;

    unsigned Num = VN.lookupOrAdd(Load);
    addToLeaderTable(Num, Load, Load->getParent());
    return false;
  }

  // For conditional branches, we can perform simple conditional propagation on
  // the condition value itself.
  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
    if (!BI->isConditional())
      return false;

    if (isa<Constant>(BI->getCondition()))
      return processFoldableCondBr(BI);

    Value *BranchCond = BI->getCondition();
    BasicBlock *TrueSucc = BI->getSuccessor(0);
    BasicBlock *FalseSucc = BI->getSuccessor(1);
    // Avoid multiple edges early.
    if (TrueSucc == FalseSucc)
      return false;

    BasicBlock *Parent = BI->getParent();
    bool Changed = false;

    Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
    BasicBlockEdge TrueE(Parent, TrueSucc);
    Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true);

    Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
    BasicBlockEdge FalseE(Parent, FalseSucc);
    Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true);

    return Changed;
  }

  // For switches, propagate the case values into the case destinations.
  if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
    Value *SwitchCond = SI->getCondition();
    BasicBlock *Parent = SI->getParent();
    bool Changed = false;

    // Remember how many outgoing edges there are to every successor.
    SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
    for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i)
      ++SwitchEdges[SI->getSuccessor(i)];

    for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
         i != e; ++i) {
      BasicBlock *Dst = i->getCaseSuccessor();
      // If there is only a single edge, propagate the case value into it.
      if (SwitchEdges.lookup(Dst) == 1) {
        BasicBlockEdge E(Parent, Dst);
        Changed |= propagateEquality(SwitchCond, i->getCaseValue(), E, true);
      }
    }
    return Changed;
  }

  // Instructions with void type don't return a value, so there's
  // no point in trying to find redundancies in them.
  if (I->getType()->isVoidTy())
    return false;

  uint32_t NextNum = VN.getNextUnusedValueNumber();
  unsigned Num = VN.lookupOrAdd(I);

  // Allocations are always uniquely numbered, so we can save time and memory
  // by fast failing them.
  if (isa<AllocaInst>(I) || I->isTerminator() || isa<PHINode>(I)) {
    addToLeaderTable(Num, I, I->getParent());
    return false;
  }

  // If the number we were assigned was a brand new VN, then we don't
  // need to do a lookup to see if the number already exists
  // somewhere in the domtree: it can't!
  if (Num >= NextNum) {
    addToLeaderTable(Num, I, I->getParent());
    return false;
  }

  // Perform fast-path value-number based elimination of values inherited from
  // dominators.
  Value *Repl = findLeader(I->getParent(), Num);
  if (!Repl) {
    // Failure, just remember this instance for future use.
    addToLeaderTable(Num, I, I->getParent());
    return false;
  } else if (Repl == I) {
    // If I was the result of a shortcut PRE, it might already be in the table
    // and the best replacement for itself. Nothing to do.
    return false;
  }

  // Remove it!
  patchAndReplaceAllUsesWith(I, Repl);
  if (MD && Repl->getType()->isPtrOrPtrVectorTy())
    MD->invalidateCachedPointerInfo(Repl);
  markInstructionForDeletion(I);
  return true;
}
/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
                  const TargetLibraryInfo &RunTLI, AAResults &RunAA,
                  MemoryDependenceResults *RunMD, LoopInfo *LI,
                  OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
  AC = &RunAC;
  DT = &RunDT;
  VN.setDomTree(DT);
  TLI = &RunTLI;
  VN.setAliasAnalysis(&RunAA);
  MD = RunMD;
  ImplicitControlFlowTracking ImplicitCFT;
  ICF = &ImplicitCFT;
  this->LI = LI;
  VN.setMemDep(MD);
  ORE = RunORE;
  InvalidBlockRPONumbers = true;
  MemorySSAUpdater Updater(MSSA);
  MSSAU = MSSA ? &Updater : nullptr;

  bool Changed = false;
  bool ShouldContinue = true;

  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
  // Merge unconditional branches, allowing PRE to catch more
  // optimization opportunities.
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
    BasicBlock *BB = &*FI++;

    bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, MSSAU, MD);
    if (removedBlock)
      ++NumGVNBlocks;

    Changed |= removedBlock;
  }

  unsigned Iteration = 0;
  while (ShouldContinue) {
    LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
    ShouldContinue = iterateOnFunction(F);
    Changed |= ShouldContinue;
    ++Iteration;
  }

  if (isPREEnabled()) {
    // Fabricate val-num for dead-code in order to suppress assertion in
    // performPRE().
    assignValNumForDeadCode();
    bool PREChanged = true;
    while (PREChanged) {
      PREChanged = performPRE(F);
      Changed |= PREChanged;
    }
  }

  // FIXME: Should perform GVN again after PRE does something. PRE can move
  // computations into blocks where they become fully redundant. Note that
  // we can't do this until PRE's critical edge splitting updates memdep.
  // Actually, when this happens, we should just fully integrate PRE into GVN.

  cleanupGlobalSets();
  // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each
  // iteration.
  DeadBlocks.clear();

  if (MSSA && VerifyMemorySSA)
    MSSA->verifyMemorySSA();

  return Changed;
}
bool GVN::processBlock(BasicBlock *BB) {
  // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
  // (and incrementing BI before processing an instruction).
  assert(InstrsToErase.empty() &&
         "We expect InstrsToErase to be empty across iterations");
  if (DeadBlocks.count(BB))
    return false;

  // Clearing map before every BB because it can be used only for single BB.
  ReplaceOperandsWithMap.clear();
  bool ChangedFunction = false;

  // Since we may not have visited the input blocks of the phis, we can't
  // use our normal hash approach for phis. Instead, simply look for
  // obvious duplicates. The first pass of GVN will tend to create
  // identical phis, and the second or later passes can eliminate them.
  ChangedFunction |= EliminateDuplicatePHINodes(BB);

  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
       BI != BE;) {
    if (!ReplaceOperandsWithMap.empty())
      ChangedFunction |= replaceOperandsForInBlockEquality(&*BI);
    ChangedFunction |= processInstruction(&*BI);

    if (InstrsToErase.empty()) {
      ++BI;
      continue;
    }

    // If we need some instructions deleted, do it now.
    NumGVNInstr += InstrsToErase.size();

    // Avoid iterator invalidation.
    bool AtStart = BI == BB->begin();
    if (!AtStart)
      --BI;

    for (auto *I : InstrsToErase) {
      assert(I->getParent() == BB && "Removing instruction from wrong block?");
      LLVM_DEBUG(dbgs() << "GVN removed: " << *I << '\n');
      salvageKnowledge(I, AC);
      salvageDebugInfo(*I);
      if (MD) MD->removeInstruction(I);
      if (MSSAU)
        MSSAU->removeMemoryAccess(I);
      LLVM_DEBUG(verifyRemoved(I));
      ICF->removeInstruction(I);
      I->eraseFromParent();
    }
    InstrsToErase.clear();

    if (AtStart)
      BI = BB->begin();
    else
      ++BI;
  }

  return ChangedFunction;
}
// Instantiate an expression in a predecessor that lacked it.
bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
                                    BasicBlock *Curr, unsigned int ValNo) {
  // Because we are going top-down through the block, all value numbers
  // will be available in the predecessor by the time we need them. Any
  // that weren't originally present will have been instantiated earlier
  // in this loop.
  bool success = true;
  for (unsigned i = 0, e = Instr->getNumOperands(); i != e; ++i) {
    Value *Op = Instr->getOperand(i);
    if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
      continue;
    // This could be a newly inserted instruction, in which case, we won't
    // find a value number, and should give up before we hurt ourselves.
    // FIXME: Rewrite the infrastructure to make it easier to value number
    // and process newly inserted instructions.
    if (!VN.exists(Op)) {
      success = false;
      break;
    }
    uint32_t TValNo =
        VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this);
    if (Value *V = findLeader(Pred, TValNo)) {
      Instr->setOperand(i, V);
    } else {
      success = false;
      break;
    }
  }

  // Fail out if we encounter an operand that is not available in
  // the PRE predecessor. This is typically because of loads which
  // are not value numbered precisely.
  if (!success)
    return false;

  Instr->insertBefore(Pred->getTerminator());
  Instr->setName(Instr->getName() + ".pre");
  Instr->setDebugLoc(Instr->getDebugLoc());

  ICF->insertInstructionTo(Instr, Pred);

  unsigned Num = VN.lookupOrAdd(Instr);
  VN.add(Instr, Num);

  // Update the availability map to include the new instruction.
  addToLeaderTable(Num, Instr, Pred);
  return true;
}
bool GVN::performScalarPRE(Instruction *CurInst) {
  if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() ||
      isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
      CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
      isa<DbgInfoIntrinsic>(CurInst))
    return false;

  // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
  // sinking the compare again, and it would force the code generator to
  // move the i1 from processor flags or predicate registers into a general
  // purpose register.
  if (isa<CmpInst>(CurInst))
    return false;

  // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
  // sinking the addressing mode computation back to its uses. Extending the
  // GEP's live range increases the register pressure, and therefore it can
  // introduce unnecessary spills.
  //
  // This doesn't prevent Load PRE. PHI translation will make the GEP available
  // to the load by moving it to the predecessor block if necessary.
  if (isa<GetElementPtrInst>(CurInst))
    return false;

  if (auto *CallB = dyn_cast<CallBase>(CurInst)) {
    // We don't currently value number ANY inline asm calls.
    if (CallB->isInlineAsm())
      return false;
    // Don't do PRE on convergent calls.
    if (CallB->isConvergent())
      return false;
  }

  uint32_t ValNo = VN.lookup(CurInst);

  // Look for the predecessors for PRE opportunities. We're
  // only trying to solve the basic diamond case, where
  // a value is computed in the successor and one predecessor,
  // but not the other. We also explicitly disallow cases
  // where the successor is its own predecessor, because they're
  // more complicated to get right.
  unsigned NumWith = 0;
  unsigned NumWithout = 0;
  BasicBlock *PREPred = nullptr;
  BasicBlock *CurrentBlock = CurInst->getParent();

  // Update the RPO numbers for this function.
  if (InvalidBlockRPONumbers)
    assignBlockRPONumber(*CurrentBlock->getParent());

  SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap;
  for (BasicBlock *P : predecessors(CurrentBlock)) {
    // We're not interested in PRE where blocks have predecessors that are
    // not reachable.
    if (!DT->isReachableFromEntry(P)) {
      NumWithout = 2;
      break;
    }
    // It is not safe to do PRE when P->CurrentBlock is a loop backedge, and
    // when CurInst has an operand defined in CurrentBlock (so it may be defined
    // by a phi in the loop header).
    assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) &&
           "Invalid BlockRPONumber map.");
    if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock] &&
        llvm::any_of(CurInst->operands(), [&](const Use &U) {
          if (auto *Inst = dyn_cast<Instruction>(U.get()))
            return Inst->getParent() == CurrentBlock;
          return false;
        })) {
      NumWithout = 2;
      break;
    }

    uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
    Value *predV = findLeader(P, TValNo);
    if (!predV) {
      predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
      PREPred = P;
      ++NumWithout;
    } else if (predV == CurInst) {
      /* CurInst dominates this predecessor. */
      NumWithout = 2;
      break;
    } else {
      predMap.push_back(std::make_pair(predV, P));
      ++NumWith;
    }
  }

  // Don't do PRE when it might increase code size, i.e. when
  // we would need to insert instructions in more than one pred.
  if (NumWithout > 1 || NumWith == 0)
    return false;

  // We may have a case where all predecessors have the instruction,
  // and we just need to insert a phi node. Otherwise, perform
  // insertion.
  Instruction *PREInstr = nullptr;

  if (NumWithout != 0) {
    if (!isSafeToSpeculativelyExecute(CurInst)) {
      // It is only valid to insert a new instruction if the current instruction
      // is always executed. An instruction with implicit control flow could
      // prevent us from doing it. If we cannot speculate the execution, then
      // PRE should be prohibited.
      if (ICF->isDominatedByICFIFromSameBlock(CurInst))
        return false;
    }

    // Don't do PRE across an indirect branch.
    if (isa<IndirectBrInst>(PREPred->getTerminator()))
      return false;

    // Don't do PRE across a callbr.
    // FIXME: Can we do this across the fallthrough edge?
    if (isa<CallBrInst>(PREPred->getTerminator()))
      return false;

    // We can't do PRE safely on a critical edge, so instead we schedule
    // the edge to be split and perform the PRE the next time we iterate
    // on the function.
    unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
    if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
      toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
      return false;
    }
    // We need to insert somewhere, so let's give it a shot.
    PREInstr = CurInst->clone();
    if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) {
      // If we failed insertion, make sure we remove the instruction.
      LLVM_DEBUG(verifyRemoved(PREInstr));
      PREInstr->deleteValue();
      return false;
    }
  }

  // Either we should have filled in the PRE instruction, or we should
  // not have needed insertions.
  assert(PREInstr != nullptr || NumWithout == 0);

  ++NumGVNPRE;

  // Create a PHI to make the value available in this block.
  PHINode *Phi =
      PHINode::Create(CurInst->getType(), predMap.size(),
                      CurInst->getName() + ".pre-phi", &CurrentBlock->front());
  for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
    if (Value *V = predMap[i].first) {
      // If we use an existing value in this phi, we have to patch the original
      // value because the phi will be used to replace a later value.
      patchReplacementInstruction(CurInst, V);
      Phi->addIncoming(V, predMap[i].second);
    } else
      Phi->addIncoming(PREInstr, PREPred);
  }

  VN.add(Phi, ValNo);
  // After creating a new PHI for ValNo, the phi translate result for ValNo will
  // be changed, so erase the related stale entries in phi translate cache.
  VN.eraseTranslateCacheEntry(ValNo, *CurrentBlock);
  addToLeaderTable(ValNo, Phi, CurrentBlock);
  Phi->setDebugLoc(CurInst->getDebugLoc());
  CurInst->replaceAllUsesWith(Phi);
  if (MD && Phi->getType()->isPtrOrPtrVectorTy())
    MD->invalidateCachedPointerInfo(Phi);
  VN.erase(CurInst);
  removeFromLeaderTable(ValNo, CurInst, CurrentBlock);

  LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
  if (MD)
    MD->removeInstruction(CurInst);
  if (MSSAU)
    MSSAU->removeMemoryAccess(CurInst);
  LLVM_DEBUG(verifyRemoved(CurInst));
  // FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes
  // some assertion failures.
  ICF->removeInstruction(CurInst);
  CurInst->eraseFromParent();
  ++NumGVNInstr;

  return true;
}
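// Illustrative sketch (invented names): the diamond case scalar PRE targets.
// The add is available in %left but not in %right, so a ".pre" copy is
// inserted at the end of %right and the join-point computation becomes a phi:
//
//   left:
//     %v1 = add i32 %a, %b
//     br label %join
//   right:
//     br label %join              ; "%v.pre = add i32 %a, %b" inserted here
//   join:
//     %v = add i32 %a, %b         ; replaced by phi [ %v1, %left ], [ %v.pre, %right ]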
/// Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function &F) {
  bool Changed = false;
  for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock())
      continue;

    // Don't perform PRE on an EH pad.
    if (CurrentBlock->isEHPad())
      continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
                              BE = CurrentBlock->end();
         BI != BE;) {
      Instruction *CurInst = &*BI++;
      Changed |= performScalarPRE(CurInst);
    }
  }

  if (splitCriticalEdges())
    Changed = true;

  return Changed;
}
/// Split the critical edge connecting the given two blocks, and return
/// the block inserted to the critical edge.
BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
  // GVN does not require loop-simplify, do not try to preserve it if it is not
  // possible.
  BasicBlock *BB = SplitCriticalEdge(
      Pred, Succ,
      CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
  if (BB) {
    if (MD)
      MD->invalidateCachedPredecessors();
    InvalidBlockRPONumbers = true;
  }
  return BB;
}
/// Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
  if (toSplit.empty())
    return false;

  bool Changed = false;
  do {
    std::pair<Instruction *, unsigned> Edge = toSplit.pop_back_val();
    Changed |= SplitCriticalEdge(Edge.first, Edge.second,
                                 CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
               nullptr;
  } while (!toSplit.empty());
  if (Changed) {
    if (MD)
      MD->invalidateCachedPredecessors();
    InvalidBlockRPONumbers = true;
  }
  return Changed;
}
/// Executes one iteration of GVN.
bool GVN::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  // Top-down walk of the dominator tree.
  bool Changed = false;
  // Needed for value numbering with phi construction to work.
  // RPOT walks the graph in its constructor and will not be invalidated during
  // processBlock.
  ReversePostOrderTraversal<Function *> RPOT(&F);

  for (BasicBlock *BB : RPOT)
    Changed |= processBlock(BB);

  return Changed;
}
void GVN::cleanupGlobalSets() {
  VN.clear();
  LeaderTable.clear();
  BlockRPONumber.clear();
  TableAllocator.Reset();
  ICF->clear();
  InvalidBlockRPONumbers = true;
}
/// Verify that the specified instruction does not occur in our
/// internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);

  // Walk through the value number scope to make sure the instruction isn't
  // ferreted away in it.
  for (const auto &I : LeaderTable) {
    const LeaderTableEntry *Node = &I.second;
    assert(Node->Val != Inst && "Inst still in value numbering scope!");

    while (Node->Next) {
      Node = Node->Next;
      assert(Node->Val != Inst && "Inst still in value numbering scope!");
    }
  }
}
/// BB is declared dead, which implies that other blocks become dead as well.
/// This function adds all these blocks to "DeadBlocks". For the dead blocks'
/// live successors, update their phi nodes by replacing the operands
/// corresponding to dead blocks with UndefVal.
void GVN::addDeadBlock(BasicBlock *BB) {
  SmallVector<BasicBlock *, 4> NewDead;
  SmallSetVector<BasicBlock *, 4> DF;

  NewDead.push_back(BB);
  while (!NewDead.empty()) {
    BasicBlock *D = NewDead.pop_back_val();
    if (DeadBlocks.count(D))
      continue;

    // All blocks dominated by D are dead.
    SmallVector<BasicBlock *, 8> Dom;
    DT->getDescendants(D, Dom);
    DeadBlocks.insert(Dom.begin(), Dom.end());

    // Figure out the dominance-frontier(D).
    for (BasicBlock *B : Dom) {
      for (BasicBlock *S : successors(B)) {
        if (DeadBlocks.count(S))
          continue;

        bool AllPredDead = true;
        for (BasicBlock *P : predecessors(S))
          if (!DeadBlocks.count(P)) {
            AllPredDead = false;
            break;
          }

        if (!AllPredDead) {
          // S could be proved dead later on. That is why we don't update phi
          // operands at this moment.
          DF.insert(S);
        } else {
          // While S is not dominated by D, it is dead by now. This could take
          // place if S already has a dead predecessor before D is declared
          // dead.
          NewDead.push_back(S);
        }
      }
    }
  }

  // For the dead blocks' live successors, update their phi nodes by replacing
  // the operands corresponding to dead blocks with UndefVal.
  for (BasicBlock *B : DF) {
    if (DeadBlocks.count(B))
      continue;

    // First, split the critical edges. This might also create additional blocks
    // to preserve LoopSimplify form and adjust edges accordingly.
    SmallVector<BasicBlock *, 4> Preds(predecessors(B));
    for (BasicBlock *P : Preds) {
      if (!DeadBlocks.count(P))
        continue;

      if (llvm::is_contained(successors(P), B) &&
          isCriticalEdge(P->getTerminator(), B)) {
        if (BasicBlock *S = splitCriticalEdges(P, B))
          DeadBlocks.insert(P = S);
      }
    }

    // Now undef the incoming values from the dead predecessors.
    for (BasicBlock *P : predecessors(B)) {
      if (!DeadBlocks.count(P))
        continue;
      for (PHINode &Phi : B->phis()) {
        Phi.setIncomingValueForBlock(P, UndefValue::get(Phi.getType()));
        if (MD)
          MD->invalidateCachedPointerInfo(&Phi);
      }
    }
  }
}
// If the given branch is recognized as a foldable branch (i.e. a conditional
// branch with a constant condition), it will perform the following analyses
// and transformations.
//  1) If the dead outgoing edge is a critical edge, split it. Let
//     R be the target of the dead outgoing edge.
//  2) Identify the set of dead blocks implied by the branch's dead outgoing
//     edge. The result of this step will be {X| X is dominated by R}.
//  3) Identify those blocks which have at least one dead predecessor. The
//     result of this step will be dominance-frontier(R).
//  4) Update the PHIs in DF(R) by replacing the operands corresponding to
//     dead blocks with "UndefVal" in the hope that these PHIs will be
//     optimized away.
//
// Return true iff *NEW* dead code is found.
bool GVN::processFoldableCondBr(BranchInst *BI) {
  if (!BI || BI->isUnconditional())
    return false;

  // If a branch has two identical successors, we cannot declare either dead.
  if (BI->getSuccessor(0) == BI->getSuccessor(1))
    return false;

  ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
  if (!Cond)
    return false;

  BasicBlock *DeadRoot =
      Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0);
  if (DeadBlocks.count(DeadRoot))
    return false;

  if (!DeadRoot->getSinglePredecessor())
    DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);

  addDeadBlock(DeadRoot);
  return true;
}
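// Illustrative sketch (invented names): a foldable conditional branch. The
// false edge is dead, every block dominated by %dead is added to DeadBlocks,
// and phis in the live dominance frontier get undef for the dead operands:
//
//   br i1 true, label %live, label %dead
//   ...
//   merge:
//     %x = phi i32 [ %a, %live ], [ %b, %dead ]   ; %b becomes undef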
// performPRE() will trigger an assert if it comes across an instruction without
// an associated val-num. As there are normally far more live instructions than
// dead instructions, it makes more sense just to "fabricate" a val-number for
// the dead code than to check whether each instruction involved is dead.
void GVN::assignValNumForDeadCode() {
  for (BasicBlock *BB : DeadBlocks) {
    for (Instruction &Inst : *BB) {
      unsigned ValNum = VN.lookupOrAdd(&Inst);
      addToLeaderTable(ValNum, &Inst, BB);
    }
  }
}
class llvm::gvn::GVNLegacyPass : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  explicit GVNLegacyPass(bool NoMemDepAnalysis = !GVNEnableMemDep)
      : FunctionPass(ID), Impl(GVNOptions().setMemDep(!NoMemDepAnalysis)) {
    initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();

    auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    return Impl.runImpl(
        F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
        getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
        getAnalysis<AAResultsWrapperPass>().getAAResults(),
        Impl.isMemDepEnabled()
            ? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep()
            : nullptr,
        LIWP ? &LIWP->getLoopInfo() : nullptr,
        &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
        MSSAWP ? &MSSAWP->getMSSA() : nullptr);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    if (Impl.isMemDepEnabled())
      AU.addRequired<MemoryDependenceWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<TargetLibraryInfoWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
    AU.addPreserved<MemorySSAWrapperPass>();
  }

private:
  GVN Impl;
};
= 0;
3086 INITIALIZE_PASS_BEGIN(GVNLegacyPass
, "gvn", "Global Value Numbering", false, false)
3087 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
)
3088 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass
)
3089 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
3090 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
3091 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
)
3092 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass
)
3093 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass
)
3094 INITIALIZE_PASS_END(GVNLegacyPass
, "gvn", "Global Value Numbering", false, false)
// The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoMemDepAnalysis) {
  return new GVNLegacyPass(NoMemDepAnalysis);
}