1 //===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass performs global value numbering to eliminate fully redundant
10 // instructions. It also performs simple dead load elimination.
12 // Note that this pass does the value numbering itself; it does not use the
13 // ValueNumbering analysis passes.
15 //===----------------------------------------------------------------------===//
17 #include "llvm/Transforms/Scalar/GVN.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/DepthFirstIterator.h"
20 #include "llvm/ADT/Hashing.h"
21 #include "llvm/ADT/MapVector.h"
22 #include "llvm/ADT/PointerIntPair.h"
23 #include "llvm/ADT/PostOrderIterator.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SetVector.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/Analysis/AliasAnalysis.h"
30 #include "llvm/Analysis/AssumptionCache.h"
31 #include "llvm/Analysis/CFG.h"
32 #include "llvm/Analysis/DomTreeUpdater.h"
33 #include "llvm/Analysis/GlobalsModRef.h"
34 #include "llvm/Analysis/InstructionSimplify.h"
35 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/Analysis/MemoryBuiltins.h"
37 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
38 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
39 #include "llvm/Analysis/PHITransAddr.h"
40 #include "llvm/Analysis/TargetLibraryInfo.h"
41 #include "llvm/Analysis/ValueTracking.h"
42 #include "llvm/Config/llvm-config.h"
43 #include "llvm/IR/Attributes.h"
44 #include "llvm/IR/BasicBlock.h"
45 #include "llvm/IR/CallSite.h"
46 #include "llvm/IR/Constant.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DataLayout.h"
49 #include "llvm/IR/DebugInfoMetadata.h"
50 #include "llvm/IR/DebugLoc.h"
51 #include "llvm/IR/Dominators.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/InstrTypes.h"
54 #include "llvm/IR/Instruction.h"
55 #include "llvm/IR/Instructions.h"
56 #include "llvm/IR/IntrinsicInst.h"
57 #include "llvm/IR/Intrinsics.h"
58 #include "llvm/IR/LLVMContext.h"
59 #include "llvm/IR/Metadata.h"
60 #include "llvm/IR/Module.h"
61 #include "llvm/IR/Operator.h"
62 #include "llvm/IR/PassManager.h"
63 #include "llvm/IR/PatternMatch.h"
64 #include "llvm/IR/Type.h"
65 #include "llvm/IR/Use.h"
66 #include "llvm/IR/Value.h"
67 #include "llvm/Pass.h"
68 #include "llvm/Support/Casting.h"
69 #include "llvm/Support/CommandLine.h"
70 #include "llvm/Support/Compiler.h"
71 #include "llvm/Support/Debug.h"
72 #include "llvm/Support/raw_ostream.h"
73 #include "llvm/Transforms/Utils.h"
74 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
75 #include "llvm/Transforms/Utils/Local.h"
76 #include "llvm/Transforms/Utils/SSAUpdater.h"
77 #include "llvm/Transforms/Utils/VNCoercion.h"
85 using namespace llvm::gvn
;
86 using namespace llvm::VNCoercion
;
87 using namespace PatternMatch
;
89 #define DEBUG_TYPE "gvn"
91 STATISTIC(NumGVNInstr
, "Number of instructions deleted");
92 STATISTIC(NumGVNLoad
, "Number of loads deleted");
93 STATISTIC(NumGVNPRE
, "Number of instructions PRE'd");
94 STATISTIC(NumGVNBlocks
, "Number of blocks merged");
95 STATISTIC(NumGVNSimpl
, "Number of instructions simplified");
96 STATISTIC(NumGVNEqProp
, "Number of equalities propagated");
97 STATISTIC(NumPRELoad
, "Number of loads PRE'd");
99 static cl::opt
<bool> EnablePRE("enable-pre",
100 cl::init(true), cl::Hidden
);
101 static cl::opt
<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
102 static cl::opt
<bool> EnableMemDep("enable-gvn-memdep", cl::init(true));
104 // Maximum allowed recursion depth.
105 static cl::opt
<uint32_t>
106 MaxRecurseDepth("gvn-max-recurse-depth", cl::Hidden
, cl::init(1000), cl::ZeroOrMore
,
107 cl::desc("Max recurse depth in GVN (default = 1000)"));
109 static cl::opt
<uint32_t> MaxNumDeps(
110 "gvn-max-num-deps", cl::Hidden
, cl::init(100), cl::ZeroOrMore
,
111 cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
113 struct llvm::GVN::Expression
{
116 bool commutative
= false;
117 SmallVector
<uint32_t, 4> varargs
;
119 Expression(uint32_t o
= ~2U) : opcode(o
) {}
121 bool operator==(const Expression
&other
) const {
122 if (opcode
!= other
.opcode
)
124 if (opcode
== ~0U || opcode
== ~1U)
126 if (type
!= other
.type
)
128 if (varargs
!= other
.varargs
)
133 friend hash_code
hash_value(const Expression
&Value
) {
135 Value
.opcode
, Value
.type
,
136 hash_combine_range(Value
.varargs
.begin(), Value
.varargs
.end()));
142 template <> struct DenseMapInfo
<GVN::Expression
> {
143 static inline GVN::Expression
getEmptyKey() { return ~0U; }
144 static inline GVN::Expression
getTombstoneKey() { return ~1U; }
146 static unsigned getHashValue(const GVN::Expression
&e
) {
147 using llvm::hash_value
;
149 return static_cast<unsigned>(hash_value(e
));
152 static bool isEqual(const GVN::Expression
&LHS
, const GVN::Expression
&RHS
) {
157 } // end namespace llvm
159 /// Represents a particular available value that we know how to materialize.
160 /// Materialization of an AvailableValue never fails. An AvailableValue is
161 /// implicitly associated with a rematerialization point which is the
162 /// location of the instruction from which it was formed.
163 struct llvm::gvn::AvailableValue
{
165 SimpleVal
, // A simple offsetted value that is accessed.
166 LoadVal
, // A value produced by a load.
167 MemIntrin
, // A memory intrinsic which is loaded from.
168 UndefVal
// A UndefValue representing a value from dead block (which
169 // is not yet physically removed from the CFG).
172 /// V - The value that is live out of the block.
173 PointerIntPair
<Value
*, 2, ValType
> Val
;
175 /// Offset - The byte offset in Val that is interesting for the load query.
178 static AvailableValue
get(Value
*V
, unsigned Offset
= 0) {
180 Res
.Val
.setPointer(V
);
181 Res
.Val
.setInt(SimpleVal
);
186 static AvailableValue
getMI(MemIntrinsic
*MI
, unsigned Offset
= 0) {
188 Res
.Val
.setPointer(MI
);
189 Res
.Val
.setInt(MemIntrin
);
194 static AvailableValue
getLoad(LoadInst
*LI
, unsigned Offset
= 0) {
196 Res
.Val
.setPointer(LI
);
197 Res
.Val
.setInt(LoadVal
);
202 static AvailableValue
getUndef() {
204 Res
.Val
.setPointer(nullptr);
205 Res
.Val
.setInt(UndefVal
);
210 bool isSimpleValue() const { return Val
.getInt() == SimpleVal
; }
211 bool isCoercedLoadValue() const { return Val
.getInt() == LoadVal
; }
212 bool isMemIntrinValue() const { return Val
.getInt() == MemIntrin
; }
213 bool isUndefValue() const { return Val
.getInt() == UndefVal
; }
215 Value
*getSimpleValue() const {
216 assert(isSimpleValue() && "Wrong accessor");
217 return Val
.getPointer();
220 LoadInst
*getCoercedLoadValue() const {
221 assert(isCoercedLoadValue() && "Wrong accessor");
222 return cast
<LoadInst
>(Val
.getPointer());
225 MemIntrinsic
*getMemIntrinValue() const {
226 assert(isMemIntrinValue() && "Wrong accessor");
227 return cast
<MemIntrinsic
>(Val
.getPointer());
230 /// Emit code at the specified insertion point to adjust the value defined
231 /// here to the specified type. This handles various coercion cases.
232 Value
*MaterializeAdjustedValue(LoadInst
*LI
, Instruction
*InsertPt
,
236 /// Represents an AvailableValue which can be rematerialized at the end of
237 /// the associated BasicBlock.
238 struct llvm::gvn::AvailableValueInBlock
{
239 /// BB - The basic block in question.
242 /// AV - The actual available value
245 static AvailableValueInBlock
get(BasicBlock
*BB
, AvailableValue
&&AV
) {
246 AvailableValueInBlock Res
;
248 Res
.AV
= std::move(AV
);
252 static AvailableValueInBlock
get(BasicBlock
*BB
, Value
*V
,
253 unsigned Offset
= 0) {
254 return get(BB
, AvailableValue::get(V
, Offset
));
257 static AvailableValueInBlock
getUndef(BasicBlock
*BB
) {
258 return get(BB
, AvailableValue::getUndef());
261 /// Emit code at the end of this block to adjust the value defined here to
262 /// the specified type. This handles various coercion cases.
263 Value
*MaterializeAdjustedValue(LoadInst
*LI
, GVN
&gvn
) const {
264 return AV
.MaterializeAdjustedValue(LI
, BB
->getTerminator(), gvn
);
268 //===----------------------------------------------------------------------===//
269 // ValueTable Internal Functions
270 //===----------------------------------------------------------------------===//
272 GVN::Expression
GVN::ValueTable::createExpr(Instruction
*I
) {
274 e
.type
= I
->getType();
275 e
.opcode
= I
->getOpcode();
276 for (Instruction::op_iterator OI
= I
->op_begin(), OE
= I
->op_end();
278 e
.varargs
.push_back(lookupOrAdd(*OI
));
279 if (I
->isCommutative()) {
280 // Ensure that commutative instructions that only differ by a permutation
281 // of their operands get the same value number by sorting the operand value
282 // numbers. Since all commutative instructions have two operands it is more
283 // efficient to sort by hand rather than using, say, std::sort.
284 assert(I
->getNumOperands() == 2 && "Unsupported commutative instruction!");
285 if (e
.varargs
[0] > e
.varargs
[1])
286 std::swap(e
.varargs
[0], e
.varargs
[1]);
287 e
.commutative
= true;
290 if (CmpInst
*C
= dyn_cast
<CmpInst
>(I
)) {
291 // Sort the operand value numbers so x<y and y>x get the same value number.
292 CmpInst::Predicate Predicate
= C
->getPredicate();
293 if (e
.varargs
[0] > e
.varargs
[1]) {
294 std::swap(e
.varargs
[0], e
.varargs
[1]);
295 Predicate
= CmpInst::getSwappedPredicate(Predicate
);
297 e
.opcode
= (C
->getOpcode() << 8) | Predicate
;
298 e
.commutative
= true;
299 } else if (InsertValueInst
*E
= dyn_cast
<InsertValueInst
>(I
)) {
300 for (InsertValueInst::idx_iterator II
= E
->idx_begin(), IE
= E
->idx_end();
302 e
.varargs
.push_back(*II
);
308 GVN::Expression
GVN::ValueTable::createCmpExpr(unsigned Opcode
,
309 CmpInst::Predicate Predicate
,
310 Value
*LHS
, Value
*RHS
) {
311 assert((Opcode
== Instruction::ICmp
|| Opcode
== Instruction::FCmp
) &&
312 "Not a comparison!");
314 e
.type
= CmpInst::makeCmpResultType(LHS
->getType());
315 e
.varargs
.push_back(lookupOrAdd(LHS
));
316 e
.varargs
.push_back(lookupOrAdd(RHS
));
318 // Sort the operand value numbers so x<y and y>x get the same value number.
319 if (e
.varargs
[0] > e
.varargs
[1]) {
320 std::swap(e
.varargs
[0], e
.varargs
[1]);
321 Predicate
= CmpInst::getSwappedPredicate(Predicate
);
323 e
.opcode
= (Opcode
<< 8) | Predicate
;
324 e
.commutative
= true;
328 GVN::Expression
GVN::ValueTable::createExtractvalueExpr(ExtractValueInst
*EI
) {
329 assert(EI
&& "Not an ExtractValueInst?");
331 e
.type
= EI
->getType();
334 WithOverflowInst
*WO
= dyn_cast
<WithOverflowInst
>(EI
->getAggregateOperand());
335 if (WO
!= nullptr && EI
->getNumIndices() == 1 && *EI
->idx_begin() == 0) {
336 // EI is an extract from one of our with.overflow intrinsics. Synthesize
337 // a semantically equivalent expression instead of an extract value
339 e
.opcode
= WO
->getBinaryOp();
340 e
.varargs
.push_back(lookupOrAdd(WO
->getLHS()));
341 e
.varargs
.push_back(lookupOrAdd(WO
->getRHS()));
345 // Not a recognised intrinsic. Fall back to producing an extract value
347 e
.opcode
= EI
->getOpcode();
348 for (Instruction::op_iterator OI
= EI
->op_begin(), OE
= EI
->op_end();
350 e
.varargs
.push_back(lookupOrAdd(*OI
));
352 for (ExtractValueInst::idx_iterator II
= EI
->idx_begin(), IE
= EI
->idx_end();
354 e
.varargs
.push_back(*II
);
359 //===----------------------------------------------------------------------===//
360 // ValueTable External Functions
361 //===----------------------------------------------------------------------===//
363 GVN::ValueTable::ValueTable() = default;
364 GVN::ValueTable::ValueTable(const ValueTable
&) = default;
365 GVN::ValueTable::ValueTable(ValueTable
&&) = default;
366 GVN::ValueTable::~ValueTable() = default;
368 /// add - Insert a value into the table with a specified value number.
369 void GVN::ValueTable::add(Value
*V
, uint32_t num
) {
370 valueNumbering
.insert(std::make_pair(V
, num
));
371 if (PHINode
*PN
= dyn_cast
<PHINode
>(V
))
372 NumberingPhi
[num
] = PN
;
375 uint32_t GVN::ValueTable::lookupOrAddCall(CallInst
*C
) {
376 if (AA
->doesNotAccessMemory(C
)) {
377 Expression exp
= createExpr(C
);
378 uint32_t e
= assignExpNewValueNum(exp
).first
;
379 valueNumbering
[C
] = e
;
381 } else if (MD
&& AA
->onlyReadsMemory(C
)) {
382 Expression exp
= createExpr(C
);
383 auto ValNum
= assignExpNewValueNum(exp
);
385 valueNumbering
[C
] = ValNum
.first
;
389 MemDepResult local_dep
= MD
->getDependency(C
);
391 if (!local_dep
.isDef() && !local_dep
.isNonLocal()) {
392 valueNumbering
[C
] = nextValueNumber
;
393 return nextValueNumber
++;
396 if (local_dep
.isDef()) {
397 CallInst
* local_cdep
= cast
<CallInst
>(local_dep
.getInst());
399 if (local_cdep
->getNumArgOperands() != C
->getNumArgOperands()) {
400 valueNumbering
[C
] = nextValueNumber
;
401 return nextValueNumber
++;
404 for (unsigned i
= 0, e
= C
->getNumArgOperands(); i
< e
; ++i
) {
405 uint32_t c_vn
= lookupOrAdd(C
->getArgOperand(i
));
406 uint32_t cd_vn
= lookupOrAdd(local_cdep
->getArgOperand(i
));
408 valueNumbering
[C
] = nextValueNumber
;
409 return nextValueNumber
++;
413 uint32_t v
= lookupOrAdd(local_cdep
);
414 valueNumbering
[C
] = v
;
419 const MemoryDependenceResults::NonLocalDepInfo
&deps
=
420 MD
->getNonLocalCallDependency(C
);
421 // FIXME: Move the checking logic to MemDep!
422 CallInst
* cdep
= nullptr;
424 // Check to see if we have a single dominating call instruction that is
426 for (unsigned i
= 0, e
= deps
.size(); i
!= e
; ++i
) {
427 const NonLocalDepEntry
*I
= &deps
[i
];
428 if (I
->getResult().isNonLocal())
431 // We don't handle non-definitions. If we already have a call, reject
432 // instruction dependencies.
433 if (!I
->getResult().isDef() || cdep
!= nullptr) {
438 CallInst
*NonLocalDepCall
= dyn_cast
<CallInst
>(I
->getResult().getInst());
439 // FIXME: All duplicated with non-local case.
440 if (NonLocalDepCall
&& DT
->properlyDominates(I
->getBB(), C
->getParent())){
441 cdep
= NonLocalDepCall
;
450 valueNumbering
[C
] = nextValueNumber
;
451 return nextValueNumber
++;
454 if (cdep
->getNumArgOperands() != C
->getNumArgOperands()) {
455 valueNumbering
[C
] = nextValueNumber
;
456 return nextValueNumber
++;
458 for (unsigned i
= 0, e
= C
->getNumArgOperands(); i
< e
; ++i
) {
459 uint32_t c_vn
= lookupOrAdd(C
->getArgOperand(i
));
460 uint32_t cd_vn
= lookupOrAdd(cdep
->getArgOperand(i
));
462 valueNumbering
[C
] = nextValueNumber
;
463 return nextValueNumber
++;
467 uint32_t v
= lookupOrAdd(cdep
);
468 valueNumbering
[C
] = v
;
471 valueNumbering
[C
] = nextValueNumber
;
472 return nextValueNumber
++;
476 /// Returns true if a value number exists for the specified value.
477 bool GVN::ValueTable::exists(Value
*V
) const { return valueNumbering
.count(V
) != 0; }
479 /// lookup_or_add - Returns the value number for the specified value, assigning
480 /// it a new number if it did not have one before.
481 uint32_t GVN::ValueTable::lookupOrAdd(Value
*V
) {
482 DenseMap
<Value
*, uint32_t>::iterator VI
= valueNumbering
.find(V
);
483 if (VI
!= valueNumbering
.end())
486 if (!isa
<Instruction
>(V
)) {
487 valueNumbering
[V
] = nextValueNumber
;
488 return nextValueNumber
++;
491 Instruction
* I
= cast
<Instruction
>(V
);
493 switch (I
->getOpcode()) {
494 case Instruction::Call
:
495 return lookupOrAddCall(cast
<CallInst
>(I
));
496 case Instruction::FNeg
:
497 case Instruction::Add
:
498 case Instruction::FAdd
:
499 case Instruction::Sub
:
500 case Instruction::FSub
:
501 case Instruction::Mul
:
502 case Instruction::FMul
:
503 case Instruction::UDiv
:
504 case Instruction::SDiv
:
505 case Instruction::FDiv
:
506 case Instruction::URem
:
507 case Instruction::SRem
:
508 case Instruction::FRem
:
509 case Instruction::Shl
:
510 case Instruction::LShr
:
511 case Instruction::AShr
:
512 case Instruction::And
:
513 case Instruction::Or
:
514 case Instruction::Xor
:
515 case Instruction::ICmp
:
516 case Instruction::FCmp
:
517 case Instruction::Trunc
:
518 case Instruction::ZExt
:
519 case Instruction::SExt
:
520 case Instruction::FPToUI
:
521 case Instruction::FPToSI
:
522 case Instruction::UIToFP
:
523 case Instruction::SIToFP
:
524 case Instruction::FPTrunc
:
525 case Instruction::FPExt
:
526 case Instruction::PtrToInt
:
527 case Instruction::IntToPtr
:
528 case Instruction::AddrSpaceCast
:
529 case Instruction::BitCast
:
530 case Instruction::Select
:
531 case Instruction::ExtractElement
:
532 case Instruction::InsertElement
:
533 case Instruction::ShuffleVector
:
534 case Instruction::InsertValue
:
535 case Instruction::GetElementPtr
:
538 case Instruction::ExtractValue
:
539 exp
= createExtractvalueExpr(cast
<ExtractValueInst
>(I
));
541 case Instruction::PHI
:
542 valueNumbering
[V
] = nextValueNumber
;
543 NumberingPhi
[nextValueNumber
] = cast
<PHINode
>(V
);
544 return nextValueNumber
++;
546 valueNumbering
[V
] = nextValueNumber
;
547 return nextValueNumber
++;
550 uint32_t e
= assignExpNewValueNum(exp
).first
;
551 valueNumbering
[V
] = e
;
555 /// Returns the value number of the specified value. Fails if
556 /// the value has not yet been numbered.
557 uint32_t GVN::ValueTable::lookup(Value
*V
, bool Verify
) const {
558 DenseMap
<Value
*, uint32_t>::const_iterator VI
= valueNumbering
.find(V
);
560 assert(VI
!= valueNumbering
.end() && "Value not numbered?");
563 return (VI
!= valueNumbering
.end()) ? VI
->second
: 0;
566 /// Returns the value number of the given comparison,
567 /// assigning it a new number if it did not have one before. Useful when
568 /// we deduced the result of a comparison, but don't immediately have an
569 /// instruction realizing that comparison to hand.
570 uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode
,
571 CmpInst::Predicate Predicate
,
572 Value
*LHS
, Value
*RHS
) {
573 Expression exp
= createCmpExpr(Opcode
, Predicate
, LHS
, RHS
);
574 return assignExpNewValueNum(exp
).first
;
577 /// Remove all entries from the ValueTable.
578 void GVN::ValueTable::clear() {
579 valueNumbering
.clear();
580 expressionNumbering
.clear();
581 NumberingPhi
.clear();
582 PhiTranslateTable
.clear();
589 /// Remove a value from the value numbering.
590 void GVN::ValueTable::erase(Value
*V
) {
591 uint32_t Num
= valueNumbering
.lookup(V
);
592 valueNumbering
.erase(V
);
593 // If V is PHINode, V <--> value number is an one-to-one mapping.
595 NumberingPhi
.erase(Num
);
598 /// verifyRemoved - Verify that the value is removed from all internal data
600 void GVN::ValueTable::verifyRemoved(const Value
*V
) const {
601 for (DenseMap
<Value
*, uint32_t>::const_iterator
602 I
= valueNumbering
.begin(), E
= valueNumbering
.end(); I
!= E
; ++I
) {
603 assert(I
->first
!= V
&& "Inst still occurs in value numbering map!");
607 //===----------------------------------------------------------------------===//
609 //===----------------------------------------------------------------------===//
611 PreservedAnalyses
GVN::run(Function
&F
, FunctionAnalysisManager
&AM
) {
612 // FIXME: The order of evaluation of these 'getResult' calls is very
613 // significant! Re-ordering these variables will cause GVN when run alone to
614 // be less effective! We should fix memdep and basic-aa to not exhibit this
615 // behavior, but until then don't change the order here.
616 auto &AC
= AM
.getResult
<AssumptionAnalysis
>(F
);
617 auto &DT
= AM
.getResult
<DominatorTreeAnalysis
>(F
);
618 auto &TLI
= AM
.getResult
<TargetLibraryAnalysis
>(F
);
619 auto &AA
= AM
.getResult
<AAManager
>(F
);
620 auto &MemDep
= AM
.getResult
<MemoryDependenceAnalysis
>(F
);
621 auto *LI
= AM
.getCachedResult
<LoopAnalysis
>(F
);
622 auto &ORE
= AM
.getResult
<OptimizationRemarkEmitterAnalysis
>(F
);
623 bool Changed
= runImpl(F
, AC
, DT
, TLI
, AA
, &MemDep
, LI
, &ORE
);
625 return PreservedAnalyses::all();
626 PreservedAnalyses PA
;
627 PA
.preserve
<DominatorTreeAnalysis
>();
628 PA
.preserve
<GlobalsAA
>();
629 PA
.preserve
<TargetLibraryAnalysis
>();
631 PA
.preserve
<LoopAnalysis
>();
635 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
636 LLVM_DUMP_METHOD
void GVN::dump(DenseMap
<uint32_t, Value
*>& d
) const {
638 for (DenseMap
<uint32_t, Value
*>::iterator I
= d
.begin(),
639 E
= d
.end(); I
!= E
; ++I
) {
640 errs() << I
->first
<< "\n";
647 /// Return true if we can prove that the value
648 /// we're analyzing is fully available in the specified block. As we go, keep
649 /// track of which blocks we know are fully alive in FullyAvailableBlocks. This
650 /// map is actually a tri-state map with the following values:
651 /// 0) we know the block *is not* fully available.
652 /// 1) we know the block *is* fully available.
653 /// 2) we do not know whether the block is fully available or not, but we are
654 /// currently speculating that it will be.
655 /// 3) we are speculating for this block and have used that to speculate for
657 static bool IsValueFullyAvailableInBlock(BasicBlock
*BB
,
658 DenseMap
<BasicBlock
*, char> &FullyAvailableBlocks
,
659 uint32_t RecurseDepth
) {
660 if (RecurseDepth
> MaxRecurseDepth
)
663 // Optimistically assume that the block is fully available and check to see
664 // if we already know about this block in one lookup.
665 std::pair
<DenseMap
<BasicBlock
*, char>::iterator
, bool> IV
=
666 FullyAvailableBlocks
.insert(std::make_pair(BB
, 2));
668 // If the entry already existed for this block, return the precomputed value.
670 // If this is a speculative "available" value, mark it as being used for
671 // speculation of other blocks.
672 if (IV
.first
->second
== 2)
673 IV
.first
->second
= 3;
674 return IV
.first
->second
!= 0;
677 // Otherwise, see if it is fully available in all predecessors.
678 pred_iterator PI
= pred_begin(BB
), PE
= pred_end(BB
);
680 // If this block has no predecessors, it isn't live-in here.
682 goto SpeculationFailure
;
684 for (; PI
!= PE
; ++PI
)
685 // If the value isn't fully available in one of our predecessors, then it
686 // isn't fully available in this block either. Undo our previous
687 // optimistic assumption and bail out.
688 if (!IsValueFullyAvailableInBlock(*PI
, FullyAvailableBlocks
,RecurseDepth
+1))
689 goto SpeculationFailure
;
693 // If we get here, we found out that this is not, after
694 // all, a fully-available block. We have a problem if we speculated on this and
695 // used the speculation to mark other blocks as available.
697 char &BBVal
= FullyAvailableBlocks
[BB
];
699 // If we didn't speculate on this, just return with it set to false.
705 // If we did speculate on this value, we could have blocks set to 1 that are
706 // incorrect. Walk the (transitive) successors of this block and mark them as
708 SmallVector
<BasicBlock
*, 32> BBWorklist
;
709 BBWorklist
.push_back(BB
);
712 BasicBlock
*Entry
= BBWorklist
.pop_back_val();
713 // Note that this sets blocks to 0 (unavailable) if they happen to not
714 // already be in FullyAvailableBlocks. This is safe.
715 char &EntryVal
= FullyAvailableBlocks
[Entry
];
716 if (EntryVal
== 0) continue; // Already unavailable.
718 // Mark as unavailable.
721 BBWorklist
.append(succ_begin(Entry
), succ_end(Entry
));
722 } while (!BBWorklist
.empty());
727 /// Given a set of loads specified by ValuesPerBlock,
728 /// construct SSA form, allowing us to eliminate LI. This returns the value
729 /// that should be used at LI's definition site.
730 static Value
*ConstructSSAForLoadSet(LoadInst
*LI
,
731 SmallVectorImpl
<AvailableValueInBlock
> &ValuesPerBlock
,
733 // Check for the fully redundant, dominating load case. In this case, we can
734 // just use the dominating value directly.
735 if (ValuesPerBlock
.size() == 1 &&
736 gvn
.getDominatorTree().properlyDominates(ValuesPerBlock
[0].BB
,
738 assert(!ValuesPerBlock
[0].AV
.isUndefValue() &&
739 "Dead BB dominate this block");
740 return ValuesPerBlock
[0].MaterializeAdjustedValue(LI
, gvn
);
743 // Otherwise, we have to construct SSA form.
744 SmallVector
<PHINode
*, 8> NewPHIs
;
745 SSAUpdater
SSAUpdate(&NewPHIs
);
746 SSAUpdate
.Initialize(LI
->getType(), LI
->getName());
748 for (const AvailableValueInBlock
&AV
: ValuesPerBlock
) {
749 BasicBlock
*BB
= AV
.BB
;
751 if (SSAUpdate
.HasValueForBlock(BB
))
754 // If the value is the load that we will be eliminating, and the block it's
755 // available in is the block that the load is in, then don't add it as
756 // SSAUpdater will resolve the value to the relevant phi which may let it
757 // avoid phi construction entirely if there's actually only one value.
758 if (BB
== LI
->getParent() &&
759 ((AV
.AV
.isSimpleValue() && AV
.AV
.getSimpleValue() == LI
) ||
760 (AV
.AV
.isCoercedLoadValue() && AV
.AV
.getCoercedLoadValue() == LI
)))
763 SSAUpdate
.AddAvailableValue(BB
, AV
.MaterializeAdjustedValue(LI
, gvn
));
766 // Perform PHI construction.
767 return SSAUpdate
.GetValueInMiddleOfBlock(LI
->getParent());
770 Value
*AvailableValue::MaterializeAdjustedValue(LoadInst
*LI
,
771 Instruction
*InsertPt
,
774 Type
*LoadTy
= LI
->getType();
775 const DataLayout
&DL
= LI
->getModule()->getDataLayout();
776 if (isSimpleValue()) {
777 Res
= getSimpleValue();
778 if (Res
->getType() != LoadTy
) {
779 Res
= getStoreValueForLoad(Res
, Offset
, LoadTy
, InsertPt
, DL
);
781 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset
782 << " " << *getSimpleValue() << '\n'
786 } else if (isCoercedLoadValue()) {
787 LoadInst
*Load
= getCoercedLoadValue();
788 if (Load
->getType() == LoadTy
&& Offset
== 0) {
791 Res
= getLoadValueForLoad(Load
, Offset
, LoadTy
, InsertPt
, DL
);
792 // We would like to use gvn.markInstructionForDeletion here, but we can't
793 // because the load is already memoized into the leader map table that GVN
794 // tracks. It is potentially possible to remove the load from the table,
795 // but then there all of the operations based on it would need to be
796 // rehashed. Just leave the dead load around.
797 gvn
.getMemDep().removeInstruction(Load
);
798 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset
799 << " " << *getCoercedLoadValue() << '\n'
803 } else if (isMemIntrinValue()) {
804 Res
= getMemInstValueForLoad(getMemIntrinValue(), Offset
, LoadTy
,
806 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
807 << " " << *getMemIntrinValue() << '\n'
811 assert(isUndefValue() && "Should be UndefVal");
812 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";);
813 return UndefValue::get(LoadTy
);
815 assert(Res
&& "failed to materialize?");
819 static bool isLifetimeStart(const Instruction
*Inst
) {
820 if (const IntrinsicInst
* II
= dyn_cast
<IntrinsicInst
>(Inst
))
821 return II
->getIntrinsicID() == Intrinsic::lifetime_start
;
825 /// Try to locate the three instruction involved in a missed
826 /// load-elimination case that is due to an intervening store.
827 static void reportMayClobberedLoad(LoadInst
*LI
, MemDepResult DepInfo
,
829 OptimizationRemarkEmitter
*ORE
) {
832 User
*OtherAccess
= nullptr;
834 OptimizationRemarkMissed
R(DEBUG_TYPE
, "LoadClobbered", LI
);
835 R
<< "load of type " << NV("Type", LI
->getType()) << " not eliminated"
838 for (auto *U
: LI
->getPointerOperand()->users())
839 if (U
!= LI
&& (isa
<LoadInst
>(U
) || isa
<StoreInst
>(U
)) &&
840 DT
->dominates(cast
<Instruction
>(U
), LI
)) {
841 // FIXME: for now give up if there are multiple memory accesses that
842 // dominate the load. We need further analysis to decide which one is
843 // that we're forwarding from.
845 OtherAccess
= nullptr;
851 R
<< " in favor of " << NV("OtherAccess", OtherAccess
);
853 R
<< " because it is clobbered by " << NV("ClobberedBy", DepInfo
.getInst());
858 bool GVN::AnalyzeLoadAvailability(LoadInst
*LI
, MemDepResult DepInfo
,
859 Value
*Address
, AvailableValue
&Res
) {
860 assert((DepInfo
.isDef() || DepInfo
.isClobber()) &&
861 "expected a local dependence");
862 assert(LI
->isUnordered() && "rules below are incorrect for ordered access");
864 const DataLayout
&DL
= LI
->getModule()->getDataLayout();
866 Instruction
*DepInst
= DepInfo
.getInst();
867 if (DepInfo
.isClobber()) {
868 // If the dependence is to a store that writes to a superset of the bits
869 // read by the load, we can extract the bits we need for the load from the
871 if (StoreInst
*DepSI
= dyn_cast
<StoreInst
>(DepInst
)) {
872 // Can't forward from non-atomic to atomic without violating memory model.
873 if (Address
&& LI
->isAtomic() <= DepSI
->isAtomic()) {
875 analyzeLoadFromClobberingStore(LI
->getType(), Address
, DepSI
, DL
);
877 Res
= AvailableValue::get(DepSI
->getValueOperand(), Offset
);
883 // Check to see if we have something like this:
886 // if we have this, replace the later with an extraction from the former.
887 if (LoadInst
*DepLI
= dyn_cast
<LoadInst
>(DepInst
)) {
888 // If this is a clobber and L is the first instruction in its block, then
889 // we have the first instruction in the entry block.
890 // Can't forward from non-atomic to atomic without violating memory model.
891 if (DepLI
!= LI
&& Address
&& LI
->isAtomic() <= DepLI
->isAtomic()) {
893 analyzeLoadFromClobberingLoad(LI
->getType(), Address
, DepLI
, DL
);
896 Res
= AvailableValue::getLoad(DepLI
, Offset
);
902 // If the clobbering value is a memset/memcpy/memmove, see if we can
903 // forward a value on from it.
904 if (MemIntrinsic
*DepMI
= dyn_cast
<MemIntrinsic
>(DepInst
)) {
905 if (Address
&& !LI
->isAtomic()) {
906 int Offset
= analyzeLoadFromClobberingMemInst(LI
->getType(), Address
,
909 Res
= AvailableValue::getMI(DepMI
, Offset
);
914 // Nothing known about this clobber, have to be conservative
916 // fast print dep, using operator<< on instruction is too slow.
917 dbgs() << "GVN: load "; LI
->printAsOperand(dbgs());
918 dbgs() << " is clobbered by " << *DepInst
<< '\n';);
919 if (ORE
->allowExtraAnalysis(DEBUG_TYPE
))
920 reportMayClobberedLoad(LI
, DepInfo
, DT
, ORE
);
924 assert(DepInfo
.isDef() && "follows from above");
926 // Loading the allocation -> undef.
927 if (isa
<AllocaInst
>(DepInst
) || isMallocLikeFn(DepInst
, TLI
) ||
928 // Loading immediately after lifetime begin -> undef.
929 isLifetimeStart(DepInst
)) {
930 Res
= AvailableValue::get(UndefValue::get(LI
->getType()));
934 // Loading from calloc (which zero initializes memory) -> zero
935 if (isCallocLikeFn(DepInst
, TLI
)) {
936 Res
= AvailableValue::get(Constant::getNullValue(LI
->getType()));
940 if (StoreInst
*S
= dyn_cast
<StoreInst
>(DepInst
)) {
941 // Reject loads and stores that are to the same address but are of
942 // different types if we have to. If the stored value is larger or equal to
943 // the loaded value, we can reuse it.
944 if (!canCoerceMustAliasedValueToLoad(S
->getValueOperand(), LI
->getType(),
948 // Can't forward from non-atomic to atomic without violating memory model.
949 if (S
->isAtomic() < LI
->isAtomic())
952 Res
= AvailableValue::get(S
->getValueOperand());
956 if (LoadInst
*LD
= dyn_cast
<LoadInst
>(DepInst
)) {
957 // If the types mismatch and we can't handle it, reject reuse of the load.
958 // If the stored value is larger or equal to the loaded value, we can reuse
960 if (!canCoerceMustAliasedValueToLoad(LD
, LI
->getType(), DL
))
963 // Can't forward from non-atomic to atomic without violating memory model.
964 if (LD
->isAtomic() < LI
->isAtomic())
967 Res
= AvailableValue::getLoad(LD
);
971 // Unknown def - must be conservative
973 // fast print dep, using operator<< on instruction is too slow.
974 dbgs() << "GVN: load "; LI
->printAsOperand(dbgs());
975 dbgs() << " has unknown def " << *DepInst
<< '\n';);
979 void GVN::AnalyzeLoadAvailability(LoadInst
*LI
, LoadDepVect
&Deps
,
980 AvailValInBlkVect
&ValuesPerBlock
,
981 UnavailBlkVect
&UnavailableBlocks
) {
982 // Filter out useless results (non-locals, etc). Keep track of the blocks
983 // where we have a value available in repl, also keep track of whether we see
984 // dependencies that produce an unknown value for the load (such as a call
985 // that could potentially clobber the load).
986 unsigned NumDeps
= Deps
.size();
987 for (unsigned i
= 0, e
= NumDeps
; i
!= e
; ++i
) {
988 BasicBlock
*DepBB
= Deps
[i
].getBB();
989 MemDepResult DepInfo
= Deps
[i
].getResult();
991 if (DeadBlocks
.count(DepBB
)) {
992 // Dead dependent mem-op disguise as a load evaluating the same value
993 // as the load in question.
994 ValuesPerBlock
.push_back(AvailableValueInBlock::getUndef(DepBB
));
998 if (!DepInfo
.isDef() && !DepInfo
.isClobber()) {
999 UnavailableBlocks
.push_back(DepBB
);
1003 // The address being loaded in this non-local block may not be the same as
1004 // the pointer operand of the load if PHI translation occurs. Make sure
1005 // to consider the right address.
1006 Value
*Address
= Deps
[i
].getAddress();
1009 if (AnalyzeLoadAvailability(LI
, DepInfo
, Address
, AV
)) {
1010 // subtlety: because we know this was a non-local dependency, we know
1011 // it's safe to materialize anywhere between the instruction within
1012 // DepInfo and the end of it's block.
1013 ValuesPerBlock
.push_back(AvailableValueInBlock::get(DepBB
,
1016 UnavailableBlocks
.push_back(DepBB
);
1020 assert(NumDeps
== ValuesPerBlock
.size() + UnavailableBlocks
.size() &&
1021 "post condition violation");
1024 bool GVN::PerformLoadPRE(LoadInst
*LI
, AvailValInBlkVect
&ValuesPerBlock
,
1025 UnavailBlkVect
&UnavailableBlocks
) {
1026 // Okay, we have *some* definitions of the value. This means that the value
1027 // is available in some of our (transitive) predecessors. Lets think about
1028 // doing PRE of this load. This will involve inserting a new load into the
1029 // predecessor when it's not available. We could do this in general, but
1030 // prefer to not increase code size. As such, we only do this when we know
1031 // that we only have to insert *one* load (which means we're basically moving
1032 // the load, not inserting a new one).
1034 SmallPtrSet
<BasicBlock
*, 4> Blockers(UnavailableBlocks
.begin(),
1035 UnavailableBlocks
.end());
1037 // Let's find the first basic block with more than one predecessor. Walk
1038 // backwards through predecessors if needed.
1039 BasicBlock
*LoadBB
= LI
->getParent();
1040 BasicBlock
*TmpBB
= LoadBB
;
1041 bool IsSafeToSpeculativelyExecute
= isSafeToSpeculativelyExecute(LI
);
1043 // Check that there is no implicit control flow instructions above our load in
1044 // its block. If there is an instruction that doesn't always pass the
1045 // execution to the following instruction, then moving through it may become
1046 // invalid. For example:
1051 // guard(0 <= index && index < LEN);
1054 // It is illegal to move the array access to any point above the guard,
1055 // because if the index is out of bounds we should deoptimize rather than
1056 // access the array.
1057 // Check that there is no guard in this block above our instruction.
1058 if (!IsSafeToSpeculativelyExecute
&& ICF
->isDominatedByICFIFromSameBlock(LI
))
1060 while (TmpBB
->getSinglePredecessor()) {
1061 TmpBB
= TmpBB
->getSinglePredecessor();
1062 if (TmpBB
== LoadBB
) // Infinite (unreachable) loop.
1064 if (Blockers
.count(TmpBB
))
1067 // If any of these blocks has more than one successor (i.e. if the edge we
1068 // just traversed was critical), then there are other paths through this
1069 // block along which the load may not be anticipated. Hoisting the load
1070 // above this block would be adding the load to execution paths along
1071 // which it was not previously executed.
1072 if (TmpBB
->getTerminator()->getNumSuccessors() != 1)
1075 // Check that there is no implicit control flow in a block above.
1076 if (!IsSafeToSpeculativelyExecute
&& ICF
->hasICF(TmpBB
))
1083 // Check to see how many predecessors have the loaded value fully
1085 MapVector
<BasicBlock
*, Value
*> PredLoads
;
1086 DenseMap
<BasicBlock
*, char> FullyAvailableBlocks
;
1087 for (const AvailableValueInBlock
&AV
: ValuesPerBlock
)
1088 FullyAvailableBlocks
[AV
.BB
] = true;
1089 for (BasicBlock
*UnavailableBB
: UnavailableBlocks
)
1090 FullyAvailableBlocks
[UnavailableBB
] = false;
1092 SmallVector
<BasicBlock
*, 4> CriticalEdgePred
;
1093 for (BasicBlock
*Pred
: predecessors(LoadBB
)) {
1094 // If any predecessor block is an EH pad that does not allow non-PHI
1095 // instructions before the terminator, we can't PRE the load.
1096 if (Pred
->getTerminator()->isEHPad()) {
1098 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
1099 << Pred
->getName() << "': " << *LI
<< '\n');
1103 if (IsValueFullyAvailableInBlock(Pred
, FullyAvailableBlocks
, 0)) {
1107 if (Pred
->getTerminator()->getNumSuccessors() != 1) {
1108 if (isa
<IndirectBrInst
>(Pred
->getTerminator())) {
1110 dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1111 << Pred
->getName() << "': " << *LI
<< '\n');
1115 // FIXME: Can we support the fallthrough edge?
1116 if (isa
<CallBrInst
>(Pred
->getTerminator())) {
1118 dbgs() << "COULD NOT PRE LOAD BECAUSE OF CALLBR CRITICAL EDGE '"
1119 << Pred
->getName() << "': " << *LI
<< '\n');
1123 if (LoadBB
->isEHPad()) {
1125 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
1126 << Pred
->getName() << "': " << *LI
<< '\n');
1130 CriticalEdgePred
.push_back(Pred
);
1132 // Only add the predecessors that will not be split for now.
1133 PredLoads
[Pred
] = nullptr;
1137 // Decide whether PRE is profitable for this load.
1138 unsigned NumUnavailablePreds
= PredLoads
.size() + CriticalEdgePred
.size();
1139 assert(NumUnavailablePreds
!= 0 &&
1140 "Fully available value should already be eliminated!");
1142 // If this load is unavailable in multiple predecessors, reject it.
1143 // FIXME: If we could restructure the CFG, we could make a common pred with
1144 // all the preds that don't have an available LI and insert a new load into
1146 if (NumUnavailablePreds
!= 1)
1149 // Split critical edges, and update the unavailable predecessors accordingly.
1150 for (BasicBlock
*OrigPred
: CriticalEdgePred
) {
1151 BasicBlock
*NewPred
= splitCriticalEdges(OrigPred
, LoadBB
);
1152 assert(!PredLoads
.count(OrigPred
) && "Split edges shouldn't be in map!");
1153 PredLoads
[NewPred
] = nullptr;
1154 LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred
->getName() << "->"
1155 << LoadBB
->getName() << '\n');
1158 // Check if the load can safely be moved to all the unavailable predecessors.
1159 bool CanDoPRE
= true;
1160 const DataLayout
&DL
= LI
->getModule()->getDataLayout();
1161 SmallVector
<Instruction
*, 8> NewInsts
;
1162 for (auto &PredLoad
: PredLoads
) {
1163 BasicBlock
*UnavailablePred
= PredLoad
.first
;
1165 // Do PHI translation to get its value in the predecessor if necessary. The
1166 // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1167 // We do the translation for each edge we skipped by going from LI's block
1168 // to LoadBB, otherwise we might miss pieces needing translation.
1170 // If all preds have a single successor, then we know it is safe to insert
1171 // the load on the pred (?!?), so we can insert code to materialize the
1172 // pointer if it is not available.
1173 Value
*LoadPtr
= LI
->getPointerOperand();
1174 BasicBlock
*Cur
= LI
->getParent();
1175 while (Cur
!= LoadBB
) {
1176 PHITransAddr
Address(LoadPtr
, DL
, AC
);
1177 LoadPtr
= Address
.PHITranslateWithInsertion(
1178 Cur
, Cur
->getSinglePredecessor(), *DT
, NewInsts
);
1183 Cur
= Cur
->getSinglePredecessor();
1187 PHITransAddr
Address(LoadPtr
, DL
, AC
);
1188 LoadPtr
= Address
.PHITranslateWithInsertion(LoadBB
, UnavailablePred
, *DT
,
1191 // If we couldn't find or insert a computation of this phi translated value,
1194 LLVM_DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1195 << *LI
->getPointerOperand() << "\n");
1200 PredLoad
.second
= LoadPtr
;
1204 while (!NewInsts
.empty()) {
1205 // Erase instructions generated by the failed PHI translation before
1206 // trying to number them. PHI translation might insert instructions
1207 // in basic blocks other than the current one, and we delete them
1208 // directly, as markInstructionForDeletion only allows removing from the
1209 // current basic block.
1210 NewInsts
.pop_back_val()->eraseFromParent();
1212 // HINT: Don't revert the edge-splitting as following transformation may
1213 // also need to split these critical edges.
1214 return !CriticalEdgePred
.empty();
1217 // Okay, we can eliminate this load by inserting a reload in the predecessor
1218 // and using PHI construction to get the value in the other predecessors, do
1220 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI
<< '\n');
1221 LLVM_DEBUG(if (!NewInsts
.empty()) dbgs()
1222 << "INSERTED " << NewInsts
.size() << " INSTS: " << *NewInsts
.back()
1225 // Assign value numbers to the new instructions.
1226 for (Instruction
*I
: NewInsts
) {
1227 // Instructions that have been inserted in predecessor(s) to materialize
1228 // the load address do not retain their original debug locations. Doing
1229 // so could lead to confusing (but correct) source attributions.
1230 if (const DebugLoc
&DL
= I
->getDebugLoc())
1231 I
->setDebugLoc(DebugLoc::get(0, 0, DL
.getScope(), DL
.getInlinedAt()));
1233 // FIXME: We really _ought_ to insert these value numbers into their
1234 // parent's availability map. However, in doing so, we risk getting into
1235 // ordering issues. If a block hasn't been processed yet, we would be
1236 // marking a value as AVAIL-IN, which isn't what we intend.
1240 for (const auto &PredLoad
: PredLoads
) {
1241 BasicBlock
*UnavailablePred
= PredLoad
.first
;
1242 Value
*LoadPtr
= PredLoad
.second
;
1245 new LoadInst(LI
->getType(), LoadPtr
, LI
->getName() + ".pre",
1246 LI
->isVolatile(), LI
->getAlignment(), LI
->getOrdering(),
1247 LI
->getSyncScopeID(), UnavailablePred
->getTerminator());
1248 NewLoad
->setDebugLoc(LI
->getDebugLoc());
1250 // Transfer the old load's AA tags to the new load.
1252 LI
->getAAMetadata(Tags
);
1254 NewLoad
->setAAMetadata(Tags
);
1256 if (auto *MD
= LI
->getMetadata(LLVMContext::MD_invariant_load
))
1257 NewLoad
->setMetadata(LLVMContext::MD_invariant_load
, MD
);
1258 if (auto *InvGroupMD
= LI
->getMetadata(LLVMContext::MD_invariant_group
))
1259 NewLoad
->setMetadata(LLVMContext::MD_invariant_group
, InvGroupMD
);
1260 if (auto *RangeMD
= LI
->getMetadata(LLVMContext::MD_range
))
1261 NewLoad
->setMetadata(LLVMContext::MD_range
, RangeMD
);
1263 // We do not propagate the old load's debug location, because the new
1264 // load now lives in a different BB, and we want to avoid a jumpy line
1266 // FIXME: How do we retain source locations without causing poor debugging
1269 // Add the newly created load.
1270 ValuesPerBlock
.push_back(AvailableValueInBlock::get(UnavailablePred
,
1272 MD
->invalidateCachedPointerInfo(LoadPtr
);
1273 LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad
<< '\n');
1276 // Perform PHI construction.
1277 Value
*V
= ConstructSSAForLoadSet(LI
, ValuesPerBlock
, *this);
1278 LI
->replaceAllUsesWith(V
);
1279 if (isa
<PHINode
>(V
))
1281 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
1282 I
->setDebugLoc(LI
->getDebugLoc());
1283 if (V
->getType()->isPtrOrPtrVectorTy())
1284 MD
->invalidateCachedPointerInfo(V
);
1285 markInstructionForDeletion(LI
);
1287 return OptimizationRemark(DEBUG_TYPE
, "LoadPRE", LI
)
1288 << "load eliminated by PRE";
1294 static void reportLoadElim(LoadInst
*LI
, Value
*AvailableValue
,
1295 OptimizationRemarkEmitter
*ORE
) {
1296 using namespace ore
;
1299 return OptimizationRemark(DEBUG_TYPE
, "LoadElim", LI
)
1300 << "load of type " << NV("Type", LI
->getType()) << " eliminated"
1301 << setExtraArgs() << " in favor of "
1302 << NV("InfavorOfValue", AvailableValue
);
1306 /// Attempt to eliminate a load whose dependencies are
1307 /// non-local by performing PHI construction.
1308 bool GVN::processNonLocalLoad(LoadInst
*LI
) {
1309 // non-local speculations are not allowed under asan.
1310 if (LI
->getParent()->getParent()->hasFnAttribute(
1311 Attribute::SanitizeAddress
) ||
1312 LI
->getParent()->getParent()->hasFnAttribute(
1313 Attribute::SanitizeHWAddress
))
1316 // Step 1: Find the non-local dependencies of the load.
1318 MD
->getNonLocalPointerDependency(LI
, Deps
);
1320 // If we had to process more than one hundred blocks to find the
1321 // dependencies, this load isn't worth worrying about. Optimizing
1322 // it will be too expensive.
1323 unsigned NumDeps
= Deps
.size();
1324 if (NumDeps
> MaxNumDeps
)
1327 // If we had a phi translation failure, we'll have a single entry which is a
1328 // clobber in the current block. Reject this early.
1330 !Deps
[0].getResult().isDef() && !Deps
[0].getResult().isClobber()) {
1331 LLVM_DEBUG(dbgs() << "GVN: non-local load "; LI
->printAsOperand(dbgs());
1332 dbgs() << " has unknown dependencies\n";);
1336 // If this load follows a GEP, see if we can PRE the indices before analyzing.
1337 if (GetElementPtrInst
*GEP
= dyn_cast
<GetElementPtrInst
>(LI
->getOperand(0))) {
1338 for (GetElementPtrInst::op_iterator OI
= GEP
->idx_begin(),
1339 OE
= GEP
->idx_end();
1341 if (Instruction
*I
= dyn_cast
<Instruction
>(OI
->get()))
1342 performScalarPRE(I
);
1345 // Step 2: Analyze the availability of the load
1346 AvailValInBlkVect ValuesPerBlock
;
1347 UnavailBlkVect UnavailableBlocks
;
1348 AnalyzeLoadAvailability(LI
, Deps
, ValuesPerBlock
, UnavailableBlocks
);
1350 // If we have no predecessors that produce a known value for this load, exit
1352 if (ValuesPerBlock
.empty())
1355 // Step 3: Eliminate fully redundancy.
1357 // If all of the instructions we depend on produce a known value for this
1358 // load, then it is fully redundant and we can use PHI insertion to compute
1359 // its value. Insert PHIs and remove the fully redundant value now.
1360 if (UnavailableBlocks
.empty()) {
1361 LLVM_DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI
<< '\n');
1363 // Perform PHI construction.
1364 Value
*V
= ConstructSSAForLoadSet(LI
, ValuesPerBlock
, *this);
1365 LI
->replaceAllUsesWith(V
);
1367 if (isa
<PHINode
>(V
))
1369 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
1370 // If instruction I has debug info, then we should not update it.
1371 // Also, if I has a null DebugLoc, then it is still potentially incorrect
1372 // to propagate LI's DebugLoc because LI may not post-dominate I.
1373 if (LI
->getDebugLoc() && LI
->getParent() == I
->getParent())
1374 I
->setDebugLoc(LI
->getDebugLoc());
1375 if (V
->getType()->isPtrOrPtrVectorTy())
1376 MD
->invalidateCachedPointerInfo(V
);
1377 markInstructionForDeletion(LI
);
1379 reportLoadElim(LI
, V
, ORE
);
1383 // Step 4: Eliminate partial redundancy.
1384 if (!EnablePRE
|| !EnableLoadPRE
)
1387 return PerformLoadPRE(LI
, ValuesPerBlock
, UnavailableBlocks
);
1390 static bool hasUsersIn(Value
*V
, BasicBlock
*BB
) {
1391 for (User
*U
: V
->users())
1392 if (isa
<Instruction
>(U
) &&
1393 cast
<Instruction
>(U
)->getParent() == BB
)
1398 bool GVN::processAssumeIntrinsic(IntrinsicInst
*IntrinsicI
) {
1399 assert(IntrinsicI
->getIntrinsicID() == Intrinsic::assume
&&
1400 "This function can only be called with llvm.assume intrinsic");
1401 Value
*V
= IntrinsicI
->getArgOperand(0);
1403 if (ConstantInt
*Cond
= dyn_cast
<ConstantInt
>(V
)) {
1404 if (Cond
->isZero()) {
1405 Type
*Int8Ty
= Type::getInt8Ty(V
->getContext());
1406 // Insert a new store to null instruction before the load to indicate that
1407 // this code is not reachable. FIXME: We could insert unreachable
1408 // instruction directly because we can modify the CFG.
1409 new StoreInst(UndefValue::get(Int8Ty
),
1410 Constant::getNullValue(Int8Ty
->getPointerTo()),
1413 markInstructionForDeletion(IntrinsicI
);
1415 } else if (isa
<Constant
>(V
)) {
1416 // If it's not false, and constant, it must evaluate to true. This means our
1417 // assume is assume(true), and thus, pointless, and we don't want to do
1418 // anything more here.
1422 Constant
*True
= ConstantInt::getTrue(V
->getContext());
1423 bool Changed
= false;
1425 for (BasicBlock
*Successor
: successors(IntrinsicI
->getParent())) {
1426 BasicBlockEdge
Edge(IntrinsicI
->getParent(), Successor
);
1428 // This property is only true in dominated successors, propagateEquality
1429 // will check dominance for us.
1430 Changed
|= propagateEquality(V
, True
, Edge
, false);
1433 // We can replace assume value with true, which covers cases like this:
1434 // call void @llvm.assume(i1 %cmp)
1435 // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
1436 ReplaceOperandsWithMap
[V
] = True
;
1438 // If we find an equality fact, canonicalize all dominated uses in this block
1439 // to one of the two values. We heuristically choice the "oldest" of the
1440 // two where age is determined by value number. (Note that propagateEquality
1441 // above handles the cross block case.)
1443 // Key case to cover are:
1445 // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
1446 // call void @llvm.assume(i1 %cmp)
1447 // ret float %0 ; will change it to ret float 3.000000e+00
1449 // %load = load float, float* %addr
1450 // %cmp = fcmp oeq float %load, %0
1451 // call void @llvm.assume(i1 %cmp)
1452 // ret float %load ; will change it to ret float %0
1453 if (auto *CmpI
= dyn_cast
<CmpInst
>(V
)) {
1454 if (CmpI
->getPredicate() == CmpInst::Predicate::ICMP_EQ
||
1455 CmpI
->getPredicate() == CmpInst::Predicate::FCMP_OEQ
||
1456 (CmpI
->getPredicate() == CmpInst::Predicate::FCMP_UEQ
&&
1457 CmpI
->getFastMathFlags().noNaNs())) {
1458 Value
*CmpLHS
= CmpI
->getOperand(0);
1459 Value
*CmpRHS
= CmpI
->getOperand(1);
1460 // Heuristically pick the better replacement -- the choice of heuristic
1461 // isn't terribly important here, but the fact we canonicalize on some
1462 // replacement is for exposing other simplifications.
1463 // TODO: pull this out as a helper function and reuse w/existing
1464 // (slightly different) logic.
1465 if (isa
<Constant
>(CmpLHS
) && !isa
<Constant
>(CmpRHS
))
1466 std::swap(CmpLHS
, CmpRHS
);
1467 if (!isa
<Instruction
>(CmpLHS
) && isa
<Instruction
>(CmpRHS
))
1468 std::swap(CmpLHS
, CmpRHS
);
1469 if ((isa
<Argument
>(CmpLHS
) && isa
<Argument
>(CmpRHS
)) ||
1470 (isa
<Instruction
>(CmpLHS
) && isa
<Instruction
>(CmpRHS
))) {
1471 // Move the 'oldest' value to the right-hand side, using the value
1472 // number as a proxy for age.
1473 uint32_t LVN
= VN
.lookupOrAdd(CmpLHS
);
1474 uint32_t RVN
= VN
.lookupOrAdd(CmpRHS
);
1476 std::swap(CmpLHS
, CmpRHS
);
1479 // Handle degenerate case where we either haven't pruned a dead path or a
1480 // removed a trivial assume yet.
1481 if (isa
<Constant
>(CmpLHS
) && isa
<Constant
>(CmpRHS
))
1484 // +0.0 and -0.0 compare equal, but do not imply equivalence. Unless we
1485 // can prove equivalence, bail.
1486 if (CmpRHS
->getType()->isFloatTy() &&
1487 (!isa
<ConstantFP
>(CmpRHS
) || cast
<ConstantFP
>(CmpRHS
)->isZero()))
1490 LLVM_DEBUG(dbgs() << "Replacing dominated uses of "
1491 << *CmpLHS
<< " with "
1492 << *CmpRHS
<< " in block "
1493 << IntrinsicI
->getParent()->getName() << "\n");
1496 // Setup the replacement map - this handles uses within the same block
1497 if (hasUsersIn(CmpLHS
, IntrinsicI
->getParent()))
1498 ReplaceOperandsWithMap
[CmpLHS
] = CmpRHS
;
1500 // NOTE: The non-block local cases are handled by the call to
1501 // propagateEquality above; this block is just about handling the block
1502 // local cases. TODO: There's a bunch of logic in propagateEqualiy which
1503 // isn't duplicated for the block local case, can we share it somehow?
1509 static void patchAndReplaceAllUsesWith(Instruction
*I
, Value
*Repl
) {
1510 patchReplacementInstruction(I
, Repl
);
1511 I
->replaceAllUsesWith(Repl
);
1514 /// Attempt to eliminate a load, first by eliminating it
1515 /// locally, and then attempting non-local elimination if that fails.
1516 bool GVN::processLoad(LoadInst
*L
) {
1520 // This code hasn't been audited for ordered or volatile memory access
1521 if (!L
->isUnordered())
1524 if (L
->use_empty()) {
1525 markInstructionForDeletion(L
);
1529 // ... to a pointer that has been loaded from before...
1530 MemDepResult Dep
= MD
->getDependency(L
);
1532 // If it is defined in another block, try harder.
1533 if (Dep
.isNonLocal())
1534 return processNonLocalLoad(L
);
1536 // Only handle the local case below
1537 if (!Dep
.isDef() && !Dep
.isClobber()) {
1538 // This might be a NonFuncLocal or an Unknown
1540 // fast print dep, using operator<< on instruction is too slow.
1541 dbgs() << "GVN: load "; L
->printAsOperand(dbgs());
1542 dbgs() << " has unknown dependence\n";);
1547 if (AnalyzeLoadAvailability(L
, Dep
, L
->getPointerOperand(), AV
)) {
1548 Value
*AvailableValue
= AV
.MaterializeAdjustedValue(L
, L
, *this);
1550 // Replace the load!
1551 patchAndReplaceAllUsesWith(L
, AvailableValue
);
1552 markInstructionForDeletion(L
);
1554 reportLoadElim(L
, AvailableValue
, ORE
);
1555 // Tell MDA to rexamine the reused pointer since we might have more
1556 // information after forwarding it.
1557 if (MD
&& AvailableValue
->getType()->isPtrOrPtrVectorTy())
1558 MD
->invalidateCachedPointerInfo(AvailableValue
);
1565 /// Return a pair the first field showing the value number of \p Exp and the
1566 /// second field showing whether it is a value number newly created.
1567 std::pair
<uint32_t, bool>
1568 GVN::ValueTable::assignExpNewValueNum(Expression
&Exp
) {
1569 uint32_t &e
= expressionNumbering
[Exp
];
1570 bool CreateNewValNum
= !e
;
1571 if (CreateNewValNum
) {
1572 Expressions
.push_back(Exp
);
1573 if (ExprIdx
.size() < nextValueNumber
+ 1)
1574 ExprIdx
.resize(nextValueNumber
* 2);
1575 e
= nextValueNumber
;
1576 ExprIdx
[nextValueNumber
++] = nextExprNumber
++;
1578 return {e
, CreateNewValNum
};
1581 /// Return whether all the values related with the same \p num are
1582 /// defined in \p BB.
1583 bool GVN::ValueTable::areAllValsInBB(uint32_t Num
, const BasicBlock
*BB
,
1585 LeaderTableEntry
*Vals
= &Gvn
.LeaderTable
[Num
];
1586 while (Vals
&& Vals
->BB
== BB
)
1591 /// Wrap phiTranslateImpl to provide caching functionality.
1592 uint32_t GVN::ValueTable::phiTranslate(const BasicBlock
*Pred
,
1593 const BasicBlock
*PhiBlock
, uint32_t Num
,
1595 auto FindRes
= PhiTranslateTable
.find({Num
, Pred
});
1596 if (FindRes
!= PhiTranslateTable
.end())
1597 return FindRes
->second
;
1598 uint32_t NewNum
= phiTranslateImpl(Pred
, PhiBlock
, Num
, Gvn
);
1599 PhiTranslateTable
.insert({{Num
, Pred
}, NewNum
});
1603 // Return true if the value number \p Num and NewNum have equal value.
1604 // Return false if the result is unknown.
1605 bool GVN::ValueTable::areCallValsEqual(uint32_t Num
, uint32_t NewNum
,
1606 const BasicBlock
*Pred
,
1607 const BasicBlock
*PhiBlock
, GVN
&Gvn
) {
1608 CallInst
*Call
= nullptr;
1609 LeaderTableEntry
*Vals
= &Gvn
.LeaderTable
[Num
];
1611 Call
= dyn_cast
<CallInst
>(Vals
->Val
);
1612 if (Call
&& Call
->getParent() == PhiBlock
)
1617 if (AA
->doesNotAccessMemory(Call
))
1620 if (!MD
|| !AA
->onlyReadsMemory(Call
))
1623 MemDepResult local_dep
= MD
->getDependency(Call
);
1624 if (!local_dep
.isNonLocal())
1627 const MemoryDependenceResults::NonLocalDepInfo
&deps
=
1628 MD
->getNonLocalCallDependency(Call
);
1630 // Check to see if the Call has no function local clobber.
1631 for (unsigned i
= 0; i
< deps
.size(); i
++) {
1632 if (deps
[i
].getResult().isNonFuncLocal())
1638 /// Translate value number \p Num using phis, so that it has the values of
1640 uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock
*Pred
,
1641 const BasicBlock
*PhiBlock
,
1642 uint32_t Num
, GVN
&Gvn
) {
1643 if (PHINode
*PN
= NumberingPhi
[Num
]) {
1644 for (unsigned i
= 0; i
!= PN
->getNumIncomingValues(); ++i
) {
1645 if (PN
->getParent() == PhiBlock
&& PN
->getIncomingBlock(i
) == Pred
)
1646 if (uint32_t TransVal
= lookup(PN
->getIncomingValue(i
), false))
1652 // If there is any value related with Num is defined in a BB other than
1653 // PhiBlock, it cannot depend on a phi in PhiBlock without going through
1654 // a backedge. We can do an early exit in that case to save compile time.
1655 if (!areAllValsInBB(Num
, PhiBlock
, Gvn
))
1658 if (Num
>= ExprIdx
.size() || ExprIdx
[Num
] == 0)
1660 Expression Exp
= Expressions
[ExprIdx
[Num
]];
1662 for (unsigned i
= 0; i
< Exp
.varargs
.size(); i
++) {
1663 // For InsertValue and ExtractValue, some varargs are index numbers
1664 // instead of value numbers. Those index numbers should not be
1666 if ((i
> 1 && Exp
.opcode
== Instruction::InsertValue
) ||
1667 (i
> 0 && Exp
.opcode
== Instruction::ExtractValue
))
1669 Exp
.varargs
[i
] = phiTranslate(Pred
, PhiBlock
, Exp
.varargs
[i
], Gvn
);
1672 if (Exp
.commutative
) {
1673 assert(Exp
.varargs
.size() == 2 && "Unsupported commutative expression!");
1674 if (Exp
.varargs
[0] > Exp
.varargs
[1]) {
1675 std::swap(Exp
.varargs
[0], Exp
.varargs
[1]);
1676 uint32_t Opcode
= Exp
.opcode
>> 8;
1677 if (Opcode
== Instruction::ICmp
|| Opcode
== Instruction::FCmp
)
1678 Exp
.opcode
= (Opcode
<< 8) |
1679 CmpInst::getSwappedPredicate(
1680 static_cast<CmpInst::Predicate
>(Exp
.opcode
& 255));
1684 if (uint32_t NewNum
= expressionNumbering
[Exp
]) {
1685 if (Exp
.opcode
== Instruction::Call
&& NewNum
!= Num
)
1686 return areCallValsEqual(Num
, NewNum
, Pred
, PhiBlock
, Gvn
) ? NewNum
: Num
;
1692 /// Erase stale entry from phiTranslate cache so phiTranslate can be computed
1694 void GVN::ValueTable::eraseTranslateCacheEntry(uint32_t Num
,
1695 const BasicBlock
&CurrBlock
) {
1696 for (const BasicBlock
*Pred
: predecessors(&CurrBlock
)) {
1697 auto FindRes
= PhiTranslateTable
.find({Num
, Pred
});
1698 if (FindRes
!= PhiTranslateTable
.end())
1699 PhiTranslateTable
.erase(FindRes
);
1703 // In order to find a leader for a given value number at a
1704 // specific basic block, we first obtain the list of all Values for that number,
1705 // and then scan the list to find one whose block dominates the block in
1706 // question. This is fast because dominator tree queries consist of only
1707 // a few comparisons of DFS numbers.
1708 Value
*GVN::findLeader(const BasicBlock
*BB
, uint32_t num
) {
1709 LeaderTableEntry Vals
= LeaderTable
[num
];
1710 if (!Vals
.Val
) return nullptr;
1712 Value
*Val
= nullptr;
1713 if (DT
->dominates(Vals
.BB
, BB
)) {
1715 if (isa
<Constant
>(Val
)) return Val
;
1718 LeaderTableEntry
* Next
= Vals
.Next
;
1720 if (DT
->dominates(Next
->BB
, BB
)) {
1721 if (isa
<Constant
>(Next
->Val
)) return Next
->Val
;
1722 if (!Val
) Val
= Next
->Val
;
1731 /// There is an edge from 'Src' to 'Dst'. Return
1732 /// true if every path from the entry block to 'Dst' passes via this edge. In
1733 /// particular 'Dst' must not be reachable via another edge from 'Src'.
1734 static bool isOnlyReachableViaThisEdge(const BasicBlockEdge
&E
,
1735 DominatorTree
*DT
) {
1736 // While in theory it is interesting to consider the case in which Dst has
1737 // more than one predecessor, because Dst might be part of a loop which is
1738 // only reachable from Src, in practice it is pointless since at the time
1739 // GVN runs all such loops have preheaders, which means that Dst will have
1740 // been changed to have only one predecessor, namely Src.
1741 const BasicBlock
*Pred
= E
.getEnd()->getSinglePredecessor();
1742 assert((!Pred
|| Pred
== E
.getStart()) &&
1743 "No edge between these basic blocks!");
1744 return Pred
!= nullptr;
1747 void GVN::assignBlockRPONumber(Function
&F
) {
1748 BlockRPONumber
.clear();
1749 uint32_t NextBlockNumber
= 1;
1750 ReversePostOrderTraversal
<Function
*> RPOT(&F
);
1751 for (BasicBlock
*BB
: RPOT
)
1752 BlockRPONumber
[BB
] = NextBlockNumber
++;
1753 InvalidBlockRPONumbers
= false;
1756 bool GVN::replaceOperandsForInBlockEquality(Instruction
*Instr
) const {
1757 bool Changed
= false;
1758 for (unsigned OpNum
= 0; OpNum
< Instr
->getNumOperands(); ++OpNum
) {
1759 Value
*Operand
= Instr
->getOperand(OpNum
);
1760 auto it
= ReplaceOperandsWithMap
.find(Operand
);
1761 if (it
!= ReplaceOperandsWithMap
.end()) {
1762 LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand
<< " with "
1763 << *it
->second
<< " in instruction " << *Instr
<< '\n');
1764 Instr
->setOperand(OpNum
, it
->second
);
1771 /// The given values are known to be equal in every block
1772 /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
1773 /// 'RHS' everywhere in the scope. Returns whether a change was made.
1774 /// If DominatesByEdge is false, then it means that we will propagate the RHS
1775 /// value starting from the end of Root.Start.
1776 bool GVN::propagateEquality(Value
*LHS
, Value
*RHS
, const BasicBlockEdge
&Root
,
1777 bool DominatesByEdge
) {
1778 SmallVector
<std::pair
<Value
*, Value
*>, 4> Worklist
;
1779 Worklist
.push_back(std::make_pair(LHS
, RHS
));
1780 bool Changed
= false;
1781 // For speed, compute a conservative fast approximation to
1782 // DT->dominates(Root, Root.getEnd());
1783 const bool RootDominatesEnd
= isOnlyReachableViaThisEdge(Root
, DT
);
1785 while (!Worklist
.empty()) {
1786 std::pair
<Value
*, Value
*> Item
= Worklist
.pop_back_val();
1787 LHS
= Item
.first
; RHS
= Item
.second
;
1791 assert(LHS
->getType() == RHS
->getType() && "Equality but unequal types!");
1793 // Don't try to propagate equalities between constants.
1794 if (isa
<Constant
>(LHS
) && isa
<Constant
>(RHS
))
1797 // Prefer a constant on the right-hand side, or an Argument if no constants.
1798 if (isa
<Constant
>(LHS
) || (isa
<Argument
>(LHS
) && !isa
<Constant
>(RHS
)))
1799 std::swap(LHS
, RHS
);
1800 assert((isa
<Argument
>(LHS
) || isa
<Instruction
>(LHS
)) && "Unexpected value!");
1802 // If there is no obvious reason to prefer the left-hand side over the
1803 // right-hand side, ensure the longest lived term is on the right-hand side,
1804 // so the shortest lived term will be replaced by the longest lived.
1805 // This tends to expose more simplifications.
1806 uint32_t LVN
= VN
.lookupOrAdd(LHS
);
1807 if ((isa
<Argument
>(LHS
) && isa
<Argument
>(RHS
)) ||
1808 (isa
<Instruction
>(LHS
) && isa
<Instruction
>(RHS
))) {
1809 // Move the 'oldest' value to the right-hand side, using the value number
1810 // as a proxy for age.
1811 uint32_t RVN
= VN
.lookupOrAdd(RHS
);
1813 std::swap(LHS
, RHS
);
1818 // If value numbering later sees that an instruction in the scope is equal
1819 // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
1820 // the invariant that instructions only occur in the leader table for their
1821 // own value number (this is used by removeFromLeaderTable), do not do this
1822 // if RHS is an instruction (if an instruction in the scope is morphed into
1823 // LHS then it will be turned into RHS by the next GVN iteration anyway, so
1824 // using the leader table is about compiling faster, not optimizing better).
1825 // The leader table only tracks basic blocks, not edges. Only add to if we
1826 // have the simple case where the edge dominates the end.
1827 if (RootDominatesEnd
&& !isa
<Instruction
>(RHS
))
1828 addToLeaderTable(LVN
, RHS
, Root
.getEnd());
1830 // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
1831 // LHS always has at least one use that is not dominated by Root, this will
1832 // never do anything if LHS has only one use.
1833 if (!LHS
->hasOneUse()) {
1834 unsigned NumReplacements
=
1836 ? replaceDominatedUsesWith(LHS
, RHS
, *DT
, Root
)
1837 : replaceDominatedUsesWith(LHS
, RHS
, *DT
, Root
.getStart());
1839 Changed
|= NumReplacements
> 0;
1840 NumGVNEqProp
+= NumReplacements
;
1841 // Cached information for anything that uses LHS will be invalid.
1843 MD
->invalidateCachedPointerInfo(LHS
);
1846 // Now try to deduce additional equalities from this one. For example, if
1847 // the known equality was "(A != B)" == "false" then it follows that A and B
1848 // are equal in the scope. Only boolean equalities with an explicit true or
1849 // false RHS are currently supported.
1850 if (!RHS
->getType()->isIntegerTy(1))
1851 // Not a boolean equality - bail out.
1853 ConstantInt
*CI
= dyn_cast
<ConstantInt
>(RHS
);
1855 // RHS neither 'true' nor 'false' - bail out.
1857 // Whether RHS equals 'true'. Otherwise it equals 'false'.
1858 bool isKnownTrue
= CI
->isMinusOne();
1859 bool isKnownFalse
= !isKnownTrue
;
1861 // If "A && B" is known true then both A and B are known true. If "A || B"
1862 // is known false then both A and B are known false.
1864 if ((isKnownTrue
&& match(LHS
, m_And(m_Value(A
), m_Value(B
)))) ||
1865 (isKnownFalse
&& match(LHS
, m_Or(m_Value(A
), m_Value(B
))))) {
1866 Worklist
.push_back(std::make_pair(A
, RHS
));
1867 Worklist
.push_back(std::make_pair(B
, RHS
));
1871 // If we are propagating an equality like "(A == B)" == "true" then also
1872 // propagate the equality A == B. When propagating a comparison such as
1873 // "(A >= B)" == "true", replace all instances of "A < B" with "false".
1874 if (CmpInst
*Cmp
= dyn_cast
<CmpInst
>(LHS
)) {
1875 Value
*Op0
= Cmp
->getOperand(0), *Op1
= Cmp
->getOperand(1);
1877 // If "A == B" is known true, or "A != B" is known false, then replace
1878 // A with B everywhere in the scope.
1879 if ((isKnownTrue
&& Cmp
->getPredicate() == CmpInst::ICMP_EQ
) ||
1880 (isKnownFalse
&& Cmp
->getPredicate() == CmpInst::ICMP_NE
))
1881 Worklist
.push_back(std::make_pair(Op0
, Op1
));
1883 // Handle the floating point versions of equality comparisons too.
1884 if ((isKnownTrue
&& Cmp
->getPredicate() == CmpInst::FCMP_OEQ
) ||
1885 (isKnownFalse
&& Cmp
->getPredicate() == CmpInst::FCMP_UNE
)) {
1887 // Floating point -0.0 and 0.0 compare equal, so we can only
1888 // propagate values if we know that we have a constant and that
1889 // its value is non-zero.
1891 // FIXME: We should do this optimization if 'no signed zeros' is
1892 // applicable via an instruction-level fast-math-flag or some other
1893 // indicator that relaxed FP semantics are being used.
1895 if (isa
<ConstantFP
>(Op1
) && !cast
<ConstantFP
>(Op1
)->isZero())
1896 Worklist
.push_back(std::make_pair(Op0
, Op1
));
1899 // If "A >= B" is known true, replace "A < B" with false everywhere.
1900 CmpInst::Predicate NotPred
= Cmp
->getInversePredicate();
1901 Constant
*NotVal
= ConstantInt::get(Cmp
->getType(), isKnownFalse
);
1902 // Since we don't have the instruction "A < B" immediately to hand, work
1903 // out the value number that it would have and use that to find an
1904 // appropriate instruction (if any).
1905 uint32_t NextNum
= VN
.getNextUnusedValueNumber();
1906 uint32_t Num
= VN
.lookupOrAddCmp(Cmp
->getOpcode(), NotPred
, Op0
, Op1
);
1907 // If the number we were assigned was brand new then there is no point in
1908 // looking for an instruction realizing it: there cannot be one!
1909 if (Num
< NextNum
) {
1910 Value
*NotCmp
= findLeader(Root
.getEnd(), Num
);
1911 if (NotCmp
&& isa
<Instruction
>(NotCmp
)) {
1912 unsigned NumReplacements
=
1914 ? replaceDominatedUsesWith(NotCmp
, NotVal
, *DT
, Root
)
1915 : replaceDominatedUsesWith(NotCmp
, NotVal
, *DT
,
1917 Changed
|= NumReplacements
> 0;
1918 NumGVNEqProp
+= NumReplacements
;
1919 // Cached information for anything that uses NotCmp will be invalid.
1921 MD
->invalidateCachedPointerInfo(NotCmp
);
1924 // Ensure that any instruction in scope that gets the "A < B" value number
1925 // is replaced with false.
1926 // The leader table only tracks basic blocks, not edges. Only add to if we
1927 // have the simple case where the edge dominates the end.
1928 if (RootDominatesEnd
)
1929 addToLeaderTable(Num
, NotVal
, Root
.getEnd());
1938 /// When calculating availability, handle an instruction
1939 /// by inserting it into the appropriate sets
1940 bool GVN::processInstruction(Instruction
*I
) {
1941 // Ignore dbg info intrinsics.
1942 if (isa
<DbgInfoIntrinsic
>(I
))
1945 // If the instruction can be easily simplified then do so now in preference
1946 // to value numbering it. Value numbering often exposes redundancies, for
1947 // example if it determines that %y is equal to %x then the instruction
1948 // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
1949 const DataLayout
&DL
= I
->getModule()->getDataLayout();
1950 if (Value
*V
= SimplifyInstruction(I
, {DL
, TLI
, DT
, AC
})) {
1951 bool Changed
= false;
1952 if (!I
->use_empty()) {
1953 I
->replaceAllUsesWith(V
);
1956 if (isInstructionTriviallyDead(I
, TLI
)) {
1957 markInstructionForDeletion(I
);
1961 if (MD
&& V
->getType()->isPtrOrPtrVectorTy())
1962 MD
->invalidateCachedPointerInfo(V
);
1968 if (IntrinsicInst
*IntrinsicI
= dyn_cast
<IntrinsicInst
>(I
))
1969 if (IntrinsicI
->getIntrinsicID() == Intrinsic::assume
)
1970 return processAssumeIntrinsic(IntrinsicI
);
1972 if (LoadInst
*LI
= dyn_cast
<LoadInst
>(I
)) {
1973 if (processLoad(LI
))
1976 unsigned Num
= VN
.lookupOrAdd(LI
);
1977 addToLeaderTable(Num
, LI
, LI
->getParent());
1981 // For conditional branches, we can perform simple conditional propagation on
1982 // the condition value itself.
1983 if (BranchInst
*BI
= dyn_cast
<BranchInst
>(I
)) {
1984 if (!BI
->isConditional())
1987 if (isa
<Constant
>(BI
->getCondition()))
1988 return processFoldableCondBr(BI
);
1990 Value
*BranchCond
= BI
->getCondition();
1991 BasicBlock
*TrueSucc
= BI
->getSuccessor(0);
1992 BasicBlock
*FalseSucc
= BI
->getSuccessor(1);
1993 // Avoid multiple edges early.
1994 if (TrueSucc
== FalseSucc
)
1997 BasicBlock
*Parent
= BI
->getParent();
1998 bool Changed
= false;
2000 Value
*TrueVal
= ConstantInt::getTrue(TrueSucc
->getContext());
2001 BasicBlockEdge
TrueE(Parent
, TrueSucc
);
2002 Changed
|= propagateEquality(BranchCond
, TrueVal
, TrueE
, true);
2004 Value
*FalseVal
= ConstantInt::getFalse(FalseSucc
->getContext());
2005 BasicBlockEdge
FalseE(Parent
, FalseSucc
);
2006 Changed
|= propagateEquality(BranchCond
, FalseVal
, FalseE
, true);
2011 // For switches, propagate the case values into the case destinations.
2012 if (SwitchInst
*SI
= dyn_cast
<SwitchInst
>(I
)) {
2013 Value
*SwitchCond
= SI
->getCondition();
2014 BasicBlock
*Parent
= SI
->getParent();
2015 bool Changed
= false;
2017 // Remember how many outgoing edges there are to every successor.
2018 SmallDenseMap
<BasicBlock
*, unsigned, 16> SwitchEdges
;
2019 for (unsigned i
= 0, n
= SI
->getNumSuccessors(); i
!= n
; ++i
)
2020 ++SwitchEdges
[SI
->getSuccessor(i
)];
2022 for (SwitchInst::CaseIt i
= SI
->case_begin(), e
= SI
->case_end();
2024 BasicBlock
*Dst
= i
->getCaseSuccessor();
2025 // If there is only a single edge, propagate the case value into it.
2026 if (SwitchEdges
.lookup(Dst
) == 1) {
2027 BasicBlockEdge
E(Parent
, Dst
);
2028 Changed
|= propagateEquality(SwitchCond
, i
->getCaseValue(), E
, true);
2034 // Instructions with void type don't return a value, so there's
2035 // no point in trying to find redundancies in them.
2036 if (I
->getType()->isVoidTy())
2039 uint32_t NextNum
= VN
.getNextUnusedValueNumber();
2040 unsigned Num
= VN
.lookupOrAdd(I
);
2042 // Allocations are always uniquely numbered, so we can save time and memory
2043 // by fast failing them.
2044 if (isa
<AllocaInst
>(I
) || I
->isTerminator() || isa
<PHINode
>(I
)) {
2045 addToLeaderTable(Num
, I
, I
->getParent());
2049 // If the number we were assigned was a brand new VN, then we don't
2050 // need to do a lookup to see if the number already exists
2051 // somewhere in the domtree: it can't!
2052 if (Num
>= NextNum
) {
2053 addToLeaderTable(Num
, I
, I
->getParent());
2057 // Perform fast-path value-number based elimination of values inherited from
2059 Value
*Repl
= findLeader(I
->getParent(), Num
);
2061 // Failure, just remember this instance for future use.
2062 addToLeaderTable(Num
, I
, I
->getParent());
2064 } else if (Repl
== I
) {
2065 // If I was the result of a shortcut PRE, it might already be in the table
2066 // and the best replacement for itself. Nothing to do.
2071 patchAndReplaceAllUsesWith(I
, Repl
);
2072 if (MD
&& Repl
->getType()->isPtrOrPtrVectorTy())
2073 MD
->invalidateCachedPointerInfo(Repl
);
2074 markInstructionForDeletion(I
);
2078 /// runOnFunction - This is the main transformation entry point for a function.
2079 bool GVN::runImpl(Function
&F
, AssumptionCache
&RunAC
, DominatorTree
&RunDT
,
2080 const TargetLibraryInfo
&RunTLI
, AAResults
&RunAA
,
2081 MemoryDependenceResults
*RunMD
, LoopInfo
*LI
,
2082 OptimizationRemarkEmitter
*RunORE
) {
2087 VN
.setAliasAnalysis(&RunAA
);
2089 ImplicitControlFlowTracking
ImplicitCFT(DT
);
2094 InvalidBlockRPONumbers
= true;
2096 bool Changed
= false;
2097 bool ShouldContinue
= true;
2099 DomTreeUpdater
DTU(DT
, DomTreeUpdater::UpdateStrategy::Eager
);
2100 // Merge unconditional branches, allowing PRE to catch more
2101 // optimization opportunities.
2102 for (Function::iterator FI
= F
.begin(), FE
= F
.end(); FI
!= FE
; ) {
2103 BasicBlock
*BB
= &*FI
++;
2105 bool removedBlock
= MergeBlockIntoPredecessor(BB
, &DTU
, LI
, nullptr, MD
);
2109 Changed
|= removedBlock
;
2112 unsigned Iteration
= 0;
2113 while (ShouldContinue
) {
2114 LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration
<< "\n");
2115 ShouldContinue
= iterateOnFunction(F
);
2116 Changed
|= ShouldContinue
;
2121 // Fabricate val-num for dead-code in order to suppress assertion in
2123 assignValNumForDeadCode();
2124 bool PREChanged
= true;
2125 while (PREChanged
) {
2126 PREChanged
= performPRE(F
);
2127 Changed
|= PREChanged
;
2131 // FIXME: Should perform GVN again after PRE does something. PRE can move
2132 // computations into blocks where they become fully redundant. Note that
2133 // we can't do this until PRE's critical edge splitting updates memdep.
2134 // Actually, when this happens, we should just fully integrate PRE into GVN.
2136 cleanupGlobalSets();
2137 // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each
2144 bool GVN::processBlock(BasicBlock
*BB
) {
2145 // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
2146 // (and incrementing BI before processing an instruction).
2147 assert(InstrsToErase
.empty() &&
2148 "We expect InstrsToErase to be empty across iterations");
2149 if (DeadBlocks
.count(BB
))
2152 // Clearing map before every BB because it can be used only for single BB.
2153 ReplaceOperandsWithMap
.clear();
2154 bool ChangedFunction
= false;
2156 for (BasicBlock::iterator BI
= BB
->begin(), BE
= BB
->end();
2158 if (!ReplaceOperandsWithMap
.empty())
2159 ChangedFunction
|= replaceOperandsForInBlockEquality(&*BI
);
2160 ChangedFunction
|= processInstruction(&*BI
);
2162 if (InstrsToErase
.empty()) {
2167 // If we need some instructions deleted, do it now.
2168 NumGVNInstr
+= InstrsToErase
.size();
2170 // Avoid iterator invalidation.
2171 bool AtStart
= BI
== BB
->begin();
2175 for (auto *I
: InstrsToErase
) {
2176 assert(I
->getParent() == BB
&& "Removing instruction from wrong block?");
2177 LLVM_DEBUG(dbgs() << "GVN removed: " << *I
<< '\n');
2178 salvageDebugInfo(*I
);
2179 if (MD
) MD
->removeInstruction(I
);
2180 LLVM_DEBUG(verifyRemoved(I
));
2181 ICF
->removeInstruction(I
);
2182 I
->eraseFromParent();
2184 InstrsToErase
.clear();
2192 return ChangedFunction
;
2195 // Instantiate an expression in a predecessor that lacked it.
2196 bool GVN::performScalarPREInsertion(Instruction
*Instr
, BasicBlock
*Pred
,
2197 BasicBlock
*Curr
, unsigned int ValNo
) {
2198 // Because we are going top-down through the block, all value numbers
2199 // will be available in the predecessor by the time we need them. Any
2200 // that weren't originally present will have been instantiated earlier
2202 bool success
= true;
2203 for (unsigned i
= 0, e
= Instr
->getNumOperands(); i
!= e
; ++i
) {
2204 Value
*Op
= Instr
->getOperand(i
);
2205 if (isa
<Argument
>(Op
) || isa
<Constant
>(Op
) || isa
<GlobalValue
>(Op
))
2207 // This could be a newly inserted instruction, in which case, we won't
2208 // find a value number, and should give up before we hurt ourselves.
2209 // FIXME: Rewrite the infrastructure to let it easier to value number
2210 // and process newly inserted instructions.
2211 if (!VN
.exists(Op
)) {
2216 VN
.phiTranslate(Pred
, Curr
, VN
.lookup(Op
), *this);
2217 if (Value
*V
= findLeader(Pred
, TValNo
)) {
2218 Instr
->setOperand(i
, V
);
2225 // Fail out if we encounter an operand that is not available in
2226 // the PRE predecessor. This is typically because of loads which
2227 // are not value numbered precisely.
2231 Instr
->insertBefore(Pred
->getTerminator());
2232 Instr
->setName(Instr
->getName() + ".pre");
2233 Instr
->setDebugLoc(Instr
->getDebugLoc());
2235 unsigned Num
= VN
.lookupOrAdd(Instr
);
2238 // Update the availability map to include the new instruction.
2239 addToLeaderTable(Num
, Instr
, Pred
);
2243 bool GVN::performScalarPRE(Instruction
*CurInst
) {
2244 if (isa
<AllocaInst
>(CurInst
) || CurInst
->isTerminator() ||
2245 isa
<PHINode
>(CurInst
) || CurInst
->getType()->isVoidTy() ||
2246 CurInst
->mayReadFromMemory() || CurInst
->mayHaveSideEffects() ||
2247 isa
<DbgInfoIntrinsic
>(CurInst
))
2250 // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
2251 // sinking the compare again, and it would force the code generator to
2252 // move the i1 from processor flags or predicate registers into a general
2253 // purpose register.
2254 if (isa
<CmpInst
>(CurInst
))
2257 // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
2258 // sinking the addressing mode computation back to its uses. Extending the
2259 // GEP's live range increases the register pressure, and therefore it can
2260 // introduce unnecessary spills.
2262 // This doesn't prevent Load PRE. PHI translation will make the GEP available
2263 // to the load by moving it to the predecessor block if necessary.
2264 if (isa
<GetElementPtrInst
>(CurInst
))
2267 // We don't currently value number ANY inline asm calls.
2268 if (auto *CallB
= dyn_cast
<CallBase
>(CurInst
))
2269 if (CallB
->isInlineAsm())
2272 uint32_t ValNo
= VN
.lookup(CurInst
);
2274 // Look for the predecessors for PRE opportunities. We're
2275 // only trying to solve the basic diamond case, where
2276 // a value is computed in the successor and one predecessor,
2277 // but not the other. We also explicitly disallow cases
2278 // where the successor is its own predecessor, because they're
2279 // more complicated to get right.
2280 unsigned NumWith
= 0;
2281 unsigned NumWithout
= 0;
2282 BasicBlock
*PREPred
= nullptr;
2283 BasicBlock
*CurrentBlock
= CurInst
->getParent();
2285 // Update the RPO numbers for this function.
2286 if (InvalidBlockRPONumbers
)
2287 assignBlockRPONumber(*CurrentBlock
->getParent());
2289 SmallVector
<std::pair
<Value
*, BasicBlock
*>, 8> predMap
;
2290 for (BasicBlock
*P
: predecessors(CurrentBlock
)) {
2291 // We're not interested in PRE where blocks with predecessors that are
2293 if (!DT
->isReachableFromEntry(P
)) {
2297 // It is not safe to do PRE when P->CurrentBlock is a loop backedge, and
2298 // when CurInst has operand defined in CurrentBlock (so it may be defined
2299 // by phi in the loop header).
2300 assert(BlockRPONumber
.count(P
) && BlockRPONumber
.count(CurrentBlock
) &&
2301 "Invalid BlockRPONumber map.");
2302 if (BlockRPONumber
[P
] >= BlockRPONumber
[CurrentBlock
] &&
2303 llvm::any_of(CurInst
->operands(), [&](const Use
&U
) {
2304 if (auto *Inst
= dyn_cast
<Instruction
>(U
.get()))
2305 return Inst
->getParent() == CurrentBlock
;
2312 uint32_t TValNo
= VN
.phiTranslate(P
, CurrentBlock
, ValNo
, *this);
2313 Value
*predV
= findLeader(P
, TValNo
);
2315 predMap
.push_back(std::make_pair(static_cast<Value
*>(nullptr), P
));
2318 } else if (predV
== CurInst
) {
2319 /* CurInst dominates this predecessor. */
2323 predMap
.push_back(std::make_pair(predV
, P
));
2328 // Don't do PRE when it might increase code size, i.e. when
2329 // we would need to insert instructions in more than one pred.
2330 if (NumWithout
> 1 || NumWith
== 0)
2333 // We may have a case where all predecessors have the instruction,
2334 // and we just need to insert a phi node. Otherwise, perform
2336 Instruction
*PREInstr
= nullptr;
2338 if (NumWithout
!= 0) {
2339 if (!isSafeToSpeculativelyExecute(CurInst
)) {
2340 // It is only valid to insert a new instruction if the current instruction
2341 // is always executed. An instruction with implicit control flow could
2342 // prevent us from doing it. If we cannot speculate the execution, then
2343 // PRE should be prohibited.
2344 if (ICF
->isDominatedByICFIFromSameBlock(CurInst
))
2348 // Don't do PRE across indirect branch.
2349 if (isa
<IndirectBrInst
>(PREPred
->getTerminator()))
2352 // Don't do PRE across callbr.
2353 // FIXME: Can we do this across the fallthrough edge?
2354 if (isa
<CallBrInst
>(PREPred
->getTerminator()))
2357 // We can't do PRE safely on a critical edge, so instead we schedule
2358 // the edge to be split and perform the PRE the next time we iterate
2360 unsigned SuccNum
= GetSuccessorNumber(PREPred
, CurrentBlock
);
2361 if (isCriticalEdge(PREPred
->getTerminator(), SuccNum
)) {
2362 toSplit
.push_back(std::make_pair(PREPred
->getTerminator(), SuccNum
));
2365 // We need to insert somewhere, so let's give it a shot
2366 PREInstr
= CurInst
->clone();
2367 if (!performScalarPREInsertion(PREInstr
, PREPred
, CurrentBlock
, ValNo
)) {
2368 // If we failed insertion, make sure we remove the instruction.
2369 LLVM_DEBUG(verifyRemoved(PREInstr
));
2370 PREInstr
->deleteValue();
2375 // Either we should have filled in the PRE instruction, or we should
2376 // not have needed insertions.
2377 assert(PREInstr
!= nullptr || NumWithout
== 0);
2381 // Create a PHI to make the value available in this block.
2383 PHINode::Create(CurInst
->getType(), predMap
.size(),
2384 CurInst
->getName() + ".pre-phi", &CurrentBlock
->front());
2385 for (unsigned i
= 0, e
= predMap
.size(); i
!= e
; ++i
) {
2386 if (Value
*V
= predMap
[i
].first
) {
2387 // If we use an existing value in this phi, we have to patch the original
2388 // value because the phi will be used to replace a later value.
2389 patchReplacementInstruction(CurInst
, V
);
2390 Phi
->addIncoming(V
, predMap
[i
].second
);
2392 Phi
->addIncoming(PREInstr
, PREPred
);
2396 // After creating a new PHI for ValNo, the phi translate result for ValNo will
2397 // be changed, so erase the related stale entries in phi translate cache.
2398 VN
.eraseTranslateCacheEntry(ValNo
, *CurrentBlock
);
2399 addToLeaderTable(ValNo
, Phi
, CurrentBlock
);
2400 Phi
->setDebugLoc(CurInst
->getDebugLoc());
2401 CurInst
->replaceAllUsesWith(Phi
);
2402 if (MD
&& Phi
->getType()->isPtrOrPtrVectorTy())
2403 MD
->invalidateCachedPointerInfo(Phi
);
2405 removeFromLeaderTable(ValNo
, CurInst
, CurrentBlock
);
2407 LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst
<< '\n');
2409 MD
->removeInstruction(CurInst
);
2410 LLVM_DEBUG(verifyRemoved(CurInst
));
2411 // FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes
2412 // some assertion failures.
2413 ICF
->removeInstruction(CurInst
);
2414 CurInst
->eraseFromParent();
2420 /// Perform a purely local form of PRE that looks for diamond
2421 /// control flow patterns and attempts to perform simple PRE at the join point.
2422 bool GVN::performPRE(Function
&F
) {
2423 bool Changed
= false;
2424 for (BasicBlock
*CurrentBlock
: depth_first(&F
.getEntryBlock())) {
2425 // Nothing to PRE in the entry block.
2426 if (CurrentBlock
== &F
.getEntryBlock())
2429 // Don't perform PRE on an EH pad.
2430 if (CurrentBlock
->isEHPad())
2433 for (BasicBlock::iterator BI
= CurrentBlock
->begin(),
2434 BE
= CurrentBlock
->end();
2436 Instruction
*CurInst
= &*BI
++;
2437 Changed
|= performScalarPRE(CurInst
);
2441 if (splitCriticalEdges())
2447 /// Split the critical edge connecting the given two blocks, and return
2448 /// the block inserted to the critical edge.
2449 BasicBlock
*GVN::splitCriticalEdges(BasicBlock
*Pred
, BasicBlock
*Succ
) {
2451 SplitCriticalEdge(Pred
, Succ
, CriticalEdgeSplittingOptions(DT
, LI
));
2453 MD
->invalidateCachedPredecessors();
2454 InvalidBlockRPONumbers
= true;
2458 /// Split critical edges found during the previous
2459 /// iteration that may enable further optimization.
2460 bool GVN::splitCriticalEdges() {
2461 if (toSplit
.empty())
2464 std::pair
<Instruction
*, unsigned> Edge
= toSplit
.pop_back_val();
2465 SplitCriticalEdge(Edge
.first
, Edge
.second
,
2466 CriticalEdgeSplittingOptions(DT
, LI
));
2467 } while (!toSplit
.empty());
2468 if (MD
) MD
->invalidateCachedPredecessors();
2469 InvalidBlockRPONumbers
= true;
2473 /// Executes one iteration of GVN
2474 bool GVN::iterateOnFunction(Function
&F
) {
2475 cleanupGlobalSets();
2477 // Top-down walk of the dominator tree
2478 bool Changed
= false;
2479 // Needed for value numbering with phi construction to work.
2480 // RPOT walks the graph in its constructor and will not be invalidated during
2482 ReversePostOrderTraversal
<Function
*> RPOT(&F
);
2484 for (BasicBlock
*BB
: RPOT
)
2485 Changed
|= processBlock(BB
);
2490 void GVN::cleanupGlobalSets() {
2492 LeaderTable
.clear();
2493 BlockRPONumber
.clear();
2494 TableAllocator
.Reset();
2496 InvalidBlockRPONumbers
= true;
2499 /// Verify that the specified instruction does not occur in our
2500 /// internal data structures.
2501 void GVN::verifyRemoved(const Instruction
*Inst
) const {
2502 VN
.verifyRemoved(Inst
);
2504 // Walk through the value number scope to make sure the instruction isn't
2505 // ferreted away in it.
2506 for (DenseMap
<uint32_t, LeaderTableEntry
>::const_iterator
2507 I
= LeaderTable
.begin(), E
= LeaderTable
.end(); I
!= E
; ++I
) {
2508 const LeaderTableEntry
*Node
= &I
->second
;
2509 assert(Node
->Val
!= Inst
&& "Inst still in value numbering scope!");
2511 while (Node
->Next
) {
2513 assert(Node
->Val
!= Inst
&& "Inst still in value numbering scope!");
2518 /// BB is declared dead, which implied other blocks become dead as well. This
2519 /// function is to add all these blocks to "DeadBlocks". For the dead blocks'
2520 /// live successors, update their phi nodes by replacing the operands
2521 /// corresponding to dead blocks with UndefVal.
2522 void GVN::addDeadBlock(BasicBlock
*BB
) {
2523 SmallVector
<BasicBlock
*, 4> NewDead
;
2524 SmallSetVector
<BasicBlock
*, 4> DF
;
2526 NewDead
.push_back(BB
);
2527 while (!NewDead
.empty()) {
2528 BasicBlock
*D
= NewDead
.pop_back_val();
2529 if (DeadBlocks
.count(D
))
2532 // All blocks dominated by D are dead.
2533 SmallVector
<BasicBlock
*, 8> Dom
;
2534 DT
->getDescendants(D
, Dom
);
2535 DeadBlocks
.insert(Dom
.begin(), Dom
.end());
2537 // Figure out the dominance-frontier(D).
2538 for (BasicBlock
*B
: Dom
) {
2539 for (BasicBlock
*S
: successors(B
)) {
2540 if (DeadBlocks
.count(S
))
2543 bool AllPredDead
= true;
2544 for (BasicBlock
*P
: predecessors(S
))
2545 if (!DeadBlocks
.count(P
)) {
2546 AllPredDead
= false;
2551 // S could be proved dead later on. That is why we don't update phi
2552 // operands at this moment.
2555 // While S is not dominated by D, it is dead by now. This could take
2556 // place if S already have a dead predecessor before D is declared
2558 NewDead
.push_back(S
);
2564 // For the dead blocks' live successors, update their phi nodes by replacing
2565 // the operands corresponding to dead blocks with UndefVal.
2566 for(SmallSetVector
<BasicBlock
*, 4>::iterator I
= DF
.begin(), E
= DF
.end();
2569 if (DeadBlocks
.count(B
))
2572 // First, split the critical edges. This might also create additional blocks
2573 // to preserve LoopSimplify form and adjust edges accordingly.
2574 SmallVector
<BasicBlock
*, 4> Preds(pred_begin(B
), pred_end(B
));
2575 for (BasicBlock
*P
: Preds
) {
2576 if (!DeadBlocks
.count(P
))
2579 if (llvm::any_of(successors(P
),
2580 [B
](BasicBlock
*Succ
) { return Succ
== B
; }) &&
2581 isCriticalEdge(P
->getTerminator(), B
)) {
2582 if (BasicBlock
*S
= splitCriticalEdges(P
, B
))
2583 DeadBlocks
.insert(P
= S
);
2587 // Now undef the incoming values from the dead predecessors.
2588 for (BasicBlock
*P
: predecessors(B
)) {
2589 if (!DeadBlocks
.count(P
))
2591 for (PHINode
&Phi
: B
->phis()) {
2592 Phi
.setIncomingValueForBlock(P
, UndefValue::get(Phi
.getType()));
2594 MD
->invalidateCachedPointerInfo(&Phi
);
2600 // If the given branch is recognized as a foldable branch (i.e. conditional
2601 // branch with constant condition), it will perform following analyses and
2603 // 1) If the dead out-coming edge is a critical-edge, split it. Let
2604 // R be the target of the dead out-coming edge.
2605 // 1) Identify the set of dead blocks implied by the branch's dead outcoming
2606 // edge. The result of this step will be {X| X is dominated by R}
2607 // 2) Identify those blocks which haves at least one dead predecessor. The
2608 // result of this step will be dominance-frontier(R).
2609 // 3) Update the PHIs in DF(R) by replacing the operands corresponding to
2610 // dead blocks with "UndefVal" in an hope these PHIs will optimized away.
2612 // Return true iff *NEW* dead code are found.
2613 bool GVN::processFoldableCondBr(BranchInst
*BI
) {
2614 if (!BI
|| BI
->isUnconditional())
2617 // If a branch has two identical successors, we cannot declare either dead.
2618 if (BI
->getSuccessor(0) == BI
->getSuccessor(1))
2621 ConstantInt
*Cond
= dyn_cast
<ConstantInt
>(BI
->getCondition());
2625 BasicBlock
*DeadRoot
=
2626 Cond
->getZExtValue() ? BI
->getSuccessor(1) : BI
->getSuccessor(0);
2627 if (DeadBlocks
.count(DeadRoot
))
2630 if (!DeadRoot
->getSinglePredecessor())
2631 DeadRoot
= splitCriticalEdges(BI
->getParent(), DeadRoot
);
2633 addDeadBlock(DeadRoot
);
2637 // performPRE() will trigger assert if it comes across an instruction without
2638 // associated val-num. As it normally has far more live instructions than dead
2639 // instructions, it makes more sense just to "fabricate" a val-number for the
2640 // dead code than checking if instruction involved is dead or not.
2641 void GVN::assignValNumForDeadCode() {
2642 for (BasicBlock
*BB
: DeadBlocks
) {
2643 for (Instruction
&Inst
: *BB
) {
2644 unsigned ValNum
= VN
.lookupOrAdd(&Inst
);
2645 addToLeaderTable(ValNum
, &Inst
, BB
);
2650 class llvm::gvn::GVNLegacyPass
: public FunctionPass
{
2652 static char ID
; // Pass identification, replacement for typeid
2654 explicit GVNLegacyPass(bool NoMemDepAnalysis
= !EnableMemDep
)
2655 : FunctionPass(ID
), NoMemDepAnalysis(NoMemDepAnalysis
) {
2656 initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
2659 bool runOnFunction(Function
&F
) override
{
2660 if (skipFunction(F
))
2663 auto *LIWP
= getAnalysisIfAvailable
<LoopInfoWrapperPass
>();
2665 return Impl
.runImpl(
2666 F
, getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(F
),
2667 getAnalysis
<DominatorTreeWrapperPass
>().getDomTree(),
2668 getAnalysis
<TargetLibraryInfoWrapperPass
>().getTLI(),
2669 getAnalysis
<AAResultsWrapperPass
>().getAAResults(),
2670 NoMemDepAnalysis
? nullptr
2671 : &getAnalysis
<MemoryDependenceWrapperPass
>().getMemDep(),
2672 LIWP
? &LIWP
->getLoopInfo() : nullptr,
2673 &getAnalysis
<OptimizationRemarkEmitterWrapperPass
>().getORE());
2676 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
2677 AU
.addRequired
<AssumptionCacheTracker
>();
2678 AU
.addRequired
<DominatorTreeWrapperPass
>();
2679 AU
.addRequired
<TargetLibraryInfoWrapperPass
>();
2680 AU
.addRequired
<LoopInfoWrapperPass
>();
2681 if (!NoMemDepAnalysis
)
2682 AU
.addRequired
<MemoryDependenceWrapperPass
>();
2683 AU
.addRequired
<AAResultsWrapperPass
>();
2685 AU
.addPreserved
<DominatorTreeWrapperPass
>();
2686 AU
.addPreserved
<GlobalsAAWrapperPass
>();
2687 AU
.addPreserved
<TargetLibraryInfoWrapperPass
>();
2688 AU
.addPreserved
<LoopInfoWrapperPass
>();
2689 AU
.addPreservedID(LoopSimplifyID
);
2690 AU
.addRequired
<OptimizationRemarkEmitterWrapperPass
>();
2694 bool NoMemDepAnalysis
;
2698 char GVNLegacyPass::ID
= 0;
2700 INITIALIZE_PASS_BEGIN(GVNLegacyPass
, "gvn", "Global Value Numbering", false, false)
2701 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
)
2702 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass
)
2703 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
2704 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
2705 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
)
2706 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass
)
2707 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass
)
2708 INITIALIZE_PASS_END(GVNLegacyPass
, "gvn", "Global Value Numbering", false, false)
2710 // The public interface to this file...
2711 FunctionPass
*llvm::createGVNPass(bool NoMemDepAnalysis
) {
2712 return new GVNLegacyPass(NoMemDepAnalysis
);