1 //===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass performs global value numbering to eliminate fully redundant
10 // instructions. It also performs simple dead load elimination.
12 // Note that this pass does the value numbering itself; it does not use the
13 // ValueNumbering analysis passes.
15 //===----------------------------------------------------------------------===//
17 #include "llvm/Transforms/Scalar/GVN.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/DepthFirstIterator.h"
20 #include "llvm/ADT/Hashing.h"
21 #include "llvm/ADT/MapVector.h"
22 #include "llvm/ADT/PostOrderIterator.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/Analysis/AliasAnalysis.h"
29 #include "llvm/Analysis/AssumeBundleQueries.h"
30 #include "llvm/Analysis/AssumptionCache.h"
31 #include "llvm/Analysis/CFG.h"
32 #include "llvm/Analysis/DomTreeUpdater.h"
33 #include "llvm/Analysis/GlobalsModRef.h"
34 #include "llvm/Analysis/InstructionPrecedenceTracking.h"
35 #include "llvm/Analysis/InstructionSimplify.h"
36 #include "llvm/Analysis/LoopInfo.h"
37 #include "llvm/Analysis/MemoryBuiltins.h"
38 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
39 #include "llvm/Analysis/MemorySSA.h"
40 #include "llvm/Analysis/MemorySSAUpdater.h"
41 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
42 #include "llvm/Analysis/PHITransAddr.h"
43 #include "llvm/Analysis/TargetLibraryInfo.h"
44 #include "llvm/Analysis/ValueTracking.h"
45 #include "llvm/IR/Attributes.h"
46 #include "llvm/IR/BasicBlock.h"
47 #include "llvm/IR/Constant.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/DebugLoc.h"
50 #include "llvm/IR/Dominators.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/IntrinsicInst.h"
56 #include "llvm/IR/LLVMContext.h"
57 #include "llvm/IR/Metadata.h"
58 #include "llvm/IR/Module.h"
59 #include "llvm/IR/PassManager.h"
60 #include "llvm/IR/PatternMatch.h"
61 #include "llvm/IR/Type.h"
62 #include "llvm/IR/Use.h"
63 #include "llvm/IR/Value.h"
64 #include "llvm/InitializePasses.h"
65 #include "llvm/Pass.h"
66 #include "llvm/Support/Casting.h"
67 #include "llvm/Support/CommandLine.h"
68 #include "llvm/Support/Compiler.h"
69 #include "llvm/Support/Debug.h"
70 #include "llvm/Support/raw_ostream.h"
71 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
72 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
73 #include "llvm/Transforms/Utils/Local.h"
74 #include "llvm/Transforms/Utils/SSAUpdater.h"
75 #include "llvm/Transforms/Utils/VNCoercion.h"
83 using namespace llvm::gvn;
84 using namespace llvm::VNCoercion;
85 using namespace PatternMatch;
87 #define DEBUG_TYPE "gvn"
89 STATISTIC(NumGVNInstr, "Number of instructions deleted");
90 STATISTIC(NumGVNLoad, "Number of loads deleted");
91 STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
92 STATISTIC(NumGVNBlocks, "Number of blocks merged");
93 STATISTIC(NumGVNSimpl, "Number of instructions simplified");
94 STATISTIC(NumGVNEqProp, "Number of equalities propagated");
95 STATISTIC(NumPRELoad, "Number of loads PRE'd");
96 STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd");
97 STATISTIC(NumPRELoadMoved2CEPred,
98           "Number of loads moved to predecessor of a critical edge in PRE");
100 STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
101           "Number of blocks speculated as available in "
102           "IsValueFullyAvailableInBlock(), max");
103 STATISTIC(MaxBBSpeculationCutoffReachedTimes,
104           "Number of times we reached gvn-max-block-speculations cut-off "
105           "preventing further exploration");
107 static cl::opt<bool> GVNEnablePRE("enable-pre", cl::init(true), cl::Hidden);
108 static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(true));
109 static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
110                                             cl::init(true));
111 static cl::opt<bool>
112     GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
113                                     cl::init(false));
114 static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));
116 static cl::opt<uint32_t> MaxNumDeps(
117     "gvn-max-num-deps", cl::Hidden, cl::init(100),
118     cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
120 // This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
121 static cl::opt<uint32_t> MaxBBSpeculations(
122     "gvn-max-block-speculations", cl::Hidden, cl::init(600),
123     cl::desc("Max number of blocks we're willing to speculate on (and recurse "
124              "into) when deducing if a value is fully available or not in GVN "
125              "(default = 600)"));
127 static cl::opt<uint32_t> MaxNumVisitedInsts(
128     "gvn-max-num-visited-insts", cl::Hidden, cl::init(100),
129     cl::desc("Max number of visited instructions when trying to find "
130              "dominating value of select dependency (default = 100)"));
132 static cl::opt<uint32_t> MaxNumInsnsPerBlock(
133     "gvn-max-num-insns", cl::Hidden, cl::init(100),
134     cl::desc("Max number of instructions to scan in each basic block in GVN "
135              "(default = 100)"));
137 struct llvm::GVNPass::Expression {
138   uint32_t opcode;
139   bool commutative = false;
140   // The type is not necessarily the result type of the expression, it may be
141   // any additional type needed to disambiguate the expression.
142   Type *type = nullptr;
143   SmallVector<uint32_t, 4> varargs;
145   Expression(uint32_t o = ~2U) : opcode(o) {}
147   bool operator==(const Expression &other) const {
148     if (opcode != other.opcode)
149       return false;
150     if (opcode == ~0U || opcode == ~1U)
151       return true;
152     if (type != other.type)
153       return false;
154     if (varargs != other.varargs)
155       return false;
156     return true;
157   }
159   friend hash_code hash_value(const Expression &Value) {
160     return hash_combine(
161         Value.opcode, Value.type,
162         hash_combine_range(Value.varargs.begin(), Value.varargs.end()));
163   }
164 };
166 namespace llvm {
168 template <> struct DenseMapInfo<GVNPass::Expression> {
169   static inline GVNPass::Expression getEmptyKey() { return ~0U; }
170   static inline GVNPass::Expression getTombstoneKey() { return ~1U; }
172   static unsigned getHashValue(const GVNPass::Expression &e) {
173     using llvm::hash_value;
175     return static_cast<unsigned>(hash_value(e));
176   }
178   static bool isEqual(const GVNPass::Expression &LHS,
179                       const GVNPass::Expression &RHS) {
180     return LHS == RHS;
181   }
182 };
184 } // end namespace llvm
186 /// Represents a particular available value that we know how to materialize.
187 /// Materialization of an AvailableValue never fails. An AvailableValue is
188 /// implicitly associated with a rematerialization point which is the
189 /// location of the instruction from which it was formed.
190 struct llvm::gvn::AvailableValue
{
191   enum class ValType {
192     SimpleVal, // A simple offsetted value that is accessed.
193     LoadVal,   // A value produced by a load.
194     MemIntrin, // A memory intrinsic which is loaded from.
195     UndefVal,  // An UndefValue representing a value from a dead block (which
196                // is not yet physically removed from the CFG).
197     SelectVal, // A pointer select which is loaded from and for which the load
198                // can be replaced by a value select.
199   };
201 /// Val - The value that is live out of the block.
203 /// Kind of the live-out value.
206 /// Offset - The byte offset in Val that is interesting for the load query.
208 /// V1, V2 - The dominating non-clobbered values of SelectVal.
209 Value
*V1
= nullptr, *V2
= nullptr;
211 static AvailableValue
get(Value
*V
, unsigned Offset
= 0) {
214 Res
.Kind
= ValType::SimpleVal
;
219 static AvailableValue
getMI(MemIntrinsic
*MI
, unsigned Offset
= 0) {
222 Res
.Kind
= ValType::MemIntrin
;
227 static AvailableValue
getLoad(LoadInst
*Load
, unsigned Offset
= 0) {
230 Res
.Kind
= ValType::LoadVal
;
235 static AvailableValue
getUndef() {
238 Res
.Kind
= ValType::UndefVal
;
243 static AvailableValue
getSelect(SelectInst
*Sel
, Value
*V1
, Value
*V2
) {
246 Res
.Kind
= ValType::SelectVal
;
253 bool isSimpleValue() const { return Kind
== ValType::SimpleVal
; }
254 bool isCoercedLoadValue() const { return Kind
== ValType::LoadVal
; }
255 bool isMemIntrinValue() const { return Kind
== ValType::MemIntrin
; }
256 bool isUndefValue() const { return Kind
== ValType::UndefVal
; }
257 bool isSelectValue() const { return Kind
== ValType::SelectVal
; }
259 Value
*getSimpleValue() const {
260 assert(isSimpleValue() && "Wrong accessor");
264 LoadInst
*getCoercedLoadValue() const {
265 assert(isCoercedLoadValue() && "Wrong accessor");
266 return cast
<LoadInst
>(Val
);
269 MemIntrinsic
*getMemIntrinValue() const {
270 assert(isMemIntrinValue() && "Wrong accessor");
271 return cast
<MemIntrinsic
>(Val
);
274 SelectInst
*getSelectValue() const {
275 assert(isSelectValue() && "Wrong accessor");
276 return cast
<SelectInst
>(Val
);
279 /// Emit code at the specified insertion point to adjust the value defined
280 /// here to the specified type. This handles various coercion cases.
281 Value
*MaterializeAdjustedValue(LoadInst
*Load
, Instruction
*InsertPt
,
285 /// Represents an AvailableValue which can be rematerialized at the end of
286 /// the associated BasicBlock.
287 struct llvm::gvn::AvailableValueInBlock
{
288 /// BB - The basic block in question.
289 BasicBlock
*BB
= nullptr;
291 /// AV - The actual available value
294 static AvailableValueInBlock
get(BasicBlock
*BB
, AvailableValue
&&AV
) {
295 AvailableValueInBlock Res
;
297 Res
.AV
= std::move(AV
);
301 static AvailableValueInBlock
get(BasicBlock
*BB
, Value
*V
,
302 unsigned Offset
= 0) {
303 return get(BB
, AvailableValue::get(V
, Offset
));
306 static AvailableValueInBlock
getUndef(BasicBlock
*BB
) {
307 return get(BB
, AvailableValue::getUndef());
310 static AvailableValueInBlock
getSelect(BasicBlock
*BB
, SelectInst
*Sel
,
311 Value
*V1
, Value
*V2
) {
312 return get(BB
, AvailableValue::getSelect(Sel
, V1
, V2
));
315 /// Emit code at the end of this block to adjust the value defined here to
316 /// the specified type. This handles various coercion cases.
317 Value
*MaterializeAdjustedValue(LoadInst
*Load
, GVNPass
&gvn
) const {
318 return AV
.MaterializeAdjustedValue(Load
, BB
->getTerminator(), gvn
);
322 //===----------------------------------------------------------------------===//
323 // ValueTable Internal Functions
324 //===----------------------------------------------------------------------===//
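/// Build an Expression for the given instruction: record its opcode and type
/// and the value numbers of its operands. Commutative operations and
/// comparisons are canonicalized (operands sorted, predicate swapped where
/// needed) so that equivalent instructions produce identical expressions.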
326 GVNPass::Expression
GVNPass::ValueTable::createExpr(Instruction
*I
) {
328 e
.type
= I
->getType();
329 e
.opcode
= I
->getOpcode();
330 if (const GCRelocateInst
*GCR
= dyn_cast
<GCRelocateInst
>(I
)) {
331 // gc.relocate is a 'special' call: its second and third operands are
332 // not real values, but indices into the statepoint's argument list.
333 // Use the referred-to values for purposes of identity.
334 e
.varargs
.push_back(lookupOrAdd(GCR
->getOperand(0)));
335 e
.varargs
.push_back(lookupOrAdd(GCR
->getBasePtr()));
336 e
.varargs
.push_back(lookupOrAdd(GCR
->getDerivedPtr()));
338 for (Use
&Op
: I
->operands())
339 e
.varargs
.push_back(lookupOrAdd(Op
));
341 if (I
->isCommutative()) {
342 // Ensure that commutative instructions that only differ by a permutation
343 // of their operands get the same value number by sorting the operand value
344 // numbers. Since commutative operands are the 1st two operands it is more
345 // efficient to sort by hand rather than using, say, std::sort.
346 assert(I
->getNumOperands() >= 2 && "Unsupported commutative instruction!");
347 if (e
.varargs
[0] > e
.varargs
[1])
348 std::swap(e
.varargs
[0], e
.varargs
[1]);
349 e
.commutative
= true;
352 if (auto *C
= dyn_cast
<CmpInst
>(I
)) {
353 // Sort the operand value numbers so x<y and y>x get the same value number.
354 CmpInst::Predicate Predicate
= C
->getPredicate();
355 if (e
.varargs
[0] > e
.varargs
[1]) {
356 std::swap(e
.varargs
[0], e
.varargs
[1]);
357 Predicate
= CmpInst::getSwappedPredicate(Predicate
);
359 e
.opcode
= (C
->getOpcode() << 8) | Predicate
;
360 e
.commutative
= true;
361 } else if (auto *E
= dyn_cast
<InsertValueInst
>(I
)) {
362 e
.varargs
.append(E
->idx_begin(), E
->idx_end());
363 } else if (auto *SVI
= dyn_cast
<ShuffleVectorInst
>(I
)) {
364 ArrayRef
<int> ShuffleMask
= SVI
->getShuffleMask();
365 e
.varargs
.append(ShuffleMask
.begin(), ShuffleMask
.end());
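/// Build an Expression for a comparison that need not exist as an actual
/// instruction; used when GVN has deduced the result of a comparison and
/// wants a value number for it.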
371 GVNPass::Expression
GVNPass::ValueTable::createCmpExpr(
372 unsigned Opcode
, CmpInst::Predicate Predicate
, Value
*LHS
, Value
*RHS
) {
373 assert((Opcode
== Instruction::ICmp
|| Opcode
== Instruction::FCmp
) &&
374 "Not a comparison!");
376 e
.type
= CmpInst::makeCmpResultType(LHS
->getType());
377 e
.varargs
.push_back(lookupOrAdd(LHS
));
378 e
.varargs
.push_back(lookupOrAdd(RHS
));
380 // Sort the operand value numbers so x<y and y>x get the same value number.
381 if (e
.varargs
[0] > e
.varargs
[1]) {
382 std::swap(e
.varargs
[0], e
.varargs
[1]);
383 Predicate
= CmpInst::getSwappedPredicate(Predicate
);
385 e
.opcode
= (Opcode
<< 8) | Predicate
;
386 e
.commutative
= true;
391 GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst
*EI
) {
392 assert(EI
&& "Not an ExtractValueInst?");
394 e
.type
= EI
->getType();
397 WithOverflowInst
*WO
= dyn_cast
<WithOverflowInst
>(EI
->getAggregateOperand());
398 if (WO
!= nullptr && EI
->getNumIndices() == 1 && *EI
->idx_begin() == 0) {
399 // EI is an extract from one of our with.overflow intrinsics. Synthesize
400 // a semantically equivalent expression instead of an extract value
402 e
.opcode
= WO
->getBinaryOp();
403 e
.varargs
.push_back(lookupOrAdd(WO
->getLHS()));
404 e
.varargs
.push_back(lookupOrAdd(WO
->getRHS()));
408 // Not a recognised intrinsic. Fall back to producing an extract value
410 e
.opcode
= EI
->getOpcode();
411 for (Use
&Op
: EI
->operands())
412 e
.varargs
.push_back(lookupOrAdd(Op
));
414 append_range(e
.varargs
, EI
->indices());
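/// Build an Expression for a GEP. When the offsets can be collected, the GEP
/// is encoded as a pointer operand plus (variable, scale) pairs and a constant
/// offset, so equivalent address computations written with different operand
/// types get the same value number; otherwise fall back to an opcode/operand
/// encoding.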
419 GVNPass::Expression
GVNPass::ValueTable::createGEPExpr(GetElementPtrInst
*GEP
) {
421 Type
*PtrTy
= GEP
->getType()->getScalarType();
422 const DataLayout
&DL
= GEP
->getModule()->getDataLayout();
423 unsigned BitWidth
= DL
.getIndexTypeSizeInBits(PtrTy
);
424 MapVector
<Value
*, APInt
> VariableOffsets
;
425 APInt
ConstantOffset(BitWidth
, 0);
426 if (GEP
->collectOffset(DL
, BitWidth
, VariableOffsets
, ConstantOffset
)) {
427 // Convert into offset representation, to recognize equivalent address
428 // calculations that use different type encoding.
429 LLVMContext
&Context
= GEP
->getContext();
430 E
.opcode
= GEP
->getOpcode();
432 E
.varargs
.push_back(lookupOrAdd(GEP
->getPointerOperand()));
433 for (const auto &Pair
: VariableOffsets
) {
434 E
.varargs
.push_back(lookupOrAdd(Pair
.first
));
435 E
.varargs
.push_back(lookupOrAdd(ConstantInt::get(Context
, Pair
.second
)));
437 if (!ConstantOffset
.isZero())
439 lookupOrAdd(ConstantInt::get(Context
, ConstantOffset
)));
441 // If converting to offset representation fails (for scalable vectors),
442 // fall back to type-based implementation:
443 E
.opcode
= GEP
->getOpcode();
444 E
.type
= GEP
->getSourceElementType();
445 for (Use
&Op
: GEP
->operands())
446 E
.varargs
.push_back(lookupOrAdd(Op
));
451 //===----------------------------------------------------------------------===//
452 // ValueTable External Functions
453 //===----------------------------------------------------------------------===//
455 GVNPass::ValueTable::ValueTable() = default;
456 GVNPass::ValueTable::ValueTable(const ValueTable
&) = default;
457 GVNPass::ValueTable::ValueTable(ValueTable
&&) = default;
458 GVNPass::ValueTable::~ValueTable() = default;
459 GVNPass::ValueTable
&
460 GVNPass::ValueTable::operator=(const GVNPass::ValueTable
&Arg
) = default;
462 /// add - Insert a value into the table with a specified value number.
463 void GVNPass::ValueTable::add(Value
*V
, uint32_t num
) {
464 valueNumbering
.insert(std::make_pair(V
, num
));
465 if (PHINode
*PN
= dyn_cast
<PHINode
>(V
))
466 NumberingPhi
[num
] = PN
;
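/// Assign a value number to the call C, reusing the number of an equivalent
/// call when it is safe to do so (memory-free or read-only calls with an
/// identical dominating call); otherwise give C a fresh number.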
469 uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst
*C
) {
470 // FIXME: Currently, calls which may access the thread id may
471 // be considered as not accessing memory. But this is
472 // problematic for coroutines, since coroutines may resume in a
473 // different thread. So we disable the optimization here for
474 // correctness. However, it may block many other correct
475 // optimizations. Revert this once we detect the memory
476 // accessing kind more precisely.
477 if (C
->getFunction()->isPresplitCoroutine()) {
478 valueNumbering
[C
] = nextValueNumber
;
479 return nextValueNumber
++;
482 // Do not combine convergent calls since they implicitly depend on the set of
483 // threads that is currently executing, and they might be in different basic
484 // blocks.
485 if (C
->isConvergent()) {
486 valueNumbering
[C
] = nextValueNumber
;
487 return nextValueNumber
++;
490 if (AA
->doesNotAccessMemory(C
)) {
491 Expression exp
= createExpr(C
);
492 uint32_t e
= assignExpNewValueNum(exp
).first
;
493 valueNumbering
[C
] = e
;
497 if (MD
&& AA
->onlyReadsMemory(C
)) {
498 Expression exp
= createExpr(C
);
499 auto ValNum
= assignExpNewValueNum(exp
);
501 valueNumbering
[C
] = ValNum
.first
;
505 MemDepResult local_dep
= MD
->getDependency(C
);
507 if (!local_dep
.isDef() && !local_dep
.isNonLocal()) {
508 valueNumbering
[C
] = nextValueNumber
;
509 return nextValueNumber
++;
512 if (local_dep
.isDef()) {
513 // For masked load/store intrinsics, the local_dep may actually be
514 // a normal load or store instruction.
515 CallInst
*local_cdep
= dyn_cast
<CallInst
>(local_dep
.getInst());
517 if (!local_cdep
|| local_cdep
->arg_size() != C
->arg_size()) {
518 valueNumbering
[C
] = nextValueNumber
;
519 return nextValueNumber
++;
522 for (unsigned i
= 0, e
= C
->arg_size(); i
< e
; ++i
) {
523 uint32_t c_vn
= lookupOrAdd(C
->getArgOperand(i
));
524 uint32_t cd_vn
= lookupOrAdd(local_cdep
->getArgOperand(i
));
526 valueNumbering
[C
] = nextValueNumber
;
527 return nextValueNumber
++;
531 uint32_t v
= lookupOrAdd(local_cdep
);
532 valueNumbering
[C
] = v
;
537 const MemoryDependenceResults::NonLocalDepInfo
&deps
=
538 MD
->getNonLocalCallDependency(C
);
539 // FIXME: Move the checking logic to MemDep!
540 CallInst
* cdep
= nullptr;
542 // Check to see if we have a single dominating call instruction that is
544 for (const NonLocalDepEntry
&I
: deps
) {
545 if (I
.getResult().isNonLocal())
548 // We don't handle non-definitions. If we already have a call, reject
549 // instruction dependencies.
550 if (!I
.getResult().isDef() || cdep
!= nullptr) {
555 CallInst
*NonLocalDepCall
= dyn_cast
<CallInst
>(I
.getResult().getInst());
556 // FIXME: All duplicated with non-local case.
557 if (NonLocalDepCall
&& DT
->properlyDominates(I
.getBB(), C
->getParent())) {
558 cdep
= NonLocalDepCall
;
567 valueNumbering
[C
] = nextValueNumber
;
568 return nextValueNumber
++;
571 if (cdep
->arg_size() != C
->arg_size()) {
572 valueNumbering
[C
] = nextValueNumber
;
573 return nextValueNumber
++;
575 for (unsigned i
= 0, e
= C
->arg_size(); i
< e
; ++i
) {
576 uint32_t c_vn
= lookupOrAdd(C
->getArgOperand(i
));
577 uint32_t cd_vn
= lookupOrAdd(cdep
->getArgOperand(i
));
579 valueNumbering
[C
] = nextValueNumber
;
580 return nextValueNumber
++;
584 uint32_t v
= lookupOrAdd(cdep
);
585 valueNumbering
[C
] = v
;
589 valueNumbering
[C
] = nextValueNumber
;
590 return nextValueNumber
++;
593 /// Returns true if a value number exists for the specified value.
594 bool GVNPass::ValueTable::exists(Value
*V
) const {
595 return valueNumbering
.contains(V
);
598 /// lookup_or_add - Returns the value number for the specified value, assigning
599 /// it a new number if it did not have one before.
600 uint32_t GVNPass::ValueTable::lookupOrAdd(Value
*V
) {
601 DenseMap
<Value
*, uint32_t>::iterator VI
= valueNumbering
.find(V
);
602 if (VI
!= valueNumbering
.end())
605 auto *I
= dyn_cast
<Instruction
>(V
);
607 valueNumbering
[V
] = nextValueNumber
;
608 return nextValueNumber
++;
612 switch (I
->getOpcode()) {
613 case Instruction::Call
:
614 return lookupOrAddCall(cast
<CallInst
>(I
));
615 case Instruction::FNeg
:
616 case Instruction::Add
:
617 case Instruction::FAdd
:
618 case Instruction::Sub
:
619 case Instruction::FSub
:
620 case Instruction::Mul
:
621 case Instruction::FMul
:
622 case Instruction::UDiv
:
623 case Instruction::SDiv
:
624 case Instruction::FDiv
:
625 case Instruction::URem
:
626 case Instruction::SRem
:
627 case Instruction::FRem
:
628 case Instruction::Shl
:
629 case Instruction::LShr
:
630 case Instruction::AShr
:
631 case Instruction::And
:
632 case Instruction::Or
:
633 case Instruction::Xor
:
634 case Instruction::ICmp
:
635 case Instruction::FCmp
:
636 case Instruction::Trunc
:
637 case Instruction::ZExt
:
638 case Instruction::SExt
:
639 case Instruction::FPToUI
:
640 case Instruction::FPToSI
:
641 case Instruction::UIToFP
:
642 case Instruction::SIToFP
:
643 case Instruction::FPTrunc
:
644 case Instruction::FPExt
:
645 case Instruction::PtrToInt
:
646 case Instruction::IntToPtr
:
647 case Instruction::AddrSpaceCast
:
648 case Instruction::BitCast
:
649 case Instruction::Select
:
650 case Instruction::Freeze
:
651 case Instruction::ExtractElement
:
652 case Instruction::InsertElement
:
653 case Instruction::ShuffleVector
:
654 case Instruction::InsertValue
:
657 case Instruction::GetElementPtr
:
658 exp
= createGEPExpr(cast
<GetElementPtrInst
>(I
));
660 case Instruction::ExtractValue
:
661 exp
= createExtractvalueExpr(cast
<ExtractValueInst
>(I
));
663 case Instruction::PHI
:
664 valueNumbering
[V
] = nextValueNumber
;
665 NumberingPhi
[nextValueNumber
] = cast
<PHINode
>(V
);
666 return nextValueNumber
++;
668 valueNumbering
[V
] = nextValueNumber
;
669 return nextValueNumber
++;
672 uint32_t e
= assignExpNewValueNum(exp
).first
;
673 valueNumbering
[V
] = e
;
677 /// Returns the value number of the specified value. Fails if
678 /// the value has not yet been numbered.
679 uint32_t GVNPass::ValueTable::lookup(Value
*V
, bool Verify
) const {
680 DenseMap
<Value
*, uint32_t>::const_iterator VI
= valueNumbering
.find(V
);
682 assert(VI
!= valueNumbering
.end() && "Value not numbered?");
685 return (VI
!= valueNumbering
.end()) ? VI
->second
: 0;
688 /// Returns the value number of the given comparison,
689 /// assigning it a new number if it did not have one before. Useful when
690 /// we deduced the result of a comparison, but don't immediately have an
691 /// instruction realizing that comparison to hand.
692 uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode
,
693 CmpInst::Predicate Predicate
,
694 Value
*LHS
, Value
*RHS
) {
695 Expression exp
= createCmpExpr(Opcode
, Predicate
, LHS
, RHS
);
696 return assignExpNewValueNum(exp
).first
;
699 /// Remove all entries from the ValueTable.
700 void GVNPass::ValueTable::clear() {
701 valueNumbering
.clear();
702 expressionNumbering
.clear();
703 NumberingPhi
.clear();
704 PhiTranslateTable
.clear();
711 /// Remove a value from the value numbering.
712 void GVNPass::ValueTable::erase(Value
*V
) {
713 uint32_t Num
= valueNumbering
.lookup(V
);
714 valueNumbering
.erase(V
);
715 // If V is a PHINode, V <--> value number is a one-to-one mapping.
717 NumberingPhi
.erase(Num
);
720 /// verifyRemoved - Verify that the value is removed from all internal data
722 void GVNPass::ValueTable::verifyRemoved(const Value
*V
) const {
723 assert(!valueNumbering
.contains(V
) &&
724 "Inst still occurs in value numbering map!");
727 //===----------------------------------------------------------------------===//
728 //                                     GVN Pass
729 //===----------------------------------------------------------------------===//
731 bool GVNPass::isPREEnabled() const {
732 return Options
.AllowPRE
.value_or(GVNEnablePRE
);
735 bool GVNPass::isLoadPREEnabled() const {
736 return Options
.AllowLoadPRE
.value_or(GVNEnableLoadPRE
);
739 bool GVNPass::isLoadInLoopPREEnabled() const {
740 return Options
.AllowLoadInLoopPRE
.value_or(GVNEnableLoadInLoopPRE
);
743 bool GVNPass::isLoadPRESplitBackedgeEnabled() const {
744 return Options
.AllowLoadPRESplitBackedge
.value_or(
745 GVNEnableSplitBackedgeInLoadPRE
);
748 bool GVNPass::isMemDepEnabled() const {
749 return Options
.AllowMemDep
.value_or(GVNEnableMemDep
);
752 PreservedAnalyses
GVNPass::run(Function
&F
, FunctionAnalysisManager
&AM
) {
753 // FIXME: The order of evaluation of these 'getResult' calls is very
754 // significant! Re-ordering these variables will cause GVN when run alone to
755 // be less effective! We should fix memdep and basic-aa to not exhibit this
756 // behavior, but until then don't change the order here.
757 auto &AC
= AM
.getResult
<AssumptionAnalysis
>(F
);
758 auto &DT
= AM
.getResult
<DominatorTreeAnalysis
>(F
);
759 auto &TLI
= AM
.getResult
<TargetLibraryAnalysis
>(F
);
760 auto &AA
= AM
.getResult
<AAManager
>(F
);
762 isMemDepEnabled() ? &AM
.getResult
<MemoryDependenceAnalysis
>(F
) : nullptr;
763 auto &LI
= AM
.getResult
<LoopAnalysis
>(F
);
764 auto *MSSA
= AM
.getCachedResult
<MemorySSAAnalysis
>(F
);
765 auto &ORE
= AM
.getResult
<OptimizationRemarkEmitterAnalysis
>(F
);
766 bool Changed
= runImpl(F
, AC
, DT
, TLI
, AA
, MemDep
, LI
, &ORE
,
767 MSSA
? &MSSA
->getMSSA() : nullptr);
769 return PreservedAnalyses::all();
770 PreservedAnalyses PA
;
771 PA
.preserve
<DominatorTreeAnalysis
>();
772 PA
.preserve
<TargetLibraryAnalysis
>();
774 PA
.preserve
<MemorySSAAnalysis
>();
775 PA
.preserve
<LoopAnalysis
>();
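/// Print the GVN options in pass-pipeline syntax; only options that were
/// explicitly overridden via GVNOptions are emitted.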
779 void GVNPass::printPipeline(
780 raw_ostream
&OS
, function_ref
<StringRef(StringRef
)> MapClassName2PassName
) {
781 static_cast<PassInfoMixin
<GVNPass
> *>(this)->printPipeline(
782 OS
, MapClassName2PassName
);
785 if (Options
.AllowPRE
!= std::nullopt
)
786 OS
<< (*Options
.AllowPRE
? "" : "no-") << "pre;";
787 if (Options
.AllowLoadPRE
!= std::nullopt
)
788 OS
<< (*Options
.AllowLoadPRE
? "" : "no-") << "load-pre;";
789 if (Options
.AllowLoadPRESplitBackedge
!= std::nullopt
)
790 OS
<< (*Options
.AllowLoadPRESplitBackedge
? "" : "no-")
791 << "split-backedge-load-pre;";
792 if (Options
.AllowMemDep
!= std::nullopt
)
793 OS
<< (*Options
.AllowMemDep
? "" : "no-") << "memdep";
797 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
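/// Debug helper: print the contents of the value-number-to-value map d.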
798 LLVM_DUMP_METHOD
void GVNPass::dump(DenseMap
<uint32_t, Value
*> &d
) const {
801 errs() << I
.first
<< "\n";
808 enum class AvailabilityState
: char {
809 /// We know the block *is not* fully available. This is a fixpoint.
811 /// We know the block *is* fully available. This is a fixpoint.
813 /// We do not know whether the block is fully available or not,
814 /// but we are currently speculating that it will be.
815 /// If it would have turned out that the block was, in fact, not fully
816 /// available, this would have been cleaned up into an Unavailable.
817 SpeculativelyAvailable
= 2,
820 /// Return true if we can prove that the value
821 /// we're analyzing is fully available in the specified block. As we go, keep
822 /// track of which blocks we know are fully alive in FullyAvailableBlocks. This
823 /// map is actually a tri-state map with the following values:
824 /// 0) we know the block *is not* fully available.
825 /// 1) we know the block *is* fully available.
826 /// 2) we do not know whether the block is fully available or not, but we are
827 /// currently speculating that it will be.
828 static bool IsValueFullyAvailableInBlock(
830 DenseMap
<BasicBlock
*, AvailabilityState
> &FullyAvailableBlocks
) {
831 SmallVector
<BasicBlock
*, 32> Worklist
;
832 std::optional
<BasicBlock
*> UnavailableBB
;
834 // The number of times we didn't find an entry for a block in a map and
835 // optimistically inserted an entry marking block as speculatively available.
836 unsigned NumNewNewSpeculativelyAvailableBBs
= 0;
839 SmallSet
<BasicBlock
*, 32> NewSpeculativelyAvailableBBs
;
840 SmallVector
<BasicBlock
*, 32> AvailableBBs
;
843 Worklist
.emplace_back(BB
);
844 while (!Worklist
.empty()) {
845     BasicBlock *CurrBB = Worklist.pop_back_val(); // LIFO - depth-first!
846 // Optimistically assume that the block is Speculatively Available and check
847 // to see if we already know about this block in one lookup.
848 std::pair
<DenseMap
<BasicBlock
*, AvailabilityState
>::iterator
, bool> IV
=
849 FullyAvailableBlocks
.try_emplace(
850 CurrBB
, AvailabilityState::SpeculativelyAvailable
);
851 AvailabilityState
&State
= IV
.first
->second
;
853 // Did the entry already exist for this block?
855 if (State
== AvailabilityState::Unavailable
) {
856 UnavailableBB
= CurrBB
;
857 break; // Backpropagate unavailability info.
861 AvailableBBs
.emplace_back(CurrBB
);
863 continue; // Don't recurse further, but continue processing worklist.
866 // No entry found for block.
867 ++NumNewNewSpeculativelyAvailableBBs
;
868 bool OutOfBudget
= NumNewNewSpeculativelyAvailableBBs
> MaxBBSpeculations
;
870 // If we have exhausted our budget, mark this block as unavailable.
871 // Also, if this block has no predecessors, the value isn't live-in here.
872 if (OutOfBudget
|| pred_empty(CurrBB
)) {
873 MaxBBSpeculationCutoffReachedTimes
+= (int)OutOfBudget
;
874 State
= AvailabilityState::Unavailable
;
875 UnavailableBB
= CurrBB
;
876 break; // Backpropagate unavailability info.
879 // Tentatively consider this block as speculatively available.
881 NewSpeculativelyAvailableBBs
.insert(CurrBB
);
883 // And further recurse into block's predecessors, in depth-first order!
884 Worklist
.append(pred_begin(CurrBB
), pred_end(CurrBB
));
887 #if LLVM_ENABLE_STATS
888 IsValueFullyAvailableInBlockNumSpeculationsMax
.updateMax(
889 NumNewNewSpeculativelyAvailableBBs
);
892 // If the block isn't marked as fixpoint yet
893 // (the Unavailable and Available states are fixpoints)
894 auto MarkAsFixpointAndEnqueueSuccessors
=
895 [&](BasicBlock
*BB
, AvailabilityState FixpointState
) {
896 auto It
= FullyAvailableBlocks
.find(BB
);
897 if (It
== FullyAvailableBlocks
.end())
898 return; // Never queried this block, leave as-is.
899 switch (AvailabilityState
&State
= It
->second
) {
900 case AvailabilityState::Unavailable
:
901 case AvailabilityState::Available
:
902 return; // Don't backpropagate further, continue processing worklist.
903 case AvailabilityState::SpeculativelyAvailable
: // Fix it!
904 State
= FixpointState
;
906 assert(NewSpeculativelyAvailableBBs
.erase(BB
) &&
907 "Found a speculatively available successor leftover?");
909 // Queue successors for further processing.
910 Worklist
.append(succ_begin(BB
), succ_end(BB
));
916 // Okay, we have encountered an unavailable block.
917 // Mark speculatively available blocks reachable from UnavailableBB as
918 // unavailable as well. Paths are terminated when they reach blocks not in
919 // FullyAvailableBlocks or they are not marked as speculatively available.
921 Worklist
.append(succ_begin(*UnavailableBB
), succ_end(*UnavailableBB
));
922 while (!Worklist
.empty())
923 MarkAsFixpointAndEnqueueSuccessors(Worklist
.pop_back_val(),
924 AvailabilityState::Unavailable
);
929 for (BasicBlock
*AvailableBB
: AvailableBBs
)
930 Worklist
.append(succ_begin(AvailableBB
), succ_end(AvailableBB
));
931 while (!Worklist
.empty())
932 MarkAsFixpointAndEnqueueSuccessors(Worklist
.pop_back_val(),
933 AvailabilityState::Available
);
935 assert(NewSpeculativelyAvailableBBs
.empty() &&
936 "Must have fixed all the new speculatively available blocks.");
939 return !UnavailableBB
;
942 /// If the specified OldValue exists in ValuesPerBlock, replace its value with
943 /// NewValue.
944 static void replaceValuesPerBlockEntry(
945 SmallVectorImpl
<AvailableValueInBlock
> &ValuesPerBlock
, Value
*OldValue
,
947 for (AvailableValueInBlock
&V
: ValuesPerBlock
) {
948 if (V
.AV
.Val
== OldValue
)
950 if (V
.AV
.isSelectValue()) {
951 if (V
.AV
.V1
== OldValue
)
953 if (V
.AV
.V2
== OldValue
)
959 /// Given a set of loads specified by ValuesPerBlock,
960 /// construct SSA form, allowing us to eliminate Load. This returns the value
961 /// that should be used at Load's definition site.
963 ConstructSSAForLoadSet(LoadInst
*Load
,
964 SmallVectorImpl
<AvailableValueInBlock
> &ValuesPerBlock
,
966 // Check for the fully redundant, dominating load case. In this case, we can
967 // just use the dominating value directly.
968 if (ValuesPerBlock
.size() == 1 &&
969 gvn
.getDominatorTree().properlyDominates(ValuesPerBlock
[0].BB
,
970 Load
->getParent())) {
971 assert(!ValuesPerBlock
[0].AV
.isUndefValue() &&
972 "Dead BB dominate this block");
973 return ValuesPerBlock
[0].MaterializeAdjustedValue(Load
, gvn
);
976 // Otherwise, we have to construct SSA form.
977 SmallVector
<PHINode
*, 8> NewPHIs
;
978 SSAUpdater
SSAUpdate(&NewPHIs
);
979 SSAUpdate
.Initialize(Load
->getType(), Load
->getName());
981 for (const AvailableValueInBlock
&AV
: ValuesPerBlock
) {
982 BasicBlock
*BB
= AV
.BB
;
984 if (AV
.AV
.isUndefValue())
987 if (SSAUpdate
.HasValueForBlock(BB
))
990 // If the value is the load that we will be eliminating, and the block it's
991 // available in is the block that the load is in, then don't add it as
992 // SSAUpdater will resolve the value to the relevant phi which may let it
993 // avoid phi construction entirely if there's actually only one value.
994 if (BB
== Load
->getParent() &&
995 ((AV
.AV
.isSimpleValue() && AV
.AV
.getSimpleValue() == Load
) ||
996 (AV
.AV
.isCoercedLoadValue() && AV
.AV
.getCoercedLoadValue() == Load
)))
999 SSAUpdate
.AddAvailableValue(BB
, AV
.MaterializeAdjustedValue(Load
, gvn
));
1002 // Perform PHI construction.
1003 return SSAUpdate
.GetValueInMiddleOfBlock(Load
->getParent());
1006 Value
*AvailableValue::MaterializeAdjustedValue(LoadInst
*Load
,
1007 Instruction
*InsertPt
,
1008 GVNPass
&gvn
) const {
1010 Type
*LoadTy
= Load
->getType();
1011 const DataLayout
&DL
= Load
->getModule()->getDataLayout();
1012 if (isSimpleValue()) {
1013 Res
= getSimpleValue();
1014 if (Res
->getType() != LoadTy
) {
1015 Res
= getValueForLoad(Res
, Offset
, LoadTy
, InsertPt
, DL
);
1017 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset
1018 << " " << *getSimpleValue() << '\n'
1022 } else if (isCoercedLoadValue()) {
1023 LoadInst
*CoercedLoad
= getCoercedLoadValue();
1024 if (CoercedLoad
->getType() == LoadTy
&& Offset
== 0) {
1026 combineMetadataForCSE(CoercedLoad
, Load
, false);
1028 Res
= getValueForLoad(CoercedLoad
, Offset
, LoadTy
, InsertPt
, DL
);
1029 // We are adding a new user for this load, for which the original
1030 // metadata may not hold. Additionally, the new load may have a different
1031 // size and type, so their metadata cannot be combined in any
1032 // straightforward way.
1033 // Drop all metadata that is not known to cause immediate UB on violation,
1034 // unless the load has !noundef, in which case all metadata violations
1035 // will be promoted to UB.
1036 // TODO: We can combine noalias/alias.scope metadata here, because it is
1037 // independent of the load type.
1038 if (!CoercedLoad
->hasMetadata(LLVMContext::MD_noundef
))
1039 CoercedLoad
->dropUnknownNonDebugMetadata(
1040 {LLVMContext::MD_dereferenceable
,
1041 LLVMContext::MD_dereferenceable_or_null
,
1042 LLVMContext::MD_invariant_load
, LLVMContext::MD_invariant_group
});
1043 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset
1044 << " " << *getCoercedLoadValue() << '\n'
1048 } else if (isMemIntrinValue()) {
1049 Res
= getMemInstValueForLoad(getMemIntrinValue(), Offset
, LoadTy
,
1051 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
1052 << " " << *getMemIntrinValue() << '\n'
1055 } else if (isSelectValue()) {
1056 // Introduce a new value select for a load from an eligible pointer select.
1057 SelectInst
*Sel
= getSelectValue();
1058 assert(V1
&& V2
&& "both value operands of the select must be present");
1059 Res
= SelectInst::Create(Sel
->getCondition(), V1
, V2
, "", Sel
);
1061 llvm_unreachable("Should not materialize value from dead block");
1063 assert(Res
&& "failed to materialize?");
1067 static bool isLifetimeStart(const Instruction
*Inst
) {
1068 if (const IntrinsicInst
* II
= dyn_cast
<IntrinsicInst
>(Inst
))
1069 return II
->getIntrinsicID() == Intrinsic::lifetime_start
;
1073 /// Assuming To can be reached from both From and Between, does Between lie on
1074 /// every path from From to To?
1075 static bool liesBetween(const Instruction
*From
, Instruction
*Between
,
1076 const Instruction
*To
, DominatorTree
*DT
) {
1077 if (From
->getParent() == Between
->getParent())
1078 return DT
->dominates(From
, Between
);
1079 SmallSet
<BasicBlock
*, 1> Exclusion
;
1080 Exclusion
.insert(Between
->getParent());
1081 return !isPotentiallyReachable(From
, To
, &Exclusion
, DT
);
1084 /// Try to locate the three instructions involved in a missed
1085 /// load-elimination case that is due to an intervening store.
1086 static void reportMayClobberedLoad(LoadInst
*Load
, MemDepResult DepInfo
,
1088 OptimizationRemarkEmitter
*ORE
) {
1089 using namespace ore
;
1091 Instruction
*OtherAccess
= nullptr;
1093 OptimizationRemarkMissed
R(DEBUG_TYPE
, "LoadClobbered", Load
);
1094 R
<< "load of type " << NV("Type", Load
->getType()) << " not eliminated"
1097 for (auto *U
: Load
->getPointerOperand()->users()) {
1098 if (U
!= Load
&& (isa
<LoadInst
>(U
) || isa
<StoreInst
>(U
))) {
1099 auto *I
= cast
<Instruction
>(U
);
1100 if (I
->getFunction() == Load
->getFunction() && DT
->dominates(I
, Load
)) {
1101 // Use the most immediately dominating value
1103 if (DT
->dominates(OtherAccess
, I
))
1106 assert(U
== OtherAccess
|| DT
->dominates(I
, OtherAccess
));
1114 // There is no dominating use, check if we can find a closest non-dominating
1115 // use that lies between any other potentially available use and Load.
1116 for (auto *U
: Load
->getPointerOperand()->users()) {
1117 if (U
!= Load
&& (isa
<LoadInst
>(U
) || isa
<StoreInst
>(U
))) {
1118 auto *I
= cast
<Instruction
>(U
);
1119 if (I
->getFunction() == Load
->getFunction() &&
1120 isPotentiallyReachable(I
, Load
, nullptr, DT
)) {
1122 if (liesBetween(OtherAccess
, I
, Load
, DT
)) {
1124 } else if (!liesBetween(I
, OtherAccess
, Load
, DT
)) {
1125 // These uses are both partially available at Load were it not for
1126 // the clobber, but neither lies strictly after the other.
1127 OtherAccess
= nullptr;
1129 } // else: keep current OtherAccess since it lies between U and Load
1139 R
<< " in favor of " << NV("OtherAccess", OtherAccess
);
1141 R
<< " because it is clobbered by " << NV("ClobberedBy", DepInfo
.getInst());
1146 // Find a non-clobbered value for the Loc memory location in an extended basic
1147 // block (a chain of basic blocks with single predecessors), starting at the From instruction.
1148 static Value
*findDominatingValue(const MemoryLocation
&Loc
, Type
*LoadTy
,
1149 Instruction
*From
, AAResults
*AA
) {
1150 uint32_t NumVisitedInsts
= 0;
1151 BasicBlock
*FromBB
= From
->getParent();
1152 BatchAAResults
BatchAA(*AA
);
1153 for (BasicBlock
*BB
= FromBB
; BB
; BB
= BB
->getSinglePredecessor())
1154 for (auto *Inst
= BB
== FromBB
? From
: BB
->getTerminator();
1155 Inst
!= nullptr; Inst
= Inst
->getPrevNonDebugInstruction()) {
1156 // Stop the search if limit is reached.
1157 if (++NumVisitedInsts
> MaxNumVisitedInsts
)
1159 if (isModSet(BatchAA
.getModRefInfo(Inst
, Loc
)))
1161 if (auto *LI
= dyn_cast
<LoadInst
>(Inst
))
1162 if (LI
->getPointerOperand() == Loc
.Ptr
&& LI
->getType() == LoadTy
)
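/// Given a local dependence (def or clobber) of Load, try to describe a value
/// that can replace it: a stored value, an earlier load, a memory-intrinsic
/// source, a select whose both arms have known values, or undef. Returns
/// std::nullopt if nothing can be forwarded.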
1168 std::optional
<AvailableValue
>
1169 GVNPass::AnalyzeLoadAvailability(LoadInst
*Load
, MemDepResult DepInfo
,
1171 assert(Load
->isUnordered() && "rules below are incorrect for ordered access");
1172 assert(DepInfo
.isLocal() && "expected a local dependence");
1174 Instruction
*DepInst
= DepInfo
.getInst();
1176 const DataLayout
&DL
= Load
->getModule()->getDataLayout();
1177 if (DepInfo
.isClobber()) {
1178 // If the dependence is to a store that writes to a superset of the bits
1179 // read by the load, we can extract the bits we need for the load from the
1181 if (StoreInst
*DepSI
= dyn_cast
<StoreInst
>(DepInst
)) {
1182 // Can't forward from non-atomic to atomic without violating memory model.
1183 if (Address
&& Load
->isAtomic() <= DepSI
->isAtomic()) {
1185 analyzeLoadFromClobberingStore(Load
->getType(), Address
, DepSI
, DL
);
1187 return AvailableValue::get(DepSI
->getValueOperand(), Offset
);
1191 // Check to see if we have something like this:
1194 // if we have this, replace the later with an extraction from the former.
1195 if (LoadInst
*DepLoad
= dyn_cast
<LoadInst
>(DepInst
)) {
1196 // If this is a clobber and L is the first instruction in its block, then
1197 // we have the first instruction in the entry block.
1198 // Can't forward from non-atomic to atomic without violating memory model.
1199 if (DepLoad
!= Load
&& Address
&&
1200 Load
->isAtomic() <= DepLoad
->isAtomic()) {
1201 Type
*LoadType
= Load
->getType();
1204 // If MD reported clobber, check it was nested.
1205 if (DepInfo
.isClobber() &&
1206 canCoerceMustAliasedValueToLoad(DepLoad
, LoadType
, DL
)) {
1207 const auto ClobberOff
= MD
->getClobberOffset(DepLoad
);
1208 // GVN does not handle negative offsets.
1209 Offset
= (ClobberOff
== std::nullopt
|| *ClobberOff
< 0)
1215 analyzeLoadFromClobberingLoad(LoadType
, Address
, DepLoad
, DL
);
1217 return AvailableValue::getLoad(DepLoad
, Offset
);
1221 // If the clobbering value is a memset/memcpy/memmove, see if we can
1222 // forward a value on from it.
1223 if (MemIntrinsic
*DepMI
= dyn_cast
<MemIntrinsic
>(DepInst
)) {
1224 if (Address
&& !Load
->isAtomic()) {
1225 int Offset
= analyzeLoadFromClobberingMemInst(Load
->getType(), Address
,
1228 return AvailableValue::getMI(DepMI
, Offset
);
1232 // Nothing known about this clobber, have to be conservative
1234 // fast print dep, using operator<< on instruction is too slow.
1235 dbgs() << "GVN: load "; Load
->printAsOperand(dbgs());
1236 dbgs() << " is clobbered by " << *DepInst
<< '\n';);
1237 if (ORE
->allowExtraAnalysis(DEBUG_TYPE
))
1238 reportMayClobberedLoad(Load
, DepInfo
, DT
, ORE
);
1240 return std::nullopt
;
1242 assert(DepInfo
.isDef() && "follows from above");
1244 // Loading the alloca -> undef.
1245 // Loading immediately after lifetime begin -> undef.
1246 if (isa
<AllocaInst
>(DepInst
) || isLifetimeStart(DepInst
))
1247 return AvailableValue::get(UndefValue::get(Load
->getType()));
1249 if (Constant
*InitVal
=
1250 getInitialValueOfAllocation(DepInst
, TLI
, Load
->getType()))
1251 return AvailableValue::get(InitVal
);
1253 if (StoreInst
*S
= dyn_cast
<StoreInst
>(DepInst
)) {
1254 // Reject loads and stores that are to the same address but are of
1255 // different types if we have to. If the stored value is convertible to
1256 // the loaded value, we can reuse it.
1257 if (!canCoerceMustAliasedValueToLoad(S
->getValueOperand(), Load
->getType(),
1259 return std::nullopt
;
1261 // Can't forward from non-atomic to atomic without violating memory model.
1262 if (S
->isAtomic() < Load
->isAtomic())
1263 return std::nullopt
;
1265 return AvailableValue::get(S
->getValueOperand());
1268 if (LoadInst
*LD
= dyn_cast
<LoadInst
>(DepInst
)) {
1269 // If the types mismatch and we can't handle it, reject reuse of the load.
1270 // If the stored value is larger or equal to the loaded value, we can reuse
1272 if (!canCoerceMustAliasedValueToLoad(LD
, Load
->getType(), DL
))
1273 return std::nullopt
;
1275 // Can't forward from non-atomic to atomic without violating memory model.
1276 if (LD
->isAtomic() < Load
->isAtomic())
1277 return std::nullopt
;
1279 return AvailableValue::getLoad(LD
);
1282 // Check if load with Addr dependent from select can be converted to select
1283 // between load values. There must be no instructions between the found
1284 // loads and DepInst that may clobber the loads.
1285 if (auto *Sel
= dyn_cast
<SelectInst
>(DepInst
)) {
1286 assert(Sel
->getType() == Load
->getPointerOperandType());
1287 auto Loc
= MemoryLocation::get(Load
);
1289 findDominatingValue(Loc
.getWithNewPtr(Sel
->getTrueValue()),
1290 Load
->getType(), DepInst
, getAliasAnalysis());
1292 return std::nullopt
;
1294 findDominatingValue(Loc
.getWithNewPtr(Sel
->getFalseValue()),
1295 Load
->getType(), DepInst
, getAliasAnalysis());
1297 return std::nullopt
;
1298 return AvailableValue::getSelect(Sel
, V1
, V2
);
1301 // Unknown def - must be conservative
1303 // fast print dep, using operator<< on instruction is too slow.
1304 dbgs() << "GVN: load "; Load
->printAsOperand(dbgs());
1305 dbgs() << " has unknown def " << *DepInst
<< '\n';);
1306 return std::nullopt
;
1309 void GVNPass::AnalyzeLoadAvailability(LoadInst
*Load
, LoadDepVect
&Deps
,
1310 AvailValInBlkVect
&ValuesPerBlock
,
1311 UnavailBlkVect
&UnavailableBlocks
) {
1312 // Filter out useless results (non-locals, etc). Keep track of the blocks
1313 // where we have a value available in repl, also keep track of whether we see
1314 // dependencies that produce an unknown value for the load (such as a call
1315 // that could potentially clobber the load).
1316 for (const auto &Dep
: Deps
) {
1317 BasicBlock
*DepBB
= Dep
.getBB();
1318 MemDepResult DepInfo
= Dep
.getResult();
1320 if (DeadBlocks
.count(DepBB
)) {
1321 // A dead dependent mem-op is disguised as a load evaluating the same value
1322 // as the load in question.
1323 ValuesPerBlock
.push_back(AvailableValueInBlock::getUndef(DepBB
));
1327 if (!DepInfo
.isLocal()) {
1328 UnavailableBlocks
.push_back(DepBB
);
1332 // The address being loaded in this non-local block may not be the same as
1333 // the pointer operand of the load if PHI translation occurs. Make sure
1334 // to consider the right address.
1335 if (auto AV
= AnalyzeLoadAvailability(Load
, DepInfo
, Dep
.getAddress())) {
1336 // subtlety: because we know this was a non-local dependency, we know
1337 // it's safe to materialize anywhere between the instruction within
1338 // DepInfo and the end of its block.
1339 ValuesPerBlock
.push_back(
1340 AvailableValueInBlock::get(DepBB
, std::move(*AV
)));
1342 UnavailableBlocks
.push_back(DepBB
);
1346 assert(Deps
.size() == ValuesPerBlock
.size() + UnavailableBlocks
.size() &&
1347 "post condition violation");
1350 /// Given the following code, v1 is partially available on some edges, but not
1351 /// available on the edge from PredBB. This function tries to find if there is
1352 /// another identical load in the other successor of PredBB.
1363 /// br %cond, label %LoadBB, label %SuccBB
1369 LoadInst
*GVNPass::findLoadToHoistIntoPred(BasicBlock
*Pred
, BasicBlock
*LoadBB
,
1371 // For simplicity, we only handle the case where Pred has two successors.
1372 auto *Term
= Pred
->getTerminator();
1373 if (Term
->getNumSuccessors() != 2 || Term
->isSpecialTerminator())
1375 auto *SuccBB
= Term
->getSuccessor(0);
1376 if (SuccBB
== LoadBB
)
1377 SuccBB
= Term
->getSuccessor(1);
1378 if (!SuccBB
->getSinglePredecessor())
1381 unsigned int NumInsts
= MaxNumInsnsPerBlock
;
1382 for (Instruction
&Inst
: *SuccBB
) {
1383 if (Inst
.isDebugOrPseudoInst())
1385 if (--NumInsts
== 0)
1388 if (!Inst
.isIdenticalTo(Load
))
1391 MemDepResult Dep
= MD
->getDependency(&Inst
);
1392 // If an identical load doesn't depend on any local instructions, it can
1393 // be safely moved to PredBB.
1394 // Also check for the implicit control flow instructions. See the comments
1395 // in PerformLoadPRE for details.
1396 if (Dep
.isNonLocal() && !ICF
->isDominatedByICFIFromSameBlock(&Inst
))
1397 return cast
<LoadInst
>(&Inst
);
1399 // Otherwise, something in the same BB clobbers the memory, so we can't
1400 // move this load and the later load to PredBB.
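/// Insert a new load in each block listed in AvailableLoads, then build SSA
/// form over all available values so that Load can be replaced and deleted.
/// Loads recorded in CriticalEdgePredAndLoad are rewritten to reuse the newly
/// inserted loads instead of being re-inserted.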
1407 void GVNPass::eliminatePartiallyRedundantLoad(
1408 LoadInst
*Load
, AvailValInBlkVect
&ValuesPerBlock
,
1409 MapVector
<BasicBlock
*, Value
*> &AvailableLoads
,
1410 MapVector
<BasicBlock
*, LoadInst
*> *CriticalEdgePredAndLoad
) {
1411 for (const auto &AvailableLoad
: AvailableLoads
) {
1412 BasicBlock
*UnavailableBlock
= AvailableLoad
.first
;
1413 Value
*LoadPtr
= AvailableLoad
.second
;
1416 new LoadInst(Load
->getType(), LoadPtr
, Load
->getName() + ".pre",
1417 Load
->isVolatile(), Load
->getAlign(), Load
->getOrdering(),
1418 Load
->getSyncScopeID(), UnavailableBlock
->getTerminator());
1419 NewLoad
->setDebugLoc(Load
->getDebugLoc());
1421 auto *NewAccess
= MSSAU
->createMemoryAccessInBB(
1422 NewLoad
, nullptr, NewLoad
->getParent(), MemorySSA::BeforeTerminator
);
1423 if (auto *NewDef
= dyn_cast
<MemoryDef
>(NewAccess
))
1424 MSSAU
->insertDef(NewDef
, /*RenameUses=*/true);
1426 MSSAU
->insertUse(cast
<MemoryUse
>(NewAccess
), /*RenameUses=*/true);
1429 // Transfer the old load's AA tags to the new load.
1430 AAMDNodes Tags
= Load
->getAAMetadata();
1432 NewLoad
->setAAMetadata(Tags
);
1434 if (auto *MD
= Load
->getMetadata(LLVMContext::MD_invariant_load
))
1435 NewLoad
->setMetadata(LLVMContext::MD_invariant_load
, MD
);
1436 if (auto *InvGroupMD
= Load
->getMetadata(LLVMContext::MD_invariant_group
))
1437 NewLoad
->setMetadata(LLVMContext::MD_invariant_group
, InvGroupMD
);
1438 if (auto *RangeMD
= Load
->getMetadata(LLVMContext::MD_range
))
1439 NewLoad
->setMetadata(LLVMContext::MD_range
, RangeMD
);
1440 if (auto *AccessMD
= Load
->getMetadata(LLVMContext::MD_access_group
))
1441 if (LI
->getLoopFor(Load
->getParent()) == LI
->getLoopFor(UnavailableBlock
))
1442 NewLoad
->setMetadata(LLVMContext::MD_access_group
, AccessMD
);
1444 // We do not propagate the old load's debug location, because the new
1445 // load now lives in a different BB, and we want to avoid a jumpy line
1447 // FIXME: How do we retain source locations without causing poor debugging
1450 // Add the newly created load.
1451 ValuesPerBlock
.push_back(
1452 AvailableValueInBlock::get(UnavailableBlock
, NewLoad
));
1453 MD
->invalidateCachedPointerInfo(LoadPtr
);
1454 LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad
<< '\n');
1456 // For PredBB in CriticalEdgePredAndLoad we need to replace the uses of the
1457 // old load instruction with the newly created load instruction.
1458 if (CriticalEdgePredAndLoad
) {
1459 auto I
= CriticalEdgePredAndLoad
->find(UnavailableBlock
);
1460 if (I
!= CriticalEdgePredAndLoad
->end()) {
1461 ++NumPRELoadMoved2CEPred
;
1462 ICF
->insertInstructionTo(NewLoad
, UnavailableBlock
);
1463 LoadInst
*OldLoad
= I
->second
;
1464 combineMetadataForCSE(NewLoad
, OldLoad
, false);
1465 OldLoad
->replaceAllUsesWith(NewLoad
);
1466 replaceValuesPerBlockEntry(ValuesPerBlock
, OldLoad
, NewLoad
);
1467 if (uint32_t ValNo
= VN
.lookup(OldLoad
, false))
1468 removeFromLeaderTable(ValNo
, OldLoad
, OldLoad
->getParent());
1470 removeInstruction(OldLoad
);
1475 // Perform PHI construction.
1476 Value
*V
= ConstructSSAForLoadSet(Load
, ValuesPerBlock
, *this);
1477 // ConstructSSAForLoadSet is responsible for combining metadata.
1478 ICF
->removeUsersOf(Load
);
1479 Load
->replaceAllUsesWith(V
);
1480 if (isa
<PHINode
>(V
))
1482 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
1483 I
->setDebugLoc(Load
->getDebugLoc());
1484 if (V
->getType()->isPtrOrPtrVectorTy())
1485 MD
->invalidateCachedPointerInfo(V
);
1486 markInstructionForDeletion(Load
);
1488 return OptimizationRemark(DEBUG_TYPE
, "LoadPRE", Load
)
1489 << "load eliminated by PRE";
1493 bool GVNPass::PerformLoadPRE(LoadInst
*Load
, AvailValInBlkVect
&ValuesPerBlock
,
1494 UnavailBlkVect
&UnavailableBlocks
) {
1495 // Okay, we have *some* definitions of the value. This means that the value
1496 // is available in some of our (transitive) predecessors. Let's think about
1497 // doing PRE of this load. This will involve inserting a new load into the
1498 // predecessor when it's not available. We could do this in general, but
1499 // prefer to not increase code size. As such, we only do this when we know
1500 // that we only have to insert *one* load (which means we're basically moving
1501 // the load, not inserting a new one).
1503 SmallPtrSet
<BasicBlock
*, 4> Blockers(UnavailableBlocks
.begin(),
1504 UnavailableBlocks
.end());
1506 // Let's find the first basic block with more than one predecessor. Walk
1507 // backwards through predecessors if needed.
1508 BasicBlock
*LoadBB
= Load
->getParent();
1509 BasicBlock
*TmpBB
= LoadBB
;
1511 // Check that there are no implicit control flow instructions above our load in
1512 // its block. If there is an instruction that doesn't always pass the
1513 // execution to the following instruction, then moving through it may become
1514 // invalid. For example:
1519 // guard(0 <= index && index < LEN);
1522 // It is illegal to move the array access to any point above the guard,
1523 // because if the index is out of bounds we should deoptimize rather than
1524 // access the array.
1525 // Check that there is no guard in this block above our instruction.
1526 bool MustEnsureSafetyOfSpeculativeExecution
=
1527 ICF
->isDominatedByICFIFromSameBlock(Load
);
1529 while (TmpBB
->getSinglePredecessor()) {
1530 TmpBB
= TmpBB
->getSinglePredecessor();
1531 if (TmpBB
== LoadBB
) // Infinite (unreachable) loop.
1533 if (Blockers
.count(TmpBB
))
1536 // If any of these blocks has more than one successor (i.e. if the edge we
1537 // just traversed was critical), then there are other paths through this
1538 // block along which the load may not be anticipated. Hoisting the load
1539 // above this block would be adding the load to execution paths along
1540 // which it was not previously executed.
1541 if (TmpBB
->getTerminator()->getNumSuccessors() != 1)
1544 // Check that there is no implicit control flow in a block above.
1545 MustEnsureSafetyOfSpeculativeExecution
=
1546 MustEnsureSafetyOfSpeculativeExecution
|| ICF
->hasICF(TmpBB
);
1552 // Check to see how many predecessors have the loaded value fully
1554 MapVector
<BasicBlock
*, Value
*> PredLoads
;
1555 DenseMap
<BasicBlock
*, AvailabilityState
> FullyAvailableBlocks
;
1556 for (const AvailableValueInBlock
&AV
: ValuesPerBlock
)
1557 FullyAvailableBlocks
[AV
.BB
] = AvailabilityState::Available
;
1558 for (BasicBlock
*UnavailableBB
: UnavailableBlocks
)
1559 FullyAvailableBlocks
[UnavailableBB
] = AvailabilityState::Unavailable
;
1561 // The edge from Pred to LoadBB is a critical edge that will be split.
1562 SmallVector
<BasicBlock
*, 4> CriticalEdgePredSplit
;
1563 // The edge from Pred to LoadBB is a critical edge, another successor of Pred
1564 // contains a load that can be moved to Pred. This data structure maps the Pred to
1565 // the movable load.
1566 MapVector
<BasicBlock
*, LoadInst
*> CriticalEdgePredAndLoad
;
1567 for (BasicBlock
*Pred
: predecessors(LoadBB
)) {
1568 // If any predecessor block is an EH pad that does not allow non-PHI
1569 // instructions before the terminator, we can't PRE the load.
1570 if (Pred
->getTerminator()->isEHPad()) {
1572 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
1573 << Pred
->getName() << "': " << *Load
<< '\n');
1577 if (IsValueFullyAvailableInBlock(Pred
, FullyAvailableBlocks
)) {
1581 if (Pred
->getTerminator()->getNumSuccessors() != 1) {
1582 if (isa
<IndirectBrInst
>(Pred
->getTerminator())) {
1584 dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1585 << Pred
->getName() << "': " << *Load
<< '\n');
1589 if (LoadBB
->isEHPad()) {
1591 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
1592 << Pred
->getName() << "': " << *Load
<< '\n');
1596 // Do not split backedge as it will break the canonical loop form.
1597 if (!isLoadPRESplitBackedgeEnabled())
1598 if (DT
->dominates(LoadBB
, Pred
)) {
1601 << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
1602 << Pred
->getName() << "': " << *Load
<< '\n');
1606 if (LoadInst
*LI
= findLoadToHoistIntoPred(Pred
, LoadBB
, Load
))
1607 CriticalEdgePredAndLoad
[Pred
] = LI
;
1609 CriticalEdgePredSplit
.push_back(Pred
);
1611 // Only add the predecessors that will not be split for now.
1612 PredLoads
[Pred
] = nullptr;
1616 // Decide whether PRE is profitable for this load.
1617 unsigned NumInsertPreds
= PredLoads
.size() + CriticalEdgePredSplit
.size();
1618 unsigned NumUnavailablePreds
= NumInsertPreds
+
1619 CriticalEdgePredAndLoad
.size();
1620 assert(NumUnavailablePreds
!= 0 &&
1621 "Fully available value should already be eliminated!");
1622 (void)NumUnavailablePreds
;
1624 // If we need to insert new load in multiple predecessors, reject it.
1625 // FIXME: If we could restructure the CFG, we could make a common pred with
1626 // all the preds that don't have an available Load and insert a new load into
1628 if (NumInsertPreds
> 1)
1631 // Now we know where we will insert the load. We must ensure that it is safe
1632 // to speculatively execute the load at those points.
1633 if (MustEnsureSafetyOfSpeculativeExecution
) {
1634 if (CriticalEdgePredSplit
.size())
1635 if (!isSafeToSpeculativelyExecute(Load
, LoadBB
->getFirstNonPHI(), AC
, DT
))
1637 for (auto &PL
: PredLoads
)
1638 if (!isSafeToSpeculativelyExecute(Load
, PL
.first
->getTerminator(), AC
,
1641 for (auto &CEP
: CriticalEdgePredAndLoad
)
1642 if (!isSafeToSpeculativelyExecute(Load
, CEP
.first
->getTerminator(), AC
,
1647 // Split critical edges, and update the unavailable predecessors accordingly.
1648 for (BasicBlock
*OrigPred
: CriticalEdgePredSplit
) {
1649 BasicBlock
*NewPred
= splitCriticalEdges(OrigPred
, LoadBB
);
1650 assert(!PredLoads
.count(OrigPred
) && "Split edges shouldn't be in map!");
1651 PredLoads
[NewPred
] = nullptr;
1652 LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred
->getName() << "->"
1653 << LoadBB
->getName() << '\n');
1656 for (auto &CEP
: CriticalEdgePredAndLoad
)
1657 PredLoads
[CEP
.first
] = nullptr;
1659 // Check if the load can safely be moved to all the unavailable predecessors.
1660 bool CanDoPRE
= true;
1661 const DataLayout
&DL
= Load
->getModule()->getDataLayout();
1662 SmallVector
<Instruction
*, 8> NewInsts
;
1663 for (auto &PredLoad
: PredLoads
) {
1664 BasicBlock
*UnavailablePred
= PredLoad
.first
;
1666 // Do PHI translation to get its value in the predecessor if necessary. The
1667 // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1668 // We do the translation for each edge we skipped by going from Load's block
1669 // to LoadBB, otherwise we might miss pieces needing translation.
1671 // If all preds have a single successor, then we know it is safe to insert
1672 // the load on the pred (?!?), so we can insert code to materialize the
1673 // pointer if it is not available.
1674 Value
*LoadPtr
= Load
->getPointerOperand();
1675 BasicBlock
*Cur
= Load
->getParent();
1676 while (Cur
!= LoadBB
) {
1677 PHITransAddr
Address(LoadPtr
, DL
, AC
);
1678 LoadPtr
= Address
.translateWithInsertion(Cur
, Cur
->getSinglePredecessor(),
1684 Cur
= Cur
->getSinglePredecessor();
1688 PHITransAddr
Address(LoadPtr
, DL
, AC
);
1689 LoadPtr
= Address
.translateWithInsertion(LoadBB
, UnavailablePred
, *DT
,
1692 // If we couldn't find or insert a computation of this phi translated value,
1695 LLVM_DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1696 << *Load
->getPointerOperand() << "\n");
1701 PredLoad
.second
= LoadPtr
;
1705 while (!NewInsts
.empty()) {
1706 // Erase instructions generated by the failed PHI translation before
1707 // trying to number them. PHI translation might insert instructions
1708 // in basic blocks other than the current one, and we delete them
1709 // directly, as markInstructionForDeletion only allows removing from the
1710 // current basic block.
1711 NewInsts
.pop_back_val()->eraseFromParent();
1713 // HINT: Don't revert the edge-splitting as following transformation may
1714 // also need to split these critical edges.
1715 return !CriticalEdgePredSplit
.empty();
1718 // Okay, we can eliminate this load by inserting a reload in the predecessor
1719 // and using PHI construction to get the value in the other predecessors, do
1721 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *Load
<< '\n');
1722 LLVM_DEBUG(if (!NewInsts
.empty()) dbgs() << "INSERTED " << NewInsts
.size()
1723 << " INSTS: " << *NewInsts
.back()
1726 // Assign value numbers to the new instructions.
1727 for (Instruction
*I
: NewInsts
) {
1728 // Instructions that have been inserted in predecessor(s) to materialize
1729 // the load address do not retain their original debug locations. Doing
1730 // so could lead to confusing (but correct) source attributions.
1731 I
->updateLocationAfterHoist();
1733 // FIXME: We really _ought_ to insert these value numbers into their
1734 // parent's availability map. However, in doing so, we risk getting into
1735 // ordering issues. If a block hasn't been processed yet, we would be
1736 // marking a value as AVAIL-IN, which isn't what we intend.
1740 eliminatePartiallyRedundantLoad(Load
, ValuesPerBlock
, PredLoads
,
1741 &CriticalEdgePredAndLoad
);
1746 bool GVNPass::performLoopLoadPRE(LoadInst
*Load
,
1747 AvailValInBlkVect
&ValuesPerBlock
,
1748 UnavailBlkVect
&UnavailableBlocks
) {
1749 const Loop
*L
= LI
->getLoopFor(Load
->getParent());
1750 // TODO: Generalize to other loop blocks that dominate the latch.
1751 if (!L
|| L
->getHeader() != Load
->getParent())
1754 BasicBlock
*Preheader
= L
->getLoopPreheader();
1755 BasicBlock
*Latch
= L
->getLoopLatch();
1756 if (!Preheader
|| !Latch
)
1759 Value
*LoadPtr
= Load
->getPointerOperand();
1760 // Must be available in preheader.
1761 if (!L
->isLoopInvariant(LoadPtr
))
1764 // We plan to hoist the load to preheader without introducing a new fault.
1765 // In order to do it, we need to prove that we cannot side-exit the loop
1766 // once loop header is first entered before execution of the load.
1767 if (ICF
->isDominatedByICFIFromSameBlock(Load
))
1770 BasicBlock
*LoopBlock
= nullptr;
1771 for (auto *Blocker
: UnavailableBlocks
) {
1772 // Blockers from outside the loop are handled in preheader.
1773 if (!L
->contains(Blocker
))
1776 // Only allow one loop block. Loop header is not less frequently executed
1777 // than each loop block, and likely it is much more frequently executed. But
1778 // in case of multiple loop blocks, we need extra information (such as block
1779 // frequency info) to understand whether it is profitable to PRE into
1780 // multiple loop blocks.
1784 // Do not sink into inner loops. This may be non-profitable.
1785 if (L
!= LI
->getLoopFor(Blocker
))
1788 // Blocks that dominate the latch execute on every single iteration, maybe
1789 // except the last one. So PREing into these blocks doesn't make much sense
1790 // in most cases. But the blocks that do not necessarily execute on each
1791 // iteration are sometimes much colder than the header, and this is when
1792 // PRE is potentially profitable.
1793 if (DT
->dominates(Blocker
, Latch
))
1796 // Make sure that the terminator itself doesn't clobber.
1797 if (Blocker
->getTerminator()->mayWriteToMemory())
1800 LoopBlock
= Blocker
;
1806 // Make sure the memory at this pointer cannot be freed, therefore we can
1807 // safely reload from it after clobber.
1808 if (LoadPtr
->canBeFreed())
1811 // TODO: Support critical edge splitting if blocker has more than 1 successor.
1812 MapVector
<BasicBlock
*, Value
*> AvailableLoads
;
1813 AvailableLoads
[LoopBlock
] = LoadPtr
;
1814 AvailableLoads
[Preheader
] = LoadPtr
;
1816 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load
<< '\n');
1817 eliminatePartiallyRedundantLoad(Load
, ValuesPerBlock
, AvailableLoads
,
1818 /*CriticalEdgePredAndLoad*/ nullptr);
1823 static void reportLoadElim(LoadInst
*Load
, Value
*AvailableValue
,
1824 OptimizationRemarkEmitter
*ORE
) {
1825 using namespace ore
;
1828 return OptimizationRemark(DEBUG_TYPE
, "LoadElim", Load
)
1829 << "load of type " << NV("Type", Load
->getType()) << " eliminated"
1830 << setExtraArgs() << " in favor of "
1831 << NV("InfavorOfValue", AvailableValue
);
1835 /// Attempt to eliminate a load whose dependencies are
1836 /// non-local by performing PHI construction.
1837 bool GVNPass::processNonLocalLoad(LoadInst
*Load
) {
1838 // non-local speculations are not allowed under asan.
1839 if (Load
->getParent()->getParent()->hasFnAttribute(
1840 Attribute::SanitizeAddress
) ||
1841 Load
->getParent()->getParent()->hasFnAttribute(
1842 Attribute::SanitizeHWAddress
))
1845 // Step 1: Find the non-local dependencies of the load.
1847 MD
->getNonLocalPointerDependency(Load
, Deps
);
1849 // If we had to process more than one hundred blocks to find the
1850 // dependencies, this load isn't worth worrying about. Optimizing
1851 // it will be too expensive.
1852 unsigned NumDeps
= Deps
.size();
1853 if (NumDeps
> MaxNumDeps
)
1856 // If we had a phi translation failure, we'll have a single entry which is a
1857 // clobber in the current block. Reject this early.
1859 !Deps
[0].getResult().isDef() && !Deps
[0].getResult().isClobber()) {
1860 LLVM_DEBUG(dbgs() << "GVN: non-local load "; Load
->printAsOperand(dbgs());
1861 dbgs() << " has unknown dependencies\n";);
1865 bool Changed
= false;
1866 // If this load follows a GEP, see if we can PRE the indices before analyzing.
1867 if (GetElementPtrInst
*GEP
=
1868 dyn_cast
<GetElementPtrInst
>(Load
->getOperand(0))) {
1869 for (Use
&U
: GEP
->indices())
1870 if (Instruction
*I
= dyn_cast
<Instruction
>(U
.get()))
1871 Changed
|= performScalarPRE(I
);
1874 // Step 2: Analyze the availability of the load
1875 AvailValInBlkVect ValuesPerBlock
;
1876 UnavailBlkVect UnavailableBlocks
;
1877 AnalyzeLoadAvailability(Load
, Deps
, ValuesPerBlock
, UnavailableBlocks
);
1879 // If we have no predecessors that produce a known value for this load, exit
1881 if (ValuesPerBlock
.empty())
1884 // Step 3: Eliminate fully redundancy.
1886 // If all of the instructions we depend on produce a known value for this
1887 // load, then it is fully redundant and we can use PHI insertion to compute
1888 // its value. Insert PHIs and remove the fully redundant value now.
1889 if (UnavailableBlocks
.empty()) {
1890 LLVM_DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *Load
<< '\n');
1892 // Perform PHI construction.
1893 Value
*V
= ConstructSSAForLoadSet(Load
, ValuesPerBlock
, *this);
1894 // ConstructSSAForLoadSet is responsible for combining metadata.
1895 ICF
->removeUsersOf(Load
);
1896 Load
->replaceAllUsesWith(V
);
1898 if (isa
<PHINode
>(V
))
1900 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
1901 // If instruction I has debug info, then we should not update it.
1902 // Also, if I has a null DebugLoc, then it is still potentially incorrect
1903 // to propagate Load's DebugLoc because Load may not post-dominate I.
1904 if (Load
->getDebugLoc() && Load
->getParent() == I
->getParent())
1905 I
->setDebugLoc(Load
->getDebugLoc());
1906 if (V
->getType()->isPtrOrPtrVectorTy())
1907 MD
->invalidateCachedPointerInfo(V
);
1908 markInstructionForDeletion(Load
);
1910 reportLoadElim(Load
, V
, ORE
);
1914 // Step 4: Eliminate partial redundancy.
1915 if (!isPREEnabled() || !isLoadPREEnabled())
1917 if (!isLoadInLoopPREEnabled() && LI
->getLoopFor(Load
->getParent()))
1920 if (performLoopLoadPRE(Load
, ValuesPerBlock
, UnavailableBlocks
) ||
1921 PerformLoadPRE(Load
, ValuesPerBlock
, UnavailableBlocks
))
1927 static bool impliesEquivalanceIfTrue(CmpInst
* Cmp
) {
1928 if (Cmp
->getPredicate() == CmpInst::Predicate::ICMP_EQ
)
1931 // Floating point comparisons can be equal, but not equivalent. Cases:
1932 // NaNs for unordered operators
1933 // +0.0 vs 0.0 for all operators
1934 if (Cmp
->getPredicate() == CmpInst::Predicate::FCMP_OEQ
||
1935 (Cmp
->getPredicate() == CmpInst::Predicate::FCMP_UEQ
&&
1936 Cmp
->getFastMathFlags().noNaNs())) {
1937 Value
*LHS
= Cmp
->getOperand(0);
1938 Value
*RHS
= Cmp
->getOperand(1);
1939 // If we can prove either side non-zero, then equality must imply
1941 // FIXME: We should do this optimization if 'no signed zeros' is
1942 // applicable via an instruction-level fast-math-flag or some other
1943 // indicator that relaxed FP semantics are being used.
1944 if (isa
<ConstantFP
>(LHS
) && !cast
<ConstantFP
>(LHS
)->isZero())
1946 if (isa
<ConstantFP
>(RHS
) && !cast
<ConstantFP
>(RHS
)->isZero())
1948 // TODO: Handle vector floating point constants
1953 static bool impliesEquivalanceIfFalse(CmpInst
* Cmp
) {
1954 if (Cmp
->getPredicate() == CmpInst::Predicate::ICMP_NE
)
1957 // Floating point comparisons can be equal, but not equivelent. Cases:
1958 // NaNs for unordered operators
1959 // +0.0 vs 0.0 for all operators
1960 if ((Cmp
->getPredicate() == CmpInst::Predicate::FCMP_ONE
&&
1961 Cmp
->getFastMathFlags().noNaNs()) ||
1962 Cmp
->getPredicate() == CmpInst::Predicate::FCMP_UNE
) {
1963 Value
*LHS
= Cmp
->getOperand(0);
1964 Value
*RHS
= Cmp
->getOperand(1);
1965 // If we can prove either side non-zero, then equality must imply
1967 // FIXME: We should do this optimization if 'no signed zeros' is
1968 // applicable via an instruction-level fast-math-flag or some other
1969 // indicator that relaxed FP semantics are being used.
1970 if (isa
<ConstantFP
>(LHS
) && !cast
<ConstantFP
>(LHS
)->isZero())
1972 if (isa
<ConstantFP
>(RHS
) && !cast
<ConstantFP
>(RHS
)->isZero())
1974 // TODO: Handle vector floating point constants
1980 static bool hasUsersIn(Value
*V
, BasicBlock
*BB
) {
1981 return llvm::any_of(V
->users(), [BB
](User
*U
) {
1982 auto *I
= dyn_cast
<Instruction
>(U
);
1983 return I
&& I
->getParent() == BB
;
1987 bool GVNPass::processAssumeIntrinsic(AssumeInst
*IntrinsicI
) {
1988 Value
*V
= IntrinsicI
->getArgOperand(0);
1990 if (ConstantInt
*Cond
= dyn_cast
<ConstantInt
>(V
)) {
1991 if (Cond
->isZero()) {
1992 Type
*Int8Ty
= Type::getInt8Ty(V
->getContext());
1993 Type
*PtrTy
= PointerType::get(V
->getContext(), 0);
1994 // Insert a new store to null instruction before the load to indicate that
1995 // this code is not reachable. FIXME: We could insert unreachable
1996 // instruction directly because we can modify the CFG.
1997 auto *NewS
= new StoreInst(PoisonValue::get(Int8Ty
),
1998 Constant::getNullValue(PtrTy
), IntrinsicI
);
2000 const MemoryUseOrDef
*FirstNonDom
= nullptr;
2002 MSSAU
->getMemorySSA()->getBlockAccesses(IntrinsicI
->getParent());
2004 // If there are accesses in the current basic block, find the first one
2005 // that does not come before NewS. The new memory access is inserted
2006 // after the found access or before the terminator if no such access is
2009 for (const auto &Acc
: *AL
) {
2010 if (auto *Current
= dyn_cast
<MemoryUseOrDef
>(&Acc
))
2011 if (!Current
->getMemoryInst()->comesBefore(NewS
)) {
2012 FirstNonDom
= Current
;
2019 FirstNonDom
? MSSAU
->createMemoryAccessBefore(
2021 const_cast<MemoryUseOrDef
*>(FirstNonDom
))
2022 : MSSAU
->createMemoryAccessInBB(
2024 NewS
->getParent(), MemorySSA::BeforeTerminator
);
2026 MSSAU
->insertDef(cast
<MemoryDef
>(NewDef
), /*RenameUses=*/false);
2029 if (isAssumeWithEmptyBundle(*IntrinsicI
)) {
2030 markInstructionForDeletion(IntrinsicI
);
2036 if (isa
<Constant
>(V
)) {
2037 // If it's not false, and constant, it must evaluate to true. This means our
2038 // assume is assume(true), and thus, pointless, and we don't want to do
2039 // anything more here.
2043 Constant
*True
= ConstantInt::getTrue(V
->getContext());
2044 bool Changed
= false;
2046 for (BasicBlock
*Successor
: successors(IntrinsicI
->getParent())) {
2047 BasicBlockEdge
Edge(IntrinsicI
->getParent(), Successor
);
2049 // This property is only true in dominated successors, propagateEquality
2050 // will check dominance for us.
2051 Changed
|= propagateEquality(V
, True
, Edge
, false);
2054 // We can replace assume value with true, which covers cases like this:
2055 // call void @llvm.assume(i1 %cmp)
2056 // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
2057 ReplaceOperandsWithMap
[V
] = True
;
2059 // Similarly, after assume(!NotV) we know that NotV == false.
2061 if (match(V
, m_Not(m_Value(NotV
))))
2062 ReplaceOperandsWithMap
[NotV
] = ConstantInt::getFalse(V
->getContext());
2064 // If we find an equality fact, canonicalize all dominated uses in this block
2065 // to one of the two values. We heuristically choice the "oldest" of the
2066 // two where age is determined by value number. (Note that propagateEquality
2067 // above handles the cross block case.)
2069 // Key case to cover are:
2071 // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
2072 // call void @llvm.assume(i1 %cmp)
2073 // ret float %0 ; will change it to ret float 3.000000e+00
2075 // %load = load float, float* %addr
2076 // %cmp = fcmp oeq float %load, %0
2077 // call void @llvm.assume(i1 %cmp)
2078 // ret float %load ; will change it to ret float %0
2079 if (auto *CmpI
= dyn_cast
<CmpInst
>(V
)) {
2080 if (impliesEquivalanceIfTrue(CmpI
)) {
2081 Value
*CmpLHS
= CmpI
->getOperand(0);
2082 Value
*CmpRHS
= CmpI
->getOperand(1);
2083 // Heuristically pick the better replacement -- the choice of heuristic
2084 // isn't terribly important here, but the fact we canonicalize on some
2085 // replacement is for exposing other simplifications.
2086 // TODO: pull this out as a helper function and reuse w/existing
2087 // (slightly different) logic.
2088 if (isa
<Constant
>(CmpLHS
) && !isa
<Constant
>(CmpRHS
))
2089 std::swap(CmpLHS
, CmpRHS
);
2090 if (!isa
<Instruction
>(CmpLHS
) && isa
<Instruction
>(CmpRHS
))
2091 std::swap(CmpLHS
, CmpRHS
);
2092 if ((isa
<Argument
>(CmpLHS
) && isa
<Argument
>(CmpRHS
)) ||
2093 (isa
<Instruction
>(CmpLHS
) && isa
<Instruction
>(CmpRHS
))) {
2094 // Move the 'oldest' value to the right-hand side, using the value
2095 // number as a proxy for age.
2096 uint32_t LVN
= VN
.lookupOrAdd(CmpLHS
);
2097 uint32_t RVN
= VN
.lookupOrAdd(CmpRHS
);
2099 std::swap(CmpLHS
, CmpRHS
);
2102 // Handle degenerate case where we either haven't pruned a dead path or a
2103 // removed a trivial assume yet.
2104 if (isa
<Constant
>(CmpLHS
) && isa
<Constant
>(CmpRHS
))
2107 LLVM_DEBUG(dbgs() << "Replacing dominated uses of "
2108 << *CmpLHS
<< " with "
2109 << *CmpRHS
<< " in block "
2110 << IntrinsicI
->getParent()->getName() << "\n");
2113 // Setup the replacement map - this handles uses within the same block
2114 if (hasUsersIn(CmpLHS
, IntrinsicI
->getParent()))
2115 ReplaceOperandsWithMap
[CmpLHS
] = CmpRHS
;
2117 // NOTE: The non-block local cases are handled by the call to
2118 // propagateEquality above; this block is just about handling the block
2119 // local cases. TODO: There's a bunch of logic in propagateEqualiy which
2120 // isn't duplicated for the block local case, can we share it somehow?
2126 static void patchAndReplaceAllUsesWith(Instruction
*I
, Value
*Repl
) {
2127 patchReplacementInstruction(I
, Repl
);
2128 I
->replaceAllUsesWith(Repl
);
2131 /// Attempt to eliminate a load, first by eliminating it
2132 /// locally, and then attempting non-local elimination if that fails.
2133 bool GVNPass::processLoad(LoadInst
*L
) {
2137 // This code hasn't been audited for ordered or volatile memory access
2138 if (!L
->isUnordered())
2141 if (L
->use_empty()) {
2142 markInstructionForDeletion(L
);
2146 // ... to a pointer that has been loaded from before...
2147 MemDepResult Dep
= MD
->getDependency(L
);
2149 // If it is defined in another block, try harder.
2150 if (Dep
.isNonLocal())
2151 return processNonLocalLoad(L
);
2153 // Only handle the local case below
2154 if (!Dep
.isLocal()) {
2155 // This might be a NonFuncLocal or an Unknown
2157 // fast print dep, using operator<< on instruction is too slow.
2158 dbgs() << "GVN: load "; L
->printAsOperand(dbgs());
2159 dbgs() << " has unknown dependence\n";);
2163 auto AV
= AnalyzeLoadAvailability(L
, Dep
, L
->getPointerOperand());
2167 Value
*AvailableValue
= AV
->MaterializeAdjustedValue(L
, L
, *this);
2169 // MaterializeAdjustedValue is responsible for combining metadata.
2170 ICF
->removeUsersOf(L
);
2171 L
->replaceAllUsesWith(AvailableValue
);
2172 markInstructionForDeletion(L
);
2174 MSSAU
->removeMemoryAccess(L
);
2176 reportLoadElim(L
, AvailableValue
, ORE
);
2177 // Tell MDA to reexamine the reused pointer since we might have more
2178 // information after forwarding it.
2179 if (MD
&& AvailableValue
->getType()->isPtrOrPtrVectorTy())
2180 MD
->invalidateCachedPointerInfo(AvailableValue
);
2184 /// Return a pair the first field showing the value number of \p Exp and the
2185 /// second field showing whether it is a value number newly created.
2186 std::pair
<uint32_t, bool>
2187 GVNPass::ValueTable::assignExpNewValueNum(Expression
&Exp
) {
2188 uint32_t &e
= expressionNumbering
[Exp
];
2189 bool CreateNewValNum
= !e
;
2190 if (CreateNewValNum
) {
2191 Expressions
.push_back(Exp
);
2192 if (ExprIdx
.size() < nextValueNumber
+ 1)
2193 ExprIdx
.resize(nextValueNumber
* 2);
2194 e
= nextValueNumber
;
2195 ExprIdx
[nextValueNumber
++] = nextExprNumber
++;
2197 return {e
, CreateNewValNum
};
2200 /// Return whether all the values related with the same \p num are
2201 /// defined in \p BB.
2202 bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num
, const BasicBlock
*BB
,
2204 LeaderTableEntry
*Vals
= &Gvn
.LeaderTable
[Num
];
2205 while (Vals
&& Vals
->BB
== BB
)
2210 /// Wrap phiTranslateImpl to provide caching functionality.
2211 uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock
*Pred
,
2212 const BasicBlock
*PhiBlock
,
2213 uint32_t Num
, GVNPass
&Gvn
) {
2214 auto FindRes
= PhiTranslateTable
.find({Num
, Pred
});
2215 if (FindRes
!= PhiTranslateTable
.end())
2216 return FindRes
->second
;
2217 uint32_t NewNum
= phiTranslateImpl(Pred
, PhiBlock
, Num
, Gvn
);
2218 PhiTranslateTable
.insert({{Num
, Pred
}, NewNum
});
2222 // Return true if the value number \p Num and NewNum have equal value.
2223 // Return false if the result is unknown.
2224 bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num
, uint32_t NewNum
,
2225 const BasicBlock
*Pred
,
2226 const BasicBlock
*PhiBlock
,
2228 CallInst
*Call
= nullptr;
2229 LeaderTableEntry
*Vals
= &Gvn
.LeaderTable
[Num
];
2231 Call
= dyn_cast
<CallInst
>(Vals
->Val
);
2232 if (Call
&& Call
->getParent() == PhiBlock
)
2237 if (AA
->doesNotAccessMemory(Call
))
2240 if (!MD
|| !AA
->onlyReadsMemory(Call
))
2243 MemDepResult local_dep
= MD
->getDependency(Call
);
2244 if (!local_dep
.isNonLocal())
2247 const MemoryDependenceResults::NonLocalDepInfo
&deps
=
2248 MD
->getNonLocalCallDependency(Call
);
2250 // Check to see if the Call has no function local clobber.
2251 for (const NonLocalDepEntry
&D
: deps
) {
2252 if (D
.getResult().isNonFuncLocal())
2258 /// Translate value number \p Num using phis, so that it has the values of
2260 uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock
*Pred
,
2261 const BasicBlock
*PhiBlock
,
2262 uint32_t Num
, GVNPass
&Gvn
) {
2263 if (PHINode
*PN
= NumberingPhi
[Num
]) {
2264 for (unsigned i
= 0; i
!= PN
->getNumIncomingValues(); ++i
) {
2265 if (PN
->getParent() == PhiBlock
&& PN
->getIncomingBlock(i
) == Pred
)
2266 if (uint32_t TransVal
= lookup(PN
->getIncomingValue(i
), false))
2272 // If there is any value related with Num is defined in a BB other than
2273 // PhiBlock, it cannot depend on a phi in PhiBlock without going through
2274 // a backedge. We can do an early exit in that case to save compile time.
2275 if (!areAllValsInBB(Num
, PhiBlock
, Gvn
))
2278 if (Num
>= ExprIdx
.size() || ExprIdx
[Num
] == 0)
2280 Expression Exp
= Expressions
[ExprIdx
[Num
]];
2282 for (unsigned i
= 0; i
< Exp
.varargs
.size(); i
++) {
2283 // For InsertValue and ExtractValue, some varargs are index numbers
2284 // instead of value numbers. Those index numbers should not be
2286 if ((i
> 1 && Exp
.opcode
== Instruction::InsertValue
) ||
2287 (i
> 0 && Exp
.opcode
== Instruction::ExtractValue
) ||
2288 (i
> 1 && Exp
.opcode
== Instruction::ShuffleVector
))
2290 Exp
.varargs
[i
] = phiTranslate(Pred
, PhiBlock
, Exp
.varargs
[i
], Gvn
);
2293 if (Exp
.commutative
) {
2294 assert(Exp
.varargs
.size() >= 2 && "Unsupported commutative instruction!");
2295 if (Exp
.varargs
[0] > Exp
.varargs
[1]) {
2296 std::swap(Exp
.varargs
[0], Exp
.varargs
[1]);
2297 uint32_t Opcode
= Exp
.opcode
>> 8;
2298 if (Opcode
== Instruction::ICmp
|| Opcode
== Instruction::FCmp
)
2299 Exp
.opcode
= (Opcode
<< 8) |
2300 CmpInst::getSwappedPredicate(
2301 static_cast<CmpInst::Predicate
>(Exp
.opcode
& 255));
2305 if (uint32_t NewNum
= expressionNumbering
[Exp
]) {
2306 if (Exp
.opcode
== Instruction::Call
&& NewNum
!= Num
)
2307 return areCallValsEqual(Num
, NewNum
, Pred
, PhiBlock
, Gvn
) ? NewNum
: Num
;
2313 /// Erase stale entry from phiTranslate cache so phiTranslate can be computed
2315 void GVNPass::ValueTable::eraseTranslateCacheEntry(
2316 uint32_t Num
, const BasicBlock
&CurrBlock
) {
2317 for (const BasicBlock
*Pred
: predecessors(&CurrBlock
))
2318 PhiTranslateTable
.erase({Num
, Pred
});
2321 // In order to find a leader for a given value number at a
2322 // specific basic block, we first obtain the list of all Values for that number,
2323 // and then scan the list to find one whose block dominates the block in
2324 // question. This is fast because dominator tree queries consist of only
2325 // a few comparisons of DFS numbers.
2326 Value
*GVNPass::findLeader(const BasicBlock
*BB
, uint32_t num
) {
2327 LeaderTableEntry Vals
= LeaderTable
[num
];
2328 if (!Vals
.Val
) return nullptr;
2330 Value
*Val
= nullptr;
2331 if (DT
->dominates(Vals
.BB
, BB
)) {
2333 if (isa
<Constant
>(Val
)) return Val
;
2336 LeaderTableEntry
* Next
= Vals
.Next
;
2338 if (DT
->dominates(Next
->BB
, BB
)) {
2339 if (isa
<Constant
>(Next
->Val
)) return Next
->Val
;
2340 if (!Val
) Val
= Next
->Val
;
2349 /// There is an edge from 'Src' to 'Dst'. Return
2350 /// true if every path from the entry block to 'Dst' passes via this edge. In
2351 /// particular 'Dst' must not be reachable via another edge from 'Src'.
2352 static bool isOnlyReachableViaThisEdge(const BasicBlockEdge
&E
,
2353 DominatorTree
*DT
) {
2354 // While in theory it is interesting to consider the case in which Dst has
2355 // more than one predecessor, because Dst might be part of a loop which is
2356 // only reachable from Src, in practice it is pointless since at the time
2357 // GVN runs all such loops have preheaders, which means that Dst will have
2358 // been changed to have only one predecessor, namely Src.
2359 const BasicBlock
*Pred
= E
.getEnd()->getSinglePredecessor();
2360 assert((!Pred
|| Pred
== E
.getStart()) &&
2361 "No edge between these basic blocks!");
2362 return Pred
!= nullptr;
2365 void GVNPass::assignBlockRPONumber(Function
&F
) {
2366 BlockRPONumber
.clear();
2367 uint32_t NextBlockNumber
= 1;
2368 ReversePostOrderTraversal
<Function
*> RPOT(&F
);
2369 for (BasicBlock
*BB
: RPOT
)
2370 BlockRPONumber
[BB
] = NextBlockNumber
++;
2371 InvalidBlockRPONumbers
= false;
2374 bool GVNPass::replaceOperandsForInBlockEquality(Instruction
*Instr
) const {
2375 bool Changed
= false;
2376 for (unsigned OpNum
= 0; OpNum
< Instr
->getNumOperands(); ++OpNum
) {
2377 Value
*Operand
= Instr
->getOperand(OpNum
);
2378 auto it
= ReplaceOperandsWithMap
.find(Operand
);
2379 if (it
!= ReplaceOperandsWithMap
.end()) {
2380 LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand
<< " with "
2381 << *it
->second
<< " in instruction " << *Instr
<< '\n');
2382 Instr
->setOperand(OpNum
, it
->second
);
2389 /// The given values are known to be equal in every block
2390 /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
2391 /// 'RHS' everywhere in the scope. Returns whether a change was made.
2392 /// If DominatesByEdge is false, then it means that we will propagate the RHS
2393 /// value starting from the end of Root.Start.
2394 bool GVNPass::propagateEquality(Value
*LHS
, Value
*RHS
,
2395 const BasicBlockEdge
&Root
,
2396 bool DominatesByEdge
) {
2397 SmallVector
<std::pair
<Value
*, Value
*>, 4> Worklist
;
2398 Worklist
.push_back(std::make_pair(LHS
, RHS
));
2399 bool Changed
= false;
2400 // For speed, compute a conservative fast approximation to
2401 // DT->dominates(Root, Root.getEnd());
2402 const bool RootDominatesEnd
= isOnlyReachableViaThisEdge(Root
, DT
);
2404 while (!Worklist
.empty()) {
2405 std::pair
<Value
*, Value
*> Item
= Worklist
.pop_back_val();
2406 LHS
= Item
.first
; RHS
= Item
.second
;
2410 assert(LHS
->getType() == RHS
->getType() && "Equality but unequal types!");
2412 // Don't try to propagate equalities between constants.
2413 if (isa
<Constant
>(LHS
) && isa
<Constant
>(RHS
))
2416 // Prefer a constant on the right-hand side, or an Argument if no constants.
2417 if (isa
<Constant
>(LHS
) || (isa
<Argument
>(LHS
) && !isa
<Constant
>(RHS
)))
2418 std::swap(LHS
, RHS
);
2419 assert((isa
<Argument
>(LHS
) || isa
<Instruction
>(LHS
)) && "Unexpected value!");
2421 // If there is no obvious reason to prefer the left-hand side over the
2422 // right-hand side, ensure the longest lived term is on the right-hand side,
2423 // so the shortest lived term will be replaced by the longest lived.
2424 // This tends to expose more simplifications.
2425 uint32_t LVN
= VN
.lookupOrAdd(LHS
);
2426 if ((isa
<Argument
>(LHS
) && isa
<Argument
>(RHS
)) ||
2427 (isa
<Instruction
>(LHS
) && isa
<Instruction
>(RHS
))) {
2428 // Move the 'oldest' value to the right-hand side, using the value number
2429 // as a proxy for age.
2430 uint32_t RVN
= VN
.lookupOrAdd(RHS
);
2432 std::swap(LHS
, RHS
);
2437 // If value numbering later sees that an instruction in the scope is equal
2438 // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
2439 // the invariant that instructions only occur in the leader table for their
2440 // own value number (this is used by removeFromLeaderTable), do not do this
2441 // if RHS is an instruction (if an instruction in the scope is morphed into
2442 // LHS then it will be turned into RHS by the next GVN iteration anyway, so
2443 // using the leader table is about compiling faster, not optimizing better).
2444 // The leader table only tracks basic blocks, not edges. Only add to if we
2445 // have the simple case where the edge dominates the end.
2446 if (RootDominatesEnd
&& !isa
<Instruction
>(RHS
))
2447 addToLeaderTable(LVN
, RHS
, Root
.getEnd());
2449 // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
2450 // LHS always has at least one use that is not dominated by Root, this will
2451 // never do anything if LHS has only one use.
2452 if (!LHS
->hasOneUse()) {
2453 unsigned NumReplacements
=
2455 ? replaceDominatedUsesWith(LHS
, RHS
, *DT
, Root
)
2456 : replaceDominatedUsesWith(LHS
, RHS
, *DT
, Root
.getStart());
2458 Changed
|= NumReplacements
> 0;
2459 NumGVNEqProp
+= NumReplacements
;
2460 // Cached information for anything that uses LHS will be invalid.
2462 MD
->invalidateCachedPointerInfo(LHS
);
2465 // Now try to deduce additional equalities from this one. For example, if
2466 // the known equality was "(A != B)" == "false" then it follows that A and B
2467 // are equal in the scope. Only boolean equalities with an explicit true or
2468 // false RHS are currently supported.
2469 if (!RHS
->getType()->isIntegerTy(1))
2470 // Not a boolean equality - bail out.
2472 ConstantInt
*CI
= dyn_cast
<ConstantInt
>(RHS
);
2474 // RHS neither 'true' nor 'false' - bail out.
2476 // Whether RHS equals 'true'. Otherwise it equals 'false'.
2477 bool isKnownTrue
= CI
->isMinusOne();
2478 bool isKnownFalse
= !isKnownTrue
;
2480 // If "A && B" is known true then both A and B are known true. If "A || B"
2481 // is known false then both A and B are known false.
2483 if ((isKnownTrue
&& match(LHS
, m_LogicalAnd(m_Value(A
), m_Value(B
)))) ||
2484 (isKnownFalse
&& match(LHS
, m_LogicalOr(m_Value(A
), m_Value(B
))))) {
2485 Worklist
.push_back(std::make_pair(A
, RHS
));
2486 Worklist
.push_back(std::make_pair(B
, RHS
));
2490 // If we are propagating an equality like "(A == B)" == "true" then also
2491 // propagate the equality A == B. When propagating a comparison such as
2492 // "(A >= B)" == "true", replace all instances of "A < B" with "false".
2493 if (CmpInst
*Cmp
= dyn_cast
<CmpInst
>(LHS
)) {
2494 Value
*Op0
= Cmp
->getOperand(0), *Op1
= Cmp
->getOperand(1);
2496 // If "A == B" is known true, or "A != B" is known false, then replace
2497 // A with B everywhere in the scope. For floating point operations, we
2498 // have to be careful since equality does not always imply equivalance.
2499 if ((isKnownTrue
&& impliesEquivalanceIfTrue(Cmp
)) ||
2500 (isKnownFalse
&& impliesEquivalanceIfFalse(Cmp
)))
2501 Worklist
.push_back(std::make_pair(Op0
, Op1
));
2503 // If "A >= B" is known true, replace "A < B" with false everywhere.
2504 CmpInst::Predicate NotPred
= Cmp
->getInversePredicate();
2505 Constant
*NotVal
= ConstantInt::get(Cmp
->getType(), isKnownFalse
);
2506 // Since we don't have the instruction "A < B" immediately to hand, work
2507 // out the value number that it would have and use that to find an
2508 // appropriate instruction (if any).
2509 uint32_t NextNum
= VN
.getNextUnusedValueNumber();
2510 uint32_t Num
= VN
.lookupOrAddCmp(Cmp
->getOpcode(), NotPred
, Op0
, Op1
);
2511 // If the number we were assigned was brand new then there is no point in
2512 // looking for an instruction realizing it: there cannot be one!
2513 if (Num
< NextNum
) {
2514 Value
*NotCmp
= findLeader(Root
.getEnd(), Num
);
2515 if (NotCmp
&& isa
<Instruction
>(NotCmp
)) {
2516 unsigned NumReplacements
=
2518 ? replaceDominatedUsesWith(NotCmp
, NotVal
, *DT
, Root
)
2519 : replaceDominatedUsesWith(NotCmp
, NotVal
, *DT
,
2521 Changed
|= NumReplacements
> 0;
2522 NumGVNEqProp
+= NumReplacements
;
2523 // Cached information for anything that uses NotCmp will be invalid.
2525 MD
->invalidateCachedPointerInfo(NotCmp
);
2528 // Ensure that any instruction in scope that gets the "A < B" value number
2529 // is replaced with false.
2530 // The leader table only tracks basic blocks, not edges. Only add to if we
2531 // have the simple case where the edge dominates the end.
2532 if (RootDominatesEnd
)
2533 addToLeaderTable(Num
, NotVal
, Root
.getEnd());
2542 /// When calculating availability, handle an instruction
2543 /// by inserting it into the appropriate sets
2544 bool GVNPass::processInstruction(Instruction
*I
) {
2545 // Ignore dbg info intrinsics.
2546 if (isa
<DbgInfoIntrinsic
>(I
))
2549 // If the instruction can be easily simplified then do so now in preference
2550 // to value numbering it. Value numbering often exposes redundancies, for
2551 // example if it determines that %y is equal to %x then the instruction
2552 // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
2553 const DataLayout
&DL
= I
->getModule()->getDataLayout();
2554 if (Value
*V
= simplifyInstruction(I
, {DL
, TLI
, DT
, AC
})) {
2555 bool Changed
= false;
2556 if (!I
->use_empty()) {
2557 // Simplification can cause a special instruction to become not special.
2558 // For example, devirtualization to a willreturn function.
2559 ICF
->removeUsersOf(I
);
2560 I
->replaceAllUsesWith(V
);
2563 if (isInstructionTriviallyDead(I
, TLI
)) {
2564 markInstructionForDeletion(I
);
2568 if (MD
&& V
->getType()->isPtrOrPtrVectorTy())
2569 MD
->invalidateCachedPointerInfo(V
);
2575 if (auto *Assume
= dyn_cast
<AssumeInst
>(I
))
2576 return processAssumeIntrinsic(Assume
);
2578 if (LoadInst
*Load
= dyn_cast
<LoadInst
>(I
)) {
2579 if (processLoad(Load
))
2582 unsigned Num
= VN
.lookupOrAdd(Load
);
2583 addToLeaderTable(Num
, Load
, Load
->getParent());
2587 // For conditional branches, we can perform simple conditional propagation on
2588 // the condition value itself.
2589 if (BranchInst
*BI
= dyn_cast
<BranchInst
>(I
)) {
2590 if (!BI
->isConditional())
2593 if (isa
<Constant
>(BI
->getCondition()))
2594 return processFoldableCondBr(BI
);
2596 Value
*BranchCond
= BI
->getCondition();
2597 BasicBlock
*TrueSucc
= BI
->getSuccessor(0);
2598 BasicBlock
*FalseSucc
= BI
->getSuccessor(1);
2599 // Avoid multiple edges early.
2600 if (TrueSucc
== FalseSucc
)
2603 BasicBlock
*Parent
= BI
->getParent();
2604 bool Changed
= false;
2606 Value
*TrueVal
= ConstantInt::getTrue(TrueSucc
->getContext());
2607 BasicBlockEdge
TrueE(Parent
, TrueSucc
);
2608 Changed
|= propagateEquality(BranchCond
, TrueVal
, TrueE
, true);
2610 Value
*FalseVal
= ConstantInt::getFalse(FalseSucc
->getContext());
2611 BasicBlockEdge
FalseE(Parent
, FalseSucc
);
2612 Changed
|= propagateEquality(BranchCond
, FalseVal
, FalseE
, true);
2617 // For switches, propagate the case values into the case destinations.
2618 if (SwitchInst
*SI
= dyn_cast
<SwitchInst
>(I
)) {
2619 Value
*SwitchCond
= SI
->getCondition();
2620 BasicBlock
*Parent
= SI
->getParent();
2621 bool Changed
= false;
2623 // Remember how many outgoing edges there are to every successor.
2624 SmallDenseMap
<BasicBlock
*, unsigned, 16> SwitchEdges
;
2625 for (unsigned i
= 0, n
= SI
->getNumSuccessors(); i
!= n
; ++i
)
2626 ++SwitchEdges
[SI
->getSuccessor(i
)];
2628 for (SwitchInst::CaseIt i
= SI
->case_begin(), e
= SI
->case_end();
2630 BasicBlock
*Dst
= i
->getCaseSuccessor();
2631 // If there is only a single edge, propagate the case value into it.
2632 if (SwitchEdges
.lookup(Dst
) == 1) {
2633 BasicBlockEdge
E(Parent
, Dst
);
2634 Changed
|= propagateEquality(SwitchCond
, i
->getCaseValue(), E
, true);
2640 // Instructions with void type don't return a value, so there's
2641 // no point in trying to find redundancies in them.
2642 if (I
->getType()->isVoidTy())
2645 uint32_t NextNum
= VN
.getNextUnusedValueNumber();
2646 unsigned Num
= VN
.lookupOrAdd(I
);
2648 // Allocations are always uniquely numbered, so we can save time and memory
2649 // by fast failing them.
2650 if (isa
<AllocaInst
>(I
) || I
->isTerminator() || isa
<PHINode
>(I
)) {
2651 addToLeaderTable(Num
, I
, I
->getParent());
2655 // If the number we were assigned was a brand new VN, then we don't
2656 // need to do a lookup to see if the number already exists
2657 // somewhere in the domtree: it can't!
2658 if (Num
>= NextNum
) {
2659 addToLeaderTable(Num
, I
, I
->getParent());
2663 // Perform fast-path value-number based elimination of values inherited from
2665 Value
*Repl
= findLeader(I
->getParent(), Num
);
2667 // Failure, just remember this instance for future use.
2668 addToLeaderTable(Num
, I
, I
->getParent());
2673 // If I was the result of a shortcut PRE, it might already be in the table
2674 // and the best replacement for itself. Nothing to do.
2679 patchAndReplaceAllUsesWith(I
, Repl
);
2680 if (MD
&& Repl
->getType()->isPtrOrPtrVectorTy())
2681 MD
->invalidateCachedPointerInfo(Repl
);
2682 markInstructionForDeletion(I
);
2686 /// runOnFunction - This is the main transformation entry point for a function.
2687 bool GVNPass::runImpl(Function
&F
, AssumptionCache
&RunAC
, DominatorTree
&RunDT
,
2688 const TargetLibraryInfo
&RunTLI
, AAResults
&RunAA
,
2689 MemoryDependenceResults
*RunMD
, LoopInfo
&LI
,
2690 OptimizationRemarkEmitter
*RunORE
, MemorySSA
*MSSA
) {
2695 VN
.setAliasAnalysis(&RunAA
);
2697 ImplicitControlFlowTracking ImplicitCFT
;
2702 InvalidBlockRPONumbers
= true;
2703 MemorySSAUpdater
Updater(MSSA
);
2704 MSSAU
= MSSA
? &Updater
: nullptr;
2706 bool Changed
= false;
2707 bool ShouldContinue
= true;
2709 DomTreeUpdater
DTU(DT
, DomTreeUpdater::UpdateStrategy::Eager
);
2710 // Merge unconditional branches, allowing PRE to catch more
2711 // optimization opportunities.
2712 for (BasicBlock
&BB
: llvm::make_early_inc_range(F
)) {
2713 bool removedBlock
= MergeBlockIntoPredecessor(&BB
, &DTU
, &LI
, MSSAU
, MD
);
2717 Changed
|= removedBlock
;
2720 unsigned Iteration
= 0;
2721 while (ShouldContinue
) {
2722 LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration
<< "\n");
2724 ShouldContinue
= iterateOnFunction(F
);
2725 Changed
|= ShouldContinue
;
2729 if (isPREEnabled()) {
2730 // Fabricate val-num for dead-code in order to suppress assertion in
2732 assignValNumForDeadCode();
2733 bool PREChanged
= true;
2734 while (PREChanged
) {
2735 PREChanged
= performPRE(F
);
2736 Changed
|= PREChanged
;
2740 // FIXME: Should perform GVN again after PRE does something. PRE can move
2741 // computations into blocks where they become fully redundant. Note that
2742 // we can't do this until PRE's critical edge splitting updates memdep.
2743 // Actually, when this happens, we should just fully integrate PRE into GVN.
2745 cleanupGlobalSets();
2746 // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each
2750 if (MSSA
&& VerifyMemorySSA
)
2751 MSSA
->verifyMemorySSA();
2756 bool GVNPass::processBlock(BasicBlock
*BB
) {
2757 // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
2758 // (and incrementing BI before processing an instruction).
2759 assert(InstrsToErase
.empty() &&
2760 "We expect InstrsToErase to be empty across iterations");
2761 if (DeadBlocks
.count(BB
))
2764 // Clearing map before every BB because it can be used only for single BB.
2765 ReplaceOperandsWithMap
.clear();
2766 bool ChangedFunction
= false;
2768 // Since we may not have visited the input blocks of the phis, we can't
2769 // use our normal hash approach for phis. Instead, simply look for
2770 // obvious duplicates. The first pass of GVN will tend to create
2771 // identical phis, and the second or later passes can eliminate them.
2772 SmallPtrSet
<PHINode
*, 8> PHINodesToRemove
;
2773 ChangedFunction
|= EliminateDuplicatePHINodes(BB
, PHINodesToRemove
);
2774 for (PHINode
*PN
: PHINodesToRemove
) {
2776 removeInstruction(PN
);
2779 for (BasicBlock::iterator BI
= BB
->begin(), BE
= BB
->end();
2781 if (!ReplaceOperandsWithMap
.empty())
2782 ChangedFunction
|= replaceOperandsForInBlockEquality(&*BI
);
2783 ChangedFunction
|= processInstruction(&*BI
);
2785 if (InstrsToErase
.empty()) {
2790 // If we need some instructions deleted, do it now.
2791 NumGVNInstr
+= InstrsToErase
.size();
2793 // Avoid iterator invalidation.
2794 bool AtStart
= BI
== BB
->begin();
2798 for (auto *I
: InstrsToErase
) {
2799 assert(I
->getParent() == BB
&& "Removing instruction from wrong block?");
2800 LLVM_DEBUG(dbgs() << "GVN removed: " << *I
<< '\n');
2801 salvageKnowledge(I
, AC
);
2802 salvageDebugInfo(*I
);
2803 removeInstruction(I
);
2805 InstrsToErase
.clear();
2813 return ChangedFunction
;
2816 // Instantiate an expression in a predecessor that lacked it.
2817 bool GVNPass::performScalarPREInsertion(Instruction
*Instr
, BasicBlock
*Pred
,
2818 BasicBlock
*Curr
, unsigned int ValNo
) {
2819 // Because we are going top-down through the block, all value numbers
2820 // will be available in the predecessor by the time we need them. Any
2821 // that weren't originally present will have been instantiated earlier
2823 bool success
= true;
2824 for (unsigned i
= 0, e
= Instr
->getNumOperands(); i
!= e
; ++i
) {
2825 Value
*Op
= Instr
->getOperand(i
);
2826 if (isa
<Argument
>(Op
) || isa
<Constant
>(Op
) || isa
<GlobalValue
>(Op
))
2828 // This could be a newly inserted instruction, in which case, we won't
2829 // find a value number, and should give up before we hurt ourselves.
2830 // FIXME: Rewrite the infrastructure to let it easier to value number
2831 // and process newly inserted instructions.
2832 if (!VN
.exists(Op
)) {
2837 VN
.phiTranslate(Pred
, Curr
, VN
.lookup(Op
), *this);
2838 if (Value
*V
= findLeader(Pred
, TValNo
)) {
2839 Instr
->setOperand(i
, V
);
2846 // Fail out if we encounter an operand that is not available in
2847 // the PRE predecessor. This is typically because of loads which
2848 // are not value numbered precisely.
2852 Instr
->insertBefore(Pred
->getTerminator());
2853 Instr
->setName(Instr
->getName() + ".pre");
2854 Instr
->setDebugLoc(Instr
->getDebugLoc());
2856 ICF
->insertInstructionTo(Instr
, Pred
);
2858 unsigned Num
= VN
.lookupOrAdd(Instr
);
2861 // Update the availability map to include the new instruction.
2862 addToLeaderTable(Num
, Instr
, Pred
);
2866 bool GVNPass::performScalarPRE(Instruction
*CurInst
) {
2867 if (isa
<AllocaInst
>(CurInst
) || CurInst
->isTerminator() ||
2868 isa
<PHINode
>(CurInst
) || CurInst
->getType()->isVoidTy() ||
2869 CurInst
->mayReadFromMemory() || CurInst
->mayHaveSideEffects() ||
2870 isa
<DbgInfoIntrinsic
>(CurInst
))
2873 // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
2874 // sinking the compare again, and it would force the code generator to
2875 // move the i1 from processor flags or predicate registers into a general
2876 // purpose register.
2877 if (isa
<CmpInst
>(CurInst
))
2880 // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
2881 // sinking the addressing mode computation back to its uses. Extending the
2882 // GEP's live range increases the register pressure, and therefore it can
2883 // introduce unnecessary spills.
2885 // This doesn't prevent Load PRE. PHI translation will make the GEP available
2886 // to the load by moving it to the predecessor block if necessary.
2887 if (isa
<GetElementPtrInst
>(CurInst
))
2890 if (auto *CallB
= dyn_cast
<CallBase
>(CurInst
)) {
2891 // We don't currently value number ANY inline asm calls.
2892 if (CallB
->isInlineAsm())
2896 uint32_t ValNo
= VN
.lookup(CurInst
);
2898 // Look for the predecessors for PRE opportunities. We're
2899 // only trying to solve the basic diamond case, where
2900 // a value is computed in the successor and one predecessor,
2901 // but not the other. We also explicitly disallow cases
2902 // where the successor is its own predecessor, because they're
2903 // more complicated to get right.
2904 unsigned NumWith
= 0;
2905 unsigned NumWithout
= 0;
2906 BasicBlock
*PREPred
= nullptr;
2907 BasicBlock
*CurrentBlock
= CurInst
->getParent();
2909 // Update the RPO numbers for this function.
2910 if (InvalidBlockRPONumbers
)
2911 assignBlockRPONumber(*CurrentBlock
->getParent());
2913 SmallVector
<std::pair
<Value
*, BasicBlock
*>, 8> predMap
;
2914 for (BasicBlock
*P
: predecessors(CurrentBlock
)) {
2915 // We're not interested in PRE where blocks with predecessors that are
2917 if (!DT
->isReachableFromEntry(P
)) {
2921 // It is not safe to do PRE when P->CurrentBlock is a loop backedge.
2922 assert(BlockRPONumber
.count(P
) && BlockRPONumber
.count(CurrentBlock
) &&
2923 "Invalid BlockRPONumber map.");
2924 if (BlockRPONumber
[P
] >= BlockRPONumber
[CurrentBlock
]) {
2929 uint32_t TValNo
= VN
.phiTranslate(P
, CurrentBlock
, ValNo
, *this);
2930 Value
*predV
= findLeader(P
, TValNo
);
2932 predMap
.push_back(std::make_pair(static_cast<Value
*>(nullptr), P
));
2935 } else if (predV
== CurInst
) {
2936 /* CurInst dominates this predecessor. */
2940 predMap
.push_back(std::make_pair(predV
, P
));
2945 // Don't do PRE when it might increase code size, i.e. when
2946 // we would need to insert instructions in more than one pred.
2947 if (NumWithout
> 1 || NumWith
== 0)
2950 // We may have a case where all predecessors have the instruction,
2951 // and we just need to insert a phi node. Otherwise, perform
2953 Instruction
*PREInstr
= nullptr;
2955 if (NumWithout
!= 0) {
2956 if (!isSafeToSpeculativelyExecute(CurInst
)) {
2957 // It is only valid to insert a new instruction if the current instruction
2958 // is always executed. An instruction with implicit control flow could
2959 // prevent us from doing it. If we cannot speculate the execution, then
2960 // PRE should be prohibited.
2961 if (ICF
->isDominatedByICFIFromSameBlock(CurInst
))
2965 // Don't do PRE across indirect branch.
2966 if (isa
<IndirectBrInst
>(PREPred
->getTerminator()))
2969 // We can't do PRE safely on a critical edge, so instead we schedule
2970 // the edge to be split and perform the PRE the next time we iterate
2972 unsigned SuccNum
= GetSuccessorNumber(PREPred
, CurrentBlock
);
2973 if (isCriticalEdge(PREPred
->getTerminator(), SuccNum
)) {
2974 toSplit
.push_back(std::make_pair(PREPred
->getTerminator(), SuccNum
));
2977 // We need to insert somewhere, so let's give it a shot
2978 PREInstr
= CurInst
->clone();
2979 if (!performScalarPREInsertion(PREInstr
, PREPred
, CurrentBlock
, ValNo
)) {
2980 // If we failed insertion, make sure we remove the instruction.
2982 verifyRemoved(PREInstr
);
2984 PREInstr
->deleteValue();
2989 // Either we should have filled in the PRE instruction, or we should
2990 // not have needed insertions.
2991 assert(PREInstr
!= nullptr || NumWithout
== 0);
2995 // Create a PHI to make the value available in this block.
2996 PHINode
*Phi
= PHINode::Create(CurInst
->getType(), predMap
.size(),
2997 CurInst
->getName() + ".pre-phi");
2998 Phi
->insertBefore(CurrentBlock
->begin());
2999 for (unsigned i
= 0, e
= predMap
.size(); i
!= e
; ++i
) {
3000 if (Value
*V
= predMap
[i
].first
) {
3001 // If we use an existing value in this phi, we have to patch the original
3002 // value because the phi will be used to replace a later value.
3003 patchReplacementInstruction(CurInst
, V
);
3004 Phi
->addIncoming(V
, predMap
[i
].second
);
3006 Phi
->addIncoming(PREInstr
, PREPred
);
3010 // After creating a new PHI for ValNo, the phi translate result for ValNo will
3011 // be changed, so erase the related stale entries in phi translate cache.
3012 VN
.eraseTranslateCacheEntry(ValNo
, *CurrentBlock
);
3013 addToLeaderTable(ValNo
, Phi
, CurrentBlock
);
3014 Phi
->setDebugLoc(CurInst
->getDebugLoc());
3015 CurInst
->replaceAllUsesWith(Phi
);
3016 if (MD
&& Phi
->getType()->isPtrOrPtrVectorTy())
3017 MD
->invalidateCachedPointerInfo(Phi
);
3019 removeFromLeaderTable(ValNo
, CurInst
, CurrentBlock
);
3021 LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst
<< '\n');
3022 removeInstruction(CurInst
);
3028 /// Perform a purely local form of PRE that looks for diamond
3029 /// control flow patterns and attempts to perform simple PRE at the join point.
3030 bool GVNPass::performPRE(Function
&F
) {
3031 bool Changed
= false;
3032 for (BasicBlock
*CurrentBlock
: depth_first(&F
.getEntryBlock())) {
3033 // Nothing to PRE in the entry block.
3034 if (CurrentBlock
== &F
.getEntryBlock())
3037 // Don't perform PRE on an EH pad.
3038 if (CurrentBlock
->isEHPad())
3041 for (BasicBlock::iterator BI
= CurrentBlock
->begin(),
3042 BE
= CurrentBlock
->end();
3044 Instruction
*CurInst
= &*BI
++;
3045 Changed
|= performScalarPRE(CurInst
);
3049 if (splitCriticalEdges())
3055 /// Split the critical edge connecting the given two blocks, and return
3056 /// the block inserted to the critical edge.
3057 BasicBlock
*GVNPass::splitCriticalEdges(BasicBlock
*Pred
, BasicBlock
*Succ
) {
3058 // GVN does not require loop-simplify, do not try to preserve it if it is not
3060 BasicBlock
*BB
= SplitCriticalEdge(
3062 CriticalEdgeSplittingOptions(DT
, LI
, MSSAU
).unsetPreserveLoopSimplify());
3065 MD
->invalidateCachedPredecessors();
3066 InvalidBlockRPONumbers
= true;
3071 /// Split critical edges found during the previous
3072 /// iteration that may enable further optimization.
3073 bool GVNPass::splitCriticalEdges() {
3074 if (toSplit
.empty())
3077 bool Changed
= false;
3079 std::pair
<Instruction
*, unsigned> Edge
= toSplit
.pop_back_val();
3080 Changed
|= SplitCriticalEdge(Edge
.first
, Edge
.second
,
3081 CriticalEdgeSplittingOptions(DT
, LI
, MSSAU
)) !=
3083 } while (!toSplit
.empty());
3086 MD
->invalidateCachedPredecessors();
3087 InvalidBlockRPONumbers
= true;
3092 /// Executes one iteration of GVN
3093 bool GVNPass::iterateOnFunction(Function
&F
) {
3094 cleanupGlobalSets();
3096 // Top-down walk of the dominator tree
3097 bool Changed
= false;
3098 // Needed for value numbering with phi construction to work.
3099 // RPOT walks the graph in its constructor and will not be invalidated during
3101 ReversePostOrderTraversal
<Function
*> RPOT(&F
);
3103 for (BasicBlock
*BB
: RPOT
)
3104 Changed
|= processBlock(BB
);
3109 void GVNPass::cleanupGlobalSets() {
3111 LeaderTable
.clear();
3112 BlockRPONumber
.clear();
3113 TableAllocator
.Reset();
3115 InvalidBlockRPONumbers
= true;
3118 void GVNPass::removeInstruction(Instruction
*I
) {
3119 if (MD
) MD
->removeInstruction(I
);
3121 MSSAU
->removeMemoryAccess(I
);
3125 ICF
->removeInstruction(I
);
3126 I
->eraseFromParent();
3129 /// Verify that the specified instruction does not occur in our
3130 /// internal data structures.
3131 void GVNPass::verifyRemoved(const Instruction
*Inst
) const {
3132 VN
.verifyRemoved(Inst
);
3134 // Walk through the value number scope to make sure the instruction isn't
3135 // ferreted away in it.
3136 for (const auto &I
: LeaderTable
) {
3137 const LeaderTableEntry
*Node
= &I
.second
;
3138 assert(Node
->Val
!= Inst
&& "Inst still in value numbering scope!");
3140 while (Node
->Next
) {
3142 assert(Node
->Val
!= Inst
&& "Inst still in value numbering scope!");
3147 /// BB is declared dead, which implied other blocks become dead as well. This
3148 /// function is to add all these blocks to "DeadBlocks". For the dead blocks'
3149 /// live successors, update their phi nodes by replacing the operands
3150 /// corresponding to dead blocks with UndefVal.
3151 void GVNPass::addDeadBlock(BasicBlock
*BB
) {
3152 SmallVector
<BasicBlock
*, 4> NewDead
;
3153 SmallSetVector
<BasicBlock
*, 4> DF
;
3155 NewDead
.push_back(BB
);
3156 while (!NewDead
.empty()) {
3157 BasicBlock
*D
= NewDead
.pop_back_val();
3158 if (DeadBlocks
.count(D
))
3161 // All blocks dominated by D are dead.
3162 SmallVector
<BasicBlock
*, 8> Dom
;
3163 DT
->getDescendants(D
, Dom
);
3164 DeadBlocks
.insert(Dom
.begin(), Dom
.end());
3166 // Figure out the dominance-frontier(D).
3167 for (BasicBlock
*B
: Dom
) {
3168 for (BasicBlock
*S
: successors(B
)) {
3169 if (DeadBlocks
.count(S
))
3172 bool AllPredDead
= true;
3173 for (BasicBlock
*P
: predecessors(S
))
3174 if (!DeadBlocks
.count(P
)) {
3175 AllPredDead
= false;
3180 // S could be proved dead later on. That is why we don't update phi
3181 // operands at this moment.
3184 // While S is not dominated by D, it is dead by now. This could take
3185 // place if S already have a dead predecessor before D is declared
3187 NewDead
.push_back(S
);
3193 // For the dead blocks' live successors, update their phi nodes by replacing
3194 // the operands corresponding to dead blocks with UndefVal.
3195 for (BasicBlock
*B
: DF
) {
3196 if (DeadBlocks
.count(B
))
3199 // First, split the critical edges. This might also create additional blocks
3200 // to preserve LoopSimplify form and adjust edges accordingly.
3201 SmallVector
<BasicBlock
*, 4> Preds(predecessors(B
));
3202 for (BasicBlock
*P
: Preds
) {
3203 if (!DeadBlocks
.count(P
))
3206 if (llvm::is_contained(successors(P
), B
) &&
3207 isCriticalEdge(P
->getTerminator(), B
)) {
3208 if (BasicBlock
*S
= splitCriticalEdges(P
, B
))
3209 DeadBlocks
.insert(P
= S
);
3213 // Now poison the incoming values from the dead predecessors.
3214 for (BasicBlock
*P
: predecessors(B
)) {
3215 if (!DeadBlocks
.count(P
))
3217 for (PHINode
&Phi
: B
->phis()) {
3218 Phi
.setIncomingValueForBlock(P
, PoisonValue::get(Phi
.getType()));
3220 MD
->invalidateCachedPointerInfo(&Phi
);
3226 // If the given branch is recognized as a foldable branch (i.e. conditional
3227 // branch with constant condition), it will perform following analyses and
3229 // 1) If the dead out-coming edge is a critical-edge, split it. Let
3230 // R be the target of the dead out-coming edge.
3231 // 1) Identify the set of dead blocks implied by the branch's dead outcoming
3232 // edge. The result of this step will be {X| X is dominated by R}
3233 // 2) Identify those blocks which haves at least one dead predecessor. The
3234 // result of this step will be dominance-frontier(R).
3235 // 3) Update the PHIs in DF(R) by replacing the operands corresponding to
3236 // dead blocks with "UndefVal" in an hope these PHIs will optimized away.
3238 // Return true iff *NEW* dead code are found.
3239 bool GVNPass::processFoldableCondBr(BranchInst
*BI
) {
3240 if (!BI
|| BI
->isUnconditional())
3243 // If a branch has two identical successors, we cannot declare either dead.
3244 if (BI
->getSuccessor(0) == BI
->getSuccessor(1))
3247 ConstantInt
*Cond
= dyn_cast
<ConstantInt
>(BI
->getCondition());
3251 BasicBlock
*DeadRoot
=
3252 Cond
->getZExtValue() ? BI
->getSuccessor(1) : BI
->getSuccessor(0);
3253 if (DeadBlocks
.count(DeadRoot
))
3256 if (!DeadRoot
->getSinglePredecessor())
3257 DeadRoot
= splitCriticalEdges(BI
->getParent(), DeadRoot
);
3259 addDeadBlock(DeadRoot
);
3263 // performPRE() will trigger assert if it comes across an instruction without
3264 // associated val-num. As it normally has far more live instructions than dead
3265 // instructions, it makes more sense just to "fabricate" a val-number for the
3266 // dead code than checking if instruction involved is dead or not.
3267 void GVNPass::assignValNumForDeadCode() {
3268 for (BasicBlock
*BB
: DeadBlocks
) {
3269 for (Instruction
&Inst
: *BB
) {
3270 unsigned ValNum
= VN
.lookupOrAdd(&Inst
);
3271 addToLeaderTable(ValNum
, &Inst
, BB
);
3276 class llvm::gvn::GVNLegacyPass
: public FunctionPass
{
3278 static char ID
; // Pass identification, replacement for typeid
3280 explicit GVNLegacyPass(bool NoMemDepAnalysis
= !GVNEnableMemDep
)
3281 : FunctionPass(ID
), Impl(GVNOptions().setMemDep(!NoMemDepAnalysis
)) {
3282 initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
3285 bool runOnFunction(Function
&F
) override
{
3286 if (skipFunction(F
))
3289 auto *MSSAWP
= getAnalysisIfAvailable
<MemorySSAWrapperPass
>();
3290 return Impl
.runImpl(
3291 F
, getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(F
),
3292 getAnalysis
<DominatorTreeWrapperPass
>().getDomTree(),
3293 getAnalysis
<TargetLibraryInfoWrapperPass
>().getTLI(F
),
3294 getAnalysis
<AAResultsWrapperPass
>().getAAResults(),
3295 Impl
.isMemDepEnabled()
3296 ? &getAnalysis
<MemoryDependenceWrapperPass
>().getMemDep()
3298 getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo(),
3299 &getAnalysis
<OptimizationRemarkEmitterWrapperPass
>().getORE(),
3300 MSSAWP
? &MSSAWP
->getMSSA() : nullptr);
3303 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
3304 AU
.addRequired
<AssumptionCacheTracker
>();
3305 AU
.addRequired
<DominatorTreeWrapperPass
>();
3306 AU
.addRequired
<TargetLibraryInfoWrapperPass
>();
3307 AU
.addRequired
<LoopInfoWrapperPass
>();
3308 if (Impl
.isMemDepEnabled())
3309 AU
.addRequired
<MemoryDependenceWrapperPass
>();
3310 AU
.addRequired
<AAResultsWrapperPass
>();
3311 AU
.addPreserved
<DominatorTreeWrapperPass
>();
3312 AU
.addPreserved
<GlobalsAAWrapperPass
>();
3313 AU
.addPreserved
<TargetLibraryInfoWrapperPass
>();
3314 AU
.addPreserved
<LoopInfoWrapperPass
>();
3315 AU
.addRequired
<OptimizationRemarkEmitterWrapperPass
>();
3316 AU
.addPreserved
<MemorySSAWrapperPass
>();
3323 char GVNLegacyPass::ID
= 0;
3325 INITIALIZE_PASS_BEGIN(GVNLegacyPass
, "gvn", "Global Value Numbering", false, false)
3326 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
)
3327 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass
)
3328 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
3329 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
3330 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
)
3331 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass
)
3332 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass
)
3333 INITIALIZE_PASS_END(GVNLegacyPass
, "gvn", "Global Value Numbering", false, false)
3335 // The public interface to this file...
3336 FunctionPass
*llvm::createGVNPass(bool NoMemDepAnalysis
) {
3337 return new GVNLegacyPass(NoMemDepAnalysis
);